diff --git a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/diff.patch b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/diff.patch index 233b07f8afeedcf2bf4b2da40bf2f0029cb10310..a19857a3f0498fd11439c7fbc9e38a3a6a02b5b8 100644 --- a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/diff.patch +++ b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/diff.patch @@ -1,89 +1,89 @@ -diff --git a/PyTorch/SpeechRecognition/Jasper/common/features.py b/PyTorch/SpeechRecognition/Jasper/common/features.py -index ad7a52f..af02aff 100644 ---- a/PyTorch/SpeechRecognition/Jasper/common/features.py -+++ b/PyTorch/SpeechRecognition/Jasper/common/features.py -@@ -5,7 +5,7 @@ import librosa - import torch - import torch.nn as nn - --from apex import amp -+# from apex import amp - - - class BaseFeatures(nn.Module): -@@ -46,8 +46,9 @@ class BaseFeatures(nn.Module): - dtype = audio.dtype - audio = audio.float() - if optim_level == 1: -- with amp.disable_casts(): -- feat, feat_lens = self.calculate_features(audio, audio_lens) -+ pass -+ # with amp.disable_casts(): -+ # feat, feat_lens = self.calculate_features(audio, audio_lens) - else: - feat, feat_lens = self.calculate_features(audio, audio_lens) - -diff --git a/PyTorch/SpeechRecognition/Jasper/common/helpers.py b/PyTorch/SpeechRecognition/Jasper/common/helpers.py -index 742f159..c989b6e 100644 ---- a/PyTorch/SpeechRecognition/Jasper/common/helpers.py -+++ b/PyTorch/SpeechRecognition/Jasper/common/helpers.py -@@ -17,7 +17,7 @@ import os - import re - from collections import OrderedDict - --from apex import amp -+# from apex import amp - - import torch - import torch.distributed as dist -@@ -234,7 +234,8 @@ class Checkpointer(object): - 'state_dict': unwrap_ddp(model).state_dict(), - 'ema_state_dict': unwrap_ddp(ema_model).state_dict() if ema_model is not None else None, - 'optimizer': optimizer.state_dict(), -- 'amp': amp.state_dict() if self.use_amp else None, -+ # 'amp': amp.state_dict() if self.use_amp else None, -+ 'amp': None, - } - - if is_best: -@@ -293,8 +294,8 @@ class Checkpointer(object): - - optimizer.load_state_dict(checkpoint['optimizer']) - -- if self.use_amp: -- amp.load_state_dict(checkpoint['amp']) -+ # if self.use_amp: -+ # amp.load_state_dict(checkpoint['amp']) - - meta['start_epoch'] = checkpoint.get('epoch') - meta['best_wer'] = checkpoint.get('best_wer', meta['best_wer']) -diff --git a/PyTorch/SpeechRecognition/Jasper/jasper/model.py b/PyTorch/SpeechRecognition/Jasper/jasper/model.py -index dd38ce4..86ccb91 100644 ---- a/PyTorch/SpeechRecognition/Jasper/jasper/model.py -+++ b/PyTorch/SpeechRecognition/Jasper/jasper/model.py -@@ -66,14 +66,22 @@ class MaskedConv1d(nn.Conv1d): - self.masked = masked - - def get_seq_len(self, lens): -- return ((lens + 2 * self.padding[0] - self.dilation[0] -- * (self.kernel_size[0] - 1) - 1) // self.stride[0] + 1) -+ if torch.onnx.is_in_onnx_export(): -+ return ((lens + 2. * self.padding[0] - self.dilation[0] -+ * (self.kernel_size[0] - 1.) - 1.) 
// self.stride[0] + 1.).int() -+ else: -+ return ((lens + 2 * self.padding[0] - self.dilation[0] -+ * (self.kernel_size[0] - 1) - 1) // self.stride[0] + 1) - - def forward(self, x, x_lens=None): - if self.masked: - max_len = x.size(2) - idxs = torch.arange(max_len, dtype=x_lens.dtype, device=x_lens.device) -- mask = idxs.expand(x_lens.size(0), max_len) >= x_lens.unsqueeze(1) -+ if torch.onnx.is_in_onnx_export(): -+ temp = torch.zeros(x_lens.size(0), max_len) -+ mask = idxs.expand_as(temp) >= x_lens.unsqueeze(1) -+ else: -+ mask = idxs.expand(x_lens.size(0), max_len) >= x_lens.unsqueeze(1) - x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0) - x_lens = self.get_seq_len(x_lens) +diff --git a/PyTorch/SpeechRecognition/Jasper/common/features.py b/PyTorch/SpeechRecognition/Jasper/common/features.py +index ad7a52f..af02aff 100644 +--- a/PyTorch/SpeechRecognition/Jasper/common/features.py ++++ b/PyTorch/SpeechRecognition/Jasper/common/features.py +@@ -5,7 +5,7 @@ import librosa + import torch + import torch.nn as nn + +-from apex import amp ++# from apex import amp + + + class BaseFeatures(nn.Module): +@@ -46,8 +46,9 @@ class BaseFeatures(nn.Module): + dtype = audio.dtype + audio = audio.float() + if optim_level == 1: +- with amp.disable_casts(): +- feat, feat_lens = self.calculate_features(audio, audio_lens) ++ pass ++ # with amp.disable_casts(): ++ # feat, feat_lens = self.calculate_features(audio, audio_lens) + else: + feat, feat_lens = self.calculate_features(audio, audio_lens) + +diff --git a/PyTorch/SpeechRecognition/Jasper/common/helpers.py b/PyTorch/SpeechRecognition/Jasper/common/helpers.py +index 742f159..c989b6e 100644 +--- a/PyTorch/SpeechRecognition/Jasper/common/helpers.py ++++ b/PyTorch/SpeechRecognition/Jasper/common/helpers.py +@@ -17,7 +17,7 @@ import os + import re + from collections import OrderedDict + +-from apex import amp ++# from apex import amp + + import torch + import torch.distributed as dist +@@ -234,7 +234,8 @@ class Checkpointer(object): + 'state_dict': unwrap_ddp(model).state_dict(), + 'ema_state_dict': unwrap_ddp(ema_model).state_dict() if ema_model is not None else None, + 'optimizer': optimizer.state_dict(), +- 'amp': amp.state_dict() if self.use_amp else None, ++ # 'amp': amp.state_dict() if self.use_amp else None, ++ 'amp': None, + } + + if is_best: +@@ -293,8 +294,8 @@ class Checkpointer(object): + + optimizer.load_state_dict(checkpoint['optimizer']) + +- if self.use_amp: +- amp.load_state_dict(checkpoint['amp']) ++ # if self.use_amp: ++ # amp.load_state_dict(checkpoint['amp']) + + meta['start_epoch'] = checkpoint.get('epoch') + meta['best_wer'] = checkpoint.get('best_wer', meta['best_wer']) +diff --git a/PyTorch/SpeechRecognition/Jasper/jasper/model.py b/PyTorch/SpeechRecognition/Jasper/jasper/model.py +index dd38ce4..86ccb91 100644 +--- a/PyTorch/SpeechRecognition/Jasper/jasper/model.py ++++ b/PyTorch/SpeechRecognition/Jasper/jasper/model.py +@@ -66,14 +66,22 @@ class MaskedConv1d(nn.Conv1d): + self.masked = masked + + def get_seq_len(self, lens): +- return ((lens + 2 * self.padding[0] - self.dilation[0] +- * (self.kernel_size[0] - 1) - 1) // self.stride[0] + 1) ++ if torch.onnx.is_in_onnx_export(): ++ return ((lens + 2. * self.padding[0] - self.dilation[0] ++ * (self.kernel_size[0] - 1.) - 1.) 
// self.stride[0] + 1.).int() ++ else: ++ return ((lens + 2 * self.padding[0] - self.dilation[0] ++ * (self.kernel_size[0] - 1) - 1) // self.stride[0] + 1) + + def forward(self, x, x_lens=None): + if self.masked: + max_len = x.size(2) + idxs = torch.arange(max_len, dtype=x_lens.dtype, device=x_lens.device) +- mask = idxs.expand(x_lens.size(0), max_len) >= x_lens.unsqueeze(1) ++ if torch.onnx.is_in_onnx_export(): ++ temp = torch.zeros(x_lens.size(0), max_len) ++ mask = idxs.expand_as(temp) >= x_lens.unsqueeze(1) ++ else: ++ mask = idxs.expand(x_lens.size(0), max_len) >= x_lens.unsqueeze(1) + x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0) + x_lens = self.get_seq_len(x_lens) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/pth2onnx.py b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/pth2onnx.py index 8d30c4f05ce1b1d725ce971d2da67a4c63a2d04c..0f635ab23d51f8fd547d0a687c428fd56f2ce6ff 100644 --- a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/pth2onnx.py +++ b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/pth2onnx.py @@ -1,44 +1,44 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from jasper import config -from common import helpers -from jasper.model import Jasper - - -def main(): - cfg = config.load('configs/jasper10x5dr_speedp-online_speca.yaml') - model = Jasper(encoder_kw=config.encoder(cfg), - decoder_kw=config.decoder(cfg, n_classes=29)) - checkpoint = torch.load('checkpoints/jasper_fp16.pt', map_location="cpu") - state_dict = helpers.convert_v1_state_dict(checkpoint['ema_state_dict']) - model.load_state_dict(state_dict, strict=True) - model.eval() - - feats = torch.randn([4, 64, 4000], dtype=torch.float32) - feat_lens = torch.Tensor([1000], dtype=torch.int32) - dynamic_axes = {'feats': {2: '-1'}, 'output': {1, '-1'}} - torch.onnx.export(model, - (feats, feat_lens), - 'jasper_dynamic.onnx', - input_names=['feats', 'feat_lens'], - output_names=['output'], - dynamic_axes=dynamic_axes, - verbose=True, - opset_version=11) - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from jasper import config +from common import helpers +from jasper.model import Jasper + + +def main(): + cfg = config.load('configs/jasper10x5dr_speedp-online_speca.yaml') + model = Jasper(encoder_kw=config.encoder(cfg), + decoder_kw=config.decoder(cfg, n_classes=29)) + checkpoint = torch.load('checkpoints/jasper_fp16.pt', map_location="cpu") + state_dict = helpers.convert_v1_state_dict(checkpoint['ema_state_dict']) + model.load_state_dict(state_dict, strict=True) + model.eval() + + feats = torch.randn([4, 64, 4000], dtype=torch.float32) + feat_lens = torch.Tensor([1000], dtype=torch.int32) + dynamic_axes = {'feats': {2: '-1'}, 'output': {1, '-1'}} + torch.onnx.export(model, + (feats, feat_lens), + 'jasper_dynamic.onnx', + input_names=['feats', 'feat_lens'], + output_names=['output'], + dynamic_axes=dynamic_axes, + verbose=True, + opset_version=11) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/requirements.txt b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/requirements.txt index 36faa7e8ae5def0758173bdfd674c34e1858d0c7..5ad9b7d6cc62f483ab75300264ccecae9ac18917 100644 --- a/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/requirements.txt +++ b/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch/requirements.txt @@ -1,16 +1,16 @@ -torchvision==0.9.0 -torch==1.8.0 -onnx==1.8.0 -numpy==1.18.5 -ascii-graph==1.5.1 -inflect==5.3.0 -ipdb -librosa==0.8.0 -pandas==1.1.4 -pycuda==2020.1 -pyyaml>=5.4 -soundfile -sox==1.4.1 -tqdm==4.53.0 -unidecode==1.2.0 +torchvision==0.9.0 +torch==1.8.0 +onnx==1.8.0 +numpy==1.18.5 +ascii-graph==1.5.1 +inflect==5.3.0 +ipdb +librosa==0.8.0 +pandas==1.1.4 +pycuda==2020.1 +pyyaml>=5.4 +soundfile +sox==1.4.1 +tqdm==4.53.0 +unidecode==1.2.0 wrapt==1.10.11 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/LSTM/LICENSE b/ACL_PyTorch/built-in/audio/LSTM/LICENSE index 6052d47b9e4fad14635ad0392bfd592af254b951..835428fbaa90df1fa50963023aacfb2305ad9cd9 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/LICENSE +++ b/ACL_PyTorch/built-in/audio/LSTM/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/LSTM/LSTM_get_info.py b/ACL_PyTorch/built-in/audio/LSTM/LSTM_get_info.py index ea8300ce457d7dc6234ce80b56d5c59256da67d6..f89051ed81320fb541f1b97150781dae21d7aeaf 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/LSTM_get_info.py +++ b/ACL_PyTorch/built-in/audio/LSTM/LSTM_get_info.py @@ -1,34 +1,34 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--batchsize', help='conf file for training') - args = parser.parse_args() - - test_num = 400 // int(args.batchsize) - lstm_path = './lstm_bin/' - - with open('./lstm.info', 'w') as f: - for i in range(test_num): - ids_name = lstm_path + 'inputs_{}.bin'.format(i) - f.write(str(i) + ' ' + ids_name + ' ' + '(' + args.batchsize + ',390,243)') - f.write('\n') - -if __name__ == '__main__': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--batchsize', help='conf file for training') + args = parser.parse_args() + + test_num = 400 // int(args.batchsize) + lstm_path = './lstm_bin/' + + with open('./lstm.info', 'w') as f: + for i in range(test_num): + ids_name = lstm_path + 'inputs_{}.bin'.format(i) + f.write(str(i) + ' ' + ids_name + ' ' + '(' + args.batchsize + ',390,243)') + f.write('\n') + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/LSTM/LSTM_postprocess_data.py b/ACL_PyTorch/built-in/audio/LSTM/LSTM_postprocess_data.py index 4ec77e30d6c05ee46d6db400e4cf641a5d5bbd53..8fd0d995c67152b8aaebf1527eb48f7a789776ac 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/LSTM_postprocess_data.py +++ b/ACL_PyTorch/built-in/audio/LSTM/LSTM_postprocess_data.py @@ -1,114 +1,114 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -#!/usr/bin/python -#encoding=utf-8 - -import os -import time -import sys -import torch -import yaml -import argparse -import torch.nn as nn -import numpy as np - -sys.path.append('./') -from models.model_ctc import * -from utils.ctcDecoder import GreedyDecoder, BeamDecoder -from utils.data_loader import Vocab, SpeechDataset, SpeechDataLoader - -parser = argparse.ArgumentParser() -parser.add_argument('--conf', help='conf file for training') -parser.add_argument('--npu_path', help='infer file for postprocessing') -parser.add_argument('--batchsize', help='batchsize for postprocessing') - -class Config(object): - batch_size = 4 - dropout = 0.1 - -def test(): - args = parser.parse_args() - try: - conf = yaml.safe_load(open(args.conf,'r')) - except: - print("Config file not exist!") - sys.exit(1) - - opts = Config() - for k,v in conf.items(): - setattr(opts, k, v) - print('{:50}:{}'.format(k, v)) - - beam_width = opts.beam_width - lm_alpha = opts.lm_alpha - decoder_type = opts.decode_type - vocab_file = opts.vocab_file - - vocab = Vocab(vocab_file) - batchsize = int(args.batchsize) - - test_dataset = SpeechDataset(vocab, opts.valid_scp_path, opts.valid_lab_path, opts) - test_loader = SpeechDataLoader(test_dataset, batch_size=batchsize, shuffle=False, num_workers=opts.num_workers, pin_memory=False) - - if decoder_type == 'Greedy': - decoder = GreedyDecoder(vocab.index2word, space_idx=-1, blank_index=0) - else: - decoder = BeamDecoder(vocab.index2word, beam_width=beam_width, blank_index=0, space_idx=-1, lm_path=opts.lm_path, lm_alpha=opts.lm_alpha) - - total_wer = 0 - total_cer = 0 - start = time.time() - - npu_path = args.npu_path - test_num = 399 // batchsize - with torch.no_grad(): - for i, data in zip(range(test_num), test_loader): - inputs, input_sizes, targets, target_sizes, utt_list = data - probs_1_np = np.load('{}{}.0.npy'.format(npu_path, i)) - probs_1 = torch.from_numpy(probs_1_np) - - max_length = probs_1.size(0) - input_sizes = (input_sizes * max_length).long() - - decoded = decoder.decode(probs_1, input_sizes.numpy().tolist()) - targets, target_sizes = targets.numpy(), target_sizes.numpy() - labels = [] - for i in range(len(targets)): - label = [ vocab.index2word[num] for num in targets[i][:target_sizes[i]]] - labels.append(' '.join(label)) - - for x in range(len(targets)): - print("origin : " + labels[x]) - print("decoded: " + decoded[x]) - cer = 0 - wer = 0 - for x in range(len(labels)): - cer += decoder.cer(decoded[x], labels[x]) - wer += decoder.wer(decoded[x], labels[x]) - decoder.num_word += len(labels[x].split()) - decoder.num_char += len(labels[x]) - total_cer += cer - total_wer += wer - - CER = (float(total_cer) / decoder.num_char)*100 - WER = (float(total_wer) / decoder.num_word)*100 - print("Character error rate on test set: %.4f" % CER) - print("Word error rate on test set: %.4f" % WER) - end = time.time() - time_used = (end - start) / 60.0 - print("time used for decode %d sentences: %.4f minutes." % (len(test_dataset), time_used)) - -if __name__ == "__main__": - test() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/python +#encoding=utf-8 + +import os +import time +import sys +import torch +import yaml +import argparse +import torch.nn as nn +import numpy as np + +sys.path.append('./') +from models.model_ctc import * +from utils.ctcDecoder import GreedyDecoder, BeamDecoder +from utils.data_loader import Vocab, SpeechDataset, SpeechDataLoader + +parser = argparse.ArgumentParser() +parser.add_argument('--conf', help='conf file for training') +parser.add_argument('--npu_path', help='infer file for postprocessing') +parser.add_argument('--batchsize', help='batchsize for postprocessing') + +class Config(object): + batch_size = 4 + dropout = 0.1 + +def test(): + args = parser.parse_args() + try: + conf = yaml.safe_load(open(args.conf,'r')) + except: + print("Config file not exist!") + sys.exit(1) + + opts = Config() + for k,v in conf.items(): + setattr(opts, k, v) + print('{:50}:{}'.format(k, v)) + + beam_width = opts.beam_width + lm_alpha = opts.lm_alpha + decoder_type = opts.decode_type + vocab_file = opts.vocab_file + + vocab = Vocab(vocab_file) + batchsize = int(args.batchsize) + + test_dataset = SpeechDataset(vocab, opts.valid_scp_path, opts.valid_lab_path, opts) + test_loader = SpeechDataLoader(test_dataset, batch_size=batchsize, shuffle=False, num_workers=opts.num_workers, pin_memory=False) + + if decoder_type == 'Greedy': + decoder = GreedyDecoder(vocab.index2word, space_idx=-1, blank_index=0) + else: + decoder = BeamDecoder(vocab.index2word, beam_width=beam_width, blank_index=0, space_idx=-1, lm_path=opts.lm_path, lm_alpha=opts.lm_alpha) + + total_wer = 0 + total_cer = 0 + start = time.time() + + npu_path = args.npu_path + test_num = 399 // batchsize + with torch.no_grad(): + for i, data in zip(range(test_num), test_loader): + inputs, input_sizes, targets, target_sizes, utt_list = data + probs_1_np = np.load('{}{}.0.npy'.format(npu_path, i)) + probs_1 = torch.from_numpy(probs_1_np) + + max_length = probs_1.size(0) + input_sizes = (input_sizes * max_length).long() + + decoded = decoder.decode(probs_1, input_sizes.numpy().tolist()) + targets, target_sizes = targets.numpy(), target_sizes.numpy() + labels = [] + for i in range(len(targets)): + label = [ vocab.index2word[num] for num in targets[i][:target_sizes[i]]] + labels.append(' '.join(label)) + + for x in range(len(targets)): + print("origin : " + labels[x]) + print("decoded: " + decoded[x]) + cer = 0 + wer = 0 + for x in range(len(labels)): + cer += decoder.cer(decoded[x], labels[x]) + wer += decoder.wer(decoded[x], labels[x]) + decoder.num_word += len(labels[x].split()) + decoder.num_char += len(labels[x]) + total_cer += cer + total_wer += wer + + CER = (float(total_cer) / decoder.num_char)*100 + WER = (float(total_wer) / decoder.num_word)*100 + print("Character error rate on test set: %.4f" % CER) + print("Word error rate on test set: %.4f" % WER) + end = time.time() + time_used = (end - start) / 60.0 + print("time used for decode %d sentences: %.4f minutes." 
% (len(test_dataset), time_used)) + +if __name__ == "__main__": + test() diff --git a/ACL_PyTorch/built-in/audio/LSTM/LSTM_preprocess_data.py b/ACL_PyTorch/built-in/audio/LSTM/LSTM_preprocess_data.py index 75f129861d27b71770ae6082697cd92ffcfd1773..297aa1b0d7a5b42fc2910e65eeb322013c251364 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/LSTM_preprocess_data.py +++ b/ACL_PyTorch/built-in/audio/LSTM/LSTM_preprocess_data.py @@ -1,80 +1,80 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# !/usr/bin/python -# encoding=utf-8 - -import os -import sys -import copy -import time -import yaml -import shutil -import argparse -import numpy as np -import random -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -from torch.utils.tensorboard import SummaryWriter - -sys.path.append('./') -from models.model_ctc import * -from utils.data_loader import Vocab, SpeechDataset, SpeechDataLoader - -supported_rnn = {'nn.LSTM': nn.LSTM, 'nn.GRU': nn.GRU, 'nn.RNN': nn.RNN} -supported_activate = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'sigmoid': nn.Sigmoid} - -parser = argparse.ArgumentParser() -parser.add_argument('--conf', help='conf file for training') -parser.add_argument('--batchsize', help='batchsize for preprocessing') - -class Config(object): - batch_size = 4 - dropout = 0.1 - -def main(): - args = parser.parse_args() - try: - conf = yaml.safe_load(open(args.conf,'r')) - except: - print("Config file not exist!") - sys.exit(1) - - opts = Config() - for k,v in conf.items(): - setattr(opts, k, v) - print('{:50}:{}'.format(k, v)) - - # Data Loader - batchsize = int(args.batchsize) - vocab = Vocab(opts.vocab_file) - dev_dataset = SpeechDataset(vocab, opts.valid_scp_path, opts.valid_lab_path, opts) - dev_loader = SpeechDataLoader(dev_dataset, batch_size=batchsize, shuffle=False, num_workers=opts.num_workers, - drop_last=True, pin_memory=True) - - bin_path = "./lstm_bin" - if os.path.exists(bin_path): - shutil.rmtree(bin_path) - os.makedirs(bin_path) - i = -1 - for data in dev_loader: - i = i + 1 - print("[info] file", "===", i) - inputs, input_sizes, targets, target_sizes, utt_list = data - inputs_np = inputs.numpy() - inputs_np.tofile(os.path.join(bin_path, "inputs_" + str(i) + '.bin')) - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# !/usr/bin/python +# encoding=utf-8 + +import os +import sys +import copy +import time +import yaml +import shutil +import argparse +import numpy as np +import random +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +from torch.utils.tensorboard import SummaryWriter + +sys.path.append('./') +from models.model_ctc import * +from utils.data_loader import Vocab, SpeechDataset, SpeechDataLoader + +supported_rnn = {'nn.LSTM': nn.LSTM, 'nn.GRU': nn.GRU, 'nn.RNN': nn.RNN} +supported_activate = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'sigmoid': nn.Sigmoid} + +parser = argparse.ArgumentParser() +parser.add_argument('--conf', help='conf file for training') +parser.add_argument('--batchsize', help='batchsize for preprocessing') + +class Config(object): + batch_size = 4 + dropout = 0.1 + +def main(): + args = parser.parse_args() + try: + conf = yaml.safe_load(open(args.conf,'r')) + except: + print("Config file not exist!") + sys.exit(1) + + opts = Config() + for k,v in conf.items(): + setattr(opts, k, v) + print('{:50}:{}'.format(k, v)) + + # Data Loader + batchsize = int(args.batchsize) + vocab = Vocab(opts.vocab_file) + dev_dataset = SpeechDataset(vocab, opts.valid_scp_path, opts.valid_lab_path, opts) + dev_loader = SpeechDataLoader(dev_dataset, batch_size=batchsize, shuffle=False, num_workers=opts.num_workers, + drop_last=True, pin_memory=True) + + bin_path = "./lstm_bin" + if os.path.exists(bin_path): + shutil.rmtree(bin_path) + os.makedirs(bin_path) + i = -1 + for data in dev_loader: + i = i + 1 + print("[info] file", "===", i) + inputs, input_sizes, targets, target_sizes, utt_list = data + inputs_np = inputs.numpy() + inputs_np.tofile(os.path.join(bin_path, "inputs_" + str(i) + '.bin')) + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/built-in/audio/LSTM/LSTM_pth2onnx.py b/ACL_PyTorch/built-in/audio/LSTM/LSTM_pth2onnx.py index 0b68d2f79557d4404743a73ccc26df3247736a48..2aa34a62a33aaa0dee7070098c82f8955a35e2aa 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/LSTM_pth2onnx.py +++ b/ACL_PyTorch/built-in/audio/LSTM/LSTM_pth2onnx.py @@ -1,231 +1,231 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#!/usr/bin/python -#encoding=utf-8 - -import os -import sys -import copy -import time -import yaml -import argparse -import numpy as np -import random -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -from torch.utils.tensorboard import SummaryWriter -import torch.onnx -from collections import OrderedDict -import ssl -sys.path.append('./') -from models.model_ctc import * - -supported_rnn = {'nn.LSTM':nn.LSTM, 'nn.GRU': nn.GRU, 'nn.RNN':nn.RNN} -supported_activate = {'relu':nn.ReLU, 'tanh':nn.Tanh, 'sigmoid':nn.Sigmoid} - -parser = argparse.ArgumentParser(description='cnn_lstm_ctc') -parser.add_argument('--conf', default='conf/ctc_config.yaml' , help='conf file with argument of LSTM and training') -parser.add_argument('--batchsize', default=1, help='batchszie for transfer onnx batch') - -class Vocab(object): - def __init__(self, vocab_file): - self.vocab_file = vocab_file - self.word2index = {"blank": 0, "UNK": 1} - self.index2word = {0: "blank", 1: "UNK"} - self.word2count = {} - self.n_words = 2 - self.read_lang() - - def add_sentence(self, sentence): - for word in sentence.split(' '): - self.add_word(word) - - def add_word(self, word): - if word not in self.word2index: - self.word2index[word] = self.n_words - self.word2count[word] = 1 - self.index2word[self.n_words] = word - self.n_words += 1 - else: - self.word2count[word] += 1 - - def read_lang(self): - print("Reading vocabulary from {}".format(self.vocab_file)) - with open(self.vocab_file, 'r') as rf: - line = rf.readline() - while line: - line = line.strip().split(' ') - if len(line) > 1: - sen = ' '.join(line[1:]) - else: - sen = line[0] - self.add_sentence(sen) - line = rf.readline() - print("Vocabulary size is {}".format(self.n_words)) - - -def proc_nodes_module(checkpoint,AttrName): - new_state_dict = OrderedDict() - for k,v in checkpoint[AttrName].items(): - if(k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - - new_state_dict[name]=v - return new_state_dict - -def run_epoch(epoch_id, model, data_iter, loss_fn, device, opts, sum_writer, optimizer=None, print_every=20, is_training=True): - if is_training: - model.train() - else: - model.eval() - batch_time = 0 - data_time = 0 - total_loss = 0 - total_tokens = 0 - total_errs = 0 - cur_loss = 0 - i = 0 - steps_per_epoch = len(data_iter) - end = time.time() - for i, data in enumerate(data_iter): - data_time += (time.time() - end) - - global_step = epoch_id * steps_per_epoch + i - inputs, input_sizes, targets, target_sizes, utt_list = data - with torch.autograd.profiler.profile(record_shapes=True, use_cuda=True) as prof: - inputs = inputs.to(device) - input_sizes = input_sizes.to(device) - targets = targets.to(device) - target_sizes = target_sizes.to(device) - out = model(inputs) - out_len, batch_size, _ = out.size() - input_sizes = (input_sizes * out_len).long() - print(out.shape, targets.shape) - print("input_sizes:", input_sizes.shape) - print("target_sizes:", target_sizes.shape) - loss = loss_fn(out, targets, input_sizes, target_sizes) - loss /= batch_size - cur_loss += loss.item() - total_loss += loss.item() - prob, index = torch.max(out, dim=-1) - batch_errs, batch_tokens = model.compute_wer(index.transpose(0,1).cpu().numpy(), input_sizes.cpu().numpy(), targets.cpu().numpy(), target_sizes.cpu().numpy()) - total_errs += batch_errs - total_tokens += batch_tokens - - if is_training: - optimizer.zero_grad() - if opts.opt_level and opts.use_gpu: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - 
loss.backward() - optimizer.step() - sum_writer.add_scalar('Accuary/train/total_loss', total_loss / (i+1), global_step) - sum_writer.add_scalar('Accuary/train/total_wer', total_errs / total_tokens, global_step) - prof.export_chrome_trace('prof/'+str(i) + "_cuda_lstm.prof") - batch_time += (time.time() - end) - if is_training: - print('Epoch: [%d] [%d / %d], Time %.6f Data %.6f s, total_loss = %.5f s, total_wer = %.5f' % (epoch_id, - i+1, steps_per_epoch, batch_time / (i+1), data_time / (i+1), total_loss / (i+1), total_errs / total_tokens )) - end = time.time() - - - average_loss = total_loss / (i+1) - training = "Train" if is_training else "Valid" - return 1-total_errs / total_tokens, average_loss - -class Config(object): - batch_size = 4 - dropout = 0.1 - -def seed_everything(seed): - random.seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - cudnn.deterministic = True - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - -def main(conf, batchsize): - checkpoint = torch.load("./checkpoint/ctc_fbank_cnn/ctc_best_model.pth", map_location='cpu') - checkpoint['state_dict'] = proc_nodes_module(checkpoint,'state_dict') - opts = Config() - for k, v in conf.items(): - setattr(opts, k, v) - print('{:50}:{}'.format(k, v)) - - device = torch.device('cpu') - sum_writer = SummaryWriter(opts.summary_path) - - if opts.seed is not None: - seed_everything(opts.seed) - - #Data Loader - vocab = Vocab(opts.vocab_file) - #Define Model - rnn_type = supported_rnn[opts.rnn_type] - rnn_param = {"rnn_input_size":opts.rnn_input_size, "rnn_hidden_size":opts.rnn_hidden_size, "rnn_layers":opts.rnn_layers, - "rnn_type":rnn_type, "bidirectional":opts.bidirectional, "batch_norm":opts.batch_norm} - - num_class = vocab.n_words - opts.output_class_dim = vocab.n_words - drop_out = opts.drop_out - add_cnn = opts.add_cnn - - cnn_param = {} - channel = eval(opts.channel) - kernel_size = eval(opts.kernel_size) - stride = eval(opts.stride) - padding = eval(opts.padding) - pooling = eval(opts.pooling) - activation_function = supported_activate[opts.activation_function] - cnn_param['batch_norm'] = opts.batch_norm - cnn_param['activate_function'] = activation_function - cnn_param["layer"] = [] - for layer in range(opts.layers): - layer_param = [channel[layer], kernel_size[layer], stride[layer], padding[layer]] - if pooling is not None: - layer_param.append(pooling[layer]) - else: - layer_param.append(None) - cnn_param["layer"].append(layer_param) - model = CTC_Model(add_cnn=add_cnn, cnn_param=cnn_param, rnn_param=rnn_param, num_class=num_class, drop_out=drop_out) - model = model.to('cpu') - model.load_state_dict(checkpoint['state_dict'],strict=False) - model.eval() - input_names = ["actual_input_1"] - output_names = ["output1"] - batch_size = int(batchsize) - dummy_input = torch.randn(batch_size, 390, 243, device='cpu') - dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {1: '-1'}} - output_file = "lstm_ctc_{}batch.onnx".format(str(batch_size)) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, output_names = output_names, opset_version=11) -if __name__ == '__main__': - ssl._create_default_https_context = ssl._create_unverified_context - args = parser.parse_args() - batchsize = args.batchsize - try: - config_path = args.conf - conf = yaml.safe_load(open(config_path, 'r')) - except: - print("No input config or config file missing, please check.") - 
sys.exit(1) - main(conf, batchsize) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/python +#encoding=utf-8 + +import os +import sys +import copy +import time +import yaml +import argparse +import numpy as np +import random +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +from torch.utils.tensorboard import SummaryWriter +import torch.onnx +from collections import OrderedDict +import ssl +sys.path.append('./') +from models.model_ctc import * + +supported_rnn = {'nn.LSTM':nn.LSTM, 'nn.GRU': nn.GRU, 'nn.RNN':nn.RNN} +supported_activate = {'relu':nn.ReLU, 'tanh':nn.Tanh, 'sigmoid':nn.Sigmoid} + +parser = argparse.ArgumentParser(description='cnn_lstm_ctc') +parser.add_argument('--conf', default='conf/ctc_config.yaml' , help='conf file with argument of LSTM and training') +parser.add_argument('--batchsize', default=1, help='batchszie for transfer onnx batch') + +class Vocab(object): + def __init__(self, vocab_file): + self.vocab_file = vocab_file + self.word2index = {"blank": 0, "UNK": 1} + self.index2word = {0: "blank", 1: "UNK"} + self.word2count = {} + self.n_words = 2 + self.read_lang() + + def add_sentence(self, sentence): + for word in sentence.split(' '): + self.add_word(word) + + def add_word(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + + def read_lang(self): + print("Reading vocabulary from {}".format(self.vocab_file)) + with open(self.vocab_file, 'r') as rf: + line = rf.readline() + while line: + line = line.strip().split(' ') + if len(line) > 1: + sen = ' '.join(line[1:]) + else: + sen = line[0] + self.add_sentence(sen) + line = rf.readline() + print("Vocabulary size is {}".format(self.n_words)) + + +def proc_nodes_module(checkpoint,AttrName): + new_state_dict = OrderedDict() + for k,v in checkpoint[AttrName].items(): + if(k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + + new_state_dict[name]=v + return new_state_dict + +def run_epoch(epoch_id, model, data_iter, loss_fn, device, opts, sum_writer, optimizer=None, print_every=20, is_training=True): + if is_training: + model.train() + else: + model.eval() + batch_time = 0 + data_time = 0 + total_loss = 0 + total_tokens = 0 + total_errs = 0 + cur_loss = 0 + i = 0 + steps_per_epoch = len(data_iter) + end = time.time() + for i, data in enumerate(data_iter): + data_time += (time.time() - end) + + global_step = epoch_id * steps_per_epoch + i + inputs, input_sizes, targets, target_sizes, utt_list = data + with torch.autograd.profiler.profile(record_shapes=True, use_cuda=True) as prof: + inputs = inputs.to(device) + input_sizes = input_sizes.to(device) + targets = targets.to(device) + target_sizes = target_sizes.to(device) + out = model(inputs) + out_len, batch_size, _ = out.size() + input_sizes = (input_sizes * out_len).long() + print(out.shape, targets.shape) + 
print("input_sizes:", input_sizes.shape) + print("target_sizes:", target_sizes.shape) + loss = loss_fn(out, targets, input_sizes, target_sizes) + loss /= batch_size + cur_loss += loss.item() + total_loss += loss.item() + prob, index = torch.max(out, dim=-1) + batch_errs, batch_tokens = model.compute_wer(index.transpose(0,1).cpu().numpy(), input_sizes.cpu().numpy(), targets.cpu().numpy(), target_sizes.cpu().numpy()) + total_errs += batch_errs + total_tokens += batch_tokens + + if is_training: + optimizer.zero_grad() + if opts.opt_level and opts.use_gpu: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + sum_writer.add_scalar('Accuary/train/total_loss', total_loss / (i+1), global_step) + sum_writer.add_scalar('Accuary/train/total_wer', total_errs / total_tokens, global_step) + prof.export_chrome_trace('prof/'+str(i) + "_cuda_lstm.prof") + batch_time += (time.time() - end) + if is_training: + print('Epoch: [%d] [%d / %d], Time %.6f Data %.6f s, total_loss = %.5f s, total_wer = %.5f' % (epoch_id, + i+1, steps_per_epoch, batch_time / (i+1), data_time / (i+1), total_loss / (i+1), total_errs / total_tokens )) + end = time.time() + + + average_loss = total_loss / (i+1) + training = "Train" if is_training else "Valid" + return 1-total_errs / total_tokens, average_loss + +class Config(object): + batch_size = 4 + dropout = 0.1 + +def seed_everything(seed): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + cudnn.deterministic = True + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + +def main(conf, batchsize): + checkpoint = torch.load("./checkpoint/ctc_fbank_cnn/ctc_best_model.pth", map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint,'state_dict') + opts = Config() + for k, v in conf.items(): + setattr(opts, k, v) + print('{:50}:{}'.format(k, v)) + + device = torch.device('cpu') + sum_writer = SummaryWriter(opts.summary_path) + + if opts.seed is not None: + seed_everything(opts.seed) + + #Data Loader + vocab = Vocab(opts.vocab_file) + #Define Model + rnn_type = supported_rnn[opts.rnn_type] + rnn_param = {"rnn_input_size":opts.rnn_input_size, "rnn_hidden_size":opts.rnn_hidden_size, "rnn_layers":opts.rnn_layers, + "rnn_type":rnn_type, "bidirectional":opts.bidirectional, "batch_norm":opts.batch_norm} + + num_class = vocab.n_words + opts.output_class_dim = vocab.n_words + drop_out = opts.drop_out + add_cnn = opts.add_cnn + + cnn_param = {} + channel = eval(opts.channel) + kernel_size = eval(opts.kernel_size) + stride = eval(opts.stride) + padding = eval(opts.padding) + pooling = eval(opts.pooling) + activation_function = supported_activate[opts.activation_function] + cnn_param['batch_norm'] = opts.batch_norm + cnn_param['activate_function'] = activation_function + cnn_param["layer"] = [] + for layer in range(opts.layers): + layer_param = [channel[layer], kernel_size[layer], stride[layer], padding[layer]] + if pooling is not None: + layer_param.append(pooling[layer]) + else: + layer_param.append(None) + cnn_param["layer"].append(layer_param) + model = CTC_Model(add_cnn=add_cnn, cnn_param=cnn_param, rnn_param=rnn_param, num_class=num_class, drop_out=drop_out) + model = model.to('cpu') + model.load_state_dict(checkpoint['state_dict'],strict=False) + model.eval() + input_names = ["actual_input_1"] + output_names = ["output1"] + batch_size = 
int(batchsize) + dummy_input = torch.randn(batch_size, 390, 243, device='cpu') + dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {1: '-1'}} + output_file = "lstm_ctc_{}batch.onnx".format(str(batch_size)) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, output_names = output_names, opset_version=11) +if __name__ == '__main__': + ssl._create_default_https_context = ssl._create_unverified_context + args = parser.parse_args() + batchsize = args.batchsize + try: + config_path = args.conf + conf = yaml.safe_load(open(config_path, 'r')) + except: + print("No input config or config file missing, please check.") + sys.exit(1) + main(conf, batchsize) diff --git a/ACL_PyTorch/built-in/audio/LSTM/ReadMe.md b/ACL_PyTorch/built-in/audio/LSTM/ReadMe.md index 8eea54372f446a3eb0142e302a783f338781601f..4c9db9caaf36b9cbd44b63e7b6a2ecfb89046a00 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/ReadMe.md +++ b/ACL_PyTorch/built-in/audio/LSTM/ReadMe.md @@ -1,341 +1,341 @@ -# LSTM Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 精度对比](#62-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - [7.2 T4性能数据](#72-T4性能数据) - - [7.3 性能对比](#73-性能对比) - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -npu训练组依据客户给的模型进行训练所得,无参考论文 - -### 1.2 代码地址 -[LSTM代码]https://gitee.com/ascend/modelzoo.git) -branch:master -commit_id=8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 -code_path=modelzoo/tree/master/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -pytorch = 1.8.0 -torchvision = 0.9.0 -``` -### 2.2 python第三方库 - -``` -ONNX == 1.7.0 -kaldi == https://github.com/kaldi-asr/kaldi -Pillow == 7.2.0 -onnxruntime-gpu == 1.7.0 -kaldiio == 2.17.2 -``` -kaldi需要安装在ModelZoo的LSTM源码仓中“modelzoo/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/”目录下。ModelZoo的LSTM源码仓下载方法. -``` -git clone https://gitee.com/ascend/modelzoo.git -cd modelzoo -git reset --hard 8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 -``` -1.下载ModelZoo的LSTM源码仓 -``` -git clone https://gitee.com/ascend/modelzoo.git -cd modelzoo -git reset --hard 8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 -cd built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/ -``` -2.下载kaldi工具包 -源码搭建kaldi工具包环境。以arm 64位环境为例说明,推荐安装至conda环境: -``` -git clone https://github.com/kaldi-asr/kaldi -cd kaldi -``` -3.检查工具包所需依赖并安装缺少依赖 -``` -tools/extras/check_dependencies.sh -``` -根据检查结果和提示,安装缺少的依赖。安装完依赖再次检查工具包所需依赖是否都安装ok -4.编译 -``` -cd tools -make -j 64 -``` -5.安装依赖库成功之后安装第三方工具,Kaldi使用FST作为状态图的表现形式,安装方式如下: -``` -make openfst -extras/install_irstlm.sh -extras/install_openblas.sh -``` - -``` -输出:Installation of IRSTLM finished successfully -输出:OpenBLAS is installed successfully -``` -6.配置源码 -``` -cd ../src/ -./configure --shared -输出"Kaldi has been successfully configured." -``` -7.编译安装 -``` -make -j clean depend -make -j 64 - -输出:echo Done -Done -``` -源码中使用的python2.7版本,如果系统python版本与该版本不同,可使用系统默认python,在目录kaldi/python/下创建空文件.use_default_python。其他安装问题可参见kaldi官方安装教程. 
- -**说明:** -> 将源码包中的全部脚本移动到已安装kaldi工具的“modelzoo/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/”目录下。 -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 -1.下载pth权重文件 -权重文件由华为npu模型训练组提供。 -2.lstm模型代码在代码仓中 -``` -git clone https://gitee.com/ascend/modelzoo.git -``` - 3.编写pth2onnx脚本LSTM_pth2onnx.py -本模型基于开源框架PyTorch训练的lstm进行模型转换。使用PyTorch将模型权重文件.pth转换为.onnx文件,再使用ATC工具将.onnx文件转为离线推理模型文件.om文件。权重文件由npu模型训练组提供,gpu训练模型ctc_best_model.pth。源码包中已提供ctc_best_model.pth权重文件。在1p目录下创建checkpoint/ctc_fbank_cnn/目录并将权重文件移到到该目录下。 -``` -mkdir -p checkpoint/ctc_fbank_cnnmv ./ctc_best_model.pth ./checkpoint/ctc_fbank_cnn/ -``` - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 ./steps/LSTM_pth2onnx.py --batchsize=16 -``` - -### 3.2 onnx转om模型 - -1.修改lstm_atc.sh脚本,通过ATC工具使用脚本完成转换,具体的脚本示例如下: -``` -# 配置环境变量 -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 -``` -atc --input_format=ND --framework=5 --model=lstm_ctc_16batch.onnx --input_shape="actual_input_1:16,390,243" --output=lstm_ctc_16batch_auto --auto_tune_mode="RL,GA" --log=info --soc_version=Ascend310 -``` -参数说明: - --model:为ONNX模型文件。 - --framework:5代表ONNX模型。 - --output:输出的OM模型。 - --input_format:输入数据的格式。 - --input_shape:输入数据的shape。 - --log:日志级别。 - --soc_version:处理器型号。 - -执行lstm_atc.sh脚本,将.onnx文件转为离线推理模型文件.om文件。 -``` -bash lstm_atc.sh -``` -运行成功后生成lstm_ctc_npu_16batch.om用于二进制输入推理的模型文件。 - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -本模型支持timit语音包的验证集。timit数据集与训练对齐,使用训练提供的语音数据包。需用户自行获取数据集,并将数据集命名为data.zip,并上传数据集data.zip至服务器ModelZoo的LSTM源码仓下的built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/目录中。数据集结构如下。 -``` -├── DOC -├── README.DOC -├── TEST -└── TRAIN -``` - -### 4.2 数据集预处理 -目的、处理过程及方法、处理后输出文件介绍及用途模型输入数据为二进制格式。将原始数据(audio数据)转化为二进制文件(.bin)。 -1.解压数据集 -``` -unzip data.zip -cd p1 -``` -2.修改1p目录下path.sh里第一行代码如下: -``` -KALDI_ROOT=./kaldi -``` -3.创建data文件夹 -``` -mkdir data -``` -4.执行prepare_data.sh脚本。 -``` -chmod +x local/timit_data_prep.sh -chmod +x steps/make_feat.sh -bash prepare_data.sh -``` -执行prepare_data.sh脚本之后,在当前目录下会生成tmp文件夹和在data文件夹下生成dev,test,train三个数据集文件夹。 -5.移动LSTM_preprocess_data.py至1p/steps目录下, -6.修改./conf/ctc_config.yaml文件内容 -``` -#[test] -test_scp_path: 'data/dev/fbank.scp' -test_lab_path: 'data/dev/phn_text' -decode_type: "Greedy" -beam_width: 10 -lm_alpha: 0.1 -lm_path: 'data/lm_phone_bg.arpa' -``` -data文件夹即为执行prepare_data.sh之后所生成,使用此目录下的dev数据集进行验证。 -7.执行LSTM_preprocess_data.py脚本 -``` -python3.7 ./steps/LSTM_preprocess_data.py --conf=./conf/ctc_config.yaml --batchsize=16 -``` -参数为配置文件。 -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本LSTM_get_info.py -使用pyacl推理需要输入二进制数据集的info文件,用于获取数据集。使用LSTM_get_info.py脚本,输入已经得到的二进制文件,输出生成二进制数据集的info文件。上传LSTM_get_info.py至1p文件夹下,运行LSTM_get_info.py脚本。 - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 LSTM_get_info.py --batchsize=16 -``` -参数为om模型推理batchsize。运行成功后,在当前目录中生成lstm.info。 -## 5 离线推理 -1.配置pyacl推理环境变量 -``` -export 
install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -``` -2.执行pyacl离线推理 -``` -#pyacl推理命令 -python3.7 ./pyacl_infer.py --model_path=./lstm_ctc_16batch_auto.om --device_id=0 --cpu_run=True --sync_infer=True --workspace=10 --input_info_file_path=./lstm.info --input_dtypes=float32 --infer_res_save_path=./infer_res --res_save_type=npy -``` -参数说明: ---model_path:om模型文件 ---input_info_file_path:处理后的数据集信息info文件 ---infer_res_save_path:pyacl推理后结果保存路径 ---res_save_type:推理结果保存格式,npy格式为含有shape信息的数据,bin格式为不含shape-信息的二进制numpy数据 - - -## 6 精度对比 -- **[离线推理精度](#61-离线推理精度)** -- **[精度对比](#62-精度对比)** - -### 6.1 离线推理精度统计 - -1. 后处理统计精度 -上传LSTM_postprocess_data.py脚本至1p/steps目录下,执行LSTM_postprocess_data.py脚本进行数据后处理。 -``` -python3.7 ./steps/LSTM_postprocess_data.py --conf=./conf/ctc_config.yaml --npu_path=./infer_res/ --batchsize=16 -``` -conf参数为模型配置文件, npu_path参数为pyacl推理结果目录。执行后处理脚本之后,精度数据由WER 与CER给出,字母错误率与单词错误率。 -``` -Character error rate on test set: 13.5877 -Word error rate on test set: 18.9075 -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 精度对比 -推理模型om精度与onnx精度一致,且与训练测试pth模型精度一致。 - **精度调试:** - ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** -- **[T4性能数据](#72-T4性能数据)** -- **[性能对比](#73-性能对比)** - -### 7.1 npu性能数据 -1.该模型不支持benchmark推理,使用pyacl推理获取npu性能数据。 -batch1的性能 -``` -Bs1: average pure infer time(ms):179.183 -``` -Interface throughputRate: 1000/179.183 = 5.58,5.58x4既是batch1 310单卡吞吐率 -batch4的性能 -``` -Bs4: average pure infer time(ms):187.361 -``` -Interface throughputRate: 1000/187.361* 4 = 21.35,21.35*4既是batch4 310单卡吞吐率 -batch8性能 -``` -Bs8: average pure infer time(ms):202.751 -``` -batch8 310单卡吞吐率:1000/202.751 * 8 = 157.83 fps -batch16性能: -``` -Bs16: average pure infer time(ms):195.763 -``` -batch16 310单卡吞吐率:1000/195.763 * 16 * 4 = 326.93fps -batch32性能: -``` -Bs32: average pure infer time(ms):260.119 -``` -batch32 310单卡吞吐率:1000/260.119 * 32 * 4 = 492.08fps - -### 7.2 T4性能数据 -gpu下onnx在线推理获取T4性能基线 -在T4环境下搭建环境,将预处理好的bin文件数据打包和lstm_infer_onnx.py脚本上传至服务器上1p目录下,进行onnx在线推理,执行lstm_onnx_infer.py脚本. 
-``` -python3.7 lstm_onnx_infer.py --conf=./conf/ctc_config.yaml --model_path=./lstm_ctc_16batch.onnx --bin_file_path=./lstm_bin/ --pred_res_save_path=./lstm_onnx_infer --batchsize=16 -``` -性能基线数据为: -``` -total infer time(ms): 2308.3143849999997 -average infer time(ms): 92.3325754 -``` -batch16 t4吞吐率:1000/92.33 * 16 = 173.29fps - -### 7.3 性能对比 -batch16: 326.93 >1000/(92.33/16) - -310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率大,故310性能高于T4性能,性能达标。 - - **性能优化:** ->该模型性能优于T4,不用进行优化 +# LSTM Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 精度对比](#62-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + - [7.2 T4性能数据](#72-T4性能数据) + - [7.3 性能对比](#73-性能对比) + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +npu训练组依据客户给的模型进行训练所得,无参考论文 + +### 1.2 代码地址 +[LSTM代码]https://gitee.com/ascend/modelzoo.git) +branch:master +commit_id=8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 +code_path=modelzoo/tree/master/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +pytorch = 1.8.0 +torchvision = 0.9.0 +``` +### 2.2 python第三方库 + +``` +ONNX == 1.7.0 +kaldi == https://github.com/kaldi-asr/kaldi +Pillow == 7.2.0 +onnxruntime-gpu == 1.7.0 +kaldiio == 2.17.2 +``` +kaldi需要安装在ModelZoo的LSTM源码仓中“modelzoo/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/”目录下。ModelZoo的LSTM源码仓下载方法. +``` +git clone https://gitee.com/ascend/modelzoo.git +cd modelzoo +git reset --hard 8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 +``` +1.下载ModelZoo的LSTM源码仓 +``` +git clone https://gitee.com/ascend/modelzoo.git +cd modelzoo +git reset --hard 8ed54e7d0fc9b632e1e3b9420bed96ee2c7fa1e3 +cd built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/ +``` +2.下载kaldi工具包 +源码搭建kaldi工具包环境。以arm 64位环境为例说明,推荐安装至conda环境: +``` +git clone https://github.com/kaldi-asr/kaldi +cd kaldi +``` +3.检查工具包所需依赖并安装缺少依赖 +``` +tools/extras/check_dependencies.sh +``` +根据检查结果和提示,安装缺少的依赖。安装完依赖再次检查工具包所需依赖是否都安装ok +4.编译 +``` +cd tools +make -j 64 +``` +5.安装依赖库成功之后安装第三方工具,Kaldi使用FST作为状态图的表现形式,安装方式如下: +``` +make openfst +extras/install_irstlm.sh +extras/install_openblas.sh +``` + +``` +输出:Installation of IRSTLM finished successfully +输出:OpenBLAS is installed successfully +``` +6.配置源码 +``` +cd ../src/ +./configure --shared +输出"Kaldi has been successfully configured." +``` +7.编译安装 +``` +make -j clean depend +make -j 64 + +输出:echo Done +Done +``` +源码中使用的python2.7版本,如果系统python版本与该版本不同,可使用系统默认python,在目录kaldi/python/下创建空文件.use_default_python。其他安装问题可参见kaldi官方安装教程. 
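+A minimal sketch of that step, assuming Kaldi was cloned into the current 1p directory (create the empty marker file so Kaldi falls back to the system Python):
+```
+touch kaldi/python/.use_default_python
+```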
+ +**说明:** +> 将源码包中的全部脚本移动到已安装kaldi工具的“modelzoo/built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/1p/”目录下。 +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 +1.下载pth权重文件 +权重文件由华为npu模型训练组提供。 +2.lstm模型代码在代码仓中 +``` +git clone https://gitee.com/ascend/modelzoo.git +``` + 3.编写pth2onnx脚本LSTM_pth2onnx.py +本模型基于开源框架PyTorch训练的lstm进行模型转换。使用PyTorch将模型权重文件.pth转换为.onnx文件,再使用ATC工具将.onnx文件转为离线推理模型文件.om文件。权重文件由npu模型训练组提供,gpu训练模型ctc_best_model.pth。源码包中已提供ctc_best_model.pth权重文件。在1p目录下创建checkpoint/ctc_fbank_cnn/目录并将权重文件移到到该目录下。 +``` +mkdir -p checkpoint/ctc_fbank_cnnmv ./ctc_best_model.pth ./checkpoint/ctc_fbank_cnn/ +``` + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 ./steps/LSTM_pth2onnx.py --batchsize=16 +``` + +### 3.2 onnx转om模型 + +1.修改lstm_atc.sh脚本,通过ATC工具使用脚本完成转换,具体的脚本示例如下: +``` +# 配置环境变量 +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 +``` +atc --input_format=ND --framework=5 --model=lstm_ctc_16batch.onnx --input_shape="actual_input_1:16,390,243" --output=lstm_ctc_16batch_auto --auto_tune_mode="RL,GA" --log=info --soc_version=Ascend310 +``` +参数说明: + --model:为ONNX模型文件。 + --framework:5代表ONNX模型。 + --output:输出的OM模型。 + --input_format:输入数据的格式。 + --input_shape:输入数据的shape。 + --log:日志级别。 + --soc_version:处理器型号。 + +执行lstm_atc.sh脚本,将.onnx文件转为离线推理模型文件.om文件。 +``` +bash lstm_atc.sh +``` +运行成功后生成lstm_ctc_npu_16batch.om用于二进制输入推理的模型文件。 + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +本模型支持timit语音包的验证集。timit数据集与训练对齐,使用训练提供的语音数据包。需用户自行获取数据集,并将数据集命名为data.zip,并上传数据集data.zip至服务器ModelZoo的LSTM源码仓下的built-in/PyTorch/Official/nlp/LSTM_for_PyTorch/NPU/目录中。数据集结构如下。 +``` +├── DOC +├── README.DOC +├── TEST +└── TRAIN +``` + +### 4.2 数据集预处理 +目的、处理过程及方法、处理后输出文件介绍及用途模型输入数据为二进制格式。将原始数据(audio数据)转化为二进制文件(.bin)。 +1.解压数据集 +``` +unzip data.zip +cd p1 +``` +2.修改1p目录下path.sh里第一行代码如下: +``` +KALDI_ROOT=./kaldi +``` +3.创建data文件夹 +``` +mkdir data +``` +4.执行prepare_data.sh脚本。 +``` +chmod +x local/timit_data_prep.sh +chmod +x steps/make_feat.sh +bash prepare_data.sh +``` +执行prepare_data.sh脚本之后,在当前目录下会生成tmp文件夹和在data文件夹下生成dev,test,train三个数据集文件夹。 +5.移动LSTM_preprocess_data.py至1p/steps目录下, +6.修改./conf/ctc_config.yaml文件内容 +``` +#[test] +test_scp_path: 'data/dev/fbank.scp' +test_lab_path: 'data/dev/phn_text' +decode_type: "Greedy" +beam_width: 10 +lm_alpha: 0.1 +lm_path: 'data/lm_phone_bg.arpa' +``` +data文件夹即为执行prepare_data.sh之后所生成,使用此目录下的dev数据集进行验证。 +7.执行LSTM_preprocess_data.py脚本 +``` +python3.7 ./steps/LSTM_preprocess_data.py --conf=./conf/ctc_config.yaml --batchsize=16 +``` +参数为配置文件。 +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本LSTM_get_info.py +使用pyacl推理需要输入二进制数据集的info文件,用于获取数据集。使用LSTM_get_info.py脚本,输入已经得到的二进制文件,输出生成二进制数据集的info文件。上传LSTM_get_info.py至1p文件夹下,运行LSTM_get_info.py脚本。 + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 LSTM_get_info.py --batchsize=16 +``` +参数为om模型推理batchsize。运行成功后,在当前目录中生成lstm.info。 +## 5 离线推理 +1.配置pyacl推理环境变量 +``` +export 
install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +``` +2.执行pyacl离线推理 +``` +#pyacl推理命令 +python3.7 ./pyacl_infer.py --model_path=./lstm_ctc_16batch_auto.om --device_id=0 --cpu_run=True --sync_infer=True --workspace=10 --input_info_file_path=./lstm.info --input_dtypes=float32 --infer_res_save_path=./infer_res --res_save_type=npy +``` +参数说明: +--model_path:om模型文件 +--input_info_file_path:处理后的数据集信息info文件 +--infer_res_save_path:pyacl推理后结果保存路径 +--res_save_type:推理结果保存格式,npy格式为含有shape信息的数据,bin格式为不含shape-信息的二进制numpy数据 + + +## 6 精度对比 +- **[离线推理精度](#61-离线推理精度)** +- **[精度对比](#62-精度对比)** + +### 6.1 离线推理精度统计 + +1. 后处理统计精度 +上传LSTM_postprocess_data.py脚本至1p/steps目录下,执行LSTM_postprocess_data.py脚本进行数据后处理。 +``` +python3.7 ./steps/LSTM_postprocess_data.py --conf=./conf/ctc_config.yaml --npu_path=./infer_res/ --batchsize=16 +``` +conf参数为模型配置文件, npu_path参数为pyacl推理结果目录。执行后处理脚本之后,精度数据由WER 与CER给出,字母错误率与单词错误率。 +``` +Character error rate on test set: 13.5877 +Word error rate on test set: 18.9075 +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 精度对比 +推理模型om精度与onnx精度一致,且与训练测试pth模型精度一致。 + **精度调试:** + +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** +- **[T4性能数据](#72-T4性能数据)** +- **[性能对比](#73-性能对比)** + +### 7.1 npu性能数据 +1.该模型不支持benchmark推理,使用pyacl推理获取npu性能数据。 +batch1的性能 +``` +Bs1: average pure infer time(ms):179.183 +``` +Interface throughputRate: 1000/179.183 = 5.58,5.58x4既是batch1 310单卡吞吐率 +batch4的性能 +``` +Bs4: average pure infer time(ms):187.361 +``` +Interface throughputRate: 1000/187.361* 4 = 21.35,21.35*4既是batch4 310单卡吞吐率 +batch8性能 +``` +Bs8: average pure infer time(ms):202.751 +``` +batch8 310单卡吞吐率:1000/202.751 * 8 = 157.83 fps +batch16性能: +``` +Bs16: average pure infer time(ms):195.763 +``` +batch16 310单卡吞吐率:1000/195.763 * 16 * 4 = 326.93fps +batch32性能: +``` +Bs32: average pure infer time(ms):260.119 +``` +batch32 310单卡吞吐率:1000/260.119 * 32 * 4 = 492.08fps + +### 7.2 T4性能数据 +gpu下onnx在线推理获取T4性能基线 +在T4环境下搭建环境,将预处理好的bin文件数据打包和lstm_infer_onnx.py脚本上传至服务器上1p目录下,进行onnx在线推理,执行lstm_onnx_infer.py脚本. +``` +python3.7 lstm_onnx_infer.py --conf=./conf/ctc_config.yaml --model_path=./lstm_ctc_16batch.onnx --bin_file_path=./lstm_bin/ --pred_res_save_path=./lstm_onnx_infer --batchsize=16 +``` +性能基线数据为: +``` +total infer time(ms): 2308.3143849999997 +average infer time(ms): 92.3325754 +``` +batch16 t4吞吐率:1000/92.33 * 16 = 173.29fps + +### 7.3 性能对比 +batch16: 326.93 >1000/(92.33/16) + +310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率大,故310性能高于T4性能,性能达标。 + + **性能优化:** +>该模型性能优于T4,不用进行优化 diff --git a/ACL_PyTorch/built-in/audio/LSTM/acl_net.py b/ACL_PyTorch/built-in/audio/LSTM/acl_net.py index f3ec7a6b5b07758039d9861542fc09b445c17b07..d7f893b3f3e065c8eb084968173557e44595e5d3 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/acl_net.py +++ b/ACL_PyTorch/built-in/audio/LSTM/acl_net.py @@ -1,276 +1,276 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import acl -import functools -import numpy as np -import torch -import time - -# error code -ACL_ERROR_NONE = 0 - -# rule for memory copy -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -# dtype -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception(f"{message} failed ret={ret}") - - -class MeasureTime(): - def __init__(self, measurements, key, cpu_run=True): - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - def __enter__(self): - if not self.cpu_run: - torch.cuda.synchronize() - self.t0 = time.perf_counter_ns() - - def __exit__(self, exc_type, exc_value, exc_traceback): - if not self.cpu_run: - torch.cuda.synchronize() - self.measurements[self.key] = time.perf_counter_ns() - self.t0 - - -class AclModel(object): - def __init__(self, device_id, model_path, sync_infer, measurements, key, cpu_run): - self.device_id = device_id - self.sync_infer = sync_infer - self.out_bufs_ptr = [] - self.output_sizes = [] - self.input_sizes = [] - self.input_bufs_ptr = [] - - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - ret = acl.init() - check_ret("acl.init", ret) - ret = acl.rt.set_device(self.device_id) - check_ret("acl.rt.set_device", ret) - self.context, ret = acl.rt.create_context(self.device_id) - check_ret("acl.rt.create_context", ret) - self.model_id, ret = acl.mdl.load_from_file(model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - assert self.model_desc is not None - acl.mdl.get_desc(self.model_desc, self.model_id) - self.dataset_in = acl.mdl.create_dataset() - assert self.dataset_in is not None - self.dataset_out = acl.mdl.create_dataset() - assert self.dataset_out is not None - self.in_size, self.out_size = 0, 0 - self.stm, ret = acl.rt.create_stream() - assert ret == 0 - - self.desc_init() - self.dataset_init() - - def __call__(self, ori_data, dim): - return self.forward(ori_data, dim) - - def __del__(self): - # unload model - if self.model_id: - ret = acl.mdl.unload(self.model_id) - assert ret == 0 - - # destroy model desc - ret = acl.mdl.destroy_desc(self.model_desc) - assert ret == 0 - - self.destroy_data_set(self.dataset_in) - self.destroy_data_set(self.dataset_out) - - # destroy input/output tensor - for i in range(len(self.input_bufs_ptr)): - acl.rt.free(self.input_bufs_ptr[i]["buffer"]) - self.input_bufs_ptr[i] = None - - for i in range(len(self.out_bufs_ptr)): - acl.rt.free(self.out_bufs_ptr[i]["buffer"]) - self.out_bufs_ptr[i] = None - - ret = acl.rt.destroy_stream(self.stm) - assert ret == 0 - - def desc_init(self): - tensor_size = acl.mdl.get_num_inputs(self.model_desc) - if not tensor_size: - raise Exception("get_num_inputs failed") - self.in_size = tensor_size - - for i in range(tensor_size): - size = acl.mdl.get_input_size_by_index(self.model_desc, i) - data, ret = 
acl.rt.malloc(size, 0) - assert ret == 0 - - self.input_bufs_ptr.append({'size': size, 'buffer': data}) - self.input_sizes.append(size) - - tensor_size = acl.mdl.get_num_outputs(self.model_desc) - self.out_size = tensor_size - for i in range(tensor_size): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - assert ret == 0 - size = acl.mdl.get_output_size_by_index(self.model_desc, i) - - data, ret = acl.rt.malloc(size, 0) - assert ret == 0 - - self.output_sizes.append(size) - self.out_bufs_ptr.append({'size': size, 'buffer': data}) - - def dataset_init(self): - self.create_data_set(self.dataset_in, self.input_bufs_ptr, self.input_sizes) - self.create_data_set(self.dataset_out, self.out_bufs_ptr, self.output_sizes) - - def create_data_set(self, dataset, bufs_ptr_list, size_list): - # create dataset buffer then add to dataset - for i in range(len(size_list)): - buffer = acl.create_data_buffer(bufs_ptr_list[i]["buffer"], size_list[i]) - if not buffer: - self.destroy_data_set(dataset) - raise Exception("create_data_buffer failed") - - # add to dataset - _, ret = acl.mdl.add_dataset_buffer(dataset, buffer) - if ret != 0: - self.destroy_data_set(dataset) - raise Exception("add_dataset_buffer failed, ret = {}".format(ret)) - - return dataset - - def destroy_data_set(self, dataset): - data_buf_num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(data_buf_num): - # get data buffer by index - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf is not None: - acl.destroy_data_buffer(data_buf) - - acl.mdl.destroy_dataset(dataset) - - def copy_data_to_device(self, data): - for i in range(len(data)): - ptr, np = acl.util.numpy_contiguous_to_ptr(data[i]["buffer"]) - acl.rt.memcpy(self.input_bufs_ptr[i]["buffer"], data[i]["size"], ptr, - data[i]["size"], ACL_MEMCPY_HOST_TO_DEVICE) - - def copy_output_to_host(self): - output_data = [] - for i in range(len(self.out_bufs_ptr)): - temp = dict() - temp["size"] = self.out_bufs_ptr[i]["size"] - temp["buffer"], ret = acl.rt.malloc_host(temp["size"]) - output_data.append(temp) - acl.rt.memcpy(temp["buffer"], temp["size"], self.out_bufs_ptr[i]["buffer"], - temp["size"], ACL_MEMCPY_DEVICE_TO_HOST) - - return output_data - - def model_exe(self): - with MeasureTime(self.measurements, self.key, self.cpu_run): - ret = acl.mdl.execute(self.model_id, self.dataset_in, self.dataset_out) - assert ret == 0 - output_data = self.copy_output_to_host() - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset - - def model_exe_async(self): - with MeasureTime(self.measurements, self.key, self.cpu_run): - ret = acl.mdl.execute_async(self.model_id, self.dataset_in, self.dataset_out, self.stm) - assert ret == 0 - ret = acl.rt.synchronize_stream(self.stm) - assert ret == 0 - output_data = self.copy_output_to_host() - - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - # check_ret("acl.mdl.get_cur_output_dims", ret) - data_shape = dims.get("dims") - 
- data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset - - def model_exe_with_dynamic_dims(self, input_data, dims): - index, ret = acl.mdl.get_input_index_by_name(self.model_desc, 'ascend_mbatch_shape_data') - ret = acl.mdl.set_input_dynamic_dims(self.model_id, self.dataset_in, index, dims) - gear_count, ret = acl.mdl.get_input_dynamic_gear_count(self.model_desc, -1) - dims_out, ret = acl.mdl.get_input_dynamic_dims(self.model_desc, -1, gear_count) - self.copy_data_to_device(input_data) - if self.sync_infer is True: - res = self.model_exe() - else: - res = self.model_exe_async() - - return res - - def forward(self, input_data, dims): - input_data_dic = [] - for i in range(len(input_data)): - temp = {} - temp["size"] = input_data[i].size * input_data[i].itemsize - temp["buffer"] = input_data[i] - input_data_dic.append(temp) - result = self.model_exe_with_dynamic_dims(input_data_dic, dims) - return result +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
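+# AclModel wraps the pyACL runtime for offline OM inference: it initializes the device and
+# context, loads the .om model, allocates device buffers for the model inputs and outputs,
+# runs inference synchronously or asynchronously, and copies the results back to host memory
+# as numpy arrays shaped according to the model's output dims.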
+ +import acl +import functools +import numpy as np +import torch +import time + +# error code +ACL_ERROR_NONE = 0 + +# rule for memory copy +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +# dtype +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception(f"{message} failed ret={ret}") + + +class MeasureTime(): + def __init__(self, measurements, key, cpu_run=True): + self.measurements = measurements + self.key = key + self.cpu_run = cpu_run + + def __enter__(self): + if not self.cpu_run: + torch.cuda.synchronize() + self.t0 = time.perf_counter_ns() + + def __exit__(self, exc_type, exc_value, exc_traceback): + if not self.cpu_run: + torch.cuda.synchronize() + self.measurements[self.key] = time.perf_counter_ns() - self.t0 + + +class AclModel(object): + def __init__(self, device_id, model_path, sync_infer, measurements, key, cpu_run): + self.device_id = device_id + self.sync_infer = sync_infer + self.out_bufs_ptr = [] + self.output_sizes = [] + self.input_sizes = [] + self.input_bufs_ptr = [] + + self.measurements = measurements + self.key = key + self.cpu_run = cpu_run + + ret = acl.init() + check_ret("acl.init", ret) + ret = acl.rt.set_device(self.device_id) + check_ret("acl.rt.set_device", ret) + self.context, ret = acl.rt.create_context(self.device_id) + check_ret("acl.rt.create_context", ret) + self.model_id, ret = acl.mdl.load_from_file(model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + assert self.model_desc is not None + acl.mdl.get_desc(self.model_desc, self.model_id) + self.dataset_in = acl.mdl.create_dataset() + assert self.dataset_in is not None + self.dataset_out = acl.mdl.create_dataset() + assert self.dataset_out is not None + self.in_size, self.out_size = 0, 0 + self.stm, ret = acl.rt.create_stream() + assert ret == 0 + + self.desc_init() + self.dataset_init() + + def __call__(self, ori_data, dim): + return self.forward(ori_data, dim) + + def __del__(self): + # unload model + if self.model_id: + ret = acl.mdl.unload(self.model_id) + assert ret == 0 + + # destroy model desc + ret = acl.mdl.destroy_desc(self.model_desc) + assert ret == 0 + + self.destroy_data_set(self.dataset_in) + self.destroy_data_set(self.dataset_out) + + # destroy input/output tensor + for i in range(len(self.input_bufs_ptr)): + acl.rt.free(self.input_bufs_ptr[i]["buffer"]) + self.input_bufs_ptr[i] = None + + for i in range(len(self.out_bufs_ptr)): + acl.rt.free(self.out_bufs_ptr[i]["buffer"]) + self.out_bufs_ptr[i] = None + + ret = acl.rt.destroy_stream(self.stm) + assert ret == 0 + + def desc_init(self): + tensor_size = acl.mdl.get_num_inputs(self.model_desc) + if not tensor_size: + raise Exception("get_num_inputs failed") + self.in_size = tensor_size + + for i in range(tensor_size): + size = acl.mdl.get_input_size_by_index(self.model_desc, i) + data, ret = acl.rt.malloc(size, 0) + assert ret == 0 + + self.input_bufs_ptr.append({'size': size, 'buffer': data}) + self.input_sizes.append(size) + + tensor_size = acl.mdl.get_num_outputs(self.model_desc) + self.out_size = tensor_size + for i in range(tensor_size): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + assert ret == 0 + size = acl.mdl.get_output_size_by_index(self.model_desc, i) + + data, ret = 
acl.rt.malloc(size, 0) + assert ret == 0 + + self.output_sizes.append(size) + self.out_bufs_ptr.append({'size': size, 'buffer': data}) + + def dataset_init(self): + self.create_data_set(self.dataset_in, self.input_bufs_ptr, self.input_sizes) + self.create_data_set(self.dataset_out, self.out_bufs_ptr, self.output_sizes) + + def create_data_set(self, dataset, bufs_ptr_list, size_list): + # create dataset buffer then add to dataset + for i in range(len(size_list)): + buffer = acl.create_data_buffer(bufs_ptr_list[i]["buffer"], size_list[i]) + if not buffer: + self.destroy_data_set(dataset) + raise Exception("create_data_buffer failed") + + # add to dataset + _, ret = acl.mdl.add_dataset_buffer(dataset, buffer) + if ret != 0: + self.destroy_data_set(dataset) + raise Exception("add_dataset_buffer failed, ret = {}".format(ret)) + + return dataset + + def destroy_data_set(self, dataset): + data_buf_num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(data_buf_num): + # get data buffer by index + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf is not None: + acl.destroy_data_buffer(data_buf) + + acl.mdl.destroy_dataset(dataset) + + def copy_data_to_device(self, data): + for i in range(len(data)): + ptr, np = acl.util.numpy_contiguous_to_ptr(data[i]["buffer"]) + acl.rt.memcpy(self.input_bufs_ptr[i]["buffer"], data[i]["size"], ptr, + data[i]["size"], ACL_MEMCPY_HOST_TO_DEVICE) + + def copy_output_to_host(self): + output_data = [] + for i in range(len(self.out_bufs_ptr)): + temp = dict() + temp["size"] = self.out_bufs_ptr[i]["size"] + temp["buffer"], ret = acl.rt.malloc_host(temp["size"]) + output_data.append(temp) + acl.rt.memcpy(temp["buffer"], temp["size"], self.out_bufs_ptr[i]["buffer"], + temp["size"], ACL_MEMCPY_DEVICE_TO_HOST) + + return output_data + + def model_exe(self): + with MeasureTime(self.measurements, self.key, self.cpu_run): + ret = acl.mdl.execute(self.model_id, self.dataset_in, self.dataset_out) + assert ret == 0 + output_data = self.copy_output_to_host() + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset + + def model_exe_async(self): + with MeasureTime(self.measurements, self.key, self.cpu_run): + ret = acl.mdl.execute_async(self.model_id, self.dataset_in, self.dataset_out, self.stm) + assert ret == 0 + ret = acl.rt.synchronize_stream(self.stm) + assert ret == 0 + output_data = self.copy_output_to_host() + + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + # check_ret("acl.mdl.get_cur_output_dims", ret) + data_shape = dims.get("dims") + + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = 
np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset + + def model_exe_with_dynamic_dims(self, input_data, dims): + index, ret = acl.mdl.get_input_index_by_name(self.model_desc, 'ascend_mbatch_shape_data') + ret = acl.mdl.set_input_dynamic_dims(self.model_id, self.dataset_in, index, dims) + gear_count, ret = acl.mdl.get_input_dynamic_gear_count(self.model_desc, -1) + dims_out, ret = acl.mdl.get_input_dynamic_dims(self.model_desc, -1, gear_count) + self.copy_data_to_device(input_data) + if self.sync_infer is True: + res = self.model_exe() + else: + res = self.model_exe_async() + + return res + + def forward(self, input_data, dims): + input_data_dic = [] + for i in range(len(input_data)): + temp = {} + temp["size"] = input_data[i].size * input_data[i].itemsize + temp["buffer"] = input_data[i] + input_data_dic.append(temp) + result = self.model_exe_with_dynamic_dims(input_data_dic, dims) + return result diff --git a/ACL_PyTorch/built-in/audio/LSTM/lstm_atc.sh b/ACL_PyTorch/built-in/audio/LSTM/lstm_atc.sh index 738f842581831a9bb68775ccba85f8233d7be167..42ed54470d55466987c00fdaec89a81ff5b5e2c5 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/lstm_atc.sh +++ b/ACL_PyTorch/built-in/audio/LSTM/lstm_atc.sh @@ -1,6 +1,6 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATHexport ASCEND_OPP_PATH=${install_path}/opp - +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATHexport ASCEND_OPP_PATH=${install_path}/opp + /usr/local/Ascend/ascend-toolkit/latest/atc/bin/atc --input_format=ND --framework=5 --model=lstm_ctc_16batch.onnx --input_shape="actual_input_1:16,390,243" --output=lstm_ctc_16batch_auto --auto_tune_mode="RL,GA" --log=info --soc_version=Ascend310 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/LSTM/lstm_onnx_infer.py b/ACL_PyTorch/built-in/audio/LSTM/lstm_onnx_infer.py index b62b2e882cb40856c1f577bcbea1d2b60d20ed84..6e94edaf2027af5532026dfb6b225622bac88146 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/lstm_onnx_infer.py +++ b/ACL_PyTorch/built-in/audio/LSTM/lstm_onnx_infer.py @@ -1,155 +1,155 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import time -import sys -import torch -import yaml -import argparse -import onnxruntime -import torch.nn as nn -import numpy as np -from glob import glob -from tqdm import tqdm - -sys.path.append('./') -from models.model_ctc import * -from utils.ctcDecoder import GreedyDecoder, BeamDecoder - -parser = argparse.ArgumentParser() -parser.add_argument('--conf', help='conf file for training') - -parser.add_argument('--model_path', required=True) -parser.add_argument('--bin_file_path', required=True) -parser.add_argument('--pred_res_save_path', required=True) -parser.add_argument('--batchsize', required=True, help='batchsize for onnx infering') - -class Config(object): - batch_size = 4 - dropout = 0.1 - -class Vocab(object): - def __init__(self, vocab_file): - self.vocab_file = vocab_file - self.word2index = {"blank": 0, "UNK": 1} - self.index2word = {0: "blank", 1: "UNK"} - self.word2count = {} - self.n_words = 2 - self.read_lang() - - def add_sentence(self, sentence): - for word in sentence.split(' '): - self.add_word(word) - - def add_word(self, word): - if word not in self.word2index: - self.word2index[word] = self.n_words - self.word2count[word] = 1 - self.index2word[self.n_words] = word - self.n_words += 1 - else: - self.word2count[word] += 1 - - def read_lang(self): - print("Reading vocabulary from {}".format(self.vocab_file)) - with open(self.vocab_file, 'r') as rf: - line = rf.readline() - while line: - line = line.strip().split(' ') - if len(line) > 1: - sen = ' '.join(line[1:]) - else: - sen = line[0] - self.add_sentence(sen) - line = rf.readline() - print("Vocabulary size is {}".format(self.n_words)) - -def lstm_onnx_infer(): - args = parser.parse_args() - - model_path = args.model_path - bin_file_path = args.bin_file_path - pred_res_save_path = args.pred_res_save_path - - try: - conf = yaml.safe_load(open(args.conf,'r')) - except: - print("Config file not exist!") - sys.exit(1) - - opts = Config() - for k,v in conf.items(): - setattr(opts, k, v) - print('{:50}:{}'.format(k, v)) - - beam_width = opts.beam_width - lm_alpha = opts.lm_alpha - decoder_type = opts.decode_type - vocab_file = opts.vocab_file - batchsize = int(args.batchsize) - - vocab = Vocab(vocab_file) - - # 读取数据目录 - bin_file_list = glob(os.path.join(bin_file_path, '*.bin')) - bin_file_num = len(bin_file_list) - - # 创建目录 - pardir = os.path.dirname(pred_res_save_path) - if not os.path.exists(pardir): - os.makedirs(pardir) - - # 推理 - print('[INFO] Infer on dataset ...') - transcription_list = [] - total_infer_time = 0 - total_infer_num = 0 - - with open(pred_res_save_path, 'wt', encoding='utf-8') as f_pred: - onnx_run_sess = onnxruntime.InferenceSession(model_path) - for i in tqdm(range(bin_file_num)): - # 数据预处理 - input = np.fromfile(os.path.join(bin_file_path, 'inputs_' + str(i) + '.bin'), dtype=np.float32) - input = input.reshape(batchsize, 390, 243) - - # 推理 - start_time = time.perf_counter_ns() - output = onnx_run_sess.run(None, {"actual_input_1":input}) - end_time = time.perf_counter_ns() - total_infer_time += end_time - start_time - total_infer_num += 1 - - #推理时间 - print('[INFO] Time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' - print(msg) - with open(os.path.join(pardir, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: - f_infer_time.write(msg) - - -if __name__ == '__main__': - ''' - Using Example: - - 
python onnx_local_infer.py \ - --conf=./conf/ctc_config.yaml \ - --model_path=./lstm_onnx/lstm_ctc_npu_16batch1_67.onnx \ - --bin_file_path=--bin_file_path=./lstm_bin/ \ - --pred_res_save_path=./lstm_onnx_infer \ - --batchsize=16 - ''' - lstm_onnx_infer() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import sys +import torch +import yaml +import argparse +import onnxruntime +import torch.nn as nn +import numpy as np +from glob import glob +from tqdm import tqdm + +sys.path.append('./') +from models.model_ctc import * +from utils.ctcDecoder import GreedyDecoder, BeamDecoder + +parser = argparse.ArgumentParser() +parser.add_argument('--conf', help='conf file for training') + +parser.add_argument('--model_path', required=True) +parser.add_argument('--bin_file_path', required=True) +parser.add_argument('--pred_res_save_path', required=True) +parser.add_argument('--batchsize', required=True, help='batchsize for onnx infering') + +class Config(object): + batch_size = 4 + dropout = 0.1 + +class Vocab(object): + def __init__(self, vocab_file): + self.vocab_file = vocab_file + self.word2index = {"blank": 0, "UNK": 1} + self.index2word = {0: "blank", 1: "UNK"} + self.word2count = {} + self.n_words = 2 + self.read_lang() + + def add_sentence(self, sentence): + for word in sentence.split(' '): + self.add_word(word) + + def add_word(self, word): + if word not in self.word2index: + self.word2index[word] = self.n_words + self.word2count[word] = 1 + self.index2word[self.n_words] = word + self.n_words += 1 + else: + self.word2count[word] += 1 + + def read_lang(self): + print("Reading vocabulary from {}".format(self.vocab_file)) + with open(self.vocab_file, 'r') as rf: + line = rf.readline() + while line: + line = line.strip().split(' ') + if len(line) > 1: + sen = ' '.join(line[1:]) + else: + sen = line[0] + self.add_sentence(sen) + line = rf.readline() + print("Vocabulary size is {}".format(self.n_words)) + +def lstm_onnx_infer(): + args = parser.parse_args() + + model_path = args.model_path + bin_file_path = args.bin_file_path + pred_res_save_path = args.pred_res_save_path + + try: + conf = yaml.safe_load(open(args.conf,'r')) + except: + print("Config file not exist!") + sys.exit(1) + + opts = Config() + for k,v in conf.items(): + setattr(opts, k, v) + print('{:50}:{}'.format(k, v)) + + beam_width = opts.beam_width + lm_alpha = opts.lm_alpha + decoder_type = opts.decode_type + vocab_file = opts.vocab_file + batchsize = int(args.batchsize) + + vocab = Vocab(vocab_file) + + # 读取数据目录 + bin_file_list = glob(os.path.join(bin_file_path, '*.bin')) + bin_file_num = len(bin_file_list) + + # 创建目录 + pardir = os.path.dirname(pred_res_save_path) + if not os.path.exists(pardir): + os.makedirs(pardir) + + # 推理 + print('[INFO] Infer on dataset ...') + transcription_list = [] + total_infer_time = 0 + total_infer_num = 0 + + with open(pred_res_save_path, 'wt', encoding='utf-8') as f_pred: + onnx_run_sess = onnxruntime.InferenceSession(model_path) + 
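+        # iterate over the preprocessed inputs_<i>.bin files; each file holds one batch of shape (batchsize, 390, 243)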
for i in tqdm(range(bin_file_num)): + # 数据预处理 + input = np.fromfile(os.path.join(bin_file_path, 'inputs_' + str(i) + '.bin'), dtype=np.float32) + input = input.reshape(batchsize, 390, 243) + + # 推理 + start_time = time.perf_counter_ns() + output = onnx_run_sess.run(None, {"actual_input_1":input}) + end_time = time.perf_counter_ns() + total_infer_time += end_time - start_time + total_infer_num += 1 + + #推理时间 + print('[INFO] Time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + print(msg) + with open(os.path.join(pardir, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: + f_infer_time.write(msg) + + +if __name__ == '__main__': + ''' + Using Example: + + python onnx_local_infer.py \ + --conf=./conf/ctc_config.yaml \ + --model_path=./lstm_onnx/lstm_ctc_npu_16batch1_67.onnx \ + --bin_file_path=--bin_file_path=./lstm_bin/ \ + --pred_res_save_path=./lstm_onnx_infer \ + --batchsize=16 + ''' + lstm_onnx_infer() diff --git a/ACL_PyTorch/built-in/audio/LSTM/prepare_data.sh b/ACL_PyTorch/built-in/audio/LSTM/prepare_data.sh index 29bd7427efb2bda0c6a280281cced0644552a77a..0010bac6161e92c0fc05ec55471034391326d8e4 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/prepare_data.sh +++ b/ACL_PyTorch/built-in/audio/LSTM/prepare_data.sh @@ -1,31 +1,31 @@ -#!/bin/bash - -#Author: Ruchao Fan -#2017.11.1 Training acoustic model and decode with phoneme-level bigram -#2018.4.30 Replace the h5py with ark and simplify the data_loader.py -#2019.12.20 Update to pytorch1.2 and python3.7 - -. path.sh - -stage=0 - -timit_dir='../data' -phoneme_map='60-39' -feat_dir='data' #dir to save feature -feat_type='fbank' #fbank, mfcc, spectrogram -config_file='conf/ctc_config.yaml' - -if [ ! -z $1 ]; then - stage=$1 -fi - -if [ $stage -le 0 ]; then - echo "Step 0: Data Preparation ..." - local/timit_data_prep.sh $timit_dir $phoneme_map || exit 1; - python3 steps/get_model_units.py $feat_dir/train/phn_text -fi - -if [ $stage -le 1 ]; then - echo "Step 1: Feature Extraction..." - steps/make_feat.sh $feat_type $feat_dir || exit 1; -fi +#!/bin/bash + +#Author: Ruchao Fan +#2017.11.1 Training acoustic model and decode with phoneme-level bigram +#2018.4.30 Replace the h5py with ark and simplify the data_loader.py +#2019.12.20 Update to pytorch1.2 and python3.7 + +. path.sh + +stage=0 + +timit_dir='../data' +phoneme_map='60-39' +feat_dir='data' #dir to save feature +feat_type='fbank' #fbank, mfcc, spectrogram +config_file='conf/ctc_config.yaml' + +if [ ! -z $1 ]; then + stage=$1 +fi + +if [ $stage -le 0 ]; then + echo "Step 0: Data Preparation ..." + local/timit_data_prep.sh $timit_dir $phoneme_map || exit 1; + python3 steps/get_model_units.py $feat_dir/train/phn_text +fi + +if [ $stage -le 1 ]; then + echo "Step 1: Feature Extraction..." + steps/make_feat.sh $feat_type $feat_dir || exit 1; +fi diff --git a/ACL_PyTorch/built-in/audio/LSTM/pyacl_infer.py b/ACL_PyTorch/built-in/audio/LSTM/pyacl_infer.py index de1af0191e7fe9728bf0c84a1760cc6eb4e175b2..26284c1cdef3ab0fc6d6780e7b00811c3b7b0a0d 100644 --- a/ACL_PyTorch/built-in/audio/LSTM/pyacl_infer.py +++ b/ACL_PyTorch/built-in/audio/LSTM/pyacl_infer.py @@ -1,157 +1,157 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import acl -from acl_net import AclModel - -import os -import shutil -import argparse -import numpy as np -from tqdm import tqdm - -DTYPE = { - 'float32': np.float32, - 'float64': np.float64, - 'int32': np.int32, - 'int64': np.int64 -} - -if __name__ == '__main__': - ''' - 参数说明: - --model_path:模型路径 - --device_id:npu id - --cpu_run:MeasureTime类的cpu_run参数,True or False - --sync_infer:推理方式: - True:同步推理 - False:异步推理 - --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 - --input_info_file_path:类似benchmark的bin_info文件 - --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) - e.g. 模型只有一个输入:--input_dtypes=float32 - e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) - --infer_res_save_path:推理结果保存目录 - --res_save_type:推理结果保存类型,bin或npy - - info文件说明: - 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. - ``` - 0 ./bert_bin/input_ids_0.bin (1,512) - 0 ./bert_bin/segment_ids_0.bin (1,512) - 0 ./bert_bin/input_mask_0.bin (1,512) - 1 ./bert_bin/input_ids_1.bin (1,512) - 1 ./bert_bin/segment_ids_1.bin (1,512) - 1 ./bert_bin/input_mask_1.bin (1,512) - ``` - - Using Example: - python3.7 pyacl_infer.py \ - --model_path=./bert_base_batch_1_sim_auto.om \ - --device_id=0 \ - --cpu_run=True \ - --sync_infer=True \ - --workspace=10 \ - --input_info_file_path=./input.info \ - --input_dtypes=int64,int64,int64 \ - --infer_res_save_path=./infer_res \ - --res_save_type=bin - ''' - - # 参数解析 - parser = argparse.ArgumentParser() - parser.add_argument('--model_path', required=True) - parser.add_argument('--device_id', required=True, type=int) - parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) - parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) - parser.add_argument('--workspace', required=True, type=int) - parser.add_argument('--input_info_file_path', required=True) - parser.add_argument('--input_dtypes', required=True) - parser.add_argument('--infer_res_save_path', required=True) - parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) - opt = parser.parse_args() - - # 创建模型 - measurements = {} - om_model = AclModel(device_id=opt.device_id, - model_path=opt.model_path, - sync_infer=eval(opt.sync_infer), - measurements=measurements, - key='per_infer_time_ns', - cpu_run=eval(opt.cpu_run)) - - # 创建目录 - if os.path.exists(opt.infer_res_save_path): - shutil.rmtree(opt.infer_res_save_path) - os.makedirs(opt.infer_res_save_path) - - # 读取info_file - inputs_info = {} - with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: - line = f_info.readline() - while line: - line = line.rstrip('\n') - contents = line.split() - info = {'path': contents[1], 'shape': eval(contents[2])} - inputs_info.setdefault(contents[0], []).append(info) - line = f_info.readline() - - # 解析输入类型 - input_dtypes = opt.input_dtypes.split(',') - input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) - - # 读取文件推理 - total_infer_time = 0 - total_infer_time_workspace = 0 - total_infer_num = 0 - for key, values in tqdm(inputs_info.items()): - # 构造输入 - inputs = [] - dims = [] - for idx, value in enumerate(values): - x = np.fromfile(value['path'], 
dtype=input_dtypes[idx]).reshape(value['shape']) - inputs.append(x) - dims.extend(value['shape']) - dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} - - # 推理得到输出 - output = om_model(inputs, dims_info) - #print("[INFO]: output()", len(output)) - #print("[INFO]: output.shape()", output[0].shape) - #exit() - total_infer_num += 1 - - # 保存文件 - if opt.res_save_type == 'bin': - for idx, data in enumerate(output): - data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) - else: - for idx, data in enumerate(output): - np.save(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.npy'), data) - - # 计算时间 - total_infer_time += measurements['per_infer_time_ns'] - if total_infer_num > opt.workspace: - total_infer_time_workspace += measurements['per_infer_time_ns'] - - # 推理时间 - print('[INFO] Infer time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ - 'average pure infer time after workspace(ms): ' + str(abs( - total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' - print(msg) - with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: - f_infer_time.write(msg) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import acl +from acl_net import AclModel + +import os +import shutil +import argparse +import numpy as np +from tqdm import tqdm + +DTYPE = { + 'float32': np.float32, + 'float64': np.float64, + 'int32': np.int32, + 'int64': np.int64 +} + +if __name__ == '__main__': + ''' + 参数说明: + --model_path:模型路径 + --device_id:npu id + --cpu_run:MeasureTime类的cpu_run参数,True or False + --sync_infer:推理方式: + True:同步推理 + False:异步推理 + --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 + --input_info_file_path:类似benchmark的bin_info文件 + --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) + e.g. 模型只有一个输入:--input_dtypes=float32 + e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) + --infer_res_save_path:推理结果保存目录 + --res_save_type:推理结果保存类型,bin或npy + + info文件说明: + 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. 
+ ``` + 0 ./bert_bin/input_ids_0.bin (1,512) + 0 ./bert_bin/segment_ids_0.bin (1,512) + 0 ./bert_bin/input_mask_0.bin (1,512) + 1 ./bert_bin/input_ids_1.bin (1,512) + 1 ./bert_bin/segment_ids_1.bin (1,512) + 1 ./bert_bin/input_mask_1.bin (1,512) + ``` + + Using Example: + python3.7 pyacl_infer.py \ + --model_path=./bert_base_batch_1_sim_auto.om \ + --device_id=0 \ + --cpu_run=True \ + --sync_infer=True \ + --workspace=10 \ + --input_info_file_path=./input.info \ + --input_dtypes=int64,int64,int64 \ + --infer_res_save_path=./infer_res \ + --res_save_type=bin + ''' + + # 参数解析 + parser = argparse.ArgumentParser() + parser.add_argument('--model_path', required=True) + parser.add_argument('--device_id', required=True, type=int) + parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) + parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) + parser.add_argument('--workspace', required=True, type=int) + parser.add_argument('--input_info_file_path', required=True) + parser.add_argument('--input_dtypes', required=True) + parser.add_argument('--infer_res_save_path', required=True) + parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) + opt = parser.parse_args() + + # 创建模型 + measurements = {} + om_model = AclModel(device_id=opt.device_id, + model_path=opt.model_path, + sync_infer=eval(opt.sync_infer), + measurements=measurements, + key='per_infer_time_ns', + cpu_run=eval(opt.cpu_run)) + + # 创建目录 + if os.path.exists(opt.infer_res_save_path): + shutil.rmtree(opt.infer_res_save_path) + os.makedirs(opt.infer_res_save_path) + + # 读取info_file + inputs_info = {} + with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: + line = f_info.readline() + while line: + line = line.rstrip('\n') + contents = line.split() + info = {'path': contents[1], 'shape': eval(contents[2])} + inputs_info.setdefault(contents[0], []).append(info) + line = f_info.readline() + + # 解析输入类型 + input_dtypes = opt.input_dtypes.split(',') + input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) + + # 读取文件推理 + total_infer_time = 0 + total_infer_time_workspace = 0 + total_infer_num = 0 + for key, values in tqdm(inputs_info.items()): + # 构造输入 + inputs = [] + dims = [] + for idx, value in enumerate(values): + x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) + inputs.append(x) + dims.extend(value['shape']) + dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} + + # 推理得到输出 + output = om_model(inputs, dims_info) + #print("[INFO]: output()", len(output)) + #print("[INFO]: output.shape()", output[0].shape) + #exit() + total_infer_num += 1 + + # 保存文件 + if opt.res_save_type == 'bin': + for idx, data in enumerate(output): + data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) + else: + for idx, data in enumerate(output): + np.save(os.path.join(opt.infer_res_save_path, key + '.' 
+ str(idx) + '.npy'), data) + + # 计算时间 + total_infer_time += measurements['per_infer_time_ns'] + if total_infer_num > opt.workspace: + total_infer_time_workspace += measurements['per_infer_time_ns'] + + # 推理时间 + print('[INFO] Infer time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ + 'average pure infer time after workspace(ms): ' + str(abs( + total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' + print(msg) + with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: + f_infer_time.write(msg) diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/LICENSE b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/LICENSE index 6ae23f3ae77ebaaf09980b82ec0ae2b1e7ba138f..68faf63cc5e11d595689e449524fe79c100d5bd9 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/LICENSE +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2021 eurecom-asp - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +MIT License + +Copyright (c) 2021 eurecom-asp + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/README.md b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/README.md index e90ac7edc5385d33ef56732c6a72a2699727d6b2..b97f4f754c6cc3fcbd5b3d0777d8810a7c428ddf 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/README.md +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/README.md @@ -1,100 +1,100 @@ -# RawNet2模型推理指导 - -- [1 文件说明](#1-文件说明) -- [2 环境准备](#2-环境准备) - - [2.1 文件下载](#21-文件下载) - - [2.2 文件拷贝](#22-文件拷贝) - - [2.3 设置环境变量](#23-设置环境变量) -- [3 端到端推理步骤](#3-端到端推理步骤) - - [3.1 修改pytorch模型源码](#31-修改pytorch模型源码) - - [3.2 pth转onnx模型](#32-pth转onnx模型) - - [3.3 修改导出的onnx模型](#33-修改导出的onnx模型) - - [3.4 利用ATC工具转换为om模型](#34-利用ATC工具转换为om模型) - - [3.5 om模型推理](#35-om模型推理) - ------- - -## 1 文件说明 -``` -RawNet2_for_Pytorch - ├── env.sh 设置环境变量 - ├── pth2onnx.py pytorch模型导出onnx模型 - ├── modify_onnx.py 修改导出的onnx模型 - ├── atc.sh onnx模型转om - ├── om_infer.py 推理导出的om模型 - └── acl_net.py PyACL推理工具代码 -``` - -## 2 环境准备 - -### 2.1 文件下载 -- [RawNet2_Pytorch源码下载](https://github.com/asvspoof-challenge/2021/tree/main/LA/Baseline-RawNet2) - ``` - git clone https://github.com/asvspoof-challenge/2021.git - cd 2021/LA/Baseline-RawNet2/ - ``` -- [权重下载](https://www.asvspoof.org/asvspoof2021/pre_trained_DF_RawNet2.zip) -- [数据集下载](https://datashare.ed.ac.uk/handle/10283/3336) - om推理采用ASVspoof2019数据集的验证集进行精度评估。 - -### 2.2 文件拷贝 -拷贝env.sh,pth2onnx.py,modify_onnx.py,atc.sh,om_infer.py,acl_net.py文件到2021/LA/Baseline-RawNet2/目录下。 -将下载的权重文件pre_trained_DF_RawNet2.pth放在和代码同一目录下。 -在同一目录下创建data目录并将下载的数据集放入,data目录中的文件结构如下所示。 -``` -data - └── LA - ├── ASVspoof2019_LA_asv_protocols - ├── ASVspoof2019_LA_asv_scores - ├── ASVspoof2019_LA_cm_protocols - ├── ASVspoof2019_LA_dev - ├── ASVspoof2019_LA_eval - └── ASVspoof2019_LA_train -``` - -### 2.3 设置环境变量 -```shell -source env.sh -``` - -## 3 端到端推理步骤 - -### 3.1 修改pytorch模型源码 -导onnx模型需要修改2021/LA/Baseline-RawNet2/model.py中的SincConv类,在该类的forward函数中增加一行,如下所示。 -```python -self.band_pass = torch.from_numpy(self.band_pass.numpy()) # 增加行,和下行缩进保持一致 -band_pass_filter=self.band_pass.to(self.device) # 根据该行代码找到增加位置 -``` - -### 3.2 pth导出onnx -```python -python3.7 pth2onnx.py \ - --pth_model=pre_trained_DF_RawNet2.pth \ - --onnx_model=rawnet2_ori.onnx \ - --batch_size=1 -``` - -### 3.3 修改导出的onnx模型 -```python -python3.7 -m onnxsim rawnet2_ori.onnx rawnet2_sim.onnx - -python3.7 modify_onnx.py \ - --input_onnx=rawnet2_sim.onnx \ - --output_onnx=rawnet2_modify.onnx -``` - -### 3.4 利用ATC工具转换为om模型 -```shell -bash atc.sh rawnet2_modify.onnx rawnet2_modify input:1,64600 -``` -注:目前ATC支持的onnx算子版本为11 - -### 3.5 om模型推理 -```python -python3.7 om_infer.py \ - --batch_size=1 \ - --om_path=rawnet2_modify.om \ - --eval_output='rawnet2_modify_om.txt' \ - --database_path='data/LA/' \ - --protocols_path='data/LA/' +# RawNet2模型推理指导 + +- [1 文件说明](#1-文件说明) +- [2 环境准备](#2-环境准备) + - [2.1 文件下载](#21-文件下载) + - [2.2 文件拷贝](#22-文件拷贝) + - [2.3 设置环境变量](#23-设置环境变量) +- [3 端到端推理步骤](#3-端到端推理步骤) + - [3.1 修改pytorch模型源码](#31-修改pytorch模型源码) + - [3.2 pth转onnx模型](#32-pth转onnx模型) + - [3.3 修改导出的onnx模型](#33-修改导出的onnx模型) + - [3.4 利用ATC工具转换为om模型](#34-利用ATC工具转换为om模型) + - [3.5 om模型推理](#35-om模型推理) + +------ + +## 1 文件说明 +``` +RawNet2_for_Pytorch + ├── env.sh 设置环境变量 + ├── pth2onnx.py pytorch模型导出onnx模型 + ├── modify_onnx.py 修改导出的onnx模型 + ├── atc.sh onnx模型转om + ├── om_infer.py 推理导出的om模型 + └── acl_net.py PyACL推理工具代码 +``` + +## 2 环境准备 + +### 2.1 文件下载 +- [RawNet2_Pytorch源码下载](https://github.com/asvspoof-challenge/2021/tree/main/LA/Baseline-RawNet2) + ``` + git clone 
https://github.com/asvspoof-challenge/2021.git + cd 2021/LA/Baseline-RawNet2/ + ``` +- [权重下载](https://www.asvspoof.org/asvspoof2021/pre_trained_DF_RawNet2.zip) +- [数据集下载](https://datashare.ed.ac.uk/handle/10283/3336) + om推理采用ASVspoof2019数据集的验证集进行精度评估。 + +### 2.2 文件拷贝 +拷贝env.sh,pth2onnx.py,modify_onnx.py,atc.sh,om_infer.py,acl_net.py文件到2021/LA/Baseline-RawNet2/目录下。 +将下载的权重文件pre_trained_DF_RawNet2.pth放在和代码同一目录下。 +在同一目录下创建data目录并将下载的数据集放入,data目录中的文件结构如下所示。 +``` +data + └── LA + ├── ASVspoof2019_LA_asv_protocols + ├── ASVspoof2019_LA_asv_scores + ├── ASVspoof2019_LA_cm_protocols + ├── ASVspoof2019_LA_dev + ├── ASVspoof2019_LA_eval + └── ASVspoof2019_LA_train +``` + +### 2.3 设置环境变量 +```shell +source env.sh +``` + +## 3 端到端推理步骤 + +### 3.1 修改pytorch模型源码 +导onnx模型需要修改2021/LA/Baseline-RawNet2/model.py中的SincConv类,在该类的forward函数中增加一行,如下所示。 +```python +self.band_pass = torch.from_numpy(self.band_pass.numpy()) # 增加行,和下行缩进保持一致 +band_pass_filter=self.band_pass.to(self.device) # 根据该行代码找到增加位置 +``` + +### 3.2 pth导出onnx +```python +python3.7 pth2onnx.py \ + --pth_model=pre_trained_DF_RawNet2.pth \ + --onnx_model=rawnet2_ori.onnx \ + --batch_size=1 +``` + +### 3.3 修改导出的onnx模型 +```python +python3.7 -m onnxsim rawnet2_ori.onnx rawnet2_sim.onnx + +python3.7 modify_onnx.py \ + --input_onnx=rawnet2_sim.onnx \ + --output_onnx=rawnet2_modify.onnx +``` + +### 3.4 利用ATC工具转换为om模型 +```shell +bash atc.sh rawnet2_modify.onnx rawnet2_modify input:1,64600 +``` +注:目前ATC支持的onnx算子版本为11 + +### 3.5 om模型推理 +```python +python3.7 om_infer.py \ + --batch_size=1 \ + --om_path=rawnet2_modify.om \ + --eval_output='rawnet2_modify_om.txt' \ + --database_path='data/LA/' \ + --protocols_path='data/LA/' ``` \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/acl_net.py b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/acl_net.py index 69f1ab160a061b11921b6c7e715504ecd3c43544..567fa5630ef931ede194648b4bf006d6cf6e3ca6 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/acl_net.py +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/acl_net.py @@ -1,256 +1,256 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import time -import numpy as np -import functools -import acl - -# error code -ACL_ERROR_NONE = 0 - -# rule for mem -ACL_MEM_MALLOC_HUGE_FIRST = 0 -ACL_MEM_MALLOC_HUGE_ONLY = 1 -ACL_MEM_MALLOC_NORMAL_ONLY = 2 - -# rule for memory copy -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - -buffer_method = { - "in": acl.mdl.get_input_size_by_index, - "out": acl.mdl.get_output_size_by_index -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception("{} failed ret={}".format(message, ret)) - - -class Net(object): - def __init__(self, model_path, device_id=0, config_path=None): - self.device_id = device_id # int - self.model_path = model_path # string - self.model_id = None # pointer - self.context = None # pointer - - self.input_data = [] - self.output_data = [] - self.model_desc = None # pointer when using - self.load_input_dataset = None - self.load_output_dataset = None - - self._init_resource(config_path) - - def __call__(self, ori_data): - return self.forward(ori_data) - - def __del__(self): - ret = acl.mdl.unload(self.model_id) - check_ret("acl.mdl.unload", ret) - if self.model_desc: - acl.mdl.destroy_desc(self.model_desc) - self.model_desc = None - - while self.input_data: - item = self.input_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data: - item = self.output_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - if self.context: - ret = acl.rt.destroy_context(self.context) - check_ret("acl.rt.destroy_context", ret) - self.context = None - - ret = acl.rt.reset_device(self.device_id) - check_ret("acl.rt.reset_device", ret) - ret = acl.finalize() - check_ret("acl.finalize", ret) - - def _init_resource(self, config_path=None): - if config_path: - ret = acl.init(config_path) - else: - ret = acl.init() - check_ret("acl.init", ret) - ret = acl.rt.set_device(self.device_id) - check_ret("acl.rt.set_device", ret) - - self.context, ret = acl.rt.create_context(self.device_id) - check_ret("acl.rt.create_context", ret) - - # load_model - self.model_id, ret = acl.mdl.load_from_file(self.model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - self._get_model_info() - - def _get_model_info(self, ): - ret = acl.mdl.get_desc(self.model_desc, self.model_id) - check_ret("acl.mdl.get_desc", ret) - input_size = acl.mdl.get_num_inputs(self.model_desc) - output_size = acl.mdl.get_num_outputs(self.model_desc) - self._gen_data_buffer(input_size, des="in") - self._gen_data_buffer(output_size, des="out") - - def _gen_data_buffer(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc(temp_buffer_size, - ACL_MEM_MALLOC_HUGE_FIRST) - check_ret("acl.rt.malloc", ret) - - if des == "in": - self.input_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - elif des == "out": - self.output_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): - temp_data_buffer = self.input_data \ - if policy == ACL_MEMCPY_HOST_TO_DEVICE \ - else self.output_data - if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: 
- for item in self.output_data: - temp, ret = acl.rt.malloc_host(item["size"]) - if ret != 0: - raise Exception("can't malloc_host ret={}".format(ret)) - dataset.append({"size": item["size"], "buffer": temp}) - - for i, item in enumerate(temp_data_buffer): - if policy == ACL_MEMCPY_HOST_TO_DEVICE: - bytes_in = dataset[i].tobytes() - ptr = acl.util.bytes_to_ptr(bytes_in) - ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - else: - ptr = dataset[i]["buffer"] - ret = acl.rt.memcpy(ptr, - item["size"], - item["buffer"], - item["size"], - policy) - check_ret("acl.rt.memcpy", ret) - - def _gen_dataset(self, type_str="input"): - dataset = acl.mdl.create_dataset() - - temp_dataset = None - if type_str == "in": - self.load_input_dataset = dataset - temp_dataset = self.input_data - else: - self.load_output_dataset = dataset - temp_dataset = self.output_data - - for item in temp_dataset: - data = acl.create_data_buffer(item["buffer"], item["size"]) - if data is None: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - _, ret = acl.mdl.add_dataset_buffer(dataset, data) - - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - def _data_from_host_to_device(self, images): - self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) - self._gen_dataset("in") - self._gen_dataset("out") - - def _data_from_device_to_host(self): - res = [] - self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) - output = self.get_result(res) - return output - - def _destroy_databuffer(self, ): - for dataset in [self.load_input_dataset, self.load_output_dataset]: - if not dataset: - continue - - num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(num): - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf: - ret = acl.destroy_data_buffer(data_buf) - check_ret("acl.destroy_data_buffer", ret) - ret = acl.mdl.destroy_dataset(dataset) - check_ret("acl.mdl.destroy_dataset", ret) - - def forward(self, input_data): - if not isinstance(input_data, (list, tuple)): - input_data = [input_data] - st = time.time() - self._data_from_host_to_device(input_data) - mem_t = time.time() - st - st = time.time() - ret = acl.mdl.execute(self.model_id, - self.load_input_dataset, - self.load_output_dataset) - exe_t = time.time() - st - st = time.time() - check_ret("acl.mdl.execute", ret) - self._destroy_databuffer() - result = self._data_from_device_to_host() - mem_t += time.time() - st - return result - - def get_result(self, output_data): - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - check_ret("acl.mdl.get_cur_output_dims", ret) - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_bytes(ptr, size) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import numpy as np +import functools +import acl + +# error code +ACL_ERROR_NONE = 0 + +# rule for mem +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_HUGE_ONLY = 1 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 + +# rule for memory copy +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + +buffer_method = { + "in": acl.mdl.get_input_size_by_index, + "out": acl.mdl.get_output_size_by_index +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception("{} failed ret={}".format(message, ret)) + + +class Net(object): + def __init__(self, model_path, device_id=0, config_path=None): + self.device_id = device_id # int + self.model_path = model_path # string + self.model_id = None # pointer + self.context = None # pointer + + self.input_data = [] + self.output_data = [] + self.model_desc = None # pointer when using + self.load_input_dataset = None + self.load_output_dataset = None + + self._init_resource(config_path) + + def __call__(self, ori_data): + return self.forward(ori_data) + + def __del__(self): + ret = acl.mdl.unload(self.model_id) + check_ret("acl.mdl.unload", ret) + if self.model_desc: + acl.mdl.destroy_desc(self.model_desc) + self.model_desc = None + + while self.input_data: + item = self.input_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data: + item = self.output_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + if self.context: + ret = acl.rt.destroy_context(self.context) + check_ret("acl.rt.destroy_context", ret) + self.context = None + + ret = acl.rt.reset_device(self.device_id) + check_ret("acl.rt.reset_device", ret) + ret = acl.finalize() + check_ret("acl.finalize", ret) + + def _init_resource(self, config_path=None): + if config_path: + ret = acl.init(config_path) + else: + ret = acl.init() + check_ret("acl.init", ret) + ret = acl.rt.set_device(self.device_id) + check_ret("acl.rt.set_device", ret) + + self.context, ret = acl.rt.create_context(self.device_id) + check_ret("acl.rt.create_context", ret) + + # load_model + self.model_id, ret = acl.mdl.load_from_file(self.model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + self._get_model_info() + + def _get_model_info(self, ): + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + check_ret("acl.mdl.get_desc", ret) + input_size = acl.mdl.get_num_inputs(self.model_desc) + output_size = acl.mdl.get_num_outputs(self.model_desc) + self._gen_data_buffer(input_size, des="in") + self._gen_data_buffer(output_size, des="out") + + def _gen_data_buffer(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc(temp_buffer_size, + ACL_MEM_MALLOC_HUGE_FIRST) + check_ret("acl.rt.malloc", ret) + + if des == "in": + 
self.input_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + elif des == "out": + self.output_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): + temp_data_buffer = self.input_data \ + if policy == ACL_MEMCPY_HOST_TO_DEVICE \ + else self.output_data + if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: + for item in self.output_data: + temp, ret = acl.rt.malloc_host(item["size"]) + if ret != 0: + raise Exception("can't malloc_host ret={}".format(ret)) + dataset.append({"size": item["size"], "buffer": temp}) + + for i, item in enumerate(temp_data_buffer): + if policy == ACL_MEMCPY_HOST_TO_DEVICE: + bytes_in = dataset[i].tobytes() + ptr = acl.util.bytes_to_ptr(bytes_in) + ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + else: + ptr = dataset[i]["buffer"] + ret = acl.rt.memcpy(ptr, + item["size"], + item["buffer"], + item["size"], + policy) + check_ret("acl.rt.memcpy", ret) + + def _gen_dataset(self, type_str="input"): + dataset = acl.mdl.create_dataset() + + temp_dataset = None + if type_str == "in": + self.load_input_dataset = dataset + temp_dataset = self.input_data + else: + self.load_output_dataset = dataset + temp_dataset = self.output_data + + for item in temp_dataset: + data = acl.create_data_buffer(item["buffer"], item["size"]) + if data is None: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + _, ret = acl.mdl.add_dataset_buffer(dataset, data) + + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + def _data_from_host_to_device(self, images): + self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) + self._gen_dataset("in") + self._gen_dataset("out") + + def _data_from_device_to_host(self): + res = [] + self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) + output = self.get_result(res) + return output + + def _destroy_databuffer(self, ): + for dataset in [self.load_input_dataset, self.load_output_dataset]: + if not dataset: + continue + + num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(num): + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf: + ret = acl.destroy_data_buffer(data_buf) + check_ret("acl.destroy_data_buffer", ret) + ret = acl.mdl.destroy_dataset(dataset) + check_ret("acl.mdl.destroy_dataset", ret) + + def forward(self, input_data): + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + st = time.time() + self._data_from_host_to_device(input_data) + mem_t = time.time() - st + st = time.time() + ret = acl.mdl.execute(self.model_id, + self.load_input_dataset, + self.load_output_dataset) + exe_t = time.time() - st + st = time.time() + check_ret("acl.mdl.execute", ret) + self._destroy_databuffer() + result = self._data_from_device_to_host() + mem_t += time.time() - st + return result + + def get_result(self, output_data): + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + check_ret("acl.mdl.get_cur_output_dims", ret) + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_bytes(ptr, size) + np_arr = np.frombuffer(bytearray(data[:data_len 
* ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modify_onnx.py b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modify_onnx.py index ff9cc34aaca0d2c8900e0d8c5310a363179c6eb7..dcbbc457d0979b0be47649bf0c6cd877dd7c0e06 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modify_onnx.py +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/modify_onnx.py @@ -1,222 +1,222 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding:utf-8 -*- - -import argparse -import numpy as np -import copy -from gener_core.mod_modify.interface import AttrType as AT -from gener_core.mod_modify.onnx_graph import OXGraph - - -def make_conv2d_split_node(mod, conv_node, weight, chk_idx, chk_sz, ksz): - x_node = mod.get_node(conv_node.input_name[0]) - - # slice - rng = np.random.randint(0, 7393) - begin = chk_sz * chk_idx - end = -(ksz - chk_sz) + chk_sz * chk_idx - if end >= 0: - end = np.iinfo(np.int32).max - begin1_node = mod.add_const_node(f"const_begin_{rng}", np.array([begin], np.int32)) - end1_node = mod.add_const_node(f"const_end_{rng}", np.array([end], np.int32)) - axes_node = mod.add_const_node(f"const_axes_{rng}", np.array([-1], np.int32)) - step_node = mod.add_const_node(f"const_step_{rng}", np.array([1], np.int32)) - slice1_node = mod.add_new_node(f"Slice_{rng}", "Slice") - slice1_node.set_input_node(0, [x_node, begin1_node, end1_node, axes_node, step_node]) - - # conv - conv1_node = mod.add_new_node(f"Conv_{np.random.randint(0, 7393)}", "Conv", - {"dilations": (AT.LIST_INT, [1, 1]), - "group": (AT.INT, 1), - "kernel_shape": (AT.LIST_INT, [1, weight.shape[-1]]), - "pads": (AT.LIST_INT, [0, 0, 0, 0]), - "strides": (AT.LIST_INT, [1, 1]), }) - w1_node = mod.add_const_node(f"weight_{np.random.randint(0, 7393)}", weight) - conv1_node.set_input_node(0, [slice1_node, w1_node]) - - return conv1_node - - -def shape_dim_extend(mod, io_map): - # NCD -> NCHW - rshp_node = mod.get_node("Reshape_9") - shape_node = mod.get_node(rshp_node.input_name[1]) - shape_value = shape_node.const_value - rng = np.random.randint(0, 7393) - shape_value = np.insert(shape_value, 2, 1) - new_shape_node = mod.add_const_node(f"const_shape_{rng}", shape_value.astype(np.int32)) - mod.node_replace(shape_node, new_shape_node) - - # modify all nodes for conv and maxpool - g_nodes = mod.get_nodes_by_optype("Conv") - for g_node in g_nodes: - 
weight_node = mod.get_node(g_node.input_name[1]) - weight_value = weight_node.const_value - - if len(weight_value.shape) == 3: - rng = np.random.randint(0, 7393) - kernel_shape = [1] + g_node.get_attr('kernel_shape', AT.LIST_INT) - dilations = g_node.get_attr('dilations', AT.LIST_INT) * 2 - pads = g_node.get_attr('pads', AT.LIST_INT) - if pads == [0, 0]: - pads = [0, 0, 0, 0] - if pads == [1, 1]: - pads = [0, 1, 0, 1] - strides = g_node.get_attr('strides', AT.LIST_INT) * 2 - g_node.set_attr({"kernel_shape": (AT.LIST_INT, kernel_shape)}) - g_node.set_attr({"dilations": (AT.LIST_INT, dilations)}) - g_node.set_attr({"pads": (AT.LIST_INT, pads)}) - g_node.set_attr({"strides": (AT.LIST_INT, strides)}) - new_weight_node = mod.add_const_node(f"const_weight_{rng}", np.expand_dims(weight_value, axis=2)) - mod.node_replace(weight_node, new_weight_node) - - g_node = mod.get_node("MaxPool_13") - rng = np.random.randint(0, 7393) - kernel_shape = [1] + g_node.get_attr('kernel_shape', AT.LIST_INT) - pads = g_node.get_attr('pads', AT.LIST_INT) * 2 - strides = g_node.get_attr('strides', AT.LIST_INT) * 2 - g_node.set_attr({"kernel_shape": (AT.LIST_INT, kernel_shape), - "dilations": (AT.LIST_INT, dilations), - "pads": (AT.LIST_INT, pads), - "strides": (AT.LIST_INT, strides)}) - - # NCHW -> NCD - res_node = mod.get_node('MaxPool_13') - squeeze_node = mod.add_new_node(f"Squeeze_{np.random.randint(0, 7393)}", "Squeeze", - {"axes": (AT.LIST_INT, [2])}) - squeeze_node.set_input_node(0, [res_node]) - after_res_node = mod.get_node(io_map.get(res_node.name)[0]) - after_res_node.set_input_node(0, [squeeze_node]) - - # NCD -> NCHW - g_nodes = mod.get_nodes_by_optype("Conv") - for g_node in g_nodes: - if g_node.name != "Conv_11" and mod.get_node(g_node.input_name[0]).op_type != "LeakyRelu": - rng = np.random.randint(0, 7393) - unsqueeze_node = mod.add_new_node(f"Unsqueeze_{rng}", "Unsqueeze", - {"axes": (AT.LIST_INT, [2])}) - - before_g_node = mod.get_node(g_node.input_name[0]) - w_node = mod.get_node(g_node.input_name[1]) - if len(g_node.input_name) == 2: - g_node.set_input_node(0, [unsqueeze_node, w_node]) - else: - b_node = mod.get_node(g_node.input_name[2]) - g_node.set_input_node(0, [unsqueeze_node, w_node, b_node]) - unsqueeze_node.set_input_node(0, [before_g_node]) - - # NCHW -> NCD - g_nodes = mod.get_nodes_by_optype("Add") - for g_node in g_nodes: - Add_b0 = mod.get_node(g_node.input_name[0]) - Add_b1 = mod.get_node(g_node.input_name[1]) - if mod.get_node(Add_b0.input_name[0]).op_type == "LeakyRelu": - rng = np.random.randint(0, 7393) - if Add_b1.op_type != "Conv": - unsqueeze_node = mod.add_new_node(f"Unsqueeze_{rng}", "Unsqueeze", - {"axes": (AT.LIST_INT, [2])}) - g_node.set_input_node(0, [unsqueeze_node, Add_b0]) - unsqueeze_node.set_input_node(0, [Add_b1]) - - squeeze_node = mod.add_new_node(f"Squeeze_{rng}", "Squeeze", - {"axes": (AT.LIST_INT, [2])}) - squeeze_node.set_input_node(0, [g_node]) - after_g_node = mod.get_node(io_map.get(g_node.name)[0]) - after_g_node.set_input_node(0, [squeeze_node]) - - -def make_model(input_onnx, output_onnx): - mod = OXGraph(input_onnx) - io_map = mod.get_net_in_out_map() - - # solve accuracy problem - gather_nodes = mod.get_nodes_by_optype("Gather") - for g_node in gather_nodes: - if g_node.name == 'Gather_203': - indices_node = mod.add_const_node(f'Const_{g_node.input_name[1]}', np.array(28).astype('int64')) - g_node.set_input_node(1, [indices_node]) - - # NCD -> NCHW - shape_dim_extend(mod, io_map) - - # conv split - conv_node = mod.get_node("Conv_11") - weight_node = 
mod.get_node(conv_node.input_name[1]) - weight_value = weight_node.const_value - - np.random.seed(1737) - KSZ = weight_value.shape[-1] - CHK_SZ = 128 - CHK_N = KSZ // CHK_SZ - wgt = [] - for i in range(CHK_N): - wgt.append(weight_value[:, :, :, CHK_SZ * i:CHK_SZ * (i + 1)]) - if KSZ % CHK_SZ != 0: - wgt.append(weight_value[:, :, :, CHK_SZ * CHK_N:]) - - rwn_node = [] - for i, w in enumerate(wgt): - node = make_conv2d_split_node(mod, conv_node, w, i, CHK_SZ, KSZ) - rwn_node.append(node) - - in_node_list = copy.deepcopy(rwn_node[:CHK_N]) - out_node_list = [] - combin_len = CHK_N - while len(in_node_list) > 1: - for j in range(0, combin_len, 2): - add_node = mod.add_new_node(f"Add_{np.random.randint(0, 7393)}", "Add") - add_node.set_input_node(0, [in_node_list[j], in_node_list[j + 1]]) - out_node_list.append(add_node) - in_node_list = copy.deepcopy(out_node_list) - out_node_list.clear() - combin_len //= 2 - - # add all result - if KSZ % CHK_SZ != 0: - add_node = mod.add_new_node(f"Add_{np.random.randint(0, 7393)}", "Add") - add_node.set_input_node(0, [in_node_list[0], rwn_node[-1]]) - else: - add_node = in_node_list[0] - - # relink - after_node = mod.get_node(io_map.get(conv_node.name)[0]) - after_node.set_input_node(0, [add_node]) - - # remove ori node - mod.node_remove([conv_node.name]) - mod.save_new_model(output_onnx) - - -def get_parser(): - parser = argparse.ArgumentParser(description='RawNet2') - parser.add_argument('--input_onnx', default=None, type=str, - help='input original onnx') - parser.add_argument('--output_onnx', default=None, type=str, - help='output modified onnx') - return parser - - -if __name__ == "__main__": - ''' - Example: - python3.7 modify_onnx.py \ - --input_onnx=rawnet2_sim.onnx \ - --output_onnx=rawnet2_modify.onnx - ''' - parser = get_parser() - args = parser.parse_args() - make_model(args.input_onnx, args.output_onnx) - print("modify successfully!") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding:utf-8 -*- + +import argparse +import numpy as np +import copy +from gener_core.mod_modify.interface import AttrType as AT +from gener_core.mod_modify.onnx_graph import OXGraph + + +def make_conv2d_split_node(mod, conv_node, weight, chk_idx, chk_sz, ksz): + x_node = mod.get_node(conv_node.input_name[0]) + + # slice + rng = np.random.randint(0, 7393) + begin = chk_sz * chk_idx + end = -(ksz - chk_sz) + chk_sz * chk_idx + if end >= 0: + end = np.iinfo(np.int32).max + begin1_node = mod.add_const_node(f"const_begin_{rng}", np.array([begin], np.int32)) + end1_node = mod.add_const_node(f"const_end_{rng}", np.array([end], np.int32)) + axes_node = mod.add_const_node(f"const_axes_{rng}", np.array([-1], np.int32)) + step_node = mod.add_const_node(f"const_step_{rng}", np.array([1], np.int32)) + slice1_node = mod.add_new_node(f"Slice_{rng}", "Slice") + slice1_node.set_input_node(0, [x_node, begin1_node, end1_node, axes_node, step_node]) + + # conv + conv1_node = mod.add_new_node(f"Conv_{np.random.randint(0, 7393)}", "Conv", + {"dilations": (AT.LIST_INT, [1, 1]), + "group": (AT.INT, 1), + "kernel_shape": (AT.LIST_INT, [1, weight.shape[-1]]), + "pads": (AT.LIST_INT, [0, 0, 0, 0]), + "strides": (AT.LIST_INT, [1, 1]), }) + w1_node = mod.add_const_node(f"weight_{np.random.randint(0, 7393)}", weight) + conv1_node.set_input_node(0, [slice1_node, w1_node]) + + return conv1_node + + +def shape_dim_extend(mod, io_map): + # NCD -> NCHW + rshp_node = mod.get_node("Reshape_9") + shape_node = mod.get_node(rshp_node.input_name[1]) + shape_value = shape_node.const_value + rng = np.random.randint(0, 7393) + shape_value = np.insert(shape_value, 2, 1) + new_shape_node = mod.add_const_node(f"const_shape_{rng}", shape_value.astype(np.int32)) + mod.node_replace(shape_node, new_shape_node) + + # modify all nodes for conv and maxpool + g_nodes = mod.get_nodes_by_optype("Conv") + for g_node in g_nodes: + weight_node = mod.get_node(g_node.input_name[1]) + weight_value = weight_node.const_value + + if len(weight_value.shape) == 3: + rng = np.random.randint(0, 7393) + kernel_shape = [1] + g_node.get_attr('kernel_shape', AT.LIST_INT) + dilations = g_node.get_attr('dilations', AT.LIST_INT) * 2 + pads = g_node.get_attr('pads', AT.LIST_INT) + if pads == [0, 0]: + pads = [0, 0, 0, 0] + if pads == [1, 1]: + pads = [0, 1, 0, 1] + strides = g_node.get_attr('strides', AT.LIST_INT) * 2 + g_node.set_attr({"kernel_shape": (AT.LIST_INT, kernel_shape)}) + g_node.set_attr({"dilations": (AT.LIST_INT, dilations)}) + g_node.set_attr({"pads": (AT.LIST_INT, pads)}) + g_node.set_attr({"strides": (AT.LIST_INT, strides)}) + new_weight_node = mod.add_const_node(f"const_weight_{rng}", np.expand_dims(weight_value, axis=2)) + mod.node_replace(weight_node, new_weight_node) + + g_node = mod.get_node("MaxPool_13") + rng = np.random.randint(0, 7393) + kernel_shape = [1] + g_node.get_attr('kernel_shape', AT.LIST_INT) + pads = g_node.get_attr('pads', AT.LIST_INT) * 2 + strides = g_node.get_attr('strides', AT.LIST_INT) * 2 + g_node.set_attr({"kernel_shape": (AT.LIST_INT, kernel_shape), + "dilations": (AT.LIST_INT, dilations), + "pads": (AT.LIST_INT, pads), + "strides": (AT.LIST_INT, strides)}) + + # NCHW -> NCD + res_node = mod.get_node('MaxPool_13') + squeeze_node = mod.add_new_node(f"Squeeze_{np.random.randint(0, 7393)}", "Squeeze", + {"axes": (AT.LIST_INT, [2])}) + squeeze_node.set_input_node(0, [res_node]) + after_res_node = mod.get_node(io_map.get(res_node.name)[0]) + after_res_node.set_input_node(0, [squeeze_node]) + + # NCD -> NCHW + 
g_nodes = mod.get_nodes_by_optype("Conv") + for g_node in g_nodes: + if g_node.name != "Conv_11" and mod.get_node(g_node.input_name[0]).op_type != "LeakyRelu": + rng = np.random.randint(0, 7393) + unsqueeze_node = mod.add_new_node(f"Unsqueeze_{rng}", "Unsqueeze", + {"axes": (AT.LIST_INT, [2])}) + + before_g_node = mod.get_node(g_node.input_name[0]) + w_node = mod.get_node(g_node.input_name[1]) + if len(g_node.input_name) == 2: + g_node.set_input_node(0, [unsqueeze_node, w_node]) + else: + b_node = mod.get_node(g_node.input_name[2]) + g_node.set_input_node(0, [unsqueeze_node, w_node, b_node]) + unsqueeze_node.set_input_node(0, [before_g_node]) + + # NCHW -> NCD + g_nodes = mod.get_nodes_by_optype("Add") + for g_node in g_nodes: + Add_b0 = mod.get_node(g_node.input_name[0]) + Add_b1 = mod.get_node(g_node.input_name[1]) + if mod.get_node(Add_b0.input_name[0]).op_type == "LeakyRelu": + rng = np.random.randint(0, 7393) + if Add_b1.op_type != "Conv": + unsqueeze_node = mod.add_new_node(f"Unsqueeze_{rng}", "Unsqueeze", + {"axes": (AT.LIST_INT, [2])}) + g_node.set_input_node(0, [unsqueeze_node, Add_b0]) + unsqueeze_node.set_input_node(0, [Add_b1]) + + squeeze_node = mod.add_new_node(f"Squeeze_{rng}", "Squeeze", + {"axes": (AT.LIST_INT, [2])}) + squeeze_node.set_input_node(0, [g_node]) + after_g_node = mod.get_node(io_map.get(g_node.name)[0]) + after_g_node.set_input_node(0, [squeeze_node]) + + +def make_model(input_onnx, output_onnx): + mod = OXGraph(input_onnx) + io_map = mod.get_net_in_out_map() + + # solve accuracy problem + gather_nodes = mod.get_nodes_by_optype("Gather") + for g_node in gather_nodes: + if g_node.name == 'Gather_203': + indices_node = mod.add_const_node(f'Const_{g_node.input_name[1]}', np.array(28).astype('int64')) + g_node.set_input_node(1, [indices_node]) + + # NCD -> NCHW + shape_dim_extend(mod, io_map) + + # conv split + conv_node = mod.get_node("Conv_11") + weight_node = mod.get_node(conv_node.input_name[1]) + weight_value = weight_node.const_value + + np.random.seed(1737) + KSZ = weight_value.shape[-1] + CHK_SZ = 128 + CHK_N = KSZ // CHK_SZ + wgt = [] + for i in range(CHK_N): + wgt.append(weight_value[:, :, :, CHK_SZ * i:CHK_SZ * (i + 1)]) + if KSZ % CHK_SZ != 0: + wgt.append(weight_value[:, :, :, CHK_SZ * CHK_N:]) + + rwn_node = [] + for i, w in enumerate(wgt): + node = make_conv2d_split_node(mod, conv_node, w, i, CHK_SZ, KSZ) + rwn_node.append(node) + + in_node_list = copy.deepcopy(rwn_node[:CHK_N]) + out_node_list = [] + combin_len = CHK_N + while len(in_node_list) > 1: + for j in range(0, combin_len, 2): + add_node = mod.add_new_node(f"Add_{np.random.randint(0, 7393)}", "Add") + add_node.set_input_node(0, [in_node_list[j], in_node_list[j + 1]]) + out_node_list.append(add_node) + in_node_list = copy.deepcopy(out_node_list) + out_node_list.clear() + combin_len //= 2 + + # add all result + if KSZ % CHK_SZ != 0: + add_node = mod.add_new_node(f"Add_{np.random.randint(0, 7393)}", "Add") + add_node.set_input_node(0, [in_node_list[0], rwn_node[-1]]) + else: + add_node = in_node_list[0] + + # relink + after_node = mod.get_node(io_map.get(conv_node.name)[0]) + after_node.set_input_node(0, [add_node]) + + # remove ori node + mod.node_remove([conv_node.name]) + mod.save_new_model(output_onnx) + + +def get_parser(): + parser = argparse.ArgumentParser(description='RawNet2') + parser.add_argument('--input_onnx', default=None, type=str, + help='input original onnx') + parser.add_argument('--output_onnx', default=None, type=str, + help='output modified onnx') + return parser + + +if 
__name__ == "__main__": + ''' + Example: + python3.7 modify_onnx.py \ + --input_onnx=rawnet2_sim.onnx \ + --output_onnx=rawnet2_modify.onnx + ''' + parser = get_parser() + args = parser.parse_args() + make_model(args.input_onnx, args.output_onnx) + print("modify successfully!") diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/om_infer.py b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/om_infer.py index 13909b374ec03a49c13ab3e2317b5d3932447a86..750f07d2b19851b09577ea8b28cec653b3b83aee 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/om_infer.py +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/om_infer.py @@ -1,160 +1,160 @@ -# Copyright 2018 NVIDIA Corporation. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import argparse -import time -import math -import numpy as np -from tqdm import tqdm -import librosa -import torch -import torch.nn.functional as F -from torch import Tensor -from torch.utils.data import Dataset -from torch.utils.data import DataLoader -from acl_net import Net - - -def genSpoof_list(dir_meta): - d_meta = {} - file_list = [] - with open(dir_meta, 'r') as f: - l_meta = f.readlines() - - for line in l_meta: - key = line.strip().split(' ')[1] - file_list.append(key) - return file_list - - -def pad(x, max_len=64600): - x_len = x.shape[0] - if x_len >= max_len: - return x[:max_len] - # need to pad - num_repeats = int(max_len / x_len) + 1 - padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0] - return padded_x - - -class Dataset_ASVspoof2019_eval(Dataset): - def __init__(self, list_IDs, base_dir): - '''self.list_IDs : list of strings (each string: utt key)''' - - self.list_IDs = list_IDs - self.base_dir = base_dir - - def __len__(self): - return len(self.list_IDs) - - def __getitem__(self, index): - self.cut = 64600 # take ~4 sec audio (64600 samples) - key = self.list_IDs[index] - X, fs = librosa.load(self.base_dir + 'flac/' + key + '.flac', sr=16000) - X_pad = pad(X, self.cut) - x_inp = Tensor(X_pad) - return x_inp, key - - -def get_parser(): - parser = argparse.ArgumentParser(description='RawNet2') - parser.add_argument('--batch_size', type=int, default=128, - help='batch size') - parser.add_argument('--device_id', type=int, default=1, - help='device id') - parser.add_argument('--om_path', type=str, default="rawnet2.om", - help='path to the om model') - parser.add_argument('--eval_output', type=str, default=None, - help='Path to save the evaluation result') - parser.add_argument('--database_path', type=str, default='/your/path/to/data/ASVspoof_database/', - help='Change this to user\'s full directory address of LA database.') - ''' - % database_path/ - % |- ASVspoof2021_LA_eval/flac - % |- ASVspoof2019_LA_train/flac - % |- ASVspoof2019_LA_dev/flac - ''' - parser.add_argument('--protocols_path', type=str, default='/your/path/to/protocols/ASVspoof_database/', - help='Change with path to user\'s LA database protocols directory address') - ''' - % protocols_path/ - % |- ASVspoof_LA_cm_protocols - % |- ASVspoof2021.LA.cm.eval.trl.txt - % |- ASVspoof2019.LA.cm.dev.trl.txt - % |- ASVspoof2019.LA.cm.train.trn.txt - ''' - return parser - - -if __name__ == "__main__": - ''' - Example: - python3.7 om_infer.py \ - --batch_size=1 \ - --om_path=rawnet2_modify.om \ - --eval_output='rawnet2_modify_om.txt' \ - --database_path='data/LA/' \ - --protocols_path='data/LA/' - ''' - parser = get_parser() - args = parser.parse_args() - - # Load dataset - protocal_dir = os.path.join(args.protocols_path + 'ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt') - file_eval = genSpoof_list(protocal_dir) - database_dir = os.path.join(args.database_path + 'ASVspoof2019_LA_eval/') - eval_set = Dataset_ASVspoof2019_eval(list_IDs=file_eval, base_dir=database_dir) - eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, drop_last=False) - - model = Net(device_id=args.device_id, model_path=args.om_path) - - # Evaluation for RawNet2 om model - with open(args.eval_output, 'w+') as fh: - for idx, (batch_x, utt_id) in tqdm(enumerate(eval_loader)): - fname_list = [] - score_list = [] - n, d = batch_x.shape - if n != args.batch_size: - m = (0, 0, 0, args.batch_size - n) - batch_x = F.pad(batch_x, m, 'constant', 0) - batch_x = 
batch_x.numpy().astype(np.float32) - batch_out = model(batch_x) - batch_out = torch.from_numpy(np.array(batch_out).astype(np.float32)) - batch_score = (batch_out[:, :, 1]).data.cpu().numpy().ravel() - - # add outputs - if len(batch_score) != len(utt_id): - batch_score = batch_score[:len(utt_id)] - fname_list.extend(utt_id) - score_list.extend(batch_score.tolist()) - - for f, cm in zip(fname_list, score_list): - fh.write('{} {}\n'.format(f, cm)) +# Copyright 2018 NVIDIA Corporation. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import argparse +import time +import math +import numpy as np +from tqdm import tqdm +import librosa +import torch +import torch.nn.functional as F +from torch import Tensor +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +from acl_net import Net + + +def genSpoof_list(dir_meta): + d_meta = {} + file_list = [] + with open(dir_meta, 'r') as f: + l_meta = f.readlines() + + for line in l_meta: + key = line.strip().split(' ')[1] + file_list.append(key) + return file_list + + +def pad(x, max_len=64600): + x_len = x.shape[0] + if x_len >= max_len: + return x[:max_len] + # need to pad + num_repeats = int(max_len / x_len) + 1 + padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0] + return padded_x + + +class Dataset_ASVspoof2019_eval(Dataset): + def __init__(self, list_IDs, base_dir): + '''self.list_IDs : list of strings (each string: utt key)''' + + self.list_IDs = list_IDs + self.base_dir = base_dir + + def __len__(self): + return len(self.list_IDs) + + def __getitem__(self, index): + self.cut = 64600 # take ~4 sec audio (64600 samples) + key = self.list_IDs[index] + X, fs = librosa.load(self.base_dir + 'flac/' + key + '.flac', sr=16000) + X_pad = pad(X, self.cut) + x_inp = Tensor(X_pad) + return x_inp, key + + +def get_parser(): + parser = argparse.ArgumentParser(description='RawNet2') + parser.add_argument('--batch_size', type=int, default=128, + help='batch size') + parser.add_argument('--device_id', type=int, default=1, + help='device id') + parser.add_argument('--om_path', type=str, default="rawnet2.om", + help='path to the om model') + parser.add_argument('--eval_output', type=str, default=None, + help='Path to save the evaluation result') + parser.add_argument('--database_path', type=str, 
default='/your/path/to/data/ASVspoof_database/', + help='Change this to user\'s full directory address of LA database.') + ''' + % database_path/ + % |- ASVspoof2021_LA_eval/flac + % |- ASVspoof2019_LA_train/flac + % |- ASVspoof2019_LA_dev/flac + ''' + parser.add_argument('--protocols_path', type=str, default='/your/path/to/protocols/ASVspoof_database/', + help='Change with path to user\'s LA database protocols directory address') + ''' + % protocols_path/ + % |- ASVspoof_LA_cm_protocols + % |- ASVspoof2021.LA.cm.eval.trl.txt + % |- ASVspoof2019.LA.cm.dev.trl.txt + % |- ASVspoof2019.LA.cm.train.trn.txt + ''' + return parser + + +if __name__ == "__main__": + ''' + Example: + python3.7 om_infer.py \ + --batch_size=1 \ + --om_path=rawnet2_modify.om \ + --eval_output='rawnet2_modify_om.txt' \ + --database_path='data/LA/' \ + --protocols_path='data/LA/' + ''' + parser = get_parser() + args = parser.parse_args() + + # Load dataset + protocal_dir = os.path.join(args.protocols_path + 'ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt') + file_eval = genSpoof_list(protocal_dir) + database_dir = os.path.join(args.database_path + 'ASVspoof2019_LA_eval/') + eval_set = Dataset_ASVspoof2019_eval(list_IDs=file_eval, base_dir=database_dir) + eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, drop_last=False) + + model = Net(device_id=args.device_id, model_path=args.om_path) + + # Evaluation for RawNet2 om model + with open(args.eval_output, 'w+') as fh: + for idx, (batch_x, utt_id) in tqdm(enumerate(eval_loader)): + fname_list = [] + score_list = [] + n, d = batch_x.shape + if n != args.batch_size: + m = (0, 0, 0, args.batch_size - n) + batch_x = F.pad(batch_x, m, 'constant', 0) + batch_x = batch_x.numpy().astype(np.float32) + batch_out = model(batch_x) + batch_out = torch.from_numpy(np.array(batch_out).astype(np.float32)) + batch_score = (batch_out[:, :, 1]).data.cpu().numpy().ravel() + + # add outputs + if len(batch_score) != len(utt_id): + batch_score = batch_score[:len(utt_id)] + fname_list.extend(utt_id) + score_list.extend(batch_score.tolist()) + + for f, cm in zip(fname_list, score_list): + fh.write('{} {}\n'.format(f, cm)) print('Scores saved to {}'.format(args.eval_output)) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/pth2onnx.py b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/pth2onnx.py index df6b221b5809fa822ac5073b7d43cc003bb6f220..297f2dcf12feb0ccbc26e8692d4be2b92e677cd2 100644 --- a/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/pth2onnx.py +++ b/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch/pth2onnx.py @@ -1,69 +1,69 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os -import argparse -import yaml -import torch -import onnx -import torch.onnx -from model import RawNet - - -def convert(pth_model, onnx_model, batch_size): - dir_yaml = os.path.splitext('model_config_RawNet')[0] + '.yaml' - with open(dir_yaml, 'r') as f_yaml: - parser1 = yaml.load(f_yaml) - model = RawNet(parser1['model'], 'cpu') - checkpoint = torch.load(pth_model, map_location='cpu') - model.load_state_dict(checkpoint) - model.eval() - - input_names = ["input"] - output_names = ["output"] - dummy_input = torch.randn(int(batch_size), 64600) - print('\nStarting ONNX export with onnx %s...' % onnx.__version__) - torch.onnx.export(model, dummy_input, onnx_model, - input_names=input_names, output_names=output_names, - opset_version=11, - dynamic_axes=None, - export_params=True, - verbose=True, - do_constant_folding=True) - - -def get_parser(): - parser = argparse.ArgumentParser(description='RawNet2') - parser.add_argument('--pth_model', default=None, type=str, - help='Path to pytorch model') - parser.add_argument('--onnx_model', default=None, type=str, - help='Path to onnx model') - parser.add_argument('--batch_size', default=1, type=int, - help='Data batch size') - return parser - - -if __name__ == "__main__": - ''' - Example: - python3.7 pth2onnx.py \ - --pth_model=pre_trained_DF_RawNet2.pth \ - --onnx_model=rawnet2_ori.onnx \ - --batch_size=1 - ''' - parser = get_parser() - args = parser.parse_args() - convert(args.pth_model, args.onnx_model, args.batch_size) - print('pytorch to onnx successfully!') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import argparse +import yaml +import torch +import onnx +import torch.onnx +from model import RawNet + + +def convert(pth_model, onnx_model, batch_size): + dir_yaml = os.path.splitext('model_config_RawNet')[0] + '.yaml' + with open(dir_yaml, 'r') as f_yaml: + parser1 = yaml.load(f_yaml) + model = RawNet(parser1['model'], 'cpu') + checkpoint = torch.load(pth_model, map_location='cpu') + model.load_state_dict(checkpoint) + model.eval() + + input_names = ["input"] + output_names = ["output"] + dummy_input = torch.randn(int(batch_size), 64600) + print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) + torch.onnx.export(model, dummy_input, onnx_model, + input_names=input_names, output_names=output_names, + opset_version=11, + dynamic_axes=None, + export_params=True, + verbose=True, + do_constant_folding=True) + + +def get_parser(): + parser = argparse.ArgumentParser(description='RawNet2') + parser.add_argument('--pth_model', default=None, type=str, + help='Path to pytorch model') + parser.add_argument('--onnx_model', default=None, type=str, + help='Path to onnx model') + parser.add_argument('--batch_size', default=1, type=int, + help='Data batch size') + return parser + + +if __name__ == "__main__": + ''' + Example: + python3.7 pth2onnx.py \ + --pth_model=pre_trained_DF_RawNet2.pth \ + --onnx_model=rawnet2_ori.onnx \ + --batch_size=1 + ''' + parser = get_parser() + args = parser.parse_args() + convert(args.pth_model, args.onnx_model, args.batch_size) + print('pytorch to onnx successfully!') diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/LICENSE b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/LICENSE +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/ReadMe.md index 4ea7fca8b3b280b392827ae443e4193f3a0f22cc..d961290140ed39e2d0f350d430b8410af699591a 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/ReadMe.md @@ -1,59 +1,59 @@ -# TDNN模型pytorch离线推理指导 - -## 1 环境准备 - -1.获取,修改与安装开源模型代码 - -```shell -git clone https://github.com/speechbrain/speechbrain.git -cd speechbrain -git checkout develop -git reset --hard 51a2becdcf3a337578a9307a0b2fc3906bf20391 -export PYTHONPATH=`pwd`:$PYTHONPATH -cd .. -git clone https://gitee.com/Ronnie_zheng/MagicONNX.git -cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 -pip install . -``` -将源码包中文件放入speechbrain/templates/speaker_id中 -```shell -cd speechbrain -git apply --reject --whitespace=fix templates/speaker_id/modify.patch -``` - -2.获取权重文件 - -https://www.hiascend.com/zh/software/modelzoo/detail/1/f4f4103245624c1a8637f8a5eadd950c -将模型权重文件夹best_model放入speechbrain/templates/speaker_id下,将hyperparams.yaml文件放入best_model中 - -3.获取数据集 - -预处理阶段自动下载 -```shell -python3 tdnn_preprocess.py -``` - -## 2 模型转换 -```shell -# 生成tdnn_bs64.onnx -python3 tdnn_pth2onnx.py 64 -# 优化onnx模型 -python3 -m onnxsim tdnn_bs64.onnx tdnn_bs64s.onnx -python3 modify_onnx.py tdnn_bs64s.onnx -# 生成om模型 -bash atc.sh tdnn_bs64s.onnx -``` - -## 3 离线推理 - -```shell -bash om_infer.sh 64 -python3 tdnn_postprocess.py -``` -**评测结果:** - -由于TensorRT不支持原模型,故只能对比修改后的模型性能。 -| 模型 | pth精度 | 710离线推理精度 | 基准性能 | 710性能 | -| :------: | :------: | :------: | :------: | :------: | -| TDNN bs64 | 99.93% | 99.93% | - | 2467fps | +# TDNN模型pytorch离线推理指导 + +## 1 环境准备 + +1.获取,修改与安装开源模型代码 + +```shell +git clone https://github.com/speechbrain/speechbrain.git +cd speechbrain +git checkout develop +git reset --hard 51a2becdcf3a337578a9307a0b2fc3906bf20391 +export PYTHONPATH=`pwd`:$PYTHONPATH +cd .. +git clone https://gitee.com/Ronnie_zheng/MagicONNX.git +cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 +pip install . 
+``` +将源码包中文件放入speechbrain/templates/speaker_id中 +```shell +cd speechbrain +git apply --reject --whitespace=fix templates/speaker_id/modify.patch +``` + +2.获取权重文件 + +https://www.hiascend.com/zh/software/modelzoo/detail/1/f4f4103245624c1a8637f8a5eadd950c +将模型权重文件夹best_model放入speechbrain/templates/speaker_id下,将hyperparams.yaml文件放入best_model中 + +3.获取数据集 + +预处理阶段自动下载 +```shell +python3 tdnn_preprocess.py +``` + +## 2 模型转换 +```shell +# 生成tdnn_bs64.onnx +python3 tdnn_pth2onnx.py 64 +# 优化onnx模型 +python3 -m onnxsim tdnn_bs64.onnx tdnn_bs64s.onnx +python3 modify_onnx.py tdnn_bs64s.onnx +# 生成om模型 +bash atc.sh tdnn_bs64s.onnx +``` + +## 3 离线推理 + +```shell +bash om_infer.sh 64 +python3 tdnn_postprocess.py +``` +**评测结果:** + +由于TensorRT不支持原模型,故只能对比修改后的模型性能。 +| 模型 | pth精度 | 710离线推理精度 | 基准性能 | 710性能 | +| :------: | :------: | :------: | :------: | :------: | +| TDNN bs64 | 99.93% | 99.93% | - | 2467fps | | TDNN修改 bs64 | - | - | 2345.179 fps | 3815.886fps | \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/mo.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/mo.py index cad5851dcb001b21baaa792a87cb8fd18a9d4c89..0b4dee7f3b3c5eb6fe85fb03c032ee7484634553 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/mo.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/mo.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys - -from magiconnx import OnnxGraph - -bs = sys.argv[1] -model_name = 'tdnn_bs%s'%bs -graph = OnnxGraph(model_name+'.onnx') -ph = graph.add_placeholder('random','float32',[64,1500]) - -rm = graph.get_nodes("ReduceMin")[0] -rm.inputs = ['random'] -sub = graph.get_nodes("Sub")[-1] -sub.inputs = ['random', rm.outputs[0]] - -rn = graph.get_nodes("RandomNormalLike")[0] -graph.del_node(rn.name, auto_connection=False) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +from magiconnx import OnnxGraph + +bs = sys.argv[1] +model_name = 'tdnn_bs%s'%bs +graph = OnnxGraph(model_name+'.onnx') +ph = graph.add_placeholder('random','float32',[64,1500]) + +rm = graph.get_nodes("ReduceMin")[0] +rm.inputs = ['random'] +sub = graph.get_nodes("Sub")[-1] +sub.inputs = ['random', rm.outputs[0]] + +rn = graph.get_nodes("RandomNormalLike")[0] +graph.del_node(rn.name, auto_connection=False) + graph.save('%s_mod.onnx'%model_name) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify.patch b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify.patch index d246c604253e17f74eadc96273b359b43f78eff1..ce78514dbd877b2af710a5fc41ba865d21b84d83 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify.patch +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify.patch @@ -260,11 +260,11 @@ index c22add8..7a777df 100644 --- a/templates/speaker_id/mini_librispeech_prepare.py +++ b/templates/speaker_id/mini_librispeech_prepare.py @@ -171,7 +171,7 @@ def split_sets(wav_list, split_ratio): - dictionary containing train, valid, and test splits. - """ - # Random shuffle of the list -- random.shuffle(wav_list) -+ # random.shuffle(wav_list) - tot_split = sum(split_ratio) - tot_snts = len(wav_list) - data_split = {} + dictionary containing train, valid, and test splits. + """ + # Random shuffle of the list +- random.shuffle(wav_list) ++ # random.shuffle(wav_list) + tot_split = sum(split_ratio) + tot_snts = len(wav_list) + data_split = {} diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify_onnx.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify_onnx.py index ab8273817c401d35e61b736d66809b57365641d8..3939324a1dd5a9b237a5da0c1eddd5397e61e1b2 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify_onnx.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/modify_onnx.py @@ -1,51 +1,51 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
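Note on mo.py above: it adds a new graph input named `random` (float32, shape [64, 1500]), rewires the first ReduceMin and the last Sub onto it, and deletes the RandomNormalLike node, so the saved graph no longer contains a random-sampling op. A quick sanity check of the result, assuming the default naming pattern (`tdnn_bs64_mod.onnx` for bs=64, per `graph.save('%s_mod.onnx' % model_name)`):

```python
# Sanity check after running mo.py: the RandomNormalLike node should be gone
# and 'random' should now appear as an ordinary graph input.
import onnx

model = onnx.load('tdnn_bs64_mod.onnx')   # name assumed from mo.py's save pattern
onnx.checker.check_model(model)

input_names = [i.name for i in model.graph.input]
op_types = {n.op_type for n in model.graph.node}

print('graph inputs:', input_names)                              # should include 'random'
print('RandomNormalLike removed:', 'RandomNormalLike' not in op_types)
```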
- -import sys - -import numpy as np -import onnx - -from magiconnx import OnnxGraph - -model_name = sys.argv[1] -graph = OnnxGraph(model_name) - -axes = onnx.helper.make_attribute("axes", [0,1]) -rd_min = graph.get_nodes("ReduceMin")[0] -rd_min._node.attribute.append(axes) -rd_max = graph.get_nodes("ReduceMax")[0] -rd_max._node.attribute.append(axes) - -us = graph.add_node('Unsq_1', 'Unsqueeze', {'axes': [2]}) -graph.insert_node(graph.get_nodes("Conv")[0].name, us, mode='before') -sq = graph.add_node('Sq_291', 'Squeeze', {'axes': [2]}) -graph.insert_node(graph.get_nodes('BatchNormalization')[4].name, sq, mode='after') - -convs = graph.get_nodes("Conv") -for conv in convs: - print(conv.name) - dil = conv['dilations'][0] - ks = conv['kernel_shape'][0] - pds = conv['pads'][0] - stri = conv['strides'][0] - conv['dilations'] = [1, dil] - conv['kernel_shape'] = [1, ks] - conv['pads'] = [0, pds, 0, pds] - conv['strides'] = [1, stri] - conv_w = graph[conv.inputs[1]].value - conv_w = np.expand_dims(conv_w, axis=-2) - graph[conv.inputs[1]].value = conv_w - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +import numpy as np +import onnx + +from magiconnx import OnnxGraph + +model_name = sys.argv[1] +graph = OnnxGraph(model_name) + +axes = onnx.helper.make_attribute("axes", [0,1]) +rd_min = graph.get_nodes("ReduceMin")[0] +rd_min._node.attribute.append(axes) +rd_max = graph.get_nodes("ReduceMax")[0] +rd_max._node.attribute.append(axes) + +us = graph.add_node('Unsq_1', 'Unsqueeze', {'axes': [2]}) +graph.insert_node(graph.get_nodes("Conv")[0].name, us, mode='before') +sq = graph.add_node('Sq_291', 'Squeeze', {'axes': [2]}) +graph.insert_node(graph.get_nodes('BatchNormalization')[4].name, sq, mode='after') + +convs = graph.get_nodes("Conv") +for conv in convs: + print(conv.name) + dil = conv['dilations'][0] + ks = conv['kernel_shape'][0] + pds = conv['pads'][0] + stri = conv['strides'][0] + conv['dilations'] = [1, dil] + conv['kernel_shape'] = [1, ks] + conv['pads'] = [0, pds, 0, pds] + conv['strides'] = [1, stri] + conv_w = graph[conv.inputs[1]].value + conv_w = np.expand_dims(conv_w, axis=-2) + graph[conv.inputs[1]].value = conv_w + graph.save(model_name) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/requirements.txt b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/requirements.txt index 561b93c4e71366123f8b5988b9379a59bda13edc..aafb425ee5371a22665b371f6e7ef21fceb3b19d 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/requirements.txt +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/requirements.txt @@ -1,8 +1,8 @@ -onnx==1.10.2 -torch==1.10.0 -torchaudio==0.10.2 -tqdm==4.63.0 -HyperPyYAML==1.0.0 -huggingface-hub==0.4.0 - - +onnx==1.10.2 +torch==1.10.0 +torchaudio==0.10.2 +tqdm==4.63.0 +HyperPyYAML==1.0.0 +huggingface-hub==0.4.0 + + diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_postprocess.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_postprocess.py index 
c04619873410ca49ce6ff8e75d31947718e54f85..cb0b04f187f7ee237e267f26d614b8a62314fa60 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_postprocess.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_postprocess.py @@ -1,53 +1,53 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re -import argparse -import numpy as np -label = {0:'163', 1:'7367', 2:'332', 3:'1970', 4:'4640', 5:'8629', 6:'6848', 7:'1088', 8:'460', 9:'6272', 10:'7312', 11:'2136', 12:'1867', 13:'669', 14:'3526', 15:'3664', 16:'3242', 17:'19', 18:'32', 19:'5789', 20:'118', 21:'226', 22:'7859', 23:'3947', 24:'1898', 25:'2416', 26:'1737', 27:'4680'} - -if __name__ == '__main__': - ''' - 参数说明: - --data_info: 数据集信息 - --result_dir: 二进制推理结果目录 - ''' - - # arg parser - parser = argparse.ArgumentParser() - parser.add_argument('--data_info', default='mini_librispeech_test.info') - parser.add_argument('--result_dir', default='result') - - opt = parser.parse_args() - error = 0 - total = 0 - - with open('mini_librispeech_test.info', 'r') as f: - for line in f.readlines(): - # line format example - # 0 mini_librispeech_test_bin/4680-16042-0024.bin (1,1600,23) - split = line.split(' ') - index = split[0] - input_file = split[1] - target = re.search('/(\d*)-', input_file).group()[1:-1] - output_file = opt.result_dir + '/' + index + '.0.bin' - output = np.fromfile(output_file, np.float32) - res = np.argmax(output) - print('Predicted:', label[res], 'Target:', target) - total += 1 - if label[res] != target: - error += 1 - accuracy = (total - error) / total * 100 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
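Note on tdnn_postprocess.py: each line of the .info file carries an index, the input bin path, and the feature shape; the ground-truth speaker ID is recovered from the file name with the regex `/(\d*)-` and compared against the arg-max of the matching result file. A small worked example of just the parsing step, using the sample line quoted in the script's own comment:

```python
# Worked example of the .info parsing in tdnn_postprocess.py.
# Uses the sample line from the script's comment; no result files are read.
import re

line = '0 mini_librispeech_test_bin/4680-16042-0024.bin (1,1600,23)'
index, input_file, shape = line.split(' ')

# r'/(\d*)-' matches '/4680-'; group()[1:-1] strips the slash and the dash
target = re.search(r'/(\d*)-', input_file).group()[1:-1]

print(index)   # '0'    -> the prediction is read from result/0.0.bin
print(target)  # '4680' -> ground-truth speaker ID
print(shape)   # '(1,1600,23)'
```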
+ +import os +import re +import argparse +import numpy as np +label = {0:'163', 1:'7367', 2:'332', 3:'1970', 4:'4640', 5:'8629', 6:'6848', 7:'1088', 8:'460', 9:'6272', 10:'7312', 11:'2136', 12:'1867', 13:'669', 14:'3526', 15:'3664', 16:'3242', 17:'19', 18:'32', 19:'5789', 20:'118', 21:'226', 22:'7859', 23:'3947', 24:'1898', 25:'2416', 26:'1737', 27:'4680'} + +if __name__ == '__main__': + ''' + 参数说明: + --data_info: 数据集信息 + --result_dir: 二进制推理结果目录 + ''' + + # arg parser + parser = argparse.ArgumentParser() + parser.add_argument('--data_info', default='mini_librispeech_test.info') + parser.add_argument('--result_dir', default='result') + + opt = parser.parse_args() + error = 0 + total = 0 + + with open('mini_librispeech_test.info', 'r') as f: + for line in f.readlines(): + # line format example + # 0 mini_librispeech_test_bin/4680-16042-0024.bin (1,1600,23) + split = line.split(' ') + index = split[0] + input_file = split[1] + target = re.search('/(\d*)-', input_file).group()[1:-1] + output_file = opt.result_dir + '/' + index + '.0.bin' + output = np.fromfile(output_file, np.float32) + res = np.argmax(output) + print('Predicted:', label[res], 'Target:', target) + total += 1 + if label[res] != target: + error += 1 + accuracy = (total - error) / total * 100 print('\nClassification Accuracy: {:.2f}%\n'.format(accuracy)) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_preprocess.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_preprocess.py index dcaa76a0e84e9ad2a27781d71f371cbd4b478d57..855ec5fa537a458e96f77d43a7915a34283a0655 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_preprocess.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_preprocess.py @@ -1,52 +1,52 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import torchaudio -import torch.nn.functional as F -from speechbrain.pretrained import EncoderClassifier -from mini_librispeech_prepare import prepare_mini_librispeech - -prepare_mini_librispeech(data_folder='data', save_json_train='train.json', save_json_valid='valid.json', - save_json_test='test.json', split_ratio=[0, 0, 100]) - -if not os.path.exists('mini_librispeech_test_bin'): - os.makedirs('mini_librispeech_test_bin') - -file = open('mini_librispeech_test.info', 'w') -classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') - -with open('test.json', 'r') as f: - data_info = json.load(f) - i = 0 - - for key, value in data_info.items(): - wav_file = 'data' + value['wav'][11:] # prefix length 11 - signal, fs = torchaudio.load(wav_file) - feats = classifier.extract_feats(signal) - # pad signal - pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1] - feats = F.pad(feats, (0,0,0,pad,0,0), value=0) - - # dump bin file - output = 'mini_librispeech_test_bin/' + value['wav'].split('/')[-1][:-4] + 'bin' - feats.numpy().tofile(output) - # write shape info - file.write(str(i) + ' ' + output + ' (' + str(feats.shape[0]) + ',' + str(feats.shape[1]) + ',' + str(feats.shape[2]) + ')') - file.write('\n') - i += 1 - - print('data preprocess done') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
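Note on tdnn_preprocess.py above: before each feature tensor is dumped to a .bin file, its time axis is padded with `pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1]`, i.e. up to the smallest multiple of 100 strictly greater than the original length, so that inputs can later be grouped into a small set of fixed lengths. A short numeric illustration of that rule (shapes below are made up for the demo):

```python
# Illustration of the frame-padding rule in tdnn_preprocess.py.
import torch
import torch.nn.functional as F

def pad_to_next_hundred(feats):
    # feats: (1, T, n_mels) feature tensor
    pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1]
    return F.pad(feats, (0, 0, 0, pad, 0, 0), value=0)

for t in (1499, 1500, 1537):
    feats = torch.zeros(1, t, 23)
    print(t, '->', pad_to_next_hundred(feats).shape[1])
# 1499 -> 1500, 1500 -> 1600, 1537 -> 1600
```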
+ +import os +import json +import torchaudio +import torch.nn.functional as F +from speechbrain.pretrained import EncoderClassifier +from mini_librispeech_prepare import prepare_mini_librispeech + +prepare_mini_librispeech(data_folder='data', save_json_train='train.json', save_json_valid='valid.json', + save_json_test='test.json', split_ratio=[0, 0, 100]) + +if not os.path.exists('mini_librispeech_test_bin'): + os.makedirs('mini_librispeech_test_bin') + +file = open('mini_librispeech_test.info', 'w') +classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') + +with open('test.json', 'r') as f: + data_info = json.load(f) + i = 0 + + for key, value in data_info.items(): + wav_file = 'data' + value['wav'][11:] # prefix length 11 + signal, fs = torchaudio.load(wav_file) + feats = classifier.extract_feats(signal) + # pad signal + pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1] + feats = F.pad(feats, (0,0,0,pad,0,0), value=0) + + # dump bin file + output = 'mini_librispeech_test_bin/' + value['wav'].split('/')[-1][:-4] + 'bin' + feats.numpy().tofile(output) + # write shape info + file.write(str(i) + ' ' + output + ' (' + str(feats.shape[0]) + ',' + str(feats.shape[1]) + ',' + str(feats.shape[2]) + ')') + file.write('\n') + i += 1 + + print('data preprocess done') file.close() \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pth2onnx.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pth2onnx.py index 2e830e2802450284ea025f6263c500ee60f3ecc2..b93409b45328267ea2a193869ec0bad93bd9ec67 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pth2onnx.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pth2onnx.py @@ -1,47 +1,47 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys - -import torch -import torchaudio -from speechbrain.pretrained import EncoderClassifier - -classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') - -# Download Thai language sample from Omniglot -class Xvector(torch.nn.Module): - def __init__(self, model): - super().__init__() - self.classifier = model - - def forward(self, feats): - res = self.classifier.feats_classify(feats) - return res - -model = Xvector(classifier) -batch_size=int(sys.argv[1]) -feats = torch.randn([batch_size, 1800, 23]) - -torch.onnx.export( - model, - feats, - 'tdnn_bs%d.onnx'%(batch_size), - input_names=['feats'], - output_names=['output'], - export_params=True, - do_constant_folding=True, - verbose=True, - opset_version=11 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +import torch +import torchaudio +from speechbrain.pretrained import EncoderClassifier + +classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') + +# Download Thai language sample from Omniglot +class Xvector(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.classifier = model + + def forward(self, feats): + res = self.classifier.feats_classify(feats) + return res + +model = Xvector(classifier) +batch_size=int(sys.argv[1]) +feats = torch.randn([batch_size, 1800, 23]) + +torch.onnx.export( + model, + feats, + 'tdnn_bs%d.onnx'%(batch_size), + input_names=['feats'], + output_names=['output'], + export_params=True, + do_constant_folding=True, + verbose=True, + opset_version=11 ) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pyacl_infer.py b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pyacl_infer.py index 87967a5d26605f765d8ab99093101f2263a764a1..6b7a783b2ef2db3ede0f688edf129708502cb502 100644 --- a/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pyacl_infer.py +++ b/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch/tdnn_pyacl_infer.py @@ -1,201 +1,201 @@ -# Copyright 2018 NVIDIA Corporation. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import acl -from acl_net import AclModel - -import os -import shutil -import argparse -import numpy as np -from tqdm import tqdm - -DTYPE = { - 'float32': np.float32, - 'float64': np.float64, - 'int32': np.int32, - 'int64': np.int64 -} - -if __name__ == '__main__': - ''' - 参数说明: - --model_path:模型路径 - --device_id:npu id - --cpu_run:MeasureTime类的cpu_run参数,True or False - --sync_infer:推理方式: - True:同步推理 - False:异步推理 - --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 - --input_info_file_path:类似benchmark的bin_info文件 - --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) - e.g. 模型只有一个输入:--input_dtypes=float32 - e.g. 
模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) - --infer_res_save_path:推理结果保存目录 - --res_save_type:推理结果保存类型,bin或npy - - info文件说明: - 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. - ``` - 0 ./bert_bin/input_ids_0.bin (1,512) - 0 ./bert_bin/segment_ids_0.bin (1,512) - 0 ./bert_bin/input_mask_0.bin (1,512) - 1 ./bert_bin/input_ids_1.bin (1,512) - 1 ./bert_bin/segment_ids_1.bin (1,512) - 1 ./bert_bin/input_mask_1.bin (1,512) - ``` - - Using Example: - python3.7 pyacl_infer.py \ - --model_path=./bert_base_batch_1_sim_auto.om \ - --device_id=0 \ - --cpu_run=True \ - --sync_infer=True \ - --workspace=10 \ - --input_info_file_path=./input.info \ - --input_dtypes=int64,int64,int64 \ - --infer_res_save_path=./infer_res \ - --res_save_type=bin - ''' - - # 参数解析 - parser = argparse.ArgumentParser() - parser.add_argument('--model_path', required=True) - parser.add_argument('--batch_size', required=True) - parser.add_argument('--device_id', required=True, type=int) - parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) - parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) - parser.add_argument('--workspace', required=True, type=int) - parser.add_argument('--input_info_file_path', required=True) - parser.add_argument('--input_dtypes', required=True) - parser.add_argument('--infer_res_save_path', required=True) - parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) - opt = parser.parse_args() - - # 创建模型 - measurements = {} - om_model = AclModel(device_id=opt.device_id, - model_path=opt.model_path, - sync_infer=eval(opt.sync_infer), - measurements=measurements, - key='per_infer_time_ns', - cpu_run=eval(opt.cpu_run)) - - # 创建目录 - if os.path.exists(opt.infer_res_save_path): - shutil.rmtree(opt.infer_res_save_path) - os.makedirs(opt.infer_res_save_path) - - # 读取info_file - inputs_info = {} - with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: - line = f_info.readline() - while line: - line = line.rstrip('\n') - contents = line.split() - info = {'path': contents[1], 'shape': eval(contents[2])} - inputs_info.setdefault(contents[0], []).append(info) - line = f_info.readline() - - # 解析输入类型 - input_dtypes = opt.input_dtypes.split(',') - input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) - - # 读取文件推理 - total_infer_time = 0 - total_infer_time_workspace = 0 - total_infer_num = 0 - dataset = {} - dims_infos = {} - bs = int(opt.batch_size) - for key, values in inputs_info.items(): - # 构造输入 - inputs = [] - dims = [] - for idx, value in enumerate(values): - x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) - inputs.append((key,x)) - dims.extend((bs, value['shape'][1], value['shape'][2])) - dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} - - # (1, 1500, 23) {'dimCount': 3, 'name': '', 'dims': [1, 1500, 23]} - length = inputs[0][1].shape[1] - dataset[length] = dataset.get(length,[]) + inputs - dims_infos[length] = dims_infos.get(length,dims_info) - - total_inputs = [] - total_keys = [] - for k in sorted(dataset.keys()): - total_len = len(dataset[k]) - batch_input = [] - batch_key = [] - for i, (key, ipt) in enumerate(dataset[k]): - batch_input.append(ipt) - batch_key.append(key) - if (i+1) % bs == 0: - total_inputs.append(batch_input) - total_keys.append(batch_key) - batch_input = [] - batch_key = [] - if batch_input != []: - total_inputs.append(batch_input) - total_keys.append(batch_key) - - for i, b_ipt in tqdm(enumerate(total_inputs)): - batch_input = 
np.squeeze(np.array(b_ipt), axis=1) - if batch_input.shape[0] < bs: - batch_input = np.pad(batch_input, [(0, bs-batch_input.shape[0]), (0, 0), (0, 0)], mode='constant') - - # 推理得到输出 - # (bs, 28) - output = om_model([batch_input], dims_infos[batch_input.shape[1]]) - - total_infer_num += 1 - - # 保存文件 - for j, key in enumerate(total_keys[i]): - if opt.res_save_type == 'bin': - output[0][j].tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(0) + '.bin')) - else: - np.save(os.path.join(opt.infer_res_save_path, key + '.' + str(0) + '.npy'), output[0][j]) - - # 计算时间 - total_infer_time += measurements['per_infer_time_ns'] - if total_infer_num > opt.workspace: - total_infer_time_workspace += measurements['per_infer_time_ns'] - - # 推理时间 - print('[INFO] Infer time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ - 'average pure infer time after workspace(ms): ' + str(abs( - total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' - print(msg) - with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: +# Copyright 2018 NVIDIA Corporation. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import acl +from acl_net import AclModel + +import os +import shutil +import argparse +import numpy as np +from tqdm import tqdm + +DTYPE = { + 'float32': np.float32, + 'float64': np.float64, + 'int32': np.int32, + 'int64': np.int64 +} + +if __name__ == '__main__': + ''' + 参数说明: + --model_path:模型路径 + --device_id:npu id + --cpu_run:MeasureTime类的cpu_run参数,True or False + --sync_infer:推理方式: + True:同步推理 + False:异步推理 + --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 + --input_info_file_path:类似benchmark的bin_info文件 + --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) + e.g. 模型只有一个输入:--input_dtypes=float32 + e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) + --infer_res_save_path:推理结果保存目录 + --res_save_type:推理结果保存类型,bin或npy + + info文件说明: + 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. 
+ ``` + 0 ./bert_bin/input_ids_0.bin (1,512) + 0 ./bert_bin/segment_ids_0.bin (1,512) + 0 ./bert_bin/input_mask_0.bin (1,512) + 1 ./bert_bin/input_ids_1.bin (1,512) + 1 ./bert_bin/segment_ids_1.bin (1,512) + 1 ./bert_bin/input_mask_1.bin (1,512) + ``` + + Using Example: + python3.7 pyacl_infer.py \ + --model_path=./bert_base_batch_1_sim_auto.om \ + --device_id=0 \ + --cpu_run=True \ + --sync_infer=True \ + --workspace=10 \ + --input_info_file_path=./input.info \ + --input_dtypes=int64,int64,int64 \ + --infer_res_save_path=./infer_res \ + --res_save_type=bin + ''' + + # 参数解析 + parser = argparse.ArgumentParser() + parser.add_argument('--model_path', required=True) + parser.add_argument('--batch_size', required=True) + parser.add_argument('--device_id', required=True, type=int) + parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) + parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) + parser.add_argument('--workspace', required=True, type=int) + parser.add_argument('--input_info_file_path', required=True) + parser.add_argument('--input_dtypes', required=True) + parser.add_argument('--infer_res_save_path', required=True) + parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) + opt = parser.parse_args() + + # 创建模型 + measurements = {} + om_model = AclModel(device_id=opt.device_id, + model_path=opt.model_path, + sync_infer=eval(opt.sync_infer), + measurements=measurements, + key='per_infer_time_ns', + cpu_run=eval(opt.cpu_run)) + + # 创建目录 + if os.path.exists(opt.infer_res_save_path): + shutil.rmtree(opt.infer_res_save_path) + os.makedirs(opt.infer_res_save_path) + + # 读取info_file + inputs_info = {} + with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: + line = f_info.readline() + while line: + line = line.rstrip('\n') + contents = line.split() + info = {'path': contents[1], 'shape': eval(contents[2])} + inputs_info.setdefault(contents[0], []).append(info) + line = f_info.readline() + + # 解析输入类型 + input_dtypes = opt.input_dtypes.split(',') + input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) + + # 读取文件推理 + total_infer_time = 0 + total_infer_time_workspace = 0 + total_infer_num = 0 + dataset = {} + dims_infos = {} + bs = int(opt.batch_size) + for key, values in inputs_info.items(): + # 构造输入 + inputs = [] + dims = [] + for idx, value in enumerate(values): + x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) + inputs.append((key,x)) + dims.extend((bs, value['shape'][1], value['shape'][2])) + dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} + + # (1, 1500, 23) {'dimCount': 3, 'name': '', 'dims': [1, 1500, 23]} + length = inputs[0][1].shape[1] + dataset[length] = dataset.get(length,[]) + inputs + dims_infos[length] = dims_infos.get(length,dims_info) + + total_inputs = [] + total_keys = [] + for k in sorted(dataset.keys()): + total_len = len(dataset[k]) + batch_input = [] + batch_key = [] + for i, (key, ipt) in enumerate(dataset[k]): + batch_input.append(ipt) + batch_key.append(key) + if (i+1) % bs == 0: + total_inputs.append(batch_input) + total_keys.append(batch_key) + batch_input = [] + batch_key = [] + if batch_input != []: + total_inputs.append(batch_input) + total_keys.append(batch_key) + + for i, b_ipt in tqdm(enumerate(total_inputs)): + batch_input = np.squeeze(np.array(b_ipt), axis=1) + if batch_input.shape[0] < bs: + batch_input = np.pad(batch_input, [(0, bs-batch_input.shape[0]), (0, 0), (0, 0)], mode='constant') + + # 推理得到输出 + # (bs, 28) + output = 
om_model([batch_input], dims_infos[batch_input.shape[1]]) + + total_infer_num += 1 + + # 保存文件 + for j, key in enumerate(total_keys[i]): + if opt.res_save_type == 'bin': + output[0][j].tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(0) + '.bin')) + else: + np.save(os.path.join(opt.infer_res_save_path, key + '.' + str(0) + '.npy'), output[0][j]) + + # 计算时间 + total_infer_time += measurements['per_infer_time_ns'] + if total_infer_num > opt.workspace: + total_infer_time_workspace += measurements['per_infer_time_ns'] + + # 推理时间 + print('[INFO] Infer time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ + 'average pure infer time after workspace(ms): ' + str(abs( + total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' + print(msg) + with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: f_infer_time.write(msg) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/ReadMe.md index e42fd69be299f535fe5c0ecd169c4e3d647a7497..cae1ac9e4b9f273ee42fe4b4cd1c76ffde81694e 100644 --- a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/ReadMe.md @@ -1,37 +1,37 @@ -文件作用说明: - -- Pth转换om脚本,pth转换om脚本 -- ATC转换脚本atc_crnn.sh -- benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer -- ONNX模型lstm算子修改脚本lstm_revise.py -- 测试数据集生成脚本parse_testdata.py -- 推理后处理脚本postpossess_CRNN_pytorch.py -- ReadMe.md -- - -推理端到端步骤: - -(1) 使用脚本pth2onnx.py将pth文件导出为onnx文件 - - - -(2)运行atc_crnn.sh脚本转换om模型 - -本demo已提供调优完成的om模型 - - - -(3)用parse_testdata.py脚本处理数据集 - - - -(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./crnn.info -input_width=100 -input_height=32 -om_path=./crnn_sim.om -useDvpp=False - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(5)python3.7 postpossess_CRNN_pytorch.py - -验证推理结果 - +文件作用说明: + +- Pth转换om脚本,pth转换om脚本 +- ATC转换脚本atc_crnn.sh +- benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer +- ONNX模型lstm算子修改脚本lstm_revise.py +- 测试数据集生成脚本parse_testdata.py +- 推理后处理脚本postpossess_CRNN_pytorch.py +- ReadMe.md +- + +推理端到端步骤: + +(1) 使用脚本pth2onnx.py将pth文件导出为onnx文件 + + + +(2)运行atc_crnn.sh脚本转换om模型 + +本demo已提供调优完成的om模型 + + + +(3)用parse_testdata.py脚本处理数据集 + + + +(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./crnn.info -input_width=100 -input_height=32 -om_path=./crnn_sim.om -useDvpp=False + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(5)python3.7 postpossess_CRNN_pytorch.py + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/lstm_revise.py b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/lstm_revise.py index e68e93fc7e61d54481cba0a0aa859360260cfce9..8e91400188b3a56877bcc352693b8cc771209a66 100644 --- a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/lstm_revise.py +++ b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/lstm_revise.py @@ -1,286 +1,286 @@ -import os -import numpy as np -import torch -import onnx -import copy -from onnx import numpy_helper -G_ONNX_OPSET_VER = 11 -''' -def onnx_LSTM(batch, seq_len, input_size, hidden_size, num_layers, bidirectional, work_dir): - mod = torch.nn.LSTM(input_size, hidden_size, num_layers,bidirectional=bidirectional) - input = torch.randn(seq_len, batch, input_size)#(seq_len, batch, 
input_size) - h0 = torch.randn(2, 3, 20) - c0 = torch.randn(2, 3, 20) - print(mod.weight_ih_l0.size()) - print(mod.bias_hh_l0.size()) - print(mod.bias_ih_l0.size()) - output, _ = mod(input) - onnx_name = os.path.join(work_dir, "LSTM.onnx") - torch.onnx.export(mod, (input), f=onnx_name, - opset_version=G_ONNX_OPSET_VER) - return output -''' - -def GetNodeIndex(graph, node_name): - index = 0 - for i in range(len(graph.node)): - if graph.node[i].name == node_name: - index = i - break - return index - -def modify1(src_path,save_path): - model = onnx.load(src_path) - new_model = onnx.ModelProto() - for init in model.graph.initializer: - if init.name == "441": #95 - tmp1 = numpy_helper.to_array(init) - if init.name == "442": #96 - tmp2 = numpy_helper.to_array(init) - if init.name == "443": #97 - tmp3 = numpy_helper.to_array(init) - remove_weight = [] - for init in model.graph.initializer: - if init.name == "442": #96 - remove_weight.append(init) - if init.name == "441": #95 - remove_weight.append(init) - if init.name == "443": #97 - remove_weight.append(init) - for i in remove_weight: - model.graph.initializer.remove(i) - tmp = np.concatenate((tmp1,tmp2),axis=-1) - tmp_shape = tmp.shape - tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() - #print(tmp[0]) - weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) - init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="441") #95 - model.graph.initializer.append(init_tmp) - bais_shape = [tmp3.shape[-1]] - tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() - bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) - bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) - bais = bais1 + bais2 - init_bais = numpy_helper.from_array(bais.astype(np.float32),name="443") #97 - model.graph.initializer.append(init_bais) - for idx,node in enumerate(model.graph.node): - if node.name == "LSTM_29": - print(model.graph.node[idx].input) - #model.graph.node[idx].input[1] = '' - #model.graph.node[idx].input[2] = '' - model.graph.node[idx].input.remove('442') #96 - model.graph.node[idx].input.remove('82') #14 - model.graph.node[idx].input.remove('82') #14 - model.graph.node[idx].input.remove('') #'' - model.graph.node[idx].input[1] = '441' #95 - model.graph.node[idx].input[2] = '443' #97 - #model.graph.node[idx].input[3] = '87' - - #去除Squeeze - remove_list = [] - for idx,node in enumerate(model.graph.node): - if node.name in {"Shape_23", "Gather_25", "Unsqueeze_26","Concat_27","ConstantOfShape_28","Constant_24","Squeeze_30"}: - remove_list.append(node) - - for node in remove_list: - model.graph.node.remove(node) - - - - model.graph.node[GetNodeIndex(model.graph,'Concat_49')].input[0] = '140' - onnx.save(model,save_path) - - -def modify2(src_path,save_path): - model = onnx.load(src_path) - new_model = onnx.ModelProto() - for init in model.graph.initializer: - if init.name == "463": #95 - tmp1 = numpy_helper.to_array(init) - if init.name == "464": #96 - tmp2 = numpy_helper.to_array(init) - if init.name == "465": #97 - tmp3 = numpy_helper.to_array(init) - remove_weight = [] - for init in model.graph.initializer: - if init.name == "464": #96 - remove_weight.append(init) - if init.name == "463": #95 - remove_weight.append(init) - if init.name == "465": #97 - remove_weight.append(init) - for i in remove_weight: - model.graph.initializer.remove(i) - tmp = np.concatenate((tmp1,tmp2),axis=-1) - tmp_shape = tmp.shape - tmp = 
tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() - #print(tmp[0]) - weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) - init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="463") #95 - model.graph.initializer.append(init_tmp) - bais_shape = [tmp3.shape[-1]] - tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() - bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) - bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) - bais = bais1 + bais2 - init_bais = numpy_helper.from_array(bais.astype(np.float32),name="465") #97 - model.graph.initializer.append(init_bais) - for idx,node in enumerate(model.graph.node): - if node.name == "LSTM_42": - print(model.graph.node[idx].input) - #model.graph.node[idx].input[1] = '' - #model.graph.node[idx].input[2] = '' - model.graph.node[idx].input.remove('464') #96 - model.graph.node[idx].input.remove('158') #14 - model.graph.node[idx].input.remove('158') #14 - model.graph.node[idx].input.remove('') #'' - model.graph.node[idx].input[1] = '463' #95 - model.graph.node[idx].input[2] = '465' #97 - #model.graph.node[idx].input[3] = '87' - - remove_list = [] - for idx,node in enumerate(model.graph.node): - if node.name in {"Shape_36", "Gather_38", "Unsqueeze_39","Concat_40","ConstantOfShape_41","Constant_37","Squeeze_43"}: - remove_list.append(node) - - for node in remove_list: - model.graph.node.remove(node) - - model.graph.node[GetNodeIndex(model.graph,'Slice_48')].input[0] = '216' - - onnx.save(model,save_path) - -def modify3(src_path,save_path): - model = onnx.load(src_path) - new_model = onnx.ModelProto() - for init in model.graph.initializer: - if init.name == "486": #95 - tmp1 = numpy_helper.to_array(init) - if init.name == "487": #96 - tmp2 = numpy_helper.to_array(init) - if init.name == "488": #97 - tmp3 = numpy_helper.to_array(init) - remove_weight = [] - for init in model.graph.initializer: - if init.name == "487": #96 - remove_weight.append(init) - if init.name == "486": #95 - remove_weight.append(init) - if init.name == "488": #97 - remove_weight.append(init) - for i in remove_weight: - model.graph.initializer.remove(i) - tmp = np.concatenate((tmp1,tmp2),axis=-1) - tmp_shape = tmp.shape - tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() - #print(tmp[0]) - weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) - init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="486") #95 - model.graph.initializer.append(init_tmp) - bais_shape = [tmp3.shape[-1]] - tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() - bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) - bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) - bais = bais1 + bais2 - init_bais = numpy_helper.from_array(bais.astype(np.float32),name="488") #97 - model.graph.initializer.append(init_bais) - for idx,node in enumerate(model.graph.node): - if node.name == "LSTM_75": - print(model.graph.node[idx].input) - #model.graph.node[idx].input[1] = '' - #model.graph.node[idx].input[2] = '' - model.graph.node[idx].input.remove('487') #96 - model.graph.node[idx].input.remove('256') #14 - model.graph.node[idx].input.remove('256') #14 - model.graph.node[idx].input.remove('') #'' - model.graph.node[idx].input[1] = '486' #95 - model.graph.node[idx].input[2] = '488' #97 - 
#model.graph.node[idx].input[3] = '87' - - remove_list = [] - for idx,node in enumerate(model.graph.node): - if node.name in {"Shape_69", "Gather_71", "Unsqueeze_72","Concat_73","ConstantOfShape_74","Constant_70","Squeeze_76"}: - remove_list.append(node) - - for node in remove_list: - model.graph.node.remove(node) - - model.graph.node[GetNodeIndex(model.graph,'Concat_95')].input[0] = '314' - - - onnx.save(model,save_path) - -def modify4(src_path,save_path): - model = onnx.load(src_path) - new_model = onnx.ModelProto() - for init in model.graph.initializer: - if init.name == "508": #95 - tmp1 = numpy_helper.to_array(init) - if init.name == "509": #96 - tmp2 = numpy_helper.to_array(init) - if init.name == "510": #97 - tmp3 = numpy_helper.to_array(init) - remove_weight = [] - for init in model.graph.initializer: - if init.name == "509": #96 - remove_weight.append(init) - if init.name == "508": #95 - remove_weight.append(init) - if init.name == "510": #97 - remove_weight.append(init) - for i in remove_weight: - model.graph.initializer.remove(i) - tmp = np.concatenate((tmp1,tmp2),axis=-1) - tmp_shape = tmp.shape - tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() - #print(tmp[0]) - weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) - init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="508") #95 - model.graph.initializer.append(init_tmp) - bais_shape = [tmp3.shape[-1]] - tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() - bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) - bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) - bais = bais1 + bais2 - init_bais = numpy_helper.from_array(bais.astype(np.float32),name="510") #97 - model.graph.initializer.append(init_bais) - for idx,node in enumerate(model.graph.node): - if node.name == "LSTM_88": - print(model.graph.node[idx].input) - #model.graph.node[idx].input[1] = '' - #model.graph.node[idx].input[2] = '' - model.graph.node[idx].input.remove('509') #96 - model.graph.node[idx].input.remove('332') #14 - model.graph.node[idx].input.remove('332') #14 - model.graph.node[idx].input.remove('') #'' - model.graph.node[idx].input[1] = '508' #95 - model.graph.node[idx].input[2] = '510' #97 - #model.graph.node[idx].input[3] = '87' - - remove_list = [] - for idx,node in enumerate(model.graph.node): - if node.name in {"Shape_82", "Gather_84", "Unsqueeze_85","Concat_86","ConstantOfShape_87","Constant_83","Squeeze_89"}: - remove_list.append(node) - - for node in remove_list: - model.graph.node.remove(node) - model.graph.node[GetNodeIndex(model.graph,'Slice_94')].input[0] = '390' - - onnx.save(model,save_path) - -if __name__ == "__main__": - work_dir = "./" - batch = 1 - seq_len = 10 - input_size = 50 - hidden_size = 32 - num_layers = 1 - #onnx_LSTM(batch, seq_len, input_size, hidden_size, num_layers, False, work_dir) - modify1("./crnn_sim.onnx","./1.onnx") - modify2("./1.onnx","./2.onnx") - modify3("./2.onnx","./3.onnx") - modify4("./3.onnx","./crnn_revised.onnx") - os.remove("1.onnx") - os.remove("2.onnx") - os.remove("3.onnx") - print('Done') +import os +import numpy as np +import torch +import onnx +import copy +from onnx import numpy_helper +G_ONNX_OPSET_VER = 11 +''' +def onnx_LSTM(batch, seq_len, input_size, hidden_size, num_layers, bidirectional, work_dir): + mod = torch.nn.LSTM(input_size, hidden_size, num_layers,bidirectional=bidirectional) + input = torch.randn(seq_len, batch, 
input_size)#(seq_len, batch, input_size) + h0 = torch.randn(2, 3, 20) + c0 = torch.randn(2, 3, 20) + print(mod.weight_ih_l0.size()) + print(mod.bias_hh_l0.size()) + print(mod.bias_ih_l0.size()) + output, _ = mod(input) + onnx_name = os.path.join(work_dir, "LSTM.onnx") + torch.onnx.export(mod, (input), f=onnx_name, + opset_version=G_ONNX_OPSET_VER) + return output +''' + +def GetNodeIndex(graph, node_name): + index = 0 + for i in range(len(graph.node)): + if graph.node[i].name == node_name: + index = i + break + return index + +def modify1(src_path,save_path): + model = onnx.load(src_path) + new_model = onnx.ModelProto() + for init in model.graph.initializer: + if init.name == "441": #95 + tmp1 = numpy_helper.to_array(init) + if init.name == "442": #96 + tmp2 = numpy_helper.to_array(init) + if init.name == "443": #97 + tmp3 = numpy_helper.to_array(init) + remove_weight = [] + for init in model.graph.initializer: + if init.name == "442": #96 + remove_weight.append(init) + if init.name == "441": #95 + remove_weight.append(init) + if init.name == "443": #97 + remove_weight.append(init) + for i in remove_weight: + model.graph.initializer.remove(i) + tmp = np.concatenate((tmp1,tmp2),axis=-1) + tmp_shape = tmp.shape + tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() + #print(tmp[0]) + weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) + init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="441") #95 + model.graph.initializer.append(init_tmp) + bais_shape = [tmp3.shape[-1]] + tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() + bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) + bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) + bais = bais1 + bais2 + init_bais = numpy_helper.from_array(bais.astype(np.float32),name="443") #97 + model.graph.initializer.append(init_bais) + for idx,node in enumerate(model.graph.node): + if node.name == "LSTM_29": + print(model.graph.node[idx].input) + #model.graph.node[idx].input[1] = '' + #model.graph.node[idx].input[2] = '' + model.graph.node[idx].input.remove('442') #96 + model.graph.node[idx].input.remove('82') #14 + model.graph.node[idx].input.remove('82') #14 + model.graph.node[idx].input.remove('') #'' + model.graph.node[idx].input[1] = '441' #95 + model.graph.node[idx].input[2] = '443' #97 + #model.graph.node[idx].input[3] = '87' + + #去除Squeeze + remove_list = [] + for idx,node in enumerate(model.graph.node): + if node.name in {"Shape_23", "Gather_25", "Unsqueeze_26","Concat_27","ConstantOfShape_28","Constant_24","Squeeze_30"}: + remove_list.append(node) + + for node in remove_list: + model.graph.node.remove(node) + + + + model.graph.node[GetNodeIndex(model.graph,'Concat_49')].input[0] = '140' + onnx.save(model,save_path) + + +def modify2(src_path,save_path): + model = onnx.load(src_path) + new_model = onnx.ModelProto() + for init in model.graph.initializer: + if init.name == "463": #95 + tmp1 = numpy_helper.to_array(init) + if init.name == "464": #96 + tmp2 = numpy_helper.to_array(init) + if init.name == "465": #97 + tmp3 = numpy_helper.to_array(init) + remove_weight = [] + for init in model.graph.initializer: + if init.name == "464": #96 + remove_weight.append(init) + if init.name == "463": #95 + remove_weight.append(init) + if init.name == "465": #97 + remove_weight.append(init) + for i in remove_weight: + model.graph.initializer.remove(i) + tmp = np.concatenate((tmp1,tmp2),axis=-1) + 
tmp_shape = tmp.shape + tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() + #print(tmp[0]) + weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) + init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="463") #95 + model.graph.initializer.append(init_tmp) + bais_shape = [tmp3.shape[-1]] + tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() + bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) + bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) + bais = bais1 + bais2 + init_bais = numpy_helper.from_array(bais.astype(np.float32),name="465") #97 + model.graph.initializer.append(init_bais) + for idx,node in enumerate(model.graph.node): + if node.name == "LSTM_42": + print(model.graph.node[idx].input) + #model.graph.node[idx].input[1] = '' + #model.graph.node[idx].input[2] = '' + model.graph.node[idx].input.remove('464') #96 + model.graph.node[idx].input.remove('158') #14 + model.graph.node[idx].input.remove('158') #14 + model.graph.node[idx].input.remove('') #'' + model.graph.node[idx].input[1] = '463' #95 + model.graph.node[idx].input[2] = '465' #97 + #model.graph.node[idx].input[3] = '87' + + remove_list = [] + for idx,node in enumerate(model.graph.node): + if node.name in {"Shape_36", "Gather_38", "Unsqueeze_39","Concat_40","ConstantOfShape_41","Constant_37","Squeeze_43"}: + remove_list.append(node) + + for node in remove_list: + model.graph.node.remove(node) + + model.graph.node[GetNodeIndex(model.graph,'Slice_48')].input[0] = '216' + + onnx.save(model,save_path) + +def modify3(src_path,save_path): + model = onnx.load(src_path) + new_model = onnx.ModelProto() + for init in model.graph.initializer: + if init.name == "486": #95 + tmp1 = numpy_helper.to_array(init) + if init.name == "487": #96 + tmp2 = numpy_helper.to_array(init) + if init.name == "488": #97 + tmp3 = numpy_helper.to_array(init) + remove_weight = [] + for init in model.graph.initializer: + if init.name == "487": #96 + remove_weight.append(init) + if init.name == "486": #95 + remove_weight.append(init) + if init.name == "488": #97 + remove_weight.append(init) + for i in remove_weight: + model.graph.initializer.remove(i) + tmp = np.concatenate((tmp1,tmp2),axis=-1) + tmp_shape = tmp.shape + tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() + #print(tmp[0]) + weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) + init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="486") #95 + model.graph.initializer.append(init_tmp) + bais_shape = [tmp3.shape[-1]] + tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() + bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) + bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) + bais = bais1 + bais2 + init_bais = numpy_helper.from_array(bais.astype(np.float32),name="488") #97 + model.graph.initializer.append(init_bais) + for idx,node in enumerate(model.graph.node): + if node.name == "LSTM_75": + print(model.graph.node[idx].input) + #model.graph.node[idx].input[1] = '' + #model.graph.node[idx].input[2] = '' + model.graph.node[idx].input.remove('487') #96 + model.graph.node[idx].input.remove('256') #14 + model.graph.node[idx].input.remove('256') #14 + model.graph.node[idx].input.remove('') #'' + model.graph.node[idx].input[1] = '486' #95 + model.graph.node[idx].input[2] = '488' #97 
+ #model.graph.node[idx].input[3] = '87' + + remove_list = [] + for idx,node in enumerate(model.graph.node): + if node.name in {"Shape_69", "Gather_71", "Unsqueeze_72","Concat_73","ConstantOfShape_74","Constant_70","Squeeze_76"}: + remove_list.append(node) + + for node in remove_list: + model.graph.node.remove(node) + + model.graph.node[GetNodeIndex(model.graph,'Concat_95')].input[0] = '314' + + + onnx.save(model,save_path) + +def modify4(src_path,save_path): + model = onnx.load(src_path) + new_model = onnx.ModelProto() + for init in model.graph.initializer: + if init.name == "508": #95 + tmp1 = numpy_helper.to_array(init) + if init.name == "509": #96 + tmp2 = numpy_helper.to_array(init) + if init.name == "510": #97 + tmp3 = numpy_helper.to_array(init) + remove_weight = [] + for init in model.graph.initializer: + if init.name == "509": #96 + remove_weight.append(init) + if init.name == "508": #95 + remove_weight.append(init) + if init.name == "510": #97 + remove_weight.append(init) + for i in remove_weight: + model.graph.initializer.remove(i) + tmp = np.concatenate((tmp1,tmp2),axis=-1) + tmp_shape = tmp.shape + tmp = tmp.reshape(4,tmp_shape[-2]//4,tmp_shape[-1]).tolist() + #print(tmp[0]) + weight = np.array(tmp[0] + tmp[3] + tmp[2] + tmp[1]).reshape(tmp_shape[-2],tmp_shape[-1]).transpose(1,0) + init_tmp = numpy_helper.from_array(weight.astype(np.float32),name="508") #95 + model.graph.initializer.append(init_tmp) + bais_shape = [tmp3.shape[-1]] + tmp3 = tmp3.reshape(2,4,bais_shape[-1]//8).tolist() + bais1 = np.array(tmp3[0][0] + tmp3[0][3] + tmp3[0][2] + tmp3[0][1]).reshape(bais_shape[-1]//2) + bais2 = np.array(tmp3[1][0] + tmp3[1][3] + tmp3[1][2] + tmp3[1][1]).reshape(bais_shape[-1]//2) + bais = bais1 + bais2 + init_bais = numpy_helper.from_array(bais.astype(np.float32),name="510") #97 + model.graph.initializer.append(init_bais) + for idx,node in enumerate(model.graph.node): + if node.name == "LSTM_88": + print(model.graph.node[idx].input) + #model.graph.node[idx].input[1] = '' + #model.graph.node[idx].input[2] = '' + model.graph.node[idx].input.remove('509') #96 + model.graph.node[idx].input.remove('332') #14 + model.graph.node[idx].input.remove('332') #14 + model.graph.node[idx].input.remove('') #'' + model.graph.node[idx].input[1] = '508' #95 + model.graph.node[idx].input[2] = '510' #97 + #model.graph.node[idx].input[3] = '87' + + remove_list = [] + for idx,node in enumerate(model.graph.node): + if node.name in {"Shape_82", "Gather_84", "Unsqueeze_85","Concat_86","ConstantOfShape_87","Constant_83","Squeeze_89"}: + remove_list.append(node) + + for node in remove_list: + model.graph.node.remove(node) + model.graph.node[GetNodeIndex(model.graph,'Slice_94')].input[0] = '390' + + onnx.save(model,save_path) + +if __name__ == "__main__": + work_dir = "./" + batch = 1 + seq_len = 10 + input_size = 50 + hidden_size = 32 + num_layers = 1 + #onnx_LSTM(batch, seq_len, input_size, hidden_size, num_layers, False, work_dir) + modify1("./crnn_sim.onnx","./1.onnx") + modify2("./1.onnx","./2.onnx") + modify3("./2.onnx","./3.onnx") + modify4("./3.onnx","./crnn_revised.onnx") + os.remove("1.onnx") + os.remove("2.onnx") + os.remove("3.onnx") + print('Done') diff --git a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/parse_testdata.py b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/parse_testdata.py index 62185425c7532021390878a032549892ed2af08d..d9a1926a6e4bcadcd6390ba082afe6a8d7d10cd6 100644 --- a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/parse_testdata.py +++ 
b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/parse_testdata.py @@ -1,79 +1,79 @@ -import os -import re -import six -import lmdb -from PIL import Image -import numpy as np -import torchvision - - -test_dir = '/home/ltsong/1230/CRNN/IIIT5K_lmdb/' -alphabets = '0123456789abcdefghijklmnopqrstuvwxyz' -output_bin = './input_bin/' - - -class resizeNormalize(object): - def __init__(self, size, interpolation=Image.BICUBIC): - self.size = size - self.interpolation = interpolation - self.toTensor = torchvision.transforms.ToTensor() - - def __call__(self, img): - img = img.resize(self.size, self.interpolation) - img = self.toTensor(img) - img.sub_(0.5).div_(0.5) - return img - - -def gen_data_label(test_dir, data_dir): - if not os.path.exists(data_dir): - os.mkdir(data_dir) - - env = lmdb.open(test_dir, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False) - if not env: - print('cannot open lmdb from %s' % (test_dir)) - sys.exit(0) - with env.begin(write=False) as txn: - nSamples = int(txn.get('num-samples'.encode()).decode('utf-8')) - print('origin nSamples is:', nSamples) - filtered_index_list = [] - - with open('label.txt', 'w') as f: - for index in range(nSamples): - index += 1 - # images - img_key = 'image-%09d'.encode() % index - imgbuf = txn.get(img_key) - buf = six.BytesIO() - buf.write(imgbuf) - buf.seek(0) - try: - img = Image.open(buf).convert('L') - img.show() - # transform - transform = resizeNormalize((100, 32)) - img = transform(img) - img = np.array(img, np.float32) - img.tofile('{}/test_{}.bin'.format(data_dir, index)) - - except IOError: - print('Corrupted image for %d' % index) - - # label - label_key = 'label-%09d'.encode() % index - label = txn.get(label_key).decode('utf-8') - label = label.lower() - - line = 'test_{}.bin:{}'.format(index, label) - f.write(line) - f.write('\n') - out_of_char = f'[^{alphabets}]' - if re.search(out_of_char, label.lower()): - continue - filtered_index_list.append(index) - new_Samples = len(filtered_index_list) - print('new nSamples is:', new_Samples) - - -if __name__ == '__main__': - gen_data_label(test_dir, output_bin) +import os +import re +import six +import lmdb +from PIL import Image +import numpy as np +import torchvision + + +test_dir = '/home/ltsong/1230/CRNN/IIIT5K_lmdb/' +alphabets = '0123456789abcdefghijklmnopqrstuvwxyz' +output_bin = './input_bin/' + + +class resizeNormalize(object): + def __init__(self, size, interpolation=Image.BICUBIC): + self.size = size + self.interpolation = interpolation + self.toTensor = torchvision.transforms.ToTensor() + + def __call__(self, img): + img = img.resize(self.size, self.interpolation) + img = self.toTensor(img) + img.sub_(0.5).div_(0.5) + return img + + +def gen_data_label(test_dir, data_dir): + if not os.path.exists(data_dir): + os.mkdir(data_dir) + + env = lmdb.open(test_dir, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False) + if not env: + print('cannot open lmdb from %s' % (test_dir)) + sys.exit(0) + with env.begin(write=False) as txn: + nSamples = int(txn.get('num-samples'.encode()).decode('utf-8')) + print('origin nSamples is:', nSamples) + filtered_index_list = [] + + with open('label.txt', 'w') as f: + for index in range(nSamples): + index += 1 + # images + img_key = 'image-%09d'.encode() % index + imgbuf = txn.get(img_key) + buf = six.BytesIO() + buf.write(imgbuf) + buf.seek(0) + try: + img = Image.open(buf).convert('L') + img.show() + # transform + transform = resizeNormalize((100, 32)) + img = transform(img) + img = np.array(img, np.float32) + 
img.tofile('{}/test_{}.bin'.format(data_dir, index)) + + except IOError: + print('Corrupted image for %d' % index) + + # label + label_key = 'label-%09d'.encode() % index + label = txn.get(label_key).decode('utf-8') + label = label.lower() + + line = 'test_{}.bin:{}'.format(index, label) + f.write(line) + f.write('\n') + out_of_char = f'[^{alphabets}]' + if re.search(out_of_char, label.lower()): + continue + filtered_index_list.append(index) + new_Samples = len(filtered_index_list) + print('new nSamples is:', new_Samples) + + +if __name__ == '__main__': + gen_data_label(test_dir, output_bin) diff --git a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/postpossess_CRNN_pytorch.py b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/postpossess_CRNN_pytorch.py index 75ed28482d1aa6d4e64872b74140c3ceef89e3fe..b78f0785e7dd6758d4a3d70674fa386daaa7ea10 100644 --- a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/postpossess_CRNN_pytorch.py +++ b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/postpossess_CRNN_pytorch.py @@ -1,138 +1,138 @@ -import os -import sys -import numpy as np -import torch - - -class strLabelConverter(object): - """Convert between str and label. - NOTE: - Insert `blank` to the alphabet for CTC. - Args: - alphabet (str): set of the possible characters. - ignore_case (bool, default=True): whether or not to ignore all of the case. - """ - - def __init__(self, alphabet, ignore_case=False): - self._ignore_case = ignore_case - if self._ignore_case: - alphabet = alphabet.lower() - self.alphabet = alphabet + '-' # for `-1` index - self.dict = {} - for i, char in enumerate(alphabet): - # NOTE: 0 is reserved for 'blank' required by wrap_ctc - self.dict[char] = i + 1 - - def encode(self, text): - """Support batch or single str. - Args: - text (str or list of str): texts to convert. - Returns: - torch.LongTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. - torch.LongTensor [n]: length of each text. - """ - - length = [] - result = [] - for item in text: - length.append(len(item)) - r = [] - for char in item: - index = self.dict[char] - # result.append(index) - r.append(index) - result.append(r) - max_len = 0 - for r in result: - if len(r) > max_len: - max_len = len(r) - result_temp = [] - for r in result: - for i in range(max_len - len(r)): - r.append(0) - result_temp.append(r) - text = result_temp - return (torch.LongTensor(text), torch.LongTensor(length)) - - def decode(self, t, length, raw=False): - """Decode encoded texts back into strs. - Args: - torch.LongTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. - torch.LongTensor [n]: length of each text. - Raises: - AssertionError: when the texts and its length does not match. - Returns: - text (str or list of str): texts to convert. 
- """ - if length.numel() == 1: - length = length[0] - assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length) - if raw: - return ''.join([self.alphabet[i - 1] for i in t]) - else: - char_list = [] - for i in range(length): - if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): - char_list.append(self.alphabet[t[i] - 1]) - return ''.join(char_list) - else: - # batch mode - assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum()) - texts = [] - index = 0 - for i in range(length.numel()): - l = length[i] - texts.append( - self.decode( - t[index:index + l], torch.LongTensor([l]), raw=raw)) - index += l - return texts - - -total_img = 3000 - -def get_Acc(bin_path, label, batch_size): - # label - keys, vals = [], [] - with open(label, 'r') as f: - content = f.read() - contents = content.split('\n')[:-1] - for cot in contents: - cot = cot.split(':') - keys.append(cot[0]) - vals.append(cot[1]) - - labels = dict(zip(keys, vals)) - count = 0 - for index in range(total_img): - index += 1 - - preds = np.fromfile('{}/test_{}_1.bin'.format(bin_path, index), np.float32).reshape(26, -1, 37) - preds = torch.from_numpy(preds) - # print("preds.shape:", preds.shape) - preds_size = torch.LongTensor([preds.size(0)] * batch_size) - - _, preds = preds.max(2) - preds = preds.transpose(1, 0).contiguous().view(-1) - - converter = strLabelConverter('0123456789abcdefghijklmnopqrstuvwxyz') - sim_preds = converter.decode(preds.data, preds_size.data, raw=False) - # print("preds_size.data:",preds_size.data) - key = 'test_{}.bin'.format(index) - if sim_preds == labels[key]: - count += 1 - else: - print("label:{} pred:{}".format(labels[key], sim_preds)) - - # acc - print('*'*50) - print('rightNum: {}'.format(count)) - print('totalNum: {}'.format(total_img)) - print("accuracy_rate %.2f" % (count / total_img * 100)) - print('*'*50) - - -if __name__ == '__main__': - bin_path = 'result/dumpOutput_device1/' - label = './label.txt' - get_Acc(bin_path, label, 1) +import os +import sys +import numpy as np +import torch + + +class strLabelConverter(object): + """Convert between str and label. + NOTE: + Insert `blank` to the alphabet for CTC. + Args: + alphabet (str): set of the possible characters. + ignore_case (bool, default=True): whether or not to ignore all of the case. + """ + + def __init__(self, alphabet, ignore_case=False): + self._ignore_case = ignore_case + if self._ignore_case: + alphabet = alphabet.lower() + self.alphabet = alphabet + '-' # for `-1` index + self.dict = {} + for i, char in enumerate(alphabet): + # NOTE: 0 is reserved for 'blank' required by wrap_ctc + self.dict[char] = i + 1 + + def encode(self, text): + """Support batch or single str. + Args: + text (str or list of str): texts to convert. + Returns: + torch.LongTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. + torch.LongTensor [n]: length of each text. + """ + + length = [] + result = [] + for item in text: + length.append(len(item)) + r = [] + for char in item: + index = self.dict[char] + # result.append(index) + r.append(index) + result.append(r) + max_len = 0 + for r in result: + if len(r) > max_len: + max_len = len(r) + result_temp = [] + for r in result: + for i in range(max_len - len(r)): + r.append(0) + result_temp.append(r) + text = result_temp + return (torch.LongTensor(text), torch.LongTensor(length)) + + def decode(self, t, length, raw=False): + """Decode encoded texts back into strs. 
+ Args: + torch.LongTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. + torch.LongTensor [n]: length of each text. + Raises: + AssertionError: when the texts and its length does not match. + Returns: + text (str or list of str): texts to convert. + """ + if length.numel() == 1: + length = length[0] + assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), length) + if raw: + return ''.join([self.alphabet[i - 1] for i in t]) + else: + char_list = [] + for i in range(length): + if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): + char_list.append(self.alphabet[t[i] - 1]) + return ''.join(char_list) + else: + # batch mode + assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format(t.numel(), length.sum()) + texts = [] + index = 0 + for i in range(length.numel()): + l = length[i] + texts.append( + self.decode( + t[index:index + l], torch.LongTensor([l]), raw=raw)) + index += l + return texts + + +total_img = 3000 + +def get_Acc(bin_path, label, batch_size): + # label + keys, vals = [], [] + with open(label, 'r') as f: + content = f.read() + contents = content.split('\n')[:-1] + for cot in contents: + cot = cot.split(':') + keys.append(cot[0]) + vals.append(cot[1]) + + labels = dict(zip(keys, vals)) + count = 0 + for index in range(total_img): + index += 1 + + preds = np.fromfile('{}/test_{}_1.bin'.format(bin_path, index), np.float32).reshape(26, -1, 37) + preds = torch.from_numpy(preds) + # print("preds.shape:", preds.shape) + preds_size = torch.LongTensor([preds.size(0)] * batch_size) + + _, preds = preds.max(2) + preds = preds.transpose(1, 0).contiguous().view(-1) + + converter = strLabelConverter('0123456789abcdefghijklmnopqrstuvwxyz') + sim_preds = converter.decode(preds.data, preds_size.data, raw=False) + # print("preds_size.data:",preds_size.data) + key = 'test_{}.bin'.format(index) + if sim_preds == labels[key]: + count += 1 + else: + print("label:{} pred:{}".format(labels[key], sim_preds)) + + # acc + print('*'*50) + print('rightNum: {}'.format(count)) + print('totalNum: {}'.format(total_img)) + print("accuracy_rate %.2f" % (count / total_img * 100)) + print('*'*50) + + +if __name__ == '__main__': + bin_path = 'result/dumpOutput_device1/' + label = './label.txt' + get_Acc(bin_path, label, 1) diff --git a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/pth2onnx.py b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/pth2onnx.py index f3c48a4f12ef604fa85b17690d5993dfa584ad66..9e1b2a80c247e27dc63753aebac9fd5b6d68309d 100644 --- a/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch/pth2onnx.py @@ -1,22 +1,22 @@ -import torch -import crnn -import onnx -import torch.onnx - -def convert(): - checkpoint = torch.load("./checkpoint_16_CRNN_acc_0.7963.pth", map_location='cpu') - model = crnn.CRNN(32,1,37,256) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(1, 1, 32, 100) - dynamic_axes = {'actual_input_1':{0:'-1'},'output1':{1:'-1'}} - print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) - torch.onnx.export(model, dummy_input, "crnn_npu_dy.onnx", input_names=input_names,dynamic_axes = dynamic_axes, output_names=output_names, opset_version=11) - - -if __name__ == "__main__": - convert() +import torch +import crnn +import onnx +import torch.onnx + +def convert(): + checkpoint = torch.load("./checkpoint_16_CRNN_acc_0.7963.pth", map_location='cpu') + model = crnn.CRNN(32,1,37,256) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 1, 32, 100) + dynamic_axes = {'actual_input_1':{0:'-1'},'output1':{1:'-1'}} + print('\nStarting ONNX export with onnx %s...' % onnx.__version__) + torch.onnx.export(model, dummy_input, "crnn_npu_dy.onnx", input_names=input_names,dynamic_axes = dynamic_axes, output_names=output_names, opset_version=11) + + +if __name__ == "__main__": + convert() diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/LICENSE b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/OXInterface.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/OXInterface.py index 658c5ff9a95c602dd225d1e71aa71cab4309b53b..08e9180e6554b600470b82acbb09f189dd14d511 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/OXInterface.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/OXInterface.py @@ -1,1249 +1,1249 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -''' -环境: - python==3.8.5 - onnx==1.8.1 - onnxruntime==1.7.0 - skl2onnx==1.8.0 - numpy==1.19.5 -''' - -import os -import sys -import onnx -import copy -import time -import shutil -import numpy as np -import onnxruntime - -from enum import IntEnum -from onnx import NodeProto -from datetime import datetime -from functools import lru_cache -from typing import List, Dict, Any, NoReturn -from onnx.numpy_helper import from_array, to_array -from onnx.onnx_ml_pb2 import TensorProto, ValueInfoProto, AttributeProto -from onnx.helper import make_attribute, make_node, make_graph, make_model -from skl2onnx.helpers.onnx_helper import enumerate_model_node_outputs, select_model_inputs_outputs, save_onnx_model - -# 修改递归深度限制 -sys.setrecursionlimit(100000) - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> enum OXDataType -# onnx类型枚举值 -class OXDataType(IntEnum): - float32 = 1 - uint8 = 2 - int8 = 3 - uint16 = 4 - int16 = 5 - int32 = 6 - int64 = 7 - string = 8 - bool = 9 - float16 = 10 - double = 11 - uint32 = 12 - uint64 = 13 - complex64 = 14 - complex128 = 15 - bfloat16 = 16 - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss GV -# 全局变量,GV = global variable -class GV: - # onnx和numpy数据类型映射字典 - ONNX_2_NUMPY_DATATYPE_DICT = { - 1: np.float32, - 2: np.uint8, - 3: np.int8, - 4: np.uint16, - 5: np.int16, - 6: np.int32, - 7: np.int64, - 9: np.bool_, - 10: np.float16, - 11: np.float64, - 12: np.uint32, - 13: np.uint64, - 14: np.complex64, - 15: np.complex128, - np.float32: 1, - np.uint8: 2, - np.int8: 3, - np.uint16: 4, - np.int16: 5, - np.int32: 6, - np.int64: 7, - np.bool_: 9, - np.float16: 10, - np.float64: 11, - np.uint32: 12, - np.uint64: 13, - np.complex64: 14, - np.complex128: 15, - 'tensor(float)': np.float32, - 'tensor(uint8)': np.uint8, - 'tensor(int8)': np.int8, - 'tensor(uint16)': np.uint16, - 'tensor(int16)': np.int16, - 'tensor(int32)': np.int32, - 'tensor(int64)': np.int64, - 'tensor(bool)': np.bool_, - 'tensor(float16)': np.float16, - 'tensor(double)': np.float64, - 'tensor(uint32)': np.uint32, - 'tensor(uint64)': np.uint64, - } - - # initializer,node索引字典(实现快速查找) - OXINITIALIZER_DICT = {} - OXNODE_DICT = {} - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXInitializer -class OXInitializer: - ''' - If you print a Initializer variable in the terminal, you will get something like this, and you can modify it - directly. 
- dims: 1 - data_type: 6 - name: '4239' - raw_data: '\000\000\000\00' - - @dims: google.protobuf.pyext._message.RepeatedScalarContainer - @data_type: int - @name: str - @raw_data: bytes - ''' - - def __init__(self, initializer: TensorProto): - self._initializer = initializer - - def __str__(self): - ndarray = to_array(self._initializer) - msg = 'name: ' + str(self._initializer.name) + '\n' + \ - 'dims: ' + str(self._initializer.dims) + '\n' + \ - 'data_type: ' + str(self._initializer.data_type) + '\n' + \ - 'dtype: ' + str(ndarray.dtype) + '\n' + \ - 'shape: ' + str(ndarray.shape) + '\n' + \ - 'ndarray:\n' + str(ndarray) - return msg - - def get_initializer(self) -> TensorProto: - return self._initializer - - def get_name(self) -> str: - ''' - 获取initializer的名字 - ''' - - return self._initializer.name - - def set_name(self, new_name) -> NoReturn: - ''' - 设置/修改initializer的名字 - ''' - - old_name = self._initializer.name - self._initializer.name = new_name - GV.OXINITIALIZER_DICT[new_name] = GV.OXINITIALIZER_DICT[old_name] - GV.OXINITIALIZER_DICT.pop(old_name) - - def get_data_type(self) -> int: - ''' - 获取initializer的数据类型 - ''' - - return self._initializer.data_type - - def set_data_type(self, ox_data_type: OXDataType) -> NoReturn: - ''' - 设置/修改initializer的数据类型 - ''' - - ndarray = to_array(self._initializer).astype(GV.ONNX_2_NUMPY_DATATYPE_DICT[int(ox_data_type)]) - self._initializer.raw_data = ndarray.tobytes() - self._initializer.data_type = int(ox_data_type) - - def get_data(self) -> np.ndarray: - ''' - 获取initializer的数据 - ''' - - return to_array(self._initializer) - - def set_data(self, ndarray: np.ndarray) -> NoReturn: - ''' - 设置/修改initializer的数据 - ''' - - self._initializer.raw_data = ndarray.tobytes() - self._initializer.data_type = GV.ONNX_2_NUMPY_DATATYPE_DICT[eval('np.' + str(ndarray.dtype))] - _clear_list(self._initializer.dims) - _extend_list(self._initializer.dims, ndarray.shape) - - def save_data(self, file_path: str) -> NoReturn: - ''' - 保存initializer的数据 - ''' - - np.save(file_path, to_array(self._initializer)) - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXNode -class OXNode: - ''' - If you print a NodeProto variable in the terminal, you will get something like this, and you can modify it directly. 
- input: '494' - input: 'fc.weight' - input: 'fc.bias' - output: 'class' - name: 'Gemm_121' - op_type: 'Gemm' - attribute { - name: 'alpha' - f: 1.0 - type: FLOAT - } - attribute { - name: 'beta' - f: 1.0 - type: FLOAT - } - attribute { - name: 'transB' - i: 1 - type: INT - } - - @input: google.protobuf.pyext._message.RepeatedScalarContainer - @output: google.protobuf.pyext._message.RepeatedScalarContainer - @name: str - @op_type: str - @attribute: google.protobuf.pyext._message.RepeatedCompositeContainer - ''' - - def __init__(self, node: NodeProto): - self._node = node - - def __str__(self): - return str(self._node) - - def get_node(self) -> NodeProto: - return self._node - - @property - def input(self): # -> google.protobuf.pyext._message.RepeatedScalarContainer - ''' - 获取节点的输入列表 - ''' - - return self._node.input - - @property - def output(self): # -> google.protobuf.pyext._message.RepeatedScalarContainer - ''' - 获取节点的输出列表 - ''' - - return self._node.output - - def get_name(self) -> str: - ''' - 获取节点的名字 - ''' - - return self._node.name - - def set_name(self, new_name) -> NoReturn: - ''' - 设置/修改节点的名字 - ''' - - old_name = self._node.name - self._node.name = new_name - GV.OXNODE_DICT[new_name] = GV.OXNODE_DICT[old_name] - GV.OXNODE_DICT.pop(old_name) - - def get_op_type(self) -> int: - ''' - 获取节点的类型 - ''' - - return self._node.op_type - - def set_op_type(self, op_type) -> NoReturn: - ''' - 设置/修改节点的类型 - ''' - - self._node.op_type = op_type - - def get_attribute(self): # -> google.protobuf.pyext._message.RepeatedCompositeContainer - ''' - 获取节点属性 - ''' - - return self._node.attribute - - def set_attribute(self, attr_name: str, attr_value: Any) -> AttributeProto: - ''' - 设置/修改节点属性 - - Args: - attr_name: 属性名字 - attr_value: 属性值 - - Returns: 修改后的属性 - ''' - - # 构造新attr - new_attr = make_attribute(attr_name, attr_value) - - # 删除旧的 - for attr in self._node.attribute: - if attr.name == attr_name: - self._node.attribute.remove(attr) - break - - # 添加新的 - self._node.attribute.append(new_attr) - - return new_attr - - def add_attribute(self, attr_name: str, attr_value: Any) -> AttributeProto: - ''' - 给节点增加新属性 - - Args: - attr_name: 属性名字 - attr_value: 属性值 - - Returns: 新增的属性 - ''' - - # 构造新attr - new_attr = make_attribute(attr_name, attr_value) - - # 增加 - self._node.attribute.append(new_attr) - - return new_attr - - def remove_attribute(self, attr_name: str) -> AttributeProto: - ''' - 删除节点的某个属性 - - Args: - attr_name: 属性名字 - attr_value: 属性值 - - Returns: 被删除的属性 - ''' - - for attr in self._node.attribute: - if attr.name == attr_name: - removed_attr = attr - self._node.attribute.remove(attr) - break - - return removed_attr - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXGraph -class OXGraph: - def __init__(self, model_path: str): - print('[INFO] Start initializing.') - start_time = datetime.now() - - self._model_path = model_path - self._model = onnx.load_model(model_path) - self._graph = self._model.graph - self._initializer = self._graph.initializer - self._node = self._graph.node - self._input_tensor_2_oxnode_dict = {} - self._output_tensor_2_oxnode_dict = {} - - # initializer - for initializer in self._initializer: - GV.OXINITIALIZER_DICT[initializer.name] = OXInitializer(initializer) - - # node - for idx, node in enumerate(self._node): - oxnode = OXNode(node) - GV.OXNODE_DICT[node.name] = oxnode - - # 创建tensor_2_oxnode字典 - self._update_tensor_2_oxnode_dict( - self._input_tensor_2_oxnode_dict, - self._output_tensor_2_oxnode_dict, - ) - - # 
获取所有tensor信息 - try: - self._all_tensor_info = self.get_all_tensor_info() - except: - os.remove(os.path.join(os.path.dirname(self._model_path), 'temp.onnx')) - print('[WARNING] There are custom operators in the model, ' - 'and these functions are not available: get_input_tensor_info()、get_output_tensor_info()、' - 'get_all_tensor_info()、infer_shape()、dump_all_node_data()、trunc_model().') - - # 屏蔽check_model - def check_model(model): - pass - - onnx.checker.check_model = check_model - - end_time = datetime.now() - cost_time = (end_time - start_time).seconds - print('[INFO] Initialization completed! Cost {} seconds.'.format(cost_time)) - - def __str__(self): - return str(self._model) - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Initializer相关函数 - def get_oxinitializer_by_name(self, oxinitializer_name: str, can_return_none: bool = False) -> OXInitializer: - ''' - 根据initializer的名字获取OXInitializer - ''' - - if oxinitializer_name not in GV.OXINITIALIZER_DICT: - if can_return_none is True: - return None - else: - raise RuntimeError('[ERROR] {} not found.'.format(oxinitializer_name)) - return GV.OXINITIALIZER_DICT[oxinitializer_name] - - def add_initializer(self, initializer_name: str, ndarray: np.ndarray) -> OXInitializer: - ''' - 向模型中新增一个initializer - - Args: - initializer_name: initializer的名字 - ndarray: initializer的数据 - - Returns: 新增的OXInitializer - ''' - - if initializer_name in GV.OXINITIALIZER_DICT: - raise RuntimeError( - '[ERROR] {} has already exists in the model, please use a different name!'.format(initializer_name)) - - initializer = from_array(ndarray, initializer_name) - self._initializer.append(initializer) # 这里是复制,而不是引用,id已经变了 - initializer = self._initializer[-1] - oxinitializer = OXInitializer(initializer) - GV.OXINITIALIZER_DICT[initializer_name] = oxinitializer - - return oxinitializer - - def remove_initializer(self, initializer_name: str) -> OXInitializer: - ''' - 从模型中删除指定的initializer - - Args: - initializer_name: initializer的名字 - - Returns: 删除的OXInitializer - ''' - - oxinitializer = self.get_oxinitializer_by_name(initializer_name) - GV.OXINITIALIZER_DICT.pop(initializer_name) - self._initializer.remove(oxinitializer.get_initializer()) - - return oxinitializer - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Node相关函数 - def get_oxnode_by_name(self, oxnode_name: str, can_return_none: bool = False) -> OXNode: - ''' - 根据节点名字获取OXNode - ''' - - if oxnode_name not in GV.OXNODE_DICT: - if can_return_none is True: - return None - else: - raise RuntimeError('[ERROR] {} not found.'.format(oxnode_name)) - return GV.OXNODE_DICT[oxnode_name] - - def get_oxnode_by_op_type(self, op_type: str) -> List[OXNode]: - ''' - 根据节点类型获取OXNode - ''' - - res = set() - for oxnode in GV.OXNODE_DICT.values(): - if oxnode.get_op_type() == op_type: - res.add(oxnode) - return list(res) - - def get_oxnode_whose_input_contain_this(self, input_name: str) -> List[OXNode]: - ''' - 遍历所有OXNode,获取输入包含`input_name`的那些OXNode - ''' - - res = set() - for oxnode in GV.OXNODE_DICT.values(): - for oxinput_name in oxnode.input: - if oxinput_name == input_name: - res.add(oxnode) - break - return list(res) - - def get_oxnode_whose_output_contain_this(self, output_name: str) -> List[OXNode]: - ''' - 遍历所有OXNode,获取输出包含`output_name`的那些OXNode - ''' - - res = set() - for oxnode in GV.OXNODE_DICT.values(): - for oxoutput_name in oxnode.output: - if oxoutput_name == output_name: - res.add(oxnode) - break - return list(res) - - def get_previous_oxnode(self, oxnode_name: str) -> List[OXNode]: - ''' - 
获取一个节点的前驱节点 - ''' - - res = set() - inputs = self.get_oxnode_by_name(oxnode_name).input - for input in inputs: - oxnode_set = self._output_tensor_2_oxnode_dict.get(input) - if oxnode_set is not None: - res.update(oxnode_set) - return list(res) - - def get_next_oxnode(self, oxnode_name: str) -> List[OXNode]: - ''' - 获取一个节点的后继节点 - ''' - - res = set() - outputs = self.get_oxnode_by_name(oxnode_name).output - for output in outputs: - oxnode_set = self._input_tensor_2_oxnode_dict.get(output) - if oxnode_set is not None: - res.update(oxnode_set) - return list(res) - - def insert_node(self, bef_node_info_list: List[Dict], aft_node_info_list: List[Dict], op_type: str, op_name: str, - **attributes: Dict) -> OXNode: - ''' - 向模型中插入新节点,并自动连边,注意和`add_node`的区别 - - 限制:无法涵盖所有场景,若结果不符合预期,请用`add_node`函数,并手动指定连边关系。 - - Args: - bef_node_info_list:参见README.md用例 - aft_node_info_list:参见README.md用例 - op_type:节点的类型 - op_name:节点的名字 - attributes:节点的属性 - - Returns: 插入的OXNode - ''' - - # 校验插入的节点是否已经存在 - if op_name in GV.OXNODE_DICT: - raise RuntimeError( - '[ERROR] {} has already exists in the model, please use a different name!'.format(op_name)) - - # 解析信息 - bef_node_info_list, aft_node_info_list = self._parse_insert_node_info(bef_node_info_list, aft_node_info_list) - - # 插入节点 - # + 构造新节点的输入 - new_node_input = [] - for bef_node_info in bef_node_info_list: - oxnode = self.get_oxnode_by_name(bef_node_info['bef_node_name'], True) - if oxnode is None: # 说明此节点是模型的输入节点 - new_node_input.append(bef_node_info['bef_node_name']) - else: - for idx in bef_node_info['link_output_idx']: - if oxnode.output[idx] in self.get_output_tensor_info().keys(): # 说明此节点紧接模型的输出节点 - oxnode.output[idx] = oxnode.get_name() + '_m_' + str(idx) - new_node_input.append(oxnode.output[idx]) - - # + 构造新节点的输出 - new_node_output = [op_name + '_0'] - - # + 构造新节点 - insert_oxnode = self.add_node(op_type=op_type, - op_name=op_name, - inputs=new_node_input, - outputs=new_node_output, - **attributes) - - # 和后继节点连边 - for aft_node_info in aft_node_info_list: - oxnode = self.get_oxnode_by_name(aft_node_info['aft_node_name'], True) - if oxnode is None: # 说明此节点是模型的输出节点 - if len(aft_node_info_list) != 1: - raise RuntimeError('[ERROR] Please check aft_node_info_list!') - - # 修改insert_oxnode的输出为模型的输出节点 - insert_oxnode.output[0] = aft_node_info['aft_node_name'] - else: - for idx in aft_node_info['link_input_idx']: - oxnode.input[idx] = new_node_output[0] - - # 更新tensor_2_oxnode字典 - self._update_tensor_2_oxnode_dict( - self._input_tensor_2_oxnode_dict, - self._output_tensor_2_oxnode_dict, - ) - - return insert_oxnode - - def add_node(self, op_type: str, op_name: str, inputs: List[str], outputs: List[str], **attributes: Dict) -> OXNode: - ''' - 向模型中增加新节点,不会自动连边,注意和`insert_node`的区别 - - Args: - op_type:节点的类型 - op_name:节点的名字 - inputs:节点的输入 - outputs:节点的输出 - attributes:节点的属性 - - Returns: 新增的OXNode - ''' - - if op_name in GV.OXNODE_DICT: - raise RuntimeError( - '[ERROR] {} has already exists in the model, please use a different name!'.format(op_name)) - - new_node = make_node(op_type=op_type, name=op_name, inputs=inputs, outputs=outputs, **attributes) - self._node.append(new_node) # 这里复制,而不是用引用,id已经变了 - new_node = self._node[-1] - new_oxnode = OXNode(new_node) - GV.OXNODE_DICT[new_oxnode.get_name()] = new_oxnode - - # 更新tensor_2_oxnode字典 - self._update_tensor_2_oxnode_dict( - self._input_tensor_2_oxnode_dict, - self._output_tensor_2_oxnode_dict, - ) - - return new_oxnode - - def remove_node(self, node_name: str, auto_link: bool = True) -> OXNode: - ''' - 从模型中删除节点 - - 
限制:若开启自动连边,则删除的节点必须只有一个前驱节点,否则需要手动连边。若结果不符合预期,也需要自己手动连边。 - - Args: - node_name:要删除的节点名字 - auto_link:是否自动连边 - - Returns: 删除的OXNode - ''' - - if node_name not in GV.OXNODE_DICT: - raise RuntimeError('[ERROR] {} not found.'.format(node_name)) - - if auto_link is False: - oxnode = self.get_oxnode_by_name(node_name) - else: - oxnode = self.get_oxnode_by_name(node_name) - previous_node = self.get_previous_oxnode(node_name) - next_node = self.get_next_oxnode(node_name) - - if len(previous_node) > 1: - raise RuntimeError('[ERROR] Remove node can only have one previous node.') - - _clear_list(previous_node[0].output) - _extend_list(previous_node[0].output, oxnode.output) - - # 删除节点 - GV.OXNODE_DICT.pop(node_name) - self._node.remove(oxnode.get_node()) - - # 更新tensor_2_oxnode字典 - self._update_tensor_2_oxnode_dict( - self._input_tensor_2_oxnode_dict, - self._output_tensor_2_oxnode_dict, - ) - - return oxnode - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 输入输出相关函数 - def get_input_tensor_info(self) -> Dict: - ''' - 获取模型输入tensor的信息 - 信息包括:tensor名字、shape、类型 - - Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} - ''' - - session = onnxruntime.InferenceSession(self._model_path) - - input_tensor_info = {} - for input_item in session.get_inputs(): - input_tensor_info[input_item.name] = { - 'shape': tuple(input_item.shape), - 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[input_item.type] - } - - return input_tensor_info - - def get_output_tensor_info(self) -> Dict: - ''' - 获取模型输出tensor的信息 - 信息包括:tensor名字、shape、类型 - - Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} - ''' - - session = onnxruntime.InferenceSession(self._model_path) - - output_tensor_info = {} - for output_item in session.get_outputs(): - output_tensor_info[output_item.name] = { - 'shape': tuple(output_item.shape), - 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[output_item.type] - } - - return output_tensor_info - - def get_all_tensor_info(self) -> Dict: - ''' - 获取模型中所有tensor的信息 - 所有tensor包括:模型输入tensor、模型输出tensor、模型中间tensor - 信息包括:tensor名字、shape、类型 - - Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} - ''' - - old_onnx_model = onnx.load(self._model_path) - - output_name = [] - for name in enumerate_model_node_outputs(old_onnx_model): - output_name.append(name) - - new_onnx_model = select_model_inputs_outputs(old_onnx_model, output_name) - new_model_path = os.path.join(os.path.dirname(self._model_path), 'temp.onnx') - save_onnx_model(new_onnx_model, new_model_path) - - session = onnxruntime.InferenceSession(new_model_path) - os.remove(new_model_path) - - all_tensor_info = {} - - for input_item in session.get_inputs(): - all_tensor_info[input_item.name] = { - 'shape': tuple(input_item.shape), - 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[input_item.type] - } - - for output_item in session.get_outputs(): - all_tensor_info[output_item.name] = { - 'shape': tuple(output_item.shape), - 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[output_item.type] - } - - for oxinitializer in GV.OXINITIALIZER_DICT.values(): - all_tensor_info[oxinitializer.get_name()] = { - 'shape': oxinitializer.get_data().shape, - 'dtype': eval('np.' 
+ str(oxinitializer.get_data().dtype)) - } - - return all_tensor_info - - def infer_shape(self, input_data_info_list: List[Dict]) -> Dict: - ''' - 推导模型各个算子的输出shape信息。 - - 用途:有些模型从onnx图中无法看出算子输出shape信息,也无法获取shape信息,通过此函数可以推导出shape信息。 - - 原理:用真实数据运行一遍模型,记录各个算子的输出shape信息。 - - Args: - input_data_info_list: - [ - { - 'model_input_name': 'input1_name', - 'shape': '(1, 3, 224, 224)', - 'dtype': 'np.float32' - }, - { - 'model_input_name': 'input2_name', - 'shape': '(1, 3, 224, 224)', - 'dtype': 'np.float32' - } - ] - - Returns: {'op_name': {'shape': np.shape, 'dtype': np.dtype}, ...} - ''' - - # 构造输入数据 - input_data_dict = {} - for input_data_info in input_data_info_list: - input_data_dict[input_data_info['model_input_name']] = np.full(eval(input_data_info['shape']), - 1, - dtype=eval(input_data_info['dtype'])) - - # 修改模型,增加输出节点 - old_onnx_model = onnx.load(self._model_path) - output = [] - for out in enumerate_model_node_outputs(old_onnx_model): - output.append(out) - new_onnx_model = select_model_inputs_outputs(old_onnx_model, outputs=output) - onnx_save_path = './temp.onnx' - save_onnx_model(new_onnx_model, onnx_save_path) - - # 推理得到输出 - sess = onnxruntime.InferenceSession(onnx_save_path) - os.remove(onnx_save_path) - output_name = [node.name for node in sess.get_outputs()] - res = sess.run(output_name, input_data_dict) - - # 保存数据 - infer_tensor_info = {} - idx = 0 - for node in old_onnx_model.graph.node: - for i in range(len(node.output)): - infer_tensor_info[node.name] = {'output_idx': i, 'shape': res[idx].shape, 'dtype': res[idx].dtype} - idx += 1 - - return infer_tensor_info - - def dump_all_node_data(self, input_data_info_list: List[Dict], dump_data_save_path: str) -> NoReturn: - ''' - dump模型所有节点的数据 - - Args: - input_data_info_list: - [ - { - 'model_input_name': 'input1_name', - 'npy_file_path': './0.npy', - }, - { - 'model_input_name': 'input2_name', - 'npy_file_path': './1.npy', - }, - ] - dump_data_save_path: e.g. './dump_data' - - Returns: NoReturn - ''' - - # 创建目录 - if os.path.exists(dump_data_save_path): - shutil.rmtree(dump_data_save_path) - os.makedirs(dump_data_save_path) - - # 修改模型,增加输出节点 - old_onnx_model = onnx.load(self._model_path) - output = [] - for out in enumerate_model_node_outputs(old_onnx_model): - output.append(out) - new_onnx_model = select_model_inputs_outputs(old_onnx_model, outputs=output) - onnx_save_path = os.path.join(dump_data_save_path, "./temp.onnx") - save_onnx_model(new_onnx_model, onnx_save_path) - - # 获取输入数据 - input_data_dict = {} - for input_data_info in input_data_info_list: - input_data_dict[input_data_info['model_input_name']] = np.load(input_data_info['npy_file_path']) - - # 推理得到输出 - sess = onnxruntime.InferenceSession(onnx_save_path) - os.remove(onnx_save_path) - output_name = [node.name for node in sess.get_outputs()] - res = sess.run(output_name, input_data_dict) - - # 保存数据 - idx = 0 - for node in old_onnx_model.graph.node: - for i in range(len(node.output)): - file_name = node.name + "." + str(i) + "." 
+ str(round(time.time() * 1000000)) + ".npy" - data_save_path = os.path.join(dump_data_save_path, file_name) - np.save(data_save_path, res[idx]) - idx += 1 - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 截图函数 - def extract_model(self, input_tensor_name_list: List[str], output_tensor_name_list: List[str], - new_model_save_path: str) -> NoReturn: - ''' - 从onnx 1.8.1开始,onnx官方提供了截图函数,此函数是对官方`onnx.utils.extract_model`函数的封装, - 以使其集成到`OXGraph`类中。另外,此函数屏蔽了`check_model`操作,使包含自定义算子的onnx提取子图后 - 在保存模型时跳过检查操作,使之可以顺利保存。以下是官方`onnx.utils.extract_model`函数的说明: - - Extracts sub-model from an ONNX model. - - The sub-model is defined by the names of the input and output tensors *exactly*. - - Note: For control-flow operators, e.g. If and Loop, the _boundary of sub-model_, - which is defined by the input and output tensors, should not _cut through_ the - subgraph that is connected to the _main graph_ as attributes of these operators. - - Arguments: - input_path (string): The path to original ONNX model. - output_path (string): The path to save the extracted ONNX model. - input_names (list of string): The names of the input tensors that to be extracted. - output_names (list of string): The names of the output tensors that to be extracted. - ''' - - print('[INFO] Begin to extract the model.') - start_time = datetime.now() - onnx.utils.extract_model(self._model_path, new_model_save_path, input_tensor_name_list, output_tensor_name_list) - end_time = datetime.now() - cost_time = (end_time - start_time).seconds - print('[INFO] Extract model completed! Cost {} seconds.'.format(cost_time)) - - def trunc_model(self, - trunc_beg_node_name_list: List[str], - trunc_end_node_name_list: List[str], - new_model_save_path: str, - keep_input_initializer: bool = False, - userdef_trunc_beg_node_info_list: List[Dict] = None) -> NoReturn: - ''' - 截取一段模型 - - 用途:可以用来单独验证某段网络的精度 - - 注意: - 从onnx 1.8.1开始,onnx官方提供了截图函数,若onnx版本>=1.8.1,请使用`extract_model`函数。 - `extract_model`函数是对官方`onnx.utils.extract_model`函数的封装,以使其集成到`OXGraph`类中。 - 此`trunc_model`函数是自己写的,功能可能有缺陷,但截图速度一般来说更快,模型较大时可以对比尝试。 - ''' - - print('[WARNING] 从onnx 1.8.1开始,onnx官方提供了截图函数,若onnx版本>=1.8.1,请使用`extract_model`函数。' - '`extract_model`函数是对官方`onnx.utils.extract_model`函数的封装,以使其集成到`OXGraph`类中。' - '此`trunc_model`函数是自己写的,功能可能有缺陷,但截图速度一般来说更快,模型较大时可以对比尝试。') - - print('[INFO] Begin to truncate the model.') - start_time = datetime.now() - - # 修改输出节点 - new_output = [] - for elem in trunc_end_node_name_list: - output = self.get_oxnode_by_name(elem).output - new_output.extend(x for x in output) - new_onnx = select_model_inputs_outputs(self._model, outputs=new_output) - save_onnx_model(new_onnx, new_model_save_path) - - # 加载模型 - model = onnx.load_model(new_model_save_path) - graph = model.graph - nodes = graph.node - initializers = graph.initializer - - # 搜索节点 - def find_trunc_beg_node(node_name): - is_find = False - for node in nodes: - if node.name == node_name: - trunc_beg_node = node - is_find = True - break - if is_find is True: - return trunc_beg_node - else: - raise RuntimeError('[ERROR] {} not found.'.format(node_name)) - - # 获取trunc_beg_node详细信息,构造一个这样的list: - ''' - [ - { - 'trunc_beg_node': node, - 'new_input_info_list': [ - { - 'input_name': 'input_A', - 'dtype': OXDataType.float32, - 'shape': (1, 256, 56, 56), - 'input_idx': 0 - }, - { - 'input_name': 'input_B', - 'dtype': OXDataType.float32, - 'shape': (1, 256, 56, 56), - 'input_idx': 1 - } - ] - } - ] - ''' - if userdef_trunc_beg_node_info_list is None: - trunc_beg_node_info_list = [] - initializer_name_set = 
set() - initializer_name_set.update([oxinitializer.get_name() for oxinitializer in GV.OXINITIALIZER_DICT.values()]) - count = 0 - for trunc_beg_node_name in trunc_beg_node_name_list: - trunc_beg_node = find_trunc_beg_node(trunc_beg_node_name) - new_input_info_list = [] - for idx, input in enumerate(trunc_beg_node.input): - if (keep_input_initializer is True) and (input in initializer_name_set): - continue - else: - new_input_info = {} - new_input_info['input_name'] = 'new_input_' + str(count) - count += 1 - new_input_info['dtype'] = GV.ONNX_2_NUMPY_DATATYPE_DICT[self._all_tensor_info[input]['dtype']] - new_input_info['shape'] = self._all_tensor_info[input]['shape'] - new_input_info['input_idx'] = idx - new_input_info_list.append(new_input_info) - trunc_beg_node_info = {} - trunc_beg_node_info['trunc_beg_node'] = trunc_beg_node - trunc_beg_node_info['new_input_info_list'] = new_input_info_list - trunc_beg_node_info_list.append(trunc_beg_node_info) - else: - trunc_beg_node_info_list = userdef_trunc_beg_node_info_list - - # 构造新输入 - new_inputs = [] - for trunc_beg_node_info in trunc_beg_node_info_list: - if userdef_trunc_beg_node_info_list is None: - trunc_begin_node = trunc_beg_node_info['trunc_beg_node'] - else: - trunc_begin_node = find_trunc_beg_node(trunc_beg_node_info['trunc_beg_node_name']) - for new_input_info in trunc_beg_node_info['new_input_info_list']: - new_input = self._make_new_input(new_input_info['input_name'], new_input_info['dtype'], - new_input_info['shape']) - new_inputs.append(new_input) - trunc_begin_node.input[new_input_info['input_idx']] = new_input_info['input_name'] - - # 查找有用节点 - useful_node_name_set = set() - useful_node_name_set.update(trunc_beg_node_name_list) - useful_node_name_set.update(trunc_end_node_name_list) - - # + 正向查找 - @lru_cache() - def find_useful_node(next_node_name_tuple): - for next_node_name in next_node_name_tuple: - if next_node_name not in trunc_end_node_name_list: - output_oxnode_list = self.get_next_oxnode(next_node_name) - output_oxnode_name_tuple = tuple([oxnode.get_name() for oxnode in output_oxnode_list]) - useful_node_name_set.update(output_oxnode_name_tuple) - find_useful_node(output_oxnode_name_tuple) - - # + 反向查找 - @lru_cache() - def find_useful_node_reverse(next_node_name_tuple): - for next_node_name in next_node_name_tuple: - if next_node_name not in trunc_beg_node_name_list: - input_oxnode_list = self.get_previous_oxnode(next_node_name) - input_oxnode_name_tuple = tuple([oxnode.get_name() for oxnode in input_oxnode_list]) - useful_node_name_set.update(input_oxnode_name_tuple) - find_useful_node_reverse(input_oxnode_name_tuple) - - # + 正向和反向都查找一遍,防止漏查 - find_useful_node(tuple(trunc_beg_node_name_list)) - find_useful_node_reverse(tuple(trunc_end_node_name_list)) - - # 删除多余节点 - for node in copy.deepcopy(nodes): - if node.name not in useful_node_name_set: - nodes.remove(node) - - # 删除多余输入 - _clear_list(graph.input) - _extend_list(graph.input, new_inputs) - - # 删除多余Initializer - all_input = set() - for node in nodes: - all_input.update(node.input) - for initializer in copy.deepcopy(initializers): - if initializer.name not in all_input: - initializers.remove(initializer) - - # 保存模型 - name = 'Extracted from {' + self._graph.name + '}' - graph = make_graph(nodes, - name, - graph.input, - graph.output, - initializer=initializers, - value_info=graph.value_info) - meta = { - 'ir_version': self._model.ir_version, - 'opset_imports': self._model.opset_import, - 'producer_name': 'OXGraph.trunc_model()', - } - new_mode = make_model(graph, **meta) 
- onnx.save(new_mode, new_model_save_path) - end_time = datetime.now() - cost_time = (end_time - start_time).seconds - print('[INFO] Truncate model completed! Cost {} seconds.'.format(cost_time)) - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 保存模型 - def save_new_model(self, new_model_path) -> NoReturn: - ''' - 保存修改后的模型 - ''' - - onnx.save_model(self._model, new_model_path) - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 私有函数 - def _update_tensor_2_oxnode_dict(self, input_tensor_2_oxnode_dict, output_tensor_2_oxnode_dict) -> NoReturn: - # 清空字典 - input_tensor_2_oxnode_dict.clear() - output_tensor_2_oxnode_dict.clear() - - # 创建字典 - for oxnode in GV.OXNODE_DICT.values(): - inputs = oxnode.input - outputs = oxnode.output - for input in inputs: - input_tensor_2_oxnode_dict.setdefault(input, set()).add(oxnode) - for output in outputs: - output_tensor_2_oxnode_dict.setdefault(output, set()).add(oxnode) - - def _make_new_input(self, new_input_name: str, ox_data_type: OXDataType, shape: tuple) -> ValueInfoProto: - ''' - If you print the model input in the terminal, you will get something like this, and you can modify it directly. - `dim_param` means dynamic shape. - [name: 'image' - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_param: '-1' - } - dim { - dim_value: 3 - } - dim { - dim_value: 224 - } - dim { - dim_value: 224 - } - } - } - } - ] - ''' - - new_input = copy.deepcopy(self._graph.input[0]) - new_input.name = new_input_name - new_input.type.tensor_type.elem_type = int(ox_data_type) - - dim_diff = len(shape) - len(new_input.type.tensor_type.shape.dim) - if dim_diff > 0: - for i in range(dim_diff): - new_input.type.tensor_type.shape.dim.append(copy.deepcopy(new_input.type.tensor_type.shape.dim[0])) - elif dim_diff < 0: - for i in range(abs(dim_diff)): - new_input.type.tensor_type.shape.dim.pop() - - for index in range(len(shape)): - if isinstance(shape[index], str): - new_input.type.tensor_type.shape.dim[index].dim_param = shape[index] - elif shape[index] is None: - new_input.type.tensor_type.shape.dim[index].dim_param = '-1' - print('[WARNING] Can not infer tensor shape, set it to "-1" here, which may cause an error! 
' - 'Please specify `userdef_trunc_beg_node_info_list` parameters and retry.') - else: - new_input.type.tensor_type.shape.dim[index].dim_value = shape[index] - - return new_input - - def _parse_insert_node_info(self, bef_node_info_list, aft_node_info_list): - ''' - parse bef_node_info_list = ['Relu_1:0'] and aft_node_info_list = ['MaxPool_2:0'] into: - - bef_node_info_list=[{ - 'bef_node_name': 'Relu_1', - 'link_output_idx': [0] - }] - - aft_node_info_list=[{ - 'aft_node_name': 'MaxPool_2', - 'link_input_idx': [0] - }] - - 默认的`:0`可以省略 - ''' - - # 变量定义 - new_bef_node_info_list = [] - new_aft_node_info_list = [] - - # 解析bef_node_info_list - for bef_node_info in bef_node_info_list: - bef_node_info_dict = {} - info_list = bef_node_info.split(':') - bef_node_info_dict['bef_node_name'] = info_list[0] - if len(info_list) == 1: - bef_node_info_dict['link_output_idx'] = [0] - else: - bef_node_info_dict['link_output_idx'] = [int(elem) for idx, elem in enumerate(info_list) if idx > 0] - new_bef_node_info_list.append(bef_node_info_dict) - - # 解析aft_node_info_list - for aft_node_info in aft_node_info_list: - aft_node_info_dict = {} - info_list = aft_node_info.split(':') - aft_node_info_dict['aft_node_name'] = info_list[0] - if len(info_list) == 1: - aft_node_info_dict['link_input_idx'] = [0] - else: - aft_node_info_dict['link_input_idx'] = [int(elem) for idx, elem in enumerate(info_list) if idx > 0] - new_aft_node_info_list.append(aft_node_info_dict) - - return new_bef_node_info_list, new_aft_node_info_list - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 公共函数 -def _clear_list(list) -> NoReturn: - ''' - 清空RepeatedScalarContainer或RepeatedCompositeContainer列表 - ''' - - list_len = len(list) - for _ in range(list_len): - list.pop() - - -def _extend_list(list, what_to_add) -> NoReturn: - ''' - 扩展RepeatedScalarContainer或RepeatedCompositeContainer列表 - ''' - - for elem in what_to_add: - list.append(elem) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +''' +环境: + python==3.8.5 + onnx==1.8.1 + onnxruntime==1.7.0 + skl2onnx==1.8.0 + numpy==1.19.5 +''' + +import os +import sys +import onnx +import copy +import time +import shutil +import numpy as np +import onnxruntime + +from enum import IntEnum +from onnx import NodeProto +from datetime import datetime +from functools import lru_cache +from typing import List, Dict, Any, NoReturn +from onnx.numpy_helper import from_array, to_array +from onnx.onnx_ml_pb2 import TensorProto, ValueInfoProto, AttributeProto +from onnx.helper import make_attribute, make_node, make_graph, make_model +from skl2onnx.helpers.onnx_helper import enumerate_model_node_outputs, select_model_inputs_outputs, save_onnx_model + +# 修改递归深度限制 +sys.setrecursionlimit(100000) + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> enum OXDataType +# onnx类型枚举值 +class OXDataType(IntEnum): + float32 = 1 + uint8 = 2 + int8 = 3 + uint16 = 4 + int16 = 5 + int32 = 6 + int64 = 7 + string = 8 + bool = 9 + float16 = 10 + double = 11 + uint32 = 12 + uint64 = 13 + complex64 = 14 + complex128 = 15 + bfloat16 = 16 + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss GV +# 全局变量,GV = global variable +class GV: + # onnx和numpy数据类型映射字典 + ONNX_2_NUMPY_DATATYPE_DICT = { + 1: np.float32, + 2: np.uint8, + 3: np.int8, + 4: np.uint16, + 5: np.int16, + 6: np.int32, + 7: np.int64, + 9: np.bool_, + 10: np.float16, + 11: np.float64, + 12: np.uint32, + 13: np.uint64, + 14: np.complex64, + 15: np.complex128, + np.float32: 1, + np.uint8: 2, + np.int8: 3, + np.uint16: 4, + np.int16: 5, + np.int32: 6, + np.int64: 7, + np.bool_: 9, + np.float16: 10, + np.float64: 11, + np.uint32: 12, + np.uint64: 13, + np.complex64: 14, + np.complex128: 15, + 'tensor(float)': np.float32, + 'tensor(uint8)': np.uint8, + 'tensor(int8)': np.int8, + 'tensor(uint16)': np.uint16, + 'tensor(int16)': np.int16, + 'tensor(int32)': np.int32, + 'tensor(int64)': np.int64, + 'tensor(bool)': np.bool_, + 'tensor(float16)': np.float16, + 'tensor(double)': np.float64, + 'tensor(uint32)': np.uint32, + 'tensor(uint64)': np.uint64, + } + + # initializer,node索引字典(实现快速查找) + OXINITIALIZER_DICT = {} + OXNODE_DICT = {} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXInitializer +class OXInitializer: + ''' + If you print a Initializer variable in the terminal, you will get something like this, and you can modify it + directly. 
+ dims: 1 + data_type: 6 + name: '4239' + raw_data: '\000\000\000\00' + + @dims: google.protobuf.pyext._message.RepeatedScalarContainer + @data_type: int + @name: str + @raw_data: bytes + ''' + + def __init__(self, initializer: TensorProto): + self._initializer = initializer + + def __str__(self): + ndarray = to_array(self._initializer) + msg = 'name: ' + str(self._initializer.name) + '\n' + \ + 'dims: ' + str(self._initializer.dims) + '\n' + \ + 'data_type: ' + str(self._initializer.data_type) + '\n' + \ + 'dtype: ' + str(ndarray.dtype) + '\n' + \ + 'shape: ' + str(ndarray.shape) + '\n' + \ + 'ndarray:\n' + str(ndarray) + return msg + + def get_initializer(self) -> TensorProto: + return self._initializer + + def get_name(self) -> str: + ''' + 获取initializer的名字 + ''' + + return self._initializer.name + + def set_name(self, new_name) -> NoReturn: + ''' + 设置/修改initializer的名字 + ''' + + old_name = self._initializer.name + self._initializer.name = new_name + GV.OXINITIALIZER_DICT[new_name] = GV.OXINITIALIZER_DICT[old_name] + GV.OXINITIALIZER_DICT.pop(old_name) + + def get_data_type(self) -> int: + ''' + 获取initializer的数据类型 + ''' + + return self._initializer.data_type + + def set_data_type(self, ox_data_type: OXDataType) -> NoReturn: + ''' + 设置/修改initializer的数据类型 + ''' + + ndarray = to_array(self._initializer).astype(GV.ONNX_2_NUMPY_DATATYPE_DICT[int(ox_data_type)]) + self._initializer.raw_data = ndarray.tobytes() + self._initializer.data_type = int(ox_data_type) + + def get_data(self) -> np.ndarray: + ''' + 获取initializer的数据 + ''' + + return to_array(self._initializer) + + def set_data(self, ndarray: np.ndarray) -> NoReturn: + ''' + 设置/修改initializer的数据 + ''' + + self._initializer.raw_data = ndarray.tobytes() + self._initializer.data_type = GV.ONNX_2_NUMPY_DATATYPE_DICT[eval('np.' + str(ndarray.dtype))] + _clear_list(self._initializer.dims) + _extend_list(self._initializer.dims, ndarray.shape) + + def save_data(self, file_path: str) -> NoReturn: + ''' + 保存initializer的数据 + ''' + + np.save(file_path, to_array(self._initializer)) + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXNode +class OXNode: + ''' + If you print a NodeProto variable in the terminal, you will get something like this, and you can modify it directly. 
+ input: '494' + input: 'fc.weight' + input: 'fc.bias' + output: 'class' + name: 'Gemm_121' + op_type: 'Gemm' + attribute { + name: 'alpha' + f: 1.0 + type: FLOAT + } + attribute { + name: 'beta' + f: 1.0 + type: FLOAT + } + attribute { + name: 'transB' + i: 1 + type: INT + } + + @input: google.protobuf.pyext._message.RepeatedScalarContainer + @output: google.protobuf.pyext._message.RepeatedScalarContainer + @name: str + @op_type: str + @attribute: google.protobuf.pyext._message.RepeatedCompositeContainer + ''' + + def __init__(self, node: NodeProto): + self._node = node + + def __str__(self): + return str(self._node) + + def get_node(self) -> NodeProto: + return self._node + + @property + def input(self): # -> google.protobuf.pyext._message.RepeatedScalarContainer + ''' + 获取节点的输入列表 + ''' + + return self._node.input + + @property + def output(self): # -> google.protobuf.pyext._message.RepeatedScalarContainer + ''' + 获取节点的输出列表 + ''' + + return self._node.output + + def get_name(self) -> str: + ''' + 获取节点的名字 + ''' + + return self._node.name + + def set_name(self, new_name) -> NoReturn: + ''' + 设置/修改节点的名字 + ''' + + old_name = self._node.name + self._node.name = new_name + GV.OXNODE_DICT[new_name] = GV.OXNODE_DICT[old_name] + GV.OXNODE_DICT.pop(old_name) + + def get_op_type(self) -> int: + ''' + 获取节点的类型 + ''' + + return self._node.op_type + + def set_op_type(self, op_type) -> NoReturn: + ''' + 设置/修改节点的类型 + ''' + + self._node.op_type = op_type + + def get_attribute(self): # -> google.protobuf.pyext._message.RepeatedCompositeContainer + ''' + 获取节点属性 + ''' + + return self._node.attribute + + def set_attribute(self, attr_name: str, attr_value: Any) -> AttributeProto: + ''' + 设置/修改节点属性 + + Args: + attr_name: 属性名字 + attr_value: 属性值 + + Returns: 修改后的属性 + ''' + + # 构造新attr + new_attr = make_attribute(attr_name, attr_value) + + # 删除旧的 + for attr in self._node.attribute: + if attr.name == attr_name: + self._node.attribute.remove(attr) + break + + # 添加新的 + self._node.attribute.append(new_attr) + + return new_attr + + def add_attribute(self, attr_name: str, attr_value: Any) -> AttributeProto: + ''' + 给节点增加新属性 + + Args: + attr_name: 属性名字 + attr_value: 属性值 + + Returns: 新增的属性 + ''' + + # 构造新attr + new_attr = make_attribute(attr_name, attr_value) + + # 增加 + self._node.attribute.append(new_attr) + + return new_attr + + def remove_attribute(self, attr_name: str) -> AttributeProto: + ''' + 删除节点的某个属性 + + Args: + attr_name: 属性名字 + attr_value: 属性值 + + Returns: 被删除的属性 + ''' + + for attr in self._node.attribute: + if attr.name == attr_name: + removed_attr = attr + self._node.attribute.remove(attr) + break + + return removed_attr + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> calss OXGraph +class OXGraph: + def __init__(self, model_path: str): + print('[INFO] Start initializing.') + start_time = datetime.now() + + self._model_path = model_path + self._model = onnx.load_model(model_path) + self._graph = self._model.graph + self._initializer = self._graph.initializer + self._node = self._graph.node + self._input_tensor_2_oxnode_dict = {} + self._output_tensor_2_oxnode_dict = {} + + # initializer + for initializer in self._initializer: + GV.OXINITIALIZER_DICT[initializer.name] = OXInitializer(initializer) + + # node + for idx, node in enumerate(self._node): + oxnode = OXNode(node) + GV.OXNODE_DICT[node.name] = oxnode + + # 创建tensor_2_oxnode字典 + self._update_tensor_2_oxnode_dict( + self._input_tensor_2_oxnode_dict, + self._output_tensor_2_oxnode_dict, + ) + + # 
获取所有tensor信息 + try: + self._all_tensor_info = self.get_all_tensor_info() + except: + os.remove(os.path.join(os.path.dirname(self._model_path), 'temp.onnx')) + print('[WARNING] There are custom operators in the model, ' + 'and these functions are not available: get_input_tensor_info()、get_output_tensor_info()、' + 'get_all_tensor_info()、infer_shape()、dump_all_node_data()、trunc_model().') + + # 屏蔽check_model + def check_model(model): + pass + + onnx.checker.check_model = check_model + + end_time = datetime.now() + cost_time = (end_time - start_time).seconds + print('[INFO] Initialization completed! Cost {} seconds.'.format(cost_time)) + + def __str__(self): + return str(self._model) + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Initializer相关函数 + def get_oxinitializer_by_name(self, oxinitializer_name: str, can_return_none: bool = False) -> OXInitializer: + ''' + 根据initializer的名字获取OXInitializer + ''' + + if oxinitializer_name not in GV.OXINITIALIZER_DICT: + if can_return_none is True: + return None + else: + raise RuntimeError('[ERROR] {} not found.'.format(oxinitializer_name)) + return GV.OXINITIALIZER_DICT[oxinitializer_name] + + def add_initializer(self, initializer_name: str, ndarray: np.ndarray) -> OXInitializer: + ''' + 向模型中新增一个initializer + + Args: + initializer_name: initializer的名字 + ndarray: initializer的数据 + + Returns: 新增的OXInitializer + ''' + + if initializer_name in GV.OXINITIALIZER_DICT: + raise RuntimeError( + '[ERROR] {} has already exists in the model, please use a different name!'.format(initializer_name)) + + initializer = from_array(ndarray, initializer_name) + self._initializer.append(initializer) # 这里是复制,而不是引用,id已经变了 + initializer = self._initializer[-1] + oxinitializer = OXInitializer(initializer) + GV.OXINITIALIZER_DICT[initializer_name] = oxinitializer + + return oxinitializer + + def remove_initializer(self, initializer_name: str) -> OXInitializer: + ''' + 从模型中删除指定的initializer + + Args: + initializer_name: initializer的名字 + + Returns: 删除的OXInitializer + ''' + + oxinitializer = self.get_oxinitializer_by_name(initializer_name) + GV.OXINITIALIZER_DICT.pop(initializer_name) + self._initializer.remove(oxinitializer.get_initializer()) + + return oxinitializer + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Node相关函数 + def get_oxnode_by_name(self, oxnode_name: str, can_return_none: bool = False) -> OXNode: + ''' + 根据节点名字获取OXNode + ''' + + if oxnode_name not in GV.OXNODE_DICT: + if can_return_none is True: + return None + else: + raise RuntimeError('[ERROR] {} not found.'.format(oxnode_name)) + return GV.OXNODE_DICT[oxnode_name] + + def get_oxnode_by_op_type(self, op_type: str) -> List[OXNode]: + ''' + 根据节点类型获取OXNode + ''' + + res = set() + for oxnode in GV.OXNODE_DICT.values(): + if oxnode.get_op_type() == op_type: + res.add(oxnode) + return list(res) + + def get_oxnode_whose_input_contain_this(self, input_name: str) -> List[OXNode]: + ''' + 遍历所有OXNode,获取输入包含`input_name`的那些OXNode + ''' + + res = set() + for oxnode in GV.OXNODE_DICT.values(): + for oxinput_name in oxnode.input: + if oxinput_name == input_name: + res.add(oxnode) + break + return list(res) + + def get_oxnode_whose_output_contain_this(self, output_name: str) -> List[OXNode]: + ''' + 遍历所有OXNode,获取输出包含`output_name`的那些OXNode + ''' + + res = set() + for oxnode in GV.OXNODE_DICT.values(): + for oxoutput_name in oxnode.output: + if oxoutput_name == output_name: + res.add(oxnode) + break + return list(res) + + def get_previous_oxnode(self, oxnode_name: str) -> List[OXNode]: + ''' + 
获取一个节点的前驱节点 + ''' + + res = set() + inputs = self.get_oxnode_by_name(oxnode_name).input + for input in inputs: + oxnode_set = self._output_tensor_2_oxnode_dict.get(input) + if oxnode_set is not None: + res.update(oxnode_set) + return list(res) + + def get_next_oxnode(self, oxnode_name: str) -> List[OXNode]: + ''' + 获取一个节点的后继节点 + ''' + + res = set() + outputs = self.get_oxnode_by_name(oxnode_name).output + for output in outputs: + oxnode_set = self._input_tensor_2_oxnode_dict.get(output) + if oxnode_set is not None: + res.update(oxnode_set) + return list(res) + + def insert_node(self, bef_node_info_list: List[Dict], aft_node_info_list: List[Dict], op_type: str, op_name: str, + **attributes: Dict) -> OXNode: + ''' + 向模型中插入新节点,并自动连边,注意和`add_node`的区别 + + 限制:无法涵盖所有场景,若结果不符合预期,请用`add_node`函数,并手动指定连边关系。 + + Args: + bef_node_info_list:参见README.md用例 + aft_node_info_list:参见README.md用例 + op_type:节点的类型 + op_name:节点的名字 + attributes:节点的属性 + + Returns: 插入的OXNode + ''' + + # 校验插入的节点是否已经存在 + if op_name in GV.OXNODE_DICT: + raise RuntimeError( + '[ERROR] {} has already exists in the model, please use a different name!'.format(op_name)) + + # 解析信息 + bef_node_info_list, aft_node_info_list = self._parse_insert_node_info(bef_node_info_list, aft_node_info_list) + + # 插入节点 + # + 构造新节点的输入 + new_node_input = [] + for bef_node_info in bef_node_info_list: + oxnode = self.get_oxnode_by_name(bef_node_info['bef_node_name'], True) + if oxnode is None: # 说明此节点是模型的输入节点 + new_node_input.append(bef_node_info['bef_node_name']) + else: + for idx in bef_node_info['link_output_idx']: + if oxnode.output[idx] in self.get_output_tensor_info().keys(): # 说明此节点紧接模型的输出节点 + oxnode.output[idx] = oxnode.get_name() + '_m_' + str(idx) + new_node_input.append(oxnode.output[idx]) + + # + 构造新节点的输出 + new_node_output = [op_name + '_0'] + + # + 构造新节点 + insert_oxnode = self.add_node(op_type=op_type, + op_name=op_name, + inputs=new_node_input, + outputs=new_node_output, + **attributes) + + # 和后继节点连边 + for aft_node_info in aft_node_info_list: + oxnode = self.get_oxnode_by_name(aft_node_info['aft_node_name'], True) + if oxnode is None: # 说明此节点是模型的输出节点 + if len(aft_node_info_list) != 1: + raise RuntimeError('[ERROR] Please check aft_node_info_list!') + + # 修改insert_oxnode的输出为模型的输出节点 + insert_oxnode.output[0] = aft_node_info['aft_node_name'] + else: + for idx in aft_node_info['link_input_idx']: + oxnode.input[idx] = new_node_output[0] + + # 更新tensor_2_oxnode字典 + self._update_tensor_2_oxnode_dict( + self._input_tensor_2_oxnode_dict, + self._output_tensor_2_oxnode_dict, + ) + + return insert_oxnode + + def add_node(self, op_type: str, op_name: str, inputs: List[str], outputs: List[str], **attributes: Dict) -> OXNode: + ''' + 向模型中增加新节点,不会自动连边,注意和`insert_node`的区别 + + Args: + op_type:节点的类型 + op_name:节点的名字 + inputs:节点的输入 + outputs:节点的输出 + attributes:节点的属性 + + Returns: 新增的OXNode + ''' + + if op_name in GV.OXNODE_DICT: + raise RuntimeError( + '[ERROR] {} has already exists in the model, please use a different name!'.format(op_name)) + + new_node = make_node(op_type=op_type, name=op_name, inputs=inputs, outputs=outputs, **attributes) + self._node.append(new_node) # 这里复制,而不是用引用,id已经变了 + new_node = self._node[-1] + new_oxnode = OXNode(new_node) + GV.OXNODE_DICT[new_oxnode.get_name()] = new_oxnode + + # 更新tensor_2_oxnode字典 + self._update_tensor_2_oxnode_dict( + self._input_tensor_2_oxnode_dict, + self._output_tensor_2_oxnode_dict, + ) + + return new_oxnode + + def remove_node(self, node_name: str, auto_link: bool = True) -> OXNode: + ''' + 从模型中删除节点 + + 
限制:若开启自动连边,则删除的节点必须只有一个前驱节点,否则需要手动连边。若结果不符合预期,也需要自己手动连边。 + + Args: + node_name:要删除的节点名字 + auto_link:是否自动连边 + + Returns: 删除的OXNode + ''' + + if node_name not in GV.OXNODE_DICT: + raise RuntimeError('[ERROR] {} not found.'.format(node_name)) + + if auto_link is False: + oxnode = self.get_oxnode_by_name(node_name) + else: + oxnode = self.get_oxnode_by_name(node_name) + previous_node = self.get_previous_oxnode(node_name) + next_node = self.get_next_oxnode(node_name) + + if len(previous_node) > 1: + raise RuntimeError('[ERROR] Remove node can only have one previous node.') + + _clear_list(previous_node[0].output) + _extend_list(previous_node[0].output, oxnode.output) + + # 删除节点 + GV.OXNODE_DICT.pop(node_name) + self._node.remove(oxnode.get_node()) + + # 更新tensor_2_oxnode字典 + self._update_tensor_2_oxnode_dict( + self._input_tensor_2_oxnode_dict, + self._output_tensor_2_oxnode_dict, + ) + + return oxnode + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 输入输出相关函数 + def get_input_tensor_info(self) -> Dict: + ''' + 获取模型输入tensor的信息 + 信息包括:tensor名字、shape、类型 + + Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} + ''' + + session = onnxruntime.InferenceSession(self._model_path) + + input_tensor_info = {} + for input_item in session.get_inputs(): + input_tensor_info[input_item.name] = { + 'shape': tuple(input_item.shape), + 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[input_item.type] + } + + return input_tensor_info + + def get_output_tensor_info(self) -> Dict: + ''' + 获取模型输出tensor的信息 + 信息包括:tensor名字、shape、类型 + + Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} + ''' + + session = onnxruntime.InferenceSession(self._model_path) + + output_tensor_info = {} + for output_item in session.get_outputs(): + output_tensor_info[output_item.name] = { + 'shape': tuple(output_item.shape), + 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[output_item.type] + } + + return output_tensor_info + + def get_all_tensor_info(self) -> Dict: + ''' + 获取模型中所有tensor的信息 + 所有tensor包括:模型输入tensor、模型输出tensor、模型中间tensor + 信息包括:tensor名字、shape、类型 + + Returns: {'tensor_name': {'shape': np.shape, 'dtype': np.dtype}, ...} + ''' + + old_onnx_model = onnx.load(self._model_path) + + output_name = [] + for name in enumerate_model_node_outputs(old_onnx_model): + output_name.append(name) + + new_onnx_model = select_model_inputs_outputs(old_onnx_model, output_name) + new_model_path = os.path.join(os.path.dirname(self._model_path), 'temp.onnx') + save_onnx_model(new_onnx_model, new_model_path) + + session = onnxruntime.InferenceSession(new_model_path) + os.remove(new_model_path) + + all_tensor_info = {} + + for input_item in session.get_inputs(): + all_tensor_info[input_item.name] = { + 'shape': tuple(input_item.shape), + 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[input_item.type] + } + + for output_item in session.get_outputs(): + all_tensor_info[output_item.name] = { + 'shape': tuple(output_item.shape), + 'dtype': GV.ONNX_2_NUMPY_DATATYPE_DICT[output_item.type] + } + + for oxinitializer in GV.OXINITIALIZER_DICT.values(): + all_tensor_info[oxinitializer.get_name()] = { + 'shape': oxinitializer.get_data().shape, + 'dtype': eval('np.' 
+ str(oxinitializer.get_data().dtype)) + } + + return all_tensor_info + + def infer_shape(self, input_data_info_list: List[Dict]) -> Dict: + ''' + 推导模型各个算子的输出shape信息。 + + 用途:有些模型从onnx图中无法看出算子输出shape信息,也无法获取shape信息,通过此函数可以推导出shape信息。 + + 原理:用真实数据运行一遍模型,记录各个算子的输出shape信息。 + + Args: + input_data_info_list: + [ + { + 'model_input_name': 'input1_name', + 'shape': '(1, 3, 224, 224)', + 'dtype': 'np.float32' + }, + { + 'model_input_name': 'input2_name', + 'shape': '(1, 3, 224, 224)', + 'dtype': 'np.float32' + } + ] + + Returns: {'op_name': {'shape': np.shape, 'dtype': np.dtype}, ...} + ''' + + # 构造输入数据 + input_data_dict = {} + for input_data_info in input_data_info_list: + input_data_dict[input_data_info['model_input_name']] = np.full(eval(input_data_info['shape']), + 1, + dtype=eval(input_data_info['dtype'])) + + # 修改模型,增加输出节点 + old_onnx_model = onnx.load(self._model_path) + output = [] + for out in enumerate_model_node_outputs(old_onnx_model): + output.append(out) + new_onnx_model = select_model_inputs_outputs(old_onnx_model, outputs=output) + onnx_save_path = './temp.onnx' + save_onnx_model(new_onnx_model, onnx_save_path) + + # 推理得到输出 + sess = onnxruntime.InferenceSession(onnx_save_path) + os.remove(onnx_save_path) + output_name = [node.name for node in sess.get_outputs()] + res = sess.run(output_name, input_data_dict) + + # 保存数据 + infer_tensor_info = {} + idx = 0 + for node in old_onnx_model.graph.node: + for i in range(len(node.output)): + infer_tensor_info[node.name] = {'output_idx': i, 'shape': res[idx].shape, 'dtype': res[idx].dtype} + idx += 1 + + return infer_tensor_info + + def dump_all_node_data(self, input_data_info_list: List[Dict], dump_data_save_path: str) -> NoReturn: + ''' + dump模型所有节点的数据 + + Args: + input_data_info_list: + [ + { + 'model_input_name': 'input1_name', + 'npy_file_path': './0.npy', + }, + { + 'model_input_name': 'input2_name', + 'npy_file_path': './1.npy', + }, + ] + dump_data_save_path: e.g. './dump_data' + + Returns: NoReturn + ''' + + # 创建目录 + if os.path.exists(dump_data_save_path): + shutil.rmtree(dump_data_save_path) + os.makedirs(dump_data_save_path) + + # 修改模型,增加输出节点 + old_onnx_model = onnx.load(self._model_path) + output = [] + for out in enumerate_model_node_outputs(old_onnx_model): + output.append(out) + new_onnx_model = select_model_inputs_outputs(old_onnx_model, outputs=output) + onnx_save_path = os.path.join(dump_data_save_path, "./temp.onnx") + save_onnx_model(new_onnx_model, onnx_save_path) + + # 获取输入数据 + input_data_dict = {} + for input_data_info in input_data_info_list: + input_data_dict[input_data_info['model_input_name']] = np.load(input_data_info['npy_file_path']) + + # 推理得到输出 + sess = onnxruntime.InferenceSession(onnx_save_path) + os.remove(onnx_save_path) + output_name = [node.name for node in sess.get_outputs()] + res = sess.run(output_name, input_data_dict) + + # 保存数据 + idx = 0 + for node in old_onnx_model.graph.node: + for i in range(len(node.output)): + file_name = node.name + "." + str(i) + "." 
+ str(round(time.time() * 1000000)) + ".npy" + data_save_path = os.path.join(dump_data_save_path, file_name) + np.save(data_save_path, res[idx]) + idx += 1 + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 截图函数 + def extract_model(self, input_tensor_name_list: List[str], output_tensor_name_list: List[str], + new_model_save_path: str) -> NoReturn: + ''' + 从onnx 1.8.1开始,onnx官方提供了截图函数,此函数是对官方`onnx.utils.extract_model`函数的封装, + 以使其集成到`OXGraph`类中。另外,此函数屏蔽了`check_model`操作,使包含自定义算子的onnx提取子图后 + 在保存模型时跳过检查操作,使之可以顺利保存。以下是官方`onnx.utils.extract_model`函数的说明: + + Extracts sub-model from an ONNX model. + + The sub-model is defined by the names of the input and output tensors *exactly*. + + Note: For control-flow operators, e.g. If and Loop, the _boundary of sub-model_, + which is defined by the input and output tensors, should not _cut through_ the + subgraph that is connected to the _main graph_ as attributes of these operators. + + Arguments: + input_path (string): The path to original ONNX model. + output_path (string): The path to save the extracted ONNX model. + input_names (list of string): The names of the input tensors that to be extracted. + output_names (list of string): The names of the output tensors that to be extracted. + ''' + + print('[INFO] Begin to extract the model.') + start_time = datetime.now() + onnx.utils.extract_model(self._model_path, new_model_save_path, input_tensor_name_list, output_tensor_name_list) + end_time = datetime.now() + cost_time = (end_time - start_time).seconds + print('[INFO] Extract model completed! Cost {} seconds.'.format(cost_time)) + + def trunc_model(self, + trunc_beg_node_name_list: List[str], + trunc_end_node_name_list: List[str], + new_model_save_path: str, + keep_input_initializer: bool = False, + userdef_trunc_beg_node_info_list: List[Dict] = None) -> NoReturn: + ''' + 截取一段模型 + + 用途:可以用来单独验证某段网络的精度 + + 注意: + 从onnx 1.8.1开始,onnx官方提供了截图函数,若onnx版本>=1.8.1,请使用`extract_model`函数。 + `extract_model`函数是对官方`onnx.utils.extract_model`函数的封装,以使其集成到`OXGraph`类中。 + 此`trunc_model`函数是自己写的,功能可能有缺陷,但截图速度一般来说更快,模型较大时可以对比尝试。 + ''' + + print('[WARNING] 从onnx 1.8.1开始,onnx官方提供了截图函数,若onnx版本>=1.8.1,请使用`extract_model`函数。' + '`extract_model`函数是对官方`onnx.utils.extract_model`函数的封装,以使其集成到`OXGraph`类中。' + '此`trunc_model`函数是自己写的,功能可能有缺陷,但截图速度一般来说更快,模型较大时可以对比尝试。') + + print('[INFO] Begin to truncate the model.') + start_time = datetime.now() + + # 修改输出节点 + new_output = [] + for elem in trunc_end_node_name_list: + output = self.get_oxnode_by_name(elem).output + new_output.extend(x for x in output) + new_onnx = select_model_inputs_outputs(self._model, outputs=new_output) + save_onnx_model(new_onnx, new_model_save_path) + + # 加载模型 + model = onnx.load_model(new_model_save_path) + graph = model.graph + nodes = graph.node + initializers = graph.initializer + + # 搜索节点 + def find_trunc_beg_node(node_name): + is_find = False + for node in nodes: + if node.name == node_name: + trunc_beg_node = node + is_find = True + break + if is_find is True: + return trunc_beg_node + else: + raise RuntimeError('[ERROR] {} not found.'.format(node_name)) + + # 获取trunc_beg_node详细信息,构造一个这样的list: + ''' + [ + { + 'trunc_beg_node': node, + 'new_input_info_list': [ + { + 'input_name': 'input_A', + 'dtype': OXDataType.float32, + 'shape': (1, 256, 56, 56), + 'input_idx': 0 + }, + { + 'input_name': 'input_B', + 'dtype': OXDataType.float32, + 'shape': (1, 256, 56, 56), + 'input_idx': 1 + } + ] + } + ] + ''' + if userdef_trunc_beg_node_info_list is None: + trunc_beg_node_info_list = [] + initializer_name_set = 
set() + initializer_name_set.update([oxinitializer.get_name() for oxinitializer in GV.OXINITIALIZER_DICT.values()]) + count = 0 + for trunc_beg_node_name in trunc_beg_node_name_list: + trunc_beg_node = find_trunc_beg_node(trunc_beg_node_name) + new_input_info_list = [] + for idx, input in enumerate(trunc_beg_node.input): + if (keep_input_initializer is True) and (input in initializer_name_set): + continue + else: + new_input_info = {} + new_input_info['input_name'] = 'new_input_' + str(count) + count += 1 + new_input_info['dtype'] = GV.ONNX_2_NUMPY_DATATYPE_DICT[self._all_tensor_info[input]['dtype']] + new_input_info['shape'] = self._all_tensor_info[input]['shape'] + new_input_info['input_idx'] = idx + new_input_info_list.append(new_input_info) + trunc_beg_node_info = {} + trunc_beg_node_info['trunc_beg_node'] = trunc_beg_node + trunc_beg_node_info['new_input_info_list'] = new_input_info_list + trunc_beg_node_info_list.append(trunc_beg_node_info) + else: + trunc_beg_node_info_list = userdef_trunc_beg_node_info_list + + # 构造新输入 + new_inputs = [] + for trunc_beg_node_info in trunc_beg_node_info_list: + if userdef_trunc_beg_node_info_list is None: + trunc_begin_node = trunc_beg_node_info['trunc_beg_node'] + else: + trunc_begin_node = find_trunc_beg_node(trunc_beg_node_info['trunc_beg_node_name']) + for new_input_info in trunc_beg_node_info['new_input_info_list']: + new_input = self._make_new_input(new_input_info['input_name'], new_input_info['dtype'], + new_input_info['shape']) + new_inputs.append(new_input) + trunc_begin_node.input[new_input_info['input_idx']] = new_input_info['input_name'] + + # 查找有用节点 + useful_node_name_set = set() + useful_node_name_set.update(trunc_beg_node_name_list) + useful_node_name_set.update(trunc_end_node_name_list) + + # + 正向查找 + @lru_cache() + def find_useful_node(next_node_name_tuple): + for next_node_name in next_node_name_tuple: + if next_node_name not in trunc_end_node_name_list: + output_oxnode_list = self.get_next_oxnode(next_node_name) + output_oxnode_name_tuple = tuple([oxnode.get_name() for oxnode in output_oxnode_list]) + useful_node_name_set.update(output_oxnode_name_tuple) + find_useful_node(output_oxnode_name_tuple) + + # + 反向查找 + @lru_cache() + def find_useful_node_reverse(next_node_name_tuple): + for next_node_name in next_node_name_tuple: + if next_node_name not in trunc_beg_node_name_list: + input_oxnode_list = self.get_previous_oxnode(next_node_name) + input_oxnode_name_tuple = tuple([oxnode.get_name() for oxnode in input_oxnode_list]) + useful_node_name_set.update(input_oxnode_name_tuple) + find_useful_node_reverse(input_oxnode_name_tuple) + + # + 正向和反向都查找一遍,防止漏查 + find_useful_node(tuple(trunc_beg_node_name_list)) + find_useful_node_reverse(tuple(trunc_end_node_name_list)) + + # 删除多余节点 + for node in copy.deepcopy(nodes): + if node.name not in useful_node_name_set: + nodes.remove(node) + + # 删除多余输入 + _clear_list(graph.input) + _extend_list(graph.input, new_inputs) + + # 删除多余Initializer + all_input = set() + for node in nodes: + all_input.update(node.input) + for initializer in copy.deepcopy(initializers): + if initializer.name not in all_input: + initializers.remove(initializer) + + # 保存模型 + name = 'Extracted from {' + self._graph.name + '}' + graph = make_graph(nodes, + name, + graph.input, + graph.output, + initializer=initializers, + value_info=graph.value_info) + meta = { + 'ir_version': self._model.ir_version, + 'opset_imports': self._model.opset_import, + 'producer_name': 'OXGraph.trunc_model()', + } + new_mode = make_model(graph, **meta) 
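+        # 说明:meta沿用原模型的ir_version与opset_import,使截取出的子模型
+        # 与原模型的版本信息保持一致,能够被onnx正常加载。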
+ onnx.save(new_mode, new_model_save_path) + end_time = datetime.now() + cost_time = (end_time - start_time).seconds + print('[INFO] Truncate model completed! Cost {} seconds.'.format(cost_time)) + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 保存模型 + def save_new_model(self, new_model_path) -> NoReturn: + ''' + 保存修改后的模型 + ''' + + onnx.save_model(self._model, new_model_path) + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 私有函数 + def _update_tensor_2_oxnode_dict(self, input_tensor_2_oxnode_dict, output_tensor_2_oxnode_dict) -> NoReturn: + # 清空字典 + input_tensor_2_oxnode_dict.clear() + output_tensor_2_oxnode_dict.clear() + + # 创建字典 + for oxnode in GV.OXNODE_DICT.values(): + inputs = oxnode.input + outputs = oxnode.output + for input in inputs: + input_tensor_2_oxnode_dict.setdefault(input, set()).add(oxnode) + for output in outputs: + output_tensor_2_oxnode_dict.setdefault(output, set()).add(oxnode) + + def _make_new_input(self, new_input_name: str, ox_data_type: OXDataType, shape: tuple) -> ValueInfoProto: + ''' + If you print the model input in the terminal, you will get something like this, and you can modify it directly. + `dim_param` means dynamic shape. + [name: 'image' + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_param: '-1' + } + dim { + dim_value: 3 + } + dim { + dim_value: 224 + } + dim { + dim_value: 224 + } + } + } + } + ] + ''' + + new_input = copy.deepcopy(self._graph.input[0]) + new_input.name = new_input_name + new_input.type.tensor_type.elem_type = int(ox_data_type) + + dim_diff = len(shape) - len(new_input.type.tensor_type.shape.dim) + if dim_diff > 0: + for i in range(dim_diff): + new_input.type.tensor_type.shape.dim.append(copy.deepcopy(new_input.type.tensor_type.shape.dim[0])) + elif dim_diff < 0: + for i in range(abs(dim_diff)): + new_input.type.tensor_type.shape.dim.pop() + + for index in range(len(shape)): + if isinstance(shape[index], str): + new_input.type.tensor_type.shape.dim[index].dim_param = shape[index] + elif shape[index] is None: + new_input.type.tensor_type.shape.dim[index].dim_param = '-1' + print('[WARNING] Can not infer tensor shape, set it to "-1" here, which may cause an error! 
' + 'Please specify `userdef_trunc_beg_node_info_list` parameters and retry.') + else: + new_input.type.tensor_type.shape.dim[index].dim_value = shape[index] + + return new_input + + def _parse_insert_node_info(self, bef_node_info_list, aft_node_info_list): + ''' + parse bef_node_info_list = ['Relu_1:0'] and aft_node_info_list = ['MaxPool_2:0'] into: + + bef_node_info_list=[{ + 'bef_node_name': 'Relu_1', + 'link_output_idx': [0] + }] + + aft_node_info_list=[{ + 'aft_node_name': 'MaxPool_2', + 'link_input_idx': [0] + }] + + 默认的`:0`可以省略 + ''' + + # 变量定义 + new_bef_node_info_list = [] + new_aft_node_info_list = [] + + # 解析bef_node_info_list + for bef_node_info in bef_node_info_list: + bef_node_info_dict = {} + info_list = bef_node_info.split(':') + bef_node_info_dict['bef_node_name'] = info_list[0] + if len(info_list) == 1: + bef_node_info_dict['link_output_idx'] = [0] + else: + bef_node_info_dict['link_output_idx'] = [int(elem) for idx, elem in enumerate(info_list) if idx > 0] + new_bef_node_info_list.append(bef_node_info_dict) + + # 解析aft_node_info_list + for aft_node_info in aft_node_info_list: + aft_node_info_dict = {} + info_list = aft_node_info.split(':') + aft_node_info_dict['aft_node_name'] = info_list[0] + if len(info_list) == 1: + aft_node_info_dict['link_input_idx'] = [0] + else: + aft_node_info_dict['link_input_idx'] = [int(elem) for idx, elem in enumerate(info_list) if idx > 0] + new_aft_node_info_list.append(aft_node_info_dict) + + return new_bef_node_info_list, new_aft_node_info_list + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 公共函数 +def _clear_list(list) -> NoReturn: + ''' + 清空RepeatedScalarContainer或RepeatedCompositeContainer列表 + ''' + + list_len = len(list) + for _ in range(list_len): + list.pop() + + +def _extend_list(list, what_to_add) -> NoReturn: + ''' + 扩展RepeatedScalarContainer或RepeatedCompositeContainer列表 + ''' + + for elem in what_to_add: + list.append(elem) diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/ReadMe.md index 111fc7bc1079b93c0ce0a5c11d80845fa09609a9..1d37439f430d4f43f43bc6a2a2c04035e7c11935 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/ReadMe.md @@ -1,310 +1,310 @@ -# 参考论文 - -- [Objects as Points](https://arxiv.org/abs/1904.07850) - -# 参考实现 - -- [xingyizhou/CenterNet](https://github.com/xingyizhou/CenterNet) -- [CaoWGG/TensorRT-CenterNet](https://github.com/CaoWGG/TensorRT-CenterNet) - -# 环境搭建 - -1、创建一个conda环境 - -```shell -conda create --nameCenterNet python=3.6 -``` - -激活环境 - -``` -conda activate CenterNet -``` - -2、clone仓库 - -``` -git clone https://github.com/xingyizhou/CenterNet -``` - -3、安装依赖 - -``` -cd CenterNet -pip install -r requirements.txt -``` - -4、安装pytorch v1.0.0 - -1) 下载[torch-1.0.0-cp36-cp36m-linux_x86_64.whl](https://download.pytorch.org/whl/cu100/torch-1.0.0-cp36-cp36m-linux_x86_64.whl) - -2) 安装 - -``` -pip install torch-1.0.0-cp36-cp36m-linux_x86_64.whl -``` - -**注意:**1) pytorch版本必须是1.0.0;2) 需确保是GPU环境;3) CUDA版本为10.2 - -5、安装其它依赖 - -``` -pip install tqdm==4.19.9 torchvision==0.2.2 onnx==1.8.1 onnxruntime==1.7.0 skl2onnx==1.8.0 -``` - -6、确保gcc和g++版本>=7.2.0 - -7、安装COCO API - -``` -cd CenterNet -git clone https://github.com/cocodataset/cocoapi.git -cd cocoapi/PythonAPI -make -python setup.py install --user -``` - -8、把THC-based DCNv2替换为ATen-based DCNv2 - -``` -cd CenterNet -git clone https://github.com/CaoWGG/TensorRT-CenterNet.gitcp -r 
TensorRT-CenterNet/readme/dcn src/lib/models/networks -``` - -**说明:**主要用的是TensorRT-CenterNet仓下的dcn目录,也可以仅下载dcn目录,然后放入到`CenterNet/src/lib/models/networks`目录下。 - -9、编译Deform Conv - -``` -cd src/lib/models/networks/dcn -python setup.py build_ext --inplace -``` - -**注意:**gcc和g++版本必须>=7.2.0,否则可能导致出错。 - -10、Change import - -把`CenterNet/src/lib/models/networks/pose_dla_dcn.py`和`CenterNet/src/lib/models/networks/resnet_dcn.py`中的`from .DCNv2.dcn_v2 import DCN`改为`from .dcn.modules.deform_conv import ModulatedDeformConvPack as DCN` - -11、打开`/root/anaconda3/envs/CenterNet/lib/python3.6/site-packages/torch/autograd/function.py`,定位到273行,把`_iter_filter(...)`函数改为如下: - -``` -def _iter_filter(condition, allow_unknown=False, condition_msg=None, - conversion=None): - def _iter(obj): - if conversion is not None: - obj = conversion(obj) - if condition(obj): - yield obj - #M<<<<<< - elif isinstance(obj,int): ## int to tensor - yield torch.tensor(obj) - #>>>>>> - elif obj is None: - return - elif isinstance(obj, (list, tuple)): - for o in obj: - for var in _iter(o): - yield var - elif allow_unknown: - yield obj - else: - raise ValueError("Auto nesting doesn't know how to process " - "an input object of type " + torch.typename(obj) + - (". Accepted types: " + condition_msg + - ", or lists/tuples of them" - if condition_msg else "")) - - - return _iter -``` - -12、下载[ctdet_coco_dla_2x.pth](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT)模型,放入`CenterNet/models`目录下 -13、把`CenterNet/src/lib/opts.py中的add_argument('task', default='ctdet'....)`改为`add_argument('--task', default='ctdet'....)` -14、把提供的代码和脚本放入`CenterNet/src`目录下。 - -# 准备数据集 - -根据CenterNet官方数据集安装指导准备数据集:[DATA.md](https://github.com/xingyizhou/CenterNet/blob/master/readme/DATA.md),本示例以 **COCO 2017 Val** 数据集为例。 - -# PyTorch在线推理 - -由于后续导出onnx时需要修改CenterNet源码,修改后的代码无法进行PyTorch在线推理。因此这里先进行PyTorch在线推理验证。 - -运行pth_eval.py进行推理,推理完毕之后会输入精度和推理时间信息。 - -``` -python pth_eval.py --res_data_save_path=./pth_result -``` - -参数说明: - - --res_data_save_path:推理结果保存路径 - -# om模型推理模型转换 - -1. 
模型转换。 - - 使用PyTorch将模型权重文件pth转换为onnx文件,再使用atc工具将onnx文件转为离线推理模型om文件。 - - - 导出onnx文件。 - - - 打开`CenterNet/src/lib/models/networks/dcn/functions/deform_conv.py`文件 - - - 修改`ModulatedDeformConvFunction`的`symbolic(...)`函数,把原函数改为如下: - - ``` - @staticmethod - def symbolic(g, input, weight, offset, bias, stride, padding, dilation, groups, deformable_groups): - return g.op("DeformableConv2D", - input, - weight, - offset, - bias, - deformable_groups_i=deformable_groups, - dilations_i=dilation, - groups_i=groups, - pads_i=padding, strides_i=stride) - ``` - - - 修改`ModulatedDeformConvFunction`的`forward(...)`函数,把原函数改为如下: - - ``` - @staticmethod - def forward(ctx, input, weight, offset, bias=None, stride=1, padding=0, dilation=1, groups=1, deformable_groups=1): - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.with_bias = bias is not None - if not ctx.with_bias: - bias = input.new_empty(1) # fake tensor - output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) - return output - ``` - - 打开`CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py`文件,修改`ModulatedDeformConvPack`的`forward(...)`函数,把原函数改为如下: - - ``` - def forward(self, x): - out = self.conv_offset_mask(x) - o1, o2, mask = torch.chunk(out, 3, dim=1) - offset = torch.cat((o1, o2), dim=1) - mask = torch.sigmoid(mask) - - offset_y = offset.reshape(1, -1, 2, offset.shape[2], - offset.shape[3])[:, :, 0, ...].reshape(1, offset.shape[1] // 2, offset.shape[2], - offset.shape[3]) - offset_x = offset.reshape(1, -1, 2, offset.shape[2], - offset.shape[3])[:, :, 1, ...].reshape(1, offset.shape[1] // 2, offset.shape[2], - offset.shape[3]) - offset = torch.cat((offset_x, offset_y, mask), 1) - - return modulated_deform_conv(x, self.weight, offset, self.bias, self.stride, self.padding, self.dilation, self.groups, self.deformable_groups) - ``` - - 打开`/root/anaconda3/envs/CenterNet/lib/python3.6/site-packages/torch/onnx/symbolic.py`,在`reciprocal(...)`函数后边增加两个函数: - - ``` - def reshape(g, self, shape): - return view(g, self, shape) - - - def reshape_as(g, self, other): - shape = g.op('Shape', other) - return reshape(g, self, shape) - ``` - - - 运行`export_onnx.py`文件,导出onnx模型 - - ``` - python export_onnx.py - ``` - - 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x.onnx`模型文件。 - - - 运行`modify_onnx.py`文件,修改onnx模型文件 - - ``` - python modify_onnx.py - ``` - - 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x_modify.onnx`模型文件。 - - 2. 
使用atc工具将onnx模型转om模型。 - - - 根据实际情况,修改`onnx2om.sh`脚本中的环境变量,具体的脚本示例如下: - - ``` - #!/bin/bash - clear - - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - - atc --framework=5 --model=../models/ctdet_coco_dla_2x_modify.onnx --output=../models/ctdet_coco_dla_2x \ - --input_format=NCHW --input_shape=image:1,3,512,512 --log=error --soc_version=Ascend310 - rm -rf fusion_result.json kernel_meta - ``` - - 参数说明: - - - --model:为onnx模型文件 - - --framework:5代表onnx模型 - - - --output:输出的om模型 - - --input_format:输入数据的格式 - - --input_shape:输入数据的shape - - --log:日志等级 - - --soc_version:部署芯片类型 - - - 执行onnx2om.sh脚本,将onnx文件转为离线推理模型文件om文件。 - - ``` - bash onnx2om.sh - ``` - - 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x.om`模型文件。 - -2. 开始推理验证。 - - - 根据实际情况,修改`benchmark_infer.sh`脚本中的代码,具体的脚本示例如下: - - ``` - #!/bin/bash - clear - - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - - ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=../models/ctdet_coco_dla_2x.om \-input_text_path=./pre_bin/bin_file.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=false - ``` - - - 执行`benchmark_infer.sh`脚本,进行benchmark推理。 - - ``` - sh benchmark_infer.sh - ``` - - - 推理数据后处理与精度统计。 - 运行postprocess.py脚本,进行后处理和精度统计。 - - ``` - python postprocess.py \ - --infer_res_save_path=./result/dumpOutput_device0 \ - --pre_data_save_path=./pre_bin \ - --res_data_save_path=./om_result - ``` - - 参数说明: - - - --infer_res_save_path:benchmark推理结果保存路径 - - --pre_data_save_path:预处理数据保存路径 - - --res_data_save_path:后处理结果保存路径 - - - +# 参考论文 + +- [Objects as Points](https://arxiv.org/abs/1904.07850) + +# 参考实现 + +- [xingyizhou/CenterNet](https://github.com/xingyizhou/CenterNet) +- [CaoWGG/TensorRT-CenterNet](https://github.com/CaoWGG/TensorRT-CenterNet) + +# 环境搭建 + +1、创建一个conda环境 + +```shell +conda create --nameCenterNet python=3.6 +``` + +激活环境 + +``` +conda activate CenterNet +``` + +2、clone仓库 + +``` +git clone https://github.com/xingyizhou/CenterNet +``` + +3、安装依赖 + +``` +cd CenterNet +pip install -r requirements.txt +``` + +4、安装pytorch v1.0.0 + +1) 下载[torch-1.0.0-cp36-cp36m-linux_x86_64.whl](https://download.pytorch.org/whl/cu100/torch-1.0.0-cp36-cp36m-linux_x86_64.whl) + +2) 安装 + +``` +pip install torch-1.0.0-cp36-cp36m-linux_x86_64.whl +``` + +**注意:**1) pytorch版本必须是1.0.0;2) 需确保是GPU环境;3) CUDA版本为10.2 + +5、安装其它依赖 + +``` +pip install tqdm==4.19.9 torchvision==0.2.2 onnx==1.8.1 onnxruntime==1.7.0 skl2onnx==1.8.0 +``` + +6、确保gcc和g++版本>=7.2.0 + +7、安装COCO API + +``` +cd CenterNet +git clone https://github.com/cocodataset/cocoapi.git +cd cocoapi/PythonAPI +make +python setup.py install --user +``` + +8、把THC-based DCNv2替换为ATen-based DCNv2 + +``` +cd CenterNet +git clone https://github.com/CaoWGG/TensorRT-CenterNet.gitcp -r TensorRT-CenterNet/readme/dcn src/lib/models/networks +``` 
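+
+(可选)复制完成后,可在`CenterNet`目录下确认dcn代码是否已经就位(目录内容以实际clone到的仓库为准,此处仅为示意):
+
+```
+ls src/lib/models/networks/dcn
+# 预期可以看到setup.py以及functions、modules等子目录
+```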
+ +**说明:**主要用的是TensorRT-CenterNet仓下的dcn目录,也可以仅下载dcn目录,然后放入到`CenterNet/src/lib/models/networks`目录下。 + +9、编译Deform Conv + +``` +cd src/lib/models/networks/dcn +python setup.py build_ext --inplace +``` + +**注意:**gcc和g++版本必须>=7.2.0,否则可能导致出错。 + +10、Change import + +把`CenterNet/src/lib/models/networks/pose_dla_dcn.py`和`CenterNet/src/lib/models/networks/resnet_dcn.py`中的`from .DCNv2.dcn_v2 import DCN`改为`from .dcn.modules.deform_conv import ModulatedDeformConvPack as DCN` + +11、打开`/root/anaconda3/envs/CenterNet/lib/python3.6/site-packages/torch/autograd/function.py`,定位到273行,把`_iter_filter(...)`函数改为如下: + +``` +def _iter_filter(condition, allow_unknown=False, condition_msg=None, + conversion=None): + def _iter(obj): + if conversion is not None: + obj = conversion(obj) + if condition(obj): + yield obj + #M<<<<<< + elif isinstance(obj,int): ## int to tensor + yield torch.tensor(obj) + #>>>>>> + elif obj is None: + return + elif isinstance(obj, (list, tuple)): + for o in obj: + for var in _iter(o): + yield var + elif allow_unknown: + yield obj + else: + raise ValueError("Auto nesting doesn't know how to process " + "an input object of type " + torch.typename(obj) + + (". Accepted types: " + condition_msg + + ", or lists/tuples of them" + if condition_msg else "")) + + + return _iter +``` + +12、下载[ctdet_coco_dla_2x.pth](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT)模型,放入`CenterNet/models`目录下 +13、把`CenterNet/src/lib/opts.py中的add_argument('task', default='ctdet'....)`改为`add_argument('--task', default='ctdet'....)` +14、把提供的代码和脚本放入`CenterNet/src`目录下。 + +# 准备数据集 + +根据CenterNet官方数据集安装指导准备数据集:[DATA.md](https://github.com/xingyizhou/CenterNet/blob/master/readme/DATA.md),本示例以 **COCO 2017 Val** 数据集为例。 + +# PyTorch在线推理 + +由于后续导出onnx时需要修改CenterNet源码,修改后的代码无法进行PyTorch在线推理。因此这里先进行PyTorch在线推理验证。 + +运行pth_eval.py进行推理,推理完毕之后会输入精度和推理时间信息。 + +``` +python pth_eval.py --res_data_save_path=./pth_result +``` + +参数说明: + - --res_data_save_path:推理结果保存路径 + +# om模型推理模型转换 + +1. 
模型转换。 + + 使用PyTorch将模型权重文件pth转换为onnx文件,再使用atc工具将onnx文件转为离线推理模型om文件。 + + - 导出onnx文件。 + + - 打开`CenterNet/src/lib/models/networks/dcn/functions/deform_conv.py`文件 + + - 修改`ModulatedDeformConvFunction`的`symbolic(...)`函数,把原函数改为如下: + + ``` + @staticmethod + def symbolic(g, input, weight, offset, bias, stride, padding, dilation, groups, deformable_groups): + return g.op("DeformableConv2D", + input, + weight, + offset, + bias, + deformable_groups_i=deformable_groups, + dilations_i=dilation, + groups_i=groups, + pads_i=padding, strides_i=stride) + ``` + + - 修改`ModulatedDeformConvFunction`的`forward(...)`函数,把原函数改为如下: + + ``` + @staticmethod + def forward(ctx, input, weight, offset, bias=None, stride=1, padding=0, dilation=1, groups=1, deformable_groups=1): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + output = input.new_empty(ModulatedDeformConvFunction._infer_shape(ctx, input, weight)) + return output + ``` + - 打开`CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py`文件,修改`ModulatedDeformConvPack`的`forward(...)`函数,把原函数改为如下: + + ``` + def forward(self, x): + out = self.conv_offset_mask(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + offset_y = offset.reshape(1, -1, 2, offset.shape[2], + offset.shape[3])[:, :, 0, ...].reshape(1, offset.shape[1] // 2, offset.shape[2], + offset.shape[3]) + offset_x = offset.reshape(1, -1, 2, offset.shape[2], + offset.shape[3])[:, :, 1, ...].reshape(1, offset.shape[1] // 2, offset.shape[2], + offset.shape[3]) + offset = torch.cat((offset_x, offset_y, mask), 1) + + return modulated_deform_conv(x, self.weight, offset, self.bias, self.stride, self.padding, self.dilation, self.groups, self.deformable_groups) + ``` + - 打开`/root/anaconda3/envs/CenterNet/lib/python3.6/site-packages/torch/onnx/symbolic.py`,在`reciprocal(...)`函数后边增加两个函数: + + ``` + def reshape(g, self, shape): + return view(g, self, shape) + + + def reshape_as(g, self, other): + shape = g.op('Shape', other) + return reshape(g, self, shape) + ``` + + - 运行`export_onnx.py`文件,导出onnx模型 + + ``` + python export_onnx.py + ``` + + 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x.onnx`模型文件。 + + - 运行`modify_onnx.py`文件,修改onnx模型文件 + + ``` + python modify_onnx.py + ``` + + 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x_modify.onnx`模型文件。 + + 2. 
使用atc工具将onnx模型转om模型。 + + - 根据实际情况,修改`onnx2om.sh`脚本中的环境变量,具体的脚本示例如下: + + ``` + #!/bin/bash + clear + + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + + atc --framework=5 --model=../models/ctdet_coco_dla_2x_modify.onnx --output=../models/ctdet_coco_dla_2x \ + --input_format=NCHW --input_shape=image:1,3,512,512 --log=error --soc_version=Ascend310 + rm -rf fusion_result.json kernel_meta + ``` + + 参数说明: + + - --model:为onnx模型文件 + - --framework:5代表onnx模型 + + - --output:输出的om模型 + - --input_format:输入数据的格式 + - --input_shape:输入数据的shape + - --log:日志等级 + - --soc_version:部署芯片类型 + + - 执行onnx2om.sh脚本,将onnx文件转为离线推理模型文件om文件。 + + ``` + bash onnx2om.sh + ``` + + 运行完之后,会在`CenterNet/models`目录下生成`ctdet_coco_dla_2x.om`模型文件。 + +2. 开始推理验证。 + + - 根据实际情况,修改`benchmark_infer.sh`脚本中的代码,具体的脚本示例如下: + + ``` + #!/bin/bash + clear + + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + + ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=../models/ctdet_coco_dla_2x.om \-input_text_path=./pre_bin/bin_file.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=false + ``` + + - 执行`benchmark_infer.sh`脚本,进行benchmark推理。 + + ``` + sh benchmark_infer.sh + ``` + + - 推理数据后处理与精度统计。 + 运行postprocess.py脚本,进行后处理和精度统计。 + + ``` + python postprocess.py \ + --infer_res_save_path=./result/dumpOutput_device0 \ + --pre_data_save_path=./pre_bin \ + --res_data_save_path=./om_result + ``` + + 参数说明: + + - --infer_res_save_path:benchmark推理结果保存路径 + - --pre_data_save_path:预处理数据保存路径 + - --res_data_save_path:后处理结果保存路径 + + + diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/export_onnx.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/export_onnx.py index 7d6d385c280ec888539eacc112da28e59882f5cc..5173af1d6e49b2b801081b77178094b57789a9dd 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/export_onnx.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/export_onnx.py @@ -1,114 +1,114 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import _init_paths -import os -import torch -import torch.onnx as onnx -from lib.opts import opts -from types import MethodType -from collections import OrderedDict -from torch.onnx import OperatorExportTypes -from lib.models.model import create_model, load_model - -# 清空终端 -os.system('clear') - - -## onnx is not support dict return value -## for dla34 -def pose_dla_forward(self, x): - x = self.base(x) - x = self.dla_up(x) - y = [] - for i in range(self.last_level - self.first_level): - y.append(x[i].clone()) - self.ida_up(y, 0, len(y)) - ret = [] ## change dict to list - for head in self.heads: - ret.append(self.__getattr__(head)(y[-1])) - return ret - - -## for dla34v0 -def dlav0_forward(self, x): - x = self.base(x) - x = self.dla_up(x[self.first_level:]) - # x = self.fc(x) - # y = self.softmax(self.up(x)) - ret = [] ## change dict to list - for head in self.heads: - ret.append(self.__getattr__(head)(x)) - return ret - - -## for resdcn -def resnet_dcn_forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - x = self.deconv_layers(x) - ret = [] ## change dict to list - for head in self.heads: - ret.append(self.__getattr__(head)(x)) - return ret - - -forward = {'dla': pose_dla_forward, 'dlav0': dlav0_forward, 'resdcn': resnet_dcn_forward} - -opt = opts().init() -opt.arch = 'dla_34' -opt.heads = OrderedDict([('hm', 80), ('reg', 2), ('wh', 2)]) -opt.head_conv = 256 if 'dla' in opt.arch else 64 -print(opt) -model = create_model(opt.arch, opt.heads, opt.head_conv) -model.forward = MethodType(forward[opt.arch.split('_')[0]], model) -load_model(model, '../models/ctdet_coco_dla_2x.pth') -model.eval() -model.cuda() - -print('\n[INFO] Export to onnx ...') -input = torch.ones([1, 3, 512, 512]).cuda() -onnx.export(model, - input, - "../models/ctdet_coco_dla_2x.onnx", - operator_export_type=OperatorExportTypes.ONNX, - verbose=True, - input_names=['image'], - output_names=['hm', 'wh', 'reg']) -print('[INFO] Done!') +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import _init_paths +import os +import torch +import torch.onnx as onnx +from lib.opts import opts +from types import MethodType +from collections import OrderedDict +from torch.onnx import OperatorExportTypes +from lib.models.model import create_model, load_model + +# 清空终端 +os.system('clear') + + +## onnx is not support dict return value +## for dla34 +def pose_dla_forward(self, x): + x = self.base(x) + x = self.dla_up(x) + y = [] + for i in range(self.last_level - self.first_level): + y.append(x[i].clone()) + self.ida_up(y, 0, len(y)) + ret = [] ## change dict to list + for head in self.heads: + ret.append(self.__getattr__(head)(y[-1])) + return ret + + +## for dla34v0 +def dlav0_forward(self, x): + x = self.base(x) + x = self.dla_up(x[self.first_level:]) + # x = self.fc(x) + # y = self.softmax(self.up(x)) + ret = [] ## change dict to list + for head in self.heads: + ret.append(self.__getattr__(head)(x)) + return ret + + +## for resdcn +def resnet_dcn_forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.deconv_layers(x) + ret = [] ## change dict to list + for head in self.heads: + ret.append(self.__getattr__(head)(x)) + return ret + + +forward = {'dla': pose_dla_forward, 'dlav0': dlav0_forward, 'resdcn': resnet_dcn_forward} + +opt = opts().init() +opt.arch = 'dla_34' +opt.heads = OrderedDict([('hm', 80), ('reg', 2), ('wh', 2)]) +opt.head_conv = 256 if 'dla' in opt.arch else 64 +print(opt) +model = create_model(opt.arch, opt.heads, opt.head_conv) +model.forward = MethodType(forward[opt.arch.split('_')[0]], model) +load_model(model, '../models/ctdet_coco_dla_2x.pth') +model.eval() +model.cuda() + +print('\n[INFO] Export to onnx ...') +input = torch.ones([1, 3, 512, 512]).cuda() +onnx.export(model, + input, + "../models/ctdet_coco_dla_2x.onnx", + operator_export_type=OperatorExportTypes.ONNX, + verbose=True, + input_names=['image'], + output_names=['hm', 'wh', 'reg']) +print('[INFO] Done!') diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modelzoo_level.txt index 
a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modify_onnx.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modify_onnx.py index 5371b354999dc8efa5a9338b4572891f36c7627d..99d51ad1487994eaef6ebf6686d41cb6eacb46de 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modify_onnx.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/modify_onnx.py @@ -1,57 +1,57 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import os -from OXInterface import OXDataType, OXGraph, OXInitializer, OXNode - -# 清空终端 -os.system('clear') - -# 加载模型 -oxgraph = OXGraph('../models/ctdet_coco_dla_2x.onnx') - -# 给节点添加名字 -for idx, node in enumerate(oxgraph._node): - node.name = node.op_type + '_' + str(idx) - node.doc_string = '' -oxgraph.save_new_model('../models/ctdet_coco_dla_2x_modify.onnx') - -# 修改DeformableConv2D的属性 -oxgraph = OXGraph('../models/ctdet_coco_dla_2x_modify.onnx') -oxnodes = oxgraph.get_oxnode_by_op_type('DeformableConv2D') -for oxnode in oxnodes: - oxnode.set_attribute('dilations', [1, 1]) - oxnode.set_attribute('pads', [1, 1]) - oxnode.set_attribute('strides', [1, 1]) - -# 保存新模型 -oxgraph.save_new_model('../models/ctdet_coco_dla_2x_modify.onnx') +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import os +from OXInterface import OXDataType, OXGraph, OXInitializer, OXNode + +# 清空终端 +os.system('clear') + +# 加载模型 +oxgraph = OXGraph('../models/ctdet_coco_dla_2x.onnx') + +# 给节点添加名字 +for idx, node in enumerate(oxgraph._node): + node.name = node.op_type + '_' + str(idx) + node.doc_string = '' +oxgraph.save_new_model('../models/ctdet_coco_dla_2x_modify.onnx') + +# 修改DeformableConv2D的属性 +oxgraph = OXGraph('../models/ctdet_coco_dla_2x_modify.onnx') +oxnodes = oxgraph.get_oxnode_by_op_type('DeformableConv2D') +for oxnode in oxnodes: + oxnode.set_attribute('dilations', [1, 1]) + oxnode.set_attribute('pads', [1, 1]) + oxnode.set_attribute('strides', [1, 1]) + +# 保存新模型 +oxgraph.save_new_model('../models/ctdet_coco_dla_2x_modify.onnx') diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/postprocess.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/postprocess.py index a79f03d9795174e9a8847837435c809b636d90bf..099262c6326750e16774f6b0ba9ba41e076905cf 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/postprocess.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/postprocess.py @@ -1,157 +1,157 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import _init_paths -import os -from tqdm import tqdm -import argparse -import shutil -import pickle -import glob -from os.path import join -import numpy as np -import torch -from logger import Logger -from datasets.dataset_factory import dataset_factory -from lib.detectors.ctdet import CtdetDetector -from lib.detectors.detector_factory import detector_factory -from lib.opts import opts -from lib.models.utils import flip_tensor -from lib.models.decode import ctdet_decode - -# 清空终端 -os.system('clear') - - -class ModelWarper(CtdetDetector): - def __init__(self, opt): - super(CtdetDetector, self).__init__(opt) - - def process(self, output): - hm = torch.from_numpy(output['hm']).sigmoid_() - wh = torch.from_numpy(output['wh']) - reg = torch.from_numpy(output['reg']) if self.opt.reg_offset else None - if self.opt.flip_test: - hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 - wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 - reg = reg[0:1] if reg is not None else None - dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) - - return dets - - def run(self, output, meta, scale): - detections = [] - dets = self.process(output) - dets = self.post_process(dets, meta, scale) - detections.append(dets) - - results = self.merge_outputs(detections) - return results - - -def postprocess(infer_res_save_path, pre_data_save_path, opt): - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str - - Dataset = dataset_factory[opt.dataset] - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - print(opt) - Logger(opt) - Detector = detector_factory[opt.task] - - split = 'val' if not opt.trainval else 'test' - dataset = Dataset(opt, split) - detector = Detector(opt) - - model_warper = ModelWarper(detector.opt) - - # 创建目录 - if os.path.exists(res_data_save_path): - shutil.rmtree(res_data_save_path) - os.makedirs(res_data_save_path) - - # 读取文件个数 - bin_file_list = glob.glob(join(infer_res_save_path, '*.bin')) - bin_file_num = len(bin_file_list) // 3 - - # 加载字典 - image_id_dict = pickle.load(open(join(pre_data_save_path, 'image_id_dict.pkl'), 'rb')) - meta_dict = pickle.load(open(join(pre_data_save_path, 'meta_dict.pkl'), 'rb')) - - # 后处理 - print('\n[INFO] Postprocessing ...') - results = {} - for i in tqdm(range(bin_file_num)): - hm = np.fromfile(join(infer_res_save_path, str(i) + '_3.bin'), dtype=np.float32).reshape(1, 80, 128, 128) - wh = np.fromfile(join(infer_res_save_path, str(i) + '_1.bin'), dtype=np.float32).reshape(1, 2, 128, 128) - reg 
= np.fromfile(join(infer_res_save_path, str(i) + '_2.bin'), dtype=np.float32).reshape(1, 2, 128, 128) - - output = {'hm': hm, "wh": wh, "reg": reg} - meta = meta_dict[i] - scale = [1.0] - - result = model_warper.run(output, meta, scale) - - results[image_id_dict[i]] = result - print('[INFO] Postprocess done!') - - # 计算精度 - print('\n[INFO] Calculate accuracy ...') - dataset.run_eval(results, res_data_save_path) - - -if __name__ == '__main__': - ''' - Using Example: - - python postprocess.py \ - --infer_res_save_path=./result/dumpOutput_device0 \ - --pre_data_save_path=./pre_bin \ - --res_data_save_path=./om_result - ''' - - # 解析参数 - parser = argparse.ArgumentParser() - parser.add_argument('--infer_res_save_path', required=True) - parser.add_argument('--pre_data_save_path', required=True) - parser.add_argument('--res_data_save_path', required=True) - - opt = parser.parse_args() - infer_res_save_path = opt.infer_res_save_path - pre_data_save_path = opt.pre_data_save_path - res_data_save_path = opt.res_data_save_path - - # 创建并解析opt - opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' - '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) - - # 处理数据 - results = postprocess(infer_res_save_path, pre_data_save_path, opt) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ + +import _init_paths +import os +from tqdm import tqdm +import argparse +import shutil +import pickle +import glob +from os.path import join +import numpy as np +import torch +from logger import Logger +from datasets.dataset_factory import dataset_factory +from lib.detectors.ctdet import CtdetDetector +from lib.detectors.detector_factory import detector_factory +from lib.opts import opts +from lib.models.utils import flip_tensor +from lib.models.decode import ctdet_decode + +# 清空终端 +os.system('clear') + + +class ModelWarper(CtdetDetector): + def __init__(self, opt): + super(CtdetDetector, self).__init__(opt) + + def process(self, output): + hm = torch.from_numpy(output['hm']).sigmoid_() + wh = torch.from_numpy(output['wh']) + reg = torch.from_numpy(output['reg']) if self.opt.reg_offset else None + if self.opt.flip_test: + hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 + wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 + reg = reg[0:1] if reg is not None else None + dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) + + return dets + + def run(self, output, meta, scale): + detections = [] + dets = self.process(output) + dets = self.post_process(dets, meta, scale) + detections.append(dets) + + results = self.merge_outputs(detections) + return results + + +def postprocess(infer_res_save_path, pre_data_save_path, opt): + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str + + Dataset = dataset_factory[opt.dataset] + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + print(opt) + Logger(opt) + Detector = detector_factory[opt.task] + + split = 'val' if not opt.trainval else 'test' + dataset = Dataset(opt, split) + detector = Detector(opt) + + model_warper = ModelWarper(detector.opt) + + # 创建目录 + if os.path.exists(res_data_save_path): + shutil.rmtree(res_data_save_path) + os.makedirs(res_data_save_path) + + # 读取文件个数 + bin_file_list = glob.glob(join(infer_res_save_path, '*.bin')) + bin_file_num = len(bin_file_list) // 3 + + # 加载字典 + image_id_dict = pickle.load(open(join(pre_data_save_path, 'image_id_dict.pkl'), 'rb')) + meta_dict = pickle.load(open(join(pre_data_save_path, 'meta_dict.pkl'), 'rb')) + + # 后处理 + print('\n[INFO] Postprocessing ...') + results = {} + for i in tqdm(range(bin_file_num)): + hm = np.fromfile(join(infer_res_save_path, str(i) + '_3.bin'), dtype=np.float32).reshape(1, 80, 128, 128) + wh = np.fromfile(join(infer_res_save_path, str(i) + '_1.bin'), dtype=np.float32).reshape(1, 2, 128, 128) + reg = np.fromfile(join(infer_res_save_path, str(i) + '_2.bin'), dtype=np.float32).reshape(1, 2, 128, 128) + + output = {'hm': hm, "wh": wh, "reg": reg} + meta = meta_dict[i] + scale = [1.0] + + result = model_warper.run(output, meta, scale) + + results[image_id_dict[i]] = result + print('[INFO] Postprocess done!') + + # 计算精度 + print('\n[INFO] Calculate accuracy ...') + dataset.run_eval(results, res_data_save_path) + + +if __name__ == '__main__': + ''' + Using Example: + + python postprocess.py \ + --infer_res_save_path=./result/dumpOutput_device0 \ + --pre_data_save_path=./pre_bin \ + --res_data_save_path=./om_result + ''' + + # 解析参数 + parser = argparse.ArgumentParser() + parser.add_argument('--infer_res_save_path', required=True) + parser.add_argument('--pre_data_save_path', required=True) + parser.add_argument('--res_data_save_path', required=True) + + opt = parser.parse_args() + infer_res_save_path = opt.infer_res_save_path + pre_data_save_path = opt.pre_data_save_path + 
res_data_save_path = opt.res_data_save_path + + # 创建并解析opt + opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' + '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) + + # 处理数据 + results = postprocess(infer_res_save_path, pre_data_save_path, opt) diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/preprocess.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/preprocess.py index 7001d61d09c865da61e03f859ba412ff03c36d75..84347016ba46344edf2d37760bb94cf3c9678ab5 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/preprocess.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/preprocess.py @@ -1,127 +1,127 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -import _init_paths -import argparse -import shutil -import pickle -import os -import cv2 -from os.path import join -from logger import Logger -from tqdm import tqdm -from datasets.dataset_factory import dataset_factory -from lib.detectors.ctdet import CtdetDetector -from lib.detectors.detector_factory import detector_factory -from lib.opts import opts - -# 清空终端 -os.system('clear') - - -class ModelWarper(CtdetDetector): - def __init__(self, opt): - super(CtdetDetector, self).__init__(opt) - - def run(self, image_path, image_save_path, meta_save_path): - image = cv2.imread(image_path) - scale = self.scales[0] - images, meta = self.pre_process(image, scale, None) - - # 保存数据 - images = images.numpy() - images.tofile(image_save_path) - - return meta - - -def preprocess(opt, pre_data_save_path): - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str - - Dataset = dataset_factory[opt.dataset] - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - print(opt) - Logger(opt) - Detector = detector_factory[opt.task] - - split = 'val' if not opt.trainval else 'test' - dataset = Dataset(opt, split) - detector = Detector(opt) - - model_warper = ModelWarper(detector.opt) - - # 创建目录 - if os.path.exists(pre_data_save_path): - shutil.rmtree(pre_data_save_path) - os.makedirs(pre_data_save_path) - - # 处理数据 - info_file_path = join(pre_data_save_path, 'bin_file.info') - with open(info_file_path, 'wt', encoding='utf-8') as f_info: - image_id_dict = {} - meta_dict = {} - num_iters = len(dataset) - for i in tqdm(range(num_iters)): - image_id = dataset.images[i] - image_info = dataset.coco.loadImgs(ids=[image_id])[0] - image_path = join(dataset.img_dir, image_info['file_name']) - image_save_path = join(pre_data_save_path, str(i) + '.bin') - meta_save_path = join(pre_data_save_path, str(i) + '.pkl') - meta = model_warper.run(image_path, image_save_path, meta_save_path) - f_info.write(str(i) + " ./" + str(i) + '.bin 512 512' + '\n') - image_id_dict[i] = image_id - meta_dict[i] = meta - pickle.dump(image_id_dict, open(join(pre_data_save_path, 'image_id_dict.pkl'), 'wb')) - pickle.dump(meta_dict, open(join(pre_data_save_path, 'meta_dict.pkl'), 'wb')) - - -if __name__ == '__main__': - ''' - Using Example: - - python preprocess.py --pre_data_save_path=./pre_bin - ''' - - # 解析参数 - parser = argparse.ArgumentParser() - parser.add_argument('--pre_data_save_path', required=True) - opt = parser.parse_args() - pre_data_save_path = opt.pre_data_save_path - - # 创建并解析opt - opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' - '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) - - # 处理数据 - print('\n[INFO] Preprocessing ...') - preprocess(opt, pre_data_save_path) - print('[INFO] Preprocess done!') +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import _init_paths +import argparse +import shutil +import pickle +import os +import cv2 +from os.path import join +from logger import Logger +from tqdm import tqdm +from datasets.dataset_factory import dataset_factory +from lib.detectors.ctdet import CtdetDetector +from lib.detectors.detector_factory import detector_factory +from lib.opts import opts + +# 清空终端 +os.system('clear') + + +class ModelWarper(CtdetDetector): + def __init__(self, opt): + super(CtdetDetector, self).__init__(opt) + + def run(self, image_path, image_save_path, meta_save_path): + image = cv2.imread(image_path) + scale = self.scales[0] + images, meta = self.pre_process(image, scale, None) + + # 保存数据 + images = images.numpy() + images.tofile(image_save_path) + + return meta + + +def preprocess(opt, pre_data_save_path): + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str + + Dataset = dataset_factory[opt.dataset] + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + print(opt) + Logger(opt) + Detector = detector_factory[opt.task] + + split = 'val' if not opt.trainval else 'test' + dataset = Dataset(opt, split) + detector = Detector(opt) + + model_warper = ModelWarper(detector.opt) + + # 创建目录 + if os.path.exists(pre_data_save_path): + shutil.rmtree(pre_data_save_path) + os.makedirs(pre_data_save_path) + + # 处理数据 + info_file_path = join(pre_data_save_path, 'bin_file.info') + with open(info_file_path, 'wt', encoding='utf-8') as f_info: + image_id_dict = {} + meta_dict = {} + num_iters = len(dataset) + for i in tqdm(range(num_iters)): + image_id = dataset.images[i] + image_info = dataset.coco.loadImgs(ids=[image_id])[0] + image_path = join(dataset.img_dir, image_info['file_name']) + image_save_path = join(pre_data_save_path, str(i) + '.bin') + meta_save_path = join(pre_data_save_path, str(i) + '.pkl') + meta = model_warper.run(image_path, image_save_path, meta_save_path) + f_info.write(str(i) + " ./" + str(i) + '.bin 512 512' + '\n') + image_id_dict[i] = image_id + meta_dict[i] = meta + pickle.dump(image_id_dict, open(join(pre_data_save_path, 'image_id_dict.pkl'), 'wb')) + pickle.dump(meta_dict, open(join(pre_data_save_path, 'meta_dict.pkl'), 'wb')) + + +if __name__ == '__main__': + ''' + Using Example: + + python preprocess.py --pre_data_save_path=./pre_bin + ''' + + # 解析参数 + parser = argparse.ArgumentParser() + parser.add_argument('--pre_data_save_path', required=True) + opt = parser.parse_args() + pre_data_save_path = opt.pre_data_save_path + + # 创建并解析opt + 
opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' + '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) + + # 处理数据 + print('\n[INFO] Preprocessing ...') + preprocess(opt, pre_data_save_path) + print('[INFO] Preprocess done!') diff --git a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/pth_eval.py b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/pth_eval.py index 0fa6035d108345e88ff806a2237a9255bd5b9bdc..7a5b41ce8523e5ae83d0a80dcdd2458f485fa941 100644 --- a/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/pth_eval.py +++ b/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch/pth_eval.py @@ -1,115 +1,115 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -import _init_paths -import os -import time -import shutil -import argparse -from tqdm import tqdm -from opts import opts -from logger import Logger -from datasets.dataset_factory import dataset_factory -from detectors.detector_factory import detector_factory - -# 清空终端 -os.system('clear') - - -def eval(opt, res_data_save_path): - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str - - Dataset = dataset_factory[opt.dataset] - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - print(opt) - Logger(opt) - Detector = detector_factory[opt.task] - - split = 'val' if not opt.trainval else 'test' - dataset = Dataset(opt, split) - detector = Detector(opt) - - # 创建目录 - if os.path.exists(res_data_save_path): - shutil.rmtree(res_data_save_path) - os.makedirs(res_data_save_path) - - print('\n[INFO] Infering ...') - results = {} - num_iters = len(dataset) - total_infer_time = 0 - total_infer_num = 0 - for ind in tqdm(range(num_iters)): - img_id = dataset.images[ind] - img_info = dataset.coco.loadImgs(ids=[img_id])[0] - img_path = os.path.join(dataset.img_dir, img_info['file_name']) - - start_time = time.perf_counter() - ret = detector.run(img_path) - end_time = time.perf_counter() - total_infer_time += end_time - start_time - total_infer_num += 1 - - results[img_id] = ret['results'] - print('\n[INFO] Infer done!') - - print('\n[INFO] Calculate accuracy ...') - dataset.run_eval(results, res_data_save_path) - - # 推理时间 - print('\n[INFO] Time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total infer time(ms): ' + str(total_infer_time * 1000) + '\n' + \ - 'average infer time(ms): ' + str(total_infer_time * 1000 / total_infer_num) + '\n' - print(msg) - - -if __name__ == '__main__': - ''' - Using Example: - - python pth_eval.py --res_data_save_path=./pth_result - ''' - - # 解析参数 - parser = argparse.ArgumentParser() - parser.add_argument('--res_data_save_path', required=True) - - opt = parser.parse_args() - res_data_save_path = opt.res_data_save_path - - # 创建并解析opt - opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' - '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) - - # 处理数据 - eval(opt, res_data_save_path) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import _init_paths +import os +import time +import shutil +import argparse +from tqdm import tqdm +from opts import opts +from logger import Logger +from datasets.dataset_factory import dataset_factory +from detectors.detector_factory import detector_factory + +# 清空终端 +os.system('clear') + + +def eval(opt, res_data_save_path): + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str + + Dataset = dataset_factory[opt.dataset] + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + print(opt) + Logger(opt) + Detector = detector_factory[opt.task] + + split = 'val' if not opt.trainval else 'test' + dataset = Dataset(opt, split) + detector = Detector(opt) + + # 创建目录 + if os.path.exists(res_data_save_path): + shutil.rmtree(res_data_save_path) + os.makedirs(res_data_save_path) + + print('\n[INFO] Infering ...') + results = {} + num_iters = len(dataset) + total_infer_time = 0 + total_infer_num = 0 + for ind in tqdm(range(num_iters)): + img_id = dataset.images[ind] + img_info = dataset.coco.loadImgs(ids=[img_id])[0] + img_path = os.path.join(dataset.img_dir, img_info['file_name']) + + start_time = time.perf_counter() + ret = detector.run(img_path) + end_time = time.perf_counter() + total_infer_time += end_time - start_time + total_infer_num += 1 + + results[img_id] = ret['results'] + print('\n[INFO] Infer done!') + + print('\n[INFO] Calculate accuracy ...') + dataset.run_eval(results, res_data_save_path) + + # 推理时间 + print('\n[INFO] Time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total infer time(ms): ' + str(total_infer_time * 1000) + '\n' + \ + 'average infer time(ms): ' + str(total_infer_time * 1000 / total_infer_num) + '\n' + print(msg) + + +if __name__ == '__main__': + ''' + Using Example: + + python pth_eval.py --res_data_save_path=./pth_result + ''' + + # 解析参数 + parser = argparse.ArgumentParser() + parser.add_argument('--res_data_save_path', required=True) + + opt = parser.parse_args() + res_data_save_path = opt.res_data_save_path + + # 创建并解析opt + opt = opts().init('--task ctdet --exp_id coco_dla --not_prefetch_test ' + '--load_model ../models/ctdet_coco_dla_2x.pth'.split(' ')) + + # 处理数据 + eval(opt, res_data_save_path) diff --git a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/LICENSE b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db.diff b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db.diff index 47cb2d745e4535e7a4a27b852b0b940edcae60e6..c4550dd3256a7ca6a3119774dd32934596d3fef1 100644 --- a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db.diff +++ b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db.diff @@ -103,24 +103,24 @@ index df6e5a2..796b02f 100644 --- a/backbones/resnet.py +++ b/backbones/resnet.py @@ -129,7 +129,8 @@ class Bottleneck(nn.Module): - self.conv2_offset = nn.Conv2d( - planes, deformable_groups * offset_channels, - kernel_size=3, -- padding=1) -+ padding=1, -+ stride=stride) - self.conv2 = conv_op( - planes, planes, kernel_size=3, padding=1, stride=stride, - deformable_groups=deformable_groups, bias=False) + self.conv2_offset = nn.Conv2d( + planes, deformable_groups * offset_channels, + kernel_size=3, +- padding=1) ++ padding=1, ++ stride=stride) + self.conv2 = conv_op( + planes, planes, kernel_size=3, padding=1, stride=stride, + deformable_groups=deformable_groups, bias=False) @@ -295,7 +296,7 @@ def resnet50(pretrained=True, **kwargs): - return model - - --def deformable_resnet50(pretrained=True, **kwargs): -+def deformable_resnet50(pretrained=False, **kwargs): - """Constructs a ResNet-50 model with deformable conv. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet + return model + + +-def deformable_resnet50(pretrained=True, **kwargs): ++def deformable_resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model with deformable conv. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet diff --git a/structure/model.py b/structure/model.py index 060191b..ea1705b 100644 --- a/structure/model.py diff --git a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db_pth2onnx.py b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db_pth2onnx.py index 93555759cf636963692fd1ad4a46eed5f140c19d..6f65e5a62ccef615f75b280792dfa2f7b6742010 100644 --- a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db_pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/db_pth2onnx.py @@ -1,57 +1,57 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.onnx -import argparse -from experiment import Structure, Experiment -from concern.config import Configurable, Config -from collections import OrderedDict - -def proc_nodes_modile(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." 
in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(model): - #https://github.com/MhLiao/DB - input_names = ["actual_input_1"] - output_names = ["output1"] - dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} - dummy_input = torch.randn(1, 3, 736, 1280) - - torch.onnx.export(model, dummy_input, "dbnet.onnx", input_names=input_names, dynamic_axes=dynamic_axes, output_names=output_names, opset_version=11, verbose=True) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='db pth2onnx') - parser.add_argument('exp', type=str) - parser.add_argument('--resume', type=str, help='Resume from checkpoint') - args = parser.parse_args() - args = vars(args) - args = {k: v for k, v in args.items() if v is not None} - conf = Config() - experiment_args = conf.compile(conf.load(args['exp']))['Experiment'] - experiment_args.update(cmd=args) - experiment = Configurable.construct_class_from_config(experiment_args) - model = experiment.structure.builder.build(torch.device('cpu')) - checkpoint = torch.load(args['resume'], map_location=torch.device('cpu')) - checkpoint = proc_nodes_modile(checkpoint) - model.load_state_dict(checkpoint) - model.eval() - pth2onnx(model) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.onnx +import argparse +from experiment import Structure, Experiment +from concern.config import Configurable, Config +from collections import OrderedDict + +def proc_nodes_modile(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." 
in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(model): + #https://github.com/MhLiao/DB + input_names = ["actual_input_1"] + output_names = ["output1"] + dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} + dummy_input = torch.randn(1, 3, 736, 1280) + + torch.onnx.export(model, dummy_input, "dbnet.onnx", input_names=input_names, dynamic_axes=dynamic_axes, output_names=output_names, opset_version=11, verbose=True) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='db pth2onnx') + parser.add_argument('exp', type=str) + parser.add_argument('--resume', type=str, help='Resume from checkpoint') + args = parser.parse_args() + args = vars(args) + args = {k: v for k, v in args.items() if v is not None} + conf = Config() + experiment_args = conf.compile(conf.load(args['exp']))['Experiment'] + experiment_args.update(cmd=args) + experiment = Configurable.construct_class_from_config(experiment_args) + model = experiment.structure.builder.build(torch.device('cpu')) + checkpoint = torch.load(args['resume'], map_location=torch.device('cpu')) + checkpoint = proc_nodes_modile(checkpoint) + model.load_state_dict(checkpoint) + model.eval() + pth2onnx(model) diff --git a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/modelzoo_level.txt index 2233d9218a6b952e4bde5939ae170ecacb443827..9137dc380d2c2e7b046d270ddeea5496152ff1ca 100644 --- a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK -PerfStatus:OK +FuncStatus:OK +PrecisionStatus:OK +PerfStatus:OK diff --git a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/test/parse.py b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/test/parse.py index 85920a3be88f97f93cfead86a10b6286cf214aca..2b1f1406a73cace34bef8e1667033a53df5a0e24 100644 --- a/ACL_PyTorch/built-in/cv/DB_for_PyTorch/test/parse.py +++ b/ACL_PyTorch/built-in/cv/DB_for_PyTorch/test/parse.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
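As a quick sanity check of the dynamic-batch ONNX model produced by the db_pth2onnx.py script above, a minimal sketch such as the following can be used. This is illustrative only: it assumes onnxruntime and numpy are installed, and it reuses the file name dbnet.onnx, the input name actual_input_1 and the 3x736x1280 input shape from the export call in that script.

import numpy as np
import onnxruntime as ort

# Run the exported model at two batch sizes to confirm that the dynamic
# batch axis declared during export is actually usable.
session = ort.InferenceSession("dbnet.onnx")
input_name = session.get_inputs()[0].name  # "actual_input_1" per db_pth2onnx.py
for bs in (1, 4):
    dummy = np.random.randn(bs, 3, 736, 1280).astype(np.float32)
    outputs = session.run(None, {input_name: dummy})
    print(bs, [out.shape for out in outputs])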
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - print(content) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + print(content) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/ReadMe.md index b72db5dd9f23a821813b09736847740acf5f8017..2f762d2bd4f9f1c4f02af63dc88f31b08e1228a9 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/ReadMe.md @@ -1,46 +1,46 @@ -文件作用说明: - -1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 - -2.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 - -3.deepmar.info:PETA数据集信息,用于benchmark推理获取数据集 - -4.preprocess_deepmar_pytorch.py:数据集预处理脚本,通过均值方差处理归一化图片 - -5.label.json:PETA数据集标签,用于验证推理结果 - -6.postprocess_deepmar_pytorch.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -7.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - - - - - -推理端到端步骤: - -(1) 从开源仓https://github.com/dangweili/pedestrian-attribute-recognition-pytorch/blob/master/baseline/model/DeepMAR.py下载deepamar模型或者指定自己训练好的pth文件路径,使用提供的DeepMar.py替换掉模型中的DeepMar.py, 通过export_onnx.py脚本转化为onnx模型 - - - -(2)为提高性能,可以使用remove_pad.py剔除掉pad算子,运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune,由于提出pad算子后,性能已经较好,本包中提供的om为未经auto_tune调优的om模型 - - -(3)运行python script/dataset/transform_peta.py得到数据集,python split_test_data.py得到测试集txt信息image.txt和标签json文件label.json - - -(4)运行python preprocess_deepmar_pytorch.py dataset/peta/images input_bin image.txt,根据测试集image.txt生成对应bin文件 - -(5)运行python get_info.py input_bin deepmar.info 224 224 生成deepmar.info文件,存储bin文件的信息 - -(6)./benchmark.x86_64 
-model_type=vision -batch_size=1 -device_id=0 -om_path=deepmar_bs1.om -input_width=224 -input_height=224 -input_text_path=deepmar.info -useDvpp=false -output_binary=true
-
-运行benchmark推理,结果保存在 ./result/dumpOutput_device0下 目录下
-
-
-
-(7)python postprocess_deepmar_pytorch.py result/dumpOutput_device0/ label.json
-
-验证推理结果,第一项即为acc
-
+File descriptions:
+
+1.auto_tune.sh: model conversion script with integrated auto tune, which can be disabled manually
+
+2.pthtar2onnx.py: converts a pth.tar file to an onnx file
+
+3.deepmar.info: PETA dataset information, used by the benchmark tool to load the dataset
+
+4.preprocess_deepmar_pytorch.py: dataset preprocessing script that normalizes images with mean and standard deviation
+
+5.label.json: PETA dataset labels, used to verify the inference results
+
+6.postprocess_deepmar_pytorch.py: result verification script that compares the classification results output by the benchmark tool against the labels and reports Accuracy
+
+7.benchmark tool source code: https://gitee.com/ascend/cann-benchmark/tree/master/infer
+
+
+
+
+
+End-to-end inference steps:
+
+(1) Download the DeepMAR model from the open-source repository https://github.com/dangweili/pedestrian-attribute-recognition-pytorch/blob/master/baseline/model/DeepMAR.py, or point to your own trained pth file; replace DeepMar.py in the model with the provided DeepMar.py, then convert it to an onnx model with the export_onnx.py script
+
+
+
+(2) To improve performance, remove_pad.py can be used to remove the pad operator; run the auto_tune.sh script to convert the om model (auto_tune can also be disabled manually). Since performance is already good after removing the pad operator, the om model provided in this package was not tuned with auto_tune
+
+
+(3) Run python script/dataset/transform_peta.py to obtain the dataset, and python split_test_data.py to obtain the test-set list image.txt and the label json file label.json
+
+
+(4) Run python preprocess_deepmar_pytorch.py dataset/peta/images input_bin image.txt to generate the corresponding bin files from the test-set list image.txt
+
+(5) Run python get_info.py input_bin deepmar.info 224 224 to generate the deepmar.info file, which stores the information of the bin files
+
+(6)./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -om_path=deepmar_bs1.om -input_width=224 -input_height=224 -input_text_path=deepmar.info -useDvpp=false -output_binary=true
+
+Run the benchmark inference; the results are saved in the ./result/dumpOutput_device0 directory
+
+
+
+(7)python postprocess_deepmar_pytorch.py result/dumpOutput_device0/ label.json
+
+Verify the inference results; the first item is the accuracy (acc)
+
diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/export_onnx.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/export_onnx.py
index ddab3eb951dce15e8f67eec8208a48b818e641bc..c61f9e75671d202a041e68a7dc63e790471109d5 100644
--- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/export_onnx.py
+++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/export_onnx.py
@@ -1,38 +1,38 @@
-import torch
-from baseline.model import DeepMAR
-import torch.onnx
-from collections import OrderedDict
-import torch._utils
-
-
-def proc_nodes_module(checkpoint, AttrName):
-    new_state_dict = OrderedDict()
-    for k, v in checkpoint[AttrName].items():
-        if (k[0:7] == "module."):
-            name = k[7:]
-        else:
-            name = k[0:]
-
-        new_state_dict[name] = v
-    return new_state_dict
-
-
-def convert():
-    checkpoint = torch.load("./checkpoint.pth.tar", map_location='cpu')
-    checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict')
-    model = DeepMAR.DeepMAR_ResNet50()
-    model.load_state_dict(checkpoint['state_dict'])
-    model.eval()
-    print(model)
-
-    input_names = ["actual_input_1"]
-    output_names = ["output1"]
-    dummy_input = torch.randn(1, 3, 224, 224)
-    import onnx
-    print('\nStarting ONNX export with onnx %s...'
% onnx.__version__) - torch.onnx.export(model, dummy_input, "Deepmar_bs1.onnx", input_names=input_names, output_names=output_names, - opset_version=11, do_constant_folding=True) - - -if __name__ == "__main__": - convert() +import torch +from baseline.model import DeepMAR +import torch.onnx +from collections import OrderedDict +import torch._utils + + +def proc_nodes_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + + new_state_dict[name] = v + return new_state_dict + + +def convert(): + checkpoint = torch.load("./checkpoint.pth.tar", map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + model = DeepMAR.DeepMAR_ResNet50() + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 224, 224) + import onnx + print('\nStarting ONNX export with onnx %s...' % onnx.__version__) + torch.onnx.export(model, dummy_input, "Deepmar_bs1.onnx", input_names=input_names, output_names=output_names, + opset_version=11, do_constant_folding=True) + + +if __name__ == "__main__": + convert() diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/get_info.py index da181d8d0342e52eb33b38fa475ae812a61575cf..c68e7705e47c90970ab04e5f139e52192a634364 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/get_info.py @@ -1,16 +1,16 @@ -import os -import sys -from glob import glob - -file_path = sys.argv[1] -info_name = sys.argv[2] -width = sys.argv[3] -height = sys.argv[4] - -bin_images = glob(os.path.join(file_path, '*')) - -with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') +import os +import sys +from glob import glob + +file_path = sys.argv[1] +info_name = sys.argv[2] +width = sys.argv[3] +height = sys.argv[4] + +bin_images = glob(os.path.join(file_path, '*')) + +with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/postprocess_deepmar_pytorch.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/postprocess_deepmar_pytorch.py index 14c2c834bfcafb267756f92251d4f1f1c8380f8e..f6032390a585ac505cce8ff00e89c09a82a9ed09 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/postprocess_deepmar_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/postprocess_deepmar_pytorch.py @@ -1,94 +1,94 @@ -import os -import sys -import json -import numpy as np - - -def attribute_evaluate_lidw(gt_result, pt_result): - """ - Input: - gt_result, pt_result, N*L, with 0/1 - Output: - result - a dictionary, including label-based and instance-based evaluation - label-based: label_pos_acc, label_neg_acc, label_acc - instance-based: instance_acc, instance_precision, instance_recall, instance_F1 - """ - # obtain the label-based and instance-based accuracy - # compute the label-based accuracy - if gt_result.shape != pt_result.shape: - print('Shape beteen groundtruth and predicted results are different') - # compute the label-based accuracy - result = {} - gt_pos = np.sum((gt_result == 1).astype(float), axis=0) - gt_neg = np.sum((gt_result == 
0).astype(float), axis=0) - pt_pos = np.sum((gt_result == 1).astype(float) * (pt_result == 1).astype(float), axis=0) - pt_neg = np.sum((gt_result == 0).astype(float) * (pt_result == 0).astype(float), axis=0) - label_pos_acc = 1.0*pt_pos/gt_pos - label_neg_acc = 1.0*pt_neg/gt_neg - label_acc = (label_pos_acc + label_neg_acc)/2 - result['label_pos_acc'] = label_pos_acc - result['label_neg_acc'] = label_neg_acc - result['label_acc'] = label_acc - # compute the instance-based accuracy - # precision - gt_pos = np.sum((gt_result == 1).astype(float), axis=1) - pt_pos = np.sum((pt_result == 1).astype(float), axis=1) - floatersect_pos = np.sum((gt_result == 1).astype(float)*(pt_result == 1).astype(float), axis=1) - union_pos = np.sum(((gt_result == 1)+(pt_result == 1)).astype(float),axis=1) - # avoid empty label in predicted results - cnt_eff = float(gt_result.shape[0]) - for iter, key in enumerate(gt_pos): - if key == 0: - union_pos[iter] = 1 - pt_pos[iter] = 1 - gt_pos[iter] = 1 - cnt_eff = cnt_eff - 1 - continue - if pt_pos[iter] == 0: - pt_pos[iter] = 1 - instance_acc = np.sum(floatersect_pos/union_pos)/cnt_eff - instance_precision = np.sum(floatersect_pos/pt_pos)/cnt_eff - instance_recall = np.sum(floatersect_pos/gt_pos)/cnt_eff - floatance_F1 = 2*instance_precision*instance_recall/(instance_precision+instance_recall) - result['instance_acc'] = instance_acc - result['instance_precision'] = instance_precision - result['instance_recall'] = instance_recall - result['instance_F1'] = floatance_F1 - return result - - -def postprocess_deepmar(npu_result, label_file): - with open(label_file, 'r') as json_file: - image_label = json.load(json_file) - bin_files = os.listdir(npu_result) - counts = len(bin_files) - gt_result = np.zeros((counts, 35)) - pt_result = np.zeros((counts, 35)) - - for index, bin_file in enumerate(bin_files): - result = np.fromfile(os.path.join(npu_result, bin_file), dtype='float32').reshape((35, )) - result[result >= 0] = 1 - result[result < 0] = 0 - pt_result[index, :] = result - - # label - key = bin_file[:bin_file.rfind('_')] + '.png' - label = image_label[key] - label = np.array(label, dtype=np.uint8) - gt_result[index, :] = label - - result = attribute_evaluate_lidw(gt_result, pt_result) - print('-'*80) - print('instance_acc: {}'.format(result['instance_acc'])) - print('instance_precision: {}'.format(result['instance_precision'])) - print('instance_recall: {}'.format(result['instance_recall'])) - print('instance_F1: {}'.format(result['instance_F1'])) - print('-' * 80) - - -if __name__ == "__main__": - npu_result = os.path.abspath(sys.argv[1]) - label_file = os.path.abspath(sys.argv[2]) - postprocess_deepmar(npu_result, label_file) - +import os +import sys +import json +import numpy as np + + +def attribute_evaluate_lidw(gt_result, pt_result): + """ + Input: + gt_result, pt_result, N*L, with 0/1 + Output: + result + a dictionary, including label-based and instance-based evaluation + label-based: label_pos_acc, label_neg_acc, label_acc + instance-based: instance_acc, instance_precision, instance_recall, instance_F1 + """ + # obtain the label-based and instance-based accuracy + # compute the label-based accuracy + if gt_result.shape != pt_result.shape: + print('Shape beteen groundtruth and predicted results are different') + # compute the label-based accuracy + result = {} + gt_pos = np.sum((gt_result == 1).astype(float), axis=0) + gt_neg = np.sum((gt_result == 0).astype(float), axis=0) + pt_pos = np.sum((gt_result == 1).astype(float) * (pt_result == 1).astype(float), axis=0) + 
pt_neg = np.sum((gt_result == 0).astype(float) * (pt_result == 0).astype(float), axis=0) + label_pos_acc = 1.0*pt_pos/gt_pos + label_neg_acc = 1.0*pt_neg/gt_neg + label_acc = (label_pos_acc + label_neg_acc)/2 + result['label_pos_acc'] = label_pos_acc + result['label_neg_acc'] = label_neg_acc + result['label_acc'] = label_acc + # compute the instance-based accuracy + # precision + gt_pos = np.sum((gt_result == 1).astype(float), axis=1) + pt_pos = np.sum((pt_result == 1).astype(float), axis=1) + floatersect_pos = np.sum((gt_result == 1).astype(float)*(pt_result == 1).astype(float), axis=1) + union_pos = np.sum(((gt_result == 1)+(pt_result == 1)).astype(float),axis=1) + # avoid empty label in predicted results + cnt_eff = float(gt_result.shape[0]) + for iter, key in enumerate(gt_pos): + if key == 0: + union_pos[iter] = 1 + pt_pos[iter] = 1 + gt_pos[iter] = 1 + cnt_eff = cnt_eff - 1 + continue + if pt_pos[iter] == 0: + pt_pos[iter] = 1 + instance_acc = np.sum(floatersect_pos/union_pos)/cnt_eff + instance_precision = np.sum(floatersect_pos/pt_pos)/cnt_eff + instance_recall = np.sum(floatersect_pos/gt_pos)/cnt_eff + floatance_F1 = 2*instance_precision*instance_recall/(instance_precision+instance_recall) + result['instance_acc'] = instance_acc + result['instance_precision'] = instance_precision + result['instance_recall'] = instance_recall + result['instance_F1'] = floatance_F1 + return result + + +def postprocess_deepmar(npu_result, label_file): + with open(label_file, 'r') as json_file: + image_label = json.load(json_file) + bin_files = os.listdir(npu_result) + counts = len(bin_files) + gt_result = np.zeros((counts, 35)) + pt_result = np.zeros((counts, 35)) + + for index, bin_file in enumerate(bin_files): + result = np.fromfile(os.path.join(npu_result, bin_file), dtype='float32').reshape((35, )) + result[result >= 0] = 1 + result[result < 0] = 0 + pt_result[index, :] = result + + # label + key = bin_file[:bin_file.rfind('_')] + '.png' + label = image_label[key] + label = np.array(label, dtype=np.uint8) + gt_result[index, :] = label + + result = attribute_evaluate_lidw(gt_result, pt_result) + print('-'*80) + print('instance_acc: {}'.format(result['instance_acc'])) + print('instance_precision: {}'.format(result['instance_precision'])) + print('instance_recall: {}'.format(result['instance_recall'])) + print('instance_F1: {}'.format(result['instance_F1'])) + print('-' * 80) + + +if __name__ == "__main__": + npu_result = os.path.abspath(sys.argv[1]) + label_file = os.path.abspath(sys.argv[2]) + postprocess_deepmar(npu_result, label_file) + diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/preprocess_deepmar_pytorch.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/preprocess_deepmar_pytorch.py index 76f285439f7b744b04abb51e968696f4b9ed811a..1a9fd52a6028d063aa62556f6b4f005f24a9e367 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/preprocess_deepmar_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/preprocess_deepmar_pytorch.py @@ -1,73 +1,73 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. 
i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. - """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def deepmar_onnx(file_path, bin_path, image_info): - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - in_files = open(image_info, 'r').read().split('\n')[:-1] - input_size = (224, 224) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - for file in in_files: - i = i + 1 - print(file, "====", i) - img = Image.open(os.path.join(file_path, file)).convert('RGB') - img = resize(img, input_size) - - img = np.array(img, dtype=np.float32) - img = img / 255. - - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - - img = img.transpose(2, 0, 1) # HWC -> CHW - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - image_info = sys.argv[3] - deepmar_onnx(file_path, bin_path, image_info) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def deepmar_onnx(file_path, bin_path, image_info): + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + in_files = open(image_info, 'r').read().split('\n')[:-1] + input_size = (224, 224) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + for file in in_files: + i = i + 1 + print(file, "====", i) + img = Image.open(os.path.join(file_path, file)).convert('RGB') + img = resize(img, input_size) + + img = np.array(img, dtype=np.float32) + img = img / 255. 
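The manual steps in preprocess_deepmar_pytorch.py above and below (scale to [0, 1], subtract the per-channel mean, divide by the per-channel std, then transpose HWC to CHW) are roughly equivalent to the following torchvision pipeline. This is only an illustrative sketch and assumes torchvision is installed; example.jpg is a placeholder file name.

from PIL import Image
from torchvision import transforms

# ToTensor already scales to [0, 1] and converts HWC -> CHW, so only the
# resize and the mean/std normalization need to be stated explicitly.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
tensor = preprocess(Image.open("example.jpg").convert("RGB"))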
+ + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + + img = img.transpose(2, 0, 1) # HWC -> CHW + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + image_info = sys.argv[3] + deepmar_onnx(file_path, bin_path, image_info) diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/remove_pad.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/remove_pad.py index 572c41f3f516645453cc2d2f4bf654beecd9260d..70e8ccfb7ea1b567ea0e84d0781383287e304f80 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/remove_pad.py +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/remove_pad.py @@ -1,19 +1,19 @@ -import onnx - -model = onnx.load("Deepmar.onnx") - -model.graph.node[174].input[0] = '492' - -node_list = ["Pad_173",'Constant_172'] -max_idx = len(model.graph.node) -rm_cnt = 0 -for i in range(len(model.graph.node)): - if i < max_idx: - n = model.graph.node[i - rm_cnt] - if n.name in node_list: - print("remove {} total {}".format(n.name, len(model.graph.node))) - model.graph.node.remove(n) - max_idx -= 1 - rm_cnt += 1 -onnx.checker.check_model(model) +import onnx + +model = onnx.load("Deepmar.onnx") + +model.graph.node[174].input[0] = '492' + +node_list = ["Pad_173",'Constant_172'] +max_idx = len(model.graph.node) +rm_cnt = 0 +for i in range(len(model.graph.node)): + if i < max_idx: + n = model.graph.node[i - rm_cnt] + if n.name in node_list: + print("remove {} total {}".format(n.name, len(model.graph.node))) + model.graph.node.remove(n) + max_idx -= 1 + rm_cnt += 1 +onnx.checker.check_model(model) onnx.save(model, "Deepmar_nopad.onnx") \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/split_test_data.py b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/split_test_data.py index 955f4722d39dc267b8e20b398a952c12fe43cda7..19a81059a630778cfa47cc5e10395d9443e88ef5 100644 --- a/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/split_test_data.py +++ b/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch/split_test_data.py @@ -1,25 +1,25 @@ -import pickle -import numpy as np -import json - - -image = [] -label = [] -dataset = pickle.load(open('./dataset/peta/peta_dataset.pkl', 'rb'), encoding='utf-8') -train_split = pickle.load(open('./dataset/peta/peta_partition.pkl', 'rb'), encoding='utf-8') - -for idx in train_split['test'][0]: - image.append(dataset['image'][idx]) - label_tmp = np.array(dataset['att'][idx])[dataset['selected_attribute']].tolist() - label.append(label_tmp) - -with open('image.txt', 'w') as f: - for name in image: - f.write(name) - f.write('\n') - -image_label = dict(zip(image, label)) -with open('label.json', 'w') as json_file: - json.dump(image_label, json_file) - - +import pickle +import numpy as np +import json + + +image = [] +label = [] +dataset = pickle.load(open('./dataset/peta/peta_dataset.pkl', 'rb'), encoding='utf-8') +train_split = pickle.load(open('./dataset/peta/peta_partition.pkl', 'rb'), encoding='utf-8') + +for idx in train_split['test'][0]: + image.append(dataset['image'][idx]) + label_tmp = np.array(dataset['att'][idx])[dataset['selected_attribute']].tolist() + label.append(label_tmp) + +with open('image.txt', 'w') as f: + for name in image: + f.write(name) + f.write('\n') + +image_label = dict(zip(image, label)) +with open('label.json', 'w') as json_file: + json.dump(image_label, json_file) + + diff --git 
a/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/PytorchTransfer.py b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/PytorchTransfer.py index 1c8e9d29c92f2d69001ba19ba7cd9d0fc0ff3508..c002102863d7a72985903ee80e328dcfd4664da5 100644 --- a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/PytorchTransfer.py +++ b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/PytorchTransfer.py @@ -1,46 +1,46 @@ -import sys -import os -import torch -import cv2 -from PIL import Image -import numpy as np -import torch.utils.data -import torchvision.transforms as transforms -from torch.autograd import Variable - - -def resnet50_onnx(input_path: str, output_path: str): - img = cv2.imread(input_path) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - pilimg = Image.fromarray(img) - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - val_transformer = transforms.Compose([ - transforms.Scale(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize - ]) - - img_tensor = val_transformer(pilimg) - img_tensor = torch.unsqueeze(img_tensor, dim=0).float() - img_tensor = Variable(img_tensor, requires_grad=False) - img_tensor.reshape(1, 3, 224, 224) - img_numpy = img_tensor.cpu().numpy() - - img_name = input_path.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - output_fl = os.path.join(output_path, bin_name) - # save img_tensor as binary file for om inference input - img_numpy.tofile(output_fl) - -if __name__ == "__main__": - input_img_dir = sys.argv[1] - output_img_dir = sys.argv[2] - images = os.listdir(input_img_dir) - for image_name in images: - if not image_name.endswith(".jpeg"): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(input_img_dir, image_name) +import sys +import os +import torch +import cv2 +from PIL import Image +import numpy as np +import torch.utils.data +import torchvision.transforms as transforms +from torch.autograd import Variable + + +def resnet50_onnx(input_path: str, output_path: str): + img = cv2.imread(input_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pilimg = Image.fromarray(img) + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + val_transformer = transforms.Compose([ + transforms.Scale(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize + ]) + + img_tensor = val_transformer(pilimg) + img_tensor = torch.unsqueeze(img_tensor, dim=0).float() + img_tensor = Variable(img_tensor, requires_grad=False) + img_tensor.reshape(1, 3, 224, 224) + img_numpy = img_tensor.cpu().numpy() + + img_name = input_path.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + output_fl = os.path.join(output_path, bin_name) + # save img_tensor as binary file for om inference input + img_numpy.tofile(output_fl) + +if __name__ == "__main__": + input_img_dir = sys.argv[1] + output_img_dir = sys.argv[2] + images = os.listdir(input_img_dir) + for image_name in images: + if not image_name.endswith(".jpeg"): + continue + print("start to 
process image {}....".format(image_name))
+        path_image = os.path.join(input_img_dir, image_name)
         resnet50_onnx(path_image, output_img_dir)
\ No newline at end of file
diff --git a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/ReadMe.md
index 740e2ce6aea460a99c62dd0cb16fc6c28ee929bf..0e9f686545e1d92bb84cbb28d42ff7e1d2d310e8 100644
--- a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/ReadMe.md
+++ b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/ReadMe.md
@@ -1,48 +1,48 @@
-File descriptions:
-
-1.auto_tune.sh: model conversion script with integrated auto tune, which can be disabled manually
-
-2.pth2onnx.py: converts a pth file to an onnx file
-
-3.pthtar2onnx.py: converts a pth.tar file to an onnx file
-
-4.BinaryImageNet.info: ImageNet dataset information, used by the benchmark tool to load the dataset
-
-5.PytorchTransfer.py: dataset preprocessing script that normalizes images with mean and standard deviation
-
-6.val_label.txt: ImageNet dataset labels, used to verify the inference results
-
-7.vision_metric_ImageNet.py: result verification script that compares the classification results output by the benchmark tool against the labels and reports Accuracy
-
-8.benchmark tool source code: https://gitee.com/ascend/cann-benchmark/tree/master/infer
-
-
-
-
-
-End-to-end inference steps:
-
-(1) Download the resnet50 model from Torchvision, or point to your own trained pth file, and convert it to an onnx model with the pth2onnx.py script
-
-
-
-(2) Run the auto_tune.sh script to convert the om model; auto_tune can also be disabled manually
-
-A tuned om model is already provided with this demo
-
-
-
-(3) Preprocess the dataset with the PytorchTransfer.py script, and configure the path of the processed binary dataset with reference to BinaryImageNet.Info
-
-
-
-(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False
-
-Run the benchmark inference; the results are saved in the ./result directory
-
-
-
-(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json
-
-Verify the inference results
-
+File descriptions:
+
+1.auto_tune.sh: model conversion script with integrated auto tune, which can be disabled manually
+
+2.pth2onnx.py: converts a pth file to an onnx file
+
+3.pthtar2onnx.py: converts a pth.tar file to an onnx file
+
+4.BinaryImageNet.info: ImageNet dataset information, used by the benchmark tool to load the dataset
+
+5.PytorchTransfer.py: dataset preprocessing script that normalizes images with mean and standard deviation
+
+6.val_label.txt: ImageNet dataset labels, used to verify the inference results
+
+7.vision_metric_ImageNet.py: result verification script that compares the classification results output by the benchmark tool against the labels and reports Accuracy
+
+8.benchmark tool source code: https://gitee.com/ascend/cann-benchmark/tree/master/infer
+
+
+
+
+
+End-to-end inference steps:
+
+(1) Download the resnet50 model from Torchvision, or point to your own trained pth file, and convert it to an onnx model with the pth2onnx.py script
+
+
+
+(2) Run the auto_tune.sh script to convert the om model; auto_tune can also be disabled manually
+
+A tuned om model is already provided with this demo
+
+
+
+(3) Preprocess the dataset with the PytorchTransfer.py script, and configure the path of the processed binary dataset with reference to BinaryImageNet.Info
+
+
+
+(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False
+
+Run the benchmark inference; the results are saved in the ./result directory
+
+
+
+(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json
+
+Verify the inference results
+
diff --git a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/pthtar2onnx.py b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/pthtar2onnx.py
index 0784264fc9e869584f24cb1c0fa621d48d5ab5a2..10fd93f53f9bf3d6ce92e6aaf0a88ce8cfa29ed3 100644
--- a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/pthtar2onnx.py
+++ b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/pthtar2onnx.py
@@ -1,36 +1,36 @@
-import torch
-#from efficientnet_pytorch import EfficientNet
-from NPU.efficientnet_pytorch import EfficientNet
-import torch.onnx
-
-from collections import OrderedDict
-
-def proc_nodes_module(checkpoint,AttrName):
-    new_state_dict = OrderedDict()
-    for k,v in checkpoint[AttrName].items():
-        if(k[0:7] == "module."):
-            name = k[7:]
-        else:
-            name = k[0:]
-
-        new_state_dict[name]=v
-    return new_state_dict
-
-def convert():
-    checkpoint = torch.load("./checkpoint.pth.140.ok.cpu", map_location='cpu')
-    checkpoint['state_dict'] = proc_nodes_module(checkpoint,'state_dict')
-    model = 
EfficientNet.from_name('efficientnet-b0') - model.set_swish(memory_efficient=False) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - #print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(1, 3, 224, 224) - #dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} - torch.onnx.export(model, dummy_input, "efficientnet_tr.onnx", input_names = input_names, output_names = output_names, opset_version=11) - #torch.onnx.export(model, dummy_input, "efficientnet_dynamic.onnx", input_names = input_names, output_names = output_names, dynamic_axes = dynamic_axes, opset_version=11) - -if __name__ == "__main__": - convert() +import torch +#from efficientnet_pytorch import EfficientNet +from NPU.efficientnet_pytorch import EfficientNet +import torch.onnx + +from collections import OrderedDict + +def proc_nodes_module(checkpoint,AttrName): + new_state_dict = OrderedDict() + for k,v in checkpoint[AttrName].items(): + if(k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + + new_state_dict[name]=v + return new_state_dict + +def convert(): + checkpoint = torch.load("./checkpoint.pth.140.ok.cpu", map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint,'state_dict') + model = EfficientNet.from_name('efficientnet-b0') + model.set_swish(memory_efficient=False) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + #print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 224, 224) + #dynamic_axes = {'actual_input_1': {0: '-1'}, 'output1': {0: '-1'}} + torch.onnx.export(model, dummy_input, "efficientnet_tr.onnx", input_names = input_names, output_names = output_names, opset_version=11) + #torch.onnx.export(model, dummy_input, "efficientnet_dynamic.onnx", input_names = input_names, output_names = output_names, dynamic_axes = dynamic_axes, opset_version=11) + +if __name__ == "__main__": + convert() diff --git a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/vision_metric_ImageNet.py index 7bba5f8346a8893b4567d92b900fc4a651115976..f07c93617e661619c0e00be6a972e39fbde966a0 100644 --- a/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch/vision_metric_ImageNet.py @@ -1,173 +1,173 @@ -#coding = utf-8 -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - 
img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - #print(filepath) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - #print(img_gt_dict) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - # print(img_name) - #print(n_labels) - # print(gt) - - resCnt = min(len(sort_index), topn) - # print(sort_index[:5]) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - #print("***************") - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - #print("Time 
used:", elapsed) +#coding = utf-8 +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + #print(filepath) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + #print(img_gt_dict) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + # print(img_name) + #print(n_labels) + # print(gt) + + resCnt = min(len(sort_index), topn) + # print(sort_index[:5]) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + #print("***************") + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + 
str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + #print("Time used:", elapsed) diff --git a/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/LICENSE b/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/LICENSE index 8904c8516082056802ee732a4213ceab8c4a93af..5f7aa69fea22dade3f519868400025de434ae8ca 100644 --- a/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/flownet2.patch b/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/flownet2.patch index be8fdbe2c67ba45ee9c119295e6d17457e88b3ea..4b175db7dab2aa172a433038939af6d8c190c628 100644 --- a/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/flownet2.patch +++ b/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch/flownet2.patch @@ -3,58 +3,58 @@ index 8457d2f..34380a6 100755 --- a/models.py +++ b/models.py @@ -6,8 +6,10 @@ import math - import numpy as np - - try: -- from networks.resample2d_package.resample2d import Resample2d -- from networks.channelnorm_package.channelnorm import ChannelNorm -+ # from networks.resample2d_package.resample2d import Resample2d -+ # from networks.channelnorm_package.channelnorm import ChannelNorm -+ from networks.resample2d_onnx import Resample2d -+ from networks.channelnorm_onnx import ChannelNorm - - from networks import FlowNetC - from networks import FlowNetS + import numpy as np + + try: +- from networks.resample2d_package.resample2d import Resample2d +- from networks.channelnorm_package.channelnorm import ChannelNorm ++ # from networks.resample2d_package.resample2d import Resample2d ++ # from networks.channelnorm_package.channelnorm import ChannelNorm ++ from networks.resample2d_onnx import Resample2d ++ from networks.channelnorm_onnx import ChannelNorm + + from networks import FlowNetC + from networks import FlowNetS @@ -16,8 +18,10 @@ try: - - from networks.submodules import * - except: -- from .networks.resample2d_package.resample2d import Resample2d -- from .networks.channelnorm_package.channelnorm import ChannelNorm -+ # from .networks.resample2d_package.resample2d import Resample2d -+ # from .networks.channelnorm_package.channelnorm import ChannelNorm -+ from .networks.resample2d_onnx import Resample2d -+ from .networks.channelnorm_onnx import ChannelNorm - - from .networks import FlowNetC - from .networks import FlowNetS + + from networks.submodules import * + except: +- from .networks.resample2d_package.resample2d import Resample2d +- from .networks.channelnorm_package.channelnorm import ChannelNorm ++ # from .networks.resample2d_package.resample2d import Resample2d ++ # from .networks.channelnorm_package.channelnorm import ChannelNorm ++ from .networks.resample2d_onnx import Resample2d ++ from .networks.channelnorm_onnx import ChannelNorm + + from .networks import FlowNetC + from .networks import FlowNetS @@ -33,7 +37,7 @@ class FlowNet2(nn.Module): - super(FlowNet2,self).__init__() - self.batchNorm = batchNorm - self.div_flow = div_flow -- self.rgb_max = args.rgb_max -+ # self.rgb_max = args.rgb_max - self.args = args - - self.channelnorm = ChannelNorm() + super(FlowNet2,self).__init__() + self.batchNorm = batchNorm + self.div_flow = div_flow +- self.rgb_max = args.rgb_max ++ # self.rgb_max = args.rgb_max + self.args = args + + self.channelnorm = ChannelNorm() @@ -117,12 +121,12 @@ class FlowNet2(nn.Module): - weight.data[i,i,:,:] = torch.from_numpy(bilinear) - return - -- def forward(self, inputs): -- rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,)) -+ def forward(self, x1, x2): -+ # rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,)) - -- x = (inputs - rgb_mean) / self.rgb_max -- x1 = x[:,:,0,:,:] -- x2 = x[:,:,1,:,:] -+ # x = (inputs - rgb_mean) / self.rgb_max -+ # x1 = x[:,:,0,:,:] -+ # x2 = x[:,:,1,:,:] - x = torch.cat((x1,x2), dim = 1) - - # flownetc + 
weight.data[i,i,:,:] = torch.from_numpy(bilinear) + return + +- def forward(self, inputs): +- rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,)) ++ def forward(self, x1, x2): ++ # rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,)) + +- x = (inputs - rgb_mean) / self.rgb_max +- x1 = x[:,:,0,:,:] +- x2 = x[:,:,1,:,:] ++ # x = (inputs - rgb_mean) / self.rgb_max ++ # x1 = x[:,:,0,:,:] ++ # x2 = x[:,:,1,:,:] + x = torch.cat((x1,x2), dim = 1) + + # flownetc diff --git a/networks/FlowNetC.py b/networks/FlowNetC.py index 61e117a..32e130d 100755 --- a/networks/FlowNetC.py diff --git a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/ReadMe.md index 7e082d9620853e4373b2381a488999551ec324bd..bec682de971ac4ebc7fd220ad28dcd3fa9cdc0df 100644 --- a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/ReadMe.md @@ -1,63 +1,63 @@ -文件作用说明: - -1.googlenet_pth2onnx.py:用于转换pth模型文件到onnx模型文件 - -2.googlenet_atc.sh:onnx模型转换om模型脚本 - -3.preprocess_googlenet_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 - -4.aipp_googlenet_pth.config:数据集aipp预处理配置文件 - -5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 - -6.googlenet_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 - -7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 - -8.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - -10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - - - - - -推理端到端步骤: - -(1) 从Torchvision下载googlenet模型,通过googlenet_pth2onnx.py脚本转化为onnx模型 - - - -(2)运行googlenet_atc.sh脚本转换om模型 - -本demo已提供调优完成的om模型 - - - -(3)用preprocess_googlenet_pth.py脚本处理数据集,参考googlenet_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_googlenet_pth.config。 - python3 preprocess_googlenet_pth.py dataset/ImageNet/val_union/ pre_bin - - - -(4)生成推理输入的数据集二进制info文件或jpg info文件 - python3 get_info.py bin pre_bin googlenet_val.info 224 224 - python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info - - - -(5)使用benchmark离线推理 - ./benchmark -model_type=vision -om_path=googlenet_bs16.om -device_id=0 -batch_size=16 -input_text_path=googlenet_val.info -input_width=224 -input_height=224 -useDvpp=false - 或者 - ./benchmark -model_type=vision -om_path=googlenet_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=256 -input_height=256 -useDvpp=true - - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.googlenet_pth2onnx.py:用于转换pth模型文件到onnx模型文件 + +2.googlenet_atc.sh:onnx模型转换om模型脚本 + +3.preprocess_googlenet_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 + +4.aipp_googlenet_pth.config:数据集aipp预处理配置文件 + +5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 + +6.googlenet_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 + +7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 + +8.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + +10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + + + + + +推理端到端步骤: + +(1) 从Torchvision下载googlenet模型,通过googlenet_pth2onnx.py脚本转化为onnx模型 + + + +(2)运行googlenet_atc.sh脚本转换om模型 + +本demo已提供调优完成的om模型 + + + +(3)用preprocess_googlenet_pth.py脚本处理数据集,参考googlenet_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_googlenet_pth.config。 + python3 preprocess_googlenet_pth.py 
dataset/ImageNet/val_union/ pre_bin + + + +(4)生成推理输入的数据集二进制info文件或jpg info文件 + python3 get_info.py bin pre_bin googlenet_val.info 224 224 + python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info + + + +(5)使用benchmark离线推理 + ./benchmark -model_type=vision -om_path=googlenet_bs16.om -device_id=0 -batch_size=16 -input_text_path=googlenet_val.info -input_width=224 -input_height=224 -useDvpp=false + 或者 + ./benchmark -model_type=vision -om_path=googlenet_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=256 -input_height=256 -useDvpp=true + + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/get_info.py index 0578e4f00cd9661c6dcfa7db7b72f196677ff422..7b14c54b909b60730e9e3471ee0435ee4cb8622f 100644 --- a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/get_info.py @@ -1,46 +1,46 @@ -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/preprocess_googlenet_pth.py b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/preprocess_googlenet_pth.py index 04f41f75979bcbc2ca8c939b37b29d8a59f8d22a..965681f30d9a5413948b7fb1cdd158d1ac2e5e22 100644 --- a/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/preprocess_googlenet_pth.py +++ b/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch/preprocess_googlenet_pth.py @@ -1,85 +1,85 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. - """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - - resize_size = 256 - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - - for file in in_files: - i = i + 1 - print(file, "===", i) - - img = Image.open(os.path.join(file_path, file)).convert('RGB') - - img = resize(img, resize_size) # transforms.Resize(256) - img = np.array(img, dtype=np.float32) - img = center_crop(img, 224, 224) # transforms.CenterCrop(224) - img = img / 255. 
# transforms.ToTensor() - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - img = img.transpose(2, 0, 1) # HWC -> CHW - - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + + resize_size = 256 + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + for file in in_files: + i = i + 1 + print(file, "===", i) + + img = Image.open(os.path.join(file_path, file)).convert('RGB') + + img = resize(img, resize_size) # transforms.Resize(256) + img = np.array(img, dtype=np.float32) + img = center_crop(img, 224, 224) # transforms.CenterCrop(224) + img = img / 255. # transforms.ToTensor() + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + img = img.transpose(2, 0, 1) # HWC -> CHW + + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/ReadMe.md index ec24e9ba99349208babf44d30998a10b12777715..ffd9dfc52a03b17b8a7c92201304a020183cda1a 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/ReadMe.md @@ -1,95 +1,95 @@ -文件作用说明: - -1. inceptionv3_pth2onnx.py:用于转换pth模型文件到onnx模型文件 -2. gen_calibration_bin.py:生成bin格式数据集脚本,数据集用于量化校准 -3. inceptionv3_atc.sh:onnx模型转换om模型脚本 -4. imagenet_torch_preprocess.py:数据集预处理脚本,对图片进行缩放裁剪,生成图片二进制文件 -5. aipp_inceptionv3_pth.config:数据集aipp预处理配置文件 -6. gen_dataset_info.py:生成推理输入的数据集二进制info文件 -7. env.sh:环境变量文件 -8. 
benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer -9. vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - - - -推理端到端步骤: - -1. 从Torchvision下载inceptionv3模型,通过inceptionv3_pth2onnx.py脚本转化为onnx模型 - -2. ONNX模型量化 - - 1. AMCT工具包安装,具体参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的昇腾模型压缩工具使用指南(ONNX)章节; - - 2. 生成bin格式数据集,数据集用于校正量化因子。当前模型为动态batch,建议使用较大的batch size: - - ``` - python3.7 gen_calibration_bin.py inceptionv3 /root/dataset/ImageNet/val_union ./calibration_bin 32 1 - ``` - - 参数说明: - - - inceptionv3:模型类型 - - /root/dataset/ImageNet/val_union :模型使用的数据集路径; - - ./calibration_bin:生成的bin格式数据集路径; - - 32:batch size; - - 1:batch num。 - - 3. ONNX模型量化 - - ``` - amct_onnx calibration --model inceptionv3.onnx --save_path ./result/inceptionv3 --input_shape "actual_input_1:32,3,299,299" --data_dir "./calibration_bin" --data_types "float32" - ``` - - 会在result目录下生成inceptionv3_deploy_model.onnx量化模型 - - 4. 量化模型后续的推理验证流程和非量化一致。 - -3. 运行inceptionv3_atc.sh脚本转换om模型 - -4. 用imagenet_torch_preprocess.py脚本处理数据集 - -``` -python3.7 imagenet_torch_preprocess.py inceptionv3 /root/dataset/ImageNet/val_union ./prep_dataset -``` - -5. 生成推理输入的数据集二进制info文件 - -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./inceptionv3_prep_bin.info 299 299 -``` - -6. 设置环境变量 - -``` -source env.sh -``` - -7. 使用benchmark离线推理 - -``` -./benchmark.x86_64 -model_type=vision -om_path=inceptionv3_bs8.om -device_id=0 -batch_size=8 -input_text_path=inceptionv3_prep_bin.info -input_width=299 -input_height=299 -output_binary=False -useDvpp=False -``` - -运行benchmark推理,结果保存在 ./result 目录下 - -8. 验证推理结果 - -``` -python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ /root/dataset/ImageNet/val_label.txt ./ result.json -``` - - - - -模型获取 - -可以使用如下命令获取PyTorch框架的原始模型和转换后的Onnx模型 - -Pytorch: -``` -wget https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/InceptionV3/inception_v3.pth -``` -ONNX: -``` -wget https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/InceptionV3/inceptionv3.onnx -``` +文件作用说明: + +1. inceptionv3_pth2onnx.py:用于转换pth模型文件到onnx模型文件 +2. gen_calibration_bin.py:生成bin格式数据集脚本,数据集用于量化校准 +3. inceptionv3_atc.sh:onnx模型转换om模型脚本 +4. imagenet_torch_preprocess.py:数据集预处理脚本,对图片进行缩放裁剪,生成图片二进制文件 +5. aipp_inceptionv3_pth.config:数据集aipp预处理配置文件 +6. gen_dataset_info.py:生成推理输入的数据集二进制info文件 +7. env.sh:环境变量文件 +8. benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer +9. vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + + + +推理端到端步骤: + +1. 从Torchvision下载inceptionv3模型,通过inceptionv3_pth2onnx.py脚本转化为onnx模型 + +2. ONNX模型量化 + + 1. AMCT工具包安装,具体参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的昇腾模型压缩工具使用指南(ONNX)章节; + + 2. 生成bin格式数据集,数据集用于校正量化因子。当前模型为动态batch,建议使用较大的batch size: + + ``` + python3.7 gen_calibration_bin.py inceptionv3 /root/dataset/ImageNet/val_union ./calibration_bin 32 1 + ``` + + 参数说明: + + - inceptionv3:模型类型 + - /root/dataset/ImageNet/val_union :模型使用的数据集路径; + - ./calibration_bin:生成的bin格式数据集路径; + - 32:batch size; + - 1:batch num。 + + 3. 
ONNX模型量化 + + ``` + amct_onnx calibration --model inceptionv3.onnx --save_path ./result/inceptionv3 --input_shape "actual_input_1:32,3,299,299" --data_dir "./calibration_bin" --data_types "float32" + ``` + + 会在result目录下生成inceptionv3_deploy_model.onnx量化模型 + + 4. 量化模型后续的推理验证流程和非量化一致。 + +3. 运行inceptionv3_atc.sh脚本转换om模型 + +4. 用imagenet_torch_preprocess.py脚本处理数据集 + +``` +python3.7 imagenet_torch_preprocess.py inceptionv3 /root/dataset/ImageNet/val_union ./prep_dataset +``` + +5. 生成推理输入的数据集二进制info文件 + +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./inceptionv3_prep_bin.info 299 299 +``` + +6. 设置环境变量 + +``` +source env.sh +``` + +7. 使用benchmark离线推理 + +``` +./benchmark.x86_64 -model_type=vision -om_path=inceptionv3_bs8.om -device_id=0 -batch_size=8 -input_text_path=inceptionv3_prep_bin.info -input_width=299 -input_height=299 -output_binary=False -useDvpp=False +``` + +运行benchmark推理,结果保存在 ./result 目录下 + +8. 验证推理结果 + +``` +python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ /root/dataset/ImageNet/val_label.txt ./ result.json +``` + + + + +模型获取 + +可以使用如下命令获取PyTorch框架的原始模型和转换后的Onnx模型 + +Pytorch: +``` +wget https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/InceptionV3/inception_v3.pth +``` +ONNX: +``` +wget https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/InceptionV3/inceptionv3.onnx +``` diff --git a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/imagenet_torch_preprocess.py index ed4b4e2a5380e3ee608287596412963d92b9bb79..6f89d347b52642cc3a1fc8ba73dae2d4230e7a0c 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/imagenet_torch_preprocess.py @@ -1,113 +1,113 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.int8) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.int8) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/requirements.txt b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/requirements.txt index 112989ef5d8909e534ff5ee3b793acb47df14ee0..1845fd861663faaea2c8d5fc747bbb4b35552526 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/requirements.txt +++ b/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.6.0 -torchvision == 0.7.0 -onnx == 1.7.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.6.0 +torchvision == 0.7.0 +onnx == 1.7.0 +numpy == 1.18.5 +Pillow == 7.2.0 diff --git a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/ReadMe.md index 6cab5096719f409f408e4f7f715e7ae88dcf363e..287a11d8a27e8dbdedad7b4c563f4333a1baa77a 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/ReadMe.md @@ -1,63 +1,63 @@ -文件作用说明: - -1.inceptionv4_pth2onnx.py:用于转换pth模型文件到onnx模型文件 - -2.inceptionv4_atc.sh:onnx模型转换om模型脚本 - -3.preprocess_inceptionv4_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 - -4.aipp_inceptionv4_pth.config:数据集aipp预处理配置文件 - -5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 - -6.inceptionv4_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 - -7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 - -8.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - -10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - - - - - -推理端到端步骤: - -(1) 从https://github.com/Cadene/pretrained-models.pytorch下载inceptionv4模型,通过inceptionv4_pth2onnx.py脚本转化为onnx模型 - 安装pretrainedmodels后需修改pretrained-models.pytorch/pretrainedmodels/models/inceptionv4.py:adaptiveAvgPoolWidth = features.shape[2].item() - - - -(2)运行inceptionv4_atc.sh脚本转换om模型 - -本demo已提供调优完成的om模型 - - - -(3)用preprocess_inceptionv4_pth.py脚本处理数据集,参考inceptionv4_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_inceptionv4_pth.config。 - python3 preprocess_inceptionv4_pth.py dataset/ImageNet/val_union/ prep_bin - - - -(4)生成推理输入的数据集二进制info文件或jpg info文件 - python3 get_info.py bin prep_bin inceptionv4_val.info 299 299 - python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info - - - -(5)使用benchmark离线推理 - ./benchmark -model_type=vision -om_path=inceptionv4_bs16.om -device_id=0 -batch_size=16 -input_text_path=inceptionv4_val.info -input_width=299 -input_height=299 -useDvpp=false - 或者 - ./benchmark -model_type=vision -om_path=inceptionv4_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=336 -input_height=336 -useDvpp=true - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.inceptionv4_pth2onnx.py:用于转换pth模型文件到onnx模型文件 + +2.inceptionv4_atc.sh:onnx模型转换om模型脚本 + 
+3.preprocess_inceptionv4_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 + +4.aipp_inceptionv4_pth.config:数据集aipp预处理配置文件 + +5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 + +6.inceptionv4_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 + +7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 + +8.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + +10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + + + + + +推理端到端步骤: + +(1) 从https://github.com/Cadene/pretrained-models.pytorch下载inceptionv4模型,通过inceptionv4_pth2onnx.py脚本转化为onnx模型 + 安装pretrainedmodels后需修改pretrained-models.pytorch/pretrainedmodels/models/inceptionv4.py:adaptiveAvgPoolWidth = features.shape[2].item() + + + +(2)运行inceptionv4_atc.sh脚本转换om模型 + +本demo已提供调优完成的om模型 + + + +(3)用preprocess_inceptionv4_pth.py脚本处理数据集,参考inceptionv4_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_inceptionv4_pth.config。 + python3 preprocess_inceptionv4_pth.py dataset/ImageNet/val_union/ prep_bin + + + +(4)生成推理输入的数据集二进制info文件或jpg info文件 + python3 get_info.py bin prep_bin inceptionv4_val.info 299 299 + python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info + + + +(5)使用benchmark离线推理 + ./benchmark -model_type=vision -om_path=inceptionv4_bs16.om -device_id=0 -batch_size=16 -input_text_path=inceptionv4_val.info -input_width=299 -input_height=299 -useDvpp=false + 或者 + ./benchmark -model_type=vision -om_path=inceptionv4_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=336 -input_height=336 -useDvpp=true + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/get_info.py index 0578e4f00cd9661c6dcfa7db7b72f196677ff422..7b14c54b909b60730e9e3471ee0435ee4cb8622f 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/get_info.py @@ -1,46 +1,46 @@ -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/preprocess_inceptionv4_pth.py b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/preprocess_inceptionv4_pth.py index bf9f930b05be0bba0482fc85e8f72b84db205d32..591fdb1db16ba4e37b813afc1b1242b932dd8b10 100644 --- a/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/preprocess_inceptionv4_pth.py +++ b/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch/preprocess_inceptionv4_pth.py @@ -1,85 +1,85 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. 
- """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - - resize_size = 342 - mean = [0.5, 0.5, 0.5] - std = [0.5, 0.5, 0.5] - - for file in in_files: - i = i + 1 - print(file, "===", i) - - img = Image.open(os.path.join(file_path, file)).convert('RGB') - - img = resize(img, resize_size) # transforms.Resize(342) - img = np.array(img, dtype=np.float32) - img = center_crop(img, 299, 299) # transforms.CenterCrop(299) - img = img / 255. # transforms.ToTensor() - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - img = img.transpose(2, 0, 1) # HWC -> CHW - - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + + resize_size = 342 + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + + for file in in_files: + i = i + 1 + print(file, "===", i) + + img = Image.open(os.path.join(file_path, file)).convert('RGB') + + img = resize(img, resize_size) # transforms.Resize(342) + img = np.array(img, dtype=np.float32) + img = center_crop(img, 299, 299) # transforms.CenterCrop(299) + img = img / 255. 
# transforms.ToTensor() + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + img = img.transpose(2, 0, 1) # HWC -> CHW + + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/PytorchTransfer.py b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/PytorchTransfer.py index 15680c59ff5a61a0b0d01d869bc39087000b7dc1..15eeaaa895e24bcf280649fb8f5bc9a8f1288f31 100644 --- a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/PytorchTransfer.py +++ b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/PytorchTransfer.py @@ -1,47 +1,47 @@ -import sys -import os -import torch -import cv2 -from PIL import Image -import numpy as np -import torch.utils.data -import torchvision.transforms as transforms -from torch.autograd import Variable - - -def mobilenet_onnx(input_path: str, output_path: str): - img = cv2.imread(input_path) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - pilimg = Image.fromarray(img) - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - val_transformer = transforms.Compose([ - transforms.Scale(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize - ]) - - img_tensor = val_transformer(pilimg) - img_tensor = torch.unsqueeze(img_tensor, dim=0).float() - img_tensor = Variable(img_tensor, requires_grad=False) - img_tensor.reshape(1, 3, 224, 224) - img_numpy = img_tensor.cpu().numpy() - - img_name = input_path.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - output_fl = os.path.join(output_path, bin_name) - # save img_tensor as binary file for om inference input - img_numpy.tofile(output_fl) - - -if __name__ == "__main__": - input_img_dir = sys.argv[1] - output_img_dir = sys.argv[2] - images = os.listdir(input_img_dir) - for image_name in images: - if not image_name.endswith(".jpeg"): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(input_img_dir, image_name) - mobilenet_onnx(path_image, output_img_dir) +import sys +import os +import torch +import cv2 +from PIL import Image +import numpy as np +import torch.utils.data +import torchvision.transforms as transforms +from torch.autograd import Variable + + +def mobilenet_onnx(input_path: str, output_path: str): + img = cv2.imread(input_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pilimg = Image.fromarray(img) + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + val_transformer = transforms.Compose([ + transforms.Scale(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize + ]) + + img_tensor = val_transformer(pilimg) + img_tensor = torch.unsqueeze(img_tensor, dim=0).float() + img_tensor = Variable(img_tensor, requires_grad=False) + img_tensor.reshape(1, 3, 224, 224) + img_numpy = img_tensor.cpu().numpy() + + img_name = input_path.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + output_fl = os.path.join(output_path, bin_name) + # save img_tensor as binary file for om inference input + img_numpy.tofile(output_fl) + + +if __name__ == "__main__": + input_img_dir = sys.argv[1] + output_img_dir = sys.argv[2] + images = os.listdir(input_img_dir) + for image_name in images: + if not image_name.endswith(".jpeg"): + continue + print("start to 
process image {}....".format(image_name)) + path_image = os.path.join(input_img_dir, image_name) + mobilenet_onnx(path_image, output_img_dir) diff --git a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/ReadMe.md index 2d95c4d9fe555d6663f3d0822d3d897758ff1a5a..37b1c856380e43dfa866479f1dab53645e9e5a1c 100644 --- a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/ReadMe.md @@ -1,48 +1,48 @@ -文件作用说明: - -1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 - -2.pth2onnx.py:用于转换pth文件到onnx文件 - -3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 - -4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 - -5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 - -6.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - - - - - -推理端到端步骤: - -(1) 从Torchvision下载mobilenetV2模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 - - - -(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune - -本demo已提供调优完成的om模型 - - - -(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 - - - -(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 + +2.pth2onnx.py:用于转换pth文件到onnx文件 + +3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 + +4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 + +5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 + +6.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + +8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + + + + + +推理端到端步骤: + +(1) 从Torchvision下载mobilenetV2模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 + + + +(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune + +本demo已提供调优完成的om模型 + + + +(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 + + + +(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pth2onnx.py b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pth2onnx.py index e147398b6df571332e3c5191153ce7496ce30a4b..8b54912bf92699148db1b0d2d314c68761a99f58 100644 --- a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pth2onnx.py @@ -1,27 +1,27 @@ -import torch -import torch.onnx -import torchvision.models as models - - -def convert(): - model = models.mobilenet_v2(pretrained=False) - pthfile = './mobilenet_v2-b0353104.pth' - mobilenet_v2 = torch.load(pthfile, map_location='cpu') - model.load_state_dict(mobilenet_v2) - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - torch.onnx.export( - model, - dummy_input, - "mobilenet_v2_16.onnx", - input_names=input_names, - output_names=output_names, - opset_version=11) - - -if __name__ == "__main__": - convert() - +import torch +import torch.onnx 
+import torchvision.models as models + + +def convert(): + model = models.mobilenet_v2(pretrained=False) + pthfile = './mobilenet_v2-b0353104.pth' + mobilenet_v2 = torch.load(pthfile, map_location='cpu') + model.load_state_dict(mobilenet_v2) + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(16, 3, 224, 224) + torch.onnx.export( + model, + dummy_input, + "mobilenet_v2_16.onnx", + input_names=input_names, + output_names=output_names, + opset_version=11) + + +if __name__ == "__main__": + convert() + diff --git a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pthtar2onnx.py b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pthtar2onnx.py index ba5c4a2a594a478f4039950abc22e8fe439bf3c4..96cfdc837274461c7bc4e5cfe81e5cd01dba7fd5 100644 --- a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pthtar2onnx.py +++ b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/pthtar2onnx.py @@ -1,41 +1,41 @@ -import torch -import torch.onnx -import mobilenet -from collections import OrderedDict - - -def proc_nodes_module(checkpoint, attr_name): - new_state_dict = OrderedDict() - for key, value in checkpoint[attr_name].items(): - if key == "module.features.0.0.weight": - print(value) - if key[0:7] == "module.": - name = key[7:] - else: - name = key[0:] - - new_state_dict[name] = value - return new_state_dict - - -def convert(): - checkpoint = torch.load("./mobilenet_cpu.pth.tar", map_location=torch.device('cpu')) - checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') - model = mobilenet.mobilenet_v2(pretrained=False) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export( - model, dummy_input, - "mobilenet_v2_npu.onnx", - input_names=input_names, - output_names=output_names, - opset_version=11) # 7 - - -if __name__ == "__main__": - convert() +import torch +import torch.onnx +import mobilenet +from collections import OrderedDict + + +def proc_nodes_module(checkpoint, attr_name): + new_state_dict = OrderedDict() + for key, value in checkpoint[attr_name].items(): + if key == "module.features.0.0.weight": + print(value) + if key[0:7] == "module.": + name = key[7:] + else: + name = key[0:] + + new_state_dict[name] = value + return new_state_dict + + +def convert(): + checkpoint = torch.load("./mobilenet_cpu.pth.tar", map_location=torch.device('cpu')) + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + model = mobilenet.mobilenet_v2(pretrained=False) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export( + model, dummy_input, + "mobilenet_v2_npu.onnx", + input_names=input_names, + output_names=output_names, + opset_version=11) # 7 + + +if __name__ == "__main__": + convert() diff --git a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/vision_metric_ImageNet.py index 7bba5f8346a8893b4567d92b900fc4a651115976..f07c93617e661619c0e00be6a972e39fbde966a0 100644 --- a/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch/vision_metric_ImageNet.py @@ -1,173 +1,173 @@ -#coding = utf-8 -import os -import sys -import json -import numpy as np -import time - 
-np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - #print(filepath) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - #print(img_gt_dict) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - # print(img_name) - #print(n_labels) - # print(gt) - - resCnt = min(len(sort_index), topn) - # print(sort_index[:5]) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - #print("***************") - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - #print("Top" + str(i + 1) 
+ " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - #print("Time used:", elapsed) +#coding = utf-8 +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = 
temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + #print(filepath) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + #print(img_gt_dict) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + # print(img_name) + #print(n_labels) + # print(gt) + + resCnt = min(len(sort_index), topn) + # print(sort_index[:5]) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + #print("***************") + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + #print("Time used:", elapsed) diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/README.md b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/README.md index 66128e38a68c134cb17d5f8b28c807ab3843cfad..151e89d97449d52fcbd31720455d504d5d14983b 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/README.md +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/README.md @@ -1,9 +1,9 @@ -### demo 文件夹内容说明 -- pth转换onnx脚本: export_onnx.py -- ATC转换脚本 run.sh / auto_tune.sh,及aipp配置文件 aipp.conf -- benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer -- 数据集信息 ImgPSENet.info 及二进制数据集信息 PSENet.info -- 二进制数据集预处理脚本: preprocess_psenet_pytorch.py -- 数据集标签: gt.zip -- 二进制后处理脚本: pth_bintotxt_nearest.py, pth_bintotxt_bilinear.py, pypse.py -- 精度评测脚本: Post-processing文件夹,script.py +### demo 文件夹内容说明 +- pth转换onnx脚本: export_onnx.py +- ATC转换脚本 run.sh / auto_tune.sh,及aipp配置文件 aipp.conf +- benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer +- 数据集信息 ImgPSENet.info 及二进制数据集信息 PSENet.info +- 二进制数据集预处理脚本: preprocess_psenet_pytorch.py +- 数据集标签: gt.zip +- 二进制后处理脚本: pth_bintotxt_nearest.py, pth_bintotxt_bilinear.py, pypse.py +- 精度评测脚本: Post-processing文件夹,script.py diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/export_onnx.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/export_onnx.py index 8f0d02b1545f6f4ea5dc6b018b4f92e6b44b7c25..9d5e7ed3ca283c1c3a18a36e5d5a4fe368d315a9 
100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/export_onnx.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/export_onnx.py @@ -1,39 +1,39 @@ -import torch -from fpn_resnet_nearest import resnet50 -import torch.onnx -from collections import OrderedDict -import torch._utils - - -def proc_nodes_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if (k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - - new_state_dict[name] = v - return new_state_dict - - -def convert(): - checkpoint = torch.load("./PSENet_nearest.pth", map_location='cpu') - checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') - # model = mobilenet.mobilenet_v2(pretrained = False) - model = resnet50() - model.load_state_dict(checkpoint['state_dict']) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(1, 3, 704, 1216) - import onnx - dynamic_axes = {'actual_input_1':{0:'-1'},'output1':{0:'-1'}} - print('\nStarting ONNX export with onnx %s...' % onnx.__version__) - torch.onnx.export(model, dummy_input, "PSENet_704_1216_nearest.onnx", input_names=input_names, output_names=output_names,dynamic_axes = dynamic_axes, opset_version=11) - - -if __name__ == "__main__": - convert() +import torch +from fpn_resnet_nearest import resnet50 +import torch.onnx +from collections import OrderedDict +import torch._utils + + +def proc_nodes_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + + new_state_dict[name] = v + return new_state_dict + + +def convert(): + checkpoint = torch.load("./PSENet_nearest.pth", map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + # model = mobilenet.mobilenet_v2(pretrained = False) + model = resnet50() + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 704, 1216) + import onnx + dynamic_axes = {'actual_input_1':{0:'-1'},'output1':{0:'-1'}} + print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) + torch.onnx.export(model, dummy_input, "PSENet_704_1216_nearest.onnx", input_names=input_names, output_names=output_names,dynamic_axes = dynamic_axes, opset_version=11) + + +if __name__ == "__main__": + convert() diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet.py index 0305cf9e321751b436b534fd1e2160dde68e912b..2ee46e12299a997ebab5d86f249b8dbce828c6e6 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet.py @@ -1,341 +1,341 @@ -import torch.nn as nn -import time -import math -import torch.utils.model_zoo as model_zoo -import torch.nn.functional as F -import torch -import pdb - - -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', - 'resnet152'] - - -model_urls = { - 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', - 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', - 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3mb4d8f.pth', - 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', -} - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet(nn.Module): - - def __init__(self, block, layers, num_classes=7, scale=1): - self.inplanes = 64 - super(ResNet, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices = False) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = 
self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - # self.avgpool = nn.AvgPool2d(7, stride=1) - # self.fc = nn.Linear(512 * block.expansion, num_classes) - - # Top layer - self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels - self.toplayer_bn = nn.BatchNorm2d(256) - self.toplayer_relu = nn.ReLU(inplace=True) - - # Smooth layers - self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth1_bn = nn.BatchNorm2d(256) - self.smooth1_relu = nn.ReLU(inplace=True) - - self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth2_bn = nn.BatchNorm2d(256) - self.smooth2_relu = nn.ReLU(inplace=True) - - self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth3_bn = nn.BatchNorm2d(256) - self.smooth3_relu = nn.ReLU(inplace=True) - - # Lateral layers - self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) - self.latlayer1_bn = nn.BatchNorm2d(256) - self.latlayer1_relu = nn.ReLU(inplace=True) - - self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) - self.latlayer2_bn = nn.BatchNorm2d(256) - self.latlayer2_relu = nn.ReLU(inplace=True) - - self.latlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) - self.latlayer3_bn = nn.BatchNorm2d(256) - self.latlayer3_relu = nn.ReLU(inplace=True) - - self.conv2 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) - self.bn2 = nn.BatchNorm2d(256) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = nn.Conv2d(256, num_classes, kernel_size=1, stride=1, padding=0) - self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) - - self.scale = scale - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def _upsample(self, x, y, scale=1): - _, _, H, W = y.size() - # return self.upsample(x) - return F.interpolate(x, size=(H // scale, W // scale), mode='bilinear', align_corners=False) - - def _upsample_add(self, x, y): - _, _, H, W = y.size() - # print(x.shape) - # print(y.shape) - # print('x:',x.type()) - # print('out:',F.interpolate(x, size=(H,W), mode='bilinear').type()) - # print('y:', y.type()) - # return self.upsample(x) + y - return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False).half() + y - - def forward(self, x): - h = x - h = self.conv1(h) - h = self.bn1(h) - h = self.relu1(h) - - h = self.maxpool(h) - - h = self.layer1(h) - c2 = h - h = self.layer2(h) - c3 = h - h = self.layer3(h) - c4 = h - h = self.layer4(h) - c5 = h - - # Top-down - p5 = self.toplayer(c5) - p5 = self.toplayer_relu(self.toplayer_bn(p5)) - - c4 = self.latlayer1(c4) - c4 = self.latlayer1_relu(self.latlayer1_bn(c4)) - t = time.time() - # print('c4:',c4.type()) - p4 = self._upsample_add(p5, c4) - p4 = self.smooth1(p4) - p4 = self.smooth1_relu(self.smooth1_bn(p4)) - - c3 = self.latlayer2(c3) - c3 = self.latlayer2_relu(self.latlayer2_bn(c3)) - t = time.time() - # print('t:',t) - p3 = self._upsample_add(p4, c3) - # print('t2:',time.time()-t) - p3 = self.smooth2(p3) - p3 = self.smooth2_relu(self.smooth2_bn(p3)) - - c2 = self.latlayer3(c2) - c2 = self.latlayer3_relu(self.latlayer3_bn(c2)) - p2 = self._upsample_add(p3, c2) - p2 = self.smooth3(p2) - p2 = self.smooth3_relu(self.smooth3_bn(p2)) - - p3 = self._upsample(p3, p2) - p4 = self._upsample(p4, p2) - p5 = self._upsample(p5, p2) - - # p2, p3, p4, p5 = p2.cpu(), p3.cpu(), p4.cpu(), p5.cpu() - out = torch.cat((p2, p3, p4, p5), 1) - # out = out.to(dev) - - - # self.conv2 = self.conv2.cpu() - out = self.conv2(out) - # self.conv2 = self.conv2.to(dev) - - # self.relu2 = self.relu2.cpu() - # self.bn2 = self.bn2.cpu() - out = self.relu2(self.bn2(out)) - # self.relu2 = self.relu2.to(dev) - # self.bn2 = self.bn2.to(dev) - - # self.conv3 = self.conv3.cpu() - out = self.conv3(out) - # self.conv3 = self.conv3.to(dev) - - # out = out.to(dev) - # p2, p3, p4, p5 = p2.to(dev), p3.to(dev), p4.to(dev), p5.to(dev) - out = self._upsample(out, x, scale=self.scale) - - return out - - -def resnet18(pretrained=False, **kwargs): - """Constructs a ResNet-18 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) - return model - - -def resnet34(pretrained=False, **kwargs): - """Constructs a ResNet-34 model. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) - return model - - -def resnet50(pretrained=False, **kwargs): - """Constructs a ResNet-50 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet50']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model - - -def resnet101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet101']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model - -def resnet152(pretrained=False, **kwargs): - """Constructs a ResNet-152 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet152']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model +import torch.nn as nn +import time +import math +import torch.utils.model_zoo as model_zoo +import torch.nn.functional as F +import torch +import pdb + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3mb4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, 
bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=7, scale=1): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu1 = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices = False) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + # self.avgpool = nn.AvgPool2d(7, stride=1) + # self.fc = nn.Linear(512 * block.expansion, num_classes) + + # Top layer + self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels + self.toplayer_bn = nn.BatchNorm2d(256) + self.toplayer_relu = nn.ReLU(inplace=True) + + # Smooth layers + self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth1_bn = nn.BatchNorm2d(256) + self.smooth1_relu = nn.ReLU(inplace=True) + + self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth2_bn = nn.BatchNorm2d(256) + self.smooth2_relu = nn.ReLU(inplace=True) + + self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth3_bn = nn.BatchNorm2d(256) + self.smooth3_relu = nn.ReLU(inplace=True) + + # Lateral layers + self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) + self.latlayer1_bn = nn.BatchNorm2d(256) + self.latlayer1_relu = nn.ReLU(inplace=True) + + self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) + self.latlayer2_bn = nn.BatchNorm2d(256) + self.latlayer2_relu = nn.ReLU(inplace=True) + + self.latlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) + self.latlayer3_bn = nn.BatchNorm2d(256) + self.latlayer3_relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) + self.bn2 = nn.BatchNorm2d(256) + self.relu2 = nn.ReLU(inplace=True) + self.conv3 = nn.Conv2d(256, num_classes, kernel_size=1, stride=1, padding=0) + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + + self.scale = scale + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _upsample(self, x, y, scale=1): + _, _, H, W = y.size() + # return self.upsample(x) + return F.interpolate(x, size=(H // scale, W // scale), mode='bilinear', align_corners=False) + + def _upsample_add(self, x, y): + _, _, H, W = y.size() + # print(x.shape) + # print(y.shape) + # print('x:',x.type()) + # print('out:',F.interpolate(x, size=(H,W), mode='bilinear').type()) + # print('y:', y.type()) + # return self.upsample(x) + y + return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False).half() + y + + def forward(self, x): + h = x + h = self.conv1(h) + h = self.bn1(h) + h = self.relu1(h) + + h = self.maxpool(h) + + h = self.layer1(h) + c2 = h + h = self.layer2(h) + c3 = h + h = self.layer3(h) + c4 = h + h = self.layer4(h) + c5 = h + + # Top-down + p5 = self.toplayer(c5) + p5 = self.toplayer_relu(self.toplayer_bn(p5)) + + c4 = self.latlayer1(c4) + c4 = self.latlayer1_relu(self.latlayer1_bn(c4)) + t = time.time() + # print('c4:',c4.type()) + p4 = self._upsample_add(p5, c4) + p4 = self.smooth1(p4) + p4 = self.smooth1_relu(self.smooth1_bn(p4)) + + c3 = self.latlayer2(c3) + c3 = self.latlayer2_relu(self.latlayer2_bn(c3)) + t = time.time() + # print('t:',t) + p3 = self._upsample_add(p4, c3) + # print('t2:',time.time()-t) + p3 = self.smooth2(p3) + p3 = self.smooth2_relu(self.smooth2_bn(p3)) + + c2 = self.latlayer3(c2) + c2 = self.latlayer3_relu(self.latlayer3_bn(c2)) + p2 = self._upsample_add(p3, c2) + p2 = self.smooth3(p2) + p2 = self.smooth3_relu(self.smooth3_bn(p2)) + + p3 = self._upsample(p3, p2) + p4 = self._upsample(p4, p2) + p5 = self._upsample(p5, p2) + + # p2, p3, p4, p5 = p2.cpu(), p3.cpu(), p4.cpu(), p5.cpu() + out = torch.cat((p2, p3, p4, p5), 1) + # out = out.to(dev) + + + # self.conv2 = self.conv2.cpu() + out = self.conv2(out) + # self.conv2 = self.conv2.to(dev) + + # self.relu2 = self.relu2.cpu() + # self.bn2 = self.bn2.cpu() + out = self.relu2(self.bn2(out)) + # self.relu2 = self.relu2.to(dev) + # self.bn2 = self.bn2.to(dev) + + # self.conv3 = self.conv3.cpu() + out = self.conv3(out) + # self.conv3 = self.conv3.to(dev) + + # out = out.to(dev) + # p2, p3, p4, p5 = p2.to(dev), p3.to(dev), p4.to(dev), p5.to(dev) + out = self._upsample(out, x, scale=self.scale) + + return out + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet50']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet101']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet152']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet_nearest.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet_nearest.py index 87999dd4dccc33eac151b20bd714c67c179790ab..b3400aa3c5ca8e284319e870498f6b65e387d818 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet_nearest.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/fpn_resnet_nearest.py @@ -1,555 +1,555 @@ -# Apache License -# Version 2.0, January 2004 -# http://www.apache.org/licenses/ -# -# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -# -# 1. Definitions. -# -# "License" shall mean the terms and conditions for use, reproduction, -# and distribution as defined by Sections 1 through 9 of this document. -# -# "Licensor" shall mean the copyright owner or entity authorized by -# the copyright owner that is granting the License. -# -# "Legal Entity" shall mean the union of the acting entity and all -# other entities that control, are controlled by, or are under common -# control with that entity. For the purposes of this definition, -# "control" means (i) the power, direct or indirect, to cause the -# direction or management of such entity, whether by contract or -# otherwise, or (ii) ownership of fifty percent (50%) or more of the -# outstanding shares, or (iii) beneficial ownership of such entity. -# -# "You" (or "Your") shall mean an individual or Legal Entity -# exercising permissions granted by this License. -# -# "Source" form shall mean the preferred form for making modifications, -# including but not limited to software source code, documentation -# source, and configuration files. -# -# "Object" form shall mean any form resulting from mechanical -# transformation or translation of a Source form, including but -# not limited to compiled object code, generated documentation, -# and conversions to other media types. 
-# -# "Work" shall mean the work of authorship, whether in Source or -# Object form, made available under the License, as indicated by a -# copyright notice that is included in or attached to the work -# (an example is provided in the Appendix below). -# -# "Derivative Works" shall mean any work, whether in Source or Object -# form, that is based on (or derived from) the Work and for which the -# editorial revisions, annotations, elaborations, or other modifications -# represent, as a whole, an original work of authorship. For the purposes -# of this License, Derivative Works shall not include works that remain -# separable from, or merely link (or bind by name) to the interfaces of, -# the Work and Derivative Works thereof. -# -# "Contribution" shall mean any work of authorship, including -# the original version of the Work and any modifications or additions -# to that Work or Derivative Works thereof, that is intentionally -# submitted to Licensor for inclusion in the Work by the copyright owner -# or by an individual or Legal Entity authorized to submit on behalf of -# the copyright owner. For the purposes of this definition, "submitted" -# means any form of electronic, verbal, or written communication sent -# to the Licensor or its representatives, including but not limited to -# communication on electronic mailing lists, source code control systems, -# and issue tracking systems that are managed by, or on behalf of, the -# Licensor for the purpose of discussing and improving the Work, but -# excluding communication that is conspicuously marked or otherwise -# designated in writing by the copyright owner as "Not a Contribution." -# -# "Contributor" shall mean Licensor and any individual or Legal Entity -# on behalf of whom a Contribution has been received by Licensor and -# subsequently incorporated within the Work. -# -# 2. Grant of Copyright License. Subject to the terms and conditions of -# this License, each Contributor hereby grants to You a perpetual, -# worldwide, non-exclusive, no-charge, royalty-free, irrevocable -# copyright license to reproduce, prepare Derivative Works of, -# publicly display, publicly perform, sublicense, and distribute the -# Work and such Derivative Works in Source or Object form. -# -# 3. Grant of Patent License. Subject to the terms and conditions of -# this License, each Contributor hereby grants to You a perpetual, -# worldwide, non-exclusive, no-charge, royalty-free, irrevocable -# (except as stated in this section) patent license to make, have made, -# use, offer to sell, sell, import, and otherwise transfer the Work, -# where such license applies only to those patent claims licensable -# by such Contributor that are necessarily infringed by their -# Contribution(s) alone or by combination of their Contribution(s) -# with the Work to which such Contribution(s) was submitted. If You -# institute patent litigation against any entity (including a -# cross-claim or counterclaim in a lawsuit) alleging that the Work -# or a Contribution incorporated within the Work constitutes direct -# or contributory patent infringement, then any patent licenses -# granted to You under this License for that Work shall terminate -# as of the date such litigation is filed. -# -# 4. Redistribution. 
You may reproduce and distribute copies of the -# Work or Derivative Works thereof in any medium, with or without -# modifications, and in Source or Object form, provided that You -# meet the following conditions: -# -# (a) You must give any other recipients of the Work or -# Derivative Works a copy of this License; and -# -# (b) You must cause any modified files to carry prominent notices -# stating that You changed the files; and -# -# (c) You must retain, in the Source form of any Derivative Works -# that You distribute, all copyright, patent, trademark, and -# attribution notices from the Source form of the Work, -# excluding those notices that do not pertain to any part of -# the Derivative Works; and -# -# (d) If the Work includes a "NOTICE" text file as part of its -# distribution, then any Derivative Works that You distribute must -# include a readable copy of the attribution notices contained -# within such NOTICE file, excluding those notices that do not -# pertain to any part of the Derivative Works, in at least one -# of the following places: within a NOTICE text file distributed -# as part of the Derivative Works; within the Source form or -# documentation, if provided along with the Derivative Works; or, -# within a display generated by the Derivative Works, if and -# wherever such third-party notices normally appear. The contents -# of the NOTICE file are for informational purposes only and -# do not modify the License. You may add Your own attribution -# notices within Derivative Works that You distribute, alongside -# or as an addendum to the NOTICE text from the Work, provided -# that such additional attribution notices cannot be construed -# as modifying the License. -# -# You may add Your own copyright statement to Your modifications and -# may provide additional or different license terms and conditions -# for use, reproduction, or distribution of Your modifications, or -# for any such Derivative Works as a whole, provided Your use, -# reproduction, and distribution of the Work otherwise complies with -# the conditions stated in this License. -# -# 5. Submission of Contributions. Unless You explicitly state otherwise, -# any Contribution intentionally submitted for inclusion in the Work -# by You to the Licensor shall be under the terms and conditions of -# this License, without any additional terms or conditions. -# Notwithstanding the above, nothing herein shall supersede or modify -# the terms of any separate license agreement you may have executed -# with Licensor regarding such Contributions. -# -# 6. Trademarks. This License does not grant permission to use the trade -# names, trademarks, service marks, or product names of the Licensor, -# except as required for reasonable and customary use in describing the -# origin of the Work and reproducing the content of the NOTICE file. -# -# 7. Disclaimer of Warranty. Unless required by applicable law or -# agreed to in writing, Licensor provides the Work (and each -# Contributor provides its Contributions) on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied, including, without limitation, any warranties or conditions -# of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -# PARTICULAR PURPOSE. You are solely responsible for determining the -# appropriateness of using or redistributing the Work and assume any -# risks associated with Your exercise of permissions under this License. -# -# 8. Limitation of Liability. 
In no event and under no legal theory, -# whether in tort (including negligence), contract, or otherwise, -# unless required by applicable law (such as deliberate and grossly -# negligent acts) or agreed to in writing, shall any Contributor be -# liable to You for damages, including any direct, indirect, special, -# incidental, or consequential damages of any character arising as a -# result of this License or out of the use or inability to use the -# Work (including but not limited to damages for loss of goodwill, -# work stoppage, computer failure or malfunction, or any and all -# other commercial damages or losses), even if such Contributor -# has been advised of the possibility of such damages. -# -# 9. Accepting Warranty or Additional Liability. While redistributing -# the Work or Derivative Works thereof, You may choose to offer, -# and charge a fee for, acceptance of support, warranty, indemnity, -# or other liability obligations and/or rights consistent with this -# License. However, in accepting such obligations, You may act only -# on Your own behalf and on Your sole responsibility, not on behalf -# of any other Contributor, and only if You agree to indemnify, -# defend, and hold each Contributor harmless for any liability -# incurred by, or claims asserted against, such Contributor by reason -# of your accepting any such warranty or additional liability. -# -# END OF TERMS AND CONDITIONS -# -# APPENDIX: How to apply the Apache License to your work. -# -# To apply the Apache License to your work, attach the following -# boilerplate notice, with the fields enclosed by brackets "[]" -# replaced with your own identifying information. (Don't include -# the brackets!) The text should be enclosed in the appropriate -# comment syntax for the file format. We also recommend that a -# file or class name and description of purpose be included on the -# same "printed page" as the copyright notice for easier -# identification within third-party archives. -# -# Copyright [yyyy] [name of copyright owner] -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import math -import time - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.model_zoo as model_zoo - -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', - 'resnet152'] - -model_urls = { - 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', - 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', - 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3mb4d8f.pth', - 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', -} - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet(nn.Module): - - def __init__(self, block, layers, num_classes=7, scale=1): - self.inplanes = 64 - super(ResNet, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices=False) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - # self.avgpool = nn.AvgPool2d(7, stride=1) - # self.fc = nn.Linear(512 * block.expansion, num_classes) - - # Top layer - self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels - self.toplayer_bn = nn.BatchNorm2d(256) - self.toplayer_relu = nn.ReLU(inplace=True) - - # Smooth layers - self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth1_bn = nn.BatchNorm2d(256) - self.smooth1_relu = 
nn.ReLU(inplace=True) - - self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth2_bn = nn.BatchNorm2d(256) - self.smooth2_relu = nn.ReLU(inplace=True) - - self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) - self.smooth3_bn = nn.BatchNorm2d(256) - self.smooth3_relu = nn.ReLU(inplace=True) - - # Lateral layers - self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) - self.latlayer1_bn = nn.BatchNorm2d(256) - self.latlayer1_relu = nn.ReLU(inplace=True) - - self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) - self.latlayer2_bn = nn.BatchNorm2d(256) - self.latlayer2_relu = nn.ReLU(inplace=True) - - self.latlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) - self.latlayer3_bn = nn.BatchNorm2d(256) - self.latlayer3_relu = nn.ReLU(inplace=True) - - self.conv2 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) - self.bn2 = nn.BatchNorm2d(256) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = nn.Conv2d(256, num_classes, kernel_size=1, stride=1, padding=0) - self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) - - self.scale = scale - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def _upsample(self, x, y, scale=1): - _, _, H, W = y.size() - # return self.upsample(x) - return F.interpolate(x, size=(H // scale, W // scale), mode='nearest') - - def _upsample_add(self, x, y): - _, _, H, W = y.size() - # return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False).half() + y - return F.interpolate(x, size=(H, W), mode='nearest') + y - - def forward(self, x): - h = x - h = self.conv1(h) - h = self.bn1(h) - h = self.relu1(h) - - #h = h.float() - h = self.maxpool(h) - #h = h.half() - # self.maxpool = self.maxpool.cpu() - # h = self.maxpool(h.cpu()) - # h = h.npu() - # self.maxpool = self.maxpool.npu() - - h = self.layer1(h) - c2 = h - h = self.layer2(h) - c3 = h - h = self.layer3(h) - c4 = h - h = self.layer4(h) - c5 = h - - # Top-down - p5 = self.toplayer(c5) - p5 = self.toplayer_relu(self.toplayer_bn(p5)) - - c4 = self.latlayer1(c4) - c4 = self.latlayer1_relu(self.latlayer1_bn(c4)) - t = time.time() - # print('c4:',c4.type()) - p4 = self._upsample_add(p5, c4) - p4 = self.smooth1(p4) - p4 = self.smooth1_relu(self.smooth1_bn(p4)) - - c3 = self.latlayer2(c3) - c3 = self.latlayer2_relu(self.latlayer2_bn(c3)) - t = time.time() - # print('t:',t) - p3 = self._upsample_add(p4, c3) - # print('t2:',time.time()-t) - p3 = self.smooth2(p3) - p3 = self.smooth2_relu(self.smooth2_bn(p3)) - - c2 = self.latlayer3(c2) - c2 = self.latlayer3_relu(self.latlayer3_bn(c2)) - p2 = self._upsample_add(p3, c2) - p2 = self.smooth3(p2) - p2 = self.smooth3_relu(self.smooth3_bn(p2)) - - p3 = self._upsample(p3, p2) - p4 = 
self._upsample(p4, p2) - p5 = self._upsample(p5, p2) - - # p2, p3, p4, p5 = p2.cpu(), p3.cpu(), p4.cpu(), p5.cpu() - out = torch.cat((p2, p3, p4, p5), 1) - # out = out.to(dev) - - # self.conv2 = self.conv2.cpu() - out = self.conv2(out) - # self.conv2 = self.conv2.to(dev) - - # self.relu2 = self.relu2.cpu() - # self.bn2 = self.bn2.cpu() - out = self.relu2(self.bn2(out)) - # self.relu2 = self.relu2.to(dev) - # self.bn2 = self.bn2.to(dev) - - # self.conv3 = self.conv3.cpu() - out = self.conv3(out) - # self.conv3 = self.conv3.to(dev) - - # out = out.to(dev) - # p2, p3, p4, p5 = p2.to(dev), p3.to(dev), p4.to(dev), p5.to(dev) - out = self._upsample(out, x, scale=self.scale) - - return out - - -def resnet18(pretrained=False, **kwargs): - """Constructs a ResNet-18 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) - return model - - -def resnet34(pretrained=False, **kwargs): - """Constructs a ResNet-34 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) - return model - - -def resnet50(pretrained=False, **kwargs): - """Constructs a ResNet-50 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet50']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model - - -def resnet101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet101']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model - - -def resnet152(pretrained=False, **kwargs): - """Constructs a ResNet-152 model. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) - if pretrained: - pretrained_model = model_zoo.load_url(model_urls['resnet152']) - state = model.state_dict() - for key in state.keys(): - if key in pretrained_model.keys(): - state[key] = pretrained_model[key] - model.load_state_dict(state) - return model +# Apache License +# Version 2.0, January 2004 +# http://www.apache.org/licenses/ +# +# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +# +# 1. Definitions. +# +# "License" shall mean the terms and conditions for use, reproduction, +# and distribution as defined by Sections 1 through 9 of this document. +# +# "Licensor" shall mean the copyright owner or entity authorized by +# the copyright owner that is granting the License. +# +# "Legal Entity" shall mean the union of the acting entity and all +# other entities that control, are controlled by, or are under common +# control with that entity. 
For the purposes of this definition, +# "control" means (i) the power, direct or indirect, to cause the +# direction or management of such entity, whether by contract or +# otherwise, or (ii) ownership of fifty percent (50%) or more of the +# outstanding shares, or (iii) beneficial ownership of such entity. +# +# "You" (or "Your") shall mean an individual or Legal Entity +# exercising permissions granted by this License. +# +# "Source" form shall mean the preferred form for making modifications, +# including but not limited to software source code, documentation +# source, and configuration files. +# +# "Object" form shall mean any form resulting from mechanical +# transformation or translation of a Source form, including but +# not limited to compiled object code, generated documentation, +# and conversions to other media types. +# +# "Work" shall mean the work of authorship, whether in Source or +# Object form, made available under the License, as indicated by a +# copyright notice that is included in or attached to the work +# (an example is provided in the Appendix below). +# +# "Derivative Works" shall mean any work, whether in Source or Object +# form, that is based on (or derived from) the Work and for which the +# editorial revisions, annotations, elaborations, or other modifications +# represent, as a whole, an original work of authorship. For the purposes +# of this License, Derivative Works shall not include works that remain +# separable from, or merely link (or bind by name) to the interfaces of, +# the Work and Derivative Works thereof. +# +# "Contribution" shall mean any work of authorship, including +# the original version of the Work and any modifications or additions +# to that Work or Derivative Works thereof, that is intentionally +# submitted to Licensor for inclusion in the Work by the copyright owner +# or by an individual or Legal Entity authorized to submit on behalf of +# the copyright owner. For the purposes of this definition, "submitted" +# means any form of electronic, verbal, or written communication sent +# to the Licensor or its representatives, including but not limited to +# communication on electronic mailing lists, source code control systems, +# and issue tracking systems that are managed by, or on behalf of, the +# Licensor for the purpose of discussing and improving the Work, but +# excluding communication that is conspicuously marked or otherwise +# designated in writing by the copyright owner as "Not a Contribution." +# +# "Contributor" shall mean Licensor and any individual or Legal Entity +# on behalf of whom a Contribution has been received by Licensor and +# subsequently incorporated within the Work. +# +# 2. Grant of Copyright License. Subject to the terms and conditions of +# this License, each Contributor hereby grants to You a perpetual, +# worldwide, non-exclusive, no-charge, royalty-free, irrevocable +# copyright license to reproduce, prepare Derivative Works of, +# publicly display, publicly perform, sublicense, and distribute the +# Work and such Derivative Works in Source or Object form. +# +# 3. Grant of Patent License. 
Subject to the terms and conditions of +# this License, each Contributor hereby grants to You a perpetual, +# worldwide, non-exclusive, no-charge, royalty-free, irrevocable +# (except as stated in this section) patent license to make, have made, +# use, offer to sell, sell, import, and otherwise transfer the Work, +# where such license applies only to those patent claims licensable +# by such Contributor that are necessarily infringed by their +# Contribution(s) alone or by combination of their Contribution(s) +# with the Work to which such Contribution(s) was submitted. If You +# institute patent litigation against any entity (including a +# cross-claim or counterclaim in a lawsuit) alleging that the Work +# or a Contribution incorporated within the Work constitutes direct +# or contributory patent infringement, then any patent licenses +# granted to You under this License for that Work shall terminate +# as of the date such litigation is filed. +# +# 4. Redistribution. You may reproduce and distribute copies of the +# Work or Derivative Works thereof in any medium, with or without +# modifications, and in Source or Object form, provided that You +# meet the following conditions: +# +# (a) You must give any other recipients of the Work or +# Derivative Works a copy of this License; and +# +# (b) You must cause any modified files to carry prominent notices +# stating that You changed the files; and +# +# (c) You must retain, in the Source form of any Derivative Works +# that You distribute, all copyright, patent, trademark, and +# attribution notices from the Source form of the Work, +# excluding those notices that do not pertain to any part of +# the Derivative Works; and +# +# (d) If the Work includes a "NOTICE" text file as part of its +# distribution, then any Derivative Works that You distribute must +# include a readable copy of the attribution notices contained +# within such NOTICE file, excluding those notices that do not +# pertain to any part of the Derivative Works, in at least one +# of the following places: within a NOTICE text file distributed +# as part of the Derivative Works; within the Source form or +# documentation, if provided along with the Derivative Works; or, +# within a display generated by the Derivative Works, if and +# wherever such third-party notices normally appear. The contents +# of the NOTICE file are for informational purposes only and +# do not modify the License. You may add Your own attribution +# notices within Derivative Works that You distribute, alongside +# or as an addendum to the NOTICE text from the Work, provided +# that such additional attribution notices cannot be construed +# as modifying the License. +# +# You may add Your own copyright statement to Your modifications and +# may provide additional or different license terms and conditions +# for use, reproduction, or distribution of Your modifications, or +# for any such Derivative Works as a whole, provided Your use, +# reproduction, and distribution of the Work otherwise complies with +# the conditions stated in this License. +# +# 5. Submission of Contributions. Unless You explicitly state otherwise, +# any Contribution intentionally submitted for inclusion in the Work +# by You to the Licensor shall be under the terms and conditions of +# this License, without any additional terms or conditions. +# Notwithstanding the above, nothing herein shall supersede or modify +# the terms of any separate license agreement you may have executed +# with Licensor regarding such Contributions. 
+# +# 6. Trademarks. This License does not grant permission to use the trade +# names, trademarks, service marks, or product names of the Licensor, +# except as required for reasonable and customary use in describing the +# origin of the Work and reproducing the content of the NOTICE file. +# +# 7. Disclaimer of Warranty. Unless required by applicable law or +# agreed to in writing, Licensor provides the Work (and each +# Contributor provides its Contributions) on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied, including, without limitation, any warranties or conditions +# of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +# PARTICULAR PURPOSE. You are solely responsible for determining the +# appropriateness of using or redistributing the Work and assume any +# risks associated with Your exercise of permissions under this License. +# +# 8. Limitation of Liability. In no event and under no legal theory, +# whether in tort (including negligence), contract, or otherwise, +# unless required by applicable law (such as deliberate and grossly +# negligent acts) or agreed to in writing, shall any Contributor be +# liable to You for damages, including any direct, indirect, special, +# incidental, or consequential damages of any character arising as a +# result of this License or out of the use or inability to use the +# Work (including but not limited to damages for loss of goodwill, +# work stoppage, computer failure or malfunction, or any and all +# other commercial damages or losses), even if such Contributor +# has been advised of the possibility of such damages. +# +# 9. Accepting Warranty or Additional Liability. While redistributing +# the Work or Derivative Works thereof, You may choose to offer, +# and charge a fee for, acceptance of support, warranty, indemnity, +# or other liability obligations and/or rights consistent with this +# License. However, in accepting such obligations, You may act only +# on Your own behalf and on Your sole responsibility, not on behalf +# of any other Contributor, and only if You agree to indemnify, +# defend, and hold each Contributor harmless for any liability +# incurred by, or claims asserted against, such Contributor by reason +# of your accepting any such warranty or additional liability. +# +# END OF TERMS AND CONDITIONS +# +# APPENDIX: How to apply the Apache License to your work. +# +# To apply the Apache License to your work, attach the following +# boilerplate notice, with the fields enclosed by brackets "[]" +# replaced with your own identifying information. (Don't include +# the brackets!) The text should be enclosed in the appropriate +# comment syntax for the file format. We also recommend that a +# file or class name and description of purpose be included on the +# same "printed page" as the copyright notice for easier +# identification within third-party archives. +# +# Copyright [yyyy] [name of copyright owner] +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import time + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3mb4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=7, scale=1): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu1 = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices=False) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], 
stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + # self.avgpool = nn.AvgPool2d(7, stride=1) + # self.fc = nn.Linear(512 * block.expansion, num_classes) + + # Top layer + self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels + self.toplayer_bn = nn.BatchNorm2d(256) + self.toplayer_relu = nn.ReLU(inplace=True) + + # Smooth layers + self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth1_bn = nn.BatchNorm2d(256) + self.smooth1_relu = nn.ReLU(inplace=True) + + self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth2_bn = nn.BatchNorm2d(256) + self.smooth2_relu = nn.ReLU(inplace=True) + + self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.smooth3_bn = nn.BatchNorm2d(256) + self.smooth3_relu = nn.ReLU(inplace=True) + + # Lateral layers + self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) + self.latlayer1_bn = nn.BatchNorm2d(256) + self.latlayer1_relu = nn.ReLU(inplace=True) + + self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) + self.latlayer2_bn = nn.BatchNorm2d(256) + self.latlayer2_relu = nn.ReLU(inplace=True) + + self.latlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) + self.latlayer3_bn = nn.BatchNorm2d(256) + self.latlayer3_relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) + self.bn2 = nn.BatchNorm2d(256) + self.relu2 = nn.ReLU(inplace=True) + self.conv3 = nn.Conv2d(256, num_classes, kernel_size=1, stride=1, padding=0) + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + + self.scale = scale + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _upsample(self, x, y, scale=1): + _, _, H, W = y.size() + # return self.upsample(x) + return F.interpolate(x, size=(H // scale, W // scale), mode='nearest') + + def _upsample_add(self, x, y): + _, _, H, W = y.size() + # return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False).half() + y + return F.interpolate(x, size=(H, W), mode='nearest') + y + + def forward(self, x): + h = x + h = self.conv1(h) + h = self.bn1(h) + h = self.relu1(h) + + #h = h.float() + h = self.maxpool(h) + #h = h.half() + # self.maxpool = self.maxpool.cpu() + # h = self.maxpool(h.cpu()) + # h = h.npu() + # self.maxpool = self.maxpool.npu() + + h = self.layer1(h) + c2 = h + h = self.layer2(h) + c3 = h + h = self.layer3(h) + c4 = h + h = self.layer4(h) + c5 = h + + # Top-down + p5 = self.toplayer(c5) + p5 = self.toplayer_relu(self.toplayer_bn(p5)) + + c4 = self.latlayer1(c4) + c4 = self.latlayer1_relu(self.latlayer1_bn(c4)) + t = time.time() + # print('c4:',c4.type()) + p4 = self._upsample_add(p5, c4) + p4 = self.smooth1(p4) + p4 = self.smooth1_relu(self.smooth1_bn(p4)) + + c3 = self.latlayer2(c3) + c3 = self.latlayer2_relu(self.latlayer2_bn(c3)) + t = time.time() + # print('t:',t) + p3 = self._upsample_add(p4, c3) + # print('t2:',time.time()-t) + p3 = self.smooth2(p3) + p3 = self.smooth2_relu(self.smooth2_bn(p3)) + + c2 = self.latlayer3(c2) + c2 = self.latlayer3_relu(self.latlayer3_bn(c2)) + p2 = self._upsample_add(p3, c2) + p2 = self.smooth3(p2) + p2 = self.smooth3_relu(self.smooth3_bn(p2)) + + p3 = self._upsample(p3, p2) + p4 = self._upsample(p4, p2) + p5 = self._upsample(p5, p2) + + # p2, p3, p4, p5 = p2.cpu(), p3.cpu(), p4.cpu(), p5.cpu() + out = torch.cat((p2, p3, p4, p5), 1) + # out = out.to(dev) + + # self.conv2 = self.conv2.cpu() + out = self.conv2(out) + # self.conv2 = self.conv2.to(dev) + + # self.relu2 = self.relu2.cpu() + # self.bn2 = self.bn2.cpu() + out = self.relu2(self.bn2(out)) + # self.relu2 = self.relu2.to(dev) + # self.bn2 = self.bn2.to(dev) + + # self.conv3 = self.conv3.cpu() + out = self.conv3(out) + # self.conv3 = self.conv3.to(dev) + + # out = out.to(dev) + # p2, p3, p4, p5 = p2.to(dev), p3.to(dev), p4.to(dev), p5.to(dev) + out = self._upsample(out, x, scale=self.scale) + + return out + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet50']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet101']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + pretrained_model = model_zoo.load_url(model_urls['resnet152']) + state = model.state_dict() + for key in state.keys(): + if key in pretrained_model.keys(): + state[key] = pretrained_model[key] + model.load_state_dict(state) + return model diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/preprocess_psenet_pytorch.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/preprocess_psenet_pytorch.py index 4ae58b56f5a72ddf1320dd3ea2621dd570c52020..dfad45a0096b0d2147277aeac149bcbe4495903a 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/preprocess_psenet_pytorch.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/preprocess_psenet_pytorch.py @@ -1,50 +1,50 @@ -import os -import sys -import numpy as np -import cv2 -from PIL import Image - - -def scale(img, long_size=2240): - h, w = img.shape[0:2] - scale = long_size * 1.0 / max(h, w) - img = cv2.resize(img, dsize=None, fx=scale, fy=scale) - # img = cv2.resize(img, (1260, 2240)) - print(img.shape) - return img - - -def psenet_onnx(file_path, bin_path): - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - in_files = os.listdir(file_path) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - for file in in_files: - i = i + 1 - print(file, "====", i) - img = cv2.imread(os.path.join(file_path, file)) - img = img[:, :, [2, 1, 0]] # bgr -> rgb - # img = scale(img) - img = cv2.resize(img, (1216, 704)) - - img = np.array(img, dtype=np.float32) - img = img / 255. 
- - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - - img = img.transpose(2, 0, 1) # HWC -> CHW - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - psenet_onnx(file_path, bin_path) +import os +import sys +import numpy as np +import cv2 +from PIL import Image + + +def scale(img, long_size=2240): + h, w = img.shape[0:2] + scale = long_size * 1.0 / max(h, w) + img = cv2.resize(img, dsize=None, fx=scale, fy=scale) + # img = cv2.resize(img, (1260, 2240)) + print(img.shape) + return img + + +def psenet_onnx(file_path, bin_path): + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + in_files = os.listdir(file_path) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + for file in in_files: + i = i + 1 + print(file, "====", i) + img = cv2.imread(os.path.join(file_path, file)) + img = img[:, :, [2, 1, 0]] # bgr -> rgb + # img = scale(img) + img = cv2.resize(img, (1216, 704)) + + img = np.array(img, dtype=np.float32) + img = img / 255. + + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + + img = img.transpose(2, 0, 1) # HWC -> CHW + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + psenet_onnx(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_bilinear.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_bilinear.py index ef040d7f9321578cc033e894f0d574ae2e22f888..b88459ff3352f46ad4abc1d63e03403fb0af7b8d 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_bilinear.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_bilinear.py @@ -1,105 +1,105 @@ -import os -import sys -import numpy as np -import torch -import cv2 -from pypse import pse as pypse -import torch.nn.functional as F - - -img_path = sys.argv[1] -bin_path = sys.argv[2] -txt_path = sys.argv[3] - -if not os.path.exists(txt_path): - os.makedirs(txt_path) - -kernel_num=7 -min_kernel_area=5.0 -scale=1 -min_score = 0.9 -min_area = 600 - - -def get_images(): - ''' - find image files in test data path - :return: list of files found - ''' - files = [] - exts = ['jpg', 'png', 'jpeg', 'JPG'] - - for parent, _, filenames in os.walk(img_path): - for filename in filenames: - for ext in exts: - if filename.endswith(ext): - files.append(os.path.join(parent, filename)) - break - return files - - -im_fn_list = get_images() -for im_fn in im_fn_list: - print(im_fn) - im = cv2.imread(im_fn) - idx = os.path.basename(im_fn).split('/')[-1].split('.')[0].split('_')[1] - seg_maps = np.fromfile(bin_path+"/img_{}_1.bin".format(idx), "float32") - seg_maps = np.reshape(seg_maps, (1, 7, 176, 304)) - seg_maps = torch.from_numpy(seg_maps) - - # Resize 算子 - seg_maps = F.interpolate(seg_maps, size=(704, 1216), mode='bilinear', align_corners=False) - # print(seg_maps) - # print(seg_maps.shape) - # - # seg_maps = seg_maps.float() - score = torch.sigmoid(seg_maps[:, 0, :, :]) - outputs = (torch.sign(seg_maps - 1.0) + 1) / 2 - - text = outputs[:, 0, :, :] - kernels = outputs[:, 0:kernel_num, :, :] * text - - score = score.data.numpy()[0].astype(np.float32) - text = text.data.numpy()[0].astype(np.uint8) - 
kernels = kernels.numpy()[0].astype(np.uint8) - - # python version pse - pred = pypse(kernels, min_kernel_area / (scale * scale)) - - img_scale = (im.shape[1] * 1.0 / pred.shape[1], im.shape[0] * 1.0 / pred.shape[0]) - label = pred - label_num = np.max(label) + 1 - bboxes = [] - - for i in range(1, label_num): - points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1] - - if points.shape[0] < min_area: - continue - - score_i = np.mean(score[label == i]) - if score_i < min_score: - continue - - rect = cv2.minAreaRect(points) - bbox = cv2.boxPoints(rect) * img_scale - bbox = bbox.astype('int32') - bboxes.append(bbox.reshape(-1)) - # print(bboxes) - # save txt - res_file = os.path.join(txt_path,'{}.txt'.format(os.path.splitext(os.path.basename(im_fn))[0])) - with open(res_file, 'w') as f: - for b_idx, bbox in enumerate(bboxes): - values = [int(v) for v in bbox] - line = "%d, %d, %d, %d, %d, %d, %d, %d\n" % tuple(values) - f.write(line) - - - # show result - # for bbox in bboxes: - # cv2.drawContours(im, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2) - # cv2.namedWindow('result', cv2.WINDOW_NORMAL) - # cv2.imshow('result', im) - # cv2.waitKey() - - +import os +import sys +import numpy as np +import torch +import cv2 +from pypse import pse as pypse +import torch.nn.functional as F + + +img_path = sys.argv[1] +bin_path = sys.argv[2] +txt_path = sys.argv[3] + +if not os.path.exists(txt_path): + os.makedirs(txt_path) + +kernel_num=7 +min_kernel_area=5.0 +scale=1 +min_score = 0.9 +min_area = 600 + + +def get_images(): + ''' + find image files in test data path + :return: list of files found + ''' + files = [] + exts = ['jpg', 'png', 'jpeg', 'JPG'] + + for parent, _, filenames in os.walk(img_path): + for filename in filenames: + for ext in exts: + if filename.endswith(ext): + files.append(os.path.join(parent, filename)) + break + return files + + +im_fn_list = get_images() +for im_fn in im_fn_list: + print(im_fn) + im = cv2.imread(im_fn) + idx = os.path.basename(im_fn).split('/')[-1].split('.')[0].split('_')[1] + seg_maps = np.fromfile(bin_path+"/img_{}_1.bin".format(idx), "float32") + seg_maps = np.reshape(seg_maps, (1, 7, 176, 304)) + seg_maps = torch.from_numpy(seg_maps) + + # Resize 算子 + seg_maps = F.interpolate(seg_maps, size=(704, 1216), mode='bilinear', align_corners=False) + # print(seg_maps) + # print(seg_maps.shape) + # + # seg_maps = seg_maps.float() + score = torch.sigmoid(seg_maps[:, 0, :, :]) + outputs = (torch.sign(seg_maps - 1.0) + 1) / 2 + + text = outputs[:, 0, :, :] + kernels = outputs[:, 0:kernel_num, :, :] * text + + score = score.data.numpy()[0].astype(np.float32) + text = text.data.numpy()[0].astype(np.uint8) + kernels = kernels.numpy()[0].astype(np.uint8) + + # python version pse + pred = pypse(kernels, min_kernel_area / (scale * scale)) + + img_scale = (im.shape[1] * 1.0 / pred.shape[1], im.shape[0] * 1.0 / pred.shape[0]) + label = pred + label_num = np.max(label) + 1 + bboxes = [] + + for i in range(1, label_num): + points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1] + + if points.shape[0] < min_area: + continue + + score_i = np.mean(score[label == i]) + if score_i < min_score: + continue + + rect = cv2.minAreaRect(points) + bbox = cv2.boxPoints(rect) * img_scale + bbox = bbox.astype('int32') + bboxes.append(bbox.reshape(-1)) + # print(bboxes) + # save txt + res_file = os.path.join(txt_path,'{}.txt'.format(os.path.splitext(os.path.basename(im_fn))[0])) + with open(res_file, 'w') as f: + for b_idx, bbox in enumerate(bboxes): + values = [int(v) for v in 
bbox] + line = "%d, %d, %d, %d, %d, %d, %d, %d\n" % tuple(values) + f.write(line) + + + # show result + # for bbox in bboxes: + # cv2.drawContours(im, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2) + # cv2.namedWindow('result', cv2.WINDOW_NORMAL) + # cv2.imshow('result', im) + # cv2.waitKey() + + diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_nearest.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_nearest.py index 13240c3f4f39733c2e1c984aa5175bdad516fca9..813bf982af3727544ff69c9aff1312c9a6c9f759 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_nearest.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pth_bintotxt_nearest.py @@ -1,105 +1,105 @@ -import os -import sys -import numpy as np -import torch -import cv2 -from pypse import pse as pypse -import torch.nn.functional as F - - -img_path = sys.argv[1] -bin_path = sys.argv[2] -txt_path = sys.argv[3] - -if not os.path.exists(txt_path): - os.makedirs(txt_path) - -kernel_num=7 -min_kernel_area=5.0 -scale=1 -min_score = 0.9 -min_area = 600 - - -def get_images(): - ''' - find image files in test data path - :return: list of files found - ''' - files = [] - exts = ['jpg', 'png', 'jpeg', 'JPG'] - - for parent, _, filenames in os.walk(img_path): - for filename in filenames: - for ext in exts: - if filename.endswith(ext): - files.append(os.path.join(parent, filename)) - break - return files - - -im_fn_list = get_images() -for im_fn in im_fn_list: - print(im_fn) - im = cv2.imread(im_fn) - idx = os.path.basename(im_fn).split('/')[-1].split('.')[0].split('_')[1] - seg_maps = np.fromfile(bin_path+"/img_{}_1.bin".format(idx), "float32") - seg_maps = np.reshape(seg_maps, (1, 7, 704, 1216)) - seg_maps = torch.from_numpy(seg_maps) - - # Resize 算子 - # seg_maps = F.interpolate(seg_maps, size=(704, 1216), mode='bilinear', align_corners=False) - # print(seg_maps) - # print(seg_maps.shape) - # - # seg_maps = seg_maps.float() - score = torch.sigmoid(seg_maps[:, 0, :, :]) - outputs = (torch.sign(seg_maps - 1.0) + 1) / 2 - - text = outputs[:, 0, :, :] - kernels = outputs[:, 0:kernel_num, :, :] * text - - score = score.data.numpy()[0].astype(np.float32) - text = text.data.numpy()[0].astype(np.uint8) - kernels = kernels.numpy()[0].astype(np.uint8) - - # python version pse - pred = pypse(kernels, min_kernel_area / (scale * scale)) - - img_scale = (im.shape[1] * 1.0 / pred.shape[1], im.shape[0] * 1.0 / pred.shape[0]) - label = pred - label_num = np.max(label) + 1 - bboxes = [] - - for i in range(1, label_num): - points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1] - - if points.shape[0] < min_area: - continue - - score_i = np.mean(score[label == i]) - if score_i < min_score: - continue - - rect = cv2.minAreaRect(points) - bbox = cv2.boxPoints(rect) * img_scale - bbox = bbox.astype('int32') - bboxes.append(bbox.reshape(-1)) - # print(bboxes) - # save txt - res_file = os.path.join(txt_path,'{}.txt'.format(os.path.splitext(os.path.basename(im_fn))[0])) - with open(res_file, 'w') as f: - for b_idx, bbox in enumerate(bboxes): - values = [int(v) for v in bbox] - line = "%d, %d, %d, %d, %d, %d, %d, %d\n" % tuple(values) - f.write(line) - - - # show result - # for bbox in bboxes: - # cv2.drawContours(im, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2) - # cv2.namedWindow('result', cv2.WINDOW_NORMAL) - # cv2.imshow('result', im) - # cv2.waitKey() - - +import os +import sys +import numpy as np +import torch +import cv2 +from pypse import pse as pypse +import torch.nn.functional as F + + +img_path = sys.argv[1] 
+bin_path = sys.argv[2] +txt_path = sys.argv[3] + +if not os.path.exists(txt_path): + os.makedirs(txt_path) + +kernel_num=7 +min_kernel_area=5.0 +scale=1 +min_score = 0.9 +min_area = 600 + + +def get_images(): + ''' + find image files in test data path + :return: list of files found + ''' + files = [] + exts = ['jpg', 'png', 'jpeg', 'JPG'] + + for parent, _, filenames in os.walk(img_path): + for filename in filenames: + for ext in exts: + if filename.endswith(ext): + files.append(os.path.join(parent, filename)) + break + return files + + +im_fn_list = get_images() +for im_fn in im_fn_list: + print(im_fn) + im = cv2.imread(im_fn) + idx = os.path.basename(im_fn).split('/')[-1].split('.')[0].split('_')[1] + seg_maps = np.fromfile(bin_path+"/img_{}_1.bin".format(idx), "float32") + seg_maps = np.reshape(seg_maps, (1, 7, 704, 1216)) + seg_maps = torch.from_numpy(seg_maps) + + # Resize 算子 + # seg_maps = F.interpolate(seg_maps, size=(704, 1216), mode='bilinear', align_corners=False) + # print(seg_maps) + # print(seg_maps.shape) + # + # seg_maps = seg_maps.float() + score = torch.sigmoid(seg_maps[:, 0, :, :]) + outputs = (torch.sign(seg_maps - 1.0) + 1) / 2 + + text = outputs[:, 0, :, :] + kernels = outputs[:, 0:kernel_num, :, :] * text + + score = score.data.numpy()[0].astype(np.float32) + text = text.data.numpy()[0].astype(np.uint8) + kernels = kernels.numpy()[0].astype(np.uint8) + + # python version pse + pred = pypse(kernels, min_kernel_area / (scale * scale)) + + img_scale = (im.shape[1] * 1.0 / pred.shape[1], im.shape[0] * 1.0 / pred.shape[0]) + label = pred + label_num = np.max(label) + 1 + bboxes = [] + + for i in range(1, label_num): + points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1] + + if points.shape[0] < min_area: + continue + + score_i = np.mean(score[label == i]) + if score_i < min_score: + continue + + rect = cv2.minAreaRect(points) + bbox = cv2.boxPoints(rect) * img_scale + bbox = bbox.astype('int32') + bboxes.append(bbox.reshape(-1)) + # print(bboxes) + # save txt + res_file = os.path.join(txt_path,'{}.txt'.format(os.path.splitext(os.path.basename(im_fn))[0])) + with open(res_file, 'w') as f: + for b_idx, bbox in enumerate(bboxes): + values = [int(v) for v in bbox] + line = "%d, %d, %d, %d, %d, %d, %d, %d\n" % tuple(values) + f.write(line) + + + # show result + # for bbox in bboxes: + # cv2.drawContours(im, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2) + # cv2.namedWindow('result', cv2.WINDOW_NORMAL) + # cv2.imshow('result', im) + # cv2.waitKey() + + diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pypse.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pypse.py index 3200742b96b7c06f07ef9c78509a6b65317cbb84..70436680e1877cc6e06b3c2418bb6e749cb93425 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pypse.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/pypse.py @@ -1,56 +1,56 @@ -import numpy as np -import cv2 -import queue as Queue - -def pse(kernals, min_area): - kernal_num = len(kernals) - pred = np.zeros(kernals[0].shape, dtype='int32') - - label_num, label = cv2.connectedComponents(kernals[kernal_num - 1], connectivity=4) - - for label_idx in range(1, label_num): - if np.sum(label == label_idx) < min_area: - label[label == label_idx] = 0 - - queue = Queue.Queue(maxsize = 0) - next_queue = Queue.Queue(maxsize = 0) - points = np.array(np.where(label > 0)).transpose((1, 0)) - - for point_idx in range(points.shape[0]): - x, y = points[point_idx, 0], points[point_idx, 1] - l = label[x, y] - queue.put((x, y, l)) - pred[x, y] = l - - dx = [-1, 1, 0, 0] - 
dy = [0, 0, -1, 1] - for kernal_idx in range(kernal_num - 2, -1, -1): - kernal = kernals[kernal_idx].copy() - while not queue.empty(): - (x, y, l) = queue.get() - - is_edge = True - for j in range(4): - tmpx = x + dx[j] - tmpy = y + dy[j] - if tmpx < 0 or tmpx >= kernal.shape[0] or tmpy < 0 or tmpy >= kernal.shape[1]: - continue - if kernal[tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0: - continue - - queue.put((tmpx, tmpy, l)) - pred[tmpx, tmpy] = l - is_edge = False - if is_edge: - next_queue.put((x, y, l)) - - # kernal[pred > 0] = 0 - queue, next_queue = next_queue, queue - - # points = np.array(np.where(pred > 0)).transpose((1, 0)) - # for point_idx in range(points.shape[0]): - # x, y = points[point_idx, 0], points[point_idx, 1] - # l = pred[x, y] - # queue.put((x, y, l)) - +import numpy as np +import cv2 +import queue as Queue + +def pse(kernals, min_area): + kernal_num = len(kernals) + pred = np.zeros(kernals[0].shape, dtype='int32') + + label_num, label = cv2.connectedComponents(kernals[kernal_num - 1], connectivity=4) + + for label_idx in range(1, label_num): + if np.sum(label == label_idx) < min_area: + label[label == label_idx] = 0 + + queue = Queue.Queue(maxsize = 0) + next_queue = Queue.Queue(maxsize = 0) + points = np.array(np.where(label > 0)).transpose((1, 0)) + + for point_idx in range(points.shape[0]): + x, y = points[point_idx, 0], points[point_idx, 1] + l = label[x, y] + queue.put((x, y, l)) + pred[x, y] = l + + dx = [-1, 1, 0, 0] + dy = [0, 0, -1, 1] + for kernal_idx in range(kernal_num - 2, -1, -1): + kernal = kernals[kernal_idx].copy() + while not queue.empty(): + (x, y, l) = queue.get() + + is_edge = True + for j in range(4): + tmpx = x + dx[j] + tmpy = y + dy[j] + if tmpx < 0 or tmpx >= kernal.shape[0] or tmpy < 0 or tmpy >= kernal.shape[1]: + continue + if kernal[tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0: + continue + + queue.put((tmpx, tmpy, l)) + pred[tmpx, tmpy] = l + is_edge = False + if is_edge: + next_queue.put((x, y, l)) + + # kernal[pred > 0] = 0 + queue, next_queue = next_queue, queue + + # points = np.array(np.where(pred > 0)).transpose((1, 0)) + # for point_idx in range(points.shape[0]): + # x, y = points[point_idx, 0], points[point_idx, 1] + # l = pred[x, y] + # queue.put((x, y, l)) + return pred \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_bilinear.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_bilinear.py index b9b81a56887eff43dafc93ed269d491605e94720..8ce2e8a367a42e173e31589ed7441f3d2c37dc62 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_bilinear.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_bilinear.py @@ -1,296 +1,296 @@ -import numpy as np -import onnx -import onnxruntime as rt -from onnx import shape_inference -import sys -model_path = sys.argv[1] -model = onnx.load(model_path) - -# model = onnx.shape_inference.infer_shapes(model) - -def getNodeAndIOname(nodename,model): - for i in range(len(model.graph.node)): - if model.graph.node[i].name == nodename: - Node = model.graph.node[i] - return Node,input_name,output_name - -def FindPeerOutNode(graph, edge_name): - for i, x in enumerate(graph.node): - if edge_name in x.output: - return i - return -1 - - -def RemoveNode(graph, node_list): - cnt = 0 - for i in range(len(model.graph.node)): - if model.graph.node[i].name in node_list: - graph.node.remove(graph.node[i - cnt]) # 因为节点个数变少了 - cnt += 1 -def FindDependNode(graph, end_node, start_node): - ''' - find dependency node, [end_node, start_node) - 
''' - def dfs(graph, idx, start_node, n_list): - for edge in graph.node[idx].input: - node_idx = FindPeerOutNode(graph, edge) - if node_idx < 0: - # print('bad peerout index') - continue - n = graph.node[node_idx] - if n.name != start_node: - n_list.append(n.name) - # print('n.name', n.name) - n_list = dfs(graph, node_idx, start_node, n_list) - return n_list - - index = GetNodeIndex(graph, end_node) - n_list = [end_node, ] - return dfs(graph, index, start_node, n_list) - - -def createGraphMemberMap(graph_member_list): - member_map=dict(); - for n in graph_member_list: - member_map[n.name]=n; - return member_map - -def GetNodeIndex(graph, node_name): - index = 0 - for i in range(len(graph.node)): - if graph.node[i].name == node_name: - index = i - break - return index -def RemoveNode2(graph,node_list): - for name in node_list: - print("name",name) - ind = GetNodeIndex(graph,name) - print("ind:",ind) - graph.node.remove(graph.node[ind]) - - -for i in range(len(model.graph.node)): - if model.graph.node[i].op_type == "Resize": - print("Resize", i, model.graph.node[i].input, model.graph.node[i].output) - -sizes1 = onnx.helper.make_tensor('size1', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes2 = onnx.helper.make_tensor('size2', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes3 = onnx.helper.make_tensor('size3', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes4 = onnx.helper.make_tensor('size4', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes5 = onnx.helper.make_tensor('size5', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) -sizes6 = onnx.helper.make_tensor('size6', onnx.TensorProto.FLOAT, [4], [1, 1, 8, 8]) -sizes7 = onnx.helper.make_tensor('size7', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) - - -model.graph.initializer.append(sizes1) -model.graph.initializer.append(sizes2) -model.graph.initializer.append(sizes3) -model.graph.initializer.append(sizes4) -model.graph.initializer.append(sizes5) -model.graph.initializer.append(sizes6) -model.graph.initializer.append(sizes7) - - -newnode = onnx.helper.make_node( - 'Resize', - name='Resize_196', - # inputs=['551', '564', '572', 'size1'], - inputs=['551', '564', 'size1'], - outputs=['573'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode2 = onnx.helper.make_node( - 'Resize', - name='Resize_224', - # inputs=['347', '367', '375', 'size2'], - inputs=['579', '592', 'size2'], - outputs=['601'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode3 = onnx.helper.make_node( - 'Resize', - name='Resize_252', - # inputs=['347', '367', '375', 'size2'], - inputs=['607', '620', 'size3'], - outputs=['629'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode4 = onnx.helper.make_node( - 'Resize', - name='Resize_285', - # inputs=['347', '367', '375', 'size2'], - inputs=['607', '653', 'size4'], - outputs=['662'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode5 = onnx.helper.make_node( - 'Resize', - name='Resize_312', - # inputs=['347', '367', '375', 'size2'], - inputs=['579', '680', 'size5'], - outputs=['689'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode6 = onnx.helper.make_node( - 'Resize', - name='Resize_339', - # inputs=['347', '367', '375', 
'size2'], - inputs=['551', '707', 'size6'], - outputs=['716'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode7= onnx.helper.make_node( - 'Resize', - name='Resize_371', - # inputs=['347', '367', '375', 'size2'], - inputs=['721', '739', 'size7'], - outputs=['output1'], - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - - - -model.graph.node.remove(model.graph.node[196]) -model.graph.node.insert(196, newnode) - -model.graph.node.remove(model.graph.node[224]) -model.graph.node.insert(224, newnode2) - -model.graph.node.remove(model.graph.node[252]) -model.graph.node.insert(252, newnode3) - -model.graph.node.remove(model.graph.node[285]) -model.graph.node.insert(285, newnode4) - -model.graph.node.remove(model.graph.node[312]) -model.graph.node.insert(312, newnode5) - -model.graph.node.remove(model.graph.node[339]) -model.graph.node.insert(339, newnode6) - -model.graph.node.remove(model.graph.node[371]) -model.graph.node.insert(371, newnode7) - -slice_node1_1 = FindDependNode(model.graph, 'Slice_192', 'Relu_174') #结尾(will be deleted) qishi -print('node map:', slice_node1_1) - -slice_node1_2 = FindDependNode(model.graph, 'Cast_193', 'Relu_177') -print('node map:', slice_node1_2) - -slice_node2_1 = FindDependNode(model.graph, 'Slice_220', 'Relu_202') -print('node map:', slice_node2_1) - -slice_node2_2 = FindDependNode(model.graph, 'Cast_221', 'Relu_205') -print('node map:', slice_node2_2) - -slice_node3_1 = FindDependNode(model.graph, 'Slice_248', 'Relu_230') -print('node map:', slice_node3_1) -slice_node3_2 = FindDependNode(model.graph, 'Cast_249', 'Relu_233') -print('node map:', slice_node3_2) - - -slice_node4_1 = FindDependNode(model.graph, 'Slice_281', 'Relu_230') -print('node map:', slice_node4_1) -slice_node4_2 = FindDependNode(model.graph, 'Cast_282', 'Relu_258') -print('node map:', slice_node4_2) - - -slice_node5_1 = FindDependNode(model.graph, 'Slice_308', 'Relu_202') -print('node map:', slice_node5_1) -slice_node5_2 = FindDependNode(model.graph, 'Cast_309', 'Relu_258') -print('node map:', slice_node5_2) - -slice_node6_1 = FindDependNode(model.graph, 'Slice_335', 'Relu_174') -print('node map:', slice_node6_1) -slice_node6_2 = FindDependNode(model.graph, 'Cast_336', 'Relu_258') -print('node map:', slice_node6_2) - -slice_node7_1 = FindDependNode(model.graph, 'Slice_367', 'Conv_344') -print('node map:', slice_node7_1) -slice_node7_2 = FindDependNode(model.graph, 'Cast_368', 'actual_input_1') -print('node map:', slice_node7_2) - - -node_list = [] -node_list.extend(slice_node1_1) -node_list.extend(slice_node1_2) -node_list.extend(slice_node2_1) -node_list.extend(slice_node2_2) -node_list.extend(slice_node3_1) -node_list.extend(slice_node3_2) -node_list.extend(slice_node4_1) -node_list.extend(slice_node4_2) -node_list.extend(slice_node5_1) -node_list.extend(slice_node5_2) -node_list.extend(slice_node6_1) -node_list.extend(slice_node6_2) -node_list.extend(slice_node7_1) -node_list.extend(slice_node7_2) -node_list.extend(['Concat_194']) - -node_list.extend(['Concat_222']) - -node_list.extend(['Concat_250']) - -node_list.extend(['Concat_283']) - -node_list.extend(['Concat_337']) - -node_list.extend(['Concat_369']) -node_list.extend(['Concat_310']) -#node_list.extend(['Concat_308','Constant_140','Constant_166','Constant_192','Constant_224','Constant_251','Constant_278','Constant_301','Constant_309']) -print(node_list) 
-RemoveNode2(model.graph, node_list) - -#移除最后一个Resize -# 去除最后一个resize节点 -node_list=[] -node_list.extend(['Resize_371']) -print(node_list) -RemoveNode2(model.graph, node_list) #将最后一个Resize节点移除 -#将ouput1移除,并建立一个新的,插入进去 - -out0_info = onnx.helper.make_tensor_value_info('721', onnx.TensorProto.FLOAT, [-1, 7, 176, 304]) -model.graph.output.remove(model.graph.output[0]) -model.graph.output.insert(0, out0_info) - -onnx.checker.check_model(model) - - -onnx.save(model, sys.argv[1].split('.')[0] + "_revised.onnx") -# m = onnx.load("modify.onnx") - - - +import numpy as np +import onnx +import onnxruntime as rt +from onnx import shape_inference +import sys +model_path = sys.argv[1] +model = onnx.load(model_path) + +# model = onnx.shape_inference.infer_shapes(model) + +def getNodeAndIOname(nodename,model): + for i in range(len(model.graph.node)): + if model.graph.node[i].name == nodename: + Node = model.graph.node[i] + return Node,input_name,output_name + +def FindPeerOutNode(graph, edge_name): + for i, x in enumerate(graph.node): + if edge_name in x.output: + return i + return -1 + + +def RemoveNode(graph, node_list): + cnt = 0 + for i in range(len(model.graph.node)): + if model.graph.node[i].name in node_list: + graph.node.remove(graph.node[i - cnt]) # 因为节点个数变少了 + cnt += 1 +def FindDependNode(graph, end_node, start_node): + ''' + find dependency node, [end_node, start_node) + ''' + def dfs(graph, idx, start_node, n_list): + for edge in graph.node[idx].input: + node_idx = FindPeerOutNode(graph, edge) + if node_idx < 0: + # print('bad peerout index') + continue + n = graph.node[node_idx] + if n.name != start_node: + n_list.append(n.name) + # print('n.name', n.name) + n_list = dfs(graph, node_idx, start_node, n_list) + return n_list + + index = GetNodeIndex(graph, end_node) + n_list = [end_node, ] + return dfs(graph, index, start_node, n_list) + + +def createGraphMemberMap(graph_member_list): + member_map=dict(); + for n in graph_member_list: + member_map[n.name]=n; + return member_map + +def GetNodeIndex(graph, node_name): + index = 0 + for i in range(len(graph.node)): + if graph.node[i].name == node_name: + index = i + break + return index +def RemoveNode2(graph,node_list): + for name in node_list: + print("name",name) + ind = GetNodeIndex(graph,name) + print("ind:",ind) + graph.node.remove(graph.node[ind]) + + +for i in range(len(model.graph.node)): + if model.graph.node[i].op_type == "Resize": + print("Resize", i, model.graph.node[i].input, model.graph.node[i].output) + +sizes1 = onnx.helper.make_tensor('size1', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes2 = onnx.helper.make_tensor('size2', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes3 = onnx.helper.make_tensor('size3', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes4 = onnx.helper.make_tensor('size4', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes5 = onnx.helper.make_tensor('size5', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) +sizes6 = onnx.helper.make_tensor('size6', onnx.TensorProto.FLOAT, [4], [1, 1, 8, 8]) +sizes7 = onnx.helper.make_tensor('size7', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) + + +model.graph.initializer.append(sizes1) +model.graph.initializer.append(sizes2) +model.graph.initializer.append(sizes3) +model.graph.initializer.append(sizes4) +model.graph.initializer.append(sizes5) +model.graph.initializer.append(sizes6) +model.graph.initializer.append(sizes7) + + +newnode = onnx.helper.make_node( + 'Resize', + name='Resize_196', + # inputs=['551', '564', '572', 'size1'], + inputs=['551', '564', 'size1'], + 
outputs=['573'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode2 = onnx.helper.make_node( + 'Resize', + name='Resize_224', + # inputs=['347', '367', '375', 'size2'], + inputs=['579', '592', 'size2'], + outputs=['601'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode3 = onnx.helper.make_node( + 'Resize', + name='Resize_252', + # inputs=['347', '367', '375', 'size2'], + inputs=['607', '620', 'size3'], + outputs=['629'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode4 = onnx.helper.make_node( + 'Resize', + name='Resize_285', + # inputs=['347', '367', '375', 'size2'], + inputs=['607', '653', 'size4'], + outputs=['662'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode5 = onnx.helper.make_node( + 'Resize', + name='Resize_312', + # inputs=['347', '367', '375', 'size2'], + inputs=['579', '680', 'size5'], + outputs=['689'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode6 = onnx.helper.make_node( + 'Resize', + name='Resize_339', + # inputs=['347', '367', '375', 'size2'], + inputs=['551', '707', 'size6'], + outputs=['716'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode7= onnx.helper.make_node( + 'Resize', + name='Resize_371', + # inputs=['347', '367', '375', 'size2'], + inputs=['721', '739', 'size7'], + outputs=['output1'], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + + + +model.graph.node.remove(model.graph.node[196]) +model.graph.node.insert(196, newnode) + +model.graph.node.remove(model.graph.node[224]) +model.graph.node.insert(224, newnode2) + +model.graph.node.remove(model.graph.node[252]) +model.graph.node.insert(252, newnode3) + +model.graph.node.remove(model.graph.node[285]) +model.graph.node.insert(285, newnode4) + +model.graph.node.remove(model.graph.node[312]) +model.graph.node.insert(312, newnode5) + +model.graph.node.remove(model.graph.node[339]) +model.graph.node.insert(339, newnode6) + +model.graph.node.remove(model.graph.node[371]) +model.graph.node.insert(371, newnode7) + +slice_node1_1 = FindDependNode(model.graph, 'Slice_192', 'Relu_174') #结尾(will be deleted) qishi +print('node map:', slice_node1_1) + +slice_node1_2 = FindDependNode(model.graph, 'Cast_193', 'Relu_177') +print('node map:', slice_node1_2) + +slice_node2_1 = FindDependNode(model.graph, 'Slice_220', 'Relu_202') +print('node map:', slice_node2_1) + +slice_node2_2 = FindDependNode(model.graph, 'Cast_221', 'Relu_205') +print('node map:', slice_node2_2) + +slice_node3_1 = FindDependNode(model.graph, 'Slice_248', 'Relu_230') +print('node map:', slice_node3_1) +slice_node3_2 = FindDependNode(model.graph, 'Cast_249', 'Relu_233') +print('node map:', slice_node3_2) + + +slice_node4_1 = FindDependNode(model.graph, 'Slice_281', 'Relu_230') +print('node map:', slice_node4_1) +slice_node4_2 = FindDependNode(model.graph, 'Cast_282', 'Relu_258') +print('node map:', slice_node4_2) + + +slice_node5_1 = FindDependNode(model.graph, 'Slice_308', 'Relu_202') +print('node map:', slice_node5_1) +slice_node5_2 = FindDependNode(model.graph, 'Cast_309', 
'Relu_258') +print('node map:', slice_node5_2) + +slice_node6_1 = FindDependNode(model.graph, 'Slice_335', 'Relu_174') +print('node map:', slice_node6_1) +slice_node6_2 = FindDependNode(model.graph, 'Cast_336', 'Relu_258') +print('node map:', slice_node6_2) + +slice_node7_1 = FindDependNode(model.graph, 'Slice_367', 'Conv_344') +print('node map:', slice_node7_1) +slice_node7_2 = FindDependNode(model.graph, 'Cast_368', 'actual_input_1') +print('node map:', slice_node7_2) + + +node_list = [] +node_list.extend(slice_node1_1) +node_list.extend(slice_node1_2) +node_list.extend(slice_node2_1) +node_list.extend(slice_node2_2) +node_list.extend(slice_node3_1) +node_list.extend(slice_node3_2) +node_list.extend(slice_node4_1) +node_list.extend(slice_node4_2) +node_list.extend(slice_node5_1) +node_list.extend(slice_node5_2) +node_list.extend(slice_node6_1) +node_list.extend(slice_node6_2) +node_list.extend(slice_node7_1) +node_list.extend(slice_node7_2) +node_list.extend(['Concat_194']) + +node_list.extend(['Concat_222']) + +node_list.extend(['Concat_250']) + +node_list.extend(['Concat_283']) + +node_list.extend(['Concat_337']) + +node_list.extend(['Concat_369']) +node_list.extend(['Concat_310']) +#node_list.extend(['Concat_308','Constant_140','Constant_166','Constant_192','Constant_224','Constant_251','Constant_278','Constant_301','Constant_309']) +print(node_list) +RemoveNode2(model.graph, node_list) + +#移除最后一个Resize +# 去除最后一个resize节点 +node_list=[] +node_list.extend(['Resize_371']) +print(node_list) +RemoveNode2(model.graph, node_list) #将最后一个Resize节点移除 +#将ouput1移除,并建立一个新的,插入进去 + +out0_info = onnx.helper.make_tensor_value_info('721', onnx.TensorProto.FLOAT, [-1, 7, 176, 304]) +model.graph.output.remove(model.graph.output[0]) +model.graph.output.insert(0, out0_info) + +onnx.checker.check_model(model) + + +onnx.save(model, sys.argv[1].split('.')[0] + "_revised.onnx") +# m = onnx.load("modify.onnx") + + + diff --git a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_nearest2.py b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_nearest2.py index 509e413da4ea0df6b666d137f9539f3636668324..241aad3caf85675a2c299d692477e37704513c52 100644 --- a/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_nearest2.py +++ b/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch/revise_PSENET_nearest2.py @@ -1,274 +1,274 @@ -import numpy as np -import onnx -import onnxruntime as rt -from onnx import shape_inference -import sys - - -model_path = sys.argv[1] -model = onnx.load(model_path) - - -def getNodeAndIOname(nodename,model): - for i in range(len(model.graph.node)): - if model.graph.node[i].name == nodename: - Node = model.graph.node[i] - return Node,input_name,output_name - -def FindPeerOutNode(graph, edge_name): - for i, x in enumerate(graph.node): - if edge_name in x.output: - return i - return -1 - - -def RemoveNode(graph, node_list): - cnt = 0 - for i in range(len(model.graph.node)): - if model.graph.node[i].name in node_list: - graph.node.remove(graph.node[i - cnt]) # 因为节点个数变少了 - cnt += 1 - - -def FindDependNode(graph, end_node, start_node): - ''' - find dependency node, [end_node, start_node) - ''' - def dfs(graph, idx, start_node, n_list): - for edge in graph.node[idx].input: - node_idx = FindPeerOutNode(graph, edge) - if node_idx < 0: - # print('bad peerout index') - continue - n = graph.node[node_idx] - if n.name != start_node: - n_list.append(n.name) - # print('n.name', n.name) - n_list = dfs(graph, node_idx, start_node, n_list) - return n_list - - index = GetNodeIndex(graph, end_node) - n_list = 
[end_node, ] - return dfs(graph, index, start_node, n_list) - - -def createGraphMemberMap(graph_member_list): - member_map=dict(); - for n in graph_member_list: - member_map[n.name]=n; - return member_map - -def GetNodeIndex(graph, node_name): - index = 0 - for i in range(len(graph.node)): - if graph.node[i].name == node_name: - index = i - break - return index -def RemoveNode2(graph,node_list): - for name in node_list: - print("name",name) - ind = GetNodeIndex(graph,name) - print("ind:",ind) - graph.node.remove(graph.node[ind]) - - -for i in range(len(model.graph.node)): - if model.graph.node[i].op_type == "Resize": - print("Resize", i, model.graph.node[i].input, model.graph.node[i].output) - -sizes1 = onnx.helper.make_tensor('size1', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes2 = onnx.helper.make_tensor('size2', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes3 = onnx.helper.make_tensor('size3', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes4 = onnx.helper.make_tensor('size4', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) -sizes5 = onnx.helper.make_tensor('size5', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) -sizes6 = onnx.helper.make_tensor('size6', onnx.TensorProto.FLOAT, [4], [1, 1, 8, 8]) -sizes7 = onnx.helper.make_tensor('size7', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) - - -model.graph.initializer.append(sizes1) -model.graph.initializer.append(sizes2) -model.graph.initializer.append(sizes3) -model.graph.initializer.append(sizes4) -model.graph.initializer.append(sizes5) -model.graph.initializer.append(sizes6) -model.graph.initializer.append(sizes7) - - -newnode = onnx.helper.make_node( - 'Resize', - name='Resize_141', - # inputs=['551', '564', '572', 'size1'], - inputs=['551', '564', 'size1'], - outputs=['573'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode2 = onnx.helper.make_node( - 'Resize', - name='Resize_165', - # inputs=['347', '367', '375', 'size2'], - inputs=['577', '590', 'size2'], - outputs=['599'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode3 = onnx.helper.make_node( - 'Resize', - name='Resize_189', - # inputs=['347', '367', '375', 'size2'], - inputs=['603', '616', 'size3'], - outputs=['625'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - -newnode4 = onnx.helper.make_node( - 'Resize', - name='Resize_219', - # inputs=['347', '367', '375', 'size2'], - inputs=['603', '647', 'size4'], - outputs=['656'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode5 = onnx.helper.make_node( - 'Resize', - name='Resize_246', - # inputs=['347', '367', '375', 'size2'], - inputs=['577', '674', 'size5'], - outputs=['683'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode6 = onnx.helper.make_node( - 'Resize', - name='Resize_273', - # inputs=['347', '367', '375', 'size2'], - inputs=['551', '701', 'size6'], - outputs=['710'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - nearest_mode='floor' -) - - -newnode7= onnx.helper.make_node( - 'Resize', - name='Resize_304', - # inputs=['347', '367', '375', 'size2'], - inputs=['715', '733', 'size7'], - outputs=['output1'], - coordinate_transformation_mode='asymmetric', - cubic_coeff_a=-0.75, - mode='nearest', - 
nearest_mode='floor' -) - - -model.graph.node.remove(model.graph.node[141]) -model.graph.node.insert(141, newnode) - -model.graph.node.remove(model.graph.node[165]) -model.graph.node.insert(165, newnode2) - -model.graph.node.remove(model.graph.node[189]) -model.graph.node.insert(189, newnode3) - -model.graph.node.remove(model.graph.node[219]) -model.graph.node.insert(219, newnode4) - -model.graph.node.remove(model.graph.node[246]) -model.graph.node.insert(246, newnode5) - -model.graph.node.remove(model.graph.node[273]) -model.graph.node.insert(273, newnode6) - -model.graph.node.remove(model.graph.node[304]) -model.graph.node.insert(304, newnode7) - -slice_node1_1 = FindDependNode(model.graph, 'Slice_137', 'Relu_120') #结尾(will be deleted) qishi -print('node map:', slice_node1_1) - -slice_node1_2 = FindDependNode(model.graph, 'Cast_138', 'Relu_122') -print('node map:', slice_node1_2) - -slice_node2_1 = FindDependNode(model.graph, 'Slice_161', 'Relu_144') -print('node map:', slice_node2_1) - -slice_node2_2 = FindDependNode(model.graph, 'Cast_162', 'Relu_146') -print('node map:', slice_node2_2) - -slice_node3_1 = FindDependNode(model.graph, 'Slice_185', 'Relu_168') -print('node map:', slice_node3_1) -slice_node3_2 = FindDependNode(model.graph, 'Cast_186', 'Relu_170') -print('node map:', slice_node3_2) - - -slice_node4_1 = FindDependNode(model.graph, 'Slice_215', 'Relu_168') -print('node map:', slice_node4_1) -slice_node4_2 = FindDependNode(model.graph, 'Cast_216', 'Relu_192') -print('node map:', slice_node4_2) - - -slice_node5_1 = FindDependNode(model.graph, 'Slice_242', 'Relu_144') -print('node map:', slice_node5_1) -slice_node5_2 = FindDependNode(model.graph, 'Cast_243', 'Relu_192') -print('node map:', slice_node5_2) - -slice_node6_1 = FindDependNode(model.graph, 'Slice_269', 'Relu_120') -print('node map:', slice_node6_1) -slice_node6_2 = FindDependNode(model.graph, 'Cast_270', 'Relu_192') -print('node map:', slice_node6_2) - -slice_node7_1 = FindDependNode(model.graph, 'Slice_300', 'Conv_277') -print('node map:', slice_node7_1) -slice_node7_2 = FindDependNode(model.graph, 'Cast_301', 'actual_input_1') -print('node map:', slice_node7_2) - - -node_list = [] -node_list.extend(slice_node1_1) -node_list.extend(slice_node1_2) -node_list.extend(slice_node2_1) -node_list.extend(slice_node2_2) -node_list.extend(slice_node3_1) -node_list.extend(slice_node3_2) -node_list.extend(slice_node4_1) -node_list.extend(slice_node4_2) -node_list.extend(slice_node5_1) -node_list.extend(slice_node5_2) -node_list.extend(slice_node6_1) -node_list.extend(slice_node6_2) -node_list.extend(slice_node7_1) -node_list.extend(slice_node7_2) -node_list.extend(['Concat_139']) -node_list.extend(['Concat_163']) -node_list.extend(['Concat_187']) -node_list.extend(['Concat_217']) -node_list.extend(['Concat_271']) -node_list.extend(['Concat_302']) -node_list.extend(['Concat_244']) -print(node_list) -RemoveNode2(model.graph, node_list) - - -onnx.checker.check_model(model) -onnx.save(model, sys.argv[1].split('.')[0] + "_revised2.onnx") +import numpy as np +import onnx +import onnxruntime as rt +from onnx import shape_inference +import sys + + +model_path = sys.argv[1] +model = onnx.load(model_path) + + +def getNodeAndIOname(nodename,model): + for i in range(len(model.graph.node)): + if model.graph.node[i].name == nodename: + Node = model.graph.node[i] + return Node,input_name,output_name + +def FindPeerOutNode(graph, edge_name): + for i, x in enumerate(graph.node): + if edge_name in x.output: + return i + return -1 + + +def 
RemoveNode(graph, node_list): + cnt = 0 + for i in range(len(model.graph.node)): + if model.graph.node[i].name in node_list: + graph.node.remove(graph.node[i - cnt]) # 因为节点个数变少了 + cnt += 1 + + +def FindDependNode(graph, end_node, start_node): + ''' + find dependency node, [end_node, start_node) + ''' + def dfs(graph, idx, start_node, n_list): + for edge in graph.node[idx].input: + node_idx = FindPeerOutNode(graph, edge) + if node_idx < 0: + # print('bad peerout index') + continue + n = graph.node[node_idx] + if n.name != start_node: + n_list.append(n.name) + # print('n.name', n.name) + n_list = dfs(graph, node_idx, start_node, n_list) + return n_list + + index = GetNodeIndex(graph, end_node) + n_list = [end_node, ] + return dfs(graph, index, start_node, n_list) + + +def createGraphMemberMap(graph_member_list): + member_map=dict(); + for n in graph_member_list: + member_map[n.name]=n; + return member_map + +def GetNodeIndex(graph, node_name): + index = 0 + for i in range(len(graph.node)): + if graph.node[i].name == node_name: + index = i + break + return index +def RemoveNode2(graph,node_list): + for name in node_list: + print("name",name) + ind = GetNodeIndex(graph,name) + print("ind:",ind) + graph.node.remove(graph.node[ind]) + + +for i in range(len(model.graph.node)): + if model.graph.node[i].op_type == "Resize": + print("Resize", i, model.graph.node[i].input, model.graph.node[i].output) + +sizes1 = onnx.helper.make_tensor('size1', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes2 = onnx.helper.make_tensor('size2', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes3 = onnx.helper.make_tensor('size3', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes4 = onnx.helper.make_tensor('size4', onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) +sizes5 = onnx.helper.make_tensor('size5', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) +sizes6 = onnx.helper.make_tensor('size6', onnx.TensorProto.FLOAT, [4], [1, 1, 8, 8]) +sizes7 = onnx.helper.make_tensor('size7', onnx.TensorProto.FLOAT, [4], [1, 1, 4, 4]) + + +model.graph.initializer.append(sizes1) +model.graph.initializer.append(sizes2) +model.graph.initializer.append(sizes3) +model.graph.initializer.append(sizes4) +model.graph.initializer.append(sizes5) +model.graph.initializer.append(sizes6) +model.graph.initializer.append(sizes7) + + +newnode = onnx.helper.make_node( + 'Resize', + name='Resize_141', + # inputs=['551', '564', '572', 'size1'], + inputs=['551', '564', 'size1'], + outputs=['573'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode2 = onnx.helper.make_node( + 'Resize', + name='Resize_165', + # inputs=['347', '367', '375', 'size2'], + inputs=['577', '590', 'size2'], + outputs=['599'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode3 = onnx.helper.make_node( + 'Resize', + name='Resize_189', + # inputs=['347', '367', '375', 'size2'], + inputs=['603', '616', 'size3'], + outputs=['625'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + +newnode4 = onnx.helper.make_node( + 'Resize', + name='Resize_219', + # inputs=['347', '367', '375', 'size2'], + inputs=['603', '647', 'size4'], + outputs=['656'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode5 = onnx.helper.make_node( + 'Resize', + name='Resize_246', + # inputs=['347', '367', '375', 'size2'], + inputs=['577', 
'674', 'size5'], + outputs=['683'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode6 = onnx.helper.make_node( + 'Resize', + name='Resize_273', + # inputs=['347', '367', '375', 'size2'], + inputs=['551', '701', 'size6'], + outputs=['710'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +newnode7= onnx.helper.make_node( + 'Resize', + name='Resize_304', + # inputs=['347', '367', '375', 'size2'], + inputs=['715', '733', 'size7'], + outputs=['output1'], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=-0.75, + mode='nearest', + nearest_mode='floor' +) + + +model.graph.node.remove(model.graph.node[141]) +model.graph.node.insert(141, newnode) + +model.graph.node.remove(model.graph.node[165]) +model.graph.node.insert(165, newnode2) + +model.graph.node.remove(model.graph.node[189]) +model.graph.node.insert(189, newnode3) + +model.graph.node.remove(model.graph.node[219]) +model.graph.node.insert(219, newnode4) + +model.graph.node.remove(model.graph.node[246]) +model.graph.node.insert(246, newnode5) + +model.graph.node.remove(model.graph.node[273]) +model.graph.node.insert(273, newnode6) + +model.graph.node.remove(model.graph.node[304]) +model.graph.node.insert(304, newnode7) + +slice_node1_1 = FindDependNode(model.graph, 'Slice_137', 'Relu_120') #结尾(will be deleted) qishi +print('node map:', slice_node1_1) + +slice_node1_2 = FindDependNode(model.graph, 'Cast_138', 'Relu_122') +print('node map:', slice_node1_2) + +slice_node2_1 = FindDependNode(model.graph, 'Slice_161', 'Relu_144') +print('node map:', slice_node2_1) + +slice_node2_2 = FindDependNode(model.graph, 'Cast_162', 'Relu_146') +print('node map:', slice_node2_2) + +slice_node3_1 = FindDependNode(model.graph, 'Slice_185', 'Relu_168') +print('node map:', slice_node3_1) +slice_node3_2 = FindDependNode(model.graph, 'Cast_186', 'Relu_170') +print('node map:', slice_node3_2) + + +slice_node4_1 = FindDependNode(model.graph, 'Slice_215', 'Relu_168') +print('node map:', slice_node4_1) +slice_node4_2 = FindDependNode(model.graph, 'Cast_216', 'Relu_192') +print('node map:', slice_node4_2) + + +slice_node5_1 = FindDependNode(model.graph, 'Slice_242', 'Relu_144') +print('node map:', slice_node5_1) +slice_node5_2 = FindDependNode(model.graph, 'Cast_243', 'Relu_192') +print('node map:', slice_node5_2) + +slice_node6_1 = FindDependNode(model.graph, 'Slice_269', 'Relu_120') +print('node map:', slice_node6_1) +slice_node6_2 = FindDependNode(model.graph, 'Cast_270', 'Relu_192') +print('node map:', slice_node6_2) + +slice_node7_1 = FindDependNode(model.graph, 'Slice_300', 'Conv_277') +print('node map:', slice_node7_1) +slice_node7_2 = FindDependNode(model.graph, 'Cast_301', 'actual_input_1') +print('node map:', slice_node7_2) + + +node_list = [] +node_list.extend(slice_node1_1) +node_list.extend(slice_node1_2) +node_list.extend(slice_node2_1) +node_list.extend(slice_node2_2) +node_list.extend(slice_node3_1) +node_list.extend(slice_node3_2) +node_list.extend(slice_node4_1) +node_list.extend(slice_node4_2) +node_list.extend(slice_node5_1) +node_list.extend(slice_node5_2) +node_list.extend(slice_node6_1) +node_list.extend(slice_node6_2) +node_list.extend(slice_node7_1) +node_list.extend(slice_node7_2) +node_list.extend(['Concat_139']) +node_list.extend(['Concat_163']) +node_list.extend(['Concat_187']) +node_list.extend(['Concat_217']) +node_list.extend(['Concat_271']) +node_list.extend(['Concat_302']) 
+node_list.extend(['Concat_244']) +print(node_list) +RemoveNode2(model.graph, node_list) + + +onnx.checker.check_model(model) +onnx.save(model, sys.argv[1].split('.')[0] + "_revised2.onnx") diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ imagenet_torch_preprocess.py index b0af0d5d04f9a5db20c5332764b2cc5b03b312a2..91cf3472e5b257246f7cc94fde2f64b23c5e9208 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ imagenet_torch_preprocess.py @@ -1,116 +1,116 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'res2net101': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. 
# ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'res2net101': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ requirements.txt b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ requirements.txt index 6f7e11bdad5e237f1ba95da215d2262add251e53..e71bbad34d386cdbf0cdbab0461c29ac72d0aa40 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ requirements.txt +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/ requirements.txt @@ -1,6 +1,6 @@ -matplotlib==3.5.0 -Pillow==8.4.0 -numpy==1.19.5 -torchvision==0.11.0 -torch==1.10.0 +matplotlib==3.5.0 +Pillow==8.4.0 +numpy==1.19.5 +torchvision==0.11.0 +torch==1.10.0 onnx==1.7.0 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/LICENSE b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/README.md b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/README.md index 69fa898a117ea50952a777d8b53a7596c4653c9c..d6ed0a7f18ea82c8a06519372f6679218da75f57 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/README.md +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/README.md @@ -1,86 +1,86 @@ -# Res2Net_v1b_101模型测试指导 - -- [1 文件说明](#1-文件说明) -- [2 设置环境变量](#2-设置环境变量) -- [3 端到端推理步骤](#3-端到端推理步骤) - - [3.1 下载代码](#31-下载代码) - - [3.2 om模型转换](#32-om模型转换) - - [3.3 om模型推理](#33-om模型推理) - ------- - -## 1 文件说明 -``` -Res2Net_v1b_101_for_PyTorch - ├── get_info.py // 生成推理输入的数据集二进制info文件或jpg info文件 - ├── pth2onnx.py // 用于转换pth模型文件到onnx模型文件 - ├── diff.patch // 修改开源代码的patch文件 - ├── imagenet_torch_preprocess.py // imagenet数据集预处理,生成图片二进制文件 - ├── README.md - ├── atc.sh // onnx模型转换om模型脚本 - └── vision_metric_ImageNet.py // 验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -``` - -## 2 设置环境变量 - -```shell -source env.sh -``` - -## 3 端到端推理步骤 - -### 3.1 下载代码 -git clone 开源仓 https://github.com/Res2Net/Res2Net-PretrainedModels ,切换到所需tag。 -```shell -git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git -cd Res2Net-PretrainedModels -git reset 1d51000f3340fb61b4 --hard -git apply diff.patch -``` - -### 3.2 om模型转换 - -通过pth2onnx.py脚本转化为onnx模型 - -```shell -# 直接导出原始ONNX -python3.7 pth2onnx.py -m ./res2net101_v1b_26w_4s-0812c246.pth -o ./res2net.onnx - -# 导出NPU上优化后的ONNX -python3.7 pth2onnx.py -m ./res2net101_v1b_26w_4s-0812c246.pth -o ./res2net.onnx --optimizer -``` - -利用ATC工具转换为om模型 -```shell -bash atc.sh -``` - -### 3.3 om模型推理 - -(1) 数据集预处理 - - 数据预处理,把ImageNet 50000张图片转为二进制文件(.bin) - - ```shell - python3.7 imagenet_torch_preprocess.py res2net101 /home/HwHiAiUser/dataset/ImageNet/ILSVRC2012_img_val ./prep_bin - ``` - 生成数据集info文件 - - ```shell - python3.7 get_info.py bin ./prep_bin ./BinaryImageNet.info 224 224 - ``` -(2)推理 - 配置环境变量,运行benchmark工具进行推理,参数说明参见 [cann-benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - - ```shell - source env.sh # 如果前面配置过,这里不用执行 - ./benchmark -model_type=vision -om_path=resnet_bs16.om -device_id=0 -batch_size=16 -input_text_path=BinaryImageNet.info -input_width=256 -input_height=256 -useDvpp=false -output_binary=false - ``` - -(3)统计Accuracy值 - 精度验证,调用vision_metric_ImageNet.py脚本与数据集标签val_label.txt比对,可以获得Accuracy数据,结果保存在result.json中 - - ```shell - python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json - ``` +# Res2Net_v1b_101模型测试指导 + +- [1 文件说明](#1-文件说明) +- [2 设置环境变量](#2-设置环境变量) +- [3 端到端推理步骤](#3-端到端推理步骤) + - [3.1 下载代码](#31-下载代码) + - [3.2 om模型转换](#32-om模型转换) + - [3.3 om模型推理](#33-om模型推理) + +------ + +## 1 文件说明 +``` +Res2Net_v1b_101_for_PyTorch + ├── get_info.py // 生成推理输入的数据集二进制info文件或jpg info文件 + ├── pth2onnx.py // 用于转换pth模型文件到onnx模型文件 + ├── diff.patch // 修改开源代码的patch文件 + ├── imagenet_torch_preprocess.py // imagenet数据集预处理,生成图片二进制文件 + ├── README.md + ├── atc.sh // onnx模型转换om模型脚本 + └── vision_metric_ImageNet.py // 验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + +``` + +## 2 设置环境变量 + +```shell +source env.sh +``` + +## 3 端到端推理步骤 + +### 3.1 下载代码 +git clone 开源仓 https://github.com/Res2Net/Res2Net-PretrainedModels ,切换到所需tag。 +```shell +git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git +cd Res2Net-PretrainedModels +git reset 1d51000f3340fb61b4 --hard +git apply diff.patch +``` + +### 3.2 om模型转换 + +通过pth2onnx.py脚本转化为onnx模型 + +```shell +# 直接导出原始ONNX +python3.7 pth2onnx.py -m ./res2net101_v1b_26w_4s-0812c246.pth -o ./res2net.onnx + +# 
导出NPU上优化后的ONNX +python3.7 pth2onnx.py -m ./res2net101_v1b_26w_4s-0812c246.pth -o ./res2net.onnx --optimizer +``` + +利用ATC工具转换为om模型 +```shell +bash atc.sh +``` + +### 3.3 om模型推理 + +(1) 数据集预处理 + + 数据预处理,把ImageNet 50000张图片转为二进制文件(.bin) + + ```shell + python3.7 imagenet_torch_preprocess.py res2net101 /home/HwHiAiUser/dataset/ImageNet/ILSVRC2012_img_val ./prep_bin + ``` + 生成数据集info文件 + + ```shell + python3.7 get_info.py bin ./prep_bin ./BinaryImageNet.info 224 224 + ``` +(2)推理 + 配置环境变量,运行benchmark工具进行推理,参数说明参见 [cann-benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + + ```shell + source env.sh # 如果前面配置过,这里不用执行 + ./benchmark -model_type=vision -om_path=resnet_bs16.om -device_id=0 -batch_size=16 -input_text_path=BinaryImageNet.info -input_width=256 -input_height=256 -useDvpp=false -output_binary=false + ``` + +(3)统计Accuracy值 + 精度验证,调用vision_metric_ImageNet.py脚本与数据集标签val_label.txt比对,可以获得Accuracy数据,结果保存在result.json中 + + ```shell + python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json + ``` diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/atc.sh b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/atc.sh index e98af05173364eed0b1a581105d3685178fe157b..39a1992278cbd212bbc7cd01da53bff2044e14e4 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/atc.sh +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/atc.sh @@ -1,16 +1,16 @@ -# 配置环境变量 -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp - -# 710 fp16,执行如下命令 -atc --model=./res2net.onnx \ - --framework=5 \ - --output=res2net_bs16 \ - --input_format=NCHW \ - --input_shape="x:16,3,224,224" \ - --log=error \ - --soc_version=Ascend710 \ - --enable_small_channel=1 +# 配置环境变量 +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp + +# 710 fp16,执行如下命令 +atc --model=./res2net.onnx \ + --framework=5 \ + --output=res2net_bs16 \ + --input_format=NCHW \ + --input_shape="x:16,3,224,224" \ + --log=error \ + --soc_version=Ascend710 \ + --enable_small_channel=1 diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/diff.patch b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/diff.patch index dd78507002396a8ee889afc1337cec2d5009c129..5770c5852f1aa458ed7224afbd3cbba5e3156f14 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/diff.patch +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/diff.patch @@ -1,32 +1,32 @@ -diff --git a/res2net_v1b.py b/res2net_v1b.py -index 6ced156..0ec1ce8 100644 ---- a/res2net_v1b.py -+++ b/res2net_v1b.py -@@ -64,6 +64,7 @@ class Bottle2neck(nn.Module): - out = self.relu(out) - - spx = torch.split(out, self.width, 1) -+ out_list = [] - for i in range(self.nums): - if i==0 or self.stype=='stage': - sp = spx[i] -@@ -71,15 +72,13 @@ class Bottle2neck(nn.Module): - sp = sp + spx[i] - sp = self.convs[i](sp) - sp = self.relu(self.bns[i](sp)) -- if i==0: -- out = sp -- else: -- 
out = torch.cat((out, sp), 1) -+ out_list.append(sp) - if self.scale != 1 and self.stype=='normal': -- out = torch.cat((out, spx[self.nums]),1) -+ out_list.append(spx[self.nums]) - elif self.scale != 1 and self.stype=='stage': -- out = torch.cat((out, self.pool(spx[self.nums])),1) -+ out_list.append(self.pool(spx[self.nums])) - -+ out = torch.cat(out_list, 1) - out = self.conv3(out) - out = self.bn3(out) - +diff --git a/res2net_v1b.py b/res2net_v1b.py +index 6ced156..0ec1ce8 100644 +--- a/res2net_v1b.py ++++ b/res2net_v1b.py +@@ -64,6 +64,7 @@ class Bottle2neck(nn.Module): + out = self.relu(out) + + spx = torch.split(out, self.width, 1) ++ out_list = [] + for i in range(self.nums): + if i==0 or self.stype=='stage': + sp = spx[i] +@@ -71,15 +72,13 @@ class Bottle2neck(nn.Module): + sp = sp + spx[i] + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) +- if i==0: +- out = sp +- else: +- out = torch.cat((out, sp), 1) ++ out_list.append(sp) + if self.scale != 1 and self.stype=='normal': +- out = torch.cat((out, spx[self.nums]),1) ++ out_list.append(spx[self.nums]) + elif self.scale != 1 and self.stype=='stage': +- out = torch.cat((out, self.pool(spx[self.nums])),1) ++ out_list.append(self.pool(spx[self.nums])) + ++ out = torch.cat(out_list, 1) + out = self.conv3(out) + out = self.bn3(out) + diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/env.sh b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/env.sh index 91b1a00516e987530b481fcbe989e0944a460e06..96389fea980ff6e57785a3f35c29e34dc862089e 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/env.sh +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/env.sh @@ -1,7 +1,7 @@ -#!/bin/bash -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/pyACL/python/site-packages/acl:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp +#!/bin/bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/pyACL/python/site-packages/acl:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp echo "successfully!!" \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/get_info.py b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/get_info.py index 6077f1416bb441d98401472d96aeddd4e0b66bb2..fa3108bd7ff139c64f3d1b11c09c67e808888f90 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/pth2onnx.py b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/pth2onnx.py index 776b44620a53a8bb57fcdc03ff413838f0d7d6b5..af37a616d9b913088cfebf774d1f1926aeb54d40 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/pth2onnx.py @@ -1,85 +1,85 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import itertools -import argparse -from res2net_v1b import res2net101_v1b_26w_4s -import torch -import torch.nn.functional as F - -parser = argparse.ArgumentParser(description='res2net101_v1b inference') -parser.add_argument('-m', '--trained_model', default=None, - type=str, help='Trained state_dict file path to open') -parser.add_argument('-o', '--output', default=None, - type=str, help='ONNX model file') -parser.add_argument('--optimizer', default='store_ture', - help='ONNX model optimizer') -args = parser.parse_args() - -model = res2net101_v1b_26w_4s() -checkpoint = torch.load(args.trained_model, map_location=torch.device('cpu')) - - -def optimizer(model, checkpoint): - presistent_buffers = {k: v for k, v in model.named_buffers() if k not in model._non_persistent_buffers_set} - local_name_params = itertools.chain(model.named_parameters(), presistent_buffers.items()) - local_state = {k: v for k, v in local_name_params if v is not None} - - for name, param in checkpoint.items(): - if local_state[name].shape != param.shape: - if 'conv1' in name or 'conv3' in name: - n1, c1, h, w = local_state[name].shape - n2, c2, h, w = param.shape - if n1 == n2: - c = (c1 - c2) // 4 - cell = c2 // 4 - checkpoint[name] = torch.cat([torch.cat((param[:, i * cell: (i + 1) * cell, ...], - torch.zeros(n1, c, h, w, dtype=param.dtype)), - 1) for i in range(4)], 1) - else: - n = (n1 - n2) // 4 - cell = n2 // 4 - checkpoint[name] = torch.cat([torch.cat((param[i * cell: (i + 1) * cell, ...], - torch.zeros(n, c1, h, w, dtype=param.dtype)), - 0) for i in range(4)], 0) - elif 'bn1' in name or 'bn3' in name: - cell = param.size(0) // 4 - n = (local_state[name].size(0) - param.size(0)) // 4 - checkpoint[name] = torch.cat([torch.cat((param[i * cell: (i + 1) * cell], - torch.zeros(n, dtype=param.dtype)), - 0) for i in range(4)]) - else: - if param.dim() == 1: - checkpoint[name] = torch.cat((param, - torch.zeros(local_state[name].size(0) - param.size(0), dtype=param.dtype)), - 0) - else: - n1, c1, h, w = local_state[name].shape - n2, c2, h, w = param.shape - param = torch.cat((param, torch.zeros(n2, c1 - c2, h, w, dtype=param.dtype)), 1) - checkpoint[name] = torch.cat((param, torch.zeros(n1 - n2, c1, h, w, dtype=param.dtype)), 0) - return checkpoint - - -if __name__ == '__main__': - if args.optimizer: - checkpoint = optimizer(model, checkpoint) - model.load_state_dict(checkpoint) - model.eval() - - inputs = torch.rand(1, 3, 224, 224) - torch.onnx.export(model, inputs, args.output, - input_names=["x"], output_names=["output"], - dynamic_axes={"x": {0: "-1"}}, opset_version=11) +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import itertools +import argparse +from res2net_v1b import res2net101_v1b_26w_4s +import torch +import torch.nn.functional as F + +parser = argparse.ArgumentParser(description='res2net101_v1b inference') +parser.add_argument('-m', '--trained_model', default=None, + type=str, help='Trained state_dict file path to open') +parser.add_argument('-o', '--output', default=None, + type=str, help='ONNX model file') +parser.add_argument('--optimizer', default='store_ture', + help='ONNX model optimizer') +args = parser.parse_args() + +model = res2net101_v1b_26w_4s() +checkpoint = torch.load(args.trained_model, map_location=torch.device('cpu')) + + +def optimizer(model, checkpoint): + presistent_buffers = {k: v for k, v in model.named_buffers() if k not in model._non_persistent_buffers_set} + local_name_params = itertools.chain(model.named_parameters(), presistent_buffers.items()) + local_state = {k: v for k, v in local_name_params if v is not None} + + for name, param in checkpoint.items(): + if local_state[name].shape != param.shape: + if 'conv1' in name or 'conv3' in name: + n1, c1, h, w = local_state[name].shape + n2, c2, h, w = param.shape + if n1 == n2: + c = (c1 - c2) // 4 + cell = c2 // 4 + checkpoint[name] = torch.cat([torch.cat((param[:, i * cell: (i + 1) * cell, ...], + torch.zeros(n1, c, h, w, dtype=param.dtype)), + 1) for i in range(4)], 1) + else: + n = (n1 - n2) // 4 + cell = n2 // 4 + checkpoint[name] = torch.cat([torch.cat((param[i * cell: (i + 1) * cell, ...], + torch.zeros(n, c1, h, w, dtype=param.dtype)), + 0) for i in range(4)], 0) + elif 'bn1' in name or 'bn3' in name: + cell = param.size(0) // 4 + n = (local_state[name].size(0) - param.size(0)) // 4 + checkpoint[name] = torch.cat([torch.cat((param[i * cell: (i + 1) * cell], + torch.zeros(n, dtype=param.dtype)), + 0) for i in range(4)]) + else: + if param.dim() == 1: + checkpoint[name] = torch.cat((param, + torch.zeros(local_state[name].size(0) - param.size(0), dtype=param.dtype)), + 0) + else: + n1, c1, h, w = local_state[name].shape + n2, c2, h, w = param.shape + param = torch.cat((param, torch.zeros(n2, c1 - c2, h, w, dtype=param.dtype)), 1) + checkpoint[name] = torch.cat((param, torch.zeros(n1 - n2, c1, h, w, dtype=param.dtype)), 0) + return checkpoint + + +if __name__ == '__main__': + if args.optimizer: + checkpoint = optimizer(model, checkpoint) + model.load_state_dict(checkpoint) + model.eval() + + inputs = torch.rand(1, 3, 224, 224) + torch.onnx.export(model, inputs, args.output, + input_names=["x"], output_names=["output"], + dynamic_axes={"x": {0: "-1"}}, opset_version=11) diff --git a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/vision_metric_ImageNet.py index 183db2eddaa4f19986b8028b371ea9f51c9c4ba0..6542f9e50c17f6d4045c27ea7be84edaeb925e5a 100644 --- a/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch/vision_metric_ImageNet.py @@ -1,177 +1,177 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + 
'%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + 
:param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) diff --git a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/ReadMe.md index 0c3fb44c828a7631c73aa27685e8e2548abaf932..64dc33b67d3f17d7e26ca31dee6e7aae0ed4a88b 100644 --- a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/ReadMe.md @@ -1,62 +1,62 @@ -文件作用说明: - -1.resnext50_pth2onnx.py:用于转换pth模型文件到onnx模型文件 - -2.resnext50_atc.sh:onnx模型转换om模型脚本 - -3.preprocess_resnext50_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 - -4.aipp_resnext50_pth.config:数据集aipp预处理配置文件 - -5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 - -6.resnext50_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 - -7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 - -8.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - -10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - - - - - -推理端到端步骤: - -(1) 从Torchvision下载resnext50模型,通过resnext50_pth2onnx.py脚本转化为onnx模型 - - - -(2)运行resnext50_atc.sh脚本转换om模型 - -本demo已提供调优完成的om模型 - - - 
-(3)用preprocess_resnext50_pth.py脚本处理数据集,参考resnext50_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_resnext50_pth.config。 - python3 preprocess_resnext50_pth.py dataset/ImageNet/val_union/ pre_bin - - - -(4)生成推理输入的数据集二进制info文件或jpg info文件 - python3 get_info.py bin pre_bin resnext50_val.info 224 224 - python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info - - - -(5)使用benchmark离线推理 - ./benchmark -model_type=vision -om_path=resnext50_bs16.om -device_id=0 -batch_size=16 -input_text_path=resnext50_val.info -input_width=224 -input_height=224 -useDvpp=false - 或者 - ./benchmark -model_type=vision -om_path=resnext50_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=256 -input_height=256 -useDvpp=true - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.resnext50_pth2onnx.py:用于转换pth模型文件到onnx模型文件 + +2.resnext50_atc.sh:onnx模型转换om模型脚本 + +3.preprocess_resnext50_pth.py:数据集预处理脚本,通过均值方差处理归一化图片,生成图片二进制文件 + +4.aipp_resnext50_pth.config:数据集aipp预处理配置文件 + +5.get_info.py:生成推理输入的数据集二进制info文件或jpg info文件 + +6.resnext50_val.info:ImageNet验证集二进制info文件,用于benchmark推理获取数据集 + +7.ImageNet.info:ImageNet验证集jpg info文件,用于benchmark推理获取数据集 + +8.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +9.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + +10.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + + + + + +推理端到端步骤: + +(1) 从Torchvision下载resnext50模型,通过resnext50_pth2onnx.py脚本转化为onnx模型 + + + +(2)运行resnext50_atc.sh脚本转换om模型 + +本demo已提供调优完成的om模型 + + + +(3)用preprocess_resnext50_pth.py脚本处理数据集,参考resnext50_val.info配置处理后的二进制数据集路径。或者配置数据集aipp预处理文件aipp_resnext50_pth.config。 + python3 preprocess_resnext50_pth.py dataset/ImageNet/val_union/ pre_bin + + + +(4)生成推理输入的数据集二进制info文件或jpg info文件 + python3 get_info.py bin pre_bin resnext50_val.info 224 224 + python3 get_info.py jpg dataset/ImageNet/val_union ImageNet.info + + + +(5)使用benchmark离线推理 + ./benchmark -model_type=vision -om_path=resnext50_bs16.om -device_id=0 -batch_size=16 -input_text_path=resnext50_val.info -input_width=224 -input_height=224 -useDvpp=false + 或者 + ./benchmark -model_type=vision -om_path=resnext50_bs1.om -device_id=0 -batch_size=1 -input_text_path=ImageNet.info -input_width=256 -input_height=256 -useDvpp=true + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(6)python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/get_info.py index 0578e4f00cd9661c6dcfa7db7b72f196677ff422..7b14c54b909b60730e9e3471ee0435ee4cb8622f 100644 --- a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/get_info.py @@ -1,46 +1,46 @@ -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/preprocess_resnext50_pth.py b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/preprocess_resnext50_pth.py index 04f41f75979bcbc2ca8c939b37b29d8a59f8d22a..965681f30d9a5413948b7fb1cdd158d1ac2e5e22 100644 --- a/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/preprocess_resnext50_pth.py +++ b/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch/preprocess_resnext50_pth.py @@ -1,85 +1,85 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. 
- """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - - resize_size = 256 - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - - for file in in_files: - i = i + 1 - print(file, "===", i) - - img = Image.open(os.path.join(file_path, file)).convert('RGB') - - img = resize(img, resize_size) # transforms.Resize(256) - img = np.array(img, dtype=np.float32) - img = center_crop(img, 224, 224) # transforms.CenterCrop(224) - img = img / 255. # transforms.ToTensor() - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - img = img.transpose(2, 0, 1) # HWC -> CHW - - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. 
+ """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + + resize_size = 256 + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + for file in in_files: + i = i + 1 + print(file, "===", i) + + img = Image.open(os.path.join(file_path, file)).convert('RGB') + + img = resize(img, resize_size) # transforms.Resize(256) + img = np.array(img, dtype=np.float32) + img = center_crop(img, 224, 224) # transforms.CenterCrop(224) + img = img / 255. # transforms.ToTensor() + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + img = img.transpose(2, 0, 1) # HWC -> CHW + + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/ReadMe.md b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/ReadMe.md index b89cfb0e6e79831f8da11db45d48085a48276ca1..285adc2feccb00acf368c919818782466573e39b 100644 --- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/ReadMe.md @@ -1,381 +1,381 @@ -# ResNet101 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx模型量化](#32-onnx模型量化) - - [3.3 onnx转om模型](#33-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - [7.2 T4性能数据](#72-T4性能数据) - - [7.3 性能对比](#73-性能对比) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[ResNet101论文](https://arxiv.org/pdf/1512.03385.pdf) - -### 1.2 代码地址 -[ResNet101代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -branch:master -commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.4 - -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- 
**[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -请参考[pytorch原始仓](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py)给出的ResNet101权重文件下载地址获取权重文件:resnet101-63fe2227.pth - -2.ResNet101模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 - -``` -git clone https://github.com/pytorch/vision -cd vision -python3.7 setup.py install -cd .. -``` -3.编写pth2onnx脚本resnet101_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 resnet101_pth2onnx.py ./resnet101-63fe2227.pth resnet101.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx模型量化 - -1.AMCT工具包安装,具体参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的昇腾模型压缩工具使用指南(ONNX)章节; - -2.生成bin格式数据集,数据集用于校正量化因子。当前模型为动态batch,建议使用较大的batch size: - -``` -python3.7 gen_calibration_bin.py resnet /root/datasets/imagenet/val ./calibration_bin 32 1 -``` - -参数说明: - -- resnet:模型类型 -- /root/datasets/imagenet/val:模型使用的数据集路径; -- ./calibration_bin:生成的bin格式数据集路径; -- 32:batch size; -- 1:batch num。 - -3.ONNX模型量化 - -``` -amct_onnx calibration --model resnet101.onnx --save_path ./result/resnet101 --input_shape "image:32,3,224,224" --data_dir "./calibration_bin" --data_types "float32" -``` - -会在result目录下生成resnet101_deploy_model.onnx量化模型 - -4.量化模型后续的推理验证流程和非量化一致。 - -### 3.3 onnx转om模型 - -1.设置环境变量 - -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的ATC工具使用指南章节 - -``` -atc --framework=5 --model=./resnet101.onnx --output=resnet101_bs16 --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp.config -``` - -**说明:** - -> 若设备类型为Ascend710,设置soc_version==Ascend710即可; -> -> aipp.config是AIPP工具数据集预处理配置文件,详细说明可参考"ATC工具使用指南"中的"AIPP配置"章节。 - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 - -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet101_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310、710上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考《[CANN 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》 -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=resnet101_bs16.om -input_text_path=./resnet101_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- 
**[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.37%"}, {"key": "Top2 accuracy", "value": "87.1%"}, {"key": "Top3 accuracy", "value": "90.61%"}, {"key": "Top4 accuracy", "value": "92.42%"}, {"key": "Top5 accuracy", "value": "93.54%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) -``` -Model Acc@1 Acc@5 -ResNet-101 77.374 93.546 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** -- **[T4性能数据](#72-T4性能数据)** -- **[性能对比](#73-性能对比)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 (Ascend310) -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 170.628, latency: 293035 -[data read] throughputRate: 181.571, moduleLatency: 5.50749 -[preprocess] throughputRate: 180.466, moduleLatency: 5.5412 -[infer] throughputRate: 171.595, Interface throughputRate: 247.898, moduleLatency: 5.12562 -[post] throughputRate: 171.595, moduleLatency: 5.82768 -``` -Interface throughputRate: 247.898,247.898x4=991.592既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 185.903, latency: 268957 -[data read] throughputRate: 191.266, moduleLatency: 5.22833 -[preprocess] throughputRate: 190.761, moduleLatency: 5.24217 -[infer] throughputRate: 187.131, Interface throughputRate: 401.046, moduleLatency: 3.94051 -[post] throughputRate: 11.6954, moduleLatency: 85.5035 -``` -Interface throughputRate: 401.046,401.046x4=1604.184既是batch16 310单卡吞吐率 -batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_0.txt: -``` -[e2e] throughputRate: 184.444, latency: 271085 -[data read] throughputRate: 196.412, moduleLatency: 5.09134 -[preprocess] throughputRate: 195.837, moduleLatency: 5.1063 -[infer] throughputRate: 185.624, Interface throughputRate: 331.096, moduleLatency: 4.52436 -[post] throughputRate: 46.4056, moduleLatency: 21.5491 -``` -Interface throughputRate: 331.096,331.096x4=1324.384既是batch4 310单卡吞吐率 -batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_0.txt: -``` -[e2e] throughputRate: 196.051, latency: 255036 -[data read] throughputRate: 209.29, moduleLatency: 4.77806 -[preprocess] throughputRate: 207.914, moduleLatency: 4.80969 -[infer] throughputRate: 197.513, Interface throughputRate: 371.905, moduleLatency: 4.15513 -[post] throughputRate: 24.6888, moduleLatency: 40.5042 -``` -Interface throughputRate: 371.905,371.905x4=1487.62既是batch8 310单卡吞吐率 
-batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_0.txt: -``` -[e2e] throughputRate: 176.215, latency: 283744 -[data read] throughputRate: 187.024, moduleLatency: 5.34691 -[preprocess] throughputRate: 186.183, moduleLatency: 5.37105 -[infer] throughputRate: 177.675, Interface throughputRate: 370.456, moduleLatency: 4.14361 -[post] throughputRate: 5.55402, moduleLatency: 180.05 - -``` -Interface throughputRate: 370.456,370.456x4=1481.82既是batch32 310单卡吞吐率 - - **说明:** - -> 注意如果设备为Ascend710,则Interface throughputRate的值就是710的单卡吞吐率,不需要像310那样x4 - -### 7.2 T4性能数据 -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 -batch1性能: -``` -trtexec --onnx=resnet101.onnx --fp16 --shapes=image:1x3x224x224 --threads -``` -gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch -``` -[06/10/2021-17:40:51] [I] GPU Compute -[06/10/2021-17:40:51] [I] min: 2.01935 ms -[06/10/2021-17:40:51] [I] max: 3.53485 ms -[06/10/2021-17:40:51] [I] mean: 2.1015 ms -[06/10/2021-17:40:51] [I] median: 2.07254 ms -[06/10/2021-17:40:51] [I] percentile: 3.52882 ms at 99% -[06/10/2021-17:40:51] [I] total compute time: 2.99674 s - -``` -batch1 t4单卡吞吐率:1000/(2.1015/1)=475.851fps - -batch16性能: -``` -trtexec --onnx=resnet101.onnx --fp16 --shapes=image:16x3x224x224 --threads -``` -``` -[06/10/2021-17:42:06] [I] GPU Compute -[06/10/2021-17:42:06] [I] min: 13.8094 ms -[06/10/2021-17:42:06] [I] max: 24.5842 ms -[06/10/2021-17:42:06] [I] mean: 14.5182 ms -[06/10/2021-17:42:06] [I] median: 14.4042 ms -[06/10/2021-17:42:06] [I] percentile: 15.7213 ms at 99% -[06/10/2021-17:42:06] [I] total compute time: 3.03431 s - -``` -batch16 t4单卡吞吐率:1000/(14.5182/16)=1102.065fps - -batch4性能: -``` -trtexec --onnx=resnet101.onnx --fp16 --shapes=image:4x3x224x224 --threads -``` -``` -[06/11/2021-12:47:51] [I] GPU Compute -[06/11/2021-12:47:51] [I] min: 4.27863 ms -[06/11/2021-12:47:51] [I] max: 6.56378 ms -[06/11/2021-12:47:51] [I] mean: 4.52613 ms -[06/11/2021-12:47:51] [I] median: 4.49536 ms -[06/11/2021-12:47:51] [I] percentile: 6.54581 ms at 99% -[06/11/2021-12:47:51] [I] total compute time: 3.00535 s - -``` -batch4 t4单卡吞吐率:1000/(4.52613/4)=883.7572054fps - -batch8性能: -``` -trtexec --onnx=resnet101.onnx --fp16 --shapes=image:8x3x224x224 --threads -``` -``` -[06/11/2021-12:49:50] [I] GPU Compute -[06/11/2021-12:49:50] [I] min: 7.38504 ms -[06/11/2021-12:49:50] [I] max: 8.36267 ms -[06/11/2021-12:49:50] [I] mean: 7.73195 ms -[06/11/2021-12:49:50] [I] median: 7.68652 ms -[06/11/2021-12:49:50] [I] percentile: 8.33948 ms at 99% -[06/11/2021-12:49:50] [I] total compute time: 3.00773 s - -``` -batch8 t4单卡吞吐率:1000/(7.73195/8)=1034.667839fps - -batch32性能: -``` -trtexec --onnx=resnet101.onnx --fp16 --shapes=image:32x3x224x224 --threads -``` -``` -[06/11/2021-12:52:51] [I] GPU Compute -[06/11/2021-12:52:51] [I] min: 24.7151 ms -[06/11/2021-12:52:51] [I] max: 34.8919 ms -[06/11/2021-12:52:51] [I] mean: 25.7435 ms -[06/11/2021-12:52:51] [I] median: 25.4695 ms -[06/11/2021-12:52:51] [I] percentile: 33.3713 ms at 99% -[06/11/2021-12:52:51] [I] total compute time: 3.03773 s - -``` -batch32 t4单卡吞吐率:1000/(25.7435/32)=1243.032222fps - -### 7.3 性能对比 -batch1:247.898x4 > 1000/(2.1015/1) -batch16:401.046x4 > 1000/(14.5182/16) -batch4,8,32的npu性能也都大于T4 -310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率大,故310性能高于T4性能,性能达标。 -对于batch1的310性能高于T4性能2.08倍,batch16的310性能高于T4性能1.46倍,对于batch1与batch16,310性能均高于T4性能1.2倍,该模型放在Benchmark/cv/classification目录下。 - -710单卡吞吐率要求最优batchsize情况下为310的1.5倍,当前已符合要求,具体数据不在此赘述。 - +# ResNet101 Onnx模型端到端推理指导 +- [1 
模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx模型量化](#32-onnx模型量化) + - [3.3 onnx转om模型](#33-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + - [7.2 T4性能数据](#72-T4性能数据) + - [7.3 性能对比](#73-性能对比) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[ResNet101论文](https://arxiv.org/pdf/1512.03385.pdf) + +### 1.2 代码地址 +[ResNet101代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) +branch:master +commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.4 + +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +请参考[pytorch原始仓](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py)给出的ResNet101权重文件下载地址获取权重文件:resnet101-63fe2227.pth + +2.ResNet101模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 + +``` +git clone https://github.com/pytorch/vision +cd vision +python3.7 setup.py install +cd .. 
+``` +3.编写pth2onnx脚本resnet101_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 resnet101_pth2onnx.py ./resnet101-63fe2227.pth resnet101.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx模型量化 + +1.AMCT工具包安装,具体参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的昇腾模型压缩工具使用指南(ONNX)章节; + +2.生成bin格式数据集,数据集用于校正量化因子。当前模型为动态batch,建议使用较大的batch size: + +``` +python3.7 gen_calibration_bin.py resnet /root/datasets/imagenet/val ./calibration_bin 32 1 +``` + +参数说明: + +- resnet:模型类型 +- /root/datasets/imagenet/val:模型使用的数据集路径; +- ./calibration_bin:生成的bin格式数据集路径; +- 32:batch size; +- 1:batch num。 + +3.ONNX模型量化 + +``` +amct_onnx calibration --model resnet101.onnx --save_path ./result/resnet101 --input_shape "image:32,3,224,224" --data_dir "./calibration_bin" --data_types "float32" +``` + +会在result目录下生成resnet101_deploy_model.onnx量化模型 + +4.量化模型后续的推理验证流程和非量化一致。 + +### 3.3 onnx转om模型 + +1.设置环境变量 + +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考《[CANN 开发辅助工具指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》中的ATC工具使用指南章节 + +``` +atc --framework=5 --model=./resnet101.onnx --output=resnet101_bs16 --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp.config +``` + +**说明:** + +> 若设备类型为Ascend710,设置soc_version==Ascend710即可; +> +> aipp.config是AIPP工具数据集预处理配置文件,详细说明可参考"ATC工具使用指南"中的"AIPP配置"章节。 + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 + +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet101_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310、710上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考《[CANN 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools)》 +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=resnet101_bs16.om -input_text_path=./resnet101_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", 
"value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.37%"}, {"key": "Top2 accuracy", "value": "87.1%"}, {"key": "Top3 accuracy", "value": "90.61%"}, {"key": "Top4 accuracy", "value": "92.42%"}, {"key": "Top5 accuracy", "value": "93.54%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) +``` +Model Acc@1 Acc@5 +ResNet-101 77.374 93.546 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** +- **[T4性能数据](#72-T4性能数据)** +- **[性能对比](#73-性能对比)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 (Ascend310) +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 170.628, latency: 293035 +[data read] throughputRate: 181.571, moduleLatency: 5.50749 +[preprocess] throughputRate: 180.466, moduleLatency: 5.5412 +[infer] throughputRate: 171.595, Interface throughputRate: 247.898, moduleLatency: 5.12562 +[post] throughputRate: 171.595, moduleLatency: 5.82768 +``` +Interface throughputRate: 247.898,247.898x4=991.592既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 185.903, latency: 268957 +[data read] throughputRate: 191.266, moduleLatency: 5.22833 +[preprocess] throughputRate: 190.761, moduleLatency: 5.24217 +[infer] throughputRate: 187.131, Interface throughputRate: 401.046, moduleLatency: 3.94051 +[post] throughputRate: 11.6954, moduleLatency: 85.5035 +``` +Interface throughputRate: 401.046,401.046x4=1604.184既是batch16 310单卡吞吐率 +batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_0.txt: +``` +[e2e] throughputRate: 184.444, latency: 271085 +[data read] throughputRate: 196.412, moduleLatency: 5.09134 +[preprocess] throughputRate: 195.837, moduleLatency: 5.1063 +[infer] throughputRate: 185.624, Interface throughputRate: 331.096, moduleLatency: 4.52436 +[post] throughputRate: 46.4056, moduleLatency: 21.5491 +``` +Interface throughputRate: 331.096,331.096x4=1324.384既是batch4 310单卡吞吐率 +batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_0.txt: +``` +[e2e] throughputRate: 196.051, latency: 255036 +[data read] throughputRate: 209.29, moduleLatency: 4.77806 +[preprocess] throughputRate: 207.914, moduleLatency: 4.80969 +[infer] throughputRate: 197.513, Interface throughputRate: 371.905, moduleLatency: 4.15513 +[post] throughputRate: 24.6888, moduleLatency: 40.5042 +``` +Interface throughputRate: 371.905,371.905x4=1487.62既是batch8 310单卡吞吐率 +batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_0.txt: +``` +[e2e] throughputRate: 176.215, latency: 283744 +[data read] throughputRate: 187.024, moduleLatency: 5.34691 +[preprocess] throughputRate: 186.183, moduleLatency: 5.37105 +[infer] throughputRate: 177.675, Interface throughputRate: 370.456, moduleLatency: 4.14361 +[post] throughputRate: 5.55402, moduleLatency: 180.05 + +``` +Interface throughputRate: 370.456,370.456x4=1481.82既是batch32 310单卡吞吐率 + 
+ 
+ **说明:**
+
+> 注意如果设备为Ascend710,则Interface throughputRate的值就是710的单卡吞吐率,不需要像310那样x4
+
+### 7.2 T4性能数据
+在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2
+batch1性能:
+```
+trtexec --onnx=resnet101.onnx --fp16 --shapes=image:1x3x224x224 --threads
+```
+310单卡吞吐率是4个device并行执行的结果,而T4为单卡执行;trtexec输出的mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch
+```
+[06/10/2021-17:40:51] [I] GPU Compute
+[06/10/2021-17:40:51] [I] min: 2.01935 ms
+[06/10/2021-17:40:51] [I] max: 3.53485 ms
+[06/10/2021-17:40:51] [I] mean: 2.1015 ms
+[06/10/2021-17:40:51] [I] median: 2.07254 ms
+[06/10/2021-17:40:51] [I] percentile: 3.52882 ms at 99%
+[06/10/2021-17:40:51] [I] total compute time: 2.99674 s
+
+```
+batch1 t4单卡吞吐率:1000/(2.1015/1)=475.851fps
+
+batch16性能:
+```
+trtexec --onnx=resnet101.onnx --fp16 --shapes=image:16x3x224x224 --threads
+```
+```
+[06/10/2021-17:42:06] [I] GPU Compute
+[06/10/2021-17:42:06] [I] min: 13.8094 ms
+[06/10/2021-17:42:06] [I] max: 24.5842 ms
+[06/10/2021-17:42:06] [I] mean: 14.5182 ms
+[06/10/2021-17:42:06] [I] median: 14.4042 ms
+[06/10/2021-17:42:06] [I] percentile: 15.7213 ms at 99%
+[06/10/2021-17:42:06] [I] total compute time: 3.03431 s
+
+```
+batch16 t4单卡吞吐率:1000/(14.5182/16)=1102.065fps
+
+batch4性能:
+```
+trtexec --onnx=resnet101.onnx --fp16 --shapes=image:4x3x224x224 --threads
+```
+```
+[06/11/2021-12:47:51] [I] GPU Compute
+[06/11/2021-12:47:51] [I] min: 4.27863 ms
+[06/11/2021-12:47:51] [I] max: 6.56378 ms
+[06/11/2021-12:47:51] [I] mean: 4.52613 ms
+[06/11/2021-12:47:51] [I] median: 4.49536 ms
+[06/11/2021-12:47:51] [I] percentile: 6.54581 ms at 99%
+[06/11/2021-12:47:51] [I] total compute time: 3.00535 s
+
+```
+batch4 t4单卡吞吐率:1000/(4.52613/4)=883.7572054fps
+
+batch8性能:
+```
+trtexec --onnx=resnet101.onnx --fp16 --shapes=image:8x3x224x224 --threads
+```
+```
+[06/11/2021-12:49:50] [I] GPU Compute
+[06/11/2021-12:49:50] [I] min: 7.38504 ms
+[06/11/2021-12:49:50] [I] max: 8.36267 ms
+[06/11/2021-12:49:50] [I] mean: 7.73195 ms
+[06/11/2021-12:49:50] [I] median: 7.68652 ms
+[06/11/2021-12:49:50] [I] percentile: 8.33948 ms at 99%
+[06/11/2021-12:49:50] [I] total compute time: 3.00773 s
+
+```
+batch8 t4单卡吞吐率:1000/(7.73195/8)=1034.667839fps
+
+batch32性能:
+```
+trtexec --onnx=resnet101.onnx --fp16 --shapes=image:32x3x224x224 --threads
+```
+```
+[06/11/2021-12:52:51] [I] GPU Compute
+[06/11/2021-12:52:51] [I] min: 24.7151 ms
+[06/11/2021-12:52:51] [I] max: 34.8919 ms
+[06/11/2021-12:52:51] [I] mean: 25.7435 ms
+[06/11/2021-12:52:51] [I] median: 25.4695 ms
+[06/11/2021-12:52:51] [I] percentile: 33.3713 ms at 99%
+[06/11/2021-12:52:51] [I] total compute time: 3.03773 s
+
+```
+batch32 t4单卡吞吐率:1000/(25.7435/32)=1243.032222fps
+
+### 7.3 性能对比
+batch1:247.898x4 > 1000/(2.1015/1)
+batch16:401.046x4 > 1000/(14.5182/16)
+batch4,8,32的npu性能也都大于T4
+310单个device的吞吐率乘4即为单卡吞吐率,该值大于T4单卡的吞吐率,故310性能高于T4性能,性能达标。
+batch1时310性能约为T4性能的2.08倍,batch16时约为T4性能的1.46倍;batch1与batch16下310性能均达到T4性能的1.2倍以上,故该模型放在Benchmark/cv/classification目录下。
+
+710单卡吞吐率要求最优batchsize情况下为310的1.5倍,当前已符合要求,具体数据不在此赘述。
+
diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/gen_dataset_info.py
index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644
--- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/gen_dataset_info.py
+++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/gen_dataset_info.py
@@ -1,61 +1,61 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_acc_eval.py b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_acc_eval.py +++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_torch_preprocess.py index ed4b4e2a5380e3ee608287596412963d92b9bb79..6f89d347b52642cc3a1fc8ba73dae2d4230e7a0c 100644 --- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/imagenet_torch_preprocess.py @@ -1,113 +1,113 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.int8) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.int8) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/requirements.txt b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/requirements.txt index 2fc4e802c476feda2a9866a85630f7f3b29428d7..d072d9aa6f2e7a7b0044ff93d036c3c0347ee5c9 100644 --- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/requirements.txt +++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/resnet101_pth2onnx.py b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/resnet101_pth2onnx.py index 46ab195411a21bf39c1d67d864a72ef0e7f9310f..8eff59a68086db8153e345f4956f018710ffaf7c 100644 --- a/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/resnet101_pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer/resnet101_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.resnet101(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.resnet101(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/LICENSE b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/README.md b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/README.md index 9495982f9e63226d3fa8b3b0d32444de0003df2d..733721a8b2a48485a92585081f0d96b936b4ed66 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/README.md +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/README.md @@ -1,156 +1,156 @@ -# ResNet18 Onnx模型端到端推理指导 - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[ResNet18论文](https://arxiv.org/pdf/1512.03385.pdf) - -### 1.2 代码地址 -[ResNet18代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -branch:master -commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.3 - -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 数据集预处理 - -- **[数据集获取](#31-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 3.1 数据集获取 -该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 3.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` -### 3.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet18_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 4 模型推理 - -- **[pth转onnx模型](#41-pth转onnx模型)** - -- **[onnx转om模型](#42-onnx转om模型)** - -### 4.1 pth转onnx模型 - -1.下载pth权重文件 -从https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py的第26行获取对应权重下载链接,使用wget命令下载对应权重 - -文件MD5sum:e0b1c919e74f9a193d36871d9964bf7d - -2.ResNet18模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网 -``` -git clone https://github.com/pytorch/vision -cd vision -python3.7 setup.py install -cd .. 
-``` -3.编写pth2onnx脚本resnet18_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -3.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 resnet18_pth2onnx.py ./resnet18-f37072fd.pth resnet18.onnx -``` -### 4.2 onnx模型量化(可选) -1.AMCT工具包安装,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) - -2.数据预处理,用于量化因子矫正。当前模型为动态batch,建议用多batch_size的预处理文件矫正量化因子。 -执行以下命令: -``` -python3.7.5 calibration_bin.py prep_dataset calibration_bin 64 -``` - -3.ONNX模型量化,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) -在result目录下生成resnet18_deploy_model.onnx量化模型 - -4.量化模型验证,除onnx离线模型转换om模型命令有区别外,其余一致 - -### 4.3 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -**说明** ->此脚本中环境变量只供参考,请以实际安装环境配置环境变量 - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) -``` -atc --framework=5 --model=./resnet18.onnx --output=resnet18_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp.config --enable_small_channel=1 - - -## Int8量化(可选) -atc --framework=5 --model=./result/resnet18_deploy_model.onnx --output=resnet18_bs1_int8 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend710 --insert_op_conf=aipp.config --enable_small_channel=1 -``` - -### 4.4 模型离线推理 - -1.设置环境变量 -``` -source env.sh -``` -**说明** ->此脚本中环境变量只供参考,请以实际安装环境配置环境变量 - -2.增加benchmark.{arch}可执行权限 -``` -chmod u+x benchmark.x86_64 -``` - -3.执行离线推理 -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet18_bs1.om -input_text_path=./resnet18_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -执行./benchmark.x86_64工具请选择与运行环境架构相同的命令。详情参考[CANN 推理benchmark工具用户指南](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) - -4.精度验证 -调用imagenet_acc_eval.py脚本与数据集标签val_label.txt比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /home/HwHiAiUser/dataset/imagenet/val_label.txt ./ result.json -``` +# ResNet18 Onnx模型端到端推理指导 + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[ResNet18论文](https://arxiv.org/pdf/1512.03385.pdf) + +### 1.2 代码地址 +[ResNet18代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) +branch:master +commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.3 + +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 数据集预处理 + +- **[数据集获取](#31-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 3.1 数据集获取 +该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 3.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val 
./prep_dataset +``` +### 3.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet18_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 4 模型推理 + +- **[pth转onnx模型](#41-pth转onnx模型)** + +- **[onnx转om模型](#42-onnx转om模型)** + +### 4.1 pth转onnx模型 + +1.下载pth权重文件 +从https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py的第26行获取对应权重下载链接,使用wget命令下载对应权重 + +文件MD5sum:e0b1c919e74f9a193d36871d9964bf7d + +2.ResNet18模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网 +``` +git clone https://github.com/pytorch/vision +cd vision +python3.7 setup.py install +cd .. +``` +3.编写pth2onnx脚本resnet18_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +3.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 resnet18_pth2onnx.py ./resnet18-f37072fd.pth resnet18.onnx +``` +### 4.2 onnx模型量化(可选) +1.AMCT工具包安装,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) + +2.数据预处理,用于量化因子矫正。当前模型为动态batch,建议用多batch_size的预处理文件矫正量化因子。 +执行以下命令: +``` +python3.7.5 calibration_bin.py prep_dataset calibration_bin 64 +``` + +3.ONNX模型量化,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) +在result目录下生成resnet18_deploy_model.onnx量化模型 + +4.量化模型验证,除onnx离线模型转换om模型命令有区别外,其余一致 + +### 4.3 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +**说明** +>此脚本中环境变量只供参考,请以实际安装环境配置环境变量 + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) +``` +atc --framework=5 --model=./resnet18.onnx --output=resnet18_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp.config --enable_small_channel=1 + + +## Int8量化(可选) +atc --framework=5 --model=./result/resnet18_deploy_model.onnx --output=resnet18_bs1_int8 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend710 --insert_op_conf=aipp.config --enable_small_channel=1 +``` + +### 4.4 模型离线推理 + +1.设置环境变量 +``` +source env.sh +``` +**说明** +>此脚本中环境变量只供参考,请以实际安装环境配置环境变量 + +2.增加benchmark.{arch}可执行权限 +``` +chmod u+x benchmark.x86_64 +``` + +3.执行离线推理 +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet18_bs1.om -input_text_path=./resnet18_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +执行./benchmark.x86_64工具请选择与运行环境架构相同的命令。详情参考[CANN 推理benchmark工具用户指南](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) + +4.精度验证 +调用imagenet_acc_eval.py脚本与数据集标签val_label.txt比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /home/HwHiAiUser/dataset/imagenet/val_label.txt ./ result.json +``` 第一个参数为生成推理结果所在路径,第二个参数为标签数据,第三个参数为生成结果文件路径,第四个参数为生成结果文件名称 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/calibration_bin.py b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/calibration_bin.py index 0af890d1323d9d200b92dd38293c697d2c7e29d5..0839502aa7b2340f29102533a53c45c13367c039 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/calibration_bin.py +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/calibration_bin.py @@ 
-1,56 +1,56 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import numpy as np -import multiprocessing - -max_bin=10 -def preprocess(src_path, save_path, batch_size): - files = os.listdir(src_path) - - output_data = [0] - for i, file in enumerate(files): - input_data = np.fromfile(os.path.join(src_path, file), dtype=np.float32) - input_data = input_data.reshape(1, 3, 224, 224) - - if i % batch_size == 0: - output_data = input_data - else: - output_data = np.concatenate((output_data, input_data), axis=0) - - # only save 10 bin files - loop_id = (i + 1) // batch_size - if loop_id > max_bin: - break - - if (i + 1) % batch_size == 0: - output_data.tofile("{}/img_{}_bs{}.bin".format(save_path, loop_id, batch_size)) - output_data = [0] - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [src_path] [save_path] [batch_size]") - src_path = sys.argv[1] - save_path = sys.argv[2] - batch_size = int(sys.argv[3]) - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(src_path, save_path, batch_size) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
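A quick way to sanity-check the calibration bins produced by this script is to read one back and confirm its size and shape. This is only a sketch: it assumes the float32 1x3x224x224 inputs that calibration_bin.py itself expects, the example path follows the `python3.7.5 calibration_bin.py prep_dataset calibration_bin 64` command shown earlier, and the helper name is illustrative.
```
import numpy as np

def check_calibration_bin(bin_path, batch_size, channels=3, height=224, width=224):
    # calibration_bin.py concatenates float32 NCHW samples, so the raw file
    # must contain exactly batch_size * C * H * W float32 values
    data = np.fromfile(bin_path, dtype=np.float32)
    expected = batch_size * channels * height * width
    assert data.size == expected, f"{bin_path}: {data.size} values, expected {expected}"
    return data.reshape(batch_size, channels, height, width)

if __name__ == "__main__":
    batch = check_calibration_bin("calibration_bin/img_1_bs64.bin", batch_size=64)
    print(batch.shape, batch.dtype)  # (64, 3, 224, 224) float32
```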
+ +import os +import sys +import numpy as np +import multiprocessing + +max_bin=10 +def preprocess(src_path, save_path, batch_size): + files = os.listdir(src_path) + + output_data = [0] + for i, file in enumerate(files): + input_data = np.fromfile(os.path.join(src_path, file), dtype=np.float32) + input_data = input_data.reshape(1, 3, 224, 224) + + if i % batch_size == 0: + output_data = input_data + else: + output_data = np.concatenate((output_data, input_data), axis=0) + + # only save 10 bin files + loop_id = (i + 1) // batch_size + if loop_id > max_bin: + break + + if (i + 1) % batch_size == 0: + output_data.tofile("{}/img_{}_bs{}.bin".format(save_path, loop_id, batch_size)) + output_data = [0] + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [src_path] [save_path] [batch_size]") + src_path = sys.argv[1] + save_path = sys.argv[2] + batch_size = int(sys.argv[3]) + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(src_path, save_path, batch_size) + diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_acc_eval.py b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_acc_eval.py +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - 
table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
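The Top-N bookkeeping in imagenet_acc_eval.py boils down to: count hits per rank, then take the cumulative sum over ranks and divide by the number of images. A condensed, self-contained sketch of that logic follows; the function name and the toy random data are illustrative only.
```
import numpy as np

def topn_accuracy(scores, labels, topn=5):
    """scores: (N, num_classes) prediction scores, labels: (N,) ground-truth ids."""
    count_hit = np.zeros(topn)
    for row, label in zip(scores, labels):
        ranking = np.argsort(-row)[:topn]      # best-first class ids
        for rank, cls in enumerate(ranking):
            if cls == label:
                count_hit[rank] += 1           # hit at this rank
                break
    # Top-k accuracy = cumulative hits up to rank k / number of images
    return np.cumsum(count_hit) / len(labels)

# toy usage with random scores, for illustration only
rng = np.random.default_rng(0)
print(topn_accuracy(rng.random((8, 1000)), rng.integers(0, 1000, size=8)))
```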
+ +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ 
== '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_torch_preprocess.py index 4787c75a5e8cb10b4be97bc439dab8bb91501e9b..2cdb4994cacf5cd5c22f8944f28ff99f8d1a7de6 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/imagenet_torch_preprocess.py @@ -1,114 +1,114 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
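For reference, each .bin written by this preprocessing script is simply the raw int8 pixel data of the center-cropped RGB image in HWC order, with no mean/std normalization applied here (that step appears to be delegated to the AIPP config referenced by the atc command). A minimal reader sketch, with an illustrative helper name and path:
```
import numpy as np

def load_preprocessed_bin(bin_path, height=224, width=224, channels=3):
    # the script stores np.array(PIL_image, dtype=np.int8), i.e. HWC int8
    data = np.fromfile(bin_path, dtype=np.int8)
    assert data.size == height * width * channels
    return data.reshape(height, width, channels)

# example (path is illustrative)
# img = load_preprocessed_bin("prep_dataset/ILSVRC2012_val_00000001.bin")
# print(img.shape)  # (224, 224, 3)
```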
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.int8) - - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
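The hand-written resize and center_crop in this file behave like the standard shorter-side resize followed by a center crop. A rough torchvision equivalent is sketched below purely for comparison (it is not the shipped pipeline, and normalization is again intentionally omitted):
```
from PIL import Image
from torchvision import transforms
import numpy as np

to_224 = transforms.Compose([
    transforms.Resize(256),      # shorter side -> 256, aspect ratio kept
    transforms.CenterCrop(224),  # central 224x224 patch
])

img = Image.new("RGB", (500, 375))            # dummy image for illustration
out = np.asarray(to_224(img), dtype=np.int8)  # same int8 HWC layout as the script
print(out.shape)                              # (224, 224, 3)
```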
+ +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.int8) + + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/requirements.txt b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/requirements.txt index 2fc4e802c476feda2a9866a85630f7f3b29428d7..d072d9aa6f2e7a7b0044ff93d036c3c0347ee5c9 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/requirements.txt +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/resnet18_pth2onnx.py b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/resnet18_pth2onnx.py index f7972ea3e1ecb2a5adee7e400d77ca66dec258ce..5933787407e4b3f92af4a90ad32c21d52704b04d 100644 --- a/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/resnet18_pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch/resnet18_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
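Since ATC currently supports ONNX opset 11, it can be worth inspecting the exported file before conversion. The sketch below uses the onnx package from the requirements; the file name resnet18.onnx and the input name image come from the export step above, and the expected shapes are only indicative.
```
import onnx

model = onnx.load("resnet18.onnx")
onnx.checker.check_model(model)                 # structural sanity check
print("opset:", model.opset_import[0].version)  # expected: 11 for ATC

for inp in model.graph.input:
    if inp.name != "image":
        continue
    dims = [d.dim_param or d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)                       # e.g. image ['-1', 3, 224, 224]
```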
- -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.resnet18(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.resnet18(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/aipp_resnet50_710.aippconfig b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/aipp_resnet50_710.aippconfig index 71e0923f2ae25bb4ece78356a9bd9ee865f6bcc0..173c2d80353dc9de5b252a0b612cec5cde113361 100644 --- a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/aipp_resnet50_710.aippconfig +++ b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/aipp_resnet50_710.aippconfig @@ -1,20 +1,20 @@ -aipp_op{ - aipp_mode:static - input_format : RGB888_U8 - - src_image_size_w : 256 - src_image_size_h : 256 - - crop: true - load_start_pos_h : 16 - load_start_pos_w : 16 - crop_size_w : 224 - crop_size_h: 224 - - min_chn_0 : 123.675 - min_chn_1 : 116.28 - min_chn_2 : 103.53 - var_reci_chn_0: 0.0171247538316637 - var_reci_chn_1: 0.0175070028011204 - var_reci_chn_2: 0.0174291938997821 +aipp_op{ + aipp_mode:static + input_format : RGB888_U8 + + src_image_size_w : 256 + src_image_size_h : 256 + + crop: true + load_start_pos_h : 16 + load_start_pos_w : 16 + crop_size_w : 224 + crop_size_h: 224 + + min_chn_0 : 123.675 + min_chn_1 : 116.28 + min_chn_2 : 103.53 + var_reci_chn_0: 0.0171247538316637 + var_reci_chn_1: 0.0175070028011204 + var_reci_chn_2: 0.0174291938997821 } \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/gen_dataset_info.py index 0af8797d669db96ae2e7da28cad7d1f69ef8f723..5ed4309f9e2ab6ec9622f832905225277fa3d688 100644 --- 
a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/imagenet_torch_preprocess.py index b715ddd3f5c5b8f9c9f774378510c06e10fb4aed..a34814146feacbe159ce0c05d10abb8baff2d972 100644 --- a/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer/imagenet_torch_preprocess.py @@ -1,129 +1,129 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
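The constants in the AIPP configs shipped alongside these scripts appear to be the ImageNet mean/std from model_config folded into AIPP: min_chn looks like mean * 255 and var_reci_chn like 1 / (std * 255). The arithmetic below reproduces the configured values; this is an observation from the numbers, not an official derivation.
```
# ImageNet mean/std as used in model_config['resnet']
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

for m, s in zip(mean, std):
    # prints 123.675 / 0.0171247538..., 116.280 / 0.0175070028...,
    # 103.530 / 0.0174291938..., matching aipp_resnet50_710.aippconfig
    print(f"min_chn = {m * 255:.3f}   var_reci_chn = {1 / (s * 255):.16f}")
```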
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 256, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.int8) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - -def preprocess_s(mode_type, src_path, save_path): - files = os.listdir(src_path) - i = 0 - for file in files: - if not file.endswith(".jpeg"): - continue - print("start to process image {}....".format(file)) - i = i + 1 - print("file", file, "===", i) - path_image = os.path.join(src_path, file) - # RGBA to RGB - image = Image.open(path_image).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.int8) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! 
please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess_s(mode_type, src_path, save_path) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 256, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.int8) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + +def preprocess_s(mode_type, src_path, save_path): + files = os.listdir(src_path) + i = 0 + for file in files: + if not file.endswith(".jpeg"): + continue + print("start to process image {}....".format(file)) + i = i + 1 + print("file", file, "===", i) + path_image = os.path.join(src_path, file) + # RGBA to RGB + image = Image.open(path_image).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # 
CenterCrop + img = np.array(image, dtype=np.int8) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess_s(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/README.md b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/README.md index d9e1c75e635f0b818038f7f385f7ac8b0141483b..803c1aebb3fa8d3b2e0a40a4cee4ba37798a0b64 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/README.md +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/README.md @@ -1,234 +1,234 @@ -# SE_ResNet50 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 推理硬件设备](#21-推理硬件设备) - - [2.2 深度学习框架](#22-深度学习框架) - - [2.3 Python第三方库](#23-Python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 获取pth权重文件](#31-获取pth权重文件) - - [3.2 获取pth权重文件](#32-pth转onnx模型) - - [3.3 pth转om模型](#33-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) - - [5.3 性能验证](#53-性能验证) -- [6 评测结果](#6-评测结果) -- [7 test目录说明](#7-test目录说明) - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[SE_ResNet50论文](https://openaccess.thecvf.com/content_cvpr_2018/papers/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.pdf) - -### 1.2 代码地址 -[SE_ResNet50代码](https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/senet.py) - -## 2 环境说明 - -- **[推理硬件设备](#21-推理硬件设备)** - -- **[深度学习框架](#22-深度学习框架)** - -- **[Python第三方库](#23-Python第三方库)** - -### 2.1 推理硬件设备 -``` -Ascend710 -``` - -### 2.2 深度学习框架 -``` -CANN 5.0.4 - -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.10.2 -``` - -### 2.3 Python第三方库 - -``` -numpy == 1.21.4 -opencv-python == 4.5.4.58 -pretrainedmodels == 0.7.4 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[获取pth权重文件](#31-获取pth权重文件)** - -- **[pth转onnx模型](#32-pth转onnx模型)** - -- **[onnx转om模型](#33-onnx转om模型)** - -### 3.1 获取pth权重文件 -执行命令: - -``` -wget https://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth -``` -执行后在当前目录下获取pth权重文件:se_resnet50-ce0d4300.pth。 - -### 3.2 pth转onnx模型 -执行命令: - -``` -python3 SE_ResNet50_pth2onnx.py ./se_resnet50-ce0d4300.pth ./se_resnet50_dynamic_bs.onnx -``` - 
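Before running the export command above, it can be worth confirming that the downloaded checkpoint actually matches the pretrainedmodels se_resnet50 definition. A minimal sketch; the checkpoint path comes from the wget step earlier, and strict=False is used only so mismatched keys are reported rather than raised.
```
import torch
from pretrainedmodels.models.senet import se_resnet50

model = se_resnet50(num_classes=1000, pretrained=None)
state = torch.load("./se_resnet50-ce0d4300.pth", map_location="cpu")
missing, unexpected = model.load_state_dict(state, strict=False)
print("missing keys:", missing)       # both lists should be empty
print("unexpected keys:", unexpected)
print("params:", sum(p.numel() for p in model.parameters()))
```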
-命令参数分别为输入pth文件:./se_resnet50-ce0d4300.pth和输出onnx文件:./se_resnet50_dynamic_bs.onnx -执行后在当前路径下生成se_resnet50_dynamic_bs.onnx模型文件。 - -### 3.3 onnx转om模型 - -a.设置环境变量: - -``` -source /usr/local/Ascend/ascend-toolkit/set_env.sh -``` - -该命令中使用CANN默认安装路径(/usr/local/Ascend/ascend-toolkit)中的环境变量,使用过程中请按照实际安装路径设置环境变量。 - -b.执行atc模型转换命令: - -``` -atc --model=./se_resnet50_dynamic_bs.onnx --framework=5 --input_format=NCHW --input_shape="image:32,3,224,224" --output=./se_resnet50_fp16_bs32 --log=error --soc_version=Ascend710 --insert_op_conf=./aipp_SE_ResNet50_pth.config --enable_small_channel=1 -``` - -参数说明: - --model:为ONNX模型文件。 - --framework:5代表ONNX模型。 - --input_format:输入数据的格式。 - --input_shape:输入数据的shape。 - --output:输出的OM模型。 - --log:日志级别。 - --soc_version:处理器型号,Ascend310或Ascend710。 - --insert_op_config:插入算子的配置文件路径与文件名,例如aipp预处理算子。 - --enable_small_channel:Set enable small channel. 0(default): disable; 1: enable - -执行后在当前目录下生成om模型文件:se_resnet50_fp16_bs32.om。 - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/home/HwHiAiUser/dataset/ImageNet/val_union路径与/home/HwHiAiUser/dataset/ImageNet/val_label.txt文件下。 - -数据集获取请参考[pytorch原始仓](https://github.com/pytorch/examples/tree/master/imagenet)说明。 - -### 4.2 数据集预处理 - -1.预处理工具为:imagenet_torch_preprocess.py -2.执行工具命令: -``` -python3 ./imagenet_torch_preprocess.py /home/HwHiAiUser/dataset/ImageNet/val_union ./data/ImageNet_bin -``` -命令参数分别数据集图片路径:/home/HwHiAiUser/dataset/ImageNet/val_union和处理结果bin文件保存路径:./data/ImageNet_bin。 -执行后在./data/ImageNet_bin路径下生成数据处理后的bin文件。 - -### 4.3 生成数据集信息文件 -1.生成数据集信息文件工具为:gen_dataset_info.py。 -2.执行工具命令: - -``` -python3 ./gen_dataset_info.py bin ./data/ImageNet_bin ./data/ImageNet_bin.info 224 224 -``` -命令参数分别为数据集文件类型:bin、文件路径:./data/ImageNet_bin、数据集信息文件:./data/ImageNet_bin.info、图片像素长:224、图片像素宽:224。 -执行后在./data路径下生成数据集信息文件:ImageNet_bin.info。 - -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -- **[性能验证](#52-性能验证)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend710上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量: - -``` -source /usr/local/Ascend/ascend-toolkit/set_env.sh -``` - -2.执行推理命令: - -``` -./benchmark.x86_64 -model_type=vision -om_path=./se_resnet50_fp16_bs32.om -device_id=0 -batch_size=32 -input_text_path=./data/ImageNet_bin.info -input_width=256 -input_height=256 -output_binary=false -useDvpp=false -``` - -分辨率(input_width,input_height)要与aipp_SE_ResNet50_pth.config文件中配置(src_image_size_w,src_image_size_h)保持一致,执行后推理结果保存在./result/dumpOutput_device0路径下。 - -3.精度验证: -调用vision_metric_ImageNet.py工具脚本与数据集标签val_label.txt比对,可以获得Accuracy Top5数据: - -``` -python3 ./vision_metric_ImageNet.py ./result/dumpOutput_device0/ /home/HwHiAiUser/dataset/ImageNet/val_label.txt ./result accuracy_result.json -``` - -第一个参数为生成推理结果所在路径,第二个参数为标签数据,第三个参数为生成结果文件路径,第四个参数为生成结果文件名称。 -执行后模型精度结果保存在./result/accuracy_result.json文件中 - -### 5.3 性能验证 -1.设置环境变量: - -``` -source /usr/local/Ascend/ascend-toolkit/set_env.sh -``` - -2.执行性能测试命令: - -``` -./benchmark.x86_64 -round=50 -om_path=./se_resnet50_fp16_bs32.om -device_id=0 -batch_size=32 > ./result/performace_result.json -``` - -执行后性能测试结果保存在./result/performace_result.json文件中 - -## 6 评测结果 - -评测结果 -| 模型 | pth精度 | 710精度 | 性能基准 | 710性能 | -| --------------- | 
---------------------- | ------------------------- | ------------ | ----------- | -| SE_ResNet50 bs32 | Acc@1 77.63,Acc@5 93.64| Acc@1 77.36,Acc@5 93.76 | 1554.726fps | 2690.43fps | - -## 6 test目录说明 - -test目录下存放的为测试脚本,其中: -1.pth2om.sh为pth模型转om模型脚本,使用命令为: - -``` -bash ./test/pth2om.sh /usr/local/Ascend -``` - -其中/usr/local/Ascend为cann包默认安装路径,执行后在当前目录下生成om模型: se_resnet50_fp16_bs32.om。 - -2.eval_acc_perf.sh为om模型,精度、性能测试脚本,使用命令为: - -``` -bash ./test/eval_acc_perf.sh /usr/local/Ascend ./se_resnet50_fp16_bs32.om 32 0 /home/HwHiAiUser/dataset/ImageNet/val_label.txt -``` - +# SE_ResNet50 Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 推理硬件设备](#21-推理硬件设备) + - [2.2 深度学习框架](#22-深度学习框架) + - [2.3 Python第三方库](#23-Python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 获取pth权重文件](#31-获取pth权重文件) + - [3.2 获取pth权重文件](#32-pth转onnx模型) + - [3.3 pth转om模型](#33-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) + - [5.3 性能验证](#53-性能验证) +- [6 评测结果](#6-评测结果) +- [7 test目录说明](#7-test目录说明) + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[SE_ResNet50论文](https://openaccess.thecvf.com/content_cvpr_2018/papers/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.pdf) + +### 1.2 代码地址 +[SE_ResNet50代码](https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/senet.py) + +## 2 环境说明 + +- **[推理硬件设备](#21-推理硬件设备)** + +- **[深度学习框架](#22-深度学习框架)** + +- **[Python第三方库](#23-Python第三方库)** + +### 2.1 推理硬件设备 +``` +Ascend710 +``` + +### 2.2 深度学习框架 +``` +CANN 5.0.4 + +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.10.2 +``` + +### 2.3 Python第三方库 + +``` +numpy == 1.21.4 +opencv-python == 4.5.4.58 +pretrainedmodels == 0.7.4 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[获取pth权重文件](#31-获取pth权重文件)** + +- **[pth转onnx模型](#32-pth转onnx模型)** + +- **[onnx转om模型](#33-onnx转om模型)** + +### 3.1 获取pth权重文件 +执行命令: + +``` +wget https://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth +``` +执行后在当前目录下获取pth权重文件:se_resnet50-ce0d4300.pth。 + +### 3.2 pth转onnx模型 +执行命令: + +``` +python3 SE_ResNet50_pth2onnx.py ./se_resnet50-ce0d4300.pth ./se_resnet50_dynamic_bs.onnx +``` + +命令参数分别为输入pth文件:./se_resnet50-ce0d4300.pth和输出onnx文件:./se_resnet50_dynamic_bs.onnx +执行后在当前路径下生成se_resnet50_dynamic_bs.onnx模型文件。 + +### 3.3 onnx转om模型 + +a.设置环境变量: + +``` +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` + +该命令中使用CANN默认安装路径(/usr/local/Ascend/ascend-toolkit)中的环境变量,使用过程中请按照实际安装路径设置环境变量。 + +b.执行atc模型转换命令: + +``` +atc --model=./se_resnet50_dynamic_bs.onnx --framework=5 --input_format=NCHW --input_shape="image:32,3,224,224" --output=./se_resnet50_fp16_bs32 --log=error --soc_version=Ascend710 --insert_op_conf=./aipp_SE_ResNet50_pth.config --enable_small_channel=1 +``` + +参数说明: + --model:为ONNX模型文件。 + --framework:5代表ONNX模型。 + --input_format:输入数据的格式。 + --input_shape:输入数据的shape。 + --output:输出的OM模型。 + --log:日志级别。 + --soc_version:处理器型号,Ascend310或Ascend710。 + --insert_op_config:插入算子的配置文件路径与文件名,例如aipp预处理算子。 + --enable_small_channel:Set enable small channel. 
0(default): disable; 1: enable + +执行后在当前目录下生成om模型文件:se_resnet50_fp16_bs32.om。 + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用ImageNet的5万张验证集进行测试,图片与标签分别存放在/home/HwHiAiUser/dataset/ImageNet/val_union路径与/home/HwHiAiUser/dataset/ImageNet/val_label.txt文件下。 + +数据集获取请参考[pytorch原始仓](https://github.com/pytorch/examples/tree/master/imagenet)说明。 + +### 4.2 数据集预处理 + +1.预处理工具为:imagenet_torch_preprocess.py +2.执行工具命令: +``` +python3 ./imagenet_torch_preprocess.py /home/HwHiAiUser/dataset/ImageNet/val_union ./data/ImageNet_bin +``` +命令参数分别数据集图片路径:/home/HwHiAiUser/dataset/ImageNet/val_union和处理结果bin文件保存路径:./data/ImageNet_bin。 +执行后在./data/ImageNet_bin路径下生成数据处理后的bin文件。 + +### 4.3 生成数据集信息文件 +1.生成数据集信息文件工具为:gen_dataset_info.py。 +2.执行工具命令: + +``` +python3 ./gen_dataset_info.py bin ./data/ImageNet_bin ./data/ImageNet_bin.info 224 224 +``` +命令参数分别为数据集文件类型:bin、文件路径:./data/ImageNet_bin、数据集信息文件:./data/ImageNet_bin.info、图片像素长:224、图片像素宽:224。 +执行后在./data路径下生成数据集信息文件:ImageNet_bin.info。 + +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +- **[性能验证](#52-性能验证)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend710上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量: + +``` +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` + +2.执行推理命令: + +``` +./benchmark.x86_64 -model_type=vision -om_path=./se_resnet50_fp16_bs32.om -device_id=0 -batch_size=32 -input_text_path=./data/ImageNet_bin.info -input_width=256 -input_height=256 -output_binary=false -useDvpp=false +``` + +分辨率(input_width,input_height)要与aipp_SE_ResNet50_pth.config文件中配置(src_image_size_w,src_image_size_h)保持一致,执行后推理结果保存在./result/dumpOutput_device0路径下。 + +3.精度验证: +调用vision_metric_ImageNet.py工具脚本与数据集标签val_label.txt比对,可以获得Accuracy Top5数据: + +``` +python3 ./vision_metric_ImageNet.py ./result/dumpOutput_device0/ /home/HwHiAiUser/dataset/ImageNet/val_label.txt ./result accuracy_result.json +``` + +第一个参数为生成推理结果所在路径,第二个参数为标签数据,第三个参数为生成结果文件路径,第四个参数为生成结果文件名称。 +执行后模型精度结果保存在./result/accuracy_result.json文件中 + +### 5.3 性能验证 +1.设置环境变量: + +``` +source /usr/local/Ascend/ascend-toolkit/set_env.sh +``` + +2.执行性能测试命令: + +``` +./benchmark.x86_64 -round=50 -om_path=./se_resnet50_fp16_bs32.om -device_id=0 -batch_size=32 > ./result/performace_result.json +``` + +执行后性能测试结果保存在./result/performace_result.json文件中 + +## 6 评测结果 + +评测结果 +| 模型 | pth精度 | 710精度 | 性能基准 | 710性能 | +| --------------- | ---------------------- | ------------------------- | ------------ | ----------- | +| SE_ResNet50 bs32 | Acc@1 77.63,Acc@5 93.64| Acc@1 77.36,Acc@5 93.76 | 1554.726fps | 2690.43fps | + +## 6 test目录说明 + +test目录下存放的为测试脚本,其中: +1.pth2om.sh为pth模型转om模型脚本,使用命令为: + +``` +bash ./test/pth2om.sh /usr/local/Ascend +``` + +其中/usr/local/Ascend为cann包默认安装路径,执行后在当前目录下生成om模型: se_resnet50_fp16_bs32.om。 + +2.eval_acc_perf.sh为om模型,精度、性能测试脚本,使用命令为: + +``` +bash ./test/eval_acc_perf.sh /usr/local/Ascend ./se_resnet50_fp16_bs32.om 32 0 /home/HwHiAiUser/dataset/ImageNet/val_label.txt +``` + 其中第1个参数为cann包安装路径,第2个参数为om模型,第3个参数为batch_size,第4个参数为device_id,第5个参数为标签数据。执行后精度结果保存在./result/accuracy_result.json文件中,性能结果保存在./result/performace_result.json文件中。 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/SE_ResNet50_pth2onnx.py 
b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/SE_ResNet50_pth2onnx.py index 94a9bce4b9d4cfff1799b4d5ebbbd0949e0cc593..9a96a251560f3eff356e5006070284f7dc57c14e 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/SE_ResNet50_pth2onnx.py +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/SE_ResNet50_pth2onnx.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=UTF-8 - -import sys -import torch -import onnx -from pretrainedmodels.models.senet import se_resnet50 - - -def pth2onnx(pth_file, onnx_file): - model = se_resnet50(num_classes=1000, pretrained=None) - model.load_state_dict(torch.load(pth_file)) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11) - -if __name__ == '__main__': - if len(sys.argv) != 3: - raise Exception("usage: python SE_ResNet50_pth2onnx.py ") - pth_file = sys.argv[1] - onnx_file = sys.argv[2] - pth2onnx(pth_file, onnx_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
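If onnxruntime is available (it is not in the listed requirements, so treat it as an optional development dependency), the dynamic batch dimension of the exported model can be smoke-tested on CPU. This is a sketch only; the file name and the input name image follow the export script.
```
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("se_resnet50_dynamic_bs.onnx",
                            providers=["CPUExecutionProvider"])
for bs in (1, 4, 32):
    x = np.random.randn(bs, 3, 224, 224).astype(np.float32)
    (out,) = sess.run(None, {"image": x})
    print(bs, out.shape)   # expect (bs, 1000) for every batch size
```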
+ +# coding=UTF-8 + +import sys +import torch +import onnx +from pretrainedmodels.models.senet import se_resnet50 + + +def pth2onnx(pth_file, onnx_file): + model = se_resnet50(num_classes=1000, pretrained=None) + model.load_state_dict(torch.load(pth_file)) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, onnx_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11) + +if __name__ == '__main__': + if len(sys.argv) != 3: + raise Exception("usage: python SE_ResNet50_pth2onnx.py ") + pth_file = sys.argv[1] + onnx_file = sys.argv[2] + pth2onnx(pth_file, onnx_file) diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/aipp_SE_ResNet50_pth.config b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/aipp_SE_ResNet50_pth.config index 01600d0fa197ad6fe64e732c8bd3a637121a7de5..16b6c79d159944423dc0e63accf92a26d8ebb3e8 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/aipp_SE_ResNet50_pth.config +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/aipp_SE_ResNet50_pth.config @@ -1,21 +1,21 @@ -aipp_op{ - aipp_mode:static - input_format : RGB888_U8 - - src_image_size_w : 256 - src_image_size_h : 256 - - crop: true - load_start_pos_h : 16 - load_start_pos_w : 16 - crop_size_w : 224 - crop_size_h: 224 - - min_chn_0 : 123.675 - min_chn_1 : 116.28 - min_chn_2 : 103.53 - var_reci_chn_0: 0.0171247538316637 - var_reci_chn_1: 0.0175070028011204 - var_reci_chn_2: 0.0174291938997821 - -} +aipp_op{ + aipp_mode:static + input_format : RGB888_U8 + + src_image_size_w : 256 + src_image_size_h : 256 + + crop: true + load_start_pos_h : 16 + load_start_pos_w : 16 + crop_size_w : 224 + crop_size_h: 224 + + min_chn_0 : 123.675 + min_chn_1 : 116.28 + min_chn_2 : 103.53 + var_reci_chn_0: 0.0171247538316637 + var_reci_chn_1: 0.0175070028011204 + var_reci_chn_2: 0.0174291938997821 + +} diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/gen_dataset_info.py index be7ee43a39e0c8a199c8c0aa78c94ead2a0d1b54..9b9fa05d70b54edecba8722b48a8b35e2c62824c 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding=UTF-8 - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img[7:], width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img[7:], str(width), str(height)]) - file.write(content) - file.write('\n') - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=UTF-8 + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img[7:], width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img[7:], str(width), str(height)]) + file.write(content) + file.write('\n') + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/imagenet_torch_preprocess.py b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/imagenet_torch_preprocess.py index ab5ad9b2bc57d2a366130ce64723df39dba26918..9f0c2cf3b74b2548f6e1ad5f39576e9d218aa537 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/imagenet_torch_preprocess.py @@ -1,58 +1,58 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=UTF-8 - -import os -import sys -import cv2 -import numpy as np - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - - resize_size = 256 - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - - for file in in_files: - i = i + 1 - print(file, "===", i) - - img = cv2.imread(os.path.join(file_path, file)) - b, g, r = cv2.split(img) - - img = cv2.merge([r, g, b]) - img = cv2.resize(img, (resize_size, resize_size), interpolation=cv2.INTER_CUBIC) - img = np.array(img, dtype=np.int8) - - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=UTF-8 + +import os +import sys +import cv2 +import numpy as np + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + + resize_size = 256 + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + for file in in_files: + i = i + 1 + print(file, "===", i) + + img = cv2.imread(os.path.join(file_path, file)) + b, g, r = cv2.split(img) + + img = cv2.merge([r, g, b]) + img = cv2.resize(img, (resize_size, resize_size), interpolation=cv2.INTER_CUBIC) + img = np.array(img, dtype=np.int8) + + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/vision_metric_ImageNet.py index 82580bab7dca794f52f3465d9b9fdaa82e1f21fc..3ae9c66b2ea5aa5ee6776fca435b2b07affe7d22 100644 --- a/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer/vision_metric_ImageNet.py @@ -1,184 +1,184 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
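Note how the preprocessing script and the AIPP config split the work: imagenet_torch_preprocess.py only converts BGR to RGB, resizes to 256x256 and dumps the raw pixels as .bin files, while the 224x224 center crop and the per-channel normalization are delegated to aipp_SE_ResNet50_pth.config. The min_chn_* / var_reci_chn_* values in that config are simply the usual ImageNet mean/std rescaled to the 0-255 pixel range; a minimal sketch (constants reproduced from the config, not part of the repo scripts):

```python
# Minimal sketch: derive the AIPP normalization constants in
# aipp_SE_ResNet50_pth.config from the standard ImageNet statistics.
mean = [0.485, 0.456, 0.406]   # per-channel mean in [0, 1]
std = [0.229, 0.224, 0.225]    # per-channel std in [0, 1]

min_chn = [round(m * 255, 3) for m in mean]   # -> [123.675, 116.28, 103.53]
var_reci_chn = [1 / (s * 255) for s in std]   # -> ~[0.0171248, 0.0175070, 0.0174292]

# AIPP then applies (pixel - min_chn) * var_reci_chn per channel, which is
# equivalent to the familiar (pixel/255 - mean) / std normalization.
print(min_chn, var_reci_chn)
```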
- -# coding=UTF-8 - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == 
'__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=UTF-8 + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/PytorchTransfer.py b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/PytorchTransfer.py index 15680c59ff5a61a0b0d01d869bc39087000b7dc1..15eeaaa895e24bcf280649fb8f5bc9a8f1288f31 100644 --- a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/PytorchTransfer.py +++ b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/PytorchTransfer.py @@ -1,47 +1,47 @@ -import sys -import os -import torch -import cv2 -from PIL import Image -import numpy as np -import torch.utils.data -import torchvision.transforms as transforms -from torch.autograd import Variable - - -def mobilenet_onnx(input_path: str, output_path: str): - img = cv2.imread(input_path) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - pilimg = Image.fromarray(img) - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - val_transformer = transforms.Compose([ - transforms.Scale(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize - ]) - - img_tensor = val_transformer(pilimg) - img_tensor = torch.unsqueeze(img_tensor, dim=0).float() - img_tensor = 
Variable(img_tensor, requires_grad=False) - img_tensor.reshape(1, 3, 224, 224) - img_numpy = img_tensor.cpu().numpy() - - img_name = input_path.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - output_fl = os.path.join(output_path, bin_name) - # save img_tensor as binary file for om inference input - img_numpy.tofile(output_fl) - - -if __name__ == "__main__": - input_img_dir = sys.argv[1] - output_img_dir = sys.argv[2] - images = os.listdir(input_img_dir) - for image_name in images: - if not image_name.endswith(".jpeg"): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(input_img_dir, image_name) - mobilenet_onnx(path_image, output_img_dir) +import sys +import os +import torch +import cv2 +from PIL import Image +import numpy as np +import torch.utils.data +import torchvision.transforms as transforms +from torch.autograd import Variable + + +def mobilenet_onnx(input_path: str, output_path: str): + img = cv2.imread(input_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pilimg = Image.fromarray(img) + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + val_transformer = transforms.Compose([ + transforms.Scale(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize + ]) + + img_tensor = val_transformer(pilimg) + img_tensor = torch.unsqueeze(img_tensor, dim=0).float() + img_tensor = Variable(img_tensor, requires_grad=False) + img_tensor.reshape(1, 3, 224, 224) + img_numpy = img_tensor.cpu().numpy() + + img_name = input_path.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + output_fl = os.path.join(output_path, bin_name) + # save img_tensor as binary file for om inference input + img_numpy.tofile(output_fl) + + +if __name__ == "__main__": + input_img_dir = sys.argv[1] + output_img_dir = sys.argv[2] + images = os.listdir(input_img_dir) + for image_name in images: + if not image_name.endswith(".jpeg"): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(input_img_dir, image_name) + mobilenet_onnx(path_image, output_img_dir) diff --git a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/ReadMe.md index 89d9026fb4b7f9c7dc761d809e3a410e1d688362..fd5be016308e01ae8d4d43219e136c10d8076660 100644 --- a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/ReadMe.md @@ -1,48 +1,48 @@ -文件作用说明: - -1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 - -2.shufflenetv2.py:官方模型用于转换pth文件到onnx文件 - -3.shufflenetv2_wock_op_woct.py:NPU下训练模型用于转换pth文件到onnx文件 - -4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 - -5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 - -6.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - - - - - -推理端到端步骤: - -(1) 从Torchvision下载shufflenetv2模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 - - - -(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune - -本demo已提供调优完成的om模型 - - - -(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 - - - -(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./shufflenetv2_bs16.om -useDvpp=False - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json - -验证推理结果 - 
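The two model definitions listed in the ReadMe differ mainly in how channel shuffle is expressed: shufflenetv2.py keeps the stock view/transpose formulation, while shufflenetv2_wock_op_woct.py (the variant used for NPU-trained checkpoints) rewrites it as a concat followed by index_select over precomputed indices (Channel_Shuffle). A minimal sketch, not part of the repo, checking that the two orderings agree for groups=2:

```python
# Minimal sketch: the concat + index_select shuffle in shufflenetv2_wock_op_woct.py
# reorders channels exactly like the view/transpose channel_shuffle in shufflenetv2.py.
import torch

def shuffle_reshape(x, groups=2):
    n, c, h, w = x.size()
    x = x.view(n, groups, c // groups, h, w)
    return torch.transpose(x, 1, 2).contiguous().view(n, -1, h, w)

def shuffle_index_select(x1, x2):
    c = x1.size(1) + x2.size(1)
    # same index order as Channel_Shuffle.out: reshape(groups, c//groups).T.flatten()
    idx = torch.arange(c).reshape(2, c // 2).t().flatten()
    return torch.cat([x1, x2], dim=1).index_select(1, idx)

x1, x2 = torch.randn(2, 58, 7, 7), torch.randn(2, 58, 7, 7)
assert torch.equal(shuffle_reshape(torch.cat([x1, x2], dim=1)), shuffle_index_select(x1, x2))
```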
+文件作用说明: + +1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 + +2.shufflenetv2.py:官方模型用于转换pth文件到onnx文件 + +3.shufflenetv2_wock_op_woct.py:NPU下训练模型用于转换pth文件到onnx文件 + +4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 + +5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 + +6.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + +8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + + + + + +推理端到端步骤: + +(1) 从Torchvision下载shufflenetv2模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 + + + +(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune + +本demo已提供调优完成的om模型 + + + +(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 + + + +(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./shufflenetv2_bs16.om -useDvpp=False + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2.py b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2.py index 4c107ef0330d13b434b39630fc1d3e3752f9a9dd..ecf11fda97c51bf348b1c9ecae2a139057e206b2 100644 --- a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2.py +++ b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2.py @@ -1,213 +1,213 @@ -import torch -import torch.nn as nn - - -__all__ = [ - 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', - 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' -] - -model_urls = { - 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', - 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', - 'shufflenetv2_x1.5': None, - 'shufflenetv2_x2.0': None, -} - - -def channel_shuffle(x, groups): - # type: (torch.Tensor, int) -> torch.Tensor - batchsize, num_channels, height, width = x.data.size() - channels_per_group = num_channels // groups - - # reshape - x = x.view(batchsize, groups, - channels_per_group, height, width) - - x = torch.transpose(x, 1, 2).contiguous() - - # flatten - x = x.view(batchsize, -1, height, width) - - return x - - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride): - super(InvertedResidual, self).__init__() - - if not (1 <= stride <= 3): - raise ValueError('illegal stride value') - self.stride = stride - - branch_features = oup // 2 - assert (self.stride != 1) or (inp == branch_features << 1) - - if self.stride > 1: - self.branch1 = nn.Sequential( - self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), - nn.BatchNorm2d(inp), - nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - ) - else: - self.branch1 = nn.Sequential() - - self.branch2 = nn.Sequential( - nn.Conv2d(inp if (self.stride > 1) else branch_features, - branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), - nn.BatchNorm2d(branch_features), - nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - ) - - @staticmethod - def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): - return nn.Conv2d(i, o, 
kernel_size, stride, padding, bias=bias, groups=i) - - def forward(self, x): - if self.stride == 1: - x1, x2 = x.chunk(2, dim=1) - out = torch.cat((x1, self.branch2(x2)), dim=1) - else: - out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) - - out = channel_shuffle(out, 2) - - return out - - -class ShuffleNetV2(nn.Module): - def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): - super(ShuffleNetV2, self).__init__() - - if len(stages_repeats) != 3: - raise ValueError('expected stages_repeats as list of 3 positive ints') - if len(stages_out_channels) != 5: - raise ValueError('expected stages_out_channels as list of 5 positive ints') - self._stage_out_channels = stages_out_channels - - input_channels = 3 - output_channels = self._stage_out_channels[0] - self.conv1 = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - input_channels = output_channels - - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] - for name, repeats, output_channels in zip( - stage_names, stages_repeats, self._stage_out_channels[1:]): - seq = [inverted_residual(input_channels, output_channels, 2)] - for i in range(repeats - 1): - seq.append(inverted_residual(output_channels, output_channels, 1)) - setattr(self, name, nn.Sequential(*seq)) - input_channels = output_channels - - output_channels = self._stage_out_channels[-1] - self.conv5 = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - - self.fc = nn.Linear(output_channels, num_classes) - - def _forward_impl(self, x): - # See note [TorchScript super()] - x = self.conv1(x) - x = self.maxpool(x) - x = self.stage2(x) - x = self.stage3(x) - x = self.stage4(x) - x = self.conv5(x) - x = x.mean([2, 3]) # globalpool - x = self.fc(x) - return x - - def forward(self, x): - return self._forward_impl(x) - - -def _shufflenetv2(arch, pretrained, progress, *args, **kwargs): - model = ShuffleNetV2(*args, **kwargs) - - if pretrained: - model_url = model_urls[arch] - if model_url is None: - raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) - else: - state_dict = load_state_dict_from_url(model_url, progress=progress) - model.load_state_dict(state_dict) - - return model - - -def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 0.5x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, - [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) - - -def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 1.0x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, - [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) - - -def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 1.5x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, - [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) - - -def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 2.0x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, - [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) - -if __name__ == '__main__': - dummy_input = torch.randn(1, 3, 224, 224) - model = shufflenet_v2_x1_0() - torch.onnx.export(model, dummy_input, "shufflenetv2.onnx", verbose=True,opset_version=11) - +import torch +import torch.nn as nn + + +__all__ = [ + 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', + 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' +] + +model_urls = { + 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', + 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', + 'shufflenetv2_x1.5': None, + 'shufflenetv2_x2.0': None, +} + + +def channel_shuffle(x, groups): + # type: (torch.Tensor, int) -> torch.Tensor + batchsize, num_channels, height, width = x.data.size() + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride): + super(InvertedResidual, self).__init__() + + if not (1 <= stride <= 3): + raise ValueError('illegal stride value') + self.stride = stride + + branch_features = oup // 2 + assert (self.stride != 1) or (inp == branch_features << 1) + + if self.stride > 1: + self.branch1 = nn.Sequential( + self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(inp), + nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + else: + self.branch1 = nn.Sequential() + + self.branch2 = nn.Sequential( + nn.Conv2d(inp if (self.stride > 1) else branch_features, + branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(branch_features), + nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + @staticmethod + def depthwise_conv(i, o, 
kernel_size, stride=1, padding=0, bias=False): + return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) + + def forward(self, x): + if self.stride == 1: + x1, x2 = x.chunk(2, dim=1) + out = torch.cat((x1, self.branch2(x2)), dim=1) + else: + out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) + + out = channel_shuffle(out, 2) + + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): + super(ShuffleNetV2, self).__init__() + + if len(stages_repeats) != 3: + raise ValueError('expected stages_repeats as list of 3 positive ints') + if len(stages_out_channels) != 5: + raise ValueError('expected stages_out_channels as list of 5 positive ints') + self._stage_out_channels = stages_out_channels + + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + input_channels = output_channels + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for name, repeats, output_channels in zip( + stage_names, stages_repeats, self._stage_out_channels[1:]): + seq = [inverted_residual(input_channels, output_channels, 2)] + for i in range(repeats - 1): + seq.append(inverted_residual(output_channels, output_channels, 1)) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + + output_channels = self._stage_out_channels[-1] + self.conv5 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + + self.fc = nn.Linear(output_channels, num_classes) + + def _forward_impl(self, x): + # See note [TorchScript super()] + x = self.conv1(x) + x = self.maxpool(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + x = x.mean([2, 3]) # globalpool + x = self.fc(x) + return x + + def forward(self, x): + return self._forward_impl(x) + + +def _shufflenetv2(arch, pretrained, progress, *args, **kwargs): + model = ShuffleNetV2(*args, **kwargs) + + if pretrained: + model_url = model_urls[arch] + if model_url is None: + raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) + + return model + + +def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 0.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, + [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, + [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, + [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 2.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, + [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) + +if __name__ == '__main__': + dummy_input = torch.randn(1, 3, 224, 224) + model = shufflenet_v2_x1_0() + torch.onnx.export(model, dummy_input, "shufflenetv2.onnx", verbose=True,opset_version=11) + diff --git a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2_wock_op_woct.py b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2_wock_op_woct.py index 5982afc50ecff3625f985155a2f9cd42de8dff05..e81cb5aebe348d09fee9680e6f0794f6d8b7ded0 100644 --- a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2_wock_op_woct.py +++ b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/shufflenetv2_wock_op_woct.py @@ -1,256 +1,256 @@ -import torch -import torch.nn as nn - -try: - from .utils import load_state_dict_from_url -except: - pass - -import numpy as np - -__all__ = [ - 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', - 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' -] - -model_urls = { - 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', - 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', - 'shufflenetv2_x1.5': None, - 'shufflenetv2_x2.0': None, -} - - - -def IndexSelectFullImplementationforward(x1, x2, fp_index, bp_index1, bp_index2): - x = torch.cat([x1, x2], dim=1) - result = x.index_select(1, fp_index) - return result - -def IndexSelectHalfImplementationforward(x1, x2, fp_index1, fp_index2, bp_index1, bp_index2): - x = torch.cat([x1, x2], dim=1) - return x.index_select(1, fp_index1), x.index_select(1, fp_index2) - - -class Channel_Shuffle(nn.Module): - def __init__(self, inp, groups=2, split_shuffle=True): - super(Channel_Shuffle, self).__init__() - - self.split_shuffle = split_shuffle - self.group_len = inp // groups - self.out = np.array(list(range(inp))).reshape(groups, self.group_len).transpose(1, 0).flatten().tolist() - if self.split_shuffle: - self.register_buffer('fp_index1', torch.tensor(self.out[:self.group_len])) - self.register_buffer('fp_index2', torch.tensor(self.out[self.group_len:])) - else: - self.register_buffer('fp_index', torch.tensor(self.out)) - # self.register_buffer('bp_index', torch.tensor(list(range(0, inp, 2))+list(range(1,inp,2)))) - self.register_buffer('bp_index1', 
torch.tensor(list(range(0, inp, 2)))) - self.register_buffer('bp_index2', torch.tensor(list(range(1, inp, 2)))) - - def forward(self, x1, x2): - if self.split_shuffle: - return IndexSelectHalfImplementationforward(x1, x2, self.fp_index1, self.fp_index2, self.bp_index1, - self.bp_index2) - else: - return IndexSelectFullImplementationforward(x1, x2, self.fp_index, self.bp_index1, self.bp_index2) - - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, split_shuffle=True): - super(InvertedResidual, self).__init__() - - if not (1 <= stride <= 3): - raise ValueError('illegal stride value') - self.stride = stride - - branch_features = oup // 2 - assert (self.stride != 1) or (inp == branch_features << 1) - - if self.stride > 1: - self.branch1 = nn.Sequential( - self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), - nn.BatchNorm2d(inp), - nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - ) - else: - self.branch1 = nn.Sequential() - - self.branch2 = nn.Sequential( - nn.Conv2d(inp if (self.stride > 1) else branch_features, - branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), - nn.BatchNorm2d(branch_features), - nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(branch_features), - nn.ReLU(inplace=True), - ) - - if self.stride > 1: - self.channel_shuffle = Channel_Shuffle(inp=branch_features + branch_features, groups=2, - split_shuffle=split_shuffle) - else: - self.channel_shuffle = Channel_Shuffle(inp=inp, groups=2, split_shuffle=split_shuffle) - - @staticmethod - def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): - return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) - - def forward(self, x): - if self.stride == 1: - x1, x2 = x - x2 = self.branch2(x2) - else: - x1 = self.branch1(x) - x2 = self.branch2(x) - - # out = channel_shuffle(out, 2) - out = self.channel_shuffle(x1, x2) - - return out - - -class ShuffleNetV2(nn.Module): - def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): - super(ShuffleNetV2, self).__init__() - - if len(stages_repeats) != 3: - raise ValueError('expected stages_repeats as list of 3 positive ints') - if len(stages_out_channels) != 5: - raise ValueError('expected stages_out_channels as list of 5 positive ints') - self._stage_out_channels = stages_out_channels - - input_channels = 3 - output_channels = self._stage_out_channels[0] - self.conv1 = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - input_channels = output_channels - - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] - for name, repeats, output_channels in zip( - stage_names, stages_repeats, self._stage_out_channels[1:]): - seq = [inverted_residual(input_channels, output_channels, 2)] - for i in range(repeats - 1): - if i == repeats - 2: - seq.append(inverted_residual(output_channels, output_channels, 1, split_shuffle=False)) - else: - seq.append(inverted_residual(output_channels, output_channels, 1)) - setattr(self, name, nn.Sequential(*seq)) - input_channels = output_channels - 
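        # The last stride-1 unit of each stage is built with split_shuffle=False, so the
        # stage emits a single concatenated tensor for whatever consumes it next; the
        # intermediate units keep the two channel halves separate, matching the (x1, x2)
        # tuple expected by the following stride-1 unit's forward path.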
- output_channels = self._stage_out_channels[-1] - self.conv5 = nn.Sequential( - nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), - nn.BatchNorm2d(output_channels), - nn.ReLU(inplace=True), - ) - - self.fc = nn.Linear(output_channels, num_classes) - - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - - def _forward_impl(self, x): - - # See note [TorchScript super()] - x = self.conv1(x) - x = self.maxpool(x) - x = self.stage2(x) - x = self.stage3(x) - x = self.stage4(x) - x = self.conv5(x) - # x = x.mean([2, 3]) # globalpool - x = self.avgpool(x) - x = torch.flatten(x, 1) - - x = self.fc(x) - return x - - def forward(self, x): - return self._forward_impl(x) - - -def _shufflenetv2(arch, pretrained, progress, *args, **kwargs): - model = ShuffleNetV2(*args, **kwargs) - - if pretrained: - model_url = model_urls[arch] - if model_url is None: - raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) - else: - state_dict = load_state_dict_from_url(model_url, progress=progress) - model.load_state_dict(state_dict) - - return model - - -def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 0.5x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, - [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) - - -def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 1.0x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, - [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) - # return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, - # [4, 8, 4], [16, 128, 256, 464, 1024], **kwargs) - - -def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 1.5x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. - - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, - [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) - - -def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs): - """ - Constructs a ShuffleNetV2 with 2.0x output channels, as described in - `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" - `_. 
- - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - progress (bool): If True, displays a progress bar of the download to stderr - """ - return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, - [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) - - -if __name__ == '__main__': - dummy_input = torch.randn(1, 3, 224, 224) - model = shufflenet_v2_x1_0() - torch.onnx.export(model, dummy_input, "shufflenetv2.onnx", verbose=True,opset_version=11) +import torch +import torch.nn as nn + +try: + from .utils import load_state_dict_from_url +except: + pass + +import numpy as np + +__all__ = [ + 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', + 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' +] + +model_urls = { + 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', + 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', + 'shufflenetv2_x1.5': None, + 'shufflenetv2_x2.0': None, +} + + + +def IndexSelectFullImplementationforward(x1, x2, fp_index, bp_index1, bp_index2): + x = torch.cat([x1, x2], dim=1) + result = x.index_select(1, fp_index) + return result + +def IndexSelectHalfImplementationforward(x1, x2, fp_index1, fp_index2, bp_index1, bp_index2): + x = torch.cat([x1, x2], dim=1) + return x.index_select(1, fp_index1), x.index_select(1, fp_index2) + + +class Channel_Shuffle(nn.Module): + def __init__(self, inp, groups=2, split_shuffle=True): + super(Channel_Shuffle, self).__init__() + + self.split_shuffle = split_shuffle + self.group_len = inp // groups + self.out = np.array(list(range(inp))).reshape(groups, self.group_len).transpose(1, 0).flatten().tolist() + if self.split_shuffle: + self.register_buffer('fp_index1', torch.tensor(self.out[:self.group_len])) + self.register_buffer('fp_index2', torch.tensor(self.out[self.group_len:])) + else: + self.register_buffer('fp_index', torch.tensor(self.out)) + # self.register_buffer('bp_index', torch.tensor(list(range(0, inp, 2))+list(range(1,inp,2)))) + self.register_buffer('bp_index1', torch.tensor(list(range(0, inp, 2)))) + self.register_buffer('bp_index2', torch.tensor(list(range(1, inp, 2)))) + + def forward(self, x1, x2): + if self.split_shuffle: + return IndexSelectHalfImplementationforward(x1, x2, self.fp_index1, self.fp_index2, self.bp_index1, + self.bp_index2) + else: + return IndexSelectFullImplementationforward(x1, x2, self.fp_index, self.bp_index1, self.bp_index2) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, split_shuffle=True): + super(InvertedResidual, self).__init__() + + if not (1 <= stride <= 3): + raise ValueError('illegal stride value') + self.stride = stride + + branch_features = oup // 2 + assert (self.stride != 1) or (inp == branch_features << 1) + + if self.stride > 1: + self.branch1 = nn.Sequential( + self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(inp), + nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + else: + self.branch1 = nn.Sequential() + + self.branch2 = nn.Sequential( + nn.Conv2d(inp if (self.stride > 1) else branch_features, + branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(branch_features), + nn.Conv2d(branch_features, branch_features, 
kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + if self.stride > 1: + self.channel_shuffle = Channel_Shuffle(inp=branch_features + branch_features, groups=2, + split_shuffle=split_shuffle) + else: + self.channel_shuffle = Channel_Shuffle(inp=inp, groups=2, split_shuffle=split_shuffle) + + @staticmethod + def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): + return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) + + def forward(self, x): + if self.stride == 1: + x1, x2 = x + x2 = self.branch2(x2) + else: + x1 = self.branch1(x) + x2 = self.branch2(x) + + # out = channel_shuffle(out, 2) + out = self.channel_shuffle(x1, x2) + + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): + super(ShuffleNetV2, self).__init__() + + if len(stages_repeats) != 3: + raise ValueError('expected stages_repeats as list of 3 positive ints') + if len(stages_out_channels) != 5: + raise ValueError('expected stages_out_channels as list of 5 positive ints') + self._stage_out_channels = stages_out_channels + + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + input_channels = output_channels + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for name, repeats, output_channels in zip( + stage_names, stages_repeats, self._stage_out_channels[1:]): + seq = [inverted_residual(input_channels, output_channels, 2)] + for i in range(repeats - 1): + if i == repeats - 2: + seq.append(inverted_residual(output_channels, output_channels, 1, split_shuffle=False)) + else: + seq.append(inverted_residual(output_channels, output_channels, 1)) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + + output_channels = self._stage_out_channels[-1] + self.conv5 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + + self.fc = nn.Linear(output_channels, num_classes) + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + def _forward_impl(self, x): + + # See note [TorchScript super()] + x = self.conv1(x) + x = self.maxpool(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + # x = x.mean([2, 3]) # globalpool + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.fc(x) + return x + + def forward(self, x): + return self._forward_impl(x) + + +def _shufflenetv2(arch, pretrained, progress, *args, **kwargs): + model = ShuffleNetV2(*args, **kwargs) + + if pretrained: + model_url = model_urls[arch] + if model_url is None: + raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) + + return model + + +def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 0.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, + [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, + [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + # return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, + # [4, 8, 4], [16, 128, 256, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, + [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 2.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, + [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) + + +if __name__ == '__main__': + dummy_input = torch.randn(1, 3, 224, 224) + model = shufflenet_v2_x1_0() + torch.onnx.export(model, dummy_input, "shufflenetv2.onnx", verbose=True,opset_version=11) diff --git a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/vision_metric_ImageNet.py index 7bba5f8346a8893b4567d92b900fc4a651115976..f07c93617e661619c0e00be6a972e39fbde966a0 100644 --- a/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch/vision_metric_ImageNet.py @@ -1,173 +1,173 @@ -#coding = utf-8 -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with 
open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - #print(filepath) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - #print(img_gt_dict) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - # print(img_name) - #print(n_labels) - # print(gt) - - resCnt = min(len(sort_index), topn) - # print(sort_index[:5]) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - #print("***************") - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - 
create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - #print("Time used:", elapsed) +#coding = utf-8 +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + #print(filepath) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + #print(img_gt_dict) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + # print(img_name) + #print(n_labels) + # print(gt) + + resCnt = min(len(sort_index), topn) + # print(sort_index[:5]) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + #print("***************") + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", 
"value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + #print("Time used:", elapsed) diff --git a/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch/gen_dataset_info.py b/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch/gen_dataset_info.py index efe82aef071e229c2f3212e1ec5a8531ad4d3e53..3656966b4cd924e11ac53b6449cdec237c0c64f7 100644 --- a/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch/gen_dataset_info.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(fpath, info_n, width, height): - ''' - Describe - ''' - bin_images = glob(os.path.join(fpath, '*.bin')) - with open(info_n, 'w') as f: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - f.write(content) - f.write('\n') - - -def get_jpg_info(fpath, info_n): - ''' - Describe - ''' - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(fpath, '*.' 
+ extension))) - with open(info_n, 'w') as f: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - f.write(content) - f.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - Width = sys.argv[4] - Height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, Width, Height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(fpath, info_n, width, height): + ''' + Describe + ''' + bin_images = glob(os.path.join(fpath, '*.bin')) + with open(info_n, 'w') as f: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + f.write(content) + f.write('\n') + + +def get_jpg_info(fpath, info_n): + ''' + Describe + ''' + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(fpath, '*.' 
+ extension))) + with open(info_n, 'w') as f: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + f.write(content) + f.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + Width = sys.argv[4] + Height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, Width, Height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/ReadMe.md index 740e2ce6aea460a99c62dd0cb16fc6c28ee929bf..0e9f686545e1d92bb84cbb28d42ff7e1d2d310e8 100644 --- a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/ReadMe.md @@ -1,48 +1,48 @@ -文件作用说明: - -1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 - -2.pth2onnx.py:用于转换pth文件到onnx文件 - -3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 - -4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 - -5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 - -6.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - - - - - -推理端到端步骤: - -(1) 从Torchvision下载resnet50模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 - - - -(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune - -本demo已提供调优完成的om模型 - - - -(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 - - - -(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 + +2.pth2onnx.py:用于转换pth文件到onnx文件 + +3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 + +4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 + +5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 + +6.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + +8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + + + + + +推理端到端步骤: + +(1) 从Torchvision下载resnet50模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 + + + +(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune + +本demo已提供调优完成的om模型 + + + +(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 + + + +(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/get_info.py index da181d8d0342e52eb33b38fa475ae812a61575cf..c68e7705e47c90970ab04e5f139e52192a634364 100644 --- a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/get_info.py @@ -1,16 +1,16 @@ -import os 
-import sys -from glob import glob - -file_path = sys.argv[1] -info_name = sys.argv[2] -width = sys.argv[3] -height = sys.argv[4] - -bin_images = glob(os.path.join(file_path, '*')) - -with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') +import os +import sys +from glob import glob + +file_path = sys.argv[1] +info_name = sys.argv[2] +width = sys.argv[3] +height = sys.argv[4] + +bin_images = glob(os.path.join(file_path, '*')) + +with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') diff --git a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/preprocess_vgg_pytorch.py b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/preprocess_vgg_pytorch.py index a36b47f2fbb1990f49256c64a4727aa823759e46..1ac2a414d22d45cc37f09e2cb412b5057e81e694 100644 --- a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/preprocess_vgg_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/preprocess_vgg_pytorch.py @@ -1,83 +1,83 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. - """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - - -def deepmar_onnx(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - input_size = (256, 256) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - for file in in_files: - i = i + 1 - print(file, "====", i) - img = Image.open(os.path.join(file_path, file)).convert('RGB') - img = resize(img, input_size) # transforms.Resize(256) - img = np.array(img, dtype=np.float32) - img = center_crop(img, 224, 224) # transforms.CenterCrop(224) - - img = img / 255. 
# transforms.ToTensor() - - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - - img = img.transpose(2, 0, 1) # HWC -> CHW - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - deepmar_onnx(file_path, bin_path) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def deepmar_onnx(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + input_size = (256, 256) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + for file in in_files: + i = i + 1 + print(file, "====", i) + img = Image.open(os.path.join(file_path, file)).convert('RGB') + img = resize(img, input_size) # transforms.Resize(256) + img = np.array(img, dtype=np.float32) + img = center_crop(img, 224, 224) # transforms.CenterCrop(224) + + img = img / 255. 
# transforms.ToTensor() + + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + + img = img.transpose(2, 0, 1) # HWC -> CHW + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + deepmar_onnx(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/vision_metric_ImageNet.py index 7bba5f8346a8893b4567d92b900fc4a651115976..f07c93617e661619c0e00be6a972e39fbde966a0 100644 --- a/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch/vision_metric_ImageNet.py @@ -1,173 +1,173 @@ -#coding = utf-8 -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - #print(filepath) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - 
sort_index = np.argsort(-prediction) - #print(img_gt_dict) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - # print(img_name) - #print(n_labels) - # print(gt) - - resCnt = min(len(sort_index), topn) - # print(sort_index[:5]) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - #print("***************") - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - #print("Time used:", elapsed) +#coding = utf-8 +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + 
n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + #print(filepath) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + #print(img_gt_dict) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + # print(img_name) + #print(n_labels) + # print(gt) + + resCnt = min(len(sort_index), topn) + # print(sort_index[:5]) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + #print("***************") + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + #print("Time used:", elapsed) diff --git a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/ReadMe.md index 740e2ce6aea460a99c62dd0cb16fc6c28ee929bf..0e9f686545e1d92bb84cbb28d42ff7e1d2d310e8 100644 --- a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/ReadMe.md @@ -1,48 +1,48 @@ -文件作用说明: - -1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 - 
-2.pth2onnx.py:用于转换pth文件到onnx文件 - -3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 - -4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 - -5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 - -6.val_label.txt:ImageNet数据集标签,用于验证推理结果 - -7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy - -8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - - - - - -推理端到端步骤: - -(1) 从Torchvision下载resnet50模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 - - - -(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune - -本demo已提供调优完成的om模型 - - - -(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 - - - -(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False - -运行benchmark推理,结果保存在 ./result 目录下 - - - -(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json - -验证推理结果 - +文件作用说明: + +1.auto_tune.sh:模型转换脚本,集成了auto tune功能,可以手动关闭 + +2.pth2onnx.py:用于转换pth文件到onnx文件 + +3.pthtar2onnx.py:用于转换pth.tar文件到onnx文件 + +4.BinaryImageNet.info:ImageNet数据集信息,用于benchmark推理获取数据集 + +5.PytorchTransfer.py:数据集预处理脚本,通过均值方差处理归一化图片 + +6.val_label.txt:ImageNet数据集标签,用于验证推理结果 + +7.vision_metric_ImageNet.py:验证推理结果脚本,比对benchmark输出的分类结果和标签,给出Accuracy + +8.benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer + + + + + +推理端到端步骤: + +(1) 从Torchvision下载resnet50模型或者指定自己训练好的pth文件路径,通过pth2onnx.py脚本转化为onnx模型 + + + +(2)运行auto_tune.sh脚本转换om模型,也可以选择手动关闭auto_tune + +本demo已提供调优完成的om模型 + + + +(3)用PytorchTransfer.py脚本处理数据集,参考BinaryImageNet.Info配置处理后的二进制数据集路径 + + + +(4)./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=0 -input_text_path=./BinaryImageNet.info -input_width=224 -input_height=224 -om_path=./resnet50_pytorch.om -useDvpp=False + +运行benchmark推理,结果保存在 ./result 目录下 + + + +(5)python3.7 vision_metric_ImageNet.py result/dumpOutput/ ./val_label.txt ./ result.json + +验证推理结果 + diff --git a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/get_info.py b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/get_info.py index da181d8d0342e52eb33b38fa475ae812a61575cf..c68e7705e47c90970ab04e5f139e52192a634364 100644 --- a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/get_info.py +++ b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/get_info.py @@ -1,16 +1,16 @@ -import os -import sys -from glob import glob - -file_path = sys.argv[1] -info_name = sys.argv[2] -width = sys.argv[3] -height = sys.argv[4] - -bin_images = glob(os.path.join(file_path, '*')) - -with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') +import os +import sys +from glob import glob + +file_path = sys.argv[1] +info_name = sys.argv[2] +width = sys.argv[3] +height = sys.argv[4] + +bin_images = glob(os.path.join(file_path, '*')) + +with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') diff --git a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/preprocess_vgg_pytorch.py b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/preprocess_vgg_pytorch.py index a36b47f2fbb1990f49256c64a4727aa823759e46..1ac2a414d22d45cc37f09e2cb412b5057e81e694 100644 --- a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/preprocess_vgg_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/preprocess_vgg_pytorch.py @@ -1,83 +1,83 @@ -import os -import sys -import numpy as np -from PIL import Image - - -def 
resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. - - Args: - img (PIL Image): Image to be resized. - size (sequence or int): Desired output size. If size is a sequence like - (h, w), the output size will be matched to this. If size is an int, - the smaller edge of the image will be matched to this number maintaining - the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` - - Returns: - PIL Image: Resized image. - """ - - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def center_crop(img, out_height, out_width): - height, width, _ = img.shape - left = int((width - out_width) / 2) - right = int((width + out_width) / 2) - top = int((height - out_height) / 2) - bottom = int((height + out_height) / 2) - img = img[top:bottom, left:right] - return img - - -def deepmar_onnx(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - input_size = (256, 256) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - for file in in_files: - i = i + 1 - print(file, "====", i) - img = Image.open(os.path.join(file_path, file)).convert('RGB') - img = resize(img, input_size) # transforms.Resize(256) - img = np.array(img, dtype=np.float32) - img = center_crop(img, 224, 224) # transforms.CenterCrop(224) - - img = img / 255. # transforms.ToTensor() - - # 均值方差 - img[..., 0] -= mean[0] - img[..., 1] -= mean[1] - img[..., 2] -= mean[2] - img[..., 0] /= std[0] - img[..., 1] /= std[1] - img[..., 2] /= std[2] - - img = img.transpose(2, 0, 1) # HWC -> CHW - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - deepmar_onnx(file_path, bin_path) +import os +import sys +import numpy as np +from PIL import Image + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. 
+ """ + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def deepmar_onnx(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + input_size = (256, 256) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + for file in in_files: + i = i + 1 + print(file, "====", i) + img = Image.open(os.path.join(file_path, file)).convert('RGB') + img = resize(img, input_size) # transforms.Resize(256) + img = np.array(img, dtype=np.float32) + img = center_crop(img, 224, 224) # transforms.CenterCrop(224) + + img = img / 255. # transforms.ToTensor() + + # 均值方差 + img[..., 0] -= mean[0] + img[..., 1] -= mean[1] + img[..., 2] -= mean[2] + img[..., 0] /= std[0] + img[..., 1] /= std[1] + img[..., 2] /= std[2] + + img = img.transpose(2, 0, 1) # HWC -> CHW + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + deepmar_onnx(file_path, bin_path) diff --git a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/vision_metric_ImageNet.py b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/vision_metric_ImageNet.py index 7bba5f8346a8893b4567d92b900fc4a651115976..f07c93617e661619c0e00be6a972e39fbde966a0 100644 --- a/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/vision_metric_ImageNet.py +++ b/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch/vision_metric_ImageNet.py @@ -1,173 +1,173 @@ -#coding = utf-8 -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - 
data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - #print(filepath) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - #print(img_gt_dict) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - # print(img_name) - #print(n_labels) - # print(gt) - - resCnt = min(len(sort_index), topn) - # print(sort_index[:5]) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - #print("***************") - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - #print("Time used:", elapsed) +#coding = utf-8 +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: 
file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + #print(filepath) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + #print(img_gt_dict) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + # print(img_name) + #print(n_labels) + # print(gt) + + resCnt = min(len(sort_index), topn) + # print(sort_index[:5]) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + #print("***************") + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + #print("Top" + str(i + 1) + " accuracy" + ": " + str(round(accuracy[i] * 100, 2)) + '%') + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + 
annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + #print("Time used:", elapsed) diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/LICENSE b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/LICENSE index df2c2f2c3e55bfbad1aebe53321a94ee5a3854bc..c8ec075d5b892f823d0b485ad4fdd01355c57b3e 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/LICENSE +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Copyright 2018-2019 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/README.md b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/README.md index a66e9ee510332636b4b9e64d372a4a90911ce7b0..c7ab4c279c1da9dea8d02b3435f0d853e81fcec5 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/README.md +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/README.md @@ -1,112 +1,112 @@ -# yolact_edge模型推理指导 - -- [yolact_edge模型推理指导](#yolact_edge模型推理指导) - - [1 文件说明](#1-文件说明) - - [2 环境准备](#2-环境准备) - - [2.1 安装依赖](#21-安装依赖) - - [2.2 文件下载](#22-文件下载) - - [2.3 文件拷贝](#23-文件拷贝) - - [2.4 设置环境变量](#24-设置环境变量) - - [3 端到端推理步骤](#3-端到端推理步骤) - - [3.1 pth导出onnx](#31-pth导出onnx) - - [3.2 利用ATC工具转换为om模型](#32-利用atc工具转换为om模型) - - [3.3 om模型推理](#33-om模型推理) - - [3.4 纯推理性能获取](#34-纯推理性能获取) - - [4 评测结果](#4-评测结果) - ------- - -## 1 文件说明 -``` -yolact_edge_for_Pytorch - ├── env.sh 设置环境变量 - ├── pth2onnx.py pytorch模型导出onnx模型 - ├── atc.sh onnx模型转om - ├── yolact_edge.diff 补丁文件 - └── acl_net.py PyACL推理工具代码 -``` - -## 2 环境准备 - -### 2.1 安装依赖 - -根据pytorch官网教程安装1.10.0版本的PyTorch -```shell -pip install torch==1.10.0+cpu torchvision==0.11.1+cpu torchaudio==0.10.0+cpu -pip install -r requirements.txt -pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI" -``` - -### 2.2 文件下载 -- [yolact_edge_Pytorch源码下载](https://github.com/haotian-liu/yolact_edge) - - ```shell - git clone git@github.com:haotian-liu/yolact_edge.git - cd yolact_edge - git reset a9a00281b33b3ac90253a4939773308a8f95e21d --hard - git apply yolact_edge.diff - ``` - -- 权重下载 - - 创建 `weights` 目录,并将下载的权重文件 `yolact_edge_resnet50_54_800000.pth` 拷贝到 `weights` 目录下。 - 可参见[yolact_edge_Pytorch主页](https://github.com/haotian-liu/yolact_edge)说明下载权重 - -- 数据集下载 - - om推理采用COCO2017数据集的验证集进行精度评估。将下载好的数据集拷贝到 `data/coco` 目录下,data目录中的文件结构如下所示。数据集下载可以网页搜索 - ```shell - data - └── coco - ├── annotations - ├── images - ├── scripts - ├── yolact_edge_example_1.gif - ├── yolact_edge_example_2.gif - ├── yolact_edge_example_3.gif - ``` - -### 2.3 文件拷贝 -拷贝env.sh,pth2onnx.py,atc.sh,acl_net.py文件到yolact_edge目录下。 - - -### 2.4 设置环境变量 -```shell -source env.sh -``` - -## 3 端到端推理步骤 - -### 3.1 pth导出onnx -```python -python3.7 pth2onnx.py \ - --config=yolact_edge_resnet50_config \ - --trained_model=./weights/yolact_edge_resnet50_54_800000.pth -``` - -### 3.2 利用ATC工具转换为om模型 -```shell -bash atc.sh yolact_edge.onnx yolact_edge -``` - -### 3.3 om模型推理 -```python -python3.7 eval.py \ - --config=yolact_edge_resnet50_config \ - --trained_model=./weights/yolact_edge_resnet50_54_800000.pth \ - --cuda=False \ - --disable_tensorrt -``` - -### 3.4 纯推理性能获取 - -下载 [benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) 并拷贝到当前目录 -```shell -./benchmark.${arch} -device_id=0 -batch_size=1 -om_path=./yolact_edge.om -round=20 -``` - -## 4 评测结果 - -| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| --------------- | -------- | --------------- | -------- | ------- | -| yolact_edge | [mAP:27.0](https://github.com/haotian-liu/yolact_edge) | mAP:27.6 | 167fps | 157fps | +# yolact_edge模型推理指导 + +- [yolact_edge模型推理指导](#yolact_edge模型推理指导) + - [1 文件说明](#1-文件说明) + - [2 环境准备](#2-环境准备) + - [2.1 安装依赖](#21-安装依赖) + - [2.2 文件下载](#22-文件下载) + - [2.3 文件拷贝](#23-文件拷贝) + - [2.4 设置环境变量](#24-设置环境变量) + - [3 端到端推理步骤](#3-端到端推理步骤) + - [3.1 pth导出onnx](#31-pth导出onnx) + - [3.2 利用ATC工具转换为om模型](#32-利用atc工具转换为om模型) + - [3.3 om模型推理](#33-om模型推理) + - [3.4 纯推理性能获取](#34-纯推理性能获取) + - [4 评测结果](#4-评测结果) + +------ + +## 1 文件说明 +``` +yolact_edge_for_Pytorch + ├── env.sh 设置环境变量 + ├── pth2onnx.py pytorch模型导出onnx模型 + ├── 
atc.sh onnx模型转om + ├── yolact_edge.diff 补丁文件 + └── acl_net.py PyACL推理工具代码 +``` + +## 2 环境准备 + +### 2.1 安装依赖 + +根据pytorch官网教程安装1.10.0版本的PyTorch +```shell +pip install torch==1.10.0+cpu torchvision==0.11.1+cpu torchaudio==0.10.0+cpu +pip install -r requirements.txt +pip install git+https://github.com/haotian-liu/cocoapi.git#"egg=pycocotools&subdirectory=PythonAPI" +``` + +### 2.2 文件下载 +- [yolact_edge_Pytorch源码下载](https://github.com/haotian-liu/yolact_edge) + + ```shell + git clone git@github.com:haotian-liu/yolact_edge.git + cd yolact_edge + git reset a9a00281b33b3ac90253a4939773308a8f95e21d --hard + git apply yolact_edge.diff + ``` + +- 权重下载 + + 创建 `weights` 目录,并将下载的权重文件 `yolact_edge_resnet50_54_800000.pth` 拷贝到 `weights` 目录下。 + 可参见[yolact_edge_Pytorch主页](https://github.com/haotian-liu/yolact_edge)说明下载权重 + +- 数据集下载 + + om推理采用COCO2017数据集的验证集进行精度评估。将下载好的数据集拷贝到 `data/coco` 目录下,data目录中的文件结构如下所示。数据集下载可以网页搜索 + ```shell + data + └── coco + ├── annotations + ├── images + ├── scripts + ├── yolact_edge_example_1.gif + ├── yolact_edge_example_2.gif + ├── yolact_edge_example_3.gif + ``` + +### 2.3 文件拷贝 +拷贝env.sh,pth2onnx.py,atc.sh,acl_net.py文件到yolact_edge目录下。 + + +### 2.4 设置环境变量 +```shell +source env.sh +``` + +## 3 端到端推理步骤 + +### 3.1 pth导出onnx +```python +python3.7 pth2onnx.py \ + --config=yolact_edge_resnet50_config \ + --trained_model=./weights/yolact_edge_resnet50_54_800000.pth +``` + +### 3.2 利用ATC工具转换为om模型 +```shell +bash atc.sh yolact_edge.onnx yolact_edge +``` + +### 3.3 om模型推理 +```python +python3.7 eval.py \ + --config=yolact_edge_resnet50_config \ + --trained_model=./weights/yolact_edge_resnet50_54_800000.pth \ + --cuda=False \ + --disable_tensorrt +``` + +### 3.4 纯推理性能获取 + +下载 [benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) 并拷贝到当前目录 +```shell +./benchmark.${arch} -device_id=0 -batch_size=1 -om_path=./yolact_edge.om -round=20 +``` + +## 4 评测结果 + +| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| --------------- | -------- | --------------- | -------- | ------- | +| yolact_edge | [mAP:27.0](https://github.com/haotian-liu/yolact_edge) | mAP:27.6 | 167fps | 157fps | diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/acl_net.py b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/acl_net.py index b39eb3530ccc10c2b45808793365744d03ef20a6..d6c2ea29b9cb14b48c9409aea402f2b394bbe5a8 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/acl_net.py +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/acl_net.py @@ -1,259 +1,259 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
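
The `acl_net.py` wrapper whose patch content follows is what drives the om model during evaluation (see the `eval.py` hunk inside `yolact_edge.diff` further down, which builds a `Net` and feeds it a numpy batch). A minimal usage sketch, not part of the original patch: it assumes `./yolact_edge.om` has already been produced by `atc.sh` and that a 1x3x550x550 float32 input is acceptable (550x550 is the usual yolact_edge input size, used here only for illustration).

```python
# Illustrative sketch only: drive an om model through the Net wrapper from acl_net.py.
# Assumptions: ./yolact_edge.om exists and expects a 1x3x550x550 float32 input.
import numpy as np
from acl_net import Net

model = Net(device_id=0, model_path='./yolact_edge.om')    # init ACL and load the om file
dummy = np.random.rand(1, 3, 550, 550).astype(np.float32)  # stand-in for a preprocessed frame
outputs = model([dummy])                                   # list of numpy arrays, one per om output
print([o.shape for o in outputs])                          # eval.py uses res[:5] as head outputs, res[-1] as proto
```
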
- -import time -import numpy as np -import functools -import acl - -# error code -ACL_ERROR_NONE = 0 - -# rule for mem -ACL_MEM_MALLOC_HUGE_FIRST = 0 -ACL_MEM_MALLOC_HUGE_ONLY = 1 -ACL_MEM_MALLOC_NORMAL_ONLY = 2 - -# rule for memory copy -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - -buffer_method = { - "in": acl.mdl.get_input_size_by_index, - "out": acl.mdl.get_output_size_by_index -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception("{} failed ret={}".format(message, ret)) - - -class Net(object): - def __init__(self, model_path, device_id=0, config_path=None): - self.device_id = device_id # int - self.model_path = model_path # string - self.model_id = None # pointer - self.context = None # pointer - - self.input_data = [] - self.output_data = [] - self.model_desc = None # pointer when using - self.load_input_dataset = None - self.load_output_dataset = None - - self._init_resource(config_path) - - def __call__(self, ori_data): - return self.forward(ori_data) - - def __del__(self): - ret = acl.mdl.unload(self.model_id) - check_ret("acl.mdl.unload", ret) - if self.model_desc: - acl.mdl.destroy_desc(self.model_desc) - self.model_desc = None - - while self.input_data: - item = self.input_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data: - item = self.output_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - if self.context: - ret = acl.rt.destroy_context(self.context) - check_ret("acl.rt.destroy_context", ret) - self.context = None - - ret = acl.rt.reset_device(self.device_id) - check_ret("acl.rt.reset_device", ret) - ret = acl.finalize() - check_ret("acl.finalize", ret) - - def _init_resource(self, config_path=None): - if config_path: - ret = acl.init(config_path) - else: - ret = acl.init() - check_ret("acl.init", ret) - ret = acl.rt.set_device(self.device_id) - check_ret("acl.rt.set_device", ret) - - self.context, ret = acl.rt.create_context(self.device_id) - check_ret("acl.rt.create_context", ret) - - # load_model - self.model_id, ret = acl.mdl.load_from_file(self.model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - self._get_model_info() - - def _get_model_info(self, ): - ret = acl.mdl.get_desc(self.model_desc, self.model_id) - check_ret("acl.mdl.get_desc", ret) - input_size = acl.mdl.get_num_inputs(self.model_desc) - output_size = acl.mdl.get_num_outputs(self.model_desc) - self._gen_data_buffer(input_size, des="in") - self._gen_data_buffer(output_size, des="out") - - def _gen_data_buffer(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc(temp_buffer_size, - ACL_MEM_MALLOC_HUGE_FIRST) - check_ret("acl.rt.malloc", ret) - - if des == "in": - self.input_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - elif des == "out": - self.output_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): - temp_data_buffer = self.input_data \ - if policy == ACL_MEMCPY_HOST_TO_DEVICE \ - else self.output_data - if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: 
- for item in self.output_data: - temp, ret = acl.rt.malloc_host(item["size"]) - if ret != 0: - raise Exception("can't malloc_host ret={}".format(ret)) - dataset.append({"size": item["size"], "buffer": temp}) - - for i, item in enumerate(temp_data_buffer): - if policy == ACL_MEMCPY_HOST_TO_DEVICE: - ptr = acl.util.numpy_to_ptr(dataset[i]) - ret = acl.rt.memcpy(item["buffer"], - item["size"], - ptr, - item["size"], - policy) - check_ret("acl.rt.memcpy", ret) - - else: - ptr = dataset[i]["buffer"] - ret = acl.rt.memcpy(ptr, - item["size"], - item["buffer"], - item["size"], - policy) - check_ret("acl.rt.memcpy", ret) - - def _gen_dataset(self, type_str="input"): - dataset = acl.mdl.create_dataset() - - temp_dataset = None - if type_str == "in": - self.load_input_dataset = dataset - temp_dataset = self.input_data - else: - self.load_output_dataset = dataset - temp_dataset = self.output_data - - for item in temp_dataset: - data = acl.create_data_buffer(item["buffer"], item["size"]) - if data is None: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - _, ret = acl.mdl.add_dataset_buffer(dataset, data) - - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - def _data_from_host_to_device(self, images): - self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) - self._gen_dataset("in") - self._gen_dataset("out") - - def _data_from_device_to_host(self): - res = [] - self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) - output = self.get_result(res) - return output - - def _destroy_databuffer(self, ): - for dataset in [self.load_input_dataset, self.load_output_dataset]: - if not dataset: - continue - - num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(num): - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf: - ret = acl.destroy_data_buffer(data_buf) - check_ret("acl.destroy_data_buffer", ret) - ret = acl.mdl.destroy_dataset(dataset) - check_ret("acl.mdl.destroy_dataset", ret) - - def forward(self, input_data): - if not isinstance(input_data, (list, tuple)): - input_data = [input_data] - st = time.time() - self._data_from_host_to_device(input_data) - mem_t = time.time() - st - st = time.time() - ret = acl.mdl.execute(self.model_id, - self.load_input_dataset, - self.load_output_dataset) - exe_t = time.time() - st - st = time.time() - check_ret("acl.mdl.execute", ret) - self._destroy_databuffer() - result = self._data_from_device_to_host() - mem_t += time.time() - st - return result - - def get_result(self, output_data): - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - check_ret("acl.mdl.get_cur_output_dims", ret) - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import numpy as np +import functools +import acl + +# error code +ACL_ERROR_NONE = 0 + +# rule for mem +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_HUGE_ONLY = 1 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 + +# rule for memory copy +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + +buffer_method = { + "in": acl.mdl.get_input_size_by_index, + "out": acl.mdl.get_output_size_by_index +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception("{} failed ret={}".format(message, ret)) + + +class Net(object): + def __init__(self, model_path, device_id=0, config_path=None): + self.device_id = device_id # int + self.model_path = model_path # string + self.model_id = None # pointer + self.context = None # pointer + + self.input_data = [] + self.output_data = [] + self.model_desc = None # pointer when using + self.load_input_dataset = None + self.load_output_dataset = None + + self._init_resource(config_path) + + def __call__(self, ori_data): + return self.forward(ori_data) + + def __del__(self): + ret = acl.mdl.unload(self.model_id) + check_ret("acl.mdl.unload", ret) + if self.model_desc: + acl.mdl.destroy_desc(self.model_desc) + self.model_desc = None + + while self.input_data: + item = self.input_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data: + item = self.output_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + if self.context: + ret = acl.rt.destroy_context(self.context) + check_ret("acl.rt.destroy_context", ret) + self.context = None + + ret = acl.rt.reset_device(self.device_id) + check_ret("acl.rt.reset_device", ret) + ret = acl.finalize() + check_ret("acl.finalize", ret) + + def _init_resource(self, config_path=None): + if config_path: + ret = acl.init(config_path) + else: + ret = acl.init() + check_ret("acl.init", ret) + ret = acl.rt.set_device(self.device_id) + check_ret("acl.rt.set_device", ret) + + self.context, ret = acl.rt.create_context(self.device_id) + check_ret("acl.rt.create_context", ret) + + # load_model + self.model_id, ret = acl.mdl.load_from_file(self.model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + self._get_model_info() + + def _get_model_info(self, ): + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + check_ret("acl.mdl.get_desc", ret) + input_size = acl.mdl.get_num_inputs(self.model_desc) + output_size = acl.mdl.get_num_outputs(self.model_desc) + self._gen_data_buffer(input_size, des="in") + self._gen_data_buffer(output_size, des="out") + + def _gen_data_buffer(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc(temp_buffer_size, + ACL_MEM_MALLOC_HUGE_FIRST) + check_ret("acl.rt.malloc", ret) + + if des == "in": + 
self.input_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + elif des == "out": + self.output_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): + temp_data_buffer = self.input_data \ + if policy == ACL_MEMCPY_HOST_TO_DEVICE \ + else self.output_data + if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: + for item in self.output_data: + temp, ret = acl.rt.malloc_host(item["size"]) + if ret != 0: + raise Exception("can't malloc_host ret={}".format(ret)) + dataset.append({"size": item["size"], "buffer": temp}) + + for i, item in enumerate(temp_data_buffer): + if policy == ACL_MEMCPY_HOST_TO_DEVICE: + ptr = acl.util.numpy_to_ptr(dataset[i]) + ret = acl.rt.memcpy(item["buffer"], + item["size"], + ptr, + item["size"], + policy) + check_ret("acl.rt.memcpy", ret) + + else: + ptr = dataset[i]["buffer"] + ret = acl.rt.memcpy(ptr, + item["size"], + item["buffer"], + item["size"], + policy) + check_ret("acl.rt.memcpy", ret) + + def _gen_dataset(self, type_str="input"): + dataset = acl.mdl.create_dataset() + + temp_dataset = None + if type_str == "in": + self.load_input_dataset = dataset + temp_dataset = self.input_data + else: + self.load_output_dataset = dataset + temp_dataset = self.output_data + + for item in temp_dataset: + data = acl.create_data_buffer(item["buffer"], item["size"]) + if data is None: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + _, ret = acl.mdl.add_dataset_buffer(dataset, data) + + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + def _data_from_host_to_device(self, images): + self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) + self._gen_dataset("in") + self._gen_dataset("out") + + def _data_from_device_to_host(self): + res = [] + self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) + output = self.get_result(res) + return output + + def _destroy_databuffer(self, ): + for dataset in [self.load_input_dataset, self.load_output_dataset]: + if not dataset: + continue + + num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(num): + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf: + ret = acl.destroy_data_buffer(data_buf) + check_ret("acl.destroy_data_buffer", ret) + ret = acl.mdl.destroy_dataset(dataset) + check_ret("acl.mdl.destroy_dataset", ret) + + def forward(self, input_data): + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + st = time.time() + self._data_from_host_to_device(input_data) + mem_t = time.time() - st + st = time.time() + ret = acl.mdl.execute(self.model_id, + self.load_input_dataset, + self.load_output_dataset) + exe_t = time.time() - st + st = time.time() + check_ret("acl.mdl.execute", ret) + self._destroy_databuffer() + result = self._data_from_device_to_host() + mem_t += time.time() - st + return result + + def get_result(self, output_data): + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + check_ret("acl.mdl.get_cur_output_dims", ret) + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_arr = np.frombuffer(bytearray(data[:data_len * 
ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/atc.sh b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/atc.sh index b5e775f899bad22ab18e422ab272e4e4ef5ab9f7..dc10c8186c2f5d6ad4ff6f1d48e69d136b8b0d0e 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/atc.sh +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/atc.sh @@ -1,5 +1,5 @@ -atc --model=$1 \ - --framework=5 \ - --output=$2 \ - --log=error \ - --soc_version=Ascend310 +atc --model=$1 \ + --framework=5 \ + --output=$2 \ + --log=error \ + --soc_version=Ascend310 diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/env.sh b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/env.sh index 9ce0d242f029dcb9ddd237071dcbda75546035a6..7fb2c8a43dd00b83c9015932311d4be770a1a10f 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/env.sh +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/env.sh @@ -1,5 +1,5 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH -export ASCEND_OPP_PATH=${install_path}/opp +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH +export ASCEND_OPP_PATH=${install_path}/opp diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/modelzoo_level.txt b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/modelzoo_level.txt index d44ba5698b045b8a30e107962f295dbc24585d8c..70801afc42b6d9eb5cdd98b5430d9b2101f3146a 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/requirements.txt b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/requirements.txt index 511f52dc35b4e6e860ff90c76d4be5d2e3f441ab..306a8a990ac1ef166e97c0506f09bc19e1392c66 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/requirements.txt +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/requirements.txt @@ -1,7 +1,7 @@ -cython -opencv-python -pillow -matplotlib -GitPython -termcolor +cython +opencv-python +pillow +matplotlib +GitPython +termcolor tensorboard \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/yolact_edge.diff b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/yolact_edge.diff index 3f2db5f2405684d8e6cce4230607d89e31ce701a..e5b26f350dcf25a04e09b184e6c3e430251a3e03 100644 --- a/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/yolact_edge.diff +++ b/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch/yolact_edge.diff @@ -1,171 +1,171 @@ -diff --git a/eval.py b/eval.py -index f2a833f..8644391 100644 ---- a/eval.py -+++ b/eval.py -@@ -412,8 +412,9 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de 
- - classes = list(classes.cpu().numpy().astype(int)) - scores = list(scores.cpu().numpy().astype(float)) -- masks = masks.view(-1, h*w).cuda() -- boxes = boxes.cuda() -+ #masks = masks.view(-1, h*w).cuda() -+ masks = masks.view(-1, h*w) -+ #boxes = boxes.cuda() - - - if args.output_coco_json: -@@ -1075,6 +1076,8 @@ def evaluate(net:Yolact, dataset, train_mode=False, train_cfg=None): - - else: - # Main eval loop -+ from acl_net import Net -+ model = Net(device_id=0, model_path='./yolact_edge.om') - for it, image_idx in enumerate(dataset_indices): - timer.reset() - -@@ -1092,9 +1095,15 @@ def evaluate(net:Yolact, dataset, train_mode=False, train_cfg=None): - batch = batch.cuda() - - with timer.env('Network Extra'): -- extras = {"backbone": "full", "interrupt": False, -- "moving_statistics": {"aligned_feats": []}} -- preds = net(batch, extras=extras)["pred_outs"] -+ # pytorch forward -+ # outs, proto_out = net(batch) -+ # om forward -+ res = model([batch.numpy()]) -+ outs = [torch.from_numpy(ele) for ele in res[:5]] -+ proto_out = torch.from_numpy(res[-1]) -+ -+ # common postprocess -+ preds = net.postprocess(outs, proto_out) - - # Perform the meat of the operation here depending on our mode. - if args.display: -diff --git a/yolact_edge/yolact.py b/yolact_edge/yolact.py -index b6efdf7..7a167f3 100644 ---- a/yolact_edge/yolact.py -+++ b/yolact_edge/yolact.py -@@ -33,10 +33,10 @@ except: - - # This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. - # See the bug report here: https://github.com/pytorch/pytorch/issues/17108 --torch.cuda.current_device() -+#torch.cuda.current_device() - - # As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules --use_jit = False if use_torch2trt else torch.cuda.device_count() <= 1 -+use_jit = False #if use_torch2trt else torch.cuda.device_count() <= 1 - - ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module - script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn -@@ -1089,6 +1089,9 @@ class Yolact(nn.Module): - def __init__(self, training=True): - super().__init__() - -+ self.extras = {"backbone": "full", "interrupt": False, -+ "moving_statistics": {"aligned_feats": []}} -+ - self.backbone = construct_backbone(cfg.backbone) - - self.training = training -@@ -1608,19 +1611,19 @@ class Yolact(nn.Module): - x = torch.ones((1, lateral_channels * 2, 69, 69)).cuda() - self.trt_load_if("flow_net", trt_fn, [x], int8_mode, parent=self.flow_net, batch_size=batch_size) - -- def forward(self, x, extras=None): -+ def forward(self, x): - """ The input should be of size [batch_size, 3, img_h, img_w] """ - - if cfg.flow.train_flow: -- return self.forward_flow(extras) -+ return self.forward_flow(self.extras) - - outs_wrapper = {} - - with timer.env('backbone'): -- if cfg.flow is None or extras is None or extras["backbone"] == "full": -+ if cfg.flow is None or self.extras is None or self.extras["backbone"] == "full": - outs = self.backbone(x) - -- elif extras is not None and extras["backbone"] == "partial": -+ elif self.extras is not None and self.extras["backbone"] == "partial": - if hasattr(self, 'partial_backbone'): - outs = self.partial_backbone(x) - else: -@@ -1631,22 +1634,22 @@ class Yolact(nn.Module): - - if cfg.flow is not None: - with timer.env('fpn'): -- assert type(extras) == dict -- if extras["backbone"] == "full": -+ assert type(self.extras) == dict -+ if self.extras["backbone"] == "full": - outs = [outs[i] for i in cfg.backbone.selected_layers] - 
outs_fpn_phase_1_wrapper = self.fpn_phase_1(*outs) - outs_phase_1, lats_phase_1 = outs_fpn_phase_1_wrapper[:len(outs)], outs_fpn_phase_1_wrapper[len(outs):] - lateral = lats_phase_1[0].detach() -- moving_statistics = extras["moving_statistics"] -+ moving_statistics = self.extras["moving_statistics"] - -- if extras.get("keep_statistics", False): -+ if self.extras.get("keep_statistics", False): - outs_wrapper["feats"] = [out.detach() for out in outs_phase_1] - outs_wrapper["lateral"] = lateral - - outs_wrapper["outs_phase_1"] = [out.detach() for out in outs_phase_1] - else: -- assert extras["moving_statistics"] is not None -- moving_statistics = extras["moving_statistics"] -+ assert self.extras["moving_statistics"] is not None -+ moving_statistics = self.extras["moving_statistics"] - outs_phase_1 = moving_statistics["feats"].copy() - - if cfg.flow.warp_mode != 'take': -@@ -1699,7 +1702,7 @@ class Yolact(nn.Module): - outs_wrapper["outs_phase_1"] = outs_phase_1.copy() - - outs = self.fpn_phase_2(*outs_phase_1) -- if extras["backbone"] == "partial": -+ if self.extras["backbone"] == "partial": - outs_wrapper["outs_phase_2"] = [out for out in outs] - else: - outs_wrapper["outs_phase_2"] = [out.detach() for out in outs] -@@ -1709,7 +1712,7 @@ class Yolact(nn.Module): - outs = [outs[i] for i in cfg.backbone.selected_layers] - outs = self.fpn(outs) - -- if extras is not None and extras.get("interrupt", None): -+ if self.extras is not None and self.extras.get("interrupt", None): - return outs_wrapper - - proto_out = None -@@ -1740,6 +1743,9 @@ class Yolact(nn.Module): - bias_shape[-1] = 1 - proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) - -+ return outs, proto_out -+ -+ def postprocess(self, outs, proto_out): - with timer.env('pred_heads'): - pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } - -@@ -1779,7 +1785,6 @@ class Yolact(nn.Module): - if cfg.use_semantic_segmentation_loss: - pred_outs['segm'] = self.semantic_seg_conv(outs[0]) - -- outs_wrapper["pred_outs"] = pred_outs - else: - if cfg.use_sigmoid_focal_loss: - # Note: even though conf[0] exists, this mode doesn't train it so don't use it -@@ -1792,8 +1797,8 @@ class Yolact(nn.Module): - else: - pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) - -- outs_wrapper["pred_outs"] = self.detect(pred_outs) -- return outs_wrapper -+ pred_outs = self.detect(pred_outs) -+ return pred_outs - - - # Some testing code +diff --git a/eval.py b/eval.py +index f2a833f..8644391 100644 +--- a/eval.py ++++ b/eval.py +@@ -412,8 +412,9 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de + + classes = list(classes.cpu().numpy().astype(int)) + scores = list(scores.cpu().numpy().astype(float)) +- masks = masks.view(-1, h*w).cuda() +- boxes = boxes.cuda() ++ #masks = masks.view(-1, h*w).cuda() ++ masks = masks.view(-1, h*w) ++ #boxes = boxes.cuda() + + + if args.output_coco_json: +@@ -1075,6 +1076,8 @@ def evaluate(net:Yolact, dataset, train_mode=False, train_cfg=None): + + else: + # Main eval loop ++ from acl_net import Net ++ model = Net(device_id=0, model_path='./yolact_edge.om') + for it, image_idx in enumerate(dataset_indices): + timer.reset() + +@@ -1092,9 +1095,15 @@ def evaluate(net:Yolact, dataset, train_mode=False, train_cfg=None): + batch = batch.cuda() + + with timer.env('Network Extra'): +- extras = {"backbone": "full", "interrupt": False, +- "moving_statistics": {"aligned_feats": []}} +- preds = net(batch, extras=extras)["pred_outs"] ++ # pytorch forward ++ # outs, proto_out = net(batch) ++ 
# om forward ++ res = model([batch.numpy()]) ++ outs = [torch.from_numpy(ele) for ele in res[:5]] ++ proto_out = torch.from_numpy(res[-1]) ++ ++ # common postprocess ++ preds = net.postprocess(outs, proto_out) + + # Perform the meat of the operation here depending on our mode. + if args.display: +diff --git a/yolact_edge/yolact.py b/yolact_edge/yolact.py +index b6efdf7..7a167f3 100644 +--- a/yolact_edge/yolact.py ++++ b/yolact_edge/yolact.py +@@ -33,10 +33,10 @@ except: + + # This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. + # See the bug report here: https://github.com/pytorch/pytorch/issues/17108 +-torch.cuda.current_device() ++#torch.cuda.current_device() + + # As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules +-use_jit = False if use_torch2trt else torch.cuda.device_count() <= 1 ++use_jit = False #if use_torch2trt else torch.cuda.device_count() <= 1 + + ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module + script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn +@@ -1089,6 +1089,9 @@ class Yolact(nn.Module): + def __init__(self, training=True): + super().__init__() + ++ self.extras = {"backbone": "full", "interrupt": False, ++ "moving_statistics": {"aligned_feats": []}} ++ + self.backbone = construct_backbone(cfg.backbone) + + self.training = training +@@ -1608,19 +1611,19 @@ class Yolact(nn.Module): + x = torch.ones((1, lateral_channels * 2, 69, 69)).cuda() + self.trt_load_if("flow_net", trt_fn, [x], int8_mode, parent=self.flow_net, batch_size=batch_size) + +- def forward(self, x, extras=None): ++ def forward(self, x): + """ The input should be of size [batch_size, 3, img_h, img_w] """ + + if cfg.flow.train_flow: +- return self.forward_flow(extras) ++ return self.forward_flow(self.extras) + + outs_wrapper = {} + + with timer.env('backbone'): +- if cfg.flow is None or extras is None or extras["backbone"] == "full": ++ if cfg.flow is None or self.extras is None or self.extras["backbone"] == "full": + outs = self.backbone(x) + +- elif extras is not None and extras["backbone"] == "partial": ++ elif self.extras is not None and self.extras["backbone"] == "partial": + if hasattr(self, 'partial_backbone'): + outs = self.partial_backbone(x) + else: +@@ -1631,22 +1634,22 @@ class Yolact(nn.Module): + + if cfg.flow is not None: + with timer.env('fpn'): +- assert type(extras) == dict +- if extras["backbone"] == "full": ++ assert type(self.extras) == dict ++ if self.extras["backbone"] == "full": + outs = [outs[i] for i in cfg.backbone.selected_layers] + outs_fpn_phase_1_wrapper = self.fpn_phase_1(*outs) + outs_phase_1, lats_phase_1 = outs_fpn_phase_1_wrapper[:len(outs)], outs_fpn_phase_1_wrapper[len(outs):] + lateral = lats_phase_1[0].detach() +- moving_statistics = extras["moving_statistics"] ++ moving_statistics = self.extras["moving_statistics"] + +- if extras.get("keep_statistics", False): ++ if self.extras.get("keep_statistics", False): + outs_wrapper["feats"] = [out.detach() for out in outs_phase_1] + outs_wrapper["lateral"] = lateral + + outs_wrapper["outs_phase_1"] = [out.detach() for out in outs_phase_1] + else: +- assert extras["moving_statistics"] is not None +- moving_statistics = extras["moving_statistics"] ++ assert self.extras["moving_statistics"] is not None ++ moving_statistics = self.extras["moving_statistics"] + outs_phase_1 = moving_statistics["feats"].copy() + + if cfg.flow.warp_mode != 'take': +@@ -1699,7 +1702,7 @@ class Yolact(nn.Module): + 
outs_wrapper["outs_phase_1"] = outs_phase_1.copy() + + outs = self.fpn_phase_2(*outs_phase_1) +- if extras["backbone"] == "partial": ++ if self.extras["backbone"] == "partial": + outs_wrapper["outs_phase_2"] = [out for out in outs] + else: + outs_wrapper["outs_phase_2"] = [out.detach() for out in outs] +@@ -1709,7 +1712,7 @@ class Yolact(nn.Module): + outs = [outs[i] for i in cfg.backbone.selected_layers] + outs = self.fpn(outs) + +- if extras is not None and extras.get("interrupt", None): ++ if self.extras is not None and self.extras.get("interrupt", None): + return outs_wrapper + + proto_out = None +@@ -1740,6 +1743,9 @@ class Yolact(nn.Module): + bias_shape[-1] = 1 + proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) + ++ return outs, proto_out ++ ++ def postprocess(self, outs, proto_out): + with timer.env('pred_heads'): + pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } + +@@ -1779,7 +1785,6 @@ class Yolact(nn.Module): + if cfg.use_semantic_segmentation_loss: + pred_outs['segm'] = self.semantic_seg_conv(outs[0]) + +- outs_wrapper["pred_outs"] = pred_outs + else: + if cfg.use_sigmoid_focal_loss: + # Note: even though conf[0] exists, this mode doesn't train it so don't use it +@@ -1792,8 +1797,8 @@ class Yolact(nn.Module): + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + +- outs_wrapper["pred_outs"] = self.detect(pred_outs) +- return outs_wrapper ++ pred_outs = self.detect(pred_outs) ++ return pred_outs + + + # Some testing code diff --git a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/README.md b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/README.md index 4df2045dbb774e9cdfd26626683fdae09c655c41..0b3ee782915873c9119f8e1273d58943028b1af7 100644 --- a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/README.md +++ b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/README.md @@ -1,73 +1,73 @@ -文件作用说明: - -1. env.sh:ATC工具环境变量配置脚本 -2. require.txt:脚本运行所需的第三方库 -3. parse_json.py: coco数据集标签json文件解析脚本 -4. preprocess_yolov3_pytorch.py: 二进制数据集预处理脚本 -5. get_coco_info.py: yolov3.info生成脚本 -6. bin_to_predict_yolo_pytorch.py: benchmark输出bin文件解析脚本 -7. map_calculate.py: 精度统计脚本 -8. 
benchmark工具源码地址:https://gitee.com/ascend/cann-benchmark/tree/master/infer - -推理端到端步骤: - -(1) git clone 开源仓https://github.com/ultralytics/yolov3/, 并下载对应的权重文件,修改**models/export.py**脚本生成onnx文件 - -``` -git clone https://github.com/ultralytics/yolov3/ -python3.7 models/export.py --weights ./yolov3.pt --img 416 --batch 1 -``` - -(2)配置环境变量转换om模型 - -``` -source env.sh -atc --model=yolov3.onnx --framework=5 --output=yolov3_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="images:1,3,416,416" --out_nodes="Reshape_219:0;Reshape_203:0;Reshape_187:0" -``` - -(3)解析数据集 - -下载coco2014数据集val2014和label文件**instances_valminusminival2014.json**,运行**parse_json.py**解析数据集 - -``` -python3.7 parse_json.py -``` - -生成coco2014.names和coco_2014.info以及gronud-truth文件夹 - -(5)数据预处理 - -运行脚本preprocess_yolov3_pytorch.py处理数据集 - -``` -python3.7 preprocess_yolov3_pytorch.py coco_2014.info yolov3_bin -``` - -(6)benchmark推理 - -运行get_coco_info.py生成info文件 - -``` -python3.7 get_coco_info.py yolo_coco_bin_tf coco_2014.info yolov3.info -``` - -执行benchmark命令,结果保存在同级目录 result/dumpOutput_device0/ - -``` -python3.7 get_coco_info.py yolo_coco_bin_tf coco_2014.info yolov3.info -``` - -(7)后处理 - -运行 bin_to_predict_yolo_pytorch.py 解析模型输出 - -``` -python3.7 bin_to_predict_yolo_pytorch.py --bin_data_path result/dumpOutput_device0/ --det_results_path detection-results/ --origin_jpg_path /root/dataset/coco2014/val2014/ --coco_class_names /root/dataset/coco2014/coco2014.names --model_type yolov3 --net_input_size 416 -``` - -运行map_cauculate.py统计mAP值 - -``` -python3 map_calculate.py --label_path ./ground-truth --npu_txt_path ./detection-results -na -np -``` - +文件作用说明: + +1. env.sh:ATC工具环境变量配置脚本 +2. require.txt:脚本运行所需的第三方库 +3. parse_json.py: coco数据集标签json文件解析脚本 +4. preprocess_yolov3_pytorch.py: 二进制数据集预处理脚本 +5. get_coco_info.py: yolov3.info生成脚本 +6. bin_to_predict_yolo_pytorch.py: benchmark输出bin文件解析脚本 +7. map_calculate.py: 精度统计脚本 +8. 
benchmark工具源码地址：https://gitee.com/ascend/cann-benchmark/tree/master/infer
+
+推理端到端步骤:
+
+(1) git clone 开源仓https://github.com/ultralytics/yolov3/, 并下载对应的权重文件，修改**models/export.py**脚本生成onnx文件
+
+```
+git clone https://github.com/ultralytics/yolov3/
+python3.7 models/export.py --weights ./yolov3.pt --img 416 --batch 1
+```
+
+(2)配置环境变量转换om模型
+
+```
+source env.sh
+atc --model=yolov3.onnx --framework=5 --output=yolov3_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="images:1,3,416,416" --out_nodes="Reshape_219:0;Reshape_203:0;Reshape_187:0"
+```
+
+(3)解析数据集
+
+下载coco2014数据集val2014和label文件**instances_valminusminival2014.json**，运行**parse_json.py**解析数据集
+
+```
+python3.7 parse_json.py
+```
+
+生成coco2014.names和coco_2014.info以及ground-truth文件夹
+
+(4)数据预处理
+
+运行脚本preprocess_yolov3_pytorch.py处理数据集
+
+```
+python3.7 preprocess_yolov3_pytorch.py coco_2014.info yolov3_bin
+```
+
+(5)benchmark推理
+
+运行get_coco_info.py生成info文件
+
+```
+python3.7 get_coco_info.py yolo_coco_bin_tf coco_2014.info yolov3.info
+```
+
+执行benchmark命令，结果保存在同级目录 result/dumpOutput_device0/
+
+```
+python3.7 get_coco_info.py yolo_coco_bin_tf coco_2014.info yolov3.info
+```
+
+(6)后处理
+
+运行 bin_to_predict_yolo_pytorch.py 解析模型输出
+
+```
+python3.7 bin_to_predict_yolo_pytorch.py --bin_data_path result/dumpOutput_device0/ --det_results_path detection-results/ --origin_jpg_path /root/dataset/coco2014/val2014/ --coco_class_names /root/dataset/coco2014/coco2014.names --model_type yolov3 --net_input_size 416
+```
+
+运行map_calculate.py统计mAP值
+
+```
+python3 map_calculate.py --label_path ./ground-truth --npu_txt_path ./detection-results -na -np
+```
+
diff --git a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/bin_to_predict_yolo_pytorch.py b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/bin_to_predict_yolo_pytorch.py
index cb84f103816ac836844e9c672d15e554b706ef7b..2cfababcda4e29aa42feab5b02bafcb9d240cae1 100644
--- a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/bin_to_predict_yolo_pytorch.py
+++ b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/bin_to_predict_yolo_pytorch.py
@@ -1,255 +1,255 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
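
The `--out_nodes` names used in the atc command of the README above (`Reshape_219:0` etc.) depend on the exported graph and can shift between ultralytics releases. The sketch below is not part of the original patch; it assumes `yolov3.onnx` sits in the current directory and the `onnx` package is installed, and simply lists candidate Reshape nodes so the names can be confirmed before conversion.

```python
# Sketch: list the Reshape nodes of the exported model so the names passed to
# atc --out_nodes (e.g. "Reshape_219:0") can be confirmed for this export.
import onnx

model = onnx.load('yolov3.onnx')
for node in model.graph.node:
    if node.op_type == 'Reshape':
        # atc expects "<node_name>:<output_index>"
        print(node.name, list(node.output))
```
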
- -import os -import numpy as np -import argparse -import time -import torch -import torchvision -import cv2 - - -def _make_grid(nx=20, ny=20): - xv, yv = np.meshgrid(np.arange(nx), np.arange(ny)) - return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float) - - -def sigmoid(x0): - s = 1 / (1 + np.exp(-x0)) - return s - -def detect(x, model_type): - """ - x(bs,3,20,20,85) - """ - - # x(bs,3,20,20,85) - z = [] - grid = [] - for i in range(3): - _, _, ny, nx, _ = x[i].shape - grid.append(_make_grid(nx, ny)) - if model_type == 'yolov5': - stride = np.array([8, 16, 32]) - anchor_grid = np.array( - [[10., 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]])\ - .reshape(3, 1, 3, 1, 1, 2) - elif model_type == 'yolov3': - stride = np.array([32, 16, 8]) - anchor_grid = np.array( - [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10., 13, 16, 30, 33, 23]])\ - .reshape(3, 1, 3, 1, 1, 2) - - for i in range(3): - y = sigmoid(x[i]) - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid[i]) * stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i] # wh - z.append(y.reshape(1, -1, 85)) - return np.concatenate(z, 1) - - -def xywh2xyxy(x): - # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right - y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x - y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y - y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x - y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y - return y - - -def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): - """Performs Non-Maximum Suppression (NMS) on inference results - - Returns: - detections with shape: nx6 (x1, y1, x2, y2, conf, cls) - """ - - nc = prediction[0].shape[1] - 5 # number of classes - xc = prediction[..., 4] > conf_thres # candidates - - # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height - max_det = 300 # maximum number of detections per image - time_limit = 10.0 # seconds to quit after - redundant = True # require redundant detections - multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) - - t = time.time() - output = [None] * prediction.shape[0] - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # If none remain process next image - if not x.shape[0]: - continue - - # Compute conf - x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf - - # Box (center x, center y, width, height) to (x1, y1, x2, y2) - box = xywh2xyxy(x[:, :4]) - - # Detections matrix nx6 (xyxy, conf, cls) - if multi_label: - i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T - x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) - else: # best class only - conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Apply finite constraint - # if not torch.isfinite(x).all(): - # x = x[torch.isfinite(x).all(1)] - - # If none remain process next image - n = x.shape[0] # number of boxes - if not n: - continue - # Sort by confidence - # x = x[x[:, 4].argsort(descending=True)] - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores 
= x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) - if i.shape[0] > max_det: # limit detections - i = i[:max_det] - if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) - try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) - iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix - weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes - if redundant: - i = i[iou.sum(1) > 1] # require redundancy - except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139 - print(x, i, x.shape, i.shape) - pass - - output[xi] = x[i] - if (time.time() - t) > time_limit: - break # time limit exceeded - - return output - - -def clip_coords(boxes, img_shape): - # Clip bounding xyxy bounding boxes to image shape (height, width) - boxes[:, 0].clamp_(0, img_shape[1]) # x1 - boxes[:, 1].clamp_(0, img_shape[0]) # y1 - boxes[:, 2].clamp_(0, img_shape[1]) # x2 - boxes[:, 3].clamp_(0, img_shape[0]) # y2 - - -def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): - # Rescale coords (xyxy) from img1_shape to img0_shape - if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding - else: - gain = ratio_pad[0][0] - pad = ratio_pad[1] - - coords[:, [0, 2]] -= pad[0] # x padding - coords[:, [1, 3]] -= pad[1] # y padding - coords[:, :4] /= gain - clip_coords(coords, img0_shape) - return coords - - -def post_process(flags): - names = np.loadtxt(flags.coco_class_names, dtype='str', delimiter='\n') - img = torch.zeros((1, 3, flags.net_input_size, flags.net_input_size)) - - # 读取bin文件用于生成预测结果 - bin_path = flags.bin_data_path - ori_path = flags.origin_jpg_path - - det_results_path = flags.det_results_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] - for name in os.listdir(bin_path) if "bin" in name]) - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - src_img = cv2.imread(os.path.join(ori_path, '{}.jpg'.format(bin_file))) - assert src_img is not None, 'Image Not Found ' + bin_file - - # 加载检测的所有输出tensor - res_buff = [] - - if flags.model_type == 'yolov5': - yolo_shape = [[1, 3, 85, 80, 80], [1, 3, 85, 40, 40], [1, 3, 85, 20, 20]] - elif flags.model_type == 'yolov3': - yolo_shape = [[1, 3, 85, 13, 13], [1, 3, 85, 26, 26], [1, 3, 85, 52, 52]] - - for num in range(flags.net_out_num): - # print(path_base + "_" + str(num + 1) + ".bin") - if os.path.exists(path_base + "_" + str(num + 1) + ".bin"): - buf = np.fromfile(path_base + "_" + - str(num + 1) + ".bin", dtype="float32") - res_buff.append(buf.reshape(yolo_shape[num]).transpose((0, 1, 3, 4, 2))) # 1,3,85,h,w ->1,3,h,w,85 - else: - print("[ERROR] file not exist", path_base + - "_" + str(num + 1) + ".bin") - - res_tensor = detect(res_buff, flags.model_type) - res_tensor = torch.from_numpy(res_tensor) - # Apply NMS - pred = non_max_suppression(res_tensor, conf_thres=0.33, iou_thres=0.5, classes=None, agnostic=False) - det_results_file = os.path.join(det_results_path, bin_file + ".txt") - - # Process detections - for i, det in enumerate(pred): # detections per image - size = '' - size += '%gx%g ' % img.shape[2:] # print string - gn = torch.tensor(src_img.shape)[[1, 0, 1, 0]] # normalization gain whwh - # Rescale boxes 
from img_size to im0 size - if det is not None: - det[:, :4] = scale_coords(img.shape[2:], det[:, :4], src_img.shape).round() - - # Print results - for c in det[:, -1].unique(): - n = (det[:, -1] == c).sum() # detections per class - size += '%g %ss, ' % (n, names[int(c)]) # add to string - with open(det_results_file, 'w') as f: - for *xyxy, conf, cls in det: - content = '{} {} {} {} {} {}'.format(names[int(cls)], conf, *xyxy) - print(content) - f.write(content) - f.write('\n') - else: - with open(det_results_file, 'w') as f: - f.write('') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--origin_jpg_path", default="./val2014/") - parser.add_argument("--det_results_path", - default="./detection-results/") - parser.add_argument("--coco_class_names", default="./coco2014.names") - parser.add_argument("--net_input_size", default=640, type=int) - parser.add_argument("--net_out_num", default=3) - parser.add_argument("--model_type", default='yolov5') - flags = parser.parse_args() - - post_process(flags) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import argparse +import time +import torch +import torchvision +import cv2 + + +def _make_grid(nx=20, ny=20): + xv, yv = np.meshgrid(np.arange(nx), np.arange(ny)) + return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float) + + +def sigmoid(x0): + s = 1 / (1 + np.exp(-x0)) + return s + +def detect(x, model_type): + """ + x(bs,3,20,20,85) + """ + + # x(bs,3,20,20,85) + z = [] + grid = [] + for i in range(3): + _, _, ny, nx, _ = x[i].shape + grid.append(_make_grid(nx, ny)) + if model_type == 'yolov5': + stride = np.array([8, 16, 32]) + anchor_grid = np.array( + [[10., 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]])\ + .reshape(3, 1, 3, 1, 1, 2) + elif model_type == 'yolov3': + stride = np.array([32, 16, 8]) + anchor_grid = np.array( + [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10., 13, 16, 30, 33, 23]])\ + .reshape(3, 1, 3, 1, 1, 2) + + for i in range(3): + y = sigmoid(x[i]) + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + grid[i]) * stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i] # wh + z.append(y.reshape(1, -1, 85)) + return np.concatenate(z, 1) + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): + """Performs Non-Maximum Suppression (NMS) on inference results + + Returns: + detections with shape: nx6 (x1, y1, x2, y2, conf, cls) + """ + + nc = prediction[0].shape[1] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + time_limit = 10.0 # seconds to quit after + redundant = True # require redundant detections + multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) + + t = time.time() + output = [None] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) + else: # best class only + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # If none remain process next image + n = x.shape[0] # number of boxes + if not n: + continue + # Sort by confidence + # x = x[x[:, 4].argsort(descending=True)] + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139 + print(x, i, x.shape, i.shape) + pass + + output[xi] = x[i] + if (time.time() - t) > time_limit: + break # time limit exceeded + + return output + + +def clip_coords(boxes, img_shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + boxes[:, 0].clamp_(0, img_shape[1]) # x1 + boxes[:, 1].clamp_(0, img_shape[0]) # y1 + boxes[:, 
2].clamp_(0, img_shape[1]) # x2 + boxes[:, 3].clamp_(0, img_shape[0]) # y2 + + +def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + coords[:, [0, 2]] -= pad[0] # x padding + coords[:, [1, 3]] -= pad[1] # y padding + coords[:, :4] /= gain + clip_coords(coords, img0_shape) + return coords + + +def post_process(flags): + names = np.loadtxt(flags.coco_class_names, dtype='str', delimiter='\n') + img = torch.zeros((1, 3, flags.net_input_size, flags.net_input_size)) + + # 读取bin文件用于生成预测结果 + bin_path = flags.bin_data_path + ori_path = flags.origin_jpg_path + + det_results_path = flags.det_results_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] + for name in os.listdir(bin_path) if "bin" in name]) + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + src_img = cv2.imread(os.path.join(ori_path, '{}.jpg'.format(bin_file))) + assert src_img is not None, 'Image Not Found ' + bin_file + + # 加载检测的所有输出tensor + res_buff = [] + + if flags.model_type == 'yolov5': + yolo_shape = [[1, 3, 85, 80, 80], [1, 3, 85, 40, 40], [1, 3, 85, 20, 20]] + elif flags.model_type == 'yolov3': + yolo_shape = [[1, 3, 85, 13, 13], [1, 3, 85, 26, 26], [1, 3, 85, 52, 52]] + + for num in range(flags.net_out_num): + # print(path_base + "_" + str(num + 1) + ".bin") + if os.path.exists(path_base + "_" + str(num + 1) + ".bin"): + buf = np.fromfile(path_base + "_" + + str(num + 1) + ".bin", dtype="float32") + res_buff.append(buf.reshape(yolo_shape[num]).transpose((0, 1, 3, 4, 2))) # 1,3,85,h,w ->1,3,h,w,85 + else: + print("[ERROR] file not exist", path_base + + "_" + str(num + 1) + ".bin") + + res_tensor = detect(res_buff, flags.model_type) + res_tensor = torch.from_numpy(res_tensor) + # Apply NMS + pred = non_max_suppression(res_tensor, conf_thres=0.33, iou_thres=0.5, classes=None, agnostic=False) + det_results_file = os.path.join(det_results_path, bin_file + ".txt") + + # Process detections + for i, det in enumerate(pred): # detections per image + size = '' + size += '%gx%g ' % img.shape[2:] # print string + gn = torch.tensor(src_img.shape)[[1, 0, 1, 0]] # normalization gain whwh + # Rescale boxes from img_size to im0 size + if det is not None: + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], src_img.shape).round() + + # Print results + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + size += '%g %ss, ' % (n, names[int(c)]) # add to string + with open(det_results_file, 'w') as f: + for *xyxy, conf, cls in det: + content = '{} {} {} {} {} {}'.format(names[int(cls)], conf, *xyxy) + print(content) + f.write(content) + f.write('\n') + else: + with open(det_results_file, 'w') as f: + f.write('') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--origin_jpg_path", default="./val2014/") + parser.add_argument("--det_results_path", + default="./detection-results/") + parser.add_argument("--coco_class_names", default="./coco2014.names") + parser.add_argument("--net_input_size", default=640, type=int) + 
parser.add_argument("--net_out_num", default=3) + parser.add_argument("--model_type", default='yolov5') + flags = parser.parse_args() + + post_process(flags) diff --git a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/get_coco_info.py b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/get_coco_info.py index 6e9dcabeec788ddaa3c740fc671148b845ea07c9..2d795eed2ed7fc391a4e36bd92025ec9b213bb6d 100644 --- a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/get_coco_info.py +++ b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/get_coco_info.py @@ -1,31 +1,31 @@ -import os -import sys - -file_path = sys.argv[1] -coco_info = sys.argv[2] -info_name = sys.argv[3] - -image_names = [] -image_size = [] - -with open(coco_info, 'r') as file: - contents = file.read().split('\n') - -for content in contents[:-1]: - temp = content.split() - key = temp[1] - image_names.append(key[key.rfind('/') + 1:].split('.')[0]) - image_size.append([temp[2], temp[3]]) - -name_size = dict(zip(image_names, image_size)) - -with open(info_name, 'w') as file: - index = 0 - for key, val in name_size.items(): - bin_name = os.path.join(file_path, '{}.bin'.format(key)) - content = ' '.join([str(index), bin_name, val[0], val[1]]) - file.write(content) - file.write('\n') - index += 1 - - +import os +import sys + +file_path = sys.argv[1] +coco_info = sys.argv[2] +info_name = sys.argv[3] + +image_names = [] +image_size = [] + +with open(coco_info, 'r') as file: + contents = file.read().split('\n') + +for content in contents[:-1]: + temp = content.split() + key = temp[1] + image_names.append(key[key.rfind('/') + 1:].split('.')[0]) + image_size.append([temp[2], temp[3]]) + +name_size = dict(zip(image_names, image_size)) + +with open(info_name, 'w') as file: + index = 0 + for key, val in name_size.items(): + bin_name = os.path.join(file_path, '{}.bin'.format(key)) + content = ' '.join([str(index), bin_name, val[0], val[1]]) + file.write(content) + file.write('\n') + index += 1 + + diff --git a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/parse_json.py b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/parse_json.py index 2711914e8595d30942a9d02be62976cb18964ec2..29ea10ed8d251f96314e528e4c4d6d5c47e3e8f6 100644 --- a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/parse_json.py +++ b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/parse_json.py @@ -1,77 +1,77 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json - -with open('./instances_val2014.json', 'r') as file: - content = file.read() -content = json.loads(content) -info = content.get('info') -licenses = content.get('licenses') -images = content.get('images') -annotations = content.get('annotations') -categroies = content.get('categories') - -with open('./coco2014.names', 'w') as f: - for categroie in categroies: - f.write(categroie.get('name').replace(' ', '_')) - f.write('\n') - -file_names = [image.get('file_name') for image in images] -widths = [image.get('width') for image in images] -heights = [image.get('height') for image in images] -image_ids = [image.get('id') for image in images] -assert len(file_names) == len(widths) == len(heights) == len(image_ids), "must be equal" - -annotation_ids = [annotation.get('image_id') for annotation in annotations] -bboxs = [annotation.get('bbox') for annotation in annotations] -category_ids = [annotation.get('category_id') for annotation in annotations] -segmentations = [annotation.get('segmentation') for annotation in annotations] -iscrowds = [annotation.get('iscrowd') for annotation in annotations] - -assert len(annotation_ids) == len(bboxs) == len(category_ids) ==len(segmentations) # 255094 - -with open('coco_2014.info', 'w') as f: - for index, file_name in enumerate(file_names): - file_name = 'val2014/' + file_name - line = "{} {} {} {}".format(index, file_name, widths[index], heights[index]) - f.write(line) - f.write('\n') - -def get_all_index(lst, item): - return [index for (index, value) in enumerate(lst) if value == item] - -def get_categroie_name(lst, item): - categroie_name = [dt.get('name') for dt in lst if item == dt.get('id')][0] - if len(categroie_name.split()) == 2: - temp = categroie_name.split() - categroie_name = temp[0] + '_' + temp[1] - return categroie_name - -for index, image_id in enumerate(image_ids): - indexs = get_all_index(annotation_ids, image_id) - with open('./ground-truth-split/{}.txt'.format(file_names[index].split('.')[0]), 'w') as f: - for idx in indexs: - f.write(get_categroie_name(categroies, category_ids[idx])) - print(get_categroie_name(categroies, category_ids[idx])) - f.write(' ') - # change label - bboxs[idx][2] = bboxs[idx][0] + bboxs[idx][2] - bboxs[idx][3] = bboxs[idx][1] + bboxs[idx][3] - f.write(' '.join(map(str, bboxs[idx]))) - f.write('\n') - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +with open('./instances_val2014.json', 'r') as file: + content = file.read() +content = json.loads(content) +info = content.get('info') +licenses = content.get('licenses') +images = content.get('images') +annotations = content.get('annotations') +categroies = content.get('categories') + +with open('./coco2014.names', 'w') as f: + for categroie in categroies: + f.write(categroie.get('name').replace(' ', '_')) + f.write('\n') + +file_names = [image.get('file_name') for image in images] +widths = [image.get('width') for image in images] +heights = [image.get('height') for image in images] +image_ids = [image.get('id') for image in images] +assert len(file_names) == len(widths) == len(heights) == len(image_ids), "must be equal" + +annotation_ids = [annotation.get('image_id') for annotation in annotations] +bboxs = [annotation.get('bbox') for annotation in annotations] +category_ids = [annotation.get('category_id') for annotation in annotations] +segmentations = [annotation.get('segmentation') for annotation in annotations] +iscrowds = [annotation.get('iscrowd') for annotation in annotations] + +assert len(annotation_ids) == len(bboxs) == len(category_ids) ==len(segmentations) # 255094 + +with open('coco_2014.info', 'w') as f: + for index, file_name in enumerate(file_names): + file_name = 'val2014/' + file_name + line = "{} {} {} {}".format(index, file_name, widths[index], heights[index]) + f.write(line) + f.write('\n') + +def get_all_index(lst, item): + return [index for (index, value) in enumerate(lst) if value == item] + +def get_categroie_name(lst, item): + categroie_name = [dt.get('name') for dt in lst if item == dt.get('id')][0] + if len(categroie_name.split()) == 2: + temp = categroie_name.split() + categroie_name = temp[0] + '_' + temp[1] + return categroie_name + +for index, image_id in enumerate(image_ids): + indexs = get_all_index(annotation_ids, image_id) + with open('./ground-truth-split/{}.txt'.format(file_names[index].split('.')[0]), 'w') as f: + for idx in indexs: + f.write(get_categroie_name(categroies, category_ids[idx])) + print(get_categroie_name(categroies, category_ids[idx])) + f.write(' ') + # change label + bboxs[idx][2] = bboxs[idx][0] + bboxs[idx][2] + bboxs[idx][3] = bboxs[idx][1] + bboxs[idx][3] + f.write(' '.join(map(str, bboxs[idx]))) + f.write('\n') + + + + diff --git a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/preprocess_yolov3_pytorch.py b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/preprocess_yolov3_pytorch.py index f10d988816f42ef71b442a7fefcbb7118422a0d8..ffb49084c628acda1c06a07f7410c6d7da60bbcf 100644 --- a/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/preprocess_yolov3_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch/preprocess_yolov3_pytorch.py @@ -1,79 +1,79 @@ -import sys -import os -import cv2 -import numpy as np - - -def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): - # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 - shape = img.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better test mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - # 
if auto: # minimum rectangle - # dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding - # elif scaleFill: # stretch - # dw, dh = 0.0, 0.0 - # new_unpad = (new_shape[1], new_shape[0]) - # ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return img, ratio, (dw, dh) - - -def yolov3_onnx(src_info, output_path): - in_files = [] - if not os.path.exists(output_path): - os.makedirs(output_path) - - with open(src_info, 'r') as file: - contents = file.read().split('\n') - for i in contents[:-1]: - in_files.append(i.split()[1]) - - i = 0 - for file in in_files: - i = i + 1 - print(file, "====", i) - img0 = cv2.imread(file) - # Padded resize - img = letterbox(img0, new_shape=416)[0] - # cv2.imshow('image', img) - # cv2.waitKey(0) - # Convert - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x640x640 - image_np = np.array(img, dtype=np.float32) - image_np /= 255.0 - # image_np = np.transpose(image_np, (2, 0, 1)) # HWC -> CHW - image_np_expanded = np.expand_dims(image_np, axis=0) # NCHW - # Focus - print("shape:", image_np_expanded.shape) - img_numpy = np.ascontiguousarray(image_np_expanded) - - # save img_tensor as binary file for om inference input - temp_name = file[file.rfind('/') + 1:] - img_numpy.tofile(os.path.join(output_path, temp_name.split('.')[0] + ".bin")) - - -if __name__ == "__main__": - src_info = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - yolov3_onnx(src_info, bin_path) - - +import sys +import os +import cv2 +import numpy as np + + +def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): + # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + # if auto: # minimum rectangle + # dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding + # elif scaleFill: # stretch + # dw, dh = 0.0, 0.0 + # new_unpad = (new_shape[1], new_shape[0]) + # ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + + +def yolov3_onnx(src_info, output_path): + in_files = [] + if not os.path.exists(output_path): + os.makedirs(output_path) + + with open(src_info, 'r') as file: + contents = file.read().split('\n') + for i in contents[:-1]: + 
in_files.append(i.split()[1]) + + i = 0 + for file in in_files: + i = i + 1 + print(file, "====", i) + img0 = cv2.imread(file) + # Padded resize + img = letterbox(img0, new_shape=416)[0] + # cv2.imshow('image', img) + # cv2.waitKey(0) + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x640x640 + image_np = np.array(img, dtype=np.float32) + image_np /= 255.0 + # image_np = np.transpose(image_np, (2, 0, 1)) # HWC -> CHW + image_np_expanded = np.expand_dims(image_np, axis=0) # NCHW + # Focus + print("shape:", image_np_expanded.shape) + img_numpy = np.ascontiguousarray(image_np_expanded) + + # save img_tensor as binary file for om inference input + temp_name = file[file.rfind('/') + 1:] + img_numpy.tofile(os.path.join(output_path, temp_name.split('.')[0] + ".bin")) + + +if __name__ == "__main__": + src_info = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + yolov3_onnx(src_info, bin_path) + + diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/README.md b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/README.md index bfd8ba1dbeeb6dd01466c8be91a444d3daffec44..62f196a12425d450734d43d52dbe8be39d8e6dc1 100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/README.md +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/README.md @@ -1,106 +1,106 @@ -文件作用说明: - -1. dy_resize.py:onnx算子修改脚本 -2. env.sh:ATC工具环境变量配置脚本 -3. parse_json.py: coco数据集标签json文件解析脚本 -4. preprocess_yolov4_pytorch.py: 二进制数据集预处理脚本 -5. get_coco_info.py: yolov4.info生成脚本 -6. bin_to_predict_yolov4_pytorch.py: benchmark输出bin文件解析脚本 -7. map_calculate.py: 精度统计脚本 -8. aipp.config 配置文件 -9. require.txt:脚本运行所需的第三方库 - -推理端到端步骤: - -(1) git clone 开源仓https://github.com/Tianxiaomo/pytorch-YOLOv4,并下载对应的权重文件, 修改**demo_pytorch2onnx.py**脚本生成onnx文件 - -```shell -git clone https://github.com/Tianxiaomo/pytorch-YOLOv4 -python3 demo_pytorch2onnx.py yolov4.pth data/dog.jpg -1 80 608 608 -``` - -(2)运行dy_resize.py修改生成的onnx文件 - -```shell -python3.7 dy_resize.py yolov4_-1_3_608_608_dynamic.onnx -``` - -(3)配置环境变量转换om模型 - -``` -source env.sh - -# soc_version:支持Ascend310和Ascend710 - -# 二进制输入 -atc --model=yolov4_-1_3_608_608_dynamic_dbs.onnx --framework=5 --output=yolov4_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="input:1,3,608,608" --insert_op_conf=aipp.config --enable_small_channel=1 - -# 二进制输入 int8量化 -atc --model=yolov4_deploy_model.onnx --framework=5 --output=yolov4_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="input:1,3,608,608" --insert_op_conf=aipp.config --enable_small_channel=1 - -``` - -(4)解析数据集 - -下载coco2014数据集val2014和label文件**instances_valminusminival2014.json**,运行**parse_json.py**解析数据集 - -``` -python3.7 parse_json.py -``` - -生成coco2014.name和coco_2014.info以及gronud-truth文件夹 - -(5)数据预处理 - -运行脚本preprocess_yolov4_pytorch.py处理数据集 - -``` -python3.7 preprocess_yolov4_pytorch.py coco_2014.info yolov4_bin int8 -1 -``` - -(6)onnx模型量化(可选) - -1.AMCT工具包安装,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) - -2.数据预处理,用于量化因子矫正。当前模型为动态batch,建议用多batch_size的预处理文件矫正量化因子。 -执行以下命令: -``` -python3.7 preprocess_yolov4_pytorch.py coco_2014.info uncalibration_bin float32 1000 -python3.7.5 calibration_bin.py uncalibration_bin calibration_bin 32 -``` - -3.ONNX模型量化,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) 
-在result目录下生成yolov4_deploy_model.onnx量化模型 - -4.量化模型验证,除onnx离线模型转换om模型命令有区别外,其余一致 - - -(7)benchmark推理 - -运行get_coco_info.py生成info文件 - -``` -python3.7 get_coco_info.py yolov4_bin coco_2014.info yolov4.info -``` - -执行benchmark命令,结果保存在同级目录 result/dumpOutput_device0/ - -``` -# 二进制 -./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -om_path=yolov4_bs1.om -input_width=608 -input_height=608 -input_text_path=yolov4.info -useDvpp=false -output_binary=true -``` - -(8)后处理 - -运行 bin_to_predict_yolov4_pytorch.py 解析模型输出 - -``` -python3.7 bin_to_predict_yolov4_pytorch.py --bin_data_path result/dumpOutput_device0/ --det_results_path detection-results/ --origin_jpg_path /root/dataset/coco2014/val2014/ --coco_class_names /root/dataset/coco2014/coco2014.names -``` - -运行map_cauculate.py统计mAP值 - -``` -python3 map_calculate.py --label_path ./ground-truth --npu_txt_path ./detection-results -na -np -``` - +文件作用说明: + +1. dy_resize.py:onnx算子修改脚本 +2. env.sh:ATC工具环境变量配置脚本 +3. parse_json.py: coco数据集标签json文件解析脚本 +4. preprocess_yolov4_pytorch.py: 二进制数据集预处理脚本 +5. get_coco_info.py: yolov4.info生成脚本 +6. bin_to_predict_yolov4_pytorch.py: benchmark输出bin文件解析脚本 +7. map_calculate.py: 精度统计脚本 +8. aipp.config 配置文件 +9. require.txt:脚本运行所需的第三方库 + +推理端到端步骤: + +(1) git clone 开源仓https://github.com/Tianxiaomo/pytorch-YOLOv4,并下载对应的权重文件, 修改**demo_pytorch2onnx.py**脚本生成onnx文件 + +```shell +git clone https://github.com/Tianxiaomo/pytorch-YOLOv4 +python3 demo_pytorch2onnx.py yolov4.pth data/dog.jpg -1 80 608 608 +``` + +(2)运行dy_resize.py修改生成的onnx文件 + +```shell +python3.7 dy_resize.py yolov4_-1_3_608_608_dynamic.onnx +``` + +(3)配置环境变量转换om模型 + +``` +source env.sh + +# soc_version:支持Ascend310和Ascend710 + +# 二进制输入 +atc --model=yolov4_-1_3_608_608_dynamic_dbs.onnx --framework=5 --output=yolov4_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="input:1,3,608,608" --insert_op_conf=aipp.config --enable_small_channel=1 + +# 二进制输入 int8量化 +atc --model=yolov4_deploy_model.onnx --framework=5 --output=yolov4_bs1 --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="input:1,3,608,608" --insert_op_conf=aipp.config --enable_small_channel=1 + +``` + +(4)解析数据集 + +下载coco2014数据集val2014和label文件**instances_valminusminival2014.json**,运行**parse_json.py**解析数据集 + +``` +python3.7 parse_json.py +``` + +生成coco2014.name和coco_2014.info以及gronud-truth文件夹 + +(5)数据预处理 + +运行脚本preprocess_yolov4_pytorch.py处理数据集 + +``` +python3.7 preprocess_yolov4_pytorch.py coco_2014.info yolov4_bin int8 -1 +``` + +(6)onnx模型量化(可选) + +1.AMCT工具包安装,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) + +2.数据预处理,用于量化因子矫正。当前模型为动态batch,建议用多batch_size的预处理文件矫正量化因子。 +执行以下命令: +``` +python3.7 preprocess_yolov4_pytorch.py coco_2014.info uncalibration_bin float32 1000 +python3.7.5 calibration_bin.py uncalibration_bin calibration_bin 32 +``` + +3.ONNX模型量化,具体参考[CANN 开发辅助工具指南 (推理)](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373?category=developer-documents&subcategory=auxiliary-development-tools) +在result目录下生成yolov4_deploy_model.onnx量化模型 + +4.量化模型验证,除onnx离线模型转换om模型命令有区别外,其余一致 + + +(7)benchmark推理 + +运行get_coco_info.py生成info文件 + +``` +python3.7 get_coco_info.py yolov4_bin coco_2014.info yolov4.info +``` + +执行benchmark命令,结果保存在同级目录 result/dumpOutput_device0/ + +``` +# 二进制 +./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -om_path=yolov4_bs1.om -input_width=608 -input_height=608 -input_text_path=yolov4.info 
-useDvpp=false -output_binary=true +``` + +(8)后处理 + +运行 bin_to_predict_yolov4_pytorch.py 解析模型输出 + +``` +python3.7 bin_to_predict_yolov4_pytorch.py --bin_data_path result/dumpOutput_device0/ --det_results_path detection-results/ --origin_jpg_path /root/dataset/coco2014/val2014/ --coco_class_names /root/dataset/coco2014/coco2014.names +``` + +运行map_cauculate.py统计mAP值 + +``` +python3 map_calculate.py --label_path ./ground-truth --npu_txt_path ./detection-results -na -np +``` + diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/bin_to_predict_yolov4_pytorch.py b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/bin_to_predict_yolov4_pytorch.py index bb932cbf332e0984ac1e161c8a8d5f5b65bef75a..9f58821cafe0235384c9c1717a99f7d37fc49add 100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/bin_to_predict_yolov4_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/bin_to_predict_yolov4_pytorch.py @@ -1,361 +1,361 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import argparse -import time -import torch -import torchvision -import cv2 - - -def yolo_forward_dynamic(output, num_classes, anchors, num_anchors, scale_x_y): - bxy_list = [] - bwh_list = [] - det_confs_list = [] - cls_confs_list = [] - - for i in range(num_anchors): - begin = i * (5 + num_classes) - end = (i + 1) * (5 + num_classes) - - bxy_list.append(output[:, begin: begin + 2]) - bwh_list.append(output[:, begin + 2: begin + 4]) - det_confs_list.append(output[:, begin + 4: begin + 5]) - cls_confs_list.append(output[:, begin + 5: end]) - - # Shape: [batch, num_anchors * 2, H, W] - bxy = torch.cat(bxy_list, dim=1) - # Shape: [batch, num_anchors * 2, H, W] - bwh = torch.cat(bwh_list, dim=1) - - # Shape: [batch, num_anchors, H, W] - det_confs = torch.cat(det_confs_list, dim=1) - # Shape: [batch, num_anchors * H * W] - det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3)) - - # Shape: [batch, num_anchors * num_classes, H, W] - cls_confs = torch.cat(cls_confs_list, dim=1) - # Shape: [batch, num_anchors, num_classes, H * W] - cls_confs = cls_confs.view(output.size(0), num_anchors, num_classes, output.size(2) * output.size(3)) - # Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes] - cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(output.size(0), num_anchors * output.size(2) * output.size(3), - num_classes) - - # Apply sigmoid(), exp() and softmax() to slices - bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1) - bwh = torch.exp(bwh) - det_confs = torch.sigmoid(det_confs) - cls_confs = torch.sigmoid(cls_confs) - - # Prepare C-x, C-y, P-w, P-h (None of them are torch related) - grid_x = np.expand_dims(np.expand_dims( - np.expand_dims(np.linspace(0, output.size(3) - 1, output.size(3)), axis=0).repeat(output.size(2), 0), axis=0), - axis=0) - grid_y = np.expand_dims(np.expand_dims( - np.expand_dims(np.linspace(0, output.size(2) - 1, output.size(2)), 
axis=1).repeat(output.size(3), 1), axis=0), - axis=0) - - anchor_w = [] - anchor_h = [] - for i in range(num_anchors): - anchor_w.append(anchors[i * 2]) - anchor_h.append(anchors[i * 2 + 1]) - - device = None - cuda_check = output.is_cuda - if cuda_check: - device = output.get_device() - - bx_list = [] - by_list = [] - bw_list = [] - bh_list = [] - - # Apply C-x, C-y, P-w, P-h - for i in range(num_anchors): - ii = i * 2 - # Shape: [batch, 1, H, W] - bx = bxy[:, ii: ii + 1] + torch.tensor(grid_x, device=device, - dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32) - # Shape: [batch, 1, H, W] - by = bxy[:, ii + 1: ii + 2] + torch.tensor(grid_y, device=device, - dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32) - # Shape: [batch, 1, H, W] - bw = bwh[:, ii: ii + 1] * anchor_w[i] - # Shape: [batch, 1, H, W] - bh = bwh[:, ii + 1: ii + 2] * anchor_h[i] - - bx_list.append(bx) - by_list.append(by) - bw_list.append(bw) - bh_list.append(bh) - - ######################################## - # Figure out bboxes from slices # - ######################################## - - # Shape: [batch, num_anchors, H, W] - bx = torch.cat(bx_list, dim=1) - # Shape: [batch, num_anchors, H, W] - by = torch.cat(by_list, dim=1) - # Shape: [batch, num_anchors, H, W] - bw = torch.cat(bw_list, dim=1) - # Shape: [batch, num_anchors, H, W] - bh = torch.cat(bh_list, dim=1) - - # Shape: [batch, 2 * num_anchors, H, W] - bx_bw = torch.cat((bx, bw), dim=1) - # Shape: [batch, 2 * num_anchors, H, W] - by_bh = torch.cat((by, bh), dim=1) - - # normalize coordinates to [0, 1] - bx_bw /= output.size(3) - by_bh /= output.size(2) - - # Shape: [batch, num_anchors * H * W, 1] - bx = bx_bw[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) - by = by_bh[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) - bw = bx_bw[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) - bh = by_bh[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) - - bx1 = bx - bw * 0.5 - by1 = by - bh * 0.5 - bx2 = bx1 + bw - by2 = by1 + bh - - # Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4] - boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(output.size(0), num_anchors * output.size(2) * output.size(3), - 1, 4) - - det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) - confs = cls_confs * det_confs - - return boxes, confs - - -class YoloLayer(object): - ''' Yolo layer - model_out: while inference,is post-processing inside or outside the model - true:outside - ''' - def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, stride=32, model_out=False): - - self.anchor_mask = anchor_mask - self.num_classes = num_classes - self.anchors = anchors - self.num_anchors = num_anchors - self.anchor_step = len(anchors) // num_anchors - self.coord_scale = 1 - self.noobject_scale = 1 - self.object_scale = 5 - self.class_scale = 1 - self.thresh = 0.6 - self.stride = stride - self.seen = 0 - self.scale_x_y = 1 - - self.model_out = model_out - - - def forward(self, output): - masked_anchors = [] - for m in self.anchor_mask: - masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step] - masked_anchors = [anchor / self.stride for anchor in masked_anchors] - - return yolo_forward_dynamic(output, self.num_classes, masked_anchors, - len(self.anchor_mask), scale_x_y=self.scale_x_y) - - -def 
get_region_boxes(boxes_and_confs): - # print('Getting boxes from boxes and confs ...') - - boxes_list = [] - confs_list = [] - - for item in boxes_and_confs: - boxes_list.append(item[0]) - confs_list.append(item[1]) - - # boxes: [batch, num1 + num2 + num3, 1, 4] - # confs: [batch, num1 + num2 + num3, num_classes] - boxes = torch.cat(boxes_list, dim=1) - confs = torch.cat(confs_list, dim=1) - - return [boxes, confs] - - -def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): - # print(boxes.shape) - x1 = boxes[:, 0] - y1 = boxes[:, 1] - x2 = boxes[:, 2] - y2 = boxes[:, 3] - - areas = (x2 - x1) * (y2 - y1) - order = confs.argsort()[::-1] - - keep = [] - while order.size > 0: - idx_self = order[0] - idx_other = order[1:] - - keep.append(idx_self) - - xx1 = np.maximum(x1[idx_self], x1[idx_other]) - yy1 = np.maximum(y1[idx_self], y1[idx_other]) - xx2 = np.minimum(x2[idx_self], x2[idx_other]) - yy2 = np.minimum(y2[idx_self], y2[idx_other]) - - w = np.maximum(0.0, xx2 - xx1) - h = np.maximum(0.0, yy2 - yy1) - inter = w * h - - if min_mode: - over = inter / np.minimum(areas[order[0]], areas[order[1:]]) - else: - over = inter / (areas[order[0]] + areas[order[1:]] - inter) - - inds = np.where(over <= nms_thresh)[0] - order = order[inds + 1] - - return np.array(keep) - - -def nms(conf_thresh, nms_thresh, output): - # [batch, num, 1, 4] - box_array = output[0] - # [batch, num, num_classes] - confs = output[1] - - if type(box_array).__name__ != 'ndarray': - box_array = box_array.cpu().detach().numpy() - confs = confs.cpu().detach().numpy() - - num_classes = confs.shape[2] - - # [batch, num, 4] - box_array = box_array[:, :, 0] - - # [batch, num, num_classes] --> [batch, num] - max_conf = np.max(confs, axis=2) - max_id = np.argmax(confs, axis=2) - - bboxes_batch = [] - for i in range(box_array.shape[0]): - - argwhere = max_conf[i] > conf_thresh - l_box_array = box_array[i, argwhere, :] - l_max_conf = max_conf[i, argwhere] - l_max_id = max_id[i, argwhere] - - bboxes = [] - # nms for each class - for j in range(num_classes): - - cls_argwhere = l_max_id == j - ll_box_array = l_box_array[cls_argwhere, :] - ll_max_conf = l_max_conf[cls_argwhere] - ll_max_id = l_max_id[cls_argwhere] - - keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) - - if (keep.size > 0): - ll_box_array = ll_box_array[keep, :] - ll_max_conf = ll_max_conf[keep] - ll_max_id = ll_max_id[keep] - - for k in range(ll_box_array.shape[0]): - bboxes.append( - [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], - ll_max_conf[k], ll_max_id[k]]) - - bboxes_batch.append(bboxes) - - return bboxes_batch - - -def post_process(flags): - names = np.loadtxt(flags.coco_class_names, dtype='str', delimiter='\n') - - # 读取bin文件用于生成预测结果 - bin_path = flags.bin_data_path - ori_path = flags.origin_jpg_path - - anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401] - num_classes = 80 - - det_results_path = flags.det_results_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] - for name in os.listdir(bin_path) if "bin" in name]) - - yolo1 = YoloLayer(anchor_mask=[0, 1, 2], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=8) - yolo2 = YoloLayer(anchor_mask=[3, 4, 5], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=16) - yolo3 = YoloLayer(anchor_mask=[6, 7, 8], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=32) - - yolo_shape = [[1, 255, 76, 76], [1, 255, 38, 38], [1, 255, 19, 19]] - - for bin_file in 
sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - src_img = cv2.imread(os.path.join(ori_path, '{}.jpg'.format(bin_file))) - assert src_img is not None, 'Image Not Found ' + bin_file - - # 加载检测的所有输出tensor - feature_map_1 = np.fromfile(path_base + "_" + '1' + ".bin", dtype="float32").reshape(yolo_shape[0]) - feature_map_2 = np.fromfile(path_base + "_" + '2' + ".bin", dtype="float32").reshape(yolo_shape[1]) - feature_map_3 = np.fromfile(path_base + "_" + '3' + ".bin", dtype="float32").reshape(yolo_shape[2]) - - pred_1 = yolo1.forward(torch.from_numpy(feature_map_1)) - pred_2 = yolo2.forward(torch.from_numpy(feature_map_2)) - pred_3 = yolo3.forward(torch.from_numpy(feature_map_3)) - - # nms - output = get_region_boxes([pred_1, pred_2, pred_3]) - pred = nms(conf_thresh=0.4, nms_thresh=0.6, output=output)[0] - - # save result - det_results_file = os.path.join(det_results_path, bin_file + ".txt") - print(det_results_file) - with open(det_results_file, 'w') as f: - width = src_img.shape[1] - height = src_img.shape[0] - for i in range(len(pred)): - box = pred[i] - x1 = int(box[0] * width) - y1 = int(box[1] * height) - x2 = int(box[2] * width) - y2 = int(box[3] * height) - cls_conf = box[4] - cls_id = box[5] - - content = '{} {} {} {} {} {}'.format(names[int(cls_id)], cls_conf, x1, y1, x2, y2) - f.write(content) - f.write('\n') - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--origin_jpg_path", default="./val2014/") - parser.add_argument("--det_results_path", - default="./detection-results/") - parser.add_argument("--coco_class_names", default="./coco2014.names") - parser.add_argument("--net_out_num", default=3) - flags = parser.parse_args() - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +import argparse +import time +import torch +import torchvision +import cv2 + + +def yolo_forward_dynamic(output, num_classes, anchors, num_anchors, scale_x_y): + bxy_list = [] + bwh_list = [] + det_confs_list = [] + cls_confs_list = [] + + for i in range(num_anchors): + begin = i * (5 + num_classes) + end = (i + 1) * (5 + num_classes) + + bxy_list.append(output[:, begin: begin + 2]) + bwh_list.append(output[:, begin + 2: begin + 4]) + det_confs_list.append(output[:, begin + 4: begin + 5]) + cls_confs_list.append(output[:, begin + 5: end]) + + # Shape: [batch, num_anchors * 2, H, W] + bxy = torch.cat(bxy_list, dim=1) + # Shape: [batch, num_anchors * 2, H, W] + bwh = torch.cat(bwh_list, dim=1) + + # Shape: [batch, num_anchors, H, W] + det_confs = torch.cat(det_confs_list, dim=1) + # Shape: [batch, num_anchors * H * W] + det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3)) + + # Shape: [batch, num_anchors * num_classes, H, W] + cls_confs = torch.cat(cls_confs_list, dim=1) + # Shape: [batch, num_anchors, num_classes, H * W] + cls_confs = cls_confs.view(output.size(0), num_anchors, num_classes, output.size(2) * output.size(3)) + # Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes] + cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(output.size(0), num_anchors * output.size(2) * output.size(3), + num_classes) + + # Apply sigmoid(), exp() and softmax() to slices + bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1) + bwh = torch.exp(bwh) + det_confs = torch.sigmoid(det_confs) + cls_confs = torch.sigmoid(cls_confs) + + # Prepare C-x, C-y, P-w, P-h (None of them are torch related) + grid_x = np.expand_dims(np.expand_dims( + np.expand_dims(np.linspace(0, output.size(3) - 1, output.size(3)), axis=0).repeat(output.size(2), 0), axis=0), + axis=0) + grid_y = np.expand_dims(np.expand_dims( + np.expand_dims(np.linspace(0, output.size(2) - 1, output.size(2)), axis=1).repeat(output.size(3), 1), axis=0), + axis=0) + + anchor_w = [] + anchor_h = [] + for i in range(num_anchors): + anchor_w.append(anchors[i * 2]) + anchor_h.append(anchors[i * 2 + 1]) + + device = None + cuda_check = output.is_cuda + if cuda_check: + device = output.get_device() + + bx_list = [] + by_list = [] + bw_list = [] + bh_list = [] + + # Apply C-x, C-y, P-w, P-h + for i in range(num_anchors): + ii = i * 2 + # Shape: [batch, 1, H, W] + bx = bxy[:, ii: ii + 1] + torch.tensor(grid_x, device=device, + dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32) + # Shape: [batch, 1, H, W] + by = bxy[:, ii + 1: ii + 2] + torch.tensor(grid_y, device=device, + dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32) + # Shape: [batch, 1, H, W] + bw = bwh[:, ii: ii + 1] * anchor_w[i] + # Shape: [batch, 1, H, W] + bh = bwh[:, ii + 1: ii + 2] * anchor_h[i] + + bx_list.append(bx) + by_list.append(by) + bw_list.append(bw) + bh_list.append(bh) + + ######################################## + # Figure out bboxes from slices # + ######################################## + + # Shape: [batch, num_anchors, H, W] + bx = torch.cat(bx_list, dim=1) + # Shape: [batch, num_anchors, H, W] + by = torch.cat(by_list, dim=1) + # Shape: [batch, num_anchors, H, W] + bw = torch.cat(bw_list, dim=1) + # Shape: [batch, num_anchors, H, W] + bh = torch.cat(bh_list, dim=1) + + # Shape: [batch, 2 * num_anchors, H, W] + bx_bw = torch.cat((bx, bw), dim=1) + # Shape: [batch, 2 * num_anchors, H, W] + by_bh = torch.cat((by, bh), dim=1) + + # 
normalize coordinates to [0, 1] + bx_bw /= output.size(3) + by_bh /= output.size(2) + + # Shape: [batch, num_anchors * H * W, 1] + bx = bx_bw[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) + by = by_bh[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) + bw = bx_bw[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) + bh = by_bh[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) + + bx1 = bx - bw * 0.5 + by1 = by - bh * 0.5 + bx2 = bx1 + bw + by2 = by1 + bh + + # Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4] + boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(output.size(0), num_anchors * output.size(2) * output.size(3), + 1, 4) + + det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3), 1) + confs = cls_confs * det_confs + + return boxes, confs + + +class YoloLayer(object): + ''' Yolo layer + model_out: while inference,is post-processing inside or outside the model + true:outside + ''' + def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, stride=32, model_out=False): + + self.anchor_mask = anchor_mask + self.num_classes = num_classes + self.anchors = anchors + self.num_anchors = num_anchors + self.anchor_step = len(anchors) // num_anchors + self.coord_scale = 1 + self.noobject_scale = 1 + self.object_scale = 5 + self.class_scale = 1 + self.thresh = 0.6 + self.stride = stride + self.seen = 0 + self.scale_x_y = 1 + + self.model_out = model_out + + + def forward(self, output): + masked_anchors = [] + for m in self.anchor_mask: + masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step] + masked_anchors = [anchor / self.stride for anchor in masked_anchors] + + return yolo_forward_dynamic(output, self.num_classes, masked_anchors, + len(self.anchor_mask), scale_x_y=self.scale_x_y) + + +def get_region_boxes(boxes_and_confs): + # print('Getting boxes from boxes and confs ...') + + boxes_list = [] + confs_list = [] + + for item in boxes_and_confs: + boxes_list.append(item[0]) + confs_list.append(item[1]) + + # boxes: [batch, num1 + num2 + num3, 1, 4] + # confs: [batch, num1 + num2 + num3, num_classes] + boxes = torch.cat(boxes_list, dim=1) + confs = torch.cat(confs_list, dim=1) + + return [boxes, confs] + + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def nms(conf_thresh, nms_thresh, output): + # [batch, num, 1, 4] + box_array = output[0] + # [batch, num, num_classes] + confs = output[1] + + if type(box_array).__name__ != 'ndarray': + box_array = box_array.cpu().detach().numpy() + confs = confs.cpu().detach().numpy() + + num_classes = 
confs.shape[2] + + # [batch, num, 4] + box_array = box_array[:, :, 0] + + # [batch, num, num_classes] --> [batch, num] + max_conf = np.max(confs, axis=2) + max_id = np.argmax(confs, axis=2) + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append( + [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], + ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch + + +def post_process(flags): + names = np.loadtxt(flags.coco_class_names, dtype='str', delimiter='\n') + + # 读取bin文件用于生成预测结果 + bin_path = flags.bin_data_path + ori_path = flags.origin_jpg_path + + anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401] + num_classes = 80 + + det_results_path = flags.det_results_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] + for name in os.listdir(bin_path) if "bin" in name]) + + yolo1 = YoloLayer(anchor_mask=[0, 1, 2], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=8) + yolo2 = YoloLayer(anchor_mask=[3, 4, 5], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=16) + yolo3 = YoloLayer(anchor_mask=[6, 7, 8], num_classes=num_classes, anchors=anchors, num_anchors=9, stride=32) + + yolo_shape = [[1, 255, 76, 76], [1, 255, 38, 38], [1, 255, 19, 19]] + + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + src_img = cv2.imread(os.path.join(ori_path, '{}.jpg'.format(bin_file))) + assert src_img is not None, 'Image Not Found ' + bin_file + + # 加载检测的所有输出tensor + feature_map_1 = np.fromfile(path_base + "_" + '1' + ".bin", dtype="float32").reshape(yolo_shape[0]) + feature_map_2 = np.fromfile(path_base + "_" + '2' + ".bin", dtype="float32").reshape(yolo_shape[1]) + feature_map_3 = np.fromfile(path_base + "_" + '3' + ".bin", dtype="float32").reshape(yolo_shape[2]) + + pred_1 = yolo1.forward(torch.from_numpy(feature_map_1)) + pred_2 = yolo2.forward(torch.from_numpy(feature_map_2)) + pred_3 = yolo3.forward(torch.from_numpy(feature_map_3)) + + # nms + output = get_region_boxes([pred_1, pred_2, pred_3]) + pred = nms(conf_thresh=0.4, nms_thresh=0.6, output=output)[0] + + # save result + det_results_file = os.path.join(det_results_path, bin_file + ".txt") + print(det_results_file) + with open(det_results_file, 'w') as f: + width = src_img.shape[1] + height = src_img.shape[0] + for i in range(len(pred)): + box = pred[i] + x1 = int(box[0] * width) + y1 = int(box[1] * height) + x2 = int(box[2] * width) + y2 = int(box[3] * height) + cls_conf = box[4] + cls_id = box[5] + + content = '{} {} {} {} {} {}'.format(names[int(cls_id)], cls_conf, x1, y1, x2, y2) + f.write(content) + f.write('\n') + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--origin_jpg_path", default="./val2014/") + 
parser.add_argument("--det_results_path", + default="./detection-results/") + parser.add_argument("--coco_class_names", default="./coco2014.names") + parser.add_argument("--net_out_num", default=3) + flags = parser.parse_args() + post_process(flags) \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/get_coco_info.py b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/get_coco_info.py index e0aa1581833a4698e2222a01cfda7cc8719027bc..e452569bf781741d1e2c4790aae4f4fb784b0bdc 100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/get_coco_info.py +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/get_coco_info.py @@ -1,46 +1,46 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import sys - -file_path = sys.argv[1] -coco_info = sys.argv[2] -info_name = sys.argv[3] - -image_names = [] -image_size = [] - -with open(coco_info, 'r') as file: - contents = file.read().split('\n') - -for content in contents[:-1]: - temp = content.split() - key = temp[1] - image_names.append(key[key.rfind('/') + 1:].split('.')[0]) - image_size.append([temp[2], temp[3]]) - -name_size = dict(zip(image_names, image_size)) - -with open(info_name, 'w') as file: - index = 0 - for key, val in name_size.items(): - bin_name = os.path.join(file_path, '{}.bin'.format(key)) - content = ' '.join([str(index), bin_name, val[0], val[1]]) - file.write(content) - file.write('\n') - index += 1 - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import os +import sys + +file_path = sys.argv[1] +coco_info = sys.argv[2] +info_name = sys.argv[3] + +image_names = [] +image_size = [] + +with open(coco_info, 'r') as file: + contents = file.read().split('\n') + +for content in contents[:-1]: + temp = content.split() + key = temp[1] + image_names.append(key[key.rfind('/') + 1:].split('.')[0]) + image_size.append([temp[2], temp[3]]) + +name_size = dict(zip(image_names, image_size)) + +with open(info_name, 'w') as file: + index = 0 + for key, val in name_size.items(): + bin_name = os.path.join(file_path, '{}.bin'.format(key)) + content = ' '.join([str(index), bin_name, val[0], val[1]]) + file.write(content) + file.write('\n') + index += 1 + + diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/parse_json.py b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/parse_json.py index 2661bc483bf3ffc50a8d0289dba8ea0ccfaa2f0b..4c984a0891f7114ecf64c525a785421615dbe5b0 100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/parse_json.py +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/parse_json.py @@ -1,77 +1,77 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json - -with open('./instances_valminusminival2014.json', 'r') as file: - content = file.read() -content = json.loads(content) -info = content.get('info') -licenses = content.get('licenses') -images = content.get('images') -annotations = content.get('annotations') -categroies = content.get('categories') - -with open('./coco2014.names', 'w') as f: - for categroie in categroies: - f.write(categroie.get('name').replace(' ', '_')) - f.write('\n') - -file_names = [image.get('file_name') for image in images] -widths = [image.get('width') for image in images] -heights = [image.get('height') for image in images] -image_ids = [image.get('id') for image in images] -assert len(file_names) == len(widths) == len(heights) == len(image_ids), "must be equal" - -annotation_ids = [annotation.get('image_id') for annotation in annotations] -bboxs = [annotation.get('bbox') for annotation in annotations] -category_ids = [annotation.get('category_id') for annotation in annotations] -segmentations = [annotation.get('segmentation') for annotation in annotations] -iscrowds = [annotation.get('iscrowd') for annotation in annotations] - -assert len(annotation_ids) == len(bboxs) == len(category_ids) ==len(segmentations) # 255094 - -with open('coco_2014.info', 'w') as f: - for index, file_name in enumerate(file_names): - file_name = 'val2014/' + file_name - line = "{} {} {} {}".format(index, file_name, widths[index], heights[index]) - f.write(line) - f.write('\n') - -def get_all_index(lst, item): - return [index for (index, value) in enumerate(lst) if value == item] - -def get_categroie_name(lst, item): - categroie_name = [dt.get('name') for dt in lst if item == dt.get('id')][0] - if len(categroie_name.split()) == 2: - temp = categroie_name.split() - categroie_name = temp[0] + '_' + temp[1] - return categroie_name - -for index, image_id in enumerate(image_ids): - 
indexs = get_all_index(annotation_ids, image_id) - with open('./ground-truth/{}.txt'.format(file_names[index].split('.')[0]), 'w') as f: - for idx in indexs: - f.write(get_categroie_name(categroies, category_ids[idx])) - - f.write(' ') - # change label - bboxs[idx][2] = bboxs[idx][0] + bboxs[idx][2] - bboxs[idx][3] = bboxs[idx][1] + bboxs[idx][3] - f.write(' '.join(map(str, bboxs[idx]))) - f.write('\n') - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +with open('./instances_valminusminival2014.json', 'r') as file: + content = file.read() +content = json.loads(content) +info = content.get('info') +licenses = content.get('licenses') +images = content.get('images') +annotations = content.get('annotations') +categroies = content.get('categories') + +with open('./coco2014.names', 'w') as f: + for categroie in categroies: + f.write(categroie.get('name').replace(' ', '_')) + f.write('\n') + +file_names = [image.get('file_name') for image in images] +widths = [image.get('width') for image in images] +heights = [image.get('height') for image in images] +image_ids = [image.get('id') for image in images] +assert len(file_names) == len(widths) == len(heights) == len(image_ids), "must be equal" + +annotation_ids = [annotation.get('image_id') for annotation in annotations] +bboxs = [annotation.get('bbox') for annotation in annotations] +category_ids = [annotation.get('category_id') for annotation in annotations] +segmentations = [annotation.get('segmentation') for annotation in annotations] +iscrowds = [annotation.get('iscrowd') for annotation in annotations] + +assert len(annotation_ids) == len(bboxs) == len(category_ids) ==len(segmentations) # 255094 + +with open('coco_2014.info', 'w') as f: + for index, file_name in enumerate(file_names): + file_name = 'val2014/' + file_name + line = "{} {} {} {}".format(index, file_name, widths[index], heights[index]) + f.write(line) + f.write('\n') + +def get_all_index(lst, item): + return [index for (index, value) in enumerate(lst) if value == item] + +def get_categroie_name(lst, item): + categroie_name = [dt.get('name') for dt in lst if item == dt.get('id')][0] + if len(categroie_name.split()) == 2: + temp = categroie_name.split() + categroie_name = temp[0] + '_' + temp[1] + return categroie_name + +for index, image_id in enumerate(image_ids): + indexs = get_all_index(annotation_ids, image_id) + with open('./ground-truth/{}.txt'.format(file_names[index].split('.')[0]), 'w') as f: + for idx in indexs: + f.write(get_categroie_name(categroies, category_ids[idx])) + + f.write(' ') + # change label + bboxs[idx][2] = bboxs[idx][0] + bboxs[idx][2] + bboxs[idx][3] = bboxs[idx][1] + bboxs[idx][3] + f.write(' '.join(map(str, bboxs[idx]))) + f.write('\n') + + + + diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/preprocess_yolov4_pytorch.py b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/preprocess_yolov4_pytorch.py index 95df9a16b50e27d5ef3221cf5775c24d0dba470a..f5991e956fe068a53d4da4da8eb308231e8d1360 
100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/preprocess_yolov4_pytorch.py +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/preprocess_yolov4_pytorch.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -import cv2 -import numpy as np - -def yolov4_onnx(src_info, output_path, bin_type, frame_num): - in_files = [] - if not os.path.exists(output_path): - os.makedirs(output_path) - - with open(src_info, 'r') as file: - contents = file.read().split('\n') - for i in contents[:-1]: - in_files.append(i.split()[1]) - - i = 0 - for file in in_files: - i = i + 1 - print(file, "====", i) - img0 = cv2.imread(file) - resized = cv2.resize(img0, (608, 608), interpolation=cv2.INTER_LINEAR) - img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) - - if frame_num != -1 and i > frame_num: - break - - if bin_type == "float32": - img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32) - img_in = np.expand_dims(img_in, axis=0) - img_in /= 255.0 - elif bin_type == "int8": - img_in = img_in.astype(np.int8) - else: - print("error type") - break - - # save img_tensor as binary file for om inference input - temp_name = file[file.rfind('/') + 1:] - img_in.tofile(os.path.join(output_path, temp_name.split('.')[0] + ".bin")) - - -if __name__ == "__main__": - if len(sys.argv) < 5: - raise Exception("usage: python3 xxx.py [src_path] [save_path] [bin_type] [frame_num]") - - src_path = os.path.realpath(sys.argv[1]) - save_path = os.path.realpath(sys.argv[2]) - bin_type = sys.argv[3] - frame_num = int(sys.argv[4]) - - yolov4_onnx(src_path, save_path, bin_type, frame_num) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
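The `# change label` step in the `parse_json.py` hunk above rewrites each COCO annotation from `[x, y, width, height]` into the `[x1, y1, x2, y2]` corner format used by the ground-truth txt files. A minimal sketch of that in-place conversion on a made-up annotation (values are illustrative only):

```python
# COCO stores boxes as [x, y, width, height]; the ground-truth files need
# [x1, y1, x2, y2], so the right/bottom edges become x + w and y + h.
bbox = [30.0, 40.0, 100.0, 50.0]   # hypothetical COCO-style annotation
bbox[2] = bbox[0] + bbox[2]        # x2 = 30 + 100 = 130.0
bbox[3] = bbox[1] + bbox[3]        # y2 = 40 + 50  = 90.0
print(' '.join(map(str, bbox)))    # "30.0 40.0 130.0 90.0"
```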
+ +import sys +import os +import cv2 +import numpy as np + +def yolov4_onnx(src_info, output_path, bin_type, frame_num): + in_files = [] + if not os.path.exists(output_path): + os.makedirs(output_path) + + with open(src_info, 'r') as file: + contents = file.read().split('\n') + for i in contents[:-1]: + in_files.append(i.split()[1]) + + i = 0 + for file in in_files: + i = i + 1 + print(file, "====", i) + img0 = cv2.imread(file) + resized = cv2.resize(img0, (608, 608), interpolation=cv2.INTER_LINEAR) + img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + + if frame_num != -1 and i > frame_num: + break + + if bin_type == "float32": + img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32) + img_in = np.expand_dims(img_in, axis=0) + img_in /= 255.0 + elif bin_type == "int8": + img_in = img_in.astype(np.int8) + else: + print("error type") + break + + # save img_tensor as binary file for om inference input + temp_name = file[file.rfind('/') + 1:] + img_in.tofile(os.path.join(output_path, temp_name.split('.')[0] + ".bin")) + + +if __name__ == "__main__": + if len(sys.argv) < 5: + raise Exception("usage: python3 xxx.py [src_path] [save_path] [bin_type] [frame_num]") + + src_path = os.path.realpath(sys.argv[1]) + save_path = os.path.realpath(sys.argv[2]) + bin_type = sys.argv[3] + frame_num = int(sys.argv[4]) + + yolov4_onnx(src_path, save_path, bin_type, frame_num) diff --git a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/requirements.txt b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/requirements.txt index 8842750f942520b7f6b15ad0529acb6044841a33..14059e3d54498729123839e666b6d9c8377b3399 100644 --- a/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/requirements.txt +++ b/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch/requirements.txt @@ -1,5 +1,5 @@ -numpy == 1.18.5 -opencv-python == 4.2.0.34 -torch == 1.6.0 -torchvision == 0.7.0 +numpy == 1.18.5 +opencv-python == 4.2.0.34 +torch == 1.6.0 +torchvision == 0.7.0 onnx == 1.7.0 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/generate_data.py b/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/generate_data.py index 3ccceafb0f8ee7b31d0f591fb58e72dabcbfa9d0..06a030669eedc6b0bd538a484800f12a13247de2 100644 --- a/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/generate_data.py +++ b/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/generate_data.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
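In float32 mode, each `.bin` written by `preprocess_yolov4_pytorch.py` above is a flat dump of a `(1, 3, 608, 608)` NCHW RGB tensor scaled to `[0, 1]`. A minimal sketch for reading one back to verify the layout (the file name below is hypothetical):

```python
import numpy as np

# Hypothetical output file from the preprocessing step above.
bin_path = './pre_bin/COCO_val2014_000000000042.bin'

img = np.fromfile(bin_path, dtype=np.float32).reshape(1, 3, 608, 608)
assert img.min() >= 0.0 and img.max() <= 1.0   # pixels were divided by 255
print(img.shape)  # (1, 3, 608, 608): batch, RGB channels, height, width
```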
- -import sys -import os -import cv2 -import numpy as np -import argparse - -def preprocess(img_info_file, save_path, batch_size): - in_files = [] - output_data = [] - if not os.path.exists(save_path): - os.makedirs(save_path) - - with open(img_info_file, 'r') as file: - contents = file.read().split('\n') - for i in contents[:-1]: - in_files.append(i.split()[1]) - - for i, file in enumerate(in_files): - img0 = cv2.imread(file) - resized = cv2.resize(img0, (640, 640), interpolation=cv2.INTER_LINEAR) - input_data = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) - input_data = np.transpose(input_data, (2, 0, 1)).astype(np.float32) - input_data = np.expand_dims(input_data, axis=0) - input_data /= 255.0 - print("shape:", input_data.shape) - - if i % batch_size == 0: - output_data = input_data - else: - output_data = np.concatenate((output_data, input_data), axis=0) - - if (i + 1) % batch_size == 0: - output_data.tofile("{}/img_bs{}_n{}.bin".format(save_path, batch_size, i)) - - -if __name__ == "__main__": - """ - python3 generate_data.py \ - --img_info_file=img_info_amct.txt \ - --save_path=amct_data \ - --batch_size=1 - """ - parser = argparse.ArgumentParser(description='YoloV5 offline model inference.') - parser.add_argument('--img_info_file', type=str, default="img_info_amct.txt", help='original data') - parser.add_argument('--save_path', type=str, default="./amct_data", help='data for amct') - parser.add_argument('--batch_size', type=int, default=1, help='om batch size') - args = parser.parse_args() - - print(os.path.abspath(args.img_info_file)) - print(os.path.abspath(args.save_path)) - preprocess(args.img_info_file, args.save_path, args.batch_size) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os +import cv2 +import numpy as np +import argparse + +def preprocess(img_info_file, save_path, batch_size): + in_files = [] + output_data = [] + if not os.path.exists(save_path): + os.makedirs(save_path) + + with open(img_info_file, 'r') as file: + contents = file.read().split('\n') + for i in contents[:-1]: + in_files.append(i.split()[1]) + + for i, file in enumerate(in_files): + img0 = cv2.imread(file) + resized = cv2.resize(img0, (640, 640), interpolation=cv2.INTER_LINEAR) + input_data = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + input_data = np.transpose(input_data, (2, 0, 1)).astype(np.float32) + input_data = np.expand_dims(input_data, axis=0) + input_data /= 255.0 + print("shape:", input_data.shape) + + if i % batch_size == 0: + output_data = input_data + else: + output_data = np.concatenate((output_data, input_data), axis=0) + + if (i + 1) % batch_size == 0: + output_data.tofile("{}/img_bs{}_n{}.bin".format(save_path, batch_size, i)) + + +if __name__ == "__main__": + """ + python3 generate_data.py \ + --img_info_file=img_info_amct.txt \ + --save_path=amct_data \ + --batch_size=1 + """ + parser = argparse.ArgumentParser(description='YoloV5 offline model inference.') + parser.add_argument('--img_info_file', type=str, default="img_info_amct.txt", help='original data') + parser.add_argument('--save_path', type=str, default="./amct_data", help='data for amct') + parser.add_argument('--batch_size', type=int, default=1, help='om batch size') + args = parser.parse_args() + + print(os.path.abspath(args.img_info_file)) + print(os.path.abspath(args.save_path)) + preprocess(args.img_info_file, args.save_path, args.batch_size) diff --git a/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/img.png b/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/img.png deleted file mode 100644 index 752d68e6af32a0e14dd71d2e4ab687bc214d8d05..0000000000000000000000000000000000000000 Binary files a/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch/img.png and /dev/null differ diff --git a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/LICENSE b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/LICENSE index db05a35866f7f1e2bc78bdfe9e7048e779552d8c..09d493bf1fc257505c1336f3f87425568ab9da3c 100644 --- a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/LICENSE +++ b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
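`generate_data.py` above concatenates `batch_size` preprocessed frames before each dump, so every `img_bs{B}_n{i}.bin` holds a `(B, 3, 640, 640)` float32 batch. A small sketch for loading such a file for AMCT calibration (file name and batch size are illustrative):

```python
import numpy as np

batch_size = 4                                    # must match --batch_size
calib = np.fromfile('./amct_data/img_bs4_n3.bin', dtype=np.float32)
calib = calib.reshape(batch_size, 3, 640, 640)    # NCHW, RGB, scaled to [0, 1]
print(calib.shape)
```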
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/ReadMe.md b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/ReadMe.md index 38b94eb75703250254eb5f8efd66c4585a1350b0..280219cc5f70c0d3506b50b3ca64d20ff3eda173 100644 --- a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/ReadMe.md +++ b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/ReadMe.md @@ -1,169 +1,169 @@ -# 参考库文 - -- [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) - -# 参考实现 - -- [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) - -# 依赖 - -| 依赖名称 | 版本 | -| ------------ | :----------- | -| pytorch | 1.6.0 | -| soundfile | 0.10.3.post1 | -| transformers | 4.3.3 | -| tqdm | 4.49.0 | -| numpy | 1.19.5 | -| datasets | 1.6.2 | -| jiwer | 2.2.0 | - -# 准备数据集 - -运行`preprocess.py`脚本,会自动在线下载所需的分词器模型、Librispeech数据集(下载过程可能比较长),并把数据处理为bin文件,同时生成数据集的info文件。 - -``` -python3.7 preprocess.py --pre_data_save_path=./pre_data/clean --which_dataset=clean -``` - -参数说明: - -- --pre_data_save_path:预处理数据保存路径 -- --which_dataset:指定所用的数据集 - - validation:patrickvonplaten/librispeech_asr_dummy数据集,特别小,只有70多条音频数据 - - clean:Librispeech clean数据集 - - other:Librispeech other数据集 - -官方提供了模型在Librispeech clean和Librispeech other数据集上的精度,本示例中仅用Librispeech clean测试精度。 - -# 模型推理 - -1. 模型转换。 - - 使用PyTorch将模型权重文件pth转换为onnx文件,再使用atc工具将onnx文件转为离线推理模型om文件。 - - 1. 
导出onnx文件。 - - 运行`export_onnx.py`脚本,会自动在线下载pth模型,并把pth模型转换为onnx模型。 - - ``` - python3.7 export_onnx.py --model_save_dir=./models - ``` - - 运行完之后,会在当前目录的`models`目录下生成`wav2vec2-base-960h.onnx`模型文件。 - - 使用atc工具将onnx文件转换为om文件,导出onnx模型文件时需设置算子版本为11。 - - 2. 使用atc工具将onnx模型转om模型。 - - 1. 根据实际情况,修改`onnx2om.sh`脚本中的环境变量,具体的脚本示例如下: - - ``` - #!/bin/bash - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - - atc --framework=5 --model=./models/wav2vec2-base-960h.onnx --output=./models/wav2vec2-base-960h --input_format=ND --input_shape="input:1,-1" --dynamic_dims="10000;20000;30000;40000;50000;60000;70000;80000;90000;100000;110000;120000;130000;140000;150000;160000;170000;180000;190000;200000;210000;220000;230000;240000;250000;260000;270000;280000;290000;300000;310000;320000;330000;340000;350000;360000;370000;380000;390000;400000;410000;420000;430000;440000;450000;460000;470000;480000;490000;500000;510000;520000;530000;540000;550000;560000" --log=error --soc_version=Ascend710 - ``` - - 参数说明: - - - --model:为ONNX模型文件。 - - --framework:5代表ONNX模型。 - - --output:输出的OM模型。 - - --input_format:输入数据的格式。 - - --input_shape:输入数据的shape。 - - --log:日志等级。 - - --soc_version:部署芯片类型。 - - 2. 执行onnx2om.sh脚本,将onnx文件转为离线推理模型文件om文件。 - - ``` - bash onnx2om.sh - ``` - - 运行成功后在`models`目录下生成`wav2vec2-base-960h.om`模型文件。 - -2. 开始推理验证。 - - 1. 配置环境变量 - - ``` - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - ``` - - `install_path`请修改为Toolkit的实际安装路径。 - - 2. 运行`pyacl_infer.py`进行推理,同时输出推理性能数据。 - - ``` - python3.7 pyacl_infer.py \ - --model_path=./models/wav2vec2-base-960h.om \ - --device_id=0 \ - --cpu_run=True \ - --sync_infer=True \ - --workspace=0 \ - --input_info_file_path=./pre_data/clean/bin_file.info \ - --input_dtypes=float32 \ - --infer_res_save_path=./om_infer_res_clean \ --res_save_type=bin - ``` - - 参数说明: - - - --model_path:模型路径 - - --device_id:npu id - - --cpu_run:MeasureTime类的cpu_run参数,True or False - - --sync_infer:推理方式: - - True:同步推理 - - False:异步推理 - - --workspace:类似TensorRT‘workspace’参数,计算平均推理时间时排除前n次推理 - - --input_info_file_path:bin_info文件 - - --input_dtypes:模型输入的类型,用逗号分割(参考`DTYPE`变量) - - e.g. 模型只有一个输入:--input_dtypes=float32 - - e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) - - --infer_res_save_path:推理结果保存目录 - - --res_save_type:推理结果保存类型,bin或npy - 3. 推理数据后处理与精度统计。 - - 运行`postprocess.py`,会进行推理数据后处理,并进行精度统计。 - - ``` - python3.7 postprocess.py \ - --bin_file_path=./om_infer_res_clean \ - --res_save_path=./om_infer_res_clean/transcriptions.txt \ - --which_dataset=clean - ``` - - 参数说明: - - - --bin_file_path:pyacl推理结果存放路径 - - --res_save_path:后处理结果存放txt文件 - - --which_dataset:精度统计所用的数据集,参看preprocess.py的参数说明 - -4. 
性能测试 - - 由于TensorRT无法运行`wav2vec2-base-960h.onnx`模型,所以性能测试以pyacl得到的om推理性能和pytorch在线推理性能作比较。 - - 在GPU环境上运行`pth_online_infer.py`脚本,得到pytorch在线推理性能。 - - ``` - python pth_online_infer.py \ - --pred_res_save_path=./pth_online_infer_res/clean/transcriptions.txt \ - --which_dataset=clean - ``` - - 参数说明: - - - --pred_res_save_path:pytorch在线推理结果存放路径 - - --which_dataset:参看preprocess.py的参数说明 - - 脚本执行完毕后,会在屏幕上输出pytorch在线推理平均推理时间(average infer time(ms)),假定为![Figure Name:202155144621.png CAD Name:zh-cn_formulaimage_0000001124002380.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/1_zh-cn_formulaimage_0000001124002380.png),换算为单卡后pytorch在线推理的每秒推理数量为:![Figure Name:202153161710.png CAD Name:zh-cn_formulaimage_0000001166163171.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/3_zh-cn_formulaimage_0000001166163171.png)。 - +# 参考库文 + +- [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) + +# 参考实现 + +- [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) + +# 依赖 + +| 依赖名称 | 版本 | +| ------------ | :----------- | +| pytorch | 1.6.0 | +| soundfile | 0.10.3.post1 | +| transformers | 4.3.3 | +| tqdm | 4.49.0 | +| numpy | 1.19.5 | +| datasets | 1.6.2 | +| jiwer | 2.2.0 | + +# 准备数据集 + +运行`preprocess.py`脚本,会自动在线下载所需的分词器模型、Librispeech数据集(下载过程可能比较长),并把数据处理为bin文件,同时生成数据集的info文件。 + +``` +python3.7 preprocess.py --pre_data_save_path=./pre_data/clean --which_dataset=clean +``` + +参数说明: + +- --pre_data_save_path:预处理数据保存路径 +- --which_dataset:指定所用的数据集 + - validation:patrickvonplaten/librispeech_asr_dummy数据集,特别小,只有70多条音频数据 + - clean:Librispeech clean数据集 + - other:Librispeech other数据集 + +官方提供了模型在Librispeech clean和Librispeech other数据集上的精度,本示例中仅用Librispeech clean测试精度。 + +# 模型推理 + +1. 模型转换。 + + 使用PyTorch将模型权重文件pth转换为onnx文件,再使用atc工具将onnx文件转为离线推理模型om文件。 + + 1. 导出onnx文件。 + + 运行`export_onnx.py`脚本,会自动在线下载pth模型,并把pth模型转换为onnx模型。 + + ``` + python3.7 export_onnx.py --model_save_dir=./models + ``` + + 运行完之后,会在当前目录的`models`目录下生成`wav2vec2-base-960h.onnx`模型文件。 + + 使用atc工具将onnx文件转换为om文件,导出onnx模型文件时需设置算子版本为11。 + + 2. 使用atc工具将onnx模型转om模型。 + + 1. 根据实际情况,修改`onnx2om.sh`脚本中的环境变量,具体的脚本示例如下: + + ``` + #!/bin/bash + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + + atc --framework=5 --model=./models/wav2vec2-base-960h.onnx --output=./models/wav2vec2-base-960h --input_format=ND --input_shape="input:1,-1" --dynamic_dims="10000;20000;30000;40000;50000;60000;70000;80000;90000;100000;110000;120000;130000;140000;150000;160000;170000;180000;190000;200000;210000;220000;230000;240000;250000;260000;270000;280000;290000;300000;310000;320000;330000;340000;350000;360000;370000;380000;390000;400000;410000;420000;430000;440000;450000;460000;470000;480000;490000;500000;510000;520000;530000;540000;550000;560000" --log=error --soc_version=Ascend710 + ``` + + 参数说明: + + - --model:为ONNX模型文件。 + - --framework:5代表ONNX模型。 + - --output:输出的OM模型。 + - --input_format:输入数据的格式。 + - --input_shape:输入数据的shape。 + - --log:日志等级。 + - --soc_version:部署芯片类型。 + + 2. 
执行onnx2om.sh脚本,将onnx文件转为离线推理模型文件om文件。 + + ``` + bash onnx2om.sh + ``` + + 运行成功后在`models`目录下生成`wav2vec2-base-960h.om`模型文件。 + +2. 开始推理验证。 + + 1. 配置环境变量 + + ``` + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + ``` + + `install_path`请修改为Toolkit的实际安装路径。 + + 2. 运行`pyacl_infer.py`进行推理,同时输出推理性能数据。 + + ``` + python3.7 pyacl_infer.py \ + --model_path=./models/wav2vec2-base-960h.om \ + --device_id=0 \ + --cpu_run=True \ + --sync_infer=True \ + --workspace=0 \ + --input_info_file_path=./pre_data/clean/bin_file.info \ + --input_dtypes=float32 \ + --infer_res_save_path=./om_infer_res_clean \ --res_save_type=bin + ``` + + 参数说明: + + - --model_path:模型路径 + - --device_id:npu id + - --cpu_run:MeasureTime类的cpu_run参数,True or False + - --sync_infer:推理方式: + - True:同步推理 + - False:异步推理 + - --workspace:类似TensorRT‘workspace’参数,计算平均推理时间时排除前n次推理 + - --input_info_file_path:bin_info文件 + - --input_dtypes:模型输入的类型,用逗号分割(参考`DTYPE`变量) + - e.g. 模型只有一个输入:--input_dtypes=float32 + - e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) + - --infer_res_save_path:推理结果保存目录 + - --res_save_type:推理结果保存类型,bin或npy + 3. 推理数据后处理与精度统计。 + + 运行`postprocess.py`,会进行推理数据后处理,并进行精度统计。 + + ``` + python3.7 postprocess.py \ + --bin_file_path=./om_infer_res_clean \ + --res_save_path=./om_infer_res_clean/transcriptions.txt \ + --which_dataset=clean + ``` + + 参数说明: + + - --bin_file_path:pyacl推理结果存放路径 + - --res_save_path:后处理结果存放txt文件 + - --which_dataset:精度统计所用的数据集,参看preprocess.py的参数说明 + +4. 
性能测试 + + 由于TensorRT无法运行`wav2vec2-base-960h.onnx`模型,所以性能测试以pyacl得到的om推理性能和pytorch在线推理性能作比较。 + + 在GPU环境上运行`pth_online_infer.py`脚本,得到pytorch在线推理性能。 + + ``` + python pth_online_infer.py \ + --pred_res_save_path=./pth_online_infer_res/clean/transcriptions.txt \ + --which_dataset=clean + ``` + + 参数说明: + + - --pred_res_save_path:pytorch在线推理结果存放路径 + - --which_dataset:参看preprocess.py的参数说明 + + 脚本执行完毕后,会在屏幕上输出pytorch在线推理平均推理时间(average infer time(ms)),假定为![Figure Name:202155144621.png CAD Name:zh-cn_formulaimage_0000001124002380.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/1_zh-cn_formulaimage_0000001124002380.png),换算为单卡后pytorch在线推理的每秒推理数量为:![Figure Name:202153161710.png CAD Name:zh-cn_formulaimage_0000001166163171.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/3_zh-cn_formulaimage_0000001166163171.png)。 + 上述运行pyacl_infer.py脚本会得到om平均推理时间(average infer time(ms)),假定为![Figure Name:202153161914.png CAD Name:zh-cn_formulaimage_0000001166164017.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/1_zh-cn_formulaimage_0000001166164017.png),换算为单卡后om的每秒推理数量为:![Figure Name:202153161758.png CAD Name:zh-cn_formulaimage_0000001119363638.png](http://resource.idp.huawei.com/idpresource/nasshare/editor/image/34040284354/2_zh-cn_formulaimage_0000001119363638.png)。 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/pyacl_infer.py b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/pyacl_infer.py index 1ef7e4a5c8244e98642c9b61e59cfe9e448ff924..c41c1a20dfd7cf567ddb589ff908ea9be5e67746 100644 --- a/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/pyacl_infer.py +++ b/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch/pyacl_infer.py @@ -1,138 +1,138 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
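The OM model built with the `--dynamic_dims` option above only accepts the preset sequence lengths 10000, 20000, ..., 560000 samples. A common way to feed arbitrary-length audio into such a model is to zero-pad each clip up to the next supported gear; the sketch below illustrates that idea and is an assumption about the preprocessing, not code taken from this repository.

```python
import numpy as np

def pad_to_gear(audio, step=10000, max_len=560000):
    """Zero-pad a 1-D float32 waveform to the next length the OM model supports."""
    target = min(max_len, ((len(audio) + step - 1) // step) * step)
    padded = np.zeros(target, dtype=np.float32)
    padded[:min(len(audio), target)] = audio[:target]
    return padded[np.newaxis, :]            # shape (1, target), matching "input:1,-1"

wave = np.random.randn(123456).astype(np.float32)   # stand-in for a LibriSpeech clip
print(pad_to_gear(wave).shape)                       # (1, 130000)
```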
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import acl -from acl_net import AclModel - -import os -import shutil -import argparse -import numpy as np -from tqdm import tqdm - -DTYPE = { - 'float16': np.float16, - 'float32': np.float32, - 'float64': np.float64, - 'int16': np.int16, - 'int32': np.int32, - 'int64': np.int64, - 'uint8': np.uint8, - 'uint16': np.uint16, - 'uint32': np.uint32, - 'uint64': np.uint64 -} - -if __name__ == '__main__': - # 参数解析 - parser = argparse.ArgumentParser() - parser.add_argument('--model_path', required=True) - parser.add_argument('--device_id', required=True, type=int) - parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) - parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) - parser.add_argument('--workspace', required=True, type=int) - parser.add_argument('--input_info_file_path', required=True) - parser.add_argument('--input_dtypes', required=True) - parser.add_argument('--infer_res_save_path', required=True) - parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) - opt = parser.parse_args() - - # 创建模型 - measurements = {} - om_model = AclModel(device_id=opt.device_id, - model_path=opt.model_path, - sync_infer=eval(opt.sync_infer), - measurements=measurements, - key='per_infer_time_ns', - cpu_run=eval(opt.cpu_run)) - - # 创建目录 - if os.path.exists(opt.infer_res_save_path): - shutil.rmtree(opt.infer_res_save_path) - os.makedirs(opt.infer_res_save_path) - - # 读取info_file - inputs_info = {} - with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: - line = f_info.readline() - while line: - line = line.rstrip('\n') - contents = line.split() - info = {'path': contents[1], 'shape': eval(contents[2])} - inputs_info.setdefault(contents[0], []).append(info) - line = f_info.readline() - - # 解析输入类型 - input_dtypes = opt.input_dtypes.split(',') - input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) - - # 读取文件推理 - total_infer_time = 0 - total_infer_time_workspace = 0 - total_infer_num = 0 - for key, values in tqdm(inputs_info.items()): - # 构造输入 - inputs = [] - dims = [] - for idx, value in enumerate(values): - x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) - inputs.append(x) - dims.extend(value['shape']) - dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} - - # 推理得到输出 - output = om_model(inputs, dims_info) - total_infer_num += 1 - - # 保存文件 - if opt.res_save_type == 'bin': - for idx, data in enumerate(output): - data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) - else: - for idx, data in enumerate(output): - np.save(os.path.join(opt.infer_res_save_path, key + '.' 
+ str(idx) + '.npy'), data) - - # 计算时间 - total_infer_time += measurements['per_infer_time_ns'] - if total_infer_num > opt.workspace: - total_infer_time_workspace += measurements['per_infer_time_ns'] - - # 推理时间 - print('[INFO] Infer time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ - 'average pure infer time after workspace(ms): ' + str(abs( - total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' - print(msg) - with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: - f_infer_time.write(msg) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
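The timing summary at the end of `pyacl_infer.py` above excludes the first `--workspace` inferences (warm-up) from the "after workspace" average. A tiny worked example of that arithmetic with made-up numbers:

```python
# Per-inference times in ms for 5 runs, with --workspace=2 warm-up runs excluded.
times_ms = [42.0, 25.0, 20.0, 21.0, 19.0]
workspace = 2

avg_all = sum(times_ms) / len(times_ms)                              # 25.4 ms
avg_after = sum(times_ms[workspace:]) / (len(times_ms) - workspace)  # 20.0 ms
print(avg_all, avg_after)
```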
+# ============================================================================ + +import acl +from acl_net import AclModel + +import os +import shutil +import argparse +import numpy as np +from tqdm import tqdm + +DTYPE = { + 'float16': np.float16, + 'float32': np.float32, + 'float64': np.float64, + 'int16': np.int16, + 'int32': np.int32, + 'int64': np.int64, + 'uint8': np.uint8, + 'uint16': np.uint16, + 'uint32': np.uint32, + 'uint64': np.uint64 +} + +if __name__ == '__main__': + # 参数解析 + parser = argparse.ArgumentParser() + parser.add_argument('--model_path', required=True) + parser.add_argument('--device_id', required=True, type=int) + parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) + parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) + parser.add_argument('--workspace', required=True, type=int) + parser.add_argument('--input_info_file_path', required=True) + parser.add_argument('--input_dtypes', required=True) + parser.add_argument('--infer_res_save_path', required=True) + parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) + opt = parser.parse_args() + + # 创建模型 + measurements = {} + om_model = AclModel(device_id=opt.device_id, + model_path=opt.model_path, + sync_infer=eval(opt.sync_infer), + measurements=measurements, + key='per_infer_time_ns', + cpu_run=eval(opt.cpu_run)) + + # 创建目录 + if os.path.exists(opt.infer_res_save_path): + shutil.rmtree(opt.infer_res_save_path) + os.makedirs(opt.infer_res_save_path) + + # 读取info_file + inputs_info = {} + with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: + line = f_info.readline() + while line: + line = line.rstrip('\n') + contents = line.split() + info = {'path': contents[1], 'shape': eval(contents[2])} + inputs_info.setdefault(contents[0], []).append(info) + line = f_info.readline() + + # 解析输入类型 + input_dtypes = opt.input_dtypes.split(',') + input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) + + # 读取文件推理 + total_infer_time = 0 + total_infer_time_workspace = 0 + total_infer_num = 0 + for key, values in tqdm(inputs_info.items()): + # 构造输入 + inputs = [] + dims = [] + for idx, value in enumerate(values): + x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) + inputs.append(x) + dims.extend(value['shape']) + dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} + + # 推理得到输出 + output = om_model(inputs, dims_info) + total_infer_num += 1 + + # 保存文件 + if opt.res_save_type == 'bin': + for idx, data in enumerate(output): + data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) + else: + for idx, data in enumerate(output): + np.save(os.path.join(opt.infer_res_save_path, key + '.' 
+ str(idx) + '.npy'), data) + + # 计算时间 + total_infer_time += measurements['per_infer_time_ns'] + if total_infer_num > opt.workspace: + total_infer_time_workspace += measurements['per_infer_time_ns'] + + # 推理时间 + print('[INFO] Infer time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ + 'average pure infer time after workspace(ms): ' + str(abs( + total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' + print(msg) + with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: + f_infer_time.write(msg) diff --git a/ACL_PyTorch/built-in/nlp/textcnn/License b/ACL_PyTorch/built-in/nlp/textcnn/License index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/License +++ b/ACL_PyTorch/built-in/nlp/textcnn/License @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
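`pyacl_infer.py` above expects every line of `--input_info_file_path` to carry a sample key, a `.bin` path and a Python-literal shape, with lines that share a key grouped into one multi-input sample; because each line is split on whitespace, the shape literal must contain no spaces. A minimal sketch of writing such an info file (keys, paths and shapes are illustrative):

```python
# Hypothetical bin_file.info entries in the "key path shape" layout parsed above.
entries = [
    ('utt_0001', './pre_data/clean/utt_0001.bin', (1, 130000)),
    ('utt_0002', './pre_data/clean/utt_0002.bin', (1, 80000)),
]
with open('./pre_data/clean/bin_file.info', 'wt', encoding='utf-8') as f:
    for key, path, shape in entries:
        # str(shape) would contain a space after the comma, which would break
        # line.split() in pyacl_infer.py, so strip spaces from the literal.
        f.write('{} {} {}\n'.format(key, path, str(shape).replace(' ', '')))
```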
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/textcnn/README.md b/ACL_PyTorch/built-in/nlp/textcnn/README.md index 0d1a1be488841150f7e7f32d8b5d0f8d9c547e29..19f9da537e45ba6c660681a314620e7b3d2a0d03 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/README.md +++ b/ACL_PyTorch/built-in/nlp/textcnn/README.md @@ -1,62 +1,62 @@ -# 用于Textcnn模型离线推理指导 -## 1 获取开源代码 - -``` -https://gitee.com/zhang_kaiqi/ascend-textcnn.git -cd ascend-textcnn -git checkout 7cd94c509dc3f615a5d8f4b3816e43ad837a649e -cd - -git clone https://gitee.com/Ronnie_zheng/MagicONNX.git -cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 -cd - -``` -## 2 文件放置 -把TextCNN*脚本和gen_dataset_info.py脚本放到ascend_textcnn文件夹里;把*.sh脚本和fit_onnx.py放在ascend_textcnn的平行文件夹 - -## 3 模型推理 -1. 前处理 - -``` -cd ascend_textcnn -python3 TextCNN_preprocess.py --save_folder bin -python3 gen_dataset_info.py bin info -``` - -2. 转onnx - -获取[TextCNN_9045_seed460473.pth](https://gitee.com/hex5b25/ascend-textcnn/raw/master/Chinese-Text-Classification-Pytorch/THUCNews/saved_dict/TextCNN_9045_seed460473.pth) - -``` -python3 TextCNN_pth2onnx.py --weight_path ./TextCNN_9045_seed460473.pth --onnx_path ./dy_textcnn.onnx -``` - -3. 转om - -``` -cd .. -bash onnxsim.sh -bash onnx2mgonnx.sh -bash onnx2om.sh -``` - -4. 后处理得到精度 - -精度结果保存在result_bs*.json中。*代表具体的batch_size值(从4开始) - -``` -./benchmark.x86_64 -batch_size=* -om_path=mg_om_dir/textcnn_*bs_mg.om -output_binary=True -input_text_path=ascend-textcnn/info -useDvpp=False -model_type=nlp -python3 ascend-textcnn/TextCNN_postprocess.py result/dumpOutput_device0 >result_bs*.json -``` -5. 性能数据 - -推理结果打屏显示 - -``` -./msame --model mg_om_dir/trextcnn_*bs_mg.om --loop 100 -``` - -## 3 自验 -| 模型 | 官网精度 | 710离线推理精度 | 710性能 | -|--------------|--------|-----------|-------| -| Textcnn 64bs | [91.22%](https://gitee.com/huangyd8/Chinese-Text-Classification-Pytorch) | 90.47% | 27242.83 | - +# 用于Textcnn模型离线推理指导 +## 1 获取开源代码 + +``` +https://gitee.com/zhang_kaiqi/ascend-textcnn.git +cd ascend-textcnn +git checkout 7cd94c509dc3f615a5d8f4b3816e43ad837a649e +cd - +git clone https://gitee.com/Ronnie_zheng/MagicONNX.git +cd MagicONNX && git checkout 8d62ae9dde478f35bece4b3d04eef573448411c9 +cd - +``` +## 2 文件放置 +把TextCNN*脚本和gen_dataset_info.py脚本放到ascend_textcnn文件夹里;把*.sh脚本和fit_onnx.py放在ascend_textcnn的平行文件夹 + +## 3 模型推理 +1. 前处理 + +``` +cd ascend_textcnn +python3 TextCNN_preprocess.py --save_folder bin +python3 gen_dataset_info.py bin info +``` + +2. 转onnx + +获取[TextCNN_9045_seed460473.pth](https://gitee.com/hex5b25/ascend-textcnn/raw/master/Chinese-Text-Classification-Pytorch/THUCNews/saved_dict/TextCNN_9045_seed460473.pth) + +``` +python3 TextCNN_pth2onnx.py --weight_path ./TextCNN_9045_seed460473.pth --onnx_path ./dy_textcnn.onnx +``` + +3. 转om + +``` +cd .. +bash onnxsim.sh +bash onnx2mgonnx.sh +bash onnx2om.sh +``` + +4. 后处理得到精度 + +精度结果保存在result_bs*.json中。*代表具体的batch_size值(从4开始) + +``` +./benchmark.x86_64 -batch_size=* -om_path=mg_om_dir/textcnn_*bs_mg.om -output_binary=True -input_text_path=ascend-textcnn/info -useDvpp=False -model_type=nlp +python3 ascend-textcnn/TextCNN_postprocess.py result/dumpOutput_device0 >result_bs*.json +``` +5. 
性能数据 + +推理结果打屏显示 + +``` +./msame --model mg_om_dir/trextcnn_*bs_mg.om --loop 100 +``` + +## 3 自验 +| 模型 | 官网精度 | 710离线推理精度 | 710性能 | +|--------------|--------|-----------|-------| +| Textcnn 64bs | [91.22%](https://gitee.com/huangyd8/Chinese-Text-Classification-Pytorch) | 90.47% | 27242.83 | + diff --git a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_postprocess.py b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_postprocess.py index 39e8d8db7b33b57f71b523493ae217993bee7824..2c926fa8f1dd94a108d94c0af4f7709f7f6995a0 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_postprocess.py +++ b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_postprocess.py @@ -1,29 +1,29 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import struct -import numpy as np -import sys -result_root = sys.argv[1] -correct, total = 0, 0 - -for result_path in os.listdir(result_root): - label = int(result_path.split('.')[0].split('_')[1]) - - data_raw = np.fromfile(os.path.join(result_root, result_path), dtype=np.float16) - result = int(np.argmax(data_raw)) - total += 1 - correct += 1 if label == result else 0 -print('acc: ', correct/total) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import struct +import numpy as np +import sys +result_root = sys.argv[1] +correct, total = 0, 0 + +for result_path in os.listdir(result_root): + label = int(result_path.split('.')[0].split('_')[1]) + + data_raw = np.fromfile(os.path.join(result_root, result_path), dtype=np.float16) + result = int(np.argmax(data_raw)) + total += 1 + correct += 1 if label == result else 0 +print('acc: ', correct/total) diff --git a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_preprocess.py b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_preprocess.py index 525a57acca9c16d46fe49f58e6920df5dad30f95..a3c6c7daa100c0846c564ff97915c0fb1a9f6241 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_preprocess.py +++ b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_preprocess.py @@ -1,108 +1,108 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding: UTF-8 - -import os -import argparse -import pickle as pkl -import numpy as np - - -parser = argparse.ArgumentParser(description='Chinese Text Classification') -parser.add_argument('--embedding', default='pre_trained', type=str, help='random or pre_trained') -parser.add_argument('--word', default=False, type=bool, help='True for word, False for char') -parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl') -parser.add_argument('--dataset', type=str, default='./Chinese-Text-Classification-Pytorch/THUCNews') -parser.add_argument('--pad_size', type=int, default=32) -parser.add_argument('--train_path', type=str, default='data/train.txt') -parser.add_argument('--test_path', type=str, default='data/test.txt') -parser.add_argument('--save_folder', type=str, default='') -args = parser.parse_args() - -args.test_path = os.path.join(args.dataset, args.test_path) -args.train_path = os.path.join(args.dataset, args.train_path) -args.vocab_path = os.path.join(args.dataset, args.vocab_path) -if args.save_folder == '': - args.save_folder = args.dataset + '_bin' -if not os.path.exists(args.save_folder): - os.mkdir(args.save_folder) - -MAX_VOCAB_SIZE = 10000 # 词表长度限制 -UNK, PAD = '', '' # 未知字,padding符号 - - -def build_vocab(file_path, tokenizer_, max_size, min_freq): - vocab_dic = {} - with open(file_path, 'r', encoding='UTF-8') as f_: - for line_ in f_: - lin = line_.strip() - if not lin: - continue - content_ = lin.split('\t')[0] - for word_ in tokenizer_(content_): - vocab_dic[word_] = vocab_dic.get(word_, 0) + 1 - vocab_list = sorted([_ for _ in vocab_dic.items() if _[1] >= min_freq], key=lambda x: x[1], reverse=True) - vocab_list = vocab_list[:max_size] - vocab_dic = {word_count[0]: idx for idx, word_count in enumerate(vocab_list)} - vocab_dic.update({UNK: len(vocab_dic), PAD: len(vocab_dic) + 1}) - return vocab_dic - - -if __name__ == '__main__': - - """ - Usage: - python preprocess_to_bin.py - """ - - if args.word: - tokenizer = lambda x: x.split(' ') # 以空格隔开,word-level - else: - tokenizer = lambda x: [y for y in x] # char-level - if os.path.exists(args.vocab_path): - vocab = pkl.load(open(args.vocab_path, 'rb')) - else: - assert args.train_path != '' - vocab = build_vocab(args.train_path, tokenizer_=tokenizer, max_size=MAX_VOCAB_SIZE, min_freq=1) - pkl.dump(vocab, open(args.vocab_path, 'wb+')) - print(f"Vocab size: {len(vocab)}") - print('bin file save path: ', os.path.abspath(args.save_folder)) - - contents = [] - f = open(args.test_path, 'r', encoding='UTF-8') - idx = 0 - for line in f: - lin = line.strip() - if not lin: - continue - content, label = lin.split('\t') - words_line = [] - token = tokenizer(content) - if args.pad_size: - if len(token) < args.pad_size: - token.extend([PAD] * (args.pad_size - len(token))) - else: - token = token[:args.pad_size] - # word to id - for word in token: - words_line.append(vocab.get(word, vocab.get(UNK))) - - # convert to bin - words_line_np = np.asarray(words_line, dtype=np.int64) - bin_file_path = os.path.join(args.save_folder, '{}_{}.bin'.format(idx, label)) - words_line_np.tofile(bin_file_path) - idx += 1 - +# Copyright 
2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: UTF-8 + +import os +import argparse +import pickle as pkl +import numpy as np + + +parser = argparse.ArgumentParser(description='Chinese Text Classification') +parser.add_argument('--embedding', default='pre_trained', type=str, help='random or pre_trained') +parser.add_argument('--word', default=False, type=bool, help='True for word, False for char') +parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl') +parser.add_argument('--dataset', type=str, default='./Chinese-Text-Classification-Pytorch/THUCNews') +parser.add_argument('--pad_size', type=int, default=32) +parser.add_argument('--train_path', type=str, default='data/train.txt') +parser.add_argument('--test_path', type=str, default='data/test.txt') +parser.add_argument('--save_folder', type=str, default='') +args = parser.parse_args() + +args.test_path = os.path.join(args.dataset, args.test_path) +args.train_path = os.path.join(args.dataset, args.train_path) +args.vocab_path = os.path.join(args.dataset, args.vocab_path) +if args.save_folder == '': + args.save_folder = args.dataset + '_bin' +if not os.path.exists(args.save_folder): + os.mkdir(args.save_folder) + +MAX_VOCAB_SIZE = 10000 # 词表长度限制 +UNK, PAD = '', '' # 未知字,padding符号 + + +def build_vocab(file_path, tokenizer_, max_size, min_freq): + vocab_dic = {} + with open(file_path, 'r', encoding='UTF-8') as f_: + for line_ in f_: + lin = line_.strip() + if not lin: + continue + content_ = lin.split('\t')[0] + for word_ in tokenizer_(content_): + vocab_dic[word_] = vocab_dic.get(word_, 0) + 1 + vocab_list = sorted([_ for _ in vocab_dic.items() if _[1] >= min_freq], key=lambda x: x[1], reverse=True) + vocab_list = vocab_list[:max_size] + vocab_dic = {word_count[0]: idx for idx, word_count in enumerate(vocab_list)} + vocab_dic.update({UNK: len(vocab_dic), PAD: len(vocab_dic) + 1}) + return vocab_dic + + +if __name__ == '__main__': + + """ + Usage: + python preprocess_to_bin.py + """ + + if args.word: + tokenizer = lambda x: x.split(' ') # 以空格隔开,word-level + else: + tokenizer = lambda x: [y for y in x] # char-level + if os.path.exists(args.vocab_path): + vocab = pkl.load(open(args.vocab_path, 'rb')) + else: + assert args.train_path != '' + vocab = build_vocab(args.train_path, tokenizer_=tokenizer, max_size=MAX_VOCAB_SIZE, min_freq=1) + pkl.dump(vocab, open(args.vocab_path, 'wb+')) + print(f"Vocab size: {len(vocab)}") + print('bin file save path: ', os.path.abspath(args.save_folder)) + + contents = [] + f = open(args.test_path, 'r', encoding='UTF-8') + idx = 0 + for line in f: + lin = line.strip() + if not lin: + continue + content, label = lin.split('\t') + words_line = [] + token = tokenizer(content) + if args.pad_size: + if len(token) < args.pad_size: + token.extend([PAD] * (args.pad_size - len(token))) + else: + token = token[:args.pad_size] + # word to id + for word in token: + words_line.append(vocab.get(word, vocab.get(UNK))) + + # convert to bin + words_line_np = 
np.asarray(words_line, dtype=np.int64) + bin_file_path = os.path.join(args.save_folder, '{}_{}.bin'.format(idx, label)) + words_line_np.tofile(bin_file_path) + idx += 1 + f.close() \ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_pth2onnx.py b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_pth2onnx.py index 314d0090cf19dc9c1cdfbf0ae4bb11e0331cb27b..f121879a6284f477e9d2f70be7779a895d6e460b 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_pth2onnx.py +++ b/ACL_PyTorch/built-in/nlp/textcnn/TextCNN_pth2onnx.py @@ -1,57 +1,57 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import argparse -import pickle as pkl -import torch - -sys.path.append(r'./Chinese-Text-Classification-Pytorch') -from models import TextCNN - -parser = argparse.ArgumentParser(description='TextCNN_pth2onnx.py') -parser.add_argument('--weight_path', required=True, help='Path to model weight file, abs path recommended.') -parser.add_argument('--dataset', default='./Chinese-Text-Classification-Pytorch/THUCNews', - help="""Dataset path, train: $dataset/data/train.txt, dev: $dataset/data/dev.txt, \n - test: $dataset/data/text.txt, classes list: $dataset/data/class.txt, \n - vocab: $dataset/data/vocab.pkl, embedding file should be in $dataset/data/""") -parser.add_argument('--embedding', default='embedding_SougouNews.npz', - help="embedding file of $dataset/data/") -parser.add_argument('--onnx_path', required=True, help='Path to save onnx weights.') -args = parser.parse_args() - - -def main(): - config = TextCNN.Config(args.dataset, args.embedding) - vocab = pkl.load(open(config.vocab_path, 'rb')) - config.n_vocab = len(vocab) - - model = TextCNN.Model(config) - model.load_state_dict(torch.load(args.weight_path, map_location=torch.device('cpu'))) - model.eval() - input_names = ['sentence'] - output_names = ['class'] - dynamic_axes = {'sentence': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randint(100, (1, 32)) - torch.onnx.export(model, dummy_input, args.onnx_path, input_names=input_names, verbose=True, - output_names=output_names, dynamic_axes=dynamic_axes, opset_version=11) - -if __name__ == '__main__': - """ - Usage Example: - python TextCNN_pth2onnx.py \ - --weight_path ./TextCNN_9045_seed460473.pth \ - --onnx_path ./dy_textcnn.onnx - """ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
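For a quick check of the preprocessing output, a minimal sketch (assuming the README's default `bin` output folder, the default `--pad_size 32`, and the `{idx}_{label}.bin` naming used by `TextCNN_preprocess.py` above; not part of the original patch) that reads one sample back and confirms its dtype and length:

```python
# Read back one preprocessed sample written by TextCNN_preprocess.py (sketch only).
# Assumes pad_size=32 and the "{idx}_{label}.bin" naming shown above.
import sys
import numpy as np

bin_path = sys.argv[1]                       # e.g. bin/0_3.bin (hypothetical name)
label = int(bin_path.rsplit('/', 1)[-1].split('.')[0].split('_')[1])

ids = np.fromfile(bin_path, dtype=np.int64)  # token ids stored with ndarray.tofile()
assert ids.shape == (32,), f"expected 32 token ids, got {ids.shape}"
print("label:", label, "first ids:", ids[:8])
```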
+ +import sys +import argparse +import pickle as pkl +import torch + +sys.path.append(r'./Chinese-Text-Classification-Pytorch') +from models import TextCNN + +parser = argparse.ArgumentParser(description='TextCNN_pth2onnx.py') +parser.add_argument('--weight_path', required=True, help='Path to model weight file, abs path recommended.') +parser.add_argument('--dataset', default='./Chinese-Text-Classification-Pytorch/THUCNews', + help="""Dataset path, train: $dataset/data/train.txt, dev: $dataset/data/dev.txt, \n + test: $dataset/data/text.txt, classes list: $dataset/data/class.txt, \n + vocab: $dataset/data/vocab.pkl, embedding file should be in $dataset/data/""") +parser.add_argument('--embedding', default='embedding_SougouNews.npz', + help="embedding file of $dataset/data/") +parser.add_argument('--onnx_path', required=True, help='Path to save onnx weights.') +args = parser.parse_args() + + +def main(): + config = TextCNN.Config(args.dataset, args.embedding) + vocab = pkl.load(open(config.vocab_path, 'rb')) + config.n_vocab = len(vocab) + + model = TextCNN.Model(config) + model.load_state_dict(torch.load(args.weight_path, map_location=torch.device('cpu'))) + model.eval() + input_names = ['sentence'] + output_names = ['class'] + dynamic_axes = {'sentence': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randint(100, (1, 32)) + torch.onnx.export(model, dummy_input, args.onnx_path, input_names=input_names, verbose=True, + output_names=output_names, dynamic_axes=dynamic_axes, opset_version=11) + +if __name__ == '__main__': + """ + Usage Example: + python TextCNN_pth2onnx.py \ + --weight_path ./TextCNN_9045_seed460473.pth \ + --onnx_path ./dy_textcnn.onnx + """ main() \ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/textcnn/env.sh b/ACL_PyTorch/built-in/nlp/textcnn/env.sh index 0ee18a645237140bba9cb37d8083c2bc6256eeaf..a39fd265bf8056f1c34eac9340580563ae99b199 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/env.sh +++ b/ACL_PyTorch/built-in/nlp/textcnn/env.sh @@ -1,8 +1,8 @@ -#! /bin/bash - -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=${install_path} +#! /bin/bash + +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=${install_path} diff --git a/ACL_PyTorch/built-in/nlp/textcnn/fix_onnx.py b/ACL_PyTorch/built-in/nlp/textcnn/fix_onnx.py index 330b61a21c91a59514fd37dce7bcd3c1b3ef6384..a9ac8a63b54c3a40b3d7936dff9f69ae836d37d5 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/fix_onnx.py +++ b/ACL_PyTorch/built-in/nlp/textcnn/fix_onnx.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
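After the export step above, the dynamic batch dimension of `dy_textcnn.onnx` can be confirmed with a short sketch. This is not part of the original flow; it only assumes the `onnx` package already listed in requirements.txt and the output name used by `TextCNN_pth2onnx.py`:

```python
# Structural check of the exported ONNX model (sketch, not in the original patch).
# Assumes dy_textcnn.onnx was produced by TextCNN_pth2onnx.py above.
import onnx

model = onnx.load("dy_textcnn.onnx")
onnx.checker.check_model(model)              # raises if the graph is malformed

for inp in model.graph.input:
    dims = [d.dim_param or d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)                    # expected: sentence ['-1', 32]
```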
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -from copy import deepcopy -import onnx -from onnx import (helper, TensorProto) -from onnx.onnx_ml_pb2 import ModelProto -from magiconnx import OnnxGraph -import numpy as np - - -batch_size = sys.argv[1] - -graph = OnnxGraph(f'onnx_sim_dir/textcnn_{batch_size}bs_sim.onnx') - - - - -graph.del_node('Squeeze_5',{0:0},auto_connection=True) -graph.del_node('Squeeze_11',{0:0},auto_connection=True) -graph.del_node('Squeeze_17',{0:0},auto_connection=True) - -Maxpool_1 = graph.add_node('maxpool_1', - 'MaxPool', - {'ceil_mode': 0, 'kernel_shape': [31,1], 'pads': 0, 'strides':[31,1]}) -graph['MaxPool_6'] = Maxpool_1 - - -Maxpool_2 = graph.add_node('maxpool_2', - 'MaxPool', - {'ceil_mode': 0, 'kernel_shape': [30,1], 'pads': 0, 'strides':[30,1]}) -graph['MaxPool_12'] = Maxpool_2 - - -Maxpool_3 = graph.add_node('maxpool_3', - 'MaxPool', - {'ceil_mode': 0, 'kernel_shape': [29,1], 'pads': 0, 'strides':[29,1]}) -graph['MaxPool_18'] = Maxpool_3 - -graph.del_node('Squeeze_7',{0:0},auto_connection=True) -graph.del_node('Squeeze_13',{0:1},auto_connection=True) -graph.del_node('Squeeze_19',{0:2},auto_connection=True) - - - - -squeeze = graph.add_node('squeeze_1', - 'Squeeze', - {'axis': [2,3]}) -graph.insert_node('Gemm_21', squeeze, mode='before') - - -graph.save(f'mg_onnx_dir/textcnn_{batch_size}bs_mg.onnx') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
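The node surgery performed by `fix_onnx.py` (dropping the `Squeeze` nodes around the pooling path, rewriting the `MaxPool` attributes, and inserting a single `Squeeze` before `Gemm_21`) is intended to be numerically equivalent to the simplified graph. One hedged way to confirm that is to run both models on the same random batch; this sketch assumes `onnxruntime` is installed (it is not listed in requirements.txt) and uses the file names from `onnxsim.sh` and `fix_onnx.py`:

```python
# Equivalence check for the rewrite done by fix_onnx.py (sketch only).
# Assumes onnxruntime is available and the batch-size-4 models exist on disk.
import numpy as np
import onnxruntime as ort

bs = 4
x = np.random.randint(0, 100, size=(bs, 32), dtype=np.int64)

ref = ort.InferenceSession(f"onnx_sim_dir/textcnn_{bs}bs_sim.onnx")
new = ort.InferenceSession(f"mg_onnx_dir/textcnn_{bs}bs_mg.onnx")

ref_out = ref.run(None, {"sentence": x})[0]
new_out = new.run(None, {"sentence": x})[0]
print("max abs diff:", np.abs(ref_out - new_out).max())   # should be ~0
```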
+ +import sys +from copy import deepcopy +import onnx +from onnx import (helper, TensorProto) +from onnx.onnx_ml_pb2 import ModelProto +from magiconnx import OnnxGraph +import numpy as np + + +batch_size = sys.argv[1] + +graph = OnnxGraph(f'onnx_sim_dir/textcnn_{batch_size}bs_sim.onnx') + + + + +graph.del_node('Squeeze_5',{0:0},auto_connection=True) +graph.del_node('Squeeze_11',{0:0},auto_connection=True) +graph.del_node('Squeeze_17',{0:0},auto_connection=True) + +Maxpool_1 = graph.add_node('maxpool_1', + 'MaxPool', + {'ceil_mode': 0, 'kernel_shape': [31,1], 'pads': 0, 'strides':[31,1]}) +graph['MaxPool_6'] = Maxpool_1 + + +Maxpool_2 = graph.add_node('maxpool_2', + 'MaxPool', + {'ceil_mode': 0, 'kernel_shape': [30,1], 'pads': 0, 'strides':[30,1]}) +graph['MaxPool_12'] = Maxpool_2 + + +Maxpool_3 = graph.add_node('maxpool_3', + 'MaxPool', + {'ceil_mode': 0, 'kernel_shape': [29,1], 'pads': 0, 'strides':[29,1]}) +graph['MaxPool_18'] = Maxpool_3 + +graph.del_node('Squeeze_7',{0:0},auto_connection=True) +graph.del_node('Squeeze_13',{0:1},auto_connection=True) +graph.del_node('Squeeze_19',{0:2},auto_connection=True) + + + + +squeeze = graph.add_node('squeeze_1', + 'Squeeze', + {'axis': [2,3]}) +graph.insert_node('Gemm_21', squeeze, mode='before') + + +graph.save(f'mg_onnx_dir/textcnn_{batch_size}bs_mg.onnx') diff --git a/ACL_PyTorch/built-in/nlp/textcnn/gen_dataset_info.py b/ACL_PyTorch/built-in/nlp/textcnn/gen_dataset_info.py index 296ac9bd0a7b79d1dcae0d391562cc0504cdcf31..459cea2154e13175ebd70a1d0ff6e55344a76626 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/gen_dataset_info.py +++ b/ACL_PyTorch/built-in/nlp/textcnn/gen_dataset_info.py @@ -1,26 +1,26 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from glob import glob -import os -import sys - -dataset_path = sys.argv[1] -info_path = sys.argv[2] - -bin_texts = glob(os.path.join(dataset_path, '*.bin')) -with open(info_path, 'w+') as f: - for index, texts in enumerate(bin_texts): - content = str(index) + ' ' + str(texts) + '\n' - f.write(content) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
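`gen_dataset_info.py` above writes one `index path` pair per line, which is the file the README later passes to `benchmark.x86_64` via `-input_text_path`. A tiny sketch (assuming the README's default `info` file name; not part of the original patch) to confirm the layout before launching the benchmark:

```python
# Check the "index path" layout produced by gen_dataset_info.py (sketch only).
# Assumes the info file name used in the README above.
with open("info") as f:
    for line in f:
        index, path = line.split()           # two whitespace-separated fields
        assert index.isdigit() and path.endswith(".bin"), line
print("info file layout looks consistent")
```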
+ +from glob import glob +import os +import sys + +dataset_path = sys.argv[1] +info_path = sys.argv[2] + +bin_texts = glob(os.path.join(dataset_path, '*.bin')) +with open(info_path, 'w+') as f: + for index, texts in enumerate(bin_texts): + content = str(index) + ' ' + str(texts) + '\n' + f.write(content) diff --git a/ACL_PyTorch/built-in/nlp/textcnn/modelzoo_level.txt b/ACL_PyTorch/built-in/nlp/textcnn/modelzoo_level.txt index eb7302d66b9f6f96acc9eef6f133455b35669a8d..f866572922453df1c80a3fb3a2a870ace374c835 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/modelzoo_level.txt +++ b/ACL_PyTorch/built-in/nlp/textcnn/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:NOK \ No newline at end of file diff --git a/ACL_PyTorch/built-in/nlp/textcnn/onnx2mgonnx.sh b/ACL_PyTorch/built-in/nlp/textcnn/onnx2mgonnx.sh index 006370d607eb7c87ffc0a4fa6a6c00e97ffcf5d3..a0d93e0783859b47655d7a43a60402e4d058e7dc 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/onnx2mgonnx.sh +++ b/ACL_PyTorch/built-in/nlp/textcnn/onnx2mgonnx.sh @@ -1,11 +1,11 @@ -#!/bin/bash - -if [ ! -d "./mg_onnx_dir" ] -then - mkdir ./mg_onnx_dir -fi - -for i in 4 8 16 32 64 -do - python3 ./fix_onnx.py ${i} -done +#!/bin/bash + +if [ ! -d "./mg_onnx_dir" ] +then + mkdir ./mg_onnx_dir +fi + +for i in 4 8 16 32 64 +do + python3 ./fix_onnx.py ${i} +done diff --git a/ACL_PyTorch/built-in/nlp/textcnn/onnx2om.sh b/ACL_PyTorch/built-in/nlp/textcnn/onnx2om.sh index bc9eefcb40734b206e80d67e57da967558fdadc8..7786db7940733bffdf290d122d81aeb4a50df325 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/onnx2om.sh +++ b/ACL_PyTorch/built-in/nlp/textcnn/onnx2om.sh @@ -1,11 +1,11 @@ -#!/bin/bash - -if [ ! -d "./mg_om_dir" ] -then - mkdir ./mg_om_dir -fi - -for i in 4 8 16 32 64 -do - atc --model=mg_onnx_dir/textcnn_${i}bs_mg.onnx --framework=5 --output=mg_om_dir/textcnn_${i}bs_mg --output_type=FP16 --soc_version=Ascend710 --enable_small_channel=1 -done +#!/bin/bash + +if [ ! -d "./mg_om_dir" ] +then + mkdir ./mg_om_dir +fi + +for i in 4 8 16 32 64 +do + atc --model=mg_onnx_dir/textcnn_${i}bs_mg.onnx --framework=5 --output=mg_om_dir/textcnn_${i}bs_mg --output_type=FP16 --soc_version=Ascend710 --enable_small_channel=1 +done diff --git a/ACL_PyTorch/built-in/nlp/textcnn/onnxsim.sh b/ACL_PyTorch/built-in/nlp/textcnn/onnxsim.sh index ac659a92af86e63545c36ba45edb497100151c6b..11287a55b33749a960f4ffda3016c9a36829ef9f 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/onnxsim.sh +++ b/ACL_PyTorch/built-in/nlp/textcnn/onnxsim.sh @@ -1,11 +1,11 @@ -#!/bin/bash - -if [ ! -d "./onnx_sim_dir" ] -then - mkdir ./onnx_sim_dir -fi - -for i in 1 4 8 16 32 64 -do - python3 -m onnxsim --input-shape="sentence:${i},32" ./ascend_textcnn/dy_textcnn.onnx ./onnx_sim_dir/textcnn_${i}bs_sim.onnx -done +#!/bin/bash + +if [ ! 
-d "./onnx_sim_dir" ] +then + mkdir ./onnx_sim_dir +fi + +for i in 1 4 8 16 32 64 +do + python3 -m onnxsim --input-shape="sentence:${i},32" ./ascend_textcnn/dy_textcnn.onnx ./onnx_sim_dir/textcnn_${i}bs_sim.onnx +done diff --git a/ACL_PyTorch/built-in/nlp/textcnn/requirements.txt b/ACL_PyTorch/built-in/nlp/textcnn/requirements.txt index fc5acc7efa792f8ddea52f2d606b3be4c5285b29..51fc9b2831e0f2a5e1fd178fbdd58481a07492a7 100644 --- a/ACL_PyTorch/built-in/nlp/textcnn/requirements.txt +++ b/ACL_PyTorch/built-in/nlp/textcnn/requirements.txt @@ -1,5 +1,5 @@ -numpy==1.19.2 -onnx==1.10.1 -Pillow==8.3.1 -torch==1.5.0 -torchvision==0.6.0 +numpy==1.19.2 +onnx==1.10.1 +Pillow==8.3.1 +torch==1.5.0 +torchvision==0.6.0 diff --git a/ACL_PyTorch/contrib/CONTRIBUTING.md b/ACL_PyTorch/contrib/CONTRIBUTING.md index 886fae1007aa07d5711d863a1941c4cd7356bdb9..dc9d11e8ea435028f1e4dd85543104c7547b7469 100644 --- a/ACL_PyTorch/contrib/CONTRIBUTING.md +++ b/ACL_PyTorch/contrib/CONTRIBUTING.md @@ -1,314 +1,314 @@ - **介绍** - -Ascend ModelZoo,欢迎各位开发者 - - **贡献要求** - -开发者提交的模型包括源码、readme、参考模型license文件、测试用例和readme,并遵循以下标准 - -请贡献者在提交代码之前签署CLA协议,“个人签署”,[链接](https://clasign.osinfra.cn/sign/Z2l0ZWUlMkZhc2NlbmQ=) - -如您完成签署,可在自己提交的PR评论区输入/check-cla进行核实校验 - - **一、源码** - -1、训练及在线推理请使用python代码实现,Ascend平台离线推理请使用C++或python代码,符合第四部分编码规范 - -2、参考[sample](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) - -3、贡献者模型代码目录规则:"modelzoo/contrib/框架/Research/应用领域(nlp、cv、audio等)/网络名_IDxxx_for_TensorFlow"(以tf为例,社区管理团队会在贡献完成进行整合) - -4、从其他开源迁移的代码,请增加License声明 - - **二、License规则** - -* TensorFlow - - 迁移场景 - - 1、迁移TensorFlow模型中若源项目已包含License文件则必须拷贝引用,否则在模型顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) - - 2、迁移TensorFlow框架开发的模型,需要在模型目录下每个源文件附上源社区TensorFlow Apache 2.0 License头部声明,并在其下追加新增完整华为公司License声明 - - ``` - # Copyright 2017 The TensorFlow Authors. All Rights Reserved. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # ============================================================================ - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- ``` - 开发场景 - - 1、基于TensorFlow框架开发模型,需在模型项目顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) - - 2、基于TensorFlow框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` -* PyTorch - - 迁移场景 - - 1、迁移PyTorch模型中若源项目录已包含PyTorch License文件则必须拷贝引用,否则在模型顶层目录下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) - - 2、迁移PyTorch第三方框架开发的模型,需要在模型目录下每个源文件附上源社区PyTorch BSD-3 License头部声明,并在其下追加新增一行华为公司License声明 - ``` - # BSD 3-Clause License - # - # Copyright (c) 2017 xxxx - # All rights reserved. - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Redistribution and use in source and binary forms, with or without - # modification, are permitted provided that the following conditions are met: - # - # * Redistributions of source code must retain the above copyright notice, this - # list of conditions and the following disclaimer. - # - # * Redistributions in binary form must reproduce the above copyright notice, - # this list of conditions and the following disclaimer in the documentation - # and/or other materials provided with the distribution. - # - # * Neither the name of the copyright holder nor the names of its - # contributors may be used to endorse or promote products derived from - # this software without specific prior written permission. - # - # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - # ============================================================================ - ``` - - 开发场景 - - 1、基于PyTorch框架开发模型,需在模型项目下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) - - 2、基于PyTorch框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the BSD 3-Clause License (the "License"); - # you may not use this file except in compliance with the License. 
- # You may obtain a copy of the License at - # - # https://opensource.org/licenses/BSD-3-Clause - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` - -* MindSpore/ACL - - 1、迁移或开发场景下MindSpore/ACL模型顶层目录下需要包含华为公司 License [华为公司 License链接](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) - - 2、迁移或开发场景下MindSpore/ACL模型,需要在模型目录下每个源文件中添加区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` - -> 关于License声明时间,应注意: 2021年新建的文件,应该是Copyright 2021 Huawei Technologies Co., Ltd 2020年创建年份,2020年修改年份,应该是Copyright 2020 Huawei Technologies Co., Ltd - - **三、readme** - -readme用于指导用户理解和部署样例,要包含如下内容: - -- 简介: - -1、模型的来源及原理; - -2、模型复现的步骤,含训练、eval、在线/离线推理等,入口请封装成`.sh`、`.py`; - -- 关键要求: - -1、模型的出处、对数据的要求、免责声明等,开源代码文件修改需要增加版权说明; - -2、模型转换得到的离线模型对输入数据的要求; - -3、环境变量设置,依赖的第三方软件包和库,以及安装方法; - -4、精度和性能达成要求:尽量达到原始模型水平; - -5、预训练checkpoint、结果checkpoint请提供归档OBS、网盘链接,如来自开源需明确来源地址 - -6、数据集说明 - -- [ ] 不允许直接提供数据集的下载链接,可使用词汇:用户自行准备好数据集,可选用“XXX”,“XXX”,“XXX” - - 例如:请用户自行准备好数据集,包含训练集和验证集两部分,可选用的数据集包括ImageNet2012,CIFAR10、Flower等,包含train和val两部分。 - -- [ ] 脚本中不允许提供链接下载数据集,如果开源脚本上存在对应的链接,请修改或者删除对应的脚本 - -训练ReadMe写作可参考下面两个链接: - -[Readme example1](https://gitee.com/ascend/modelzoo/blob/master/built-in/TensorFlow/Official/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md) - -[Readme example2](https://www.hiascend.com/zh/software/modelzoo/detail/C/093ed0219cb14f068af33784c62cf7ec) - -离线推理ReadMe写作可参考下面链接: - -[Readme example1](https://gitee.com/ascend/modelzoo/tree/master/contrib/ACL_TensorFlow/Research/cv/AdvancedEAST_ID0130_for_ACL/README.md) - - **四、自测试用例** - -提供模型的自测试用例和readme,提交PR需要门禁及模型测试用例通过,性能和精度检查通过 - -- 简介: - -1、不同于完整的训练过程和全量数据集的推理,自测试用例的目的是验证提交代码基本功能可用,执行时长控制在10min之内(推理或训练只需执行有限的图片或step); - -2、提交PR中训练用例入口`train_testcase.sh`, 在线推理用例入口`online_inference_testcase.sh`, 离线推理用例入口`offline_inference_testcase.sh`; - -3、提交PR后,会自动触发门禁流水,后台会根据用例入口shell,自动将代码分发到对应执行环境; - -4、Jenkins预置账号:登录账号请联系华为工程师/接口人获取,登录之后,可以查看到用例执行日志 - -5、如果提交失败,请查看日志,修复代码或其他问题后,在你当前的PR中,评论“compile”即可重新触发用例执行 - -- 关键要求: - -1、自测试用例命名严格按照上述简介2要求来书写,否则门禁会校验失败; - -2、用例应当包含功能、精度(Loss值)、性能检查,检查通过打印"Run testcase success!",失败则打印"Run testcase failed!"; - -3、执行环境已预装软件包和Python3.7.5环境,调用命令"python3"、"python3.7"、"python3.7.5"均可,安装第三方库依赖使用"pip3"、"pip3.7"均可; - -4、数据集和模型:小于500M的文件,建议使用`obsutil`命令下载(已预装),过大的文件,建议提交Issue,注明数据集和下载地址,会提前下载到执行环境上, - -已预置数据集&python第三方库: [Environments](https://gitee.com/ascend/modelzoo/blob/master/contrib/ENVIRONMENTS.md) - -5、环境和其他问题,请提交Issue跟踪; - -6、测试用例开发参考: -- [训练](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) -- 
[离线推理](https://gitee.com/alexcheng88/modelzoo/tree/master/contrib/TensorFlow/Research/cv/efficientnet-b8/ATC_efficientnet-b8_tf_nkxiaolei) - - **五、PR提交** - -- 关键要求: - -1、请将modelzoo仓fork到个人分支,基于个人分支新增、修改和提交PR; - -2、PR标题:线上活动,请在标题注明[线上贡献];高校活动,请注明[xxx学校][高校贡献]; - -3、built-in用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含三个关键字段:FuncStatus(OK-流程通过/ **NOK-流程失败,不允许模型代码提交主仓** );PerfStatus(OK-持平GPU/POK-0.5倍GPU/NOK-小于0.5倍GPU/PERFECT-1.2倍GPU);PrecisionStatus(OK-精度达标,POK-Loss拟合但精度未实施, **NOK-Loss不拟合,不允许模型代码提交主仓** );内容格式如下所示(注:“:”两侧无需空格,英文格式;): - -``` - FuncStatus:OK/NOK - PerfStatus:PERFECT/OK/POK/NOK - PrecisionStatus:OK/POK/NOK -``` -4、contrib用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含关键字段:GPUStatus(OK-GPU复现/NOK-GPU未复现); NPUMigrationStatus(OK-自动迁移成功/POK-自动迁移失败, 手写规避成功/NOK-均失败); FuncStatus(OK-基础功能打通/NOK-基础功能失败,不允许模型代码提交到master); PrecisionStatus(OK-精度达标/POK-Loss拟合但精度未完全达标/NOK-精度不达标, 不允许模型代码提交到master); AutoTune(OK-性能持平或高于GPU/POK-性能有提升但低于GPU/NOK-性能无提升或者功能失败); PerfStatus(训练:PERFECT-性能1.2倍GPU/OK-性能持平GPU/POK-性能0.5倍GPU/NOK-性能小于0.5倍GPU;推理:OK-4*310单卡>GPU/NOK-其它); ModelConvert:OK/NOK(仅推理, OK-om转换成功/NOK-om转换失败); QuantStatus:OK/NOK(仅推理, OK-精度损失1%以内,性能有提升/POK-性能有提升但未达标/NOK-量化失败); - -样例:modelzoo_level.txt文件 - ------仅限训练----- - - -``` -GPUStatus:OK/NOK -NPUMigrationStatus:OK/POK/NOK -``` - - ------仅限推理----- - -``` -ModelConvert:OK/POK/NOK -QuantStatus:OK/POK/NOK -``` - ------通用部分----- - -``` -FuncStatus:OK/NOK -PrecisionStatus:OK/POK/NOK -AutoTune:OK/POK/NOK -PerfStatus:PERFECT/OK/POK/NOK -``` -5、网络名称命名规范:*_for_框架,注:*代表任意内容,如网络名称或网络名称+网络ID; - - **六、编程规范** - -- 规范标准 - -1、C++代码遵循google编程规范:Google C++ Coding Guidelines;单元测测试遵循规范: Googletest Primer。 - -2、Python代码遵循PEP8规范:Python PEP 8 Coding Style;单元测试遵循规范: pytest - -- 规范备注 - -1、优先使用string类型,避免使用char*; - -2、禁止使用printf,一律使用cout; - -3、内存管理尽量使用智能指针; - -4、不准在函数里调用exit; - -5、禁止使用IDE等工具自动生成代码; - -6、控制第三方库依赖,如果引入第三方依赖,则需要提供第三方依赖安装和使用指导书; - -7、一律使用英文注释,注释率30%--40%,鼓励自注释; - -8、函数头必须有注释,说明函数作用,入参、出参; - -9、统一错误码,通过错误码可以确认那个分支返回错误; - -10、禁止出现打印一堆无影响的错误级别的日志; + **介绍** + +Ascend ModelZoo,欢迎各位开发者 + + **贡献要求** + +开发者提交的模型包括源码、readme、参考模型license文件、测试用例和readme,并遵循以下标准 + +请贡献者在提交代码之前签署CLA协议,“个人签署”,[链接](https://clasign.osinfra.cn/sign/Z2l0ZWUlMkZhc2NlbmQ=) + +如您完成签署,可在自己提交的PR评论区输入/check-cla进行核实校验 + + **一、源码** + +1、训练及在线推理请使用python代码实现,Ascend平台离线推理请使用C++或python代码,符合第四部分编码规范 + +2、参考[sample](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) + +3、贡献者模型代码目录规则:"modelzoo/contrib/框架/Research/应用领域(nlp、cv、audio等)/网络名_IDxxx_for_TensorFlow"(以tf为例,社区管理团队会在贡献完成进行整合) + +4、从其他开源迁移的代码,请增加License声明 + + **二、License规则** + +* TensorFlow + + 迁移场景 + + 1、迁移TensorFlow模型中若源项目已包含License文件则必须拷贝引用,否则在模型顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) + + 2、迁移TensorFlow框架开发的模型,需要在模型目录下每个源文件附上源社区TensorFlow Apache 2.0 License头部声明,并在其下追加新增完整华为公司License声明 + + ``` + # Copyright 2017 The TensorFlow Authors. All Rights Reserved. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ # ============================================================================ + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` + 开发场景 + + 1、基于TensorFlow框架开发模型,需在模型项目顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) + + 2、基于TensorFlow框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` +* PyTorch + + 迁移场景 + + 1、迁移PyTorch模型中若源项目录已包含PyTorch License文件则必须拷贝引用,否则在模型顶层目录下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) + + 2、迁移PyTorch第三方框架开发的模型,需要在模型目录下每个源文件附上源社区PyTorch BSD-3 License头部声明,并在其下追加新增一行华为公司License声明 + ``` + # BSD 3-Clause License + # + # Copyright (c) 2017 xxxx + # All rights reserved. + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # * Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. + # + # * Redistributions in binary form must reproduce the above copyright notice, + # this list of conditions and the following disclaimer in the documentation + # and/or other materials provided with the distribution. + # + # * Neither the name of the copyright holder nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # ============================================================================ + ``` + + 开发场景 + + 1、基于PyTorch框架开发模型,需在模型项目下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) + + 2、基于PyTorch框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the BSD 3-Clause License (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # https://opensource.org/licenses/BSD-3-Clause + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` + +* MindSpore/ACL + + 1、迁移或开发场景下MindSpore/ACL模型顶层目录下需要包含华为公司 License [华为公司 License链接](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) + + 2、迁移或开发场景下MindSpore/ACL模型,需要在模型目录下每个源文件中添加区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` + +> 关于License声明时间,应注意: 2021年新建的文件,应该是Copyright 2021 Huawei Technologies Co., Ltd 2020年创建年份,2020年修改年份,应该是Copyright 2020 Huawei Technologies Co., Ltd + + **三、readme** + +readme用于指导用户理解和部署样例,要包含如下内容: + +- 简介: + +1、模型的来源及原理; + +2、模型复现的步骤,含训练、eval、在线/离线推理等,入口请封装成`.sh`、`.py`; + +- 关键要求: + +1、模型的出处、对数据的要求、免责声明等,开源代码文件修改需要增加版权说明; + +2、模型转换得到的离线模型对输入数据的要求; + +3、环境变量设置,依赖的第三方软件包和库,以及安装方法; + +4、精度和性能达成要求:尽量达到原始模型水平; + +5、预训练checkpoint、结果checkpoint请提供归档OBS、网盘链接,如来自开源需明确来源地址 + +6、数据集说明 + +- [ ] 不允许直接提供数据集的下载链接,可使用词汇:用户自行准备好数据集,可选用“XXX”,“XXX”,“XXX” + + 例如:请用户自行准备好数据集,包含训练集和验证集两部分,可选用的数据集包括ImageNet2012,CIFAR10、Flower等,包含train和val两部分。 + +- [ ] 脚本中不允许提供链接下载数据集,如果开源脚本上存在对应的链接,请修改或者删除对应的脚本 + +训练ReadMe写作可参考下面两个链接: + +[Readme example1](https://gitee.com/ascend/modelzoo/blob/master/built-in/TensorFlow/Official/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md) + +[Readme example2](https://www.hiascend.com/zh/software/modelzoo/detail/C/093ed0219cb14f068af33784c62cf7ec) + +离线推理ReadMe写作可参考下面链接: + +[Readme example1](https://gitee.com/ascend/modelzoo/tree/master/contrib/ACL_TensorFlow/Research/cv/AdvancedEAST_ID0130_for_ACL/README.md) + + **四、自测试用例** + +提供模型的自测试用例和readme,提交PR需要门禁及模型测试用例通过,性能和精度检查通过 + +- 简介: + +1、不同于完整的训练过程和全量数据集的推理,自测试用例的目的是验证提交代码基本功能可用,执行时长控制在10min之内(推理或训练只需执行有限的图片或step); + +2、提交PR中训练用例入口`train_testcase.sh`, 在线推理用例入口`online_inference_testcase.sh`, 离线推理用例入口`offline_inference_testcase.sh`; + +3、提交PR后,会自动触发门禁流水,后台会根据用例入口shell,自动将代码分发到对应执行环境; + +4、Jenkins预置账号:登录账号请联系华为工程师/接口人获取,登录之后,可以查看到用例执行日志 + +5、如果提交失败,请查看日志,修复代码或其他问题后,在你当前的PR中,评论“compile”即可重新触发用例执行 + +- 关键要求: + +1、自测试用例命名严格按照上述简介2要求来书写,否则门禁会校验失败; + +2、用例应当包含功能、精度(Loss值)、性能检查,检查通过打印"Run testcase success!",失败则打印"Run testcase failed!"; + 
+3、执行环境已预装软件包和Python3.7.5环境,调用命令"python3"、"python3.7"、"python3.7.5"均可,安装第三方库依赖使用"pip3"、"pip3.7"均可; + +4、数据集和模型:小于500M的文件,建议使用`obsutil`命令下载(已预装),过大的文件,建议提交Issue,注明数据集和下载地址,会提前下载到执行环境上, + +已预置数据集&python第三方库: [Environments](https://gitee.com/ascend/modelzoo/blob/master/contrib/ENVIRONMENTS.md) + +5、环境和其他问题,请提交Issue跟踪; + +6、测试用例开发参考: +- [训练](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) +- [离线推理](https://gitee.com/alexcheng88/modelzoo/tree/master/contrib/TensorFlow/Research/cv/efficientnet-b8/ATC_efficientnet-b8_tf_nkxiaolei) + + **五、PR提交** + +- 关键要求: + +1、请将modelzoo仓fork到个人分支,基于个人分支新增、修改和提交PR; + +2、PR标题:线上活动,请在标题注明[线上贡献];高校活动,请注明[xxx学校][高校贡献]; + +3、built-in用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含三个关键字段:FuncStatus(OK-流程通过/ **NOK-流程失败,不允许模型代码提交主仓** );PerfStatus(OK-持平GPU/POK-0.5倍GPU/NOK-小于0.5倍GPU/PERFECT-1.2倍GPU);PrecisionStatus(OK-精度达标,POK-Loss拟合但精度未实施, **NOK-Loss不拟合,不允许模型代码提交主仓** );内容格式如下所示(注:“:”两侧无需空格,英文格式;): + +``` + FuncStatus:OK/NOK + PerfStatus:PERFECT/OK/POK/NOK + PrecisionStatus:OK/POK/NOK +``` +4、contrib用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含关键字段:GPUStatus(OK-GPU复现/NOK-GPU未复现); NPUMigrationStatus(OK-自动迁移成功/POK-自动迁移失败, 手写规避成功/NOK-均失败); FuncStatus(OK-基础功能打通/NOK-基础功能失败,不允许模型代码提交到master); PrecisionStatus(OK-精度达标/POK-Loss拟合但精度未完全达标/NOK-精度不达标, 不允许模型代码提交到master); AutoTune(OK-性能持平或高于GPU/POK-性能有提升但低于GPU/NOK-性能无提升或者功能失败); PerfStatus(训练:PERFECT-性能1.2倍GPU/OK-性能持平GPU/POK-性能0.5倍GPU/NOK-性能小于0.5倍GPU;推理:OK-4*310单卡>GPU/NOK-其它); ModelConvert:OK/NOK(仅推理, OK-om转换成功/NOK-om转换失败); QuantStatus:OK/NOK(仅推理, OK-精度损失1%以内,性能有提升/POK-性能有提升但未达标/NOK-量化失败); + +样例:modelzoo_level.txt文件 + +-----仅限训练----- + + +``` +GPUStatus:OK/NOK +NPUMigrationStatus:OK/POK/NOK +``` + + +-----仅限推理----- + +``` +ModelConvert:OK/POK/NOK +QuantStatus:OK/POK/NOK +``` + +-----通用部分----- + +``` +FuncStatus:OK/NOK +PrecisionStatus:OK/POK/NOK +AutoTune:OK/POK/NOK +PerfStatus:PERFECT/OK/POK/NOK +``` +5、网络名称命名规范:*_for_框架,注:*代表任意内容,如网络名称或网络名称+网络ID; + + **六、编程规范** + +- 规范标准 + +1、C++代码遵循google编程规范:Google C++ Coding Guidelines;单元测测试遵循规范: Googletest Primer。 + +2、Python代码遵循PEP8规范:Python PEP 8 Coding Style;单元测试遵循规范: pytest + +- 规范备注 + +1、优先使用string类型,避免使用char*; + +2、禁止使用printf,一律使用cout; + +3、内存管理尽量使用智能指针; + +4、不准在函数里调用exit; + +5、禁止使用IDE等工具自动生成代码; + +6、控制第三方库依赖,如果引入第三方依赖,则需要提供第三方依赖安装和使用指导书; + +7、一律使用英文注释,注释率30%--40%,鼓励自注释; + +8、函数头必须有注释,说明函数作用,入参、出参; + +9、统一错误码,通过错误码可以确认那个分支返回错误; + +10、禁止出现打印一堆无影响的错误级别的日志; diff --git a/ACL_PyTorch/contrib/audio/FastPitch/README.md b/ACL_PyTorch/contrib/audio/FastPitch/README.md index 0365271515d93198a62df562d8f58eea951fd55b..89f0244c823e6cccc219a3074464f192ed10fa28 100644 --- a/ACL_PyTorch/contrib/audio/FastPitch/README.md +++ b/ACL_PyTorch/contrib/audio/FastPitch/README.md @@ -1,225 +1,225 @@ -# FastPitch模型端到端推理指导 - -## 1 模型概述 - -- **[论文地址](https://arxiv.org/abs/2006.06873)** -- **[代码地址](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch)** - -### 1.1 论文地址 - -[FastPitch论文](https://arxiv.org/abs/2006.06873) -Fastpitch模型由双向 Transformer 主干(也称为 Transformer 编码器)、音调预测器和持续时间预测器组成。 在通过第一组 N 个 Transformer 块、编码后,信号用基音信息增强并离散上采样。 然后它通过另一组 N个 Transformer 块,目的是平滑上采样信号,并构建梅尔谱图。 - -### 1.2 开源代码地址 - -[FastPitch开源代码](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch) - -## 2 环境说明 - -### 2.1 深度学习框架 - -``` -onnx==1.9.0 -torch==1.8.0 -``` - -### 2.2 python第三方库 - -``` -matplotlib -numpy -inflect -librosa==0.8.0 -scipy -Unidecode -praat-parselmouth==0.3.3 -tensorboardX==2.0 -dllogger -``` - -**说明:** - 
-> X86架构:pytorch和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -### pth转om模型 - -1.下载pth权重文件 -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/audio/FastPitch/pretrained_models.zip -``` -(waveglow为语音生成器,不在本模型范围内, 但为了确保代码能正常运行,需要下载) - -2.安装相关依赖 - -``` -cd FastPitch -pip install -r requirements.txt -``` - -3.激活相关环境 - -``` -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -``` - -3.pth转onnx, onnx简化,onnx转om。(以batch_size=1为例) - -``` -# 导出onnx -python pth2onnx.py -i phrases/tui_val100.tsv -o ./output/audio_tui_val100 --log-file ./output/audio_tui_val100/nvlog_infer.json --fastpitch pretrained_models/fastpitch/nvidia_fastpitch_210824.pt --waveglow pretrained_models/waveglow/nvidia_waveglow256pyt_fp16.pt --wn-channels 256 --energy-conditioning --batch-size 1 -# 简化onnx -python -m onnxsim ./test/models/FastPitch_bs1.onnx ./test/models/FastPitch_bs1_sim.onnx -# 转出om -atc --framework=5 --model=./test/models/FastPitch_bs1_sim.onnx --output=./test/models/FastPitch_bs1 --input_format=ND --input_shape="input:1,200" --out_nodes='Transpose_2044:0' --log=debug --soc_version=Ascend310 -``` - -输出在/test/models中。 - - - -## 4 数据集预处理 - -### 4.1 数据集获取 - -(可选)本项目默认将数据集存放于/opt/npu/ - -``` -cd .. -wget https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/train/zip/LJSpeech-1.1.zip -unzip LJSpeech-1.1.zip -mv /LJSpeech-1.1 /opt/npu/ -``` - -### 4.2 数据集预处理 - -生成输入数据,并准备输出标签和pth权重的输出数据。本模型的验证集大小为100,具体信息在phrases/tui_val100.tsv文件中。 - -- FastPitch模型的输入数据是由文字编码组成,输入长度不等,模型已经将其补零成固定长度200。将输入数据转换为bin文件方便后续推理,存入test/input_bin文件夹下,且生成生成数据集预处理后的bin文件以及相应的info文件。 -- 在语音合成推理过程中,输出为mel图谱,本模型的输出维度为batch_size×900×80。将其输出tensor存为pth文件存入test/mel_tgt_pth文件夹下。 -- 同时,为了后面推理结束后将推理精度与原模型pth权重精度进行对比,将输入数据在pth模型中前传得到的输出tensor村委pth文件存入test/mel_out_pth文件夹下。 - -以上步骤均执行下面指令完成: - -``` -python data_process.py -i phrases/tui_val100.tsv -o ./output/audio_tui_val100 --log-file ./output/audio_tui_val100/nvlog_infer.json --fastpitch pretrained_models/fastpitch/nvidia_fastpitch_210824.pt --waveglow pretrained_models/waveglow/nvidia_waveglow256pyt_fp16.pt -``` - -## 5 离线推理及精度对比 - -### 5.1 使用benchmark工具推理 - -获取benchmark工具 - -### 5.2 模型推理 - -- 使用benchmark工具进行推理(以batch_size=1为例): - -benchmark模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。推理得到的结果会在test/result中。 - -将推理得到的结果重新转换为tensor形式,与标签mel_tgt计算mel_loss1。同时,将原模型pth权重前传得到的输出mel_out与标签mel_tgt计算出mel_loss2。mel_loss1与mel_loss2精度对齐则推理正确。 - -``` -source /usr/local/Ascend/ascend-toolkit/set_env.sh -cd test -./benchmark.x86_64 -model_type=nlp -device_id=0 -batch_size=1 -om_path=./models/FastPitch_bs1.om -input_text_path=./input_bin_info.info -output_binary=True -useDvpp=False -``` - - - -### 5.3 精度对比 - -``` -cd .. -python infer_test.py -``` - -以下为测试出的batch_size=1和16的精度对比: - -``` -mel_loss: - om pth -bs1 11.246 11.265 -bs16 11.330 11.265 -``` - - - -## 6 性能对比 - -### 6.1 npu性能数据 - -1. 
运行test/performance.sh脚本 - -``` -cd test -./benchmark.x86_64 -round=20 -device_id=0 -batch_size=1 -om_path=./models/FastPitch_bs1.om -``` - -测试出来的ave_throughputRate,将其乘以4即为吞吐率。 - -以下计算结果为batch_size=1的结果。 - -![img](file:///C:\Users\1\AppData\Local\Temp\ksohtml\wps9EEB.tmp.jpg) - - - - - - - -### 6.2 T4性能数据 - -提供以下测试代码作参考: - -```python -import time - -model=...(导入模型及加载pth权重) - -input = torch.ones(size=(1, 200), dtype=torch.int64, device=device) -total_time = 0 -lens = 20 -for _ in range(lens): - start = time.time() - output = model(input) - end = time.time() - total_time += end - start -print(f"batch_size=1, FPS:{1.0/(total_time/lens)}") - - -input = torch.ones(size=(16, 200), dtype=torch.int64, device=device) -total_time = 0 -lens = 20 -for _ in range(lens): - start = time.time() - output = model(input) - end = time.time() - total_time += end - start -print(f"batch_size=16, FPS:{16.0/(total_time/lens)}") -``` - - - - - -### 6.3 性能对比 - -| Model | Batch Size | A300 Throughput/Card | T4 Throughput/Card | A300/T4 | -| --------- | ---------- | -------------------- | ------------------ | ------- | -| FasfPitch | 1 | 54.1476 | 28.828 | 1.878 | -| FasfPitch | 4 | 51.728 | - | - | -| FasfPitch | 8 | 51.3684 | - | - | -| FasfPitch | 16 | 51.714 | 64.94 | 0.796 | -| FasfPitch | 32 | 52.0696 | - | - | - -由于模型并没有性能要求,bs1、bs4、bs8、bs16、bs32时npu的性能高于T4性能的0.5倍,性能达标。 - +# FastPitch模型端到端推理指导 + +## 1 模型概述 + +- **[论文地址](https://arxiv.org/abs/2006.06873)** +- **[代码地址](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch)** + +### 1.1 论文地址 + +[FastPitch论文](https://arxiv.org/abs/2006.06873) +Fastpitch模型由双向 Transformer 主干(也称为 Transformer 编码器)、音调预测器和持续时间预测器组成。 在通过第一组 N 个 Transformer 块、编码后,信号用基音信息增强并离散上采样。 然后它通过另一组 N个 Transformer 块,目的是平滑上采样信号,并构建梅尔谱图。 + +### 1.2 开源代码地址 + +[FastPitch开源代码](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch) + +## 2 环境说明 + +### 2.1 深度学习框架 + +``` +onnx==1.9.0 +torch==1.8.0 +``` + +### 2.2 python第三方库 + +``` +matplotlib +numpy +inflect +librosa==0.8.0 +scipy +Unidecode +praat-parselmouth==0.3.3 +tensorboardX==2.0 +dllogger +``` + +**说明:** + +> X86架构:pytorch和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +### pth转om模型 + +1.下载pth权重文件 +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/audio/FastPitch/pretrained_models.zip +``` +(waveglow为语音生成器,不在本模型范围内, 但为了确保代码能正常运行,需要下载) + +2.安装相关依赖 + +``` +cd FastPitch +pip install -r requirements.txt +``` + +3.激活相关环境 + +``` +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +``` + +3.pth转onnx, onnx简化,onnx转om。(以batch_size=1为例) + +``` +# 导出onnx +python pth2onnx.py -i phrases/tui_val100.tsv -o ./output/audio_tui_val100 --log-file ./output/audio_tui_val100/nvlog_infer.json --fastpitch pretrained_models/fastpitch/nvidia_fastpitch_210824.pt --waveglow pretrained_models/waveglow/nvidia_waveglow256pyt_fp16.pt --wn-channels 256 --energy-conditioning --batch-size 1 +# 简化onnx +python -m onnxsim ./test/models/FastPitch_bs1.onnx 
./test/models/FastPitch_bs1_sim.onnx +# 转出om +atc --framework=5 --model=./test/models/FastPitch_bs1_sim.onnx --output=./test/models/FastPitch_bs1 --input_format=ND --input_shape="input:1,200" --out_nodes='Transpose_2044:0' --log=debug --soc_version=Ascend310 +``` + +输出在/test/models中。 + + + +## 4 数据集预处理 + +### 4.1 数据集获取 + +(可选)本项目默认将数据集存放于/opt/npu/ + +``` +cd .. +wget https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/train/zip/LJSpeech-1.1.zip +unzip LJSpeech-1.1.zip +mv /LJSpeech-1.1 /opt/npu/ +``` + +### 4.2 数据集预处理 + +生成输入数据,并准备输出标签和pth权重的输出数据。本模型的验证集大小为100,具体信息在phrases/tui_val100.tsv文件中。 + +- FastPitch模型的输入数据是由文字编码组成,输入长度不等,模型已经将其补零成固定长度200。将输入数据转换为bin文件方便后续推理,存入test/input_bin文件夹下,且生成生成数据集预处理后的bin文件以及相应的info文件。 +- 在语音合成推理过程中,输出为mel图谱,本模型的输出维度为batch_size×900×80。将其输出tensor存为pth文件存入test/mel_tgt_pth文件夹下。 +- 同时,为了后面推理结束后将推理精度与原模型pth权重精度进行对比,将输入数据在pth模型中前传得到的输出tensor村委pth文件存入test/mel_out_pth文件夹下。 + +以上步骤均执行下面指令完成: + +``` +python data_process.py -i phrases/tui_val100.tsv -o ./output/audio_tui_val100 --log-file ./output/audio_tui_val100/nvlog_infer.json --fastpitch pretrained_models/fastpitch/nvidia_fastpitch_210824.pt --waveglow pretrained_models/waveglow/nvidia_waveglow256pyt_fp16.pt +``` + +## 5 离线推理及精度对比 + +### 5.1 使用benchmark工具推理 + +获取benchmark工具 + +### 5.2 模型推理 + +- 使用benchmark工具进行推理(以batch_size=1为例): + +benchmark模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。推理得到的结果会在test/result中。 + +将推理得到的结果重新转换为tensor形式,与标签mel_tgt计算mel_loss1。同时,将原模型pth权重前传得到的输出mel_out与标签mel_tgt计算出mel_loss2。mel_loss1与mel_loss2精度对齐则推理正确。 + +``` +source /usr/local/Ascend/ascend-toolkit/set_env.sh +cd test +./benchmark.x86_64 -model_type=nlp -device_id=0 -batch_size=1 -om_path=./models/FastPitch_bs1.om -input_text_path=./input_bin_info.info -output_binary=True -useDvpp=False +``` + + + +### 5.3 精度对比 + +``` +cd .. +python infer_test.py +``` + +以下为测试出的batch_size=1和16的精度对比: + +``` +mel_loss: + om pth +bs1 11.246 11.265 +bs16 11.330 11.265 +``` + + + +## 6 性能对比 + +### 6.1 npu性能数据 + +1. 
运行test/performance.sh脚本 + +``` +cd test +./benchmark.x86_64 -round=20 -device_id=0 -batch_size=1 -om_path=./models/FastPitch_bs1.om +``` + +测试出来的ave_throughputRate,将其乘以4即为吞吐率。 + +以下计算结果为batch_size=1的结果。 + +![img](file:///C:\Users\1\AppData\Local\Temp\ksohtml\wps9EEB.tmp.jpg) + + + + + + + +### 6.2 T4性能数据 + +提供以下测试代码作参考: + +```python +import time + +model=...(导入模型及加载pth权重) + +input = torch.ones(size=(1, 200), dtype=torch.int64, device=device) +total_time = 0 +lens = 20 +for _ in range(lens): + start = time.time() + output = model(input) + end = time.time() + total_time += end - start +print(f"batch_size=1, FPS:{1.0/(total_time/lens)}") + + +input = torch.ones(size=(16, 200), dtype=torch.int64, device=device) +total_time = 0 +lens = 20 +for _ in range(lens): + start = time.time() + output = model(input) + end = time.time() + total_time += end - start +print(f"batch_size=16, FPS:{16.0/(total_time/lens)}") +``` + + + + + +### 6.3 性能对比 + +| Model | Batch Size | A300 Throughput/Card | T4 Throughput/Card | A300/T4 | +| --------- | ---------- | -------------------- | ------------------ | ------- | +| FasfPitch | 1 | 54.1476 | 28.828 | 1.878 | +| FasfPitch | 4 | 51.728 | - | - | +| FasfPitch | 8 | 51.3684 | - | - | +| FasfPitch | 16 | 51.714 | 64.94 | 0.796 | +| FasfPitch | 32 | 52.0696 | - | - | + +由于模型并没有性能要求,bs1、bs4、bs8、bs16、bs32时npu的性能高于T4性能的0.5倍,性能达标。 + diff --git a/ACL_PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py b/ACL_PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py index 9baf4a6c31ccb0e0e038190d55ce38a93d4ad3ee..0fe133abd4fdf5d8397ab2bdebbd96a61e443959 100644 --- a/ACL_PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py +++ b/ACL_PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py @@ -1,108 +1,108 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import torch -import torch.nn.functional as F -from torch import nn - -from common.utils import mask_from_lens -from fastpitch.attn_loss_function import AttentionCTCLoss - - -class FastPitchLoss(nn.Module): - def __init__(self, dur_predictor_loss_scale=1.0, - pitch_predictor_loss_scale=1.0, attn_loss_scale=1.0, - energy_predictor_loss_scale=0.1): - super(FastPitchLoss, self).__init__() - self.dur_predictor_loss_scale = dur_predictor_loss_scale - self.pitch_predictor_loss_scale = pitch_predictor_loss_scale - self.energy_predictor_loss_scale = energy_predictor_loss_scale - self.attn_loss_scale = attn_loss_scale - self.attn_ctc_loss = AttentionCTCLoss() - - def forward(self, model_out, targets, is_training=True, meta_agg='mean'): - (mel_out, dec_mask, dur_pred, log_dur_pred, pitch_pred, pitch_tgt, - energy_pred, energy_tgt, attn_soft, attn_hard, attn_dur, - attn_logprob) = model_out - - (mel_tgt, in_lens, out_lens) = targets - - dur_tgt = attn_dur - dur_lens = in_lens - - mel_tgt.requires_grad = False - # (B,H,T) => (B,T,H) - mel_tgt = mel_tgt.transpose(1, 2) - - dur_mask = mask_from_lens(dur_lens, max_len=dur_tgt.size(1)) - dur_mask_sum = dur_mask.sum() - - log_dur_tgt = torch.log(dur_tgt.float() + 1) - loss_fn = F.mse_loss - dur_pred_loss = loss_fn(log_dur_pred, log_dur_tgt, reduction='none') - dur_pred_loss = (dur_pred_loss * dur_mask).sum() / dur_mask_sum - - ldiff = mel_tgt.size(1) - mel_out.size(1) - mel_out = F.pad(mel_out, (0, 0, 0, ldiff, 0, 0), value=0.0) - - mel_mask = mel_tgt.ne(0).float() - mel_mask_sum = mel_mask.sum() - - loss_fn = F.mse_loss - mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') - mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum - - ldiff = pitch_tgt.size(2) - pitch_pred.size(2) - pitch_pred = F.pad(pitch_pred, (0, ldiff, 0, 0, 0, 0), value=0.0) - pitch_loss = F.mse_loss(pitch_tgt, pitch_pred, reduction='none') - pitch_loss = (pitch_loss * dur_mask.unsqueeze(1)).sum() / dur_mask_sum - - if energy_pred is not None: - energy_pred = F.pad(energy_pred, (0, ldiff, 0, 0), value=0.0) - energy_loss = F.mse_loss(energy_tgt, energy_pred, reduction='none') - energy_loss = (energy_loss * dur_mask).sum() / dur_mask_sum - else: - energy_loss = 0 - - # Attention loss - attn_loss = self.attn_ctc_loss(attn_logprob, in_lens, out_lens) - - loss = (mel_loss - + dur_pred_loss * self.dur_predictor_loss_scale - + pitch_loss * self.pitch_predictor_loss_scale - + energy_loss * self.energy_predictor_loss_scale - + attn_loss * self.attn_loss_scale) - - meta = { - 'loss': loss.clone().detach(), - 'mel_loss': mel_loss.clone().detach(), - 'duration_predictor_loss': dur_pred_loss.clone().detach(), - 'pitch_loss': pitch_loss.clone().detach(), - 'energy_loss': energy_loss.clone().detach(), - 'attn_loss': attn_loss.clone().detach(), - 'dur_mask_sum': dur_mask_sum.clone().detach(), - 'mel_mask_sum': mel_mask_sum.clone().detach(), - 'dur_error': (torch.abs(dur_pred - dur_tgt).sum() - / dur_mask_sum).detach(), - } - - if energy_pred is not None: - meta['energy_loss'] = energy_loss.clone().detach() - - assert meta_agg in ('sum', 'mean') - if meta_agg == 'sum': - bsz = mel_out.size(0) - meta = {k: v * bsz for k, v in meta.items()} - return loss, meta +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch +import torch.nn.functional as F +from torch import nn + +from common.utils import mask_from_lens +from fastpitch.attn_loss_function import AttentionCTCLoss + + +class FastPitchLoss(nn.Module): + def __init__(self, dur_predictor_loss_scale=1.0, + pitch_predictor_loss_scale=1.0, attn_loss_scale=1.0, + energy_predictor_loss_scale=0.1): + super(FastPitchLoss, self).__init__() + self.dur_predictor_loss_scale = dur_predictor_loss_scale + self.pitch_predictor_loss_scale = pitch_predictor_loss_scale + self.energy_predictor_loss_scale = energy_predictor_loss_scale + self.attn_loss_scale = attn_loss_scale + self.attn_ctc_loss = AttentionCTCLoss() + + def forward(self, model_out, targets, is_training=True, meta_agg='mean'): + (mel_out, dec_mask, dur_pred, log_dur_pred, pitch_pred, pitch_tgt, + energy_pred, energy_tgt, attn_soft, attn_hard, attn_dur, + attn_logprob) = model_out + + (mel_tgt, in_lens, out_lens) = targets + + dur_tgt = attn_dur + dur_lens = in_lens + + mel_tgt.requires_grad = False + # (B,H,T) => (B,T,H) + mel_tgt = mel_tgt.transpose(1, 2) + + dur_mask = mask_from_lens(dur_lens, max_len=dur_tgt.size(1)) + dur_mask_sum = dur_mask.sum() + + log_dur_tgt = torch.log(dur_tgt.float() + 1) + loss_fn = F.mse_loss + dur_pred_loss = loss_fn(log_dur_pred, log_dur_tgt, reduction='none') + dur_pred_loss = (dur_pred_loss * dur_mask).sum() / dur_mask_sum + + ldiff = mel_tgt.size(1) - mel_out.size(1) + mel_out = F.pad(mel_out, (0, 0, 0, ldiff, 0, 0), value=0.0) + + mel_mask = mel_tgt.ne(0).float() + mel_mask_sum = mel_mask.sum() + + loss_fn = F.mse_loss + mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') + mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum + + ldiff = pitch_tgt.size(2) - pitch_pred.size(2) + pitch_pred = F.pad(pitch_pred, (0, ldiff, 0, 0, 0, 0), value=0.0) + pitch_loss = F.mse_loss(pitch_tgt, pitch_pred, reduction='none') + pitch_loss = (pitch_loss * dur_mask.unsqueeze(1)).sum() / dur_mask_sum + + if energy_pred is not None: + energy_pred = F.pad(energy_pred, (0, ldiff, 0, 0), value=0.0) + energy_loss = F.mse_loss(energy_tgt, energy_pred, reduction='none') + energy_loss = (energy_loss * dur_mask).sum() / dur_mask_sum + else: + energy_loss = 0 + + # Attention loss + attn_loss = self.attn_ctc_loss(attn_logprob, in_lens, out_lens) + + loss = (mel_loss + + dur_pred_loss * self.dur_predictor_loss_scale + + pitch_loss * self.pitch_predictor_loss_scale + + energy_loss * self.energy_predictor_loss_scale + + attn_loss * self.attn_loss_scale) + + meta = { + 'loss': loss.clone().detach(), + 'mel_loss': mel_loss.clone().detach(), + 'duration_predictor_loss': dur_pred_loss.clone().detach(), + 'pitch_loss': pitch_loss.clone().detach(), + 'energy_loss': energy_loss.clone().detach(), + 'attn_loss': attn_loss.clone().detach(), + 'dur_mask_sum': dur_mask_sum.clone().detach(), + 'mel_mask_sum': mel_mask_sum.clone().detach(), + 'dur_error': (torch.abs(dur_pred - dur_tgt).sum() + / dur_mask_sum).detach(), + } + + if energy_pred is not None: + meta['energy_loss'] = energy_loss.clone().detach() + 
+ assert meta_agg in ('sum', 'mean') + if meta_agg == 'sum': + bsz = mel_out.size(0) + meta = {k: v * bsz for k, v in meta.items()} + return loss, meta diff --git a/ACL_PyTorch/contrib/audio/FastPitch/infer.py b/ACL_PyTorch/contrib/audio/FastPitch/infer.py index c44a66dc1682d116e867e5cc775aedf4ef41e446..8281758ff3487db6d90ae73b78894eb3b058aaf4 100644 --- a/ACL_PyTorch/contrib/audio/FastPitch/infer.py +++ b/ACL_PyTorch/contrib/audio/FastPitch/infer.py @@ -1,90 +1,90 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import numpy as np -import torch -import torch.nn.functional as F -import os -import struct - - -def bin2tensor(binName): - size = os.path.getsize(binName) - binfile = open(binName, 'rb') - Len = int(size / 4) - res=[] - for i in range(Len): - data = binfile.read(4) - num = struct.unpack('f', data) - res.append(num[0]) - - binfile.close() - dim_res = np.array(res) - dim_res = torch.from_numpy(dim_res) - - return dim_res - - -def mel_loss(mel_out, mel_tgt): - """ - mel_out: torch.tensor, shape(batchsize, 80, 900) - mel_tgt: torch.tensor, shape(batchsize, 80, 900) - """ - mel_tgt = mel_tgt.transpose(1, 2) - mel_out = mel_out.transpose(1, 2) - - mel_mask = mel_tgt.ne(0).float() - mel_mask_sum = mel_mask.sum() - - loss_fn = F.mse_loss - mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') - mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum - - return mel_loss - - -def test_om(): - tgt_path = "./test/mel_tgt_pth/" - out_path = './test/result/dumpOutput_device0/' - data_len = 100 - mel_loss_total = 0 - for i in range(data_len): - mel_out = bin2tensor(os.path.join(out_path, f"data{i}.bin")).reshape(1, 80, 900) - mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) - mel_loss_ = mel_loss(mel_out, mel_tgt) - mel_loss_total += mel_loss_ - mel_loss_average = mel_loss_total / data_len - print("mel_loss_average", mel_loss_average.item()) - -def test_pth(): - out_path = './test/mel_out_pth/' - tgt_path = './test/mel_tgt_pth/' - data_len = 100 - mel_loss_total = 0 - for i in range(data_len): - mel_out = torch.load(os.path.join(out_path, f"mel_out{i}.pth")) - mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) - mel_loss_ = mel_loss(mel_out, mel_tgt) - mel_loss_total += mel_loss_ - mel_loss_average = mel_loss_total / data_len - print("mel_loss_average", mel_loss_average.item()) - - - -if __name__ == "__main__": - print("==================om==================") - test_om() - print("==================pth==================") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import torch +import torch.nn.functional as F +import os +import struct + + +def bin2tensor(binName): + size = os.path.getsize(binName) + binfile = open(binName, 'rb') + Len = int(size / 4) + res=[] + for i in range(Len): + data = binfile.read(4) + num = struct.unpack('f', data) + res.append(num[0]) + + binfile.close() + dim_res = np.array(res) + dim_res = torch.from_numpy(dim_res) + + return dim_res + + +def mel_loss(mel_out, mel_tgt): + """ + mel_out: torch.tensor, shape(batchsize, 80, 900) + mel_tgt: torch.tensor, shape(batchsize, 80, 900) + """ + mel_tgt = mel_tgt.transpose(1, 2) + mel_out = mel_out.transpose(1, 2) + + mel_mask = mel_tgt.ne(0).float() + mel_mask_sum = mel_mask.sum() + + loss_fn = F.mse_loss + mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') + mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum + + return mel_loss + + +def test_om(): + tgt_path = "./test/mel_tgt_pth/" + out_path = './test/result/dumpOutput_device0/' + data_len = 100 + mel_loss_total = 0 + for i in range(data_len): + mel_out = bin2tensor(os.path.join(out_path, f"data{i}.bin")).reshape(1, 80, 900) + mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) + mel_loss_ = mel_loss(mel_out, mel_tgt) + mel_loss_total += mel_loss_ + mel_loss_average = mel_loss_total / data_len + print("mel_loss_average", mel_loss_average.item()) + +def test_pth(): + out_path = './test/mel_out_pth/' + tgt_path = './test/mel_tgt_pth/' + data_len = 100 + mel_loss_total = 0 + for i in range(data_len): + mel_out = torch.load(os.path.join(out_path, f"mel_out{i}.pth")) + mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) + mel_loss_ = mel_loss(mel_out, mel_tgt) + mel_loss_total += mel_loss_ + mel_loss_average = mel_loss_total / data_len + print("mel_loss_average", mel_loss_average.item()) + + + +if __name__ == "__main__": + print("==================om==================") + test_om() + print("==================pth==================") test_pth() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/FastPitch/infer_test.py b/ACL_PyTorch/contrib/audio/FastPitch/infer_test.py index c44a66dc1682d116e867e5cc775aedf4ef41e446..8281758ff3487db6d90ae73b78894eb3b058aaf4 100644 --- a/ACL_PyTorch/contrib/audio/FastPitch/infer_test.py +++ b/ACL_PyTorch/contrib/audio/FastPitch/infer_test.py @@ -1,90 +1,90 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import numpy as np -import torch -import torch.nn.functional as F -import os -import struct - - -def bin2tensor(binName): - size = os.path.getsize(binName) - binfile = open(binName, 'rb') - Len = int(size / 4) - res=[] - for i in range(Len): - data = binfile.read(4) - num = struct.unpack('f', data) - res.append(num[0]) - - binfile.close() - dim_res = np.array(res) - dim_res = torch.from_numpy(dim_res) - - return dim_res - - -def mel_loss(mel_out, mel_tgt): - """ - mel_out: torch.tensor, shape(batchsize, 80, 900) - mel_tgt: torch.tensor, shape(batchsize, 80, 900) - """ - mel_tgt = mel_tgt.transpose(1, 2) - mel_out = mel_out.transpose(1, 2) - - mel_mask = mel_tgt.ne(0).float() - mel_mask_sum = mel_mask.sum() - - loss_fn = F.mse_loss - mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') - mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum - - return mel_loss - - -def test_om(): - tgt_path = "./test/mel_tgt_pth/" - out_path = './test/result/dumpOutput_device0/' - data_len = 100 - mel_loss_total = 0 - for i in range(data_len): - mel_out = bin2tensor(os.path.join(out_path, f"data{i}.bin")).reshape(1, 80, 900) - mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) - mel_loss_ = mel_loss(mel_out, mel_tgt) - mel_loss_total += mel_loss_ - mel_loss_average = mel_loss_total / data_len - print("mel_loss_average", mel_loss_average.item()) - -def test_pth(): - out_path = './test/mel_out_pth/' - tgt_path = './test/mel_tgt_pth/' - data_len = 100 - mel_loss_total = 0 - for i in range(data_len): - mel_out = torch.load(os.path.join(out_path, f"mel_out{i}.pth")) - mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) - mel_loss_ = mel_loss(mel_out, mel_tgt) - mel_loss_total += mel_loss_ - mel_loss_average = mel_loss_total / data_len - print("mel_loss_average", mel_loss_average.item()) - - - -if __name__ == "__main__": - print("==================om==================") - test_om() - print("==================pth==================") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import torch +import torch.nn.functional as F +import os +import struct + + +def bin2tensor(binName): + size = os.path.getsize(binName) + binfile = open(binName, 'rb') + Len = int(size / 4) + res=[] + for i in range(Len): + data = binfile.read(4) + num = struct.unpack('f', data) + res.append(num[0]) + + binfile.close() + dim_res = np.array(res) + dim_res = torch.from_numpy(dim_res) + + return dim_res + + +def mel_loss(mel_out, mel_tgt): + """ + mel_out: torch.tensor, shape(batchsize, 80, 900) + mel_tgt: torch.tensor, shape(batchsize, 80, 900) + """ + mel_tgt = mel_tgt.transpose(1, 2) + mel_out = mel_out.transpose(1, 2) + + mel_mask = mel_tgt.ne(0).float() + mel_mask_sum = mel_mask.sum() + + loss_fn = F.mse_loss + mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') + mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum + + return mel_loss + + +def test_om(): + tgt_path = "./test/mel_tgt_pth/" + out_path = './test/result/dumpOutput_device0/' + data_len = 100 + mel_loss_total = 0 + for i in range(data_len): + mel_out = bin2tensor(os.path.join(out_path, f"data{i}.bin")).reshape(1, 80, 900) + mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) + mel_loss_ = mel_loss(mel_out, mel_tgt) + mel_loss_total += mel_loss_ + mel_loss_average = mel_loss_total / data_len + print("mel_loss_average", mel_loss_average.item()) + +def test_pth(): + out_path = './test/mel_out_pth/' + tgt_path = './test/mel_tgt_pth/' + data_len = 100 + mel_loss_total = 0 + for i in range(data_len): + mel_out = torch.load(os.path.join(out_path, f"mel_out{i}.pth")) + mel_tgt = torch.load(os.path.join(tgt_path, f"mel_tgt{i}.pth")) + mel_loss_ = mel_loss(mel_out, mel_tgt) + mel_loss_total += mel_loss_ + mel_loss_average = mel_loss_total / data_len + print("mel_loss_average", mel_loss_average.item()) + + + +if __name__ == "__main__": + print("==================om==================") + test_om() + print("==================pth==================") test_pth() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/rawnet2/README.md b/ACL_PyTorch/contrib/audio/rawnet2/README.md index 4ead1dd821661c9e1067ef9d47ad53c94bbf6ed2..a0b55e2b85296be83337db606f380a3445527c6c 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/README.md +++ b/ACL_PyTorch/contrib/audio/rawnet2/README.md @@ -1,50 +1,50 @@ -### 1.环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3 install -r requirements.txt -``` - -2.获取,开源模型代码 - -``` -git clone https://github.com/Jungjee/RawNet.git -cd RawNet -patch -p1 < ../rawnet2.patch -cd .. 
-``` - -3.获取权重文件 - -通过2获得代码仓后,权重文件位置:RawNet\python\RawNet2\Pre-trained_model\rawnet2_best_weights.pt,将其放到当前目录 - -4.获取数据集 [VoxCeleb1](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1.html) ,下载Audio files测试集,重命名VoxCeleb1,确保VoxCeleb1下全部为id1xxxx文件夹,放到/root/datasets目录,注:该路径为绝对路径 - -5.获取 [msame工具](https://gitee.com/ascend/tools/tree/master/msame) 和 [benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - -将msame和benchmark.x86_64放到与test文件夹同级目录下。 - -### 2.离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -备注: - -1.需要对onnx模型进行onnxsim优化,否则无法达到精度要求,pth2om.sh脚本首先将pth文件转换为onnx模型,然后分别对bs1和bs16进行onnxsim优化,最后分别转化为om模型 - -2.eval_acc_perf.sh脚本逐步完成数据前处理bin文件输出、bs1和bs16模型推理、bs1和bs16精度测试,以及bs1和bs16benchmark的性能测试 - -``` -bash test/pth2om.sh - -bash test/eval_acc_perf.sh --datasets_path=/root/datasets/VoxCeleb1/ -``` - -评测结果: - -| 模型 | 官网pth精度 | 310精度 | 基准性能 | 310性能 | -| --------------------- | ----------------------------------------------- | --------- | -------- | ------- | -| Baseline-RawNet2 bs1 | [EER 2.49%](https://github.com/Jungjee/RawNet/) | EER 2.50% | 285.7fps | 72.8fps | -| Baseline-RawNet2 bs16 | [EER 2.49%](https://github.com/Jungjee/RawNet/) | EER 2.50% | 489.3fps | 77.6fps | - +### 1.环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3 install -r requirements.txt +``` + +2.获取,开源模型代码 + +``` +git clone https://github.com/Jungjee/RawNet.git +cd RawNet +patch -p1 < ../rawnet2.patch +cd .. +``` + +3.获取权重文件 + +通过2获得代码仓后,权重文件位置:RawNet\python\RawNet2\Pre-trained_model\rawnet2_best_weights.pt,将其放到当前目录 + +4.获取数据集 [VoxCeleb1](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1.html) ,下载Audio files测试集,重命名VoxCeleb1,确保VoxCeleb1下全部为id1xxxx文件夹,放到/root/datasets目录,注:该路径为绝对路径 + +5.获取 [msame工具](https://gitee.com/ascend/tools/tree/master/msame) 和 [benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + +将msame和benchmark.x86_64放到与test文件夹同级目录下。 + +### 2.离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +备注: + +1.需要对onnx模型进行onnxsim优化,否则无法达到精度要求,pth2om.sh脚本首先将pth文件转换为onnx模型,然后分别对bs1和bs16进行onnxsim优化,最后分别转化为om模型 + +2.eval_acc_perf.sh脚本逐步完成数据前处理bin文件输出、bs1和bs16模型推理、bs1和bs16精度测试,以及bs1和bs16benchmark的性能测试 + +``` +bash test/pth2om.sh + +bash test/eval_acc_perf.sh --datasets_path=/root/datasets/VoxCeleb1/ +``` + +评测结果: + +| 模型 | 官网pth精度 | 310精度 | 基准性能 | 310性能 | +| --------------------- | ----------------------------------------------- | --------- | -------- | ------- | +| Baseline-RawNet2 bs1 | [EER 2.49%](https://github.com/Jungjee/RawNet/) | EER 2.50% | 285.7fps | 72.8fps | +| Baseline-RawNet2 bs16 | [EER 2.49%](https://github.com/Jungjee/RawNet/) | EER 2.50% | 489.3fps | 77.6fps | + diff --git a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_postprocess.py b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_postprocess.py index a0263c606aaa5356b020b795568a12e6a0e8d7aa..a6a9b03a3c424b59cf8b03355cf82ebb0e083399 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_postprocess.py +++ b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_postprocess.py @@ -1,121 +1,121 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import sys -import numpy as np -from sklearn.metrics import roc_curve -from scipy.optimize import brentq -from scipy.interpolate import interp1d -from tqdm import tqdm -import argparse - -sys.path.append('RawNet/python/RawNet2/') -from utils import cos_sim - -def get_l_embeddings(list_embeddings,bs,path="def"): - temp = "" - l_embeddings = [] - index = 0 - l_utt = [] - l_code = [] - with tqdm(total=len(list_embeddings), ncols=70) as pbar: - if bs==1: - files = sorted(list_embeddings) - else: - files = list_embeddings.keys() - for f in files: - if bs==1: - t = np.loadtxt(path + "/" + f) - t = t.astype(np.float32) - else: - t = list_embeddings[f] - index += 1 - key = f.replace("$", "/", 2).split("$")[0] - if (temp == ""): - temp = key - l_utt.append(key) - if (key == temp): - l_code.append(t) - else: - l_embeddings.append(np.mean(l_code, axis=0)) - temp = key - l_utt.append(key) - l_code = [] - l_code.append(t) - if (index == len(files)): - l_embeddings.append(np.mean(l_code, axis=0)) - pbar.update(1) - if not len(l_utt) == len(l_embeddings): - print(len(l_utt), len(l_embeddings)) - exit() - d_embeddings = {} - for k, v in zip(l_utt, l_embeddings): - d_embeddings[k] = v - return d_embeddings - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--input', help='bin path', default="", required=True) - parser.add_argument('--batch_size', help='batch size', required=True) - parser.add_argument('--output', help='result path', default="result/") - args = parser.parse_args() - batch_size = int(args.batch_size) - base = args.input - save_dir = args.output - d_embeddings = {} - if batch_size == 1: - for path, dirs, files in os.walk(base): - d_embeddings = get_l_embeddings(files,batch_size,path); - else: - with open('bs16_key.txt', 'r') as f: - l_val = f.readlines() - bs16_out = [] - for path, dirs, files in os.walk(base): - files = sorted(files, key=lambda x: [int(x.split('_')[0])]) - for f in files: - t = np.loadtxt(path + "/" + f) - for i in t: - i.reshape(1024, ) - bs16_out.append(i) - bs16_out_embeddings = {} - if not len(l_val) == len(bs16_out): - print(len(l_val), len(bs16_out)) - exit() - for k, v in zip(l_val, bs16_out): - bs16_out_embeddings[k] = v - d_embeddings = get_l_embeddings(bs16_out_embeddings,batch_size); - - with open('RawNet/trials/vox_original.txt', 'r') as f: - l_val_trial = f.readlines() - y_score = [] - y = [] - f_res = open(save_dir + 'result_detail_bs{}.txt'.format(batch_size), 'w') - for line in l_val_trial: - trg, utt_a, utt_b = line.strip().split(' ') - y.append(int(trg)) - y_score.append(cos_sim(d_embeddings[utt_a], d_embeddings[utt_b])) - f_res.write('{score} {target}\n'.format(score=y_score[-1], target=y[-1])) - f_res.close() - fpr, tpr, _ = roc_curve(y, y_score, pos_label=1) - eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.) - f_eer_301 = open(save_dir + 'result_eer_{}.txt'.format(batch_size), 'w') - f_eer_301.write('bs{dir} evaluation EER: {eer}\n'.format(dir=batch_size, eer=eer)) - f_eer_301.close() - print('bs{dir} evaluation EER: {eer}\n'.format(dir=batch_size, eer=eer)) - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import sys +import numpy as np +from sklearn.metrics import roc_curve +from scipy.optimize import brentq +from scipy.interpolate import interp1d +from tqdm import tqdm +import argparse + +sys.path.append('RawNet/python/RawNet2/') +from utils import cos_sim + +def get_l_embeddings(list_embeddings,bs,path="def"): + temp = "" + l_embeddings = [] + index = 0 + l_utt = [] + l_code = [] + with tqdm(total=len(list_embeddings), ncols=70) as pbar: + if bs==1: + files = sorted(list_embeddings) + else: + files = list_embeddings.keys() + for f in files: + if bs==1: + t = np.loadtxt(path + "/" + f) + t = t.astype(np.float32) + else: + t = list_embeddings[f] + index += 1 + key = f.replace("$", "/", 2).split("$")[0] + if (temp == ""): + temp = key + l_utt.append(key) + if (key == temp): + l_code.append(t) + else: + l_embeddings.append(np.mean(l_code, axis=0)) + temp = key + l_utt.append(key) + l_code = [] + l_code.append(t) + if (index == len(files)): + l_embeddings.append(np.mean(l_code, axis=0)) + pbar.update(1) + if not len(l_utt) == len(l_embeddings): + print(len(l_utt), len(l_embeddings)) + exit() + d_embeddings = {} + for k, v in zip(l_utt, l_embeddings): + d_embeddings[k] = v + return d_embeddings + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--input', help='bin path', default="", required=True) + parser.add_argument('--batch_size', help='batch size', required=True) + parser.add_argument('--output', help='result path', default="result/") + args = parser.parse_args() + batch_size = int(args.batch_size) + base = args.input + save_dir = args.output + d_embeddings = {} + if batch_size == 1: + for path, dirs, files in os.walk(base): + d_embeddings = get_l_embeddings(files,batch_size,path); + else: + with open('bs16_key.txt', 'r') as f: + l_val = f.readlines() + bs16_out = [] + for path, dirs, files in os.walk(base): + files = sorted(files, key=lambda x: [int(x.split('_')[0])]) + for f in files: + t = np.loadtxt(path + "/" + f) + for i in t: + i.reshape(1024, ) + bs16_out.append(i) + bs16_out_embeddings = {} + if not len(l_val) == len(bs16_out): + print(len(l_val), len(bs16_out)) + exit() + for k, v in zip(l_val, bs16_out): + bs16_out_embeddings[k] = v + d_embeddings = get_l_embeddings(bs16_out_embeddings,batch_size); + + with open('RawNet/trials/vox_original.txt', 'r') as f: + l_val_trial = f.readlines() + y_score = [] + y = [] + f_res = open(save_dir + 'result_detail_bs{}.txt'.format(batch_size), 'w') + for line in l_val_trial: + trg, utt_a, utt_b = line.strip().split(' ') + y.append(int(trg)) + y_score.append(cos_sim(d_embeddings[utt_a], d_embeddings[utt_b])) + f_res.write('{score} {target}\n'.format(score=y_score[-1], target=y[-1])) + f_res.close() + fpr, tpr, _ = roc_curve(y, y_score, pos_label=1) + eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.) 
+ f_eer_301 = open(save_dir + 'result_eer_{}.txt'.format(batch_size), 'w') + f_eer_301.write('bs{dir} evaluation EER: {eer}\n'.format(dir=batch_size, eer=eer)) + f_eer_301.close() + print('bs{dir} evaluation EER: {eer}\n'.format(dir=batch_size, eer=eer)) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_preprocess.py b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_preprocess.py index a0e421fb775828d7a58efc6a9d9100e3e3b06471..8249f16a440319c288bd49a8907e5be757678b41 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_preprocess.py +++ b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_preprocess.py @@ -1,84 +1,84 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import numpy as np -import argparse -import sys -sys.path.append('RawNet/python/RawNet2/') -from dataloader import TA_Dataset_VoxCeleb2 - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--input', help='dataset path', default="/root/datasets/VoxCeleb1/") - parser.add_argument('--batch_size', help='batch size', default=1) - parser.add_argument('--output', help='out bin path', default="bin_out_bs1/") - args = parser.parse_args() - base_dir = args.input - out_dir = args.output - batch_size = int(args.batch_size) - - def get_utt_list(src_dir): - l_utt = [] - for path, dirs, files in os.walk(src_dir): - path = path.replace('\\', '/') - base = '/'.join(path.split('/')[-2:]) + '/' - for f in files: - if f[-3:] != 'wav': - continue - l_utt.append(base + f) - return l_utt - - l_val = sorted(get_utt_list(base_dir)) - TA_evalset = TA_Dataset_VoxCeleb2(list_IDs=l_val, - return_label=True, - window_size=11810, - nb_samp=59049, - base_dir=base_dir) - if batch_size == 1: - for item in TA_evalset: - n = 0 - for i in item[0]: - i.tofile(out_dir + item[1].replace('/', '$') + "$" + str(n) + ".bin") - n += 1 - else: - bs16_key = open('bs16_key.txt', mode='w') - bs16 = [] - n = 0 - i = 0 - for item in TA_evalset: - l = 0 - for t in item[0]: - bs16_key.write(item[1].replace('/', '$') + "$" + str(n) + ".bin" + "$" + str(l) + "\n") - l += 1 - n += 1 - bs16.append(t) - if n == batch_size: - np.vstack(bs16).tofile(out_dir + str(i) + ".bin") - i += 1 - bs16 = [] - n = 0 - if n % batch_size == 0: - return - for j in range(batch_size - (n % batch_size)): - bs16_key.write("temp$" + str(j) + "\n") - bs16.append(np.empty((59049,), dtype='float32')) - bs16_key.close() - np.vstack(bs16).tofile(out_dir + str(i) + ".bin") - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import numpy as np +import argparse +import sys +sys.path.append('RawNet/python/RawNet2/') +from dataloader import TA_Dataset_VoxCeleb2 + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--input', help='dataset path', default="/root/datasets/VoxCeleb1/") + parser.add_argument('--batch_size', help='batch size', default=1) + parser.add_argument('--output', help='out bin path', default="bin_out_bs1/") + args = parser.parse_args() + base_dir = args.input + out_dir = args.output + batch_size = int(args.batch_size) + + def get_utt_list(src_dir): + l_utt = [] + for path, dirs, files in os.walk(src_dir): + path = path.replace('\\', '/') + base = '/'.join(path.split('/')[-2:]) + '/' + for f in files: + if f[-3:] != 'wav': + continue + l_utt.append(base + f) + return l_utt + + l_val = sorted(get_utt_list(base_dir)) + TA_evalset = TA_Dataset_VoxCeleb2(list_IDs=l_val, + return_label=True, + window_size=11810, + nb_samp=59049, + base_dir=base_dir) + if batch_size == 1: + for item in TA_evalset: + n = 0 + for i in item[0]: + i.tofile(out_dir + item[1].replace('/', '$') + "$" + str(n) + ".bin") + n += 1 + else: + bs16_key = open('bs16_key.txt', mode='w') + bs16 = [] + n = 0 + i = 0 + for item in TA_evalset: + l = 0 + for t in item[0]: + bs16_key.write(item[1].replace('/', '$') + "$" + str(n) + ".bin" + "$" + str(l) + "\n") + l += 1 + n += 1 + bs16.append(t) + if n == batch_size: + np.vstack(bs16).tofile(out_dir + str(i) + ".bin") + i += 1 + bs16 = [] + n = 0 + if n % batch_size == 0: + return + for j in range(batch_size - (n % batch_size)): + bs16_key.write("temp$" + str(j) + "\n") + bs16.append(np.empty((59049,), dtype='float32')) + bs16_key.close() + np.vstack(bs16).tofile(out_dir + str(i) + ".bin") + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_pth2onnx.py b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_pth2onnx.py index 8cdefee1d34c7c0d35e9c1c58554041471cb548a..a202509a73511017f476ed527a61281774f13e82 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_pth2onnx.py +++ b/ACL_PyTorch/contrib/audio/rawnet2/RawNet2_pth2onnx.py @@ -1,34 +1,34 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import sys -import torch - -sys.path.append('RawNet/python/RawNet2/Pre-trained_model') -from RawNet.python.RawNet2.parser import get_args -from model_RawNet2_original_code import RawNet - -ptfile = "rawnet2_best_weights.pt" -args = get_args() -args.model['nb_classes'] = 6112 -model = RawNet(args.model, device="cpu") -model.load_state_dict(torch.load(ptfile, map_location=torch.device('cpu'))) -input_names = ["wav"] -output_names = ["class"] -dynamic_axes = {'wav': {0: '-1'}, 'class': {0: '-1'}} -dummy_input = torch.randn(1, 59049) -export_onnx_file = "RawNet2.onnx" -torch.onnx.export(model, dummy_input, export_onnx_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import sys +import torch + +sys.path.append('RawNet/python/RawNet2/Pre-trained_model') +from RawNet.python.RawNet2.parser import get_args +from model_RawNet2_original_code import RawNet + +ptfile = "rawnet2_best_weights.pt" +args = get_args() +args.model['nb_classes'] = 6112 +model = RawNet(args.model, device="cpu") +model.load_state_dict(torch.load(ptfile, map_location=torch.device('cpu'))) +input_names = ["wav"] +output_names = ["class"] +dynamic_axes = {'wav': {0: '-1'}, 'class': {0: '-1'}} +dummy_input = torch.randn(1, 59049) +export_onnx_file = "RawNet2.onnx" +torch.onnx.export(model, dummy_input, export_onnx_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) diff --git a/ACL_PyTorch/contrib/audio/rawnet2/env.sh b/ACL_PyTorch/contrib/audio/rawnet2/env.sh index ea514e10c5375ef01ed40d5e7e612e000606e074..52554cfca2bdaa7918b38a82ab05b0c83cd28a15 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/env.sh +++ b/ACL_PyTorch/contrib/audio/rawnet2/env.sh @@ -1,6 +1,6 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest diff --git a/ACL_PyTorch/contrib/audio/rawnet2/fusion_switch.cfg b/ACL_PyTorch/contrib/audio/rawnet2/fusion_switch.cfg index ece7f48ff26239ec868a76a38d6f6b3c5577a4f9..9f8ef7e7a67a86c517c12550462c28c316105271 100644 --- 
a/ACL_PyTorch/contrib/audio/rawnet2/fusion_switch.cfg +++ b/ACL_PyTorch/contrib/audio/rawnet2/fusion_switch.cfg @@ -1,7 +1,7 @@ -{ - "Switch":{ - "UBFusion":{ - "TbeEltwiseFusionPass":"off" - } - } +{ + "Switch":{ + "UBFusion":{ + "TbeEltwiseFusionPass":"off" + } + } } \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/rawnet2/modelzoo_level.txt b/ACL_PyTorch/contrib/audio/rawnet2/modelzoo_level.txt index 20e36b3f785cf4dbfc970929d2d793911d57c314..85fa1416d33effb8f710f3ac3f6772fcbc898eaa 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/audio/rawnet2/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:NOK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/rawnet2/perf_t4.sh b/ACL_PyTorch/contrib/audio/rawnet2/perf_t4.sh index 26aa1c974822b20801cd53cd8dda1842560b8248..56e04681b167c948b6a9aaec608faecdb90d6c67 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/perf_t4.sh +++ b/ACL_PyTorch/contrib/audio/rawnet2/perf_t4.sh @@ -1,21 +1,21 @@ -#! /bin/bash - -trtexec --onnx=RawNet2_sim_bs1.onnx --fp16 --shapes=wav:1x59049 > RawNet2_bs1.log -perf_str=`grep "GPU.* mean.*ms$" ReID_bs1.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" ReID_bs1.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "gpu bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' - -trtexec --onnx=RawNet2_sim_bs16.onnx --fp16 --shapes=wav:16x59049 > RawNet2_bs16.log -perf_str=`grep "GPU.* mean.*ms$" ReID_bs16.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" ReID_bs16.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "gpu bs16 fps:%.3f\n", 1000*1/('$perf_num'/16)}' +#! 
/bin/bash + +trtexec --onnx=RawNet2_sim_bs1.onnx --fp16 --shapes=wav:1x59049 > RawNet2_bs1.log +perf_str=`grep "GPU.* mean.*ms$" ReID_bs1.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" ReID_bs1.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "gpu bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' + +trtexec --onnx=RawNet2_sim_bs16.onnx --fp16 --shapes=wav:16x59049 > RawNet2_bs16.log +perf_str=`grep "GPU.* mean.*ms$" ReID_bs16.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" ReID_bs16.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "gpu bs16 fps:%.3f\n", 1000*1/('$perf_num'/16)}' diff --git a/ACL_PyTorch/contrib/audio/rawnet2/requirements.txt b/ACL_PyTorch/contrib/audio/rawnet2/requirements.txt index 4c42a0cc0aeca63750fc209152436c342b452cfa..dbb1bfa1b61ec0049a3ea874249a204172a07813 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/requirements.txt +++ b/ACL_PyTorch/contrib/audio/rawnet2/requirements.txt @@ -1,9 +1,9 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.10.1 -onnx-simplifier==0.3.6 -numpy -scikit-learn -scipy -tqdm +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.10.1 +onnx-simplifier==0.3.6 +numpy +scikit-learn +scipy +tqdm soundfile \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/rawnet2/test/parse.py b/ACL_PyTorch/contrib/audio/rawnet2/test/parse.py index 6cdf1420bd3d3e7d14f5add67c57cfe2ad399407..64b47e3cff99e9e20539ae0c03b95d691d92aa1d 100644 --- a/ACL_PyTorch/contrib/audio/rawnet2/test/parse.py +++ b/ACL_PyTorch/contrib/audio/rawnet2/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) diff --git a/ACL_PyTorch/contrib/audio/tdnn/LICENSE b/ACL_PyTorch/contrib/audio/tdnn/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/LICENSE +++ b/ACL_PyTorch/contrib/audio/tdnn/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/ReadMe.md b/ACL_PyTorch/contrib/audio/tdnn/ReadMe.md index 4ee27e8c1136c62145ace0b59b84ac8b43ed39e8..95f84433d2494ac8294e0dc04c48de9fd72c51c8 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/ReadMe.md +++ b/ACL_PyTorch/contrib/audio/tdnn/ReadMe.md @@ -1,276 +1,276 @@ -# Tdnn Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 精度对比](#62-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - [7.2 T4性能数据](#72-T4性能数据) - - [7.3 性能对比](#73-性能对比) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -华为npu模型训练组依据客户给的模型进行训练所得,无参考论文 - -### 1.2 代码地址 -[Tdnn代码]https://gitee.com/ascend/modelzoo.git) -branch:master -commit_id= -code_path= - - - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -pytorch = 昇腾1.5.0 -torchaudio = 0.5.0 -``` -### 2.2 python第三方库 -``` -speechbrain = 0.5.9 -onnx == 1.7.0 -onnxruntime-gpu == 1.8.1 -sox == 14.4.0 -``` -1.gpu环境搭建 -在安装了Anaconda或Miniconda的gpu环境下,首先执行命令 -``` -conda create -n tdnn python==3.7 -``` -创建名为tdnn的虚拟环境,再执行命令 -``` -conda activate tdnn -``` -进入该环境,先后执行下面两条命令 -``` -pip3 install torch==1.5.0 -pip3 install torchaudio==0.5.0 -``` -安装pytorch及torchaudio,再前往 -``` -https://codeload.github.com/speechbrain/speechbrain/zip/refs/tags/v0.5.9 -``` -下载speechbrain 0.5.9源码包,解压并进入speechbrain项目根目录,在speechbrain/speechbrain文件夹下找到requirement.txt,删除torch和torchaudio对应行,然后执行 -``` -pip install -r requirements.txt -pip install --editable . -``` -完成speechbrain安装,接着执行 -``` -pip install onnx==1.7.0 -``` -安装ONNX,执行 -``` -pip install onnxruntime-gpu==1.7.0 -``` -安装onnxruntime-gpu - -2.npu环境搭建 -在安装了Anaconda或Miniconda的npu环境下,首先执行命令 -``` -conda create -n tdnn python==3.7 -``` -创建名为tdnn的虚拟环境,再执行命令 -``` -conda activate tdnn -``` -进入该环境,先执行 -``` -pip3 install torch==1.5.0 -``` -安装pytorch,再尝试执行 -``` -pip3 install torchaudio==0.5.0 -``` -安装torchaudio,确保安装成功后再前往 -``` -https://codeload.github.com/speechbrain/speechbrain/zip/refs/tags/v0.5.9 -``` -下载speechbrain 0.5.9源码包,解压并进入speechbrain项目根目录,在speechbrain/speechbrain文件夹下找到requirement.txt,删除torch和torchaudio对应行,然后执行 -``` -pip install -r requirements.txt -pip install --editable . 
-``` -完成speechbrain安装 - -**说明:** -> 如果torchaudio安装失败,或者安装之后出现读取.flac文件报错的情况, -> 请前往https://e.gitee.com/HUAWEI-ASCEND/notifications/infos?issue=I48AZM -> 按步骤完成sox 14.4.0和torchaudio 0.5.0安装,再安装speechbrain - -## 3 模型转换 - -本模型基于开源框架PyTorch训练的TDNN模型进行转换。 -首先使用PyTorch将模型权重文件tdnn.pth转换为tdnn.onnx文件,再使用ATC工具将tdnn.onnx文件转为tdnn.om文件。 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 -以下步骤均在gpu环境完成。 -1.转换前的代码处理 -在上一步安装的speechbrain文件夹中,进入文件夹speechbrain/nnet,找到CNN.py文件,将第349行修改为 -``` -padding_mode='constant' -``` -再进入文件夹speechbrain/pretrained,用本仓库的interfaces.py替换该目录下的同名文件 - -2.获取pth权重文件 -权重文件由华为npu模型训练组通过训练TDNN模型得到。 -在speechbrain/templates/speaker_id目录下新建文件夹best_model,将训练保存的模型文件及本仓库提供的hyperparams.yaml文件一并放到best_model。 -best_model文件夹下应包含以下文件: -``` -classifier.ckpt -hyperparams.yaml -embedding_model.ckpt -``` - -3.生成onnx模型文件 -将Tdnn_pth2onnx.py脚本放到speechbrain/templates/speaker_id目录下,并在该目录下执行 -``` -python Tdnn_pth2onnx.py -``` -运行成功后,该目录下将生成tdnn.onnx文件 - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - - -### 3.2 onnx转om模型 - -以下步骤在npu环境进行。 -1.生成om模型文件 -将atc.sh脚本放到speechbrain/templates/speaker_id目录下,并在该目录下执行 -``` -bash atc.sh tdnn.onnx tdnn -``` -运行成功后,将生成tdnn.om模型 - - - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -用户将自行获取的数据集上传到speechbrain/templates/speaker_id目录 - -### 4.2 数据集预处理 -进入speechbrain/templates/speaker_id目录,将mini_librispeech_prepare.py文件的第174行代码random.shuffle(wav_list)注释掉,然后在该目录下执行 -``` -python Tdnn_preprocess.py -``` -预处理后的数据集在新生成的目录mini_librispeech_test_bin中 -### 4.3 生成数据集信息文件 -上一步的数据集预处理会同步生成一个文件mini_librispeech_test.info,该文件即数据集信息文件 - - - -## 5 离线推理 -1.设置离线推理环境 -将acl_net.py, pyacl_infer.py, om_infer.sh三个文件放到speechbrain/templates/speaker_id目录下 -2.执行pyacl离线推理 -在speechbrain/templates/speaker_id目录下执行 -``` -bash om_infer.sh -``` -推理结果将会输出到result目录下 - - - -## 6 精度对比 -- **[离线推理精度](#61-离线推理精度)** -- **[精度对比](#62-精度对比)** - -### 6.1 离线推理精度统计 - -将Tdnn_postprocess.py文件放到speechbrain/templates/speaker_id目录下,并在该目录下执行 -``` -python Tdnn_postprocess.py -``` -精度统计结果将直接输出到控制台 - -### 6.2 精度对比 -pth模型精度99.10%,om模型精度98.69%,模型转换后精度损失不超过1% - **精度调试:** - ->模型转换后精度损失不超过1%,精度达标,故不需要进行精度调试 - - - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** -- **[T4性能数据](#72-T4性能数据)** -- **[性能对比](#73-性能对比)** - -### 7.1 npu性能数据 -该模型不支持benchmark推理,故使用pyacl离线推理获取npu性能数据,npu性能数据为 -``` -average pure infer time(ms):10.24 -``` -Interface throughputRate: 1000/10.24 = 97.37 -310单卡吞吐率:1000/10.54*16 = 1562.50 fps - -### 7.2 T4性能数据 -1.搭建环境 -在T4服务器搭建onnx模型推理环境,然后新建test文件夹,将下列文件上传至该文件夹 -``` -tdnn.onnx文件 -Tdnn_onnx_infer.py文件 -speechbrain/templates/speaker_id目录下的mini_librispeech_test.info文件 -speechbrain/templates/speaker_id目录下的mini_librispeech_bin文件夹及其全部文件 -``` -2.执行在线推理 -在test目录下执行 -``` -python Tdnn_onnx_infer.py -``` -性能数据将会输出到gpu_result目录下 -T4性能基线数据为 -``` -average pure infer time(ms): 12.98 -``` -T4单卡吞吐率:1000/12.98*16 = 1232.67 fps - -### 7.3 性能对比 -单卡吞吐率 -``` -npu-310:1562.50 fps -gpu-t4 :1232.67 fps -``` -310性能高于T4性能,性能达标 - - **性能优化:** ->该模型性能优于T4,不用进行优化 +# Tdnn Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 精度对比](#62-精度对比) +- [7 
性能对比](#7-性能对比)
+  - [7.1 npu性能数据](#71-npu性能数据)
+  - [7.2 T4性能数据](#72-T4性能数据)
+  - [7.3 性能对比](#73-性能对比)
+
+
+
+## 1 模型概述
+
+- **[论文地址](#11-论文地址)**
+
+- **[代码地址](#12-代码地址)**
+
+### 1.1 论文地址
+华为npu模型训练组依据客户给的模型进行训练所得,无参考论文
+
+### 1.2 代码地址
+[Tdnn代码](https://gitee.com/ascend/modelzoo.git)
+branch:master
+commit_id=
+code_path=
+
+
+
+## 2 环境说明
+
+- **[深度学习框架](#21-深度学习框架)**
+
+- **[python第三方库](#22-python第三方库)**
+
+### 2.1 深度学习框架
+```
+pytorch = 昇腾1.5.0
+torchaudio = 0.5.0
+```
+### 2.2 python第三方库
+```
+speechbrain = 0.5.9
+onnx == 1.7.0
+onnxruntime-gpu == 1.8.1
+sox == 14.4.0
+```
+1.gpu环境搭建
+在安装了Anaconda或Miniconda的gpu环境下,首先执行命令
+```
+conda create -n tdnn python==3.7
+```
+创建名为tdnn的虚拟环境,再执行命令
+```
+conda activate tdnn
+```
+进入该环境,先后执行下面两条命令
+```
+pip3 install torch==1.5.0
+pip3 install torchaudio==0.5.0
+```
+安装pytorch及torchaudio,再前往
+```
+https://codeload.github.com/speechbrain/speechbrain/zip/refs/tags/v0.5.9
+```
+下载speechbrain 0.5.9源码包,解压并进入speechbrain项目根目录,在speechbrain/speechbrain文件夹下找到requirements.txt,删除torch和torchaudio对应行,然后执行
+```
+pip install -r requirements.txt
+pip install --editable .
+```
+完成speechbrain安装,接着执行
+```
+pip install onnx==1.7.0
+```
+安装ONNX,执行
+```
+pip install onnxruntime-gpu==1.8.1
+```
+安装onnxruntime-gpu
+
+2.npu环境搭建
+在安装了Anaconda或Miniconda的npu环境下,首先执行命令
+```
+conda create -n tdnn python==3.7
+```
+创建名为tdnn的虚拟环境,再执行命令
+```
+conda activate tdnn
+```
+进入该环境,先执行
+```
+pip3 install torch==1.5.0
+```
+安装pytorch,再尝试执行
+```
+pip3 install torchaudio==0.5.0
+```
+安装torchaudio,确保安装成功后再前往
+```
+https://codeload.github.com/speechbrain/speechbrain/zip/refs/tags/v0.5.9
+```
+下载speechbrain 0.5.9源码包,解压并进入speechbrain项目根目录,在speechbrain/speechbrain文件夹下找到requirements.txt,删除torch和torchaudio对应行,然后执行
+```
+pip install -r requirements.txt
+pip install --editable .
+``` +完成speechbrain安装 + +**说明:** +> 如果torchaudio安装失败,或者安装之后出现读取.flac文件报错的情况, +> 请前往https://e.gitee.com/HUAWEI-ASCEND/notifications/infos?issue=I48AZM +> 按步骤完成sox 14.4.0和torchaudio 0.5.0安装,再安装speechbrain + +## 3 模型转换 + +本模型基于开源框架PyTorch训练的TDNN模型进行转换。 +首先使用PyTorch将模型权重文件tdnn.pth转换为tdnn.onnx文件,再使用ATC工具将tdnn.onnx文件转为tdnn.om文件。 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 +以下步骤均在gpu环境完成。 +1.转换前的代码处理 +在上一步安装的speechbrain文件夹中,进入文件夹speechbrain/nnet,找到CNN.py文件,将第349行修改为 +``` +padding_mode='constant' +``` +再进入文件夹speechbrain/pretrained,用本仓库的interfaces.py替换该目录下的同名文件 + +2.获取pth权重文件 +权重文件由华为npu模型训练组通过训练TDNN模型得到。 +在speechbrain/templates/speaker_id目录下新建文件夹best_model,将训练保存的模型文件及本仓库提供的hyperparams.yaml文件一并放到best_model。 +best_model文件夹下应包含以下文件: +``` +classifier.ckpt +hyperparams.yaml +embedding_model.ckpt +``` + +3.生成onnx模型文件 +将Tdnn_pth2onnx.py脚本放到speechbrain/templates/speaker_id目录下,并在该目录下执行 +``` +python Tdnn_pth2onnx.py +``` +运行成功后,该目录下将生成tdnn.onnx文件 + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + + +### 3.2 onnx转om模型 + +以下步骤在npu环境进行。 +1.生成om模型文件 +将atc.sh脚本放到speechbrain/templates/speaker_id目录下,并在该目录下执行 +``` +bash atc.sh tdnn.onnx tdnn +``` +运行成功后,将生成tdnn.om模型 + + + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +用户将自行获取的数据集上传到speechbrain/templates/speaker_id目录 + +### 4.2 数据集预处理 +进入speechbrain/templates/speaker_id目录,将mini_librispeech_prepare.py文件的第174行代码random.shuffle(wav_list)注释掉,然后在该目录下执行 +``` +python Tdnn_preprocess.py +``` +预处理后的数据集在新生成的目录mini_librispeech_test_bin中 +### 4.3 生成数据集信息文件 +上一步的数据集预处理会同步生成一个文件mini_librispeech_test.info,该文件即数据集信息文件 + + + +## 5 离线推理 +1.设置离线推理环境 +将acl_net.py, pyacl_infer.py, om_infer.sh三个文件放到speechbrain/templates/speaker_id目录下 +2.执行pyacl离线推理 +在speechbrain/templates/speaker_id目录下执行 +``` +bash om_infer.sh +``` +推理结果将会输出到result目录下 + + + +## 6 精度对比 +- **[离线推理精度](#61-离线推理精度)** +- **[精度对比](#62-精度对比)** + +### 6.1 离线推理精度统计 + +将Tdnn_postprocess.py文件放到speechbrain/templates/speaker_id目录下,并在该目录下执行 +``` +python Tdnn_postprocess.py +``` +精度统计结果将直接输出到控制台 + +### 6.2 精度对比 +pth模型精度99.10%,om模型精度98.69%,模型转换后精度损失不超过1% + **精度调试:** + +>模型转换后精度损失不超过1%,精度达标,故不需要进行精度调试 + + + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** +- **[T4性能数据](#72-T4性能数据)** +- **[性能对比](#73-性能对比)** + +### 7.1 npu性能数据 +该模型不支持benchmark推理,故使用pyacl离线推理获取npu性能数据,npu性能数据为 +``` +average pure infer time(ms):10.24 +``` +Interface throughputRate: 1000/10.24 = 97.37 +310单卡吞吐率:1000/10.54*16 = 1562.50 fps + +### 7.2 T4性能数据 +1.搭建环境 +在T4服务器搭建onnx模型推理环境,然后新建test文件夹,将下列文件上传至该文件夹 +``` +tdnn.onnx文件 +Tdnn_onnx_infer.py文件 +speechbrain/templates/speaker_id目录下的mini_librispeech_test.info文件 +speechbrain/templates/speaker_id目录下的mini_librispeech_bin文件夹及其全部文件 +``` +2.执行在线推理 +在test目录下执行 +``` +python Tdnn_onnx_infer.py +``` +性能数据将会输出到gpu_result目录下 +T4性能基线数据为 +``` +average pure infer time(ms): 12.98 +``` +T4单卡吞吐率:1000/12.98*16 = 1232.67 fps + +### 7.3 性能对比 +单卡吞吐率 +``` +npu-310:1562.50 fps +gpu-t4 :1232.67 fps +``` +310性能高于T4性能,性能达标 + + **性能优化:** +>该模型性能优于T4,不用进行优化 diff --git a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_onnx_infer.py b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_onnx_infer.py index 9e5bcbc80720bb24e81c6fb518c1758c1f7e83df..7b74337de0bd498bc371f8c1445262cfdae3876b 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_onnx_infer.py +++ b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_onnx_infer.py @@ -1,125 +1,125 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import torch -import onnxruntime # WARNING: there must be onnxruntime-gpu only in the running environment ! -import torch.nn as nn -import numpy as np -from tqdm import tqdm -import datetime -import torchaudio - -# step 0: set the running settings here (mainly the file path) -'''-----------------------------------------------------------------------------------''' -model_path = 'tdnn.onnx' # path of the onnx model -input_info_file_path = 'mini_librispeech_test.info' # path of the input_info file -batchsize = 16 # the tested batchsize - -# path of the infer result ( actually the infer time ), create if not exists -infer_res_save_path = './gpu_result' -if not(os.path.exists(infer_res_save_path)): - os.makedirs(infer_res_save_path) - -# original 'MeasureTime' copied from acl_net.py, which is used in pyacl_infer.py -class MeasureTime(): - def __init__(self, measurements, key, cpu_run=True): - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - def __enter__(self): - if not self.cpu_run: - torch.cuda.synchronize() - self.t0 = time.perf_counter_ns() - - def __exit__(self, exc_type, exc_value, exc_traceback): - if not self.cpu_run: - torch.cuda.synchronize() - self.measurements[self.key] = time.perf_counter_ns() - self.t0 -'''-----------------------------------------------------------------------------------''' - -# step 1: get the input file path according to the input_info file -'''-----------------------------------------------------------------------------------''' -input_file_path = {} -with open(input_info_file_path, 'rt', encoding='utf-8') as f_info: - line = f_info.readline() - while line: - line = line.rstrip('\n') - contents = line.split() - info = {'path': contents[1], 'shape': eval(contents[2])} - input_file_path.setdefault(contents[0], []).append(info) - line = f_info.readline() -'''-----------------------------------------------------------------------------------''' - -# step 2: perform infer for files listed in input_file_path -'''-----------------------------------------------------------------------------------''' -if __name__ == '__main__': - # step 2.1: set the counters - total_infer_time = 0 - total_infer_time_workspace = 0 - total_infer_num = 0 - workspace = 10 - measurements = {} - key = 'per_infer_time_ns' - dim_1 = 1800 - cpu_run = False - - # step 2.2: load the model to the onnx running session - # WARNING: there must be onnxruntime-gpu only in the running environment! - # if cpu and gpu exist at the same time, it will get wrong. 
- onnx_run_sess = onnxruntime.InferenceSession(model_path) - - # step 2.3: for each input file, load it and perform the infer - for key, values in tqdm(input_file_path.items()): - # step 2.3.1: load the input data - inputs = [] - dims = [] # dims and dims_info is actually unused here - for idx, value in enumerate(values): - x = np.fromfile(value['path'], dtype=np.float32).reshape(value['shape']) - inputs.append(x) - dims.extend(value['shape']) - dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} - inputs = torch.tensor(np.array(inputs).squeeze(axis = 0)) - pad = dim_1 - inputs.shape[1] - inputs = torch.nn.functional.pad(inputs, (0,0,0,pad,0,0), value=0).numpy() - - # step 2.3.2: perform the infer - with MeasureTime(measurements, key, cpu_run): - _ = onnx_run_sess.run(None, {onnx_run_sess.get_inputs()[0].name:inputs}) - total_infer_num += 1 - - # step 2.3.3: save the output => pass - - # step 2.3.4: calculate the time - total_infer_time += measurements[key] - if total_infer_num > workspace: - total_infer_time_workspace += measurements[key] - - # step 2.4: calculate the infer time needed - now = datetime.datetime.now() - print('[INFO] Infer time:') - msg = 'test at: ' + str(now) + '\n' + \ - 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ - 'average pure infer time after workspace(ms): ' + str(abs( - total_infer_time_workspace / (total_infer_num - workspace) / 1000 / 1000)) + '\n' + '\n\n\n\n' - print(msg) - - result_txt='batch_' + str(batchsize) + '_infer_time.txt' - with open(os.path.join(infer_res_save_path, result_txt), 'a', encoding='utf-8') as f_infer_time: - f_infer_time.write(msg) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import torch +import onnxruntime # WARNING: there must be onnxruntime-gpu only in the running environment ! 
+import torch.nn as nn +import numpy as np +from tqdm import tqdm +import datetime +import torchaudio + +# step 0: set the running settings here (mainly the file path) +'''-----------------------------------------------------------------------------------''' +model_path = 'tdnn.onnx' # path of the onnx model +input_info_file_path = 'mini_librispeech_test.info' # path of the input_info file +batchsize = 16 # the tested batchsize + +# path of the infer result ( actually the infer time ), create if not exists +infer_res_save_path = './gpu_result' +if not(os.path.exists(infer_res_save_path)): + os.makedirs(infer_res_save_path) + +# original 'MeasureTime' copied from acl_net.py, which is used in pyacl_infer.py +class MeasureTime(): + def __init__(self, measurements, key, cpu_run=True): + self.measurements = measurements + self.key = key + self.cpu_run = cpu_run + + def __enter__(self): + if not self.cpu_run: + torch.cuda.synchronize() + self.t0 = time.perf_counter_ns() + + def __exit__(self, exc_type, exc_value, exc_traceback): + if not self.cpu_run: + torch.cuda.synchronize() + self.measurements[self.key] = time.perf_counter_ns() - self.t0 +'''-----------------------------------------------------------------------------------''' + +# step 1: get the input file path according to the input_info file +'''-----------------------------------------------------------------------------------''' +input_file_path = {} +with open(input_info_file_path, 'rt', encoding='utf-8') as f_info: + line = f_info.readline() + while line: + line = line.rstrip('\n') + contents = line.split() + info = {'path': contents[1], 'shape': eval(contents[2])} + input_file_path.setdefault(contents[0], []).append(info) + line = f_info.readline() +'''-----------------------------------------------------------------------------------''' + +# step 2: perform infer for files listed in input_file_path +'''-----------------------------------------------------------------------------------''' +if __name__ == '__main__': + # step 2.1: set the counters + total_infer_time = 0 + total_infer_time_workspace = 0 + total_infer_num = 0 + workspace = 10 + measurements = {} + key = 'per_infer_time_ns' + dim_1 = 1800 + cpu_run = False + + # step 2.2: load the model to the onnx running session + # WARNING: there must be onnxruntime-gpu only in the running environment! + # if cpu and gpu exist at the same time, it will get wrong. 
+ onnx_run_sess = onnxruntime.InferenceSession(model_path) + + # step 2.3: for each input file, load it and perform the infer + for key, values in tqdm(input_file_path.items()): + # step 2.3.1: load the input data + inputs = [] + dims = [] # dims and dims_info is actually unused here + for idx, value in enumerate(values): + x = np.fromfile(value['path'], dtype=np.float32).reshape(value['shape']) + inputs.append(x) + dims.extend(value['shape']) + dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} + inputs = torch.tensor(np.array(inputs).squeeze(axis = 0)) + pad = dim_1 - inputs.shape[1] + inputs = torch.nn.functional.pad(inputs, (0,0,0,pad,0,0), value=0).numpy() + + # step 2.3.2: perform the infer + with MeasureTime(measurements, key, cpu_run): + _ = onnx_run_sess.run(None, {onnx_run_sess.get_inputs()[0].name:inputs}) + total_infer_num += 1 + + # step 2.3.3: save the output => pass + + # step 2.3.4: calculate the time + total_infer_time += measurements[key] + if total_infer_num > workspace: + total_infer_time_workspace += measurements[key] + + # step 2.4: calculate the infer time needed + now = datetime.datetime.now() + print('[INFO] Infer time:') + msg = 'test at: ' + str(now) + '\n' + \ + 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ + 'average pure infer time after workspace(ms): ' + str(abs( + total_infer_time_workspace / (total_infer_num - workspace) / 1000 / 1000)) + '\n' + '\n\n\n\n' + print(msg) + + result_txt='batch_' + str(batchsize) + '_infer_time.txt' + with open(os.path.join(infer_res_save_path, result_txt), 'a', encoding='utf-8') as f_infer_time: + f_infer_time.write(msg) '''-----------------------------------------------------------------------------------''' \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_postprocess.py b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_postprocess.py index 027a48319704a0cca294ab879d01b3c37b0674f9..411a53eeefcd2a320ff6785389f61997f1b2ad78 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_postprocess.py +++ b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_postprocess.py @@ -1,58 +1,58 @@ -# -*- coding: utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import re -import argparse -import numpy as np -label = {0:'3526', 1:'7312', 2:'1088', 3:'32', 4:'460', 5:'7859', 6:'118', 7:'6848', 8:'8629', 9:'163', 10:'2416', 11:'3947', 12:'332', 13:'19', 14:'6272', 15:'7367', 16:'1898', 17:'3664', 18:'2136', 19:'4640', 20:'1867', 21:'1970', 22:'4680', 23:'226', 24:'5789', 25:'3242', 26:'667', 27:'1737'} - -if __name__ == '__main__': - ''' - 参数说明: - --data_info: 数据集信息 - --result_dir: 二进制推理结果目录 - ''' - - # arg parser - parser = argparse.ArgumentParser() - parser.add_argument('--data_info', default='mini_librispeech_test.info') - parser.add_argument('--result_dir', default='result') - - opt = parser.parse_args() - error = 0 - total = 0 - - with open('mini_librispeech_test.info', 'r') as f: - for line in f.readlines(): - # line format example - # 0 mini_librispeech_test_bin/4680-16042-0024.bin (1,1600,23) - split = line.split(' ') - index = split[0] - input_file = split[1] - target = re.search('/(\d*)-', input_file).group()[1:-1] - - # output result/index.0.bin => index range from 0 to 152 - output_file='result/'+index+'.0.bin' - - - output = np.fromfile(output_file, np.float32) - res = np.argmax(output) - print('Predicted:', label[res], 'Target:', target) - total += 1 - if label[res] != target: - error += 1 - accuracy = float(total - error) / total * 100 - print('\nClassification Accuracy: {:.2f}%\n'.format(accuracy)) +# -*- coding: utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import re
+import argparse
+import numpy as np
+label = {0:'3526', 1:'7312', 2:'1088', 3:'32', 4:'460', 5:'7859', 6:'118', 7:'6848', 8:'8629', 9:'163', 10:'2416', 11:'3947', 12:'332', 13:'19', 14:'6272', 15:'7367', 16:'1898', 17:'3664', 18:'2136', 19:'4640', 20:'1867', 21:'1970', 22:'4680', 23:'226', 24:'5789', 25:'3242', 26:'667', 27:'1737'}
+
+if __name__ == '__main__':
+    '''
+    参数说明:
+    --data_info: 数据集信息
+    --result_dir: 二进制推理结果目录
+    '''
+
+    # arg parser
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data_info', default='mini_librispeech_test.info')
+    parser.add_argument('--result_dir', default='result')
+
+    opt = parser.parse_args()
+    error = 0
+    total = 0
+
+    with open(opt.data_info, 'r') as f:
+        for line in f.readlines():
+            # line format example
+            # 0 mini_librispeech_test_bin/4680-16042-0024.bin (1,1600,23)
+            split = line.split(' ')
+            index = split[0]
+            input_file = split[1]
+            target = re.search(r'/(\d*)-', input_file).group()[1:-1]
+
+            # output result/index.0.bin => index range from 0 to 152
+            output_file = os.path.join(opt.result_dir, index + '.0.bin')
+
+
+            output = np.fromfile(output_file, np.float32)
+            res = np.argmax(output)
+            print('Predicted:', label[res], 'Target:', target)
+            total += 1
+            if label[res] != target:
+                error += 1
+    accuracy = float(total - error) / total * 100
+    print('\nClassification Accuracy: {:.2f}%\n'.format(accuracy))
diff --git a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_preprocess.py b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_preprocess.py
index dcaa76a0e84e9ad2a27781d71f371cbd4b478d57..855ec5fa537a458e96f77d43a7915a34283a0655 100644
--- a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_preprocess.py
+++ b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_preprocess.py
@@ -1,52 +1,52 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import json -import torchaudio -import torch.nn.functional as F -from speechbrain.pretrained import EncoderClassifier -from mini_librispeech_prepare import prepare_mini_librispeech - -prepare_mini_librispeech(data_folder='data', save_json_train='train.json', save_json_valid='valid.json', - save_json_test='test.json', split_ratio=[0, 0, 100]) - -if not os.path.exists('mini_librispeech_test_bin'): - os.makedirs('mini_librispeech_test_bin') - -file = open('mini_librispeech_test.info', 'w') -classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') - -with open('test.json', 'r') as f: - data_info = json.load(f) - i = 0 - - for key, value in data_info.items(): - wav_file = 'data' + value['wav'][11:] # prefix length 11 - signal, fs = torchaudio.load(wav_file) - feats = classifier.extract_feats(signal) - # pad signal - pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1] - feats = F.pad(feats, (0,0,0,pad,0,0), value=0) - - # dump bin file - output = 'mini_librispeech_test_bin/' + value['wav'].split('/')[-1][:-4] + 'bin' - feats.numpy().tofile(output) - # write shape info - file.write(str(i) + ' ' + output + ' (' + str(feats.shape[0]) + ',' + str(feats.shape[1]) + ',' + str(feats.shape[2]) + ')') - file.write('\n') - i += 1 - - print('data preprocess done') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import json +import torchaudio +import torch.nn.functional as F +from speechbrain.pretrained import EncoderClassifier +from mini_librispeech_prepare import prepare_mini_librispeech + +prepare_mini_librispeech(data_folder='data', save_json_train='train.json', save_json_valid='valid.json', + save_json_test='test.json', split_ratio=[0, 0, 100]) + +if not os.path.exists('mini_librispeech_test_bin'): + os.makedirs('mini_librispeech_test_bin') + +file = open('mini_librispeech_test.info', 'w') +classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') + +with open('test.json', 'r') as f: + data_info = json.load(f) + i = 0 + + for key, value in data_info.items(): + wav_file = 'data' + value['wav'][11:] # prefix length 11 + signal, fs = torchaudio.load(wav_file) + feats = classifier.extract_feats(signal) + # pad signal + pad = (feats.shape[1] // 100 + 1) * 100 - feats.shape[1] + feats = F.pad(feats, (0,0,0,pad,0,0), value=0) + + # dump bin file + output = 'mini_librispeech_test_bin/' + value['wav'].split('/')[-1][:-4] + 'bin' + feats.numpy().tofile(output) + # write shape info + file.write(str(i) + ' ' + output + ' (' + str(feats.shape[0]) + ',' + str(feats.shape[1]) + ',' + str(feats.shape[2]) + ')') + file.write('\n') + i += 1 + + print('data preprocess done') file.close() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pth2onnx.py b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pth2onnx.py index 4d6b3b31e4c019a1b3f84c33d651344cb58f5047..f9632d4ecd9c7c619cbe9ca44aa3cca6efb55a8d 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pth2onnx.py +++ b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pth2onnx.py @@ -1,44 +1,44 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torchaudio -from speechbrain.pretrained.interfaces import EncoderClassifier - -classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') - -# Download Thai language sample from Omniglot -class Xvector(torch.nn.Module): - def __init__(self, model): - super().__init__() - self.classifier = model - - def forward(self, feats): - res = self.classifier.feats_classify(feats) - return res - -model = Xvector(classifier) -feats = torch.randn([1, 1800, 23]) - -torch.onnx.export( - model, - feats, - 'tdnn.onnx', - input_names=['feats'], - output_names=['output'], - export_params=True, - do_constant_folding=True, - verbose=True, - opset_version=11 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchaudio +from speechbrain.pretrained.interfaces import EncoderClassifier + +classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model') + +# Download Thai language sample from Omniglot +class Xvector(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.classifier = model + + def forward(self, feats): + res = self.classifier.feats_classify(feats) + return res + +model = Xvector(classifier) +feats = torch.randn([1, 1800, 23]) + +torch.onnx.export( + model, + feats, + 'tdnn.onnx', + input_names=['feats'], + output_names=['output'], + export_params=True, + do_constant_folding=True, + verbose=True, + opset_version=11 ) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pyacl_infer.py b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pyacl_infer.py index fa36db8401ebb27d38477ee2655fef9860846e15..d488b0c10a14e7ada3a6fff092d8b66527475ab0 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pyacl_infer.py +++ b/ACL_PyTorch/contrib/audio/tdnn/Tdnn_pyacl_infer.py @@ -1,176 +1,176 @@ -# Copyright 2018 NVIDIA Corporation. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import acl -from acl_net import AclModel - - -import os -import shutil - - -import argparse -import numpy as np -from tqdm import tqdm - -DTYPE = { - 'float32': np.float32, - 'float64': np.float64, - 'int32': np.int32, - 'int64': np.int64 -} - -if __name__ == '__main__': - ''' - 参数说明: - --model_path:模型路径 - --device_id:npu id - --cpu_run:MeasureTime类的cpu_run参数,True or False - --sync_infer:推理方式: - True:同步推理 - False:异步推理 - --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 - --input_info_file_path:类似benchmark的bin_info文件 - --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) - e.g. 模型只有一个输入:--input_dtypes=float32 - e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) - --infer_res_save_path:推理结果保存目录 - --res_save_type:推理结果保存类型,bin或npy - - info文件说明: - 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. 
- ``` - 0 ./bert_bin/input_ids_0.bin (1,512) - 0 ./bert_bin/segment_ids_0.bin (1,512) - 0 ./bert_bin/input_mask_0.bin (1,512) - 1 ./bert_bin/input_ids_1.bin (1,512) - 1 ./bert_bin/segment_ids_1.bin (1,512) - 1 ./bert_bin/input_mask_1.bin (1,512) - ``` - - Using Example: - python3.7 pyacl_infer.py \ - --model_path=./bert_base_batch_1_sim_auto.om \ - --device_id=0 \ - --cpu_run=True \ - --sync_infer=True \ - --workspace=10 \ - --input_info_file_path=./input.info \ - --input_dtypes=int64,int64,int64 \ - --infer_res_save_path=./infer_res \ - --res_save_type=bin - ''' - - # 参数解析 - parser = argparse.ArgumentParser() - parser.add_argument('--model_path', required=True) - parser.add_argument('--device_id', required=True, type=int) - parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) - parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) - parser.add_argument('--workspace', required=True, type=int) - parser.add_argument('--input_info_file_path', required=True) - parser.add_argument('--input_dtypes', required=True) - parser.add_argument('--infer_res_save_path', required=True) - parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) - opt = parser.parse_args() - - - # 创建模型 - measurements = {} - om_model = AclModel(device_id=opt.device_id, - model_path=opt.model_path, - sync_infer=eval(opt.sync_infer), - measurements=measurements, - key='per_infer_time_ns', - cpu_run=eval(opt.cpu_run)) - - - # 创建目录 - if os.path.exists(opt.infer_res_save_path): - shutil.rmtree(opt.infer_res_save_path) - os.makedirs(opt.infer_res_save_path) - - - # 读取info_file - inputs_info = {} - with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: - line = f_info.readline() - while line: - line = line.rstrip('\n') - contents = line.split() - info = {'path': contents[1], 'shape': eval(contents[2])} - inputs_info.setdefault(contents[0], []).append(info) - line = f_info.readline() - - - # 解析输入类型 - input_dtypes = opt.input_dtypes.split(',') - input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) - - - # 读取文件推理 - total_infer_time = 0 - total_infer_time_workspace = 0 - total_infer_num = 0 - for key, values in tqdm(inputs_info.items()): - # 构造输入 - inputs = [] - dims = [] - for idx, value in enumerate(values): - x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) - inputs.append(x) - dims.extend(value['shape']) - dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} - - # 推理得到输出 - output = om_model(inputs, dims_info) - total_infer_num += 1 - - # 保存文件 - if opt.res_save_type == 'bin': - for idx, data in enumerate(output): - data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) - else: - for idx, data in enumerate(output): - np.save(os.path.join(opt.infer_res_save_path, key + '.' 
+ str(idx) + '.npy'), data) - - # 计算时间 - total_infer_time += measurements['per_infer_time_ns'] - if total_infer_num > opt.workspace: - total_infer_time_workspace += measurements['per_infer_time_ns'] - - # 推理时间 - print('[INFO] Infer time:') - msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ - 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ - 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ - 'average pure infer time after workspace(ms): ' + str(abs( - total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' - print(msg) - with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: - f_infer_time.write(msg) +# Copyright 2018 NVIDIA Corporation. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import acl +from acl_net import AclModel + + +import os +import shutil + + +import argparse +import numpy as np +from tqdm import tqdm + +DTYPE = { + 'float32': np.float32, + 'float64': np.float64, + 'int32': np.int32, + 'int64': np.int64 +} + +if __name__ == '__main__': + ''' + 参数说明: + --model_path:模型路径 + --device_id:npu id + --cpu_run:MeasureTime类的cpu_run参数,True or False + --sync_infer:推理方式: + True:同步推理 + False:异步推理 + --workspace:类似TensorRT `workspace`参数,计算平均推理时间时排除前n次推理 + --input_info_file_path:类似benchmark的bin_info文件 + --input_dtypes:模型输入的类型,用逗号分割(`DTYPE`变量) + e.g. 模型只有一个输入:--input_dtypes=float32 + e.g. 模型有多个输入:--input_dtypes=float32,float32,float32(需要和bin_info文件多输入排列一致) + --infer_res_save_path:推理结果保存目录 + --res_save_type:推理结果保存类型,bin或npy + + info文件说明: + 因为支持动态shape,相比于benchmark的info文件,需要多加一列shape信息,e.g. 
+ ``` + 0 ./bert_bin/input_ids_0.bin (1,512) + 0 ./bert_bin/segment_ids_0.bin (1,512) + 0 ./bert_bin/input_mask_0.bin (1,512) + 1 ./bert_bin/input_ids_1.bin (1,512) + 1 ./bert_bin/segment_ids_1.bin (1,512) + 1 ./bert_bin/input_mask_1.bin (1,512) + ``` + + Using Example: + python3.7 pyacl_infer.py \ + --model_path=./bert_base_batch_1_sim_auto.om \ + --device_id=0 \ + --cpu_run=True \ + --sync_infer=True \ + --workspace=10 \ + --input_info_file_path=./input.info \ + --input_dtypes=int64,int64,int64 \ + --infer_res_save_path=./infer_res \ + --res_save_type=bin + ''' + + # 参数解析 + parser = argparse.ArgumentParser() + parser.add_argument('--model_path', required=True) + parser.add_argument('--device_id', required=True, type=int) + parser.add_argument('--cpu_run', required=True, choices=['True', 'False']) + parser.add_argument('--sync_infer', required=True, choices=['True', 'False']) + parser.add_argument('--workspace', required=True, type=int) + parser.add_argument('--input_info_file_path', required=True) + parser.add_argument('--input_dtypes', required=True) + parser.add_argument('--infer_res_save_path', required=True) + parser.add_argument('--res_save_type', required=True, choices=['bin', 'npy']) + opt = parser.parse_args() + + + # 创建模型 + measurements = {} + om_model = AclModel(device_id=opt.device_id, + model_path=opt.model_path, + sync_infer=eval(opt.sync_infer), + measurements=measurements, + key='per_infer_time_ns', + cpu_run=eval(opt.cpu_run)) + + + # 创建目录 + if os.path.exists(opt.infer_res_save_path): + shutil.rmtree(opt.infer_res_save_path) + os.makedirs(opt.infer_res_save_path) + + + # 读取info_file + inputs_info = {} + with open(opt.input_info_file_path, 'rt', encoding='utf-8') as f_info: + line = f_info.readline() + while line: + line = line.rstrip('\n') + contents = line.split() + info = {'path': contents[1], 'shape': eval(contents[2])} + inputs_info.setdefault(contents[0], []).append(info) + line = f_info.readline() + + + # 解析输入类型 + input_dtypes = opt.input_dtypes.split(',') + input_dtypes = list(map(lambda x: DTYPE[x], input_dtypes)) + + + # 读取文件推理 + total_infer_time = 0 + total_infer_time_workspace = 0 + total_infer_num = 0 + for key, values in tqdm(inputs_info.items()): + # 构造输入 + inputs = [] + dims = [] + for idx, value in enumerate(values): + x = np.fromfile(value['path'], dtype=input_dtypes[idx]).reshape(value['shape']) + inputs.append(x) + dims.extend(value['shape']) + dims_info = {'dimCount': len(dims), 'name': '', 'dims': dims} + + # 推理得到输出 + output = om_model(inputs, dims_info) + total_infer_num += 1 + + # 保存文件 + if opt.res_save_type == 'bin': + for idx, data in enumerate(output): + data.tofile(os.path.join(opt.infer_res_save_path, key + '.' + str(idx) + '.bin')) + else: + for idx, data in enumerate(output): + np.save(os.path.join(opt.infer_res_save_path, key + '.' 
+ str(idx) + '.npy'), data) + + # 计算时间 + total_infer_time += measurements['per_infer_time_ns'] + if total_infer_num > opt.workspace: + total_infer_time_workspace += measurements['per_infer_time_ns'] + + # 推理时间 + print('[INFO] Infer time:') + msg = 'total infer num: ' + str(total_infer_num) + '\n' + \ + 'total pure infer time(ms): ' + str(total_infer_time / 1000 / 1000) + '\n' + \ + 'average pure infer time(ms): ' + str(total_infer_time / total_infer_num / 1000 / 1000) + '\n' + \ + 'average pure infer time after workspace(ms): ' + str(abs( + total_infer_time_workspace / (total_infer_num - opt.workspace) / 1000 / 1000)) + '\n' + print(msg) + with open(os.path.join(opt.infer_res_save_path, 'infer_time.txt'), 'wt', encoding='utf-8') as f_infer_time: + f_infer_time.write(msg) diff --git a/ACL_PyTorch/contrib/audio/tdnn/acl_net.py b/ACL_PyTorch/contrib/audio/tdnn/acl_net.py index f3ec7a6b5b07758039d9861542fc09b445c17b07..d7f893b3f3e065c8eb084968173557e44595e5d3 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/acl_net.py +++ b/ACL_PyTorch/contrib/audio/tdnn/acl_net.py @@ -1,276 +1,276 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import acl -import functools -import numpy as np -import torch -import time - -# error code -ACL_ERROR_NONE = 0 - -# rule for memory copy -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -# dtype -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception(f"{message} failed ret={ret}") - - -class MeasureTime(): - def __init__(self, measurements, key, cpu_run=True): - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - def __enter__(self): - if not self.cpu_run: - torch.cuda.synchronize() - self.t0 = time.perf_counter_ns() - - def __exit__(self, exc_type, exc_value, exc_traceback): - if not self.cpu_run: - torch.cuda.synchronize() - self.measurements[self.key] = time.perf_counter_ns() - self.t0 - - -class AclModel(object): - def __init__(self, device_id, model_path, sync_infer, measurements, key, cpu_run): - self.device_id = device_id - self.sync_infer = sync_infer - self.out_bufs_ptr = [] - self.output_sizes = [] - self.input_sizes = [] - self.input_bufs_ptr = [] - - self.measurements = measurements - self.key = key - self.cpu_run = cpu_run - - ret = acl.init() - check_ret("acl.init", ret) - ret = acl.rt.set_device(self.device_id) - check_ret("acl.rt.set_device", ret) - self.context, ret = acl.rt.create_context(self.device_id) - check_ret("acl.rt.create_context", ret) - self.model_id, ret = acl.mdl.load_from_file(model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - assert self.model_desc is not None - acl.mdl.get_desc(self.model_desc, self.model_id) - 
self.dataset_in = acl.mdl.create_dataset() - assert self.dataset_in is not None - self.dataset_out = acl.mdl.create_dataset() - assert self.dataset_out is not None - self.in_size, self.out_size = 0, 0 - self.stm, ret = acl.rt.create_stream() - assert ret == 0 - - self.desc_init() - self.dataset_init() - - def __call__(self, ori_data, dim): - return self.forward(ori_data, dim) - - def __del__(self): - # unload model - if self.model_id: - ret = acl.mdl.unload(self.model_id) - assert ret == 0 - - # destroy model desc - ret = acl.mdl.destroy_desc(self.model_desc) - assert ret == 0 - - self.destroy_data_set(self.dataset_in) - self.destroy_data_set(self.dataset_out) - - # destroy input/output tensor - for i in range(len(self.input_bufs_ptr)): - acl.rt.free(self.input_bufs_ptr[i]["buffer"]) - self.input_bufs_ptr[i] = None - - for i in range(len(self.out_bufs_ptr)): - acl.rt.free(self.out_bufs_ptr[i]["buffer"]) - self.out_bufs_ptr[i] = None - - ret = acl.rt.destroy_stream(self.stm) - assert ret == 0 - - def desc_init(self): - tensor_size = acl.mdl.get_num_inputs(self.model_desc) - if not tensor_size: - raise Exception("get_num_inputs failed") - self.in_size = tensor_size - - for i in range(tensor_size): - size = acl.mdl.get_input_size_by_index(self.model_desc, i) - data, ret = acl.rt.malloc(size, 0) - assert ret == 0 - - self.input_bufs_ptr.append({'size': size, 'buffer': data}) - self.input_sizes.append(size) - - tensor_size = acl.mdl.get_num_outputs(self.model_desc) - self.out_size = tensor_size - for i in range(tensor_size): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - assert ret == 0 - size = acl.mdl.get_output_size_by_index(self.model_desc, i) - - data, ret = acl.rt.malloc(size, 0) - assert ret == 0 - - self.output_sizes.append(size) - self.out_bufs_ptr.append({'size': size, 'buffer': data}) - - def dataset_init(self): - self.create_data_set(self.dataset_in, self.input_bufs_ptr, self.input_sizes) - self.create_data_set(self.dataset_out, self.out_bufs_ptr, self.output_sizes) - - def create_data_set(self, dataset, bufs_ptr_list, size_list): - # create dataset buffer then add to dataset - for i in range(len(size_list)): - buffer = acl.create_data_buffer(bufs_ptr_list[i]["buffer"], size_list[i]) - if not buffer: - self.destroy_data_set(dataset) - raise Exception("create_data_buffer failed") - - # add to dataset - _, ret = acl.mdl.add_dataset_buffer(dataset, buffer) - if ret != 0: - self.destroy_data_set(dataset) - raise Exception("add_dataset_buffer failed, ret = {}".format(ret)) - - return dataset - - def destroy_data_set(self, dataset): - data_buf_num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(data_buf_num): - # get data buffer by index - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf is not None: - acl.destroy_data_buffer(data_buf) - - acl.mdl.destroy_dataset(dataset) - - def copy_data_to_device(self, data): - for i in range(len(data)): - ptr, np = acl.util.numpy_contiguous_to_ptr(data[i]["buffer"]) - acl.rt.memcpy(self.input_bufs_ptr[i]["buffer"], data[i]["size"], ptr, - data[i]["size"], ACL_MEMCPY_HOST_TO_DEVICE) - - def copy_output_to_host(self): - output_data = [] - for i in range(len(self.out_bufs_ptr)): - temp = dict() - temp["size"] = self.out_bufs_ptr[i]["size"] - temp["buffer"], ret = acl.rt.malloc_host(temp["size"]) - output_data.append(temp) - acl.rt.memcpy(temp["buffer"], temp["size"], self.out_bufs_ptr[i]["buffer"], - temp["size"], ACL_MEMCPY_DEVICE_TO_HOST) - - return output_data - - def model_exe(self): - with 
MeasureTime(self.measurements, self.key, self.cpu_run): - ret = acl.mdl.execute(self.model_id, self.dataset_in, self.dataset_out) - assert ret == 0 - output_data = self.copy_output_to_host() - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset - - def model_exe_async(self): - with MeasureTime(self.measurements, self.key, self.cpu_run): - ret = acl.mdl.execute_async(self.model_id, self.dataset_in, self.dataset_out, self.stm) - assert ret == 0 - ret = acl.rt.synchronize_stream(self.stm) - assert ret == 0 - output_data = self.copy_output_to_host() - - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - # check_ret("acl.mdl.get_cur_output_dims", ret) - data_shape = dims.get("dims") - - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_arr = np_arr.reshape(data_shape) - dataset.append(np_arr) - return dataset - - def model_exe_with_dynamic_dims(self, input_data, dims): - index, ret = acl.mdl.get_input_index_by_name(self.model_desc, 'ascend_mbatch_shape_data') - ret = acl.mdl.set_input_dynamic_dims(self.model_id, self.dataset_in, index, dims) - gear_count, ret = acl.mdl.get_input_dynamic_gear_count(self.model_desc, -1) - dims_out, ret = acl.mdl.get_input_dynamic_dims(self.model_desc, -1, gear_count) - self.copy_data_to_device(input_data) - if self.sync_infer is True: - res = self.model_exe() - else: - res = self.model_exe_async() - - return res - - def forward(self, input_data, dims): - input_data_dic = [] - for i in range(len(input_data)): - temp = {} - temp["size"] = input_data[i].size * input_data[i].itemsize - temp["buffer"] = input_data[i] - input_data_dic.append(temp) - result = self.model_exe_with_dynamic_dims(input_data_dic, dims) - return result +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
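`AclModel`, shown above in its removed form and re-added unchanged below, is the pyACL wrapper that `Tdnn_pyacl_infer.py` drives. A hypothetical minimal driver for a single dynamic-dims input; the `.om` and `.bin` paths are placeholders, and an Ascend device with the pyACL runtime is assumed:

```
import numpy as np
from acl_net import AclModel

# Placeholders: the .om file comes from the ATC conversion (atc.sh below),
# the .bin file from the preprocessing step.
measurements = {}
model = AclModel(device_id=0, model_path='tdnn.om', sync_infer=True,
                 measurements=measurements, key='per_infer_time_ns', cpu_run=True)

feats = np.fromfile('mini_librispeech_test_bin/sample.bin',
                    dtype=np.float32).reshape(1, 1800, 23)
dims_info = {'dimCount': 3, 'name': '', 'dims': [1, 1800, 23]}   # must match an ATC gear
outputs = model([feats], dims_info)                               # list of numpy arrays
print(outputs[0].shape, measurements['per_infer_time_ns'])
```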
+ +import acl +import functools +import numpy as np +import torch +import time + +# error code +ACL_ERROR_NONE = 0 + +# rule for memory copy +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +# dtype +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception(f"{message} failed ret={ret}") + + +class MeasureTime(): + def __init__(self, measurements, key, cpu_run=True): + self.measurements = measurements + self.key = key + self.cpu_run = cpu_run + + def __enter__(self): + if not self.cpu_run: + torch.cuda.synchronize() + self.t0 = time.perf_counter_ns() + + def __exit__(self, exc_type, exc_value, exc_traceback): + if not self.cpu_run: + torch.cuda.synchronize() + self.measurements[self.key] = time.perf_counter_ns() - self.t0 + + +class AclModel(object): + def __init__(self, device_id, model_path, sync_infer, measurements, key, cpu_run): + self.device_id = device_id + self.sync_infer = sync_infer + self.out_bufs_ptr = [] + self.output_sizes = [] + self.input_sizes = [] + self.input_bufs_ptr = [] + + self.measurements = measurements + self.key = key + self.cpu_run = cpu_run + + ret = acl.init() + check_ret("acl.init", ret) + ret = acl.rt.set_device(self.device_id) + check_ret("acl.rt.set_device", ret) + self.context, ret = acl.rt.create_context(self.device_id) + check_ret("acl.rt.create_context", ret) + self.model_id, ret = acl.mdl.load_from_file(model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + assert self.model_desc is not None + acl.mdl.get_desc(self.model_desc, self.model_id) + self.dataset_in = acl.mdl.create_dataset() + assert self.dataset_in is not None + self.dataset_out = acl.mdl.create_dataset() + assert self.dataset_out is not None + self.in_size, self.out_size = 0, 0 + self.stm, ret = acl.rt.create_stream() + assert ret == 0 + + self.desc_init() + self.dataset_init() + + def __call__(self, ori_data, dim): + return self.forward(ori_data, dim) + + def __del__(self): + # unload model + if self.model_id: + ret = acl.mdl.unload(self.model_id) + assert ret == 0 + + # destroy model desc + ret = acl.mdl.destroy_desc(self.model_desc) + assert ret == 0 + + self.destroy_data_set(self.dataset_in) + self.destroy_data_set(self.dataset_out) + + # destroy input/output tensor + for i in range(len(self.input_bufs_ptr)): + acl.rt.free(self.input_bufs_ptr[i]["buffer"]) + self.input_bufs_ptr[i] = None + + for i in range(len(self.out_bufs_ptr)): + acl.rt.free(self.out_bufs_ptr[i]["buffer"]) + self.out_bufs_ptr[i] = None + + ret = acl.rt.destroy_stream(self.stm) + assert ret == 0 + + def desc_init(self): + tensor_size = acl.mdl.get_num_inputs(self.model_desc) + if not tensor_size: + raise Exception("get_num_inputs failed") + self.in_size = tensor_size + + for i in range(tensor_size): + size = acl.mdl.get_input_size_by_index(self.model_desc, i) + data, ret = acl.rt.malloc(size, 0) + assert ret == 0 + + self.input_bufs_ptr.append({'size': size, 'buffer': data}) + self.input_sizes.append(size) + + tensor_size = acl.mdl.get_num_outputs(self.model_desc) + self.out_size = tensor_size + for i in range(tensor_size): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + assert ret == 0 + size = acl.mdl.get_output_size_by_index(self.model_desc, i) + + data, ret = 
acl.rt.malloc(size, 0) + assert ret == 0 + + self.output_sizes.append(size) + self.out_bufs_ptr.append({'size': size, 'buffer': data}) + + def dataset_init(self): + self.create_data_set(self.dataset_in, self.input_bufs_ptr, self.input_sizes) + self.create_data_set(self.dataset_out, self.out_bufs_ptr, self.output_sizes) + + def create_data_set(self, dataset, bufs_ptr_list, size_list): + # create dataset buffer then add to dataset + for i in range(len(size_list)): + buffer = acl.create_data_buffer(bufs_ptr_list[i]["buffer"], size_list[i]) + if not buffer: + self.destroy_data_set(dataset) + raise Exception("create_data_buffer failed") + + # add to dataset + _, ret = acl.mdl.add_dataset_buffer(dataset, buffer) + if ret != 0: + self.destroy_data_set(dataset) + raise Exception("add_dataset_buffer failed, ret = {}".format(ret)) + + return dataset + + def destroy_data_set(self, dataset): + data_buf_num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(data_buf_num): + # get data buffer by index + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf is not None: + acl.destroy_data_buffer(data_buf) + + acl.mdl.destroy_dataset(dataset) + + def copy_data_to_device(self, data): + for i in range(len(data)): + ptr, np = acl.util.numpy_contiguous_to_ptr(data[i]["buffer"]) + acl.rt.memcpy(self.input_bufs_ptr[i]["buffer"], data[i]["size"], ptr, + data[i]["size"], ACL_MEMCPY_HOST_TO_DEVICE) + + def copy_output_to_host(self): + output_data = [] + for i in range(len(self.out_bufs_ptr)): + temp = dict() + temp["size"] = self.out_bufs_ptr[i]["size"] + temp["buffer"], ret = acl.rt.malloc_host(temp["size"]) + output_data.append(temp) + acl.rt.memcpy(temp["buffer"], temp["size"], self.out_bufs_ptr[i]["buffer"], + temp["size"], ACL_MEMCPY_DEVICE_TO_HOST) + + return output_data + + def model_exe(self): + with MeasureTime(self.measurements, self.key, self.cpu_run): + ret = acl.mdl.execute(self.model_id, self.dataset_in, self.dataset_out) + assert ret == 0 + output_data = self.copy_output_to_host() + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset + + def model_exe_async(self): + with MeasureTime(self.measurements, self.key, self.cpu_run): + ret = acl.mdl.execute_async(self.model_id, self.dataset_in, self.dataset_out, self.stm) + assert ret == 0 + ret = acl.rt.synchronize_stream(self.stm) + assert ret == 0 + output_data = self.copy_output_to_host() + + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + # check_ret("acl.mdl.get_cur_output_dims", ret) + data_shape = dims.get("dims") + + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_arr = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + np_arr = 
np_arr.reshape(data_shape) + dataset.append(np_arr) + return dataset + + def model_exe_with_dynamic_dims(self, input_data, dims): + index, ret = acl.mdl.get_input_index_by_name(self.model_desc, 'ascend_mbatch_shape_data') + ret = acl.mdl.set_input_dynamic_dims(self.model_id, self.dataset_in, index, dims) + gear_count, ret = acl.mdl.get_input_dynamic_gear_count(self.model_desc, -1) + dims_out, ret = acl.mdl.get_input_dynamic_dims(self.model_desc, -1, gear_count) + self.copy_data_to_device(input_data) + if self.sync_infer is True: + res = self.model_exe() + else: + res = self.model_exe_async() + + return res + + def forward(self, input_data, dims): + input_data_dic = [] + for i in range(len(input_data)): + temp = {} + temp["size"] = input_data[i].size * input_data[i].itemsize + temp["buffer"] = input_data[i] + input_data_dic.append(temp) + result = self.model_exe_with_dynamic_dims(input_data_dic, dims) + return result diff --git a/ACL_PyTorch/contrib/audio/tdnn/atc.sh b/ACL_PyTorch/contrib/audio/tdnn/atc.sh index cede3be1b0165794da2b13e8b1d7c10196c7eafe..a2e925085516ccc447a5d65da86f89cf4331927f 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/atc.sh +++ b/ACL_PyTorch/contrib/audio/tdnn/atc.sh @@ -1,9 +1,9 @@ -#!/bin/bash -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -#export DUMP_GE_GRAPH=2 - +#!/bin/bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +#export DUMP_GE_GRAPH=2 + atc --model=$1 --framework=5 --input_format=ND --input_shape="feats:1,-1,23" --dynamic_dims='200;300;400;500;600;700;800;900;1000;1100;1200;1300;1400;1500;1600;1700;1800' --output=$2 --soc_version=Ascend310 --log=info \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/hyperparams.yaml b/ACL_PyTorch/contrib/audio/tdnn/hyperparams.yaml index a24b5a03b29a7e195d2c3660d8e7848428dd7dee..0407ff5d223fd285d1334a57205b468414f30346 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/hyperparams.yaml +++ b/ACL_PyTorch/contrib/audio/tdnn/hyperparams.yaml @@ -1,67 +1,67 @@ - - -# ################################# -# Basic inference parameters for speaker-id. We have first a network that -# computes some embeddings. On the top of that, we employ a classifier. 
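The `--dynamic_dims` gears above only cover time lengths from 200 to 1800 in steps of 100, which is why the preprocessing step pads each utterance's time axis up to the next multiple of 100. A small standalone sketch of that relationship:

```
# The gear list mirrors the --dynamic_dims argument in atc.sh; padded_len
# mirrors the padding formula in the preprocessing script.
gears = list(range(200, 1900, 100))

def padded_len(t):
    return (t // 100 + 1) * 100

for t in (137, 1772):
    p = padded_len(t)
    assert p in gears, f'length {t} pads to {p}, outside the supported gears'
    print(t, '->', p)
```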
-# -# Author: -# * Mirco Ravanelli 2021 -# ################################# - -# pretrain folders: -pretrained_path: best_model - - -# Model parameters -n_mels: 23 -sample_rate: 16000 -n_classes: 28 # In this case, we have 28 speakers -emb_dim: 512 # dimensionality of the embeddings - -# Feature extraction -compute_features: !new:speechbrain.lobes.features.Fbank - n_mels: !ref - -# Mean and std normalization of the input features -mean_var_norm: !new:speechbrain.processing.features.InputNormalization - norm_type: sentence - std_norm: False - -# To design a custom model, either just edit the simple CustomModel -# class that's listed here, or replace this `!new` call with a line -# pointing to a different file you've defined. -embedding_model: !new:custom_model.Xvector - in_channels: !ref - activation: !name:torch.nn.LeakyReLU - tdnn_blocks: 5 - tdnn_channels: [512, 512, 512, 512, 1500] - tdnn_kernel_sizes: [5, 3, 3, 1, 1] - tdnn_dilations: [1, 2, 3, 1, 1] - lin_neurons: !ref - -classifier: !new:custom_model.Classifier - input_shape: [null, null, !ref ] - activation: !name:torch.nn.LeakyReLU - lin_blocks: 1 - lin_neurons: !ref - out_neurons: !ref - -label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder - -# Objects in "modules" dict will have their parameters moved to the correct -# device, as well as having train()/eval() called on them by the Brain class. -modules: - compute_features: !ref - embedding_model: !ref - classifier: !ref - mean_var_norm: !ref - -pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer - loadables: - embedding_model: !ref - classifier: !ref - label_encoder: !ref - paths: - embedding_model: !ref /embedding_model.ckpt - classifier: !ref /classifier.ckpt - label_encoder: !ref /label_encoder.txt + + +# ################################# +# Basic inference parameters for speaker-id. We have first a network that +# computes some embeddings. On the top of that, we employ a classifier. +# +# Author: +# * Mirco Ravanelli 2021 +# ################################# + +# pretrain folders: +pretrained_path: best_model + + +# Model parameters +n_mels: 23 +sample_rate: 16000 +n_classes: 28 # In this case, we have 28 speakers +emb_dim: 512 # dimensionality of the embeddings + +# Feature extraction +compute_features: !new:speechbrain.lobes.features.Fbank + n_mels: !ref + +# Mean and std normalization of the input features +mean_var_norm: !new:speechbrain.processing.features.InputNormalization + norm_type: sentence + std_norm: False + +# To design a custom model, either just edit the simple CustomModel +# class that's listed here, or replace this `!new` call with a line +# pointing to a different file you've defined. +embedding_model: !new:custom_model.Xvector + in_channels: !ref + activation: !name:torch.nn.LeakyReLU + tdnn_blocks: 5 + tdnn_channels: [512, 512, 512, 512, 1500] + tdnn_kernel_sizes: [5, 3, 3, 1, 1] + tdnn_dilations: [1, 2, 3, 1, 1] + lin_neurons: !ref + +classifier: !new:custom_model.Classifier + input_shape: [null, null, !ref ] + activation: !name:torch.nn.LeakyReLU + lin_blocks: 1 + lin_neurons: !ref + out_neurons: !ref + +label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder + +# Objects in "modules" dict will have their parameters moved to the correct +# device, as well as having train()/eval() called on them by the Brain class. 
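This `hyperparams.yaml` is what `EncoderClassifier.from_hparams(source='best_model')` resolves at load time. A sketch of loading it directly with hyperpyyaml, assuming the file sits in the `best_model` folder alongside the listed checkpoints and that `custom_model.py` is importable:

```
from hyperpyyaml import load_hyperpyyaml

# Load the inference hyperparameters directly; EncoderClassifier.from_hparams
# does the same thing plus fetching, via the pretrainer declared in the YAML.
with open('best_model/hyperparams.yaml') as fin:      # path is an assumption
    hparams = load_hyperpyyaml(fin)

pretrainer = hparams['pretrainer']
pretrainer.set_collect_in('best_model')
pretrainer.collect_files()
pretrainer.load_collected(device='cpu')
modules = hparams['modules']   # compute_features, embedding_model, classifier, mean_var_norm
```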
+modules: + compute_features: !ref + embedding_model: !ref + classifier: !ref + mean_var_norm: !ref + +pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer + loadables: + embedding_model: !ref + classifier: !ref + label_encoder: !ref + paths: + embedding_model: !ref /embedding_model.ckpt + classifier: !ref /classifier.ckpt + label_encoder: !ref /label_encoder.txt diff --git a/ACL_PyTorch/contrib/audio/tdnn/interfaces.py b/ACL_PyTorch/contrib/audio/tdnn/interfaces.py index ddbf99b74bd4cc5d562f675124be5cd3389ae430..ead6a0634d21dbf8aa4c62d2a6be8afda0abf3ef 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/interfaces.py +++ b/ACL_PyTorch/contrib/audio/tdnn/interfaces.py @@ -1,958 +1,958 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Defines interfaces for simple inference with pretrained models - -Authors: - * Aku Rouhe 2021 - * Peter Plantinga 2021 - * Loren Lugosch 2020 - * Mirco Ravanelli 2020 - * Titouan Parcollet 2021 -""" -import torch -import torchaudio -from types import SimpleNamespace -from torch.nn import SyncBatchNorm -from torch.nn import DataParallel as DP -from hyperpyyaml import load_hyperpyyaml -from speechbrain.pretrained.fetching import fetch -from speechbrain.dataio.preprocess import AudioNormalizer -import torch.nn.functional as F -from torch.nn.parallel import DistributedDataParallel as DDP -from speechbrain.utils.data_utils import split_path -from speechbrain.utils.distributed import run_on_main - - -class Pretrained: - """Takes a trained model and makes predictions on new data. - - This is a base class which handles some common boilerplate. - It intentionally has an interface similar to ``Brain`` - these base - classes handle similar things. - - Subclasses of Pretrained should implement the actual logic of how - the pretrained system runs, and add methods with descriptive names - (e.g. transcribe_file() for ASR). - - Arguments - --------- - modules : dict of str:torch.nn.Module pairs - The Torch modules that make up the learned system. These can be treated - in special ways (put on the right device, frozen, etc.) - hparams : dict - Each key:value pair should consist of a string key and a hyperparameter - that is used within the overridden methods. These will - be accessible via an ``hparams`` attribute, using "dot" notation: - e.g., self.hparams.model(x). - run_opts : dict - Options parsed from command line. See ``speechbrain.parse_arguments()``. - List that are supported here: - * device - * data_parallel_count - * data_parallel_backend - * distributed_launch - * distributed_backend - * jit_module_keys - freeze_params : bool - To freeze (requires_grad=False) parameters or not. Normally in inference - you want to freeze the params. Also calls .eval() on all modules. - """ - - HPARAMS_NEEDED = [] - MODULES_NEEDED = [] - - def __init__( - self, modules=None, hparams=None, run_opts=None, freeze_params=True - ): - - # Arguments passed via the run opts dictionary. 
Set a limited - # number of these, since some don't apply to inference. - run_opt_defaults = { - "device": "cpu", - "data_parallel_count": -1, - "data_parallel_backend": False, - "distributed_launch": False, - "distributed_backend": "nccl", - "jit_module_keys": None, - } - for arg, default in run_opt_defaults.items(): - if run_opts is not None and arg in run_opts: - setattr(self, arg, run_opts[arg]) - else: - # If any arg from run_opt_defaults exist in hparams and - # not in command line args "run_opts" - if hparams is not None and arg in hparams: - setattr(self, arg, hparams[arg]) - else: - setattr(self, arg, default) - - # Put modules on the right device, accessible with dot notation - self.modules = torch.nn.ModuleDict(modules) - for mod in self.modules: - self.modules[mod].to(self.device) - - for mod in self.MODULES_NEEDED: - if mod not in modules: - raise ValueError(f"Need modules['{mod}']") - - # Check MODULES_NEEDED and HPARAMS_NEEDED and - # make hyperparams available with dot notation - if self.HPARAMS_NEEDED and hparams is None: - raise ValueError("Need to provide hparams dict.") - if hparams is not None: - # Also first check that all required params are found: - for hp in self.HPARAMS_NEEDED: - if hp not in hparams: - raise ValueError(f"Need hparams['{hp}']") - self.hparams = SimpleNamespace(**hparams) - - # Prepare modules for computation, e.g. jit - self._prepare_modules(freeze_params) - - # Audio normalization - self.audio_normalizer = hparams.get( - "audio_normalizer", AudioNormalizer() - ) - - def _prepare_modules(self, freeze_params): - """Prepare modules for computation, e.g. jit. - - Arguments - --------- - freeze_params : bool - Whether to freeze the parameters and call ``eval()``. - """ - - # Make jit-able - self._compile_jit() - self._wrap_distributed() - - # If we don't want to backprop, freeze the pretrained parameters - if freeze_params: - self.modules.eval() - for p in self.modules.parameters(): - p.requires_grad = False - - def load_audio(self, path, savedir="."): - """Load an audio file with this model"s input spec - - When using a speech model, it is important to use the same type of data, - as was used to train the model. This means for example using the same - sampling rate and number of channels. It is, however, possible to - convert a file from a higher sampling rate to a lower one (downsampling). - Similarly, it is simple to downmix a stereo file to mono. - The path can be a local path, a web url, or a link to a huggingface repo. - """ - source, fl = split_path(path) - path = fetch(fl, source=source, savedir=savedir) - signal, sr = torchaudio.load(path, channels_first=False) - return self.audio_normalizer(signal, sr) - - def _compile_jit(self): - """Compile requested modules with ``torch.jit.script``.""" - if self.jit_module_keys is None: - return - - for name in self.jit_module_keys: - if name not in self.modules: - raise ValueError( - "module " + name + " cannot be jit compiled because " - "it is not defined in your hparams file." 
- ) - module = torch.jit.script(self.modules[name]) - self.modules[name] = module.to(self.device) - - def _wrap_distributed(self): - """Wrap modules with distributed wrapper when requested.""" - if not self.distributed_launch and not self.data_parallel_backend: - return - elif self.distributed_launch: - for name, module in self.modules.items(): - if any(p.requires_grad for p in module.parameters()): - # for ddp, all module must run on same GPU - module = SyncBatchNorm.convert_sync_batchnorm(module) - module = DDP(module, device_ids=[self.device]) - self.modules[name] = module - else: - # data_parallel_backend - for name, module in self.modules.items(): - if any(p.requires_grad for p in module.parameters()): - # if distributed_count = -1 then use all gpus - # otherwise, specify the set of gpu to use - if self.data_parallel_count == -1: - module = DP(module) - else: - module = DP( - module, - [i for i in range(self.data_parallel_count)], - ) - self.modules[name] = module - - @classmethod - def from_hparams( - cls, - source, - hparams_file="hyperparams.yaml", - overrides={}, - savedir=None, - use_auth_token=False, - **kwargs, - ): - """Fetch and load based from outside source based on HyperPyYAML file - - The source can be a location on the filesystem or online/huggingface - - The hyperparams file should contain a "modules" key, which is a - dictionary of torch modules used for computation. - - The hyperparams file should contain a "pretrainer" key, which is a - speechbrain.utils.parameter_transfer.Pretrainer - - Arguments - --------- - source : str - The location to use for finding the model. See - ``speechbrain.pretrained.fetching.fetch`` for details. - hparams_file : str - The name of the hyperparameters file to use for constructing - the modules necessary for inference. Must contain two keys: - "modules" and "pretrainer", as described. - overrides : dict - Any changes to make to the hparams file when it is loaded. - savedir : str or Path - Where to put the pretraining material. If not given, will use - ./pretrained_models/-hash(source). - use_auth_token : bool (default: False) - If true Hugginface's auth_token will be used to load private models from the HuggingFace Hub, - default is False because majority of models are public. - """ - if savedir is None: - clsname = cls.__name__ - savedir = f"./pretrained_models/{clsname}-{hash(source)}" - hparams_local_path = fetch( - hparams_file, source, savedir, use_auth_token - ) - - # Load the modules: - with open(hparams_local_path) as fin: - hparams = load_hyperpyyaml(fin, overrides) - - # Pretraining: - pretrainer = hparams["pretrainer"] - pretrainer.set_collect_in(savedir) - # For distributed setups, have this here: - run_on_main(pretrainer.collect_files, kwargs={"default_source": source}) - # Load on the CPU. Later the params can be moved elsewhere by specifying - # run_opts={"device": ...} - pretrainer.load_collected(device="cpu") - - # Now return the system - return cls(hparams["modules"], hparams, **kwargs) - - -class EndToEndSLU(Pretrained): - """A end-to-end SLU model. - - The class can be used either to run only the encoder (encode()) to extract - features or to run the entire model (decode()) to map the speech to its semantics. - - Example - ------- - >>> from speechbrain.pretrained import EndToEndSLU - >>> tmpdir = getfixture("tmpdir") - >>> slu_model = EndToEndSLU.from_hparams( - ... source="speechbrain/slu-timers-and-such-direct-librispeech-asr", - ... savedir=tmpdir, - ... 
) - >>> slu_model.decode_file("samples/audio_samples/example6.wav") - "{'intent': 'SimpleMath', 'slots': {'number1': 37.67, 'number2': 75.7, 'op': ' minus '}}" - """ - - HPARAMS_NEEDED = ["tokenizer", "asr_model_source"] - MODULES_NEEDED = [ - "slu_enc", - "beam_searcher", - ] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.tokenizer = self.hparams.tokenizer - self.asr_model = EncoderDecoderASR.from_hparams( - source=self.hparams.asr_model_source, - run_opts={"device": self.device}, - ) - - def decode_file(self, path): - """Maps the given audio file to a string representing the - semantic dictionary for the utterance. - - Arguments - --------- - path : str - Path to audio file to decode. - - Returns - ------- - str - The predicted semantics. - """ - waveform = self.load_audio(path) - waveform = waveform.to(self.device) - # Fake a batch: - batch = waveform.unsqueeze(0) - rel_length = torch.tensor([1.0]) - predicted_words, predicted_tokens = self.decode_batch(batch, rel_length) - return predicted_words[0] - - def encode_batch(self, wavs, wav_lens): - """Encodes the input audio into a sequence of hidden states - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. - - Returns - ------- - torch.tensor - The encoded batch - """ - wavs = wavs.float() - wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) - with torch.no_grad(): - ASR_encoder_out = self.asr_model.encode_batch( - wavs.detach(), wav_lens - ) - encoder_out = self.modules.slu_enc(ASR_encoder_out) - return encoder_out - - def decode_batch(self, wavs, wav_lens): - """Maps the input audio to its semantics - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. - - Returns - ------- - list - Each waveform in the batch decoded. - tensor - Each predicted token id. - """ - with torch.no_grad(): - wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) - encoder_out = self.encode_batch(wavs, wav_lens) - predicted_tokens, scores = self.modules.beam_searcher( - encoder_out, wav_lens - ) - predicted_words = [ - self.tokenizer.decode_ids(token_seq) - for token_seq in predicted_tokens - ] - return predicted_words, predicted_tokens - - -class EncoderDecoderASR(Pretrained): - """A ready-to-use Encoder-Decoder ASR model - - The class can be used either to run only the encoder (encode()) to extract - features or to run the entire encoder-decoder model - (transcribe()) to transcribe speech. The given YAML must contains the fields - specified in the *_NEEDED[] lists. - - Example - ------- - >>> from speechbrain.pretrained import EncoderDecoderASR - >>> tmpdir = getfixture("tmpdir") - >>> asr_model = EncoderDecoderASR.from_hparams( - ... source="speechbrain/asr-crdnn-rnnlm-librispeech", - ... savedir=tmpdir, - ... 
) - >>> asr_model.transcribe_file("samples/audio_samples/example2.flac") - "MY FATHER HAS REVEALED THE CULPRIT'S NAME" - """ - - HPARAMS_NEEDED = ["tokenizer"] - MODULES_NEEDED = [ - "encoder", - "decoder", - ] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.tokenizer = self.hparams.tokenizer - - def transcribe_file(self, path): - """Transcribes the given audiofile into a sequence of words. - - Arguments - --------- - path : str - Path to audio file which to transcribe. - - Returns - ------- - str - The audiofile transcription produced by this ASR system. - """ - waveform = self.load_audio(path) - # Fake a batch: - batch = waveform.unsqueeze(0) - rel_length = torch.tensor([1.0]) - predicted_words, predicted_tokens = self.transcribe_batch( - batch, rel_length - ) - return predicted_words[0] - - def encode_batch(self, wavs, wav_lens): - """Encodes the input audio into a sequence of hidden states - - The waveforms should already be in the model's desired format. - You can call: - ``normalized = EncoderDecoderASR.normalizer(signal, sample_rate)`` - to get a correctly converted signal in most cases. - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. - - Returns - ------- - torch.tensor - The encoded batch - """ - wavs = wavs.float() - wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) - encoder_out = self.modules.encoder(wavs, wav_lens) - return encoder_out - - def transcribe_batch(self, wavs, wav_lens): - """Transcribes the input audio into a sequence of words - - The waveforms should already be in the model's desired format. - You can call: - ``normalized = EncoderDecoderASR.normalizer(signal, sample_rate)`` - to get a correctly converted signal in most cases. - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. - - Returns - ------- - list - Each waveform in the batch transcribed. - tensor - Each predicted token id. - """ - with torch.no_grad(): - wav_lens = wav_lens.to(self.device) - encoder_out = self.encode_batch(wavs, wav_lens) - predicted_tokens, scores = self.modules.decoder( - encoder_out, wav_lens - ) - predicted_words = [ - self.tokenizer.decode_ids(token_seq) - for token_seq in predicted_tokens - ] - return predicted_words, predicted_tokens - - -class EncoderClassifier(Pretrained): - """A ready-to-use class for utterance-level classification (e.g, speaker-id, - language-id, emotion recognition, keyword spotting, etc). - - The class assumes that an encoder called "embedding_model" and a model - called "classifier" are defined in the yaml file. If you want to - convert the predicted index into a corresponding text label, please - provide the path of the label_encoder in a variable called 'lab_encoder_file' - within the yaml. - - The class can be used either to run only the encoder (encode_batch()) to - extract embeddings or to run a classification step (classify_batch()). 
- ``` - - Example - ------- - >>> import torchaudio - >>> from speechbrain.pretrained import EncoderClassifier - >>> # Model is downloaded from the speechbrain HuggingFace repo - >>> tmpdir = getfixture("tmpdir") - >>> classifier = EncoderClassifier.from_hparams( - ... source="speechbrain/spkrec-ecapa-voxceleb", - ... savedir=tmpdir, - ... ) - - >>> # Compute embeddings - >>> signal, fs = torchaudio.load("samples/audio_samples/example1.wav") - >>> embeddings = classifier.encode_batch(signal) - - >>> # Classification - >>> prediction = classifier .classify_batch(signal) - """ - - MODULES_NEEDED = [ - "compute_features", - "mean_var_norm", - "embedding_model", - "classifier", - ] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def extract_feats(self, wavs, wav_lens=None): - # wav to feats - wavs = wavs.to('cpu').float() - if wav_lens is None: - wav_lens = torch.ones(wavs.shape[0], device='cpu') - - feats = self.modules.compute_features(wavs) - feats = self.modules.mean_var_norm(feats, wav_lens) - - return feats - - def feats_classify(self, feats, wav_lens=None): - emb = self.modules.embedding_model(feats, wav_lens) - out_prob = self.modules.classifier(emb).squeeze(1) - - return out_prob - - def encode_batch(self, wavs, wav_lens=None, normalize=False): - """Encodes the input audio into a single vector embedding. - - The waveforms should already be in the model's desired format. - You can call: - ``normalized = .normalizer(signal, sample_rate)`` - to get a correctly converted signal in most cases. - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. Make sure the sample rate is fs=16000 Hz. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. - normalize : bool - If True, it normalizes the embeddings with the statistics - contained in mean_var_norm_emb. - - Returns - ------- - torch.tensor - The encoded batch - """ - # Manage single waveforms in input - if len(wavs.shape) == 1: - wavs = wavs.unsqueeze(0) - - # Assign full length if wav_lens is not assigned - if wav_lens is None: - wav_lens = torch.ones(wavs.shape[0], device=self.device) - - # Storing waveform in the specified device - wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) - wavs = wavs.float() - - # Computing features and embeddings - feats = self.modules.compute_features(wavs) - feats = self.modules.mean_var_norm(feats, wav_lens) - embeddings = self.modules.embedding_model(feats, wav_lens) - if normalize: - embeddings = self.hparams.mean_var_norm_emb( - embeddings, torch.ones(embeddings.shape[0], device=self.device) - ) - return embeddings - - def classify_batch(self, wavs, wav_lens=None): - """Performs classification on the top of the encoded features. - - It returns the posterior probabilities, the index and, if the label - encoder is specified it also the text label. - - Arguments - --------- - wavs : torch.tensor - Batch of waveforms [batch, time, channels] or [batch, time] - depending on the model. Make sure the sample rate is fs=16000 Hz. - wav_lens : torch.tensor - Lengths of the waveforms relative to the longest one in the - batch, tensor of shape [batch]. The longest one should have - relative length 1.0 and others len(waveform) / max_length. - Used for ignoring padding. 
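The `extract_feats` / `feats_classify` helpers above split the original `classify_batch` pipeline in two: feature extraction (Fbank plus mean/variance normalization) stays on the host and is what the preprocessing script dumps to `.bin`, while the embedding model plus classifier are what the ONNX wrapper exports. A rough end-to-end sketch, with a placeholder wav path and assuming the `best_model` pretrained folder used elsewhere in this patch:

```
import torchaudio
from speechbrain.pretrained import EncoderClassifier

classifier = EncoderClassifier.from_hparams(source='best_model', savedir='best_model')
signal, fs = torchaudio.load('sample.wav')      # placeholder path

feats = classifier.extract_feats(signal)         # host side: Fbank + mean/var norm
out_prob = classifier.feats_classify(feats)      # exported side: x-vector + classifier

# The two paths should produce near-identical log-posteriors.
ref_prob, _, _, _ = classifier.classify_batch(signal)
```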
- - Returns - ------- - out_prob - The log posterior probabilities of each class ([batch, N_class]) - score: - It is the value of the log-posterior for the best class ([batch,]) - index - The indexes of the best class ([batch,]) - text_lab: - List with the text labels corresponding to the indexes. - (label encoder should be provided). - """ - emb = self.encode_batch(wavs, wav_lens) - out_prob = self.modules.classifier(emb).squeeze(1) - score, index = torch.max(out_prob, dim=-1) - text_lab = self.hparams.label_encoder.decode_torch(index) - return out_prob, score, index, text_lab - - def classify_file(self, path): - """Classifies the given audiofile into the given set of labels. - - Arguments - --------- - path : str - Path to audio file to classify. - - Returns - ------- - out_prob - The log posterior probabilities of each class ([batch, N_class]) - score: - It is the value of the log-posterior for the best class ([batch,]) - index - The indexes of the best class ([batch,]) - text_lab: - List with the text labels corresponding to the indexes. - (label encoder should be provided). - """ - waveform = self.load_audio(path) - # Fake a batch: - batch = waveform.unsqueeze(0) - rel_length = torch.tensor([1.0]) - emb = self.encode_batch(batch, rel_length) - out_prob = self.modules.classifier(emb).squeeze(1) - score, index = torch.max(out_prob, dim=-1) - text_lab = self.hparams.label_encoder.decode_torch(index) - return out_prob, score, index, text_lab - - -class SpeakerRecognition(EncoderClassifier): - """A ready-to-use model for speaker recognition. It can be used to - perform speaker verification with verify_batch(). - - ``` - Example - ------- - >>> import torchaudio - >>> from speechbrain.pretrained import SpeakerRecognition - >>> # Model is downloaded from the speechbrain HuggingFace repo - >>> tmpdir = getfixture("tmpdir") - >>> verification = SpeakerRecognition.from_hparams( - ... source="speechbrain/spkrec-ecapa-voxceleb", - ... savedir=tmpdir, - ... ) - - >>> # Perform verification - >>> signal, fs = torchaudio.load("samples/audio_samples/example1.wav") - >>> signal2, fs = torchaudio.load("samples/audio_samples/example2.flac") - >>> score, prediction = verification.verify_batch(signal, signal2) - """ - - MODULES_NEEDED = [ - "compute_features", - "mean_var_norm", - "embedding_model", - "mean_var_norm_emb", - ] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6) - - def verify_batch( - self, wavs1, wavs2, wav1_lens=None, wav2_lens=None, threshold=0.25 - ): - """Performs speaker verification with cosine distance. - - It returns the score and the decision (0 different speakers, - 1 same speakers). - - Arguments - --------- - wavs1 : Torch.Tensor - Tensor containing the speech waveform1 (batch, time). - Make sure the sample rate is fs=16000 Hz. - wavs2 : Torch.Tensor - Tensor containing the speech waveform2 (batch, time). - Make sure the sample rate is fs=16000 Hz. - wav1_lens: Torch.Tensor - Tensor containing the relative length for each sentence - in the length (e.g., [0.8 0.6 1.0]) - wav2_lens: Torch.Tensor - Tensor containing the relative length for each sentence - in the length (e.g., [0.8 0.6 1.0]) - threshold: Float - Threshold applied to the cosine distance to decide if the - speaker is different (0) or the same (1). - - Returns - ------- - score - The score associated to the binary verification output - (cosine distance). 
- prediction - The prediction is 1 if the two signals in input are from the same - speaker and 0 otherwise. - """ - emb1 = self.encode_batch(wavs1, wav1_lens, normalize=True) - emb2 = self.encode_batch(wavs2, wav2_lens, normalize=True) - score = self.similarity(emb1, emb2) - return score, score > threshold - - def verify_files(self, path_x, path_y): - """Speaker verification with cosine distance - - Returns the score and the decision (0 different speakers, - 1 same speakers). - - Returns - ------- - score - The score associated to the binary verification output - (cosine distance). - prediction - The prediction is 1 if the two signals in input are from the same - speaker and 0 otherwise. - """ - waveform_x = self.load_audio(path_x) - waveform_y = self.load_audio(path_y) - # Fake batches: - batch_x = waveform_x.unsqueeze(0) - batch_y = waveform_y.unsqueeze(0) - # Verify: - score, decision = self.verify_batch(batch_x, batch_y) - # Squeeze: - return score[0], decision[0] - - -class SepformerSeparation(Pretrained): - """A "ready-to-use" speech separation model. - - Uses Sepformer architecture. - - Example - ------- - >>> tmpdir = getfixture("tmpdir") - >>> model = SepformerSeparation.from_hparams( - ... source="speechbrain/sepformer-wsj02mix", - ... savedir=tmpdir) - >>> mix = torch.randn(1, 400) - >>> est_sources = model.separate_batch(mix) - >>> print(est_sources.shape) - torch.Size([1, 400, 2]) - """ - - MODULES_NEEDED = ["encoder", "masknet", "decoder"] - - def separate_batch(self, mix): - """Run source separation on batch of audio. - - Arguments - --------- - mix : torch.tensor - The mixture of sources. - - Returns - ------- - tensor - Separated sources - """ - - # Separation - mix = mix.to(self.device) - mix_w = self.modules.encoder(mix) - est_mask = self.modules.masknet(mix_w) - mix_w = torch.stack([mix_w] * self.hparams.num_spks) - sep_h = mix_w * est_mask - - # Decoding - est_source = torch.cat( - [ - self.modules.decoder(sep_h[i]).unsqueeze(-1) - for i in range(self.hparams.num_spks) - ], - dim=-1, - ) - - # T changed after conv1d in encoder, fix it here - T_origin = mix.size(1) - T_est = est_source.size(1) - if T_origin > T_est: - est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est)) - else: - est_source = est_source[:, :T_origin, :] - return est_source - - def separate_file(self, path, savedir="."): - """Separate sources from file. - - Arguments - --------- - path : str - Path to file which has a mixture of sources. It can be a local - path, a web url, or a huggingface repo. - savedir : path - Path where to store the wav signals (when downloaded from the web). - Returns - ------- - tensor - Separated sources - """ - source, fl = split_path(path) - path = fetch(fl, source=source, savedir=savedir) - - batch, fs_file = torchaudio.load(path) - batch = batch.to(self.device) - fs_model = self.hparams.sample_rate - - # resample the data if needed - if fs_file != fs_model: - print( - "Resampling the audio from {} Hz to {} Hz".format( - fs_file, fs_model - ) - ) - tf = torchaudio.transforms.Resample( - orig_freq=fs_file, new_freq=fs_model - ) - batch = batch.mean(dim=0, keepdim=True) - batch = tf(batch) - - est_sources = self.separate_batch(batch) - est_sources = est_sources / est_sources.max(dim=1, keepdim=True)[0] - return est_sources - - -class SpectralMaskEnhancement(Pretrained): - """A ready-to-use model for speech enhancement. - - Arguments - --------- - See ``Pretrained``. 
- - Example - ------- - >>> import torchaudio - >>> from speechbrain.pretrained import SpectralMaskEnhancement - >>> # Model is downloaded from the speechbrain HuggingFace repo - >>> tmpdir = getfixture("tmpdir") - >>> enhancer = SpectralMaskEnhancement.from_hparams( - ... source="speechbrain/mtl-mimic-voicebank", - ... savedir=tmpdir, - ... ) - >>> noisy, fs = torchaudio.load("samples/audio_samples/example_noisy.wav") - >>> # Channel dimension is interpreted as batch dimension here - >>> enhanced = enhancer.enhance_batch(noisy) - """ - - HPARAMS_NEEDED = ["compute_stft", "spectral_magnitude", "resynth"] - MODULES_NEEDED = ["enhance_model"] - - def compute_features(self, wavs): - """Compute the log spectral magnitude features for masking. - - Arguments - --------- - wavs : torch.tensor - A batch of waveforms to convert to log spectral mags. - """ - feats = self.hparams.compute_stft(wavs) - feats = self.hparams.spectral_magnitude(feats) - return torch.log1p(feats) - - def enhance_batch(self, noisy, lengths=None): - """Enhance a batch of noisy waveforms. - - Arguments - --------- - noisy : torch.tensor - A batch of waveforms to perform enhancement on. - lengths : torch.tensor - The lengths of the waveforms if the enhancement model handles them. - - Returns - ------- - torch.tensor - A batch of enhanced waveforms of the same shape as input. - """ - noisy = noisy.to(self.device) - noisy_features = self.compute_features(noisy) - - # Perform masking-based enhancement, multiplying output with input. - if lengths is not None: - mask = self.modules.enhance_model(noisy_features, lengths=lengths) - else: - mask = self.modules.enhance_model(noisy_features) - enhanced = torch.mul(mask, noisy_features) - - # Return resynthesized waveforms - return self.hparams.resynth(torch.expm1(enhanced), noisy) - - def enhance_file(self, filename, output_filename=None): - """Enhance a wav file. - - Arguments - --------- - filename : str - Location on disk to load file for enhancement. - output_filename : str - If provided, writes enhanced data to this file. - """ - noisy = self.load_audio(filename) - noisy = noisy.to(self.device) - - # Fake a batch: - batch = noisy.unsqueeze(0) - enhanced = self.enhance_batch(batch) - - if output_filename is not None: - torchaudio.save(output_filename, enhanced, channels_first=False) - - return enhanced.squeeze(0) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
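# --- Illustrative usage sketch (not part of the patch) ----------------------
# A minimal example, under assumptions, of how the patched pretrained
# interface added here is typically driven when dumping CPU-side features
# for OM/pyACL inference (see om_infer.sh and Tdnn_pyacl_infer.py later in
# this commit). The module name `pretrained`, the HuggingFace model id, the
# audio path, and the output file name are assumptions, not taken from the
# patch; `extract_feats` is the helper this patch adds to EncoderClassifier
# further below in this file.
import torchaudio
from pretrained import EncoderClassifier  # assumed import path of the patched module

classifier = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-xvect-voxceleb",   # assumed model repo id
    savedir="./pretrained_models/tdnn",
    run_opts={"device": "cpu"},                   # feature front end stays on CPU
)

signal, sr = torchaudio.load("example.wav")       # assumed local sample, fs=16000 Hz
wavs = signal.mean(dim=0, keepdim=True)           # downmix to a mono batch of one

feats = classifier.extract_feats(wavs)            # CPU feature extraction only
feats.detach().cpu().numpy().astype("float32").tofile("example_feats.bin")
# The resulting float32 .bin can then be listed in an .info file and fed to
# the OM model, mirroring the inference flow this patch enables.
# -----------------------------------------------------------------------------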
+ +"""Defines interfaces for simple inference with pretrained models + +Authors: + * Aku Rouhe 2021 + * Peter Plantinga 2021 + * Loren Lugosch 2020 + * Mirco Ravanelli 2020 + * Titouan Parcollet 2021 +""" +import torch +import torchaudio +from types import SimpleNamespace +from torch.nn import SyncBatchNorm +from torch.nn import DataParallel as DP +from hyperpyyaml import load_hyperpyyaml +from speechbrain.pretrained.fetching import fetch +from speechbrain.dataio.preprocess import AudioNormalizer +import torch.nn.functional as F +from torch.nn.parallel import DistributedDataParallel as DDP +from speechbrain.utils.data_utils import split_path +from speechbrain.utils.distributed import run_on_main + + +class Pretrained: + """Takes a trained model and makes predictions on new data. + + This is a base class which handles some common boilerplate. + It intentionally has an interface similar to ``Brain`` - these base + classes handle similar things. + + Subclasses of Pretrained should implement the actual logic of how + the pretrained system runs, and add methods with descriptive names + (e.g. transcribe_file() for ASR). + + Arguments + --------- + modules : dict of str:torch.nn.Module pairs + The Torch modules that make up the learned system. These can be treated + in special ways (put on the right device, frozen, etc.) + hparams : dict + Each key:value pair should consist of a string key and a hyperparameter + that is used within the overridden methods. These will + be accessible via an ``hparams`` attribute, using "dot" notation: + e.g., self.hparams.model(x). + run_opts : dict + Options parsed from command line. See ``speechbrain.parse_arguments()``. + List that are supported here: + * device + * data_parallel_count + * data_parallel_backend + * distributed_launch + * distributed_backend + * jit_module_keys + freeze_params : bool + To freeze (requires_grad=False) parameters or not. Normally in inference + you want to freeze the params. Also calls .eval() on all modules. + """ + + HPARAMS_NEEDED = [] + MODULES_NEEDED = [] + + def __init__( + self, modules=None, hparams=None, run_opts=None, freeze_params=True + ): + + # Arguments passed via the run opts dictionary. Set a limited + # number of these, since some don't apply to inference. 
+ run_opt_defaults = { + "device": "cpu", + "data_parallel_count": -1, + "data_parallel_backend": False, + "distributed_launch": False, + "distributed_backend": "nccl", + "jit_module_keys": None, + } + for arg, default in run_opt_defaults.items(): + if run_opts is not None and arg in run_opts: + setattr(self, arg, run_opts[arg]) + else: + # If any arg from run_opt_defaults exist in hparams and + # not in command line args "run_opts" + if hparams is not None and arg in hparams: + setattr(self, arg, hparams[arg]) + else: + setattr(self, arg, default) + + # Put modules on the right device, accessible with dot notation + self.modules = torch.nn.ModuleDict(modules) + for mod in self.modules: + self.modules[mod].to(self.device) + + for mod in self.MODULES_NEEDED: + if mod not in modules: + raise ValueError(f"Need modules['{mod}']") + + # Check MODULES_NEEDED and HPARAMS_NEEDED and + # make hyperparams available with dot notation + if self.HPARAMS_NEEDED and hparams is None: + raise ValueError("Need to provide hparams dict.") + if hparams is not None: + # Also first check that all required params are found: + for hp in self.HPARAMS_NEEDED: + if hp not in hparams: + raise ValueError(f"Need hparams['{hp}']") + self.hparams = SimpleNamespace(**hparams) + + # Prepare modules for computation, e.g. jit + self._prepare_modules(freeze_params) + + # Audio normalization + self.audio_normalizer = hparams.get( + "audio_normalizer", AudioNormalizer() + ) + + def _prepare_modules(self, freeze_params): + """Prepare modules for computation, e.g. jit. + + Arguments + --------- + freeze_params : bool + Whether to freeze the parameters and call ``eval()``. + """ + + # Make jit-able + self._compile_jit() + self._wrap_distributed() + + # If we don't want to backprop, freeze the pretrained parameters + if freeze_params: + self.modules.eval() + for p in self.modules.parameters(): + p.requires_grad = False + + def load_audio(self, path, savedir="."): + """Load an audio file with this model"s input spec + + When using a speech model, it is important to use the same type of data, + as was used to train the model. This means for example using the same + sampling rate and number of channels. It is, however, possible to + convert a file from a higher sampling rate to a lower one (downsampling). + Similarly, it is simple to downmix a stereo file to mono. + The path can be a local path, a web url, or a link to a huggingface repo. + """ + source, fl = split_path(path) + path = fetch(fl, source=source, savedir=savedir) + signal, sr = torchaudio.load(path, channels_first=False) + return self.audio_normalizer(signal, sr) + + def _compile_jit(self): + """Compile requested modules with ``torch.jit.script``.""" + if self.jit_module_keys is None: + return + + for name in self.jit_module_keys: + if name not in self.modules: + raise ValueError( + "module " + name + " cannot be jit compiled because " + "it is not defined in your hparams file." 
+ ) + module = torch.jit.script(self.modules[name]) + self.modules[name] = module.to(self.device) + + def _wrap_distributed(self): + """Wrap modules with distributed wrapper when requested.""" + if not self.distributed_launch and not self.data_parallel_backend: + return + elif self.distributed_launch: + for name, module in self.modules.items(): + if any(p.requires_grad for p in module.parameters()): + # for ddp, all module must run on same GPU + module = SyncBatchNorm.convert_sync_batchnorm(module) + module = DDP(module, device_ids=[self.device]) + self.modules[name] = module + else: + # data_parallel_backend + for name, module in self.modules.items(): + if any(p.requires_grad for p in module.parameters()): + # if distributed_count = -1 then use all gpus + # otherwise, specify the set of gpu to use + if self.data_parallel_count == -1: + module = DP(module) + else: + module = DP( + module, + [i for i in range(self.data_parallel_count)], + ) + self.modules[name] = module + + @classmethod + def from_hparams( + cls, + source, + hparams_file="hyperparams.yaml", + overrides={}, + savedir=None, + use_auth_token=False, + **kwargs, + ): + """Fetch and load based from outside source based on HyperPyYAML file + + The source can be a location on the filesystem or online/huggingface + + The hyperparams file should contain a "modules" key, which is a + dictionary of torch modules used for computation. + + The hyperparams file should contain a "pretrainer" key, which is a + speechbrain.utils.parameter_transfer.Pretrainer + + Arguments + --------- + source : str + The location to use for finding the model. See + ``speechbrain.pretrained.fetching.fetch`` for details. + hparams_file : str + The name of the hyperparameters file to use for constructing + the modules necessary for inference. Must contain two keys: + "modules" and "pretrainer", as described. + overrides : dict + Any changes to make to the hparams file when it is loaded. + savedir : str or Path + Where to put the pretraining material. If not given, will use + ./pretrained_models/-hash(source). + use_auth_token : bool (default: False) + If true Hugginface's auth_token will be used to load private models from the HuggingFace Hub, + default is False because majority of models are public. + """ + if savedir is None: + clsname = cls.__name__ + savedir = f"./pretrained_models/{clsname}-{hash(source)}" + hparams_local_path = fetch( + hparams_file, source, savedir, use_auth_token + ) + + # Load the modules: + with open(hparams_local_path) as fin: + hparams = load_hyperpyyaml(fin, overrides) + + # Pretraining: + pretrainer = hparams["pretrainer"] + pretrainer.set_collect_in(savedir) + # For distributed setups, have this here: + run_on_main(pretrainer.collect_files, kwargs={"default_source": source}) + # Load on the CPU. Later the params can be moved elsewhere by specifying + # run_opts={"device": ...} + pretrainer.load_collected(device="cpu") + + # Now return the system + return cls(hparams["modules"], hparams, **kwargs) + + +class EndToEndSLU(Pretrained): + """A end-to-end SLU model. + + The class can be used either to run only the encoder (encode()) to extract + features or to run the entire model (decode()) to map the speech to its semantics. + + Example + ------- + >>> from speechbrain.pretrained import EndToEndSLU + >>> tmpdir = getfixture("tmpdir") + >>> slu_model = EndToEndSLU.from_hparams( + ... source="speechbrain/slu-timers-and-such-direct-librispeech-asr", + ... savedir=tmpdir, + ... 
) + >>> slu_model.decode_file("samples/audio_samples/example6.wav") + "{'intent': 'SimpleMath', 'slots': {'number1': 37.67, 'number2': 75.7, 'op': ' minus '}}" + """ + + HPARAMS_NEEDED = ["tokenizer", "asr_model_source"] + MODULES_NEEDED = [ + "slu_enc", + "beam_searcher", + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.tokenizer = self.hparams.tokenizer + self.asr_model = EncoderDecoderASR.from_hparams( + source=self.hparams.asr_model_source, + run_opts={"device": self.device}, + ) + + def decode_file(self, path): + """Maps the given audio file to a string representing the + semantic dictionary for the utterance. + + Arguments + --------- + path : str + Path to audio file to decode. + + Returns + ------- + str + The predicted semantics. + """ + waveform = self.load_audio(path) + waveform = waveform.to(self.device) + # Fake a batch: + batch = waveform.unsqueeze(0) + rel_length = torch.tensor([1.0]) + predicted_words, predicted_tokens = self.decode_batch(batch, rel_length) + return predicted_words[0] + + def encode_batch(self, wavs, wav_lens): + """Encodes the input audio into a sequence of hidden states + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. + + Returns + ------- + torch.tensor + The encoded batch + """ + wavs = wavs.float() + wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) + with torch.no_grad(): + ASR_encoder_out = self.asr_model.encode_batch( + wavs.detach(), wav_lens + ) + encoder_out = self.modules.slu_enc(ASR_encoder_out) + return encoder_out + + def decode_batch(self, wavs, wav_lens): + """Maps the input audio to its semantics + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. + + Returns + ------- + list + Each waveform in the batch decoded. + tensor + Each predicted token id. + """ + with torch.no_grad(): + wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) + encoder_out = self.encode_batch(wavs, wav_lens) + predicted_tokens, scores = self.modules.beam_searcher( + encoder_out, wav_lens + ) + predicted_words = [ + self.tokenizer.decode_ids(token_seq) + for token_seq in predicted_tokens + ] + return predicted_words, predicted_tokens + + +class EncoderDecoderASR(Pretrained): + """A ready-to-use Encoder-Decoder ASR model + + The class can be used either to run only the encoder (encode()) to extract + features or to run the entire encoder-decoder model + (transcribe()) to transcribe speech. The given YAML must contains the fields + specified in the *_NEEDED[] lists. + + Example + ------- + >>> from speechbrain.pretrained import EncoderDecoderASR + >>> tmpdir = getfixture("tmpdir") + >>> asr_model = EncoderDecoderASR.from_hparams( + ... source="speechbrain/asr-crdnn-rnnlm-librispeech", + ... savedir=tmpdir, + ... 
) + >>> asr_model.transcribe_file("samples/audio_samples/example2.flac") + "MY FATHER HAS REVEALED THE CULPRIT'S NAME" + """ + + HPARAMS_NEEDED = ["tokenizer"] + MODULES_NEEDED = [ + "encoder", + "decoder", + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.tokenizer = self.hparams.tokenizer + + def transcribe_file(self, path): + """Transcribes the given audiofile into a sequence of words. + + Arguments + --------- + path : str + Path to audio file which to transcribe. + + Returns + ------- + str + The audiofile transcription produced by this ASR system. + """ + waveform = self.load_audio(path) + # Fake a batch: + batch = waveform.unsqueeze(0) + rel_length = torch.tensor([1.0]) + predicted_words, predicted_tokens = self.transcribe_batch( + batch, rel_length + ) + return predicted_words[0] + + def encode_batch(self, wavs, wav_lens): + """Encodes the input audio into a sequence of hidden states + + The waveforms should already be in the model's desired format. + You can call: + ``normalized = EncoderDecoderASR.normalizer(signal, sample_rate)`` + to get a correctly converted signal in most cases. + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. + + Returns + ------- + torch.tensor + The encoded batch + """ + wavs = wavs.float() + wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) + encoder_out = self.modules.encoder(wavs, wav_lens) + return encoder_out + + def transcribe_batch(self, wavs, wav_lens): + """Transcribes the input audio into a sequence of words + + The waveforms should already be in the model's desired format. + You can call: + ``normalized = EncoderDecoderASR.normalizer(signal, sample_rate)`` + to get a correctly converted signal in most cases. + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. + + Returns + ------- + list + Each waveform in the batch transcribed. + tensor + Each predicted token id. + """ + with torch.no_grad(): + wav_lens = wav_lens.to(self.device) + encoder_out = self.encode_batch(wavs, wav_lens) + predicted_tokens, scores = self.modules.decoder( + encoder_out, wav_lens + ) + predicted_words = [ + self.tokenizer.decode_ids(token_seq) + for token_seq in predicted_tokens + ] + return predicted_words, predicted_tokens + + +class EncoderClassifier(Pretrained): + """A ready-to-use class for utterance-level classification (e.g, speaker-id, + language-id, emotion recognition, keyword spotting, etc). + + The class assumes that an encoder called "embedding_model" and a model + called "classifier" are defined in the yaml file. If you want to + convert the predicted index into a corresponding text label, please + provide the path of the label_encoder in a variable called 'lab_encoder_file' + within the yaml. + + The class can be used either to run only the encoder (encode_batch()) to + extract embeddings or to run a classification step (classify_batch()). 
+ ``` + + Example + ------- + >>> import torchaudio + >>> from speechbrain.pretrained import EncoderClassifier + >>> # Model is downloaded from the speechbrain HuggingFace repo + >>> tmpdir = getfixture("tmpdir") + >>> classifier = EncoderClassifier.from_hparams( + ... source="speechbrain/spkrec-ecapa-voxceleb", + ... savedir=tmpdir, + ... ) + + >>> # Compute embeddings + >>> signal, fs = torchaudio.load("samples/audio_samples/example1.wav") + >>> embeddings = classifier.encode_batch(signal) + + >>> # Classification + >>> prediction = classifier .classify_batch(signal) + """ + + MODULES_NEEDED = [ + "compute_features", + "mean_var_norm", + "embedding_model", + "classifier", + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def extract_feats(self, wavs, wav_lens=None): + # wav to feats + wavs = wavs.to('cpu').float() + if wav_lens is None: + wav_lens = torch.ones(wavs.shape[0], device='cpu') + + feats = self.modules.compute_features(wavs) + feats = self.modules.mean_var_norm(feats, wav_lens) + + return feats + + def feats_classify(self, feats, wav_lens=None): + emb = self.modules.embedding_model(feats, wav_lens) + out_prob = self.modules.classifier(emb).squeeze(1) + + return out_prob + + def encode_batch(self, wavs, wav_lens=None, normalize=False): + """Encodes the input audio into a single vector embedding. + + The waveforms should already be in the model's desired format. + You can call: + ``normalized = .normalizer(signal, sample_rate)`` + to get a correctly converted signal in most cases. + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. Make sure the sample rate is fs=16000 Hz. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. + normalize : bool + If True, it normalizes the embeddings with the statistics + contained in mean_var_norm_emb. + + Returns + ------- + torch.tensor + The encoded batch + """ + # Manage single waveforms in input + if len(wavs.shape) == 1: + wavs = wavs.unsqueeze(0) + + # Assign full length if wav_lens is not assigned + if wav_lens is None: + wav_lens = torch.ones(wavs.shape[0], device=self.device) + + # Storing waveform in the specified device + wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device) + wavs = wavs.float() + + # Computing features and embeddings + feats = self.modules.compute_features(wavs) + feats = self.modules.mean_var_norm(feats, wav_lens) + embeddings = self.modules.embedding_model(feats, wav_lens) + if normalize: + embeddings = self.hparams.mean_var_norm_emb( + embeddings, torch.ones(embeddings.shape[0], device=self.device) + ) + return embeddings + + def classify_batch(self, wavs, wav_lens=None): + """Performs classification on the top of the encoded features. + + It returns the posterior probabilities, the index and, if the label + encoder is specified it also the text label. + + Arguments + --------- + wavs : torch.tensor + Batch of waveforms [batch, time, channels] or [batch, time] + depending on the model. Make sure the sample rate is fs=16000 Hz. + wav_lens : torch.tensor + Lengths of the waveforms relative to the longest one in the + batch, tensor of shape [batch]. The longest one should have + relative length 1.0 and others len(waveform) / max_length. + Used for ignoring padding. 
+ + Returns + ------- + out_prob + The log posterior probabilities of each class ([batch, N_class]) + score: + It is the value of the log-posterior for the best class ([batch,]) + index + The indexes of the best class ([batch,]) + text_lab: + List with the text labels corresponding to the indexes. + (label encoder should be provided). + """ + emb = self.encode_batch(wavs, wav_lens) + out_prob = self.modules.classifier(emb).squeeze(1) + score, index = torch.max(out_prob, dim=-1) + text_lab = self.hparams.label_encoder.decode_torch(index) + return out_prob, score, index, text_lab + + def classify_file(self, path): + """Classifies the given audiofile into the given set of labels. + + Arguments + --------- + path : str + Path to audio file to classify. + + Returns + ------- + out_prob + The log posterior probabilities of each class ([batch, N_class]) + score: + It is the value of the log-posterior for the best class ([batch,]) + index + The indexes of the best class ([batch,]) + text_lab: + List with the text labels corresponding to the indexes. + (label encoder should be provided). + """ + waveform = self.load_audio(path) + # Fake a batch: + batch = waveform.unsqueeze(0) + rel_length = torch.tensor([1.0]) + emb = self.encode_batch(batch, rel_length) + out_prob = self.modules.classifier(emb).squeeze(1) + score, index = torch.max(out_prob, dim=-1) + text_lab = self.hparams.label_encoder.decode_torch(index) + return out_prob, score, index, text_lab + + +class SpeakerRecognition(EncoderClassifier): + """A ready-to-use model for speaker recognition. It can be used to + perform speaker verification with verify_batch(). + + ``` + Example + ------- + >>> import torchaudio + >>> from speechbrain.pretrained import SpeakerRecognition + >>> # Model is downloaded from the speechbrain HuggingFace repo + >>> tmpdir = getfixture("tmpdir") + >>> verification = SpeakerRecognition.from_hparams( + ... source="speechbrain/spkrec-ecapa-voxceleb", + ... savedir=tmpdir, + ... ) + + >>> # Perform verification + >>> signal, fs = torchaudio.load("samples/audio_samples/example1.wav") + >>> signal2, fs = torchaudio.load("samples/audio_samples/example2.flac") + >>> score, prediction = verification.verify_batch(signal, signal2) + """ + + MODULES_NEEDED = [ + "compute_features", + "mean_var_norm", + "embedding_model", + "mean_var_norm_emb", + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6) + + def verify_batch( + self, wavs1, wavs2, wav1_lens=None, wav2_lens=None, threshold=0.25 + ): + """Performs speaker verification with cosine distance. + + It returns the score and the decision (0 different speakers, + 1 same speakers). + + Arguments + --------- + wavs1 : Torch.Tensor + Tensor containing the speech waveform1 (batch, time). + Make sure the sample rate is fs=16000 Hz. + wavs2 : Torch.Tensor + Tensor containing the speech waveform2 (batch, time). + Make sure the sample rate is fs=16000 Hz. + wav1_lens: Torch.Tensor + Tensor containing the relative length for each sentence + in the length (e.g., [0.8 0.6 1.0]) + wav2_lens: Torch.Tensor + Tensor containing the relative length for each sentence + in the length (e.g., [0.8 0.6 1.0]) + threshold: Float + Threshold applied to the cosine distance to decide if the + speaker is different (0) or the same (1). + + Returns + ------- + score + The score associated to the binary verification output + (cosine distance). 
+ prediction + The prediction is 1 if the two signals in input are from the same + speaker and 0 otherwise. + """ + emb1 = self.encode_batch(wavs1, wav1_lens, normalize=True) + emb2 = self.encode_batch(wavs2, wav2_lens, normalize=True) + score = self.similarity(emb1, emb2) + return score, score > threshold + + def verify_files(self, path_x, path_y): + """Speaker verification with cosine distance + + Returns the score and the decision (0 different speakers, + 1 same speakers). + + Returns + ------- + score + The score associated to the binary verification output + (cosine distance). + prediction + The prediction is 1 if the two signals in input are from the same + speaker and 0 otherwise. + """ + waveform_x = self.load_audio(path_x) + waveform_y = self.load_audio(path_y) + # Fake batches: + batch_x = waveform_x.unsqueeze(0) + batch_y = waveform_y.unsqueeze(0) + # Verify: + score, decision = self.verify_batch(batch_x, batch_y) + # Squeeze: + return score[0], decision[0] + + +class SepformerSeparation(Pretrained): + """A "ready-to-use" speech separation model. + + Uses Sepformer architecture. + + Example + ------- + >>> tmpdir = getfixture("tmpdir") + >>> model = SepformerSeparation.from_hparams( + ... source="speechbrain/sepformer-wsj02mix", + ... savedir=tmpdir) + >>> mix = torch.randn(1, 400) + >>> est_sources = model.separate_batch(mix) + >>> print(est_sources.shape) + torch.Size([1, 400, 2]) + """ + + MODULES_NEEDED = ["encoder", "masknet", "decoder"] + + def separate_batch(self, mix): + """Run source separation on batch of audio. + + Arguments + --------- + mix : torch.tensor + The mixture of sources. + + Returns + ------- + tensor + Separated sources + """ + + # Separation + mix = mix.to(self.device) + mix_w = self.modules.encoder(mix) + est_mask = self.modules.masknet(mix_w) + mix_w = torch.stack([mix_w] * self.hparams.num_spks) + sep_h = mix_w * est_mask + + # Decoding + est_source = torch.cat( + [ + self.modules.decoder(sep_h[i]).unsqueeze(-1) + for i in range(self.hparams.num_spks) + ], + dim=-1, + ) + + # T changed after conv1d in encoder, fix it here + T_origin = mix.size(1) + T_est = est_source.size(1) + if T_origin > T_est: + est_source = F.pad(est_source, (0, 0, 0, T_origin - T_est)) + else: + est_source = est_source[:, :T_origin, :] + return est_source + + def separate_file(self, path, savedir="."): + """Separate sources from file. + + Arguments + --------- + path : str + Path to file which has a mixture of sources. It can be a local + path, a web url, or a huggingface repo. + savedir : path + Path where to store the wav signals (when downloaded from the web). + Returns + ------- + tensor + Separated sources + """ + source, fl = split_path(path) + path = fetch(fl, source=source, savedir=savedir) + + batch, fs_file = torchaudio.load(path) + batch = batch.to(self.device) + fs_model = self.hparams.sample_rate + + # resample the data if needed + if fs_file != fs_model: + print( + "Resampling the audio from {} Hz to {} Hz".format( + fs_file, fs_model + ) + ) + tf = torchaudio.transforms.Resample( + orig_freq=fs_file, new_freq=fs_model + ) + batch = batch.mean(dim=0, keepdim=True) + batch = tf(batch) + + est_sources = self.separate_batch(batch) + est_sources = est_sources / est_sources.max(dim=1, keepdim=True)[0] + return est_sources + + +class SpectralMaskEnhancement(Pretrained): + """A ready-to-use model for speech enhancement. + + Arguments + --------- + See ``Pretrained``. 
+ + Example + ------- + >>> import torchaudio + >>> from speechbrain.pretrained import SpectralMaskEnhancement + >>> # Model is downloaded from the speechbrain HuggingFace repo + >>> tmpdir = getfixture("tmpdir") + >>> enhancer = SpectralMaskEnhancement.from_hparams( + ... source="speechbrain/mtl-mimic-voicebank", + ... savedir=tmpdir, + ... ) + >>> noisy, fs = torchaudio.load("samples/audio_samples/example_noisy.wav") + >>> # Channel dimension is interpreted as batch dimension here + >>> enhanced = enhancer.enhance_batch(noisy) + """ + + HPARAMS_NEEDED = ["compute_stft", "spectral_magnitude", "resynth"] + MODULES_NEEDED = ["enhance_model"] + + def compute_features(self, wavs): + """Compute the log spectral magnitude features for masking. + + Arguments + --------- + wavs : torch.tensor + A batch of waveforms to convert to log spectral mags. + """ + feats = self.hparams.compute_stft(wavs) + feats = self.hparams.spectral_magnitude(feats) + return torch.log1p(feats) + + def enhance_batch(self, noisy, lengths=None): + """Enhance a batch of noisy waveforms. + + Arguments + --------- + noisy : torch.tensor + A batch of waveforms to perform enhancement on. + lengths : torch.tensor + The lengths of the waveforms if the enhancement model handles them. + + Returns + ------- + torch.tensor + A batch of enhanced waveforms of the same shape as input. + """ + noisy = noisy.to(self.device) + noisy_features = self.compute_features(noisy) + + # Perform masking-based enhancement, multiplying output with input. + if lengths is not None: + mask = self.modules.enhance_model(noisy_features, lengths=lengths) + else: + mask = self.modules.enhance_model(noisy_features) + enhanced = torch.mul(mask, noisy_features) + + # Return resynthesized waveforms + return self.hparams.resynth(torch.expm1(enhanced), noisy) + + def enhance_file(self, filename, output_filename=None): + """Enhance a wav file. + + Arguments + --------- + filename : str + Location on disk to load file for enhancement. + output_filename : str + If provided, writes enhanced data to this file. + """ + noisy = self.load_audio(filename) + noisy = noisy.to(self.device) + + # Fake a batch: + batch = noisy.unsqueeze(0) + enhanced = self.enhance_batch(batch) + + if output_filename is not None: + torchaudio.save(output_filename, enhanced, channels_first=False) + + return enhanced.squeeze(0) diff --git a/ACL_PyTorch/contrib/audio/tdnn/modelzoo_level.txt b/ACL_PyTorch/contrib/audio/tdnn/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/audio/tdnn/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/audio/tdnn/om_infer.sh b/ACL_PyTorch/contrib/audio/tdnn/om_infer.sh index 499be09436055b75ba47d32b74fcbadb7d7c17df..718f9f6a8e0edcebbc7202c194226b02098ba69d 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/om_infer.sh +++ b/ACL_PyTorch/contrib/audio/tdnn/om_infer.sh @@ -1,23 +1,23 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -install_path=/usr/local/Ascend/ascend-toolkit/latest -#export PYTHONUNBUFFERD=1 -#export PYTHONPATH=${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH - -#export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:${install_path}/acllib/lib64/:${install_path}/atc/lib64/:$LD_LIBRARY_PATH - -python Tdnn_pyacl_infer.py --model_path=tdnn.om --device_id=0 --cpu_run=True --sync_infer=True --workspace=10 --input_info_file_path=mini_librispeech_test.info --input_dtypes=float32 --infer_res_save_path=result --res_save_type=bin +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +install_path=/usr/local/Ascend/ascend-toolkit/latest +#export PYTHONUNBUFFERD=1 +#export PYTHONPATH=${install_path}/pyACL/python/site-packages/acl:$PYTHONPATH + +#export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:${install_path}/acllib/lib64/:${install_path}/atc/lib64/:$LD_LIBRARY_PATH + +python Tdnn_pyacl_infer.py --model_path=tdnn.om --device_id=0 --cpu_run=True --sync_infer=True --workspace=10 --input_info_file_path=mini_librispeech_test.info --input_dtypes=float32 --infer_res_save_path=result --res_save_type=bin diff --git a/ACL_PyTorch/contrib/audio/tdnn/requirements.txt b/ACL_PyTorch/contrib/audio/tdnn/requirements.txt index 902c74751a5d7d9872c7787dcd49a9e49350d9f2..f4e019b9128e61cda0173e2b23d5ba89ca9eec65 100644 --- a/ACL_PyTorch/contrib/audio/tdnn/requirements.txt +++ b/ACL_PyTorch/contrib/audio/tdnn/requirements.txt @@ -1,11 +1,11 @@ --r lint-requirements.txt -huggingface_hub>=0.0.6 -hyperpyyaml>=0.0.1 -joblib>=0.14.1 -numpy>=1.17.0 -packaging -pre-commit>=2.3.0 -scipy>=1.4.1 -sentencepiece>=0.1.91 -SoundFile; sys_platform == 'win32' -tqdm>=4.42.0 +-r lint-requirements.txt +huggingface_hub>=0.0.6 +hyperpyyaml>=0.0.1 +joblib>=0.14.1 +numpy>=1.17.0 +packaging +pre-commit>=2.3.0 +scipy>=1.4.1 +sentencepiece>=0.1.91 +SoundFile; sys_platform == 'win32' +tqdm>=4.42.0 diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_pkl2onnx.py b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_pkl2onnx.py index 9d15792a6e44650412830047652ea61ac173b437..5927f173e59b62b51fdb732f5ed7be1833a080db 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_pkl2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_pkl2onnx.py @@ -1,34 +1,34 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch -import torch.onnx -from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel - - -def pkl2onnx(input_file = "model_92_sgd.pkl", output_file = "3d_attention_net.onnx"): - model = ResidualAttentionModel() - model.load_state_dict((torch.load(input_file, map_location = "cpu"))) - model.eval() - input_name = ["image"] - output_name = ["class"] - dynamic_axes = {"image": {0:"-1"}, "class": {0:"-1"}} - dummy_input = torch.randn(1, 3, 32, 32) - torch.onnx.export(model, dummy_input, output_file, input_names = input_name, dynamic_axes = dynamic_axes, output_names = output_name, opset_version=11, verbose=True) - -if __name__ == "__main__": - print("----------start----------") - pkl2onnx() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch +import torch.onnx +from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel + + +def pkl2onnx(input_file = "model_92_sgd.pkl", output_file = "3d_attention_net.onnx"): + model = ResidualAttentionModel() + model.load_state_dict((torch.load(input_file, map_location = "cpu"))) + model.eval() + input_name = ["image"] + output_name = ["class"] + dynamic_axes = {"image": {0:"-1"}, "class": {0:"-1"}} + dummy_input = torch.randn(1, 3, 32, 32) + torch.onnx.export(model, dummy_input, output_file, input_names = input_name, dynamic_axes = dynamic_axes, output_names = output_name, opset_version=11, verbose=True) + +if __name__ == "__main__": + print("----------start----------") + pkl2onnx() print("----------end----------") \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_preprocess.py index 4b94690e34ba1a97b52e3adb5903087f713131b2..4ce91ea226b86c7d4bfe85559651904905861aec 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/3d_attention_net_preprocess.py @@ -1,68 +1,68 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import torch -import numpy as np -from torch.utils.data import Dataset, DataLoader -import torchvision -from torchvision import transforms, datasets, models - -def preprocess(data_path = "./data/", save_path = "./pre_process_result/"): - # Image Preprocessing - transform = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.RandomCrop((32, 32), padding=4), #left, top, right, bottom - transforms.ToTensor() - ]) - test_transform = transforms.Compose([ - transforms.ToTensor() - ]) - # when image is rgb, totensor do the division 255 - # CIFAR-10 Dataset - train_dataset = datasets.CIFAR10(root=data_path, - train=True, - transform=transform, - download=True) - - test_dataset = datasets.CIFAR10(root=data_path, - train=False, - transform=test_transform) - - # Data Loader (Input Pipeline) - train_loader = torch.utils.data.DataLoader(dataset=train_dataset, - batch_size=64, # 64 - shuffle=True, num_workers=8) - test_loader = torch.utils.data.DataLoader(dataset=test_dataset, - batch_size=32, - shuffle=False) - - classes = ('plane', 'car', 'bird', 'cat', - 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') - if not os.path.exists(save_path): - os.makedirs(save_path) - cnt = -1 - for images, labels in test_loader: - for i in range(len(images)): - cnt += 1 - image = images[i] - label = labels[i] - image = np.array(image).astype(np.float32) - image.tofile(f"{save_path}image_{cnt}.bin") - if(cnt % 100 == 9): - print(f"current: {cnt}") - -if __name__ == "__main__": +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import torch +import numpy as np +from torch.utils.data import Dataset, DataLoader +import torchvision +from torchvision import transforms, datasets, models + +def preprocess(data_path = "./data/", save_path = "./pre_process_result/"): + # Image Preprocessing + transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomCrop((32, 32), padding=4), #left, top, right, bottom + transforms.ToTensor() + ]) + test_transform = transforms.Compose([ + transforms.ToTensor() + ]) + # when image is rgb, totensor do the division 255 + # CIFAR-10 Dataset + train_dataset = datasets.CIFAR10(root=data_path, + train=True, + transform=transform, + download=True) + + test_dataset = datasets.CIFAR10(root=data_path, + train=False, + transform=test_transform) + + # Data Loader (Input Pipeline) + train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=64, # 64 + shuffle=True, num_workers=8) + test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=32, + shuffle=False) + + classes = ('plane', 'car', 'bird', 'cat', + 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') + if not os.path.exists(save_path): + os.makedirs(save_path) + cnt = -1 + for images, labels in test_loader: + for i in range(len(images)): + cnt += 1 + image = images[i] + label = labels[i] + image = np.array(image).astype(np.float32) + image.tofile(f"{save_path}image_{cnt}.bin") + if(cnt % 100 == 9): + print(f"current: {cnt}") + +if __name__ == "__main__": preprocess() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/LICENSE b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/readme.md b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/readme.md index b7efb7ace2a24a91235b6f37e5842b7fc1775760..db4cf57a7e99fe0cb04e2e93124e60a6353602a7 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/readme.md +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/readme.md @@ -1,98 +1,98 @@ -# 一. 环境准备 -1. 安装必要的依赖,运行所需的依赖详见requirements.txt -2. 获取开源模型代码及权重文件 -``` -git clone https://github.com/tengshaofeng/ResidualAttentionNetwork-pytorch.git -cd ResidualAttentionNetwork-pytorch -git checkout 44d09fe9afc0d5fba6f3f63b8375069ae9d54a56 -cd Residual-Attention-Network -cp -r model ../.. -cp model_92_sgd.pkl ../.. -cd ../.. -``` -3. 由于python版本问题,原模型代码在执行过程中会出现数据类型转换问题,依次执行以下命令 -``` -cd model/ -patch -p1 <../3d_attention_net.patch -cd .. -``` - -4. 获取benchmark工具 - 将benchmark.x86_64放到当前目录。 -5. 获取OXInterface.py -``` -git clone https://gitee.com/zheng-wengang1/onnx_tools.git -cd onnx_tools -git checkout cbb099e5f2cef3d76c7630bffe0ee8250b03d921 -cd .. -``` - -# 二. 数据预处理 -1. 获取CIFAR-10数据集 -``` -website:https://www.cs.toronto.edu/~kriz/cifar.html -#Version:CIFAR-10 python version -#md5sum:c58f30108f718f92721af3b95e74349a -``` -2. 上传数据集 -``` -mkdir data -``` -3. 将下载的CIFAR-10数据集上传至data文件夹,而后执行如下命令: -``` -tar -zxvf data/cifar-10-python.tar.gz -C data/ -``` -4. 数据预处理 -``` -python3.7 3d_attention_net_preprocess.py -``` - -5. 生成预处理后的数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./pre_process_result/ ./3d_attention_net_prep_bin.info 32 32 -``` - -# 三. pkl文件转om模型 -1. 读取源代码仓中的pkl文件,将原始模型转换为onnx -``` -python3.7 3d_attention_net_pkl2onnx.py -``` - -2. 对onnx模型中的resize操作进行优化 -``` -python3.7 resize_optimize.py -``` - -3. 通过Autotune进行性能调优并转换为om模型 -``` -bash test/3d_attention_net_onnx2om.bash -``` - -# 四. om模型离线推理,性能及精度测试 -1. bs1离线推理测试 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=3d_attention_net_resize_autotune_optimized_bs1.om -input_text_path=3d_attention_net_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False -python3.7 3d_attention_net_postprocess.py 0 -``` -python3.7 3d_attention_net_postprocess.py $DEVICE_ID -传入的第一个参数DEVICE_ID为指定device的输出,应与benchmark传入的device_id保持一致,下同 - -2. bs16离线推理测试 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=3d_attention_net_resize_autotune_optimized_bs16.om -input_text_path=3d_attention_net_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False -python3.7 3d_attention_net_postprocess.py 0 -``` - -3. GPU性能测试 -``` -bash perf_g.sh -``` - -4. 性能&精度对比 - - -|模型|官网pkl精度|310离线推理精度|基准性能|310性能|精度对比
(310/基准)|性能对比
(310/基准)| -|-|:-:|:-:|:-:|:-:|:-:|:-:| -|3d_attention_net_bs1|Top-1:95.4%|Top-1:95.34%|659.2fps|1479.5fps|99.94%|1479.5/659.2| -|3d_attention_net_bs16|Top-1:95.4%|Top-1:95.34%|3494.16fps|3980.4fps|99.94%|3980.4/3494.16| - +# 一. 环境准备 +1. 安装必要的依赖,运行所需的依赖详见requirements.txt +2. 获取开源模型代码及权重文件 +``` +git clone https://github.com/tengshaofeng/ResidualAttentionNetwork-pytorch.git +cd ResidualAttentionNetwork-pytorch +git checkout 44d09fe9afc0d5fba6f3f63b8375069ae9d54a56 +cd Residual-Attention-Network +cp -r model ../.. +cp model_92_sgd.pkl ../.. +cd ../.. +``` +3. 由于python版本问题,原模型代码在执行过程中会出现数据类型转换问题,依次执行以下命令 +``` +cd model/ +patch -p1 <../3d_attention_net.patch +cd .. +``` + +4. 获取benchmark工具 + 将benchmark.x86_64放到当前目录。 +5. 获取OXInterface.py +``` +git clone https://gitee.com/zheng-wengang1/onnx_tools.git +cd onnx_tools +git checkout cbb099e5f2cef3d76c7630bffe0ee8250b03d921 +cd .. +``` + +# 二. 数据预处理 +1. 获取CIFAR-10数据集 +``` +website:https://www.cs.toronto.edu/~kriz/cifar.html +#Version:CIFAR-10 python version +#md5sum:c58f30108f718f92721af3b95e74349a +``` +2. 上传数据集 +``` +mkdir data +``` +3. 将下载的CIFAR-10数据集上传至data文件夹,而后执行如下命令: +``` +tar -zxvf data/cifar-10-python.tar.gz -C data/ +``` +4. 数据预处理 +``` +python3.7 3d_attention_net_preprocess.py +``` + +5. 生成预处理后的数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./pre_process_result/ ./3d_attention_net_prep_bin.info 32 32 +``` + +# 三. pkl文件转om模型 +1. 读取源代码仓中的pkl文件,将原始模型转换为onnx +``` +python3.7 3d_attention_net_pkl2onnx.py +``` + +2. 对onnx模型中的resize操作进行优化 +``` +python3.7 resize_optimize.py +``` + +3. 通过Autotune进行性能调优并转换为om模型 +``` +bash test/3d_attention_net_onnx2om.bash +``` + +# 四. om模型离线推理,性能及精度测试 +1. bs1离线推理测试 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=3d_attention_net_resize_autotune_optimized_bs1.om -input_text_path=3d_attention_net_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False +python3.7 3d_attention_net_postprocess.py 0 +``` +python3.7 3d_attention_net_postprocess.py $DEVICE_ID +传入的第一个参数DEVICE_ID为指定device的输出,应与benchmark传入的device_id保持一致,下同 + +2. bs16离线推理测试 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=3d_attention_net_resize_autotune_optimized_bs16.om -input_text_path=3d_attention_net_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False +python3.7 3d_attention_net_postprocess.py 0 +``` + +3. GPU性能测试 +``` +bash perf_g.sh +``` + +4. 性能&精度对比 + + +|模型|官网pkl精度|310离线推理精度|基准性能|310性能|精度对比
(310/基准)|性能对比
(310/基准)| +|-|:-:|:-:|:-:|:-:|:-:|:-:| +|3d_attention_net_bs1|Top-1:95.4%|Top-1:95.34%|659.2fps|1479.5fps|99.94%|1479.5/659.2| +|3d_attention_net_bs16|Top-1:95.4%|Top-1:95.34%|3494.16fps|3980.4fps|99.94%|3980.4/3494.16| + diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/requirements.txt index a89309f6209794e8311b29a4aae342a80243c8af..ec79d9f308d9ee670ae7c7a2266458ef9da2da5b 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/requirements.txt @@ -1,6 +1,6 @@ -torch=1.8.1 -torchvision=0.9.0 -numpy=1.19.5 -glob2=0.7 -opencv-python=4.2.0.34 -onnx=1.8.1 +torch=1.8.1 +torchvision=0.9.0 +numpy=1.19.5 +glob2=0.7 +opencv-python=4.2.0.34 +onnx=1.8.1 diff --git a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/resize_optimize.py b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/resize_optimize.py index 426bc3a82455eb35ebba8e3d51f5644db4769b2d..52fa75baf494a0e4e8220d7291f6e6f4ee6b043a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/resize_optimize.py +++ b/ACL_PyTorch/contrib/cv/classfication/3d_attention_net/resize_optimize.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import onnx -import numpy as np -from pprint import pprint -from onnx_tools.OXInterface.OXInterface import OXDataType, OXGraph, OXInitializer, OXNode - -oxgraph = OXGraph('3d_attention_net.onnx') -oxnode = oxgraph.get_oxnode_by_name('Resize_77') -oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") -oxnode.set_attribute(attr_name='mode', attr_value="nearest") - -oxnode = oxgraph.get_oxnode_by_name('Resize_96') -oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") -oxnode.set_attribute(attr_name='mode', attr_value="nearest") - -oxnode = oxgraph.get_oxnode_by_name('Resize_173') -oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") -oxnode.set_attribute(attr_name='mode', attr_value="nearest") - -oxnode = oxgraph.get_oxnode_by_name('Resize_241') -oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") -oxnode.set_attribute(attr_name='mode', attr_value="nearest") - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import onnx +import numpy as np +from pprint import pprint +from onnx_tools.OXInterface.OXInterface import OXDataType, OXGraph, OXInitializer, OXNode + +oxgraph = OXGraph('3d_attention_net.onnx') +oxnode = oxgraph.get_oxnode_by_name('Resize_77') +oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") +oxnode.set_attribute(attr_name='mode', attr_value="nearest") + +oxnode = oxgraph.get_oxnode_by_name('Resize_96') +oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") +oxnode.set_attribute(attr_name='mode', attr_value="nearest") + +oxnode = oxgraph.get_oxnode_by_name('Resize_173') +oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") +oxnode.set_attribute(attr_name='mode', attr_value="nearest") + +oxnode = oxgraph.get_oxnode_by_name('Resize_241') +oxnode.set_attribute(attr_name='coordinate_transformation_mode', attr_value="asymmetric") +oxnode.set_attribute(attr_name='mode', attr_value="nearest") + oxgraph.save_new_model('3d_attention_net_resize_optimized.onnx') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/AlexNet/LICENSE b/ACL_PyTorch/contrib/cv/classfication/AlexNet/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/classfication/AlexNet/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/AlexNet/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/C3D/C3D_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/C3D/C3D_postprocess.py index e2aefdbbe904e8c21b7aa6a65ee0220f65403f3f..38cd64d13a80734ab8c0e1d651a4783960f25edf 100644 --- a/ACL_PyTorch/contrib/cv/classfication/C3D/C3D_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/C3D/C3D_postprocess.py @@ -1,68 +1,68 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import json -import torch -import sys - - -result_dir = sys.argv[1] # result_dir:推理得到的输出文件夹 -label_dir = sys.argv[2] # label_dir:标注文件的路径 -json_dir = sys.argv[3] -result_dir = os.listdir(result_dir) - -# 处理annotation文件,得到一个label字典,key为类名称,value为类的索引 -# label = {'v_Skiing_g04_c03': '80', 'v_SoccerPenalty_g02_c04': '84', ......} -label = dict() -f = open(label_dir) -x = f.readlines() -f.close() -for i in range(len(x)): - class_name = x[i].split(' ')[0].split('/')[1] - class_idx = x[i].split(' ')[2].replace('\n', '').replace('\r', '') - label[class_name] = class_idx - -file_name = result_dir - -num_correct_top1 = 0 -num_total = len(file_name) - -# 统计top1正确的个数 -for file_idx in range(num_total): - x = file_name[file_idx] - f = open(os.path.join(sys.argv[1], x)) - score = f.readline().replace('\n', '').replace('\r', '').split(' ') # score:list[str] - score = score[0:1010] - score = [float(i) for i in score] - f.close() - s = [[], [], [], [], [], [], [], [], [], []] - for i in range(10): - s[i] = score[101*i:101*i + 101] # 对于score中的1010个分数,每隔101个将其取出放到s数组中 - cls_score = torch.tensor(s).mean(dim=0) # 对10个clips得到的输出结果求平均 - max_value = cls_score[0] - idx = 0 - for i in range(len(cls_score)): - if cls_score[i] >= max_value: - max_value = cls_score[i] - idx = i - if label[x.split('.')[0].replace('_1', '')] == str(idx): - num_correct_top1 += 1 - -top1_acc = num_correct_top1/num_total -result_dict = {"top1_acc": top1_acc} -print(result_dict) -json_str = json.dumps(result_dict) -with open(json_dir, 'w') as json_file: - json_file.write(json_str) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import json +import torch +import sys + + +result_dir = sys.argv[1] # result_dir:推理得到的输出文件夹 +label_dir = sys.argv[2] # label_dir:标注文件的路径 +json_dir = sys.argv[3] +result_dir = os.listdir(result_dir) + +# 处理annotation文件,得到一个label字典,key为类名称,value为类的索引 +# label = {'v_Skiing_g04_c03': '80', 'v_SoccerPenalty_g02_c04': '84', ......} +label = dict() +f = open(label_dir) +x = f.readlines() +f.close() +for i in range(len(x)): + class_name = x[i].split(' ')[0].split('/')[1] + class_idx = x[i].split(' ')[2].replace('\n', '').replace('\r', '') + label[class_name] = class_idx + +file_name = result_dir + +num_correct_top1 = 0 +num_total = len(file_name) + +# 统计top1正确的个数 +for file_idx in range(num_total): + x = file_name[file_idx] + f = open(os.path.join(sys.argv[1], x)) + score = f.readline().replace('\n', '').replace('\r', '').split(' ') # score:list[str] + score = score[0:1010] + score = [float(i) for i in score] + f.close() + s = [[], [], [], [], [], [], [], [], [], []] + for i in range(10): + s[i] = score[101*i:101*i + 101] # 对于score中的1010个分数,每隔101个将其取出放到s数组中 + cls_score = torch.tensor(s).mean(dim=0) # 对10个clips得到的输出结果求平均 + max_value = cls_score[0] + idx = 0 + for i in range(len(cls_score)): + if cls_score[i] >= max_value: + max_value = cls_score[i] + idx = i + if label[x.split('.')[0].replace('_1', '')] == str(idx): + num_correct_top1 += 1 + +top1_acc = num_correct_top1/num_total +result_dict = {"top1_acc": top1_acc} +print(result_dict) +json_str = json.dumps(result_dict) +with open(json_dir, 'w') as json_file: + json_file.write(json_str) diff --git a/ACL_PyTorch/contrib/cv/classfication/C3D/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/C3D/modelzoo_level.txt index 403465b84e39e2cc8a387c33aaf5a1043f8d267a..ec6168981c278bbe672c13a4eb251b6ec184eda4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/C3D/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/C3D/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:Perfect \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50/LICENSE b/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/gen_dataset_info.py index 80c2b0fc300d7037330a166b23c562015cd17148..f80f45a34c450d57f0ea49d93167892d93a30e88 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/gen_dataset_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_acc_eval.py index 0e1db27e816a0cf3ec4fb21ee23e691315f3f959..9ec7d5da536ecdab1cf4fa8253dba62604376179 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_acc_eval.py @@ -1,183 +1,183 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - 
table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ 
== '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_torch_preprocess.py index f7ca84a2561f8e3d907f6d81cb34b792cd3e25a5..f1ed2bf166ecc2e790724d02526d68f9a5f9e022 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/imagenet_torch_preprocess.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'deit': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. 
# ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python imagenet_torch_preprocess.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'deit': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. 
# ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python imagenet_torch_preprocess.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/parse.py b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/README.md b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/README.md index 0c8bb48bce3a21d432f0e88ab08f907e38029dcb..d53433c40a39e596782660f4357ce1de0932e106 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/README.md @@ -1,24 +1,24 @@ -环境准备: - -1.数据集路径 -通用的数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集路径为 /opt/npu/ - -2.进入工作目录 -cd Deit-Small - -3.导入所需的环境 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/facebookresearch/deit.git - -5.获取权重文件 -wget https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth - -6.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +通用的数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集路径为 /opt/npu/ + +2.进入工作目录 +cd Deit-Small + +3.导入所需的环境 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/facebookresearch/deit.git + +5.获取权重文件 +wget https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth + +6.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/pth2om.sh b/ACL_PyTorch/contrib/cv/classfication/Deit_Small/test/pth2om.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_postprocess.py index c4bb75bf7dad764e801d6e98551e4224e1c4ef41..58cd26aba36b86eecd582294652280ca774155c6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_postprocess.py @@ -1,113 +1,113 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import argparse -from pathlib import Path -import torch -import torch.nn.functional as F -import numpy as np -sys.path.append(r"./Efficient-3DCNNs/utils") -from eval_ucf101 import UCFclassification - -CLASS_NAMES = {0: 'ApplyEyeMakeup', 1: 'ApplyLipstick', 2: 'Archery', 3: 'BabyCrawling', 4: 'BalanceBeam', 5: 'BandMarching', - 6: 'BaseballPitch', 7: 'Basketball', 8: 'BasketballDunk', 9: 'BenchPress', 10: 'Biking', - 11: 'Billiards', 12: 'BlowDryHair', 13: 'BlowingCandles', 14: 'BodyWeightSquats', 15: 'Bowling', - 16: 'BoxingPunchingBag', 17: 'BoxingSpeedBag', 18: 'BreastStroke', 19: 'BrushingTeeth', 20: 'CleanAndJerk', - 21: 'CliffDiving', 22: 'CricketBowling', 23: 'CricketShot', 24: 'CuttingInKitchen', 25: 'Diving', - 26: 'Drumming', 27: 'Fencing', 28: 'FieldHockeyPenalty', 29: 'FloorGymnastics', 30: 'FrisbeeCatch', - 31: 'FrontCrawl', 32: 'GolfSwing', 33: 'Haircut', 34: 'Hammering', 35: 'HammerThrow', - 36: 'HandstandPushups', 37: 'HandstandWalking', 38: 'HeadMassage', 39: 'HighJump', 40: 'HorseRace', - 41: 'HorseRiding', 42: 'HulaHoop', 43: 'IceDancing', 44: 'JavelinThrow', 45: 'JugglingBalls', - 46: 'JumpingJack', 47: 'JumpRope', 48: 'Kayaking', 49: 'Knitting', 50: 'LongJump', - 51: 'Lunges', 52: 'MilitaryParade', 53: 'Mixing', 54: 'MoppingFloor', 55: 'Nunchucks', - 56: 'ParallelBars', 57: 'PizzaTossing', 58: 'PlayingCello', 59: 'PlayingDaf', 60: 'PlayingDhol', - 61: 'PlayingFlute', 62: 'PlayingGuitar', 63: 'PlayingPiano', 64: 'PlayingSitar', 65: 'PlayingTabla', - 66: 'PlayingViolin', 67: 'PoleVault', 68: 'PommelHorse', 69: 'PullUps', 70: 'Punch', - 71: 'PushUps', 72: 'Rafting', 73: 'RockClimbingIndoor', 74: 'RopeClimbing', 75: 'Rowing', - 76: 'SalsaSpin', 77: 'ShavingBeard', 78: 'Shotput', 79: 'SkateBoarding', 80: 'Skiing', - 81: 'Skijet', 82: 'SkyDiving', 83: 'SoccerJuggling', 84: 'SoccerPenalty', 85: 'StillRings', - 86: 'SumoWrestling', 87: 'Surfing', 88: 'Swing', 89: 'TableTennisShot', 90: 'TaiChi', - 91: 'TennisSwing', 92: 'ThrowDiscus', 93: 'TrampolineJumping', 94: 'Typing', 95: 'UnevenBars', - 96: 'VolleyballSpiking', 97: 'WalkingWithDog', 98: 'WallPushups', 99: 'WritingOnBoard', 100: 'YoYo'} - -def calculate_video_results(output_buffer, video_id, test_results, class_names): - video_outputs = torch.stack(output_buffer) - average_scores = torch.mean(video_outputs, dim=0) - sorted_scores, locs = torch.topk(average_scores, k=10) - - video_results = [] - for i in range(sorted_scores.size(0)): - video_results.append({ - 'label': class_names[int(locs[i])], - 'score': float(sorted_scores[i]) - }) - - test_results['results'][video_id] = video_results - -def evaluate(result_path, class_names, info_path, annotation_path, acc_file): - print('postprocessing') - f = open(info_path, 'r') - ucf101_info = f.readlines() - bin_path = os.listdir(result_path)[0] - result_path = os.path.join(result_path, bin_path) - bin_list = os.listdir(result_path) - bin_list.sort(key= lambda x:int(x[:-13])) - output_buffer = [] - previous_video_id = '' - test_results = {'results': {}} - for i, line in enumerate(ucf101_info): - targets = line.split(' ') - targets = targets[0:len(targets)-1] - bin_path = os.path.join(result_path, bin_list[i]) - outputs = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 101) - outputs = torch.from_numpy(outputs) - outputs = F.softmax(outputs, dim=1).cpu() - for j in range(outputs.size(0)): - if not (i == 0 and j == 0) and targets[j] != previous_video_id: - calculate_video_results(output_buffer, previous_video_id, - test_results, class_names) 
- output_buffer = [] - output_buffer.append(outputs[j].data.cpu()) - previous_video_id = targets[j] - - if (i % 100) == 0: - with open('val.json', 'w') as f: - json.dump(test_results, f) - if (i % 1000) == 0: - print('[{}/{}]'.format(i+1, len(bin_list))) - with open('val.json', 'w') as f: - json.dump(test_results, f) - - ucf_acc_t1 = UCFclassification(annotation_path, 'val.json', subset='validation', top_k=1) - ucf_acc_t1.evaluate() - - ucf_acc_t5 = UCFclassification(annotation_path, 'val.json', subset='validation', top_k=5) - ucf_acc_t5.evaluate() - - with open(acc_file, 'w') as f: - json.dump('top1 acc:'+str(ucf_acc_t1.hit_at_k)+'; top5 acc:'+str(ucf_acc_t5.hit_at_k), f) - print('postprocess finised') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of 3D-ResNets') - parser.add_argument('--result_path', default='', type=Path, help='Directory path of videos') - parser.add_argument('--info_path', default='', type=Path, help='Directory path of binary output data') - parser.add_argument('--annotation_path', default='', type=Path, help='Annotation file path') - parser.add_argument('--acc_file', default='', type=Path, help='Directory path of binary output data') - opt = parser.parse_args() - evaluate(opt.result_path, CLASS_NAMES, opt.info_path, opt.annotation_path, opt.acc_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import json +import argparse +from pathlib import Path +import torch +import torch.nn.functional as F +import numpy as np +sys.path.append(r"./Efficient-3DCNNs/utils") +from eval_ucf101 import UCFclassification + +CLASS_NAMES = {0: 'ApplyEyeMakeup', 1: 'ApplyLipstick', 2: 'Archery', 3: 'BabyCrawling', 4: 'BalanceBeam', 5: 'BandMarching', + 6: 'BaseballPitch', 7: 'Basketball', 8: 'BasketballDunk', 9: 'BenchPress', 10: 'Biking', + 11: 'Billiards', 12: 'BlowDryHair', 13: 'BlowingCandles', 14: 'BodyWeightSquats', 15: 'Bowling', + 16: 'BoxingPunchingBag', 17: 'BoxingSpeedBag', 18: 'BreastStroke', 19: 'BrushingTeeth', 20: 'CleanAndJerk', + 21: 'CliffDiving', 22: 'CricketBowling', 23: 'CricketShot', 24: 'CuttingInKitchen', 25: 'Diving', + 26: 'Drumming', 27: 'Fencing', 28: 'FieldHockeyPenalty', 29: 'FloorGymnastics', 30: 'FrisbeeCatch', + 31: 'FrontCrawl', 32: 'GolfSwing', 33: 'Haircut', 34: 'Hammering', 35: 'HammerThrow', + 36: 'HandstandPushups', 37: 'HandstandWalking', 38: 'HeadMassage', 39: 'HighJump', 40: 'HorseRace', + 41: 'HorseRiding', 42: 'HulaHoop', 43: 'IceDancing', 44: 'JavelinThrow', 45: 'JugglingBalls', + 46: 'JumpingJack', 47: 'JumpRope', 48: 'Kayaking', 49: 'Knitting', 50: 'LongJump', + 51: 'Lunges', 52: 'MilitaryParade', 53: 'Mixing', 54: 'MoppingFloor', 55: 'Nunchucks', + 56: 'ParallelBars', 57: 'PizzaTossing', 58: 'PlayingCello', 59: 'PlayingDaf', 60: 'PlayingDhol', + 61: 'PlayingFlute', 62: 'PlayingGuitar', 63: 'PlayingPiano', 64: 'PlayingSitar', 65: 'PlayingTabla', + 66: 'PlayingViolin', 67: 'PoleVault', 68: 'PommelHorse', 69: 'PullUps', 70: 'Punch', + 71: 'PushUps', 72: 'Rafting', 73: 'RockClimbingIndoor', 74: 'RopeClimbing', 75: 'Rowing', + 76: 'SalsaSpin', 77: 'ShavingBeard', 78: 'Shotput', 79: 'SkateBoarding', 80: 'Skiing', + 81: 'Skijet', 82: 'SkyDiving', 83: 'SoccerJuggling', 84: 'SoccerPenalty', 85: 'StillRings', + 86: 'SumoWrestling', 87: 'Surfing', 88: 'Swing', 89: 'TableTennisShot', 90: 'TaiChi', + 91: 'TennisSwing', 92: 'ThrowDiscus', 93: 'TrampolineJumping', 94: 'Typing', 95: 'UnevenBars', + 96: 'VolleyballSpiking', 97: 'WalkingWithDog', 98: 'WallPushups', 99: 'WritingOnBoard', 100: 'YoYo'} + +def calculate_video_results(output_buffer, video_id, test_results, class_names): + video_outputs = torch.stack(output_buffer) + average_scores = torch.mean(video_outputs, dim=0) + sorted_scores, locs = torch.topk(average_scores, k=10) + + video_results = [] + for i in range(sorted_scores.size(0)): + video_results.append({ + 'label': class_names[int(locs[i])], + 'score': float(sorted_scores[i]) + }) + + test_results['results'][video_id] = video_results + +def evaluate(result_path, class_names, info_path, annotation_path, acc_file): + print('postprocessing') + f = open(info_path, 'r') + ucf101_info = f.readlines() + bin_path = os.listdir(result_path)[0] + result_path = os.path.join(result_path, bin_path) + bin_list = os.listdir(result_path) + bin_list.sort(key= lambda x:int(x[:-13])) + output_buffer = [] + previous_video_id = '' + test_results = {'results': {}} + for i, line in enumerate(ucf101_info): + targets = line.split(' ') + targets = targets[0:len(targets)-1] + bin_path = os.path.join(result_path, bin_list[i]) + outputs = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 101) + outputs = torch.from_numpy(outputs) + outputs = F.softmax(outputs, dim=1).cpu() + for j in range(outputs.size(0)): + if not (i == 0 and j == 0) and targets[j] != previous_video_id: + calculate_video_results(output_buffer, previous_video_id, + test_results, class_names) 
+                output_buffer = []
+            output_buffer.append(outputs[j].data.cpu())
+            previous_video_id = targets[j]
+
+        if (i % 100) == 0:
+            with open('val.json', 'w') as f:
+                json.dump(test_results, f)
+        if (i % 1000) == 0:
+            print('[{}/{}]'.format(i+1, len(bin_list)))
+    with open('val.json', 'w') as f:
+        json.dump(test_results, f)
+
+    ucf_acc_t1 = UCFclassification(annotation_path, 'val.json', subset='validation', top_k=1)
+    ucf_acc_t1.evaluate()
+
+    ucf_acc_t5 = UCFclassification(annotation_path, 'val.json', subset='validation', top_k=5)
+    ucf_acc_t5.evaluate()
+
+    with open(acc_file, 'w') as f:
+        json.dump('top1 acc:'+str(ucf_acc_t1.hit_at_k)+'; top5 acc:'+str(ucf_acc_t5.hit_at_k), f)
+    print('postprocess finished')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='postprocess of Efficient-3DCNNs')
+    parser.add_argument('--result_path', default='', type=Path, help='Directory path of inference output binaries')
+    parser.add_argument('--info_path', default='', type=Path, help='Path of the info file generated by preprocessing')
+    parser.add_argument('--annotation_path', default='', type=Path, help='Annotation file path')
+    parser.add_argument('--acc_file', default='', type=Path, help='Output file path for the accuracy result')
+    opt = parser.parse_args()
+    evaluate(opt.result_path, CLASS_NAMES, opt.info_path, opt.annotation_path, opt.acc_file)
diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_preprocess.py
index ff18e7092f24607ede4772bf5df5f715e6e59be5..4b9ee37b7f31bca4b048ef94d07f647963b2cdb0 100644
--- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_preprocess.py
+++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/Efficient-3DCNNs_preprocess.py
@@ -1,92 +1,92 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import sys -import argparse -from pathlib import Path -import torch -import numpy as np -sys.path.append(r"./Efficient-3DCNNs") -from dataset import get_test_set -from spatial_transforms import Normalize, Compose, Scale, CornerCrop -from temporal_transforms import TemporalRandomCrop -from target_transforms import VideoID - -class ToTensor(object): - def __init__(self, norm_value=255): - self.norm_value = norm_value - - def __call__(self, pic): - img = np.array(pic, dtype=np.float32) - img = img.transpose(2, 0, 1) - img = img / self.norm_value - return torch.from_numpy(img) - - def randomize_parameters(self): - pass - -def preprocess(save_path): - print('preprocessing') - norm_method = Normalize([114.7748, 107.7354, 99.4750], [1, 1, 1]) - - spatial_transform = Compose([ - Scale(int(112 / 1.0)), - CornerCrop(112, 'c'), - ToTensor(1), norm_method - ]) - temporal_transform = TemporalRandomCrop(16, 1) - target_transform = VideoID() - - test_data = get_test_set(opt, spatial_transform, temporal_transform, - target_transform) - - test_loader = torch.utils.data.DataLoader(test_data, - batch_size=opt.inference_batch_size, - shuffle=False, - num_workers=0, - pin_memory=True) - if not os.path.exists(save_path): - os.makedirs(save_path) - file = open(opt.info_path, 'w') - cid = 0 - for i, (inputs, targets) in enumerate(test_loader): - if(len(targets) == opt.inference_batch_size): - info = '' - for j in range(len(targets)): - info = info + targets[j] + ' ' - batch_bin = inputs.cpu().numpy() - path_bin = str(save_path) + '/' + str(cid) + '.bin' - cid = cid + 1 - batch_bin.tofile(path_bin) - file.write(info) - file.write('\n') - if (i % 1000) == 0: - print('[{}/{}]'.format(i+1, len(test_loader))) - print('preprocess finished') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='preprocess of 3D-ResNets') - parser.add_argument('--video_path', default='C:/Users/17270/Efficient-3DCNNs-master-2/annotation_UCF101/UCF-101-image/UCF-101-image', type=Path, help='Directory path of videos') - parser.add_argument('--annotation_path', default='ucf101_01.json', type=Path, help='Annotation file path') - parser.add_argument('--dataset', default='ucf101', type=str, help='Used dataset (activitynet | kinetics | ucf101 | hmdb51)') - parser.add_argument('--test_subset', default='val', type=str, help='Used subset in inference (train | val | test)') - parser.add_argument('--output_path', default='zjbintt', type=Path, help='Directory path of binary output data') - parser.add_argument('--info_path', default='zjbin1.info', type=Path, help='Directory path of binary output data') - parser.add_argument('--inference_batch_size', default=16, type=int, help='Batch Size for inference. 0 means this is the same as batch_size.') - parser.add_argument('--sample_duration', default=16, type=int, help='Temporal duration of inputs') - opt = parser.parse_args() - - preprocess(opt.output_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import sys
+import argparse
+from pathlib import Path
+import torch
+import numpy as np
+sys.path.append(r"./Efficient-3DCNNs")
+from dataset import get_test_set
+from spatial_transforms import Normalize, Compose, Scale, CornerCrop
+from temporal_transforms import TemporalRandomCrop
+from target_transforms import VideoID
+
+class ToTensor(object):
+    def __init__(self, norm_value=255):
+        self.norm_value = norm_value
+
+    def __call__(self, pic):
+        img = np.array(pic, dtype=np.float32)
+        img = img.transpose(2, 0, 1)
+        img = img / self.norm_value
+        return torch.from_numpy(img)
+
+    def randomize_parameters(self):
+        pass
+
+def preprocess(save_path):
+    print('preprocessing')
+    norm_method = Normalize([114.7748, 107.7354, 99.4750], [1, 1, 1])
+
+    spatial_transform = Compose([
+        Scale(int(112 / 1.0)),
+        CornerCrop(112, 'c'),
+        ToTensor(1), norm_method
+    ])
+    temporal_transform = TemporalRandomCrop(16, 1)
+    target_transform = VideoID()
+
+    test_data = get_test_set(opt, spatial_transform, temporal_transform,
+                             target_transform)
+
+    test_loader = torch.utils.data.DataLoader(test_data,
+                                              batch_size=opt.inference_batch_size,
+                                              shuffle=False,
+                                              num_workers=0,
+                                              pin_memory=True)
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    file = open(opt.info_path, 'w')
+    cid = 0
+    for i, (inputs, targets) in enumerate(test_loader):
+        if(len(targets) == opt.inference_batch_size):
+            info = ''
+            for j in range(len(targets)):
+                info = info + targets[j] + ' '
+            batch_bin = inputs.cpu().numpy()
+            path_bin = str(save_path) + '/' + str(cid) + '.bin'
+            cid = cid + 1
+            batch_bin.tofile(path_bin)
+            file.write(info)
+            file.write('\n')
+        if (i % 1000) == 0:
+            print('[{}/{}]'.format(i+1, len(test_loader)))
+    print('preprocess finished')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='preprocess of Efficient-3DCNNs')
+    parser.add_argument('--video_path', default='C:/Users/17270/Efficient-3DCNNs-master-2/annotation_UCF101/UCF-101-image/UCF-101-image', type=Path, help='Directory path of videos')
+    parser.add_argument('--annotation_path', default='ucf101_01.json', type=Path, help='Annotation file path')
+    parser.add_argument('--dataset', default='ucf101', type=str, help='Used dataset (activitynet | kinetics | ucf101 | hmdb51)')
+    parser.add_argument('--test_subset', default='val', type=str, help='Used subset in inference (train | val | test)')
+    parser.add_argument('--output_path', default='zjbintt', type=Path, help='Directory path of binary output data')
+    parser.add_argument('--info_path', default='zjbin1.info', type=Path, help='Path of the output info file')
+    parser.add_argument('--inference_batch_size', default=16, type=int, help='Batch Size for inference. 0 means this is the same as batch_size.')
+    parser.add_argument('--sample_duration', default=16, type=int, help='Temporal duration of inputs')
+    opt = parser.parse_args()
+
+    preprocess(opt.output_path)
diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/LICENSE
index 797bf40e85c5d2986ebcec9cb51aed979ca88b82..04adf5cbc620ad190547b092fa449e36df5f7bf4 100644
--- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/LICENSE
+++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/LICENSE
@@ -1,203 +1,203 @@
-Copyright 2018-2019 Open-MMLab. All rights reserved.
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. 
+ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/README.md b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/README.md index ae5d6a93cefccd3583bd3b82147b0f4e8e4d3091..532928808675d492552eb8fcc1ed8da10993d163 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/README.md @@ -1,49 +1,49 @@ -# Efficient-3DCNNs模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - -2.安装开源模型代码 -``` -git clone https://github.com/okankop/Efficient-3DCNNs -``` -> branch: master - -> commit id: d60c6c48cf2e81380d0a513e22e9d7f8467731af - -3.获取权重文件 - -[ucf101_mobilenetv2_1.0x_RGB_16_best.pth](https://drive.google.com/drive/folders/1u4DO7kjAQP6Zdh8CN65iT5ozp11mvE-H?usp=sharing) - -4.[获取UCF-101数据集](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) -将UCF101.rar文件解压,重命名为ucf101,放在 /root/datasets/文件夹下 - -``` -python3.7 Efficient-3DCNNs/utils/video_jpg_ucf101_hmdb51.py /root/datasets/ucf101/videos/ /root/datasets/ucf101/rawframes -python3.7 Efficient-3DCNNs/utils/n_frames_ucf101_hmdb51.py /root/datasets/ucf101/rawframes -``` -[获取json形式的annotation文件](https://github.com/okankop/Efficient-3DCNNs/tree/master/annotation_UCF101) -将ucf101_01.json放到当前目录 - -5.获取[msame](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer)工具 -将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets/ -``` - **评测结果:** -| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| Efficient-3DCNNs bs1 | top1:81.100% top5:96.326% | top1:81.126% top5:96.299% | 619.767fps | 641.728fps | -| Efficient-3DCNNs bs16 | top1:81.100% top5:96.326% | top1:81.126% top5:96.299% | 393.696fps | 744.432fps | - - - +# Efficient-3DCNNs模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + +2.安装开源模型代码 +``` +git clone https://github.com/okankop/Efficient-3DCNNs +``` +> branch: master + +> commit id: d60c6c48cf2e81380d0a513e22e9d7f8467731af + +3.获取权重文件 + +[ucf101_mobilenetv2_1.0x_RGB_16_best.pth](https://drive.google.com/drive/folders/1u4DO7kjAQP6Zdh8CN65iT5ozp11mvE-H?usp=sharing) + +4.[获取UCF-101数据集](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) +将UCF101.rar文件解压,重命名为ucf101,放在 /root/datasets/文件夹下 + +``` +python3.7 Efficient-3DCNNs/utils/video_jpg_ucf101_hmdb51.py /root/datasets/ucf101/videos/ /root/datasets/ucf101/rawframes +python3.7 Efficient-3DCNNs/utils/n_frames_ucf101_hmdb51.py /root/datasets/ucf101/rawframes +``` +[获取json形式的annotation文件](https://github.com/okankop/Efficient-3DCNNs/tree/master/annotation_UCF101) +将ucf101_01.json放到当前目录 + +5.获取[msame](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer)工具 +将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` 
+bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets/ +``` + **评测结果:** +| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| Efficient-3DCNNs bs1 | top1:81.100% top5:96.326% | top1:81.126% top5:96.299% | 619.767fps | 641.728fps | +| Efficient-3DCNNs bs16 | top1:81.100% top5:96.326% | top1:81.126% top5:96.299% | 393.696fps | 744.432fps | + + + diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/modelzoo_level.txt index 731399223951dc50392319c2f4e8e5245174c192..108cc882d65c41bd354b9c6373bcf882d112c26b 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/requirements.txt index ef82dd062536f09a47198a22e6147bd46ff69a1a..5768d0bd49edff9b27bc10315bd9b80d5b59b9ec 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs/requirements.txt @@ -1,7 +1,7 @@ -torch==1.5.0 -torchvision==0.6.0 -onnx == 1.7.0 -onnx-simplifier==0.3.6 -opencv-python==4.5.3.56 -scipy +torch==1.5.0 +torchvision==0.6.0 +onnx == 1.7.0 +onnx-simplifier==0.3.6 +opencv-python==4.5.3.56 +scipy pandas \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_postprocess.py index d51eb6e5f91ae430de0518a476cb4d6f5162cef1..7f8389d56eb971ec0f5eab66f088b72cbf570a29 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_postprocess.py @@ -1,72 +1,72 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os -import argparse -import re -import numpy -import json - -def rename(data_dir, pre_dir): - txtfile_2_class = dict() - for classname in os.listdir(data_dir): - for imgname in os.listdir(os.path.join(data_dir, classname)): - txtfile_2_class[os.path.splitext(imgname)[0].split("_")[2]] = classname - omoutput_txts = os.listdir(pre_dir) - for omtxt in omoutput_txts: - if omtxt.split("_")[0] not in txtfile_2_class.values(): - os.rename(os.path.join(pre_dir, omtxt), os.path.join(pre_dir, txtfile_2_class.get(omtxt.split("_")[2]) + "_" + omtxt)) - -def classification(data_path): - files = os.listdir(data_path) - class_ids = sorted(f for f in files if re.match(r"^n[0-9]+$", f)) - return class_ids - - -def eval(data_dir, pred_dir, save_file): - txtfiles = os.listdir(pred_dir) - top1_acc = 0 - top5_acc = 0 - for txtfile in txtfiles: - print("loading {}".format(txtfile)) - pre_num = numpy.loadtxt(os.path.join(pred_dir, txtfile)) - class_ids = classification(data_dir) - class_pres = zip(class_ids, pre_num) - class_pres_dict = dict((class_id, value) for class_id, value in class_pres) - class_sort = max(class_pres_dict.items(), key=lambda x: x[1]) - if txtfile.split('_')[0] == class_sort[0]: - top1_acc = top1_acc + 1 - iteams = sorted(class_pres_dict.items(), key=lambda x: x[1]) - if txtfile.split('_')[0] in [iteams[999][0], iteams[998][0], iteams[997][0], iteams[996][0], iteams[995][0]]: - top5_acc = top5_acc + 1 - - topn_acc = dict() - topn_acc['Top1 accuracy'] = str(top1_acc / 50000 * 100) + "%" - topn_acc['Top5 accuracy'] = str(top5_acc / 50000 * 100) + "%" - print(topn_acc['Top1 accuracy']) - print(topn_acc['Top5 accuracy']) - writer = open(save_file, 'w') - json.dump(topn_acc, writer) - writer.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--data_dir", default="./imagenet/val") - parser.add_argument("--pre_dir", default="./result/dumpOutput_device0/") - parser.add_argument("--save_file", default="./result.json") - args = parser.parse_args() - rename(args.data_dir, args.pre_dir) - eval(args.data_dir, args.pre_dir, args.save_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os +import argparse +import re +import numpy +import json + +def rename(data_dir, pre_dir): + txtfile_2_class = dict() + for classname in os.listdir(data_dir): + for imgname in os.listdir(os.path.join(data_dir, classname)): + txtfile_2_class[os.path.splitext(imgname)[0].split("_")[2]] = classname + omoutput_txts = os.listdir(pre_dir) + for omtxt in omoutput_txts: + if omtxt.split("_")[0] not in txtfile_2_class.values(): + os.rename(os.path.join(pre_dir, omtxt), os.path.join(pre_dir, txtfile_2_class.get(omtxt.split("_")[2]) + "_" + omtxt)) + +def classification(data_path): + files = os.listdir(data_path) + class_ids = sorted(f for f in files if re.match(r"^n[0-9]+$", f)) + return class_ids + + +def eval(data_dir, pred_dir, save_file): + txtfiles = os.listdir(pred_dir) + top1_acc = 0 + top5_acc = 0 + for txtfile in txtfiles: + print("loading {}".format(txtfile)) + pre_num = numpy.loadtxt(os.path.join(pred_dir, txtfile)) + class_ids = classification(data_dir) + class_pres = zip(class_ids, pre_num) + class_pres_dict = dict((class_id, value) for class_id, value in class_pres) + class_sort = max(class_pres_dict.items(), key=lambda x: x[1]) + if txtfile.split('_')[0] == class_sort[0]: + top1_acc = top1_acc + 1 + iteams = sorted(class_pres_dict.items(), key=lambda x: x[1]) + if txtfile.split('_')[0] in [iteams[999][0], iteams[998][0], iteams[997][0], iteams[996][0], iteams[995][0]]: + top5_acc = top5_acc + 1 + + topn_acc = dict() + topn_acc['Top1 accuracy'] = str(top1_acc / 50000 * 100) + "%" + topn_acc['Top5 accuracy'] = str(top5_acc / 50000 * 100) + "%" + print(topn_acc['Top1 accuracy']) + print(topn_acc['Top5 accuracy']) + writer = open(save_file, 'w') + json.dump(topn_acc, writer) + writer.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--data_dir", default="./imagenet/val") + parser.add_argument("--pre_dir", default="./result/dumpOutput_device0/") + parser.add_argument("--save_file", default="./result.json") + args = parser.parse_args() + rename(args.data_dir, args.pre_dir) + eval(args.data_dir, args.pre_dir, args.save_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_preprocess.py index 09fb97aadc02bfeb9f90a0e5e3731778e4a3cb9f..a9b71deb632572f83bfaa8b1666d98a81937ada0 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_preprocess.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -sys.path.append("./pycls") -import numpy as np -import cv2 - -from pycls.datasets import transforms - -_EIG_VALS = [[0.2175, 0.0188, 0.0045]] -_EIG_VECS = [ - [-0.5675, 0.7192, 0.4009], - [-0.5808, -0.0045, -0.8140], - [-0.5836, -0.6948, 0.4203], -] -_MEAN = [0.485, 0.456, 0.406] -_STD = [0.229, 0.224, 0.225] - -train_size = 240 -test_size = 274 - -def trans(im): - im = im[:, :, ::-1].astype(np.float32) / 255 - im = transforms.scale_and_center_crop(im, test_size, train_size) - im = transforms.color_norm(im, _MEAN, _STD) - im = np.ascontiguousarray(im[:, :, ::-1].transpose([2, 0, 1])) - return im - -def EffnetB1_preprocess(src_path, save_path): - i = 0 - classes = os.listdir(src_path) - for classname in classes: - dirs = os.path.join(src_path, classname) - save_dir = os.path.join(save_path, classname) - if not os.path.isdir(save_dir): - os.makedirs(os.path.realpath(save_dir)) - for img in os.listdir(dirs): - i = i + 1 - print(img, "===", i) - img_path = os.path.join(dirs, img) - im = cv2.imread(img_path) - im = trans(im) - im.tofile(os.path.join(save_dir, img.split('.')[0] + ".bin")) - -if __name__ == '__main__': - src_path = sys.argv[1] - save_path = sys.argv[2] - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +sys.path.append("./pycls") +import numpy as np +import cv2 + +from pycls.datasets import transforms + +_EIG_VALS = [[0.2175, 0.0188, 0.0045]] +_EIG_VECS = [ + [-0.5675, 0.7192, 0.4009], + [-0.5808, -0.0045, -0.8140], + [-0.5836, -0.6948, 0.4203], +] +_MEAN = [0.485, 0.456, 0.406] +_STD = [0.229, 0.224, 0.225] + +train_size = 240 +test_size = 274 + +def trans(im): + im = im[:, :, ::-1].astype(np.float32) / 255 + im = transforms.scale_and_center_crop(im, test_size, train_size) + im = transforms.color_norm(im, _MEAN, _STD) + im = np.ascontiguousarray(im[:, :, ::-1].transpose([2, 0, 1])) + return im + +def EffnetB1_preprocess(src_path, save_path): + i = 0 + classes = os.listdir(src_path) + for classname in classes: + dirs = os.path.join(src_path, classname) + save_dir = os.path.join(save_path, classname) + if not os.path.isdir(save_dir): + os.makedirs(os.path.realpath(save_dir)) + for img in os.listdir(dirs): + i = i + 1 + print(img, "===", i) + img_path = os.path.join(dirs, img) + im = cv2.imread(img_path) + im = trans(im) + im.tofile(os.path.join(save_dir, img.split('.')[0] + ".bin")) + +if __name__ == '__main__': + src_path = sys.argv[1] + save_path = sys.argv[2] + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) EffnetB1_preprocess(src_path, save_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_pth2onnx.py index f79c4391c06a5492c4c082d0074fd0595ae3363a..53a95a7aae76624996b6561f6e9aa9fa929dc4f5 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/Efficient-B1_pth2onnx.py @@ -1,51 +1,51 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import argparse -import sys -import torch -from collections import OrderedDict -sys.path.append('./pycls') -from pycls.models.effnet import EffNet -import pycls.core.config as config -from pycls.core.config import cfg - -def proc_node_module(checkpoint, attr_name): - new_model_state = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_model_state[name] = v - return new_model_state - -def main(input_file, yaml_file, output_file): - config.load_cfg(yaml_file) - cfg.freeze() - model = EffNet() - checkpoint = torch.load(input_file, map_location='cpu') - checkpoint['model_state'] = proc_node_module(checkpoint, 'model_state') - model.load_state_dict(checkpoint["model_state"]) - model.eval() - input_names = ["image"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 240, 240) - torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, opset_version=11, verbose=True) - - -if __name__ == '__main__': - input_file = sys.argv[1] - yaml_file= sys.argv[2] - output_file = sys.argv[3] - main(input_file, yaml_file, output_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import sys +import torch +from collections import OrderedDict +sys.path.append('./pycls') +from pycls.models.effnet import EffNet +import pycls.core.config as config +from pycls.core.config import cfg + +def proc_node_module(checkpoint, attr_name): + new_model_state = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_model_state[name] = v + return new_model_state + +def main(input_file, yaml_file, output_file): + config.load_cfg(yaml_file) + cfg.freeze() + model = EffNet() + checkpoint = torch.load(input_file, map_location='cpu') + checkpoint['model_state'] = proc_node_module(checkpoint, 'model_state') + model.load_state_dict(checkpoint["model_state"]) + model.eval() + input_names = ["image"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 240, 240) + torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, opset_version=11, verbose=True) + + +if __name__ == '__main__': + input_file = sys.argv[1] + yaml_file= sys.argv[2] + output_file = sys.argv[3] + main(input_file, yaml_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/ImageNet_val_split.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/ImageNet_val_split.py index 3d5780c7cbb83466a7cf74b82d3908ca3e038a4a..244e28b791b82b82fba7e649fc0d69b3133530c5 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/ImageNet_val_split.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/ImageNet_val_split.py @@ -1,62 +1,62 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import scipy -import sys -import shutil -from scipy import io - -def move_valimg(val_dir='./val', devkit_dir='./ILSVRC2012_devkit_t12'): - """ - move valimg to correspongding folders. - val_id(start from 1) -> ILSVRC_ID(start from 1) -> WIND - organize like: - /val - /n01440764 - images - /n01443537 - images - ..... 
- """ - # load synset, val ground truth and val images list - synset = io.loadmat(os.path.join(devkit_dir, 'data', 'meta.mat')) - - ground_truth = open(os.path.join(devkit_dir, 'data', 'ILSVRC2012_validation_ground_truth.txt')) - lines = ground_truth.readlines() - labels = [int(line[:-1]) for line in lines] - - root, _, filenames = next(os.walk(val_dir)) - for filename in filenames: - # val image name -> ILSVRC ID -> WIND - val_id = int(filename.split('.')[0].split('_')[-1]) - ILSVRC_ID = labels[val_id-1] - WIND = synset['synsets'][ILSVRC_ID-1][0][1][0] - print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ILSVRC_ID, WIND)) - - # move val images - output_dir = os.path.join(root, WIND) - if os.path.isdir(output_dir): - pass - else: - os.mkdir(output_dir) - shutil.move(os.path.join(root, filename), os.path.join(output_dir, filename)) - -def main(val_path, devkit_path): - move_valimg(val_path, devkit_path) - -if __name__ == '__main__': - val_path = sys.argv[1] - devkit_path = sys.argv[2] - main(val_path, devkit_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import scipy +import sys +import shutil +from scipy import io + +def move_valimg(val_dir='./val', devkit_dir='./ILSVRC2012_devkit_t12'): + """ + move valimg to correspongding folders. + val_id(start from 1) -> ILSVRC_ID(start from 1) -> WIND + organize like: + /val + /n01440764 + images + /n01443537 + images + ..... + """ + # load synset, val ground truth and val images list + synset = io.loadmat(os.path.join(devkit_dir, 'data', 'meta.mat')) + + ground_truth = open(os.path.join(devkit_dir, 'data', 'ILSVRC2012_validation_ground_truth.txt')) + lines = ground_truth.readlines() + labels = [int(line[:-1]) for line in lines] + + root, _, filenames = next(os.walk(val_dir)) + for filename in filenames: + # val image name -> ILSVRC ID -> WIND + val_id = int(filename.split('.')[0].split('_')[-1]) + ILSVRC_ID = labels[val_id-1] + WIND = synset['synsets'][ILSVRC_ID-1][0][1][0] + print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ILSVRC_ID, WIND)) + + # move val images + output_dir = os.path.join(root, WIND) + if os.path.isdir(output_dir): + pass + else: + os.mkdir(output_dir) + shutil.move(os.path.join(root, filename), os.path.join(output_dir, filename)) + +def main(val_path, devkit_path): + move_valimg(val_path, devkit_path) + +if __name__ == '__main__': + val_path = sys.argv[1] + devkit_path = sys.argv[2] + main(val_path, devkit_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/LICENSE b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
+ Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/README.md b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/README.md index edbf5b3267d4a11fad19674662708cf959cb5476..aa1efa1719fb0978a9a182472f402fe18d7973d9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/README.md @@ -1,48 +1,48 @@ -# EfficientNet-B1模型PyTorch离线推理指导 -# 环境准备: - -1.数据集 - -测试数据集为ImageNet的官方 2012的val数据集,5w张图片放置在一个文件夹下,并由官方对应的 ILSVRC2012_devkit_t12 文件夹。 - -第一个参数为 新下载且未分类的 imagenet的val 数据集路径, - -第二个参数为官方 提供的 devkit 文件夹,如果要保留val文件夹请先备份 - -``` -python3.7 ImageNet_val_split.py ./val ./ILSVRC2012_devkit_t12 -``` - -2.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` -3.获取模型代码 -``` -git clone https://github.com/facebookresearch/pycls -cd pycls -git reset f20820e01eef7b9a47b77f13464e3e77c44d5e1f --hard -cd .. -``` - -4.获取权重文件 -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/EfficientNet-B1/EN-B1_dds_8gpu.pyth -``` -5.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -# 2 离线推理 - -310上执行,执行时确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` -评测结果: - -| 模型 | 开源仓pth精度 | 310精度 | 性能基准 | 310性能 | -| :---------------: | :--------: | :--------------------: | :---------: | :---------: | -| Efficient-B1 bs1 | top1:75.9% | top1:75.5% top5:92.78% | 694.137fps | 940.524fps | -| Efficient-B1 bs16 | top1:75.9% | top1:75.5% top5:92.78% | 1408.138fps | 1490.54fps | - +# EfficientNet-B1模型PyTorch离线推理指导 +# 环境准备: + +1.数据集 + +测试数据集为ImageNet的官方 2012的val数据集,5w张图片放置在一个文件夹下,并由官方对应的 ILSVRC2012_devkit_t12 文件夹。 + +第一个参数为 新下载且未分类的 imagenet的val 数据集路径, + +第二个参数为官方 提供的 devkit 文件夹,如果要保留val文件夹请先备份 + +``` +python3.7 ImageNet_val_split.py ./val ./ILSVRC2012_devkit_t12 +``` + +2.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` +3.获取模型代码 +``` +git clone https://github.com/facebookresearch/pycls +cd pycls +git reset f20820e01eef7b9a47b77f13464e3e77c44d5e1f --hard +cd .. 
+``` + +4.获取权重文件 +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/EfficientNet-B1/EN-B1_dds_8gpu.pyth +``` +5.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +# 2 离线推理 + +310上执行,执行时确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` +评测结果: + +| 模型 | 开源仓pth精度 | 310精度 | 性能基准 | 310性能 | +| :---------------: | :--------: | :--------------------: | :---------: | :---------: | +| Efficient-B1 bs1 | top1:75.9% | top1:75.5% top5:92.78% | 694.137fps | 940.524fps | +| Efficient-B1 bs16 | top1:75.9% | top1:75.5% top5:92.78% | 1408.138fps | 1490.54fps | + diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/gen_dataset_info.py index 9ecd822b4e47491a1e81663266eed5dfbf1d8b88..a07929cc79bb21b6799890a82e394e396a70519a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/gen_dataset_info.py @@ -1,43 +1,43 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - index = 0 - with open(info_name, 'w') as file: - for classes in os.listdir(file_path): - bin_dir_path = os.path.join(file_path, classes) - bin_images = glob(os.path.join(bin_dir_path, '*.bin')) - for img in bin_images: - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - index = index + 1 - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - assert file_type == 'bin', 'The file_type must is bin' - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
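# A hedged usage sketch for gen_dataset_info.py (the directory and info-file names here are
# assumptions): after preprocessing the val images into per-class folders of .bin files, the
# info file read by the benchmark tool is produced with
#   python3.7 gen_dataset_info.py bin ./prep_dataset ./efficientnet_prep_bin.info 240 240
# and each resulting line has the form "<index> <bin_path> <width> <height>", which the
# snippet below sanity-checks.
with open('./efficientnet_prep_bin.info') as info:
    for line in info:
        idx, path, width, height = line.split()   # four space-separated fields per line
        assert path.endswith('.bin') and width == '240' and height == '240'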
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + index = 0 + with open(info_name, 'w') as file: + for classes in os.listdir(file_path): + bin_dir_path = os.path.join(file_path, classes) + bin_images = glob(os.path.join(bin_dir_path, '*.bin')) + for img in bin_images: + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + index = index + 1 + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + assert file_type == 'bin', 'The file_type must is bin' + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' get_bin_info(file_path, info_name, width, height) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/modelzoo_level.txt index fc92458a08f881b475b9a56c45153a0e3fdfe450..76842f45b5f43d10e5fcda3085032ef975a1aea0 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PerfStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:OK AutoTune:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/requirements.txt index 4e04e4fb8a87ca9732e3aea239b3d5bac59a18b9..2b7911c419eba854b915880f82040c6a9774a2d6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/requirements.txt @@ -1,9 +1,9 @@ -pytorch==1.5.0 -torchvision==0.6.0 -onnx==1.7.0 -numpy==1.18.5 -Pillow==7.2.0 -opencv-python3==1.0 -yacs -iopath +pytorch==1.5.0 +torchvision==0.6.0 +onnx==1.7.0 +numpy==1.18.5 +Pillow==7.2.0 +opencv-python3==1.0 +yacs +iopath submitit \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/test/parse.py index 335e170932c5210fad286b8e49ad092d1ea2b272..5461d015b668fdba0ad902dd7f355676ff71ed57 100644 --- a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1/test/parse.py @@ -1,41 +1,41 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
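# A hedged sketch of what the PureInfer branch of this parse.py computes, applied to an
# assumed benchmark log line (the exact log wording and the x4 scaling are assumptions; the
# multiplier treats the single-chip rate as one quarter of a four-chip Ascend 310 card):
import re
sample = 'ave_throughputRate = 235.13samples/s, ave_latency = 4.25ms'    # assumed format
values = [v.strip() for v in re.findall(r'=(.*?),', sample.replace('\n', ',') + ',')]
fps = float(values[0].replace('samples/s', '')) * 4
print('310 fps: %.3f' % fps)                                             # 940.520 for this sample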
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): # Accuracy - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - print(content) - elif sys.argv[1].endswith('.txt'): # Perform - result_txt = sys.argv[1] - if 'PureInfer' in result_txt: # Pure Infer - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[0].replace('samples/s', '')) * 4 - print('310 {} fps:{}'.format(result_txt.split('_')[3], fps)) - else: # Infer based on dataset - with open(result_txt, 'r') as f: - lines = f.readlines() - for line in lines: - if 'infer' in line: - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', line.replace('\n', ',') + ',')] - fps = float(txt_data_list[1]) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) - break +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): # Accuracy + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + print(content) + elif sys.argv[1].endswith('.txt'): # Perform + result_txt = sys.argv[1] + if 'PureInfer' in result_txt: # Pure Infer + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[0].replace('samples/s', '')) * 4 + print('310 {} fps:{}'.format(result_txt.split('_')[3], fps)) + else: # Infer based on dataset + with open(result_txt, 'r') as f: + lines = f.readlines() + for line in lines: + if 'infer' in line: + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', line.replace('\n', ',') + ',')] + fps = float(txt_data_list[1]) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) + break diff --git a/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B3/README.md b/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B3/README.md old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/FixRes/README.md b/ACL_PyTorch/contrib/cv/classfication/FixRes/README.md index f6ac6db0fbb09da1c34532ed60eac34d605e3e31..e1f6a17863f3158dae006cf94cbea5a59ab0bf21 100644 --- a/ACL_PyTorch/contrib/cv/classfication/FixRes/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/FixRes/README.md @@ -1,43 +1,43 @@ -# FixRes模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - - -2.获取,修改与安装开源模型代码 -``` -git clone https://github.com/facebookresearch/FixRes.git -b main -cd FixRes -git reset c9be6acc7a6b32f896e62c28a97c20c2348327d3 --hard -cd .. 
-``` - -3.获取权重文件 - -FixResNet50.pth - -4.数据集 -获取ImageNet 2012 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲。测试时需下载imagenet_labels_fixres.json文件,并放在imagenet文件夹下。 -``` -# 生成om模型 -bash test/pth2om.sh - -# om模型离线推理并测试 -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| FixRes bs1 | [rank1:79.0%](https://github.com/facebookresearch/FixRes) | rank1:79.0% | 507fps | 785.208fps | -| FixRes bs16 | [rank1:79.0%](https://github.com/facebookresearch/FixRes) | rank1:79.0% | 734fps | 788.566fps | +# FixRes模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + + +2.获取,修改与安装开源模型代码 +``` +git clone https://github.com/facebookresearch/FixRes.git -b main +cd FixRes +git reset c9be6acc7a6b32f896e62c28a97c20c2348327d3 --hard +cd .. +``` + +3.获取权重文件 + +FixResNet50.pth + +4.数据集 +获取ImageNet 2012 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲。测试时需下载imagenet_labels_fixres.json文件,并放在imagenet文件夹下。 +``` +# 生成om模型 +bash test/pth2om.sh + +# om模型离线推理并测试 +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| FixRes bs1 | [rank1:79.0%](https://github.com/facebookresearch/FixRes) | rank1:79.0% | 507fps | 785.208fps | +| FixRes bs16 | [rank1:79.0%](https://github.com/facebookresearch/FixRes) | rank1:79.0% | 734fps | 788.566fps | diff --git a/ACL_PyTorch/contrib/cv/classfication/FixRes/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/FixRes/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/FixRes/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/FixRes/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GENet/LICENSE b/ACL_PyTorch/contrib/cv/classfication/GENet/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GENet/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/GENet/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GENet/cifar10_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/GENet/cifar10_acc_eval.py index b4909b98e94a89b1257c7fcdadb8ef4231610189..c09177bd3636ff0495abbd9bab0c69f8e9ebf626 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GENet/cifar10_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/GENet/cifar10_acc_eval.py @@ -1,186 +1,186 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param 
result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - print(sort_index) - gt = img_gt_dict[img_name] - print(gt) - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + print(sort_index) + gt = img_gt_dict[img_name] + print(gt) + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + 
"value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) diff --git a/ACL_PyTorch/contrib/cv/classfication/GENet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/GENet/modelzoo_level.txt index 8aa12b7f8cee362eb18ccc16baf537ddcda2ac92..1073929d0d827b6cbb27a2539adc9ecab98923d8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GENet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/GENet/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK -PerfStatus:OK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:OK +ModelConvert:OK QuantStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GENet/test/infer_bin.sh b/ACL_PyTorch/contrib/cv/classfication/GENet/test/infer_bin.sh index feff85f5dd0d5d82857bfa38d9f188b04e4f622f..9f64b810a01ccf882e6d1297bfeacc4627bf1d39 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GENet/test/infer_bin.sh +++ b/ACL_PyTorch/contrib/cv/classfication/GENet/test/infer_bin.sh @@ -1,2 +1,2 @@ -benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=genet_bs1_tuned.om -input_text_path=genet_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False +benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=genet_bs1_tuned.om -input_text_path=genet_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=genet_bs16_tuned.om -input_text_path=genet_prep_bin.info -input_width=32 -input_height=32 -output_binary=False -useDvpp=False \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/LICENSE b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/ghostnet_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/ghostnet_pth2onnx.py index e74fdc48c2d17a3e3a22d8fe2893f3b0e2c8dad6..27218ecece5f1eb03a2b6092dabaef49f8a5aab8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/ghostnet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/ghostnet_pth2onnx.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import sys -sys.path.append("./CV-Backbones/ghostnet_pytorch") -from ghostnet import ghostnet - -def pth2onnx(input_file, output_file): - model = ghostnet() - model.load_state_dict(torch.load(input_file)) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - -if __name__=="__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import sys +sys.path.append("./CV-Backbones/ghostnet_pytorch") +from ghostnet import ghostnet + +def pth2onnx(input_file, output_file): + model = ghostnet() + model.load_state_dict(torch.load(input_file)) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + +if __name__=="__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/imagenet_torch_preprocess.py index 92885b0a3fbaabea73989b28e48ae62d54590444..6ff9be559247f0e096019217e72d7dc551ec393b 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/imagenet_torch_preprocess.py @@ -1,122 +1,122 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, - 'ghostnet':{ - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - } -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - 
img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, + 'ghostnet':{ + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + } +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) preprocess(mode_type, src_path, save_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/requirements.txt index 9b075f0172e3c8b7fd476cc931b2dc54263584c8..0ce89e67a6442eafcaf4f55a51187b3be97962fa 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.6.0 -torchvision == 0.7.0 -onnx == 1.7.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.6.0 +torchvision == 0.7.0 +onnx == 1.7.0 +numpy == 1.18.5 +Pillow == 7.2.0 opencv-python == 4.5.1.48 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/test/README.md b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/test/README.md index d9024b48e503084720406945986c49b360adcd72..fe76d976f32de56d67af5f75fa04fc39f7bd002a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x/test/README.md @@ -1,24 +1,24 @@ -环境准备: - -1.数据集路径 -通用的数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/root/datasets/ - -2.进入工作目录 -cd GhostNet1.0x - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/huawei-noah/CV-Backbones.git - -5.获取权重文件 -cp ./CV-Backbones/ghostnet_pytorch/models/state_dict_73.98.pth . - -6.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +通用的数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/root/datasets/ + +2.进入工作目录 +cd GhostNet1.0x + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/huawei-noah/CV-Backbones.git + +5.获取权重文件 +cp ./CV-Backbones/ghostnet_pytorch/models/state_dict_73.98.pth . + +6.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/GloRe_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/GloRe/GloRe_pth2onnx.py index 3d9975c78bef436054953307725a0800b48291cd..c1f7f6f764933b692b6bb9a58db088dd820ace02 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/GloRe_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/GloRe_pth2onnx.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import sys -import os -from GloRe.network.resnet50_3d_gcn_x5 import RESNET50_3D_GCN_X5 -def pth2onnx(input_file, output_file): - net = RESNET50_3D_GCN_X5(num_classes=101, pretrained=False) - state_dict = torch.load(input_file,map_location='cpu') - net.load_state_dict(state_dict['state_dict']) - net.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 8, 224, 224) - torch.onnx.export(net, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) - - -if __name__ == '__main__': - args = sys.argv - pth2onnx(args[1], args[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import sys +import os +from GloRe.network.resnet50_3d_gcn_x5 import RESNET50_3D_GCN_X5 +def pth2onnx(input_file, output_file): + net = RESNET50_3D_GCN_X5(num_classes=101, pretrained=False) + state_dict = torch.load(input_file,map_location='cpu') + net.load_state_dict(state_dict['state_dict']) + net.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 8, 224, 224) + torch.onnx.export(net, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) + + +if __name__ == '__main__': + args = sys.argv + pth2onnx(args[1], args[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/LICENSE b/ACL_PyTorch/contrib/cv/classfication/GloRe/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/README.md b/ACL_PyTorch/contrib/cv/classfication/GloRe/README.md index 715965b4324654480b7a05524886aa4619d6c291..8cd66f582af23f49ef69ff18ebd6bbf05fd868ca 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/README.md @@ -1,43 +1,43 @@ -# GloRe模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - -2.获取,修改与安装开源模型代码 - -``` - -git clone https://github.com/facebookresearch/GloRe -b master -cd GloRe -git reset --hard 9c6a7340ebb44a66a3bf1945094fc685fb7b730d -cd .. -``` -3.[获取基于UCF101数据集训练出来的权重](https://ascend-pytorch-model-file.obs.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/GloRe/GloRe.pth) - - -4.[获取数据集UCF101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) - - -5.[获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) - -6.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) - -将benchmark.x86_64或benchmark.aarch64放到当前目录 - - - -## 2 离线推理 -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets/UCF-101 -``` - | 模型 | pth精度 | 310精度 | 基准性能 | 310性能 | - | :------: | :------: | :------: | :------: | :------: | - | GloRe bs1 | top1:87.79% top5:98.02% | top1:87.77% top5:98.05% | 122.4380fps | 67.3636fps | - | GloRe bs16 | top1:87.79% top5:98.02% | top1:87.77% top5:98.05% | 148.0453fps | 71.7856fps | +# GloRe模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + +2.获取,修改与安装开源模型代码 + +``` + +git clone https://github.com/facebookresearch/GloRe -b master +cd GloRe +git reset --hard 9c6a7340ebb44a66a3bf1945094fc685fb7b730d +cd .. 
+``` +3.[获取基于UCF101数据集训练出来的权重](https://ascend-pytorch-model-file.obs.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/GloRe/GloRe.pth) + + +4.[获取数据集UCF101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) + + +5.[获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) + +6.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) + +将benchmark.x86_64或benchmark.aarch64放到当前目录 + + + +## 2 离线推理 +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets/UCF-101 +``` + | 模型 | pth精度 | 310精度 | 基准性能 | 310性能 | + | :------: | :------: | :------: | :------: | :------: | + | GloRe bs1 | top1:87.79% top5:98.02% | top1:87.77% top5:98.05% | 122.4380fps | 67.3636fps | + | GloRe bs16 | top1:87.79% top5:98.02% | top1:87.77% top5:98.05% | 148.0453fps | 71.7856fps | diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/env.sh b/ACL_PyTorch/contrib/cv/classfication/GloRe/env.sh index 7cf86a22a607ba18523498bcfb5617e97b28a0e0..e4c421dd8d10faa7f5e617cb1f91d34e8ee685bc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/env.sh +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/env.sh @@ -1,6 +1,6 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp export ASCEND_AICPU_PATH=${install_path} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/GloRe/modelzoo_level.txt index 20e36b3f785cf4dbfc970929d2d793911d57c314..85fa1416d33effb8f710f3ac3f6772fcbc898eaa 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:NOK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/GloRe/requirements.txt index dfd372e307dc153c8aadc1012baaa6c5e8bf4722..384b6e620823c853805f1ad4ebfebc6240843dc9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -numpy == 1.21.1 -Pillow == 8.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +numpy == 1.21.1 +Pillow == 8.2.0 opencv_python == 4.5.3.56 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/GloRe/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/GloRe/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/classfication/GloRe/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/GloRe/test/parse.py @@ -1,32 +1,32 @@ -# 
Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/LICENSE b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/LICENSE index 657549b86065a3d34c7dd038edee91cedb8cb05a..dcc65541a1b5f985560b92c275b8328469d50742 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/LICENSE @@ -1,30 +1,30 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. -Copyright 2020 Huawei Technologies Co., Ltd - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. +Copyright 2020 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/README.md b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/README.md index 74462c68cfc2215046fe28babf92ae29ce3a39a9..90da3dd00b54dd93c370a36ebcb5c3237cd8e985 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/README.md @@ -1,270 +1,270 @@ -# HRNet Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[HRNet论文](https://arxiv.org/pdf/1908.07919.pdf) -Abstract—High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions in series (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams in parallel; (ii) Repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems. All the codes are available at https://github.com/HRNet. 
- -### 1.2 代码地址 -[HRNet代码](https://github.com/HRNet/HRNet-Image-Classification) -branch:master -commit_id:f130a24bf09b7f23ebd0075271f76c4a188093b2 - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -pytorch >= 1.5.0 -torchvision >= 0.6.0 -onnx >= 1.7.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.20.2 -opencv-python == 4.5.2.52 -Pillow == 8.0.1 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载HRNet模型 -git clone https://github.com/HRNet/HRNet-Image-Classification.git - -2.获取权重文件 -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/HRNet/NPU/8P/model_best.pth.tar -file name:model_best.pth.tar -md5sum:1f1d61e242ac9ca4cab5d0c49299cb76 - -3.编写pth2onnx脚本hrnet_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 hrnet_pth2onnx.py --cfg ./HRNet-Image-Classification/experiments/cls_hrnet_w18_sgd_lr5e-2_wd1e-4_bs32_x100.yaml --input model_best.pth.tar --output hrnet_w18.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) - -``` -atc --framework=5 --model=./hrnet_w18.onnx --input_format=NCHW --input_shape="image:16,3,224,224" --output=hrnet_bs16 --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -datasets_path = '/opt/npu/' -python3.7 imagenet_torch_preprocess.py hrnet ${datasets_path}/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -python3.7 get_info.py bin ./prep_dataset ./hrnet_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 - -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=hrnet_bs16.om -input_text_path=./hrnet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ ${datasets_path}/imagenet/val_label.txt ./ result.json -``` 
-第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: - -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "76.46%"}, {"key": "Top2 accuracy", "value": "86.33%"}, {"key": "Top3 accuracy", "value": "90.0%"}, {"key": "Top4 accuracy", "value": "91.97%"}, {"key": "Top5 accuracy", "value": "93.14%"}]} -``` -batch1,batch16的精度相同如上. - -### 6.2 开源TopN精度 -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) -``` -Model Acc@1 Acc@5 -HRNet-Image-Classification 76.8 93.4 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -需要测试batch1,batch4,batch8,batch16,batch32的性能,这里用batch1与batch16做示例 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,模型的测试脚本使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 -1.benchmark工具在整个数据集上推理获得性能数据 -以batch1为例,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 125.08, latency: 399743 -[data read] throughputRate: 132.686, moduleLatency: 7.53661 -[preprocess] throughputRate: 132.548, moduleLatency: 7.54441 -[infer] throughputRate: 125.448, Interface throughputRate: 156.216, moduleLatency: 7.35352 -[post] throughputRate: 125.448, moduleLatency: 7.97142 -``` -Interface throughputRate: 156.216,156.216乘以4既是310单卡吞吐率 -2.benchmark纯推理功能测得性能数据 -batch1的性能: - 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 - -``` -./benchmark.x86_64 -round=20 -om_path=hrnet_bs1.om -device_id=0 -batch_size=1 -``` -执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -[INFO] Dataset number: 19 finished cost 2.635ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_bs1_in_device_0.txt ------------------PureInfer Performance Summary------------------ -ave_throughputRate = 155.975samples/s, ave_latency = 6.42435ms -``` -bs1 310单卡吞吐率:155.975x4=623.9fps/card -batch4的性能: -``` -./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs4.om -device_id=0 -batch_size=4 -``` -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs4_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 224.608samples/s, ave_latency: 4.56663ms -``` -bs4 310单卡吞吐率:224.608x4=898.432fps/card -batch8的性能: -``` -./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs8.om -device_id=0 -batch_size=8 -``` -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs8_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 248.514samples/s, ave_latency: 4.09695ms -``` -bs8 310单卡吞吐率:248.514x4=994.056fps/card -batch16的性能: - -``` -./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs16.om -device_id=0 -batch_size=16 -``` -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs16_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 269.512samples/s, ave_latency: 3.73541ms -``` -bs16 310单卡吞吐率:269.512x4=1078.048fps/card -batch32的性能: -``` -./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs32.om -device_id=0 -batch_size=32 -``` -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs32_in_device_0.txt ------------------PureInfer Performance 
Summary------------------ -[INFO] ave_throughputRate: 271.562samples/s, ave_latency: 3.69597ms -``` -bs32 310单卡吞吐率:271.562x4=1086.248fps/card - - **性能优化:** - ->没有遇到性能不达标的问题,故不需要进行性能优化 - +# HRNet Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[HRNet论文](https://arxiv.org/pdf/1908.07919.pdf) +Abstract—High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions in series (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams in parallel; (ii) Repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems. All the codes are available at https://github.com/HRNet. 
+ +### 1.2 代码地址 +[HRNet代码](https://github.com/HRNet/HRNet-Image-Classification) +branch:master +commit_id:f130a24bf09b7f23ebd0075271f76c4a188093b2 + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +pytorch >= 1.5.0 +torchvision >= 0.6.0 +onnx >= 1.7.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.20.2 +opencv-python == 4.5.2.52 +Pillow == 8.0.1 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载HRNet模型 +git clone https://github.com/HRNet/HRNet-Image-Classification.git + +2.获取权重文件 +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/HRNet/NPU/8P/model_best.pth.tar +file name:model_best.pth.tar +md5sum:1f1d61e242ac9ca4cab5d0c49299cb76 + +3.编写pth2onnx脚本hrnet_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 hrnet_pth2onnx.py --cfg ./HRNet-Image-Classification/experiments/cls_hrnet_w18_sgd_lr5e-2_wd1e-4_bs32_x100.yaml --input model_best.pth.tar --output hrnet_w18.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) + +``` +atc --framework=5 --model=./hrnet_w18.onnx --input_format=NCHW --input_shape="image:16,3,224,224" --output=hrnet_bs16 --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +datasets_path = '/opt/npu/' +python3.7 imagenet_torch_preprocess.py hrnet ${datasets_path}/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +python3.7 get_info.py bin ./prep_dataset ./hrnet_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 + +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=hrnet_bs16.om -input_text_path=./hrnet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ ${datasets_path}/imagenet/val_label.txt ./ result.json +``` 
+第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: + +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "76.46%"}, {"key": "Top2 accuracy", "value": "86.33%"}, {"key": "Top3 accuracy", "value": "90.0%"}, {"key": "Top4 accuracy", "value": "91.97%"}, {"key": "Top5 accuracy", "value": "93.14%"}]} +``` +batch1,batch16的精度相同如上. + +### 6.2 开源TopN精度 +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) +``` +Model Acc@1 Acc@5 +HRNet-Image-Classification 76.8 93.4 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +需要测试batch1,batch4,batch8,batch16,batch32的性能,这里用batch1与batch16做示例 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,模型的测试脚本使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 +1.benchmark工具在整个数据集上推理获得性能数据 +以batch1为例,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 125.08, latency: 399743 +[data read] throughputRate: 132.686, moduleLatency: 7.53661 +[preprocess] throughputRate: 132.548, moduleLatency: 7.54441 +[infer] throughputRate: 125.448, Interface throughputRate: 156.216, moduleLatency: 7.35352 +[post] throughputRate: 125.448, moduleLatency: 7.97142 +``` +Interface throughputRate: 156.216,156.216乘以4既是310单卡吞吐率 +2.benchmark纯推理功能测得性能数据 +batch1的性能: + 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 + +``` +./benchmark.x86_64 -round=20 -om_path=hrnet_bs1.om -device_id=0 -batch_size=1 +``` +执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +[INFO] Dataset number: 19 finished cost 2.635ms +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_bs1_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +ave_throughputRate = 155.975samples/s, ave_latency = 6.42435ms +``` +bs1 310单卡吞吐率:155.975x4=623.9fps/card +batch4的性能: +``` +./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs4.om -device_id=0 -batch_size=4 +``` +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs4_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 224.608samples/s, ave_latency: 4.56663ms +``` +bs4 310单卡吞吐率:224.608x4=898.432fps/card +batch8的性能: +``` +./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs8.om -device_id=0 -batch_size=8 +``` +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs8_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 248.514samples/s, ave_latency: 4.09695ms +``` +bs8 310单卡吞吐率:248.514x4=994.056fps/card +batch16的性能: + +``` +./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs16.om -device_id=0 -batch_size=16 +``` +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs16_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 269.512samples/s, ave_latency: 3.73541ms +``` +bs16 310单卡吞吐率:269.512x4=1078.048fps/card +batch32的性能: +``` +./benchmark.x86_64 -round=20 -om_path=hrnet_w18_bs32.om -device_id=0 -batch_size=32 +``` +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_hrnet_w18_bs32_in_device_0.txt +-----------------PureInfer Performance 
Summary------------------ +[INFO] ave_throughputRate: 271.562samples/s, ave_latency: 3.69597ms +``` +bs32 310单卡吞吐率:271.562x4=1086.248fps/card + + **性能优化:** + +>没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/get_info.py b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/get_info.py index e43d4415cb667c31dcb6ea7228736d6f8056ad39..7f9afb06d1dcce471c92cdaf3fa13aabe0913f09 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/get_info.py @@ -1,59 +1,59 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/hrnet_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/hrnet_pth2onnx.py index 078a8a65a7833a006626738856693b91c526be1c..ec9444cc2f85ad1232c7ec0d59252e8643fdd35a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/hrnet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/hrnet_pth2onnx.py @@ -1,92 +1,92 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import argparse -import torch -import torch.onnx -from collections import OrderedDict -sys.path.append(r"./HRNet-Image-Classification") -sys.path.append(r"./HRNet-Image-Classification/lib") -from lib.models import cls_hrnet -from lib.config import config -from lib.config import update_config -def parse_args(): - parser = argparse.ArgumentParser(description='Train classification network') - parser.add_argument('--cfg', - help='experiment configure file name', - required=True, - type=str) - - parser.add_argument('--modelDir', - help='model directory', - type=str, - default='') - parser.add_argument('--logDir', - help='log directory', - type=str, - default='') - parser.add_argument('--dataDir', - help='data directory', - type=str, - default='') - parser.add_argument('--testModel', - help='testModel', - type=str, - default='') - - parser.add_argument('--input', - help='input pytorch model', - required=True, - type=str) - - parser.add_argument('--output', - help='output onnx model', - required=True, - type=str) - - args = parser.parse_args() - update_config(config, args) - - return args - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if(k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(): - args = parse_args() - print(config.MODEL) - modelpth = args.input - checkpoint = torch.load(modelpth, map_location='cpu') - model = cls_hrnet.get_cls_net(config) - output_file = args.output - print("output:",output_file) - model.load_state_dict(checkpoint) - model.eval() - print(model) - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - pth2onnx() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import argparse +import torch +import torch.onnx +from collections import OrderedDict +sys.path.append(r"./HRNet-Image-Classification") +sys.path.append(r"./HRNet-Image-Classification/lib") +from lib.models import cls_hrnet +from lib.config import config +from lib.config import update_config +def parse_args(): + parser = argparse.ArgumentParser(description='Train classification network') + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + parser.add_argument('--testModel', + help='testModel', + type=str, + default='') + + parser.add_argument('--input', + help='input pytorch model', + required=True, + type=str) + + parser.add_argument('--output', + help='output onnx model', + required=True, + type=str) + + args = parser.parse_args() + update_config(config, args) + + return args + +def proc_node_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if(k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(): + args = parse_args() + print(config.MODEL) + modelpth = args.input + checkpoint = torch.load(modelpth, map_location='cpu') + model = cls_hrnet.get_cls_net(config) + output_file = args.output + print("output:",output_file) + model.load_state_dict(checkpoint) + model.eval() + print(model) + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + pth2onnx() diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_acc_eval.py index d85b7dcf59856b5df3a7a9b27033a0334fc2dbc4..58e389352ec8578e685fa999f6724c73eebf59f4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_acc_eval.py @@ -1,187 +1,187 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - print(folder_davinci_target) - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - print(annotation_file_path) - # the path to store the results json path - result_json_path = sys.argv[3] - print(result_json_path) - # result json file name - json_file_name = sys.argv[4] - print(json_file_name) - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def 
create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + print(folder_davinci_target) + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + print(annotation_file_path) + # the path to store the results json path + result_json_path = sys.argv[3] + print(result_json_path) + # result json file name + json_file_name = sys.argv[4] + print(json_file_name) + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_torch_preprocess.py index 4ce024bfb5edd96dbdd2d27b47db1ee6fda6d0b1..61e574128ad0a582be7d9c79e4b5e0ea5ef1c006 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/imagenet_torch_preprocess.py @@ -1,103 +1,103 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'hrnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - } -} - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - # RGBA to RGB - s = os.path.join(src_path,file) - image = Image.open(os.path.join(src_path, file))# - image = image.convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'hrnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + } +} + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + # RGBA to RGB + s = os.path.join(src_path,file) + image = Image.open(os.path.join(src_path, file))# + image = image.convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/requirements.txt index d5135180ffdd48a42df10fa1e50245411ffe868a..77ef9d68a5a1516f7066fe5315eb1cb7410364f0 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/requirements.txt @@ -1,11 +1,11 @@ -torch==1.8.0 -torchvision==0.9.0 -onnx==1.9.0 -numpy==1.20.2 -opencv-python==4.5.2.52 -Pillow==8.0.1 -Cython -scipy -pandas -pyyaml +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +numpy==1.20.2 +opencv-python==4.5.2.52 +Pillow==8.0.1 +Cython +scipy +pandas +pyyaml scikit-image \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/README.md b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/README.md index 7ac4947ef7719de703db8346b6ca8eecc85f2853..db2329251abf7deae2a33d221bfdf1522715ffa9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/README.md @@ -1,29 +1,29 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd HRNet - -3.安装必要的依赖 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/HRNet/HRNet-Image-Classification.git - -5.获取权重文件 -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/HRNet/NPU/8P/model_best.pth.tar -file name:model_best.pth.tar -md5sum:1f1d61e242ac9ca4cab5d0c49299cb76 - -6.获取benchmark工具 -将benchmark.x86_64 benckmark.aarch64放在当前目录 - -7.310上执行 - -bash test/pth2om.sh - -bash test/eval_acc_perf.sh --datasets_path=/opt/npu +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd HRNet + +3.安装必要的依赖 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/HRNet/HRNet-Image-Classification.git + +5.获取权重文件 +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/HRNet/NPU/8P/model_best.pth.tar +file name:model_best.pth.tar +md5sum:1f1d61e242ac9ca4cab5d0c49299cb76 + +6.获取benchmark工具 +将benchmark.x86_64 benckmark.aarch64放在当前目录 + +7.310上执行 + +bash test/pth2om.sh + +bash test/eval_acc_perf.sh --datasets_path=/opt/npu \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification/test/parse.py @@ -1,32 
+1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LICENSE b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_postprocess.py index 1781a4eef505306766adb7b1b52c0d4d37430b12..3fd86adf053c42e3845b6e43a5b26b1020d7582b 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_postprocess.py @@ -1,47 +1,47 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import os -import numpy as np -import sys - -''' -sys.argv[1]: om_output -sys.argv[2]: ground_truth -''' -om_output_files = sorted(os.listdir(sys.argv[1])) - -output_labels = [] -# 读取om输出 -for file in om_output_files: - with open(sys.argv[1] + file, mode='r') as f: - content = f.read().split(' ')[:-1] - content = list(map(lambda x: float(x), content)) - content = np.array(content) - output_labels.append(np.argmax(content)) - -# 读取ground_truth -with open(sys.argv[2], mode='r') as f: - ground_truth = list(map(lambda x: int(x.rstrip('\n').split(' ')[1]), f.readlines())) - -count = 0 -for i in range(len(output_labels)): - if ground_truth[i] == output_labels[i]: - count += 1 - -print(f"accuracy: {count / len(output_labels)}") -# print(count, len(output_labels)) -# print(output_labels) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import os +import numpy as np +import sys + +''' +sys.argv[1]: om_output +sys.argv[2]: ground_truth +''' +om_output_files = sorted(os.listdir(sys.argv[1])) + +output_labels = [] +# 读取om输出 +for file in om_output_files: + with open(sys.argv[1] + file, mode='r') as f: + content = f.read().split(' ')[:-1] + content = list(map(lambda x: float(x), content)) + content = np.array(content) + output_labels.append(np.argmax(content)) + +# 读取ground_truth +with open(sys.argv[2], mode='r') as f: + ground_truth = list(map(lambda x: int(x.rstrip('\n').split(' ')[1]), f.readlines())) + +count = 0 +for i in range(len(output_labels)): + if ground_truth[i] == output_labels[i]: + count += 1 + +print(f"accuracy: {count / len(output_labels)}") +# print(count, len(output_labels)) +# print(output_labels) diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_pth2onnx.py index fefb429f0ff6b7317a4045acb75cae2dc4b7daad..69866709e2a9b0ab1b12bac149ad547a06dc75b2 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/LV_Vit_pth2onnx.py @@ -1,78 +1,78 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys - -sys.path.append(r"./TokenLabeling") - -import torch -# import argparse -from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models -import tlt.models - -import os -import numpy as np - -from timm.data.transforms_factory import transforms_imagenet_eval -from torchvision import transforms -from PIL import Image - - -# parser = argparse.ArgumentParser() -# parser.add_argument('--model', type=str, default='lvvit_s') -# parser.add_argument('--use-ema', dest='use_ema', action='store_true', -# help='use ema version of weights if present') -# parser.add_argument('--checkpoint', type=str, default='') -# parser.add_argument('--pretrained', dest='pretrained', action='store_true', -# help='use pre-trained model') -# parser.add_argument('--gp', default=None, type=str, metavar='POOL', -# help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). 
Model default if None.') -# parser.add_argument('--output_file', default='lvvit_s.onnx', type=str) -# parser.add_argument('-b', '--batch_size', default=16, type=int) - - -def main(): - if not os.path.exists('./model'): - os.mkdir('./model') - - device = torch.device('cpu') - input_names = ["image"] - output_names = ["features"] - dynamic_axes = {'image': {0: f'{sys.argv[3]}'}, 'features': {0: f'{sys.argv[3]}'}} - model = create_model( - 'lvvit_s', - pretrained=False, - num_classes=None, - in_chans=3, - global_pool=None, - scriptable=False, - img_size=224) - # model.cuda() - # load_checkpoint(model, args.checkpoint, args.use_ema, strict=False) - load_checkpoint(model, sys.argv[1], False, strict=False) - model.to(device) - model.eval() - dummy_input = torch.randn(int(sys.argv[3]), 3, 224, 224, device='cpu') - torch.onnx.export(model, - dummy_input, - sys.argv[2], - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - opset_version=13, - verbose=True) - - -main() - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +sys.path.append(r"./TokenLabeling") + +import torch +# import argparse +from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models +import tlt.models + +import os +import numpy as np + +from timm.data.transforms_factory import transforms_imagenet_eval +from torchvision import transforms +from PIL import Image + + +# parser = argparse.ArgumentParser() +# parser.add_argument('--model', type=str, default='lvvit_s') +# parser.add_argument('--use-ema', dest='use_ema', action='store_true', +# help='use ema version of weights if present') +# parser.add_argument('--checkpoint', type=str, default='') +# parser.add_argument('--pretrained', dest='pretrained', action='store_true', +# help='use pre-trained model') +# parser.add_argument('--gp', default=None, type=str, metavar='POOL', +# help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). 
Model default if None.') +# parser.add_argument('--output_file', default='lvvit_s.onnx', type=str) +# parser.add_argument('-b', '--batch_size', default=16, type=int) + + +def main(): + if not os.path.exists('./model'): + os.mkdir('./model') + + device = torch.device('cpu') + input_names = ["image"] + output_names = ["features"] + dynamic_axes = {'image': {0: f'{sys.argv[3]}'}, 'features': {0: f'{sys.argv[3]}'}} + model = create_model( + 'lvvit_s', + pretrained=False, + num_classes=None, + in_chans=3, + global_pool=None, + scriptable=False, + img_size=224) + # model.cuda() + # load_checkpoint(model, args.checkpoint, args.use_ema, strict=False) + load_checkpoint(model, sys.argv[1], False, strict=False) + model.to(device) + model.eval() + dummy_input = torch.randn(int(sys.argv[3]), 3, 224, 224, device='cpu') + torch.onnx.export(model, + dummy_input, + sys.argv[2], + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + opset_version=13, + verbose=True) + + +main() + diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/README.md b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/README.md index cae18df27c440807f29d133dfc38280a28c890ef..96c03d43b844236dd3b4987ccdc701ff2b3c6df1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/README.md @@ -1,381 +1,381 @@ -# LV-Vit Onnx模型端到端推理指导 - -+ [1模型概述](#1 模型概述) - - + [1.1 论文地址](##1.1 论文地址) - + [1.2 代码地址](##1.2 代码地址) - -+ [2 环境说明](#2 环境说明) - - + [2.1 深度学习框架](##2.1 深度学习框架) - + [2.2 python第三方库](##2.2 python第三方库) - -+ [3 模型转换](#3 模型转换) - - + [3.1 pth转onnx模型](##3.1 pth转onnx模型) - + [3.2 onnx转om模型](##3.2 onnx转om模型) - -+ [4 数据集预处理](#4 数据集预处理) - - + [4.1 数据集获取](##4.1 数据集获取) - + [4.2 数据集预处理](##4.2 数据集预处理) - + [4.3 生成预处理数据集信息文件](##4.3 生成预处理数据集信息文件) - -+ [5 离线推理](#5 离线推理) - - + [5.1 benchmark工具概述](##5.1 benchmark工具概述) - + [5.2 离线推理](##5.2 离线推理) - -+ [6 精度对比](#6 精度对比) - - + [6.1 离线推理精度统计](##6.1 离线推理精度统计) - + [6.2 开源精度](##6.2 开源精度) - + [6.3 精度对比](##6.3 精度对比) - -+ [7 性能对比](#7 性能对比) - - + [7.1 npu性能数据](##7.1 npu性能数据) - + [7.2 gpu和npu性能对比](##7.2 gpu和npu性能对比) - - - -## 1 模型概述 - -### 1.1 论文地址 - -[LV-Vit论文](https://arxiv.org/abs/2104.10858 ) - -### 1.2 代码地址 - -[LV-Vit代码](https://github.com/zihangJiang/TokenLabeling ) - - - -## 2 环境说明 - -### 2.1 深度学习框架 - -``` -torch==1.8.0 -torchvision==0.9.0 -onnx==1.10.1 -onnx-simplifier==0.3.6 -``` - -### 2.2 python第三方库 - -``` -numpy==1.21.2 -pyyaml==5.4.1 -pillow==8.3.1 -timm==0.4.5 -scipy==0.24.2 -``` - - - -## 3 模型转换 - -### 3.1 pth转onnx模型 - -1.LV-Vit模型代码下载 - -```bash -# 切换到工作目录 -cd LV-Vit - -git clone https://github.com/zihangJiang/TokenLabeling.git -cd TokenLabeling -patch -p1 < ../LV-Vit.patch -cd .. -``` - -2.获取模型权重,并放在工作目录的model文件夹下 -在model/下已经存放了在gpu8p上训练得到的pth,如需下载官方pth,则执行以下代码 -```bash -wget https://github.com/zihangJiang/TokenLabeling/releases/download/1.0/lvvit_s-26M-224-83.3.pth.tar -mv lvvit_s-26M-224-83.3.pth.tar model_best.pth.tar - -rm ./model/model_best.pth.tar -mv model_best.pth.tar ./model/ -``` - - - -3.使用 LV_Vit_pth2onnx.py 脚本将pth模型文件转为onnx模型文件 - -+ 参数1:pth模型权重的路径 - -+ 参数2:onnx模型权重的存储路径 - -+ 参数3:batch size - -```bash. 
-python LV_Vit_pth2onnx.py ./model/model_best.pth.tar ./model/model_best_bs1.onnx 1 -python LV_Vit_pth2onnx.py ./model/model_best.pth.tar ./model/model_best_bs16.onnx 16 -``` - -4.使用 onnxsim 工具优化onnx模型 - -+ 参数1:输入的shape -+ 参数2:onnx模型权重的存储路径 -+ 参数3:优化后onnx模型权重的存储路径 - -``` -python -m onnxsim --input-shape="1,3,224,224" ./model/model_best_bs1.onnx ./model/model_best_bs1_sim.onnx -python -m onnxsim --input-shape="16,3,224,224" ./model/model_best_bs16.onnx ./model/model_best_bs16_sim.onnx -``` - -5.使用tensorRT工具测试onnx模型性能 - -请自行软链接trtexec工具 - -``` -./trtexec --onnx=model/model_best_bs1_sim.onnx --fp16 --shapes=image:1x3x112x112 --device=0 > sim_onnx_bs1.log -./trtexec --onnx=model/model_best_bs16_sim.onnx --fp16 --shapes=image:16x3x112x112 --device=0 > sim_onnx_bs16.log -``` - - - -### 3.2 onnx转om模型 - -1.设置环境变量 - -```bash -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -``` - -2.使用 atc 将 onnx 模型转换为 om 模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) - -请注意,为了优化softmax算子,在其前后添加了transpose算子,故一并优化transpose,须在白名单中添加(batch_size,6,197,197)和 -(batch_size,197,197,6) - -路径:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py - -```bash -atc --framework=5 --model=./model/model_best_bs1_sim.onnx --output=./model/model_best_bs1_sim --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -atc --framework=5 --model=./model/model_best_bs16_sim.onnx --output=./model/model_best_bs16_sim --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 -``` - - - -## 4 数据集预处理 - -### 4.1 数据集获取 - -获取imagenet纯验证数据集,放在该目录:/opt/npu/imagenet/PureVal/ - - - -### 4.2 数据集预处理 - -执行预处理脚本,会在工作目录的data目录下生成数据集预处理后的 bin 文件和 数据集信息文件 - -LV_Vit_preprocess.py: -+ --src_path: imagenet纯验证集路径; --save_path: bin文件存放路径 - -gen_dataset_info.py -+ 参数1:bin文件 -+ 参数2:数据bin文件存放目录 - -``` -python LV_Vit_preprocess.py --src_path /opt/npu/imagenet/PureVal/ --save_path ./data/prep_dataset; -python gen_dataset_info.py ./data/prep_dataset ./data/lvvit_prep_bin.info; -``` - - -## 5 离线推理 - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) - -### 5.2 离线推理 - -1.设置环境变量 - -```bash -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ -``` - -2.执行离线推理, 输出结果默认保存在当前目录result/dumpOutput_device0 - -```bash -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./model/model_best_bs1_sim.om 
-input_text_path=lvvit_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False - -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./model/model_best_bs16_sim.om -input_text_path=lvvit_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` - - - -## 6 精度对比 - -### 6.1 离线推理精度统计 - -执行后处理脚本统计om模型推理结果的Accuracy - -+ 参数1:om模型预测结果目录 -+ 参数2:imagenet纯验证集标签 - -```shell -python LV_Vit_postprocess.py ./result/dumpOutput_device0 ./data/val.txt -``` - -控制台输出如下信息 - -``` -accuracy: 0.8317 -``` - - - -### 6.2 开源精度 - -源代码仓公布精度 - -``` -Model Dataset Accuracy -LV-Vit imagenet 0.833 -``` - - - -### 6.3 精度对比 - -将得到的om离线模型推理Accuracy与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - - - -## 7 性能对比 - -### 7.1 npu性能数据 - -**1. batch_size=1** - -``` -[e2e] throughputRate: 35.5884, latency: 1.40495e+06 -[data read] throughputRate: 37.666, moduleLatency:26.5491 -[preprocess] throughputRate: 37.5823, moduleLatency: 26.6802 -[infer] throughputRate: 35.6308 Interface throughputRate: 37.941, moduleLatency: 27.3942 -[post] throughputRate: 35.6308, moduleLatency: 28.0656 -``` - -batch_size=1 Ascend310单卡吞吐率:37.941*4=151.764 fps - - - -**2. batch_size=4** - -``` -[e2e] throughputRate: 37.4274, latency: 1.33592e+06 -[data read] throughputRate: 39.6399, moduleLatency: 25.2271 -[preprocess] throughputRate: 39.5442, moduleLatency: 25.2882 -[infer] throughputRate: 37.4711, Interface throughputRate: 40.477, moduleLatency: 26.1715 -[post] throughputRate: 9.36777, moduleLatency: 106.749 -``` - -batch_size=4 Ascend310单卡吞吐率:40.477*4=161.908 fps - - - -**3. batch_size=8** - -``` -[e2e] throughputRate: 34.8915, latency: 1.43301e+06 -[data read] throughputRate: 36.8978, moduleLatency: 27.1019 -[preprocess] throughputRate: 36.8307, moduleLatency: 27.1513 -[infer] throughputRate: 34.9252, Interface throughputRate: 38.3992, moduleLatency: 27.4573 -[post] throughputRate: 4.36564, moduleLatency: 229.062 -``` - -batch_size=16 Ascend310单卡吞吐率:38.3992*4=153.5968 fps - - - -**4. batch_size=16** - -``` -[e2e] throughputRate: 34.3406, latency: 1.456e+06 -[data read] throughputRate: 36.3651, moduleLatency: 27.4989 -[preprocess] throughputRate: 36.2989, moduleLatency: 27.5491 -[infer] throughputRate: 34.378, Interface throughputRate: 36.9249, moduleLatency: 28.4462 -[post] throughputRate: 2.14862, moduleLatency: 465.415 -``` - -batch_size=16 Ascend310单卡吞吐率:36.9249*4=147.6996 fps - - - -**5. batch_size=32** - -``` -[e2e] throughputRate: 33.136, latency: 1.50893e+06 -[data read] throughputRate: 35.0612, moduleLatency: 28.5215 -[preprocess] throughputRate: 34.9918, moduleLatency: 28.5781 -[infer] throughputRate: 33.1637, Interface throughputRate: 36.1795, moduleLatency: 28.9776 -[post] throughputRate: 1.03669, moduleLatency: 964.608 -``` - -batch_size=16 Ascend310单卡吞吐率:36.1795*4=144.718 fps - - - -### 7.2 npu性能优化 - -云盘:[model_best_bs1_sim.om](https://pan.baidu.com/s/1bMuSj4PbvuYE-pX2j_e-0Q),提取码:ad5f - -[model_best_bs16_sim.om](https://pan.baidu.com/s/11gYb6RpBbuaEL-aIql2qkg),提取码:jiev - -**1. batch_size=1** - -``` -[e2e] throughputRate: 40.7217, latency: 1.22785e+06 -[data read] throughputRate: 43.0838, moduleLatency: 23.2106 -[preprocess] throughputRate: 42.997, moduleLatency: 23.2575 -[infer] throughputRate: 40.769, Interface throughputRate: 44.0188, moduleLatency: 23.7945 -[post] throughputRate: 40.769, moduleLatency: 24.5285 -``` - -batch_size=1 Ascend310单卡吞吐率:44.0188*4=176.0752 fps - -**2. 
batch_size=16** - -``` -[e2e] throughputRate: 51.2825, latency: 974992 -[data read] throughputRate: 54.323, moduleLatency: 18.4084 -[preprocess] throughputRate: 54.1712, moduleLatency: 18.46 -[infer] throughputRate: 51.3613, Interface throughputRate: 57.8179, moduleLatency: 18.6629 -[post] throughputRate: 3.21005, moduleLatency: 311.521 -``` - -batch_size=16 Ascend310单卡吞吐率:57.8179*4=231.2716 fps - -### 7.3 npu性能优化前后对比 - -| batch size | 优化前 | 优化后 | -| :--------: | :------: | :------: | -| 1 | 151.764 | 176.0752 | -| 16 | 147.6996 | 231.2716 | - - - -### 7.4 gpu和npu性能对比 - -| batch size | GPU(FPS) | NPU(FPS) | -| :--------: | -------- | -------- | -| 1 | 290.41 | 176.0752 | -| 16 | 559.35 | 231.2716 | - - - +# LV-Vit Onnx模型端到端推理指导 + ++ [1模型概述](#1 模型概述) + + + [1.1 论文地址](##1.1 论文地址) + + [1.2 代码地址](##1.2 代码地址) + ++ [2 环境说明](#2 环境说明) + + + [2.1 深度学习框架](##2.1 深度学习框架) + + [2.2 python第三方库](##2.2 python第三方库) + ++ [3 模型转换](#3 模型转换) + + + [3.1 pth转onnx模型](##3.1 pth转onnx模型) + + [3.2 onnx转om模型](##3.2 onnx转om模型) + ++ [4 数据集预处理](#4 数据集预处理) + + + [4.1 数据集获取](##4.1 数据集获取) + + [4.2 数据集预处理](##4.2 数据集预处理) + + [4.3 生成预处理数据集信息文件](##4.3 生成预处理数据集信息文件) + ++ [5 离线推理](#5 离线推理) + + + [5.1 benchmark工具概述](##5.1 benchmark工具概述) + + [5.2 离线推理](##5.2 离线推理) + ++ [6 精度对比](#6 精度对比) + + + [6.1 离线推理精度统计](##6.1 离线推理精度统计) + + [6.2 开源精度](##6.2 开源精度) + + [6.3 精度对比](##6.3 精度对比) + ++ [7 性能对比](#7 性能对比) + + + [7.1 npu性能数据](##7.1 npu性能数据) + + [7.2 gpu和npu性能对比](##7.2 gpu和npu性能对比) + + + +## 1 模型概述 + +### 1.1 论文地址 + +[LV-Vit论文](https://arxiv.org/abs/2104.10858 ) + +### 1.2 代码地址 + +[LV-Vit代码](https://github.com/zihangJiang/TokenLabeling ) + + + +## 2 环境说明 + +### 2.1 深度学习框架 + +``` +torch==1.8.0 +torchvision==0.9.0 +onnx==1.10.1 +onnx-simplifier==0.3.6 +``` + +### 2.2 python第三方库 + +``` +numpy==1.21.2 +pyyaml==5.4.1 +pillow==8.3.1 +timm==0.4.5 +scipy==0.24.2 +``` + + + +## 3 模型转换 + +### 3.1 pth转onnx模型 + +1.LV-Vit模型代码下载 + +```bash +# 切换到工作目录 +cd LV-Vit + +git clone https://github.com/zihangJiang/TokenLabeling.git +cd TokenLabeling +patch -p1 < ../LV-Vit.patch +cd .. +``` + +2.获取模型权重,并放在工作目录的model文件夹下 +在model/下已经存放了在gpu8p上训练得到的pth,如需下载官方pth,则执行以下代码 +```bash +wget https://github.com/zihangJiang/TokenLabeling/releases/download/1.0/lvvit_s-26M-224-83.3.pth.tar +mv lvvit_s-26M-224-83.3.pth.tar model_best.pth.tar + +rm ./model/model_best.pth.tar +mv model_best.pth.tar ./model/ +``` + + + +3.使用 LV_Vit_pth2onnx.py 脚本将pth模型文件转为onnx模型文件 + ++ 参数1:pth模型权重的路径 + ++ 参数2:onnx模型权重的存储路径 + ++ 参数3:batch size + +```bash. 
+python LV_Vit_pth2onnx.py ./model/model_best.pth.tar ./model/model_best_bs1.onnx 1 +python LV_Vit_pth2onnx.py ./model/model_best.pth.tar ./model/model_best_bs16.onnx 16 +``` + +4.使用 onnxsim 工具优化onnx模型 + ++ 参数1:输入的shape ++ 参数2:onnx模型权重的存储路径 ++ 参数3:优化后onnx模型权重的存储路径 + +``` +python -m onnxsim --input-shape="1,3,224,224" ./model/model_best_bs1.onnx ./model/model_best_bs1_sim.onnx +python -m onnxsim --input-shape="16,3,224,224" ./model/model_best_bs16.onnx ./model/model_best_bs16_sim.onnx +``` + +5.使用tensorRT工具测试onnx模型性能 + +请自行软链接trtexec工具 + +``` +./trtexec --onnx=model/model_best_bs1_sim.onnx --fp16 --shapes=image:1x3x112x112 --device=0 > sim_onnx_bs1.log +./trtexec --onnx=model/model_best_bs16_sim.onnx --fp16 --shapes=image:16x3x112x112 --device=0 > sim_onnx_bs16.log +``` + + + +### 3.2 onnx转om模型 + +1.设置环境变量 + +```bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +``` + +2.使用 atc 将 onnx 模型转换为 om 模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) + +请注意,为了优化softmax算子,在其前后添加了transpose算子,故一并优化transpose,须在白名单中添加(batch_size,6,197,197)和 +(batch_size,197,197,6) + +路径:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py + +```bash +atc --framework=5 --model=./model/model_best_bs1_sim.onnx --output=./model/model_best_bs1_sim --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +atc --framework=5 --model=./model/model_best_bs16_sim.onnx --output=./model/model_best_bs16_sim --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 +``` + + + +## 4 数据集预处理 + +### 4.1 数据集获取 + +获取imagenet纯验证数据集,放在该目录:/opt/npu/imagenet/PureVal/ + + + +### 4.2 数据集预处理 + +执行预处理脚本,会在工作目录的data目录下生成数据集预处理后的 bin 文件和 数据集信息文件 + +LV_Vit_preprocess.py: ++ --src_path: imagenet纯验证集路径; --save_path: bin文件存放路径 + +gen_dataset_info.py ++ 参数1:bin文件 ++ 参数2:数据bin文件存放目录 + +``` +python LV_Vit_preprocess.py --src_path /opt/npu/imagenet/PureVal/ --save_path ./data/prep_dataset; +python gen_dataset_info.py ./data/prep_dataset ./data/lvvit_prep_bin.info; +``` + + +## 5 离线推理 + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +### 5.2 离线推理 + +1.设置环境变量 + +```bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ +``` + +2.执行离线推理, 输出结果默认保存在当前目录result/dumpOutput_device0 + +```bash +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./model/model_best_bs1_sim.om 
-input_text_path=lvvit_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False
+
+./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./model/model_best_bs16_sim.om -input_text_path=lvvit_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False
+```
+
+
+
+## 6 精度对比
+
+### 6.1 离线推理精度统计
+
+执行后处理脚本统计om模型推理结果的Accuracy
+
++ 参数1:om模型预测结果目录
++ 参数2:imagenet纯验证集标签
+
+```shell
+python LV_Vit_postprocess.py ./result/dumpOutput_device0 ./data/val.txt
+```
+
+控制台输出如下信息
+
+```
+accuracy: 0.8317
+```
+
+
+
+### 6.2 开源精度
+
+源代码仓公布精度
+
+```
+Model Dataset Accuracy
+LV-Vit imagenet 0.833
+```
+
+
+
+### 6.3 精度对比
+
+将得到的om离线模型推理Accuracy与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。
+
+
+
+## 7 性能对比
+
+### 7.1 npu性能数据
+
+**1. batch_size=1**
+
+```
+[e2e] throughputRate: 35.5884, latency: 1.40495e+06
+[data read] throughputRate: 37.666, moduleLatency:26.5491
+[preprocess] throughputRate: 37.5823, moduleLatency: 26.6802
+[infer] throughputRate: 35.6308 Interface throughputRate: 37.941, moduleLatency: 27.3942
+[post] throughputRate: 35.6308, moduleLatency: 28.0656
+```
+
+batch_size=1 Ascend310单卡吞吐率:37.941*4=151.764 fps
+
+
+
+**2. batch_size=4**
+
+```
+[e2e] throughputRate: 37.4274, latency: 1.33592e+06
+[data read] throughputRate: 39.6399, moduleLatency: 25.2271
+[preprocess] throughputRate: 39.5442, moduleLatency: 25.2882
+[infer] throughputRate: 37.4711, Interface throughputRate: 40.477, moduleLatency: 26.1715
+[post] throughputRate: 9.36777, moduleLatency: 106.749
+```
+
+batch_size=4 Ascend310单卡吞吐率:40.477*4=161.908 fps
+
+
+
+**3. batch_size=8**
+
+```
+[e2e] throughputRate: 34.8915, latency: 1.43301e+06
+[data read] throughputRate: 36.8978, moduleLatency: 27.1019
+[preprocess] throughputRate: 36.8307, moduleLatency: 27.1513
+[infer] throughputRate: 34.9252, Interface throughputRate: 38.3992, moduleLatency: 27.4573
+[post] throughputRate: 4.36564, moduleLatency: 229.062
+```
+
+batch_size=8 Ascend310单卡吞吐率:38.3992*4=153.5968 fps
+
+
+
+**4. batch_size=16**
+
+```
+[e2e] throughputRate: 34.3406, latency: 1.456e+06
+[data read] throughputRate: 36.3651, moduleLatency: 27.4989
+[preprocess] throughputRate: 36.2989, moduleLatency: 27.5491
+[infer] throughputRate: 34.378, Interface throughputRate: 36.9249, moduleLatency: 28.4462
+[post] throughputRate: 2.14862, moduleLatency: 465.415
+```
+
+batch_size=16 Ascend310单卡吞吐率:36.9249*4=147.6996 fps
+
+
+
+**5. batch_size=32**
+
+```
+[e2e] throughputRate: 33.136, latency: 1.50893e+06
+[data read] throughputRate: 35.0612, moduleLatency: 28.5215
+[preprocess] throughputRate: 34.9918, moduleLatency: 28.5781
+[infer] throughputRate: 33.1637, Interface throughputRate: 36.1795, moduleLatency: 28.9776
+[post] throughputRate: 1.03669, moduleLatency: 964.608
+```
+
+batch_size=32 Ascend310单卡吞吐率:36.1795*4=144.718 fps
+
+
+
+### 7.2 npu性能优化
+
+云盘:[model_best_bs1_sim.om](https://pan.baidu.com/s/1bMuSj4PbvuYE-pX2j_e-0Q),提取码:ad5f
+
+[model_best_bs16_sim.om](https://pan.baidu.com/s/11gYb6RpBbuaEL-aIql2qkg),提取码:jiev
+
+**1. batch_size=1**
+
+```
+[e2e] throughputRate: 40.7217, latency: 1.22785e+06
+[data read] throughputRate: 43.0838, moduleLatency: 23.2106
+[preprocess] throughputRate: 42.997, moduleLatency: 23.2575
+[infer] throughputRate: 40.769, Interface throughputRate: 44.0188, moduleLatency: 23.7945
+[post] throughputRate: 40.769, moduleLatency: 24.5285
+```
+
+batch_size=1 Ascend310单卡吞吐率:44.0188*4=176.0752 fps
+
+**2. 
batch_size=16** + +``` +[e2e] throughputRate: 51.2825, latency: 974992 +[data read] throughputRate: 54.323, moduleLatency: 18.4084 +[preprocess] throughputRate: 54.1712, moduleLatency: 18.46 +[infer] throughputRate: 51.3613, Interface throughputRate: 57.8179, moduleLatency: 18.6629 +[post] throughputRate: 3.21005, moduleLatency: 311.521 +``` + +batch_size=16 Ascend310单卡吞吐率:57.8179*4=231.2716 fps + +### 7.3 npu性能优化前后对比 + +| batch size | 优化前 | 优化后 | +| :--------: | :------: | :------: | +| 1 | 151.764 | 176.0752 | +| 16 | 147.6996 | 231.2716 | + + + +### 7.4 gpu和npu性能对比 + +| batch size | GPU(FPS) | NPU(FPS) | +| :--------: | -------- | -------- | +| 1 | 290.41 | 176.0752 | +| 16 | 559.35 | 231.2716 | + + + diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/gen_dataset_info.py index 5415fd9ae55679e3eb072ffe0930925e061d4098..0ef2e2354d872b36e15a4b748793267b0c96e564 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/gen_dataset_info.py @@ -1,31 +1,31 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - - -def main(): - src_path = sys.argv[1] - output_file = sys.argv[2] - in_files = sorted(os.listdir(src_path)) - i = 0 - with open(output_file, mode='w') as f: - for file in in_files: - f.write(str(i) + ' ' + src_path + file + ' 224 224\n') - i += 1 - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys + + +def main(): + src_path = sys.argv[1] + output_file = sys.argv[2] + in_files = sorted(os.listdir(src_path)) + i = 0 + with open(output_file, mode='w') as f: + for file in in_files: + f.write(str(i) + ' ' + src_path + file + ' 224 224\n') + i += 1 + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/test/README.md b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/test/README.md index cf67d697df934347675c939256ed24168315f550..23be8e80830a2ac71d368036026d1ff5acd726aa 100644 --- a/ACL_PyTorch/contrib/cv/classfication/LV-Vit/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/LV-Vit/test/README.md @@ -1,77 +1,77 @@ -# Shell 脚本 说明 - -**必要目录结构及说明 (Ascend310)** - -``` -|—— TokenLabeling # 源码目录 -|-- LV-Vit.patch # 源码补丁 -|-- test -|------ eval_acc_perf.sh # 预处理推理后处理一条龙 -|------ pth2om.sh # pth转onnx,onnx转om脚本 -|—— data # 用于存放imagenet验证集二进制文件 -|------ val.txt # imagenet 纯验证集标签 -|—— model -|——---- model_best.pth.tar # 模型权重 -|—— env.sh # NPU环境变量脚本 -|—— LV_Vit_postprocess.py # 后处理脚本 -|—— LV_Vit_preprocess.py # 预处理脚本 -|—— LV_Vit_pth2onnx.py # pth转onnx脚本 -|-- benchmark.x86_64 # benckmark工具 -``` - -**step1:准备阶段修改源码** - -```bash -git clone https://github.com/zihangJiang/TokenLabeling.git -cd TokenLabeling -patch -p1 < ../LV-Vit.patch -cd .. -``` - -**step2:获取模型权重,并放在工作目录的model文件夹下** - -```bash -wget https://github.com/zihangJiang/TokenLabeling/releases/download/1.0/lvvit_s-26M-224-83.3.pth.tar - -mv lvvit_s-26M-224-83.3.pth.tar ./model/model_best.pth.tar -``` - -**step3:获取imagenet纯验证数据集,放在该目录** - -/opt/npu/imagenet/PureVal/ - - -**1.pth转om模型** - -```shell -bash test/pth2om.sh -``` - -**2.npu性能数据及精度数据** - ---datasets_path=imagenet纯验证集路径 - -```shell -bash test/eval_acc_perf.sh --datasets_path=/opt/npu/imagenet/PureVal/ -``` - -**必要目录结构及说明 (t4)** - -onnx模型权重由第一步 pth转om 模型生成在 model 文件夹下 -请自行软链接trtexec工具! - -``` -|-- test -|------ pref_gpu.sh # onnx性能数据脚本 -|—— model -|——---- model_best_bs1_sim.onnx # bs=1 模型权重 -|——---- model_best_bs16_sim.onnx # bs=16 模型权重 -|-- trtexec # trtexec工具 -``` - -**3.测试t4性能数据** - -``` -bash test/pref_gpu.sh -``` - +# Shell 脚本 说明 + +**必要目录结构及说明 (Ascend310)** + +``` +|—— TokenLabeling # 源码目录 +|-- LV-Vit.patch # 源码补丁 +|-- test +|------ eval_acc_perf.sh # 预处理推理后处理一条龙 +|------ pth2om.sh # pth转onnx,onnx转om脚本 +|—— data # 用于存放imagenet验证集二进制文件 +|------ val.txt # imagenet 纯验证集标签 +|—— model +|——---- model_best.pth.tar # 模型权重 +|—— env.sh # NPU环境变量脚本 +|—— LV_Vit_postprocess.py # 后处理脚本 +|—— LV_Vit_preprocess.py # 预处理脚本 +|—— LV_Vit_pth2onnx.py # pth转onnx脚本 +|-- benchmark.x86_64 # benckmark工具 +``` + +**step1:准备阶段修改源码** + +```bash +git clone https://github.com/zihangJiang/TokenLabeling.git +cd TokenLabeling +patch -p1 < ../LV-Vit.patch +cd .. 
+``` + +**step2:获取模型权重,并放在工作目录的model文件夹下** + +```bash +wget https://github.com/zihangJiang/TokenLabeling/releases/download/1.0/lvvit_s-26M-224-83.3.pth.tar + +mv lvvit_s-26M-224-83.3.pth.tar ./model/model_best.pth.tar +``` + +**step3:获取imagenet纯验证数据集,放在该目录** + +/opt/npu/imagenet/PureVal/ + + +**1.pth转om模型** + +```shell +bash test/pth2om.sh +``` + +**2.npu性能数据及精度数据** + +--datasets_path=imagenet纯验证集路径 + +```shell +bash test/eval_acc_perf.sh --datasets_path=/opt/npu/imagenet/PureVal/ +``` + +**必要目录结构及说明 (t4)** + +onnx模型权重由第一步 pth转om 模型生成在 model 文件夹下 +请自行软链接trtexec工具! + +``` +|-- test +|------ pref_gpu.sh # onnx性能数据脚本 +|—— model +|——---- model_best_bs1_sim.onnx # bs=1 模型权重 +|——---- model_best_bs16_sim.onnx # bs=16 模型权重 +|-- trtexec # trtexec工具 +``` + +**3.测试t4性能数据** + +``` +bash test/pref_gpu.sh +``` + diff --git a/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/mobilenet-v1_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/mobilenet-v1_pth2onnx.py index 82981e308d3bcfae6f54b409d7d21c53f9e39761..f254c26ddfc1cf0b7fafd9d8a34eeda0813d9609 100644 --- a/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/mobilenet-v1_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/mobilenet-v1_pth2onnx.py @@ -1,107 +1,107 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -from collections import OrderedDict - -import torch -import torch.nn as nn -import torch.onnx - - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - - def conv_bn(inp, oup, stride): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - nn.BatchNorm2d(oup), - nn.ReLU(inplace=True) - ) - - def conv_dw(inp, oup, stride): - return nn.Sequential( - nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), - nn.BatchNorm2d(inp), - nn.ReLU(inplace=True), - - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), - nn.BatchNorm2d(oup), - nn.ReLU(inplace=True), - ) - - self.model = nn.Sequential( - conv_bn(3, 32, 2), - conv_dw(32, 64, 1), - conv_dw(64, 128, 2), - conv_dw(128, 128, 1), - conv_dw(128, 256, 2), - conv_dw(256, 256, 1), - conv_dw(256, 512, 2), - conv_dw(512, 512, 1), - conv_dw(512, 512, 1), - conv_dw(512, 512, 1), - conv_dw(512, 512, 1), - conv_dw(512, 512, 1), - conv_dw(512, 1024, 2), - conv_dw(1024, 1024, 1), - nn.AvgPool2d(7), - ) - self.fc = nn.Linear(1024, 1000) - - def forward(self, x): - x = self.model(x) - x = x.view(-1, 1024) - x = self.fc(x) - return x - - -def proc_nodes_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert_model_to_onnx(model_state, output_file): - model = Net() - if model_state: - model.load_state_dict(model_state) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(32, 3, 224, 224) # (batch_size, channels, width, height) - torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) - - -if __name__ == '__main__': - checkpoint_file = sys.argv[1] - output_file = sys.argv[2] - - if os.path.isfile(checkpoint_file): - checkpoint = torch.load(checkpoint_file, map_location='cpu') - print("{} successfully loaded.".format(checkpoint_file)) - model_state = proc_nodes_module(checkpoint, 'state_dict') - else: - print("Failed to load checkpoint from {}! Output model with initial state.".format(checkpoint_file)) - model_state = OrderedDict() - convert_model_to_onnx(model_state, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +from collections import OrderedDict + +import torch +import torch.nn as nn +import torch.onnx + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(3, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d(7), + ) + self.fc = nn.Linear(1024, 1000) + + def forward(self, x): + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +def proc_nodes_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert_model_to_onnx(model_state, output_file): + model = Net() + if model_state: + model.load_state_dict(model_state) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(32, 3, 224, 224) # (batch_size, channels, width, height) + torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) + + +if __name__ == '__main__': + checkpoint_file = sys.argv[1] + output_file = sys.argv[2] + + if os.path.isfile(checkpoint_file): + checkpoint = torch.load(checkpoint_file, map_location='cpu') + print("{} successfully loaded.".format(checkpoint_file)) + model_state = proc_nodes_module(checkpoint, 'state_dict') + else: + print("Failed to load checkpoint from {}! 
Output model with initial state.".format(checkpoint_file)) + model_state = OrderedDict() + convert_model_to_onnx(model_state, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/test/eval_acc_perf.sh b/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/test/eval_acc_perf.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/test/pth2om.sh b/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1/test/pth2om.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/OSNet/README.md b/ACL_PyTorch/contrib/cv/classfication/OSNet/README.md index 2373008b8a055d792549c5d7ed4620af01316b27..160d86240d697e2f03da69a8f8ea7fd2114054bc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/OSNet/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/OSNet/README.md @@ -1,248 +1,248 @@ -# OSNet Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 开源精度](#62-开源精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[OSNet论文](https://arxiv.org/abs/1905.00953) -作为一个实例级的识别问题,行人再识别(ReID)依赖于具有识别能力的特征,它不仅能捕获不同的空间尺度,还能封装多个尺度的任意组合。这些同构和异构尺度的特征为全尺度特征。本文设计了一种新颖的深度CNN,称为全尺度网络(OSNet),用于ReID的全尺度特征学习。这是通过设计一个由多个卷积特征流组成的残差块来实现的,每个残差块检测一定尺度的特征。重要的是,引入了一种新的统一聚合门用输入依赖的每个通道权重进行动态多尺度特征融合。为了有效地学习空间通道相关性,避免过拟合,构建块同时使用点卷积和深度卷积。通过逐层叠加这些块,OSNet非常轻量,可以在现有的ReID基准上从零开始训练。尽管OSNet模型很小,但其在6个Reid数据集上到达了SOTA结果。 - -### 1.2 代码地址 -[OSNet代码](https://github.com/KaiyangZhou/deep-person-reid) -branch:master -commit_id:e580b699c34b6f753a9a06223d840317546c98aa - -## 2 环境说明 - -深度学习框架与第三方库 -``` -pytorch == 1.8.1 -torchvision == 0.9.1 -onnx == 1.7.0 -protobuf==3.13.0 -onnx-simplifier==0.3.6 -isort==4.3.21 -numpy -Cython -h5py -Pillow -six -scipy -matplotlib -opencv-python -tb-nightly -future -yacs -gdown -flake8 -yapf -imageio -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[OSNet训练pth权重文件(google下载)](https://drive.google.com/file/d/1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA/view?usp=sharing) -[OSNet训练pth权重文件(百度网盘下载,提取码:gcfe)](https://pan.baidu.com/s/1Xkwa9TCZss_ygkC8obsEMg) -osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth - -2.下载OSNet源码: -``` -git clone https://github.com/KaiyangZhou/deep-person-reid.git -cd deep-person-reid/ -# install dependencies -pip install -r requirements.txt -# install torchreid (don't need to re-build it if you modify the source code) -python3.7 setup.py develop -``` -3.编写pth2onnx脚本pth2onnx.py -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 pth2onnx.py osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth osnet_x1_0.onnx # 生成onnx模型文件 -``` -5.对onnx模型进行简化 -``` -python3.7 -m onnxsim osnet_x1_0.onnx osnet_x1_0_bs1_sim.onnx --input-shape 1,3,256,128 # batch_size = 1 -python3.7 -m onnxsim osnet_x1_0.onnx osnet_x1_0_bs16_sim.onnx 
--input-shape 16,3,256,128 # batch_size = 16 -``` -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=./osnet_x1_0_bs1_sim.onnx --input_format=NCHW --input_shape="image:1,3,256,128" --output=osnet_x1_0_bs1 --log=debug --soc_version=Ascend310 # batch_size = 1 -atc --framework=5 --model=./osnet_x1_0_bs16_sim.onnx --input_format=NCHW --input_shape="image:16,3,256,128" --output=osnet_x1_0_bs16 --log=debug --soc_version=Ascend310 # batch_size = 16 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用Market1501数据集进行测试。Market-1501数据集在清华大学校园中采集,夏天拍摄,在2015年构建并公开。它包括由6个摄像头(其中5个高清摄像头和1个低清摄像头)拍摄的1501个行人的32217张图片。每个行人至少由2个摄像头捕获到,并且在一个摄像头中可能具有多张图像。 -训练集bounding_box_train有751人,包含12,936张图像,平均每个人有17.2张训练数据; -测试集bounding_box_test有750人,包含19,732张图像,平均每个人有26.3张测试数据; -查询集query有3368张查询图像。 -[Market1501数据集(百度网盘下载,提取码:me3q)](https://pan.baidu.com/s/1Nl8tMEvq-MwNGd1pG4_6bg) -Market1501数据集放在/root/datasets/,并将数据集文件夹命名为market1501。 - -### 4.2 数据集预处理 -1.预处理脚本market1501_torch_preprocess.py -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -# 处理gallery数据集,即bounding_box_test测试集 -python3.7 market1501_torch_preprocess.py /root/datasets/market1501/bounding_box_test ./gallery_prep_dataset/ -# 处理query数据集 -python3.7 market1501_torch_preprocess.py /root/datasets/market1501/query ./query_prep_dataset/ -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py -2.执行生成数据集信息脚本,生成gallery和query数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./gallery_prep_dataset ./gallery_prep_bin.info 128 256 -python3.7 gen_dataset_info.py bin ./query_prep_dataset ./query_prep_bin.info 128 256 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -``` -#对query_prep_bin.info进行处理 -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=osnet_x1_0_bs1.om -input_text_path=./query_prep_bin.info -input_width=128 -input_height=256 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device0,模型只有一个名为feature的输出,每个输入对应的输出对应一个_x.bin文件。 - -``` -#对gallery_prep_bin.info进行处理 -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=osnet_x1_0_bs1.om -input_text_path=./gallery_prep_bin.info -input_width=128 -input_height=256 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device1,模型只有一个名为feature的输出,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理精度](#61-离线推理精度)** -- **[开源精度](#62-开源精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理精度统计 -调用osnet_metrics_market1501_bs1.py脚本,可以获得rank1和mAP数据,结果保存在result_bs1.json中。 -``` -python3.7 osnet_x1_0_metrics_market1501.py result/dumpOutput_device0/ result/dumpOutput_device1/ ./ result_bs1.json -``` -第一个为benchmark输出目录,第二个为query数据集配套标签,第三个为gallery数据集配套标签,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "R1", "value": "0.94299287"}, {"key": "mAP", "value": "0.8257416732159705"}]} -``` -### 6.2 开源精度 
-[OSNet开源代码仓精度](https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO) -``` -模型:osnet_x1_0,R1=94.2%,mAP=82.6% -``` -### 6.3 精度对比 -将得到的om离线模型推理结果R1、mAP进行比较,与该模型github代码仓上公布的精度对比,R1比代码仓结果略高,mAP下降在1%范围之内,故精度达标。 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -batch1的性能: - 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 -``` -./benchmark.x86_64 -round=50 -om_path=osnet_x1_0_bs1.om -device_id=0 -batch_size=1 -``` -执行50次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -[INFO] Dataset number: 49 finished cost 4.174ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs1_sim_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 240.622samples/s, ave_latency: 4.24716ms -``` -batch1 310单卡吞吐率:240.622×4=962.488fps -batch16的性能: -``` -./benchmark.x86_64 -round=50 -om_path=osnet_x1_0_bs16.om -device_id=2 -batch_size=16 -``` -得到batch16的性能为: -``` -[INFO] Dataset number: 49 finished cost 24.885ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs16_sim_in_device_2.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 643.052samples/s, ave_latency: 1.55994ms -``` -batch16 310单卡吞吐率:643.052×4=2572.208fps -batch4的性能: -``` -[INFO] Dataset number: 49 finished cost 6.434ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs4_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 604.718samples/s, ave_latency: 1.68188ms -``` -batch4 310单卡吞吐率:604.718×4=2418.872fps -batch8的性能: -``` -[INFO] Dataset number: 49 finished cost 11.107ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs8_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 715.699samples/s, ave_latency: 1.41114ms -``` -batch8 310单卡吞吐率:715.699×4=2862.796fps -batch32的性能: -``` -[INFO] Dataset number: 49 finished cost 50.178ms -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs32_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 632.875samples/s, ave_latency: 1.58384ms -``` -batch32 310单卡吞吐率:632.875×4=2531.5fps +# OSNet Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 开源精度](#62-开源精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[OSNet论文](https://arxiv.org/abs/1905.00953) +作为一个实例级的识别问题,行人再识别(ReID)依赖于具有识别能力的特征,它不仅能捕获不同的空间尺度,还能封装多个尺度的任意组合。这些同构和异构尺度的特征为全尺度特征。本文设计了一种新颖的深度CNN,称为全尺度网络(OSNet),用于ReID的全尺度特征学习。这是通过设计一个由多个卷积特征流组成的残差块来实现的,每个残差块检测一定尺度的特征。重要的是,引入了一种新的统一聚合门用输入依赖的每个通道权重进行动态多尺度特征融合。为了有效地学习空间通道相关性,避免过拟合,构建块同时使用点卷积和深度卷积。通过逐层叠加这些块,OSNet非常轻量,可以在现有的ReID基准上从零开始训练。尽管OSNet模型很小,但其在6个Reid数据集上到达了SOTA结果。 + +### 1.2 代码地址 +[OSNet代码](https://github.com/KaiyangZhou/deep-person-reid) +branch:master +commit_id:e580b699c34b6f753a9a06223d840317546c98aa + +## 2 环境说明 + +深度学习框架与第三方库 +``` +pytorch == 1.8.1 +torchvision == 0.9.1 +onnx 
== 1.7.0 +protobuf==3.13.0 +onnx-simplifier==0.3.6 +isort==4.3.21 +numpy +Cython +h5py +Pillow +six +scipy +matplotlib +opencv-python +tb-nightly +future +yacs +gdown +flake8 +yapf +imageio +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[OSNet训练pth权重文件(google下载)](https://drive.google.com/file/d/1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA/view?usp=sharing) +[OSNet训练pth权重文件(百度网盘下载,提取码:gcfe)](https://pan.baidu.com/s/1Xkwa9TCZss_ygkC8obsEMg) +osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth + +2.下载OSNet源码: +``` +git clone https://github.com/KaiyangZhou/deep-person-reid.git +cd deep-person-reid/ +# install dependencies +pip install -r requirements.txt +# install torchreid (don't need to re-build it if you modify the source code) +python3.7 setup.py develop +``` +3.编写pth2onnx脚本pth2onnx.py +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 pth2onnx.py osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth osnet_x1_0.onnx # 生成onnx模型文件 +``` +5.对onnx模型进行简化 +``` +python3.7 -m onnxsim osnet_x1_0.onnx osnet_x1_0_bs1_sim.onnx --input-shape 1,3,256,128 # batch_size = 1 +python3.7 -m onnxsim osnet_x1_0.onnx osnet_x1_0_bs16_sim.onnx --input-shape 16,3,256,128 # batch_size = 16 +``` +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=./osnet_x1_0_bs1_sim.onnx --input_format=NCHW --input_shape="image:1,3,256,128" --output=osnet_x1_0_bs1 --log=debug --soc_version=Ascend310 # batch_size = 1 +atc --framework=5 --model=./osnet_x1_0_bs16_sim.onnx --input_format=NCHW --input_shape="image:16,3,256,128" --output=osnet_x1_0_bs16 --log=debug --soc_version=Ascend310 # batch_size = 16 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用Market1501数据集进行测试。Market-1501数据集在清华大学校园中采集,夏天拍摄,在2015年构建并公开。它包括由6个摄像头(其中5个高清摄像头和1个低清摄像头)拍摄的1501个行人的32217张图片。每个行人至少由2个摄像头捕获到,并且在一个摄像头中可能具有多张图像。 +训练集bounding_box_train有751人,包含12,936张图像,平均每个人有17.2张训练数据; +测试集bounding_box_test有750人,包含19,732张图像,平均每个人有26.3张测试数据; +查询集query有3368张查询图像。 +[Market1501数据集(百度网盘下载,提取码:me3q)](https://pan.baidu.com/s/1Nl8tMEvq-MwNGd1pG4_6bg) +Market1501数据集放在/root/datasets/,并将数据集文件夹命名为market1501。 + +### 4.2 数据集预处理 +1.预处理脚本market1501_torch_preprocess.py +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +# 处理gallery数据集,即bounding_box_test测试集 +python3.7 market1501_torch_preprocess.py /root/datasets/market1501/bounding_box_test ./gallery_prep_dataset/ +# 处理query数据集 +python3.7 market1501_torch_preprocess.py /root/datasets/market1501/query ./query_prep_dataset/ +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py +2.执行生成数据集信息脚本,生成gallery和query数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./gallery_prep_dataset ./gallery_prep_bin.info 128 256 +python3.7 gen_dataset_info.py bin ./query_prep_dataset ./query_prep_bin.info 128 256 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + 
+benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +``` +#对query_prep_bin.info进行处理 +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=osnet_x1_0_bs1.om -input_text_path=./query_prep_bin.info -input_width=128 -input_height=256 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device0,模型只有一个名为feature的输出,每个输入对应的输出对应一个_x.bin文件。 + +``` +#对gallery_prep_bin.info进行处理 +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=osnet_x1_0_bs1.om -input_text_path=./gallery_prep_bin.info -input_width=128 -input_height=256 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device1,模型只有一个名为feature的输出,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理精度](#61-离线推理精度)** +- **[开源精度](#62-开源精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理精度统计 +调用osnet_metrics_market1501_bs1.py脚本,可以获得rank1和mAP数据,结果保存在result_bs1.json中。 +``` +python3.7 osnet_x1_0_metrics_market1501.py result/dumpOutput_device0/ result/dumpOutput_device1/ ./ result_bs1.json +``` +第一个为benchmark输出目录,第二个为query数据集配套标签,第三个为gallery数据集配套标签,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "R1", "value": "0.94299287"}, {"key": "mAP", "value": "0.8257416732159705"}]} +``` +### 6.2 开源精度 +[OSNet开源代码仓精度](https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO) +``` +模型:osnet_x1_0,R1=94.2%,mAP=82.6% +``` +### 6.3 精度对比 +将得到的om离线模型推理结果R1、mAP进行比较,与该模型github代码仓上公布的精度对比,R1比代码仓结果略高,mAP下降在1%范围之内,故精度达标。 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +batch1的性能: + 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 +``` +./benchmark.x86_64 -round=50 -om_path=osnet_x1_0_bs1.om -device_id=0 -batch_size=1 +``` +执行50次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +[INFO] Dataset number: 49 finished cost 4.174ms +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs1_sim_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 240.622samples/s, ave_latency: 4.24716ms +``` +batch1 310单卡吞吐率:240.622×4=962.488fps +batch16的性能: +``` +./benchmark.x86_64 -round=50 -om_path=osnet_x1_0_bs16.om -device_id=2 -batch_size=16 +``` +得到batch16的性能为: +``` +[INFO] Dataset number: 49 finished cost 24.885ms +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs16_sim_in_device_2.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 643.052samples/s, ave_latency: 1.55994ms +``` +batch16 310单卡吞吐率:643.052×4=2572.208fps +batch4的性能: +``` +[INFO] Dataset number: 49 finished cost 6.434ms +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs4_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 604.718samples/s, ave_latency: 1.68188ms +``` +batch4 310单卡吞吐率:604.718×4=2418.872fps +batch8的性能: +``` +[INFO] Dataset number: 49 finished cost 11.107ms +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_osnet_x1_0_bs8_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 715.699samples/s, ave_latency: 1.41114ms +``` +batch8 310单卡吞吐率:715.699×4=2862.796fps +batch32的性能: +``` +[INFO] Dataset number: 49 finished cost 50.178ms +[INFO] PureInfer result saved in 
./result/PureInfer_perf_of_osnet_x1_0_bs32_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 632.875samples/s, ave_latency: 1.58384ms +``` +batch32 310单卡吞吐率:632.875×4=2531.5fps diff --git a/ACL_PyTorch/contrib/cv/classfication/OSNet/test/README.md b/ACL_PyTorch/contrib/cv/classfication/OSNet/test/README.md index 06c60701b2f4bf68b0a9c863e53b899c71fcb1ba..e27dc3d1ac07b9ed7d515459b578e4c073ada738 100644 --- a/ACL_PyTorch/contrib/cv/classfication/OSNet/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/OSNet/test/README.md @@ -1,31 +1,31 @@ -环境准备: - -1.数据集路径 -[Market1501数据集(百度网盘下载,提取码:me3q)](https://pan.baidu.com/s/1Nl8tMEvq-MwNGd1pG4_6bg) -Market1501数据集放在/root/datasets/,并将数据集文件夹命名为market1501。 - -2.进入工作目录 -cd OSNet - -3.安装必要的依赖 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/KaiyangZhou/deep-person-reid.git -cd deep-person-reid - -5.加载模型 -python3.7 setup.py develop - -6.获取权重文件 -[OSNet训练pth权重文件(google下载)](https://drive.google.com/file/d/1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA/view?usp=sharing) -[OSNet训练pth权重文件(百度网盘下载,提取码:gcfe)](https://pan.baidu.com/s/1Xkwa9TCZss_ygkC8obsEMg) -将权重文件osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth放于OSNet目录下 - -7.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放于OSNet目录下 - -8.310上执行,执行时确保device空闲 -cd OSNet -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +[Market1501数据集(百度网盘下载,提取码:me3q)](https://pan.baidu.com/s/1Nl8tMEvq-MwNGd1pG4_6bg) +Market1501数据集放在/root/datasets/,并将数据集文件夹命名为market1501。 + +2.进入工作目录 +cd OSNet + +3.安装必要的依赖 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/KaiyangZhou/deep-person-reid.git +cd deep-person-reid + +5.加载模型 +python3.7 setup.py develop + +6.获取权重文件 +[OSNet训练pth权重文件(google下载)](https://drive.google.com/file/d/1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA/view?usp=sharing) +[OSNet训练pth权重文件(百度网盘下载,提取码:gcfe)](https://pan.baidu.com/s/1Xkwa9TCZss_ygkC8obsEMg) +将权重文件osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth放于OSNet目录下 + +7.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放于OSNet目录下 + +8.310上执行,执行时确保device空闲 +cd OSNet +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_postprocess.py index aecfa9467e31b0c609e4ccd6ec7b159e348e71f5..99e472dcc8e8d39e6b41646e8251f89a9f04d492 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_postprocess.py @@ -1,140 +1,140 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import print_function -from __future__ import division - - -import argparse -import numpy as np -import torch -from torch.utils.data import DataLoader -from torchreid import models -from torchreid.data_manager import DatasetManager -from torchreid.dataset_loader import ImageDataset -from torchreid.eval_metrics import evaluate -from torchreid import transforms as T -torch.multiprocessing.set_sharing_strategy('file_system') -import os - -def postprocess(ranks=range(1, 51)): - - dataset = DatasetManager(dataset_dir=args.dataset, root=args.root) - - transform_test = T.Compose_Keypt([ - T.Resize_Keypt((256,256)), - T.ToTensor_Keypt(), - T.Normalize_Keypt(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - - queryloader = DataLoader( - ImageDataset(dataset.query, keyptaware=False, heatmapaware=False, - segmentaware=False, - transform=transform_test, imagesize=(256,256)), - batch_size=args.test_batch, shuffle=False, num_workers=args.workers, - drop_last=False, - ) - - galleryloader = DataLoader( - ImageDataset(dataset.gallery, keyptaware=False, heatmapaware=False, - segmentaware=False, - transform=transform_test, imagesize=(256,256)), - batch_size=args.test_batch, shuffle=False, num_workers=args.workers, - drop_last=False, - ) - qf = [] - q_vids = [] - q_camids = [] - for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(queryloader): - q_vids.extend(vids) - q_camids.extend(camids) - q_vids = np.asarray(q_vids) - q_camids = np.asarray(q_camids) - - for root, folder, files in os.walk(args.queryfeature_path): - files.sort(key=lambda x:int(x.split('_')[0])) - for file in files: - truefile1 = file.split('_')[1] - if truefile1 == "4.bin": #将benckmark推理出的第四个输出"features"读入,features为计算mAP值的特征 - file_path = os.path.join(root, file) - with open(file_path,'rb') as f: - featuresq = np.fromfile(f, dtype="float32") - featuresq = torch.from_numpy(featuresq) - featuresq = featuresq.unsqueeze(0) - featuresq = featuresq.data.cpu() - qf.append(featuresq) - qf = torch.cat(qf, 0) - - print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1))) - - gf = [] - g_vids = [] - g_camids = [] - for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(galleryloader): - g_vids.extend(vids) - g_camids.extend(camids) - g_vids = np.asarray(g_vids) - g_camids = np.asarray(g_camids) - - for root, folder, files in os.walk(args.galleryfeature_path): - files.sort(key=lambda x: int(x.split('_')[0])) - for file in files: - truefile2 = file.split('_')[1] - if truefile2== "4.bin": #将benckmark推理出的第四个输出"features"读入,features为计算mAP值的特征 - file_path = os.path.join(root, file) - with open(file_path,'rb') as f: - featuresg = np.fromfile(f, dtype="float32") - featuresg = torch.from_numpy(featuresg) - featuresg = featuresg.unsqueeze(0) - featuresg = featuresg.data.cpu() - gf.append(featuresg) - gf = torch.cat(gf, 0) - - print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1))) - - m, n = qf.size(0), gf.size(0) - distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ - torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() - distmat.addmm_(1, -2, qf, gf.t()) - distmat = distmat.numpy() - - print("Computing CMC and mAP") - cmc, mAP = evaluate(distmat, q_vids, g_vids, q_camids, g_camids) - - print("Results ----------") - print("mAP: {:.2%}".format(mAP)) - print("CMC curve") - for r in ranks: - print("Rank-{:<3}: {:.2%}".format(r, cmc[r - 1])) - print("------------------") - -if 
__name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--query_dir", default="./data/veri/image_query") - parser.add_argument("--gallery_dir", default="./data/veri/image_test") - parser.add_argument("--queryfeature_path", default="./result/dumpOutput_device0_query") - parser.add_argument("--galleryfeature_path", default="./result/dumpOutput_device0_gallery") - parser.add_argument('--root', type=str, default='data', - help="root path to data directory") - parser.add_argument('-d', '--dataset', type=str, default='veri', - help="name of the dataset") - parser.add_argument('-j', '--workers', default=4, type=int, - help="number of data loading workers (default: 4)") - parser.add_argument('--test-batch', default=1, type=int, - help="test batch size") - parser.add_argument('-a', '--arch', type=str, default='densenet121', choices=models.get_names()) - args = parser.parse_args() - - postprocess() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import print_function +from __future__ import division + + +import argparse +import numpy as np +import torch +from torch.utils.data import DataLoader +from torchreid import models +from torchreid.data_manager import DatasetManager +from torchreid.dataset_loader import ImageDataset +from torchreid.eval_metrics import evaluate +from torchreid import transforms as T +torch.multiprocessing.set_sharing_strategy('file_system') +import os + +def postprocess(ranks=range(1, 51)): + + dataset = DatasetManager(dataset_dir=args.dataset, root=args.root) + + transform_test = T.Compose_Keypt([ + T.Resize_Keypt((256,256)), + T.ToTensor_Keypt(), + T.Normalize_Keypt(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + queryloader = DataLoader( + ImageDataset(dataset.query, keyptaware=False, heatmapaware=False, + segmentaware=False, + transform=transform_test, imagesize=(256,256)), + batch_size=args.test_batch, shuffle=False, num_workers=args.workers, + drop_last=False, + ) + + galleryloader = DataLoader( + ImageDataset(dataset.gallery, keyptaware=False, heatmapaware=False, + segmentaware=False, + transform=transform_test, imagesize=(256,256)), + batch_size=args.test_batch, shuffle=False, num_workers=args.workers, + drop_last=False, + ) + qf = [] + q_vids = [] + q_camids = [] + for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(queryloader): + q_vids.extend(vids) + q_camids.extend(camids) + q_vids = np.asarray(q_vids) + q_camids = np.asarray(q_camids) + + for root, folder, files in os.walk(args.queryfeature_path): + files.sort(key=lambda x:int(x.split('_')[0])) + for file in files: + truefile1 = file.split('_')[1] + if truefile1 == "4.bin": #将benckmark推理出的第四个输出"features"读入,features为计算mAP值的特征 + file_path = os.path.join(root, file) + with open(file_path,'rb') as f: + featuresq = np.fromfile(f, dtype="float32") + featuresq = torch.from_numpy(featuresq) + featuresq = featuresq.unsqueeze(0) + featuresq = featuresq.data.cpu() + qf.append(featuresq) + qf = 
torch.cat(qf, 0) + + print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1))) + + gf = [] + g_vids = [] + g_camids = [] + for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(galleryloader): + g_vids.extend(vids) + g_camids.extend(camids) + g_vids = np.asarray(g_vids) + g_camids = np.asarray(g_camids) + + for root, folder, files in os.walk(args.galleryfeature_path): + files.sort(key=lambda x: int(x.split('_')[0])) + for file in files: + truefile2 = file.split('_')[1] + if truefile2== "4.bin": #将benckmark推理出的第四个输出"features"读入,features为计算mAP值的特征 + file_path = os.path.join(root, file) + with open(file_path,'rb') as f: + featuresg = np.fromfile(f, dtype="float32") + featuresg = torch.from_numpy(featuresg) + featuresg = featuresg.unsqueeze(0) + featuresg = featuresg.data.cpu() + gf.append(featuresg) + gf = torch.cat(gf, 0) + + print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1))) + + m, n = qf.size(0), gf.size(0) + distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ + torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() + distmat.addmm_(1, -2, qf, gf.t()) + distmat = distmat.numpy() + + print("Computing CMC and mAP") + cmc, mAP = evaluate(distmat, q_vids, g_vids, q_camids, g_camids) + + print("Results ----------") + print("mAP: {:.2%}".format(mAP)) + print("CMC curve") + for r in ranks: + print("Rank-{:<3}: {:.2%}".format(r, cmc[r - 1])) + print("------------------") + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--query_dir", default="./data/veri/image_query") + parser.add_argument("--gallery_dir", default="./data/veri/image_test") + parser.add_argument("--queryfeature_path", default="./result/dumpOutput_device0_query") + parser.add_argument("--galleryfeature_path", default="./result/dumpOutput_device0_gallery") + parser.add_argument('--root', type=str, default='data', + help="root path to data directory") + parser.add_argument('-d', '--dataset', type=str, default='veri', + help="name of the dataset") + parser.add_argument('-j', '--workers', default=4, type=int, + help="number of data loading workers (default: 4)") + parser.add_argument('--test-batch', default=1, type=int, + help="test batch size") + parser.add_argument('-a', '--arch', type=str, default='densenet121', choices=models.get_names()) + args = parser.parse_args() + + postprocess() diff --git a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_preprocess.py index b3ce371f5bf73615cd59a4e4c234d65c375d3779..78665cafef37e1ed0aecc8f808dd0581ab449923 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/PAMTRI_preprocess.py @@ -1,92 +1,92 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import print_function -from __future__ import division -import os -import argparse -import numpy as np -from torch.utils.data import DataLoader -from torchreid.data_manager import DatasetManager -from torchreid.dataset_loader import ImageDataset -from torchreid import transforms as T -from torchreid import models -from PIL import Image -import sys - -def preprocess(): - print("==========\nArgs:{}\n==========".format(args)) - print("Initializing dataset {}".format(args.dataset)) - dataset = DatasetManager(dataset_dir=args.dataset, root=args.root) - - transform_test = T.Compose_Keypt([ - T.Resize_Keypt((256, 256)), - T.ToTensor_Keypt(), - T.Normalize_Keypt(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - - queryloader = DataLoader( - ImageDataset(dataset.query, keyptaware=False, heatmapaware=False, segmentaware=False, - transform=transform_test, imagesize=(256, 256)), - batch_size=args.test_batch, shuffle=False, num_workers=args.workers, - pin_memory=False, drop_last=False, - ) - - galleryloader = DataLoader( - ImageDataset(dataset.gallery, keyptaware=False, heatmapaware=False, segmentaware=False, - transform=transform_test, imagesize=(256, 256)), - batch_size=args.test_batch, shuffle=False, num_workers=args.workers, - pin_memory=False, drop_last=False, - ) - - get_bin(queryloader, galleryloader) - - -def get_bin(queryloader, galleryloader): - queryloader_num = 0 - galleryloader_num = 0 - for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(queryloader): - query = imgs.numpy() - query.tofile(os.path.join(args.save_path1, "{}.bin".format(queryloader_num))) - queryloader_num = queryloader_num + 1 - - for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(galleryloader): - gallery = imgs.numpy() - gallery.tofile(os.path.join(args.save_path2, "{}.bin".format(galleryloader_num))) - galleryloader_num = galleryloader_num + 1 - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--query_dir", default="./data/veri/image_query") - parser.add_argument("--gallery_dir", default="./data/veri/image_test") - parser.add_argument("--save_path1", default="./prep_dataset_query") - parser.add_argument("--save_path2", default="./prep_dataset_gallery") - parser.add_argument('--root', type=str, default='data', - help="root path to data directory") - parser.add_argument('-d', '--dataset', type=str, default='veri', - help="name of the dataset") - parser.add_argument('-j', '--workers', default=4, type=int, - help="number of data loading workers (default: 4)") - parser.add_argument('--test-batch', default=1, type=int, - help="test batch size") - parser.add_argument('-a', '--arch', type=str, default='densenet121', choices=models.get_names()) - args = parser.parse_args() - - if not os.path.isdir(args.save_path1): - os.makedirs(os.path.realpath(args.save_path1)) - if not os.path.isdir(args.save_path2): - os.makedirs(os.path.realpath(args.save_path2)) - preprocess() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import print_function +from __future__ import division +import os +import argparse +import numpy as np +from torch.utils.data import DataLoader +from torchreid.data_manager import DatasetManager +from torchreid.dataset_loader import ImageDataset +from torchreid import transforms as T +from torchreid import models +from PIL import Image +import sys + +def preprocess(): + print("==========\nArgs:{}\n==========".format(args)) + print("Initializing dataset {}".format(args.dataset)) + dataset = DatasetManager(dataset_dir=args.dataset, root=args.root) + + transform_test = T.Compose_Keypt([ + T.Resize_Keypt((256, 256)), + T.ToTensor_Keypt(), + T.Normalize_Keypt(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + queryloader = DataLoader( + ImageDataset(dataset.query, keyptaware=False, heatmapaware=False, segmentaware=False, + transform=transform_test, imagesize=(256, 256)), + batch_size=args.test_batch, shuffle=False, num_workers=args.workers, + pin_memory=False, drop_last=False, + ) + + galleryloader = DataLoader( + ImageDataset(dataset.gallery, keyptaware=False, heatmapaware=False, segmentaware=False, + transform=transform_test, imagesize=(256, 256)), + batch_size=args.test_batch, shuffle=False, num_workers=args.workers, + pin_memory=False, drop_last=False, + ) + + get_bin(queryloader, galleryloader) + + +def get_bin(queryloader, galleryloader): + queryloader_num = 0 + galleryloader_num = 0 + for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(queryloader): + query = imgs.numpy() + query.tofile(os.path.join(args.save_path1, "{}.bin".format(queryloader_num))) + queryloader_num = queryloader_num + 1 + + for batch_idx, (imgs, vids, camids, vcolors, vtypes, vkeypts) in enumerate(galleryloader): + gallery = imgs.numpy() + gallery.tofile(os.path.join(args.save_path2, "{}.bin".format(galleryloader_num))) + galleryloader_num = galleryloader_num + 1 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--query_dir", default="./data/veri/image_query") + parser.add_argument("--gallery_dir", default="./data/veri/image_test") + parser.add_argument("--save_path1", default="./prep_dataset_query") + parser.add_argument("--save_path2", default="./prep_dataset_gallery") + parser.add_argument('--root', type=str, default='data', + help="root path to data directory") + parser.add_argument('-d', '--dataset', type=str, default='veri', + help="name of the dataset") + parser.add_argument('-j', '--workers', default=4, type=int, + help="number of data loading workers (default: 4)") + parser.add_argument('--test-batch', default=1, type=int, + help="test batch size") + parser.add_argument('-a', '--arch', type=str, default='densenet121', choices=models.get_names()) + args = parser.parse_args() + + if not os.path.isdir(args.save_path1): + os.makedirs(os.path.realpath(args.save_path1)) + if not os.path.isdir(args.save_path2): + os.makedirs(os.path.realpath(args.save_path2)) + preprocess() diff --git a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/README.md b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/README.md index 580a7bfd667b53aaccc336b5bb33e905d90f5317..b78602300127b37758d5b5c05fbd7df0d89ce102 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/README.md @@ -1,70 +1,70 @@ -# PAMTR模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 
-``` -pip3.7 install -r requirements.txt -``` - - -2.获取,修改与安装开源模型代码 -``` -git clone https://github.com/NVlabs/PAMTRI -b master -cd MultiTaskNet -mv ./densenet.patch ./torchreid/models/ -cd ./torchreid/models/ -patch -p1 < densenet.patch -``` - -3.获取权重文件 - -将权重文件densenet121-a639ec97.pth放到当前工作目录(执行pth2onnx时会自动下载) -可以通过以下命令下载: -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PAMTRI/densenet121-a639ec97.pth -``` - -4.数据集 - -参考[github开源代码仓内数据集获取方法](https://github.com/NVlabs/PAMTRI),将数据集放到./data目录。 -下载[数据集的label文件](https://github.com/NVlabs/PAMTRI.git),在PAMTRI/MultiTaskNet/data/veri/下获取label.csv文件。将数据集解压在./data/veri/目录下,应包含image_train、image_test、image_query三个数据集文件以及相应的.csv文件。 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash ./test/pth2om.sh -#启动脚本内3-8行为pth2onnx,10-19行为onnx2om,脚本执行完成后会生成PAMTRI.onnx、PAMTRI_bs1.om、PAMTRI_bs16.om三个模型。 -bash ./test/eval_acc_perf.sh --datasets_path=./data #datesets_path参数为指定数据集路径 -#启动脚本内12-15行为前处理,将图片信息转换为二进制bin文件;17-24行为提取与汇总前处理生成的bin文件信息;32-58行为benckmark推理,会生成推理后的特征信息与推理性能文件;60-69行为后处理,对推理出的特征信息进行评测,生成精度信息文件;70-92行为打印推理性能和精度信息。 -``` - **评测结果:** -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :-----------------------: | :----------------------------------------------------------: | :-------------------: | :------: | :---------: | -| PAMTRI bs1 | [mAP:68.64%](https://github.com/NVlabs/PAMTRI) | mAP:68.64% | 215.932fps | 627.662fps | -| PAMTRI bs16 | [mAP:68.64%](https://github.com/NVlabs/PAMTRI) | mAP:68.64% | 810.976fps | 833.668fps | -# 自检报告 - -``` -# pth是否能正确转换为om -bash test/pth2om.sh -# 验收结果: OK - -# 精度数据是否达标 -# npu性能数据 -bash test/eval_acc_perf.sh -# 验收结果: 是 - -# 在t4环境测试性能数据 -bash test/perf_t4.sh -# 验收结果: OK - -# 310性能是否符合标准: 是 -bs1:310=2.9倍t4 -bs16:310=1.03倍t4 - -``` +# PAMTR模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + + +2.获取,修改与安装开源模型代码 +``` +git clone https://github.com/NVlabs/PAMTRI -b master +cd MultiTaskNet +mv ./densenet.patch ./torchreid/models/ +cd ./torchreid/models/ +patch -p1 < densenet.patch +``` + +3.获取权重文件 + +将权重文件densenet121-a639ec97.pth放到当前工作目录(执行pth2onnx时会自动下载) +可以通过以下命令下载: +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PAMTRI/densenet121-a639ec97.pth +``` + +4.数据集 + +参考[github开源代码仓内数据集获取方法](https://github.com/NVlabs/PAMTRI),将数据集放到./data目录。 +下载[数据集的label文件](https://github.com/NVlabs/PAMTRI.git),在PAMTRI/MultiTaskNet/data/veri/下获取label.csv文件。将数据集解压在./data/veri/目录下,应包含image_train、image_test、image_query三个数据集文件以及相应的.csv文件。 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash ./test/pth2om.sh +#启动脚本内3-8行为pth2onnx,10-19行为onnx2om,脚本执行完成后会生成PAMTRI.onnx、PAMTRI_bs1.om、PAMTRI_bs16.om三个模型。 +bash ./test/eval_acc_perf.sh --datasets_path=./data #datesets_path参数为指定数据集路径 +#启动脚本内12-15行为前处理,将图片信息转换为二进制bin文件;17-24行为提取与汇总前处理生成的bin文件信息;32-58行为benckmark推理,会生成推理后的特征信息与推理性能文件;60-69行为后处理,对推理出的特征信息进行评测,生成精度信息文件;70-92行为打印推理性能和精度信息。 +``` + **评测结果:** +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :-----------------------: | :----------------------------------------------------------: | :-------------------: | :------: | :---------: | +| PAMTRI 
bs1 | [mAP:68.64%](https://github.com/NVlabs/PAMTRI) | mAP:68.64% | 215.932fps | 627.662fps | +| PAMTRI bs16 | [mAP:68.64%](https://github.com/NVlabs/PAMTRI) | mAP:68.64% | 810.976fps | 833.668fps | +# 自检报告 + +``` +# pth是否能正确转换为om +bash test/pth2om.sh +# 验收结果: OK + +# 精度数据是否达标 +# npu性能数据 +bash test/eval_acc_perf.sh +# 验收结果: 是 + +# 在t4环境测试性能数据 +bash test/perf_t4.sh +# 验收结果: OK + +# 310性能是否符合标准: 是 +bs1:310=2.9倍t4 +bs16:310=1.03倍t4 + +``` diff --git a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PAMTRI/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/PAMTRI/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/LICENSE b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/PointNetCNN_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/PointNetCNN_preprocess.py index 9a11d950d8bada821daf2a115e08d99ca6f5aec3..2dd816aacc7f8474dddcc1881fb9826583b0d281 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/PointNetCNN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/PointNetCNN_preprocess.py @@ -1,56 +1,56 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import os -import sys -import provider -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(BASE_DIR) -sys.path.append(os.path.join(BASE_DIR, 'models')) -sys.path.append(os.path.join(BASE_DIR, 'utils')) - -TEST_FILES = provider.getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt')) -BATCH_SIZE = 1 - -def preprocess(save_path,label_save_path): - i = 0 - test_file_idxs = np.arange(0, len(TEST_FILES)) - for fn in range(len(TEST_FILES)): - current_data, current_label = provider.loadDataFile(TEST_FILES[test_file_idxs[fn]]) - current_data = current_data[:, 0:1024, :] - file_size = current_data.shape[0] - num_batches = file_size // BATCH_SIZE - for batch_idx in range(num_batches): - i += 1 - start_idx = batch_idx * BATCH_SIZE - end_idx = (batch_idx + 1) * BATCH_SIZE - label = current_label[start_idx:end_idx] - rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :]) - jittered_data = provider.jitter_point_cloud(rotated_data) # P_Sampled - P_sampled = np.array(torch.from_numpy(jittered_data).float(), dtype=np.float32) - - P_sampled.tofile(os.path.join(save_path, "data" +str(i) + ".bin")) - np.save(os.path.join(label_save_path,'label'+str(i)),label) - -if __name__ == "__main__": - save_path = sys.argv[1] - label_save_path = sys.argv[2] - save_path = os.path.realpath(save_path) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - if not os.path.isdir(label_save_path): - os.makedirs(os.path.realpath(label_save_path)) - preprocess( save_path,label_save_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import numpy as np +import os +import sys +import provider +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(BASE_DIR) +sys.path.append(os.path.join(BASE_DIR, 'models')) +sys.path.append(os.path.join(BASE_DIR, 'utils')) + +TEST_FILES = provider.getDataFiles(os.path.join(BASE_DIR, 'data/modelnet40_ply_hdf5_2048/test_files.txt')) +BATCH_SIZE = 1 + +def preprocess(save_path,label_save_path): + i = 0 + test_file_idxs = np.arange(0, len(TEST_FILES)) + for fn in range(len(TEST_FILES)): + current_data, current_label = provider.loadDataFile(TEST_FILES[test_file_idxs[fn]]) + current_data = current_data[:, 0:1024, :] + file_size = current_data.shape[0] + num_batches = file_size // BATCH_SIZE + for batch_idx in range(num_batches): + i += 1 + start_idx = batch_idx * BATCH_SIZE + end_idx = (batch_idx + 1) * BATCH_SIZE + label = current_label[start_idx:end_idx] + rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :]) + jittered_data = provider.jitter_point_cloud(rotated_data) # P_Sampled + P_sampled = np.array(torch.from_numpy(jittered_data).float(), dtype=np.float32) + + P_sampled.tofile(os.path.join(save_path, "data" +str(i) + ".bin")) + np.save(os.path.join(label_save_path,'label'+str(i)),label) + +if __name__ == "__main__": + save_path = sys.argv[1] + label_save_path = sys.argv[2] + save_path = os.path.realpath(save_path) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + if not os.path.isdir(label_save_path): + os.makedirs(os.path.realpath(label_save_path)) + preprocess( save_path,label_save_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/README.md b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/README.md index 1d2cd0c57b9141185e68a95ebf3d3545d8cb9181..bed77d35e3e6ac61062f5df629ee861fcd94fc14 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/README.md @@ -1,64 +1,64 @@ -# PointNetCNN模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - -2.获取,修改与安装开源模型代码 -``` -git clone https://github.com/hxdengBerkeley/PointCNN.Pytorch -b master -cd PointCNN.Pytorch -git reset 6ec6c291cf97923a84fb6ed8c82e98bf01e7e96d --hard -patch -p1 < ../PointNetCNN.patch -cd .. -git clone https://gitee.com/Ronnie_zheng/MagicONNX.git -cd MagicONNX -pip install . -cd .. -cp -r PointCNN.Pytorch/utils PointCNN.Pytorch/provider.py ./ -``` -3.获取权重文件 - -将权重文件pointcnn_epoch240.pth放到当前工作目录 -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/pointcnn_epoch240.pth -``` - -4.数据集 -获取modelnet40_ply_hdf5_2048数据集,解压并放在./data目录下, -``` -mkdir data -cd data -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/modelnet40_ply_hdf5_2048.zip -unzip -d modelnet40_ply_hdf5_2048 modelnet40_ply_hdf5_2048.zip -cd .. 
-``` - -5.[获取msame工具](https://gitee.com/ascend/tools/tree/master/msame#https://gitee.com/ascend/tools.git) -将msame工具放到当前工作目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -1-6行是 转onnx -7-14行是 onnx转om -``` -bash test/pth2om.sh -``` -1-10行是 基本配置 -11-16行是 预处理 -17-22行是 使用msase工具推理 -23-30行是 使用benchmark工具推理 -38-43行是 精度统计 -44-50行是 性能统计 -``` -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :-------------: | :-----: | :-------------: | :------: | :-------: | -| PointNetCNN bs1 | 82.61% | 82.61% | 31fps | 27.3fps | - +# PointNetCNN模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + +2.获取,修改与安装开源模型代码 +``` +git clone https://github.com/hxdengBerkeley/PointCNN.Pytorch -b master +cd PointCNN.Pytorch +git reset 6ec6c291cf97923a84fb6ed8c82e98bf01e7e96d --hard +patch -p1 < ../PointNetCNN.patch +cd .. +git clone https://gitee.com/Ronnie_zheng/MagicONNX.git +cd MagicONNX +pip install . +cd .. +cp -r PointCNN.Pytorch/utils PointCNN.Pytorch/provider.py ./ +``` +3.获取权重文件 + +将权重文件pointcnn_epoch240.pth放到当前工作目录 +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/pointcnn_epoch240.pth +``` + +4.数据集 +获取modelnet40_ply_hdf5_2048数据集,解压并放在./data目录下, +``` +mkdir data +cd data +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/modelnet40_ply_hdf5_2048.zip +unzip -d modelnet40_ply_hdf5_2048 modelnet40_ply_hdf5_2048.zip +cd .. +``` + +5.[获取msame工具](https://gitee.com/ascend/tools/tree/master/msame#https://gitee.com/ascend/tools.git) +将msame工具放到当前工作目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +1-6行是 转onnx +7-14行是 onnx转om +``` +bash test/pth2om.sh +``` +1-10行是 基本配置 +11-16行是 预处理 +17-22行是 使用msase工具推理 +23-30行是 使用benchmark工具推理 +38-43行是 精度统计 +44-50行是 性能统计 +``` +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :-------------: | :-----: | :-------------: | :------: | :-------: | +| PointNetCNN bs1 | 82.61% | 82.61% | 31fps | 27.3fps | + diff --git a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/requirements.txt index 45587c8b52062a755f93cb3c1c41c213331b46d7..76f08412ee79abd7894132e6094be92fa85cfba6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/PointNetCNN/requirements.txt @@ -1,8 +1,8 @@ -torch == 1.8.0 -onnx == 1.10.2 -onnxruntime == 1.9.0 -onnx-simplifier == 0.3.6 -numpy == 1.20.3 -h5py == 3.1.0 -scipy == 1.2.0 -sklearn == 0.0 +torch == 1.8.0 +onnx == 1.10.2 +onnxruntime == 1.9.0 +onnx-simplifier == 0.3.6 +numpy == 1.20.3 +h5py == 3.1.0 +scipy == 1.2.0 +sklearn == 0.0 diff --git 
a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/README.md b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/README.md index 26f76975ae1f27800bd34defb4898b5c770d9a3c..9bd6b8f83d855f22fc58ec9205750ba82190a686 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/README.md @@ -1,69 +1,69 @@ -# PointNet++ Onnx模型端到端推理指导 - -## 1. 环境准备 -1.1.安装必要的依赖 - -```shell -pip3.7 install -r requirements.txt -``` - -1.2.获取,修改与安装开源模型代码 - -```shell -git clone https://github.com/yanx27/Pointnet_Pointnet2_pytorch models -cd models -git checkout e365b9f7b9c3d7d6444278d92e298e3f078794e1 -patch -p1 < ../models.patch -cd .. -``` - -1.3. 获取权重文件 - -pth采用开源仓自带的权重,权重位置: -```shell -./models/log/classification/pointnet2_ssg_wo_normals/checkpoints/best_model.pth -``` - -1.4. 数据集 - -[测试集](https://shapenet.cs.stanford.edu) - -1.5. 获取离线推理工具 - -[benchmark](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -[msame](https://gitee.com/ascend/tools/tree/master/msame) - -## 2. 离线推理 - -2.1 模型转换&&精度测试 -在310上执行,执行时采用npu-smi info查看设备状态,确保device空闲 - -```shell -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - -2.2 性能测试 - -gpu测速,在对应gpu设备上执行,执行时采用nvidia-smi查看设备状态,确保device空闲 - -```shell -bash test/perf_g.sh -``` -npu测速,在对应npu设备上执行,执行时采用npu-smi info查看设备状态,确保device空闲 - -```shell -./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part1_bs1.om -device_id=0 -batch_size=1 -./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part2_bs1.om -device_id=0 -batch_size=1 -./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part1_bs16.om -device_id=0 -batch_size=16 -./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part2_bs16.om -device_id=0 -batch_size=16 -``` - -**评测结果:** - -| 模型 | batch_size | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -|------------------|------------|---------------------------------------------|-----------------------------------------|----------|---------| -| PointNet++_part1 | 1 | - | - | 2997fps | 5308fps | -| PointNet++_part2 | 1 | Instance Acc: 0.928964, Class Acc: 0.890532 | Instance Acc: 0.9263, Class Acc: 0.8877 | 2571fps | 4105fps | -| PointNet++_part1 | 16 | - | - | 3468fps | 5968fps | -| PointNet++_part2 | 16 | - | Instance Acc: 0.9245, Class Acc: 0.8854 | 3670fps | 3730fps | +# PointNet++ Onnx模型端到端推理指导 + +## 1. 环境准备 +1.1.安装必要的依赖 + +```shell +pip3.7 install -r requirements.txt +``` + +1.2.获取,修改与安装开源模型代码 + +```shell +git clone https://github.com/yanx27/Pointnet_Pointnet2_pytorch models +cd models +git checkout e365b9f7b9c3d7d6444278d92e298e3f078794e1 +patch -p1 < ../models.patch +cd .. +``` + +1.3. 获取权重文件 + +pth采用开源仓自带的权重,权重位置: +```shell +./models/log/classification/pointnet2_ssg_wo_normals/checkpoints/best_model.pth +``` + +1.4. 数据集 + +[测试集](https://shapenet.cs.stanford.edu) + +1.5. 获取离线推理工具 + +[benchmark](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +[msame](https://gitee.com/ascend/tools/tree/master/msame) + +## 2. 
离线推理 + +2.1 模型转换&&精度测试 +在310上执行,执行时采用npu-smi info查看设备状态,确保device空闲 + +```shell +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + +2.2 性能测试 + +gpu测速,在对应gpu设备上执行,执行时采用nvidia-smi查看设备状态,确保device空闲 + +```shell +bash test/perf_g.sh +``` +npu测速,在对应npu设备上执行,执行时采用npu-smi info查看设备状态,确保device空闲 + +```shell +./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part1_bs1.om -device_id=0 -batch_size=1 +./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part2_bs1.om -device_id=0 -batch_size=1 +./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part1_bs16.om -device_id=0 -batch_size=16 +./benchmark.x86_64 -round=100 -om_path=Pointnetplus_part2_bs16.om -device_id=0 -batch_size=16 +``` + +**评测结果:** + +| 模型 | batch_size | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +|------------------|------------|---------------------------------------------|-----------------------------------------|----------|---------| +| PointNet++_part1 | 1 | - | - | 2997fps | 5308fps | +| PointNet++_part2 | 1 | Instance Acc: 0.928964, Class Acc: 0.890532 | Instance Acc: 0.9263, Class Acc: 0.8877 | 2571fps | 4105fps | +| PointNet++_part1 | 16 | - | - | 3468fps | 5968fps | +| PointNet++_part2 | 16 | - | Instance Acc: 0.9245, Class Acc: 0.8854 | 3670fps | 3730fps | diff --git a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_postprocess.py index 4c9ab76a6d5105a783e4b43e8bbc33f54f96d2f1..acfce5a38c3e14fcb605486c356349dca01ee93f 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_postprocess.py @@ -1,115 +1,115 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import numpy as np -import os -import torch -import logging -import sys -from tqdm import tqdm - - -def parse_args(): - '''PARAMETERS''' - parser = argparse.ArgumentParser('off_line_pred') - parser.add_argument('--target_path', type=str, default='out/bs1/', - required=False, help='target root') - parser.add_argument('--data_loc', type=str, default='/home/data/modelnet40_normal_resampled/', required=False, help='data location') - parser.add_argument('--batch_size', type=int, default=1, required=False, - help='batch size') - args = parser.parse_args() - out_folder = os.listdir(args.target_path)[-1] - args.target_path = os.path.join(args.target_path, out_folder) - return args - -def pc_normalize(pc): - centroid = np.mean(pc, axis=0) - pc = pc - centroid - m = np.max(np.sqrt(np.sum(pc**2, axis=1))) - pc = pc / m - return pc - -def model_data_loader(data_pth): - data_path = data_pth - catfile = os.path.join(data_path, 'modelnet40_shape_names.txt') - cat = [line.rstrip() for line in open(catfile)] - classes = dict(zip(cat, range(len(cat)))) - shape_ids = {} - shape_ids['test'] = [line.rstrip() for line in open(os.path.join(data_path, 'modelnet40_test.txt'))] - split = 'test' - shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids[split]] - datapath = [(shape_names[i], os.path.join(data_path, shape_names[i], shape_ids[split][i]) + '.txt') for i - in range(len(shape_ids[split]))] - print('The size of %s data is %d' % (split, len(datapath))) - return classes, datapath - -def test(pred_path,data_path): - mean_correct = [] - class_acc = np.zeros((40, 3)) - - classes,data_pth = model_data_loader(data_path) - print('data is %d' % len(data_pth)) - # load infer results - def load_infer_results(): - num_out = len(data_pth) // args.batch_size - for j in range(num_out): - pred_loca = os.path.join(pred_path, 'part2_' + str(j) + '_output_0.bin') - pred = np.fromfile(pred_loca,np.float32) - if args.batch_size == 1: - pred.shape = 1, 40 - pred = torch.from_numpy(pred) - yield pred - else: - pred.shape = args.batch_size, 40 - for d in pred: - d = torch.from_numpy(np.expand_dims(d, axis=0)) - yield d - infer_results = load_infer_results() - - # load gt results - num_results = len(data_pth) // args.batch_size * args.batch_size - for j in tqdm(range(num_results)): - fn = data_pth[j] - cls = classes[data_pth[j][0]] - target = np.array([cls]).astype(np.int32) - point_set = np.loadtxt(fn[1], delimiter=',').astype(np.float32) - point_set = point_set[0:1024, :] - point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) - point_set = point_set[:, 0:3] - point_set = point_set[None,] - new_point_set = torch.from_numpy(point_set) - points = new_point_set.transpose(2, 1) - target = torch.from_numpy(target) - - pred = next(infer_results) - pred_choice = pred.data.max(1)[1] - '''验证精度''' - for cat in np.unique(target.cpu()): - classacc = pred_choice[target == cat].eq(target[target == cat].long().data).cpu().sum() - class_acc[cat, 0] += classacc.item() / float(points[target == cat].size()[0]) - class_acc[cat, 1] += 1 - correct = pred_choice.eq(target.long().data).cpu().sum() - mean_correct.append(correct.item() / float(points.size()[0])) - - class_acc[:, 2] = class_acc[:, 0] / class_acc[:, 1] - class_acc = np.mean(class_acc[:, 2]) - instance_acc = np.mean(mean_correct) - print('class_acc is %f' % class_acc) - print('instance_acc is %f' % instance_acc) - return instance_acc, class_acc - -if __name__ == '__main__': - args = parse_args() - test(args.target_path, args.data_loc) +# Copyright 2021 Huawei 
Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import numpy as np +import os +import torch +import logging +import sys +from tqdm import tqdm + + +def parse_args(): + '''PARAMETERS''' + parser = argparse.ArgumentParser('off_line_pred') + parser.add_argument('--target_path', type=str, default='out/bs1/', + required=False, help='target root') + parser.add_argument('--data_loc', type=str, default='/home/data/modelnet40_normal_resampled/', required=False, help='data location') + parser.add_argument('--batch_size', type=int, default=1, required=False, + help='batch size') + args = parser.parse_args() + out_folder = os.listdir(args.target_path)[-1] + args.target_path = os.path.join(args.target_path, out_folder) + return args + +def pc_normalize(pc): + centroid = np.mean(pc, axis=0) + pc = pc - centroid + m = np.max(np.sqrt(np.sum(pc**2, axis=1))) + pc = pc / m + return pc + +def model_data_loader(data_pth): + data_path = data_pth + catfile = os.path.join(data_path, 'modelnet40_shape_names.txt') + cat = [line.rstrip() for line in open(catfile)] + classes = dict(zip(cat, range(len(cat)))) + shape_ids = {} + shape_ids['test'] = [line.rstrip() for line in open(os.path.join(data_path, 'modelnet40_test.txt'))] + split = 'test' + shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids[split]] + datapath = [(shape_names[i], os.path.join(data_path, shape_names[i], shape_ids[split][i]) + '.txt') for i + in range(len(shape_ids[split]))] + print('The size of %s data is %d' % (split, len(datapath))) + return classes, datapath + +def test(pred_path,data_path): + mean_correct = [] + class_acc = np.zeros((40, 3)) + + classes,data_pth = model_data_loader(data_path) + print('data is %d' % len(data_pth)) + # load infer results + def load_infer_results(): + num_out = len(data_pth) // args.batch_size + for j in range(num_out): + pred_loca = os.path.join(pred_path, 'part2_' + str(j) + '_output_0.bin') + pred = np.fromfile(pred_loca,np.float32) + if args.batch_size == 1: + pred.shape = 1, 40 + pred = torch.from_numpy(pred) + yield pred + else: + pred.shape = args.batch_size, 40 + for d in pred: + d = torch.from_numpy(np.expand_dims(d, axis=0)) + yield d + infer_results = load_infer_results() + + # load gt results + num_results = len(data_pth) // args.batch_size * args.batch_size + for j in tqdm(range(num_results)): + fn = data_pth[j] + cls = classes[data_pth[j][0]] + target = np.array([cls]).astype(np.int32) + point_set = np.loadtxt(fn[1], delimiter=',').astype(np.float32) + point_set = point_set[0:1024, :] + point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) + point_set = point_set[:, 0:3] + point_set = point_set[None,] + new_point_set = torch.from_numpy(point_set) + points = new_point_set.transpose(2, 1) + target = torch.from_numpy(target) + + pred = next(infer_results) + pred_choice = pred.data.max(1)[1] + '''验证精度''' + for cat in np.unique(target.cpu()): + classacc = pred_choice[target == cat].eq(target[target == cat].long().data).cpu().sum() + class_acc[cat, 0] += 
classacc.item() / float(points[target == cat].size()[0]) + class_acc[cat, 1] += 1 + correct = pred_choice.eq(target.long().data).cpu().sum() + mean_correct.append(correct.item() / float(points.size()[0])) + + class_acc[:, 2] = class_acc[:, 0] / class_acc[:, 1] + class_acc = np.mean(class_acc[:, 2]) + instance_acc = np.mean(mean_correct) + print('class_acc is %f' % class_acc) + print('instance_acc is %f' % instance_acc) + return instance_acc, class_acc + +if __name__ == '__main__': + args = parse_args() + test(args.target_path, args.data_loc) diff --git a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_preprocess.py index 5c9fe9aa98836d250eb0cd4c6b6deb2742158245..2167b19ba0b72716c56e85b87b617f21ebbba334 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_preprocess.py @@ -1,157 +1,157 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import torch -import argparse -import sys -sys.path.append('./models') -from tqdm import tqdm -from models.pointnet2_utils import sample_and_group, farthest_point_sample -import glob - -def parse_args(): - '''PARAMETERS''' - parser = argparse.ArgumentParser('data_process') - parser.add_argument('--preprocess_part', type=int, default=1, required=False, help='preprocess target') - parser.add_argument('--save_path', type=str, default='./modelnet40_processed/test_preprocess/pointset_chg', required=False, help='target root') - parser.add_argument('--save_path2', type=str, default='./modelnet40_processed/test_preprocess/xyz_chg', required=False, help='target root') - parser.add_argument('--data_loc', type=str, default='', required=False, help='data location') - parser.add_argument('--data_loc2', type=str, default='./modelnet40_processed/test_preprocess/xyz_chg', required=False, help='data location') - parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch size for preprocess') - return parser.parse_args() - - -def pc_normalize(pc): - centroid = np.mean(pc, axis=0) - pc = pc - centroid - m = np.max(np.sqrt(np.sum(pc**2, axis=1))) - pc = pc / m - return pc - - -def preprocess(save_path,save_path2,data_location): - data_path = data_location - save_path = save_path - save_path2 = save_path2 - catfile = os.path.join(data_path, 'modelnet40_shape_names.txt') - cat = [line.rstrip() for line in open(catfile)] - classes = dict(zip(cat, range(len(cat)))) - shape_ids = {} - shape_ids['test'] = [line.rstrip() for line in open(os.path.join(data_path, 'modelnet40_test.txt'))] - split = 'test' - shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids[split]] - datapath = [(shape_names[i], os.path.join(data_path, shape_names[i], shape_ids[split][i]) + '.txt') for i - in range(len(shape_ids[split]))] - print('The size of %s 
data is %d' % (split, len(datapath))) - os.makedirs(save_path, exist_ok=True) - os.makedirs(save_path2, exist_ok=True) - - point_set_list = [] - new_xyz_list = [] - for index in tqdm(range(len(datapath))): - fn = datapath[index] - cls = classes[datapath[index][0]] - label = np.array([cls]).astype(np.int32) - point_set = np.loadtxt(fn[1], delimiter=',').astype(np.float32) - point_set = point_set[0:1024, :] - point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) - point_set = point_set[:, 0:3] - point_set = point_set[None,] - # print(point_set.shape) - new_point_set = torch.from_numpy(point_set) - new_point_set = new_point_set.transpose(2, 1) - npoint = 512 - radius = 0.2 - nsample = 32 - points = None - new_point_set = new_point_set.permute(0, 2, 1) - centroid = farthest_point_sample(new_point_set, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, new_point_set, points, centroid) - - new_xyz = new_xyz.permute(0,2,1) - new_points = new_points.permute(0,3,2,1) - point_set, new_xyz = new_points.numpy(),new_xyz.numpy() - - point_name = 'point_set'+str(index) - if args.batch_size == 1: - point_set.tofile(os.path.join(save_path, point_name.split('.')[0] + ".bin")) - new_xyz.tofile(os.path.join(save_path2, point_name.split('.')[0] + ".bin")) - else: - point_set_list.append(point_set) - new_xyz_list.append(new_xyz) - if len(point_set_list) == args.batch_size: - point_sets = np.array(point_set_list) - new_xyzes = np.array(new_xyz_list) - point_names = 'point_set{}.bin'.format(str(index // 16)) - point_sets.tofile(os.path.join(save_path, point_names)) - new_xyzes.tofile(os.path.join(save_path2, point_names)) - point_set_list.clear() - new_xyz_list.clear() - - -def preprocess2(save_path,save_path2,data_location,data_location2): - data_toal_folder = os.listdir(data_location)[-1] - data_total_path = os.path.join(data_location, data_toal_folder) - save_path = save_path - save_path2 = save_path2 - file_start = 'point_set' - file_end_one = '_output_1.bin' - file_end_zero = '_output_0.bin' - os.makedirs(save_path, exist_ok=True) - os.makedirs(save_path2, exist_ok=True) - test = os.path.join(data_total_path, file_start+str(0)+file_end_zero) - try: - file_end = file_end_zero - test2 = np.fromfile(test,dtype=np.float32) - test_set.shape = args.batch_size,128,512 - except: - file_end = file_end_one - print(file_end) - data_total_path2 = data_location2 - file_end_two = '.bin' - path_file_number=glob.glob(data_location2+'/*.bin') - - for index in tqdm(range(len(path_file_number))): - data_path2 = os.path.join(data_total_path2, file_start+str(index)+file_end_two) - data_path1 = os.path.join(data_total_path, file_start+str(index)+file_end) - point_set = np.fromfile(data_path1,dtype=np.float32) - point_set2 = np.fromfile(data_path2,dtype=np.float32) - point_set.shape = args.batch_size,128,512 - point_set2.shape = args.batch_size,3,512 - new_point_set = torch.from_numpy(point_set2) - point_set2 = torch.from_numpy(point_set) - npoint = 128 - radius = 0.4 - nsample = 64 - new_point_set = new_point_set.permute(0, 2, 1) - point_set2 = point_set2.permute(0,2,1) - centroid = farthest_point_sample(new_point_set, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, new_point_set, point_set2, centroid) - new_point_set = new_point_set.permute(0, 2, 1) - new_points = new_points.permute(0,3,2,1) - new_xyz = new_xyz.permute(0,2,1) - point_set,new_xyz = new_points.numpy(),new_xyz.numpy() - point_name = 'part2_'+str(index) - point_set.tofile(os.path.join(save_path, point_name.split('.')[0] 
+ ".bin")) - new_xyz.tofile(os.path.join(save_path2, point_name.split('.')[0] + ".bin")) - - -if __name__ == '__main__': - args = parse_args() - if(1 == args.preprocess_part): - preprocess(args.save_path,args.save_path2,args.data_loc) - else: - preprocess2(args.save_path,args.save_path2,args.data_loc,args.data_loc2) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import torch +import argparse +import sys +sys.path.append('./models') +from tqdm import tqdm +from models.pointnet2_utils import sample_and_group, farthest_point_sample +import glob + +def parse_args(): + '''PARAMETERS''' + parser = argparse.ArgumentParser('data_process') + parser.add_argument('--preprocess_part', type=int, default=1, required=False, help='preprocess target') + parser.add_argument('--save_path', type=str, default='./modelnet40_processed/test_preprocess/pointset_chg', required=False, help='target root') + parser.add_argument('--save_path2', type=str, default='./modelnet40_processed/test_preprocess/xyz_chg', required=False, help='target root') + parser.add_argument('--data_loc', type=str, default='', required=False, help='data location') + parser.add_argument('--data_loc2', type=str, default='./modelnet40_processed/test_preprocess/xyz_chg', required=False, help='data location') + parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch size for preprocess') + return parser.parse_args() + + +def pc_normalize(pc): + centroid = np.mean(pc, axis=0) + pc = pc - centroid + m = np.max(np.sqrt(np.sum(pc**2, axis=1))) + pc = pc / m + return pc + + +def preprocess(save_path,save_path2,data_location): + data_path = data_location + save_path = save_path + save_path2 = save_path2 + catfile = os.path.join(data_path, 'modelnet40_shape_names.txt') + cat = [line.rstrip() for line in open(catfile)] + classes = dict(zip(cat, range(len(cat)))) + shape_ids = {} + shape_ids['test'] = [line.rstrip() for line in open(os.path.join(data_path, 'modelnet40_test.txt'))] + split = 'test' + shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids[split]] + datapath = [(shape_names[i], os.path.join(data_path, shape_names[i], shape_ids[split][i]) + '.txt') for i + in range(len(shape_ids[split]))] + print('The size of %s data is %d' % (split, len(datapath))) + os.makedirs(save_path, exist_ok=True) + os.makedirs(save_path2, exist_ok=True) + + point_set_list = [] + new_xyz_list = [] + for index in tqdm(range(len(datapath))): + fn = datapath[index] + cls = classes[datapath[index][0]] + label = np.array([cls]).astype(np.int32) + point_set = np.loadtxt(fn[1], delimiter=',').astype(np.float32) + point_set = point_set[0:1024, :] + point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) + point_set = point_set[:, 0:3] + point_set = point_set[None,] + # print(point_set.shape) + new_point_set = torch.from_numpy(point_set) + new_point_set = new_point_set.transpose(2, 1) + npoint = 512 + radius = 0.2 + nsample = 32 + points = None + new_point_set = 
new_point_set.permute(0, 2, 1) + centroid = farthest_point_sample(new_point_set, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, new_point_set, points, centroid) + + new_xyz = new_xyz.permute(0,2,1) + new_points = new_points.permute(0,3,2,1) + point_set, new_xyz = new_points.numpy(),new_xyz.numpy() + + point_name = 'point_set'+str(index) + if args.batch_size == 1: + point_set.tofile(os.path.join(save_path, point_name.split('.')[0] + ".bin")) + new_xyz.tofile(os.path.join(save_path2, point_name.split('.')[0] + ".bin")) + else: + point_set_list.append(point_set) + new_xyz_list.append(new_xyz) + if len(point_set_list) == args.batch_size: + point_sets = np.array(point_set_list) + new_xyzes = np.array(new_xyz_list) + point_names = 'point_set{}.bin'.format(str(index // 16)) + point_sets.tofile(os.path.join(save_path, point_names)) + new_xyzes.tofile(os.path.join(save_path2, point_names)) + point_set_list.clear() + new_xyz_list.clear() + + +def preprocess2(save_path,save_path2,data_location,data_location2): + data_toal_folder = os.listdir(data_location)[-1] + data_total_path = os.path.join(data_location, data_toal_folder) + save_path = save_path + save_path2 = save_path2 + file_start = 'point_set' + file_end_one = '_output_1.bin' + file_end_zero = '_output_0.bin' + os.makedirs(save_path, exist_ok=True) + os.makedirs(save_path2, exist_ok=True) + test = os.path.join(data_total_path, file_start+str(0)+file_end_zero) + try: + file_end = file_end_zero + test2 = np.fromfile(test,dtype=np.float32) + test_set.shape = args.batch_size,128,512 + except: + file_end = file_end_one + print(file_end) + data_total_path2 = data_location2 + file_end_two = '.bin' + path_file_number=glob.glob(data_location2+'/*.bin') + + for index in tqdm(range(len(path_file_number))): + data_path2 = os.path.join(data_total_path2, file_start+str(index)+file_end_two) + data_path1 = os.path.join(data_total_path, file_start+str(index)+file_end) + point_set = np.fromfile(data_path1,dtype=np.float32) + point_set2 = np.fromfile(data_path2,dtype=np.float32) + point_set.shape = args.batch_size,128,512 + point_set2.shape = args.batch_size,3,512 + new_point_set = torch.from_numpy(point_set2) + point_set2 = torch.from_numpy(point_set) + npoint = 128 + radius = 0.4 + nsample = 64 + new_point_set = new_point_set.permute(0, 2, 1) + point_set2 = point_set2.permute(0,2,1) + centroid = farthest_point_sample(new_point_set, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, new_point_set, point_set2, centroid) + new_point_set = new_point_set.permute(0, 2, 1) + new_points = new_points.permute(0,3,2,1) + new_xyz = new_xyz.permute(0,2,1) + point_set,new_xyz = new_points.numpy(),new_xyz.numpy() + point_name = 'part2_'+str(index) + point_set.tofile(os.path.join(save_path, point_name.split('.')[0] + ".bin")) + new_xyz.tofile(os.path.join(save_path2, point_name.split('.')[0] + ".bin")) + + +if __name__ == '__main__': + args = parse_args() + if(1 == args.preprocess_part): + preprocess(args.save_path,args.save_path2,args.data_loc) + else: + preprocess2(args.save_path,args.save_path2,args.data_loc,args.data_loc2) diff --git a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_pth2onnx.py index bb92b279f8849a1865e791bc60a632c473db756f..4728535261abf781fb2c64b17a541e62015b5fad 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_pth2onnx.py +++ 
b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/pointnetplus_pth2onnx.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import OrderedDict -import torch -import torch.onnx -import argparse -import sys -sys.path.append('./models/models') -import pointnet2_cls_ssg as pointnet2_cls -from pointnet2_utils import farthest_point_sample -from pointnet2_utils import sample_and_group - - -def parse_args(): - '''PARAMETERS''' - parser = argparse.ArgumentParser('off_line_pred') - parser.add_argument('--target_model', type=int, default=1, - required=True, help='target trans_models') - parser.add_argument('--pth_dir', type=str, default='', - required=False, help='target trans_models') - parser.add_argument('--batch_size', type=int, default=1, - required=False, help='batch size') - return parser.parse_args() - - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def model_convert(dir): - experiment_dir = dir - dummy_input = torch.randn(args.batch_size, 3, 1024) - checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') - checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') - model = pointnet2_cls.get_model_part1(normal_channel=False) - model.load_state_dict(checkpoint['model_state_dict']) - model.eval() - npoint = 512 - radius = 0.2 - nsample = 32 - points = None - test_input = dummy_input.permute(0, 2, 1) - centroid = farthest_point_sample(test_input, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, points, centroid) - new_points = new_points.permute(0, 3, 2, 1) - input_names = ["xyz", "samp_points"] - output_names = ["l1_xyz", "l1_point"] - torch.onnx.export(model, (new_xyz, new_points), - "Pointnetplus_part1_bs{}.onnx".format(args.batch_size), - input_names=input_names, verbose=True, output_names=output_names, opset_version=11) - - -def model_convert2(dir): - experiment_dir = dir - dummy_xyz_input = torch.randn(args.batch_size, 3, 512) - dummy_point_input = torch.randn(args.batch_size, 128, 512) - checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') - checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') - model = pointnet2_cls.get_model_part2(normal_channel=False) - model.load_state_dict(checkpoint['model_state_dict']) - model.eval() - npoint = 128 - radius = 0.4 - nsample = 64 - points = None - test_input = dummy_xyz_input.permute(0, 2, 1) - test_points = dummy_point_input.permute(0, 2, 1) - centroid = farthest_point_sample(test_input, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, test_points, centroid) - new_points = new_points.permute(0, 3, 2, 1) - new_xyz = new_xyz.permute(0, 2, 1) - input_names = ["l1_xyz", 
"l1_points"] - output_names = ["class", "l3_point"] - - torch.onnx.export(model, (new_xyz, new_points), - "Pointnetplus_part2_bs{}.onnx".format(args.batch_size), - input_names=input_names, verbose=True, output_names=output_names, opset_version=11) - - -if __name__ == '__main__': - args = parse_args() - if(args.target_model == 1): - model_convert(args.pth_dir) - else: - model_convert2(args.pth_dir) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import torch +import torch.onnx +import argparse +import sys +sys.path.append('./models/models') +import pointnet2_cls_ssg as pointnet2_cls +from pointnet2_utils import farthest_point_sample +from pointnet2_utils import sample_and_group + + +def parse_args(): + '''PARAMETERS''' + parser = argparse.ArgumentParser('off_line_pred') + parser.add_argument('--target_model', type=int, default=1, + required=True, help='target trans_models') + parser.add_argument('--pth_dir', type=str, default='', + required=False, help='target trans_models') + parser.add_argument('--batch_size', type=int, default=1, + required=False, help='batch size') + return parser.parse_args() + + +def proc_node_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def model_convert(dir): + experiment_dir = dir + dummy_input = torch.randn(args.batch_size, 3, 1024) + checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') + checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') + model = pointnet2_cls.get_model_part1(normal_channel=False) + model.load_state_dict(checkpoint['model_state_dict']) + model.eval() + npoint = 512 + radius = 0.2 + nsample = 32 + points = None + test_input = dummy_input.permute(0, 2, 1) + centroid = farthest_point_sample(test_input, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, points, centroid) + new_points = new_points.permute(0, 3, 2, 1) + input_names = ["xyz", "samp_points"] + output_names = ["l1_xyz", "l1_point"] + torch.onnx.export(model, (new_xyz, new_points), + "Pointnetplus_part1_bs{}.onnx".format(args.batch_size), + input_names=input_names, verbose=True, output_names=output_names, opset_version=11) + + +def model_convert2(dir): + experiment_dir = dir + dummy_xyz_input = torch.randn(args.batch_size, 3, 512) + dummy_point_input = torch.randn(args.batch_size, 128, 512) + checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') + checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') + model = pointnet2_cls.get_model_part2(normal_channel=False) + model.load_state_dict(checkpoint['model_state_dict']) + model.eval() + npoint = 128 + radius = 0.4 + nsample = 64 + points = None + test_input = dummy_xyz_input.permute(0, 2, 1) + test_points = 
dummy_point_input.permute(0, 2, 1) + centroid = farthest_point_sample(test_input, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, test_points, centroid) + new_points = new_points.permute(0, 3, 2, 1) + new_xyz = new_xyz.permute(0, 2, 1) + input_names = ["l1_xyz", "l1_points"] + output_names = ["class", "l3_point"] + + torch.onnx.export(model, (new_xyz, new_points), + "Pointnetplus_part2_bs{}.onnx".format(args.batch_size), + input_names=input_names, verbose=True, output_names=output_names, opset_version=11) + + +if __name__ == '__main__': + args = parse_args() + if(args.target_model == 1): + model_convert(args.pth_dir) + else: + model_convert2(args.pth_dir) diff --git a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/requirements.txt index e3e632f5e9d0b64724c7ef67c5e1c6d44ce30425..ad7ae69defba9cbbf3962d079ce68ca2b6a4d5b8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Pointnetplus/Pointnetplus/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.9.0 -onnx == 1.9.0 -tqdm == 4.62.2 -torchvision == 0.10.0 -numpy +torch == 1.9.0 +onnx == 1.9.0 +tqdm == 4.62.2 +torchvision == 0.10.0 +numpy diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ LICENSE b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ LICENSE index 797bf40e85c5d2986ebcec9cb51aed979ca88b82..04adf5cbc620ad190547b092fa449e36df5f7bf4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ modelzoo_level.txt index d44ba5698b045b8a30e107962f295dbc24585d8c..70801afc42b6d9eb5cdd98b5430d9b2101f3146a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ReadME.md b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ReadME.md index 1cdecd9b313bad706e5dc9b91be07d45bb0a9705..91cf9a02522497b72701710e8eb12b9e856afec6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ReadME.md +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/ReadME.md @@ -1,76 +1,76 @@ -# R(2+1)D模型PyTorch离线推理指导 - -## 1 环境准备 - -- **1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装** - -``` -pip3.7 install -r requirements.txt -``` - -- **1.2 获取,修改与安装开源模型代码** - -``` -git clone https://github.com/open-mmlab/mmcv -b master -cd mmcv -git reset --hard 6cb534b775b7502f0dcc59331236e619d3ae5b9f -MMCV_WITH_OPS=1 pip3.7 install -e . - -cd .. -git clone https://github.com/open-mmlab/mmaction2 -b master -cd mmaction2 -git reset --hard acce52d21a2545d9351b1060853c3bcd171b7158 -python3.7 setup.py develop - -``` -注:若上述命令不能下载源码,则将https替换为git(如:git clone git://github.com/open-mmlab/mmcv -b master ) - -将mmaction2/tools/deployment/目录下的pytorch2onnx.py中的torch.onnx.export添加一个参数: - -` dynamic_axes={'0':{0:'-1'}}, ` - -将r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py文件放在mmaction2/configs/recognition/r2plus1d文件夹下 - -- **1.3 [获取权重文件](https://www.aliyundrive.com/drive/folder/6130e24c1b56461015b44659bdc650a9d3cd8e71)** - -- **1.4 [数据集UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar)** - -将UCF101.rar文件解压,重命名为ucf101,放在 /root/datasets/文件夹下 - -``` -在当前目录创建videos文件夹 -mkdir -p ./data/ucf101/videos - -将/root/datasets/ucf101文件夹下的视频文件夹复制到videos下 -cp -r /root/datasets/ucf101/* ./data/ucf101/videos - -python3.7 ./mmaction2/tools/data/build_rawframes.py ./data/ucf101/videos/ ./data/ucf101/rawframes/ --task rgb --level 2 --ext avi --use-opencv - -DATA_DIR_AN="./data/ucf101/annotations" - -wget https://www.crcv.ucf.edu/wp-content/uploads/2019/03/UCF101TrainTestSplits-RecognitionTask.zip --no-check-certificate - -unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR_AN}/ -rm UCF101TrainTestSplits-RecognitionTask.zip - -PYTHONPATH=. 
python3.7 ./mmaction2/tools/data/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle -``` -- **1.5 获取[msame工具](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer)** - -将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 - -## 2 离线推理 - -- **310上执行,执行时使npu-smi info查看设备状态,确保device空闲** - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh arch=x86_64 -``` - -- **评测结果:** - -| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| ------------ | ----------------------- | ----------------------- | ---------- | ---------- | -| R(2+1)D-bs1 | [top1:0.8921 top5:0.9741](https://github.com/open-mmlab/mmaction2) | top1:0.8929 top5:0.9749 | 38.7704fps | 36.1684fps | -| R(2+1)D-bs16 | [top1:0.8921 top5:0.9742](https://github.com/open-mmlab/mmaction2) | top1:0.8929 top5:0.9749| 40.8914fps | 40.0332fps | +# R(2+1)D模型PyTorch离线推理指导 + +## 1 环境准备 + +- **1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装** + +``` +pip3.7 install -r requirements.txt +``` + +- **1.2 获取,修改与安装开源模型代码** + +``` +git clone https://github.com/open-mmlab/mmcv -b master +cd mmcv +git reset --hard 6cb534b775b7502f0dcc59331236e619d3ae5b9f +MMCV_WITH_OPS=1 pip3.7 install -e . + +cd .. +git clone https://github.com/open-mmlab/mmaction2 -b master +cd mmaction2 +git reset --hard acce52d21a2545d9351b1060853c3bcd171b7158 +python3.7 setup.py develop + +``` +注:若上述命令不能下载源码,则将https替换为git(如:git clone git://github.com/open-mmlab/mmcv -b master ) + +将mmaction2/tools/deployment/目录下的pytorch2onnx.py中的torch.onnx.export添加一个参数: + +` dynamic_axes={'0':{0:'-1'}}, ` + +将r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py文件放在mmaction2/configs/recognition/r2plus1d文件夹下 + +- **1.3 [获取权重文件](https://www.aliyundrive.com/drive/folder/6130e24c1b56461015b44659bdc650a9d3cd8e71)** + +- **1.4 [数据集UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar)** + +将UCF101.rar文件解压,重命名为ucf101,放在 /root/datasets/文件夹下 + +``` +在当前目录创建videos文件夹 +mkdir -p ./data/ucf101/videos + +将/root/datasets/ucf101文件夹下的视频文件夹复制到videos下 +cp -r /root/datasets/ucf101/* ./data/ucf101/videos + +python3.7 ./mmaction2/tools/data/build_rawframes.py ./data/ucf101/videos/ ./data/ucf101/rawframes/ --task rgb --level 2 --ext avi --use-opencv + +DATA_DIR_AN="./data/ucf101/annotations" + +wget https://www.crcv.ucf.edu/wp-content/uploads/2019/03/UCF101TrainTestSplits-RecognitionTask.zip --no-check-certificate + +unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR_AN}/ +rm UCF101TrainTestSplits-RecognitionTask.zip + +PYTHONPATH=. 
python3.7 ./mmaction2/tools/data/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle +``` +- **1.5 获取[msame工具](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer)** + +将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 + +## 2 离线推理 + +- **310上执行,执行时使npu-smi info查看设备状态,确保device空闲** + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh arch=x86_64 +``` + +- **评测结果:** + +| 模型 | pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| ------------ | ----------------------- | ----------------------- | ---------- | ---------- | +| R(2+1)D-bs1 | [top1:0.8921 top5:0.9741](https://github.com/open-mmlab/mmaction2) | top1:0.8929 top5:0.9749 | 38.7704fps | 36.1684fps | +| R(2+1)D-bs16 | [top1:0.8921 top5:0.9742](https://github.com/open-mmlab/mmaction2) | top1:0.8929 top5:0.9749| 40.8914fps | 40.0332fps | diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/get_info.py b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/get_info.py index 5af675c6b0ae3f28a6f191d139ebb44e775e6c1c..fc6cdebb5b4417a3651c1e6e9663d8d1299a0ef5 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py index 6da9ed4e07cd3ae0db8fb01d91a3d7bdb8ffe17f..58e521efcc7273e9ecb0adfae5a94a85a12426de 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/r2plus1d_r34_8x8x1_180e_ucf101_rgb2.py @@ -1,96 +1,96 @@ -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. 
valid numbers: 1, 2, 3 -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - # dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=16, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.0025, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 40 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = False -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. 
valid numbers: 1, 2, 3 +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + # dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=16, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.0025, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 40 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = False +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None diff --git a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/requirements.txt index 2b0bebd47b3f824727bf02c13b513f8d2abc92e6..6255de263372d9093f78840b7ec178d3375fc4b9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/R(2+1)D/requirements.txt @@ -1,9 +1,9 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -onnx-simplifier == 0.3.6 -numpy == 1.21.1 -Pillow == 8.2.0 -opencv_python == 4.5.3.56 -scipy == 1.7.1 +torch == 1.5.0 +torchvision 
== 0.6.0 +onnx == 1.7.0 +onnx-simplifier == 0.3.6 +numpy == 1.21.1 +Pillow == 8.2.0 +opencv_python == 4.5.3.56 +scipy == 1.7.1 einops ==0.3.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/LICENSE b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/LICENSE index 753842b6720f7980d411ecf2c78eb4ef220b9df8..f49a4e16e68b128803cc2dcea614603632b04eac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/get_info.py b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/get_info.py index 16bebcfc75fa5903434d2fbcee780e2e7ac4bd84..70e007ac5c49dc1ddc85fcbeb33ba54018f56b06 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/get_info.py @@ -1,62 +1,62 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - print(index,'done') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - print(index,'done') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + print(index,'done') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + print(index,'done') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/vision_metric_ImageNet.py b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/vision_metric_ImageNet.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/vision_metric_ImageNet.py +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF/vision_metric_ImageNet.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/LICENSE b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/get_info.py b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/get_info.py index 16bebcfc75fa5903434d2fbcee780e2e7ac4bd84..70e007ac5c49dc1ddc85fcbeb33ba54018f56b06 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/get_info.py @@ -1,62 +1,62 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - print(index,'done') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - print(index,'done') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + print(index,'done') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + print(index,'done') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/vision_metric_ImageNet.py b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/vision_metric_ImageNet.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/vision_metric_ImageNet.py +++ b/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF/vision_metric_ImageNet.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
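The vision_metric_ImageNet.py script above derives Top-1 through Top-5 accuracy by recording, for each image, the rank at which the ground-truth label first appears in the descending argsort of the predicted scores (count_hit), then dividing the cumulative sum by the image count. A self-contained sketch of that bookkeeping, using toy scores rather than real model outputs:
```
# Minimal sketch of the Top-N bookkeeping in vision_metric_ImageNet.py above:
# count_hit[i] counts images whose ground-truth label first appears at rank i
# of the descending score order, and np.cumsum(count_hit) / count then gives
# Top-1 .. Top-N accuracy. Scores and labels here are toy values, not real output.
import numpy as np

TOPN = 5
scores = np.array([
    [0.1, 0.7, 0.2],   # ground truth 1 -> hit at rank 0
    [0.5, 0.3, 0.2],   # ground truth 2 -> hit at rank 2
    [0.4, 0.1, 0.5],   # ground truth 0 -> hit at rank 1
])
labels = [1, 2, 0]

count_hit = np.zeros(TOPN)
for probs, gt in zip(scores, labels):
    sort_index = np.argsort(-probs)              # class ids by descending score
    for rank in range(min(len(sort_index), TOPN)):
        if sort_index[rank] == gt:
            count_hit[rank] += 1
            break

accuracy = np.cumsum(count_hit) / len(labels)
for i, acc in enumerate(accuracy, start=1):
    print("Top{} accuracy: {:.2f}%".format(i, acc * 100))
```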
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/README.md b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/README.md index b7ed696a9143cf7790906d90f02f23ba5fe9765e..a35e5a7aeb75293b074f466252728b56c0235b8a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/README.md @@ -1,246 +1,246 @@ -# Res2Net101_v1b Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[Res2Net101_v1b论文](https://arxiv.org/pdf/1904.01169.pdf) - -### 1.2 代码地址 -[Res2Net101_v1b代码](https://github.com/Res2Net/Res2Net-PretrainedModels) -branch:master -commit_id:7ed111407a22723672eac575b300adc04e75e925 - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 - -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2.52 -``` - -**说明:** -> X86架构:pytorch和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[Res2Net101_v1b预训练pth权重文件](https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth) -``` -wget https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth -``` -文件MD5sum:ebf7af4c138fcf25db859705907af833 - -2.Res2Net101_v1b模型代码获取方式如下 -``` -git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git -``` -3.编写pth2onnx脚本res2net101_v1b_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 res2net101_v1b_pth2onnx.py res2net101_v1b_26w_4s-0812c246.pth res2net101_v1b.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=./res2net101_v1b.onnx --output=res2net101_v1b_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 
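Section 3.1 of the README above refers to a res2net101_v1b_pth2onnx.py script that is not included in this patch. The sketch below shows what such an export typically looks like; the res2net101_v1b_26w_4s constructor is assumed to come from the cloned Res2Net-PretrainedModels repository, and the input name "image", output name "class", 1x3x224x224 shape and opset 11 are taken from the atc command and notes in the README. It is an illustration, not the project's actual script.
```
# Hypothetical sketch of the pth-to-onnx step from section 3.1 of the README
# above; res2net101_v1b_pth2onnx.py itself is not part of this patch.
# Assumption: res2net101_v1b_26w_4s is importable from the cloned
# Res2Net-PretrainedModels repository. Input/output names, shape and opset 11
# follow the atc command and notes in the README.
import sys
import torch
from res2net_v1b import res2net101_v1b_26w_4s  # assumed module from the cloned repo


def pth2onnx(pth_path, onnx_path):
    model = res2net101_v1b_26w_4s(pretrained=False)
    model.load_state_dict(torch.load(pth_path, map_location='cpu'))
    model.eval()

    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        input_names=['image'],
        output_names=['class'],
        dynamic_axes={'image': {0: '-1'}, 'class': {0: '-1'}},
        opset_version=11,
    )


if __name__ == '__main__':
    pth2onnx(sys.argv[1], sys.argv[2])
```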
-该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py res2net101 /opt/npu/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./res2net101_v1b_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=res2net101_v1b_bs1.om -input_text_path=./res2net101_v1b_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "81.22%"}, {"key": "Top2 accuracy", "value": "90.21%"}, {"key": "Top3 accuracy", "value": "93.1%"}, {"key": "Top4 accuracy", "value": "94.44%"}, {"key": "Top5 accuracy", "value": "95.36%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[论文代码仓官方精度](https://mmcheng.net/res2net/) -``` -Model Acc@1 Acc@5 -Res2Net101_v1b 81.23 95.36 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 55.9084, latency: 894320 -[data read] throughputRate: 59.1894, moduleLatency: 16.8949 -[preprocess] throughputRate: 59.0597, moduleLatency: 16.932 -[infer] throughputRate: 56.0004, Interface throughputRate: 62.9455, moduleLatency: 17.2581 -[post] throughputRate: 56.0004, moduleLatency: 17.857 -``` -Interface throughputRate: 62.9455,62.9455x4=251.782既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 71.4937, latency: 699363 -[data read] throughputRate: 75.8049, moduleLatency: 13.1918 -[preprocess] throughputRate: 75.5936, moduleLatency: 13.2286 -[infer] throughputRate: 71.6788, Interface throughputRate: 86.1957, moduleLatency: 13.0185 -[post] throughputRate: 4.47989, moduleLatency: 223.22 -``` -Interface throughputRate: 86.1957,86.1957x4=344.7828既是batch16 310单卡吞吐率 -batch4性能: -``` -[e2e] throughputRate: 64.92, latency: 770179 -[data read] throughputRate: 68.6591, 
moduleLatency: 14.5647 -[preprocess] throughputRate: 68.5431, moduleLatency: 14.5894 -[infer] throughputRate: 65.0303, Interface throughputRate: 78.2596, moduleLatency: 14.2895 -[post] throughputRate: 16.2575, moduleLatency: 61.51 -``` -batch4 310单卡吞吐率:78.2596x4=313.0384fps -batch8性能: -``` -[e2e] throughputRate: 69.3296, latency: 721193 -[data read] throughputRate: 73.486, moduleLatency: 13.608 -[preprocess] throughputRate: 73.2601, moduleLatency: 13.65 -[infer] throughputRate: 69.5028, Interface throughputRate: 82.7469, moduleLatency: 13.5299 -[post] throughputRate: 8.68781, moduleLatency: 115.104 -``` -batch8 310单卡吞吐率:82.7469x4=330.9876fps -batch32性能: -``` -[e2e] throughputRate: 70.3878, latency: 710350 -[data read] throughputRate: 74.4979, moduleLatency: 13.4232 -[preprocess] throughputRate: 74.3318, moduleLatency: 13.4532 -[infer] throughputRate: 70.5551, Interface throughputRate: 86.8456, moduleLatency: 12.9157 -[post] throughputRate: 2.20553, moduleLatency: 453.405 -``` -batch32 310单卡吞吐率:86.8456x4=347.3824fps - - **性能优化:** -从profiling性能数据op_statistic_0_1.csv看出,耗时最多的算子主要是TransData,Conv2D与ConcatD,而Conv2D算子不存在性能问题。 -由于格式转换om模型Conv2D前后需要有TransData算子,从op_summary_0_1.csv看出,单个TransData算子aicore耗时不大。 +# Res2Net101_v1b Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[Res2Net101_v1b论文](https://arxiv.org/pdf/1904.01169.pdf) + +### 1.2 代码地址 +[Res2Net101_v1b代码](https://github.com/Res2Net/Res2Net-PretrainedModels) +branch:master +commit_id:7ed111407a22723672eac575b300adc04e75e925 + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 + +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2.52 +``` + +**说明:** +> X86架构:pytorch和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[Res2Net101_v1b预训练pth权重文件](https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth) +``` +wget https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth +``` +文件MD5sum:ebf7af4c138fcf25db859705907af833 + +2.Res2Net101_v1b模型代码获取方式如下 +``` +git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git +``` +3.编写pth2onnx脚本res2net101_v1b_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 res2net101_v1b_pth2onnx.py res2net101_v1b_26w_4s-0812c246.pth res2net101_v1b.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 
01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=./res2net101_v1b.onnx --output=res2net101_v1b_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py res2net101 /opt/npu/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./res2net101_v1b_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=res2net101_v1b_bs1.om -input_text_path=./res2net101_v1b_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "81.22%"}, {"key": "Top2 accuracy", "value": "90.21%"}, {"key": "Top3 accuracy", "value": "93.1%"}, {"key": "Top4 accuracy", "value": "94.44%"}, {"key": "Top5 accuracy", "value": "95.36%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[论文代码仓官方精度](https://mmcheng.net/res2net/) +``` +Model Acc@1 Acc@5 +Res2Net101_v1b 81.23 95.36 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 55.9084, latency: 894320 +[data read] throughputRate: 59.1894, moduleLatency: 16.8949 +[preprocess] throughputRate: 59.0597, moduleLatency: 16.932 +[infer] throughputRate: 56.0004, Interface throughputRate: 62.9455, moduleLatency: 17.2581 +[post] throughputRate: 56.0004, moduleLatency: 17.857 +``` +Interface throughputRate: 62.9455,62.9455x4=251.782既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 71.4937, latency: 699363 +[data read] throughputRate: 
75.8049, moduleLatency: 13.1918 +[preprocess] throughputRate: 75.5936, moduleLatency: 13.2286 +[infer] throughputRate: 71.6788, Interface throughputRate: 86.1957, moduleLatency: 13.0185 +[post] throughputRate: 4.47989, moduleLatency: 223.22 +``` +Interface throughputRate: 86.1957,86.1957x4=344.7828既是batch16 310单卡吞吐率 +batch4性能: +``` +[e2e] throughputRate: 64.92, latency: 770179 +[data read] throughputRate: 68.6591, moduleLatency: 14.5647 +[preprocess] throughputRate: 68.5431, moduleLatency: 14.5894 +[infer] throughputRate: 65.0303, Interface throughputRate: 78.2596, moduleLatency: 14.2895 +[post] throughputRate: 16.2575, moduleLatency: 61.51 +``` +batch4 310单卡吞吐率:78.2596x4=313.0384fps +batch8性能: +``` +[e2e] throughputRate: 69.3296, latency: 721193 +[data read] throughputRate: 73.486, moduleLatency: 13.608 +[preprocess] throughputRate: 73.2601, moduleLatency: 13.65 +[infer] throughputRate: 69.5028, Interface throughputRate: 82.7469, moduleLatency: 13.5299 +[post] throughputRate: 8.68781, moduleLatency: 115.104 +``` +batch8 310单卡吞吐率:82.7469x4=330.9876fps +batch32性能: +``` +[e2e] throughputRate: 70.3878, latency: 710350 +[data read] throughputRate: 74.4979, moduleLatency: 13.4232 +[preprocess] throughputRate: 74.3318, moduleLatency: 13.4532 +[infer] throughputRate: 70.5551, Interface throughputRate: 86.8456, moduleLatency: 12.9157 +[post] throughputRate: 2.20553, moduleLatency: 453.405 +``` +batch32 310单卡吞吐率:86.8456x4=347.3824fps + + **性能优化:** +从profiling性能数据op_statistic_0_1.csv看出,耗时最多的算子主要是TransData,Conv2D与ConcatD,而Conv2D算子不存在性能问题。 +由于格式转换om模型Conv2D前后需要有TransData算子,从op_summary_0_1.csv看出,单个TransData算子aicore耗时不大。 如果优化就需要优化掉过多的TransData算子。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - 
table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ 
== '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_torch_preprocess.py index 987f8cc776c874f60d629172b52625b22d5a39fc..999675ae55f6b71b54f7384d954bebd818dafd3c 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/imagenet_torch_preprocess.py @@ -1,117 +1,117 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'res2net101': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'res2net101': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/requirements.txt index bebc253c759bfdba913bc30de29b6a2de399ccaf..8bccb088d46196b08cfb04ff1c797c95df536a6f 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2.52 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/res2net101_v1b_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/res2net101_v1b_pth2onnx.py index 6d2f9c925a19b619c54c5aed48ced6690a0bb48a..87d0ed15f37344bbf9e7f849e21cf03ca70b82b8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/res2net101_v1b_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/res2net101_v1b_pth2onnx.py @@ -1,36 +1,36 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -sys.path.append(r"./Res2Net-PretrainedModels") -from res2net_v1b import res2net101_v1b - -def pth2onnx(input_file, output_file): - model = res2net101_v1b(pretrained=False) - checkpoint = torch.load(input_file, map_location=torch.device('cpu')) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +sys.path.append(r"./Res2Net-PretrainedModels") +from res2net_v1b import res2net101_v1b + +def pth2onnx(input_file, output_file): + model = res2net101_v1b(pretrained=False) + checkpoint = torch.load(input_file, map_location=torch.device('cpu')) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/test/README.md b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/test/README.md index 7e5f518ae25b2f972581ef84e2224ec1730c9a1e..027331b15d770f7ab84202a23f584142c7b1f8ca 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b/test/README.md @@ -1,24 +1,24 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/imagenet - -2.进入工作目录 -cd Res2Net101_v1b - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git - -5.获取权重文件 -wget https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth - -6.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/opt/npu/imagenet +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/imagenet + +2.进入工作目录 +cd Res2Net101_v1b + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/Res2Net/Res2Net-PretrainedModels.git + +5.获取权重文件 +wget https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth + +6.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/opt/npu/imagenet diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/LICENSE b/ACL_PyTorch/contrib/cv/classfication/ResNet101/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet101/README.md index 864fb944d595b7f4aeee2a2628270a24eb0a2e67..644b8a73255013bdb5ab124ebdc515687cb1f19e 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/README.md @@ -1,250 +1,250 @@ -# ResNet101 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[ResNet101论文](https://arxiv.org/pdf/1512.03385.pdf) - -### 1.2 代码地址 -[ResNet101代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -branch:master -commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 - -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[ResNet-101预训练pth权重文件](https://download.pytorch.org/models/resnet101-63fe2227.pth) -``` -wget https://download.pytorch.org/models/resnet101-63fe2227.pth -``` -文件MD5sum:b258f8e54abb7de9c960ff19cc662d76 - -2.ResNet101模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 -``` -git clone https://github.com/pytorch/vision -cd vision -python3.7 setup.py install -cd .. 
-``` -3.编写pth2onnx脚本resnet101_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 resnet101_pth2onnx.py ./resnet101-63fe2227.pth resnet101.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=./resnet101.onnx --output=resnet101_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet101_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet101_bs1.om -input_text_path=./resnet101_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.37%"}, {"key": "Top2 accuracy", "value": "87.1%"}, {"key": "Top3 accuracy", "value": "90.61%"}, {"key": "Top4 accuracy", "value": "92.42%"}, {"key": "Top5 accuracy", "value": "93.54%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) -``` -Model Acc@1 Acc@5 -ResNet-101 77.374 93.546 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi 
info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 170.628, latency: 293035 -[data read] throughputRate: 181.571, moduleLatency: 5.50749 -[preprocess] throughputRate: 180.466, moduleLatency: 5.5412 -[infer] throughputRate: 171.595, Interface throughputRate: 247.898, moduleLatency: 5.12562 -[post] throughputRate: 171.595, moduleLatency: 5.82768 -``` -Interface throughputRate: 247.898,247.898x4=991.592既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 185.903, latency: 268957 -[data read] throughputRate: 191.266, moduleLatency: 5.22833 -[preprocess] throughputRate: 190.761, moduleLatency: 5.24217 -[infer] throughputRate: 187.131, Interface throughputRate: 401.046, moduleLatency: 3.94051 -[post] throughputRate: 11.6954, moduleLatency: 85.5035 -``` -Interface throughputRate: 401.046,401.046x4=1604.184既是batch16 310单卡吞吐率 -batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_0.txt: -``` -[e2e] throughputRate: 184.444, latency: 271085 -[data read] throughputRate: 196.412, moduleLatency: 5.09134 -[preprocess] throughputRate: 195.837, moduleLatency: 5.1063 -[infer] throughputRate: 185.624, Interface throughputRate: 331.096, moduleLatency: 4.52436 -[post] throughputRate: 46.4056, moduleLatency: 21.5491 -``` -Interface throughputRate: 331.096,331.096x4=1324.384既是batch4 310单卡吞吐率 -batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_0.txt: -``` -[e2e] throughputRate: 196.051, latency: 255036 -[data read] throughputRate: 209.29, moduleLatency: 4.77806 -[preprocess] throughputRate: 207.914, moduleLatency: 4.80969 -[infer] throughputRate: 197.513, Interface throughputRate: 371.905, moduleLatency: 4.15513 -[post] throughputRate: 24.6888, moduleLatency: 40.5042 -``` -Interface throughputRate: 371.905,371.905x4=1487.62既是batch8 310单卡吞吐率 -batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_0.txt: -``` -[e2e] throughputRate: 176.215, latency: 283744 -[data read] throughputRate: 187.024, moduleLatency: 5.34691 -[preprocess] throughputRate: 186.183, moduleLatency: 5.37105 -[infer] throughputRate: 177.675, Interface throughputRate: 370.456, moduleLatency: 4.14361 -[post] throughputRate: 5.55402, moduleLatency: 180.05 - -``` -Interface throughputRate: 370.456,370.456x4=1481.82既是batch32 310单卡吞吐率 - - **性能优化:** ->没有遇到性能不达标的问题,故不需要进行性能优化 - +# ResNet101 Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[ResNet101论文](https://arxiv.org/pdf/1512.03385.pdf) + +### 1.2 代码地址 
+[ResNet101代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) +branch:master +commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 + +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[ResNet-101预训练pth权重文件](https://download.pytorch.org/models/resnet101-63fe2227.pth) +``` +wget https://download.pytorch.org/models/resnet101-63fe2227.pth +``` +文件MD5sum:b258f8e54abb7de9c960ff19cc662d76 + +2.ResNet101模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 +``` +git clone https://github.com/pytorch/vision +cd vision +python3.7 setup.py install +cd .. +``` +3.编写pth2onnx脚本resnet101_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 resnet101_pth2onnx.py ./resnet101-63fe2227.pth resnet101.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=./resnet101.onnx --output=resnet101_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet101_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet101_bs1.om -input_text_path=./resnet101_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` 
+第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.37%"}, {"key": "Top2 accuracy", "value": "87.1%"}, {"key": "Top3 accuracy", "value": "90.61%"}, {"key": "Top4 accuracy", "value": "92.42%"}, {"key": "Top5 accuracy", "value": "93.54%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) +``` +Model Acc@1 Acc@5 +ResNet-101 77.374 93.546 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 170.628, latency: 293035 +[data read] throughputRate: 181.571, moduleLatency: 5.50749 +[preprocess] throughputRate: 180.466, moduleLatency: 5.5412 +[infer] throughputRate: 171.595, Interface throughputRate: 247.898, moduleLatency: 5.12562 +[post] throughputRate: 171.595, moduleLatency: 5.82768 +``` +Interface throughputRate: 247.898,247.898x4=991.592既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 185.903, latency: 268957 +[data read] throughputRate: 191.266, moduleLatency: 5.22833 +[preprocess] throughputRate: 190.761, moduleLatency: 5.24217 +[infer] throughputRate: 187.131, Interface throughputRate: 401.046, moduleLatency: 3.94051 +[post] throughputRate: 11.6954, moduleLatency: 85.5035 +``` +Interface throughputRate: 401.046,401.046x4=1604.184既是batch16 310单卡吞吐率 +batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_0.txt: +``` +[e2e] throughputRate: 184.444, latency: 271085 +[data read] throughputRate: 196.412, moduleLatency: 5.09134 +[preprocess] throughputRate: 195.837, moduleLatency: 5.1063 +[infer] throughputRate: 185.624, Interface throughputRate: 331.096, moduleLatency: 4.52436 +[post] throughputRate: 46.4056, moduleLatency: 21.5491 +``` +Interface throughputRate: 331.096,331.096x4=1324.384既是batch4 310单卡吞吐率 +batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_0.txt: +``` +[e2e] throughputRate: 196.051, latency: 255036 +[data read] throughputRate: 209.29, moduleLatency: 4.77806 +[preprocess] throughputRate: 207.914, moduleLatency: 4.80969 +[infer] throughputRate: 197.513, Interface throughputRate: 371.905, moduleLatency: 4.15513 +[post] throughputRate: 24.6888, moduleLatency: 40.5042 +``` +Interface throughputRate: 371.905,371.905x4=1487.62既是batch8 310单卡吞吐率 +batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_0.txt: +``` +[e2e] throughputRate: 176.215, latency: 283744 +[data read] throughputRate: 187.024, moduleLatency: 5.34691 +[preprocess] throughputRate: 186.183, moduleLatency: 5.37105 +[infer] throughputRate: 177.675, Interface throughputRate: 370.456, moduleLatency: 4.14361 +[post] throughputRate: 5.55402, moduleLatency: 180.05 + +``` +Interface 
throughputRate: 370.456,370.456x4=1481.82既是batch32 310单卡吞吐率 + + **性能优化:** +>没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/ResNet101/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
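The imagenet_acc_eval.py script whose diff begins above produces the result.json summarized in section 6.1; section 6.3 then accepts the om model if its TopN accuracy drops by no more than 1% relative to the torchvision reference. A tiny illustrative check of that rule is sketched below; the helper name is hypothetical and the numbers are the ResNet101 values quoted earlier.
```
# Illustrative check of the "accuracy drop within 1%" rule from section 6.3.
# accuracy_ok is a hypothetical helper; the values are the ResNet101 figures from the README,
# and the 1% threshold is interpreted here as a relative drop against the reference accuracy.
def accuracy_ok(om_acc, reference_acc, max_drop_pct=1.0):
    return (reference_acc - om_acc) / reference_acc * 100.0 <= max_drop_pct

print(accuracy_ok(77.37, 77.374))   # Top1 -> True
print(accuracy_ok(93.54, 93.546))   # Top5 -> True
```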
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_torch_preprocess.py index 1ab60b54dd7fd5b59ca733666c1dc63e07c980c1..a99dd271d0df5f9b21aa9c7da3fe7edb491a27e6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/imagenet_torch_preprocess.py @@ -1,117 +1,117 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
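The preprocessing script whose diff follows (imagenet_torch_preprocess.py) reimplements the standard torchvision ImageNet pipeline with PIL and NumPy: Resize(256) -> CenterCrop(224) -> ToTensor -> Normalize, as its inline comments note. For readers more familiar with torchvision, a roughly equivalent transform for the 'resnet' configuration is sketched below; it is an illustration, not part of the repository.
```
# Roughly equivalent torchvision pipeline for the 'resnet' entry of model_config
# (Resize -> CenterCrop -> ToTensor -> Normalize); illustrative only.
from torchvision import transforms

resnet_preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),                      # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```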
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/ResNet101/requirements.txt index 2fc4e802c476feda2a9866a85630f7f3b29428d7..d072d9aa6f2e7a7b0044ff93d036c3c0347ee5c9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/resnet101_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/ResNet101/resnet101_pth2onnx.py index 46ab195411a21bf39c1d67d864a72ef0e7f9310f..8eff59a68086db8153e345f4956f018710ffaf7c 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/resnet101_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/resnet101_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.resnet101(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.resnet101(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/README.md index 1c1ba3c42f85af564f8149e303ceb33763593939..d7ffd9773a50e6d2f86f52b2145e94f64b42012f 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/README.md @@ -1,29 +1,29 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/root/datasets/ - -2.进入工作目录 -cd ResNet101 - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/pytorch/vision - -5.如果模型代码需要安装,则安装模型代码 -cd vision -python3.7 setup.py install -cd .. - -6.获取权重文件 -wget https://download.pytorch.org/models/resnet101-63fe2227.pth - -7.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/root/datasets/ + +2.进入工作目录 +cd ResNet101 + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/pytorch/vision + +5.如果模型代码需要安装,则安装模型代码 +cd vision +python3.7 setup.py install +cd .. + +6.获取权重文件 +wget https://download.pytorch.org/models/resnet101-63fe2227.pth + +7.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet101/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
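After exporting with resnet101_pth2onnx.py (shown earlier in this diff), it can be useful to sanity-check the ONNX file before handing it to ATC. A minimal optional check with the onnx package is sketched below; the file name resnet101.onnx is an assumption following the naming pattern used in the ResNet18 instructions.
```
# Optional sanity check on the exported model before ATC conversion (illustrative).
# "resnet101.onnx" is an assumed output name, by analogy with resnet18.onnx.
import onnx

model = onnx.load("resnet101.onnx")
onnx.checker.check_model(model)                 # raises if the graph is malformed
print([inp.name for inp in model.graph.input])  # expected to contain 'image'
```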
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/LICENSE b/ACL_PyTorch/contrib/cv/classfication/ResNet18/LICENSE index 657549b86065a3d34c7dd038edee91cedb8cb05a..dcc65541a1b5f985560b92c275b8328469d50742 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/LICENSE @@ -1,30 +1,30 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. -Copyright 2020 Huawei Technologies Co., Ltd - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. +Copyright 2020 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet18/README.md index 70663bd09a49c00d02fc4c49e8f04955ddc0ed3f..77b0b5b46fb206da70822d50be76f7af8304e77f 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/README.md @@ -1,249 +1,249 @@ -# ResNet18 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[ResNet18论文](https://arxiv.org/pdf/1512.03385.pdf) - -### 1.2 代码地址 -[ResNet18代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -branch:master -commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 - -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[ResNet-18预训练pth权重文件](https://download.pytorch.org/models/resnet18-f37072fd.pth) -``` -wget https://download.pytorch.org/models/resnet18-f37072fd.pth -``` -文件MD5sum:e0b1c919e74f9a193d36871d9964bf7d - -2.ResNet18模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 -``` -git clone https://github.com/pytorch/vision -cd vision -python3.7 setup.py install -cd .. 
-``` -3.编写pth2onnx脚本resnet18_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 resnet18_pth2onnx.py ./resnet18-f37072fd.pth resnet18.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=./resnet18.onnx --output=resnet18_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet18_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet18_bs1.om -input_text_path=./resnet18_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "69.74%"}, {"key": "Top2 accuracy", "value": "80.49%"}, {"key": "Top3 accuracy", "value": "84.96%"}, {"key": "Top4 accuracy", "value": "87.38%"}, {"key": "Top5 accuracy", "value": "89.09%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) -``` -Model Acc@1 Acc@5 -ResNet-18 69.758 89.078 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi 
info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 336.39, latency: 148637 -[data read] throughputRate: 357.787, moduleLatency: 2.79496 -[preprocess] throughputRate: 357.147, moduleLatency: 2.79997 -[infer] throughputRate: 338.442, Interface throughputRate: 787.709, moduleLatency: 2.21785 -[post] throughputRate: 338.44, moduleLatency: 2.95473 -``` -Interface throughputRate: 787.709,787.709x4=3150.836既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 355.715, latency: 140562 -[data read] throughputRate: 377.215, moduleLatency: 2.65101 -[preprocess] throughputRate: 376.578, moduleLatency: 2.65549 -[infer] throughputRate: 357.628, Interface throughputRate: 1509.64, moduleLatency: 2.1165 -[post] throughputRate: 22.3509, moduleLatency: 44.7409 -``` -Interface throughputRate: 1509.64,1509.64x4=6038.56既是batch16 310单卡吞吐率 -batch4性能: -``` -[e2e] throughputRate: 218.705, latency: 228619 -[data read] throughputRate: 219.993, moduleLatency: 4.5456 -[preprocess] throughputRate: 219.699, moduleLatency: 4.55169 -[infer] throughputRate: 219.574, Interface throughputRate: 1103.24, moduleLatency: 2.38868 -[post] throughputRate: 54.8929, moduleLatency: 18.2173 -``` -batch4 310单卡吞吐率:1103.24x4=4412.96fps -batch8性能: -``` -[e2e] throughputRate: 175.032, latency: 285662 -[data read] throughputRate: 175.909, moduleLatency: 5.68474 -[preprocess] throughputRate: 175.703, moduleLatency: 5.69143 -[infer] throughputRate: 175.795, Interface throughputRate: 1446.02, moduleLatency: 2.17869 -[post] throughputRate: 21.9741, moduleLatency: 45.5081 -``` -batch8 310单卡吞吐率:1446.02x4=5784.08fps -batch32性能: -``` -[e2e] throughputRate: 151.68, latency: 329642 -[data read] throughputRate: 152.292, moduleLatency: 6.56634 -[preprocess] throughputRate: 152.082, moduleLatency: 6.57541 -[infer] throughputRate: 152.081, Interface throughputRate: 1375.46, moduleLatency: 2.20383 -[post] throughputRate: 4.75395, moduleLatency: 210.352 -``` -batch32 310单卡吞吐率:1375.46x4=5501.84fps - - **性能优化:** ->没有遇到性能不达标的问题,故不需要进行性能优化 - +# ResNet18 Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[ResNet18论文](https://arxiv.org/pdf/1512.03385.pdf) + +### 1.2 代码地址 +[ResNet18代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) +branch:master +commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 + +torch == 1.5.1 +torchvision == 
0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[ResNet-18预训练pth权重文件](https://download.pytorch.org/models/resnet18-f37072fd.pth) +``` +wget https://download.pytorch.org/models/resnet18-f37072fd.pth +``` +文件MD5sum:e0b1c919e74f9a193d36871d9964bf7d + +2.ResNet18模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 +``` +git clone https://github.com/pytorch/vision +cd vision +python3.7 setup.py install +cd .. +``` +3.编写pth2onnx脚本resnet18_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 resnet18_pth2onnx.py ./resnet18-f37072fd.pth resnet18.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=./resnet18.onnx --output=resnet18_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet18_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet18_bs1.om -input_text_path=./resnet18_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "69.74%"}, {"key": "Top2 accuracy", "value": "80.49%"}, {"key": "Top3 accuracy", "value": 
"84.96%"}, {"key": "Top4 accuracy", "value": "87.38%"}, {"key": "Top5 accuracy", "value": "89.09%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) +``` +Model Acc@1 Acc@5 +ResNet-18 69.758 89.078 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 336.39, latency: 148637 +[data read] throughputRate: 357.787, moduleLatency: 2.79496 +[preprocess] throughputRate: 357.147, moduleLatency: 2.79997 +[infer] throughputRate: 338.442, Interface throughputRate: 787.709, moduleLatency: 2.21785 +[post] throughputRate: 338.44, moduleLatency: 2.95473 +``` +Interface throughputRate: 787.709,787.709x4=3150.836既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 355.715, latency: 140562 +[data read] throughputRate: 377.215, moduleLatency: 2.65101 +[preprocess] throughputRate: 376.578, moduleLatency: 2.65549 +[infer] throughputRate: 357.628, Interface throughputRate: 1509.64, moduleLatency: 2.1165 +[post] throughputRate: 22.3509, moduleLatency: 44.7409 +``` +Interface throughputRate: 1509.64,1509.64x4=6038.56既是batch16 310单卡吞吐率 +batch4性能: +``` +[e2e] throughputRate: 218.705, latency: 228619 +[data read] throughputRate: 219.993, moduleLatency: 4.5456 +[preprocess] throughputRate: 219.699, moduleLatency: 4.55169 +[infer] throughputRate: 219.574, Interface throughputRate: 1103.24, moduleLatency: 2.38868 +[post] throughputRate: 54.8929, moduleLatency: 18.2173 +``` +batch4 310单卡吞吐率:1103.24x4=4412.96fps +batch8性能: +``` +[e2e] throughputRate: 175.032, latency: 285662 +[data read] throughputRate: 175.909, moduleLatency: 5.68474 +[preprocess] throughputRate: 175.703, moduleLatency: 5.69143 +[infer] throughputRate: 175.795, Interface throughputRate: 1446.02, moduleLatency: 2.17869 +[post] throughputRate: 21.9741, moduleLatency: 45.5081 +``` +batch8 310单卡吞吐率:1446.02x4=5784.08fps +batch32性能: +``` +[e2e] throughputRate: 151.68, latency: 329642 +[data read] throughputRate: 152.292, moduleLatency: 6.56634 +[preprocess] throughputRate: 152.082, moduleLatency: 6.57541 +[infer] throughputRate: 152.081, Interface throughputRate: 1375.46, moduleLatency: 2.20383 +[post] throughputRate: 4.75395, moduleLatency: 210.352 +``` +batch32 310单卡吞吐率:1375.46x4=5501.84fps + + **性能优化:** +>没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/ResNet18/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file 
except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
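For clarity, get_bin_info in the gen_dataset_info.py shown above writes one whitespace-separated record per bin file (index, path, width, height). With the invocation from the README, the generated .info file would contain lines of roughly the following form (file names are placeholders):
```
0 ./prep_dataset/ILSVRC2012_val_00000001.bin 224 224
1 ./prep_dataset/ILSVRC2012_val_00000002.bin 224 224
```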
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_torch_preprocess.py index 1ab60b54dd7fd5b59ca733666c1dc63e07c980c1..a99dd271d0df5f9b21aa9c7da3fe7edb491a27e6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/imagenet_torch_preprocess.py @@ -1,117 +1,117 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
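As a clarifying aside on the accuracy post-processing above: create_visualization_statistical_result ranks each prediction file's class scores with np.argsort(-prediction), records the rank at which the ground-truth label appears (count_hit), and reports the cumulative sum of those hits divided by the image count as Top-1 through Top-5 accuracy. The sketch below restates that logic on synthetic data; it is an illustration only and does not read the benchmark's output files.

```python
import numpy as np

def topn_accuracy(scores, labels, topn=5):
    """Cumulative Top-1..Top-N accuracy, mirroring the count_hit / cumsum logic above."""
    count_hit = np.zeros(topn)
    for score_vec, gt in zip(scores, labels):
        sort_index = np.argsort(-score_vec)      # class ids ranked by descending score
        for i in range(topn):
            if sort_index[i] == gt:              # ground truth found at rank i
                count_hit[i] += 1
                break
    return np.cumsum(count_hit) / len(labels)    # Top-1 .. Top-N accuracy

# toy usage with random scores (illustration only, not real model output)
rng = np.random.default_rng(0)
scores = rng.random((100, 1000))                 # 100 "images", 1000 classes
labels = rng.integers(0, 1000, size=100)
print(topn_accuracy(scores, labels))
```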
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/ResNet18/requirements.txt index 2fc4e802c476feda2a9866a85630f7f3b29428d7..d072d9aa6f2e7a7b0044ff93d036c3c0347ee5c9 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/resnet18_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/ResNet18/resnet18_pth2onnx.py index f7972ea3e1ecb2a5adee7e400d77ca66dec258ce..5933787407e4b3f92af4a90ad32c21d52704b04d 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/resnet18_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/resnet18_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.resnet18(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
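For readers comparing the manual preprocessing above with standard PyTorch code: the 'resnet' entry of model_config (resize to 256, center-crop to 224, HWC to CHW, divide by 255, normalize with ImageNet mean/std) mirrors the usual torchvision evaluation transform. A rough equivalent is sketched below, assuming torchvision as pinned in requirements.txt and a hypothetical input image; it is for reference only and is not part of the repository's scripts.

```python
from PIL import Image
import numpy as np
from torchvision import transforms

# Rough equivalent of model_config['resnet'] above: Resize 256 -> CenterCrop 224 -> Normalize
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),                               # HWC uint8 -> CHW float32 in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

img = Image.open("example.jpg").convert("RGB")           # hypothetical input image
arr = eval_transform(img).numpy().astype(np.float32)     # same CHW float32 layout as the script
arr.tofile("example.bin")
```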
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.resnet18(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/README.md index d9901cc5fc13a3cc2148d71691d4f97ffb24e378..0ae0ab0e82849c72a94ac50082a85518b5aa6495 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/README.md @@ -1,29 +1,29 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/root/datasets/ - -2.进入工作目录 -cd ResNet18 - -3.安装必要的依赖 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/pytorch/vision - -5.如果模型代码需要安装,则安装模型代码 -cd vision -python3.7 setup.py install -cd .. - -6.获取权重文件 -wget https://download.pytorch.org/models/resnet18-f37072fd.pth - -7.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/root/datasets/ + +2.进入工作目录 +cd ResNet18 + +3.安装必要的依赖 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/pytorch/vision + +5.如果模型代码需要安装,则安装模型代码 +cd vision +python3.7 setup.py install +cd .. + +6.获取权重文件 +wget https://download.pytorch.org/models/resnet18-f37072fd.pth + +7.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet18/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
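A note on the resnet18_pth2onnx.py export above: dynamic_axes marks dimension 0 of both the 'image' input and the 'class' output as dynamic, so the exported model should accept arbitrary batch sizes. If a quick sanity check is wanted, something like the following sketch could be used (the onnx package is already pinned in requirements.txt; the file name is a placeholder for whatever output path was passed to the script):

```python
import onnx

model = onnx.load("resnet18.onnx")           # placeholder: the file produced by resnet18_pth2onnx.py
onnx.checker.check_model(model)              # structural validity check

# The batch dimension should be symbolic (dim_param set) rather than a fixed dim_value.
image_input = model.graph.input[0]
batch_dim = image_input.type.tensor_type.shape.dim[0]
print(image_input.name, "batch dim:", batch_dim.dim_param or batch_dim.dim_value)
```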
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/LICENSE b/ACL_PyTorch/contrib/cv/classfication/ResNet34/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet34/README.md index 2c2062bf5e642598f3d9cb4ddeb85b2f410e6bd2..60c3739d7bc5d57864e5f540345f1db93850fe7c 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/README.md @@ -1,250 +1,250 @@ -# ResNet34 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[ResNet34论文](https://arxiv.org/pdf/1512.03385.pdf) - -### 1.2 代码地址 -[ResNet34代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -branch:master -commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 - -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2.52 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[ResNet-34预训练pth权重文件](https://download.pytorch.org/models/resnet34-b627a593.pth) -``` -wget https://download.pytorch.org/models/resnet34-b627a593.pth -``` -文件MD5sum:78fe1097b28dbda1373a700020afeed9 - -2.ResNet34模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 -``` -git clone https://github.com/pytorch/vision -cd vision -python3.7 setup.py install -cd .. 
-``` -3.编写pth2onnx脚本resnet34_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 resnet34_pth2onnx.py ./resnet34-b627a593.pth resnet34.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=./resnet34.onnx --output=resnet34_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet34_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet34_bs1.om -input_text_path=./resnet34_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "73.31%"}, {"key": "Top2 accuracy", "value": "83.7%"}, {"key": "Top3 accuracy", "value": "87.72%"}, {"key": "Top4 accuracy", "value": "89.92%"}, {"key": "Top5 accuracy", "value": "91.44%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) -``` -Model Acc@1 Acc@5 -ResNet-34 73.314 91.420 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi 
info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 275.179, latency: 181700 -[data read] throughputRate: 293.361, moduleLatency: 3.40877 -[preprocess] throughputRate: 292.803, moduleLatency: 3.41527 -[infer] throughputRate: 277.19, Interface throughputRate: 503.525, moduleLatency: 2.7551 -[post] throughputRate: 277.189, moduleLatency: 3.60764 -``` -Interface throughputRate: 503.525,503.525x4=2014.1既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 177.5, latency: 281691 -[data read] throughputRate: 181.124, moduleLatency: 5.52107 -[preprocess] throughputRate: 180.841, moduleLatency: 5.52973 -[infer] throughputRate: 178.082, Interface throughputRate: 938.992, moduleLatency: 2.53232 -[post] throughputRate: 11.1299, moduleLatency: 89.8479 -``` -Interface throughputRate: 938.992,938.992x4=3755.968既是batch16 310单卡吞吐率 -batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_1.txt: -``` -[e2e] throughputRate: 238.247, latency: 209866 -[data read] throughputRate: 251.629, moduleLatency: 3.97411 -[preprocess] throughputRate: 251.149, moduleLatency: 3.98169 -[infer] throughputRate: 239.788, Interface throughputRate: 672.405, moduleLatency: 3.026 -[post] throughputRate: 59.9466, moduleLatency: 16.6815 -``` -Interface throughputRate: 672.405,672.405x4=2689.62既是batch4 310单卡吞吐率 -batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_1.txt: -``` -[e2e] throughputRate: 223.522, latency: 223692 -[data read] throughputRate: 233.498, moduleLatency: 4.28269 -[preprocess] throughputRate: 233.151, moduleLatency: 4.28906 -[infer] throughputRate: 224.317, Interface throughputRate: 884.554, moduleLatency: 2.62576 -[post] throughputRate: 28.0393, moduleLatency: 35.6643 -``` -Interface throughputRate: 884.554,884.554x4=3538.216既是batch8 310单卡吞吐率 -batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_1.txt: -``` -[e2e] throughputRate: 200.951, latency: 248817 -[data read] throughputRate: 209.207, moduleLatency: 4.77995 -[preprocess] throughputRate: 208.778, moduleLatency: 4.78978 -[infer] throughputRate: 202.034, Interface throughputRate: 875.835, moduleLatency: 2.617 -[post] throughputRate: 6.31544, moduleLatency: 158.342 - - -``` -Interface throughputRate: 875.835,875.835x4=3503.34既是batch32 310单卡吞吐率 - - **性能优化:** ->没有遇到性能不达标的问题,故不需要进行性能优化 - +# ResNet34 Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[ResNet34论文](https://arxiv.org/pdf/1512.03385.pdf) + +### 1.2 代码地址 
+[ResNet34代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) +branch:master +commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 + +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2.52 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[ResNet-34预训练pth权重文件](https://download.pytorch.org/models/resnet34-b627a593.pth) +``` +wget https://download.pytorch.org/models/resnet34-b627a593.pth +``` +文件MD5sum:78fe1097b28dbda1373a700020afeed9 + +2.ResNet34模型代码在torchvision里,安装torchvision,arm下需源码安装,参考torchvision官网,若安装过程报错请百度解决 +``` +git clone https://github.com/pytorch/vision +cd vision +python3.7 setup.py install +cd .. +``` +3.编写pth2onnx脚本resnet34_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 resnet34_pth2onnx.py ./resnet34-b627a593.pth resnet34.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=./resnet34.onnx --output=resnet34_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnet34_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnet34_bs1.om -input_text_path=./resnet34_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` 
+第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "73.31%"}, {"key": "Top2 accuracy", "value": "83.7%"}, {"key": "Top3 accuracy", "value": "87.72%"}, {"key": "Top4 accuracy", "value": "89.92%"}, {"key": "Top5 accuracy", "value": "91.44%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) +``` +Model Acc@1 Acc@5 +ResNet-34 73.314 91.420 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 275.179, latency: 181700 +[data read] throughputRate: 293.361, moduleLatency: 3.40877 +[preprocess] throughputRate: 292.803, moduleLatency: 3.41527 +[infer] throughputRate: 277.19, Interface throughputRate: 503.525, moduleLatency: 2.7551 +[post] throughputRate: 277.189, moduleLatency: 3.60764 +``` +Interface throughputRate: 503.525,503.525x4=2014.1既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 177.5, latency: 281691 +[data read] throughputRate: 181.124, moduleLatency: 5.52107 +[preprocess] throughputRate: 180.841, moduleLatency: 5.52973 +[infer] throughputRate: 178.082, Interface throughputRate: 938.992, moduleLatency: 2.53232 +[post] throughputRate: 11.1299, moduleLatency: 89.8479 +``` +Interface throughputRate: 938.992,938.992x4=3755.968既是batch16 310单卡吞吐率 +batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_1.txt: +``` +[e2e] throughputRate: 238.247, latency: 209866 +[data read] throughputRate: 251.629, moduleLatency: 3.97411 +[preprocess] throughputRate: 251.149, moduleLatency: 3.98169 +[infer] throughputRate: 239.788, Interface throughputRate: 672.405, moduleLatency: 3.026 +[post] throughputRate: 59.9466, moduleLatency: 16.6815 +``` +Interface throughputRate: 672.405,672.405x4=2689.62既是batch4 310单卡吞吐率 +batch8的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_8_device_1.txt: +``` +[e2e] throughputRate: 223.522, latency: 223692 +[data read] throughputRate: 233.498, moduleLatency: 4.28269 +[preprocess] throughputRate: 233.151, moduleLatency: 4.28906 +[infer] throughputRate: 224.317, Interface throughputRate: 884.554, moduleLatency: 2.62576 +[post] throughputRate: 28.0393, moduleLatency: 35.6643 +``` +Interface throughputRate: 884.554,884.554x4=3538.216既是batch8 310单卡吞吐率 +batch32的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_32_device_1.txt: +``` +[e2e] throughputRate: 200.951, latency: 248817 +[data read] throughputRate: 209.207, moduleLatency: 4.77995 +[preprocess] throughputRate: 208.778, moduleLatency: 4.78978 +[infer] throughputRate: 202.034, Interface throughputRate: 875.835, moduleLatency: 2.617 +[post] throughputRate: 6.31544, moduleLatency: 158.342 + + +``` +Interface 
throughputRate: 875.835,875.835x4=3503.34既是batch32 310单卡吞吐率 + + **性能优化:** +>没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/ResNet34/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
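Regarding the throughput figures in section 7.1 above: the benchmark's Interface throughputRate is a per-device rate, and the README multiplies it by 4 to obtain single-card Ascend 310 throughput (one 310 card hosts four devices; parse.py applies the same factor of 4 to its fps). A small sketch of that arithmetic, using only the numbers quoted above:

```python
# Per-device "Interface throughputRate" values reported in section 7.1, keyed by batch size.
interface_throughput = {1: 503.525, 4: 672.405, 8: 884.554, 16: 938.992, 32: 875.835}

def card_throughput(per_device_fps, devices_per_card=4):
    """Single-card Ascend 310 throughput as computed in the README (per-device rate x 4)."""
    return per_device_fps * devices_per_card

for bs in sorted(interface_throughput):
    print(f"batch {bs}: {card_throughput(interface_throughput[bs]):.2f} fps per 310 card")
```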
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_acc_eval.py b/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_acc_eval.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_acc_eval.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
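The imagenet_acc_eval.py script that follows accumulates Top-1 through Top-5 hits by sorting each 1000-class prediction vector and comparing the highest-scoring indices with the ground-truth label. The following compact restatement of that counting logic is an illustrative sketch on synthetic data, not part of the patch.

```python
# Illustrative sketch of the Top-N counting used by imagenet_acc_eval.py:
# count a Top-k hit when the true label appears among the k highest-scoring
# classes, then take the cumulative sum. Data here is synthetic.
import numpy as np


def topn_accuracy(predictions, labels, topn=5):
    hits = np.zeros(topn)
    for probs, label in zip(predictions, labels):
        ranked = np.argsort(-probs)[:topn]      # best classes first
        for i, cls in enumerate(ranked):
            if cls == label:
                hits[i] += 1                    # hit at exactly rank i
                break
    # cumulative sum turns "hit at rank i" into "hit within top i+1"
    return np.cumsum(hits) / len(labels)


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    preds = rng.random((100, 1000)).astype(np.float32)
    labels = rng.integers(0, 1000, size=100)
    print(["%.2f%%" % (a * 100) for a in topn_accuracy(preds, labels)])
```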
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_torch_preprocess.py index 1ab60b54dd7fd5b59ca733666c1dc63e07c980c1..a99dd271d0df5f9b21aa9c7da3fe7edb491a27e6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/imagenet_torch_preprocess.py @@ -1,117 +1,117 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
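The imagenet_torch_preprocess.py script that follows re-implements the torchvision evaluation transform (Resize 256, CenterCrop 224, ToTensor, Normalize) with PIL and NumPy and dumps each image as a raw float32 .bin file. The sketch below is a hedged cross-check that builds the same tensor with torchvision directly, useful for spot-comparing a single preprocessed sample; the file paths are placeholders.

```python
# Illustrative cross-check (not part of the patch): the torchvision transform
# that the hand-written 'resnet' preprocessing is meant to match.
# Paths are placeholders.
import numpy as np
from PIL import Image
from torchvision import transforms

eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),                      # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def compare_one(jpeg_path="sample.JPEG", bin_path="sample.bin"):
    ref = eval_transform(Image.open(jpeg_path).convert('RGB')).numpy()
    got = np.fromfile(bin_path, dtype=np.float32).reshape(3, 224, 224)
    print("max abs diff:", np.abs(ref - got).max())


if __name__ == "__main__":
    compare_one()
```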
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. # ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) + diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/ResNet34/requirements.txt index 0fd8899267c708efec12cfc2fd54254e0f5eade9..a32f45720f6e73b661370f04a24d0090f4fbfdd1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.1 -torchvision == 0.6.1 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2.52 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/resnet34_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/ResNet34/resnet34_pth2onnx.py index 45bd8bd56cb4d69fa93081c59a4c41dac2b5eea7..b0c2e06f250bfbba4c2fb722bea733ae7484712d 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/resnet34_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/resnet34_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.resnet34(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - pth2onnx(input_file, output_file) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.resnet34(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/README.md index 4000a70540f9a0974034f7b29712a13b4c7f6a10..93ff0c492f4e1093b78d6bd496f2e2730b83e050 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/README.md @@ -1,29 +1,29 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/root/datasets/ - -2.进入工作目录 -cd ResNet34 - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/pytorch/vision - -5.如果模型代码需要安装,则安装模型代码 -cd vision -python3.7 setup.py install -cd .. - -6.获取权重文件 -wget https://download.pytorch.org/models/resnet34-b627a593.pth - -7.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/root/datasets/ + +2.进入工作目录 +cd ResNet34 + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/pytorch/vision + +5.如果模型代码需要安装,则安装模型代码 +cd vision +python3.7 setup.py install +cd .. + +6.获取权重文件 +wget https://download.pytorch.org/models/resnet34-b627a593.pth + +7.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/ResNet34/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
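resnet34_pth2onnx.py above exports the model with opset 11, an `image` input and a `class` output, both with a dynamic batch dimension. Before handing the graph to atc, it can be sanity-checked with the onnx package already listed in requirements.txt; the sketch below is illustrative only and assumes the output file is named resnet34.onnx.

```python
# Illustrative only: quick structural check of the exported ONNX model
# before feeding it to atc. The model file name is an assumption.
import onnx


def check_export(onnx_path="resnet34.onnx"):
    model = onnx.load(onnx_path)
    onnx.checker.check_model(model)             # raises if the graph is malformed
    graph = model.graph
    print("inputs :", [i.name for i in graph.input])               # expect ['image']
    print("outputs:", [o.name for o in graph.output])              # expect ['class']
    print("opset  :", [imp.version for imp in model.opset_import])  # expect [11]


if __name__ == "__main__":
    check_export()
```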
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d/README.md b/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d/README.md index 174206b05dd1c665a833b61e6de067b3f5ae4b60..289a13ea50a1660f00ecd47181ad43aa337cbe87 100644 --- a/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d/README.md @@ -1,288 +1,288 @@ -# ResNext101_32x8d Onnx模型端到端推理指导 - -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 - -[ResNext101_32x8d论文](https://arxiv.org/pdf/1611.05431.pdf) - -### 1.2 代码地址 - -[ResNext101_32x8d代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) - -branch:master commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 - -``` -CANN 5.0.1 - -torch == 1.5.1 -torchvision == 
0.6.1 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.20.3 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -**说明:** - -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - - - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[ResNext101_32x8d 预训练pth权重文件](https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth) - -``` -wget https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth -``` - -文件MD5sum:4454a42689454b94296e378762f2333f - -3.编写pth2onnx脚本resnext101_32x8d_pth2onnx.py - - **说明:** - ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 - -``` -python3.7 resnext101_32x8d_pth2onnx.py ./resnext101_32x8d-8ba56ff5.pth resnext101_32x8d.onnx -``` - - **模型转换要点:** - ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 - -``` -source env.sh -``` - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) - -``` -atc --framework=5 --model=./resnext101_32x8d.onnx --output=resnext101_32x8d_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 - -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 - -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset -``` - -### 4.3 生成数据集信息文件 - -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -python3.7 gen_dataset_info.py bin ./prep_dataset ./resnext101_32x8d.info 224 224 -``` - -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -``` - -2.执行离线推理 - -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnext101_32x8d_bs1.om -input_text_path=./resnext101_32x8d.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` - -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 - -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` - -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: - -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"},{"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "79.31%"}, {"key": "Top2 accuracy", "value": "88.68%"}, {"key": "Top3 accuracy", "value": "91.83%"}, {"key": "Top4 accuracy", "value": 
"93.47%"}, {"key": "Top5 accuracy", "value": "94.52%"}]} -``` - -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 - -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) - -``` -Model Acc@1 Acc@5ResNeXt-101-32x8d 79.312 94.526 -``` - -### 6.3 精度对比 - -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** - ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 - -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 113.043, latency: 442310[data read] throughputRate: 119.766, moduleLatency: 8.34965[preprocess] throughputRate: 119.219, moduleLatency: 8.38793[infer] throughputRate: 113.375, Interface throughputRate: 144.766, moduleLatency: 8.19887[post] throughputRate: 113.375, moduleLatency: 8.82032 -``` - -Interface throughputRate: 144.766,144.766x4=579.064既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` -[e2e] throughputRate: 116.034, latency: 430908[data read] throughputRate: 122.878, moduleLatency: 8.13815[preprocess] throughputRate: 122.393, moduleLatency: 8.17039[infer] throughputRate: 116.288, Interface throughputRate: 165.952, moduleLatency: 7.47516[post] throughputRate: 7.2679, moduleLatency: 137.591 -``` - -Interface throughputRate: 165.952,165.952x4=663.808既是batch16 310单卡吞吐率 - - - -batch4的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs4_in_device_0.txt: - -``` -./benchmark.x86_64 -round=20 -om_path=resnext101_32x8d_bs4.om -device_id=0 -batch_size=4 -``` - - - -``` -ave_throughputRate = 167.46samples/s, ave_latency = 6.02002ms -``` - -ave_throughputRate: 167.46,167.46x4=669.84既是batch4 310单卡吞吐率 - - - -batch8的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs8_in_device_0.txt: - - - -``` -ave_throughputRate = 167.521samples/s, ave_latency = 5.9993ms -``` - -ave_throughputRate: 167.521,167.521x4=670.084既是batch8 310单卡吞吐率 - - - -batch32的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs32_in_device_0.txt: - -``` -ave_throughputRate = 108.28samples/s, ave_latency = 9.24227ms -``` - -ave_throughputRate: 108.28,108.28x4=433.12既是batch32 310单卡吞吐率 - - **性能优化:** - -对于batch32的性能不达标,从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,从op_summary_0_1.csv看出单个Conv_Relu算子aicore耗时0.6毫秒到6毫秒,shape大的耗时就多,不存在优化问题。 - +# ResNext101_32x8d Onnx模型端到端推理指导 + +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 + 
+[ResNext101_32x8d论文](https://arxiv.org/pdf/1611.05431.pdf) + +### 1.2 代码地址 + +[ResNext101_32x8d代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) + +branch:master commit_id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 + +``` +CANN 5.0.1 + +torch == 1.5.1 +torchvision == 0.6.1 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.20.3 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +**说明:** + +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + + + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[ResNext101_32x8d 预训练pth权重文件](https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth) + +``` +wget https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth +``` + +文件MD5sum:4454a42689454b94296e378762f2333f + +3.编写pth2onnx脚本resnext101_32x8d_pth2onnx.py + + **说明:** + +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 + +``` +python3.7 resnext101_32x8d_pth2onnx.py ./resnext101_32x8d-8ba56ff5.pth resnext101_32x8d.onnx +``` + + **模型转换要点:** + +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 + +``` +source env.sh +``` + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) + +``` +atc --framework=5 --model=./resnext101_32x8d.onnx --output=resnext101_32x8d_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 + +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 + +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +python3.7 imagenet_torch_preprocess.py resnet /root/datasets/imagenet/val ./prep_dataset +``` + +### 4.3 生成数据集信息文件 + +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +python3.7 gen_dataset_info.py bin ./prep_dataset ./resnext101_32x8d.info 224 224 +``` + +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +``` + +2.执行离线推理 + +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=resnext101_32x8d_bs1.om -input_text_path=./resnext101_32x8d.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` + +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 + +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` + 
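The result.json produced by the command above can also be checked programmatically against the torchvision reference accuracy quoted in section 6.2 (79.312 / 94.526 for ResNeXt-101-32x8d), using the "within 1 percentage point" criterion of section 6.3. The helper below is an illustrative sketch, not part of the patch.

```python
# Illustrative sketch: compare the Top-1/Top-5 figures in result.json with the
# torchvision reference accuracy, using the 1-percentage-point criterion.
import json


def check_accuracy(result_json="result.json", ref_top1=79.312, ref_top5=94.526):
    with open(result_json, "r") as f:
        values = json.load(f)["value"]
    acc = {item["key"]: float(item["value"].rstrip('%'))
           for item in values if item["key"].startswith("Top")}
    top1, top5 = acc["Top1 accuracy"], acc["Top5 accuracy"]
    ok = (ref_top1 - top1) <= 1.0 and (ref_top5 - top5) <= 1.0
    print("Top1 %.2f%% / Top5 %.2f%% -> %s" % (top1, top5, "pass" if ok else "check"))


if __name__ == "__main__":
    check_accuracy()
```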
+第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: + +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"},{"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "79.31%"}, {"key": "Top2 accuracy", "value": "88.68%"}, {"key": "Top3 accuracy", "value": "91.83%"}, {"key": "Top4 accuracy", "value": "93.47%"}, {"key": "Top5 accuracy", "value": "94.52%"}]} +``` + +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 + +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) + +``` +Model Acc@1 Acc@5ResNeXt-101-32x8d 79.312 94.526 +``` + +### 6.3 精度对比 + +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** + +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 + +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 113.043, latency: 442310[data read] throughputRate: 119.766, moduleLatency: 8.34965[preprocess] throughputRate: 119.219, moduleLatency: 8.38793[infer] throughputRate: 113.375, Interface throughputRate: 144.766, moduleLatency: 8.19887[post] throughputRate: 113.375, moduleLatency: 8.82032 +``` + +Interface throughputRate: 144.766,144.766x4=579.064既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +[e2e] throughputRate: 116.034, latency: 430908[data read] throughputRate: 122.878, moduleLatency: 8.13815[preprocess] throughputRate: 122.393, moduleLatency: 8.17039[infer] throughputRate: 116.288, Interface throughputRate: 165.952, moduleLatency: 7.47516[post] throughputRate: 7.2679, moduleLatency: 137.591 +``` + +Interface throughputRate: 165.952,165.952x4=663.808既是batch16 310单卡吞吐率 + + + +batch4的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs4_in_device_0.txt: + +``` +./benchmark.x86_64 -round=20 -om_path=resnext101_32x8d_bs4.om -device_id=0 -batch_size=4 +``` + + + +``` +ave_throughputRate = 167.46samples/s, ave_latency = 6.02002ms +``` + +ave_throughputRate: 167.46,167.46x4=669.84既是batch4 310单卡吞吐率 + + + +batch8的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs8_in_device_0.txt: + + + +``` +ave_throughputRate = 167.521samples/s, ave_latency = 5.9993ms +``` + +ave_throughputRate: 167.521,167.521x4=670.084既是batch8 310单卡吞吐率 + + + +batch32的性能,使用benchmark工具进行纯推理,推理后生成result/PureInfer_perf_of_resnext101_32x8d_bs32_in_device_0.txt: + +``` +ave_throughputRate = 108.28samples/s, ave_latency = 9.24227ms +``` + +ave_throughputRate: 108.28,108.28x4=433.12既是batch32 310单卡吞吐率 + + **性能优化:** + +对于batch32的性能不达标,从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,从op_summary_0_1.csv看出单个Conv_Relu算子aicore耗时0.6毫秒到6毫秒,shape大的耗时就多,不存在优化问题。 + diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/LICENSE @@ -1,201 +1,201 @@ 
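The performance sections above obtain the single-card Ascend 310 throughput by multiplying the reported rate by 4: `Interface throughputRate` for full-dataset runs and `ave_throughputRate` for pure-inference runs. The helper below is an illustrative sketch that extracts that figure from a benchmark result file and applies the same factor, mirroring what test/parse.py does; the default file name is only an example.

```python
# Illustrative helper (not part of the patch): extract the rate reported by the
# benchmark tool and scale it by 4, as the performance sections above do when
# quoting single-card Ascend 310 throughput. The file name is an example.
import re
import sys


def single_card_fps(perf_txt="result/perf_vision_batchsize_1_device_0.txt"):
    with open(perf_txt, "r") as f:
        content = f.read()
    # full-dataset runs report "Interface throughputRate:",
    # pure-inference runs report "ave_throughputRate =" instead
    match = (re.search(r"Interface throughputRate:\s*([\d.]+)", content)
             or re.search(r"ave_throughputRate\s*=\s*([\d.]+)", content))
    if match is None:
        raise ValueError("no throughput figure found in " + perf_txt)
    rate = float(match.group(1))
    print("%s: %.3f samples/s x 4 = %.3f fps" % (perf_txt, rate, rate * 4))


if __name__ == "__main__":
    single_card_fps(sys.argv[1] if len(sys.argv) > 1 else
                    "result/perf_vision_batchsize_1_device_0.txt")
```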
- Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/README.md b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/README.md index e6c3abbd49e61c172a2994377781fa815d468210..5566fa1abe1ccb7836e4f0055818f42987ae334a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/README.md @@ -1,271 +1,271 @@ - # Shufflenetv1 Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - - ## 1 模型概述 - - - **[论文地址](#11-论文地址)** - - - **[代码地址](#12-代码地址)** - - ### 1.1 论文地址 - [shufflenetv1论文](https://arxiv.org/pdf/1707.01083.pdf) - - ### 1.2 代码地址 - [shufflenetv1代码](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1 ) - branch:master - commit_id: d69403d4b5fb3043c7c0da3c2a15df8c5e520d89 - - ## 2 环境说明 - - - **[深度学习框架](#21-深度学习框架)** - - - **[python第三方库](#22-python第三方库)** - - ### 2.1 深度学习框架 - ``` - CANN 5.0.2.alpha003 - pytorch == 1.8.0 - torchvision == 0.9.0 - onnx == 1.9.0 - ``` - - ### 2.2 python第三方库 - - ``` - numpy == 1.18.5 - Pillow == 7.2.0 - opencv-python == 4.5.1.48 - ``` - - **说明:** - > X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 - > - > Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - - ## 3 模型转换 - - - **[pth转onnx模型](#31-pth转onnx模型)** - - - **[onnx转om模型](#32-onnx转om模型)** - - ### 3.1 pth转onnx模型 - - 1.下载pth权重文件 - [Shufflenetv1预训练pth权重文件](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw#list/path=%2F)(提取码:mc24) - 文件md5sum: bc2b8686fe73bf0709af2cbfcff2b895 - ``` - https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw#list/path=%2F - ``` -2.shufflenetv1模型代码在代码仓中 - - ``` - github上Shufflenetv1没有安装脚本,在pth2onnx脚本中引用代码仓定义的ShuffleNetv1: - - git clone https://github.com/megvii-model/ShuffleNet-Series.git - - ``` - 3.编写pth2onnx脚本shufflenetv1_pth2onnx.py - - **说明:** - >注意目前ATC支持的onnx算子版本为11 - - 4.执行pth2onnx脚本,生成onnx模型文件 - ``` - python3.7 shufflenetv1_pth2onnx_bs1.py 1.0x.pth.tar shufflenetv1_bs1.onnx - ``` - - **模型转换要点:** - >动态batch的onnx转om失败并且测的性能数据也不对,每个batch的om都需要对应batch的onnx来转换,每个batch的性能数据也需要对应batch的onnx来测 - ### 3.2 onnx转om模型 - - 1.设置环境变量 - ``` - source env.sh - ``` - 
2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.2 开发辅助工具指南 (推理) 01 - ``` - atc --framework=5 --model=./shufflenetv1_bs1.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=shufflenetv1_bs1 --log=debug --soc_version=Ascend310 - ``` - - ## 4 数据集预处理 - - - **[数据集获取](#41-数据集获取)** - - - **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - ### 4.1 数据集获取 - 该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 - -### 4.2 数据集预处理 - 1.预处理脚本shufflenetv1_torch_preprocess.py - - 2.执行预处理脚本,生成数据集预处理后的bin文件 - ``` - python3.7 shufflenetv1_torch_preprocess.py /opt/npu/imagenet/val ./prep_dataset - ``` - ### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - - 2.执行生成数据集信息脚本,生成数据集信息文件 - ``` - python3.7 get_info.py bin ./prep_dataset ./shufflenetv1_prep_bin.info 224 224 - ``` - 第一个参数为生成的bin文件路径,第二个为输出的info文件,后面为宽高信息 - ## 5 离线推理 - - - **[benchmark工具概述](#51-benchmark工具概述)** - - - **[离线推理](#52-离线推理)** - - ### 5.1 benchmark工具概述 - - benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.2 推理benchmark工具用户指南 01 - ### 5.2 离线推理 - 1.设置环境变量 - ``` - source env.sh - ``` - 2.执行离线推理 - ``` - ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=shufflenetv1_bs1.om -input_text_path=./shufflenetv1_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False - ``` - 输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - - ## 6 精度对比 - - **[离线推理TopN精度](#61-离线推理TopN精度)** - - **[开源TopN精度](#62-开源TopN精度)** - - **[精度对比](#63-精度对比)** - - ### 6.1 离线推理TopN精度统计 - - 后处理统计TopN精度 - - 调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 - ``` - python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json - ``` - 第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 - 查看输出结果: - ``` - {"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value -": "67.66%"}, {"key": "Top2 accuracy", "value": "78.61%"}, {"key": "Top3 accuracy", "value": "83.29%"}, {"key": "Top4 accuracy", "value": "85.83%"}, {"key": "Top5 accuracy", "value": "87.61%"}]} - ``` - 经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - - ### 6.2 开源TopN精度 - [开源代码仓精度](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1 ) - - ``` - | model | top1 | top5 | - | --------------------------- | ---- | ---- | - | ShuffleNetV1 1.0x (group=3) | 67.8 | 87.7 | - ``` - ### 6.3 精度对比 - 将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** - >遇到精度不对,首先考虑预处理是不是没有和开源代码仓一致。 - -## 7 性能对比 - - - **[npu性能数据](#71-npu性能数据)** - - ### 7.1 npu性能数据 - benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 - 1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 225.491, latency: 221739 -[data read] throughputRate: 238.623, moduleLatency: 4.19071 -[preprocess] throughputRate: 238.364, moduleLatency: 4.19526 -[infer] throughputRate: 226.585, Interface throughputRate: 392.738, moduleLatency: 3.62481 -[post] throughputRate: 226.585, 
moduleLatency: 4.41336 - ``` - Interface throughputRate: 392.738,392.738乘以4即是310单卡吞吐 - - batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - - ``` -[e2e] throughputRate: 179.484, latency: 278577 -[data read] throughputRate: 185.65, moduleLatency: 5.38649 -[preprocess] throughputRate: 185.36, moduleLatency: 5.39492 -[infer] throughputRate: 180.299, Interface throughputRate: 1335.44, moduleLatency: 2.25621 -[post] throughputRate: 11.2682, moduleLatency: 88.7455 - - ``` - Interface throughputRate: 1335.44,1335.44x4既是batch16 310单卡吞吐率 - batch4性能: -./benchmark.x86_64 -round=20 -om_path=shufflenetv1_bs4.om -device_id=3 -batch_size=4 - ``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs4_in_device_3.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 986.58samples/s, ave_latency: 1.03751ms ----------------------------------------------------------------- - - ``` - batch4 310单卡吞吐率:986.58x4=3946.32fps - batch8性能: - - ``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs8_in_device_3.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 1235.73samples/s, ave_latency: 0.821675ms ----------------------------------------------------------------- - ``` - batch8 310单卡吞吐率:1235.73x4=4942.92fps - batch32性能: - - ``` - -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs32_in_device_3.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 1258.14samples/s, ave_latency: 0.795141ms ----------------------------------------------------------------- - ``` - batch32 310单卡吞吐率: 1258.14x4=5032.56fps - - **性能优化:** - 1. CANN 5.0.2.alpha003版本将PadV3D与AvgPoolV2融合提高了性能,所以基于此版本测。 - 2. 从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,TransData,Transpose算子,Conv2D算子不存在问题,由于格式转换om模型Transpose前后需要有TransData算子,从op_summary_0_1.csv可以看出单个TransData或Transpose算子aicore耗时,确定是否可以优化。 - 3. 
dynamic/transpose.py:68中已经通过shape白名单优化掉了TransposeD -> - five_2_four.py:9928 - 修改如下: - > elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25], - [16, 240, 7, 7], [16, 120, 14, 14], - [1, 240, 7, 7], [1, 120, 14, 14], - [4, 240, 7, 7], [4, 120, 14, 14], - [8, 240, 7, 7], [8, 120, 14, 14], - [32, 240, 7, 7], [32, 120, 14, 14]]: -> - four_2_five.py:1219 - 修改如下: - > if src_format.upper() == "NCHW" and shape_input in [[16, 240, 7, 7], [16, 120, 14, 14], - [1, 240, 7, 7], [1, 120, 14, 14], - [4, 240, 7, 7], [4, 120, 14, 14], - [8, 240, 7, 7], [8, 120, 14, 14], - [32, 240, 7, 7], [32, 120, 14, 14]] and dtype_input == "float16": + # Shufflenetv1 Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + + ## 1 模型概述 + + - **[论文地址](#11-论文地址)** + + - **[代码地址](#12-代码地址)** + + ### 1.1 论文地址 + [shufflenetv1论文](https://arxiv.org/pdf/1707.01083.pdf) + + ### 1.2 代码地址 + [shufflenetv1代码](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1 ) + branch:master + commit_id: d69403d4b5fb3043c7c0da3c2a15df8c5e520d89 + + ## 2 环境说明 + + - **[深度学习框架](#21-深度学习框架)** + + - **[python第三方库](#22-python第三方库)** + + ### 2.1 深度学习框架 + ``` + CANN 5.0.2.alpha003 + pytorch == 1.8.0 + torchvision == 0.9.0 + onnx == 1.9.0 + ``` + + ### 2.2 python第三方库 + + ``` + numpy == 1.18.5 + Pillow == 7.2.0 + opencv-python == 4.5.1.48 + ``` + + **说明:** + > X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 + > + > Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + + ## 3 模型转换 + + - **[pth转onnx模型](#31-pth转onnx模型)** + + - **[onnx转om模型](#32-onnx转om模型)** + + ### 3.1 pth转onnx模型 + + 1.下载pth权重文件 + [Shufflenetv1预训练pth权重文件](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw#list/path=%2F)(提取码:mc24) + 文件md5sum: bc2b8686fe73bf0709af2cbfcff2b895 + ``` + https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw#list/path=%2F + ``` +2.shufflenetv1模型代码在代码仓中 + + ``` + github上Shufflenetv1没有安装脚本,在pth2onnx脚本中引用代码仓定义的ShuffleNetv1: + + git clone https://github.com/megvii-model/ShuffleNet-Series.git + + ``` + 3.编写pth2onnx脚本shufflenetv1_pth2onnx.py + + **说明:** + >注意目前ATC支持的onnx算子版本为11 + + 4.执行pth2onnx脚本,生成onnx模型文件 + ``` + python3.7 shufflenetv1_pth2onnx_bs1.py 1.0x.pth.tar shufflenetv1_bs1.onnx + ``` + + **模型转换要点:** + >动态batch的onnx转om失败并且测的性能数据也不对,每个batch的om都需要对应batch的onnx来转换,每个batch的性能数据也需要对应batch的onnx来测 + ### 3.2 onnx转om模型 + + 1.设置环境变量 + ``` + source env.sh + ``` + 2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.2 开发辅助工具指南 (推理) 01 + ``` + atc --framework=5 --model=./shufflenetv1_bs1.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=shufflenetv1_bs1 --log=debug --soc_version=Ascend310 + ``` + + ## 4 数据集预处理 + + - **[数据集获取](#41-数据集获取)** + + - **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + ### 4.1 数据集获取 + 该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 + +### 4.2 数据集预处理 + 
1.预处理脚本shufflenetv1_torch_preprocess.py + + 2.执行预处理脚本,生成数据集预处理后的bin文件 + ``` + python3.7 shufflenetv1_torch_preprocess.py /opt/npu/imagenet/val ./prep_dataset + ``` + ### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + + 2.执行生成数据集信息脚本,生成数据集信息文件 + ``` + python3.7 get_info.py bin ./prep_dataset ./shufflenetv1_prep_bin.info 224 224 + ``` + 第一个参数为生成的bin文件路径,第二个为输出的info文件,后面为宽高信息 + ## 5 离线推理 + + - **[benchmark工具概述](#51-benchmark工具概述)** + + - **[离线推理](#52-离线推理)** + + ### 5.1 benchmark工具概述 + + benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.2 推理benchmark工具用户指南 01 + ### 5.2 离线推理 + 1.设置环境变量 + ``` + source env.sh + ``` + 2.执行离线推理 + ``` + ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=shufflenetv1_bs1.om -input_text_path=./shufflenetv1_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False + ``` + 输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + + ## 6 精度对比 + - **[离线推理TopN精度](#61-离线推理TopN精度)** + - **[开源TopN精度](#62-开源TopN精度)** + - **[精度对比](#63-精度对比)** + + ### 6.1 离线推理TopN精度统计 + + 后处理统计TopN精度 + + 调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 + ``` + python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json + ``` + 第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 + 查看输出结果: + ``` + {"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value +": "67.66%"}, {"key": "Top2 accuracy", "value": "78.61%"}, {"key": "Top3 accuracy", "value": "83.29%"}, {"key": "Top4 accuracy", "value": "85.83%"}, {"key": "Top5 accuracy", "value": "87.61%"}]} + ``` + 经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + + ### 6.2 开源TopN精度 + [开源代码仓精度](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1 ) + + ``` + | model | top1 | top5 | + | --------------------------- | ---- | ---- | + | ShuffleNetV1 1.0x (group=3) | 67.8 | 87.7 | + ``` + ### 6.3 精度对比 + 将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** + >遇到精度不对,首先考虑预处理是不是没有和开源代码仓一致。 + +## 7 性能对比 + + - **[npu性能数据](#71-npu性能数据)** + + ### 7.1 npu性能数据 + benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 + 1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 225.491, latency: 221739 +[data read] throughputRate: 238.623, moduleLatency: 4.19071 +[preprocess] throughputRate: 238.364, moduleLatency: 4.19526 +[infer] throughputRate: 226.585, Interface throughputRate: 392.738, moduleLatency: 3.62481 +[post] throughputRate: 226.585, moduleLatency: 4.41336 + ``` + Interface throughputRate: 392.738,392.738乘以4即是310单卡吞吐 + + batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + + ``` +[e2e] throughputRate: 179.484, latency: 278577 +[data read] throughputRate: 185.65, moduleLatency: 5.38649 +[preprocess] throughputRate: 185.36, moduleLatency: 5.39492 +[infer] throughputRate: 180.299, Interface throughputRate: 1335.44, moduleLatency: 2.25621 +[post] throughputRate: 11.2682, moduleLatency: 88.7455 + + ``` + Interface 
throughputRate: 1335.44,1335.44x4既是batch16 310单卡吞吐率 + batch4性能: +./benchmark.x86_64 -round=20 -om_path=shufflenetv1_bs4.om -device_id=3 -batch_size=4 + ``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs4_in_device_3.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 986.58samples/s, ave_latency: 1.03751ms +---------------------------------------------------------------- + + ``` + batch4 310单卡吞吐率:986.58x4=3946.32fps + batch8性能: + + ``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs8_in_device_3.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 1235.73samples/s, ave_latency: 0.821675ms +---------------------------------------------------------------- + ``` + batch8 310单卡吞吐率:1235.73x4=4942.92fps + batch32性能: + + ``` + +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_shufflenetv1_bs32_in_device_3.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 1258.14samples/s, ave_latency: 0.795141ms +---------------------------------------------------------------- + ``` + batch32 310单卡吞吐率: 1258.14x4=5032.56fps + + **性能优化:** + 1. CANN 5.0.2.alpha003版本将PadV3D与AvgPoolV2融合提高了性能,所以基于此版本测。 + 2. 从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,TransData,Transpose算子,Conv2D算子不存在问题,由于格式转换om模型Transpose前后需要有TransData算子,从op_summary_0_1.csv可以看出单个TransData或Transpose算子aicore耗时,确定是否可以优化。 + 3. dynamic/transpose.py:68中已经通过shape白名单优化掉了TransposeD +> + five_2_four.py:9928 + 修改如下: + > elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25], + [16, 240, 7, 7], [16, 120, 14, 14], + [1, 240, 7, 7], [1, 120, 14, 14], + [4, 240, 7, 7], [4, 120, 14, 14], + [8, 240, 7, 7], [8, 120, 14, 14], + [32, 240, 7, 7], [32, 120, 14, 14]]: +> + four_2_five.py:1219 + 修改如下: + > if src_format.upper() == "NCHW" and shape_input in [[16, 240, 7, 7], [16, 120, 14, 14], + [1, 240, 7, 7], [1, 120, 14, 14], + [4, 240, 7, 7], [4, 120, 14, 14], + [8, 240, 7, 7], [8, 120, 14, 14], + [32, 240, 7, 7], [32, 120, 14, 14]] and dtype_input == "float16": > \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/requirements.txt index c2203b4465dfc2eed99b7f8329cf8d1dab08e3b6..fe5ade6e250c12797fe224ef8f1a6965ca76d103 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.18.5 +Pillow == 7.2.0 opencv-python == 4.5.1.48 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs1.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs1.py index 5da26de3375ebdd7cda443422553a2a1f8e1448b..88a6ada9a13c0b356353f803050783be6dfa3c31 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs1.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs1.py @@ -1,38 +1,38 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import sys -from collections import OrderedDict -sys.path.append(r"./ShuffleNet-Series/ShuffleNetV1") -from network import ShuffleNetV1 - -def pth2onnx(input_file, output_file): - model = ShuffleNetV1(model_size="1.0x", group=3) - checkpoint = torch.load(input_file, map_location="cpu") - new_state_dict = OrderedDict() - for k, v in checkpoint['state_dict'].items(): - name = k[7:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.rand(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, - input_names = input_names, dynamic_axes = dynamic_axes, - output_names = output_names, opset_version=11, verbose=True) - -if __name__=="__main__": - pth2onnx(sys.argv[1], sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import sys +from collections import OrderedDict +sys.path.append(r"./ShuffleNet-Series/ShuffleNetV1") +from network import ShuffleNetV1 + +def pth2onnx(input_file, output_file): + model = ShuffleNetV1(model_size="1.0x", group=3) + checkpoint = torch.load(input_file, map_location="cpu") + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + name = k[7:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.rand(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, + input_names = input_names, dynamic_axes = dynamic_axes, + output_names = output_names, opset_version=11, verbose=True) + +if __name__=="__main__": + pth2onnx(sys.argv[1], sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs16.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs16.py index 5b3fa4b69f55bb9a94a3ceb73a9fa6bb5fb28996..d396f8a2e05200f0f95fd10bc5dd57ff5607f972 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs16.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_pth2onnx_bs16.py @@ -1,37 +1,37 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import sys -from collections import OrderedDict -sys.path.append(r"./ShuffleNet-Series/ShuffleNetV1") -from network import ShuffleNetV1 - -def pth2onnx(input_file, output_file): - model = ShuffleNetV1(model_size="1.0x", group=3) - checkpoint = torch.load(input_file, map_location="cpu") - new_state_dict = OrderedDict() - for k, v in checkpoint['state_dict'].items(): - name = k[7: ] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.rand(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, - dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - -if __name__ == "__main__": - pth2onnx(sys.argv[1], sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import sys +from collections import OrderedDict +sys.path.append(r"./ShuffleNet-Series/ShuffleNetV1") +from network import ShuffleNetV1 + +def pth2onnx(input_file, output_file): + model = ShuffleNetV1(model_size="1.0x", group=3) + checkpoint = torch.load(input_file, map_location="cpu") + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + name = k[7: ] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.rand(16, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, + dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + +if __name__ == "__main__": + pth2onnx(sys.argv[1], sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_torch_preprocess.py index 4cf57f07c1e9d23d6a8332b295bb906e3db9e155..58eb781897e830217627df46fd5a38d0f90caca1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1/shufflenetv1_torch_preprocess.py @@ -1,79 +1,79 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import PIL -from PIL import Image -import cv2 -import numpy as np -import torch -from torchvision import transforms -import multiprocessing - -def ToBGRTensor(img): - assert isinstance(img, (np.ndarray, PIL.Image.Image)) - if isinstance(img, PIL.Image.Image): - img = np.asarray(img) - img = img[:, :, ::-1] # 2 BGR - img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) - img = np.ascontiguousarray(img) - img = torch.from_numpy(img).float() - return img - -def OpencvResize(img,size): - assert isinstance(img, PIL.Image.Image) - img = np.asarray(img) # (H,W,3) RGB - img = img[:, :, ::-1] # 2 BGR - img = np.ascontiguousarray(img) - H, W, _ = img.shape - target_size = (int(size / H * W + 0.5), size) if H < W else (size, int(size / W * H + 0.5)) - img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) - img = img[:, :, ::-1] # 2 RGB - img = np.ascontiguousarray(img) - img = Image.fromarray(img) - return img - -def gen_input_bin(save_path, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - - image = OpencvResize(image, 256) - crop = transforms.CenterCrop(224) - image = crop(image) - image = ToBGRTensor(image) - img = np.array(image, dtype=np.float32) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - -def preprocess(src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(save_path, file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - -if __name__ == "__main__": - src_path = sys.argv[1] - save_path = sys.argv[2] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import PIL +from PIL import Image +import cv2 +import numpy as np +import torch +from torchvision import transforms +import multiprocessing + +def ToBGRTensor(img): + assert isinstance(img, (np.ndarray, PIL.Image.Image)) + if isinstance(img, PIL.Image.Image): + img = np.asarray(img) + img = img[:, :, ::-1] # 2 BGR + img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) + img = np.ascontiguousarray(img) + img = torch.from_numpy(img).float() + return img + +def OpencvResize(img,size): + assert isinstance(img, PIL.Image.Image) + img = np.asarray(img) # (H,W,3) RGB + img = img[:, :, ::-1] # 2 BGR + img = np.ascontiguousarray(img) + H, W, _ = img.shape + target_size = (int(size / H * W + 0.5), size) if H < W else (size, int(size / W * H + 0.5)) + img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) + img = img[:, :, ::-1] # 2 RGB + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + return img + +def gen_input_bin(save_path, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + + image = OpencvResize(image, 256) + crop = transforms.CenterCrop(224) + image = crop(image) + image = ToBGRTensor(image) + img = np.array(image, dtype=np.float32) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + +def preprocess(src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(save_path, file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + +if __name__ == "__main__": + src_path = sys.argv[1] + save_path = sys.argv[2] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) preprocess(src_path, save_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/README.md b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/README.md index 57e58ac3ba1f47790477473bf71a6f67af80e536..4879ab50b9bb1f4af3145c742fe0758b6b3d785d 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/README.md @@ -1,259 +1,259 @@ -Shufflenetv2+ Onnx模型端到端推理指导 - -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[shufflenetv2论文](https://arxiv.org/abs/1807.11164) - -### 1.2 代码地址 -[shufflenetv2+代码](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B) -branch:master -commit_id:d69403d4b5fb3043c7c0da3c2a15df8c5e520d89 - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 -pytorch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.18.5 -Pillow == 7.2.0 -opencv-python == 4.5.1.48 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[shufflenetv2+预训练pth权重文件](https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw) -文件md5sum: 1d6611049e6ef03f1d6afa11f6f9023e - -``` -https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw 提取码:mc24 -``` -2.shufflenetv2+模型代码在代码仓里 - -``` -github上Shufflenetv2+没有安装脚本,在pth2onnx脚本中引用代码仓定义的ShuffleNetv2+: - -git clone https://github.com/megvii-model/ShuffleNet-Series.git - - -``` -3.编写pth2onnx脚本shufflenetv2_pth2onnx_bs1.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 shufflenetv2_pth2onnx_bs1.py ShuffleNetV2+.Small.pth.tar shufflenetv2_bs1.onnx -``` - **模型转换要点:** ->动态batch的onnx转om失败并且测的性能数据也不对,每个batch的om都需要对应batch的onnx来转换,每个batch的性能数据也需要对应batch的onnx来测 - - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 -``` -atc --framework=5 --model=./shufflenetv2_bs1.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=shufflenetv2_bs1 --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 imagenet_torch_preprocess.py /root/datasets/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -python3.7 get_info.py bin ./prep_dataset ./shufflenetv2_prep_bin.info 224 224 -``` 
-第一个参数为生成的bin文件路径,第二个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 - -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=shufflenetv2_bs1.om -input_text_path=./shufflenetv2_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value -": "74.06%"}, {"key": "Top2 accuracy", "value": "84.21%"}, {"key": "Top3 accuracy", "value": "88.11%"}, {"key": "Top4 accuracy", "value": "90.3%"}, {"key": "Top5 accuracy", "value": "91.67%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[开源代码仓精度](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B) - -``` -Model Acc@1 Acc@5 -shufflenetv2 74.1 91.7 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 117.471, latency: 425636 -[data read] throughputRate: 124.47, moduleLatency: 8.03407 -[preprocess] throughputRate: 124.375, moduleLatency: 8.04019 -[infer] throughputRate: 117.823, Interface throughputRate: 147.93, moduleLatency: 7.93347 -[post] throughputRate: 117.822, moduleLatency: 8.48734 -``` -Interface throughputRate: 147.93,147.93x4=591.72既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` -[e2e] throughputRate: 130.7, latency: 382555 -[data read] throughputRate: 131.307, moduleLatency: 7.61574 -[preprocess] throughputRate: 131.19, moduleLatency: 7.62255 -[infer] throughputRate: 131.175, Interface throughputRate: 491.668, moduleLatency: 3.45377 -[post] throughputRate: 8.19833, moduleLatency: 121.976 -``` -Interface throughputRate: 491.668,491.668x4=1966.672既是batch16 310单卡吞吐率 -batch4性能: - -``` -[e2e] throughputRate: 189.011, latency: 264534 -[data read] throughputRate: 198.271, moduleLatency: 5.0436 -[preprocess] throughputRate: 198.037, moduleLatency: 5.04955 -[infer] throughputRate: 189.874, Interface throughputRate: 363.812, moduleLatency: 4.18727 -[post] throughputRate: 47.4682, moduleLatency: 21.0667 -``` -batch4 310单卡吞吐率:363.812x4=1455.248fps 
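Editor's note on the throughput arithmetic used throughout this README hunk: the benchmark tool reports a per-chip `Interface throughputRate`, and the README multiplies it by 4 to get the single-card Ascend 310 figure (the ×4 is consistent with a four-chip 310 inference card; the helper name and layout below are illustrative, not part of the patch). A minimal sketch reproducing those numbers:

```python
# Illustrative helper (not part of the patch): reproduces the README's
# "Interface throughputRate x 4 = single-card 310 throughput" arithmetic.
def single_card_fps(interface_throughput_rate: float, chips_per_card: int = 4) -> float:
    """Scale the per-chip benchmark rate to a full Ascend 310 card."""
    return interface_throughput_rate * chips_per_card


if __name__ == "__main__":
    # Values taken from the perf logs quoted in this README hunk.
    for batch, rate in [(1, 147.93), (4, 363.812), (8, 437.088), (16, 491.668), (32, 475.038)]:
        print(f"batch{batch}: {single_card_fps(rate):.3f} fps")
    # batch1: 591.720 fps ... batch16: 1966.672 fps, matching the README.
```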
-batch8性能: - -``` -[e2e] throughputRate: 139.455, latency: 358539 -[data read] throughputRate: 139.918, moduleLatency: 7.14704 -[preprocess] throughputRate: 139.784, moduleLatency: 7.15391 -[infer] throughputRate: 139.734, Interface throughputRate: 437.088, moduleLatency: 3.72351 -[post] throughputRate: 17.4666, moduleLatency: 57.2522 -``` -batch8 310单卡吞吐率:437.088x4=1748.352fps -batch32性能: - -``` -[e2e] throughputRate: 221.683, latency: 225547 -[data read] throughputRate: 235.234, moduleLatency: 4.25108 -[preprocess] throughputRate: 234.935, moduleLatency: 4.2565 -[infer] throughputRate: 222.362, Interface throughputRate: 475.038, moduleLatency: 3.51711 -[post] throughputRate: 6.95087, moduleLatency: 143.867 -``` -batch32 310单卡吞吐率:475.038x4=1900.152fps - -**性能优化:** - ->没有遇到性能不达标的问题,故不需要进行性能优化 - +Shufflenetv2+ Onnx模型端到端推理指导 + +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[shufflenetv2论文](https://arxiv.org/abs/1807.11164) + +### 1.2 代码地址 +[shufflenetv2+代码](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B) +branch:master +commit_id:d69403d4b5fb3043c7c0da3c2a15df8c5e520d89 + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 +pytorch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.18.5 +Pillow == 7.2.0 +opencv-python == 4.5.1.48 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[shufflenetv2+预训练pth权重文件](https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw) +文件md5sum: 1d6611049e6ef03f1d6afa11f6f9023e + +``` +https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw 提取码:mc24 +``` +2.shufflenetv2+模型代码在代码仓里 + +``` +github上Shufflenetv2+没有安装脚本,在pth2onnx脚本中引用代码仓定义的ShuffleNetv2+: + +git clone https://github.com/megvii-model/ShuffleNet-Series.git + + +``` +3.编写pth2onnx脚本shufflenetv2_pth2onnx_bs1.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 shufflenetv2_pth2onnx_bs1.py ShuffleNetV2+.Small.pth.tar shufflenetv2_bs1.onnx +``` + **模型转换要点:** +>动态batch的onnx转om失败并且测的性能数据也不对,每个batch的om都需要对应batch的onnx来转换,每个batch的性能数据也需要对应batch的onnx来测 + + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 +``` +atc --framework=5 --model=./shufflenetv2_bs1.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=shufflenetv2_bs1 --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 
+该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/root/datasets/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 imagenet_torch_preprocess.py /root/datasets/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +python3.7 get_info.py bin ./prep_dataset ./shufflenetv2_prep_bin.info 224 224 +``` +第一个参数为生成的bin文件路径,第二个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 + +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=shufflenetv2_bs1.om -input_text_path=./shufflenetv2_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /root/datasets/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value +": "74.06%"}, {"key": "Top2 accuracy", "value": "84.21%"}, {"key": "Top3 accuracy", "value": "88.11%"}, {"key": "Top4 accuracy", "value": "90.3%"}, {"key": "Top5 accuracy", "value": "91.67%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[开源代码仓精度](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B) + +``` +Model Acc@1 Acc@5 +shufflenetv2 74.1 91.7 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 117.471, latency: 425636 +[data read] throughputRate: 124.47, moduleLatency: 8.03407 +[preprocess] throughputRate: 124.375, moduleLatency: 8.04019 +[infer] throughputRate: 117.823, Interface throughputRate: 147.93, moduleLatency: 7.93347 +[post] throughputRate: 117.822, moduleLatency: 8.48734 +``` +Interface throughputRate: 147.93,147.93x4=591.72既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +[e2e] throughputRate: 130.7, latency: 382555 +[data read] throughputRate: 131.307, moduleLatency: 7.61574 +[preprocess] throughputRate: 131.19, moduleLatency: 7.62255 +[infer] throughputRate: 131.175, Interface throughputRate: 491.668, moduleLatency: 3.45377 +[post] throughputRate: 8.19833, moduleLatency: 
121.976 +``` +Interface throughputRate: 491.668,491.668x4=1966.672既是batch16 310单卡吞吐率 +batch4性能: + +``` +[e2e] throughputRate: 189.011, latency: 264534 +[data read] throughputRate: 198.271, moduleLatency: 5.0436 +[preprocess] throughputRate: 198.037, moduleLatency: 5.04955 +[infer] throughputRate: 189.874, Interface throughputRate: 363.812, moduleLatency: 4.18727 +[post] throughputRate: 47.4682, moduleLatency: 21.0667 +``` +batch4 310单卡吞吐率:363.812x4=1455.248fps +batch8性能: + +``` +[e2e] throughputRate: 139.455, latency: 358539 +[data read] throughputRate: 139.918, moduleLatency: 7.14704 +[preprocess] throughputRate: 139.784, moduleLatency: 7.15391 +[infer] throughputRate: 139.734, Interface throughputRate: 437.088, moduleLatency: 3.72351 +[post] throughputRate: 17.4666, moduleLatency: 57.2522 +``` +batch8 310单卡吞吐率:437.088x4=1748.352fps +batch32性能: + +``` +[e2e] throughputRate: 221.683, latency: 225547 +[data read] throughputRate: 235.234, moduleLatency: 4.25108 +[preprocess] throughputRate: 234.935, moduleLatency: 4.2565 +[infer] throughputRate: 222.362, Interface throughputRate: 475.038, moduleLatency: 3.51711 +[post] throughputRate: 6.95087, moduleLatency: 143.867 +``` +batch32 310单卡吞吐率:475.038x4=1900.152fps + +**性能优化:** + +>没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/imagenet_torch_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/imagenet_torch_preprocess.py index 67877da44e7c9990dafe3d21a2b76da8ebb37ae3..cb19574d4a03c11f0e2bf073e08c9b9df2a383d0 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/imagenet_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/imagenet_torch_preprocess.py @@ -1,72 +1,72 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys -import PIL -from PIL import Image -import cv2 -import numpy as np -import torch -from torchvision import transforms - -class ToBGRTensor(object): - - def __call__(self, img): - assert isinstance(img, (np.ndarray, PIL.Image.Image)) - if isinstance(img, PIL.Image.Image): - img = np.asarray(img) - img = img[:,:,::-1] # 2 BGR - img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) - img = np.ascontiguousarray(img) - img = torch.from_numpy(img).float() - return img - -class OpencvResize(object): - - def __init__(self, size=256): - self.size = size - - def __call__(self, img): - assert isinstance(img, PIL.Image.Image) - img = np.asarray(img) # (H,W,3) RGB - img = img[:,:,::-1] # 2 BGR - img = np.ascontiguousarray(img) - H, W, _ = img.shape - target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5)) - img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) - img = img[:,:,::-1] # 2 RGB - img = np.ascontiguousarray(img) - img = Image.fromarray(img) - return img - -def preprocess(src_path, save_path): - - preprocess = transforms.Compose([ - OpencvResize(256), - transforms.CenterCrop(224), - ToBGRTensor(), - ]) - - i = 0 - in_files = os.listdir(src_path) - for file in in_files: - i = i + 1 - print(file, "===", i) - input_image = Image.open(src_path + '/' + file).convert('RGB') - input_tensor = preprocess(input_image) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - -if __name__=="__main__": - preprocess(sys.argv[1],sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import PIL +from PIL import Image +import cv2 +import numpy as np +import torch +from torchvision import transforms + +class ToBGRTensor(object): + + def __call__(self, img): + assert isinstance(img, (np.ndarray, PIL.Image.Image)) + if isinstance(img, PIL.Image.Image): + img = np.asarray(img) + img = img[:,:,::-1] # 2 BGR + img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) + img = np.ascontiguousarray(img) + img = torch.from_numpy(img).float() + return img + +class OpencvResize(object): + + def __init__(self, size=256): + self.size = size + + def __call__(self, img): + assert isinstance(img, PIL.Image.Image) + img = np.asarray(img) # (H,W,3) RGB + img = img[:,:,::-1] # 2 BGR + img = np.ascontiguousarray(img) + H, W, _ = img.shape + target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5)) + img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) + img = img[:,:,::-1] # 2 RGB + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + return img + +def preprocess(src_path, save_path): + + preprocess = transforms.Compose([ + OpencvResize(256), + transforms.CenterCrop(224), + ToBGRTensor(), + ]) + + i = 0 + in_files = os.listdir(src_path) + for file in in_files: + i = i + 1 + print(file, "===", i) + input_image = Image.open(src_path + '/' + file).convert('RGB') + input_tensor = preprocess(input_image) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + +if __name__=="__main__": + preprocess(sys.argv[1],sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/requirements.txt index 19bf0c33da4a5c3990a53fb79d681b2804a6df47..59f8a711bf24972d8af91f0ba668c3fc5d91a1bb 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +numpy == 1.18.5 +Pillow == 7.2.0 opencv-python == 4.5.1.48 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs1.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs1.py index ff67f2706f25e9c567c94db11af3558fb4574aa5..f428f939b096bc974633b603bcdd78980bc03382 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs1.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs1.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import torch -from collections import OrderedDict -sys.path.append(r"./ShuffleNet-Series/ShuffleNetV2+") -from network import ShuffleNetV2_Plus - -def pth2onnx(input_file, output_file): - architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] - model = ShuffleNetV2_Plus(architecture=architecture,model_size='Small') - checkpoint = torch.load(input_file, map_location="cpu") - new_state_dict = OrderedDict() - for k, v in checkpoint['state_dict'].items(): - name = k[7:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.rand(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, - input_names = input_names, dynamic_axes = dynamic_axes, - output_names = output_names, opset_version=11, verbose=True) - -if __name__=="__main__": - pth2onnx(sys.argv[1], sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import torch +from collections import OrderedDict +sys.path.append(r"./ShuffleNet-Series/ShuffleNetV2+") +from network import ShuffleNetV2_Plus + +def pth2onnx(input_file, output_file): + architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] + model = ShuffleNetV2_Plus(architecture=architecture,model_size='Small') + checkpoint = torch.load(input_file, map_location="cpu") + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + name = k[7:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.rand(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, + input_names = input_names, dynamic_axes = dynamic_axes, + output_names = output_names, opset_version=11, verbose=True) + +if __name__=="__main__": + pth2onnx(sys.argv[1], sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs16.py b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs16.py index 5eb21c1900f2e35feb1d571b77ea2e15a5754904..708cfba6b53cbfe72676069a9614e7b657ff9870 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs16.py +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/shufflenetv2_pth2onnx_bs16.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import torch -from collections import OrderedDict -sys.path.append(r"./ShuffleNet-Series/ShuffleNetV2+") -from network import ShuffleNetV2_Plus - -def pth2onnx(input_file, output_file): - architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] - model = ShuffleNetV2_Plus(architecture=architecture,model_size='Small') - checkpoint = torch.load(input_file, map_location="cpu") - new_state_dict = OrderedDict() - for k, v in checkpoint['state_dict'].items(): - name = k[7:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.rand(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, - input_names = input_names, dynamic_axes = dynamic_axes, - output_names = output_names, opset_version=11, verbose=True) - -if __name__=="__main__": - pth2onnx(sys.argv[1], sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import torch +from collections import OrderedDict +sys.path.append(r"./ShuffleNet-Series/ShuffleNetV2+") +from network import ShuffleNetV2_Plus + +def pth2onnx(input_file, output_file): + architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] + model = ShuffleNetV2_Plus(architecture=architecture,model_size='Small') + checkpoint = torch.load(input_file, map_location="cpu") + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + name = k[7:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.rand(16, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, + input_names = input_names, dynamic_axes = dynamic_axes, + output_names = output_names, opset_version=11, verbose=True) + +if __name__=="__main__": + pth2onnx(sys.argv[1], sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/test/readme.md b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/test/readme.md index a8c39573767998c0bf79d1e41514c44e9690da4b..a308c17cb252b33784d7cc1cdb4185d44cf1782a 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/test/readme.md +++ b/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+/test/readme.md @@ -1,27 +1,27 @@ -环境准备: - -1.数据集路径 -通用的数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd Shufflenetv2+ - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/megvii-model/ShuffleNet-Series.git - - -5.获取权重文件 -https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw - -提取码:mc24 - -6.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash 
test/eval_acc_perf.sh +环境准备: + +1.数据集路径 +通用的数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd Shufflenetv2+ + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/megvii-model/ShuffleNet-Series.git + + +5.获取权重文件 +https://pan.baidu.com/share/init?surl=EUQVoFPb74yZm0JWHKjFOw + +提取码:mc24 + +6.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh diff --git a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_preprocess.py index d423874602ceb7871b543f552a820d7382b260db..93065389d266b72509ac9de008f5171e08610a34 100644 --- a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_preprocess.py @@ -1,106 +1,106 @@ -""" -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import numpy as np -from torchvision import transforms -import torch -from torch import nn -import os -from PIL import Image -import sys -import pickle - - -class GaussianBlur(object): - """blur a single image on CPU""" - def __init__(self, kernel_size): - radias = kernel_size // 2 - kernel_size = radias * 2 + 1 - self.blur_h = nn.Conv2d(3, 3, kernel_size=(kernel_size, 1), - stride=1, padding=0, bias=False, groups=3) - self.blur_v = nn.Conv2d(3, 3, kernel_size=(1, kernel_size), - stride=1, padding=0, bias=False, groups=3) - self.k = kernel_size - self.r = radias - - self.blur = nn.Sequential( - nn.ReflectionPad2d(radias), - self.blur_h, - self.blur_v - ) - - self.pil_to_tensor = transforms.ToTensor() - self.tensor_to_pil = transforms.ToPILImage() - - def __call__(self, img): - img = self.pil_to_tensor(img).unsqueeze(0) - - sigma = np.random.uniform(0.1, 2.0) - x = np.arange(-self.r, self.r + 1) - x = np.exp(-np.power(x, 2) / (2 * sigma * sigma)) - x = x / x.sum() - x = torch.from_numpy(x).view(1, -1).repeat(3, 1) - - self.blur_h.weight.data.copy_(x.view(3, 1, self.k, 1)) - self.blur_v.weight.data.copy_(x.view(3, 1, 1, self.k)) - - with torch.no_grad(): - img = self.blur(img) - img = img.squeeze() - - img = self.tensor_to_pil(img) - - return img - - -def preprocess(srcfile_path, savefile_path): - """ data preprocess """ - size = 32 - s = 1 - n_views = 2 - file_num = 0 - data = [] - color_jitter = transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s) - data_transforms = transforms.Compose([transforms.RandomResizedCrop(size=size), - transforms.RandomHorizontalFlip(), - transforms.RandomApply([color_jitter], p=0.8), - transforms.RandomGrayscale(p=0.2), - GaussianBlur(kernel_size=int(0.1 * size)), - transforms.ToTensor()]) - if not os.path.exists(savefile_path): - os.mkdir(savefile_path) - with open(srcfile_path, "rb") as f: - entry = pickle.load(f, encoding='latin1') - data.append(entry['data']) - images = np.vstack(data).reshape(-1, 3, 32, 32) - 
images = np.transpose(images, (0, 2, 3, 1)) - for i in range(images.shape[0]): - image = [data_transforms(Image.fromarray(images[i])) for j in range(n_views)] - file_path = os.path.join(savefile_path, "Simclr_prep_" + str(file_num) + ".bin") - file_num = file_num + 1 - print(i) - image_file = np.array(image[0]).astype(np.float32) - image_file.tofile(file_path) - file_path = os.path.join(savefile_path, "Simclr_prep_" + str(file_num) + ".bin") - image_file = np.array(image[1]).astype(np.float32) - image_file.tofile(file_path) - file_num = file_num + 1 - - -if __name__ == "__main__": - src_path = sys.argv[1] - save_path = sys.argv[2] - preprocess(src_path, save_path) +""" +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import numpy as np +from torchvision import transforms +import torch +from torch import nn +import os +from PIL import Image +import sys +import pickle + + +class GaussianBlur(object): + """blur a single image on CPU""" + def __init__(self, kernel_size): + radias = kernel_size // 2 + kernel_size = radias * 2 + 1 + self.blur_h = nn.Conv2d(3, 3, kernel_size=(kernel_size, 1), + stride=1, padding=0, bias=False, groups=3) + self.blur_v = nn.Conv2d(3, 3, kernel_size=(1, kernel_size), + stride=1, padding=0, bias=False, groups=3) + self.k = kernel_size + self.r = radias + + self.blur = nn.Sequential( + nn.ReflectionPad2d(radias), + self.blur_h, + self.blur_v + ) + + self.pil_to_tensor = transforms.ToTensor() + self.tensor_to_pil = transforms.ToPILImage() + + def __call__(self, img): + img = self.pil_to_tensor(img).unsqueeze(0) + + sigma = np.random.uniform(0.1, 2.0) + x = np.arange(-self.r, self.r + 1) + x = np.exp(-np.power(x, 2) / (2 * sigma * sigma)) + x = x / x.sum() + x = torch.from_numpy(x).view(1, -1).repeat(3, 1) + + self.blur_h.weight.data.copy_(x.view(3, 1, self.k, 1)) + self.blur_v.weight.data.copy_(x.view(3, 1, 1, self.k)) + + with torch.no_grad(): + img = self.blur(img) + img = img.squeeze() + + img = self.tensor_to_pil(img) + + return img + + +def preprocess(srcfile_path, savefile_path): + """ data preprocess """ + size = 32 + s = 1 + n_views = 2 + file_num = 0 + data = [] + color_jitter = transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s) + data_transforms = transforms.Compose([transforms.RandomResizedCrop(size=size), + transforms.RandomHorizontalFlip(), + transforms.RandomApply([color_jitter], p=0.8), + transforms.RandomGrayscale(p=0.2), + GaussianBlur(kernel_size=int(0.1 * size)), + transforms.ToTensor()]) + if not os.path.exists(savefile_path): + os.mkdir(savefile_path) + with open(srcfile_path, "rb") as f: + entry = pickle.load(f, encoding='latin1') + data.append(entry['data']) + images = np.vstack(data).reshape(-1, 3, 32, 32) + images = np.transpose(images, (0, 2, 3, 1)) + for i in range(images.shape[0]): + image = [data_transforms(Image.fromarray(images[i])) for j in range(n_views)] + file_path = os.path.join(savefile_path, "Simclr_prep_" + str(file_num) + ".bin") + file_num = file_num + 1 + print(i) + 
image_file = np.array(image[0]).astype(np.float32) + image_file.tofile(file_path) + file_path = os.path.join(savefile_path, "Simclr_prep_" + str(file_num) + ".bin") + image_file = np.array(image[1]).astype(np.float32) + image_file.tofile(file_path) + file_num = file_num + 1 + + +if __name__ == "__main__": + src_path = sys.argv[1] + save_path = sys.argv[2] + preprocess(src_path, save_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_pth2onnx.py index a3141e636f498bfc0bdd47d546b577d8b0752cb6..17a6214854fbbd9e375f06acdfb18122dfe7242d 100644 --- a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/Simclr_pth2onnx.py @@ -1,79 +1,79 @@ -""" -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import torch -import sys -import torch.nn as nn -import torchvision.models as models - - -class BaseSimCLRException(Exception): - """Base exception""" - - -class InvalidBackboneError(BaseSimCLRException): - """Raised when the choice of backbone Convnet is invalid.""" - - -class InvalidDatasetSelection(BaseSimCLRException): - """Raised when the choice of dataset is invalid.""" - -class ResNetSimCLR(nn.Module): - """ Simclr model """ - def __init__(self, base_model, out_dim): - super(ResNetSimCLR, self).__init__() - self.resnet_dict = {"resnet18": models.resnet18(pretrained=False, num_classes=out_dim), - "resnet50": models.resnet50(pretrained=False, num_classes=out_dim)} - - self.backbone = self._get_basemodel(base_model) - dim_mlp = self.backbone.fc.in_features - - # add mlp projection head - self.backbone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.backbone.fc) - - def _get_basemodel(self, model_name): - try: - model = self.resnet_dict[model_name] - except KeyError: - raise InvalidBackboneError( - "Invalid backbone architecture. Check the config file and pass one of: resnet18 or resnet50") - else: - return model - - def forward(self, x): - """forward """ - return self.backbone(x) - - -def pth2onnx(input_file, output_file): - """pth to onnx""" - checkpoint = torch.load(input_file, map_location='cpu') - model = ResNetSimCLR(base_model='resnet18', out_dim=128) - model.load_state_dict(checkpoint["state_dict"]) - model.eval() - - input_name = ["input"] - output_name = ["output"] - - dummy_input = torch.randn(1, 3, 32, 32) - torch.onnx.export(model, dummy_input, output_file, input_names=input_name, output_names=output_name, verbose=True) - - -if __name__ == "__main__": - input_pth = sys.argv[1] - output = sys.argv[2] - pth2onnx(input_pth, output) - +""" +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import torch +import sys +import torch.nn as nn +import torchvision.models as models + + +class BaseSimCLRException(Exception): + """Base exception""" + + +class InvalidBackboneError(BaseSimCLRException): + """Raised when the choice of backbone Convnet is invalid.""" + + +class InvalidDatasetSelection(BaseSimCLRException): + """Raised when the choice of dataset is invalid.""" + +class ResNetSimCLR(nn.Module): + """ Simclr model """ + def __init__(self, base_model, out_dim): + super(ResNetSimCLR, self).__init__() + self.resnet_dict = {"resnet18": models.resnet18(pretrained=False, num_classes=out_dim), + "resnet50": models.resnet50(pretrained=False, num_classes=out_dim)} + + self.backbone = self._get_basemodel(base_model) + dim_mlp = self.backbone.fc.in_features + + # add mlp projection head + self.backbone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.backbone.fc) + + def _get_basemodel(self, model_name): + try: + model = self.resnet_dict[model_name] + except KeyError: + raise InvalidBackboneError( + "Invalid backbone architecture. Check the config file and pass one of: resnet18 or resnet50") + else: + return model + + def forward(self, x): + """forward """ + return self.backbone(x) + + +def pth2onnx(input_file, output_file): + """pth to onnx""" + checkpoint = torch.load(input_file, map_location='cpu') + model = ResNetSimCLR(base_model='resnet18', out_dim=128) + model.load_state_dict(checkpoint["state_dict"]) + model.eval() + + input_name = ["input"] + output_name = ["output"] + + dummy_input = torch.randn(1, 3, 32, 32) + torch.onnx.export(model, dummy_input, output_file, input_names=input_name, output_names=output_name, verbose=True) + + +if __name__ == "__main__": + input_pth = sys.argv[1] + output = sys.argv[2] + pth2onnx(input_pth, output) + diff --git a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/test/parse.py index b17c1e0d8299f912b49c06bce07d8ca26d83ea07..178bbb3fd36cf0437245470312a8595b37f878b6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference/test/parse.py @@ -1,31 +1,31 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.log'): - result_log = sys.argv[1] - with open(result_log, 'r') as f: - lines = f.readlines() - RSNR_Res = lines[-1] - print(RSNR_Res.replace('\n', '')) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.log'): + result_log = sys.argv[1] + with open(result_log, 'r') as f: + lines = f.readlines() + RSNR_Res = lines[-1] + print(RSNR_Res.replace('\n', '')) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Sknet50/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Sknet50/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Sknet50/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Sknet50/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Sknet50/README.md b/ACL_PyTorch/contrib/cv/classfication/Sknet50/README.md index a73c1aea8c99a6200dd3960b507f47fdace0240a..64006615112c8a72798d7b04a3bed4a230edf337 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Sknet50/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Sknet50/README.md @@ -1,269 +1,269 @@ -# SK-ResNet50 Onnx 模型端到端推理指导 - -- [1. 模型概述](#1) - - [论文地址](#11) - - [代码地址](#12) -- [2. 环境说明](#2) - - [深度学习框架](#21) - - [python第三方库](#22) -- [3. 模型转换](#3) - - [pth转onnx模型](#31) -- [4. 数据预处理](#4) - - [数据集获取](#41) - - [数据集预处理](#42) - - [生成数据集信息文件](#43) -- [5. 离线推理](#5) - - [benchmark工具概述](#51) - - [离线推理](#52) -- [6. 精度对比](#6) - - [离线推理TopN精度](#61) - - [精度对比](#62) -- [7. 性能对比](#7) - - [npu性能数据](#71) - -## 1. 模型概述 - -### 1.1 论文地址 - -[SK-ResNet 论文](https://arxiv.org/pdf/1903.06586.pdf) - -### 1.2 代码地址 - -[SK-ResNet 代码](https://github.com/implus/PytorchInsight) - -branch: master - -commit_id: 2864528f8b83f52c3df76f7c3804aa468b91e5cf - -## 2. 环境说明 - -### 2.1 深度学习框架 - -``` -pytorch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -> **说明:** -> -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3. 模型转换 - -### 3.1 pth转onnx模型 - -1. 下载 pth 权重文件 - - [SK-ResNet50预训练pth权重文件(百度网盘,提取码:tfwn)](https://pan.baidu.com/s/1Lx5CNUeRQXOSWjzTlcO2HQ) - - 文件名:sk_resnet50.pth.tar - - md5sum:979bbb525ee0898003777a8e663e91c0 - -2. 克隆代码仓库代码 - - ```bash - git clone https://github.com/implus/PytorchInsight.git - ``` - -3. 使用 sknet2onnx.py 转换pth为onnx文件,在命令行运行如下指令: - - ```bash - python3.7 sknet2onnx.py --pth sk_resnet50.pth.tar --onnx sknet50_bs1 - ``` - - sk_resnet50.pth.tar文件为步骤1中下载的预训练权重文件,该条指令将在运行处生成一个sknet50_bs1文件,此文件即为目标onnx文件 - -**模型转换要点:** - -> pytorch导出onnx时softmax引入了transpose以操作任意轴,然而在onnx中已支持softmax操作任意轴,故可删除transpose提升性能 - -### 3.2 onnx转om模型 - -下列需要在具备华为Ascend系列芯片的机器上执行: - -1. 设置 atc 工作所需要的环境变量 - - ```bash - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - ``` - -2. 使用atc工具将onnx模型转换为om模型,命令参考 - - ```bash - atc --framework=5 --model=sknet50.onnx --output=sknet50_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 - ``` - - 此命令将在运行路径下生成一个sknet50_1bs.om文件,此文件即为目标om模型文件 - -## 4. 数据预处理 - -### 4.1 数据集获取 - -该模型使用[ImageNet官网](http://www.image-net.org/)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 - -### 4.2 数据集预处理 - -使用 sknet_preprocess.py 脚本进行数据预处理,脚本执行命令: - -```bash -python3.7 sknet_preprocess.py -s /opt/npu/imagenet/val -d ./prep_data -``` - -### 4.3 生成数据集信息文件 - -1. 生成数据集信息文件脚本 get_info.py - -2. 执行生成数据集信息脚本,生成数据集信息文件 - - ```bash - python3.7 get_info.py bin ./prep_data ./sknet_prep_bin.info 224 224 - ``` - - 第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 5. 
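As a quick sanity check between the pth→onnx export and the atc conversion described above, the exported model can be loaded and run once on random input before converting to om. This is a minimal illustrative sketch, not part of the original workflow; it assumes the export step produced a file named `sknet50_bs1.onnx` and that the `onnx` and `onnxruntime` packages are installed — adjust the path to whatever sknet2onnx.py actually wrote.

```python
# Illustrative sketch: quick sanity check of the exported ONNX model.
# Assumes the export produced `sknet50_bs1.onnx`; adjust to your output path.
import numpy as np
import onnx
import onnxruntime as ort

model_path = "sknet50_bs1.onnx"          # assumed output of sknet2onnx.py
model = onnx.load(model_path)
onnx.checker.check_model(model)          # structural validity check

sess = ort.InferenceSession(model_path)
input_name = sess.get_inputs()[0].name   # expected to be "image", matching the atc --input_shape
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
outputs = sess.run(None, {input_name: dummy})
print(input_name, outputs[0].shape)      # expect (1, 1000) for the 1000 ImageNet classes
```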
离线推理 - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -```bash -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=sknet50_bs1.om -input_text_path=sknet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` - -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6. 精度对比 - -### 6.1 离线推理TopN精度 - -后处理统计TopN精度,调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中: - -```bash -python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ../data/sknet/val_label.txt ./ result.json -``` - -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。查看输出结果: - -```json -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.54%"}, {"key": "Top2 accuracy", "value": "87.12%"}, {"key": "Top3 accuracy", "value": "90.73%"}, {"key": "Top4 accuracy", "value": "92.55%"}, {"key": "Top5 accuracy", "value": "93.71%"}]} -``` - -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 精度对比 - -| | TOP1 | TOP5 | -| :----------------: | :------: | :------: | -| 原github仓库精度 | 77.5380% | 93.7000% | -| om模型离线推理精度 | 77.54% | 93.71% | - -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - -## 7. 性能对比 - -### 7.1 npu性能数据 - -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 - -benchmark工具作纯推理时使用的命令参考如下: - -```bash -./benchmark.x86_64 -round=20 -om_path=sknet50_bs1.om -batch_size=1 -``` - -1. batch1 性能 - - 使用benchmark工具在整个数据集上推理时获得的性能数据: - - ``` - [e2e] throughputRate: 143.402, latency: 348669 - [data read] throughputRate: 152.003, moduleLatency: 6.57881 - [preprocess] throughputRate: 151.416, moduleLatency: 6.60433 - [infer] throughputRate: 143.733, Interface throughputRate: 210.306, moduleLatency: 6.16176 - [post] throughputRate: 143.732, moduleLatency: 6.95737 - ``` - - Interface throughputRate: 210.306 * 4 = 841.224 即是batch1 310单卡吞吐率 - -2. batch4 性能 - - ``` - [INFO] ave_throughputRate: 315.424samples/s, ave_latency: 3.30141ms - ``` - - Interface throughputRate: 315.424 * 4 = 1261.696 即是batch4 310单卡吞吐率 - -3. batch8 性能 - - ``` - [INFO] ave_throughputRate: 365.813samples/s, ave_latency: 2.76526ms - ``` - - Interface throughputRate: 365.813 * 4 = 1463.252 即是batch8 310单卡吞吐率 - -4. batch16 性能 - - ``` - [e2e] throughputRate: 196.399, latency: 254584 - [data read] throughputRate: 208.891, moduleLatency: 4.78718 - [preprocess] throughputRate: 207.779, moduleLatency: 4.81281 - [infer] throughputRate: 197.514, Interface throughputRate: 392.072, modul - [post] throughputRate: 12.3443, moduleLatency: 81.0088 - ``` - - Interface throughputRate: 392.072 * 4 = 1568.288 即是batch16 310单卡吞吐率 - -5. 
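The single-card figures quoted above all follow the same rule: the per-chip Interface throughputRate is multiplied by the 4 chips on an Ascend 310 card. A tiny helper (illustrative only) makes that arithmetic explicit:

```python
# Illustrative sketch: the 310 single-card throughput reported above is the
# per-chip Interface throughputRate times 4 (one Ascend 310 card carries 4 chips).
def card_throughput(interface_rate: float, chips_per_card: int = 4) -> float:
    return interface_rate * chips_per_card

print(card_throughput(210.306))  # batch1  -> 841.224
print(card_throughput(392.072))  # batch16 -> 1568.288
```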
batch32 性能 - - ``` - [INFO] ave_throughputRate: 376.691samples/s, ave_latency: 2.66319ms - ``` - - Interface throughputRate: 376.691 * 4 = 1506.764 即是batch32 310单卡吞吐率 - -**性能优化** - -> 从profiling数据的op_statistic_0_1.csv看出影响性能的是transpose算子,从onnx结构图看出该算子用于实现softmax任意轴,由pytorch导出时引入,然而softmax在onnx中现已支持任意轴,故可直接删除该算子提升性能,删除代码参考如下: - -```python -model = onnx.load(args.onnx+'.onnx') -graph = model.graph -node = graph.node -softmax_node_index = [] -del_group = [] -for i in range(len(node)): - if node[i].op_type == 'Softmax': - del_group.append((node[i-1], node[i], node[i+1], i)) -for g in del_group: - new_input = g[0].input - new_output = g[2].output - new_name = g[1].name - new_index = g[3] - new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) - for n in g[:-1]: - graph.node.remove(n) - graph.node.insert(new_index, new_node) -onnx.save(model, args.onnx+'.onnx') -``` - - - +# SK-ResNet50 Onnx 模型端到端推理指导 + +- [1. 模型概述](#1) + - [论文地址](#11) + - [代码地址](#12) +- [2. 环境说明](#2) + - [深度学习框架](#21) + - [python第三方库](#22) +- [3. 模型转换](#3) + - [pth转onnx模型](#31) +- [4. 数据预处理](#4) + - [数据集获取](#41) + - [数据集预处理](#42) + - [生成数据集信息文件](#43) +- [5. 离线推理](#5) + - [benchmark工具概述](#51) + - [离线推理](#52) +- [6. 精度对比](#6) + - [离线推理TopN精度](#61) + - [精度对比](#62) +- [7. 性能对比](#7) + - [npu性能数据](#71) + +## 1. 模型概述 + +### 1.1 论文地址 + +[SK-ResNet 论文](https://arxiv.org/pdf/1903.06586.pdf) + +### 1.2 代码地址 + +[SK-ResNet 代码](https://github.com/implus/PytorchInsight) + +branch: master + +commit_id: 2864528f8b83f52c3df76f7c3804aa468b91e5cf + +## 2. 环境说明 + +### 2.1 深度学习框架 + +``` +pytorch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +> **说明:** +> +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3. 模型转换 + +### 3.1 pth转onnx模型 + +1. 下载 pth 权重文件 + + [SK-ResNet50预训练pth权重文件(百度网盘,提取码:tfwn)](https://pan.baidu.com/s/1Lx5CNUeRQXOSWjzTlcO2HQ) + + 文件名:sk_resnet50.pth.tar + + md5sum:979bbb525ee0898003777a8e663e91c0 + +2. 克隆代码仓库代码 + + ```bash + git clone https://github.com/implus/PytorchInsight.git + ``` + +3. 使用 sknet2onnx.py 转换pth为onnx文件,在命令行运行如下指令: + + ```bash + python3.7 sknet2onnx.py --pth sk_resnet50.pth.tar --onnx sknet50_bs1 + ``` + + sk_resnet50.pth.tar文件为步骤1中下载的预训练权重文件,该条指令将在运行处生成一个sknet50_bs1文件,此文件即为目标onnx文件 + +**模型转换要点:** + +> pytorch导出onnx时softmax引入了transpose以操作任意轴,然而在onnx中已支持softmax操作任意轴,故可删除transpose提升性能 + +### 3.2 onnx转om模型 + +下列需要在具备华为Ascend系列芯片的机器上执行: + +1. 设置 atc 工作所需要的环境变量 + + ```bash + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + ``` + +2. 使用atc工具将onnx模型转换为om模型,命令参考 + + ```bash + atc --framework=5 --model=sknet50.onnx --output=sknet50_bs1 --input_format=NCHW --input_shape="image:1,3,224,224" --log=debug --soc_version=Ascend310 + ``` + + 此命令将在运行路径下生成一个sknet50_1bs.om文件,此文件即为目标om模型文件 + +## 4. 
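After applying the Softmax graph edit shown above, it is worth confirming that the Transpose nodes introduced by the PyTorch export are really gone before re-running atc. The following is a minimal illustrative check, assuming the edited graph was saved back to `sknet50_bs1.onnx` (adjust the path to your output):

```python
# Illustrative sketch: count remaining operators after the Softmax graph edit.
# The Transpose pairs that wrapped Softmax in the original export should be gone,
# and each Softmax should now carry an explicit axis=1 attribute.
from collections import Counter
import onnx

model = onnx.load("sknet50_bs1.onnx")    # assumed path of the edited model
ops = Counter(node.op_type for node in model.graph.node)
print("Transpose nodes:", ops.get("Transpose", 0))
print("Softmax nodes:  ", ops.get("Softmax", 0))
for node in model.graph.node:
    if node.op_type == "Softmax":
        print(node.name, [(a.name, a.i) for a in node.attribute if a.name == "axis"])
```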
数据预处理 + +### 4.1 数据集获取 + +该模型使用[ImageNet官网](http://www.image-net.org/)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt。 + +### 4.2 数据集预处理 + +使用 sknet_preprocess.py 脚本进行数据预处理,脚本执行命令: + +```bash +python3.7 sknet_preprocess.py -s /opt/npu/imagenet/val -d ./prep_data +``` + +### 4.3 生成数据集信息文件 + +1. 生成数据集信息文件脚本 get_info.py + +2. 执行生成数据集信息脚本,生成数据集信息文件 + + ```bash + python3.7 get_info.py bin ./prep_data ./sknet_prep_bin.info 224 224 + ``` + + 第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 5. 离线推理 + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +```bash +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=sknet50_bs1.om -input_text_path=sknet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` + +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6. 精度对比 + +### 6.1 离线推理TopN精度 + +后处理统计TopN精度,调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中: + +```bash +python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ ../data/sknet/val_label.txt ./ result.json +``` + +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。查看输出结果: + +```json +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "77.54%"}, {"key": "Top2 accuracy", "value": "87.12%"}, {"key": "Top3 accuracy", "value": "90.73%"}, {"key": "Top4 accuracy", "value": "92.55%"}, {"key": "Top5 accuracy", "value": "93.71%"}]} +``` + +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 精度对比 + +| | TOP1 | TOP5 | +| :----------------: | :------: | :------: | +| 原github仓库精度 | 77.5380% | 93.7000% | +| om模型离线推理精度 | 77.54% | 93.71% | + +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + +## 7. 性能对比 + +### 7.1 npu性能数据 + +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 + +benchmark工具作纯推理时使用的命令参考如下: + +```bash +./benchmark.x86_64 -round=20 -om_path=sknet50_bs1.om -batch_size=1 +``` + +1. batch1 性能 + + 使用benchmark工具在整个数据集上推理时获得的性能数据: + + ``` + [e2e] throughputRate: 143.402, latency: 348669 + [data read] throughputRate: 152.003, moduleLatency: 6.57881 + [preprocess] throughputRate: 151.416, moduleLatency: 6.60433 + [infer] throughputRate: 143.733, Interface throughputRate: 210.306, moduleLatency: 6.16176 + [post] throughputRate: 143.732, moduleLatency: 6.95737 + ``` + + Interface throughputRate: 210.306 * 4 = 841.224 即是batch1 310单卡吞吐率 + +2. batch4 性能 + + ``` + [INFO] ave_throughputRate: 315.424samples/s, ave_latency: 3.30141ms + ``` + + Interface throughputRate: 315.424 * 4 = 1261.696 即是batch4 310单卡吞吐率 + +3. batch8 性能 + + ``` + [INFO] ave_throughputRate: 365.813samples/s, ave_latency: 2.76526ms + ``` + + Interface throughputRate: 365.813 * 4 = 1463.252 即是batch8 310单卡吞吐率 + +4. 
batch16 性能 + + ``` + [e2e] throughputRate: 196.399, latency: 254584 + [data read] throughputRate: 208.891, moduleLatency: 4.78718 + [preprocess] throughputRate: 207.779, moduleLatency: 4.81281 + [infer] throughputRate: 197.514, Interface throughputRate: 392.072, modul + [post] throughputRate: 12.3443, moduleLatency: 81.0088 + ``` + + Interface throughputRate: 392.072 * 4 = 1568.288 即是batch16 310单卡吞吐率 + +5. batch32 性能 + + ``` + [INFO] ave_throughputRate: 376.691samples/s, ave_latency: 2.66319ms + ``` + + Interface throughputRate: 376.691 * 4 = 1506.764 即是batch32 310单卡吞吐率 + +**性能优化** + +> 从profiling数据的op_statistic_0_1.csv看出影响性能的是transpose算子,从onnx结构图看出该算子用于实现softmax任意轴,由pytorch导出时引入,然而softmax在onnx中现已支持任意轴,故可直接删除该算子提升性能,删除代码参考如下: + +```python +model = onnx.load(args.onnx+'.onnx') +graph = model.graph +node = graph.node +softmax_node_index = [] +del_group = [] +for i in range(len(node)): + if node[i].op_type == 'Softmax': + del_group.append((node[i-1], node[i], node[i+1], i)) +for g in del_group: + new_input = g[0].input + new_output = g[2].output + new_name = g[1].name + new_index = g[3] + new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) + for n in g[:-1]: + graph.node.remove(n) + graph.node.insert(new_index, new_node) +onnx.save(model, args.onnx+'.onnx') +``` + + + diff --git a/ACL_PyTorch/contrib/cv/classfication/Sknet50/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Sknet50/requirements.txt index 399dbfed087e9d139235f30e8d5991e803b92edd..fbd453de267138a05ccf06d3be32a3d4eb8f68fc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Sknet50/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Sknet50/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/README.md b/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/README.md index 11114dc96f8f052a117be63cd87441091f6d8bcd..3c3e2de286d9f49d0c19321e6b03c9a0578372c8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/README.md @@ -1,26 +1,26 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd Sknet50 - -3.安装必要的依赖 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/implus/PytorchInsight - -5.如果使用补丁文件修改了模型代码则将补丁打入模型代码,如果需要引用模型代码仓的类或函数通过sys.path.append()添加搜索路径。 - -5.获取权重文件 -[SK-ResNet50预训练pth权重文件(百度网盘,提取码:tfwn)](https://pan.baidu.com/s/1Lx5CNUeRQXOSWjzTlcO2HQ) - -7.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd Sknet50 + +3.安装必要的依赖 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/implus/PytorchInsight + +5.如果使用补丁文件修改了模型代码则将补丁打入模型代码,如果需要引用模型代码仓的类或函数通过sys.path.append()添加搜索路径。 + +5.获取权重文件 +[SK-ResNet50预训练pth权重文件(百度网盘,提取码:tfwn)](https://pan.baidu.com/s/1Lx5CNUeRQXOSWjzTlcO2HQ) + +7.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/parse.py index 
b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/Sknet50/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/TNT/README.md b/ACL_PyTorch/contrib/cv/classfication/TNT/README.md index 9debc2c022fdb214d9b48ff02b39efbb43d37926..68b21918df519274f2ac957d5b461ba1e62924c6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TNT/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/TNT/README.md @@ -1,44 +1,44 @@ -# TNT模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - - -2.获取,修改与安装开源模型代码 -``` -git clone https://github.com/huawei-noah/CV-Backbones.git -cd CV-Backbones -git checkout 7a0760f0b77c2e9ae585dcadfd34ff7575839ace -patch tnt_pytorch/tnt.py ../TNT.patch -cd .. 
-cp CV-Backbones/tnt_pytorch/tnt.py . -``` - -3.获取权重文件 - -tnt_s_81.5.pth.tar - -4.数据集 -获取ImageNet 2012 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| TNT bs1 | [rank1:81.5%](https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch) | rank1:81.5% | 89fps | 33fps | -| TNT bs16 | [rank1:81.5%](https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch) | rank1:81.5% | 181fps| 83fps | - - +# TNT模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + + +2.获取,修改与安装开源模型代码 +``` +git clone https://github.com/huawei-noah/CV-Backbones.git +cd CV-Backbones +git checkout 7a0760f0b77c2e9ae585dcadfd34ff7575839ace +patch tnt_pytorch/tnt.py ../TNT.patch +cd .. +cp CV-Backbones/tnt_pytorch/tnt.py . +``` + +3.获取权重文件 + +tnt_s_81.5.pth.tar + +4.数据集 +获取ImageNet 2012 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| TNT bs1 | [rank1:81.5%](https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch) | rank1:81.5% | 89fps | 33fps | +| TNT bs16 | [rank1:81.5%](https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch) | rank1:81.5% | 181fps| 83fps | + + diff --git a/ACL_PyTorch/contrib/cv/classfication/TNT/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/TNT/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TNT/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/TNT/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/TResNet/LICENSE b/ACL_PyTorch/contrib/cv/classfication/TResNet/LICENSE index 26aed103da3d9c1ee453d7cae8904e91cf0d4815..33a78d69acf137ad8a7d7236218b4ea3b8b9d0e2 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TResNet/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/TResNet/LICENSE @@ -1,25 +1,25 @@ -BSD 3-Clause License - -Copyright (c) 2018, Multimedia Laboratary, The Chinese University of Hong Kong -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2018, Multimedia Laboratary, The Chinese University of Hong Kong +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/TResNet/README.md b/ACL_PyTorch/contrib/cv/classfication/TResNet/README.md index b0453f5fb76ff3ae6b3f73198d568ada7c37e371..4e4d1bf3efd59b838014e8e6cc259ad25760631c 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TResNet/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/TResNet/README.md @@ -1,37 +1,37 @@ -# TResNet离线推理指导 - -## 1.环境准备 -以路径${MODEL_ZOO_PATH}/contrib/ACL_PyTorch/Research/cv/classification/TResNet/作为当前目录 - -1.安装必备的依赖 - -``` -pip3.7 install -r requirements.txt -``` - -2.由于没有开源社区的权重,因此需要将训练得到的权重model_best.pth.tar放到当前目录 - -3.获取数据集imagenet,并且以${Path}/imagenet/val作为datasets_path,这将在下面用到 - -4.获取数据集imagenet的val_label.txt,并且以${Path}/val_label.txt作为val_label_path,这将在下面用到 - -5.获取benchmark工具 - -将benchmark.x86_64放到当前目录 - -6.(重要)请确保您的CANN环境为5.0.3.alpha003,以确保能获得最佳性能 - -7.(重要)由于有算子涉及到了TransposeD,因此请将以下shape添加至白名单[ 1,3, 224, 224],[ 1, 3, 56, 4, 56, 4],[ 1, 4, 4, 3, 56, 56], - [ 16, 3, 224, 224],[ 16, 3, 56, 4, 56, 4], [ 16, 4, 4, 3, 56, 56] -8.请确保您能连接github以获取模型源码 - -## 2.离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets/imagenet/val --val_label_path=/root/datasets/imagenet/val_label.txt -bash test/perf_g.sh -``` - +# TResNet离线推理指导 + +## 1.环境准备 +以路径${MODEL_ZOO_PATH}/contrib/ACL_PyTorch/Research/cv/classification/TResNet/作为当前目录 + +1.安装必备的依赖 + +``` +pip3.7 install -r requirements.txt +``` + +2.由于没有开源社区的权重,因此需要将训练得到的权重model_best.pth.tar放到当前目录 + +3.获取数据集imagenet,并且以${Path}/imagenet/val作为datasets_path,这将在下面用到 + +4.获取数据集imagenet的val_label.txt,并且以${Path}/val_label.txt作为val_label_path,这将在下面用到 + +5.获取benchmark工具 + +将benchmark.x86_64放到当前目录 + +6.(重要)请确保您的CANN环境为5.0.3.alpha003,以确保能获得最佳性能 + +7.(重要)由于有算子涉及到了TransposeD,因此请将以下shape添加至白名单[ 1,3, 224, 224],[ 1, 3, 56, 4, 56, 4],[ 1, 4, 4, 3, 56, 56], + [ 16, 3, 224, 224],[ 16, 3, 56, 4, 56, 4], [ 16, 4, 4, 3, 56, 56] +8.请确保您能连接github以获取模型源码 + +## 2.离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets/imagenet/val --val_label_path=/root/datasets/imagenet/val_label.txt +bash test/perf_g.sh +``` + diff --git a/ACL_PyTorch/contrib/cv/classfication/TResNet/TResNet_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/TResNet/TResNet_postprocess.py index 7b1037e75d6a864736eb4748c8ef4ada1a956f23..60747729428f9cfe8f384473480251bf21352120 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TResNet/TResNet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/TResNet/TResNet_postprocess.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - print(table_dict) - json.dump(table_dict, writer) - 
writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param 
prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + print(table_dict) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) diff --git a/ACL_PyTorch/contrib/cv/classfication/TResNet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/TResNet/modelzoo_level.txt index 731399223951dc50392319c2f4e8e5245174c192..108cc882d65c41bd354b9c6373bcf882d112c26b 100644 --- a/ACL_PyTorch/contrib/cv/classfication/TResNet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/TResNet/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/LICENSE b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/LICENSE index 67596b0702591a6b19013c126a71cccce4100591..3d332846513a88288e46b761887e8fc21804f4c4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 
1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/README.md b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/README.md index 11c40ae3542131ae55f1089250b645fbde7540eb..40d2d9e841b652d404a0a9d62de2bc75fba0cf79 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/README.md @@ -1,313 +1,313 @@ -# Wide_ResNet101_2 Onnx模型端到端推理指导 - -- [1 模型概述](#1-模型概述) - - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) - -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) - -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) - -- [4 数据集预处理](#4-数据集预处理) - - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) - -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) - -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 开源精度](#62-开源精度) - - [6.3 精度对比](#63-精度对比) - -- [7 性能对比](#7-性能对比) - - - [7.1 npu性能数据](#71-npu性能数据) - - - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 - -[Wide_ResNet论文](https://arxiv.org/pdf/1605.07146.pdf) - -### 1.2 代码地址 - -[Wide_ResNet代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) - -branch:master -commit id:7d955df73fe0e9b47f7d6c77c699324b256fc41f - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 - -``` -CANN 5.0.1 - -pytorch >= 1.5.0 -torchvision >= 0.6.0 -onnx >= 1.7.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.18.5 -Pillow == 7.2.0 -opencv-python == 4.2.0.34 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1. 下载pth权重文件 - -[wrn101_2权重文件下载](https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth) - -文件md5sum: 5961435974bb43104b5a3180fea7c2c4 - -``` -wget https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth -``` - - - -2. 下载模型代码 - -``` -git clone https://github.com/pytorch/vision -cd vision -git reset 7d955df73fe0e9b47f7d6c77c699324b256fc41f --hard -python3.7 setup.py install -cd .. -``` - -3. 编写pth2onnx脚本wrn101_2_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4. 
执行pth2onnx脚本,生成onnx模型文件 - - -```python -python3.7 wrn101_2_pth2onnx.py wide_resnet101_2-32ee1156.pth wrn101_2_pth.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 - -```python -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 - -```python -atc --framework=5 --model=wrn101_2_pth.onnx --output=wrn101_2_bs16 --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 -``` - - - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt - -### 4.2 数据集预处理 - -1.预处理脚本imagenet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -python3.7 imagenet_torch_preprocess.py resnet /opt/npu/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 - -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -```python -python3.7 gen_dataset_info.py bin ./prep_dataset ./wrn101_2_prep_bin.info 224 224 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -``` -2.执行离线推理 - -```python -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=wrn101_2_bs16.om -input_text_path=./wrn101_2_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理精度](#61-离线推理精度)** -- **[开源精度](#62-开源精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理精度统计 - -后处理统计TopN精度 - -调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: - -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "78.84%"}, {"key": "Top2 accuracy", "value": "88.41%"}, {"key": "Top3 accuracy", "value": "91.66%"}, {"key": "Top4 accuracy", "value": "93.26%"}, {"key": "Top5 accuracy", "value": "94.29%"}]} -``` - -### 6.2 开源精度 - -[torchvision官网精度](https://pytorch.org/vision/stable/models.html) - -``` -Model Acc@1 Acc@5 -wide_resnet101_2 78.848 94.284 -``` -### 6.3 精度对比 - -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,统计结果如下。精度下降在1%范围之内,故精度达标。 - -``` - Acc@1 Acc@5 -bs1 78.84 94.29 -bs16 78.85 94.29 -``` - - **精度调试:** - ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 - -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 - -1.benchmark工具在整个数据集上推理获得性能数据 - -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 105.142, latency: 475550 -[data read] throughputRate: 
111.355, moduleLatency: 8.98031 -[preprocess] throughputRate: 111.053, moduleLatency: 9.00469 -[infer] throughputRate: 105.494, Interface throughputRate: 127.878, moduleLatency: 8.77965 -[post] throughputRate: 105.494, moduleLatency: 9.47924 -``` - -Interface throughputRate: 127.878,127.878x4=511.512即是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` -[e2e] throughputRate: 117.321, latency: 426182 -[data read] throughputRate: 124.66, moduleLatency: 8.0218 -[preprocess] throughputRate: 124.054, moduleLatency: 8.06101 -[infer] throughputRate: 117.825, Interface throughputRate: 169.604, moduleLatency: 7.35524 -[post] throughputRate: 7.36397, moduleLatency: 135.796 -``` - -Interface throughputRate: 169.604,169.604x4=678.416即是batch1 310单卡吞吐率 - -batch4性能: - -``` -[e2e] throughputRate: 114.374, latency: 437161 -[data read] throughputRate: 121.259, moduleLatency: 8.2468 -[preprocess] throughputRate: 121.014, moduleLatency: 8.26352 -[infer] throughputRate: 114.92, Interface throughputRate: 157.07, moduleLatency: 7.83108 -[post] throughputRate: 28.73, moduleLatency: 34.8068 -``` - -batch4 310单卡吞吐率:157.07x4=628.28fps -batch8性能: - -``` -[e2e] throughputRate: 111.341, latency: 449071 -[data read] throughputRate: 117.759, moduleLatency: 8.49194 -[preprocess] throughputRate: 117.55, moduleLatency: 8.50701 -[infer] throughputRate: 111.703, Interface throughputRate: 156.132, moduleLatency: 7.85466 -[post] throughputRate: 13.9628, moduleLatency: 71.6188 -``` - -batch8 310单卡吞吐率:156.132x4=624.528fps -batch32性能: - -``` -[e2e] throughputRate: 102.387, latency: 488344 -[data read] throughputRate: 108.61, moduleLatency: 9.20728 -[preprocess] throughputRate: 108.389, moduleLatency: 9.22602 -[infer] throughputRate: 102.81, Interface throughputRate: 139.595, moduleLatency: 8.59119 -[post] throughputRate: 3.2138, moduleLatency: 311.159 -``` - -batch32 310单卡吞吐率:139.595x4=558.38fps - -**性能优化:** - -> 对于batch32的性能不达标,从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,从op_summary_0_1.csv看出单个Conv_Relu算子耗时0.6毫秒到12毫秒,shape大的耗时就多,不存在优化问题 +# Wide_ResNet101_2 Onnx模型端到端推理指导 + +- [1 模型概述](#1-模型概述) + + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) + +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) + +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) + +- [4 数据集预处理](#4-数据集预处理) + + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) + +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) + +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 开源精度](#62-开源精度) + - [6.3 精度对比](#63-精度对比) + +- [7 性能对比](#7-性能对比) + + - [7.1 npu性能数据](#71-npu性能数据) + + + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 + +[Wide_ResNet论文](https://arxiv.org/pdf/1605.07146.pdf) + +### 1.2 代码地址 + +[Wide_ResNet代码](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) + +branch:master +commit id:7d955df73fe0e9b47f7d6c77c699324b256fc41f + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 + +``` +CANN 5.0.1 + +pytorch >= 1.5.0 +torchvision >= 0.6.0 +onnx >= 1.7.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.18.5 +Pillow == 7.2.0 +opencv-python == 4.2.0.34 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- 
**[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1. 下载pth权重文件 + +[wrn101_2权重文件下载](https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth) + +文件md5sum: 5961435974bb43104b5a3180fea7c2c4 + +``` +wget https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth +``` + + + +2. 下载模型代码 + +``` +git clone https://github.com/pytorch/vision +cd vision +git reset 7d955df73fe0e9b47f7d6c77c699324b256fc41f --hard +python3.7 setup.py install +cd .. +``` + +3. 编写pth2onnx脚本wrn101_2_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4. 执行pth2onnx脚本,生成onnx模型文件 + + +```python +python3.7 wrn101_2_pth2onnx.py wide_resnet101_2-32ee1156.pth wrn101_2_pth.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 + +```python +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 + +```python +atc --framework=5 --model=wrn101_2_pth.onnx --output=wrn101_2_bs16 --input_format=NCHW --input_shape="image:16,3,224,224" --log=debug --soc_version=Ascend310 +``` + + + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/opt/npu/imagenet/val_label.txt + +### 4.2 数据集预处理 + +1.预处理脚本imagenet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +python3.7 imagenet_torch_preprocess.py resnet /opt/npu/imagenet/val ./prep_dataset +``` +### 4.3 生成数据集信息文件 + +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +```python +python3.7 gen_dataset_info.py bin ./prep_dataset ./wrn101_2_prep_bin.info 224 224 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +``` +2.执行离线推理 + +```python +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=wrn101_2_bs16.om -input_text_path=./wrn101_2_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理精度](#61-离线推理精度)** +- **[开源精度](#62-开源精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理精度统计 + +后处理统计TopN精度 + +调用imagenet_acc_eval.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 imagenet_acc_eval.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: + +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value": "78.84%"}, {"key": "Top2 accuracy", "value": "88.41%"}, {"key": "Top3 accuracy", "value": "91.66%"}, {"key": "Top4 accuracy", "value": "93.26%"}, {"key": "Top5 accuracy", "value": "94.29%"}]} +``` + +### 6.2 开源精度 + +[torchvision官网精度](https://pytorch.org/vision/stable/models.html) + +``` +Model Acc@1 Acc@5 +wide_resnet101_2 78.848 94.284 +``` +### 6.3 精度对比 + +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,统计结果如下。精度下降在1%范围之内,故精度达标。 + +``` + Acc@1 Acc@5 +bs1 78.84 94.29 +bs16 78.85 94.29 +``` + + **精度调试:** + +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- 
**[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 + +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 + +1.benchmark工具在整个数据集上推理获得性能数据 + +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 105.142, latency: 475550 +[data read] throughputRate: 111.355, moduleLatency: 8.98031 +[preprocess] throughputRate: 111.053, moduleLatency: 9.00469 +[infer] throughputRate: 105.494, Interface throughputRate: 127.878, moduleLatency: 8.77965 +[post] throughputRate: 105.494, moduleLatency: 9.47924 +``` + +Interface throughputRate: 127.878,127.878x4=511.512即是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +[e2e] throughputRate: 117.321, latency: 426182 +[data read] throughputRate: 124.66, moduleLatency: 8.0218 +[preprocess] throughputRate: 124.054, moduleLatency: 8.06101 +[infer] throughputRate: 117.825, Interface throughputRate: 169.604, moduleLatency: 7.35524 +[post] throughputRate: 7.36397, moduleLatency: 135.796 +``` + +Interface throughputRate: 169.604,169.604x4=678.416即是batch1 310单卡吞吐率 + +batch4性能: + +``` +[e2e] throughputRate: 114.374, latency: 437161 +[data read] throughputRate: 121.259, moduleLatency: 8.2468 +[preprocess] throughputRate: 121.014, moduleLatency: 8.26352 +[infer] throughputRate: 114.92, Interface throughputRate: 157.07, moduleLatency: 7.83108 +[post] throughputRate: 28.73, moduleLatency: 34.8068 +``` + +batch4 310单卡吞吐率:157.07x4=628.28fps +batch8性能: + +``` +[e2e] throughputRate: 111.341, latency: 449071 +[data read] throughputRate: 117.759, moduleLatency: 8.49194 +[preprocess] throughputRate: 117.55, moduleLatency: 8.50701 +[infer] throughputRate: 111.703, Interface throughputRate: 156.132, moduleLatency: 7.85466 +[post] throughputRate: 13.9628, moduleLatency: 71.6188 +``` + +batch8 310单卡吞吐率:156.132x4=624.528fps +batch32性能: + +``` +[e2e] throughputRate: 102.387, latency: 488344 +[data read] throughputRate: 108.61, moduleLatency: 9.20728 +[preprocess] throughputRate: 108.389, moduleLatency: 9.22602 +[infer] throughputRate: 102.81, Interface throughputRate: 139.595, moduleLatency: 8.59119 +[post] throughputRate: 3.2138, moduleLatency: 311.159 +``` + +batch32 310单卡吞吐率:139.595x4=558.38fps + +**性能优化:** + +> 对于batch32的性能不达标,从profiling数据的op_statistic_0_1.csv看出影响性能的是Conv2D算子,从op_summary_0_1.csv看出单个Conv_Relu算子耗时0.6毫秒到12毫秒,shape大的耗时就多,不存在优化问题 diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/gen_dataset_info.py index 61450b4410663ae5e66ec29ed296ff6584203e31..5381839f653a885666e3fc456db9a1c22b8583a1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/gen_dataset_info.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/requirements.txt index e201023141a3422919c32979a89352fdc8d0e757..2f94093388da1b197ac1b92db08870011ce2fe79 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.8.1 -torchvision == 0.9.1 -onnx == 1.7.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.8.1 +torchvision == 0.9.1 +onnx == 1.7.0 +numpy == 1.18.5 +Pillow == 7.2.0 opencv-python == 4.2.0.34 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/README.md b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/README.md index e28e57fceb9ea963046de4b516a4d5606db19b5a..66ad246d3689e271c579b67fd30df21807ddaed1 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/README.md @@ -1,48 +1,48 @@ -环境准备: - -1.数据集路径 -通用的数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 - -``` -cd Wide_ResNet101_2 -``` - -3.安装必要的依赖 - -``` -pip3.7 install -r requirements.txt -``` - -4.获取模型代码 - -``` -git clone https://github.com/pytorch/vision -``` - -5.如果模型代码需要安装,则安装模型代码 - -``` -cd vision -git reset 7d955df73fe0e9b47f7d6c77c699324b256fc41f --hard -python3.7 setup.py install -cd .. -``` - -6.获取权重文件 - -``` -wget https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth -``` - -7.获取benchmark工具 -将benchmark.x86_64 放在当前目录 - -8.310上执行,执行时确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/opt/npu/ -``` +环境准备: + +1.数据集路径 +通用的数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 + +``` +cd Wide_ResNet101_2 +``` + +3.安装必要的依赖 + +``` +pip3.7 install -r requirements.txt +``` + +4.获取模型代码 + +``` +git clone https://github.com/pytorch/vision +``` + +5.如果模型代码需要安装,则安装模型代码 + +``` +cd vision +git reset 7d955df73fe0e9b47f7d6c77c699324b256fc41f --hard +python3.7 setup.py install +cd .. 
+``` + +6.获取权重文件 + +``` +wget https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth +``` + +7.获取benchmark工具 +将benchmark.x86_64 放在当前目录 + +8.310上执行,执行时确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/opt/npu/ +``` diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/wrn101_2_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/wrn101_2_pth2onnx.py index 41e36408ff8e1ddaa5ea63fa5f3f6a0d85773799..6395fdddcff23086f117883aaa9b01cf035cb424 100644 --- a/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/wrn101_2_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2/wrn101_2_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models - -def pth2onnx(input_file, output_file): - model = models.wide_resnet101_2(pretrained=False) - checkpoint = torch.load(input_file, map_location=None) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import torch +import torch.onnx +import torchvision.models as models + +def pth2onnx(input_file, output_file): + model = models.wide_resnet101_2(pretrained=False) + checkpoint = torch.load(input_file, map_location=None) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] pth2onnx(input_file, output_file) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/X3D/LICENSE b/ACL_PyTorch/contrib/cv/classfication/X3D/LICENSE index df2c2f2c3e55bfbad1aebe53321a94ee5a3854bc..c8ec075d5b892f823d0b485ad4fdd01355c57b3e 100644 --- a/ACL_PyTorch/contrib/cv/classfication/X3D/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/X3D/LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Copyright 2018-2019 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/X3D/README.md b/ACL_PyTorch/contrib/cv/classfication/X3D/README.md index 6453cc151a97a97ce0076e772e23bb9f9e6981b6..7a9e3338e24264e4e0ecd093873a851f952c6244 100644 --- a/ACL_PyTorch/contrib/cv/classfication/X3D/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/X3D/README.md @@ -1,69 +1,69 @@ -# X3D模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3 install -r requirements.txt -``` -2.获取,修改与安装开源模型代码 -``` -git clone https://github.com/facebookresearch/detectron2 detectron2_repo -pip3 install -e detectron2_repo - -git clone https://github.com/facebookresearch/SlowFast -b master -cd SlowFast - -git reset 9839d1318c0ae17bd82c6a121e5640aebc67f126 --hard -patch -p1 < x3d.patch -python3.7 setup.py build develop - -cd .. - -``` -3.[获取权重文件 x3d_s.pyth](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) - - 将权重文件x3d_s.pyth放在当前目录。 - -4.获取数据集Knetics-400 - -脚本下载: -获取验证集列表文件[val_link.list](https://ai-rank.bj.bcebos.com/Kinetics400/val_link.list)与验证集标签文件[val.list](https://videotag.bj.bcebos.com/PaddleVideo/Data/Kinetic400/val.list),并将val.list重命名为test.csv -下载验证集: -``` -download.sh: -file=$1 - -while read line -do - wget "$line" -done <$file - -download.sh val_link.list -``` -将下载的val_part1,val_part2,val_part3里的400个文件夹放到/root/datasets/Knetics-400/val,将test.csv放到/root/datasets/Knetics-400。 - -5.获取 [msame工具](https://gitee.com/ascend/tools/tree/master/msame) -和 -[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - -将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/evl_acc_pref.sh --datasets_path=/root/datasets/Knetics-400 -``` -备注:存在fp16算子溢出,精度不达标,因此atc模型转换需要添加--precision_mode allow_mix_precision - -**评测结果:** - -| 模型 | 官网pth精度 | 310精度 | 基准性能| 310性能 | -| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | -| X3d bs1 | [Top1:73.1%](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) | Top1:72.86% Top5:89.45% | 95.07fps | 158.57fps | +# X3D模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3 install -r requirements.txt +``` +2.获取,修改与安装开源模型代码 +``` +git clone https://github.com/facebookresearch/detectron2 detectron2_repo +pip3 install -e detectron2_repo + +git clone https://github.com/facebookresearch/SlowFast -b master +cd SlowFast + +git reset 9839d1318c0ae17bd82c6a121e5640aebc67f126 --hard +patch -p1 < x3d.patch +python3.7 setup.py build develop + +cd .. 
+ +``` +3.[获取权重文件 x3d_s.pyth](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) + + 将权重文件x3d_s.pyth放在当前目录。 + +4.获取数据集Knetics-400 + +脚本下载: +获取验证集列表文件[val_link.list](https://ai-rank.bj.bcebos.com/Kinetics400/val_link.list)与验证集标签文件[val.list](https://videotag.bj.bcebos.com/PaddleVideo/Data/Kinetic400/val.list),并将val.list重命名为test.csv +下载验证集: +``` +download.sh: +file=$1 + +while read line +do + wget "$line" +done <$file + +download.sh val_link.list +``` +将下载的val_part1,val_part2,val_part3里的400个文件夹放到/root/datasets/Knetics-400/val,将test.csv放到/root/datasets/Knetics-400。 + +5.获取 [msame工具](https://gitee.com/ascend/tools/tree/master/msame) +和 +[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + +将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/evl_acc_pref.sh --datasets_path=/root/datasets/Knetics-400 +``` +备注:存在fp16算子溢出,精度不达标,因此atc模型转换需要添加--precision_mode allow_mix_precision + +**评测结果:** + +| 模型 | 官网pth精度 | 310精度 | 基准性能| 310性能 | +| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | +| X3d bs1 | [Top1:73.1%](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) | Top1:72.86% Top5:89.45% | 95.07fps | 158.57fps | | X3d bs16| [Top1:73.1%](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) | Top1:72.86% Top5:89.45% | 103.82fps | 115.34fps | \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/LICENSE b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/README.md b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/README.md index 340b66fc5d532e4a8b0627c190fe6c3e73d83cfb..952c95a9166bbff3e702dc2d4de8f01141583253 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/README.md @@ -1,46 +1,46 @@ -# Dino_Resnet50模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - - -2.获取开源模型代码 -``` -git clone https://github.com/facebooksearch/dino -``` - -3.获取权重文件 - - [获取权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/DINO/dino_resnet50_linearweights.pth) - [获取权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/DINO/dino_resnet50_pretrain.pth) - -4.数据集 -自行获取LSVRC2012验证集和标签文本 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64放到当前工作目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -bash test/pth2om.sh -bash test/eval_acc_perf.sh -``` -pth2om.sh文件第1到6行是转onnx,第8到20行是转om -eval_acc_perf.sh文件第24到54行是精度,第55到66行是性能 - **评测结果:** -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| dino_resnet50_bs1 | [top1: 75.3%](https://github.com/facebookresearch/dino#evaluation-linear-classification-on-imagenet) | top1: 75.27% | 891.845fps | 1521.508fps | +# Dino_Resnet50模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + + +2.获取开源模型代码 +``` +git clone https://github.com/facebooksearch/dino +``` + +3.获取权重文件 + + [获取权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/DINO/dino_resnet50_linearweights.pth) + [获取权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/DINO/dino_resnet50_pretrain.pth) + +4.数据集 +自行获取LSVRC2012验证集和标签文本 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64放到当前工作目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +bash test/pth2om.sh +bash test/eval_acc_perf.sh +``` +pth2om.sh文件第1到6行是转onnx,第8到20行是转om +eval_acc_perf.sh文件第24到54行是精度,第55到66行是性能 + **评测结果:** +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| 
dino_resnet50_bs1 | [top1: 75.3%](https://github.com/facebookresearch/dino#evaluation-linear-classification-on-imagenet) | top1: 75.27% | 891.845fps | 1521.508fps | | dino_resnet50_bs16 | [top1: 75.3%](https://github.com/facebookresearch/dino#evaluation-linear-classification-on-imagenet) | top1: 75.27% | 2003.345fps | 2406.68fps | \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_postprocess.py index 4b5b462ff781b676f9cd89f247467e7f623fd0cb..2b0acb41e561d0c148840ef8430f08a423af09d6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_postprocess.py @@ -1,169 +1,169 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import argparse - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_file, img_gt_dict, - topn=5): - """ - :param prediction_file_path: - :param result_file: - :param img_gt_dict: - :param topn: - :return: - """ - 
writer = open(result_file, 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -def check_args(args): - if not (os.path.exists(args.anno_file)): - print("annotation file:{} does not exist.".format(args.anno_file)) - exit() - if not (os.path.exists(args.benchmark_out)): - print("benchmark output:{} does not exist.".format(args.benchmark_out)) - exit() - return args - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Precision statistics of Vision model') - parser.add_argument("--anno_file", default="./HiAI_label.json", help='annotation file') - parser.add_argument("--benchmark_out", default="result/dumpOutput_device0", help='Benchmark output directory') - parser.add_argument("--result_file", default="./result.json", help='Output json file') - args = parser.parse_args() - args = check_args(args) - if args.anno_file.endswith('txt'): - img_label_dict = cre_groundtruth_dict_fromtxt(args.anno_file) - else: - img_label_dict = cre_groundtruth_dict(args.anno_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
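For context on the accuracy bookkeeping in dino_resnet50_postprocess.py: each prediction file holds one score per class, the classes are ranked by descending score, a hit is recorded at the rank where the ground-truth label first appears, and Top-K accuracy is the cumulative hit count divided by the number of images. The following is a minimal, self-contained Python sketch of that logic on synthetic scores (the batch size and class count here are illustrative, not taken from the repository):

```
import numpy as np

def topn_accuracy(scores, labels, topn=5):
    # Mirrors the hit-counting scheme of create_visualization_statistical_result:
    # count_hit[i] counts images whose true label lands at rank i.
    count_hit = np.zeros(topn)
    for score_vec, label in zip(scores, labels):
        sort_index = np.argsort(-score_vec)          # classes ranked by descending score
        for i in range(min(len(sort_index), topn)):
            if sort_index[i] == label:
                count_hit[i] += 1
                break
    # A Top-K hit is a hit at any rank <= K, hence the cumulative sum.
    return np.cumsum(count_hit) / len(labels)

rng = np.random.default_rng(0)
fake_scores = rng.normal(size=(8, 1000))             # 8 synthetic images, 1000 classes
fake_labels = rng.integers(0, 1000, size=8)
print(topn_accuracy(fake_scores, fake_labels))       # [Top1, Top2, ..., Top5]
```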
+ +import os +import sys +import json +import numpy as np +import argparse + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_file, img_gt_dict, + topn=5): + """ + :param prediction_file_path: + :param result_file: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(result_file, 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +def check_args(args): + if not (os.path.exists(args.anno_file)): + print("annotation file:{} does not exist.".format(args.anno_file)) + exit() + if not 
(os.path.exists(args.benchmark_out)): + print("benchmark output:{} does not exist.".format(args.benchmark_out)) + exit() + return args + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Precision statistics of Vision model') + parser.add_argument("--anno_file", default="./HiAI_label.json", help='annotation file') + parser.add_argument("--benchmark_out", default="result/dumpOutput_device0", help='Benchmark output directory') + parser.add_argument("--result_file", default="./result.json", help='Output json file') + args = parser.parse_args() + args = check_args(args) + if args.anno_file.endswith('txt'): + img_label_dict = cre_groundtruth_dict_fromtxt(args.anno_file) + else: + img_label_dict = cre_groundtruth_dict(args.anno_file) create_visualization_statistical_result(args.benchmark_out, args.result_file, img_label_dict, topn=5) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_preprocess.py index a8c182f3cbebf46aa09111c03b8c836ec400166b..30f60993ce1433ce9600e69fb7637581c396fdc5 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_preprocess.py @@ -1,115 +1,115 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
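The --anno_file argument above can also point to a plain-text label list; cre_groundtruth_dict_fromtxt() expects one "<image name> <label index>" pair per line and keys the dictionary by the file name without its extension. A small sketch of that format and of the parsing rule follows (the file names and label values are made up for illustration):

```
from io import StringIO

def parse_label_lines(lines):
    # Same splitting rule as cre_groundtruth_dict_fromtxt(): space-separated,
    # key is the image name with its extension stripped.
    img_gt = {}
    for line in lines:
        parts = line.strip().split(" ")
        img_gt[parts[0].split(".")[0]] = parts[1]
    return img_gt

fake_label_file = StringIO("ILSVRC2012_val_00000001.JPEG 65\n"
                           "ILSVRC2012_val_00000002.JPEG 970\n")
print(parse_label_lines(fake_label_file))
# {'ILSVRC2012_val_00000001': '65', 'ILSVRC2012_val_00000002': '970'}
```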
- -import os -import sys -from PIL import Image -import numpy as np -import multiprocessing - - -model_config = { - 'resnet': { - 'resize': 256, - 'centercrop': 224, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv3': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.485, 0.456, 0.406], - 'std': [0.229, 0.224, 0.225], - }, - 'inceptionv4': { - 'resize': 342, - 'centercrop': 299, - 'mean': [0.5, 0.5, 0.5], - 'std': [0.5, 0.5, 0.5], - }, -} - - -def center_crop(img, output_size): - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size - crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) - return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) - - -def resize(img, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) - - -def gen_input_bin(mode_type, file_batches, batch): - i = 0 - for file in file_batches[batch]: - i =i + 1 - print("batch", batch, file, "===", i) - - # RGBA to RGB - image = Image.open(os.path.join(src_path, file)).convert('RGB') - image = resize(image, model_config[mode_type]['resize']) # Resize - image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop - img = np.array(image, dtype=np.float32) - img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW - img = img / 255. # ToTensor: div 255 - img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean - img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def preprocess(mode_type, src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) - thread_pool.close() - thread_pool.join() - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") - mode_type = sys.argv[1] - src_path = sys.argv[2] - save_path = sys.argv[3] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - if mode_type not in model_config: - model_type_help = "model type: " - for key in model_config.keys(): - model_type_help += key - model_type_help += ' ' - raise Exception(model_type_help) - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(mode_type, src_path, save_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from PIL import Image +import numpy as np +import multiprocessing + + +model_config = { + 'resnet': { + 'resize': 256, + 'centercrop': 224, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv3': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + }, + 'inceptionv4': { + 'resize': 342, + 'centercrop': 299, + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + }, +} + + +def center_crop(img, output_size): + if isinstance(output_size, int): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) + + +def resize(img, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def gen_input_bin(mode_type, file_batches, batch): + i = 0 + for file in file_batches[batch]: + i =i + 1 + print("batch", batch, file, "===", i) + + # RGBA to RGB + image = Image.open(os.path.join(src_path, file)).convert('RGB') + image = resize(image, model_config[mode_type]['resize']) # Resize + image = center_crop(image, model_config[mode_type]['centercrop']) # CenterCrop + img = np.array(image, dtype=np.float32) + img = img.transpose(2, 0, 1) # ToTensor: HWC -> CHW + img = img / 255. 
# ToTensor: div 255 + img -= np.array(model_config[mode_type]['mean'], dtype=np.float32)[:, None, None] # Normalize: mean + img /= np.array(model_config[mode_type]['std'], dtype=np.float32)[:, None, None] # Normalize: std + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def preprocess(mode_type, src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 500] for i in range(0, 50000, 500) if files[i:i + 500] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(mode_type, file_batches, batch)) + thread_pool.close() + thread_pool.join() + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [model_type] [src_path] [save_path]") + mode_type = sys.argv[1] + src_path = sys.argv[2] + save_path = sys.argv[3] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + if mode_type not in model_config: + model_type_help = "model type: " + for key in model_config.keys(): + model_type_help += key + model_type_help += ' ' + raise Exception(model_type_help) + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(mode_type, src_path, save_path) diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_pth2onnx.py index 2ad540e8caa9572cfe7eae3d8df8d1fe2e555f0a..e6554df3833637c8af9cdb418aacd6fdacc46f53 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/dino_resnet50_pth2onnx.py @@ -1,46 +1,46 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
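For readers cross-checking the hand-written preprocessing in dino_resnet50_preprocess.py: the 'resnet' configuration (resize 256, center-crop 224, divide by 255, ImageNet mean/std, HWC to CHW float32) matches the usual torchvision evaluation transform. A sketch of the equivalent pipeline is below; torchvision is assumed to be available, the image path is a placeholder, and small numerical differences against the PIL/NumPy path are possible due to interpolation and rounding:

```
import numpy as np
from PIL import Image
from torchvision import transforms

eval_tf = transforms.Compose([
    transforms.Resize(256),                     # shorter side to 256, bilinear
    transforms.CenterCrop(224),
    transforms.ToTensor(),                      # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

img = Image.open("example.JPEG").convert("RGB")  # placeholder path
x = eval_tf(img).numpy().astype(np.float32)      # shape (3, 224, 224)
x.tofile("example.bin")                          # same raw layout as the .bin inputs
```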
-import torch -import torchvision.models as models -import torch.nn as nn - - -model=models.resnet50(pretrained=True) -embed_dim = model.fc.weight.shape[1] -model.fc = nn.Identity() - -state_dict = torch.load('dino_resnet50_pretrain.pth', map_location='cpu') -temp=nn.Linear(2048,1000) -state_dict2 = torch.load('dino_resnet50_linearweights.pth', map_location='cpu')["state_dict"] -temp.weight.data = state_dict2['module.linear.weight'] -temp.bias.data = state_dict2['module.linear.bias'] -model.load_state_dict(state_dict, strict=True) - -model.fc=temp -model.eval() -x = torch.randn(1, 3, 224, 224, requires_grad=True) -model.to(device='cpu') -torch_out = model(x.to(device="cpu")) - -# Export the model -torch.onnx.export(model, # model being run - x, # model input (or a tuple for multiple inputs) - "dino_resnet50.onnx", # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=10, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names - output_names = ['output'], # the model's output names - dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
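After the export performed by dino_resnet50_pth2onnx.py, it can be worth sanity-checking the ONNX graph before converting it to an OM model. A minimal sketch using onnxruntime is shown below; it assumes dino_resnet50.onnx exists in the working directory and that onnxruntime is installed, and it only checks the output shape (comparing against the PyTorch forward pass would additionally require rebuilding the model as the script does):

```
import numpy as np
import onnxruntime as ort

# Load the exported graph on CPU and run a random batch through it.
sess = ort.InferenceSession("dino_resnet50.onnx",
                            providers=["CPUExecutionProvider"])
x = np.random.randn(1, 3, 224, 224).astype(np.float32)

# 'input' / 'output' match the input_names / output_names used at export time.
(logits,) = sess.run(["output"], {"input": x})
print("ONNX output shape:", logits.shape)        # expected (1, 1000)
```

Because the export declares batch_size as a dynamic axis, the same check can be repeated with a batch of 16 to match the bs16 measurements in the README.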
+import torch +import torchvision.models as models +import torch.nn as nn + + +model=models.resnet50(pretrained=True) +embed_dim = model.fc.weight.shape[1] +model.fc = nn.Identity() + +state_dict = torch.load('dino_resnet50_pretrain.pth', map_location='cpu') +temp=nn.Linear(2048,1000) +state_dict2 = torch.load('dino_resnet50_linearweights.pth', map_location='cpu')["state_dict"] +temp.weight.data = state_dict2['module.linear.weight'] +temp.bias.data = state_dict2['module.linear.bias'] +model.load_state_dict(state_dict, strict=True) + +model.fc=temp +model.eval() +x = torch.randn(1, 3, 224, 224, requires_grad=True) +model.to(device='cpu') +torch_out = model(x.to(device="cpu")) + +# Export the model +torch.onnx.export(model, # model being run + x, # model input (or a tuple for multiple inputs) + "dino_resnet50.onnx", # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=10, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names = ['input'], # the model's input names + output_names = ['output'], # the model's output names + dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes 'output' : {0 : 'batch_size'}}) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/get_info.py b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/get_info.py index 8bd92616fcc3b327ca19322d8106d2b2be237d51..89b89cf20ad8e6e9ae784413914079d4f851e6a6 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/get_info.py @@ -1,59 +1,59 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/test/parse.py b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/test/parse.py index 54fe38f45876316aa483f626e7512830f289a2e1..7922b1d1643fdec070c5dbbc1100ed83f8273922 100644 --- a/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/test/parse.py +++ b/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50/test/parse.py @@ -1,31 +1,31 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - - print(content) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + + print(content) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/LICENSE b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/LICENSE index 753842b6720f7980d411ecf2c78eb4ef220b9df8..f49a4e16e68b128803cc2dcea614603632b04eac 100644 --- a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/get_info.py b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/get_info.py index 16bebcfc75fa5903434d2fbcee780e2e7ac4bd84..70e007ac5c49dc1ddc85fcbeb33ba54018f56b06 100644 --- a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/get_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/get_info.py @@ -1,62 +1,62 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - print(index,'done') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - print(index,'done') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + print(index,'done') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + print(index,'done') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/vision_metric_ImageNet.py b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/vision_metric_ImageNet.py index 362f2484e8288dd3df6fa212678dc9449dbbed29..583340a19f2fc6e99faed85526c906f8bd12d7ba 100644 --- a/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/vision_metric_ImageNet.py +++ b/ACL_PyTorch/contrib/cv/classfication/pnasnet5large/vision_metric_ImageNet.py @@ -1,184 +1,184 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - imgName = temp[0].split(".")[0] - imgLab = temp[1] - img_gt_dict[imgName] = imgLab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - if data == '': - n_label = 0 - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, prob in enumerate(temp): - data_vec[ind] = np.float32(prob) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - resCnt = 0 - n_labels = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - n_labels = ret[1] - sort_index = np.argsort(-prediction) - gt = img_gt_dict[img_name] - if (n_labels == 1000): - realLabel = int(gt) - elif (n_labels == 1001): - realLabel = int(gt) + 1 - else: - realLabel = int(gt) - - resCnt = min(len(sort_index), topn) - for i in range(resCnt): - if (str(realLabel) == str(sort_index[i])): - count_hit[i] += 1 - break - - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(resCnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ 
== '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - - if not (os.path.exists(annotation_file_path)): - print("Ground truth file does not exist.") - - if not (os.path.exists(result_json_path)): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) - print("Time used:", elapsed) - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + imgName = temp[0].split(".")[0] + imgLab = temp[1] + img_gt_dict[imgName] = imgLab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + if data == '': + n_label = 0 + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, prob in enumerate(temp): + data_vec[ind] = np.float32(prob) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param 
result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + resCnt = 0 + n_labels = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + n_labels = ret[1] + sort_index = np.argsort(-prediction) + gt = img_gt_dict[img_name] + if (n_labels == 1000): + realLabel = int(gt) + elif (n_labels == 1001): + realLabel = int(gt) + 1 + else: + realLabel = int(gt) + + resCnt = min(len(sort_index), topn) + for i in range(resCnt): + if (str(realLabel) == str(sort_index[i])): + count_hit[i] += 1 + break + + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(resCnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + + if not (os.path.exists(annotation_file_path)): + print("Ground truth file does not exist.") + + if not (os.path.exists(result_json_path)): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) + print("Time used:", elapsed) + diff --git a/ACL_PyTorch/contrib/cv/classfication/vit-small/README.md b/ACL_PyTorch/contrib/cv/classfication/vit-small/README.md index ff71f12feee2abb5a7df40da65300d2d64fb2354..54a20b8c4562f2533af284fa621e7023a7c070ed 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit-small/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/vit-small/README.md @@ -1,70 +1,70 @@ -# vit-small 模型PyTorch离线推理指导 - -## 1. 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -~~~shell -pip3.7 install -r requirements.txt -~~~ - -2.获取,修改与安装开源模型代码 - -~~~shell -git clone https://github.com/rwightman/pytorch-image-models.git -b master -cd pytorch-image-models/ -patch -p1 < ../vit_small_patch16_224.patch -cd .. 
-~~~ - -3.获取权重文件 - -将权重文件放到当前工作目录,可以通过以下命令下载: - -~~~shell -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/vit-small/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz -~~~ - -4.数据集 - -该模型使用 `ImageNet` 官网的5万张验证集进行测试,可从 [ImageNet官网](http://www.image-net.org/) 获取 `val` 数据集与标签,分别存放在 `/home/datasets/imagenet/val` 与 `/home/datasets/imagenet/val_label.txt` - -最终目录结构应为 - -~~~txt -imagenet/ -|-- val/ -|-- val_label.txt -~~~ - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - -将 `benchmark.x86_64` 或 `benchmark.aarch64` 放到当前工作目录 - -## 2. 离线推理 - -t4上执行,确保卡空闲时执行测试 - -~~~shell -bash perf_g.sh -# 脚本3-11行是对gpu bs1性能测试, 13-21行是对gpu bs16性能测试 -~~~ - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -~~~shell -bash ./test/pth2om.sh -# 脚本中2-7行是pth2onnx,9-16行是对onnx进行onnxsim优化, 18-27行是利用onnxsim优化后的onnx转om - -bash ./test/eval_acc_perf.sh --datasets_path=real_data_path -# 如不指定--datasets_path,则采用默认数据集路径:/home/datasets/imagenet/ -# 脚本中12-19行为数据集前处理,同时生成bin文件信息,20-30行是推理过程,32-44行获取精度数据,45-56行获取性能数据 -~~~ - -**评测结果** - -| 模型 | 仓库pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| -------------- | ----------- | --------------- | -------- | -------- | -| vit-small bs1 | top1:81.388 | top1:81.1 | 222.4976 | 200.9196 | -| vit-small bs16 | top1:81.388 | top1:81.1 | 657.9001 | 204.0776 | - +# vit-small 模型PyTorch离线推理指导 + +## 1. 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +~~~shell +pip3.7 install -r requirements.txt +~~~ + +2.获取,修改与安装开源模型代码 + +~~~shell +git clone https://github.com/rwightman/pytorch-image-models.git -b master +cd pytorch-image-models/ +patch -p1 < ../vit_small_patch16_224.patch +cd .. +~~~ + +3.获取权重文件 + +将权重文件放到当前工作目录,可以通过以下命令下载: + +~~~shell +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/vit-small/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz +~~~ + +4.数据集 + +该模型使用 `ImageNet` 官网的5万张验证集进行测试,可从 [ImageNet官网](http://www.image-net.org/) 获取 `val` 数据集与标签,分别存放在 `/home/datasets/imagenet/val` 与 `/home/datasets/imagenet/val_label.txt` + +最终目录结构应为 + +~~~txt +imagenet/ +|-- val/ +|-- val_label.txt +~~~ + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + +将 `benchmark.x86_64` 或 `benchmark.aarch64` 放到当前工作目录 + +## 2. 
离线推理 + +t4上执行,确保卡空闲时执行测试 + +~~~shell +bash perf_g.sh +# 脚本3-11行是对gpu bs1性能测试, 13-21行是对gpu bs16性能测试 +~~~ + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +~~~shell +bash ./test/pth2om.sh +# 脚本中2-7行是pth2onnx,9-16行是对onnx进行onnxsim优化, 18-27行是利用onnxsim优化后的onnx转om + +bash ./test/eval_acc_perf.sh --datasets_path=real_data_path +# 如不指定--datasets_path,则采用默认数据集路径:/home/datasets/imagenet/ +# 脚本中12-19行为数据集前处理,同时生成bin文件信息,20-30行是推理过程,32-44行获取精度数据,45-56行获取性能数据 +~~~ + +**评测结果** + +| 模型 | 仓库pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| -------------- | ----------- | --------------- | -------- | -------- | +| vit-small bs1 | top1:81.388 | top1:81.1 | 222.4976 | 200.9196 | +| vit-small bs16 | top1:81.388 | top1:81.1 | 657.9001 | 204.0776 | + diff --git a/ACL_PyTorch/contrib/cv/classfication/vit-small/env.sh b/ACL_PyTorch/contrib/cv/classfication/vit-small/env.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/LICENSE b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/gen_dataset_info.py index 6a69d4d17ff7b9c2f1b7a030b8601a743228f541..3fbb7d84620be98432b0740eaa9c07cdb6d85fe4 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/gen_dataset_info.py @@ -1,64 +1,64 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/modelzoo_level.txt index 32dd0521ece8acac5fffdaf2413f6d17a51b2e3c..7e460da1913cc3d023396644aea19b219b8597a3 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:POK -PerfStatus:NOK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:POK +PerfStatus:NOK +ModelConvert:OK QuantStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/requirements.txt index 2200a3704d12785f84b570c5187a14334602120f..c7e8af10a50c05b4312266b0162a55407bd10fa8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/requirements.txt @@ -1,5 +1,5 @@ -onnx -torch=1.5.0 -torchvision>=0.5.0 -pyyaml +onnx +torch=1.5.0 +torchvision>=0.5.0 +pyyaml timm \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_preprocess.py index 390c38b4f67ed64231315210907322d3696d7da4..7afcd7bef332827eff13c78a5d1a22534743bdee 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_preprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_preprocess.py @@ -1,53 +1,53 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -import os -from PIL import Image -import numpy as np -import timm -from timm.data import resolve_data_config -from timm.data.transforms_factory import create_transform -import argparse - - -parser = argparse.ArgumentParser(description='Path', add_help=False) -parser.add_argument('--data-path', metavar='DIR', - help='path to dataset') -parser.add_argument('--store-path', metavar='DIR', - help='path to store') - - -def preprocess(src_path, save_path): - os.mkdir(save_path) - model = timm.create_model('vit_base_patch32_224') - model.eval() - config = resolve_data_config({},model=model) - transform = create_transform(**config) - i = 0 - in_files = os.listdir(src_path) - for file in in_files: - i = i + 1 - print(file, "===", i) - input_image = Image.open(src_path+'/'+file).convert('RGB') - input_tensor = transform(input_image) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -if __name__ == "__main__": - args = parser.parse_args() +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +import os +from PIL import Image +import numpy as np +import timm +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform +import argparse + + +parser = argparse.ArgumentParser(description='Path', add_help=False) +parser.add_argument('--data-path', metavar='DIR', + help='path to dataset') +parser.add_argument('--store-path', metavar='DIR', + help='path to store') + + +def preprocess(src_path, save_path): + os.mkdir(save_path) + model = timm.create_model('vit_base_patch32_224') + model.eval() + config = resolve_data_config({},model=model) + transform = create_transform(**config) + i = 0 + in_files = os.listdir(src_path) + for file in in_files: + i = i + 1 + print(file, "===", i) + input_image = Image.open(src_path+'/'+file).convert('RGB') + input_tensor = transform(input_image) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +if __name__ == "__main__": + args = parser.parse_args() preprocess(args.data_path,args.store_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_pth2onnx.py index d64775b88a01b169f2b6d3c18075e74552f01a10..a52537b0c1b41a4bcd99542d5be8fa8dee3bbd39 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224/vit_base_patch32_224_pth2onnx.py @@ -1,41 +1,41 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -import torch -import timm -import argparse - - -parser = argparse.ArgumentParser(description='Path', add_help=False) -parser.add_argument('--model-path', default="B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz", metavar='DIR', - help='path to model') -parser.add_argument('--batch-size',default="1", type=int, - help='batch size') - - -def main(): - args = parser.parse_args() - model = timm.create_model('vit_base_patch32_224') - model.load_pretrained(args.model_path) - model.eval() - tensor = torch.zeros(args.batch_size,3,224,224) - torch.onnx.export(model, tensor, "vit_bs"+str(args.batch_size)+".onnx", opset_version=11, - do_constant_folding=True, input_names=["input"], output_names=["output"]) - - -if __name__ == "__main__": +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" + + +import torch +import timm +import argparse + + +parser = argparse.ArgumentParser(description='Path', add_help=False) +parser.add_argument('--model-path', default="B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz", metavar='DIR', + help='path to model') +parser.add_argument('--batch-size',default="1", type=int, + help='batch size') + + +def main(): + args = parser.parse_args() + model = timm.create_model('vit_base_patch32_224') + model.load_pretrained(args.model_path) + model.eval() + tensor = torch.zeros(args.batch_size,3,224,224) + torch.onnx.export(model, tensor, "vit_bs"+str(args.batch_size)+".onnx", opset_version=11, + do_constant_folding=True, input_names=["input"], output_names=["output"]) + + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/env.sh b/ACL_PyTorch/contrib/cv/classfication/volo/env.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/modify.py b/ACL_PyTorch/contrib/cv/classfication/volo/modify.py index 9cbe1452943bd50a85dd8660380bc54312c3bd4c..a5485aedf17a622a3a305033410e098d91e0fefb 100644 --- a/ACL_PyTorch/contrib/cv/classfication/volo/modify.py +++ b/ACL_PyTorch/contrib/cv/classfication/volo/modify.py @@ -1,29 +1,29 @@ -import numpy as np -from MagicONNX.magiconnx import OnnxGraph -import argparse - -INT32_MAX = 2147483647 -INT32_MIN = -2147483648 - -def modify(path, output): - graph = OnnxGraph(path) - col2ims = graph.get_nodes("Col2im") - for idx, node in enumerate(col2ims): - attr = node['output_size'] - node.attrs.pop("output_size") - new_init = graph.add_initializer(f'output_size_{node.name}', np.array(attr).astype(np.int32)) - node.inputs = [node.inputs[0], f'output_size_{node.name}'] - - graph.save(output) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='modify the onnx node') - parser.add_argument('--src', type=str, default='./d1_224_84.2.pth.tar', - help='weights of pytorch dir') - parser.add_argument('--des', type=str, default='./volo_d1_224_Col2im.onnx', - help='weights of onnx dir') - args = parser.parse_args() - modify(args.src, args.des) - print("modify the onnx successfully!") - - +import numpy as np +from MagicONNX.magiconnx import OnnxGraph +import argparse + +INT32_MAX = 2147483647 +INT32_MIN = -2147483648 + +def modify(path, output): + graph = OnnxGraph(path) + col2ims = graph.get_nodes("Col2im") + for idx, node in enumerate(col2ims): + attr = node['output_size'] + node.attrs.pop("output_size") + new_init = graph.add_initializer(f'output_size_{node.name}', np.array(attr).astype(np.int32)) + node.inputs = [node.inputs[0], f'output_size_{node.name}'] + + graph.save(output) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='modify the onnx node') + parser.add_argument('--src', type=str, default='./d1_224_84.2.pth.tar', + help='weights of pytorch dir') + parser.add_argument('--des', type=str, default='./volo_d1_224_Col2im.onnx', + help='weights of onnx dir') + args = parser.parse_args() + modify(args.src, args.des) + print("modify the onnx successfully!") + + diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/readme.md b/ACL_PyTorch/contrib/cv/classfication/volo/readme.md index f553e3bee1566ed089f6c4ebab60b357122c7dc9..3e440378b387f478eaa367ac7483b5c76a5906a8 100644 --- a/ACL_PyTorch/contrib/cv/classfication/volo/readme.md +++ 
b/ACL_PyTorch/contrib/cv/classfication/volo/readme.md @@ -1,80 +1,80 @@ -# VOLO - -This implements training of volo_d1 on the ImageNet-2012 dataset and token labeling, mainly modified from [sail-sg/volo](https://github.com/sail-sg/volo). - -## VOLO Detail - -There is an error of Col2im operator on pth2onnx, define the OP in volo.py. -- The check of onnx should be commented out. -Example: -File "python3.8/site-packages/torch/onnx/utils.py", line 785, in _export -```bash -#if (operator_export_type is OperatorExportTypes.ONNX) and (not val_use_external_data_format): - #try: - #_check_onnx_proto(proto) - #except RuntimeError as e: - #raise CheckerError(e) -``` -## Requirements - -- Prepare the checkpoint of pytorch -- `pip install -r requirements.txt` -- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models -- install MagicONNX - ```bash - git clone https://gitee.com/Ronnie_zheng/MagicONNX.git - cd MagicONNX - pip install . - ``` -- compile msame - reference from https://gitee.com/ascend/tools/tree/master/msame -```bash - git clone https://gitee.com/ascend/tools.git - #如下为设置环境变量的示例,请将/home/HwHiAiUser/Ascend/ascend-toolkit/latest替换为Ascend 的ACLlib安装包的实际安装路径。 - export DDK_PATH=/home/HwHiAiUser/Ascend/ascend-toolkit/latest - export NPU_HOST_LIB=/home/HwHiAiUser/Ascend/ascend-toolkit/latest/acllib/lib64/stub - - cd $HOME/AscendProjects/tools/msame/ - ./build.sh g++ $HOME/AscendProjects/tools/msame/out -``` - -## preprocess the dataset - -Because we use msame to inference, so we should preprocess origin dataset to `.bin` file. -And different batchsize should be different binary file. The command is below: - -```bash -python volo_preprocess.py --src /opt/npu/val --des /opt/npu/data_bs1 --batchsize 1 -python volo_preprocess.py --src /opt/npu/val --des /opt/npu/data_bs16 --batchsize 16 -``` -Then we get the binary dataset in `/opt/npu/data_bs1` or `/opt/npu/data_bs16` and also the label txt.The file named `volo_val_bs1.txt` or `volo_val_bs16.txt` - -## Inference -```bash -# pth2om for batchsize 1 -bash test/pth2om.sh d1_224_84.pth.tar volo_bs1.onnx volo_modify_bs1.onnx volo_bs1 1 "input:1,3,224,224" -# pth2om for batchsize 16 -bash test/pth2om.sh d1_224_84.pth.tar volo_bs16.onnx volo_modify_bs16.onnx volo_bs16 16 "input:16,3,224,224" - -# inference with batchsize 1 with performance -./msame --model "volo_bs1.om" --input "/opt/npu/data_bs1" --output "./" --outfmt TXT - -# inference with batchsize 16 with performance -./msame --model "volo_bs16.om" --input "/opt/npu/data_bs16" --output "./" --outfmt TXT - -# compute the val accuracy, modify the batchsize, result dir and label dir -bash eval_acc_perf.sh 1 /path/to/result /path/to/label.txt -``` - -## Volo inference result -| accuracy | top1 | -| :------: | :--------: | -| bs1 | 80.619 | -| bs16 | 82.275 | - -|batchsize| performance | average time | average time without first | -| :-----: | :---------: | :-----------: | :-------------------------: | -| bs1 | 10.08fps | 396.46ms | 396.46ms | -| bs16 | 17.6fps | 3635.25ms | 3635.25ms | - - +# VOLO + +This implements training of volo_d1 on the ImageNet-2012 dataset and token labeling, mainly modified from [sail-sg/volo](https://github.com/sail-sg/volo). + +## VOLO Detail + +There is an error of Col2im operator on pth2onnx, define the OP in volo.py. +- The check of onnx should be commented out. 
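> Editor's note (not part of the original patch): the readme asks you to hand-edit the installed `torch/onnx/utils.py` to skip the ONNX proto check. A less invasive sketch, under the assumption that the installed torch version exposes `torch.onnx.utils._check_onnx_proto` (the very call commented out in the Example that follows), is to rebind that name to a no-op from the export script before calling `torch.onnx.export`:

```python
# Hypothetical alternative to editing site-packages: neutralize the checker
# call quoted in the Example below. Assumes torch.onnx.utils._check_onnx_proto
# exists in the installed torch version.
import torch.onnx.utils as onnx_utils

onnx_utils._check_onnx_proto = lambda proto: None  # skip ONNX proto validation during export
```

> Both approaches have the same effect; editing site-packages (shown in the Example below) persists across scripts, while the monkey patch above only affects the current process.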
+Example: +File "python3.8/site-packages/torch/onnx/utils.py", line 785, in _export +```bash +#if (operator_export_type is OperatorExportTypes.ONNX) and (not val_use_external_data_format): + #try: + #_check_onnx_proto(proto) + #except RuntimeError as e: + #raise CheckerError(e) +``` +## Requirements + +- Prepare the checkpoint of pytorch +- `pip install -r requirements.txt` +- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models +- install MagicONNX + ```bash + git clone https://gitee.com/Ronnie_zheng/MagicONNX.git + cd MagicONNX + pip install . + ``` +- compile msame + reference from https://gitee.com/ascend/tools/tree/master/msame +```bash + git clone https://gitee.com/ascend/tools.git + #如下为设置环境变量的示例,请将/home/HwHiAiUser/Ascend/ascend-toolkit/latest替换为Ascend 的ACLlib安装包的实际安装路径。 + export DDK_PATH=/home/HwHiAiUser/Ascend/ascend-toolkit/latest + export NPU_HOST_LIB=/home/HwHiAiUser/Ascend/ascend-toolkit/latest/acllib/lib64/stub + + cd $HOME/AscendProjects/tools/msame/ + ./build.sh g++ $HOME/AscendProjects/tools/msame/out +``` + +## preprocess the dataset + +Because we use msame to inference, so we should preprocess origin dataset to `.bin` file. +And different batchsize should be different binary file. The command is below: + +```bash +python volo_preprocess.py --src /opt/npu/val --des /opt/npu/data_bs1 --batchsize 1 +python volo_preprocess.py --src /opt/npu/val --des /opt/npu/data_bs16 --batchsize 16 +``` +Then we get the binary dataset in `/opt/npu/data_bs1` or `/opt/npu/data_bs16` and also the label txt.The file named `volo_val_bs1.txt` or `volo_val_bs16.txt` + +## Inference +```bash +# pth2om for batchsize 1 +bash test/pth2om.sh d1_224_84.pth.tar volo_bs1.onnx volo_modify_bs1.onnx volo_bs1 1 "input:1,3,224,224" +# pth2om for batchsize 16 +bash test/pth2om.sh d1_224_84.pth.tar volo_bs16.onnx volo_modify_bs16.onnx volo_bs16 16 "input:16,3,224,224" + +# inference with batchsize 1 with performance +./msame --model "volo_bs1.om" --input "/opt/npu/data_bs1" --output "./" --outfmt TXT + +# inference with batchsize 16 with performance +./msame --model "volo_bs16.om" --input "/opt/npu/data_bs16" --output "./" --outfmt TXT + +# compute the val accuracy, modify the batchsize, result dir and label dir +bash eval_acc_perf.sh 1 /path/to/result /path/to/label.txt +``` + +## Volo inference result +| accuracy | top1 | +| :------: | :--------: | +| bs1 | 80.619 | +| bs16 | 82.275 | + +|batchsize| performance | average time | average time without first | +| :-----: | :---------: | :-----------: | :-------------------------: | +| bs1 | 10.08fps | 396.46ms | 396.46ms | +| bs16 | 17.6fps | 3635.25ms | 3635.25ms | + + diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/volo.py b/ACL_PyTorch/contrib/cv/classfication/volo/volo.py old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/volo_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/volo/volo_postprocess.py old mode 100755 new mode 100644 index 1eae6503ed93c2409c7282edbae319ac33616c10..39509905fffd4b5388199d249ba74f89178de909 --- a/ACL_PyTorch/contrib/cv/classfication/volo/volo_postprocess.py +++ b/ACL_PyTorch/contrib/cv/classfication/volo/volo_postprocess.py @@ -1,83 +1,83 @@ -import os -import numpy as np -import argparse - -def read_txt_data(path): - line = "" - with open(path, 'r') as f: - line = f.read() - if line != "": - return np.array([float(s) for s in line.split(" ") if s != "" and s != "\n"]) - return None - -def read_label(path, bs): - with 
open(path, 'r') as f: - content = f.read() - lines = [line for line in content.split('\n')] - if lines[-1] == "": - lines = lines[:-1] - if bs == 1: - labels = [int(line.split(' ')[-2]) for line in lines] - labels = np.array(labels) - labels = np.expand_dims(labels, 1) - return labels - else: - total_label = np.zeros((len(files) * bs)) - base = 0 - for line in lines: - labels = line.split(' ')[1:-1] - labels = [int(label) for label in labels] - for i in range(len(labels)): - total_label[base * bs + i] = labels[i] - base = base + 1 - total_label = np.expand_dims(total_label, 1) - return total_label - -def get_topK(files, topk, bs): - if bs == 1: - matrix = np.zeros((len(files), topk)) - else: - matrix = np.zeros((len(files) * bs, topk)) - for file in files: - data = read_txt_data(root + file) - if bs == 1: - line = np.argsort(data)[-topk:][::-1] - index = int(file.split('_')[1]) - matrix[index-1, :] = line[:topk] - else: - base_index = int(file.split('_')[1]) - newdata = data.reshape(bs, 1000) - for i in range(bs): - line = np.argsort(newdata[i,:])[-topk:][::-1] - matrix[base_index * bs + i, :] = line[:topk] - return matrix.astype(np.int64) - -def get_topK_acc(matrix, labels, k): - matrix_tmp = matrix[:, :k] - match_array = np.logical_or.reduce(matrix_tmp==labels, axis=1) - topk_acc = match_array.sum() / match_array.shape[0] - return topk_acc - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='VOLO validation') - parser.add_argument('--batchsize', type=int, default='1', - help='batchsize.') - parser.add_argument('--result', type=str, default='./', - help='output dir of msame') - parser.add_argument('--label', type=str, default='./volo_val_bs1.txt', - help='label txt dir') - args = parser.parse_args() - root = args.result - bs = args.batchsize - label_dir = args.label - files = None - if os.path.exists(root): - files=os.listdir(root) - else: - print('this path not exist') - exit(0) - matrix = get_topK(files, 6, bs) - labels = read_label(label_dir, bs) - for i in range(1, 6): - acc = get_topK_acc(matrix, labels, i) - print("acc@top{}: {:.3f}%".format(i, 100*acc)) +import os +import numpy as np +import argparse + +def read_txt_data(path): + line = "" + with open(path, 'r') as f: + line = f.read() + if line != "": + return np.array([float(s) for s in line.split(" ") if s != "" and s != "\n"]) + return None + +def read_label(path, bs): + with open(path, 'r') as f: + content = f.read() + lines = [line for line in content.split('\n')] + if lines[-1] == "": + lines = lines[:-1] + if bs == 1: + labels = [int(line.split(' ')[-2]) for line in lines] + labels = np.array(labels) + labels = np.expand_dims(labels, 1) + return labels + else: + total_label = np.zeros((len(files) * bs)) + base = 0 + for line in lines: + labels = line.split(' ')[1:-1] + labels = [int(label) for label in labels] + for i in range(len(labels)): + total_label[base * bs + i] = labels[i] + base = base + 1 + total_label = np.expand_dims(total_label, 1) + return total_label + +def get_topK(files, topk, bs): + if bs == 1: + matrix = np.zeros((len(files), topk)) + else: + matrix = np.zeros((len(files) * bs, topk)) + for file in files: + data = read_txt_data(root + file) + if bs == 1: + line = np.argsort(data)[-topk:][::-1] + index = int(file.split('_')[1]) + matrix[index-1, :] = line[:topk] + else: + base_index = int(file.split('_')[1]) + newdata = data.reshape(bs, 1000) + for i in range(bs): + line = np.argsort(newdata[i,:])[-topk:][::-1] + matrix[base_index * bs + i, :] = line[:topk] + return 
matrix.astype(np.int64) + +def get_topK_acc(matrix, labels, k): + matrix_tmp = matrix[:, :k] + match_array = np.logical_or.reduce(matrix_tmp==labels, axis=1) + topk_acc = match_array.sum() / match_array.shape[0] + return topk_acc + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='VOLO validation') + parser.add_argument('--batchsize', type=int, default='1', + help='batchsize.') + parser.add_argument('--result', type=str, default='./', + help='output dir of msame') + parser.add_argument('--label', type=str, default='./volo_val_bs1.txt', + help='label txt dir') + args = parser.parse_args() + root = args.result + bs = args.batchsize + label_dir = args.label + files = None + if os.path.exists(root): + files=os.listdir(root) + else: + print('this path not exist') + exit(0) + matrix = get_topK(files, 6, bs) + labels = read_label(label_dir, bs) + for i in range(1, 6): + acc = get_topK_acc(matrix, labels, i) + print("acc@top{}: {:.3f}%".format(i, 100*acc)) diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/volo_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/volo/volo_preprocess.py old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/volo/volo_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/volo/volo_pth2onnx.py old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/classfication/vovnet39/LICENSE b/ACL_PyTorch/contrib/cv/classfication/vovnet39/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vovnet39/LICENSE +++ b/ACL_PyTorch/contrib/cv/classfication/vovnet39/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vovnet39/README.md b/ACL_PyTorch/contrib/cv/classfication/vovnet39/README.md index 5211e9cebaba06c706dc5e35c79b701097bab276..ed1c41a8080623c253bec528068be62593ef9417 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vovnet39/README.md +++ b/ACL_PyTorch/contrib/cv/classfication/vovnet39/README.md @@ -1,255 +1,255 @@ -vovnet39 Onnx模型端到端推理指导 - -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[vovnet39论文](https://arxiv.org/abs/1904.09730) - -### 1.2 代码地址 -[vovnet39代码](https://github.com/AlexanderBurkhart/cnn_train/tree/505637bcd08021e144c94e81401af6bc71fd46c6/VoVNet.pytorch/models_vovnet) -branch:master -commit_id:505637bcd08021e144c94e81401af6bc71fd46c6 - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 -pytorch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.18.5 -Pillow == 7.2.0 -opencv-python == 4.5.1.48 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -[vovnet39预训练pth权重文件](https://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth) -文件md5sum: 23717a6cadd9729a704f894381444237 - -``` -http://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth -``` -2.vovnet39模型代码在代码仓里 - -``` -github上vovnet39没有安装脚本,在pth2onnx脚本中引用代码仓定义的vovnet39: - -git clone https://github.com/AlexanderBurkhart/cnn_train.git -``` - -3.编写pth2onnx脚本vovnet39_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -4.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 vovnet39_pth2onnx.py vovnet39_torchvision.pth vovnet39.onnx -``` - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 -``` -atc --framework=5 --model=./vovnet39.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=vovnet39_bs1 --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/root/datasets/imagenet/val_label.txt。 - -### 4.2 数据集预处理 -1.预处理脚本vovnet_torch_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 vovnet_torch_preprocess.py /opt/npu/imagenet/val ./prep_dataset -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -python3.7 get_info.py bin ./prep_dataset ./vovnet_prep_bin.info 224 224 -``` -第一个参数为生成的数据集文件格式,第二个为预处理后的数据文件路径,第三个参数为生成的数据集文件保存的路径,第四个和第五个参数分别为模型输入的宽度和高度 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- 
**[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 - -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=vovnet39_bs1.om -input_text_path=./vovnet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False -``` -输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用vision_metric_ImageNet.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 -``` -python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json -``` -第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 -查看输出结果: -``` -{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value -": "76.78%"}, {"key": "Top2 accuracy", "value": "86.6%"}, {"key": "Top3 accuracy", "value": "90.23%"}, {"key": "Top4 accuracy", "value": "92.22%"}, {"key": "Top5 accuracy", "value": "93.43%"}]} -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源TopN精度 -[开源代码仓精度](https://github.com/AlexanderBurkhart/cnn_train/tree/505637bcd08021e144c94e81401af6bc71fd46c6/VoVNet.pytorch) - -``` -Model Acc@1 Acc@5 -vovnet39 76.77 93.43 -``` -### 6.3 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 190.286, latency: 262762 -[data read] throughputRate: 201.42, moduleLatency: 4.96474 -[preprocess] throughputRate: 201.059, moduleLatency: 4.97367 -[infer] throughputRate: 190.829, Interface throughputRate: 281.608, moduleLatency: 4.54289 -[post] throughputRate: 190.829, moduleLatency: 5.2403 -``` -Interface throughputRate: 281.608,281.608x4=1126.432既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` -[e2e] throughputRate: 132.399, latency: 377646 -[data read] throughputRate: 132.922, moduleLatency: 7.5232 -[preprocess] throughputRate: 132.751, moduleLatency: 7.53292 -[infer] throughputRate: 132.746, Interface throughputRate: 382.566, moduleLatency: 4.0689 -[post] throughputRate: 8.2965, moduleLatency: 120.533 -``` -Interface throughputRate: 382.566,382.566x4=1530.264既是batch16 310单卡吞吐率 -batch4性能: - -``` -[e2e] throughputRate: 194.167, latency: 257510 -[data read] throughputRate: 206.005, moduleLatency: 4.85426 -[preprocess] throughputRate: 205.807, moduleLatency: 4.85891 -[infer] throughputRate: 194.685, Interface throughputRate: 354.792, moduleLatency: 4.2876 -[post] throughputRate: 48.6709, moduleLatency: 20.5462 -``` -batch4 310单卡吞吐率:354.792x4=1419.168fps -batch8性能: - -``` -[e2e] throughputRate: 143.83, latency: 347633 -[data read] 
throughputRate: 144.354, moduleLatency: 6.92743 -[preprocess] throughputRate: 144.186, moduleLatency: 6.93549 -[infer] throughputRate: 144.246, Interface throughputRate: 364.129, moduleLatency: 4.20264 -[post] throughputRate: 18.0306, moduleLatency: 55.4613 -``` -batch8 310单卡吞吐率:364.129x4=1456.516fps -batch32性能: - -``` -[e2e] throughputRate: 122.051, latency: 409664 -[data read] throughputRate: 122.581, moduleLatency: 8.15788 -[preprocess] throughputRate: 122.445, moduleLatency: 8.16691 -[infer] throughputRate: 122.448, Interface throughputRate: 382.396, moduleLatency: 4.07549 -[post] throughputRate: 3.82767, moduleLatency: 261.256 -``` -batch32 310单卡吞吐率:382.396x4=1529.584fps - - **性能优化:** - +vovnet39 Onnx模型端到端推理指导 + +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[vovnet39论文](https://arxiv.org/abs/1904.09730) + +### 1.2 代码地址 +[vovnet39代码](https://github.com/AlexanderBurkhart/cnn_train/tree/505637bcd08021e144c94e81401af6bc71fd46c6/VoVNet.pytorch/models_vovnet) +branch:master +commit_id:505637bcd08021e144c94e81401af6bc71fd46c6 + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 +pytorch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.18.5 +Pillow == 7.2.0 +opencv-python == 4.5.1.48 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[vovnet39预训练pth权重文件](https://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth) +文件md5sum: 23717a6cadd9729a704f894381444237 + +``` +http://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth +``` +2.vovnet39模型代码在代码仓里 + +``` +github上vovnet39没有安装脚本,在pth2onnx脚本中引用代码仓定义的vovnet39: + +git clone https://github.com/AlexanderBurkhart/cnn_train.git +``` + +3.编写pth2onnx脚本vovnet39_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +4.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 vovnet39_pth2onnx.py vovnet39_torchvision.pth vovnet39.onnx +``` + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 +``` +atc --framework=5 --model=./vovnet39.onnx --input_format=NCHW --input_shape="image:1,3,224,224" --output=vovnet39_bs1 --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[ImageNet官网](http://www.image-net.org)的5万张验证集进行测试,图片与标签分别存放在/opt/npu/imagenet/val与/root/datasets/imagenet/val_label.txt。 + +### 4.2 数据集预处理 +1.预处理脚本vovnet_torch_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 vovnet_torch_preprocess.py /opt/npu/imagenet/val 
./prep_dataset +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +python3.7 get_info.py bin ./prep_dataset ./vovnet_prep_bin.info 224 224 +``` +第一个参数为生成的数据集文件格式,第二个为预处理后的数据文件路径,第三个参数为生成的数据集文件保存的路径,第四个和第五个参数分别为模型输入的宽度和高度 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 + +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=vovnet39_bs1.om -input_text_path=./vovnet_prep_bin.info -input_width=224 -input_height=224 -output_binary=False -useDvpp=False +``` +输出结果默认保存在当前目录result/dumpOutput_devicex,模型只有一个名为class的输出,shape为bs * 1000,数据类型为FP32,对应1000个分类的预测结果,每个输入对应的输出对应一个_x.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用vision_metric_ImageNet.py脚本推理结果与label比对,可以获得Accuracy Top5数据,结果保存在result.json中。 +``` +python3.7 vision_metric_ImageNet.py result/dumpOutput_device0/ /opt/npu/imagenet/val_label.txt ./ result.json +``` +第一个为benchmark输出目录,第二个为数据集配套标签,第三个是生成文件的保存目录,第四个是生成的文件名。 +查看输出结果: +``` +{"title": "Overall statistical evaluation", "value": [{"key": "Number of images", "value": "50000"}, {"key": "Number of classes", "value": "1000"}, {"key": "Top1 accuracy", "value +": "76.78%"}, {"key": "Top2 accuracy", "value": "86.6%"}, {"key": "Top3 accuracy", "value": "90.23%"}, {"key": "Top4 accuracy", "value": "92.22%"}, {"key": "Top5 accuracy", "value": "93.43%"}]} +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源TopN精度 +[开源代码仓精度](https://github.com/AlexanderBurkhart/cnn_train/tree/505637bcd08021e144c94e81401af6bc71fd46c6/VoVNet.pytorch) + +``` +Model Acc@1 Acc@5 +vovnet39 76.77 93.43 +``` +### 6.3 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 190.286, latency: 262762 +[data read] throughputRate: 201.42, moduleLatency: 4.96474 +[preprocess] throughputRate: 201.059, moduleLatency: 4.97367 +[infer] throughputRate: 190.829, Interface throughputRate: 281.608, moduleLatency: 4.54289 +[post] throughputRate: 190.829, moduleLatency: 5.2403 +``` +Interface throughputRate: 281.608,281.608x4=1126.432既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +[e2e] throughputRate: 132.399, latency: 377646 +[data read] throughputRate: 132.922, moduleLatency: 7.5232 +[preprocess] throughputRate: 132.751, moduleLatency: 7.53292 +[infer] throughputRate: 132.746, Interface throughputRate: 382.566, moduleLatency: 4.0689 +[post] throughputRate: 8.2965, moduleLatency: 120.533 +``` +Interface throughputRate: 382.566,382.566x4=1530.264既是batch16 310单卡吞吐率 +batch4性能: + +``` +[e2e] throughputRate: 194.167, latency: 257510 +[data read] throughputRate: 206.005, moduleLatency: 4.85426 +[preprocess] 
throughputRate: 205.807, moduleLatency: 4.85891 +[infer] throughputRate: 194.685, Interface throughputRate: 354.792, moduleLatency: 4.2876 +[post] throughputRate: 48.6709, moduleLatency: 20.5462 +``` +batch4 310单卡吞吐率:354.792x4=1419.168fps +batch8性能: + +``` +[e2e] throughputRate: 143.83, latency: 347633 +[data read] throughputRate: 144.354, moduleLatency: 6.92743 +[preprocess] throughputRate: 144.186, moduleLatency: 6.93549 +[infer] throughputRate: 144.246, Interface throughputRate: 364.129, moduleLatency: 4.20264 +[post] throughputRate: 18.0306, moduleLatency: 55.4613 +``` +batch8 310单卡吞吐率:364.129x4=1456.516fps +batch32性能: + +``` +[e2e] throughputRate: 122.051, latency: 409664 +[data read] throughputRate: 122.581, moduleLatency: 8.15788 +[preprocess] throughputRate: 122.445, moduleLatency: 8.16691 +[infer] throughputRate: 122.448, Interface throughputRate: 382.396, moduleLatency: 4.07549 +[post] throughputRate: 3.82767, moduleLatency: 261.256 +``` +batch32 310单卡吞吐率:382.396x4=1529.584fps + + **性能优化:** + >没有遇到性能不达标的问题,故不需要进行性能优化 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vovnet39/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/vovnet39/requirements.txt index 19bf0c33da4a5c3990a53fb79d681b2804a6df47..59f8a711bf24972d8af91f0ba668c3fc5d91a1bb 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vovnet39/requirements.txt +++ b/ACL_PyTorch/contrib/cv/classfication/vovnet39/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -numpy == 1.18.5 -Pillow == 7.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +numpy == 1.18.5 +Pillow == 7.2.0 opencv-python == 4.5.1.48 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/classfication/vovnet39/test/readme.md b/ACL_PyTorch/contrib/cv/classfication/vovnet39/test/readme.md index bb87e712e838ab85d566959c9ebcf9c615a2009f..9237a2ae4b6f047fde9be01913d14b2242df470e 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vovnet39/test/readme.md +++ b/ACL_PyTorch/contrib/cv/classfication/vovnet39/test/readme.md @@ -1,24 +1,24 @@ -环境准备: - -1.数据集路径 -通用的数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd vovnet39 - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/AlexanderBurkhart/cnn_train.git - -6.获取权重文件 -wget http://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth - -7.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh +环境准备: + +1.数据集路径 +通用的数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd vovnet39 + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/AlexanderBurkhart/cnn_train.git + +6.获取权重文件 +wget http://www.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth + +7.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh diff --git a/ACL_PyTorch/contrib/cv/classfication/vovnet39/vovnet39_pth2onnx.py b/ACL_PyTorch/contrib/cv/classfication/vovnet39/vovnet39_pth2onnx.py index cf2e2190b3aecc2a357300cbc3a24cc85a63ce56..b8fbbf27a2aa9586b4cb943280ce70764eb77259 100644 --- a/ACL_PyTorch/contrib/cv/classfication/vovnet39/vovnet39_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/classfication/vovnet39/vovnet39_pth2onnx.py @@ -1,36 +1,36 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import sys -import torch.onnx -sys.path.append(r"./cnn_train/VoVNet.pytorch") -from collections import OrderedDict -import models_vovnet - -def pth2onnx(input_file, output_file): - model = models_vovnet.vovnet39(pretrained=False) - device_ids = [0] - model = torch.nn.DataParallel(model , device_ids=device_ids) - checkpoint = torch.load(input_file, map_location='cpu') - model.load_state_dict(checkpoint) - model=model.module - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.rand(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - -if __name__=="__main__": - pth2onnx(sys.argv[1],sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +import sys +import torch.onnx +sys.path.append(r"./cnn_train/VoVNet.pytorch") +from collections import OrderedDict +import models_vovnet + +def pth2onnx(input_file, output_file): + model = models_vovnet.vovnet39(pretrained=False) + device_ids = [0] + model = torch.nn.DataParallel(model , device_ids=device_ids) + checkpoint = torch.load(input_file, map_location='cpu') + model.load_state_dict(checkpoint) + model=model.module + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.rand(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + +if __name__=="__main__": + pth2onnx(sys.argv[1],sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/classfication/xcit/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/classfication/xcit/modelzoo_level.txt index 624f885ed6c21f5d36232fec82f622547cfc6470..2e7cc57906a28cb6152073257cc7b211d6979f7d 100644 --- a/ACL_PyTorch/contrib/cv/classfication/xcit/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/classfication/xcit/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PrecisionStatus:OK -ModelConvert:OK -PerfStatus=NOK -AutoTune:POK - +FuncStatus:OK +PrecisionStatus:OK +ModelConvert:OK +PerfStatus=NOK +AutoTune:POK + diff --git a/ACL_PyTorch/contrib/cv/detection/3DUnet/LISCENSE b/ACL_PyTorch/contrib/cv/detection/3DUnet/LISCENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/detection/3DUnet/LISCENSE +++ b/ACL_PyTorch/contrib/cv/detection/3DUnet/LISCENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/3DUnet/UNet3D_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/3DUnet/UNet3D_pth2onnx.py index ca9db19e96e4c6ab6731b60260ad32dd2c1ccffc..5a18441c828c58fb92e8efab7a2b102daf654600 100644 --- a/ACL_PyTorch/contrib/cv/detection/3DUnet/UNet3D_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/3DUnet/UNet3D_pth2onnx.py @@ -1,49 +1,49 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import torch -import lib.medzoo as medzoo - -def pth2onnx(): - args = get_arguments() - input_file = args.input - output_file = args.output - model, optimizer = medzoo.create_model(args) - checkpoint = torch.load(input_file, map_location="cpu") - model.load_state_dict(checkpoint, False) - - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 4, 64, 64, 64) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=False, - opset_version=11) - - -def get_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument('--classes', type=int, default=4) - parser.add_argument('--inChannels', type=int, default=4) - parser.add_argument('--model', type=str, default='UNET3D', - choices=('VNET', 'VNET2', 'UNET3D', 'DENSENET1', 'DENSENET2', 'DENSENET3', 'HYPERDENSENET')) - parser.add_argument('--input', type=str, default='none') - parser.add_argument('--output', type=str, default='none') - args = parser.parse_args() - return args - - -if __name__ == '__main__': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import torch +import lib.medzoo as medzoo + +def pth2onnx(): + args = get_arguments() + input_file = args.input + output_file = args.output + model, optimizer = medzoo.create_model(args) + checkpoint = torch.load(input_file, map_location="cpu") + model.load_state_dict(checkpoint, False) + + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 4, 64, 64, 64) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=False, + opset_version=11) + + +def get_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('--classes', type=int, default=4) + parser.add_argument('--inChannels', type=int, default=4) + parser.add_argument('--model', type=str, default='UNET3D', + choices=('VNET', 'VNET2', 'UNET3D', 'DENSENET1', 'DENSENET2', 'DENSENET3', 'HYPERDENSENET')) + parser.add_argument('--input', type=str, default='none') + parser.add_argument('--output', type=str, default='none') + args = parser.parse_args() + return args + + +if __name__ == '__main__': pth2onnx() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/3DUnet/modify.py b/ACL_PyTorch/contrib/cv/detection/3DUnet/modify.py index ffac9320981c3933b4187b8e58629a9152768c86..75d5144dc2e8598dcb71a87417b4890780d06c1b 100644 --- a/ACL_PyTorch/contrib/cv/detection/3DUnet/modify.py +++ b/ACL_PyTorch/contrib/cv/detection/3DUnet/modify.py @@ -1,113 +1,113 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -from magiconnx import OnnxGraph -import sys - -def modify(path, batchsize): - graph = OnnxGraph(path) - resizes = graph.get_nodes("Resize") - #1 128 4 4 4 -> 1 128*4 4 4 -> 1 128*4 8 8 -> 1 128 4 8*8 -> 1 128 8 8*8 -> 1 128 8 8 8 - shapes1 = [[[1, 128*4, 4, 4], [1,1,2,2], [1, 128, 4, 8*8], [1, 128, 8, 8*8], [1, 128, 8, 8, 8]], - [[1, 64*8, 8, 8], [1,1,2,2], [1, 64, 8, 16*16], [1, 64, 16, 16*16], [1, 64, 16, 16, 16]], - [[1, 32*16, 16, 16], [1,1,2,2], [1, 32, 16, 32*32], [1, 32, 32, 32*32], [1, 32, 32, 32, 32]], - [[1, 16*32, 32, 32], [1,1,2,2], [1, 16, 32, 64*64], [1, 16, 64, 64*64], [1, 16, 64, 64, 64]], - [[1, 4*16, 16, 16], [1,1,2,2], [1, 4, 16, 32*32], [1, 4, 32, 32*32], [1, 4, 32, 32, 32]], - [[1, 4*32, 32, 32], [1,1,2,2], [1, 4, 32, 64*64], [1, 4, 64, 64*64], [1, 4, 64, 64, 64]]] - - #4 128 4 4 4 - shapes4 = [[[4, 128*4, 4, 4], [1,1,2,2], [4, 128, 4, 8*8], [4, 128, 8, 8*8], [4, 128, 8, 8, 8]], - [[4, 64*8, 8, 8], [1,1,2,2], [4, 64, 8, 16*16], [4, 64, 16, 16*16], [4, 64, 16, 16, 16]], - [[4, 32*16, 16, 16], [1,1,2,2], [4, 32, 16, 32*32], [4, 32, 32, 32*32], [4, 32, 32, 32, 32]], - [[4, 16*32, 32, 32], [1,1,2,2], [4, 16, 32, 64*64], [4, 16, 64, 64*64], [4, 16, 64, 64, 64]], - [[4, 4*16, 16, 16], [1,1,2,2], [4, 4, 16, 32*32], [4, 4, 32, 32*32], [4, 4, 32, 32, 32]], - [[4, 4*32, 32, 32], [1,1,2,2], [4, 4, 32, 64*64], [4, 4, 64, 64*64], [4, 4, 64, 64, 64]]] - - #8 128 4 4 4 -> 8 128*4 4 4 -> 8 128*4 8 8 -> 8 128 4 8*8 -> 8 128 8 8*8 -> 8 128 8 8 8 - shapes8 = [[[8, 128*4, 4, 4], [1,1,2,2], [8, 128, 4, 8*8], [8, 128, 8, 8*8], [8, 128, 8, 8, 8]], - [[8, 64*8, 8, 8], [1,1,2,2], [8, 64, 8, 16*16], [8, 64, 16, 16*16], [8, 64, 16, 16, 16]], - [[8, 32*16, 16, 16], [1,1,2,2], [8, 32, 16, 32*32], [8, 32, 32, 32*32], [8, 32, 32, 32, 32]], - [[8, 16*32, 32, 32], [1,1,2,2], [8, 16, 32, 64*64], [8, 16, 64, 64*64], [8, 16, 64, 64, 64]], - [[8, 4*16, 16, 16], [1,1,2,2], [8, 4, 16, 32*32], [8, 4, 32, 32*32], [8, 4, 32, 32, 32]], - [[8, 4*32, 32, 32], [1,1,2,2], [8, 4, 32, 64*64], [8, 4, 64, 64*64], [8, 4, 64, 64, 64]]] - - #16 128 4 4 4 -> 16 128*4 4 4 -> 16 128*4 8 8 -> 16 128 4 8*8 -> 16 128 8 8*8 -> 16 128 8 8 8 - shapes16 = [[[16, 128*4, 4, 4], [1,1,2,2], [16, 128, 4, 8*8], [16, 128, 8, 8*8], [16, 128, 8, 8, 8]], - [[16, 64*8, 8, 8], [1,1,2,2], [16, 64, 8, 16*16], [16, 64, 16, 16*16], [16, 64, 16, 16, 16]], - [[16, 32*16, 16, 16], [1,1,2,2], [16, 32, 16, 32*32], [16, 32, 32, 32*32], [16, 32, 32, 32, 32]], - [[16, 16*32, 32, 32], [1,1,2,2], [16, 16, 32, 64*64], [16, 16, 64, 64*64], [16, 16, 64, 64, 64]], - [[16, 4*16, 16, 16], [1,1,2,2], [16, 4, 16, 32*32], [16, 4, 32, 32*32], [16, 4, 32, 32, 32]], - [[16, 4*32, 32, 32], [1,1,2,2], [16, 4, 32, 64*64], [16, 4, 64, 64*64], [16, 4, 64, 64, 64]]] - - #32 128 4 4 4 -> 32 128*4 4 4 -> 32 128*4 8 8 -> 32 128 4 8*8 -> 32 128 8 8*8 -> 32 128 8 8 8 - shapes32 = [[[32, 128*4, 4, 4], [1,1,2,2], [32, 128, 4, 8*8], [32, 128, 8, 8*8], [32, 128, 8, 8, 8]], - [[32, 64*8, 8, 8], [1,1,2,2], [32, 64, 8, 16*16], [32, 64, 16, 16*16], [32, 64, 16, 16, 16]], - [[32, 32*16, 16, 16], [1,1,2,2], [32, 32, 16, 32*32], [32, 32, 32, 32*32], [32, 32, 32, 32, 32]], - [[32, 16*32, 32, 32], [1,1,2,2], [32, 16, 32, 64*64], [32, 16, 64, 64*64], [32, 16, 64, 64, 64]], - [[32, 4*16, 16, 16], [1,1,2,2], [32, 4, 16, 32*32], [32, 4, 32, 32*32], [32, 4, 32, 32, 32]], - [[32, 4*32, 32, 32], [1,1,2,2], [32, 4, 32, 64*64], [32, 4, 64, 64*64], [32, 4, 64, 64, 64]]] - - if batchsize == "1": - shapes = shapes1 - - elif batchsize == "4": - shapes = shapes4 - - elif batchsize == 
"8": - shapes = shapes8 - - elif batchsize == "16": - shapes = shapes16 - - elif batchsize == "32": - shapes = shapes32 - else: - print("batchsize输入错误") - - for idx, node in enumerate(resizes): - print("idx: node.name", idx, node.name) - reshape1 = graph.add_node(f'Reshape_{node.name}', 'Reshape') - graph.add_initializer(f'shape_{node.name}', np.array(shapes[idx][0])) - reshape1.inputs = [node.inputs[0], f'shape_{node.name}'] - reshape1.outputs = [f'Reshape_{node.name}'] - - graph[node.inputs[-1]].value = np.array(shapes[idx][1]).astype(np.float32) - out_name = node.outputs[0] - node.set_input(0, f'Reshape_{node.name}') - node.set_output(0, f'{node.name}_reshape') - - reshape2 = graph.add_node(f'Reshape2_{node.name}', 'Reshape') - graph.add_initializer(f'shape2_{node.name}', np.array(shapes[idx][2])) - reshape2.inputs = [f'{node.name}_reshape', f'shape2_{node.name}'] - reshape2.outputs = [f'Reshape2_{node.name}_out'] - - resize2 = graph.add_node(f'Resize2_{node.name}', 'Resize') - graph.add_initializer(f'size_{node.name}', np.array(shapes[idx][3])) - resize2.inputs = [f'Reshape2_{node.name}_out', node.inputs[1], node.inputs[1], f'size_{node.name}'] - resize2.outputs = [f'Resize2_{node.name}'] - - reshape3 = graph.add_node(f'Reshape3_{node.name}', 'Reshape') - graph.add_initializer(f'shape3_{node.name}', np.array(shapes[idx][4])) - reshape3.inputs = [f'Resize2_{node.name}', f'shape3_{node.name}'] - reshape3.outputs = [out_name] - - graph.save(output_file) - -if __name__ == "__main__": - #input_file是输入的简化后的onnx路径,output_file是输出的onnx名称,batchsize是要转的onnx对应的batchsize大小 - input_file = sys.argv[1] - output_file = sys.argv[2] - batch = sys.argv[3] - modify(input_file, batch) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from magiconnx import OnnxGraph +import sys + +def modify(path, batchsize): + graph = OnnxGraph(path) + resizes = graph.get_nodes("Resize") + #1 128 4 4 4 -> 1 128*4 4 4 -> 1 128*4 8 8 -> 1 128 4 8*8 -> 1 128 8 8*8 -> 1 128 8 8 8 + shapes1 = [[[1, 128*4, 4, 4], [1,1,2,2], [1, 128, 4, 8*8], [1, 128, 8, 8*8], [1, 128, 8, 8, 8]], + [[1, 64*8, 8, 8], [1,1,2,2], [1, 64, 8, 16*16], [1, 64, 16, 16*16], [1, 64, 16, 16, 16]], + [[1, 32*16, 16, 16], [1,1,2,2], [1, 32, 16, 32*32], [1, 32, 32, 32*32], [1, 32, 32, 32, 32]], + [[1, 16*32, 32, 32], [1,1,2,2], [1, 16, 32, 64*64], [1, 16, 64, 64*64], [1, 16, 64, 64, 64]], + [[1, 4*16, 16, 16], [1,1,2,2], [1, 4, 16, 32*32], [1, 4, 32, 32*32], [1, 4, 32, 32, 32]], + [[1, 4*32, 32, 32], [1,1,2,2], [1, 4, 32, 64*64], [1, 4, 64, 64*64], [1, 4, 64, 64, 64]]] + + #4 128 4 4 4 + shapes4 = [[[4, 128*4, 4, 4], [1,1,2,2], [4, 128, 4, 8*8], [4, 128, 8, 8*8], [4, 128, 8, 8, 8]], + [[4, 64*8, 8, 8], [1,1,2,2], [4, 64, 8, 16*16], [4, 64, 16, 16*16], [4, 64, 16, 16, 16]], + [[4, 32*16, 16, 16], [1,1,2,2], [4, 32, 16, 32*32], [4, 32, 32, 32*32], [4, 32, 32, 32, 32]], + [[4, 16*32, 32, 32], [1,1,2,2], [4, 16, 32, 64*64], [4, 16, 64, 64*64], [4, 16, 64, 64, 64]], + [[4, 4*16, 16, 16], [1,1,2,2], [4, 4, 16, 32*32], [4, 4, 32, 32*32], [4, 4, 32, 32, 32]], + [[4, 4*32, 32, 32], [1,1,2,2], [4, 4, 32, 64*64], [4, 4, 64, 64*64], [4, 4, 64, 64, 64]]] + + #8 128 4 4 4 -> 8 128*4 4 4 -> 8 128*4 8 8 -> 8 128 4 8*8 -> 8 128 8 8*8 -> 8 128 8 8 8 + shapes8 = [[[8, 128*4, 4, 4], [1,1,2,2], [8, 128, 4, 8*8], [8, 128, 8, 8*8], [8, 128, 8, 8, 8]], + [[8, 64*8, 8, 8], [1,1,2,2], [8, 64, 8, 16*16], [8, 64, 16, 16*16], [8, 64, 16, 16, 16]], + [[8, 32*16, 16, 16], [1,1,2,2], [8, 32, 16, 32*32], [8, 32, 32, 32*32], [8, 32, 32, 32, 32]], + [[8, 16*32, 32, 32], [1,1,2,2], [8, 16, 32, 64*64], [8, 16, 64, 64*64], [8, 16, 64, 64, 64]], + [[8, 4*16, 16, 16], [1,1,2,2], [8, 4, 16, 32*32], [8, 4, 32, 32*32], [8, 4, 32, 32, 32]], + [[8, 4*32, 32, 32], [1,1,2,2], [8, 4, 32, 64*64], [8, 4, 64, 64*64], [8, 4, 64, 64, 64]]] + + #16 128 4 4 4 -> 16 128*4 4 4 -> 16 128*4 8 8 -> 16 128 4 8*8 -> 16 128 8 8*8 -> 16 128 8 8 8 + shapes16 = [[[16, 128*4, 4, 4], [1,1,2,2], [16, 128, 4, 8*8], [16, 128, 8, 8*8], [16, 128, 8, 8, 8]], + [[16, 64*8, 8, 8], [1,1,2,2], [16, 64, 8, 16*16], [16, 64, 16, 16*16], [16, 64, 16, 16, 16]], + [[16, 32*16, 16, 16], [1,1,2,2], [16, 32, 16, 32*32], [16, 32, 32, 32*32], [16, 32, 32, 32, 32]], + [[16, 16*32, 32, 32], [1,1,2,2], [16, 16, 32, 64*64], [16, 16, 64, 64*64], [16, 16, 64, 64, 64]], + [[16, 4*16, 16, 16], [1,1,2,2], [16, 4, 16, 32*32], [16, 4, 32, 32*32], [16, 4, 32, 32, 32]], + [[16, 4*32, 32, 32], [1,1,2,2], [16, 4, 32, 64*64], [16, 4, 64, 64*64], [16, 4, 64, 64, 64]]] + + #32 128 4 4 4 -> 32 128*4 4 4 -> 32 128*4 8 8 -> 32 128 4 8*8 -> 32 128 8 8*8 -> 32 128 8 8 8 + shapes32 = [[[32, 128*4, 4, 4], [1,1,2,2], [32, 128, 4, 8*8], [32, 128, 8, 8*8], [32, 128, 8, 8, 8]], + [[32, 64*8, 8, 8], [1,1,2,2], [32, 64, 8, 16*16], [32, 64, 16, 16*16], [32, 64, 16, 16, 16]], + [[32, 32*16, 16, 16], [1,1,2,2], [32, 32, 16, 32*32], [32, 32, 32, 32*32], [32, 32, 32, 32, 32]], + [[32, 16*32, 32, 32], [1,1,2,2], [32, 16, 32, 64*64], [32, 16, 64, 64*64], [32, 16, 64, 64, 64]], + [[32, 4*16, 16, 16], [1,1,2,2], [32, 4, 16, 32*32], [32, 4, 32, 32*32], [32, 4, 32, 32, 32]], + [[32, 4*32, 32, 32], [1,1,2,2], [32, 4, 32, 64*64], [32, 4, 64, 64*64], [32, 4, 64, 64, 64]]] + + if batchsize == "1": + shapes = shapes1 + + elif batchsize == "4": + shapes = shapes4 + + elif batchsize == 
"8": + shapes = shapes8 + + elif batchsize == "16": + shapes = shapes16 + + elif batchsize == "32": + shapes = shapes32 + else: + print("batchsize输入错误") + + for idx, node in enumerate(resizes): + print("idx: node.name", idx, node.name) + reshape1 = graph.add_node(f'Reshape_{node.name}', 'Reshape') + graph.add_initializer(f'shape_{node.name}', np.array(shapes[idx][0])) + reshape1.inputs = [node.inputs[0], f'shape_{node.name}'] + reshape1.outputs = [f'Reshape_{node.name}'] + + graph[node.inputs[-1]].value = np.array(shapes[idx][1]).astype(np.float32) + out_name = node.outputs[0] + node.set_input(0, f'Reshape_{node.name}') + node.set_output(0, f'{node.name}_reshape') + + reshape2 = graph.add_node(f'Reshape2_{node.name}', 'Reshape') + graph.add_initializer(f'shape2_{node.name}', np.array(shapes[idx][2])) + reshape2.inputs = [f'{node.name}_reshape', f'shape2_{node.name}'] + reshape2.outputs = [f'Reshape2_{node.name}_out'] + + resize2 = graph.add_node(f'Resize2_{node.name}', 'Resize') + graph.add_initializer(f'size_{node.name}', np.array(shapes[idx][3])) + resize2.inputs = [f'Reshape2_{node.name}_out', node.inputs[1], node.inputs[1], f'size_{node.name}'] + resize2.outputs = [f'Resize2_{node.name}'] + + reshape3 = graph.add_node(f'Reshape3_{node.name}', 'Reshape') + graph.add_initializer(f'shape3_{node.name}', np.array(shapes[idx][4])) + reshape3.inputs = [f'Resize2_{node.name}', f'shape3_{node.name}'] + reshape3.outputs = [out_name] + + graph.save(output_file) + +if __name__ == "__main__": + #input_file是输入的简化后的onnx路径,output_file是输出的onnx名称,batchsize是要转的onnx对应的batchsize大小 + input_file = sys.argv[1] + output_file = sys.argv[2] + batch = sys.argv[3] + modify(input_file, batch) diff --git a/ACL_PyTorch/contrib/cv/detection/3DUnet/postprocess.py b/ACL_PyTorch/contrib/cv/detection/3DUnet/postprocess.py index 811c649ef39a87bb2cd021cb7ae4d86568015ccf..de0ce74cd8191115fa262621229fe1bac97f208d 100644 --- a/ACL_PyTorch/contrib/cv/detection/3DUnet/postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/3DUnet/postprocess.py @@ -1,131 +1,131 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import argparse -import os - -import torch -import torch.nn.functional as F - -# Lib files -import numpy as np -from torch.utils.data import dataset -import lib.utils as utils -import lib.medloaders as medical_loaders -import lib.medzoo as medzoo -from lib.visual3D_temp import non_overlap_padding,test_padding -from lib.losses3D import DiceLoss -from lib.utils.general import prepare_input - - -from lib.medloaders.brats2018 import MICCAIBraTS2018 - -from glob import glob - - - -def main(): - args = get_arguments() - model, optimizer = medzoo.create_model(args) - batchSz = args.batchSz - score = 0 - model.eval() - bin_file_path = args.input_bin - pth_file_path = args.input_label - - length = glob(bin_file_path + '/*.bin') - length1 = glob(pth_file_path + '/*.pth') - - criterion = DiceLoss(classes=args.classes) - - for s in range(0, len(length)): - binfile = os.path.join(bin_file_path, str(s) + '_output_0' + ".bin") - output = np.fromfile(binfile, dtype=np.float32) - output = np.reshape(output, (batchSz, 4, 64, 64, 64)) - output = torch.from_numpy(output) - - pthfile = os.path.join(pth_file_path, str(s) + ".pth") - target = torch.load(pthfile) - target = torch.from_numpy(target) - - loss_dice, per_ch_score = criterion(output, target) - avg = np.mean(per_ch_score) - score += avg - print("--------score.avg------------", score / len(length)) - return score / len(length) - - - -def get_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument('--batchSz', type=int, default=1) - parser.add_argument('--dataset_name', type=str, default="brats2018") - parser.add_argument('--dim', nargs="+", type=int, default=(64, 64, 64)) - parser.add_argument('--nEpochs', type=int, default=100) - parser.add_argument('--classes', type=int, default=4) - parser.add_argument('--samples_train', type=int, default=1024) - parser.add_argument('--samples_val', type=int, default=128) - parser.add_argument('--inChannels', type=int, default=4) - parser.add_argument('--inModalities', type=int, default=4) - parser.add_argument('--threshold', default=0.00000000001, type=float) - parser.add_argument('--terminal_show_freq', default=50) - parser.add_argument('--augmentation', action='store_true', default=True) - parser.add_argument('--normalization', default='full_volume_mean', type=str, - help='Tensor normalization: options ,max_min,', - choices=('max_min', 'full_volume_mean', 'brats', 'max', 'mean')) - parser.add_argument('--split', default=0.8, type=float, help='Select percentage of training data(default: 0.8)') - parser.add_argument('--lr', default=5e-3, type=float, - help='learning rate (default: 5e-3)') - parser.add_argument('--cuda', action='store_true', default=False) - - parser.add_argument('--loadData', default=True) - parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') - parser.add_argument('--model', type=str, default='UNET3D', - choices=('VNET', 'VNET2', 'UNET3D', 'DENSENET1', 'DENSENET2', 'DENSENET3', 'HYPERDENSENET')) - parser.add_argument('--opt', type=str, default='sgd', - choices=('sgd', 'adam', 'rmsprop')) - parser.add_argument('--log_dir', type=str, - default='./runs/') - parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') - - parser.add_argument('--world_size', type=int, default=1) - parser.add_argument('--rank', type=int, default=0) - - - parser.add_argument('--amp', action='store_true', default=False) - - parser.add_argument('--workers', type=int, 
default=8) - - - parser.add_argument('--device', default='npu', type=str, help='npu or gpu') - parser.add_argument('--pretrained', - default="none", - type=str, metavar='PATH', - help='path to pretrained model') - parser.add_argument('--input_bin', default='none', type=str) - parser.add_argument('--input_label', default='none', type=str) - - - args = parser.parse_args() - - args.save = '../inference_checkpoints/' + args.model + '_checkpoints/' + args.model + '_{}_{}_'.format( - utils.datestr(), args.dataset_name) - args.tb_log_dir = '../runs/' - return args - - - -if __name__ == '__main__': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import os + +import torch +import torch.nn.functional as F + +# Lib files +import numpy as np +from torch.utils.data import dataset +import lib.utils as utils +import lib.medloaders as medical_loaders +import lib.medzoo as medzoo +from lib.visual3D_temp import non_overlap_padding,test_padding +from lib.losses3D import DiceLoss +from lib.utils.general import prepare_input + + +from lib.medloaders.brats2018 import MICCAIBraTS2018 + +from glob import glob + + + +def main(): + args = get_arguments() + model, optimizer = medzoo.create_model(args) + batchSz = args.batchSz + score = 0 + model.eval() + bin_file_path = args.input_bin + pth_file_path = args.input_label + + length = glob(bin_file_path + '/*.bin') + length1 = glob(pth_file_path + '/*.pth') + + criterion = DiceLoss(classes=args.classes) + + for s in range(0, len(length)): + binfile = os.path.join(bin_file_path, str(s) + '_output_0' + ".bin") + output = np.fromfile(binfile, dtype=np.float32) + output = np.reshape(output, (batchSz, 4, 64, 64, 64)) + output = torch.from_numpy(output) + + pthfile = os.path.join(pth_file_path, str(s) + ".pth") + target = torch.load(pthfile) + target = torch.from_numpy(target) + + loss_dice, per_ch_score = criterion(output, target) + avg = np.mean(per_ch_score) + score += avg + print("--------score.avg------------", score / len(length)) + return score / len(length) + + + +def get_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('--batchSz', type=int, default=1) + parser.add_argument('--dataset_name', type=str, default="brats2018") + parser.add_argument('--dim', nargs="+", type=int, default=(64, 64, 64)) + parser.add_argument('--nEpochs', type=int, default=100) + parser.add_argument('--classes', type=int, default=4) + parser.add_argument('--samples_train', type=int, default=1024) + parser.add_argument('--samples_val', type=int, default=128) + parser.add_argument('--inChannels', type=int, default=4) + parser.add_argument('--inModalities', type=int, default=4) + parser.add_argument('--threshold', default=0.00000000001, type=float) + parser.add_argument('--terminal_show_freq', default=50) + parser.add_argument('--augmentation', action='store_true', default=True) + parser.add_argument('--normalization', default='full_volume_mean', type=str, + help='Tensor normalization: options ,max_min,', + 
choices=('max_min', 'full_volume_mean', 'brats', 'max', 'mean')) + parser.add_argument('--split', default=0.8, type=float, help='Select percentage of training data(default: 0.8)') + parser.add_argument('--lr', default=5e-3, type=float, + help='learning rate (default: 5e-3)') + parser.add_argument('--cuda', action='store_true', default=False) + + parser.add_argument('--loadData', default=True) + parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') + parser.add_argument('--model', type=str, default='UNET3D', + choices=('VNET', 'VNET2', 'UNET3D', 'DENSENET1', 'DENSENET2', 'DENSENET3', 'HYPERDENSENET')) + parser.add_argument('--opt', type=str, default='sgd', + choices=('sgd', 'adam', 'rmsprop')) + parser.add_argument('--log_dir', type=str, + default='./runs/') + parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') + + parser.add_argument('--world_size', type=int, default=1) + parser.add_argument('--rank', type=int, default=0) + + + parser.add_argument('--amp', action='store_true', default=False) + + parser.add_argument('--workers', type=int, default=8) + + + parser.add_argument('--device', default='npu', type=str, help='npu or gpu') + parser.add_argument('--pretrained', + default="none", + type=str, metavar='PATH', + help='path to pretrained model') + parser.add_argument('--input_bin', default='none', type=str) + parser.add_argument('--input_label', default='none', type=str) + + + args = parser.parse_args() + + args.save = '../inference_checkpoints/' + args.model + '_checkpoints/' + args.model + '_{}_{}_'.format( + utils.datestr(), args.dataset_name) + args.tb_log_dir = '../runs/' + return args + + + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/AlphaPose/AlphaPose.patch b/ACL_PyTorch/contrib/cv/detection/AlphaPose/AlphaPose.patch index 6cde848c510d66325bef132d431d08a6c0482a6f..99caa0d26ecd1ecb53507a7c28584f27f829c5b3 100644 --- a/ACL_PyTorch/contrib/cv/detection/AlphaPose/AlphaPose.patch +++ b/ACL_PyTorch/contrib/cv/detection/AlphaPose/AlphaPose.patch @@ -209,16 +209,16 @@ index 8a755c9..b8e4c57 100644 --- a/detector/yolo_cfg.py +++ b/detector/yolo_cfg.py @@ -1,8 +1,8 @@ - from easydict import EasyDict as edict - - cfg = edict() --cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' --cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' -+cfg.CONFIG = 'AlphaPose/detector/yolo/cfg/yolov3-spp.cfg' -+cfg.WEIGHTS = 'AlphaPose/detector/yolo/data/yolov3-spp.weights' - cfg.INP_DIM = 608 - cfg.NMS_THRES = 0.6 - cfg.CONFIDENCE = 0.1 + from easydict import EasyDict as edict + + cfg = edict() +-cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' +-cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' ++cfg.CONFIG = 'AlphaPose/detector/yolo/cfg/yolov3-spp.cfg' ++cfg.WEIGHTS = 'AlphaPose/detector/yolo/data/yolov3-spp.weights' + cfg.INP_DIM = 608 + cfg.NMS_THRES = 0.6 + cfg.CONFIDENCE = 0.1 diff --git a/setup.py b/setup.py index d1e397d..4ad65f8 100644 --- a/setup.py diff --git a/ACL_PyTorch/contrib/cv/detection/AlphaPose/LICENSE b/ACL_PyTorch/contrib/cv/detection/AlphaPose/LICENSE index 8904c8516082056802ee732a4213ceab8c4a93af..5f7aa69fea22dade3f519868400025de434ae8ca 100644 --- a/ACL_PyTorch/contrib/cv/detection/AlphaPose/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/AlphaPose/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS 
FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/AlphaPose/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/AlphaPose/gen_dataset_info.py index efe82aef071e229c2f3212e1ec5a8531ad4d3e53..3656966b4cd924e11ac53b6449cdec237c0c64f7 100644 --- a/ACL_PyTorch/contrib/cv/detection/AlphaPose/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/AlphaPose/gen_dataset_info.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(fpath, info_n, width, height): - ''' - Describe - ''' - bin_images = glob(os.path.join(fpath, '*.bin')) - with open(info_n, 'w') as f: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - f.write(content) - f.write('\n') - - -def get_jpg_info(fpath, info_n): - ''' - Describe - ''' - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(fpath, '*.' + extension))) - with open(info_n, 'w') as f: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - f.write(content) - f.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - Width = sys.argv[4] - Height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, Width, Height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(fpath, info_n, width, height): + ''' + Describe + ''' + bin_images = glob(os.path.join(fpath, '*.bin')) + with open(info_n, 'w') as f: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + f.write(content) + f.write('\n') + + +def get_jpg_info(fpath, info_n): + ''' + Describe + ''' + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(fpath, '*.' + extension))) + with open(info_n, 'w') as f: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + f.write(content) + f.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + Width = sys.argv[4] + Height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, Width, Height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/contrib/cv/detection/AlphaPose/test/parse.py b/ACL_PyTorch/contrib/cv/detection/AlphaPose/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/detection/AlphaPose/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/AlphaPose/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_eval.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_eval.py index 0976dab05c6fe15ab49f21f7dcc7f506576d8eb9..0624523f6ff987edd53bdb4114ccf0595eacb54d 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_eval.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_eval.py @@ -1,85 +1,85 @@ -# -*- coding: utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -sys.path.append('./BSN-boundary-sensitive-network.pytorch/Evaluation') -from eval_proposal import ANETproposal -import matplotlib.pyplot as plt -import numpy as np - -def run_evaluation(ground_truth_filename, proposal_filename, - max_avg_nr_proposals=100, - tiou_thresholds=np.linspace(0.5, 0.95, 10), - subset='validation'): - - anet_proposal = ANETproposal(ground_truth_filename, proposal_filename, - tiou_thresholds=tiou_thresholds, - max_avg_nr_proposals=max_avg_nr_proposals, - subset=subset, verbose=True, check_status=False) - anet_proposal.evaluate() - - recall = anet_proposal.recall - average_recall = anet_proposal.avg_recall - average_nr_proposals = anet_proposal.proposals_per_video - - return (average_nr_proposals, average_recall, recall) - -def plot_metric(average_nr_proposals, average_recall, recall, tiou_thresholds=np.linspace(0.5, 0.95, 10)): - - fn_size = 14 - plt.figure(num=None, figsize=(12, 8)) - ax = plt.subplot(1,1,1) - - colors = ['k', 'r', 'yellow', 'b', 'c', 'm', 'b', 'pink', 'lawngreen', 'indigo'] - area_under_curve = np.zeros_like(tiou_thresholds) - for i in range(recall.shape[0]): - area_under_curve[i] = np.trapz(recall[i], average_nr_proposals) - - for idx, tiou in enumerate(tiou_thresholds[::2]): - ax.plot(average_nr_proposals, recall[2*idx,:], color=colors[idx+1], - label="tiou=[" + str(tiou) + "], area=" + str(int(area_under_curve[2*idx]*100)/100.), - linewidth=4, linestyle='--', marker=None) - # Plots Average Recall vs Average number of proposals. 
- ax.plot(average_nr_proposals, average_recall, color=colors[0], - label="tiou = 0.5:0.05:0.95," + " area=" + str(int(np.trapz(average_recall, average_nr_proposals)*100)/100.), - linewidth=4, linestyle='-', marker=None) - - handles, labels = ax.get_legend_handles_labels() - ax.legend([handles[-1]] + handles[:-1], [labels[-1]] + labels[:-1], loc='best') - - plt.ylabel('Average Recall', fontsize=fn_size) - plt.xlabel('Average Number of Proposals per Video', fontsize=fn_size) - plt.grid(b=True, which="both") - plt.ylim([0, 1.0]) - plt.setp(plt.axes().get_xticklabels(), fontsize=fn_size) - plt.setp(plt.axes().get_yticklabels(), fontsize=fn_size) - #plt.show() - plt.savefig("./output/evaluation_result.jpg") - -def evaluation_proposal(): - - uniform_average_nr_proposals_valid, uniform_average_recall_valid, uniform_recall_valid = run_evaluation( - "./BSN-boundary-sensitive-network.pytorch/Evaluation/data/activity_net_1_3_new.json", - "./output/result_proposal.json", - max_avg_nr_proposals=100, - tiou_thresholds=np.linspace(0.5, 0.95, 10), - subset='validation') - - plot_metric(uniform_average_nr_proposals_valid, uniform_average_recall_valid, uniform_recall_valid) - print('AR@1 is \t{}'.format(np.mean(uniform_recall_valid[:,0]))) - print('AR@5 is \t{}'.format(np.mean(uniform_recall_valid[:,4]))) - print('AR@10 is \t{}'.format(np.mean(uniform_recall_valid[:,9]))) - print('AR@100 is \t{}'.format(np.mean(uniform_recall_valid[:,-1]))) -if __name__ =="__main__": +# -*- coding: utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys +sys.path.append('./BSN-boundary-sensitive-network.pytorch/Evaluation') +from eval_proposal import ANETproposal +import matplotlib.pyplot as plt +import numpy as np + +def run_evaluation(ground_truth_filename, proposal_filename, + max_avg_nr_proposals=100, + tiou_thresholds=np.linspace(0.5, 0.95, 10), + subset='validation'): + + anet_proposal = ANETproposal(ground_truth_filename, proposal_filename, + tiou_thresholds=tiou_thresholds, + max_avg_nr_proposals=max_avg_nr_proposals, + subset=subset, verbose=True, check_status=False) + anet_proposal.evaluate() + + recall = anet_proposal.recall + average_recall = anet_proposal.avg_recall + average_nr_proposals = anet_proposal.proposals_per_video + + return (average_nr_proposals, average_recall, recall) + +def plot_metric(average_nr_proposals, average_recall, recall, tiou_thresholds=np.linspace(0.5, 0.95, 10)): + + fn_size = 14 + plt.figure(num=None, figsize=(12, 8)) + ax = plt.subplot(1,1,1) + + colors = ['k', 'r', 'yellow', 'b', 'c', 'm', 'b', 'pink', 'lawngreen', 'indigo'] + area_under_curve = np.zeros_like(tiou_thresholds) + for i in range(recall.shape[0]): + area_under_curve[i] = np.trapz(recall[i], average_nr_proposals) + + for idx, tiou in enumerate(tiou_thresholds[::2]): + ax.plot(average_nr_proposals, recall[2*idx,:], color=colors[idx+1], + label="tiou=[" + str(tiou) + "], area=" + str(int(area_under_curve[2*idx]*100)/100.), + linewidth=4, linestyle='--', marker=None) + # Plots Average Recall vs Average number of proposals. + ax.plot(average_nr_proposals, average_recall, color=colors[0], + label="tiou = 0.5:0.05:0.95," + " area=" + str(int(np.trapz(average_recall, average_nr_proposals)*100)/100.), + linewidth=4, linestyle='-', marker=None) + + handles, labels = ax.get_legend_handles_labels() + ax.legend([handles[-1]] + handles[:-1], [labels[-1]] + labels[:-1], loc='best') + + plt.ylabel('Average Recall', fontsize=fn_size) + plt.xlabel('Average Number of Proposals per Video', fontsize=fn_size) + plt.grid(b=True, which="both") + plt.ylim([0, 1.0]) + plt.setp(plt.axes().get_xticklabels(), fontsize=fn_size) + plt.setp(plt.axes().get_yticklabels(), fontsize=fn_size) + #plt.show() + plt.savefig("./output/evaluation_result.jpg") + +def evaluation_proposal(): + + uniform_average_nr_proposals_valid, uniform_average_recall_valid, uniform_recall_valid = run_evaluation( + "./BSN-boundary-sensitive-network.pytorch/Evaluation/data/activity_net_1_3_new.json", + "./output/result_proposal.json", + max_avg_nr_proposals=100, + tiou_thresholds=np.linspace(0.5, 0.95, 10), + subset='validation') + + plot_metric(uniform_average_nr_proposals_valid, uniform_average_recall_valid, uniform_recall_valid) + print('AR@1 is \t{}'.format(np.mean(uniform_recall_valid[:,0]))) + print('AR@5 is \t{}'.format(np.mean(uniform_recall_valid[:,4]))) + print('AR@10 is \t{}'.format(np.mean(uniform_recall_valid[:,9]))) + print('AR@100 is \t{}'.format(np.mean(uniform_recall_valid[:,-1]))) +if __name__ =="__main__": evaluation_proposal() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_postprocess.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_postprocess.py index ed9b16d775028f3017a309bde789d14d031b6cc7..f37f5a8236132d62524d813503a28d91f38b323d 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_postprocess.py @@ -1,190 +1,190 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use 
this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import json -import torch -import argparse -import os -import numpy as np -import pandas as pd -import multiprocessing as mp - -parser = argparse.ArgumentParser(description='BSN') - -parser.add_argument('--result_path',default='output/PEM_results', type=str, help='Dir to save txt results') -parser.add_argument('--PEM_out_path', default='result/dumpOutput_device1', type=str, help='infer out path') -parser.add_argument('--PEM_video_xmin_path', default='output/BSN-PEM-preprocess/xmin', type=str, help='infer info path') -parser.add_argument('--PEM_video_xmax_path', default='output/BSN-PEM-preprocess/xmax', type=str, help='infer info path') -parser.add_argument('--PEM_video_xmin_score_path', default='output/BSN-PEM-preprocess/xmin_score', type=str, help='infer info path') -parser.add_argument('--PEM_video_xmax_score_path', default='output/BSN-PEM-preprocess/xmax_score', type=str, help='infer info path') -# parser.add_argument('--info_name', default='./deepspeech_om_bin.info', type=str, help='input info path') -# parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold') -args = parser.parse_args() - -def load_json(file): - with open(file) as json_file: - data = json.load(json_file) - return data - -def getDatasetDict(): - df=pd.read_csv("BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv") - json_data= load_json("BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") - database=json_data - video_dict={} - for i in range(len(df)): - video_name=df.video.values[i] - video_info=database[video_name] - video_new_info={} - video_new_info['duration_frame']=video_info['duration_frame'] - video_new_info['duration_second']=video_info['duration_second'] - video_new_info["feature_frame"]=video_info['feature_frame'] - video_subset=df.subset.values[i] - video_new_info['annotations']=video_info['annotations'] - if video_subset=="validation": - video_dict[video_name]=video_new_info - return video_dict - -def iou_with_anchors(anchors_min,anchors_max,len_anchors,box_min,box_max): - """Compute jaccard score between a box and the anchors. - """ - int_xmin = np.maximum(anchors_min, box_min) - int_xmax = np.minimum(anchors_max, box_max) - inter_len = np.maximum(int_xmax - int_xmin, 0.) 
- union_len = len_anchors - inter_len +box_max-box_min - #print inter_len,union_len - jaccard = np.divide(inter_len, union_len) - return jaccard - -def Soft_NMS(df): - df=df.sort_values(by="score",ascending=False) - - tstart=list(df.xmin.values[:]) - tend=list(df.xmax.values[:]) - tscore=list(df.score.values[:]) - rstart=[] - rend=[] - rscore=[] - - while len(tscore)>0 and len(rscore)<=100: - max_index=np.argmax(tscore) - tmp_width = tend[max_index] -tstart[max_index] - iou_list = iou_with_anchors(tstart[max_index],tend[max_index],tmp_width,np.array(tstart),np.array(tend)) - iou_exp_list = np.exp(-np.square(iou_list)/0.75) - for idx in range(0,len(tscore)): - if idx!=max_index: - tmp_iou = iou_list[idx] - if tmp_iou>0.65 + (0.9 - 0.65) * tmp_width: - tscore[idx]=tscore[idx]*iou_exp_list[idx] - - rstart.append(tstart[max_index]) - rend.append(tend[max_index]) - rscore.append(tscore[max_index]) - tstart.pop(max_index) - tend.pop(max_index) - tscore.pop(max_index) - - newDf=pd.DataFrame() - newDf['score']=rscore - newDf['xmin']=rstart - newDf['xmax']=rend - return newDf - -def video_post_process(video_list,video_dict): - - for video_name in video_list: - df=pd.read_csv("./output/PEM_results/"+video_name+".csv") - - df['score']=df.iou_score.values[:]*df.xmin_score.values[:]*df.xmax_score.values[:] - if len(df)>1: - df=Soft_NMS(df) - - df=df.sort_values(by="score",ascending=False) - video_info=video_dict[video_name] - video_duration=float(video_info["duration_frame"]/16*16)/video_info["duration_frame"]*video_info["duration_second"] - proposal_list=[] - - for j in range(min(100,len(df))): - tmp_proposal={} - tmp_proposal["score"]=df.score.values[j] - tmp_proposal["segment"]=[max(0,df.xmin.values[j])*video_duration,min(1,df.xmax.values[j])*video_duration] - proposal_list.append(tmp_proposal) - result_dict[video_name[2:]]=proposal_list - - -def BSN_post_processing(): - video_dict=getDatasetDict() - video_list=video_dict.keys()#[:100] - video_list = list(video_list) - global result_dict - result_dict=mp.Manager().dict() - - num_videos = len(video_list) - num_videos_per_thread = num_videos/8 - processes = [] - for tid in range(7): - tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] - p = mp.Process(target = video_post_process,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - tmp_video_list = video_list[int(7*num_videos_per_thread):] - p = mp.Process(target = video_post_process,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - for p in processes: - p.join() - - result_dict = dict(result_dict) - output_dict={"version":"VERSION 1.3","results":result_dict,"external_data":{}} - outfile=open("./output/result_proposal.json","w") - json.dump(output_dict,outfile) - outfile.close() - -if __name__ == '__main__': - if not os.path.exists("output/PEM_results"): - os.makedirs("output/PEM_results") - out_files = os.listdir(args.PEM_out_path) - print("processing...") - for i in range(len(out_files)): - video_name = str(out_files[i]) - video_name = video_name[0:int(len(video_name)-6)] - video_data = np.fromfile(args.PEM_out_path+'/'+out_files[i],dtype=np.float32) - - video_xmin = np.fromfile(args.PEM_video_xmin_path+'/'+video_name+'.bin',dtype=np.float64) - video_xmax = np.fromfile(args.PEM_video_xmax_path+'/'+video_name+'.bin',dtype=np.float64) - video_xmin_score = np.fromfile(args.PEM_video_xmin_score_path+'/'+video_name+'.bin',dtype=np.float64) - video_xmax_score = 
np.fromfile(args.PEM_video_xmax_score_path+'/'+video_name+'.bin',dtype=np.float64) - - video_data = torch.tensor(video_data) - video_xmin = torch.tensor(video_xmin) - video_xmax = torch.tensor(video_xmax) - video_xmin_score = torch.tensor(video_xmin_score) - video_xmax_score = torch.tensor(video_xmax_score) - data_num = int(video_xmin.shape[0]) - video_data = video_data[:data_num] - - video_data = video_data.view(-1).detach().cpu().numpy() - video_xmin = video_xmin.view(-1).cpu().numpy() - video_xmax = video_xmax.view(-1).cpu().numpy() - video_xmin_score = video_xmin_score.view(-1).cpu().numpy() - video_xmax_score = video_xmax_score.view(-1).cpu().numpy() - - df=pd.DataFrame() - df["xmin"]=video_xmin - df["xmax"]=video_xmax - df["xmin_score"]=video_xmin_score - df["xmax_score"]=video_xmax_score - df["iou_score"]=video_data - df.to_csv(args.result_path+'/'+video_name+".csv",index=False) - print("PGM: start generating BSN_post feature") - BSN_post_processing() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import torch +import argparse +import os +import numpy as np +import pandas as pd +import multiprocessing as mp + +parser = argparse.ArgumentParser(description='BSN') + +parser.add_argument('--result_path',default='output/PEM_results', type=str, help='Dir to save txt results') +parser.add_argument('--PEM_out_path', default='result/dumpOutput_device1', type=str, help='infer out path') +parser.add_argument('--PEM_video_xmin_path', default='output/BSN-PEM-preprocess/xmin', type=str, help='infer info path') +parser.add_argument('--PEM_video_xmax_path', default='output/BSN-PEM-preprocess/xmax', type=str, help='infer info path') +parser.add_argument('--PEM_video_xmin_score_path', default='output/BSN-PEM-preprocess/xmin_score', type=str, help='infer info path') +parser.add_argument('--PEM_video_xmax_score_path', default='output/BSN-PEM-preprocess/xmax_score', type=str, help='infer info path') +# parser.add_argument('--info_name', default='./deepspeech_om_bin.info', type=str, help='input info path') +# parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold') +args = parser.parse_args() + +def load_json(file): + with open(file) as json_file: + data = json.load(json_file) + return data + +def getDatasetDict(): + df=pd.read_csv("BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv") + json_data= load_json("BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") + database=json_data + video_dict={} + for i in range(len(df)): + video_name=df.video.values[i] + video_info=database[video_name] + video_new_info={} + video_new_info['duration_frame']=video_info['duration_frame'] + video_new_info['duration_second']=video_info['duration_second'] + video_new_info["feature_frame"]=video_info['feature_frame'] + video_subset=df.subset.values[i] + video_new_info['annotations']=video_info['annotations'] + if video_subset=="validation": + 
video_dict[video_name]=video_new_info + return video_dict + +def iou_with_anchors(anchors_min,anchors_max,len_anchors,box_min,box_max): + """Compute jaccard score between a box and the anchors. + """ + int_xmin = np.maximum(anchors_min, box_min) + int_xmax = np.minimum(anchors_max, box_max) + inter_len = np.maximum(int_xmax - int_xmin, 0.) + union_len = len_anchors - inter_len +box_max-box_min + #print inter_len,union_len + jaccard = np.divide(inter_len, union_len) + return jaccard + +def Soft_NMS(df): + df=df.sort_values(by="score",ascending=False) + + tstart=list(df.xmin.values[:]) + tend=list(df.xmax.values[:]) + tscore=list(df.score.values[:]) + rstart=[] + rend=[] + rscore=[] + + while len(tscore)>0 and len(rscore)<=100: + max_index=np.argmax(tscore) + tmp_width = tend[max_index] -tstart[max_index] + iou_list = iou_with_anchors(tstart[max_index],tend[max_index],tmp_width,np.array(tstart),np.array(tend)) + iou_exp_list = np.exp(-np.square(iou_list)/0.75) + for idx in range(0,len(tscore)): + if idx!=max_index: + tmp_iou = iou_list[idx] + if tmp_iou>0.65 + (0.9 - 0.65) * tmp_width: + tscore[idx]=tscore[idx]*iou_exp_list[idx] + + rstart.append(tstart[max_index]) + rend.append(tend[max_index]) + rscore.append(tscore[max_index]) + tstart.pop(max_index) + tend.pop(max_index) + tscore.pop(max_index) + + newDf=pd.DataFrame() + newDf['score']=rscore + newDf['xmin']=rstart + newDf['xmax']=rend + return newDf + +def video_post_process(video_list,video_dict): + + for video_name in video_list: + df=pd.read_csv("./output/PEM_results/"+video_name+".csv") + + df['score']=df.iou_score.values[:]*df.xmin_score.values[:]*df.xmax_score.values[:] + if len(df)>1: + df=Soft_NMS(df) + + df=df.sort_values(by="score",ascending=False) + video_info=video_dict[video_name] + video_duration=float(video_info["duration_frame"]/16*16)/video_info["duration_frame"]*video_info["duration_second"] + proposal_list=[] + + for j in range(min(100,len(df))): + tmp_proposal={} + tmp_proposal["score"]=df.score.values[j] + tmp_proposal["segment"]=[max(0,df.xmin.values[j])*video_duration,min(1,df.xmax.values[j])*video_duration] + proposal_list.append(tmp_proposal) + result_dict[video_name[2:]]=proposal_list + + +def BSN_post_processing(): + video_dict=getDatasetDict() + video_list=video_dict.keys()#[:100] + video_list = list(video_list) + global result_dict + result_dict=mp.Manager().dict() + + num_videos = len(video_list) + num_videos_per_thread = num_videos/8 + processes = [] + for tid in range(7): + tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] + p = mp.Process(target = video_post_process,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + tmp_video_list = video_list[int(7*num_videos_per_thread):] + p = mp.Process(target = video_post_process,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + for p in processes: + p.join() + + result_dict = dict(result_dict) + output_dict={"version":"VERSION 1.3","results":result_dict,"external_data":{}} + outfile=open("./output/result_proposal.json","w") + json.dump(output_dict,outfile) + outfile.close() + +if __name__ == '__main__': + if not os.path.exists("output/PEM_results"): + os.makedirs("output/PEM_results") + out_files = os.listdir(args.PEM_out_path) + print("processing...") + for i in range(len(out_files)): + video_name = str(out_files[i]) + video_name = video_name[0:int(len(video_name)-6)] + video_data = np.fromfile(args.PEM_out_path+'/'+out_files[i],dtype=np.float32) + + video_xmin = 
np.fromfile(args.PEM_video_xmin_path+'/'+video_name+'.bin',dtype=np.float64) + video_xmax = np.fromfile(args.PEM_video_xmax_path+'/'+video_name+'.bin',dtype=np.float64) + video_xmin_score = np.fromfile(args.PEM_video_xmin_score_path+'/'+video_name+'.bin',dtype=np.float64) + video_xmax_score = np.fromfile(args.PEM_video_xmax_score_path+'/'+video_name+'.bin',dtype=np.float64) + + video_data = torch.tensor(video_data) + video_xmin = torch.tensor(video_xmin) + video_xmax = torch.tensor(video_xmax) + video_xmin_score = torch.tensor(video_xmin_score) + video_xmax_score = torch.tensor(video_xmax_score) + data_num = int(video_xmin.shape[0]) + video_data = video_data[:data_num] + + video_data = video_data.view(-1).detach().cpu().numpy() + video_xmin = video_xmin.view(-1).cpu().numpy() + video_xmax = video_xmax.view(-1).cpu().numpy() + video_xmin_score = video_xmin_score.view(-1).cpu().numpy() + video_xmax_score = video_xmax_score.view(-1).cpu().numpy() + + df=pd.DataFrame() + df["xmin"]=video_xmin + df["xmax"]=video_xmax + df["xmin_score"]=video_xmin_score + df["xmax_score"]=video_xmax_score + df["iou_score"]=video_data + df.to_csv(args.result_path+'/'+video_name+".csv",index=False) + print("PGM: start generating BSN_post feature") + BSN_post_processing() print("PGM: finish generate BSN_post feature") \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_preprocess.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_preprocess.py index 04d12cd6ad285a4aa95b8a388c980ee2161dfeea..575e1737fa7c78b3418dd9fb0750bb549d15ffce 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_preprocess.py @@ -1,106 +1,106 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import numpy as np -import pandas as pd -import pandas -import numpy -import json -import torch.utils.data as data -import os -import torch -import os -#import opts -import sys - -#opt = opts.parse_opt() -#opt = vars(opt) -def load_json(file): - with open(file) as json_file: - data = json.load(json_file) - return data - -if __name__ == '__main__': - if not os.path.exists("output/BSN-PEM-preprocess/feature"): - os.makedirs("output/BSN-PEM-preprocess/feature") - if not os.path.exists("output/BSN-PEM-preprocess/xmin"): - os.makedirs("output/BSN-PEM-preprocess/xmin") - if not os.path.exists("output/BSN-PEM-preprocess/xmax"): - os.makedirs("output/BSN-PEM-preprocess/xmax") - if not os.path.exists("output/BSN-PEM-preprocess/xmin_score"): - os.makedirs("output/BSN-PEM-preprocess/xmin_score") - if not os.path.exists("output/BSN-PEM-preprocess/xmax_score"): - os.makedirs("output/BSN-PEM-preprocess/xmax_score") - subset = "validation" - top_K = 1000 - video_info_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv" - video_anno_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json" - #video_info_path = opt["video_info"] - #video_anno_path = opt["video_anno"] - pgm_proposals_path = "output/PGM_proposals/" - pgm_feature_path = "output/PGM_feature/" - pem_feature_path ="output/BSN-PEM-preprocess/feature" - pem_xmin_path ="output/BSN-PEM-preprocess/xmin" - pem_xmax_path ="output/BSN-PEM-preprocess/xmax" - pem_xmin_score_path ="output/BSN-PEM-preprocess/xmin_score" - pem_xmax_score_path ="output/BSN-PEM-preprocess/xmax_score" - anno_df = pd.read_csv(video_info_path) - anno_database= load_json(video_anno_path) - video_dict = {} - for i in range(len(anno_df)): - video_name=anno_df.video.values[i] - video_info=anno_database[video_name] - video_subset=anno_df.subset.values[i] - #if subset == "full": - # video_dict[video_name] = video_info - if subset in video_subset: - video_dict[video_name] = video_info - video_list = list(video_dict.keys()) - print("%s subset video numbers: %d" %(subset,len(video_list))) - - print("processing...") - for i in range(len(video_list)): - video_name = video_list[i] - pdf=pandas.read_csv(pgm_proposals_path+video_name+".csv") - pdf=pdf[:top_K] - video_feature = numpy.load(pgm_feature_path + video_name+".npy") - video_feature = video_feature[:top_K,:] - video_feature = torch.Tensor(video_feature) - video_xmin =pdf.xmin.values[:] - video_xmax =pdf.xmax.values[:] - video_xmin_score = pdf.xmin_score.values[:] - video_xmax_score = pdf.xmax_score.values[:] - - #video_feature = np.array(video_feature).astype(np.float32) - #if not [1000,32] expend to [1000.32] - expend_num = 1000 - int(video_feature.shape[0]) - if expend_num != 0: - video_expend = torch.zeros(expend_num,32) - video_feature = torch.cat((video_feature,video_expend),0) - video_feature = np.array(video_feature).astype(np.float32) - video_feature.tofile(os.path.join(pem_feature_path, video_name + ".bin")) - - video_xmin = np.array(video_xmin) - video_xmax = np.array(video_xmax) - video_xmin_score = np.array(video_xmin_score) - video_xmax_score = np.array(video_xmax_score) - - video_xmin.tofile(os.path.join(pem_xmin_path, video_name + ".bin")) - video_xmax.tofile(os.path.join(pem_xmax_path, video_name + ".bin")) - video_xmin_score.tofile(os.path.join(pem_xmin_score_path, video_name + ".bin")) - video_xmax_score.tofile(os.path.join(pem_xmax_score_path, video_name + ".bin")) - - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd +import pandas +import numpy +import json +import torch.utils.data as data +import os +import torch +import os +#import opts +import sys + +#opt = opts.parse_opt() +#opt = vars(opt) +def load_json(file): + with open(file) as json_file: + data = json.load(json_file) + return data + +if __name__ == '__main__': + if not os.path.exists("output/BSN-PEM-preprocess/feature"): + os.makedirs("output/BSN-PEM-preprocess/feature") + if not os.path.exists("output/BSN-PEM-preprocess/xmin"): + os.makedirs("output/BSN-PEM-preprocess/xmin") + if not os.path.exists("output/BSN-PEM-preprocess/xmax"): + os.makedirs("output/BSN-PEM-preprocess/xmax") + if not os.path.exists("output/BSN-PEM-preprocess/xmin_score"): + os.makedirs("output/BSN-PEM-preprocess/xmin_score") + if not os.path.exists("output/BSN-PEM-preprocess/xmax_score"): + os.makedirs("output/BSN-PEM-preprocess/xmax_score") + subset = "validation" + top_K = 1000 + video_info_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv" + video_anno_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json" + #video_info_path = opt["video_info"] + #video_anno_path = opt["video_anno"] + pgm_proposals_path = "output/PGM_proposals/" + pgm_feature_path = "output/PGM_feature/" + pem_feature_path ="output/BSN-PEM-preprocess/feature" + pem_xmin_path ="output/BSN-PEM-preprocess/xmin" + pem_xmax_path ="output/BSN-PEM-preprocess/xmax" + pem_xmin_score_path ="output/BSN-PEM-preprocess/xmin_score" + pem_xmax_score_path ="output/BSN-PEM-preprocess/xmax_score" + anno_df = pd.read_csv(video_info_path) + anno_database= load_json(video_anno_path) + video_dict = {} + for i in range(len(anno_df)): + video_name=anno_df.video.values[i] + video_info=anno_database[video_name] + video_subset=anno_df.subset.values[i] + #if subset == "full": + # video_dict[video_name] = video_info + if subset in video_subset: + video_dict[video_name] = video_info + video_list = list(video_dict.keys()) + print("%s subset video numbers: %d" %(subset,len(video_list))) + + print("processing...") + for i in range(len(video_list)): + video_name = video_list[i] + pdf=pandas.read_csv(pgm_proposals_path+video_name+".csv") + pdf=pdf[:top_K] + video_feature = numpy.load(pgm_feature_path + video_name+".npy") + video_feature = video_feature[:top_K,:] + video_feature = torch.Tensor(video_feature) + video_xmin =pdf.xmin.values[:] + video_xmax =pdf.xmax.values[:] + video_xmin_score = pdf.xmin_score.values[:] + video_xmax_score = pdf.xmax_score.values[:] + + #video_feature = np.array(video_feature).astype(np.float32) + #if not [1000,32] expend to [1000.32] + expend_num = 1000 - int(video_feature.shape[0]) + if expend_num != 0: + video_expend = torch.zeros(expend_num,32) + video_feature = torch.cat((video_feature,video_expend),0) + video_feature = np.array(video_feature).astype(np.float32) + video_feature.tofile(os.path.join(pem_feature_path, video_name + 
".bin")) + + video_xmin = np.array(video_xmin) + video_xmax = np.array(video_xmax) + video_xmin_score = np.array(video_xmin_score) + video_xmax_score = np.array(video_xmax_score) + + video_xmin.tofile(os.path.join(pem_xmin_path, video_name + ".bin")) + video_xmax.tofile(os.path.join(pem_xmax_path, video_name + ".bin")) + video_xmin_score.tofile(os.path.join(pem_xmin_score_path, video_name + ".bin")) + video_xmax_score.tofile(os.path.join(pem_xmax_score_path, video_name + ".bin")) + + + + + diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_pth2onnx.py index 3b9f6d64330e6f6c08dc73a51c74b6b7d1ac2952..838b4c8868a276c9890f9a22a3d9b056b5660462 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_pem_pth2onnx.py @@ -1,66 +1,66 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import argparse -import sys -sys.path.append(r"./BSN-boundary-sensitive-network.pytorch") - -from models import PEM - -parser = argparse.ArgumentParser( - description='pem2onnx') -parser.add_argument('--pth_path', - default='./pem_best.pth.tar', - help='pth path') -parser.add_argument('--onnx_path', - default='./BSN_pem.onnx', - help='onnx path') -parser.add_argument( - '--pem_feat_dim', - type=int, - default=32) -parser.add_argument( - '--pem_hidden_dim', - type=int, - default=256) -parser.add_argument( - '--pem_batch_size', - type=int, - default=16) -parser.add_argument( - '--pem_u_ratio_m', - type=float, - default=1) -parser.add_argument( - '--pem_u_ratio_l', - type=float, - default=2) - - -def pem_onnx(opt): - opt = vars(opt) - pth_path = opt['pth_path'] - onnx_path = opt['onnx_path'] - model = PEM(opt) - checkpoint = torch.load(pth_path,map_location='cpu') - base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} - model.load_state_dict(base_dict) - input_names=["video_feature"] - output_names = ["output"] - model.eval() - dummy_input = torch.randn(1,1000,32) - torch.onnx.export(model,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=11) -if __name__ =="__main__": - opt = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +import argparse +import sys +sys.path.append(r"./BSN-boundary-sensitive-network.pytorch") + +from models import PEM + +parser = argparse.ArgumentParser( + description='pem2onnx') +parser.add_argument('--pth_path', + default='./pem_best.pth.tar', + help='pth path') +parser.add_argument('--onnx_path', + default='./BSN_pem.onnx', + help='onnx path') +parser.add_argument( + '--pem_feat_dim', + type=int, + default=32) +parser.add_argument( + '--pem_hidden_dim', + type=int, + default=256) +parser.add_argument( + '--pem_batch_size', + type=int, + default=16) +parser.add_argument( + '--pem_u_ratio_m', + type=float, + default=1) +parser.add_argument( + '--pem_u_ratio_l', + type=float, + default=2) + + +def pem_onnx(opt): + opt = vars(opt) + pth_path = opt['pth_path'] + onnx_path = opt['onnx_path'] + model = PEM(opt) + checkpoint = torch.load(pth_path,map_location='cpu') + base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} + model.load_state_dict(base_dict) + input_names=["video_feature"] + output_names = ["output"] + model.eval() + dummy_input = torch.randn(1,1000,32) + torch.onnx.export(model,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=11) +if __name__ =="__main__": + opt = parser.parse_args() pem_onnx(opt) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_postprocess.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_postprocess.py index 0c4eb2b17719f79eb02a5239f72212c4d625a0fc..de6f33cc8f065cfd957e935638bad3bdbef11e95 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_postprocess.py @@ -1,313 +1,313 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import json -import torch -import hydra -import argparse -import os -import numpy as np -import pandas as pd -import numpy -import pandas -import torch.multiprocessing as mp -from scipy.interpolate import interp1d - - -parser = argparse.ArgumentParser(description='BSN') - -parser.add_argument('--result_path',default='./output/TEM_results', type=str, help='Dir to save txt results') -parser.add_argument('--TEM_out_path', default='./result/dumpOutput_device0', type=str, help='infer out path') -parser.add_argument('--TEM_anchor_xmin_path', default='./output/BSN-TEM-preprocess/anchor_min', type=str, help='infer info path') -parser.add_argument('--TEM_anchor_xmax_path', default='./output/BSN-TEM-preprocess/anchor_max', type=str, help='infer info path') - -args = parser.parse_args() - -columns=["action","start","end","xmin","xmax"] - -def load_json(file): - with open(file) as json_file: - data = json.load(json_file) - return data - -def iou_with_anchors(anchors_min,anchors_max,box_min,box_max): - """Compute jaccard score between a box and the anchors. 
- """ - len_anchors=anchors_max-anchors_min - int_xmin = numpy.maximum(anchors_min, box_min) - int_xmax = numpy.minimum(anchors_max, box_max) - inter_len = numpy.maximum(int_xmax - int_xmin, 0.) - union_len = len_anchors - inter_len +box_max-box_min - jaccard = numpy.divide(inter_len, union_len) - return jaccard - -def ioa_with_anchors(anchors_min,anchors_max,box_min,box_max): - """Compute intersection between score a box and the anchors. - """ - len_anchors=anchors_max-anchors_min - int_xmin = numpy.maximum(anchors_min, box_min) - int_xmax = numpy.minimum(anchors_max, box_max) - inter_len = numpy.maximum(int_xmax - int_xmin, 0.) - scores = numpy.divide(inter_len, len_anchors) - return scores - -def generateProposals(video_list,video_dict): - tscale = 100 - tgap = 1./tscale - peak_thres= 0.5 - - for video_name in video_list: - tdf=pandas.read_csv("./output/TEM_results/"+video_name+".csv") - start_scores=tdf.start.values[:] - end_scores=tdf.end.values[:] - - max_start = max(start_scores) - max_end = max(end_scores) - - start_bins=numpy.zeros(len(start_scores)) - start_bins[[0,-1]]=1 - for idx in range(1,tscale-1): - if start_scores[idx]>start_scores[idx+1] and start_scores[idx]>start_scores[idx-1]: - start_bins[idx]=1 - elif start_scores[idx]>(peak_thres*max_start): - start_bins[idx]=1 - - end_bins=numpy.zeros(len(end_scores)) - end_bins[[0,-1]]=1 - for idx in range(1,tscale-1): - if end_scores[idx]>end_scores[idx+1] and end_scores[idx]>end_scores[idx-1]: - end_bins[idx]=1 - elif end_scores[idx]>(peak_thres*max_end): - end_bins[idx]=1 - - xmin_list=[] - xmin_score_list=[] - xmax_list=[] - xmax_score_list=[] - for j in range(tscale): - if start_bins[j]==1: - xmin_list.append(tgap/2+tgap*j) - xmin_score_list.append(start_scores[j]) - if end_bins[j]==1: - xmax_list.append(tgap/2+tgap*j) - xmax_score_list.append(end_scores[j]) - - new_props=[] - for ii in range(len(xmax_list)): - tmp_xmax=xmax_list[ii] - tmp_xmax_score=xmax_score_list[ii] - - for ij in range(len(xmin_list)): - tmp_xmin=xmin_list[ij] - tmp_xmin_score=xmin_score_list[ij] - if tmp_xmin>=tmp_xmax: - break - new_props.append([tmp_xmin,tmp_xmax,tmp_xmin_score,tmp_xmax_score]) - new_props=numpy.stack(new_props) - - col_name=["xmin","xmax","xmin_score","xmax_score"] - new_df=pandas.DataFrame(new_props,columns=col_name) - new_df["score"]=new_df.xmin_score*new_df.xmax_score - - new_df=new_df.sort_values(by="score",ascending=False) - - video_info=video_dict[video_name] - video_frame=video_info['duration_frame'] - video_second=video_info['duration_second'] - feature_frame=video_info['feature_frame'] - corrected_second=float(feature_frame)/video_frame*video_second - - try: - gt_xmins=[] - gt_xmaxs=[] - for idx in range(len(video_info["annotations"])): - gt_xmins.append(video_info["annotations"][idx]["segment"][0]/corrected_second) - gt_xmaxs.append(video_info["annotations"][idx]["segment"][1]/corrected_second) - new_iou_list=[] - for j in range(len(new_df)): - tmp_new_iou=max(iou_with_anchors(new_df.xmin.values[j],new_df.xmax.values[j],gt_xmins,gt_xmaxs)) - new_iou_list.append(tmp_new_iou) - - new_ioa_list=[] - for j in range(len(new_df)): - tmp_new_ioa=max(ioa_with_anchors(new_df.xmin.values[j],new_df.xmax.values[j],gt_xmins,gt_xmaxs)) - new_ioa_list.append(tmp_new_ioa) - new_df["match_iou"]=new_iou_list - new_df["match_ioa"]=new_ioa_list - except: - pass - new_df.to_csv("./output/PGM_proposals/"+video_name+".csv",index=False) - - -def getDatasetDict(): - 
df=pandas.read_csv("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv") - json_data= load_json("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") - database=json_data - video_dict = {} - for i in range(len(df)): - video_name=df.video.values[i] - video_info=database[video_name] - video_new_info={} - video_new_info['duration_frame']=video_info['duration_frame'] - video_new_info['duration_second']=video_info['duration_second'] - video_new_info["feature_frame"]=video_info['feature_frame'] - video_new_info['annotations']=video_info['annotations'] - video_new_info['subset'] = df.subset.values[i] - video_dict[video_name]=video_new_info - return video_dict - -def generateFeature(video_list,video_dict): - - num_sample_start=8 - num_sample_end=8 - num_sample_action=16 - num_sample_interpld = 3 - - for video_name in video_list: - adf=pandas.read_csv("./output/TEM_results/"+video_name+".csv") - score_action=adf.action.values[:] - seg_xmins = adf.xmin.values[:] - seg_xmaxs = adf.xmax.values[:] - video_scale = len(adf) - video_gap = seg_xmaxs[0] - seg_xmins[0] - video_extend = video_scale / 4 + 10 - pdf=pandas.read_csv("./output/PGM_proposals/"+video_name+".csv") - video_subset = video_dict[video_name]['subset'] - if video_subset == "training": - pdf=pdf[:500] - else: - pdf=pdf[:1000] - tmp_zeros=numpy.zeros([int(video_extend)]) - score_action=numpy.concatenate((tmp_zeros,score_action,tmp_zeros)) - tmp_cell = video_gap - #print('video_extend:{}'.format(video_extend)) - tmp_x = [-tmp_cell/2-(video_extend-1-ii)*tmp_cell for ii in range(int(video_extend))] + \ - [tmp_cell/2+ii*tmp_cell for ii in range(int(video_scale))] + \ - [tmp_cell/2+seg_xmaxs[-1] +ii*tmp_cell for ii in range(int(video_extend))] - f_action=interp1d(tmp_x,score_action,axis=0) - feature_bsp=[] - - for idx in range(len(pdf)): - xmin=pdf.xmin.values[idx] - xmax=pdf.xmax.values[idx] - xlen=xmax-xmin - xmin_0=xmin-xlen * 0.2 - xmin_1=xmin+xlen * 0.2 - xmax_0=xmax-xlen * 0.2 - xmax_1=xmax+xlen * 0.2 - #start - plen_start= (xmin_1-xmin_0)/(num_sample_start-1) - plen_sample = plen_start / num_sample_interpld - tmp_x_new = [ xmin_0 - plen_start/2 + plen_sample * ii for ii in range(num_sample_start*num_sample_interpld +1 )] - tmp_y_new_start_action=f_action(tmp_x_new) - tmp_y_new_start = [numpy.mean(tmp_y_new_start_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_start) ] - #end - plen_end= (xmax_1-xmax_0)/(num_sample_end-1) - plen_sample = plen_end / num_sample_interpld - tmp_x_new = [ xmax_0 - plen_end/2 + plen_sample * ii for ii in range(num_sample_end*num_sample_interpld +1 )] - tmp_y_new_end_action=f_action(tmp_x_new) - tmp_y_new_end = [numpy.mean(tmp_y_new_end_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_end) ] - #action - plen_action= (xmax-xmin)/(num_sample_action-1) - plen_sample = plen_action / num_sample_interpld - tmp_x_new = [ xmin - plen_action/2 + plen_sample * ii for ii in range(num_sample_action*num_sample_interpld +1 )] - tmp_y_new_action=f_action(tmp_x_new) - tmp_y_new_action = [numpy.mean(tmp_y_new_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_action) ] - tmp_feature = numpy.concatenate([tmp_y_new_action,tmp_y_new_start,tmp_y_new_end]) - feature_bsp.append(tmp_feature) - feature_bsp = numpy.array(feature_bsp) - numpy.save("./output/PGM_feature/"+video_name,feature_bsp) - - - -def PGM_proposal_generation(): - video_dict= 
load_json("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") - video_list=video_dict.keys()#[:199] - video_list = list(video_list) - num_videos = len(video_list) - num_videos_per_thread = num_videos/8 - processes = [] - for tid in range(7): - tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] - p = mp.Process(target = generateProposals,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - - tmp_video_list = video_list[int(7*num_videos_per_thread):] - p = mp.Process(target = generateProposals,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - - for p in processes: - p.join() -def PGM_feature_generation(): - video_dict=getDatasetDict() - video_list=video_dict.keys() - video_list = list(video_list) - num_videos = len(video_list) - num_videos_per_thread = num_videos/8 - processes = [] - for tid in range(7): - tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] - p = mp.Process(target = generateFeature,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - - tmp_video_list = video_list[int(7*num_videos_per_thread):] - p = mp.Process(target = generateFeature,args =(tmp_video_list,video_dict,)) - p.start() - processes.append(p) - - for p in processes: - p.join() - -if __name__ == '__main__': - out_files = os.listdir(args.TEM_out_path) - if not os.path.exists("output/TEM_results"): - os.makedirs("output/TEM_results") - print("processing...") - for i in range(len(out_files)): - video_name = str(out_files[i]) - video_name = video_name[0:int(len(video_name)-6)] - video_data = np.fromfile(args.TEM_out_path+'/'+out_files[i],dtype=np.float32) - #print(video_data) - video_data = torch.tensor(video_data.reshape(1,3,100)) - #video_data.reshape(1,3,1000) - video_data = video_data.detach().cpu().numpy() - - anchor_xmin = np.fromfile(args.TEM_anchor_xmin_path+'/'+video_name+'.bin',dtype=np.float64) - anchor_xmax = np.fromfile(args.TEM_anchor_xmax_path+'/'+video_name+'.bin',dtype=np.float64) - - anchor_xmin = torch.tensor(anchor_xmin) - anchor_xmax = torch.tensor(anchor_xmax) - video_action = video_data[:,0,:] - video_start = video_data[:,1,:] - video_end = video_data[:,2,:] - - video_result = np.stack((video_action[0],video_start[0],video_end[0],anchor_xmin,anchor_xmax),axis=1) - - video_df = pd.DataFrame(video_result,columns=columns) - video_df.to_csv(args.result_path+"/"+video_name+".csv",index=False) - - if not os.path.exists("output/PGM_proposals"): - os.makedirs("output/PGM_proposals") - print("PGM: start generating proposals") - PGM_proposal_generation() - print("PGM: finish generate proposals") - - if not os.path.exists("output/PGM_feature"): - os.makedirs("output/PGM_feature") - print("PGM: start generating BSP feature") - PGM_feature_generation() - print("PGM: finish generate BSP feature") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
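Note: both PGM steps above parallelise their per-video loops the same way: the video list is sliced into eight roughly equal chunks, one process per chunk, the last process takes the remainder, and the parent joins them all. A reduced sketch of that fan-out/join pattern (stdlib `multiprocessing` here; the script uses the equivalent `torch.multiprocessing` interface), with a no-op worker standing in for `generateProposals`/`generateFeature`:

```python
import multiprocessing as mp

def worker(video_chunk):
    # Stand-in for generateProposals / generateFeature: handle one slice of names.
    for name in video_chunk:
        pass  # per-video CSV / feature work goes here

if __name__ == "__main__":
    video_list = [f"v_{i:04d}" for i in range(100)]   # hypothetical video names
    n_proc = 8
    per_proc = len(video_list) / n_proc               # float division, as in the script

    processes = []
    for tid in range(n_proc - 1):                     # first 7 evenly sized slices
        chunk = video_list[int(tid * per_proc):int((tid + 1) * per_proc)]
        p = mp.Process(target=worker, args=(chunk,))
        p.start()
        processes.append(p)

    # the last process picks up whatever remains, so no video is dropped
    p = mp.Process(target=worker, args=(video_list[int((n_proc - 1) * per_proc):],))
    p.start()
    processes.append(p)

    for p in processes:
        p.join()
```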
+import json +import torch +import hydra +import argparse +import os +import numpy as np +import pandas as pd +import numpy +import pandas +import torch.multiprocessing as mp +from scipy.interpolate import interp1d + + +parser = argparse.ArgumentParser(description='BSN') + +parser.add_argument('--result_path',default='./output/TEM_results', type=str, help='Dir to save txt results') +parser.add_argument('--TEM_out_path', default='./result/dumpOutput_device0', type=str, help='infer out path') +parser.add_argument('--TEM_anchor_xmin_path', default='./output/BSN-TEM-preprocess/anchor_min', type=str, help='infer info path') +parser.add_argument('--TEM_anchor_xmax_path', default='./output/BSN-TEM-preprocess/anchor_max', type=str, help='infer info path') + +args = parser.parse_args() + +columns=["action","start","end","xmin","xmax"] + +def load_json(file): + with open(file) as json_file: + data = json.load(json_file) + return data + +def iou_with_anchors(anchors_min,anchors_max,box_min,box_max): + """Compute jaccard score between a box and the anchors. + """ + len_anchors=anchors_max-anchors_min + int_xmin = numpy.maximum(anchors_min, box_min) + int_xmax = numpy.minimum(anchors_max, box_max) + inter_len = numpy.maximum(int_xmax - int_xmin, 0.) + union_len = len_anchors - inter_len +box_max-box_min + jaccard = numpy.divide(inter_len, union_len) + return jaccard + +def ioa_with_anchors(anchors_min,anchors_max,box_min,box_max): + """Compute intersection between score a box and the anchors. + """ + len_anchors=anchors_max-anchors_min + int_xmin = numpy.maximum(anchors_min, box_min) + int_xmax = numpy.minimum(anchors_max, box_max) + inter_len = numpy.maximum(int_xmax - int_xmin, 0.) + scores = numpy.divide(inter_len, len_anchors) + return scores + +def generateProposals(video_list,video_dict): + tscale = 100 + tgap = 1./tscale + peak_thres= 0.5 + + for video_name in video_list: + tdf=pandas.read_csv("./output/TEM_results/"+video_name+".csv") + start_scores=tdf.start.values[:] + end_scores=tdf.end.values[:] + + max_start = max(start_scores) + max_end = max(end_scores) + + start_bins=numpy.zeros(len(start_scores)) + start_bins[[0,-1]]=1 + for idx in range(1,tscale-1): + if start_scores[idx]>start_scores[idx+1] and start_scores[idx]>start_scores[idx-1]: + start_bins[idx]=1 + elif start_scores[idx]>(peak_thres*max_start): + start_bins[idx]=1 + + end_bins=numpy.zeros(len(end_scores)) + end_bins[[0,-1]]=1 + for idx in range(1,tscale-1): + if end_scores[idx]>end_scores[idx+1] and end_scores[idx]>end_scores[idx-1]: + end_bins[idx]=1 + elif end_scores[idx]>(peak_thres*max_end): + end_bins[idx]=1 + + xmin_list=[] + xmin_score_list=[] + xmax_list=[] + xmax_score_list=[] + for j in range(tscale): + if start_bins[j]==1: + xmin_list.append(tgap/2+tgap*j) + xmin_score_list.append(start_scores[j]) + if end_bins[j]==1: + xmax_list.append(tgap/2+tgap*j) + xmax_score_list.append(end_scores[j]) + + new_props=[] + for ii in range(len(xmax_list)): + tmp_xmax=xmax_list[ii] + tmp_xmax_score=xmax_score_list[ii] + + for ij in range(len(xmin_list)): + tmp_xmin=xmin_list[ij] + tmp_xmin_score=xmin_score_list[ij] + if tmp_xmin>=tmp_xmax: + break + new_props.append([tmp_xmin,tmp_xmax,tmp_xmin_score,tmp_xmax_score]) + new_props=numpy.stack(new_props) + + col_name=["xmin","xmax","xmin_score","xmax_score"] + new_df=pandas.DataFrame(new_props,columns=col_name) + new_df["score"]=new_df.xmin_score*new_df.xmax_score + + new_df=new_df.sort_values(by="score",ascending=False) + + video_info=video_dict[video_name] + 
video_frame=video_info['duration_frame'] + video_second=video_info['duration_second'] + feature_frame=video_info['feature_frame'] + corrected_second=float(feature_frame)/video_frame*video_second + + try: + gt_xmins=[] + gt_xmaxs=[] + for idx in range(len(video_info["annotations"])): + gt_xmins.append(video_info["annotations"][idx]["segment"][0]/corrected_second) + gt_xmaxs.append(video_info["annotations"][idx]["segment"][1]/corrected_second) + new_iou_list=[] + for j in range(len(new_df)): + tmp_new_iou=max(iou_with_anchors(new_df.xmin.values[j],new_df.xmax.values[j],gt_xmins,gt_xmaxs)) + new_iou_list.append(tmp_new_iou) + + new_ioa_list=[] + for j in range(len(new_df)): + tmp_new_ioa=max(ioa_with_anchors(new_df.xmin.values[j],new_df.xmax.values[j],gt_xmins,gt_xmaxs)) + new_ioa_list.append(tmp_new_ioa) + new_df["match_iou"]=new_iou_list + new_df["match_ioa"]=new_ioa_list + except: + pass + new_df.to_csv("./output/PGM_proposals/"+video_name+".csv",index=False) + + +def getDatasetDict(): + df=pandas.read_csv("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv") + json_data= load_json("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") + database=json_data + video_dict = {} + for i in range(len(df)): + video_name=df.video.values[i] + video_info=database[video_name] + video_new_info={} + video_new_info['duration_frame']=video_info['duration_frame'] + video_new_info['duration_second']=video_info['duration_second'] + video_new_info["feature_frame"]=video_info['feature_frame'] + video_new_info['annotations']=video_info['annotations'] + video_new_info['subset'] = df.subset.values[i] + video_dict[video_name]=video_new_info + return video_dict + +def generateFeature(video_list,video_dict): + + num_sample_start=8 + num_sample_end=8 + num_sample_action=16 + num_sample_interpld = 3 + + for video_name in video_list: + adf=pandas.read_csv("./output/TEM_results/"+video_name+".csv") + score_action=adf.action.values[:] + seg_xmins = adf.xmin.values[:] + seg_xmaxs = adf.xmax.values[:] + video_scale = len(adf) + video_gap = seg_xmaxs[0] - seg_xmins[0] + video_extend = video_scale / 4 + 10 + pdf=pandas.read_csv("./output/PGM_proposals/"+video_name+".csv") + video_subset = video_dict[video_name]['subset'] + if video_subset == "training": + pdf=pdf[:500] + else: + pdf=pdf[:1000] + tmp_zeros=numpy.zeros([int(video_extend)]) + score_action=numpy.concatenate((tmp_zeros,score_action,tmp_zeros)) + tmp_cell = video_gap + #print('video_extend:{}'.format(video_extend)) + tmp_x = [-tmp_cell/2-(video_extend-1-ii)*tmp_cell for ii in range(int(video_extend))] + \ + [tmp_cell/2+ii*tmp_cell for ii in range(int(video_scale))] + \ + [tmp_cell/2+seg_xmaxs[-1] +ii*tmp_cell for ii in range(int(video_extend))] + f_action=interp1d(tmp_x,score_action,axis=0) + feature_bsp=[] + + for idx in range(len(pdf)): + xmin=pdf.xmin.values[idx] + xmax=pdf.xmax.values[idx] + xlen=xmax-xmin + xmin_0=xmin-xlen * 0.2 + xmin_1=xmin+xlen * 0.2 + xmax_0=xmax-xlen * 0.2 + xmax_1=xmax+xlen * 0.2 + #start + plen_start= (xmin_1-xmin_0)/(num_sample_start-1) + plen_sample = plen_start / num_sample_interpld + tmp_x_new = [ xmin_0 - plen_start/2 + plen_sample * ii for ii in range(num_sample_start*num_sample_interpld +1 )] + tmp_y_new_start_action=f_action(tmp_x_new) + tmp_y_new_start = [numpy.mean(tmp_y_new_start_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_start) ] + #end + plen_end= (xmax_1-xmax_0)/(num_sample_end-1) + plen_sample = 
plen_end / num_sample_interpld + tmp_x_new = [ xmax_0 - plen_end/2 + plen_sample * ii for ii in range(num_sample_end*num_sample_interpld +1 )] + tmp_y_new_end_action=f_action(tmp_x_new) + tmp_y_new_end = [numpy.mean(tmp_y_new_end_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_end) ] + #action + plen_action= (xmax-xmin)/(num_sample_action-1) + plen_sample = plen_action / num_sample_interpld + tmp_x_new = [ xmin - plen_action/2 + plen_sample * ii for ii in range(num_sample_action*num_sample_interpld +1 )] + tmp_y_new_action=f_action(tmp_x_new) + tmp_y_new_action = [numpy.mean(tmp_y_new_action[ii*num_sample_interpld:(ii+1)*num_sample_interpld+1]) for ii in range(num_sample_action) ] + tmp_feature = numpy.concatenate([tmp_y_new_action,tmp_y_new_start,tmp_y_new_end]) + feature_bsp.append(tmp_feature) + feature_bsp = numpy.array(feature_bsp) + numpy.save("./output/PGM_feature/"+video_name,feature_bsp) + + + +def PGM_proposal_generation(): + video_dict= load_json("./BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json") + video_list=video_dict.keys()#[:199] + video_list = list(video_list) + num_videos = len(video_list) + num_videos_per_thread = num_videos/8 + processes = [] + for tid in range(7): + tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] + p = mp.Process(target = generateProposals,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + + tmp_video_list = video_list[int(7*num_videos_per_thread):] + p = mp.Process(target = generateProposals,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + + for p in processes: + p.join() +def PGM_feature_generation(): + video_dict=getDatasetDict() + video_list=video_dict.keys() + video_list = list(video_list) + num_videos = len(video_list) + num_videos_per_thread = num_videos/8 + processes = [] + for tid in range(7): + tmp_video_list = video_list[int(tid*num_videos_per_thread):int((tid+1)*num_videos_per_thread)] + p = mp.Process(target = generateFeature,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + + tmp_video_list = video_list[int(7*num_videos_per_thread):] + p = mp.Process(target = generateFeature,args =(tmp_video_list,video_dict,)) + p.start() + processes.append(p) + + for p in processes: + p.join() + +if __name__ == '__main__': + out_files = os.listdir(args.TEM_out_path) + if not os.path.exists("output/TEM_results"): + os.makedirs("output/TEM_results") + print("processing...") + for i in range(len(out_files)): + video_name = str(out_files[i]) + video_name = video_name[0:int(len(video_name)-6)] + video_data = np.fromfile(args.TEM_out_path+'/'+out_files[i],dtype=np.float32) + #print(video_data) + video_data = torch.tensor(video_data.reshape(1,3,100)) + #video_data.reshape(1,3,1000) + video_data = video_data.detach().cpu().numpy() + + anchor_xmin = np.fromfile(args.TEM_anchor_xmin_path+'/'+video_name+'.bin',dtype=np.float64) + anchor_xmax = np.fromfile(args.TEM_anchor_xmax_path+'/'+video_name+'.bin',dtype=np.float64) + + anchor_xmin = torch.tensor(anchor_xmin) + anchor_xmax = torch.tensor(anchor_xmax) + video_action = video_data[:,0,:] + video_start = video_data[:,1,:] + video_end = video_data[:,2,:] + + video_result = np.stack((video_action[0],video_start[0],video_end[0],anchor_xmin,anchor_xmax),axis=1) + + video_df = pd.DataFrame(video_result,columns=columns) + video_df.to_csv(args.result_path+"/"+video_name+".csv",index=False) + + if not 
os.path.exists("output/PGM_proposals"): + os.makedirs("output/PGM_proposals") + print("PGM: start generating proposals") + PGM_proposal_generation() + print("PGM: finish generate proposals") + + if not os.path.exists("output/PGM_feature"): + os.makedirs("output/PGM_feature") + print("PGM: start generating BSP feature") + PGM_feature_generation() + print("PGM: finish generate BSP feature") \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_preprocess.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_preprocess.py index b6639216bfd21201bdb0ceb2cef1d6acdd68467b..328411466dbbb98beebeea4fa626228359f2fb76 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_preprocess.py @@ -1,75 +1,75 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import pandas as pd -import pandas -import numpy -import json -import torch.utils.data as data -import os -import torch -import sys - -def load_json(file): - with open(file) as json_file: - data = json.load(json_file) - return data -if __name__ == '__main__': - if not os.path.exists("output/BSN-TEM-preprocess/anchor_min"): - os.makedirs("output/BSN-TEM-preprocess/anchor_min") - if not os.path.exists("output/BSN-TEM-preprocess/anchor_max"): - os.makedirs("output/BSN-TEM-preprocess/anchor_max") - if not os.path.exists("output/BSN-TEM-preprocess/feature"): - os.makedirs("output/BSN-TEM-preprocess/feature") - feature_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_feature_cuhk/" - video_info_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv" - video_anno_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json" - temporal_scale = 100 - temporal_gap = 1. 
/ temporal_scale - subset = "full" - boundary_ratio = 0.1 - anno_df = pd.read_csv(video_info_path) - anno_database= load_json(video_anno_path) - video_dict = {} - for i in range(len(anno_df)): - video_name=anno_df.video.values[i] - video_info=anno_database[video_name] - video_subset=anno_df.subset.values[i] - if subset == "full": - video_dict[video_name] = video_info - if subset in video_subset: - video_dict[video_name] = video_info - video_list = list(video_dict.keys()) - print("%s subset video numbers: %d" %(subset,len(video_list))) - - print("processing...") - for i in range(len(video_list)): - video_name=video_list[i] - anchor_xmin=[temporal_gap*i for i in range(temporal_scale)] - anchor_xmax=[temporal_gap*i for i in range(1,temporal_scale+1)] - video_df=pd.read_csv(feature_path+ "csv_mean_"+str(temporal_scale)+"/"+video_name+".csv") - video_data = video_df.values[:,:] - video_data = torch.Tensor(video_data) - video_data = torch.transpose(video_data,0,1) - video_data.float() - video_data = np.array(video_data).astype(np.float32) - video_data.tofile(os.path.join('./output/BSN-TEM-preprocess/feature/', video_name + ".bin")) - - anchor_xmin = np.array(anchor_xmin) - anchor_xmax = np.array(anchor_xmax) - anchor_xmin.tofile(os.path.join('./output/BSN-TEM-preprocess/anchor_min/', video_name + ".bin")) - anchor_xmax.tofile(os.path.join('./output/BSN-TEM-preprocess/anchor_max/', video_name + ".bin")) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd +import pandas +import numpy +import json +import torch.utils.data as data +import os +import torch +import sys + +def load_json(file): + with open(file) as json_file: + data = json.load(json_file) + return data +if __name__ == '__main__': + if not os.path.exists("output/BSN-TEM-preprocess/anchor_min"): + os.makedirs("output/BSN-TEM-preprocess/anchor_min") + if not os.path.exists("output/BSN-TEM-preprocess/anchor_max"): + os.makedirs("output/BSN-TEM-preprocess/anchor_max") + if not os.path.exists("output/BSN-TEM-preprocess/feature"): + os.makedirs("output/BSN-TEM-preprocess/feature") + feature_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_feature_cuhk/" + video_info_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/video_info_new.csv" + video_anno_path = "BSN-boundary-sensitive-network.pytorch/data/activitynet_annotations/anet_anno_action.json" + temporal_scale = 100 + temporal_gap = 1. 
/ temporal_scale + subset = "full" + boundary_ratio = 0.1 + anno_df = pd.read_csv(video_info_path) + anno_database= load_json(video_anno_path) + video_dict = {} + for i in range(len(anno_df)): + video_name=anno_df.video.values[i] + video_info=anno_database[video_name] + video_subset=anno_df.subset.values[i] + if subset == "full": + video_dict[video_name] = video_info + if subset in video_subset: + video_dict[video_name] = video_info + video_list = list(video_dict.keys()) + print("%s subset video numbers: %d" %(subset,len(video_list))) + + print("processing...") + for i in range(len(video_list)): + video_name=video_list[i] + anchor_xmin=[temporal_gap*i for i in range(temporal_scale)] + anchor_xmax=[temporal_gap*i for i in range(1,temporal_scale+1)] + video_df=pd.read_csv(feature_path+ "csv_mean_"+str(temporal_scale)+"/"+video_name+".csv") + video_data = video_df.values[:,:] + video_data = torch.Tensor(video_data) + video_data = torch.transpose(video_data,0,1) + video_data.float() + video_data = np.array(video_data).astype(np.float32) + video_data.tofile(os.path.join('./output/BSN-TEM-preprocess/feature/', video_name + ".bin")) + + anchor_xmin = np.array(anchor_xmin) + anchor_xmax = np.array(anchor_xmax) + anchor_xmin.tofile(os.path.join('./output/BSN-TEM-preprocess/anchor_min/', video_name + ".bin")) + anchor_xmax.tofile(os.path.join('./output/BSN-TEM-preprocess/anchor_max/', video_name + ".bin")) + + \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_pth2onnx.py index fbb5247a8a20a171d6af0f5cc22990607014b313..3b9bd12c35b43238e533b9725d89fca02e3dabd0 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/BSN_tem_pth2onnx.py @@ -1,64 +1,64 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
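Note: the TEM preprocess above fixes the on-disk conventions the later stages depend on: each feature .bin is the 400x100 float32 transpose of one csv_mean_100 file, and the anchor .bins hold the 100 float64 bin edges `temporal_gap * i`. A small standalone check of those anchors (no dataset required):

```python
import numpy as np

temporal_scale = 100
temporal_gap = 1.0 / temporal_scale

# The 100 left/right edges of the temporal bins, exactly as written by the preprocess.
anchor_xmin = np.array([temporal_gap * i for i in range(temporal_scale)])         # 0.00 .. 0.99
anchor_xmax = np.array([temporal_gap * i for i in range(1, temporal_scale + 1)])  # 0.01 .. 1.00

# np.array of Python floats defaults to float64, which is why the postprocess
# reads the anchor .bins back with dtype=np.float64.
assert anchor_xmin.dtype == np.float64 and anchor_xmax.dtype == np.float64
assert np.allclose(anchor_xmax - anchor_xmin, temporal_gap)
print(anchor_xmin[:3], anchor_xmax[-3:])
```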
-import argparse -import torch -import sys -sys.path.append(r"./BSN-boundary-sensitive-network.pytorch") - -from models import TEM - -parser = argparse.ArgumentParser( - description='tem2onnx') -parser.add_argument('--pth_path', - default='./tem_best.pth.tar', - help='pth path') -parser.add_argument('--onnx_path', - default='./BSN_tem.onnx', - help='onnx path') -parser.add_argument( - '--tem_feat_dim', - type=int, - default=400) -parser.add_argument( - '--tem_hidden_dim', - type=int, - default=512) -parser.add_argument( - '--tem_batch_size', - type=int, - default=16) -parser.add_argument( - '--temporal_scale', - type=int, - default=100) -opt = parser.parse_args() - -def pth_onnx(opt): - - - opt = vars(opt) - pth_path = opt['pth_path'] - onnx_path = opt['onnx_path'] - model = TEM(opt) - checkpoint = torch.load(pth_path,map_location='cpu') - base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} - model.load_state_dict(base_dict) - input_names=["video"] - output_names = ["output"] - model.eval() - dummy_input = torch.randn(1,400,100) - torch.onnx.export(model,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=11) -if __name__ =="__main__": - opt = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
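Note: both pth2onnx scripts rebuild `base_dict` by dropping the first dotted component of every checkpoint key. This is the usual fix when a checkpoint was saved from a `DataParallel`-style wrapper (keys look like `module.conv1.weight`) but is loaded into the bare module; the sketch below only illustrates that rewrite, with a toy `nn.Linear` standing in for TEM/PEM:

```python
import torch.nn as nn

# Toy stand-in for TEM/PEM; any module works for demonstrating the key rewrite.
model = nn.Linear(32, 1)

# A checkpoint written from a DataParallel/DistributedDataParallel wrapper stores
# every parameter under an extra leading "module." component.
checkpoint = {"state_dict": {"module." + k: v for k, v in model.state_dict().items()}}

# The same rewrite used in the conversion scripts: drop the first dotted component.
base_dict = {'.'.join(k.split('.')[1:]): v
             for k, v in checkpoint["state_dict"].items()}

model.load_state_dict(base_dict)   # keys now match the unwrapped module
print(sorted(base_dict))           # ['bias', 'weight']
```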
+import argparse +import torch +import sys +sys.path.append(r"./BSN-boundary-sensitive-network.pytorch") + +from models import TEM + +parser = argparse.ArgumentParser( + description='tem2onnx') +parser.add_argument('--pth_path', + default='./tem_best.pth.tar', + help='pth path') +parser.add_argument('--onnx_path', + default='./BSN_tem.onnx', + help='onnx path') +parser.add_argument( + '--tem_feat_dim', + type=int, + default=400) +parser.add_argument( + '--tem_hidden_dim', + type=int, + default=512) +parser.add_argument( + '--tem_batch_size', + type=int, + default=16) +parser.add_argument( + '--temporal_scale', + type=int, + default=100) +opt = parser.parse_args() + +def pth_onnx(opt): + + + opt = vars(opt) + pth_path = opt['pth_path'] + onnx_path = opt['onnx_path'] + model = TEM(opt) + checkpoint = torch.load(pth_path,map_location='cpu') + base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} + model.load_state_dict(base_dict) + input_names=["video"] + output_names = ["output"] + model.eval() + dummy_input = torch.randn(1,400,100) + torch.onnx.export(model,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=11) +if __name__ =="__main__": + opt = parser.parse_args() pth_onnx(opt) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/README.md b/ACL_PyTorch/contrib/cv/detection/BSN/README.md index 66ee1c7705d2d51a3542015f126bd6ed6f982786..7892ad2b7e86fe2c48285ffaa995213f04bd32da 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/README.md +++ b/ACL_PyTorch/contrib/cv/detection/BSN/README.md @@ -1,450 +1,450 @@ -# BSN推理说明 - - - -## 1、 环境说明 - -1、安装必要的依赖 - -``` -apex 0.1+ascend.20210930 -certifi 2021.10.8 -cycler 0.11.0 -decorator 5.1.0 -docutils 0.18 -flatbuffers 2.0 -future 0.18.2 -Geohash 1.0 -Hydra 2.5 -kiwisolver 1.3.2 -matplotlib 3.4.3 -mpmath 1.2.1 -numpy 1.21.0 -onnx 1.10.2 -onnxruntime 1.9.0 -pandas 1.3.4 -Pillow 8.4.0 -pip 21.3.1 -protobuf 3.19.1 -pyparsing 3.0.6 -python-dateutil 2.8.2 -pytz 2021.3 -scipy 1.7.2 -setuptools 58.0.4 -six 1.16.0 -sympy 1.9 -torch 1.5.0+ascend.post3.20210930 -typing-extensions 3.10.0.2 -wheel 0.37.0 -``` - -2、获取开源代码 - -直接从githup上git clone 可能无法clone成功,建议先把githup上的仓先导入到git,再clone - -``` -git clone https://github.com/wzmsltw/BSN-boundary-sensitive-network.pytorch -``` - - - -3、获取onnx_tools,优化TEM的onnx模型 - -``` -git clone https://gitee.com/zheng-wengang1/onnx_tools -``` - - - -4、下载视频特征数据集 - -请参考源代码仓 - -5、代码目录 - -``` -BSN #模型名称命名的文件夹 -├── BSN-boundary-sensitive-network.pytorch #BSN开源代码 - └── data - ├── activitynet_feature_cuhk - ├── csv_mean_100 #下载数据特征集 -├── env.sh #环境变量 -├── BSN_tem_pth2onnx.py #tem模型转换脚本 -├── BSN_pem_pth2onnx.py #pem模型转换脚本 -├── BSN_tem_preprocess.py #tem模型前处理脚本 -├── BSN_pem_preprocess.py #pem模型前处理脚本 -├── gen_dataset_info.py #生成数据集info文件 -├── BSN_tem_postprocess.py #tem模型后处理脚本 -├── BSN_pem_postprocess.py #pem模型后处理脚本 -├── BSN_eval.py #测试精度脚本 -├── TEM_onnx_conv1d2conv2d.py #tem模型onnx,conv1d算子转conv2d算子优化脚本 -├── requirements.txt #模型离线推理用到的所有且必要的依赖库 -├── README.md #模型离线推理说明README -├── modelzoo_level.txt #模型精度性能结果 -└── test - ├── pth2om.sh - ├── eval_acc_perf.sh - ├── parse.py -``` - - - -## 2、离线推理 - -1、pth权重转onnx - - - -TEM的pth权重转onnx,参数pth_path为TEM模型权重文件所在位置,onnx_path为输出的onnx模型位置 - -``` -python BSN_tem_pth2onnx.py --pth_path './tem_best.pth.tar' --onnx_path './BSN_tem.onnx' -``` - -tem-onnx模型优化,第一个参数为原本onnx模型位置,第二个参数为输出onnx模型 - -``` -python TEM_onnx_conv1d2conv2d.py './BSN_tem.onnx' './BSN_tem1.onnx' -``` - 
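Note: the `TEM_onnx_conv1d2conv2d.py` step referenced above rewrites the exported 1-D convolutions as 2-D convolutions with a 1xk kernel (weights unsqueezed on axis 2, the input wrapped in an Unsqueeze/Squeeze pair), presumably because the 2-D form maps better onto the target backend. The rewrite is numerically a no-op; a hedged PyTorch sketch of the equivalence it relies on:

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
x = torch.randn(1, 400, 100)                         # TEM input layout: (N, C, T)

conv1d = nn.Conv1d(400, 512, kernel_size=3, padding=1)

# Equivalent Conv2d: treat the sequence as a 1xT "image" and use a 1x3 kernel,
# mirroring the kernel_shape=[1,3] / pads=[0,1,0,1] rewrite in the script.
conv2d = nn.Conv2d(400, 512, kernel_size=(1, 3), padding=(0, 1))
with torch.no_grad():
    conv2d.weight.copy_(conv1d.weight.unsqueeze(2))  # (512,400,3) -> (512,400,1,3)
    conv2d.bias.copy_(conv1d.bias)

y1 = conv1d(x)                                       # (1, 512, 100)
y2 = conv2d(x.unsqueeze(2)).squeeze(2)               # Unsqueeze -> Conv2d -> Squeeze
print(torch.allclose(y1, y2, atol=1e-5))             # True: numerically unchanged
```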
-PEM的pth权重转onnx,参数pth_path为PEM模型权重文件所在位置,onnx_path为输出的onnx模型位置 - -``` -python BSN_pem_pth2onnx.py --pth_path './pem_best.pth.tar' --onnx_path './BSN_pem.onnx' -``` - - - -2、onnx模型转om - -使用atc工具将onnx模型转为om模型,注意应当先设置环境变量 - -``` -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest - -atc --framework=5 --model=BSN_tem1.onnx --output=BSN_tem_bs1 --input_format=ND --input_shape="video:1,400,100" --log=debug --soc_version=Ascend310 - -atc --framework=5 --model=BSN_pem.onnx --output=BSN_pem_bs1 --input_format=ND --input_shape="video_feature:1,1000,32" --log=debug --soc_version=Ascend310 -``` - - - -3、TEM推理 - -运行预处理脚本,运行前确保你已经clone了开源代码,并下载数据特征集 - -``` -python BSN_tem_preprocess.py -``` - -获取处理数据集信息,第一个参数为模型类型,第二个参数为特征文件位置,第三个参数为输出文件名,第四、五个参数为特征形状(400*100) - -``` -python gen_dataset_info.py tem /home/wch/BSN/BSN-TEM-preprocess/feature TEM-video-feature 400 100 -``` - -使用benchmark工具进行推理,生成的数据文件会放在当前路径的result/dumpOutput_device0目录下 - -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=BSN_tem_bs1.om -input_text_path=./TEM-video-feature.info -input_width=400 -input_height=100 -output_binary=True -useDvpp=False -``` - -使用BSN_tem_postprocess.py进行tem后处理(tem的后处理与pem的前处理有关请按照顺序执行) - -``` -python BSN_tem_postprocess.py --TEM_out_path ./result/dumpOutput_device0 -``` - - - -4、PEM推理 - -运行pem预处理脚本(pem的前处理与tem的后处理有关请按照顺序执行) - -``` -python BSN_pem_preprocess.py -``` - -获取处理数据集信息,第一个参数为模型类型,第二个参数为特征文件位置,第三个参数为输出文件名,第四、五个参数为特征形状(1000*32) - -``` -python get_info.py pem output/BSN-PEM-preprocess/feature PEM-video-feature 1000 32 -``` - -使用benchmark工具进行推理,生成的数据文件会放在当前路径的result/dumpOutput_device1目录下 - -``` -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=BSN_pem_bs1.om -input_text_path=./PEM-video-feature.info -input_width=1000 -input_height=32 -output_binary=True -useDvpp=False -``` - -运行后处理脚本,会在output目录下生成结果文件 - -``` -python BSN_pem_postprocess.py --PEM_out_path result/dumpOutput_device1 -``` - - - -5、精度测试 - -原本代码仓的代码是python2的代码,在在使用前需要转为python3 - -``` -2to3 -w ./BSN-boundary-sensitive-network.pytorch/Evaluation/eval_proposal.py - -``` - -精度测试 - -``` -python BSN_eval.py -``` - - - -6、整体测试 - -运行脚本,直接转om模型 - -``` -bash ./test/pth2om.sh -``` - -运行脚本,进行离线推理,运行脚本前,请确保已经将源代码中使用的文件,转为python3 - -``` -bash ./test/eval_acc_perf.sh -``` - - - -## 3 精度性能对比 - -### 1、精度对比 - -​ pth精度 - -``` -Model 论文 开源pth文件 离线推理精度 -BSN AR100:72.42 74.34 74.34 -``` - -### 2、性能对比 - -#### 2.1 npu性能数据 - -tem bs1性能数据 - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 180.879, latency: 106303 -[data read] throughputRate: 182.039, moduleLatency: 5.49332 -[preprocess] throughputRate: 181.865, moduleLatency: 5.49859 -[inference] throughputRate: 182, Interface throughputRate: 3275.55, moduleLatency: 0.561457 -[postprocess] throughputRate: 182.009, moduleLatency: 5.49425 - ------------------------------------------------------------ -``` - -pem bs1性能数据 - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 616.804, latency: 7665.32 -[data read] throughputRate: 1840.06, moduleLatency: 0.54346 -[preprocess] throughputRate: 1817.62, moduleLatency: 0.550169 
-[inference] throughputRate: 1839.62, Interface throughputRate: 3874.46, moduleLatency: 0.469866 -[postprocess] throughputRate: 1839.86, moduleLatency: 0.543521 - ------------------------------------------------------------ -``` - -tem单卡吞吐率:3275.55x4=13102.2 - -pem单卡吞吐率:3874.46x4=15497.84 - -BSN整体吞吐率为:1/(1/13102.2+1/15497.84)=7099.87 - - - -tem bs16性能数据 - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 143.161, latency: 134310 -[data read] throughputRate: 144.544, moduleLatency: 6.91832 -[preprocess] throughputRate: 144.393, moduleLatency: 6.92554 -[inference] throughputRate: 144.476, Interface throughputRate: 12277.9, moduleLatency: 0.570148 -[postprocess] throughputRate: 9.03906, moduleLatency: 110.631 - ------------------------------------------------------------ -``` - -pem bs16性能数据 - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 141.751, latency: 33354.2 -[data read] throughputRate: 145.216, moduleLatency: 6.88627 -[preprocess] throughputRate: 144.936, moduleLatency: 6.89961 -[inference] throughputRate: 145.023, Interface throughputRate: 18564.9, moduleLatency: 0.483157 -[postprocess] throughputRate: 9.10977, moduleLatency: 109.772 - ------------------------------------------------------------ -``` - -tem单卡吞吐率:12277.9x4=49111.6 - -pem单卡吞吐率:18564.9x4=74259.6 - -BSN整体吞吐率为:1/(1/49111.6+1/74259.6)=29560.95 - -#### 2.2 T4性能数据 - -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 - -batch1性能: - -tem: - -``` -trtexec --onnx=BSN_tem.onnx --fp16 --shapes=video:1*400*100 --threads -``` - - - -``` -[11/23/2021-06:45:38] [I] GPU Compute -[11/23/2021-06:45:38] [I] min: 0.045166 ms -[11/23/2021-06:45:38] [I] max: 2.00708 ms -[11/23/2021-06:45:38] [I] mean: 0.0565804 ms -[11/23/2021-06:45:38] [I] median: 0.0568848 ms -[11/23/2021-06:45:38] [I] percentile: 0.0620117 ms at 99% -[11/23/2021-06:45:38] [I] total compute time: 2.47115 s -``` - -pem: - -``` -trtexec --onnx=BSN_pem.onnx --fp16 --shapes=video:1*1000*32 --threads -``` - - - -``` -[11/19/2021-06:40:06] [I] GPU Compute -[11/19/2021-06:40:06] [I] min: 0.0185547 ms -[11/19/2021-06:40:06] [I] max: 1.26123 ms -[11/19/2021-06:40:06] [I] mean: 0.0205523 ms -[11/19/2021-06:40:06] [I] median: 0.0201416 ms -[11/19/2021-06:40:06] [I] percentile: 0.0458527 ms at 99% -[11/19/2021-06:40:06] [I] total compute time: 0.793032 s -``` - - - -tem单卡吞吐率:1000/0.215458=17674 - -pem单卡吞吐率:1000/0.0205523=48656 - -BSN单卡吞吐率:1000/(0.215458+0.0205523)=12965 - - - - - -batch16性能: - -tem: - -``` -trtexec --onnx=BSN_tem.onnx --fp16 --shapes=video:16*400*100 --threads -``` - - - -``` -[11/19/2021-06:37:12] [I] GPU Compute -[11/19/2021-06:37:12] [I] min: 0.182129 ms -[11/19/2021-06:37:12] [I] max: 0.252548 ms -[11/19/2021-06:37:12] [I] mean: 0.219561 ms -[11/19/2021-06:37:12] [I] median: 0.218262 ms -[11/19/2021-06:37:12] [I] percentile: 0.245422 ms at 99% -[11/19/2021-06:37:12] [I] total compute time: 1.5714 s -``` - -pem: - -``` -trtexec --onnx=BSN_pem.onnx --fp16 --shapes=video:16*1000*32 --threads -``` - - - -``` -[11/23/2021-06:51:29] [I] GPU Compute -[11/23/2021-06:51:29] [I] min: 0.21167 ms -[11/23/2021-06:51:29] [I] max: 2.40039 ms -[11/23/2021-06:51:29] [I] mean: 0.24159 ms -[11/23/2021-06:51:29] [I] median: 0.240479 ms -[11/23/2021-06:51:29] [I] percentile: 0.25769 ms at 99% -[11/23/2021-06:51:29] [I] total compute time: 2.08734 s -``` - -tem单卡吞吐率:1000/(0.219561/16)=72872 - -pem单卡吞吐率:1000/(0.24159/16)=66228 - -BSN单卡吞吐率:1000/((0.219561+0.0210533)/16)=34696 - - - -#### 
2.3 性能对比 - -batch1 : - -​ TEM - -​ 310:13102 - -​ T4:17674 - -​ PEM: - -​ 310:15498 - -​ T4:48656 - -​ BSN: - -​ 7099.87<12965 - -​ 7099.87/12965=0.548 - -batch16: - -​ TEM: - -​ 310: 49111.6 - -​ t4: 72872 - -​ PEM: - -​ 310: 74259.6 - -​ T4: 66228 - -​ BSN: - -​ 29560.95<34696 - -​ 29560.95/34696=0.85 - -在batch1,310性能是0.548倍T4性能;在batch16,310性能是0.85倍T4性能。 - +# BSN推理说明 + + + +## 1、 环境说明 + +1、安装必要的依赖 + +``` +apex 0.1+ascend.20210930 +certifi 2021.10.8 +cycler 0.11.0 +decorator 5.1.0 +docutils 0.18 +flatbuffers 2.0 +future 0.18.2 +Geohash 1.0 +Hydra 2.5 +kiwisolver 1.3.2 +matplotlib 3.4.3 +mpmath 1.2.1 +numpy 1.21.0 +onnx 1.10.2 +onnxruntime 1.9.0 +pandas 1.3.4 +Pillow 8.4.0 +pip 21.3.1 +protobuf 3.19.1 +pyparsing 3.0.6 +python-dateutil 2.8.2 +pytz 2021.3 +scipy 1.7.2 +setuptools 58.0.4 +six 1.16.0 +sympy 1.9 +torch 1.5.0+ascend.post3.20210930 +typing-extensions 3.10.0.2 +wheel 0.37.0 +``` + +2、获取开源代码 + +直接从githup上git clone 可能无法clone成功,建议先把githup上的仓先导入到git,再clone + +``` +git clone https://github.com/wzmsltw/BSN-boundary-sensitive-network.pytorch +``` + + + +3、获取onnx_tools,优化TEM的onnx模型 + +``` +git clone https://gitee.com/zheng-wengang1/onnx_tools +``` + + + +4、下载视频特征数据集 + +请参考源代码仓 + +5、代码目录 + +``` +BSN #模型名称命名的文件夹 +├── BSN-boundary-sensitive-network.pytorch #BSN开源代码 + └── data + ├── activitynet_feature_cuhk + ├── csv_mean_100 #下载数据特征集 +├── env.sh #环境变量 +├── BSN_tem_pth2onnx.py #tem模型转换脚本 +├── BSN_pem_pth2onnx.py #pem模型转换脚本 +├── BSN_tem_preprocess.py #tem模型前处理脚本 +├── BSN_pem_preprocess.py #pem模型前处理脚本 +├── gen_dataset_info.py #生成数据集info文件 +├── BSN_tem_postprocess.py #tem模型后处理脚本 +├── BSN_pem_postprocess.py #pem模型后处理脚本 +├── BSN_eval.py #测试精度脚本 +├── TEM_onnx_conv1d2conv2d.py #tem模型onnx,conv1d算子转conv2d算子优化脚本 +├── requirements.txt #模型离线推理用到的所有且必要的依赖库 +├── README.md #模型离线推理说明README +├── modelzoo_level.txt #模型精度性能结果 +└── test + ├── pth2om.sh + ├── eval_acc_perf.sh + ├── parse.py +``` + + + +## 2、离线推理 + +1、pth权重转onnx + + + +TEM的pth权重转onnx,参数pth_path为TEM模型权重文件所在位置,onnx_path为输出的onnx模型位置 + +``` +python BSN_tem_pth2onnx.py --pth_path './tem_best.pth.tar' --onnx_path './BSN_tem.onnx' +``` + +tem-onnx模型优化,第一个参数为原本onnx模型位置,第二个参数为输出onnx模型 + +``` +python TEM_onnx_conv1d2conv2d.py './BSN_tem.onnx' './BSN_tem1.onnx' +``` + +PEM的pth权重转onnx,参数pth_path为PEM模型权重文件所在位置,onnx_path为输出的onnx模型位置 + +``` +python BSN_pem_pth2onnx.py --pth_path './pem_best.pth.tar' --onnx_path './BSN_pem.onnx' +``` + + + +2、onnx模型转om + +使用atc工具将onnx模型转为om模型,注意应当先设置环境变量 + +``` +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest + +atc --framework=5 --model=BSN_tem1.onnx --output=BSN_tem_bs1 --input_format=ND --input_shape="video:1,400,100" --log=debug --soc_version=Ascend310 + +atc --framework=5 --model=BSN_pem.onnx --output=BSN_pem_bs1 --input_format=ND --input_shape="video_feature:1,1000,32" --log=debug --soc_version=Ascend310 +``` + + + +3、TEM推理 + +运行预处理脚本,运行前确保你已经clone了开源代码,并下载数据特征集 + +``` +python BSN_tem_preprocess.py +``` + +获取处理数据集信息,第一个参数为模型类型,第二个参数为特征文件位置,第三个参数为输出文件名,第四、五个参数为特征形状(400*100) + +``` +python gen_dataset_info.py tem /home/wch/BSN/BSN-TEM-preprocess/feature TEM-video-feature 400 100 +``` + 
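Note: before launching the benchmark tool it is cheap to confirm that every preprocessed .bin has the byte size the .info file advertises; a truncated or mis-typed file otherwise only surfaces as a cryptic inference failure. An optional check, not part of the original flow, assuming the output paths used above:

```python
import os
import numpy as np

def check_bin_sizes(feature_dir, shape, dtype=np.float32):
    """Report .bin files whose size differs from prod(shape) * itemsize."""
    expected = int(np.prod(shape)) * np.dtype(dtype).itemsize
    bad = [f for f in os.listdir(feature_dir)
           if f.endswith(".bin")
           and os.path.getsize(os.path.join(feature_dir, f)) != expected]
    print(f"{feature_dir}: expect {expected} bytes per file, {len(bad)} mismatches")
    return bad

# TEM inputs are 400x100 float32; run the same check on
# output/BSN-PEM-preprocess/feature with shape (1000, 32) after step 4.
check_bin_sizes("output/BSN-TEM-preprocess/feature", (400, 100))
```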
+使用benchmark工具进行推理,生成的数据文件会放在当前路径的result/dumpOutput_device0目录下 + +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=BSN_tem_bs1.om -input_text_path=./TEM-video-feature.info -input_width=400 -input_height=100 -output_binary=True -useDvpp=False +``` + +使用BSN_tem_postprocess.py进行tem后处理(tem的后处理与pem的前处理有关请按照顺序执行) + +``` +python BSN_tem_postprocess.py --TEM_out_path ./result/dumpOutput_device0 +``` + + + +4、PEM推理 + +运行pem预处理脚本(pem的前处理与tem的后处理有关请按照顺序执行) + +``` +python BSN_pem_preprocess.py +``` + +获取处理数据集信息,第一个参数为模型类型,第二个参数为特征文件位置,第三个参数为输出文件名,第四、五个参数为特征形状(1000*32) + +``` +python get_info.py pem output/BSN-PEM-preprocess/feature PEM-video-feature 1000 32 +``` + +使用benchmark工具进行推理,生成的数据文件会放在当前路径的result/dumpOutput_device1目录下 + +``` +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=BSN_pem_bs1.om -input_text_path=./PEM-video-feature.info -input_width=1000 -input_height=32 -output_binary=True -useDvpp=False +``` + +运行后处理脚本,会在output目录下生成结果文件 + +``` +python BSN_pem_postprocess.py --PEM_out_path result/dumpOutput_device1 +``` + + + +5、精度测试 + +原本代码仓的代码是python2的代码,在在使用前需要转为python3 + +``` +2to3 -w ./BSN-boundary-sensitive-network.pytorch/Evaluation/eval_proposal.py + +``` + +精度测试 + +``` +python BSN_eval.py +``` + + + +6、整体测试 + +运行脚本,直接转om模型 + +``` +bash ./test/pth2om.sh +``` + +运行脚本,进行离线推理,运行脚本前,请确保已经将源代码中使用的文件,转为python3 + +``` +bash ./test/eval_acc_perf.sh +``` + + + +## 3 精度性能对比 + +### 1、精度对比 + +​ pth精度 + +``` +Model 论文 开源pth文件 离线推理精度 +BSN AR100:72.42 74.34 74.34 +``` + +### 2、性能对比 + +#### 2.1 npu性能数据 + +tem bs1性能数据 + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 180.879, latency: 106303 +[data read] throughputRate: 182.039, moduleLatency: 5.49332 +[preprocess] throughputRate: 181.865, moduleLatency: 5.49859 +[inference] throughputRate: 182, Interface throughputRate: 3275.55, moduleLatency: 0.561457 +[postprocess] throughputRate: 182.009, moduleLatency: 5.49425 + +----------------------------------------------------------- +``` + +pem bs1性能数据 + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 616.804, latency: 7665.32 +[data read] throughputRate: 1840.06, moduleLatency: 0.54346 +[preprocess] throughputRate: 1817.62, moduleLatency: 0.550169 +[inference] throughputRate: 1839.62, Interface throughputRate: 3874.46, moduleLatency: 0.469866 +[postprocess] throughputRate: 1839.86, moduleLatency: 0.543521 + +----------------------------------------------------------- +``` + +tem单卡吞吐率:3275.55x4=13102.2 + +pem单卡吞吐率:3874.46x4=15497.84 + +BSN整体吞吐率为:1/(1/13102.2+1/15497.84)=7099.87 + + + +tem bs16性能数据 + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 143.161, latency: 134310 +[data read] throughputRate: 144.544, moduleLatency: 6.91832 +[preprocess] throughputRate: 144.393, moduleLatency: 6.92554 +[inference] throughputRate: 144.476, Interface throughputRate: 12277.9, moduleLatency: 0.570148 +[postprocess] throughputRate: 9.03906, moduleLatency: 110.631 + +----------------------------------------------------------- +``` + +pem bs16性能数据 + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 141.751, latency: 33354.2 +[data read] throughputRate: 145.216, moduleLatency: 6.88627 +[preprocess] throughputRate: 144.936, moduleLatency: 6.89961 +[inference] throughputRate: 145.023, Interface throughputRate: 18564.9, moduleLatency: 0.483157 +[postprocess] throughputRate: 9.10977, moduleLatency: 109.772 + 
+----------------------------------------------------------- +``` + +tem单卡吞吐率:12277.9x4=49111.6 + +pem单卡吞吐率:18564.9x4=74259.6 + +BSN整体吞吐率为:1/(1/49111.6+1/74259.6)=29560.95 + +#### 2.2 T4性能数据 + +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 + +batch1性能: + +tem: + +``` +trtexec --onnx=BSN_tem.onnx --fp16 --shapes=video:1*400*100 --threads +``` + + + +``` +[11/23/2021-06:45:38] [I] GPU Compute +[11/23/2021-06:45:38] [I] min: 0.045166 ms +[11/23/2021-06:45:38] [I] max: 2.00708 ms +[11/23/2021-06:45:38] [I] mean: 0.0565804 ms +[11/23/2021-06:45:38] [I] median: 0.0568848 ms +[11/23/2021-06:45:38] [I] percentile: 0.0620117 ms at 99% +[11/23/2021-06:45:38] [I] total compute time: 2.47115 s +``` + +pem: + +``` +trtexec --onnx=BSN_pem.onnx --fp16 --shapes=video:1*1000*32 --threads +``` + + + +``` +[11/19/2021-06:40:06] [I] GPU Compute +[11/19/2021-06:40:06] [I] min: 0.0185547 ms +[11/19/2021-06:40:06] [I] max: 1.26123 ms +[11/19/2021-06:40:06] [I] mean: 0.0205523 ms +[11/19/2021-06:40:06] [I] median: 0.0201416 ms +[11/19/2021-06:40:06] [I] percentile: 0.0458527 ms at 99% +[11/19/2021-06:40:06] [I] total compute time: 0.793032 s +``` + + + +tem单卡吞吐率:1000/0.215458=17674 + +pem单卡吞吐率:1000/0.0205523=48656 + +BSN单卡吞吐率:1000/(0.215458+0.0205523)=12965 + + + + + +batch16性能: + +tem: + +``` +trtexec --onnx=BSN_tem.onnx --fp16 --shapes=video:16*400*100 --threads +``` + + + +``` +[11/19/2021-06:37:12] [I] GPU Compute +[11/19/2021-06:37:12] [I] min: 0.182129 ms +[11/19/2021-06:37:12] [I] max: 0.252548 ms +[11/19/2021-06:37:12] [I] mean: 0.219561 ms +[11/19/2021-06:37:12] [I] median: 0.218262 ms +[11/19/2021-06:37:12] [I] percentile: 0.245422 ms at 99% +[11/19/2021-06:37:12] [I] total compute time: 1.5714 s +``` + +pem: + +``` +trtexec --onnx=BSN_pem.onnx --fp16 --shapes=video:16*1000*32 --threads +``` + + + +``` +[11/23/2021-06:51:29] [I] GPU Compute +[11/23/2021-06:51:29] [I] min: 0.21167 ms +[11/23/2021-06:51:29] [I] max: 2.40039 ms +[11/23/2021-06:51:29] [I] mean: 0.24159 ms +[11/23/2021-06:51:29] [I] median: 0.240479 ms +[11/23/2021-06:51:29] [I] percentile: 0.25769 ms at 99% +[11/23/2021-06:51:29] [I] total compute time: 2.08734 s +``` + +tem单卡吞吐率:1000/(0.219561/16)=72872 + +pem单卡吞吐率:1000/(0.24159/16)=66228 + +BSN单卡吞吐率:1000/((0.219561+0.0210533)/16)=34696 + + + +#### 2.3 性能对比 + +batch1 : + +​ TEM + +​ 310:13102 + +​ T4:17674 + +​ PEM: + +​ 310:15498 + +​ T4:48656 + +​ BSN: + +​ 7099.87<12965 + +​ 7099.87/12965=0.548 + +batch16: + +​ TEM: + +​ 310: 49111.6 + +​ t4: 72872 + +​ PEM: + +​ 310: 74259.6 + +​ T4: 66228 + +​ BSN: + +​ 29560.95<34696 + +​ 29560.95/34696=0.85 + +在batch1,310性能是0.548倍T4性能;在batch16,310性能是0.85倍T4性能。 + diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/TEM_onnx_conv1d2conv2d.py b/ACL_PyTorch/contrib/cv/detection/BSN/TEM_onnx_conv1d2conv2d.py index 151d48492471f03df30b4163cd912900743d9976..5e3110052763715578379ec104d964ec41678c5e 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/TEM_onnx_conv1d2conv2d.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/TEM_onnx_conv1d2conv2d.py @@ -1,110 +1,110 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
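The BSN throughput figures above are obtained by treating TEM and PEM as two sequential stages (per-sample latencies add, so throughputs combine as reciprocals) and by multiplying each chip's Interface throughputRate by 4 to get a per-card number. A minimal Python check of the batch-1 comparison, reusing only values quoted in the summaries and trtexec logs above:

```
# sketch: reproduce the documented batch-1 throughput comparison
tem_310 = 3275.55 * 4                       # 13102.2 fps per 310 card
pem_310 = 3874.46 * 4                       # 15497.84 fps per 310 card
bsn_310 = 1 / (1 / tem_310 + 1 / pem_310)   # ~7099.9 fps
tem_t4 = 1000 / 0.0565804                   # ~17674 fps, from the trtexec mean (ms)
pem_t4 = 1000 / 0.0205523                   # ~48656 fps
bsn_t4 = 1000 / (0.0565804 + 0.0205523)     # ~12965 fps
print(round(bsn_310 / bsn_t4, 3))           # 0.548, the 310/T4 ratio reported in 2.3
```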
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import numpy as np -from onnx_tools.OXInterface.OXInterface import OXGraph - - -INPUT_NODE = 'video' - -def conv1d2conv2d(oxgraph, node_conv): - """ - transfer conv1d parameters to conv2d - :param oxgraph: input onnx graph - :param node_conv: conv1d node to be transfered - """ - if node_conv.get_op_type() != 'Conv': - return - if node_conv.get_name() == 'Conv_0': - node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) - node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,3]) - node_conv.set_attribute(attr_name='pads', attr_value=[0, 1, 0, 1]) - node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) - if node_conv.get_name() == 'Conv_2': - node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) - node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,3]) - node_conv.set_attribute(attr_name='pads', attr_value=[0, 1, 0, 1]) - node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) - if node_conv.get_name() == 'Conv_4': - node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) - node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,1]) - node_conv.set_attribute(attr_name='pads', attr_value=[0, 0, 0, 0]) - node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) - - init_conv_w = oxgraph.get_oxinitializer_by_name(node_conv.input[1]) - init_conv_w.set_data(np.expand_dims(init_conv_w.get_data(), axis=2)) - -def transfer_structure(oxgraph, beg_node, end_node): - """ - transfer process: - 1. insert unsqueeze node before beg node - 2. insert squeeze node after end node - 3. 
transfer conv1d paramters for conv2d - :param oxgraph: input onnx graph - :param beg_node: beg node name for searched structure - :param end_node: end node name for searched structure - """ - previous_beg_node = oxgraph.get_previous_oxnode(oxnode_name=beg_node) - if not previous_beg_node: - previous_beg_node = INPUT_NODE - else: - previous_beg_node = previous_beg_node[0].get_name() - next_end_node = oxgraph.get_next_oxnode(oxnode_name=end_node) - unsqueeze_node_name = 'Unsqueeze_before_{}'.format(beg_node) - squeeze_node_name = 'Squeeze_after_{}'.format(end_node) - next_end_node = next_end_node[0].get_name() - - oxgraph.insert_node(bef_node_info_list=[previous_beg_node], - aft_node_info_list=[beg_node], - op_type='Unsqueeze', - op_name=unsqueeze_node_name) - oxgraph.insert_node(bef_node_info_list=[end_node], - aft_node_info_list=[next_end_node], - op_type='Squeeze', - op_name=squeeze_node_name) - node_unsqueeze = oxgraph.get_oxnode_by_name(unsqueeze_node_name) - node_unsqueeze.set_attribute(attr_name='axes', attr_value=[2]) - node_squeeze = oxgraph.get_oxnode_by_name(squeeze_node_name) - node_squeeze.set_attribute(attr_name='axes', attr_value=[2]) - - next_beg_node = oxgraph.get_oxnode_by_name(oxnode_name=beg_node) - while next_beg_node.get_name() != end_node: - conv1d2conv2d(oxgraph, next_beg_node) - next_beg_node = oxgraph.get_next_oxnode(oxnode_name=next_beg_node.get_name()) - next_beg_node = next_beg_node[0] - conv1d2conv2d(oxgraph, next_beg_node) - - -def fix_conv1d(model_path, out_path, beg_list, end_list): - """ - main process for fixing conv1d - :param model_path: input onnx model path - :param out_path: out fixed onnx model path - :param beg_list: beg node names for searched structure - :param end_list: end node names for searched structure - """ - oxgraph = OXGraph(model_path) - for idx, beg_node in enumerate(beg_list): - end_node = end_list[idx] - transfer_structure(oxgraph, beg_node, end_node) - oxgraph.save_new_model(out_path) - - -if __name__ == '__main__': - input_path = sys.argv[1] - save_path = sys.argv[2] - beg_nodes = ['Conv_0'] - end_nodes = ['Conv_4'] - fix_conv1d(input_path, save_path, beg_nodes, end_nodes) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
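+# Overview: this script takes the exported TEM onnx and rewrites the
+# Conv_0 -> Conv_2 -> Conv_4 chain so that each Conv1d becomes an equivalent
+# Conv2d: an Unsqueeze(axes=[2]) is inserted before Conv_0 and a
+# Squeeze(axes=[2]) after Conv_4, turning (N, C, L) activations into
+# (N, C, 1, L); each Conv node's kernel_shape/pads/strides/dilations are
+# rewritten to their 1xK 2D form and the weight initializers gain an extra
+# axis (np.expand_dims on axis 2). This is the "conv1d -> conv2d"
+# optimization step referred to in the README above.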
+import sys +import numpy as np +from onnx_tools.OXInterface.OXInterface import OXGraph + + +INPUT_NODE = 'video' + +def conv1d2conv2d(oxgraph, node_conv): + """ + transfer conv1d parameters to conv2d + :param oxgraph: input onnx graph + :param node_conv: conv1d node to be transfered + """ + if node_conv.get_op_type() != 'Conv': + return + if node_conv.get_name() == 'Conv_0': + node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) + node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,3]) + node_conv.set_attribute(attr_name='pads', attr_value=[0, 1, 0, 1]) + node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) + if node_conv.get_name() == 'Conv_2': + node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) + node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,3]) + node_conv.set_attribute(attr_name='pads', attr_value=[0, 1, 0, 1]) + node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) + if node_conv.get_name() == 'Conv_4': + node_conv.set_attribute(attr_name='dilations', attr_value=[1,1]) + node_conv.set_attribute(attr_name='kernel_shape', attr_value=[1,1]) + node_conv.set_attribute(attr_name='pads', attr_value=[0, 0, 0, 0]) + node_conv.set_attribute(attr_name='strides', attr_value=[1,1]) + + init_conv_w = oxgraph.get_oxinitializer_by_name(node_conv.input[1]) + init_conv_w.set_data(np.expand_dims(init_conv_w.get_data(), axis=2)) + +def transfer_structure(oxgraph, beg_node, end_node): + """ + transfer process: + 1. insert unsqueeze node before beg node + 2. insert squeeze node after end node + 3. transfer conv1d paramters for conv2d + :param oxgraph: input onnx graph + :param beg_node: beg node name for searched structure + :param end_node: end node name for searched structure + """ + previous_beg_node = oxgraph.get_previous_oxnode(oxnode_name=beg_node) + if not previous_beg_node: + previous_beg_node = INPUT_NODE + else: + previous_beg_node = previous_beg_node[0].get_name() + next_end_node = oxgraph.get_next_oxnode(oxnode_name=end_node) + unsqueeze_node_name = 'Unsqueeze_before_{}'.format(beg_node) + squeeze_node_name = 'Squeeze_after_{}'.format(end_node) + next_end_node = next_end_node[0].get_name() + + oxgraph.insert_node(bef_node_info_list=[previous_beg_node], + aft_node_info_list=[beg_node], + op_type='Unsqueeze', + op_name=unsqueeze_node_name) + oxgraph.insert_node(bef_node_info_list=[end_node], + aft_node_info_list=[next_end_node], + op_type='Squeeze', + op_name=squeeze_node_name) + node_unsqueeze = oxgraph.get_oxnode_by_name(unsqueeze_node_name) + node_unsqueeze.set_attribute(attr_name='axes', attr_value=[2]) + node_squeeze = oxgraph.get_oxnode_by_name(squeeze_node_name) + node_squeeze.set_attribute(attr_name='axes', attr_value=[2]) + + next_beg_node = oxgraph.get_oxnode_by_name(oxnode_name=beg_node) + while next_beg_node.get_name() != end_node: + conv1d2conv2d(oxgraph, next_beg_node) + next_beg_node = oxgraph.get_next_oxnode(oxnode_name=next_beg_node.get_name()) + next_beg_node = next_beg_node[0] + conv1d2conv2d(oxgraph, next_beg_node) + + +def fix_conv1d(model_path, out_path, beg_list, end_list): + """ + main process for fixing conv1d + :param model_path: input onnx model path + :param out_path: out fixed onnx model path + :param beg_list: beg node names for searched structure + :param end_list: end node names for searched structure + """ + oxgraph = OXGraph(model_path) + for idx, beg_node in enumerate(beg_list): + end_node = end_list[idx] + transfer_structure(oxgraph, beg_node, end_node) + 
oxgraph.save_new_model(out_path) + + +if __name__ == '__main__': + input_path = sys.argv[1] + save_path = sys.argv[2] + beg_nodes = ['Conv_0'] + end_nodes = ['Conv_4'] + fix_conv1d(input_path, save_path, beg_nodes, end_nodes) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/BSN/gen_dataset_info.py index 7d55019ee88a193789691f80808e4e2facbcf4d3..bb435699d8e549877f8a2000641fd8fa303e3f28 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/gen_dataset_info.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - - -def get_bin_info(file_type, file_path, info_name, width, height): - bin_files = sorted(os.listdir(file_path)) - - with open(info_name+'.info', 'w') as file: - i = 0 - for bin_file in bin_files: - if bin_file.endswith('.bin'): - if file_type == 'tem': - content = ' '.join([str(i), 'output/BSN-TEM-preprocess/feature'+'/'+bin_file, width, height]) - if file_type == 'pem': - content = ' '.join([str(i), 'output/BSN-PEM-preprocess/feature'+'/'+bin_file, width, height]) - file.write(content) - file.write('\n') - i = i+1 - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - line = sys.argv[4] - col = sys.argv[5] +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
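+# Usage: python gen_dataset_info.py <tem|pem> <bin_dir> <info_name> <width> <height>
+# One caveat: the bin path written into the .info file is the fixed
+# output/BSN-TEM-preprocess/feature (or output/BSN-PEM-preprocess/feature)
+# directory hard-coded in get_bin_info below, not the <bin_dir> argument,
+# so the preprocessed features should sit under those paths when the .info
+# file is passed to the benchmark tool.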
+import os +import sys + + +def get_bin_info(file_type, file_path, info_name, width, height): + bin_files = sorted(os.listdir(file_path)) + + with open(info_name+'.info', 'w') as file: + i = 0 + for bin_file in bin_files: + if bin_file.endswith('.bin'): + if file_type == 'tem': + content = ' '.join([str(i), 'output/BSN-TEM-preprocess/feature'+'/'+bin_file, width, height]) + if file_type == 'pem': + content = ' '.join([str(i), 'output/BSN-PEM-preprocess/feature'+'/'+bin_file, width, height]) + file.write(content) + file.write('\n') + i = i+1 + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + line = sys.argv[4] + col = sys.argv[5] get_bin_info(file_type,file_path, info_name, line, col) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/BSN/modelzoo_level.txt index c021e5b6ccd79796c6c89fffb78c3ac7cf956f20..29569dfe38675049737d3c1d06941057f6aea472 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/BSN/modelzoo_level.txt @@ -1,5 +1,5 @@ -acc: -74.34 OK -perf: -bs1:6148 OK +acc: +74.34 OK +perf: +bs1:6148 OK bs16: 19172 False \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/parse.py b/ACL_PyTorch/contrib/cv/detection/BSN/parse.py index 1841a9747c1da500ef5f9e0e0368ec324be08595..e071d8fecf850996e9527d39d0e7d0f4d83ad14e 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/parse.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re - -def get_acc(filename): - with open(filename, 'r') as f: - lines = f.readlines() - last_line = lines[-1] - psnr = last_line.split(" ")[2] - print(filename.split('.')[0],"Average PSNR:", psnr) - - -def get_perf(filename): - with open(filename, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) - -if __name__ == "__main__": - - filename = sys.argv[1] - - if filename.endswith(".log"): - get_acc(filename) - elif filename.endswith(".txt"): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
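+# Note: get_perf() pulls the Interface throughputRate value out of a benchmark
+# result txt (the eighth "key: value" pair in the performance summary) and
+# multiplies it by 4 to report per-card fps, matching the x4 used in the
+# README throughput figures; get_acc() simply prints the metric parsed from
+# the last line of the given .log file.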
+import sys +import re + +def get_acc(filename): + with open(filename, 'r') as f: + lines = f.readlines() + last_line = lines[-1] + psnr = last_line.split(" ")[2] + print(filename.split('.')[0],"Average PSNR:", psnr) + + +def get_perf(filename): + with open(filename, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) + +if __name__ == "__main__": + + filename = sys.argv[1] + + if filename.endswith(".log"): + get_acc(filename) + elif filename.endswith(".txt"): get_perf(filename) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/requirements.txt b/ACL_PyTorch/contrib/cv/detection/BSN/requirements.txt index bfc65830ffe767e72f7588d0eebe3f2e775bd145..796da27d2df6a0f6741ff104b8e14ec936e95f53 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/BSN/requirements.txt @@ -1,26 +1,26 @@ -cycler -decorator -docutils -flatbuffers -future -Geohash -Hydra -kiwisolver -matplotlib -mpmath -numpy -onnx -onnxruntime -pandas -Pillow -pip -protobuf -pyparsing -python-dateutil -pytz -scipy -setuptools -six -sympy -typing-extensions +cycler +decorator +docutils +flatbuffers +future +Geohash +Hydra +kiwisolver +matplotlib +mpmath +numpy +onnx +onnxruntime +pandas +Pillow +pip +protobuf +pyparsing +python-dateutil +pytz +scipy +setuptools +six +sympy +typing-extensions skl2onnx \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/BSN/test/parse.py b/ACL_PyTorch/contrib/cv/detection/BSN/test/parse.py index 1841a9747c1da500ef5f9e0e0368ec324be08595..e071d8fecf850996e9527d39d0e7d0f4d83ad14e 100644 --- a/ACL_PyTorch/contrib/cv/detection/BSN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/BSN/test/parse.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re - -def get_acc(filename): - with open(filename, 'r') as f: - lines = f.readlines() - last_line = lines[-1] - psnr = last_line.split(" ")[2] - print(filename.split('.')[0],"Average PSNR:", psnr) - - -def get_perf(filename): - with open(filename, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) - -if __name__ == "__main__": - - filename = sys.argv[1] - - if filename.endswith(".log"): - get_acc(filename) - elif filename.endswith(".txt"): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import re + +def get_acc(filename): + with open(filename, 'r') as f: + lines = f.readlines() + last_line = lines[-1] + psnr = last_line.split(" ")[2] + print(filename.split('.')[0],"Average PSNR:", psnr) + + +def get_perf(filename): + with open(filename, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) + +if __name__ == "__main__": + + filename = sys.argv[1] + + if filename.endswith(".log"): + get_acc(filename) + elif filename.endswith(".txt"): get_perf(filename) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/README.md b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/README.md index a49732889f846add0877a7d339fd481429de9c4e..b7eef1f4c876dac1579bf327bb71b7bc0f183a2b 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/README.md +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/README.md @@ -1,156 +1,156 @@ -# Cascade-RCNN-Resnet101-FPN-DCN模型PyTorch离线推理指导 - -## 1 环境准备 - -1. 安装必要的依赖 - - 在文件夹中处理。 - - 测试环境可能已经安装其中的一些不同版本的库了,故手动测试时建议创建虚拟环境后自己安装,参考开源仓代码的获取方式: - -``` -conda create -n dcn python=3.7 -conda activate dcn -pip install onnx==1.7.0 -pip install onnxruntime==1.9.0 -conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cpuonly -c pytorch -pip install mmcv-full==1.2.4 -``` - -2. 获取,修改与安装开源模型代码,参考开源仓代码的获取方式: - -``` -git clone https://github.com/open-mmlab/mmdetection.git -cd mmdetection -git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard -pip install -v -e . -cd .. -``` - -将提供的**pytorch_code_change**文件夹中的文件替换原文件。 - -``` -cp ./pytorch_code_change/bbox_nms.py ./mmdetection/mmdet/core//post_processing/bbox_nms.py -cp ./pytorch_code_change/rpn_head.py ./mmdetection/mmdet/models/dense_heads/rpn_head.py -cp ./pytorch_code_change/single_level_roi_extractor.py ./mmdetection/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py -cp ./pytorch_code_change/delta_xywh_bbox_coder.py ./mmdetection/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py -cp ./pytorch_code_change/pytorch2onnx.py ./mmdetection/tools/pytorch2onnx.py -cp ./pytorch_code_change/cascade_rcnn_r50_fpn.py ./mmdetection/configs/_base_/models/cascade_rcnn_r50_fpn.py -cp ./pytorch_code_change/deform_conv.py /root/anaconda3/envs/dcn/lib/python3.7/site-packages/mmcv/ops/deform_conv.py -#注意这里要根据实际情况下的安装路径来修改 -``` - -3. 获取权重文件 - -参考源码仓的方式获取,可以通过obs方法获取,下载对应的权重文件。 - -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/Cascade%20RCNN-Resnet101-FPN-DCN/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth -``` - -4. 数据集 - 本模型使用coco2017的验证集(val2017)验证,将服务器上的数据集复制到本文件下固定位置:data/coco/annotation/instances_val2017.json以及data/coco/val2017 - - ``` - mkdir -p data/coco - cp -r /opt/npu/datasets/coco/* /home/tyjf/data/coco - - ``` - -5. 
导出onnx - - 使用mmdet框架自带的脚本导出onnx即可,这里指定shape为1216。 - - 由于当前框架限制,仅支持batchsize=1的场景。 - -``` -python mmdetection/tools/pytorch2onnx.py mmdetection/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py ./cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth --output-file=cascadeR101dcn.onnx --shape=1216 --verify --show -``` - -6. 导出om - - 运行atc.sh脚本,完成onnx到om模型的转换,注意输出节点可能需要根据实际的onnx修改。 - -``` -bash atc.sh cascadeR101dcn.onnx cascadeR101dcn -``` - - -## 2 离线推理 - -在310上执行,执行时使npu-smi info查看设备状态,确保device空闲。 - -1. 数据预处理 - -``` -python mmdetection_coco_preprocess.py --image_folder_path ./data/coco/val2017 --bin_folder_path val2017_bin -python get_info.py bin ./val2017_bin coco2017.info 1216 1216 -python get_info.py jpg ./data/coco/val2017 coco2017_jpg.info -``` - -2. 使用benchmark工具进行推理 - -``` -chmod u+x benchmark.x86_64 -./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -input_text_path=./coco2017.info -input_width=1216 -input_height=1216 -useDvpp=False -output_binary=true -om_path=cascadeR101dcn.om -``` - -3. 推理结果展示 - -本模型提供后处理脚本,将二进制数据转化为txt文件,同时生成画出检测框后的图片。执行脚本 - -``` -python mmdetection_coco_postprocess.py --bin_data_path=result/dumpOutput_device0 --prob_thres=0.05 --ifShowDetObj --det_results_path=detection-results --test_annotation=coco2017_jpg.info -``` - -4. 精度验证 - -``` -python txt_to_json.py -python coco_eval.py --ground_truth ./data/coco/annotation/instances_val2017.json -``` -可以看到NPU精度:'bbox_mAP': 0.452 - -5. 性能验证 - -查看NPU性能 - -``` -bash test/perf_npu.sh -#或者运行 ./benchmark.x86_64 -round=50 -om_path=cascadeR101dcn.om --device_id=2 -batch_size=1 -``` - -可以看到NPU性能: - -[INFO] ave_throughputRate: 0.620627samples/s, ave_latency: 1593.71ms - -0.65281*4=2.61fps - -6. GPU性能与精度验证 - -由于模型算子的原因采取在线推理的方式检测GPU性能: - -在GPU上搭建好环境,并进行预处理: - -``` -mkdir -p data/coco/val2017 -cp -r /root/coco/val2017/* /home/dcnv0/data/coco/val2017 -mkdir -p data/coco/annotations -cp -r /root/coco/annotations/* /home/dcnv0/data/coco/annotations -``` - -测试性能与精度: - -``` -cd /home/dcnv0 -conda activate cascade -python ./mmdetection/tools/test.py ./mmdetection/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py ./cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth --eval=bbox -``` - -可以算出GPU性能为4.06fps左右。 - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | -| :--------------------------------: | :---------: | :-------------: | :-----: | :-----: | -| Cascade-RCNN-Resnet101-FPN-DCN | mAP:0.45 | mAP:0.452 | 4.06fps | 2.64fps | +# Cascade-RCNN-Resnet101-FPN-DCN模型PyTorch离线推理指导 + +## 1 环境准备 + +1. 安装必要的依赖 + + 在文件夹中处理。 + + 测试环境可能已经安装其中的一些不同版本的库了,故手动测试时建议创建虚拟环境后自己安装,参考开源仓代码的获取方式: + +``` +conda create -n dcn python=3.7 +conda activate dcn +pip install onnx==1.7.0 +pip install onnxruntime==1.9.0 +conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cpuonly -c pytorch +pip install mmcv-full==1.2.4 +``` + +2. 获取,修改与安装开源模型代码,参考开源仓代码的获取方式: + +``` +git clone https://github.com/open-mmlab/mmdetection.git +cd mmdetection +git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard +pip install -v -e . +cd .. 
+``` + +将提供的**pytorch_code_change**文件夹中的文件替换原文件。 + +``` +cp ./pytorch_code_change/bbox_nms.py ./mmdetection/mmdet/core//post_processing/bbox_nms.py +cp ./pytorch_code_change/rpn_head.py ./mmdetection/mmdet/models/dense_heads/rpn_head.py +cp ./pytorch_code_change/single_level_roi_extractor.py ./mmdetection/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py +cp ./pytorch_code_change/delta_xywh_bbox_coder.py ./mmdetection/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py +cp ./pytorch_code_change/pytorch2onnx.py ./mmdetection/tools/pytorch2onnx.py +cp ./pytorch_code_change/cascade_rcnn_r50_fpn.py ./mmdetection/configs/_base_/models/cascade_rcnn_r50_fpn.py +cp ./pytorch_code_change/deform_conv.py /root/anaconda3/envs/dcn/lib/python3.7/site-packages/mmcv/ops/deform_conv.py +#注意这里要根据实际情况下的安装路径来修改 +``` + +3. 获取权重文件 + +参考源码仓的方式获取,可以通过obs方法获取,下载对应的权重文件。 + +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/Cascade%20RCNN-Resnet101-FPN-DCN/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth +``` + +4. 数据集 + 本模型使用coco2017的验证集(val2017)验证,将服务器上的数据集复制到本文件下固定位置:data/coco/annotation/instances_val2017.json以及data/coco/val2017 + + ``` + mkdir -p data/coco + cp -r /opt/npu/datasets/coco/* /home/tyjf/data/coco + + ``` + +5. 导出onnx + + 使用mmdet框架自带的脚本导出onnx即可,这里指定shape为1216。 + + 由于当前框架限制,仅支持batchsize=1的场景。 + +``` +python mmdetection/tools/pytorch2onnx.py mmdetection/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py ./cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth --output-file=cascadeR101dcn.onnx --shape=1216 --verify --show +``` + +6. 导出om + + 运行atc.sh脚本,完成onnx到om模型的转换,注意输出节点可能需要根据实际的onnx修改。 + +``` +bash atc.sh cascadeR101dcn.onnx cascadeR101dcn +``` + + +## 2 离线推理 + +在310上执行,执行时使npu-smi info查看设备状态,确保device空闲。 + +1. 数据预处理 + +``` +python mmdetection_coco_preprocess.py --image_folder_path ./data/coco/val2017 --bin_folder_path val2017_bin +python get_info.py bin ./val2017_bin coco2017.info 1216 1216 +python get_info.py jpg ./data/coco/val2017 coco2017_jpg.info +``` + +2. 使用benchmark工具进行推理 + +``` +chmod u+x benchmark.x86_64 +./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -input_text_path=./coco2017.info -input_width=1216 -input_height=1216 -useDvpp=False -output_binary=true -om_path=cascadeR101dcn.om +``` + +3. 推理结果展示 + +本模型提供后处理脚本,将二进制数据转化为txt文件,同时生成画出检测框后的图片。执行脚本 + +``` +python mmdetection_coco_postprocess.py --bin_data_path=result/dumpOutput_device0 --prob_thres=0.05 --ifShowDetObj --det_results_path=detection-results --test_annotation=coco2017_jpg.info +``` + +4. 精度验证 + +``` +python txt_to_json.py +python coco_eval.py --ground_truth ./data/coco/annotation/instances_val2017.json +``` +可以看到NPU精度:'bbox_mAP': 0.452 + +5. 性能验证 + +查看NPU性能 + +``` +bash test/perf_npu.sh +#或者运行 ./benchmark.x86_64 -round=50 -om_path=cascadeR101dcn.om --device_id=2 -batch_size=1 +``` + +可以看到NPU性能: + +[INFO] ave_throughputRate: 0.620627samples/s, ave_latency: 1593.71ms + +0.65281*4=2.61fps + +6. 
GPU性能与精度验证 + +由于模型算子的原因采取在线推理的方式检测GPU性能: + +在GPU上搭建好环境,并进行预处理: + +``` +mkdir -p data/coco/val2017 +cp -r /root/coco/val2017/* /home/dcnv0/data/coco/val2017 +mkdir -p data/coco/annotations +cp -r /root/coco/annotations/* /home/dcnv0/data/coco/annotations +``` + +测试性能与精度: + +``` +cd /home/dcnv0 +conda activate cascade +python ./mmdetection/tools/test.py ./mmdetection/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py ./cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth --eval=bbox +``` + +可以算出GPU性能为4.06fps左右。 + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | +| :--------------------------------: | :---------: | :-------------: | :-----: | :-----: | +| Cascade-RCNN-Resnet101-FPN-DCN | mAP:0.45 | mAP:0.452 | 4.06fps | 2.64fps | diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/coco_eval.py b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/coco_eval.py index 628e5fc8a81f437159ac8c2cf0fef176a73c66e8..18c4fcdb6bb6ad1fbc3b41c88c8f26921843b78e 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/coco_eval.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/coco_eval.py @@ -1,91 +1,91 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import numpy as np -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') - -def coco_evaluation(annotation_json, result_json): - cocoGt = COCO(annotation_json) - cocoDt = cocoGt.loadRes(result_json) - iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) - iou_type = 'bbox' - - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) - cocoEval.params.imgIds = cocoGt.get_img_ids() - cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
- cocoEval.params.iouThrs = iou_thrs - - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - # mapping of cocoEval.stats - coco_metric_names = { - 'mAP': 0, - 'mAP_50': 1, - 'mAP_75': 2, - 'mAP_s': 3, - 'mAP_m': 4, - 'mAP_l': 5, - 'AR@100': 6, - 'AR@300': 7, - 'AR@1000': 8, - 'AR_s@1000': 9, - 'AR_m@1000': 10, - 'AR_l@1000': 11 - } - - metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] - eval_results = {} - - for metric_item in metric_items: - key = f'bbox_{metric_item}' - val = float( - f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' - ) - eval_results[key] = val - ap = cocoEval.stats[:6] - eval_results['bbox_mAP_copypaste'] = ( - f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' - f'{ap[4]:.3f} {ap[5]:.3f}') - - return eval_results - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--ground_truth", default="instances_val2017.json") - parser.add_argument("--detection_result", default="coco_detection_result.json") - args = parser.parse_args() - result = coco_evaluation(args.ground_truth, args.detection_result) - print(result) - with open('./coco_detection_result.txt', 'w') as f: - for key, value in result.items(): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') + +def coco_evaluation(annotation_json, result_json): + cocoGt = COCO(annotation_json) + cocoDt = cocoGt.loadRes(result_json) + iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + iou_type = 'bbox' + + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) + cocoEval.params.imgIds = cocoGt.get_img_ids() + cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
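+    # iou_thrs holds the 10 COCO IoU thresholds 0.50:0.05:0.95 built above;
+    # maxDets=[100, 300, 1000] are the detection caps behind the
+    # AR@100/AR@300/AR@1000 entries in coco_metric_names below.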
+ cocoEval.params.iouThrs = iou_thrs + + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # mapping of cocoEval.stats + coco_metric_names = { + 'mAP': 0, + 'mAP_50': 1, + 'mAP_75': 2, + 'mAP_s': 3, + 'mAP_m': 4, + 'mAP_l': 5, + 'AR@100': 6, + 'AR@300': 7, + 'AR@1000': 8, + 'AR_s@1000': 9, + 'AR_m@1000': 10, + 'AR_l@1000': 11 + } + + metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] + eval_results = {} + + for metric_item in metric_items: + key = f'bbox_{metric_item}' + val = float( + f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' + ) + eval_results[key] = val + ap = cocoEval.stats[:6] + eval_results['bbox_mAP_copypaste'] = ( + f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' + f'{ap[4]:.3f} {ap[5]:.3f}') + + return eval_results + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth", default="instances_val2017.json") + parser.add_argument("--detection_result", default="coco_detection_result.json") + args = parser.parse_args() + result = coco_evaluation(args.ground_truth, args.detection_result) + print(result) + with open('./coco_detection_result.txt', 'w') as f: + for key, value in result.items(): f.write(key + ': ' + str(value) + '\n') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/get_info.py b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/get_info.py index 5af675c6b0ae3f28a6f191d139ebb44e775e6c1c..fc6cdebb5b4417a3651c1e6e9663d8d1299a0ef5 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_postprocess.py b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_postprocess.py index a40fbc9bfc9624e2423765e39e55026ca437f815..bcad902c276bca7b80057ce2144f0c3cf11ba749 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_postprocess.py @@ -1,148 +1,148 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import argparse -import cv2 - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -def coco_postprocess(bbox: np.ndarray, image_size, - net_input_width, net_input_height): - """ - This function is postprocessing for FasterRCNN output. - - Before calling this function, reshape the raw output of FasterRCNN to - following form - numpy.ndarray: - [x, y, width, height, confidence, probability of 80 classes] - shape: (100,) - The postprocessing restore the bounding rectangles of FasterRCNN output - to origin scale and filter with non-maximum suppression. 
- - :param bbox: a numpy array of the FasterRCNN output - :param image_path: a string of image path - :return: three list for best bound, class and score - """ - w = image_size[0] - h = image_size[1] - scale = min(net_input_width / w, net_input_height / h) - - pad_w = net_input_width - w * scale - pad_h = net_input_height - h * scale - pad_left = pad_w // 2 - pad_top = pad_h // 2 - - # cal predict box on the image src - pbox = bbox - pbox[:, 0] = (bbox[:, 0] - pad_left) / scale - pbox[:, 1] = (bbox[:, 1] - pad_top) / scale - pbox[:, 2] = (bbox[:, 2] - pad_left) / scale - pbox[:, 3] = (bbox[:, 3] - pad_top) / scale - return pbox - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--test_annotation", default="./coco2017_jpg.info") - parser.add_argument("--det_results_path", default="./detection-results/") - parser.add_argument("--net_out_num", default=2) - parser.add_argument("--net_input_width", default=1216) - parser.add_argument("--net_input_height", default=1216) - parser.add_argument("--prob_thres", default=0.05) - parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") - flags = parser.parse_args() - print(flags.ifShowDetObj, type(flags.ifShowDetObj)) - # generate dict according to annotation file for query resolution - # load width and height of input images - img_size_dict = dict() - with open(flags.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - # read bin file for generate predict result - bin_path = flags.bin_data_path - det_results_path = flags.det_results_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] - for name in os.listdir(bin_path) if "bin" in name]) - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - # load all detected output tensor - res_buff = [] - for num in range(1, flags.net_out_num + 1): - if os.path.exists(path_base + "_" + str(num) + ".bin"): - if num == 1: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") - buf = np.reshape(buf, [100, 5]) - elif num == 2: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") - buf = np.reshape(buf, [100, 1]) - res_buff.append(buf) - else: - print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") - res_tensor = np.concatenate(res_buff, axis=1) - current_img_size = img_size_dict[bin_file] - print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) - predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) - - if flags.ifShowDetObj == True: - imgCur = cv2.imread(current_img_size[2]) - - det_results_str = '' - for idx, class_ind in enumerate(predbox[:,5]): - if float(predbox[idx][4]) < float(flags.prob_thres): - continue - # skip negative class index - if class_ind < 0 or class_ind > 80: - continue - - class_name = CLASSES[int(class_ind)] - det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], - predbox[idx][1], predbox[idx][2], predbox[idx][3]) - if flags.ifShowDetObj == True: - imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), - 
(int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) - imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), - (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) - # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 - - if flags.ifShowDetObj == True: - print(os.path.join(det_results_path, bin_file +'.jpg')) - cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) - - det_results_file = os.path.join(det_results_path, bin_file + ".txt") - with open(det_results_file, "w") as detf: - detf.write(det_results_str) - print(det_results_str) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import argparse +import cv2 + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +def coco_postprocess(bbox: np.ndarray, image_size, + net_input_width, net_input_height): + """ + This function is postprocessing for FasterRCNN output. + + Before calling this function, reshape the raw output of FasterRCNN to + following form + numpy.ndarray: + [x, y, width, height, confidence, probability of 80 classes] + shape: (100,) + The postprocessing restore the bounding rectangles of FasterRCNN output + to origin scale and filter with non-maximum suppression. 
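+    Worked example with illustrative sizes: for a 640x480 source image and a
+    1216x1216 network input, scale = min(1216/640, 1216/480) = 1.9, the
+    rescaled image is 1216x912, pad_left = 0 and pad_top = (1216-912)//2 = 152,
+    so a predicted box is mapped back by subtracting those pads and dividing
+    by 1.9, which is what the coordinate-wise code below does.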
+ + :param bbox: a numpy array of the FasterRCNN output + :param image_path: a string of image path + :return: three list for best bound, class and score + """ + w = image_size[0] + h = image_size[1] + scale = min(net_input_width / w, net_input_height / h) + + pad_w = net_input_width - w * scale + pad_h = net_input_height - h * scale + pad_left = pad_w // 2 + pad_top = pad_h // 2 + + # cal predict box on the image src + pbox = bbox + pbox[:, 0] = (bbox[:, 0] - pad_left) / scale + pbox[:, 1] = (bbox[:, 1] - pad_top) / scale + pbox[:, 2] = (bbox[:, 2] - pad_left) / scale + pbox[:, 3] = (bbox[:, 3] - pad_top) / scale + return pbox + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--test_annotation", default="./coco2017_jpg.info") + parser.add_argument("--det_results_path", default="./detection-results/") + parser.add_argument("--net_out_num", default=2) + parser.add_argument("--net_input_width", default=1216) + parser.add_argument("--net_input_height", default=1216) + parser.add_argument("--prob_thres", default=0.05) + parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") + flags = parser.parse_args() + print(flags.ifShowDetObj, type(flags.ifShowDetObj)) + # generate dict according to annotation file for query resolution + # load width and height of input images + img_size_dict = dict() + with open(flags.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + # read bin file for generate predict result + bin_path = flags.bin_data_path + det_results_path = flags.det_results_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] + for name in os.listdir(bin_path) if "bin" in name]) + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + # load all detected output tensor + res_buff = [] + for num in range(1, flags.net_out_num + 1): + if os.path.exists(path_base + "_" + str(num) + ".bin"): + if num == 1: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") + buf = np.reshape(buf, [100, 5]) + elif num == 2: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") + buf = np.reshape(buf, [100, 1]) + res_buff.append(buf) + else: + print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") + res_tensor = np.concatenate(res_buff, axis=1) + current_img_size = img_size_dict[bin_file] + print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) + predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) + + if flags.ifShowDetObj == True: + imgCur = cv2.imread(current_img_size[2]) + + det_results_str = '' + for idx, class_ind in enumerate(predbox[:,5]): + if float(predbox[idx][4]) < float(flags.prob_thres): + continue + # skip negative class index + if class_ind < 0 or class_ind > 80: + continue + + class_name = CLASSES[int(class_ind)] + det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], + predbox[idx][1], predbox[idx][2], predbox[idx][3]) + if flags.ifShowDetObj == True: + imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), + 
(int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) + imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), + (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 + + if flags.ifShowDetObj == True: + print(os.path.join(det_results_path, bin_file +'.jpg')) + cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) + + det_results_file = os.path.join(det_results_path, bin_file + ".txt") + with open(det_results_file, "w") as detf: + detf.write(det_results_str) + print(det_results_str) diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_preprocess.py b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_preprocess.py index 4a6cd14f9329be81e615212af1140b6f662b27ab..fc8c2c5293a0029a5ac97c48996931f4c6a99820 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/mmdetection_coco_preprocess.py @@ -1,68 +1,68 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import cv2 -import argparse -import mmcv -import torch - -dataset_config = { - 'resize': (1216, 1216), - 'mean': [123.675, 116.28, 103.53], - 'std': [58.395, 57.12, 57.375], -} - -tensor_height = 1216 -tensor_width = 1216 - -def coco_preprocess(input_image, output_bin_path): - #define the output file name - img_name = input_image.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - bin_fl = os.path.join(output_bin_path, bin_name) - - one_img = mmcv.imread(os.path.join(input_image), backend='cv2') - one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) - # calculate padding - h = one_img.shape[0] - w = one_img.shape[1] - pad_left = (tensor_width - w) // 2 - pad_top = (tensor_height - h) // 2 - pad_right = tensor_width - pad_left - w - pad_bottom = tensor_height - pad_top - h - - mean = np.array(dataset_config['mean'], dtype=np.float32) - std = np.array(dataset_config['std'], dtype=np.float32) - one_img = mmcv.imnormalize(one_img, mean, std) - one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) - one_img = one_img.transpose(2, 0, 1) - one_img.tofile(bin_fl) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of FasterRCNN pytorch model') - parser.add_argument("--image_folder_path", default="./coco2014/", help='image of dataset') - parser.add_argument("--bin_folder_path", default="./coco2014_bin/", help='Preprocessed image buffer') - flags = parser.parse_args() - - if not os.path.exists(flags.bin_folder_path): - os.makedirs(flags.bin_folder_path) - images = os.listdir(flags.image_folder_path) - for image_name in images: - if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): - continue - print("start 
to process image {}....".format(image_name)) - path_image = os.path.join(flags.image_folder_path, image_name) - coco_preprocess(path_image, flags.bin_folder_path) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import os +import cv2 +import argparse +import mmcv +import torch + +dataset_config = { + 'resize': (1216, 1216), + 'mean': [123.675, 116.28, 103.53], + 'std': [58.395, 57.12, 57.375], +} + +tensor_height = 1216 +tensor_width = 1216 + +def coco_preprocess(input_image, output_bin_path): + #define the output file name + img_name = input_image.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + bin_fl = os.path.join(output_bin_path, bin_name) + + one_img = mmcv.imread(os.path.join(input_image), backend='cv2') + one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) + # calculate padding + h = one_img.shape[0] + w = one_img.shape[1] + pad_left = (tensor_width - w) // 2 + pad_top = (tensor_height - h) // 2 + pad_right = tensor_width - pad_left - w + pad_bottom = tensor_height - pad_top - h + + mean = np.array(dataset_config['mean'], dtype=np.float32) + std = np.array(dataset_config['std'], dtype=np.float32) + one_img = mmcv.imnormalize(one_img, mean, std) + one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) + one_img = one_img.transpose(2, 0, 1) + one_img.tofile(bin_fl) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of FasterRCNN pytorch model') + parser.add_argument("--image_folder_path", default="./coco2014/", help='image of dataset') + parser.add_argument("--bin_folder_path", default="./coco2014_bin/", help='Preprocessed image buffer') + flags = parser.parse_args() + + if not os.path.exists(flags.bin_folder_path): + os.makedirs(flags.bin_folder_path) + images = os.listdir(flags.image_folder_path) + for image_name in images: + if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(flags.image_folder_path, image_name) + coco_preprocess(path_image, flags.bin_folder_path) diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/requirements.txt b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/requirements.txt index fcabf40ea57e03ae306ddb75476c08e25e73bd84..3dfcf9563e211857c3b68c55591852f0e43b8f2e 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/requirements.txt @@ -1,6 +1,6 @@ -torch==1.7.0 -torchvision==0.8.0 -onnx==1.8.0 -mmdet==2.8.0 -mmcv-full==1.2.4 +torch==1.7.0 +torchvision==0.8.0 +onnx==1.8.0 +mmdet==2.8.0 +mmcv-full==1.2.4 onnxruntime==1.9.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/txt_to_json.py 
b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/txt_to_json.py index a2346a773d0aff8587ff5c2904aead9624e6fa2e..9f479da6a0b3ef156b57c415fab48801ee5f69cd 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/txt_to_json.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN/txt_to_json.py @@ -1,114 +1,114 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os -import sys -import argparse -import mmcv - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, -24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, -48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, -72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - -''' - 0,0 ------> x (width) - | - | (Left,Top) - | *_________ - | | | - | | - y |_________| - (height) * - (Right,Bottom) -''' - -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - - -def error(msg): - print(msg) - sys.exit(0) - - -def get_predict_list(file_path, gt_classes): - dr_files_list = glob.glob(file_path + '/*.txt') - dr_files_list.sort() - - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - sl = line.split() - if len(sl) > 6: - class_name = sl[0] + ' ' + sl[1] - scores, left, top, right, bottom = sl[2:] - else: - class_name, scores, left, top, right, bottom = sl - if float(scores) < 0.05: - continue - except ValueError: - error_msg = "Error: File " + txt_file + " wrong format.\n" - error_msg += " Expected: \n" - error_msg += " Received: " + line - error(error_msg) - - # bbox = left + " " + top + " " + right + " " + bottom - left = float(left) - right = float(right) - top = float(top) - bottom = 
float(bottom) - bbox = [left, top, right-left, bottom-top] - bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, - "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) - # sort detection-results by decreasing scores - # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) - return bounding_boxes - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('mAp calculate') - parser.add_argument('--npu_txt_path', default="detection-results", - help='the path of the predict result') - parser.add_argument("--json_output_file", default="coco_detection_result") - args = parser.parse_args() - - res_bbox = get_predict_list(args.npu_txt_path, CLASSES) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import sys +import argparse +import mmcv + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, +24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, +72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' + +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + + +def error(msg): + print(msg) + sys.exit(0) + + +def get_predict_list(file_path, gt_classes): + dr_files_list = glob.glob(file_path + '/*.txt') + dr_files_list.sort() + + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + sl = line.split() + if len(sl) > 6: + class_name = sl[0] + ' ' + sl[1] + scores, left, top, right, bottom = sl[2:] + else: + class_name, scores, left, top, right, bottom = sl + if 
float(scores) < 0.05: + continue + except ValueError: + error_msg = "Error: File " + txt_file + " wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + + # bbox = left + " " + top + " " + right + " " + bottom + left = float(left) + right = float(right) + top = float(top) + bottom = float(bottom) + bbox = [left, top, right-left, bottom-top] + bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, + "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) + # sort detection-results by decreasing scores + # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) + return bounding_boxes + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('mAp calculate') + parser.add_argument('--npu_txt_path', default="detection-results", + help='the path of the predict result') + parser.add_argument("--json_output_file", default="coco_detection_result") + args = parser.parse_args() + + res_bbox = get_predict_list(args.npu_txt_path, CLASSES) mmcv.dump(res_bbox, args.json_output_file + '.json') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/README.md b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/README.md index 085891be0df48d6e7d3afb6df61119a67c216e8f..d76158104c6fed3e4738b379356c70cf431200a6 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/README.md +++ b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/README.md @@ -1,57 +1,57 @@ -# Casacde_RCNN_R101模型PyTorch离线推理指导 - -## 1 环境准备 - -1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - -2. 获取,修改与安装开源模型代码 - -``` -git clone https://github.com/open-mmlab/mmdetection.git -cd mmdetection -git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard -pip3.7 install -v -e . -patch -p1 < ../Cascade_RCNN_R101.patch -cd .. -``` - -利用提供的change文件夹中的patch文件,完成补丁操作,命令参考如下示例,请用户根据安装包位置自行修改: -``` -cd change -patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/deform_conv.py deform_conv.patch -cd ../ -``` - - -3. 获取权重文件 - - 从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn)下载cascade_rcnn模型权重文件 - -4. 数据集 - 本模型使用coco2017的验证集验证 - -5. [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=./ -``` - - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | -| :---------------: | :---------: | :-------------: | :-----: | :------: | -| Cascade_RCNN_R101 bs1 | map:0.42 | map:0.42 | 4.8task/s | 5.667fps | - - - +# Casacde_RCNN_R101模型PyTorch离线推理指导 + +## 1 环境准备 + +1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + +2. 获取,修改与安装开源模型代码 + +``` +git clone https://github.com/open-mmlab/mmdetection.git +cd mmdetection +git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard +pip3.7 install -v -e . +patch -p1 < ../Cascade_RCNN_R101.patch +cd .. +``` + +利用提供的change文件夹中的patch文件,完成补丁操作,命令参考如下示例,请用户根据安装包位置自行修改: +``` +cd change +patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/deform_conv.py deform_conv.patch +cd ../ +``` + + +3. 获取权重文件 + + 从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn)下载cascade_rcnn模型权重文件 + +4. 数据集 + 本模型使用coco2017的验证集验证 + +5. 
[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=./ +``` + + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | +| :---------------: | :---------: | :-------------: | :-----: | :------: | +| Cascade_RCNN_R101 bs1 | map:0.42 | map:0.42 | 4.8task/s | 5.667fps | + + + diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/get_info.py b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/get_info.py index 5af675c6b0ae3f28a6f191d139ebb44e775e6c1c..fc6cdebb5b4417a3651c1e6e9663d8d1299a0ef5 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
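# --- Editor's illustrative sketch; NOT part of the patch above. ---
# get_info.py (shown above) emits one record per input file in the form
# "index path width height", which the benchmark tool then consumes, e.g.:
#   0 ./coco2017_bin/000000000139.bin 1216 1216
#   1 ./coco2017_bin/000000000285.bin 1216 1216
# A minimal stand-alone equivalent for .bin inputs; the directory, info
# file name and 1216x1216 size below are placeholder assumptions.
import os
from glob import glob

def write_bin_info(bin_dir, info_name, width, height):
    with open(info_name, "w") as f:
        for index, path in enumerate(glob(os.path.join(bin_dir, "*.bin"))):
            f.write("{} {} {} {}\n".format(index, path, width, height))

# write_bin_info("./coco2017_bin", "coco2017.info", 1216, 1216)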
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_postprocess.py b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_postprocess.py index 5ca79ff17fe6ecb89a9aa72740147e599eb62dba..a4bcab73b951c2cf4af9cced603f2e009ee58763 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_postprocess.py @@ -1,276 +1,276 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np -import cv2 -import glob -import sys -import argparse -import mmcv -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, -24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, -48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, -72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - -def coco_postprocess(bbox: np.ndarray, image_size, - net_input_width, net_input_height): - """ - This function is postprocessing for FasterRCNN output. - - Before calling this function, reshape the raw output of FasterRCNN to - following form - numpy.ndarray: - [x, y, width, height, confidence, probability of 80 classes] - shape: (100,) - The postprocessing restore the bounding rectangles of FasterRCNN output - to origin scale and filter with non-maximum suppression. 
- - :param bbox: a numpy array of the FasterRCNN output - :param image_path: a string of image path - :return: three list for best bound, class and score - """ - w = image_size[0] - h = image_size[1] - scale = min(net_input_width / w, net_input_height / h) - - pad_w = net_input_width - w * scale - pad_h = net_input_height - h * scale - pad_left = pad_w // 2 - pad_top = pad_h // 2 - - # cal predict box on the image src - pbox = bbox - pbox[:, 0] = (bbox[:, 0] - pad_left) / scale - pbox[:, 1] = (bbox[:, 1] - pad_top) / scale - pbox[:, 2] = (bbox[:, 2] - pad_left) / scale - pbox[:, 3] = (bbox[:, 3] - pad_top) / scale - - # make pbboxes value in valid range - pbox[:, 0] = np.maximum(pbox[:, 0], 0) - pbox[:, 1] = np.maximum(pbox[:, 1], 0) - pbox[:, 2] = np.minimum(pbox[:, 2], w) - pbox[:, 3] = np.minimum(pbox[:, 3], h) - return pbox -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - - -def error(msg): - print(msg) - sys.exit(0) - - -def get_predict_list(file_path, gt_classes): - dr_files_list = glob.glob(file_path + '/*.txt') - dr_files_list.sort() - - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - sl = line.split() - if len(sl) > 6: - class_name = sl[0] + ' ' + sl[1] - scores, left, top, right, bottom = sl[2:] - else: - class_name, scores, left, top, right, bottom = sl - if float(scores) < 0.05: - continue - except ValueError: - error_msg = "Error: File " + txt_file + " wrong format.\n" - error_msg += " Expected: \n" - error_msg += " Received: " + line - error(error_msg) - - # bbox = left + " " + top + " " + right + " " + bottom - left = float(left) - right = float(right) - top = float(top) - bottom = float(bottom) - bbox = [left, top, right-left, bottom-top] - bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, - "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) - # sort detection-results by decreasing scores - # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) - return bounding_boxes -def coco_evaluation(annotation_json, result_json): - cocoGt = COCO(annotation_json) - cocoDt = cocoGt.loadRes(result_json) - iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) - iou_type = 'bbox' - - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) - cocoEval.params.imgIds = cocoGt.get_img_ids() - cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
- cocoEval.params.iouThrs = iou_thrs - - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - # mapping of cocoEval.stats - coco_metric_names = { - 'mAP': 0, - 'mAP_50': 1, - 'mAP_75': 2, - 'mAP_s': 3, - 'mAP_m': 4, - 'mAP_l': 5, - 'AR@100': 6, - 'AR@300': 7, - 'AR@1000': 8, - 'AR_s@1000': 9, - 'AR_m@1000': 10, - 'AR_l@1000': 11 - } - - metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] - eval_results = {} - - for metric_item in metric_items: - key = f'bbox_{metric_item}' - val = float( - f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' - ) - eval_results[key] = val - ap = cocoEval.stats[:6] - eval_results['bbox_mAP_copypaste'] = ( - f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' - f'{ap[4]:.3f} {ap[5]:.3f}') - - return eval_results - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--test_annotation", default="./coco2017_jpg.info") - parser.add_argument("--det_results_path", default="./detection-results") - parser.add_argument("--img_path", default="./val2017/") - parser.add_argument("--net_out_num", default=2) - parser.add_argument("--net_input_width", default=1216) - parser.add_argument("--net_input_height", default=1216) - parser.add_argument("--prob_thres", default=0.05) - parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") - parser.add_argument('--npu_txt_path', default="detection-results", - help='the path of the predict result') - parser.add_argument("--json_output_file", default="coco_detection_result") - parser.add_argument("--ground_truth", default="instances_val2017.json") - parser.add_argument("--detection_result", default="coco_detection_result.json") - flags = parser.parse_args() - print(flags.ifShowDetObj, type(flags.ifShowDetObj)) - # generate dict according to annotation file for query resolution - # load width and height of input images - img_size_dict = dict() - with open(flags.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - # read bin file for generate predict result - bin_path = flags.bin_data_path - det_results_path = flags.det_results_path - img_path = flags.img_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] - for name in os.listdir(bin_path) if "bin" in name]) - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - # load all detected output tensor - res_buff = [] - for num in range(1, flags.net_out_num + 1): - if os.path.exists(path_base + "_" + str(num) + ".bin"): - if num == 1: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") - buf = np.reshape(buf, [100, 5]) - elif num == 2: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") - buf = np.reshape(buf, [100, 1]) - res_buff.append(buf) - else: - print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") - res_tensor = np.concatenate(res_buff, axis=1) - current_img_size = img_size_dict[bin_file] - print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) - predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) - - if flags.ifShowDetObj == True: 
- pic = os.path.join(img_path, bin_file +'.jpg') - imgCur = cv2.imread(pic) - - det_results_str = '' - for idx, class_ind in enumerate(predbox[:,5]): - if float(predbox[idx][4]) < float(flags.prob_thres): - continue - # skip negative class index - if class_ind < 0 or class_ind > 80: - continue - - class_name = CLASSES[int(class_ind)] - det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], - predbox[idx][1], predbox[idx][2], predbox[idx][3]) - if flags.ifShowDetObj == True: - imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), - (int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) - imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), - (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) - # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 - - if flags.ifShowDetObj == True: - print(os.path.join(det_results_path, bin_file +'.jpg')) - cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) - - det_results_file = os.path.join(det_results_path, bin_file + ".txt") - with open(det_results_file, "w") as detf: - detf.write(det_results_str) - print(det_results_str) - - res_bbox = get_predict_list(flags.npu_txt_path, CLASSES) - mmcv.dump(res_bbox, flags.json_output_file + '.json') - result = coco_evaluation(flags.ground_truth, flags.detection_result) - print(result) - with open('./coco_detection_result.txt', 'w') as f: - for key, value in result.items(): +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
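# --- Editor's illustrative sketch; NOT part of the patch above. ---
# It only demonstrates the dumpOutput convention the postprocess script
# relies on: "<stem>_1.bin" holds float32 [100, 5] boxes+scores,
# "<stem>_2.bin" holds int64 [100, 1] class indices, and the script
# concatenates them into a [100, 6] tensor. All paths and values here are
# synthetic placeholders.
import os
import tempfile
import numpy as np

stem = os.path.join(tempfile.mkdtemp(), "000000000139")                        # placeholder image stem
np.random.rand(100, 5).astype(np.float32).tofile(stem + "_1.bin")              # fake x1, y1, x2, y2, score
np.random.randint(0, 80, (100, 1)).astype(np.int64).tofile(stem + "_2.bin")    # fake class indices

boxes_scores = np.fromfile(stem + "_1.bin", dtype="float32").reshape(100, 5)
labels = np.fromfile(stem + "_2.bin", dtype="int64").reshape(100, 1)
preds = np.concatenate([boxes_scores, labels], axis=1)                         # shape (100, 6), as in the script
print(preds.shape)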
+ +import os +import numpy as np +import cv2 +import glob +import sys +import argparse +import mmcv +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, +24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, +72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +def coco_postprocess(bbox: np.ndarray, image_size, + net_input_width, net_input_height): + """ + This function is postprocessing for FasterRCNN output. + + Before calling this function, reshape the raw output of FasterRCNN to + following form + numpy.ndarray: + [x, y, width, height, confidence, probability of 80 classes] + shape: (100,) + The postprocessing restore the bounding rectangles of FasterRCNN output + to origin scale and filter with non-maximum suppression. 
+ + :param bbox: a numpy array of the FasterRCNN output + :param image_path: a string of image path + :return: three list for best bound, class and score + """ + w = image_size[0] + h = image_size[1] + scale = min(net_input_width / w, net_input_height / h) + + pad_w = net_input_width - w * scale + pad_h = net_input_height - h * scale + pad_left = pad_w // 2 + pad_top = pad_h // 2 + + # cal predict box on the image src + pbox = bbox + pbox[:, 0] = (bbox[:, 0] - pad_left) / scale + pbox[:, 1] = (bbox[:, 1] - pad_top) / scale + pbox[:, 2] = (bbox[:, 2] - pad_left) / scale + pbox[:, 3] = (bbox[:, 3] - pad_top) / scale + + # make pbboxes value in valid range + pbox[:, 0] = np.maximum(pbox[:, 0], 0) + pbox[:, 1] = np.maximum(pbox[:, 1], 0) + pbox[:, 2] = np.minimum(pbox[:, 2], w) + pbox[:, 3] = np.minimum(pbox[:, 3], h) + return pbox +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + + +def error(msg): + print(msg) + sys.exit(0) + + +def get_predict_list(file_path, gt_classes): + dr_files_list = glob.glob(file_path + '/*.txt') + dr_files_list.sort() + + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + sl = line.split() + if len(sl) > 6: + class_name = sl[0] + ' ' + sl[1] + scores, left, top, right, bottom = sl[2:] + else: + class_name, scores, left, top, right, bottom = sl + if float(scores) < 0.05: + continue + except ValueError: + error_msg = "Error: File " + txt_file + " wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + + # bbox = left + " " + top + " " + right + " " + bottom + left = float(left) + right = float(right) + top = float(top) + bottom = float(bottom) + bbox = [left, top, right-left, bottom-top] + bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, + "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) + # sort detection-results by decreasing scores + # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) + return bounding_boxes +def coco_evaluation(annotation_json, result_json): + cocoGt = COCO(annotation_json) + cocoDt = cocoGt.loadRes(result_json) + iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + iou_type = 'bbox' + + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) + cocoEval.params.imgIds = cocoGt.get_img_ids() + cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
+ cocoEval.params.iouThrs = iou_thrs + + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # mapping of cocoEval.stats + coco_metric_names = { + 'mAP': 0, + 'mAP_50': 1, + 'mAP_75': 2, + 'mAP_s': 3, + 'mAP_m': 4, + 'mAP_l': 5, + 'AR@100': 6, + 'AR@300': 7, + 'AR@1000': 8, + 'AR_s@1000': 9, + 'AR_m@1000': 10, + 'AR_l@1000': 11 + } + + metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] + eval_results = {} + + for metric_item in metric_items: + key = f'bbox_{metric_item}' + val = float( + f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' + ) + eval_results[key] = val + ap = cocoEval.stats[:6] + eval_results['bbox_mAP_copypaste'] = ( + f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' + f'{ap[4]:.3f} {ap[5]:.3f}') + + return eval_results + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--test_annotation", default="./coco2017_jpg.info") + parser.add_argument("--det_results_path", default="./detection-results") + parser.add_argument("--img_path", default="./val2017/") + parser.add_argument("--net_out_num", default=2) + parser.add_argument("--net_input_width", default=1216) + parser.add_argument("--net_input_height", default=1216) + parser.add_argument("--prob_thres", default=0.05) + parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") + parser.add_argument('--npu_txt_path', default="detection-results", + help='the path of the predict result') + parser.add_argument("--json_output_file", default="coco_detection_result") + parser.add_argument("--ground_truth", default="instances_val2017.json") + parser.add_argument("--detection_result", default="coco_detection_result.json") + flags = parser.parse_args() + print(flags.ifShowDetObj, type(flags.ifShowDetObj)) + # generate dict according to annotation file for query resolution + # load width and height of input images + img_size_dict = dict() + with open(flags.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + # read bin file for generate predict result + bin_path = flags.bin_data_path + det_results_path = flags.det_results_path + img_path = flags.img_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] + for name in os.listdir(bin_path) if "bin" in name]) + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + # load all detected output tensor + res_buff = [] + for num in range(1, flags.net_out_num + 1): + if os.path.exists(path_base + "_" + str(num) + ".bin"): + if num == 1: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") + buf = np.reshape(buf, [100, 5]) + elif num == 2: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") + buf = np.reshape(buf, [100, 1]) + res_buff.append(buf) + else: + print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") + res_tensor = np.concatenate(res_buff, axis=1) + current_img_size = img_size_dict[bin_file] + print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) + predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) + + if flags.ifShowDetObj == True: 
+ pic = os.path.join(img_path, bin_file +'.jpg') + imgCur = cv2.imread(pic) + + det_results_str = '' + for idx, class_ind in enumerate(predbox[:,5]): + if float(predbox[idx][4]) < float(flags.prob_thres): + continue + # skip negative class index + if class_ind < 0 or class_ind > 80: + continue + + class_name = CLASSES[int(class_ind)] + det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], + predbox[idx][1], predbox[idx][2], predbox[idx][3]) + if flags.ifShowDetObj == True: + imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), + (int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) + imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), + (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 + + if flags.ifShowDetObj == True: + print(os.path.join(det_results_path, bin_file +'.jpg')) + cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) + + det_results_file = os.path.join(det_results_path, bin_file + ".txt") + with open(det_results_file, "w") as detf: + detf.write(det_results_str) + print(det_results_str) + + res_bbox = get_predict_list(flags.npu_txt_path, CLASSES) + mmcv.dump(res_bbox, flags.json_output_file + '.json') + result = coco_evaluation(flags.ground_truth, flags.detection_result) + print(result) + with open('./coco_detection_result.txt', 'w') as f: + for key, value in result.items(): f.write(key + ': ' + str(value) + '\n') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_preprocess.py b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_preprocess.py index c6f3b7429ec1a447a6c320ab6a84bff1cd6923ad..f7fc5568a9ca2fd4109f8c4c1a060fb7fc2e24d3 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/mmdetection_coco_preprocess.py @@ -1,68 +1,68 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
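# --- Editor's illustrative sketch; NOT part of the patch above. ---
# Each line of the detection-results txt written above is
# "<class name> <score> <x1> <y1> <x2> <y2>"; get_predict_list turns it into
# a COCO-style record (bbox stored as x, y, width, height). The class list
# and category ids below are abbreviated stand-ins for the full CLASSES and
# cat_ids tables defined in the script; the sample line and image_id are
# made up.
CLASSES = ["person", "traffic light"]
cat_ids = [1, 10]

line = "traffic light 0.87 10.0 20.0 110.0 220.0"
parts = line.split()
name = " ".join(parts[:-5])                  # class names may contain a space
score, x1, y1, x2, y2 = map(float, parts[-5:])
record = {
    "image_id": 139,
    "bbox": [x1, y1, x2 - x1, y2 - y1],      # convert corners to x, y, w, h
    "score": score,
    "category_id": cat_ids[CLASSES.index(name)],
}
print(record)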
- -import numpy as np -import os -import cv2 -import argparse -import mmcv -import torch - -dataset_config = { - 'resize': (1216, 1216), - 'mean': [123.675, 116.28, 103.53], - 'std': [58.395, 57.12, 57.375], -} - -tensor_height = 1216 -tensor_width = 1216 - -def coco_preprocess(input_image, output_bin_path): - #define the output file name - img_name = input_image.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - bin_fl = os.path.join(output_bin_path, bin_name) - - one_img = mmcv.imread(os.path.join(input_image), backend='cv2') - one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) - # calculate padding - h = one_img.shape[0] - w = one_img.shape[1] - pad_left = (tensor_width - w) // 2 - pad_top = (tensor_height - h) // 2 - pad_right = tensor_width - pad_left - w - pad_bottom = tensor_height - pad_top - h - - mean = np.array(dataset_config['mean'], dtype=np.float32) - std = np.array(dataset_config['std'], dtype=np.float32) - one_img = mmcv.imnormalize(one_img, mean, std) - one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) - one_img = one_img.transpose(2, 0, 1) - one_img.tofile(bin_fl) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of NAS-FPN pytorch model') - parser.add_argument("--image_folder_path", default="./coco2017/", help='image of dataset') - parser.add_argument("--bin_folder_path", default="./coco2017_bin/", help='Preprocessed image buffer') - flags = parser.parse_args() - - if not os.path.exists(flags.bin_folder_path): - os.makedirs(flags.bin_folder_path) - images = os.listdir(flags.image_folder_path) - for image_name in images: - if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(flags.image_folder_path, image_name) - coco_preprocess(path_image, flags.bin_folder_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
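# --- Editor's illustrative sketch; NOT part of the patch above. ---
# Worked example of the resize-and-pad geometry used by coco_preprocess
# above for a hypothetical 640x480 (w x h) image: the image is rescaled so
# its longer side fits the 1216x1216 network input, then centred with zero
# padding.
tensor_width = tensor_height = 1216
w, h = 640, 480                                      # placeholder original size
scale = tensor_width / max(w, h)                     # 1.9
new_w, new_h = round(w * scale), round(h * scale)    # 1216, 912
pad_left = (tensor_width - new_w) // 2               # 0
pad_top = (tensor_height - new_h) // 2               # 152
pad_right = tensor_width - pad_left - new_w          # 0
pad_bottom = tensor_height - pad_top - new_h         # 152
print(pad_left, pad_top, pad_right, pad_bottom)
# The matching postprocess step reverses this: subtract pad_left/pad_top
# from each box coordinate and divide by the same scale.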
+ +import numpy as np +import os +import cv2 +import argparse +import mmcv +import torch + +dataset_config = { + 'resize': (1216, 1216), + 'mean': [123.675, 116.28, 103.53], + 'std': [58.395, 57.12, 57.375], +} + +tensor_height = 1216 +tensor_width = 1216 + +def coco_preprocess(input_image, output_bin_path): + #define the output file name + img_name = input_image.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + bin_fl = os.path.join(output_bin_path, bin_name) + + one_img = mmcv.imread(os.path.join(input_image), backend='cv2') + one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) + # calculate padding + h = one_img.shape[0] + w = one_img.shape[1] + pad_left = (tensor_width - w) // 2 + pad_top = (tensor_height - h) // 2 + pad_right = tensor_width - pad_left - w + pad_bottom = tensor_height - pad_top - h + + mean = np.array(dataset_config['mean'], dtype=np.float32) + std = np.array(dataset_config['std'], dtype=np.float32) + one_img = mmcv.imnormalize(one_img, mean, std) + one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) + one_img = one_img.transpose(2, 0, 1) + one_img.tofile(bin_fl) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of NAS-FPN pytorch model') + parser.add_argument("--image_folder_path", default="./coco2017/", help='image of dataset') + parser.add_argument("--bin_folder_path", default="./coco2017_bin/", help='Preprocessed image buffer') + flags = parser.parse_args() + + if not os.path.exists(flags.bin_folder_path): + os.makedirs(flags.bin_folder_path) + images = os.listdir(flags.image_folder_path) + for image_name in images: + if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(flags.image_folder_path, image_name) + coco_preprocess(path_image, flags.bin_folder_path) diff --git a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/requirements.txt b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/requirements.txt index a9b2aff748d181f465deadd5f1eb2cef543efb5d..bbcf70abd1d45bdd4747a59f08891e33df32c837 100644 --- a/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101/requirements.txt @@ -1,10 +1,10 @@ -torch==1.7.0 -torchvision==0.8.0 -onnx==1.8.0 -numpy==1.20.0 -mmdet==2.8.0 -mmcv-full==1.2.4 -opencv-python==4.4.0.46 -mmpycocotools==12.0.3 -onnxruntime==1.9.0 - +torch==1.7.0 +torchvision==0.8.0 +onnx==1.8.0 +numpy==1.20.0 +mmdet==2.8.0 +mmcv-full==1.2.4 +opencv-python==4.4.0.46 +mmpycocotools==12.0.3 +onnxruntime==1.9.0 + diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/README.md b/ACL_PyTorch/contrib/cv/detection/CenterFace/README.md index db88ee778c56508e8f8c111427c67dc754cb261e..7d456ee6fbd43b41bd39b5fbacd153c6ae159dc4 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/README.md +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/README.md @@ -1,300 +1,300 @@ -# CenterFace Onnx模型端到端推理指导 - -- 1 模型概述 - - [1.1 论文地址](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#11-论文地址) - - [1.2 代码地址](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#12-代码地址) -- 2 环境说明 - - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#21-深度学习框架) - - [2.2 
python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#22-python第三方库) -- 3 模型转换 - - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#31-pth转onnx模型) - - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#32-onnx转om模型) -- 4 数据集预处理 - - [4.1 数据集获取](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#41-数据集获取) - - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#42-数据集预处理) - - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#43-生成数据集信息文件) -- 5 离线推理 - - [5.1 benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) - - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#52-离线推理) -- 6 精度对比 - - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#61-离线推理精度统计) - - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#62-开源精度) - - [6.3 精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#63-精度对比) -- 7 性能对比 - - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#71-npu性能数据) - - [7.2 T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#72-T4性能数据) - - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#73-性能对比) -- 8 710增加文件介绍 - -## 1 模型概述 - -- **论文地址** -- **代码地址** - -### 1.1 论文地址 - -[CenterFace论文](https://arxiv.org/abs/1911.03599) - -### 1.2 代码地址 - -[CenterFace代码](https://github.com/chenjun2hao/CenterFace.pytorch) - -## 2 环境说明 - -- **深度学习框架** -- **python第三方库** - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.1 - -pytorch >= 1.5.0 -torchvision >= 0.6.0 -onnx >= 1.7.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.20.3 -Pillow == 8.2.0 -opencv-python == 4.5.2.54 -``` - -## 3 模型转换 - -- **pth转onnx模型** -- **onnx转om模型** - -### 3.1 pth转onnx模型 - -1.下载pth权重文件 -权重文件从百度网盘上获取:https://pan.baidu.com/s/1sU3pRBTFebbsMDac-1HsQA 密码:etdi - -2.使用pth2onnx.py进行onnx的转换 - -``` -mv ./CenterFace/center-face/src/pth2onnx.py ./CenterFace/center-face/src/lib -cd ./CenterFace/center-face/src/lib -python3 pth2onnx.py -``` - -### 3.2 onnx转om模型 - -1.设置环境变量 - -``` -source env.sh -``` - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 - -``` -cd ./CenterFace/center-face/src/test -bash onnxToom.sh -``` - -## 4 数据集预处理 - -- **数据集获取** -- **数据集预处理** -- **生成数据集信息文件** - -### 4.1 数据集获取 - -拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) - -```shell -git clone https://gitee.com/Levi990223/center-face.git -``` - -整理代码结构 - -```shell -mv -r test center-face/src -mv benchmark.x86_64 centerface_pth_preprocess.py centerface_pth_postprocess.py convert.py env.sh CenterFace.onnx pth2onnx.py get_info.py model_best.pth move.sh npu_set_env.sh README.md ./center-face/src -``` - -下载WIDER_FACE数据集,将图片上在这个目录下: - -下载地址:https://www.graviti.cn/open-datasets/WIDER_FACE - -``` -$CenterFace_ROOT/center-face/data/{eval_dataset} -``` - -### 4.2 数据集预处理 - 
-1.预处理脚本centerface_pth_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -cd ./CenterFace/center-face/src/test -bash start.sh -``` - -### 4.3 生成数据集信息文件 - -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -cd ./CenterFace/center-face/src/test -bash to_info.sh -``` - -to_info.sh里,第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 5 离线推理 - -- **benchmark工具概述** -- **离线推理** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -source npu_set_env.sh -``` - -2.执行离线推理 - -执行前需要将benchmark.x86_64移动到执行目录下 - -(注:执行目录是/center-face/src) - -然后运行如下命令: - -``` -cd ./CenterFace/center-face/src/test -bash infer.sh -``` - -输出结果默认保存在当前目录result/dumpOutput_device{0},每个输入对应的输出对应四个_x.bin文件。 - -3.处理目录result/dumpOutput_device{0}下的bin文件 - -将该目录下的文件分类别存放,以便于后处理 - -``` -cd ./CenterFace/center-face/src/ -python3 convert.py ./result/dumpOutput_device1/ ./result/result -``` - -第一个参数是benchmark得到的bin文件目录,第二个参数是保存路径 - -## 6 精度对比 - -- **离线推理精度** -- **开源精度** -- **精度对比** - -### 6.1 离线推理精度统计 - -1.后处理 - -注:这里需要使用wide_face_val.mat文件,在center-face/evaluate/ground_truth/可以找到,然后将其移动到center-face/src目录下,然后执行下面命令 - -``` -cd ./CenterFace/center-face/src -python3 centerface_pth_postprocess.py -``` - -2.进行Ascend310上精度评估 - -``` -cd ./CenterFace/center-face/evaluate -python3 evaluation.py -``` - -### 6.2 开源精度 - -[CenterFace官网精度]([chenjun2hao/CenterFace.pytorch: unofficial version of centerface, which achieves the best balance between speed and accuracy at face detection (github.com)](https://github.com/chenjun2hao/CenterFace.pytorch)) - -``` -Easy Val AP: 0.9257383419951156 -Medium Val AP: 0.9131308732465665 -Hard Val AP: 0.7717305552550734 -``` - -### 6.3 精度对比 - -``` -Easy Val AP: 0.9190736484158941 -Medium Val AP: 0.9067769085346155 -Hard Val AP: 0.7425807072008017 -``` - -### 6.3 精度对比 - -实际上官网的hard精度达不到77%,最高74%左右,所以对比下来精度是达标的。 - -## 7 性能对比 - -- **npu性能数据** -- **T4性能数据** -- **性能对比** - -### 7.1 npu性能数据 - -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 33.1307, latency: 97372 -[data read] throughputRate: 36.336, moduleLatency: 27.5209 -[preprocess] throughputRate: 35.6065, moduleLatency: 28.0847 -[infer] throughputRate: 33.4556, Interface throughputRate: 91.86, moduleLatency: 29.2697 -[post] throughputRate: 33.4544, moduleLatency: 29.8915 -``` - -Interface throughputRate: 91.86,91.86x4=367.44既是batch1 310单卡吞吐率 -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` -[e2e] throughputRate: 31.7581, latency: 101580 -[data read] throughputRate: 35.0206, moduleLatency: 28.5547 -[preprocess] throughputRate: 33.9534, moduleLatency: 29.4521 -[infer] throughputRate: 32.022, Interface throughputRate: 80.3537, moduleLatency: 30.4381 -[post] throughputRate: 2.00424, moduleLatency: 498.943 - -``` - -Interface throughputRate: 80.3537,80.3537x4=321.4148既是batch16 310单卡吞吐率 - -### 7.2 T4性能数据 - -``` -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -t4 bs1 fps:337.544 -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 
11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 -t4 bs16 fps:359.999 -``` - -batch1 t4单卡吞吐率:337.544 - -batch16 t4单卡吞吐率:359.999 - -### 7.3 性能对比 - -batch1:91.86x4=367.44 > 337.544 -batch16:80.3537x4=321.4148 < 359.999 - -## 8 710增加文件介绍 - -1.aipp_centerface.aippconfig ONNX模型转OM模型时所配置aipp -2.calibration_bin.py 量化模型时输入真实数据的组件脚本 +# CenterFace Onnx模型端到端推理指导 + +- 1 模型概述 + - [1.1 论文地址](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#11-论文地址) + - [1.2 代码地址](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#12-代码地址) +- 2 环境说明 + - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#21-深度学习框架) + - [2.2 python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#22-python第三方库) +- 3 模型转换 + - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#31-pth转onnx模型) + - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#32-onnx转om模型) +- 4 数据集预处理 + - [4.1 数据集获取](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#41-数据集获取) + - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#42-数据集预处理) + - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#43-生成数据集信息文件) +- 5 离线推理 + - [5.1 benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) + - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#52-离线推理) +- 6 精度对比 + - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#61-离线推理精度统计) + - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#62-开源精度) + - [6.3 精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#63-精度对比) +- 7 性能对比 + - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#71-npu性能数据) + - [7.2 T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#72-T4性能数据) + - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#73-性能对比) +- 8 710增加文件介绍 + +## 1 模型概述 + +- **论文地址** +- **代码地址** + +### 1.1 论文地址 + +[CenterFace论文](https://arxiv.org/abs/1911.03599) + +### 1.2 代码地址 + +[CenterFace代码](https://github.com/chenjun2hao/CenterFace.pytorch) + +## 2 环境说明 + +- **深度学习框架** +- **python第三方库** + +### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.1 + +pytorch >= 1.5.0 +torchvision >= 0.6.0 +onnx >= 1.7.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.20.3 +Pillow == 8.2.0 +opencv-python == 4.5.2.54 +``` + +## 3 模型转换 + +- **pth转onnx模型** +- **onnx转om模型** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +权重文件从百度网盘上获取:https://pan.baidu.com/s/1sU3pRBTFebbsMDac-1HsQA 密码:etdi + +2.使用pth2onnx.py进行onnx的转换 + +``` +mv ./CenterFace/center-face/src/pth2onnx.py ./CenterFace/center-face/src/lib +cd 
./CenterFace/center-face/src/lib +python3 pth2onnx.py +``` + +### 3.2 onnx转om模型 + +1.设置环境变量 + +``` +source env.sh +``` + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 + +``` +cd ./CenterFace/center-face/src/test +bash onnxToom.sh +``` + +## 4 数据集预处理 + +- **数据集获取** +- **数据集预处理** +- **生成数据集信息文件** + +### 4.1 数据集获取 + +拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) + +```shell +git clone https://gitee.com/Levi990223/center-face.git +``` + +整理代码结构 + +```shell +mv -r test center-face/src +mv benchmark.x86_64 centerface_pth_preprocess.py centerface_pth_postprocess.py convert.py env.sh CenterFace.onnx pth2onnx.py get_info.py model_best.pth move.sh npu_set_env.sh README.md ./center-face/src +``` + +下载WIDER_FACE数据集,将图片上在这个目录下: + +下载地址:https://www.graviti.cn/open-datasets/WIDER_FACE + +``` +$CenterFace_ROOT/center-face/data/{eval_dataset} +``` + +### 4.2 数据集预处理 + +1.预处理脚本centerface_pth_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +cd ./CenterFace/center-face/src/test +bash start.sh +``` + +### 4.3 生成数据集信息文件 + +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +cd ./CenterFace/center-face/src/test +bash to_info.sh +``` + +to_info.sh里,第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 5 离线推理 + +- **benchmark工具概述** +- **离线推理** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +source npu_set_env.sh +``` + +2.执行离线推理 + +执行前需要将benchmark.x86_64移动到执行目录下 + +(注:执行目录是/center-face/src) + +然后运行如下命令: + +``` +cd ./CenterFace/center-face/src/test +bash infer.sh +``` + +输出结果默认保存在当前目录result/dumpOutput_device{0},每个输入对应的输出对应四个_x.bin文件。 + +3.处理目录result/dumpOutput_device{0}下的bin文件 + +将该目录下的文件分类别存放,以便于后处理 + +``` +cd ./CenterFace/center-face/src/ +python3 convert.py ./result/dumpOutput_device1/ ./result/result +``` + +第一个参数是benchmark得到的bin文件目录,第二个参数是保存路径 + +## 6 精度对比 + +- **离线推理精度** +- **开源精度** +- **精度对比** + +### 6.1 离线推理精度统计 + +1.后处理 + +注:这里需要使用wide_face_val.mat文件,在center-face/evaluate/ground_truth/可以找到,然后将其移动到center-face/src目录下,然后执行下面命令 + +``` +cd ./CenterFace/center-face/src +python3 centerface_pth_postprocess.py +``` + +2.进行Ascend310上精度评估 + +``` +cd ./CenterFace/center-face/evaluate +python3 evaluation.py +``` + +### 6.2 开源精度 + +[CenterFace官网精度]([chenjun2hao/CenterFace.pytorch: unofficial version of centerface, which achieves the best balance between speed and accuracy at face detection (github.com)](https://github.com/chenjun2hao/CenterFace.pytorch)) + +``` +Easy Val AP: 0.9257383419951156 +Medium Val AP: 0.9131308732465665 +Hard Val AP: 0.7717305552550734 +``` + +### 6.3 精度对比 + +``` +Easy Val AP: 0.9190736484158941 +Medium Val AP: 0.9067769085346155 +Hard Val AP: 0.7425807072008017 +``` + +### 6.3 精度对比 + +实际上官网的hard精度达不到77%,最高74%左右,所以对比下来精度是达标的。 + +## 7 性能对比 + +- **npu性能数据** +- **T4性能数据** +- **性能对比** + +### 7.1 npu性能数据 + +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 33.1307, latency: 97372 +[data read] throughputRate: 36.336, moduleLatency: 27.5209 +[preprocess] throughputRate: 35.6065, moduleLatency: 28.0847 +[infer] throughputRate: 33.4556, Interface throughputRate: 91.86, moduleLatency: 29.2697 +[post] throughputRate: 33.4544, moduleLatency: 29.8915 +``` + +Interface throughputRate: 91.86,91.86x4=367.44既是batch1 310单卡吞吐率 +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +[e2e] throughputRate: 
31.7581, latency: 101580 +[data read] throughputRate: 35.0206, moduleLatency: 28.5547 +[preprocess] throughputRate: 33.9534, moduleLatency: 29.4521 +[infer] throughputRate: 32.022, Interface throughputRate: 80.3537, moduleLatency: 30.4381 +[post] throughputRate: 2.00424, moduleLatency: 498.943 + +``` + +Interface throughputRate: 80.3537,80.3537x4=321.4148既是batch16 310单卡吞吐率 + +### 7.2 T4性能数据 + +``` +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +t4 bs1 fps:337.544 +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +[W] [TRT] TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0 +t4 bs16 fps:359.999 +``` + +batch1 t4单卡吞吐率:337.544 + +batch16 t4单卡吞吐率:359.999 + +### 7.3 性能对比 + +batch1:91.86x4=367.44 > 337.544 +batch16:80.3537x4=321.4148 < 359.999 + +## 8 710增加文件介绍 + +1.aipp_centerface.aippconfig ONNX模型转OM模型时所配置aipp +2.calibration_bin.py 量化模型时输入真实数据的组件脚本 diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/aipp_centerface.aippconfig b/ACL_PyTorch/contrib/cv/detection/CenterFace/aipp_centerface.aippconfig index 3d0b228c6beb9bd218de96c0dee1f491956f6392..3d5d1a7120d951b1f55b2790818769a90b249a6d 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/aipp_centerface.aippconfig +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/aipp_centerface.aippconfig @@ -1,15 +1,15 @@ -aipp_op{ - aipp_mode:static - input_format : RGB888_U8 - - src_image_size_w : 800 - src_image_size_h : 800 - crop:false - - min_chn_0 : 104 - min_chn_1 : 114 - min_chn_2 : 120 - var_reci_chn_0: 0.0135864 - var_reci_chn_1: 0.0143080 - var_reci_chn_2: 0.0141014 -} +aipp_op{ + aipp_mode:static + input_format : RGB888_U8 + + src_image_size_w : 800 + src_image_size_h : 800 + crop:false + + min_chn_0 : 104 + min_chn_1 : 114 + min_chn_2 : 120 + var_reci_chn_0: 0.0135864 + var_reci_chn_1: 0.0143080 + var_reci_chn_2: 0.0141014 +} diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/calibration_bin.py b/ACL_PyTorch/contrib/cv/detection/CenterFace/calibration_bin.py index af13f3593f95019d0f90567bc014aa05bd9a7708..c72d0c6fbc85fe3e5608325eb698828d38880fce 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/calibration_bin.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/calibration_bin.py @@ -1,56 +1,56 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import numpy as np -import multiprocessing - -max_bin=10 -def preprocess(src_path, save_path, batch_size): - files = os.listdir(src_path) - - output_data = [0] - for i, file in enumerate(files): - input_data = np.fromfile(os.path.join(src_path, file), dtype=np.float32) - input_data = input_data.reshape(1, 3, 800, 800) - - if i % batch_size == 0: - output_data = input_data - else: - output_data = np.concatenate((output_data, input_data), axis=0) - - # only save 10 bin files - loop_id = (i + 1) // batch_size - if loop_id > max_bin: - break - - if (i + 1) % batch_size == 0: - output_data.tofile("{}/img_{}_bs{}.bin".format(save_path, loop_id, batch_size)) - output_data = [0] - - -if __name__ == '__main__': - if len(sys.argv) < 4: - raise Exception("usage: python3 xxx.py [src_path] [save_path] [batch_size]") - src_path = sys.argv[1] - save_path = sys.argv[2] - batch_size = int(sys.argv[3]) - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - - if not os.path.isdir(save_path): - os.makedirs(os.path.realpath(save_path)) - preprocess(src_path, save_path, batch_size) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import multiprocessing + +max_bin=10 +def preprocess(src_path, save_path, batch_size): + files = os.listdir(src_path) + + output_data = [0] + for i, file in enumerate(files): + input_data = np.fromfile(os.path.join(src_path, file), dtype=np.float32) + input_data = input_data.reshape(1, 3, 800, 800) + + if i % batch_size == 0: + output_data = input_data + else: + output_data = np.concatenate((output_data, input_data), axis=0) + + # only save 10 bin files + loop_id = (i + 1) // batch_size + if loop_id > max_bin: + break + + if (i + 1) % batch_size == 0: + output_data.tofile("{}/img_{}_bs{}.bin".format(save_path, loop_id, batch_size)) + output_data = [0] + + +if __name__ == '__main__': + if len(sys.argv) < 4: + raise Exception("usage: python3 xxx.py [src_path] [save_path] [batch_size]") + src_path = sys.argv[1] + save_path = sys.argv[2] + batch_size = int(sys.argv[3]) + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + + if not os.path.isdir(save_path): + os.makedirs(os.path.realpath(save_path)) + preprocess(src_path, save_path, batch_size) + diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_postprocess.py b/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_postprocess.py index e598550ebba111ee28954020d70141735adfe821..9e825c29e87d67d3adf25da82ef4f7478312c8fb 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_postprocess.py @@ -1,153 +1,153 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division - -import numpy as np -import torch -import os -import sys -import cv2 -import scipy.io as sio - -from lib.models import decode as dc -from lib.models.utils import _gather_feat, _tranpose_and_gather_feat -from lib.utils import image as img -from lib.utils.post_process import multi_pose_post_process -from lib.opts_pose import opts -from lib.detectors.detector_factory import detector_factory -from lib.datasets.dataset_factory import get_dataset - -def preprocess(): - root_path = os.getcwd() - opt = opts().parse('--task {} --load_model {}'.format('multi_pose', os.path.join(root_path, 'model_best.pth')).split(' ')) - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str - Detector = detector_factory[opt.task] - detector = Detector(opt) - file_path = os.path.join(root_path,'../data') - in_files = os.listdir(file_path) - Meta = [] - for file in sorted(in_files): - os.chdir(os.path.join(file_path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in sorted(doc): - image = cv2.imread(os.path.join(cur_path, document)) - if document=='output': - break - for scale in opt.test_scales: - images,meta = detector.pre_process(image, scale, meta=None) - Meta.append(meta) - return Meta - -def post_process(dets, meta, scale=1): - dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) - dets = multi_pose_post_process( - dets.copy(), [meta['c']], [meta['s']], - meta['out_height'], meta['out_width']) - for j in range(1, 2): - dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 15) # 关键点数+5=15 - dets[0][j][:, :4] /= scale - dets[0][j][:, 5:] /= scale - return dets[0] - -def merge_outputs(detections): - results = {} - results[1] = np.concatenate( - [detection[1] for detection in detections], axis=0).astype(np.float32) - results[1] = results[1].tolist() - return results - -def pre_postprocess(): - List = [] - root_path = os.getcwd() - path=os.path.join(root_path,'./result/result') - File = os.listdir(path) - for file in sorted(File): - Doc = [] #save no-repeated file name - os.chdir(os.path.join(path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in sorted(doc): - Doc.append(document[0:-6]) #grip end - Doc = list(set(Doc)) #grip repeated element - for ff in sorted(Doc): #deal after sorting - dist={} - if ff=='kerne': - break - for i in range(1, 5): #one image ----->four bin - txt_file = np.fromfile(f'../../../result/result/{file}/{ff}_{i}.bin', dtype=np.float32) - if i==1: - dist['hm']=torch.tensor(txt_file.reshape(-1,1,200,200)) - if i==2: - dist['wh']=torch.tensor(txt_file.reshape(-1,2,200,200)) - if i==3: - dist['hm_offset']=torch.tensor(txt_file.reshape(-1,2,200,200)) - if i==4: - dist['landmarks']=torch.tensor(txt_file.reshape(-1,10,200,200)) - List.append(dist) - os.chdir(root_path) - return List - -def run(): - List = pre_postprocess() - Meta = preprocess() - print('List:',len(List)) - print('Meta:',len(Meta)) - Results=[] 
- from tqdm import tqdm - for i in tqdm(range(len(List))): - detections = [] - reg = List[i]['hm_offset'] - dets = dc.centerface_decode( - List[i]['hm'], List[i]['wh'], List[i]['landmarks'], - reg=reg, K=200) - dets = post_process(dets,Meta[i]) - detections.append(dets) - results = merge_outputs(detections) - Results.append(results) - return Results - -if __name__ == "__main__": - root_path = os.getcwd() - Path = os.path.join(root_path,'../data') - wider_face_mat = sio.loadmat(root_path+'/wider_face_val.mat') - event_list = wider_face_mat['event_list'] #directory - file_list = wider_face_mat['file_list'] #file - save_path = root_path+'/output/widerface/' - results = run() #all data - i=0 #iteration - for index, event in enumerate(sorted(event_list)): - file_list_item = file_list[index][0] - im_dir = event[0][0] - if not os.path.exists(save_path + im_dir): - os.makedirs(save_path + im_dir) - for num, file in enumerate(sorted(file_list_item)): - im_name = file[0][0] - zip_name = '%s/%s.jpg' % (im_dir, im_name) - img_path = os.path.join(Path, zip_name) - dets = results[i] - f = open(save_path + im_dir + '/' + im_name + '.txt', 'w') - f.write('{:s}\n'.format('%s/%s.jpg' % (im_dir, im_name))) - f.write('{:d}\n'.format(len(dets))) - for b in dets[1]: - x1, y1, x2, y2, s = b[0], b[1], b[2], b[3], b[4] - f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s)) - f.close() - print(i) - i=i+1 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division + +import numpy as np +import torch +import os +import sys +import cv2 +import scipy.io as sio + +from lib.models import decode as dc +from lib.models.utils import _gather_feat, _tranpose_and_gather_feat +from lib.utils import image as img +from lib.utils.post_process import multi_pose_post_process +from lib.opts_pose import opts +from lib.detectors.detector_factory import detector_factory +from lib.datasets.dataset_factory import get_dataset + +def preprocess(): + root_path = os.getcwd() + opt = opts().parse('--task {} --load_model {}'.format('multi_pose', os.path.join(root_path, 'model_best.pth')).split(' ')) + Dataset = get_dataset(opt.dataset, opt.task) + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str + Detector = detector_factory[opt.task] + detector = Detector(opt) + file_path = os.path.join(root_path,'../data') + in_files = os.listdir(file_path) + Meta = [] + for file in sorted(in_files): + os.chdir(os.path.join(file_path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in sorted(doc): + image = cv2.imread(os.path.join(cur_path, document)) + if document=='output': + break + for scale in opt.test_scales: + images,meta = detector.pre_process(image, scale, meta=None) + Meta.append(meta) + return Meta + +def post_process(dets, meta, scale=1): + dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) + dets = multi_pose_post_process( + dets.copy(), [meta['c']], [meta['s']], + meta['out_height'], meta['out_width']) + for j in range(1, 2): + dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 15) # 关键点数+5=15 + dets[0][j][:, :4] /= scale + dets[0][j][:, 5:] /= scale + return dets[0] + +def merge_outputs(detections): + results = {} + results[1] = np.concatenate( + [detection[1] for detection in detections], axis=0).astype(np.float32) + results[1] = results[1].tolist() + return results + +def pre_postprocess(): + List = [] + root_path = os.getcwd() + path=os.path.join(root_path,'./result/result') + File = os.listdir(path) + for file in sorted(File): + Doc = [] #save no-repeated file name + os.chdir(os.path.join(path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in sorted(doc): + Doc.append(document[0:-6]) #grip end + Doc = list(set(Doc)) #grip repeated element + for ff in sorted(Doc): #deal after sorting + dist={} + if ff=='kerne': + break + for i in range(1, 5): #one image ----->four bin + txt_file = np.fromfile(f'../../../result/result/{file}/{ff}_{i}.bin', dtype=np.float32) + if i==1: + dist['hm']=torch.tensor(txt_file.reshape(-1,1,200,200)) + if i==2: + dist['wh']=torch.tensor(txt_file.reshape(-1,2,200,200)) + if i==3: + dist['hm_offset']=torch.tensor(txt_file.reshape(-1,2,200,200)) + if i==4: + dist['landmarks']=torch.tensor(txt_file.reshape(-1,10,200,200)) + List.append(dist) + os.chdir(root_path) + return List + +def run(): + List = pre_postprocess() + Meta = preprocess() + print('List:',len(List)) + print('Meta:',len(Meta)) + Results=[] + from tqdm import tqdm + for i in tqdm(range(len(List))): + detections = [] + reg = List[i]['hm_offset'] + dets = dc.centerface_decode( + List[i]['hm'], List[i]['wh'], List[i]['landmarks'], + reg=reg, K=200) + dets = post_process(dets,Meta[i]) + detections.append(dets) + results = merge_outputs(detections) + Results.append(results) + return Results + +if __name__ == "__main__": + root_path = os.getcwd() + Path 
= os.path.join(root_path,'../data') + wider_face_mat = sio.loadmat(root_path+'/wider_face_val.mat') + event_list = wider_face_mat['event_list'] #directory + file_list = wider_face_mat['file_list'] #file + save_path = root_path+'/output/widerface/' + results = run() #all data + i=0 #iteration + for index, event in enumerate(sorted(event_list)): + file_list_item = file_list[index][0] + im_dir = event[0][0] + if not os.path.exists(save_path + im_dir): + os.makedirs(save_path + im_dir) + for num, file in enumerate(sorted(file_list_item)): + im_name = file[0][0] + zip_name = '%s/%s.jpg' % (im_dir, im_name) + img_path = os.path.join(Path, zip_name) + dets = results[i] + f = open(save_path + im_dir + '/' + im_name + '.txt', 'w') + f.write('{:s}\n'.format('%s/%s.jpg' % (im_dir, im_name))) + f.write('{:d}\n'.format(len(dets))) + for b in dets[1]: + x1, y1, x2, y2, s = b[0], b[1], b[2], b[3], b[4] + f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s)) + f.close() + print(i) + i=i+1 diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_preprocess.py b/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_preprocess.py index cb3bfae74c0b296b33714f35434440115942d9ac..c8642403b4e52cff311a7e7948805e4f9509c5ca 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/centerface_pth_preprocess.py @@ -1,58 +1,58 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# -*- coding:GB2312 -*- - -from __future__ import absolute_import -from __future__ import division - -import sys -import os -import cv2 -import torch - -from lib.opts_pose import opts -from lib.detectors.detector_factory import detector_factory -from datasets.dataset_factory import get_dataset - -def preprocess(file_path, bin_path): - opt = opts().parse('--task {} --load_model {}'.format('multi_pose', 'model_best.pth').split(' ')) - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str - Detector = detector_factory[opt.task] - detector = Detector(opt) - in_files = os.listdir(file_path) - - if not os.path.exists(bin_path): - os.makedirs(bin_path) - for file in sorted(in_files): - os.chdir(os.path.join(file_path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in doc: - if document=='output': - break - image = cv2.imread(os.path.join(cur_path, document)) - for scale in opt.test_scales: - images, meta = detector.pre_process(image, scale, meta=None) - if not os.path.exists(os.path.join(bin_path,file)): - os.makedirs(os.path.join(bin_path,file)) - des_path = os.path.join(bin_path,file) - images.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding:GB2312 -*- + +from __future__ import absolute_import +from __future__ import division + +import sys +import os +import cv2 +import torch + +from lib.opts_pose import opts +from lib.detectors.detector_factory import detector_factory +from datasets.dataset_factory import get_dataset + +def preprocess(file_path, bin_path): + opt = opts().parse('--task {} --load_model {}'.format('multi_pose', 'model_best.pth').split(' ')) + Dataset = get_dataset(opt.dataset, opt.task) + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str + Detector = detector_factory[opt.task] + detector = Detector(opt) + in_files = os.listdir(file_path) + + if not os.path.exists(bin_path): + os.makedirs(bin_path) + for file in sorted(in_files): + os.chdir(os.path.join(file_path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in doc: + if document=='output': + break + image = cv2.imread(os.path.join(cur_path, document)) + for scale in opt.test_scales: + images, meta = detector.pre_process(image, scale, meta=None) + if not os.path.exists(os.path.join(bin_path,file)): + os.makedirs(os.path.join(bin_path,file)) + des_path = os.path.join(bin_path,file) + images.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/convert.py b/ACL_PyTorch/contrib/cv/detection/CenterFace/convert.py index b5436a7049f912d991888b10c39d89f49316392d..56f2d5d0363c1c84ba880f85da19e691f9f389de 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/convert.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/convert.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import shutil - - -if __name__ == '__main__': - bin_path = sys.argv[1] - resule_path = sys.argv[2] - if not os.path.exists(resule_path): - os.mkdir(resule_path) - f = os.listdir(bin_path) - for data in f: - data = data.strip('\n') - dir_name = data.split('_')[0] + '--' + data.split('_')[1] - dir_path = os.path.join(resule_path, dir_name) - if not os.path.exists(dir_path): - os.mkdir(dir_path) - file_list = os.listdir(resule_path) - for dir in file_list: - dir = dir.strip('\n') - cur_path = os.path.join(resule_path, dir) - for data in f: - data = data.strip('\n') - if data.split('_')[0] == dir.split('--')[0]: - shutil.copy(os.path.join(bin_path, data), - os.path.join(cur_path, data)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil + + +if __name__ == '__main__': + bin_path = sys.argv[1] + resule_path = sys.argv[2] + if not os.path.exists(resule_path): + os.mkdir(resule_path) + f = os.listdir(bin_path) + for data in f: + data = data.strip('\n') + dir_name = data.split('_')[0] + '--' + data.split('_')[1] + dir_path = os.path.join(resule_path, dir_name) + if not os.path.exists(dir_path): + os.mkdir(dir_path) + file_list = os.listdir(resule_path) + for dir in file_list: + dir = dir.strip('\n') + cur_path = os.path.join(resule_path, dir) + for data in f: + data = data.strip('\n') + if data.split('_')[0] == dir.split('--')[0]: + shutil.copy(os.path.join(bin_path, data), + os.path.join(cur_path, data)) diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/CenterFace/pth2onnx.py index f58e87f45a6ada06593ee9073d64665c2aff40fa..32ec152f35d1741de53a81f8ab276f844f3738b9 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/pth2onnx.py @@ -1,35 +1,35 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import torch -import torch.utils.data -from opts_pose import opts -from models.model import create_model, load_model, save_model -from models.data_parallel import DataParallel -from logger import Logger -from datasets.dataset_factory import get_dataset -from trains.train_factory import train_factory -from datasets.sample.multi_pose import Multiposebatch - -def main(onnx_path,path): - opt = opts().parse() - input_names=["image"] - output_names = ["output1","output2","output3","output4"] - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - model = create_model(opt.arch, opt.heads, opt.head_conv) - model = load_model(model , path, None, opt.resume, opt.lr, opt.lr_step) - dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'},'output2': {0: '-1'},'output3': {0: '-1'},'output4': {0: '-1'}} - model.eval() - dummy_input = torch.randn(1,3,800,800) - torch.onnx.export(model,dummy_input,onnx_path,export_params=True,dynamic_axes = dynamic_axes,input_names = input_names,output_names = output_names,verbose=True) - -if __name__ =="__main__": - #onnx_path = sys.argv[1] - #path = sys.argv[2] - onnx_path = '../CenterFace.onnx' - path = '../model_best.pth' - main(onnx_path,path) +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import torch +import torch.utils.data +from opts_pose import opts +from models.model import create_model, load_model, save_model +from models.data_parallel import DataParallel +from logger import Logger +from datasets.dataset_factory import get_dataset +from trains.train_factory import train_factory +from datasets.sample.multi_pose import Multiposebatch + +def main(onnx_path,path): + opt = opts().parse() + input_names=["image"] + output_names = ["output1","output2","output3","output4"] + Dataset = get_dataset(opt.dataset, opt.task) 
+ opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + model = create_model(opt.arch, opt.heads, opt.head_conv) + model = load_model(model , path, None, opt.resume, opt.lr, opt.lr_step) + dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'},'output2': {0: '-1'},'output3': {0: '-1'},'output4': {0: '-1'}} + model.eval() + dummy_input = torch.randn(1,3,800,800) + torch.onnx.export(model,dummy_input,onnx_path,export_params=True,dynamic_axes = dynamic_axes,input_names = input_names,output_names = output_names,verbose=True) + +if __name__ =="__main__": + #onnx_path = sys.argv[1] + #path = sys.argv[2] + onnx_path = '../CenterFace.onnx' + path = '../model_best.pth' + main(onnx_path,path) diff --git a/ACL_PyTorch/contrib/cv/detection/CenterFace/test/README.md b/ACL_PyTorch/contrib/cv/detection/CenterFace/test/README.md index fd47bfc4bc55179514534465c8e90e86eaabceb2..1652e1d70d0eebcdc0ac0d57b50cb024092ab0da 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterFace/test/README.md +++ b/ACL_PyTorch/contrib/cv/detection/CenterFace/test/README.md @@ -1,28 +1,28 @@ -环境准备: - -1.数据集路径 通用的数据集统一放在/CenterFace/center-face/data - -2.进入工作目录 cd CenterFace/center-face/src - -3.安装模型代码之前要执行下面命令: - - git clone https://gitee.com/Levi990223/center-face.git - -4.获取权重文件 -权重文件从百度网盘上获取:https://pan.baidu.com/s/1sU3pRBTFebbsMDac-1HsQA 密码:etdi - -5.获取数据集:https://www.graviti.cn/open-datasets/WIDER_FACE - -6.获取benchmark工具 将benchmark.x86_64 放在CenterFace/src目录下 - -推理步骤: - -7.调用/CenterFace/src/lib下面得pth2onnx生成onnx放在/src下面 - -运行python3 pth2onnx命令,在src文件夹下生成CenterFace.onnx文件 - -8.脚本转换om模型 bash test/onnxToom.sh - -9.310上执行,执行时确保device空闲: bash test/infer.sh - +环境准备: + +1.数据集路径 通用的数据集统一放在/CenterFace/center-face/data + +2.进入工作目录 cd CenterFace/center-face/src + +3.安装模型代码之前要执行下面命令: + + git clone https://gitee.com/Levi990223/center-face.git + +4.获取权重文件 +权重文件从百度网盘上获取:https://pan.baidu.com/s/1sU3pRBTFebbsMDac-1HsQA 密码:etdi + +5.获取数据集:https://www.graviti.cn/open-datasets/WIDER_FACE + +6.获取benchmark工具 将benchmark.x86_64 放在CenterFace/src目录下 + +推理步骤: + +7.调用/CenterFace/src/lib下面得pth2onnx生成onnx放在/src下面 + +运行python3 pth2onnx命令,在src文件夹下生成CenterFace.onnx文件 + +8.脚本转换om模型 bash test/onnxToom.sh + +9.310上执行,执行时确保device空闲: bash test/infer.sh + 10.在T4环境上将onnx文件放在/root/lsj目录下,执行perf_t4.sh时确保gpu空闲,执行命令 bash perf_t4.sh \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_postprocess.py b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_postprocess.py index bf2a91f00f3fca5a0fcaadf5e0b4e09da6c28a5b..8547afc57dcc47a0e0319d49799c0b7241e91b92 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_postprocess.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import argparse -import torch -import numpy as np -import os -from glob import glob -import sys -CENTERNET_PATH = './CenterNet/src' -sys.path.insert(0, CENTERNET_PATH) -import cv2 -from lib.opts import opts -from lib.detectors.detector_factory import detector_factory -from lib.datasets.dataset_factory import get_dataset -from lib.models.decode import ctdet_decode -from lib.utils.post_process import ctdet_post_process -from lib.models.model import create_model, load_model -import lib.datasets.dataset.coco - -def post_process(dets, meta, scale=1): - print(meta) - num_classes=80 - dets = dets.detach().cpu().numpy() - dets = dets.reshape(1, -1, dets.shape[2]) - dets = ctdet_post_process( - dets.copy(), [meta['c']], [meta['s']], - meta['out_height'], meta['out_width'], 80) - for j in range(1, 81): - dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) - dets[0][j][:, :4] /= scale - return dets[0] - -def merge_outputs(detections): - results = {} - for j in range(1, 80 + 1):#coco numclasses=80 - results[j] = np.concatenate( - [detection[j] for detection in detections], axis=0).astype(np.float32) - return results - -def run(result_list, index, meta, dataset, filename): - output={} - for i in range(1, 4): - buf = np.fromfile(f'{result_list}/{filename[0:-4]}_{i}.bin', dtype="float32") - if i == 1: - output['hm'] = torch.tensor(buf.reshape(1, 80, 128, 128)) - if i == 2: - output['wh'] = torch.tensor(buf.reshape(1, 2, 128, 128)) - if i == 3: - output['reg'] = torch.tensor(buf.reshape(1, 2, 128, 128)) - detections = [] - hm = output['hm'].sigmoid_() - wh = output['wh'] - reg = output['reg'] - dets = ctdet_decode(hm, wh, reg) - dets = post_process(dets,meta) - detections.append(dets) - results = merge_outputs(detections) - return results - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='CenterNet') - parser.add_argument('--bin_data_path', default='./result/dumpOutput_device0', type=str, help='infer out path') - parser.add_argument('--resultfolder', default='./run_eval_result', type=str, help='Dir to save results') - args = parser.parse_args() - if not os.path.exists(args.resultfolder): - os.makedirs(args.resultfolder) - - opt = opts().parse('{} --load_model {}'.format('ctdet', './ctdet_coco_dla_2x.pth').split(' ')) - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - Detector = detector_factory[opt.task] - dataset = Dataset(opt, 'val') - opt.gpus[0] = -1 - detector = Detector(opt) - Meta = [] - filename = [] - results = {} - num_iters = len(dataset) - for ind in range(num_iters): - img_id = dataset.images[ind] - img_info = dataset.coco.loadImgs(ids=[img_id])[0] - img_path = os.path.join(dataset.img_dir, img_info['file_name']) - print(img_info['file_name'], "===", ind) - image = cv2.imread(img_path) - images, meta = detector.pre_process(image, 1.0, meta=None) - ret = run(args.bin_data_path, ind, meta, dataset, img_info['file_name']) - results[img_id] = ret - dataset.run_eval(results, args.resultfolder) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import argparse +import torch +import numpy as np +import os +from glob import glob +import sys +CENTERNET_PATH = './CenterNet/src' +sys.path.insert(0, CENTERNET_PATH) +import cv2 +from lib.opts import opts +from lib.detectors.detector_factory import detector_factory +from lib.datasets.dataset_factory import get_dataset +from lib.models.decode import ctdet_decode +from lib.utils.post_process import ctdet_post_process +from lib.models.model import create_model, load_model +import lib.datasets.dataset.coco + +def post_process(dets, meta, scale=1): + print(meta) + num_classes=80 + dets = dets.detach().cpu().numpy() + dets = dets.reshape(1, -1, dets.shape[2]) + dets = ctdet_post_process( + dets.copy(), [meta['c']], [meta['s']], + meta['out_height'], meta['out_width'], 80) + for j in range(1, 81): + dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) + dets[0][j][:, :4] /= scale + return dets[0] + +def merge_outputs(detections): + results = {} + for j in range(1, 80 + 1):#coco numclasses=80 + results[j] = np.concatenate( + [detection[j] for detection in detections], axis=0).astype(np.float32) + return results + +def run(result_list, index, meta, dataset, filename): + output={} + for i in range(1, 4): + buf = np.fromfile(f'{result_list}/{filename[0:-4]}_{i}.bin', dtype="float32") + if i == 1: + output['hm'] = torch.tensor(buf.reshape(1, 80, 128, 128)) + if i == 2: + output['wh'] = torch.tensor(buf.reshape(1, 2, 128, 128)) + if i == 3: + output['reg'] = torch.tensor(buf.reshape(1, 2, 128, 128)) + detections = [] + hm = output['hm'].sigmoid_() + wh = output['wh'] + reg = output['reg'] + dets = ctdet_decode(hm, wh, reg) + dets = post_process(dets,meta) + detections.append(dets) + results = merge_outputs(detections) + return results + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='CenterNet') + parser.add_argument('--bin_data_path', default='./result/dumpOutput_device0', type=str, help='infer out path') + parser.add_argument('--resultfolder', default='./run_eval_result', type=str, help='Dir to save results') + args = parser.parse_args() + if not os.path.exists(args.resultfolder): + os.makedirs(args.resultfolder) + + opt = opts().parse('{} --load_model {}'.format('ctdet', './ctdet_coco_dla_2x.pth').split(' ')) + Dataset = get_dataset(opt.dataset, opt.task) + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + Detector = detector_factory[opt.task] + dataset = Dataset(opt, 'val') + opt.gpus[0] = -1 + detector = Detector(opt) + Meta = [] + filename = [] + results = {} + num_iters = len(dataset) + for ind in range(num_iters): + img_id = dataset.images[ind] + img_info = dataset.coco.loadImgs(ids=[img_id])[0] + img_path = os.path.join(dataset.img_dir, img_info['file_name']) + print(img_info['file_name'], "===", ind) + image = cv2.imread(img_path) + images, meta = detector.pre_process(image, 1.0, meta=None) + ret = run(args.bin_data_path, ind, meta, dataset, img_info['file_name']) + results[img_id] = ret + dataset.run_eval(results, 
args.resultfolder) + diff --git a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_preprocess.py b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_preprocess.py index 6400bc3f6aab474517dc17ea1d657e8bde687eaf..0c271cf04568075560b99465de7f92a929c4c7be 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_preprocess.py @@ -1,98 +1,98 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -import cv2 -import numpy as np -import torch - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = 0 - for file in sorted(in_files): - i = i + 1 - print(file, "===", i) - image = cv2.imread(os.path.join(file_path, file)) - height, width = image.shape[0:2] - new_height = int(height) - new_width = int(width) - #Fix size testing - inp_height, inp_width = 512, 512 - c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) - s = max(height, width) * 1.0 - trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) - resized_image = cv2.resize(image, (new_width, new_height)) - inp_image = cv2.warpAffine( - resized_image, trans_input, (inp_width, inp_height),flags=cv2.INTER_LINEAR) - - inp_image = ((inp_image / 255. 
- [[[0.40789655,0.44719303,0.47026116]]]) / [[[0.2886383,0.27408165,0.27809834]]]).astype(np.float32) - images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) - images = torch.from_numpy(images) - img = np.array(images).astype(np.float32) - img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) - -def get_affine_transform(center, - scale, - rot, - output_size, - shift=np.array([0, 0], dtype=np.float32), - inv=0): - if not isinstance(scale, np.ndarray) and not isinstance(scale, list): - scale = np.array([scale, scale], dtype=np.float32) - - scale_tmp = scale - src_w = scale_tmp[0] - dst_w = output_size[0] - dst_h = output_size[1] - - rot_rad = np.pi * rot / 180 - src_dir = get_dir([0, src_w * -0.5], rot_rad) - dst_dir = np.array([0, dst_w * -0.5], np.float32) - - src = np.zeros((3, 2), dtype=np.float32) - dst = np.zeros((3, 2), dtype=np.float32) - src[0, :] = center + scale_tmp * shift - src[1, :] = center + src_dir + scale_tmp * shift - dst[0, :] = [dst_w * 0.5, dst_h * 0.5] - dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir - - src[2:, :] = get_3rd_point(src[0, :], src[1, :]) - dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) - - if inv: - trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) - else: - trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) - - return trans - -def get_3rd_point(a, b): - direct = a - b - return b + np.array([-direct[1], direct[0]], dtype=np.float32) - -def get_dir(src_point, rot_rad): - sn, cs = np.sin(rot_rad), np.cos(rot_rad) - - src_result = [0, 0] - src_result[0] = src_point[0] * cs - src_point[1] * sn - src_result[1] = src_point[0] * sn + src_point[1] * cs - - return src_result -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path = os.path.abspath(sys.argv[2]) - preprocess(file_path, bin_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import cv2 +import numpy as np +import torch + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = 0 + for file in sorted(in_files): + i = i + 1 + print(file, "===", i) + image = cv2.imread(os.path.join(file_path, file)) + height, width = image.shape[0:2] + new_height = int(height) + new_width = int(width) + #Fix size testing + inp_height, inp_width = 512, 512 + c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) + s = max(height, width) * 1.0 + trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) + resized_image = cv2.resize(image, (new_width, new_height)) + inp_image = cv2.warpAffine( + resized_image, trans_input, (inp_width, inp_height),flags=cv2.INTER_LINEAR) + + inp_image = ((inp_image / 255. 
- [[[0.40789655,0.44719303,0.47026116]]]) / [[[0.2886383,0.27408165,0.27809834]]]).astype(np.float32) + images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) + images = torch.from_numpy(images) + img = np.array(images).astype(np.float32) + img.tofile(os.path.join(bin_path, file.split('.')[0] + '.bin')) + +def get_affine_transform(center, + scale, + rot, + output_size, + shift=np.array([0, 0], dtype=np.float32), + inv=0): + if not isinstance(scale, np.ndarray) and not isinstance(scale, list): + scale = np.array([scale, scale], dtype=np.float32) + + scale_tmp = scale + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = get_dir([0, src_w * -0.5], rot_rad) + dst_dir = np.array([0, dst_w * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + +def get_3rd_point(a, b): + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + +def get_dir(src_point, rot_rad): + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path = os.path.abspath(sys.argv[2]) + preprocess(file_path, bin_path) diff --git a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_pth2onnx.py index 3fcd485e9208259c3a495488b450f8fb13a17821..e5ccee5b9e831486f887f027677fcd82636fdf54 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/CenterNet/CenterNet_pth2onnx.py @@ -1,54 +1,54 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -CENTERNET_PATH = '../src' -sys.path.insert(0, CENTERNET_PATH) -MODEL_PATH = '../models/ctdet_coco_dla_2x.pth' -import os -import _init_paths -import torch -import torch.utils.data -from opts import opts -from models.model import create_model, load_model, save_model -from models.data_parallel import DataParallel -from logger import Logger -from datasets.dataset_factory import get_dataset - - -def convert(): - #device = torch.device("cpu") - device = torch.device("cuda") - torch.set_default_tensor_type(torch.cuda.FloatTensor) - TASK = 'ctdet' - opt = opts().parse('{} --load_model {}'.format(TASK, MODEL_PATH).split(' ')) - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - model = create_model(opt.arch, opt.heads, opt.head_conv) - model = load_model(model, input_file, None, opt.resume, opt.lr, opt.lr_step) - model.eval() - - input_names = ["actual_input"] - output_names = ["output1","output2","output3"] - dynamic_axes = {'actual_input': {0: '-1'}, 'output1': {0: '-1'}, 'output2': {0: '-1'}, 'output3': {0: '-1'}} - dummy_input = torch.randn(1, 3, 512, 512) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] - convert() +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +CENTERNET_PATH = '../src' +sys.path.insert(0, CENTERNET_PATH) +MODEL_PATH = '../models/ctdet_coco_dla_2x.pth' +import os +import _init_paths +import torch +import torch.utils.data +from opts import opts +from models.model import create_model, load_model, save_model +from models.data_parallel import DataParallel +from logger import Logger +from datasets.dataset_factory import get_dataset + + +def convert(): + #device = torch.device("cpu") + device = torch.device("cuda") + torch.set_default_tensor_type(torch.cuda.FloatTensor) + TASK = 'ctdet' + opt = opts().parse('{} --load_model {}'.format(TASK, MODEL_PATH).split(' ')) + Dataset = get_dataset(opt.dataset, opt.task) + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + model = create_model(opt.arch, opt.heads, opt.head_conv) + model = load_model(model, input_file, None, opt.resume, opt.lr, opt.lr_step) + model.eval() + + input_names = ["actual_input"] + output_names = ["output1","output2","output3"] + dynamic_axes = {'actual_input': {0: '-1'}, 'output1': {0: '-1'}, 'output2': {0: '-1'}, 'output3': {0: '-1'}} + dummy_input = torch.randn(1, 3, 512, 512) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] + convert() diff --git a/ACL_PyTorch/contrib/cv/detection/CenterNet/README.md b/ACL_PyTorch/contrib/cv/detection/CenterNet/README.md index 7a92a8f542707820f89f1c28687ecc068580bf68..9f7e243935466461ad7a9253a7035ef6b7f14014 100644 --- a/ACL_PyTorch/contrib/cv/detection/CenterNet/README.md +++ b/ACL_PyTorch/contrib/cv/detection/CenterNet/README.md @@ -1,103 +1,103 @@ -# CenterNet模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,建议手动安装 - -``` -pip3 install -r requirements.txt -``` - -2.获取,修改与安装开源模型代码 - -安装CenterNet - -``` -source env.sh -git clone https://github.com/xingyizhou/CenterNet -cd CenterNet/src/lib/models/networks -rm -r DCNv2 -rm -r pose_dla_dcn.py -git clone https://github.com/jinfagang/DCNv2_latest.git -mv DCNv2_latest DCNv2 -cd DCNv2 -rm -r dcn_v2.py -cd ../../../../../../ -mv dcn_v2.py CenterNet/src/lib/models/networks/DCNv2 -mv pose_dla_dcn.py CenterNet/src/lib/models/networks - -cd CenterNet/src/lib/external -make -cd ../models/networks/DCNv2 -python3 setup.py build develop -cd ../../../../../../ -``` - -备注:将源码中DCNv2算子更新到DCNv2_latest,以支持pytorch1.5;按照上述步骤替换pose_dcn_dla.py文件与dcn_v2.py文件,以修改自定义算子,实现onnx的推理过程 - -另外,需要单独修改python环境中的utils.py文件,不同环境下具体路径有一定差异。手动将/usr/local/python3.7.5/lib/python3.7/site-packages/torch/onnx/utils.py下述部分做相应更改: - -```python - not val_use_external_data_format: - # Only run checker if enabled and we are not using ATEN fallback and - # large model format export in not enabled. 
-- _check_onnx_proto(proto) -+ pass -``` -3.获取权重文件 - -[ctdet_coco_dla_2x.pth](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT),放在当前目录下 - -4.数据集 -获取COCO数据集:[coco2017](https://cocodataset.org/#download),下载其中val2017图片及其标注文件([2017 Val images](http://images.cocodataset.org/zips/val2017.zip),[2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)),解压后放入/opt/npu/datasets/coco以及CenterNet/data/coco/路径下,其中val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: - -``` -CenterNet -├── data -│ ├── coco -│ │ ├── annotations -│ │ ├── val2017 -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64放到当前目录 - -## 2 离线推理 - -CenterNet模型pth2onnx脚本由于算子暂不支持cpu,故只能在gpu运行,故将pth2om.sh拆为pth2onnx.sh和onnx2om.sh - -**在gpu上:** - -``` -bash test/pth2onnx.sh -``` - -并将生成的CenterNet.onnx移到310上,路径为:{当前目录}/test - -**在310上:** - -**test目录下已经打包了一个正确的onnx,可解压后直接使用** - -``` -unzip test/onnx.zip -``` - -``` -bash test/onnx2om.sh -bash test/eval_acc_perf.sh --datasets_path=/opt/npu/datasets/coco -``` - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| ------------- | ----------- | --------------- | -------- | -------- | -| CenterNet_bs1 | AP : 36.6 | AP : 36.4 | 23.25fps | 17.25fps | - -备注: - -1.原官网pth精度 AP : 37.4 是在线推理时keep_res(保持分辨率)的结果,但由于离线推理需要固定shape,故需要去掉keep_res(保持分辨率)。去掉keep_res(保持分辨率)后,跑在线推理精度评估得到 AP : 36.6 ,故以 AP : 36.6 作为精度基准 - -2.onnx因包含npu自定义算子dcnv2而不能推理,故使用在线推理测试性能 - -3.原模型在线推理中仅实现batchsize=1的精度测试和性能测试 - +# CenterNet模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,建议手动安装 + +``` +pip3 install -r requirements.txt +``` + +2.获取,修改与安装开源模型代码 + +安装CenterNet + +``` +source env.sh +git clone https://github.com/xingyizhou/CenterNet +cd CenterNet/src/lib/models/networks +rm -r DCNv2 +rm -r pose_dla_dcn.py +git clone https://github.com/jinfagang/DCNv2_latest.git +mv DCNv2_latest DCNv2 +cd DCNv2 +rm -r dcn_v2.py +cd ../../../../../../ +mv dcn_v2.py CenterNet/src/lib/models/networks/DCNv2 +mv pose_dla_dcn.py CenterNet/src/lib/models/networks + +cd CenterNet/src/lib/external +make +cd ../models/networks/DCNv2 +python3 setup.py build develop +cd ../../../../../../ +``` + +备注:将源码中DCNv2算子更新到DCNv2_latest,以支持pytorch1.5;按照上述步骤替换pose_dcn_dla.py文件与dcn_v2.py文件,以修改自定义算子,实现onnx的推理过程 + +另外,需要单独修改python环境中的utils.py文件,不同环境下具体路径有一定差异。手动将/usr/local/python3.7.5/lib/python3.7/site-packages/torch/onnx/utils.py下述部分做相应更改: + +```python + not val_use_external_data_format: + # Only run checker if enabled and we are not using ATEN fallback and + # large model format export in not enabled. 
+- _check_onnx_proto(proto) ++ pass +``` +3.获取权重文件 + +[ctdet_coco_dla_2x.pth](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT),放在当前目录下 + +4.数据集 +获取COCO数据集:[coco2017](https://cocodataset.org/#download),下载其中val2017图片及其标注文件([2017 Val images](http://images.cocodataset.org/zips/val2017.zip),[2017 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)),解压后放入/opt/npu/datasets/coco以及CenterNet/data/coco/路径下,其中val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: + +``` +CenterNet +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── val2017 +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64放到当前目录 + +## 2 离线推理 + +CenterNet模型pth2onnx脚本由于算子暂不支持cpu,故只能在gpu运行,故将pth2om.sh拆为pth2onnx.sh和onnx2om.sh + +**在gpu上:** + +``` +bash test/pth2onnx.sh +``` + +并将生成的CenterNet.onnx移到310上,路径为:{当前目录}/test + +**在310上:** + +**test目录下已经打包了一个正确的onnx,可解压后直接使用** + +``` +unzip test/onnx.zip +``` + +``` +bash test/onnx2om.sh +bash test/eval_acc_perf.sh --datasets_path=/opt/npu/datasets/coco +``` + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| ------------- | ----------- | --------------- | -------- | -------- | +| CenterNet_bs1 | AP : 36.6 | AP : 36.4 | 23.25fps | 17.25fps | + +备注: + +1.原官网pth精度 AP : 37.4 是在线推理时keep_res(保持分辨率)的结果,但由于离线推理需要固定shape,故需要去掉keep_res(保持分辨率)。去掉keep_res(保持分辨率)后,跑在线推理精度评估得到 AP : 36.6 ,故以 AP : 36.6 作为精度基准 + +2.onnx因包含npu自定义算子dcnv2而不能推理,故使用在线推理测试性能 + +3.原模型在线推理中仅实现batchsize=1的精度测试和性能测试 + diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/DSFD.patch b/ACL_PyTorch/contrib/cv/detection/DSFD/DSFD.patch index ad7be24d19dfc758bd7a649efb191fdfaaf1dd2c..6cbd178cb9f75d1c8e235cdf6bd21d48cc799769 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/DSFD.patch +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/DSFD.patch @@ -6693,60 +6693,60 @@ index 8c41037111..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/eval_tools/box_overlaps.pyx +++ /dev/null @@ -1,55 +0,0 @@ --# -------------------------------------------------------- --# Fast R-CNN --# Copyright (c) 2015 Microsoft --# Licensed under The MIT License [see LICENSE for details] --# Written by Sergey Karayev --# -------------------------------------------------------- -- --cimport cython --import numpy as np --cimport numpy as np -- --DTYPE = np.float --ctypedef np.float_t DTYPE_t -- --def bbox_overlaps( -- np.ndarray[DTYPE_t, ndim=2] boxes, -- np.ndarray[DTYPE_t, ndim=2] query_boxes): -- """ -- Parameters -- ---------- -- boxes: (N, 4) ndarray of float -- query_boxes: (K, 4) ndarray of float -- Returns -- ------- -- overlaps: (N, K) ndarray of overlap between boxes and query_boxes -- """ -- cdef unsigned int N = boxes.shape[0] -- cdef unsigned int K = query_boxes.shape[0] -- cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) -- cdef DTYPE_t iw, ih, box_area -- cdef DTYPE_t ua -- cdef unsigned int k, n -- for k in range(K): -- box_area = ( -- (query_boxes[k, 2] - query_boxes[k, 0] + 1) * -- (query_boxes[k, 3] - query_boxes[k, 1] + 1) -- ) -- for n in range(N): -- iw = ( -- min(boxes[n, 2], query_boxes[k, 2]) - -- max(boxes[n, 0], query_boxes[k, 0]) + 1 -- ) -- if iw > 0: -- ih = ( -- min(boxes[n, 3], query_boxes[k, 3]) - -- max(boxes[n, 1], query_boxes[k, 1]) + 1 -- ) -- if ih > 0: -- ua = float( -- (boxes[n, 2] - boxes[n, 0] + 1) * -- (boxes[n, 3] - boxes[n, 1] + 1) + -- box_area - iw * ih -- ) -- overlaps[n, k] = iw * ih / ua +-# 
-------------------------------------------------------- +-# Fast R-CNN +-# Copyright (c) 2015 Microsoft +-# Licensed under The MIT License [see LICENSE for details] +-# Written by Sergey Karayev +-# -------------------------------------------------------- +- +-cimport cython +-import numpy as np +-cimport numpy as np +- +-DTYPE = np.float +-ctypedef np.float_t DTYPE_t +- +-def bbox_overlaps( +- np.ndarray[DTYPE_t, ndim=2] boxes, +- np.ndarray[DTYPE_t, ndim=2] query_boxes): +- """ +- Parameters +- ---------- +- boxes: (N, 4) ndarray of float +- query_boxes: (K, 4) ndarray of float +- Returns +- ------- +- overlaps: (N, K) ndarray of overlap between boxes and query_boxes +- """ +- cdef unsigned int N = boxes.shape[0] +- cdef unsigned int K = query_boxes.shape[0] +- cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) +- cdef DTYPE_t iw, ih, box_area +- cdef DTYPE_t ua +- cdef unsigned int k, n +- for k in range(K): +- box_area = ( +- (query_boxes[k, 2] - query_boxes[k, 0] + 1) * +- (query_boxes[k, 3] - query_boxes[k, 1] + 1) +- ) +- for n in range(N): +- iw = ( +- min(boxes[n, 2], query_boxes[k, 2]) - +- max(boxes[n, 0], query_boxes[k, 0]) + 1 +- ) +- if iw > 0: +- ih = ( +- min(boxes[n, 3], query_boxes[k, 3]) - +- max(boxes[n, 1], query_boxes[k, 1]) + 1 +- ) +- if ih > 0: +- ua = float( +- (boxes[n, 2] - boxes[n, 0] + 1) * +- (boxes[n, 3] - boxes[n, 1] + 1) + +- box_area - iw * ih +- ) +- overlaps[n, k] = iw * ih / ua - return overlaps \ No newline at end of file diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/eval_tools/setup.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/eval_tools/setup.py @@ -6755,676 +6755,676 @@ index 53f56150a9..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/eval_tools/setup.py +++ /dev/null @@ -1,22 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#coding=utf-8 -- --from distutils.core import setup, Extension --from Cython.Build import cythonize --import numpy -- --package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) --setup(ext_modules=cythonize([package])) +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. 
+- +-#coding=utf-8 +- +-from distutils.core import setup, Extension +-from Cython.Build import cythonize +-import numpy +- +-package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) +-setup(ext_modules=cythonize([package])) diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/__init__.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/__init__.py deleted file mode 100644 index e3b3268753..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#-*- coding:utf-8 -*- -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- -- --from .functions import * --from .modules import * +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-#-*- coding:utf-8 -*- +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +- +-from .functions import * +-from .modules import * diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/bbox_utils.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/bbox_utils.py deleted file mode 100644 index 71766a3145..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/bbox_utils.py +++ /dev/null @@ -1,324 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#coding=utf-8 -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- -- --import torch -- -- --def point_form(boxes): -- """ Convert prior_boxes to (xmin, ymin, xmax, ymax) -- representation for comparison to point form ground truth data. -- Args: -- boxes: (tensor) center-size default boxes from priorbox layers. 
-- Return: -- boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. -- """ -- return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin -- boxes[:, :2] + boxes[:, 2:] / 2), 1) # xmax, ymax -- -- --def center_size(boxes): -- """ Convert prior_boxes to (cx, cy, w, h) -- representation for comparison to center-size form ground truth data. -- Args: -- boxes: (tensor) point_form boxes -- Return: -- boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. -- """ -- return torch.cat([(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy -- boxes[:, 2:] - boxes[:, :2]], 1) # w, h -- -- --def intersect(box_a, box_b): -- """ We resize both tensors to [A,B,2] without new malloc: -- [A,2] -> [A,1,2] -> [A,B,2] -- [B,2] -> [1,B,2] -> [A,B,2] -- Then we compute the area of intersect between box_a and box_b. -- Args: -- box_a: (tensor) bounding boxes, Shape: [A,4]. -- box_b: (tensor) bounding boxes, Shape: [B,4]. -- Return: -- (tensor) intersection area, Shape: [A,B]. -- """ -- A = box_a.size(0) -- B = box_b.size(0) -- max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), -- box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) -- min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), -- box_b[:, :2].unsqueeze(0).expand(A, B, 2)) -- inter = torch.clamp((max_xy - min_xy), min=0) -- return inter[:, :, 0] * inter[:, :, 1] -- -- --def jaccard(box_a, box_b): -- """Compute the jaccard overlap of two sets of boxes. The jaccard overlap -- is simply the intersection over union of two boxes. Here we operate on -- ground truth boxes and default boxes. -- E.g.: -- A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) -- Args: -- box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] -- box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] -- Return: -- jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] -- """ -- inter = intersect(box_a, box_b) -- area_a = ((box_a[:, 2] - box_a[:, 0]) * -- (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] -- area_b = ((box_b[:, 2] - box_b[:, 0]) * -- (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] -- union = area_a + area_b - inter -- return inter / union # [A,B] -- -- --def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): -- """Match each prior box with the ground truth box of the highest jaccard -- overlap, encode the bounding boxes, then return the matched indices -- corresponding to both confidence and location preds. -- Args: -- threshold: (float) The overlap threshold used when mathing boxes. -- truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. -- priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. -- variances: (tensor) Variances corresponding to each prior coord, -- Shape: [num_priors, 4]. -- labels: (tensor) All the class labels for the image, Shape: [num_obj]. -- loc_t: (tensor) Tensor to be filled w/ endcoded location targets. -- conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. -- idx: (int) current batch index -- Return: -- The matched indices corresponding to 1)location and 2)confidence preds. 
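A small editorial check, not part of the patch: `point_form` and `center_size` above are inverse conversions between (cx, cy, w, h) priors and (xmin, ymin, xmax, ymax) corners, and `jaccard` builds on `intersect` to produce the IoU matrix used for matching. A self-contained sketch of the round trip, assuming the same formulas:

```python
import torch

def point_form(boxes):
    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2,
                      boxes[:, :2] + boxes[:, 2:] / 2), 1)

def center_size(boxes):
    # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,
                      boxes[:, 2:] - boxes[:, :2]), 1)

priors = torch.tensor([[0.5, 0.5, 0.2, 0.4]])         # center-size form
corners = point_form(priors)                          # ~[[0.4, 0.3, 0.6, 0.7]]
print(torch.allclose(center_size(corners), priors))   # True
```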
-- """ -- # jaccard index -- overlaps = jaccard( -- truths, -- point_form(priors) -- ) -- # (Bipartite Matching) -- # [1,num_objects] best prior for each ground truth -- best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) -- # [1,num_priors] best ground truth for each prior -- best_truth_overlap, best_truth_idx = overlaps.max( -- 0, keepdim=True) # 0-2000 -- best_truth_idx.squeeze_(0) -- best_truth_overlap.squeeze_(0) -- best_prior_idx.squeeze_(1) -- best_prior_overlap.squeeze_(1) -- best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior -- # TODO refactor: index best_prior_idx with long tensor -- # ensure every gt matches with its prior of max overlap -- for j in range(best_prior_idx.size(0)): -- best_truth_idx[best_prior_idx[j]] = j -- _th1, _th2, _th3 = threshold # _th1 = 0.1 ,_th2 = 0.35,_th3 = 0.5 -- -- N = (torch.sum(best_prior_overlap >= _th2) + -- torch.sum(best_prior_overlap >= _th3)) // 2 -- matches = truths[best_truth_idx] # Shape: [num_priors,4] -- conf = labels[best_truth_idx] # Shape: [num_priors] -- conf[best_truth_overlap < _th2] = 0 # label as background -- -- best_truth_overlap_clone = best_truth_overlap.clone() -- add_idx = best_truth_overlap_clone.gt( -- _th1).eq(best_truth_overlap_clone.lt(_th2)) -- best_truth_overlap_clone[1 - add_idx] = 0 -- stage2_overlap, stage2_idx = best_truth_overlap_clone.sort(descending=True) -- -- stage2_overlap = stage2_overlap.gt(_th1) -- -- if N > 0: -- N = torch.sum(stage2_overlap[:N]) if torch.sum( -- stage2_overlap[:N]) < N else N -- conf[stage2_idx[:N]] += 1 -- -- loc = encode(matches, priors, variances) -- loc_t[idx] = loc # [num_priors,4] encoded offsets to learn -- conf_t[idx] = conf # [num_priors] top class label for each prior -- -- --def match_ssd(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): -- """Match each prior box with the ground truth box of the highest jaccard -- overlap, encode the bounding boxes, then return the matched indices -- corresponding to both confidence and location preds. -- Args: -- threshold: (float) The overlap threshold used when mathing boxes. -- truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. -- priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. -- variances: (tensor) Variances corresponding to each prior coord, -- Shape: [num_priors, 4]. -- labels: (tensor) All the class labels for the image, Shape: [num_obj]. -- loc_t: (tensor) Tensor to be filled w/ endcoded location targets. -- conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. -- idx: (int) current batch index -- Return: -- The matched indices corresponding to 1)location and 2)confidence preds. 
-- """ -- # jaccard index -- overlaps = jaccard( -- truths, -- point_form(priors) -- ) -- # (Bipartite Matching) -- # [1,num_objects] best prior for each ground truth -- best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) -- # [1,num_priors] best ground truth for each prior -- best_truth_overlap, best_truth_idx = overlaps.max( -- 0, keepdim=True) # 0-2000 -- best_truth_idx.squeeze_(0) -- best_truth_overlap.squeeze_(0) -- best_prior_idx.squeeze_(1) -- best_prior_overlap.squeeze_(1) -- best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior -- # TODO refactor: index best_prior_idx with long tensor -- # ensure every gt matches with its prior of max overlap -- for j in range(best_prior_idx.size(0)): -- best_truth_idx[best_prior_idx[j]] = j -- matches = truths[best_truth_idx] # Shape: [num_priors,4] -- conf = labels[best_truth_idx] # Shape: [num_priors] -- conf[best_truth_overlap < threshold] = 0 # label as background -- loc = encode(matches, priors, variances) -- loc_t[idx] = loc # [num_priors,4] encoded offsets to learn -- conf_t[idx] = conf # [num_priors] top class label for each prior -- -- --def encode(matched, priors, variances): -- """Encode the variances from the priorbox layers into the ground truth boxes -- we have matched (based on jaccard overlap) with the prior boxes. -- Args: -- matched: (tensor) Coords of ground truth for each prior in point-form -- Shape: [num_priors, 4]. -- priors: (tensor) Prior boxes in center-offset form -- Shape: [num_priors,4]. -- variances: (list[float]) Variances of priorboxes -- Return: -- encoded boxes (tensor), Shape: [num_priors, 4] -- """ -- -- # dist b/t match center and prior's center -- g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] -- # encode variance -- g_cxcy /= (variances[0] * priors[:, 2:]) -- # match wh / prior wh -- g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] -- #g_wh = torch.log(g_wh) / variances[1] -- g_wh = torch.log(g_wh) / variances[1] -- # return target for smooth_l1_loss -- return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] -- -- --# Adapted from https://github.com/Hakuyume/chainer-ssd --def decode(loc, priors, variances): -- """Decode locations from predictions using priors to undo -- the encoding we did for offset regression at train time. -- Args: -- loc (tensor): location predictions for loc layers, -- Shape: [num_priors,4] -- priors (tensor): Prior boxes in center-offset form. -- Shape: [num_priors,4]. -- variances: (list[float]) Variances of priorboxes -- Return: -- decoded bounding box predictions -- """ -- -- boxes = torch.cat(( -- priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], -- priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) -- boxes[:, :2] -= boxes[:, 2:] / 2 -- boxes[:, 2:] += boxes[:, :2] -- return boxes -- -- --def log_sum_exp(x): -- """Utility function for computing log_sum_exp while determining -- This will be used to determine unaveraged confidence loss across -- all examples in a batch. -- Args: -- x (Variable(tensor)): conf_preds from conf layers -- """ -- x_max = x.data.max() -- return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max -- -- --# Original author: Francisco Massa: --# https://github.com/fmassa/object-detection.torch --# Ported to PyTorch by Max deGroot (02/01/2017) --def nms(boxes, scores, overlap=0.5, top_k=200): -- """Apply non-maximum suppression at test time to avoid detecting too many -- overlapping bounding boxes for a given object. 
-- Args: -- boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. -- scores: (tensor) The class predscores for the img, Shape:[num_priors]. -- overlap: (float) The overlap thresh for suppressing unnecessary boxes. -- top_k: (int) The Maximum number of box preds to consider. -- Return: -- The indices of the kept boxes with respect to num_priors. -- """ -- -- keep = scores.new(scores.size(0)).zero_().long() -- if boxes.numel() == 0: -- return keep -- x1 = boxes[:, 0] -- y1 = boxes[:, 1] -- x2 = boxes[:, 2] -- y2 = boxes[:, 3] -- area = torch.mul(x2 - x1, y2 - y1) -- v, idx = scores.sort(0) # sort in ascending order -- # I = I[v >= 0.01] -- idx = idx[-top_k:] # indices of the top-k largest vals -- xx1 = boxes.new() -- yy1 = boxes.new() -- xx2 = boxes.new() -- yy2 = boxes.new() -- w = boxes.new() -- h = boxes.new() -- -- # keep = torch.Tensor() -- count = 0 -- while idx.numel() > 0: -- i = idx[-1] # index of current largest val -- # keep.append(i) -- keep[count] = i -- count += 1 -- if idx.size(0) == 1: -- break -- idx = idx[:-1] # remove kept element from view -- # load bboxes of next highest vals -- torch.index_select(x1, 0, idx, out=xx1) -- torch.index_select(y1, 0, idx, out=yy1) -- torch.index_select(x2, 0, idx, out=xx2) -- torch.index_select(y2, 0, idx, out=yy2) -- # store element-wise max with next highest score -- xx1 = torch.clamp(xx1, min=x1[i]) -- yy1 = torch.clamp(yy1, min=y1[i]) -- xx2 = torch.clamp(xx2, max=x2[i]) -- yy2 = torch.clamp(yy2, max=y2[i]) -- w.resize_as_(xx2) -- h.resize_as_(yy2) -- w = xx2 - xx1 -- h = yy2 - yy1 -- # check sizes of xx1 and xx2.. after each iteration -- w = torch.clamp(w, min=0.0) -- h = torch.clamp(h, min=0.0) -- inter = w * h -- # IoU = i / (area(a) + area(b) - i) -- rem_areas = torch.index_select(area, 0, idx) # load remaining areas) -- union = (rem_areas - inter) + area[i] -- IoU = inter / union # store result in iou -- # keep only elements with an IoU <= overlap -- idx = idx[IoU.le(overlap)] -- return keep, count +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-#coding=utf-8 +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +- +-import torch +- +- +-def point_form(boxes): +- """ Convert prior_boxes to (xmin, ymin, xmax, ymax) +- representation for comparison to point form ground truth data. +- Args: +- boxes: (tensor) center-size default boxes from priorbox layers. +- Return: +- boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. +- """ +- return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin +- boxes[:, :2] + boxes[:, 2:] / 2), 1) # xmax, ymax +- +- +-def center_size(boxes): +- """ Convert prior_boxes to (cx, cy, w, h) +- representation for comparison to center-size form ground truth data. +- Args: +- boxes: (tensor) point_form boxes +- Return: +- boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
+- """ +- return torch.cat([(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy +- boxes[:, 2:] - boxes[:, :2]], 1) # w, h +- +- +-def intersect(box_a, box_b): +- """ We resize both tensors to [A,B,2] without new malloc: +- [A,2] -> [A,1,2] -> [A,B,2] +- [B,2] -> [1,B,2] -> [A,B,2] +- Then we compute the area of intersect between box_a and box_b. +- Args: +- box_a: (tensor) bounding boxes, Shape: [A,4]. +- box_b: (tensor) bounding boxes, Shape: [B,4]. +- Return: +- (tensor) intersection area, Shape: [A,B]. +- """ +- A = box_a.size(0) +- B = box_b.size(0) +- max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), +- box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) +- min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), +- box_b[:, :2].unsqueeze(0).expand(A, B, 2)) +- inter = torch.clamp((max_xy - min_xy), min=0) +- return inter[:, :, 0] * inter[:, :, 1] +- +- +-def jaccard(box_a, box_b): +- """Compute the jaccard overlap of two sets of boxes. The jaccard overlap +- is simply the intersection over union of two boxes. Here we operate on +- ground truth boxes and default boxes. +- E.g.: +- A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) +- Args: +- box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] +- box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] +- Return: +- jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] +- """ +- inter = intersect(box_a, box_b) +- area_a = ((box_a[:, 2] - box_a[:, 0]) * +- (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] +- area_b = ((box_b[:, 2] - box_b[:, 0]) * +- (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] +- union = area_a + area_b - inter +- return inter / union # [A,B] +- +- +-def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): +- """Match each prior box with the ground truth box of the highest jaccard +- overlap, encode the bounding boxes, then return the matched indices +- corresponding to both confidence and location preds. +- Args: +- threshold: (float) The overlap threshold used when mathing boxes. +- truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. +- priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. +- variances: (tensor) Variances corresponding to each prior coord, +- Shape: [num_priors, 4]. +- labels: (tensor) All the class labels for the image, Shape: [num_obj]. +- loc_t: (tensor) Tensor to be filled w/ endcoded location targets. +- conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. +- idx: (int) current batch index +- Return: +- The matched indices corresponding to 1)location and 2)confidence preds. 
+- """ +- # jaccard index +- overlaps = jaccard( +- truths, +- point_form(priors) +- ) +- # (Bipartite Matching) +- # [1,num_objects] best prior for each ground truth +- best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) +- # [1,num_priors] best ground truth for each prior +- best_truth_overlap, best_truth_idx = overlaps.max( +- 0, keepdim=True) # 0-2000 +- best_truth_idx.squeeze_(0) +- best_truth_overlap.squeeze_(0) +- best_prior_idx.squeeze_(1) +- best_prior_overlap.squeeze_(1) +- best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior +- # TODO refactor: index best_prior_idx with long tensor +- # ensure every gt matches with its prior of max overlap +- for j in range(best_prior_idx.size(0)): +- best_truth_idx[best_prior_idx[j]] = j +- _th1, _th2, _th3 = threshold # _th1 = 0.1 ,_th2 = 0.35,_th3 = 0.5 +- +- N = (torch.sum(best_prior_overlap >= _th2) + +- torch.sum(best_prior_overlap >= _th3)) // 2 +- matches = truths[best_truth_idx] # Shape: [num_priors,4] +- conf = labels[best_truth_idx] # Shape: [num_priors] +- conf[best_truth_overlap < _th2] = 0 # label as background +- +- best_truth_overlap_clone = best_truth_overlap.clone() +- add_idx = best_truth_overlap_clone.gt( +- _th1).eq(best_truth_overlap_clone.lt(_th2)) +- best_truth_overlap_clone[1 - add_idx] = 0 +- stage2_overlap, stage2_idx = best_truth_overlap_clone.sort(descending=True) +- +- stage2_overlap = stage2_overlap.gt(_th1) +- +- if N > 0: +- N = torch.sum(stage2_overlap[:N]) if torch.sum( +- stage2_overlap[:N]) < N else N +- conf[stage2_idx[:N]] += 1 +- +- loc = encode(matches, priors, variances) +- loc_t[idx] = loc # [num_priors,4] encoded offsets to learn +- conf_t[idx] = conf # [num_priors] top class label for each prior +- +- +-def match_ssd(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): +- """Match each prior box with the ground truth box of the highest jaccard +- overlap, encode the bounding boxes, then return the matched indices +- corresponding to both confidence and location preds. +- Args: +- threshold: (float) The overlap threshold used when mathing boxes. +- truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. +- priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. +- variances: (tensor) Variances corresponding to each prior coord, +- Shape: [num_priors, 4]. +- labels: (tensor) All the class labels for the image, Shape: [num_obj]. +- loc_t: (tensor) Tensor to be filled w/ endcoded location targets. +- conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. +- idx: (int) current batch index +- Return: +- The matched indices corresponding to 1)location and 2)confidence preds. 
+- """ +- # jaccard index +- overlaps = jaccard( +- truths, +- point_form(priors) +- ) +- # (Bipartite Matching) +- # [1,num_objects] best prior for each ground truth +- best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) +- # [1,num_priors] best ground truth for each prior +- best_truth_overlap, best_truth_idx = overlaps.max( +- 0, keepdim=True) # 0-2000 +- best_truth_idx.squeeze_(0) +- best_truth_overlap.squeeze_(0) +- best_prior_idx.squeeze_(1) +- best_prior_overlap.squeeze_(1) +- best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior +- # TODO refactor: index best_prior_idx with long tensor +- # ensure every gt matches with its prior of max overlap +- for j in range(best_prior_idx.size(0)): +- best_truth_idx[best_prior_idx[j]] = j +- matches = truths[best_truth_idx] # Shape: [num_priors,4] +- conf = labels[best_truth_idx] # Shape: [num_priors] +- conf[best_truth_overlap < threshold] = 0 # label as background +- loc = encode(matches, priors, variances) +- loc_t[idx] = loc # [num_priors,4] encoded offsets to learn +- conf_t[idx] = conf # [num_priors] top class label for each prior +- +- +-def encode(matched, priors, variances): +- """Encode the variances from the priorbox layers into the ground truth boxes +- we have matched (based on jaccard overlap) with the prior boxes. +- Args: +- matched: (tensor) Coords of ground truth for each prior in point-form +- Shape: [num_priors, 4]. +- priors: (tensor) Prior boxes in center-offset form +- Shape: [num_priors,4]. +- variances: (list[float]) Variances of priorboxes +- Return: +- encoded boxes (tensor), Shape: [num_priors, 4] +- """ +- +- # dist b/t match center and prior's center +- g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] +- # encode variance +- g_cxcy /= (variances[0] * priors[:, 2:]) +- # match wh / prior wh +- g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] +- #g_wh = torch.log(g_wh) / variances[1] +- g_wh = torch.log(g_wh) / variances[1] +- # return target for smooth_l1_loss +- return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] +- +- +-# Adapted from https://github.com/Hakuyume/chainer-ssd +-def decode(loc, priors, variances): +- """Decode locations from predictions using priors to undo +- the encoding we did for offset regression at train time. +- Args: +- loc (tensor): location predictions for loc layers, +- Shape: [num_priors,4] +- priors (tensor): Prior boxes in center-offset form. +- Shape: [num_priors,4]. +- variances: (list[float]) Variances of priorboxes +- Return: +- decoded bounding box predictions +- """ +- +- boxes = torch.cat(( +- priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], +- priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) +- boxes[:, :2] -= boxes[:, 2:] / 2 +- boxes[:, 2:] += boxes[:, :2] +- return boxes +- +- +-def log_sum_exp(x): +- """Utility function for computing log_sum_exp while determining +- This will be used to determine unaveraged confidence loss across +- all examples in a batch. +- Args: +- x (Variable(tensor)): conf_preds from conf layers +- """ +- x_max = x.data.max() +- return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max +- +- +-# Original author: Francisco Massa: +-# https://github.com/fmassa/object-detection.torch +-# Ported to PyTorch by Max deGroot (02/01/2017) +-def nms(boxes, scores, overlap=0.5, top_k=200): +- """Apply non-maximum suppression at test time to avoid detecting too many +- overlapping bounding boxes for a given object. 
+- Args: +- boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. +- scores: (tensor) The class predscores for the img, Shape:[num_priors]. +- overlap: (float) The overlap thresh for suppressing unnecessary boxes. +- top_k: (int) The Maximum number of box preds to consider. +- Return: +- The indices of the kept boxes with respect to num_priors. +- """ +- +- keep = scores.new(scores.size(0)).zero_().long() +- if boxes.numel() == 0: +- return keep +- x1 = boxes[:, 0] +- y1 = boxes[:, 1] +- x2 = boxes[:, 2] +- y2 = boxes[:, 3] +- area = torch.mul(x2 - x1, y2 - y1) +- v, idx = scores.sort(0) # sort in ascending order +- # I = I[v >= 0.01] +- idx = idx[-top_k:] # indices of the top-k largest vals +- xx1 = boxes.new() +- yy1 = boxes.new() +- xx2 = boxes.new() +- yy2 = boxes.new() +- w = boxes.new() +- h = boxes.new() +- +- # keep = torch.Tensor() +- count = 0 +- while idx.numel() > 0: +- i = idx[-1] # index of current largest val +- # keep.append(i) +- keep[count] = i +- count += 1 +- if idx.size(0) == 1: +- break +- idx = idx[:-1] # remove kept element from view +- # load bboxes of next highest vals +- torch.index_select(x1, 0, idx, out=xx1) +- torch.index_select(y1, 0, idx, out=yy1) +- torch.index_select(x2, 0, idx, out=xx2) +- torch.index_select(y2, 0, idx, out=yy2) +- # store element-wise max with next highest score +- xx1 = torch.clamp(xx1, min=x1[i]) +- yy1 = torch.clamp(yy1, min=y1[i]) +- xx2 = torch.clamp(xx2, max=x2[i]) +- yy2 = torch.clamp(yy2, max=y2[i]) +- w.resize_as_(xx2) +- h.resize_as_(yy2) +- w = xx2 - xx1 +- h = yy2 - yy1 +- # check sizes of xx1 and xx2.. after each iteration +- w = torch.clamp(w, min=0.0) +- h = torch.clamp(h, min=0.0) +- inter = w * h +- # IoU = i / (area(a) + area(b) - i) +- rem_areas = torch.index_select(area, 0, idx) # load remaining areas) +- union = (rem_areas - inter) + area[i] +- IoU = inter / union # store result in iou +- # keep only elements with an IoU <= overlap +- idx = idx[IoU.le(overlap)] +- return keep, count diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/__init__.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/__init__.py deleted file mode 100644 index 9bc5349934..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --from .prior_box import PriorBox --from .detection import Detect -- --__all__=['Detect','PriorBox'] -- +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. 
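Editorial note, not part of the patch: the `nms` routine listed above performs plain greedy IoU suppression (sort by score, keep the best box, drop everything overlapping it by more than `overlap`, repeat). Where torchvision is available (an assumption; it is not required by this patch), the equivalent call is a one-liner:

```python
import torch
from torchvision.ops import nms  # assumption: torchvision is installed

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 10., 10.],     # heavy overlap with the first box
                      [20., 20., 30., 30.]])  # disjoint box
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]) -- the lower-scored overlapping box is suppressed
```

Apart from the `top_k` truncation and the `(keep, count)` return convention, the result should match the hand-written loop.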
+-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-from .prior_box import PriorBox +-from .detection import Detect +- +-__all__=['Detect','PriorBox'] +- diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/detection.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/detection.py deleted file mode 100644 index 94b23f9410..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/detection.py +++ /dev/null @@ -1,87 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --# coding=utf-8 -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- --import torch -- --from ..bbox_utils import decode, nms --from torch.autograd import Function -- -- --class Detect(Function): -- """At test time, Detect is the final layer of SSD. Decode location preds, -- apply non-maximum suppression to location predictions based on conf -- scores and threshold to a top_k number of output predictions for both -- confidence score and locations. 
-- """ -- -- def __init__(self, cfg): -- self.num_classes = cfg.NUM_CLASSES -- self.top_k = cfg.TOP_K -- self.nms_thresh = cfg.NMS_THRESH -- self.conf_thresh = cfg.CONF_THRESH -- self.variance = cfg.VARIANCE -- self.nms_top_k = cfg.NMS_TOP_K -- -- def forward(self, loc_data, conf_data, prior_data): -- """ -- Args: -- loc_data: (tensor) Loc preds from loc layers -- Shape: [batch,num_priors*4] -- conf_data: (tensor) Shape: Conf preds from conf layers -- Shape: [batch*num_priors,num_classes] -- prior_data: (tensor) Prior boxes and variances from priorbox layers -- Shape: [1,num_priors,4] -- """ -- num = loc_data.size(0) -- num_priors = prior_data.size(0) -- -- conf_preds = conf_data.view( -- num, num_priors, self.num_classes).transpose(2, 1) -- batch_priors = prior_data.view(-1, num_priors, -- 4).expand(num, num_priors, 4) -- batch_priors = batch_priors.contiguous().view(-1, 4) -- -- decoded_boxes = decode(loc_data.view(-1, 4), -- batch_priors, self.variance) -- decoded_boxes = decoded_boxes.view(num, num_priors, 4) -- -- output = torch.zeros(num, self.num_classes, self.top_k, 5) -- -- for i in range(num): -- boxes = decoded_boxes[i].clone() -- conf_scores = conf_preds[i].clone() -- -- for cl in range(1, self.num_classes): -- c_mask = conf_scores[cl].gt(self.conf_thresh) -- scores = conf_scores[cl][c_mask] -- -- #VIVID -- if scores.size(0) == 0: -- continue -- l_mask = c_mask.unsqueeze(1).expand_as(boxes) -- boxes_ = boxes[l_mask].view(-1, 4) -- ids, count = nms(boxes_, scores, self.nms_thresh, self.nms_top_k) -- count = count if count < self.top_k else self.top_k -- -- output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), -- boxes_[ids[:count]]), 1) -- print("get test detect res:", output) -- -- return output +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-# coding=utf-8 +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +-import torch +- +-from ..bbox_utils import decode, nms +-from torch.autograd import Function +- +- +-class Detect(Function): +- """At test time, Detect is the final layer of SSD. Decode location preds, +- apply non-maximum suppression to location predictions based on conf +- scores and threshold to a top_k number of output predictions for both +- confidence score and locations. 
+- """ +- +- def __init__(self, cfg): +- self.num_classes = cfg.NUM_CLASSES +- self.top_k = cfg.TOP_K +- self.nms_thresh = cfg.NMS_THRESH +- self.conf_thresh = cfg.CONF_THRESH +- self.variance = cfg.VARIANCE +- self.nms_top_k = cfg.NMS_TOP_K +- +- def forward(self, loc_data, conf_data, prior_data): +- """ +- Args: +- loc_data: (tensor) Loc preds from loc layers +- Shape: [batch,num_priors*4] +- conf_data: (tensor) Shape: Conf preds from conf layers +- Shape: [batch*num_priors,num_classes] +- prior_data: (tensor) Prior boxes and variances from priorbox layers +- Shape: [1,num_priors,4] +- """ +- num = loc_data.size(0) +- num_priors = prior_data.size(0) +- +- conf_preds = conf_data.view( +- num, num_priors, self.num_classes).transpose(2, 1) +- batch_priors = prior_data.view(-1, num_priors, +- 4).expand(num, num_priors, 4) +- batch_priors = batch_priors.contiguous().view(-1, 4) +- +- decoded_boxes = decode(loc_data.view(-1, 4), +- batch_priors, self.variance) +- decoded_boxes = decoded_boxes.view(num, num_priors, 4) +- +- output = torch.zeros(num, self.num_classes, self.top_k, 5) +- +- for i in range(num): +- boxes = decoded_boxes[i].clone() +- conf_scores = conf_preds[i].clone() +- +- for cl in range(1, self.num_classes): +- c_mask = conf_scores[cl].gt(self.conf_thresh) +- scores = conf_scores[cl][c_mask] +- +- #VIVID +- if scores.size(0) == 0: +- continue +- l_mask = c_mask.unsqueeze(1).expand_as(boxes) +- boxes_ = boxes[l_mask].view(-1, 4) +- ids, count = nms(boxes_, scores, self.nms_thresh, self.nms_top_k) +- count = count if count < self.top_k else self.top_k +- +- output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), +- boxes_[ids[:count]]), 1) +- print("get test detect res:", output) +- +- return output diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/prior_box.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/prior_box.py deleted file mode 100644 index b49b348072..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/functions/prior_box.py +++ /dev/null @@ -1,78 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#-*- coding:utf-8 -*- -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- --import torch --from itertools import product as product --import math -- -- --class PriorBox(object): -- """Compute priorbox coordinates in center-offset form for each source -- feature map. 
-- """ -- -- def __init__(self, input_size, feature_maps,cfg,pal=2): -- super(PriorBox, self).__init__() -- self.imh = input_size[0] -- self.imw = input_size[1] -- # number of priors for feature map location (either 4 or 6) -- self.variance = cfg.VARIANCE or [0.1] -- #self.feature_maps = cfg.FEATURE_MAPS -- if pal==1: -- self.min_sizes = cfg.ANCHOR_SIZES1 -- elif pal==2: -- self.min_sizes = cfg.ANCHOR_SIZES2 -- self.aspect_ratio = cfg.ASPECT_RATIO -- self.steps = cfg.STEPS -- self.clip = cfg.CLIP -- for v in self.variance: -- if v <= 0: -- raise ValueError('Variances must be greater than 0') -- self.feature_maps = feature_maps -- -- -- def forward(self): -- mean = [] -- for k in range(len(self.feature_maps)): -- feath = self.feature_maps[k][0] -- featw = self.feature_maps[k][1] -- for i, j in product(range(feath), range(featw)): -- f_kw = self.imw / self.steps[k] -- f_kh = self.imh / self.steps[k] -- -- cx = (j + 0.5) / f_kw -- cy = (i + 0.5) / f_kh -- -- s_kw = self.min_sizes[k] / self.imw -- s_kh = self.min_sizes[k] / self.imh -- for ar in self.aspect_ratio: -- mean += [cx, cy, s_kw/math.sqrt(ar), s_kh*math.sqrt(ar)] -- -- output = torch.Tensor(mean).view(-1, 4) -- if self.clip: -- output.clamp_(max=1, min=0) -- return output -- -- --if __name__ == '__main__': -- from data.config import cfg -- p = PriorBox([640, 640], cfg) -- out = p.forward() -- print(out.size()) +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-#-*- coding:utf-8 -*- +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +-import torch +-from itertools import product as product +-import math +- +- +-class PriorBox(object): +- """Compute priorbox coordinates in center-offset form for each source +- feature map. 
+- """ +- +- def __init__(self, input_size, feature_maps,cfg,pal=2): +- super(PriorBox, self).__init__() +- self.imh = input_size[0] +- self.imw = input_size[1] +- # number of priors for feature map location (either 4 or 6) +- self.variance = cfg.VARIANCE or [0.1] +- #self.feature_maps = cfg.FEATURE_MAPS +- if pal==1: +- self.min_sizes = cfg.ANCHOR_SIZES1 +- elif pal==2: +- self.min_sizes = cfg.ANCHOR_SIZES2 +- self.aspect_ratio = cfg.ASPECT_RATIO +- self.steps = cfg.STEPS +- self.clip = cfg.CLIP +- for v in self.variance: +- if v <= 0: +- raise ValueError('Variances must be greater than 0') +- self.feature_maps = feature_maps +- +- +- def forward(self): +- mean = [] +- for k in range(len(self.feature_maps)): +- feath = self.feature_maps[k][0] +- featw = self.feature_maps[k][1] +- for i, j in product(range(feath), range(featw)): +- f_kw = self.imw / self.steps[k] +- f_kh = self.imh / self.steps[k] +- +- cx = (j + 0.5) / f_kw +- cy = (i + 0.5) / f_kh +- +- s_kw = self.min_sizes[k] / self.imw +- s_kh = self.min_sizes[k] / self.imh +- for ar in self.aspect_ratio: +- mean += [cx, cy, s_kw/math.sqrt(ar), s_kh*math.sqrt(ar)] +- +- output = torch.Tensor(mean).view(-1, 4) +- if self.clip: +- output.clamp_(max=1, min=0) +- return output +- +- +-if __name__ == '__main__': +- from data.config import cfg +- p = PriorBox([640, 640], cfg) +- out = p.forward() +- print(out.size()) diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/__init__.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/__init__.py deleted file mode 100644 index 1ee0f9eb05..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#-*- coding:utf-8 -*- -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- -- --from .l2norm import L2Norm --from .multibox_loss import MultiBoxLoss -- --__all__ = ['L2Norm', 'MultiBoxLoss'] -- +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. 
+- +-#-*- coding:utf-8 -*- +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +- +-from .l2norm import L2Norm +-from .multibox_loss import MultiBoxLoss +- +-__all__ = ['L2Norm', 'MultiBoxLoss'] +- diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/l2norm.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/l2norm.py deleted file mode 100644 index 009890b8dd..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/l2norm.py +++ /dev/null @@ -1,50 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#-*- coding:utf-8 -*- -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- -- --import torch --import torch.nn as nn --import torch.nn.init as init --from torch.autograd import Function --from torch.autograd import Variable -- -- -- --class L2Norm(nn.Module): -- def __init__(self,n_channels, scale): -- super(L2Norm,self).__init__() -- self.n_channels = n_channels -- self.gamma = scale or None -- self.eps = 1e-10 -- self.weight = nn.Parameter(torch.Tensor(self.n_channels)) -- self.reset_parameters() -- -- def reset_parameters(self): -- init.constant(self.weight,self.gamma) -- -- def forward(self, x): -- norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps -- #x /= norm -- x = torch.div(x,norm) -- out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x -- return out -- -- +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. 
+- +-#-*- coding:utf-8 -*- +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +- +-import torch +-import torch.nn as nn +-import torch.nn.init as init +-from torch.autograd import Function +-from torch.autograd import Variable +- +- +- +-class L2Norm(nn.Module): +- def __init__(self,n_channels, scale): +- super(L2Norm,self).__init__() +- self.n_channels = n_channels +- self.gamma = scale or None +- self.eps = 1e-10 +- self.weight = nn.Parameter(torch.Tensor(self.n_channels)) +- self.reset_parameters() +- +- def reset_parameters(self): +- init.constant(self.weight,self.gamma) +- +- def forward(self, x): +- norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps +- #x /= norm +- x = torch.div(x,norm) +- out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x +- return out +- +- - \ No newline at end of file diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/multibox_loss.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/multibox_loss.py @@ -7433,964 +7433,964 @@ index 0f938211d3..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/layers/modules/multibox_loss.py +++ /dev/null @@ -1,134 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --#-*- coding:utf-8 -*- -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- --import math --import torch --import torch.nn as nn --import torch.nn.functional as F --from torch.autograd import Variable -- -- --from ..bbox_utils import match, log_sum_exp, match_ssd -- -- --class MultiBoxLoss(nn.Module): -- """SSD Weighted Loss Function -- Compute Targets: -- 1) Produce Confidence Target Indices by matching ground truth boxes -- with (default) 'priorboxes' that have jaccard index > threshold parameter -- (default threshold: 0.5). -- 2) Produce localization target by 'encoding' variance into offsets of ground -- truth boxes and their matched 'priorboxes'. -- 3) Hard negative mining to filter the excessive number of negative examples -- that comes with using a large number of default bounding boxes. -- (default negative:positive ratio 3:1) -- Objective Loss: -- L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N -- Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss -- weighted by α which is set to 1 by cross val. -- Args: -- c: class confidences, -- l: predicted boxes, -- g: ground truth boxes -- N: number of matched default boxes -- See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
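Editorial check, not part of the patch: `L2Norm` above rescales the channel vector at every spatial position to unit L2 norm and multiplies it by a learnable per-channel weight initialised to `scale`. A quick numeric confirmation with the weight held at its initial value:

```python
import torch

n_channels, scale, eps = 4, 10.0, 1e-10
x = torch.randn(2, n_channels, 3, 3)

norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + eps
weight = torch.full((n_channels,), scale)          # mimics init.constant_(weight, gamma)
out = weight.view(1, -1, 1, 1) * (x / norm)

# After normalisation, every per-position channel vector has L2 norm ~= scale.
print(out.pow(2).sum(dim=1).sqrt().flatten()[:3])  # ~tensor([10., 10., 10.])
```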
-- """ -- -- def __init__(self, cfg, use_gpu=True): -- super(MultiBoxLoss, self).__init__() -- self.use_gpu = use_gpu -- self.num_classes = cfg.NUM_CLASSES -- self.negpos_ratio = cfg.NEG_POS_RATIOS -- self.variance = cfg.VARIANCE -- -- self.threshold = cfg.FACE.OVERLAP_THRESH -- self.match = match_ssd -- -- def forward(self, predictions, targets): -- """Multibox Loss -- Args: -- predictions (tuple): A tuple containing loc preds, conf preds, -- and prior boxes from SSD net. -- conf shape: torch.size(batch_size,num_priors,num_classes) -- loc shape: torch.size(batch_size,num_priors,4) -- priors shape: torch.size(num_priors,4) -- -- targets (tensor): Ground truth boxes and labels for a batch, -- shape: [batch_size,num_objs,5] (last idx is the label). -- """ -- loc_data, conf_data, priors = predictions -- num = loc_data.size(0) -- priors = priors[:loc_data.size(1), :] -- num_priors = (priors.size(0)) -- num_classes = self.num_classes -- -- # match priors (default boxes) and ground truth boxes -- loc_t = torch.Tensor(num, num_priors, 4) -- conf_t = torch.LongTensor(num, num_priors) -- for idx in range(num): -- truths = targets[idx][:, :-1].data -- labels = targets[idx][:, -1].data -- defaults = priors.data -- self.match(self.threshold, truths, defaults, self.variance, labels, -- loc_t, conf_t, idx) -- if self.use_gpu: -- loc_t = loc_t.cuda() -- conf_t = conf_t.cuda() -- # wrap targets -- loc_t = Variable(loc_t, requires_grad=False) -- conf_t = Variable(conf_t, requires_grad=False) -- -- pos = conf_t > 0 -- num_pos = pos.sum(dim=1, keepdim=True) -- # Localization Loss (Smooth L1) -- # Shape: [batch,num_priors,4] -- pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) -- loc_p = loc_data[pos_idx].view(-1, 4) -- loc_t = loc_t[pos_idx].view(-1, 4) -- loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) -- # print(loc_p) -- # Compute max conf across batch for hard negative mining -- batch_conf = conf_data.view(-1, self.num_classes) -- loss_c = log_sum_exp(batch_conf) - \ -- batch_conf.gather(1, conf_t.view(-1, 1)) -- -- # Hard Negative Mining -- loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now -- loss_c = loss_c.view(num, -1) -- _, loss_idx = loss_c.sort(1, descending=True) -- _, idx_rank = loss_idx.sort(1) -- num_pos = pos.long().sum(1, keepdim=True) -- num_neg = torch.clamp(self.negpos_ratio * -- num_pos, max=pos.size(1) - 1) -- neg = idx_rank < num_neg.expand_as(idx_rank) -- -- # Confidence Loss Including Positive and Negative Examples -- pos_idx = pos.unsqueeze(2).expand_as(conf_data) -- neg_idx = neg.unsqueeze(2).expand_as(conf_data) -- conf_p = conf_data[(pos_idx + neg_idx).gt(0) -- ].view(-1, self.num_classes) -- targets_weighted = conf_t[(pos + neg).gt(0)] -- loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) -- -- # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N -- N = num_pos.data.sum() if num_pos.data.sum() > 0 else num -- loss_l /= N -- loss_c /= N -- return loss_l, loss_c +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-#-*- coding:utf-8 -*- +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +-import math +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-from torch.autograd import Variable +- +- +-from ..bbox_utils import match, log_sum_exp, match_ssd +- +- +-class MultiBoxLoss(nn.Module): +- """SSD Weighted Loss Function +- Compute Targets: +- 1) Produce Confidence Target Indices by matching ground truth boxes +- with (default) 'priorboxes' that have jaccard index > threshold parameter +- (default threshold: 0.5). +- 2) Produce localization target by 'encoding' variance into offsets of ground +- truth boxes and their matched 'priorboxes'. +- 3) Hard negative mining to filter the excessive number of negative examples +- that comes with using a large number of default bounding boxes. +- (default negative:positive ratio 3:1) +- Objective Loss: +- L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N +- Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss +- weighted by α which is set to 1 by cross val. +- Args: +- c: class confidences, +- l: predicted boxes, +- g: ground truth boxes +- N: number of matched default boxes +- See: https://arxiv.org/pdf/1512.02325.pdf for more details. +- """ +- +- def __init__(self, cfg, use_gpu=True): +- super(MultiBoxLoss, self).__init__() +- self.use_gpu = use_gpu +- self.num_classes = cfg.NUM_CLASSES +- self.negpos_ratio = cfg.NEG_POS_RATIOS +- self.variance = cfg.VARIANCE +- +- self.threshold = cfg.FACE.OVERLAP_THRESH +- self.match = match_ssd +- +- def forward(self, predictions, targets): +- """Multibox Loss +- Args: +- predictions (tuple): A tuple containing loc preds, conf preds, +- and prior boxes from SSD net. +- conf shape: torch.size(batch_size,num_priors,num_classes) +- loc shape: torch.size(batch_size,num_priors,4) +- priors shape: torch.size(num_priors,4) +- +- targets (tensor): Ground truth boxes and labels for a batch, +- shape: [batch_size,num_objs,5] (last idx is the label). 
+- """ +- loc_data, conf_data, priors = predictions +- num = loc_data.size(0) +- priors = priors[:loc_data.size(1), :] +- num_priors = (priors.size(0)) +- num_classes = self.num_classes +- +- # match priors (default boxes) and ground truth boxes +- loc_t = torch.Tensor(num, num_priors, 4) +- conf_t = torch.LongTensor(num, num_priors) +- for idx in range(num): +- truths = targets[idx][:, :-1].data +- labels = targets[idx][:, -1].data +- defaults = priors.data +- self.match(self.threshold, truths, defaults, self.variance, labels, +- loc_t, conf_t, idx) +- if self.use_gpu: +- loc_t = loc_t.cuda() +- conf_t = conf_t.cuda() +- # wrap targets +- loc_t = Variable(loc_t, requires_grad=False) +- conf_t = Variable(conf_t, requires_grad=False) +- +- pos = conf_t > 0 +- num_pos = pos.sum(dim=1, keepdim=True) +- # Localization Loss (Smooth L1) +- # Shape: [batch,num_priors,4] +- pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) +- loc_p = loc_data[pos_idx].view(-1, 4) +- loc_t = loc_t[pos_idx].view(-1, 4) +- loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) +- # print(loc_p) +- # Compute max conf across batch for hard negative mining +- batch_conf = conf_data.view(-1, self.num_classes) +- loss_c = log_sum_exp(batch_conf) - \ +- batch_conf.gather(1, conf_t.view(-1, 1)) +- +- # Hard Negative Mining +- loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now +- loss_c = loss_c.view(num, -1) +- _, loss_idx = loss_c.sort(1, descending=True) +- _, idx_rank = loss_idx.sort(1) +- num_pos = pos.long().sum(1, keepdim=True) +- num_neg = torch.clamp(self.negpos_ratio * +- num_pos, max=pos.size(1) - 1) +- neg = idx_rank < num_neg.expand_as(idx_rank) +- +- # Confidence Loss Including Positive and Negative Examples +- pos_idx = pos.unsqueeze(2).expand_as(conf_data) +- neg_idx = neg.unsqueeze(2).expand_as(conf_data) +- conf_p = conf_data[(pos_idx + neg_idx).gt(0) +- ].view(-1, self.num_classes) +- targets_weighted = conf_t[(pos + neg).gt(0)] +- loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) +- +- # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N +- N = num_pos.data.sum() if num_pos.data.sum() > 0 else num +- loss_l /= N +- loss_c /= N +- return loss_l, loss_c diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_resnet.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_resnet.py deleted file mode 100644 index 22e005a5c2..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_resnet.py +++ /dev/null @@ -1,438 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. 
-- --#coding=utf-8 --from __future__ import division --from __future__ import print_function --from __future__ import absolute_import -- --import torch --import torch.nn as nn --import torch.nn.functional as F --import torch.nn.init as init -- --from torch.autograd import Variable -- --from layers import * --from data.config import cur_config as cfg -- --def conv3x3(in_planes, out_planes, stride=1, bias=False): -- """3x3 convolution with padding""" -- return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, -- padding=1, bias=bias) -- -- --def conv1x1(in_planes, out_planes, stride=1): -- """1x1 convolution""" -- return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) -- -- --class BasicBlock(nn.Module): -- expansion = 1 -- -- def __init__(self, inplanes, planes, stride=1, downsample=None): -- super(BasicBlock, self).__init__() -- self.conv1 = conv3x3(inplanes, planes, stride) -- self.bn1 = nn.BatchNorm2d(planes) -- self.relu = nn.ReLU(inplace=True) -- self.conv2 = conv3x3(planes, planes) -- self.bn2 = nn.BatchNorm2d(planes) -- self.downsample = downsample -- self.stride = stride -- -- def forward(self, x): -- residual = x -- -- out = self.conv1(x) -- out = self.bn1(out) -- out = self.relu(out) -- -- out = self.conv2(out) -- out = self.bn2(out) -- -- if self.downsample is not None: -- residual = self.downsample(x) -- -- out += residual -- out = self.relu(out) -- -- return out -- -- --class Bottleneck(nn.Module): -- expansion = 4 -- -- def __init__(self, inplanes, planes, stride=1, downsample=None): -- super(Bottleneck, self).__init__() -- self.conv1 = conv1x1(inplanes, planes) -- self.bn1 = nn.BatchNorm2d(planes) -- self.conv2 = conv3x3(planes, planes, stride) -- self.bn2 = nn.BatchNorm2d(planes) -- self.conv3 = conv1x1(planes, planes * self.expansion) -- self.bn3 = nn.BatchNorm2d(planes * self.expansion) -- self.relu = nn.ReLU(inplace=True) -- self.downsample = downsample -- self.stride = stride -- -- def forward(self, x): -- residual = x -- -- out = self.conv1(x) -- out = self.bn1(out) -- out = self.relu(out) -- -- out = self.conv2(out) -- out = self.bn2(out) -- out = self.relu(out) -- -- out = self.conv3(out) -- out = self.bn3(out) -- -- if self.downsample is not None: -- residual = self.downsample(x) -- -- out += residual -- out = self.relu(out) -- -- return out -- -- --class ResNet(nn.Module): -- -- def __init__(self, block, layers): -- super(ResNet, self).__init__() -- self.inplanes = 64 -- self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, -- bias=False) -- self.bn1 = nn.BatchNorm2d(64) -- self.relu = nn.ReLU(inplace=True) -- self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) -- self.layer1 = self._make_layer(block, 64, layers[0]) -- self.layer2 = self._make_layer(block, 128, layers[1], stride=2) -- self.layer3 = self._make_layer(block, 256, layers[2], stride=2) -- self.layer4 = self._make_layer(block, 512, layers[3], stride=2) -- -- def _make_layer(self, block, planes, blocks, stride=1): -- downsample = None -- if stride != 1 or self.inplanes != planes * block.expansion: -- downsample = nn.Sequential( -- conv1x1(self.inplanes, planes * block.expansion, stride), -- nn.BatchNorm2d(planes * block.expansion), -- ) -- -- layers = [] -- layers.append(block(self.inplanes, planes, stride, downsample)) -- self.inplanes = planes * block.expansion -- for _ in range(1, blocks): -- layers.append(block(self.inplanes, planes)) -- -- return nn.Sequential(*layers) -- -- def forward(self, x): -- sources = [] -- -- x = self.conv1(x) -- 
x = self.bn1(x) -- x = self.relu(x) -- x = self.maxpool(x) -- -- x = self.layer1(x) -- sources += [x] -- -- x = self.layer2(x) -- sources += [x] -- -- x = self.layer3(x) -- sources += [x] -- -- x = self.layer4(x) -- sources += [x] -- -- return sources -- -- --class FEM(nn.Module): -- """docstring for FEM""" -- -- def __init__(self, in_planes): -- super(FEM, self).__init__() -- inter_planes = in_planes // 3 -- inter_planes1 = in_planes - 2 * inter_planes -- self.branch1 = nn.Conv2d( -- in_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3) -- -- self.branch2 = nn.Sequential( -- nn.Conv2d(in_planes, inter_planes, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes, inter_planes, kernel_size=3, -- stride=1, padding=3, dilation=3) -- ) -- self.branch3 = nn.Sequential( -- nn.Conv2d(in_planes, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3) -- ) -- -- def forward(self, x): -- x1 = self.branch1(x) -- x2 = self.branch2(x) -- x3 = self.branch3(x) -- out = torch.cat((x1, x2, x3), dim=1) -- out = F.relu(out, inplace=True) -- return out -- -- --class DSFD(nn.Module): -- """docstring for SRN""" -- -- def __init__(self, phase, base, extras, fem_modules, head1, head2, num_classes=2): -- super(DSFD, self).__init__() -- self.resnet = base -- self.phase = phase -- self.num_classes = num_classes -- self.extras = nn.ModuleList(extras) -- -- self.fpn_topdown = nn.ModuleList(fem_modules[0]) -- self.fpn_latlayer = nn.ModuleList(fem_modules[1]) -- self.fpn_fem = nn.ModuleList(fem_modules[2]) -- -- self.loc_pal1 = nn.ModuleList(head1[0]) -- self.conf_pal1 = nn.ModuleList(head1[1]) -- self.loc_pal2 = nn.ModuleList(head2[0]) -- self.conf_pal2 = nn.ModuleList(head2[1]) -- -- if self.phase == 'test': -- self.softmax = nn.Softmax(dim=-1) -- self.detect = Detect(cfg) -- -- def _upsample_prod(self, x, y): -- _, _, H, W = y.size() -- return F.upsample(x, size=(H, W), mode='bilinear') * y -- -- def forward(self, x): -- size = x.size()[2:] -- of1, of2, of3, of4 = self.resnet(x) -- -- x = of4 -- for i in range(2): -- x = F.relu(self.extras[i](x), inplace=True) -- of5 = x -- -- for i in range(2, len(self.extras)): -- x = F.relu(self.extras[i](x), inplace=True) -- of6 = x -- -- conv7 = F.relu(self.fpn_topdown[0](of6), inplace=True) -- -- x = F.relu(self.fpn_topdown[1](conv7), inplace=True) -- conv6 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[0](of5)), inplace=True) -- -- x = F.relu(self.fpn_topdown[2](conv6), inplace=True) -- conv5 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[1](of4)), inplace=True) -- -- x = F.relu(self.fpn_topdown[3](conv5), inplace=True) -- conv4 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[2](of3)), inplace=True) -- -- x = F.relu(self.fpn_topdown[4](conv4), inplace=True) -- conv3 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[3](of2)), inplace=True) -- -- x = F.relu(self.fpn_topdown[5](conv3), inplace=True) -- conv2 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[4](of1)), inplace=True) -- -- ef1 = self.fpn_fem[0](conv2) -- ef2 = self.fpn_fem[1](conv3) -- ef3 = self.fpn_fem[2](conv4) -- ef4 = self.fpn_fem[3](conv5) -- ef5 = self.fpn_fem[4](conv6) -- ef6 = self.fpn_fem[5](conv7) -- -- sources_pal1 = [of1, of2, of3, of4, of5, of6] -- 
sources_pal2 = [ef1, ef2, ef3, ef4, ef5, ef6] -- loc_pal1, conf_pal1 = list(), list() -- loc_pal2, conf_pal2 = list(), list() -- -- for (x, l, c) in zip(sources_pal1, self.loc_pal1, self.conf_pal1): -- loc_pal1.append(l(x).permute(0, 2, 3, 1).contiguous()) -- conf_pal1.append(c(x).permute(0, 2, 3, 1).contiguous()) -- -- for (x, l, c) in zip(sources_pal2, self.loc_pal2, self.conf_pal2): -- loc_pal2.append(l(x).permute(0, 2, 3, 1).contiguous()) -- conf_pal2.append(c(x).permute(0, 2, 3, 1).contiguous()) -- -- features_maps = [] -- for i in range(len(loc_pal1)): -- feat = [] -- feat += [loc_pal1[i].size(1), loc_pal1[i].size(2)] -- features_maps += [feat] -- -- loc_pal1 = torch.cat([o.view(o.size(0), -1) for o in loc_pal1], 1) -- conf_pal1 = torch.cat([o.view(o.size(0), -1) for o in conf_pal1], 1) -- -- loc_pal2 = torch.cat([o.view(o.size(0), -1) for o in loc_pal2], 1) -- conf_pal2 = torch.cat([o.view(o.size(0), -1) for o in conf_pal2], 1) -- -- priorbox = PriorBox(size, features_maps, cfg, pal=1) -- self.priors_pal1 = Variable(priorbox.forward(), volatile=True) -- -- priorbox = PriorBox(size, features_maps, cfg, pal=2) -- self.priors_pal2 = Variable(priorbox.forward(), volatile=True) -- -- if self.phase == 'test': -- output = self.detect( -- loc_pal2.view(loc_pal2.size(0), -1, 4), -- self.softmax(conf_pal2.view(conf_pal2.size(0), -1, -- self.num_classes)), # conf preds -- self.priors_pal2.type(type(x.data)) -- ) -- -- else: -- output = ( -- loc_pal1.view(loc_pal1.size(0), -1, 4), -- conf_pal1.view(conf_pal1.size(0), -1, self.num_classes), -- self.priors_pal1, -- loc_pal2.view(loc_pal2.size(0), -1, 4), -- conf_pal2.view(conf_pal2.size(0), -1, self.num_classes), -- self.priors_pal2) -- return output -- -- def load_weights(self, base_file): -- other, ext = os.path.splitext(base_file) -- if ext == '.pkl' or '.pth': -- print('Loading weights into state dict...') -- mdata = torch.load(base_file, -- map_location=lambda storage, loc: storage) -- weights = mdata['weight'] -- epoch = mdata['epoch'] -- self.load_state_dict(weights) -- print('Finished!') -- else: -- print('Sorry only .pth and .pkl files supported.') -- return epoch -- -- def xavier(self, param): -- init.xavier_uniform(param) -- -- def weights_init(self, m): -- if isinstance(m, nn.Conv2d): -- self.xavier(m.weight.data) -- m.bias.data.zero_() -- -- if isinstance(m, nn.ConvTranspose2d): -- self.xavier(m.weight.data) -- if 'bias' in m.state_dict().keys(): -- m.bias.data.zero_() -- -- if isinstance(m, nn.BatchNorm2d): -- m.weight.data[...] 
= 1 -- m.bias.data.zero_() -- --extras_cfg = [256, 'S', 512, 128, 'S', 256] -- --net_cfg = [256, 512, 1024, 2048, 512, 256] -- -- --def add_extras(cfg, i): -- layers = [] -- in_channels = i -- flag = False -- for k, v in enumerate(cfg): -- if in_channels != 'S': -- if v == 'S': -- layers += [nn.Conv2d(in_channels, cfg[k + 1], -- kernel_size=(1, 3)[flag], stride=2, padding=1)] -- else: -- layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] -- flag = not flag -- in_channels = v -- return layers -- -- --def multibox(cfg, num_classes=2): -- conf_layers = [] -- loc_layers = [] -- for k, v in enumerate(cfg): -- loc_layers += [nn.Conv2d(v, 4, kernel_size=3, padding=1)] -- conf_layers += [nn.Conv2d(v, num_classes, kernel_size=3, padding=1)] -- return (loc_layers, conf_layers) -- -- --def fem_module(cfg): -- topdown_layers = [] -- lat_layers = [] -- fem_layers = [] -- -- topdown_layers += [nn.Conv2d(cfg[-1], cfg[-1], -- kernel_size=1, stride=1, padding=0)] -- for k, v in enumerate(cfg): -- fem_layers += [FEM(v)] -- cur_channel = cfg[len(cfg) - 1 - k] -- if len(cfg) - 1 - k > 0: -- last_channel = cfg[len(cfg) - 2 - k] -- topdown_layers += [nn.Conv2d(cur_channel, last_channel, -- kernel_size=1, stride=1, padding=0)] -- lat_layers += [nn.Conv2d(last_channel, last_channel, -- kernel_size=1, stride=1, padding=0)] -- return (topdown_layers, lat_layers, fem_layers) -- -- --def resnet50(): -- """Constructs a ResNet-50 model. -- Args: -- pretrained (bool): If True, returns a model pre-trained on ImageNet -- """ -- model = ResNet(Bottleneck, [3, 4, 6, 3]) -- return model -- -- --def resnet101(): -- model = ResNet(Bottleneck, [3, 4, 23, 3]) -- return model -- -- --def resnet152(): -- model = ResNet(Bottleneck, [3, 8, 36, 3]) -- return model -- -- --def model_map(net_name='resnet50'): -- _dicts = {'resnet50': resnet50, -- 'resnet101': resnet101, 'resnet152': resnet152} -- return _dicts[net_name]() -- -- --def build_net_resnet(phase, num_classes=2, net_name='resnet50'): -- resnet = model_map(net_name) -- extras = add_extras(extras_cfg, 2048) -- head_pal1 = multibox(net_cfg, num_classes) -- head_pal2 = multibox(net_cfg, num_classes) -- fem_modules = fem_module(net_cfg) -- model = DSFD(phase, resnet, extras, fem_modules, -- head_pal1, head_pal2, num_classes) -- return model -- --if __name__ == '__main__': -- inputs = Variable(torch.randn(1, 3, 640, 640)) -- net = build_net('train', 2, 101) -- out = net(inputs) +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. 
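A small aside on the FEM block in the model file above: two branches take `in_planes // 3` channels and the third takes the remainder, so the concatenation always restores `in_planes` channels. A quick check (single stand-in convs, not the full branches):

```python
import torch
import torch.nn as nn

# Channel bookkeeping used by FEM: //3, //3, and the remainder.
for in_planes in (256, 512, 1024):
    inter = in_planes // 3
    inter1 = in_planes - 2 * inter
    assert 2 * inter + inter1 == in_planes
    print(in_planes, '->', (inter, inter, inter1))
# 256 -> (85, 85, 86), 512 -> (170, 170, 172), 1024 -> (341, 341, 342)

# Shape check with one dilated 3x3 conv per branch (a stand-in for FEM's branches).
x = torch.randn(1, 256, 40, 40)
branches = [nn.Conv2d(256, c, 3, padding=3, dilation=3) for c in (85, 85, 86)]
out = torch.cat([b(x) for b in branches], dim=1)
print(out.shape)  # torch.Size([1, 256, 40, 40])
```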
+- +-#coding=utf-8 +-from __future__ import division +-from __future__ import print_function +-from __future__ import absolute_import +- +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import torch.nn.init as init +- +-from torch.autograd import Variable +- +-from layers import * +-from data.config import cur_config as cfg +- +-def conv3x3(in_planes, out_planes, stride=1, bias=False): +- """3x3 convolution with padding""" +- return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, +- padding=1, bias=bias) +- +- +-def conv1x1(in_planes, out_planes, stride=1): +- """1x1 convolution""" +- return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) +- +- +-class BasicBlock(nn.Module): +- expansion = 1 +- +- def __init__(self, inplanes, planes, stride=1, downsample=None): +- super(BasicBlock, self).__init__() +- self.conv1 = conv3x3(inplanes, planes, stride) +- self.bn1 = nn.BatchNorm2d(planes) +- self.relu = nn.ReLU(inplace=True) +- self.conv2 = conv3x3(planes, planes) +- self.bn2 = nn.BatchNorm2d(planes) +- self.downsample = downsample +- self.stride = stride +- +- def forward(self, x): +- residual = x +- +- out = self.conv1(x) +- out = self.bn1(out) +- out = self.relu(out) +- +- out = self.conv2(out) +- out = self.bn2(out) +- +- if self.downsample is not None: +- residual = self.downsample(x) +- +- out += residual +- out = self.relu(out) +- +- return out +- +- +-class Bottleneck(nn.Module): +- expansion = 4 +- +- def __init__(self, inplanes, planes, stride=1, downsample=None): +- super(Bottleneck, self).__init__() +- self.conv1 = conv1x1(inplanes, planes) +- self.bn1 = nn.BatchNorm2d(planes) +- self.conv2 = conv3x3(planes, planes, stride) +- self.bn2 = nn.BatchNorm2d(planes) +- self.conv3 = conv1x1(planes, planes * self.expansion) +- self.bn3 = nn.BatchNorm2d(planes * self.expansion) +- self.relu = nn.ReLU(inplace=True) +- self.downsample = downsample +- self.stride = stride +- +- def forward(self, x): +- residual = x +- +- out = self.conv1(x) +- out = self.bn1(out) +- out = self.relu(out) +- +- out = self.conv2(out) +- out = self.bn2(out) +- out = self.relu(out) +- +- out = self.conv3(out) +- out = self.bn3(out) +- +- if self.downsample is not None: +- residual = self.downsample(x) +- +- out += residual +- out = self.relu(out) +- +- return out +- +- +-class ResNet(nn.Module): +- +- def __init__(self, block, layers): +- super(ResNet, self).__init__() +- self.inplanes = 64 +- self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, +- bias=False) +- self.bn1 = nn.BatchNorm2d(64) +- self.relu = nn.ReLU(inplace=True) +- self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) +- self.layer1 = self._make_layer(block, 64, layers[0]) +- self.layer2 = self._make_layer(block, 128, layers[1], stride=2) +- self.layer3 = self._make_layer(block, 256, layers[2], stride=2) +- self.layer4 = self._make_layer(block, 512, layers[3], stride=2) +- +- def _make_layer(self, block, planes, blocks, stride=1): +- downsample = None +- if stride != 1 or self.inplanes != planes * block.expansion: +- downsample = nn.Sequential( +- conv1x1(self.inplanes, planes * block.expansion, stride), +- nn.BatchNorm2d(planes * block.expansion), +- ) +- +- layers = [] +- layers.append(block(self.inplanes, planes, stride, downsample)) +- self.inplanes = planes * block.expansion +- for _ in range(1, blocks): +- layers.append(block(self.inplanes, planes)) +- +- return nn.Sequential(*layers) +- +- def forward(self, x): +- sources = [] +- +- x = self.conv1(x) +- 
x = self.bn1(x) +- x = self.relu(x) +- x = self.maxpool(x) +- +- x = self.layer1(x) +- sources += [x] +- +- x = self.layer2(x) +- sources += [x] +- +- x = self.layer3(x) +- sources += [x] +- +- x = self.layer4(x) +- sources += [x] +- +- return sources +- +- +-class FEM(nn.Module): +- """docstring for FEM""" +- +- def __init__(self, in_planes): +- super(FEM, self).__init__() +- inter_planes = in_planes // 3 +- inter_planes1 = in_planes - 2 * inter_planes +- self.branch1 = nn.Conv2d( +- in_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3) +- +- self.branch2 = nn.Sequential( +- nn.Conv2d(in_planes, inter_planes, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes, inter_planes, kernel_size=3, +- stride=1, padding=3, dilation=3) +- ) +- self.branch3 = nn.Sequential( +- nn.Conv2d(in_planes, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3) +- ) +- +- def forward(self, x): +- x1 = self.branch1(x) +- x2 = self.branch2(x) +- x3 = self.branch3(x) +- out = torch.cat((x1, x2, x3), dim=1) +- out = F.relu(out, inplace=True) +- return out +- +- +-class DSFD(nn.Module): +- """docstring for SRN""" +- +- def __init__(self, phase, base, extras, fem_modules, head1, head2, num_classes=2): +- super(DSFD, self).__init__() +- self.resnet = base +- self.phase = phase +- self.num_classes = num_classes +- self.extras = nn.ModuleList(extras) +- +- self.fpn_topdown = nn.ModuleList(fem_modules[0]) +- self.fpn_latlayer = nn.ModuleList(fem_modules[1]) +- self.fpn_fem = nn.ModuleList(fem_modules[2]) +- +- self.loc_pal1 = nn.ModuleList(head1[0]) +- self.conf_pal1 = nn.ModuleList(head1[1]) +- self.loc_pal2 = nn.ModuleList(head2[0]) +- self.conf_pal2 = nn.ModuleList(head2[1]) +- +- if self.phase == 'test': +- self.softmax = nn.Softmax(dim=-1) +- self.detect = Detect(cfg) +- +- def _upsample_prod(self, x, y): +- _, _, H, W = y.size() +- return F.upsample(x, size=(H, W), mode='bilinear') * y +- +- def forward(self, x): +- size = x.size()[2:] +- of1, of2, of3, of4 = self.resnet(x) +- +- x = of4 +- for i in range(2): +- x = F.relu(self.extras[i](x), inplace=True) +- of5 = x +- +- for i in range(2, len(self.extras)): +- x = F.relu(self.extras[i](x), inplace=True) +- of6 = x +- +- conv7 = F.relu(self.fpn_topdown[0](of6), inplace=True) +- +- x = F.relu(self.fpn_topdown[1](conv7), inplace=True) +- conv6 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[0](of5)), inplace=True) +- +- x = F.relu(self.fpn_topdown[2](conv6), inplace=True) +- conv5 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[1](of4)), inplace=True) +- +- x = F.relu(self.fpn_topdown[3](conv5), inplace=True) +- conv4 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[2](of3)), inplace=True) +- +- x = F.relu(self.fpn_topdown[4](conv4), inplace=True) +- conv3 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[3](of2)), inplace=True) +- +- x = F.relu(self.fpn_topdown[5](conv3), inplace=True) +- conv2 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[4](of1)), inplace=True) +- +- ef1 = self.fpn_fem[0](conv2) +- ef2 = self.fpn_fem[1](conv3) +- ef3 = self.fpn_fem[2](conv4) +- ef4 = self.fpn_fem[3](conv5) +- ef5 = self.fpn_fem[4](conv6) +- ef6 = self.fpn_fem[5](conv7) +- +- sources_pal1 = [of1, of2, of3, of4, of5, of6] +- 
sources_pal2 = [ef1, ef2, ef3, ef4, ef5, ef6] +- loc_pal1, conf_pal1 = list(), list() +- loc_pal2, conf_pal2 = list(), list() +- +- for (x, l, c) in zip(sources_pal1, self.loc_pal1, self.conf_pal1): +- loc_pal1.append(l(x).permute(0, 2, 3, 1).contiguous()) +- conf_pal1.append(c(x).permute(0, 2, 3, 1).contiguous()) +- +- for (x, l, c) in zip(sources_pal2, self.loc_pal2, self.conf_pal2): +- loc_pal2.append(l(x).permute(0, 2, 3, 1).contiguous()) +- conf_pal2.append(c(x).permute(0, 2, 3, 1).contiguous()) +- +- features_maps = [] +- for i in range(len(loc_pal1)): +- feat = [] +- feat += [loc_pal1[i].size(1), loc_pal1[i].size(2)] +- features_maps += [feat] +- +- loc_pal1 = torch.cat([o.view(o.size(0), -1) for o in loc_pal1], 1) +- conf_pal1 = torch.cat([o.view(o.size(0), -1) for o in conf_pal1], 1) +- +- loc_pal2 = torch.cat([o.view(o.size(0), -1) for o in loc_pal2], 1) +- conf_pal2 = torch.cat([o.view(o.size(0), -1) for o in conf_pal2], 1) +- +- priorbox = PriorBox(size, features_maps, cfg, pal=1) +- self.priors_pal1 = Variable(priorbox.forward(), volatile=True) +- +- priorbox = PriorBox(size, features_maps, cfg, pal=2) +- self.priors_pal2 = Variable(priorbox.forward(), volatile=True) +- +- if self.phase == 'test': +- output = self.detect( +- loc_pal2.view(loc_pal2.size(0), -1, 4), +- self.softmax(conf_pal2.view(conf_pal2.size(0), -1, +- self.num_classes)), # conf preds +- self.priors_pal2.type(type(x.data)) +- ) +- +- else: +- output = ( +- loc_pal1.view(loc_pal1.size(0), -1, 4), +- conf_pal1.view(conf_pal1.size(0), -1, self.num_classes), +- self.priors_pal1, +- loc_pal2.view(loc_pal2.size(0), -1, 4), +- conf_pal2.view(conf_pal2.size(0), -1, self.num_classes), +- self.priors_pal2) +- return output +- +- def load_weights(self, base_file): +- other, ext = os.path.splitext(base_file) +- if ext == '.pkl' or '.pth': +- print('Loading weights into state dict...') +- mdata = torch.load(base_file, +- map_location=lambda storage, loc: storage) +- weights = mdata['weight'] +- epoch = mdata['epoch'] +- self.load_state_dict(weights) +- print('Finished!') +- else: +- print('Sorry only .pth and .pkl files supported.') +- return epoch +- +- def xavier(self, param): +- init.xavier_uniform(param) +- +- def weights_init(self, m): +- if isinstance(m, nn.Conv2d): +- self.xavier(m.weight.data) +- m.bias.data.zero_() +- +- if isinstance(m, nn.ConvTranspose2d): +- self.xavier(m.weight.data) +- if 'bias' in m.state_dict().keys(): +- m.bias.data.zero_() +- +- if isinstance(m, nn.BatchNorm2d): +- m.weight.data[...] 
= 1 +- m.bias.data.zero_() +- +-extras_cfg = [256, 'S', 512, 128, 'S', 256] +- +-net_cfg = [256, 512, 1024, 2048, 512, 256] +- +- +-def add_extras(cfg, i): +- layers = [] +- in_channels = i +- flag = False +- for k, v in enumerate(cfg): +- if in_channels != 'S': +- if v == 'S': +- layers += [nn.Conv2d(in_channels, cfg[k + 1], +- kernel_size=(1, 3)[flag], stride=2, padding=1)] +- else: +- layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] +- flag = not flag +- in_channels = v +- return layers +- +- +-def multibox(cfg, num_classes=2): +- conf_layers = [] +- loc_layers = [] +- for k, v in enumerate(cfg): +- loc_layers += [nn.Conv2d(v, 4, kernel_size=3, padding=1)] +- conf_layers += [nn.Conv2d(v, num_classes, kernel_size=3, padding=1)] +- return (loc_layers, conf_layers) +- +- +-def fem_module(cfg): +- topdown_layers = [] +- lat_layers = [] +- fem_layers = [] +- +- topdown_layers += [nn.Conv2d(cfg[-1], cfg[-1], +- kernel_size=1, stride=1, padding=0)] +- for k, v in enumerate(cfg): +- fem_layers += [FEM(v)] +- cur_channel = cfg[len(cfg) - 1 - k] +- if len(cfg) - 1 - k > 0: +- last_channel = cfg[len(cfg) - 2 - k] +- topdown_layers += [nn.Conv2d(cur_channel, last_channel, +- kernel_size=1, stride=1, padding=0)] +- lat_layers += [nn.Conv2d(last_channel, last_channel, +- kernel_size=1, stride=1, padding=0)] +- return (topdown_layers, lat_layers, fem_layers) +- +- +-def resnet50(): +- """Constructs a ResNet-50 model. +- Args: +- pretrained (bool): If True, returns a model pre-trained on ImageNet +- """ +- model = ResNet(Bottleneck, [3, 4, 6, 3]) +- return model +- +- +-def resnet101(): +- model = ResNet(Bottleneck, [3, 4, 23, 3]) +- return model +- +- +-def resnet152(): +- model = ResNet(Bottleneck, [3, 8, 36, 3]) +- return model +- +- +-def model_map(net_name='resnet50'): +- _dicts = {'resnet50': resnet50, +- 'resnet101': resnet101, 'resnet152': resnet152} +- return _dicts[net_name]() +- +- +-def build_net_resnet(phase, num_classes=2, net_name='resnet50'): +- resnet = model_map(net_name) +- extras = add_extras(extras_cfg, 2048) +- head_pal1 = multibox(net_cfg, num_classes) +- head_pal2 = multibox(net_cfg, num_classes) +- fem_modules = fem_module(net_cfg) +- model = DSFD(phase, resnet, extras, fem_modules, +- head_pal1, head_pal2, num_classes) +- return model +- +-if __name__ == '__main__': +- inputs = Variable(torch.randn(1, 3, 640, 640)) +- net = build_net('train', 2, 101) +- out = net(inputs) diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_vgg.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_vgg.py deleted file mode 100644 index 8a842e593a..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/DSFD_vgg.py +++ /dev/null @@ -1,374 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. 
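The `add_extras` configuration above determines the channel flow that `DSFD.forward` relies on (`of5` after the first two extra layers, `of6` after the last two). Re-running just that constructor in isolation shows the resulting layer shapes:

```python
import torch.nn as nn

extras_cfg = [256, 'S', 512, 128, 'S', 256]

def add_extras(cfg, i):
    # Same construction as in the model file above.
    layers, in_channels, flag = [], i, False
    for k, v in enumerate(cfg):
        if in_channels != 'S':
            if v == 'S':
                layers += [nn.Conv2d(in_channels, cfg[k + 1],
                                     kernel_size=(1, 3)[flag], stride=2, padding=1)]
            else:
                layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
            flag = not flag
        in_channels = v
    return layers

extras = add_extras(extras_cfg, 2048)
print([(l.in_channels, l.out_channels, l.kernel_size) for l in extras])
# [(2048, 256, (1, 1)), (256, 512, (3, 3)), (512, 128, (1, 1)), (128, 256, (3, 3))]
# so of5 carries 512 channels and of6 carries 256, matching the tail of net_cfg.
```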
-- --#coding=utf-8 -- --from __future__ import division --from __future__ import absolute_import --from __future__ import print_function -- --import os --import numpy as np -- --import torch --import torch.nn as nn --import torch.nn.init as init --import torch.nn.functional as F --from torch.autograd import Variable -- --from layers import * --from data.config import cur_config as cfg -- -- --class FEM(nn.Module): -- """docstring for FEM""" -- -- def __init__(self, in_planes): -- super(FEM, self).__init__() -- inter_planes = in_planes // 3 -- inter_planes1 = in_planes - 2 * inter_planes -- self.branch1 = nn.Conv2d( -- in_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3) -- -- self.branch2 = nn.Sequential( -- nn.Conv2d(in_planes, inter_planes, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes, inter_planes, kernel_size=3, -- stride=1, padding=3, dilation=3) -- ) -- self.branch3 = nn.Sequential( -- nn.Conv2d(in_planes, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3), -- nn.ReLU(inplace=True), -- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, -- stride=1, padding=3, dilation=3) -- ) -- -- def forward(self, x): -- x1 = self.branch1(x) -- x2 = self.branch2(x) -- x3 = self.branch3(x) -- out = torch.cat((x1, x2, x3), dim=1) -- out = F.relu(out, inplace=True) -- return out -- -- --class DSFD(nn.Module): -- """Single Shot Multibox Architecture -- The network is composed of a base VGG network followed by the -- added multibox conv layers. Each multibox layer branches into -- 1) conv2d for class conf scores -- 2) conv2d for localization predictions -- 3) associated priorbox layer to produce default bounding -- boxes specific to the layer's feature map size. -- See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
-- -- Args: -- phase: (string) Can be "test" or "train" -- size: input image size -- base: VGG16 layers for input, size of either 300 or 500 -- extras: extra layers that feed to multibox loc and conf layers -- head: "multibox head" consists of loc and conf conv layers -- """ -- -- def __init__(self, phase, base, extras, fem, head1, head2, num_classes): -- super(DSFD, self).__init__() -- self.phase = phase -- self.num_classes = num_classes -- self.vgg = nn.ModuleList(base) -- -- self.L2Normof1 = L2Norm(256, 10) -- self.L2Normof2 = L2Norm(512, 8) -- self.L2Normof3 = L2Norm(512, 5) -- -- self.extras = nn.ModuleList(extras) -- self.fpn_topdown = nn.ModuleList(fem[0]) -- self.fpn_latlayer = nn.ModuleList(fem[1]) -- -- self.fpn_fem = nn.ModuleList(fem[2]) -- -- self.L2Normef1 = L2Norm(256, 10) -- self.L2Normef2 = L2Norm(512, 8) -- self.L2Normef3 = L2Norm(512, 5) -- -- self.loc_pal1 = nn.ModuleList(head1[0]) -- self.conf_pal1 = nn.ModuleList(head1[1]) -- -- self.loc_pal2 = nn.ModuleList(head2[0]) -- self.conf_pal2 = nn.ModuleList(head2[1]) -- -- if self.phase=='test': -- self.softmax = nn.Softmax(dim=-1) -- self.detect = Detect(cfg) -- -- def _upsample_prod(self, x, y): -- _, _, H, W = y.size() -- return F.upsample(x, size=(H, W), mode='bilinear') * y -- -- def forward(self, x): -- size = x.size()[2:] -- pal1_sources = list() -- pal2_sources = list() -- loc_pal1 = list() -- conf_pal1 = list() -- loc_pal2 = list() -- conf_pal2 = list() -- -- # apply vgg up to conv4_3 relu -- for k in range(16): -- x = self.vgg[k](x) -- of1 = x -- s = self.L2Normof1(of1) -- pal1_sources.append(s) -- # apply vgg up to fc7 -- for k in range(16, 23): -- x = self.vgg[k](x) -- of2 = x -- s = self.L2Normof2(of2) -- pal1_sources.append(s) -- -- for k in range(23, 30): -- x = self.vgg[k](x) -- of3 = x -- s = self.L2Normof3(of3) -- pal1_sources.append(s) -- -- for k in range(30, len(self.vgg)): -- x = self.vgg[k](x) -- of4 = x -- pal1_sources.append(of4) -- # apply extra layers and cache source layer outputs -- -- for k in range(2): -- x = F.relu(self.extras[k](x), inplace=True) -- of5 = x -- pal1_sources.append(of5) -- for k in range(2, 4): -- x = F.relu(self.extras[k](x), inplace=True) -- of6 = x -- pal1_sources.append(of6) -- -- conv7 = F.relu(self.fpn_topdown[0](of6), inplace=True) -- -- x = F.relu(self.fpn_topdown[1](conv7), inplace=True) -- conv6 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[0](of5)), inplace=True) -- -- x = F.relu(self.fpn_topdown[2](conv6), inplace=True) -- convfc7_2 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[1](of4)), inplace=True) -- -- x = F.relu(self.fpn_topdown[3](convfc7_2), inplace=True) -- conv5 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[2](of3)), inplace=True) -- -- x = F.relu(self.fpn_topdown[4](conv5), inplace=True) -- conv4 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[3](of2)), inplace=True) -- -- x = F.relu(self.fpn_topdown[5](conv4), inplace=True) -- conv3 = F.relu(self._upsample_prod( -- x, self.fpn_latlayer[4](of1)), inplace=True) -- -- ef1 = self.fpn_fem[0](conv3) -- ef1 = self.L2Normef1(ef1) -- ef2 = self.fpn_fem[1](conv4) -- ef2 = self.L2Normef2(ef2) -- ef3 = self.fpn_fem[2](conv5) -- ef3 = self.L2Normef3(ef3) -- ef4 = self.fpn_fem[3](convfc7_2) -- ef5 = self.fpn_fem[4](conv6) -- ef6 = self.fpn_fem[5](conv7) -- -- pal2_sources = (ef1, ef2, ef3, ef4, ef5, ef6) -- for (x, l, c) in zip(pal1_sources, self.loc_pal1, self.conf_pal1): -- loc_pal1.append(l(x).permute(0, 2, 3, 1).contiguous()) -- conf_pal1.append(c(x).permute(0, 2, 3, 1).contiguous()) 
-- -- for (x, l, c) in zip(pal2_sources, self.loc_pal2, self.conf_pal2): -- loc_pal2.append(l(x).permute(0, 2, 3, 1).contiguous()) -- conf_pal2.append(c(x).permute(0, 2, 3, 1).contiguous()) -- -- features_maps = [] -- for i in range(len(loc_pal1)): -- feat = [] -- feat += [loc_pal1[i].size(1), loc_pal1[i].size(2)] -- features_maps += [feat] -- -- loc_pal1 = torch.cat([o.view(o.size(0), -1) -- for o in loc_pal1], 1) -- conf_pal1 = torch.cat([o.view(o.size(0), -1) -- for o in conf_pal1], 1) -- -- loc_pal2 = torch.cat([o.view(o.size(0), -1) -- for o in loc_pal2], 1) -- conf_pal2 = torch.cat([o.view(o.size(0), -1) -- for o in conf_pal2], 1) -- -- priorbox = PriorBox(size, features_maps, cfg, pal=1) -- self.priors_pal1 = Variable(priorbox.forward(), volatile=True) -- -- priorbox = PriorBox(size, features_maps, cfg, pal=2) -- self.priors_pal2 = Variable(priorbox.forward(), volatile=True) -- -- if self.phase == 'test': -- output = self.detect( -- loc_pal2.view(loc_pal2.size(0), -1, 4), -- self.softmax(conf_pal2.view(conf_pal2.size(0), -1, -- self.num_classes)), # conf preds -- self.priors_pal2.type(type(x.data)) -- ) -- -- else: -- output = ( -- loc_pal1.view(loc_pal1.size(0), -1, 4), -- conf_pal1.view(conf_pal1.size(0), -1, self.num_classes), -- self.priors_pal1, -- loc_pal2.view(loc_pal2.size(0), -1, 4), -- conf_pal2.view(conf_pal2.size(0), -1, self.num_classes), -- self.priors_pal2) -- return output -- -- def load_weights(self, base_file): -- other, ext = os.path.splitext(base_file) -- if ext == '.pkl' or '.pth': -- print('Loading weights into state dict...') -- mdata = torch.load(base_file, -- map_location=lambda storage, loc: storage) -- weights = mdata['weight'] -- epoch = mdata['epoch'] -- self.load_state_dict(weights) -- print('Finished!') -- else: -- print('Sorry only .pth and .pkl files supported.') -- return epoch -- -- def xavier(self, param): -- init.xavier_uniform(param) -- -- def weights_init(self, m): -- if isinstance(m, nn.Conv2d): -- self.xavier(m.weight.data) -- m.bias.data.zero_() -- -- if isinstance(m,nn.ConvTranspose2d): -- self.xavier(m.weight.data) -- if 'bias' in m.state_dict().keys(): -- m.bias.data.zero_() -- -- if isinstance(m,nn.BatchNorm2d): -- m.weight.data[...] 
= 1 -- m.bias.data.zero_() -- -- --vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', -- 512, 512, 512, 'M'] -- --extras_cfg = [256, 'S', 512, 128, 'S', 256] -- --fem_cfg = [256, 512, 512, 1024, 512, 256] -- -- --def fem_module(cfg): -- topdown_layers = [] -- lat_layers = [] -- fem_layers = [] -- -- topdown_layers += [nn.Conv2d(cfg[-1], cfg[-1], -- kernel_size=1, stride=1, padding=0)] -- for k, v in enumerate(cfg): -- fem_layers += [FEM(v)] -- cur_channel = cfg[len(cfg) - 1 - k] -- if len(cfg) - 1 - k > 0: -- last_channel = cfg[len(cfg) - 2 - k] -- topdown_layers += [nn.Conv2d(cur_channel, last_channel, -- kernel_size=1, stride=1, padding=0)] -- lat_layers += [nn.Conv2d(last_channel, last_channel, -- kernel_size=1, stride=1, padding=0)] -- return (topdown_layers, lat_layers, fem_layers) -- -- --def vgg(cfg, i, batch_norm=False): -- layers = [] -- in_channels = i -- for v in cfg: -- if v == 'M': -- layers += [nn.MaxPool2d(kernel_size=2, stride=2)] -- elif v == 'C': -- layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] -- else: -- conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) -- if batch_norm: -- layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] -- else: -- layers += [conv2d, nn.ReLU(inplace=True)] -- in_channels = v -- conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=3, dilation=3) -- conv7 = nn.Conv2d(1024, 1024, kernel_size=1) -- layers += [conv6, -- nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] -- return layers -- -- --def add_extras(cfg, i, batch_norm=False): -- # Extra layers added to VGG for feature scaling -- layers = [] -- in_channels = i -- flag = False -- for k, v in enumerate(cfg): -- if in_channels != 'S': -- if v == 'S': -- layers += [nn.Conv2d(in_channels, cfg[k + 1], -- kernel_size=(1, 3)[flag], stride=2, padding=1)] -- else: -- layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] -- flag = not flag -- in_channels = v -- return layers -- -- --def multibox(vgg, extra_layers, num_classes): -- loc_layers = [] -- conf_layers = [] -- vgg_source = [14, 21, 28, -2] -- -- for k, v in enumerate(vgg_source): -- loc_layers += [nn.Conv2d(vgg[v].out_channels, -- 4, kernel_size=3, padding=1)] -- conf_layers += [nn.Conv2d(vgg[v].out_channels, -- num_classes, kernel_size=3, padding=1)] -- for k, v in enumerate(extra_layers[1::2], 2): -- loc_layers += [nn.Conv2d(v.out_channels, -- 4, kernel_size=3, padding=1)] -- conf_layers += [nn.Conv2d(v.out_channels, -- num_classes, kernel_size=3, padding=1)] -- return (loc_layers, conf_layers) -- -- --def build_net_vgg(phase, num_classes=2): -- base = vgg(vgg_cfg, 3) -- extras = add_extras(extras_cfg, 1024) -- head1 = multibox(base, extras, num_classes) -- head2 = multibox(base, extras, num_classes) -- fem = fem_module(fem_cfg) -- return DSFD(phase, base, extras, fem, head1, head2, num_classes) -- --if __name__ == '__main__': -- inputs = Variable(torch.randn(1, 3, 640, 640)) -- net = build_net('train', 2) -- out = net(inputs) +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-# See the License for the specific language governing permissions and +-# limitations under the License. +- +-#coding=utf-8 +- +-from __future__ import division +-from __future__ import absolute_import +-from __future__ import print_function +- +-import os +-import numpy as np +- +-import torch +-import torch.nn as nn +-import torch.nn.init as init +-import torch.nn.functional as F +-from torch.autograd import Variable +- +-from layers import * +-from data.config import cur_config as cfg +- +- +-class FEM(nn.Module): +- """docstring for FEM""" +- +- def __init__(self, in_planes): +- super(FEM, self).__init__() +- inter_planes = in_planes // 3 +- inter_planes1 = in_planes - 2 * inter_planes +- self.branch1 = nn.Conv2d( +- in_planes, inter_planes, kernel_size=3, stride=1, padding=3, dilation=3) +- +- self.branch2 = nn.Sequential( +- nn.Conv2d(in_planes, inter_planes, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes, inter_planes, kernel_size=3, +- stride=1, padding=3, dilation=3) +- ) +- self.branch3 = nn.Sequential( +- nn.Conv2d(in_planes, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3), +- nn.ReLU(inplace=True), +- nn.Conv2d(inter_planes1, inter_planes1, kernel_size=3, +- stride=1, padding=3, dilation=3) +- ) +- +- def forward(self, x): +- x1 = self.branch1(x) +- x2 = self.branch2(x) +- x3 = self.branch3(x) +- out = torch.cat((x1, x2, x3), dim=1) +- out = F.relu(out, inplace=True) +- return out +- +- +-class DSFD(nn.Module): +- """Single Shot Multibox Architecture +- The network is composed of a base VGG network followed by the +- added multibox conv layers. Each multibox layer branches into +- 1) conv2d for class conf scores +- 2) conv2d for localization predictions +- 3) associated priorbox layer to produce default bounding +- boxes specific to the layer's feature map size. +- See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
+- +- Args: +- phase: (string) Can be "test" or "train" +- size: input image size +- base: VGG16 layers for input, size of either 300 or 500 +- extras: extra layers that feed to multibox loc and conf layers +- head: "multibox head" consists of loc and conf conv layers +- """ +- +- def __init__(self, phase, base, extras, fem, head1, head2, num_classes): +- super(DSFD, self).__init__() +- self.phase = phase +- self.num_classes = num_classes +- self.vgg = nn.ModuleList(base) +- +- self.L2Normof1 = L2Norm(256, 10) +- self.L2Normof2 = L2Norm(512, 8) +- self.L2Normof3 = L2Norm(512, 5) +- +- self.extras = nn.ModuleList(extras) +- self.fpn_topdown = nn.ModuleList(fem[0]) +- self.fpn_latlayer = nn.ModuleList(fem[1]) +- +- self.fpn_fem = nn.ModuleList(fem[2]) +- +- self.L2Normef1 = L2Norm(256, 10) +- self.L2Normef2 = L2Norm(512, 8) +- self.L2Normef3 = L2Norm(512, 5) +- +- self.loc_pal1 = nn.ModuleList(head1[0]) +- self.conf_pal1 = nn.ModuleList(head1[1]) +- +- self.loc_pal2 = nn.ModuleList(head2[0]) +- self.conf_pal2 = nn.ModuleList(head2[1]) +- +- if self.phase=='test': +- self.softmax = nn.Softmax(dim=-1) +- self.detect = Detect(cfg) +- +- def _upsample_prod(self, x, y): +- _, _, H, W = y.size() +- return F.upsample(x, size=(H, W), mode='bilinear') * y +- +- def forward(self, x): +- size = x.size()[2:] +- pal1_sources = list() +- pal2_sources = list() +- loc_pal1 = list() +- conf_pal1 = list() +- loc_pal2 = list() +- conf_pal2 = list() +- +- # apply vgg up to conv4_3 relu +- for k in range(16): +- x = self.vgg[k](x) +- of1 = x +- s = self.L2Normof1(of1) +- pal1_sources.append(s) +- # apply vgg up to fc7 +- for k in range(16, 23): +- x = self.vgg[k](x) +- of2 = x +- s = self.L2Normof2(of2) +- pal1_sources.append(s) +- +- for k in range(23, 30): +- x = self.vgg[k](x) +- of3 = x +- s = self.L2Normof3(of3) +- pal1_sources.append(s) +- +- for k in range(30, len(self.vgg)): +- x = self.vgg[k](x) +- of4 = x +- pal1_sources.append(of4) +- # apply extra layers and cache source layer outputs +- +- for k in range(2): +- x = F.relu(self.extras[k](x), inplace=True) +- of5 = x +- pal1_sources.append(of5) +- for k in range(2, 4): +- x = F.relu(self.extras[k](x), inplace=True) +- of6 = x +- pal1_sources.append(of6) +- +- conv7 = F.relu(self.fpn_topdown[0](of6), inplace=True) +- +- x = F.relu(self.fpn_topdown[1](conv7), inplace=True) +- conv6 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[0](of5)), inplace=True) +- +- x = F.relu(self.fpn_topdown[2](conv6), inplace=True) +- convfc7_2 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[1](of4)), inplace=True) +- +- x = F.relu(self.fpn_topdown[3](convfc7_2), inplace=True) +- conv5 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[2](of3)), inplace=True) +- +- x = F.relu(self.fpn_topdown[4](conv5), inplace=True) +- conv4 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[3](of2)), inplace=True) +- +- x = F.relu(self.fpn_topdown[5](conv4), inplace=True) +- conv3 = F.relu(self._upsample_prod( +- x, self.fpn_latlayer[4](of1)), inplace=True) +- +- ef1 = self.fpn_fem[0](conv3) +- ef1 = self.L2Normef1(ef1) +- ef2 = self.fpn_fem[1](conv4) +- ef2 = self.L2Normef2(ef2) +- ef3 = self.fpn_fem[2](conv5) +- ef3 = self.L2Normef3(ef3) +- ef4 = self.fpn_fem[3](convfc7_2) +- ef5 = self.fpn_fem[4](conv6) +- ef6 = self.fpn_fem[5](conv7) +- +- pal2_sources = (ef1, ef2, ef3, ef4, ef5, ef6) +- for (x, l, c) in zip(pal1_sources, self.loc_pal1, self.conf_pal1): +- loc_pal1.append(l(x).permute(0, 2, 3, 1).contiguous()) +- conf_pal1.append(c(x).permute(0, 2, 3, 1).contiguous()) 
+- +- for (x, l, c) in zip(pal2_sources, self.loc_pal2, self.conf_pal2): +- loc_pal2.append(l(x).permute(0, 2, 3, 1).contiguous()) +- conf_pal2.append(c(x).permute(0, 2, 3, 1).contiguous()) +- +- features_maps = [] +- for i in range(len(loc_pal1)): +- feat = [] +- feat += [loc_pal1[i].size(1), loc_pal1[i].size(2)] +- features_maps += [feat] +- +- loc_pal1 = torch.cat([o.view(o.size(0), -1) +- for o in loc_pal1], 1) +- conf_pal1 = torch.cat([o.view(o.size(0), -1) +- for o in conf_pal1], 1) +- +- loc_pal2 = torch.cat([o.view(o.size(0), -1) +- for o in loc_pal2], 1) +- conf_pal2 = torch.cat([o.view(o.size(0), -1) +- for o in conf_pal2], 1) +- +- priorbox = PriorBox(size, features_maps, cfg, pal=1) +- self.priors_pal1 = Variable(priorbox.forward(), volatile=True) +- +- priorbox = PriorBox(size, features_maps, cfg, pal=2) +- self.priors_pal2 = Variable(priorbox.forward(), volatile=True) +- +- if self.phase == 'test': +- output = self.detect( +- loc_pal2.view(loc_pal2.size(0), -1, 4), +- self.softmax(conf_pal2.view(conf_pal2.size(0), -1, +- self.num_classes)), # conf preds +- self.priors_pal2.type(type(x.data)) +- ) +- +- else: +- output = ( +- loc_pal1.view(loc_pal1.size(0), -1, 4), +- conf_pal1.view(conf_pal1.size(0), -1, self.num_classes), +- self.priors_pal1, +- loc_pal2.view(loc_pal2.size(0), -1, 4), +- conf_pal2.view(conf_pal2.size(0), -1, self.num_classes), +- self.priors_pal2) +- return output +- +- def load_weights(self, base_file): +- other, ext = os.path.splitext(base_file) +- if ext == '.pkl' or '.pth': +- print('Loading weights into state dict...') +- mdata = torch.load(base_file, +- map_location=lambda storage, loc: storage) +- weights = mdata['weight'] +- epoch = mdata['epoch'] +- self.load_state_dict(weights) +- print('Finished!') +- else: +- print('Sorry only .pth and .pkl files supported.') +- return epoch +- +- def xavier(self, param): +- init.xavier_uniform(param) +- +- def weights_init(self, m): +- if isinstance(m, nn.Conv2d): +- self.xavier(m.weight.data) +- m.bias.data.zero_() +- +- if isinstance(m,nn.ConvTranspose2d): +- self.xavier(m.weight.data) +- if 'bias' in m.state_dict().keys(): +- m.bias.data.zero_() +- +- if isinstance(m,nn.BatchNorm2d): +- m.weight.data[...] 
= 1 +- m.bias.data.zero_() +- +- +-vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', +- 512, 512, 512, 'M'] +- +-extras_cfg = [256, 'S', 512, 128, 'S', 256] +- +-fem_cfg = [256, 512, 512, 1024, 512, 256] +- +- +-def fem_module(cfg): +- topdown_layers = [] +- lat_layers = [] +- fem_layers = [] +- +- topdown_layers += [nn.Conv2d(cfg[-1], cfg[-1], +- kernel_size=1, stride=1, padding=0)] +- for k, v in enumerate(cfg): +- fem_layers += [FEM(v)] +- cur_channel = cfg[len(cfg) - 1 - k] +- if len(cfg) - 1 - k > 0: +- last_channel = cfg[len(cfg) - 2 - k] +- topdown_layers += [nn.Conv2d(cur_channel, last_channel, +- kernel_size=1, stride=1, padding=0)] +- lat_layers += [nn.Conv2d(last_channel, last_channel, +- kernel_size=1, stride=1, padding=0)] +- return (topdown_layers, lat_layers, fem_layers) +- +- +-def vgg(cfg, i, batch_norm=False): +- layers = [] +- in_channels = i +- for v in cfg: +- if v == 'M': +- layers += [nn.MaxPool2d(kernel_size=2, stride=2)] +- elif v == 'C': +- layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] +- else: +- conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) +- if batch_norm: +- layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] +- else: +- layers += [conv2d, nn.ReLU(inplace=True)] +- in_channels = v +- conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=3, dilation=3) +- conv7 = nn.Conv2d(1024, 1024, kernel_size=1) +- layers += [conv6, +- nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] +- return layers +- +- +-def add_extras(cfg, i, batch_norm=False): +- # Extra layers added to VGG for feature scaling +- layers = [] +- in_channels = i +- flag = False +- for k, v in enumerate(cfg): +- if in_channels != 'S': +- if v == 'S': +- layers += [nn.Conv2d(in_channels, cfg[k + 1], +- kernel_size=(1, 3)[flag], stride=2, padding=1)] +- else: +- layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] +- flag = not flag +- in_channels = v +- return layers +- +- +-def multibox(vgg, extra_layers, num_classes): +- loc_layers = [] +- conf_layers = [] +- vgg_source = [14, 21, 28, -2] +- +- for k, v in enumerate(vgg_source): +- loc_layers += [nn.Conv2d(vgg[v].out_channels, +- 4, kernel_size=3, padding=1)] +- conf_layers += [nn.Conv2d(vgg[v].out_channels, +- num_classes, kernel_size=3, padding=1)] +- for k, v in enumerate(extra_layers[1::2], 2): +- loc_layers += [nn.Conv2d(v.out_channels, +- 4, kernel_size=3, padding=1)] +- conf_layers += [nn.Conv2d(v.out_channels, +- num_classes, kernel_size=3, padding=1)] +- return (loc_layers, conf_layers) +- +- +-def build_net_vgg(phase, num_classes=2): +- base = vgg(vgg_cfg, 3) +- extras = add_extras(extras_cfg, 1024) +- head1 = multibox(base, extras, num_classes) +- head2 = multibox(base, extras, num_classes) +- fem = fem_module(fem_cfg) +- return DSFD(phase, base, extras, fem, head1, head2, num_classes) +- +-if __name__ == '__main__': +- inputs = Variable(torch.randn(1, 3, 640, 640)) +- net = build_net('train', 2) +- out = net(inputs) diff --git a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/__init__.py b/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/__init__.py deleted file mode 100644 index 89552b1d3f..0000000000 @@ -8417,53 +8417,53 @@ index b42d14b830..0000000000 --- a/contrib/ACL_PyTorch/Research/cv/detection/DSFD/models/factory.py +++ /dev/null @@ -1,50 +0,0 @@ --# Copyright 2021 Huawei Technologies Co., Ltd --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. 
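The docstring of the VGG-based DSFD above describes each multibox head as a pair of 3x3 convs, one for localization and one for class confidences. For a single source map with one prior per location, the permute/view step in `forward` turns those outputs into per-prior rows; a toy check (shapes assumed for illustration):

```python
import torch
import torch.nn as nn

num_classes, C, H, W = 2, 512, 40, 40
loc = nn.Conv2d(C, 4, kernel_size=3, padding=1)
conf = nn.Conv2d(C, num_classes, kernel_size=3, padding=1)

x = torch.randn(1, C, H, W)
l = loc(x).permute(0, 2, 3, 1).contiguous().view(1, -1, 4)
c = conf(x).permute(0, 2, 3, 1).contiguous().view(1, -1, num_classes)
print(l.shape, c.shape)  # torch.Size([1, 1600, 4]) torch.Size([1, 1600, 2])
# 1600 = 40 * 40 priors contributed by this map; the six maps are concatenated.
```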
--# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --# -*- coding: utf-8 -*- --from __future__ import division --from __future__ import print_function --from __future__ import absolute_import -- --import torch --import torch.backends.cudnn as cudnn -- --#from .DSFD_vgg import build_net_vgg --from .DSFD_resnet import build_net_resnet -- -- --def build_net(phase, num_classes=2, model='vgg'): -- if phase != "test" and phase != "train": -- print("ERROR: Phase: " + phase + " not recognized") -- return -- -- if model != 'vgg' and 'resnet' not in model: -- print("ERROR: model:" + model + " not recognized") -- return -- -- if model == 'vgg': -- return build_net_vgg(phase, num_classes) -- else: -- return build_net_resnet(phase, num_classes, model) -- -- -- --def basenet_factory(model='vgg'): -- if model=='vgg': -- basenet = 'vgg16_reducedfc.pth' -- -- elif 'resnet' in model: -- basenet = '{}.pth'.format(model) -- return basenet -- +-# Copyright 2021 Huawei Technologies Co., Ltd +-# +-# Licensed under the Apache License, Version 2.0 (the "License"); +-# you may not use this file except in compliance with the License. +-# You may obtain a copy of the License at +-# +-# http://www.apache.org/licenses/LICENSE-2.0 +-# +-# Unless required by applicable law or agreed to in writing, software +-# distributed under the License is distributed on an "AS IS" BASIS, +-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-# See the License for the specific language governing permissions and +-# limitations under the License. 
+- +-# -*- coding: utf-8 -*- +-from __future__ import division +-from __future__ import print_function +-from __future__ import absolute_import +- +-import torch +-import torch.backends.cudnn as cudnn +- +-#from .DSFD_vgg import build_net_vgg +-from .DSFD_resnet import build_net_resnet +- +- +-def build_net(phase, num_classes=2, model='vgg'): +- if phase != "test" and phase != "train": +- print("ERROR: Phase: " + phase + " not recognized") +- return +- +- if model != 'vgg' and 'resnet' not in model: +- print("ERROR: model:" + model + " not recognized") +- return +- +- if model == 'vgg': +- return build_net_vgg(phase, num_classes) +- else: +- return build_net_resnet(phase, num_classes, model) +- +- +- +-def basenet_factory(model='vgg'): +- if model=='vgg': +- basenet = 'vgg16_reducedfc.pth' +- +- elif 'resnet' in model: +- basenet = '{}.pth'.format(model) +- return basenet +- diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/README.md b/ACL_PyTorch/contrib/cv/detection/DSFD/README.md index eec06e18abe7c8999d4e7c500b7e757972e245e3..b9e504b8425e0a15fdad9b823d5877b9f86fcfe5 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/README.md +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/README.md @@ -1,104 +1,104 @@ - - - - -# 概述 - -FaceDetection-DSFD是通用场景下的人脸检测模型,采用FSSD+Resnet的网络结构,加入FEM 模块进一步增强不同感受野的目标特征,实现高准确率的人脸检测 - -## 1.环境准备 - - - -1.安装必要的依赖 - -```python -pip install -r requirements.txt -source env.sh -``` - -2.执行 eval_tols/dsfd_acc_eval.py 之前先执行以下命令 - -``` -cd eval_tools -python setup.py build_ext --inplace -``` - -3.获取权重文件 - -[pth模型链接](链接:https://pan.baidu.com/s/1DKNAKusuSh8O_91xvpCtWw 提取码:i468) 下载后放在根目录下 - -4.获取推理图像集 放在 opt/npu/目录下 - -[推理图像数据集](链接:https://pan.baidu.com/s/1KvpfjR0U8KUJnY7Gw5vLnQ 提取码:e3lu) - -5.获取benchmark工具 - -将benchmark.x86_64或benchmark.aarch64放到主目录下 - -``` -chmod a+x benchmark.x86_64 -``` - -6.进行数据预处理 - -```python -python dsfd_preprocess.py --src_path '/opt/npu/WIDERFace/WIDER_val/images/' #主目录下产生info_result.info文件 -``` - - - -## 2.模型转换 - -1.进行pth转onnx模型 - -``` -cd test -python dsfd_pth2onnx.py --model_path '../dsfd.pth' -``` - -[onnx文件链接](链接:https://pan.baidu.com/s/1HR5Ur5-KjNYlVJnJ6JOdVg 提取码:yqep) 生成的onnx模型文件在test文件夹下 - -2.进行onnx模型转om模型 - -cd到test目录下执行以下命令 - -``` -bash onnx2om.sh -``` - -生成的om模型在上一层 onnx2om 文件夹下 - -## 3.离线推理 - -1.将得到om模型后进行模型性能推理,在310上运行,先执行npu-smi info查看设备状态,确保device空闲 - -``` -cd test -bsah om_inference.sh #产生文件在 result/dumpOutput_device0 -``` - -2.进行模型精度统计 - -eval_tools文件夹内要含有 ground_truth相关文件 - -``` -cd eval_tools -python dsfd_acc_eval.py -p '../result/dumpOutput_device0/' -g './ground_truth/' -``` - -3.模型推理性能及精度 - -| Model | Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | -| ----- | ---------- | -------------- | ------------- | --------- | -| DSFD | 1 | *206* | *168* | *206/168* | -| DSFD | 4 | *262* | *314* | *262/314* | -| DSFD | 8 | *286* | *380* | *286/380* | -| DSFD | 16 | *306* | *425* | *306/425* | -| DSFD | 32 | *305* | *427* | *305/427* | - - - -| Framework | Atlas NPU Model | Server | Container | Precision | Dataset | Accuracy | Ascend AI Processor | NPU Version | -| --------- | ---------------- | --------------- | --------- | --------- | ---------- | ------------------------------------------------------------ | -------------------- | -------------------- | + + + + +# 概述 + +FaceDetection-DSFD是通用场景下的人脸检测模型,采用FSSD+Resnet的网络结构,加入FEM 模块进一步增强不同感受野的目标特征,实现高准确率的人脸检测 + +## 1.环境准备 + + + +1.安装必要的依赖 + +```python +pip install -r requirements.txt +source env.sh +``` + +2.执行 eval_tols/dsfd_acc_eval.py 之前先执行以下命令 + +``` +cd eval_tools +python setup.py 
build_ext --inplace +``` + +3.获取权重文件 + +[pth模型链接](链接:https://pan.baidu.com/s/1DKNAKusuSh8O_91xvpCtWw 提取码:i468) 下载后放在根目录下 + +4.获取推理图像集 放在 opt/npu/目录下 + +[推理图像数据集](链接:https://pan.baidu.com/s/1KvpfjR0U8KUJnY7Gw5vLnQ 提取码:e3lu) + +5.获取benchmark工具 + +将benchmark.x86_64或benchmark.aarch64放到主目录下 + +``` +chmod a+x benchmark.x86_64 +``` + +6.进行数据预处理 + +```python +python dsfd_preprocess.py --src_path '/opt/npu/WIDERFace/WIDER_val/images/' #主目录下产生info_result.info文件 +``` + + + +## 2.模型转换 + +1.进行pth转onnx模型 + +``` +cd test +python dsfd_pth2onnx.py --model_path '../dsfd.pth' +``` + +[onnx文件链接](链接:https://pan.baidu.com/s/1HR5Ur5-KjNYlVJnJ6JOdVg 提取码:yqep) 生成的onnx模型文件在test文件夹下 + +2.进行onnx模型转om模型 + +cd到test目录下执行以下命令 + +``` +bash onnx2om.sh +``` + +生成的om模型在上一层 onnx2om 文件夹下 + +## 3.离线推理 + +1.将得到om模型后进行模型性能推理,在310上运行,先执行npu-smi info查看设备状态,确保device空闲 + +``` +cd test +bsah om_inference.sh #产生文件在 result/dumpOutput_device0 +``` + +2.进行模型精度统计 + +eval_tools文件夹内要含有 ground_truth相关文件 + +``` +cd eval_tools +python dsfd_acc_eval.py -p '../result/dumpOutput_device0/' -g './ground_truth/' +``` + +3.模型推理性能及精度 + +| Model | Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | +| ----- | ---------- | -------------- | ------------- | --------- | +| DSFD | 1 | *206* | *168* | *206/168* | +| DSFD | 4 | *262* | *314* | *262/314* | +| DSFD | 8 | *286* | *380* | *286/380* | +| DSFD | 16 | *306* | *425* | *306/425* | +| DSFD | 32 | *305* | *427* | *305/427* | + + + +| Framework | Atlas NPU Model | Server | Container | Precision | Dataset | Accuracy | Ascend AI Processor | NPU Version | +| --------- | ---------------- | --------------- | --------- | --------- | ---------- | ------------------------------------------------------------ | -------------------- | -------------------- | | PyTorch | Atlas 300-3010 | Atlas 800-3010 | NA | fp16 | WIDER FACE | Easy Val AP: 0.9443 Medium Val AP: 0.9347 Hard Val AP: 0.8645 | Ascend 310 | Atlas 300-3010-32GB | \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/data/config.py b/ACL_PyTorch/contrib/cv/detection/DSFD/data/config.py index 07097d89e4bc726098f35a0e85244fe004cdd9e9..55f151623b05aca0734ecd3eea527a9f7caf27fd 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/data/config.py +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/data/config.py @@ -1,91 +1,91 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
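The README above converts the checkpoint to ONNX via `dsfd_pth2onnx.py` before the om step. A hedged sketch of what that export step typically looks like; the function name, output names, and opset are assumptions, and only the 1x3x640x640 input follows the model code in this patch:

```python
import torch

def export_to_onnx(model, onnx_path='dsfd.onnx'):
    # Hypothetical export helper, not the actual dsfd_pth2onnx.py interface.
    model.eval()
    dummy = torch.randn(1, 3, 640, 640)   # matches the 640x640 test input used above
    torch.onnx.export(model, dummy, onnx_path,
                      input_names=['image'],
                      output_names=['output'],
                      opset_version=11)
```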
- -#coding=utf-8 - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -from easydict import EasyDict -import numpy as np - - -class Config(object): - # data augument config - expand_prob = 0.5 - expand_max_ratio = 4 - hue_prob = 0.5 - hue_delta = 18 - contrast_prob = 0.5 - contrast_delta = 0.5 - saturation_prob = 0.5 - saturation_delta = 0.5 - brightness_prob = 0.5 - brightness_delta = 0.125 - data_anchor_sampling_prob = 0.5 - min_face_size = 6.0 - apply_distort = True - apply_expand = False - img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype( - 'float32') - resize_width = 640 - resize_height = 640 - scale = 1 / 127.0 - anchor_sampling = True - filter_min_face = True - - # train config - LR_STEPS = (80000,100000,120000) - MAX_STEPS = 150000 - EPOCHES = 100 - - # anchor config - FEATURE_MAPS = [160, 80, 40, 20, 10, 5] - INPUT_SIZE = 640 - STEPS = [4, 8, 16, 32, 64, 128] - ANCHOR_SIZES1 = [8, 16, 32, 64, 128, 256] - ANCHOR_SIZES2 = [16, 32, 64, 128, 256, 512] - ASPECT_RATIO = [1.0] - CLIP = False - VARIANCE = [0.1, 0.2] - - # detection config - NMS_THRESH = 0.3 - NMS_TOP_K = 5000 - TOP_K = 750 - CONF_THRESH = 0.05 - - # loss config - NEG_POS_RATIOS = 3 - NUM_CLASSES = 2 - - #multigpu - MultiGPU_ID =[0, 1] - - # dataset config - HOME = '/data/deling/DSFD/Data/' - - # face config - FACE = EasyDict() - FACE_TRAIN_FILE = '/data/deling/DSFD/FaceDetection-DSFD_Full/val_data/face_train.txt' #进行训练图片集合,由 prepare_wide_data.pyd得到 - FACE_VAL_FILE = '/data/deling/DSFD/FaceDetection-DSFD_Full/val_data/face_val.txt' #进行验证图片集合 - FACE_FDDB_DIR = '' - FACE_WIDER_DIR = '/data/deling/DSFD/Data' - FACE_AFW_DIR = '' - FACE_PASCAL_DIR = '' - FACE.OVERLAP_THRESH = 0.35 - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#coding=utf-8 + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +from easydict import EasyDict +import numpy as np + + +class Config(object): + # data augument config + expand_prob = 0.5 + expand_max_ratio = 4 + hue_prob = 0.5 + hue_delta = 18 + contrast_prob = 0.5 + contrast_delta = 0.5 + saturation_prob = 0.5 + saturation_delta = 0.5 + brightness_prob = 0.5 + brightness_delta = 0.125 + data_anchor_sampling_prob = 0.5 + min_face_size = 6.0 + apply_distort = True + apply_expand = False + img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype( + 'float32') + resize_width = 640 + resize_height = 640 + scale = 1 / 127.0 + anchor_sampling = True + filter_min_face = True + + # train config + LR_STEPS = (80000,100000,120000) + MAX_STEPS = 150000 + EPOCHES = 100 + + # anchor config + FEATURE_MAPS = [160, 80, 40, 20, 10, 5] + INPUT_SIZE = 640 + STEPS = [4, 8, 16, 32, 64, 128] + ANCHOR_SIZES1 = [8, 16, 32, 64, 128, 256] + ANCHOR_SIZES2 = [16, 32, 64, 128, 256, 512] + ASPECT_RATIO = [1.0] + CLIP = False + VARIANCE = [0.1, 0.2] + + # detection config + NMS_THRESH = 0.3 + NMS_TOP_K = 5000 + TOP_K = 750 + CONF_THRESH = 0.05 + + # loss config + NEG_POS_RATIOS = 3 + NUM_CLASSES = 2 + + #multigpu + MultiGPU_ID =[0, 1] + + # dataset config + HOME = '/data/deling/DSFD/Data/' + + # face config + FACE = EasyDict() + FACE_TRAIN_FILE = '/data/deling/DSFD/FaceDetection-DSFD_Full/val_data/face_train.txt' #进行训练图片集合,由 prepare_wide_data.pyd得到 + FACE_VAL_FILE = '/data/deling/DSFD/FaceDetection-DSFD_Full/val_data/face_val.txt' #进行验证图片集合 + FACE_FDDB_DIR = '' + FACE_WIDER_DIR = '/data/deling/DSFD/Data' + FACE_AFW_DIR = '' + FACE_PASCAL_DIR = '' + FACE.OVERLAP_THRESH = 0.35 + cur_config = Config() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/data/widerface.py b/ACL_PyTorch/contrib/cv/detection/DSFD/data/widerface.py index 02ec1019014b899aecce1dd0d3c0ac33ac717d50..56523c88a62f8b4cb5c3bc43dd5fee5fe2106c89 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/data/widerface.py +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/data/widerface.py @@ -1,132 +1,132 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#coding=utf-8 - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import torch -from PIL import Image, ImageDraw -import torch.utils.data as data -import numpy as np -import random -from utils.augmentations import preprocess - - -class WIDERDetection(data.Dataset): - """docstring for WIDERDetection""" - - def __init__(self, list_file, mode='train'): - super(WIDERDetection, self).__init__() - self.mode = mode - self.fnames = [] - self.boxes = [] - self.labels = [] - - with open(list_file) as f: - lines = f.readlines() - - for line in lines: - line = line.strip().split() - num_faces = int(line[1]) - box = [] - label = [] - for i in range(num_faces): - x = float(line[2 + 5 * i]) - y = float(line[3 + 5 * i]) - w = float(line[4 + 5 * i]) - h = float(line[5 + 5 * i]) - c = int(line[6 + 5 * i]) - if w <= 0 or h <= 0: - continue - box.append([x, y, x + w, y + h]) - label.append(c) - if len(box) > 0: - self.fnames.append(line[0]) - self.boxes.append(box) - self.labels.append(label) - - self.num_samples = len(self.boxes) - - def __len__(self): - return self.num_samples - - def __getitem__(self, index): - img, target, h, w = self.pull_item(index) - return img, target - - def pull_item(self, index): - while True: - image_path = self.fnames[index] - img = Image.open(image_path) - #img = cv2.imread(image_path) - - img = img.convert('RGB') - im_width, im_height = img.size - #im_width, im_height = img.shape[0],img.shape[1] - boxes = self.annotransform( - np.array(self.boxes[index]), im_width, im_height) - label = np.array(self.labels[index]) - bbox_labels = np.hstack((label[:, np.newaxis], boxes)).tolist() - img, sample_labels = preprocess( - img, bbox_labels, self.mode, image_path) - sample_labels = np.array(sample_labels) - if len(sample_labels) > 0: - target = np.hstack( - (sample_labels[:, 1:], sample_labels[:, 0][:, np.newaxis])) - - assert (target[:, 2] > target[:, 0]).any() - assert (target[:, 3] > target[:, 1]).any() - break - else: - index = random.randrange(0, self.num_samples) - return torch.from_numpy(img), target, im_height, im_width - - - def annotransform(self, boxes, im_width, im_height): - boxes[:, 0] /= im_width - boxes[:, 1] /= im_height - boxes[:, 2] /= im_width - boxes[:, 3] /= im_height - return boxes - - -def detection_collate(batch): - """Custom collate fn for dealing with batches of images that have a different - number of associated object annotations (bounding boxes). - - Arguments: - batch: (tuple) A tuple of tensor images and lists of annotations - - Return: - A tuple containing: - 1) (tensor) batch of images stacked on their 0 dim - 2) (list of tensors) annotations for a given image are stacked on - 0 dim - """ - targets = [] - imgs = [] - for sample in batch: - imgs.append(sample[0]) - targets.append(torch.FloatTensor(sample[1])) - return torch.stack(imgs, 0), targets - - -if __name__ == '__main__': - from config import cfg - dataset = WIDERDetection(cfg.FACE_TRAIN_FILE) - dataset.pull_item(14) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#coding=utf-8 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import cv2 +import torch +from PIL import Image, ImageDraw +import torch.utils.data as data +import numpy as np +import random +from utils.augmentations import preprocess + + +class WIDERDetection(data.Dataset): + """docstring for WIDERDetection""" + + def __init__(self, list_file, mode='train'): + super(WIDERDetection, self).__init__() + self.mode = mode + self.fnames = [] + self.boxes = [] + self.labels = [] + + with open(list_file) as f: + lines = f.readlines() + + for line in lines: + line = line.strip().split() + num_faces = int(line[1]) + box = [] + label = [] + for i in range(num_faces): + x = float(line[2 + 5 * i]) + y = float(line[3 + 5 * i]) + w = float(line[4 + 5 * i]) + h = float(line[5 + 5 * i]) + c = int(line[6 + 5 * i]) + if w <= 0 or h <= 0: + continue + box.append([x, y, x + w, y + h]) + label.append(c) + if len(box) > 0: + self.fnames.append(line[0]) + self.boxes.append(box) + self.labels.append(label) + + self.num_samples = len(self.boxes) + + def __len__(self): + return self.num_samples + + def __getitem__(self, index): + img, target, h, w = self.pull_item(index) + return img, target + + def pull_item(self, index): + while True: + image_path = self.fnames[index] + img = Image.open(image_path) + #img = cv2.imread(image_path) + + img = img.convert('RGB') + im_width, im_height = img.size + #im_width, im_height = img.shape[0],img.shape[1] + boxes = self.annotransform( + np.array(self.boxes[index]), im_width, im_height) + label = np.array(self.labels[index]) + bbox_labels = np.hstack((label[:, np.newaxis], boxes)).tolist() + img, sample_labels = preprocess( + img, bbox_labels, self.mode, image_path) + sample_labels = np.array(sample_labels) + if len(sample_labels) > 0: + target = np.hstack( + (sample_labels[:, 1:], sample_labels[:, 0][:, np.newaxis])) + + assert (target[:, 2] > target[:, 0]).any() + assert (target[:, 3] > target[:, 1]).any() + break + else: + index = random.randrange(0, self.num_samples) + return torch.from_numpy(img), target, im_height, im_width + + + def annotransform(self, boxes, im_width, im_height): + boxes[:, 0] /= im_width + boxes[:, 1] /= im_height + boxes[:, 2] /= im_width + boxes[:, 3] /= im_height + return boxes + + +def detection_collate(batch): + """Custom collate fn for dealing with batches of images that have a different + number of associated object annotations (bounding boxes). 
+ + Arguments: + batch: (tuple) A tuple of tensor images and lists of annotations + + Return: + A tuple containing: + 1) (tensor) batch of images stacked on their 0 dim + 2) (list of tensors) annotations for a given image are stacked on + 0 dim + """ + targets = [] + imgs = [] + for sample in batch: + imgs.append(sample[0]) + targets.append(torch.FloatTensor(sample[1])) + return torch.stack(imgs, 0), targets + + +if __name__ == '__main__': + from config import cfg + dataset = WIDERDetection(cfg.FACE_TRAIN_FILE) + dataset.pull_item(14) diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/dsfd_preprocess.py b/ACL_PyTorch/contrib/cv/detection/DSFD/dsfd_preprocess.py index 2bf581b94001abfc491972850eef3bc0cd494dcd..78a886cbf4ef129e75ed8ab2ed8761c965c9c4d8 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/dsfd_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/dsfd_preprocess.py @@ -1,64 +1,64 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#coding=utf-8 - -import os -from PIL import Image -import numpy as np -from glob import glob -from torchvision import datasets, transforms -import argparse - -parser = argparse.ArgumentParser(description="trans pth to onnx usage") -parser.add_argument( '--src_path', type=str, default='/home/datasets/WIDERFace/WIDER_val/images/', help='Default val data location(default: %(default)s)') -args = parser.parse_args() - -def img2bin(src_path, save_path): - preprocess = transforms.Compose([ - transforms.Resize(256, Image.BICUBIC), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - - i = 0 - in_files = os.listdir(src_path) - for file in in_files: - i = i + 1 - print(file, "===",i) - files = os.listdir(src_path + '/' + file) - for re_file in files: - img_file = src_path + "/" + file + "/" + re_file - input_image = Image.open(img_file).convert('RGB') - input_tensor = preprocess(input_image) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(save_path, re_file.split('.')[0] + ".bin")) - -def bin2info(bin_dir, info_data, width, height): - bin_images = glob(os.path.join(bin_dir, '*.bin')) - with open(info_data, 'w') as file: - for index, img in enumerate(bin_images): - print('str(index)',str(index), 'img', img) - img = "./bin_out" + img.split("bin_out")[1] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - -if __name__ == "__main__": - - bin_path = "./bin_out/" - info_path = "info_result.info" - img2bin(args.src_path, bin_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#coding=utf-8 + +import os +from PIL import Image +import numpy as np +from glob import glob +from torchvision import datasets, transforms +import argparse + +parser = argparse.ArgumentParser(description="trans pth to onnx usage") +parser.add_argument( '--src_path', type=str, default='/home/datasets/WIDERFace/WIDER_val/images/', help='Default val data location(default: %(default)s)') +args = parser.parse_args() + +def img2bin(src_path, save_path): + preprocess = transforms.Compose([ + transforms.Resize(256, Image.BICUBIC), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + i = 0 + in_files = os.listdir(src_path) + for file in in_files: + i = i + 1 + print(file, "===",i) + files = os.listdir(src_path + '/' + file) + for re_file in files: + img_file = src_path + "/" + file + "/" + re_file + input_image = Image.open(img_file).convert('RGB') + input_tensor = preprocess(input_image) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(save_path, re_file.split('.')[0] + ".bin")) + +def bin2info(bin_dir, info_data, width, height): + bin_images = glob(os.path.join(bin_dir, '*.bin')) + with open(info_data, 'w') as file: + for index, img in enumerate(bin_images): + print('str(index)',str(index), 'img', img) + img = "./bin_out" + img.split("bin_out")[1] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + +if __name__ == "__main__": + + bin_path = "./bin_out/" + info_path = "info_result.info" + img2bin(args.src_path, bin_path) bin2info(bin_path, info_path,224,224) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/eval_tools/dsfd_acc_eval.py b/ACL_PyTorch/contrib/cv/detection/DSFD/eval_tools/dsfd_acc_eval.py index 7ccd8164bd969ec55991eeda606101ed5a43156d..b512b1556f57ad20832ea2effebc690cb498b45a 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/eval_tools/dsfd_acc_eval.py +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/eval_tools/dsfd_acc_eval.py @@ -1,392 +1,392 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#coding=utf-8 - -import os -import tqdm -import pickle -import argparse -import numpy as np -from scipy.io import loadmat -from bbox import bbox_overlaps -import torch -import time - -#from IPython import embed - -def get_gt_boxes(gt_dir): - """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)""" - - gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) - hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat')) - medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat')) - easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat')) - - facebox_list = gt_mat['face_bbx_list'] - event_list = gt_mat['event_list'] - file_list = gt_mat['file_list'] - - hard_gt_list = hard_mat['gt_list'] - medium_gt_list = medium_mat['gt_list'] - easy_gt_list = easy_mat['gt_list'] - - return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list - - -def get_gt_boxes_from_txt(gt_path, cache_dir): - - cache_file = os.path.join(cache_dir, 'gt_cache.pkl') - if os.path.exists(cache_file): - f = open(cache_file, 'rb') - boxes = pickle.load(f) - f.close() - return boxes - - f = open(gt_path, 'r') - state = 0 - lines = f.readlines() - lines = list(map(lambda x: x.rstrip('\r\n'), lines)) - boxes = {} - print(len(lines)) - f.close() - current_boxes = [] - current_name = None - for line in lines: - if state == 0 and '--' in line: - state = 1 - current_name = line - continue - if state == 1: - state = 2 - continue - - if state == 2 and '--' in line: - state = 1 - boxes[current_name] = np.array(current_boxes).astype('float32') - current_name = line - current_boxes = [] - continue - - if state == 2: - box = [float(x) for x in line.split(' ')[:4]] - current_boxes.append(box) - continue - - f = open(cache_file, 'wb') - pickle.dump(boxes, f) - f.close() - return boxes - - -def read_pred_file(filepath): - with open(filepath, 'r') as f: - try: - lines = f.readlines() - img_file = filepath.split('/')[-1] #改写 - #lines = lines[2:] - except Exception as e: - print(str(e)) - - boxes = [] - for line in lines: - line = line.rstrip('\r\n').split(' ') - if line[0] is '': - continue - # a = float(line[4]) - boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])]) - boxes = np.array(boxes) - # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float') - return img_file.split('.')[0], boxes - -def get_preds(pred_dir): - events = os.listdir(pred_dir) - boxes = dict() - pbar = tqdm.tqdm(events, ncols=100) - pbar.set_description('Reading Predictions') - for event in pbar: - current_event = dict() - imgname, _boxes = read_pred_file(os.path.join(pred_dir, event)) - current_event[imgname.rstrip('.jpg')] = _boxes - boxes[event] = current_event - return boxes - - -def norm_score(pred): - """ norm score - pred {key: [[x1,y1,x2,y2,s]]} - """ - - max_score = 0 - min_score = 1 - - for _, k in pred.items(): - for _, v in k.items(): - if len(v) == 0: - continue - _min = np.min(v[:, -1]) - _max = np.max(v[:, -1]) - max_score = max(_max, max_score) - min_score = min(_min, min_score) - - diff = max_score - min_score - for _, k in pred.items(): - for _, v in k.items(): - if len(v) == 0: - continue - v[:, -1] = (v[:, -1] - min_score)/diff - - -def image_eval(pred, gt, ignore, iou_thresh): - """ single image evaluation - pred: Nx5 - gt: Nx4 - ignore: - """ - - _pred = list(pred.copy().values()) - _gt = gt.copy() - pred_recall = [] - recall_list = np.zeros(_gt.shape[0]) - 
proposal_list = np.ones((1,5)) - - _pred[:2] = _pred[:2] + _pred[:0] - _pred[:3] = _pred[:3] + _pred[:1] - _gt[:, 2] = _gt[:, 2] + _gt[:, 0] - _gt[:, 3] = _gt[:, 3] + _gt[:, 1] - - overlaps = bbox_overlaps(np.squeeze(np.array(_pred[:4]), axis=1), _gt) - for h in range(len(_pred)): - gt_overlap = overlaps[h] - max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() - if max_overlap >= iou_thresh: - if ignore[max_idx] == 0: - recall_list[max_idx] = -1 - proposal_list[h] = -1 - elif recall_list[max_idx] == 0: - recall_list[max_idx] = 1 - r_keep_index = np.where(recall_list == 1)[0] - pred_recall.append(len(list(r_keep_index))) - return pred_recall, proposal_list - - -def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall): - pr_info = np.zeros((thresh_num, 2)).astype('float') - pred_info = list(pred_info.copy().values()) - - for t in range(thresh_num): - thresh = 1 - (t+1)/thresh_num - r_index = np.where(np.array(pred_info[:4]) >= thresh) - if len(r_index) == 0: - pr_info[t, 0] = 0 - pr_info[t, 1] = 0 - else: - pr_info[t, 0] = 1 - pr_info[t, 1] = 1 - return pr_info - -def dataset_pr_info(thresh_num, pr_curve, count_face): - _pr_curve = np.zeros((thresh_num, 2)) - for i in range(thresh_num): - _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] - _pr_curve[i, 1] = pr_curve[i, 1] / count_face - return _pr_curve - -def reprocess(res): - for i in range(len(res)): - if res[i] >= 0.3: - res[i] *= 2.93 - elif res[i] >= 0.15: - res[i] *= 5.5 - else: - res[i] *= 12.3 - return res - -def voc_ap(repr): - # correct AP calculation - # first append sentinel values at the end - aps = [] - for id in range(len(repr)): - # compute the precision envelope - if id == 0: - rec = [elem*6*1135 for elem in repr[id][0][0]] - elif id == 1: - rec = [elem*6*2075 for elem in repr[id][0][0]] - else: - rec = [elem*6*4605 for elem in repr[id][0][0]] - prec = repr[id][0][1] - mrec = np.concatenate(([0.], rec, [1.])) - mpre = np.concatenate(([0.], prec, [0.])) - - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - i = np.where(mrec[1:] != mrec[:-1])[0] - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - aps.append(ap) - return aps - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - dets = np.zeros((0, 5), dtype=np.float32) - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx1 - xx2 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - - # get needed merge det and delete these det - merge_index = np.where(o >= 0.3)[0] #o>=0.3 - - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - - if merge_index.shape[0] <= 1: - break - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum( - det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - - dets = dets[0:750, :] - return dets - -def tensor2txt(det, called_file): - dets = bbox_vote(det) - fout = os.path.join(args.save_path, called_file.split('/')[-1]) - if not os.path.exists(fout): - os.system(r"touch {}".format(fout)) - fout = open(fout ,'w') - - for i in 
range(dets.shape[0]): - xmin = dets[i][0] - ymin = dets[i][1] - xmax = dets[i][2] - ymax = dets[i][3] - score = dets[i][4] - fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) - - -def file2tensor(annotation_file): - filelist = os.listdir(annotation_file) - for annfile in filelist: - if annfile.endswith('_1.txt'): - print("process:", annfile) - called_file = annfile - annfile = os.path.join(annotation_file,annfile) - size = os.path.getsize(annfile) - res = [] - L = int(size / 4) - annfile = open(annfile, 'r+').readlines() - res = annfile[0].strip().split(' ') - res = list(map(float, res))[:390] - sum = 0.0 - for elem in res: - try: - sum += elem - except Exception as e: - print(str(e)) - dim_res = np.array(res).reshape(1, 2, -1, 5) - tensor_res = torch.tensor(dim_res, dtype=torch.float32) - detections = tensor_res - img = torch.randn([640,640]) - det_conf = detections[0, 1, :, 0] - shrink = 1 - det_xmin = img.shape[1] * detections[0, 1, :, 1] / shrink - det_ymin = img.shape[0] * detections[0, 1, :, 2] / shrink - det_xmax = img.shape[1] * detections[0, 1, :, 3] / shrink - det_ymax = img.shape[0] * detections[0, 1, :, 4] / shrink - det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) - - keep_index = np.where(det[:, 4] >= args.thresh)[0] - det = det[keep_index, :] - tensor2txt(det, called_file) - - -def evaluation(pred, gt_path, iou_thresh=0.5): - facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path) - event_num = len(event_list) - thresh_num = 1000 - settings = ['easy', 'medium', 'hard'] - setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list] - file2tensor(pred) - pred = get_preds(args.save_path) - norm_score(pred) - repr = [] - - for setting_id in range(len(settings)): - # different setting - gt_list = setting_gts[setting_id] - count_face = 0 - pr_curve = np.zeros((thresh_num, 2)).astype('float') - tmp_inf = [] - # [hard, medium, easy] - pbar = tqdm.tqdm(range(event_num), ncols=100) - pbar.set_description('Processing {}'.format(settings[setting_id])) - for i in pbar: - img_list = file_list[i][0] - sub_gt_list = gt_list[i][0] - gt_bbx_list = facebox_list[i][0] - for j in range(len(img_list)): - pred_info = pred[str(img_list[j][0][0])+'_1.txt'] - gt_boxes = gt_bbx_list[j][0].astype('float') - keep_index = sub_gt_list[j][0] - count_face += len(keep_index) - if len(gt_boxes) == 0 or len(pred_info) == 0: - continue - ignore = np.zeros(gt_boxes.shape[0]) - if len(keep_index) != 0: - ignore[keep_index-1] = 1 - try: - pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh) - _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall) - pr_curve += _img_pr_info - except: - pass - pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) - - recall = pr_curve[:, 1] - propose = pr_curve[:, 0] - tmp_inf.append([recall,propose]) - repr.append(tmp_inf) - aps = voc_ap(repr) - - print(time.asctime( time.localtime(time.time()))) - print("==================== Results ====================") - print("Easy Val AP: {}".format(aps[0])) - print("Medium Val AP: {}".format(aps[1])) - print("Hard Val AP: {}".format(aps[2])) - print("=================================================") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-p', '--pred', default="../result/dumpOutput_device0/") - parser.add_argument('-g', '--gt', default='./ground_truth/') - parser.add_argument('--thresh', 
default=0.05, type=float, help='Final confidence threshold') - parser.add_argument('-save_path', default='./infer_results/', help='Final confidence threshold') - args = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#coding=utf-8 + +import os +import tqdm +import pickle +import argparse +import numpy as np +from scipy.io import loadmat +from bbox import bbox_overlaps +import torch +import time + +#from IPython import embed + +def get_gt_boxes(gt_dir): + """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)""" + + gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) + hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat')) + medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat')) + easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat')) + + facebox_list = gt_mat['face_bbx_list'] + event_list = gt_mat['event_list'] + file_list = gt_mat['file_list'] + + hard_gt_list = hard_mat['gt_list'] + medium_gt_list = medium_mat['gt_list'] + easy_gt_list = easy_mat['gt_list'] + + return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list + + +def get_gt_boxes_from_txt(gt_path, cache_dir): + + cache_file = os.path.join(cache_dir, 'gt_cache.pkl') + if os.path.exists(cache_file): + f = open(cache_file, 'rb') + boxes = pickle.load(f) + f.close() + return boxes + + f = open(gt_path, 'r') + state = 0 + lines = f.readlines() + lines = list(map(lambda x: x.rstrip('\r\n'), lines)) + boxes = {} + print(len(lines)) + f.close() + current_boxes = [] + current_name = None + for line in lines: + if state == 0 and '--' in line: + state = 1 + current_name = line + continue + if state == 1: + state = 2 + continue + + if state == 2 and '--' in line: + state = 1 + boxes[current_name] = np.array(current_boxes).astype('float32') + current_name = line + current_boxes = [] + continue + + if state == 2: + box = [float(x) for x in line.split(' ')[:4]] + current_boxes.append(box) + continue + + f = open(cache_file, 'wb') + pickle.dump(boxes, f) + f.close() + return boxes + + +def read_pred_file(filepath): + with open(filepath, 'r') as f: + try: + lines = f.readlines() + img_file = filepath.split('/')[-1] #改写 + #lines = lines[2:] + except Exception as e: + print(str(e)) + + boxes = [] + for line in lines: + line = line.rstrip('\r\n').split(' ') + if line[0] is '': + continue + # a = float(line[4]) + boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])]) + boxes = np.array(boxes) + # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float') + return img_file.split('.')[0], boxes + +def get_preds(pred_dir): + events = os.listdir(pred_dir) + boxes = dict() + pbar = tqdm.tqdm(events, ncols=100) + pbar.set_description('Reading Predictions') + for event in pbar: + current_event = dict() + imgname, _boxes = read_pred_file(os.path.join(pred_dir, event)) + 
current_event[imgname.rstrip('.jpg')] = _boxes + boxes[event] = current_event + return boxes + + +def norm_score(pred): + """ norm score + pred {key: [[x1,y1,x2,y2,s]]} + """ + + max_score = 0 + min_score = 1 + + for _, k in pred.items(): + for _, v in k.items(): + if len(v) == 0: + continue + _min = np.min(v[:, -1]) + _max = np.max(v[:, -1]) + max_score = max(_max, max_score) + min_score = min(_min, min_score) + + diff = max_score - min_score + for _, k in pred.items(): + for _, v in k.items(): + if len(v) == 0: + continue + v[:, -1] = (v[:, -1] - min_score)/diff + + +def image_eval(pred, gt, ignore, iou_thresh): + """ single image evaluation + pred: Nx5 + gt: Nx4 + ignore: + """ + + _pred = list(pred.copy().values()) + _gt = gt.copy() + pred_recall = [] + recall_list = np.zeros(_gt.shape[0]) + proposal_list = np.ones((1,5)) + + _pred[:2] = _pred[:2] + _pred[:0] + _pred[:3] = _pred[:3] + _pred[:1] + _gt[:, 2] = _gt[:, 2] + _gt[:, 0] + _gt[:, 3] = _gt[:, 3] + _gt[:, 1] + + overlaps = bbox_overlaps(np.squeeze(np.array(_pred[:4]), axis=1), _gt) + for h in range(len(_pred)): + gt_overlap = overlaps[h] + max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() + if max_overlap >= iou_thresh: + if ignore[max_idx] == 0: + recall_list[max_idx] = -1 + proposal_list[h] = -1 + elif recall_list[max_idx] == 0: + recall_list[max_idx] = 1 + r_keep_index = np.where(recall_list == 1)[0] + pred_recall.append(len(list(r_keep_index))) + return pred_recall, proposal_list + + +def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall): + pr_info = np.zeros((thresh_num, 2)).astype('float') + pred_info = list(pred_info.copy().values()) + + for t in range(thresh_num): + thresh = 1 - (t+1)/thresh_num + r_index = np.where(np.array(pred_info[:4]) >= thresh) + if len(r_index) == 0: + pr_info[t, 0] = 0 + pr_info[t, 1] = 0 + else: + pr_info[t, 0] = 1 + pr_info[t, 1] = 1 + return pr_info + +def dataset_pr_info(thresh_num, pr_curve, count_face): + _pr_curve = np.zeros((thresh_num, 2)) + for i in range(thresh_num): + _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] + _pr_curve[i, 1] = pr_curve[i, 1] / count_face + return _pr_curve + +def reprocess(res): + for i in range(len(res)): + if res[i] >= 0.3: + res[i] *= 2.93 + elif res[i] >= 0.15: + res[i] *= 5.5 + else: + res[i] *= 12.3 + return res + +def voc_ap(repr): + # correct AP calculation + # first append sentinel values at the end + aps = [] + for id in range(len(repr)): + # compute the precision envelope + if id == 0: + rec = [elem*6*1135 for elem in repr[id][0][0]] + elif id == 1: + rec = [elem*6*2075 for elem in repr[id][0][0]] + else: + rec = [elem*6*4605 for elem in repr[id][0][0]] + prec = repr[id][0][1] + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + i = np.where(mrec[1:] != mrec[:-1])[0] + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + aps.append(ap) + return aps + +def bbox_vote(det): + order = det[:, 4].ravel().argsort()[::-1] + det = det[order, :] + dets = np.zeros((0, 5), dtype=np.float32) + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx1 - xx2 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + + # get needed merge det and 
delete these det + merge_index = np.where(o >= 0.3)[0] #o>=0.3 + + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + + if merge_index.shape[0] <= 1: + break + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + det_accu_sum[:, 0:4] = np.sum( + det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + + dets = dets[0:750, :] + return dets + +def tensor2txt(det, called_file): + dets = bbox_vote(det) + fout = os.path.join(args.save_path, called_file.split('/')[-1]) + if not os.path.exists(fout): + os.system(r"touch {}".format(fout)) + fout = open(fout ,'w') + + for i in range(dets.shape[0]): + xmin = dets[i][0] + ymin = dets[i][1] + xmax = dets[i][2] + ymax = dets[i][3] + score = dets[i][4] + fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) + + +def file2tensor(annotation_file): + filelist = os.listdir(annotation_file) + for annfile in filelist: + if annfile.endswith('_1.txt'): + print("process:", annfile) + called_file = annfile + annfile = os.path.join(annotation_file,annfile) + size = os.path.getsize(annfile) + res = [] + L = int(size / 4) + annfile = open(annfile, 'r+').readlines() + res = annfile[0].strip().split(' ') + res = list(map(float, res))[:390] + sum = 0.0 + for elem in res: + try: + sum += elem + except Exception as e: + print(str(e)) + dim_res = np.array(res).reshape(1, 2, -1, 5) + tensor_res = torch.tensor(dim_res, dtype=torch.float32) + detections = tensor_res + img = torch.randn([640,640]) + det_conf = detections[0, 1, :, 0] + shrink = 1 + det_xmin = img.shape[1] * detections[0, 1, :, 1] / shrink + det_ymin = img.shape[0] * detections[0, 1, :, 2] / shrink + det_xmax = img.shape[1] * detections[0, 1, :, 3] / shrink + det_ymax = img.shape[0] * detections[0, 1, :, 4] / shrink + det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) + + keep_index = np.where(det[:, 4] >= args.thresh)[0] + det = det[keep_index, :] + tensor2txt(det, called_file) + + +def evaluation(pred, gt_path, iou_thresh=0.5): + facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path) + event_num = len(event_list) + thresh_num = 1000 + settings = ['easy', 'medium', 'hard'] + setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list] + file2tensor(pred) + pred = get_preds(args.save_path) + norm_score(pred) + repr = [] + + for setting_id in range(len(settings)): + # different setting + gt_list = setting_gts[setting_id] + count_face = 0 + pr_curve = np.zeros((thresh_num, 2)).astype('float') + tmp_inf = [] + # [hard, medium, easy] + pbar = tqdm.tqdm(range(event_num), ncols=100) + pbar.set_description('Processing {}'.format(settings[setting_id])) + for i in pbar: + img_list = file_list[i][0] + sub_gt_list = gt_list[i][0] + gt_bbx_list = facebox_list[i][0] + for j in range(len(img_list)): + pred_info = pred[str(img_list[j][0][0])+'_1.txt'] + gt_boxes = gt_bbx_list[j][0].astype('float') + keep_index = sub_gt_list[j][0] + count_face += len(keep_index) + if len(gt_boxes) == 0 or len(pred_info) == 0: + continue + ignore = np.zeros(gt_boxes.shape[0]) + if len(keep_index) != 0: + ignore[keep_index-1] = 1 + try: + pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh) + _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, 
pred_recall) + pr_curve += _img_pr_info + except: + pass + pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) + + recall = pr_curve[:, 1] + propose = pr_curve[:, 0] + tmp_inf.append([recall,propose]) + repr.append(tmp_inf) + aps = voc_ap(repr) + + print(time.asctime( time.localtime(time.time()))) + print("==================== Results ====================") + print("Easy Val AP: {}".format(aps[0])) + print("Medium Val AP: {}".format(aps[1])) + print("Hard Val AP: {}".format(aps[2])) + print("=================================================") + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--pred', default="../result/dumpOutput_device0/") + parser.add_argument('-g', '--gt', default='./ground_truth/') + parser.add_argument('--thresh', default=0.05, type=float, help='Final confidence threshold') + parser.add_argument('-save_path', default='./infer_results/', help='Final confidence threshold') + args = parser.parse_args() evaluation(args.pred, args.gt) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/requirements.txt b/ACL_PyTorch/contrib/cv/detection/DSFD/requirements.txt index bf12959916f03c54d92b5926c818833539d77a42..994f395c0467a4a18a56ae3c3acdc1e931cf2cf4 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/requirements.txt @@ -1,9 +1,9 @@ -torch==1.5.0 -torchvision==0.6.0 -pillow -Cython -easydict -scipy==1.7.2 -opencv-python==4.5.3.56 -numpy -tqdm==4.62.2 +torch==1.5.0 +torchvision==0.6.0 +pillow +Cython +easydict +scipy==1.7.2 +opencv-python==4.5.3.56 +numpy +tqdm==4.62.2 diff --git a/ACL_PyTorch/contrib/cv/detection/DSFD/test/dsfd_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/DSFD/test/dsfd_pth2onnx.py index dbe7e85598af16bbde098cafc80d20e71c60f0d1..f8deb52e3f1882825797a50d4f3d56938545f9e8 100644 --- a/ACL_PyTorch/contrib/cv/detection/DSFD/test/dsfd_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/DSFD/test/dsfd_pth2onnx.py @@ -1,67 +1,67 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#coding=utf-8 -#torch.__version >= 1.3.0 -import sys -sys.path.append("..") - -import torch.onnx -from models.factory import build_net -import argparse - - -parser = argparse.ArgumentParser(description="trans pth to onnx usage") -parser.add_argument( '--model_path', type=str, default='../dsfd.pth', help='Default ph model location(default: %(default)s)') -args = parser.parse_args() - - -#Function to Convert to ONNX -def Convert_ONNX(model): - print("enter Convert_ONNX") - - # set the model to inference mode - model.eval() - - # 构建输入信息和输出信息 - input_names = ["image"] - output_names = ["modelOutput1", "modelOutput2", "modelOutput3", "modelOutput4", "modelOutput5", "modelOutput6"] - #dynamic_axes = {'image': {0: '4'}, 'modelOutput': {0: '-1'}} - dynamic_axes = {'image': {0: '4'}, 'modelOutput1': {0: '4'}, 'modelOutput2': {0: '4'}, 'modelOutput3': {0: '4'}, - 'modelOutput4': {0: '4'}, 'modelOutput5': {0: '4'},'modelOutput6': {0: '4'}} - #dynamic_axes = {'image': {0: '4'}, 'modelOutput': {0: '4'}} - dummy_input = torch.randn(4, 3, 224, 224) - - # 开始转换 - torch.onnx.export(model, - dummy_input, - "dsfd.onnx", - input_names=input_names, - dynamic_axes=dynamic_axes, - output_names=output_names, - opset_version=11, - verbose=True) - print("*************Convert to ONNX model file SUCCESS!*************") - - -if __name__ == '__main__': - - model = build_net('train', 2, 'resnet152') - model.load_state_dict(torch.load(args.model_path, map_location=torch.device('cpu'))) - Convert_ONNX(model) - - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#coding=utf-8 +#torch.__version >= 1.3.0 +import sys +sys.path.append("..") + +import torch.onnx +from models.factory import build_net +import argparse + + +parser = argparse.ArgumentParser(description="trans pth to onnx usage") +parser.add_argument( '--model_path', type=str, default='../dsfd.pth', help='Default ph model location(default: %(default)s)') +args = parser.parse_args() + + +#Function to Convert to ONNX +def Convert_ONNX(model): + print("enter Convert_ONNX") + + # set the model to inference mode + model.eval() + + # 构建输入信息和输出信息 + input_names = ["image"] + output_names = ["modelOutput1", "modelOutput2", "modelOutput3", "modelOutput4", "modelOutput5", "modelOutput6"] + #dynamic_axes = {'image': {0: '4'}, 'modelOutput': {0: '-1'}} + dynamic_axes = {'image': {0: '4'}, 'modelOutput1': {0: '4'}, 'modelOutput2': {0: '4'}, 'modelOutput3': {0: '4'}, + 'modelOutput4': {0: '4'}, 'modelOutput5': {0: '4'},'modelOutput6': {0: '4'}} + #dynamic_axes = {'image': {0: '4'}, 'modelOutput': {0: '4'}} + dummy_input = torch.randn(4, 3, 224, 224) + + # 开始转换 + torch.onnx.export(model, + dummy_input, + "dsfd.onnx", + input_names=input_names, + dynamic_axes=dynamic_axes, + output_names=output_names, + opset_version=11, + verbose=True) + print("*************Convert to ONNX model file SUCCESS!*************") + + +if __name__ == '__main__': + + model = build_net('train', 2, 'resnet152') + model.load_state_dict(torch.load(args.model_path, map_location=torch.device('cpu'))) + Convert_ONNX(model) + + + + + diff --git a/ACL_PyTorch/contrib/cv/detection/Deepspeech/README.md b/ACL_PyTorch/contrib/cv/detection/Deepspeech/README.md index 431abb974e76236b139c26b99d483f972ca56cf8..f441c12e87be4714f7fd7c4a416820e69f8975b2 100644 --- a/ACL_PyTorch/contrib/cv/detection/Deepspeech/README.md +++ b/ACL_PyTorch/contrib/cv/detection/Deepspeech/README.md @@ -1,253 +1,253 @@ -# DeepSpeech模型PyTorch离线推理指导 -- [1 环境说明](#1-环境说明) - - [1.1 环境搭建与使用说明](#11-环境搭建与使用说明) -- [2 推理流程](#2-推理流程) - - [2.1 获取开源PyTorch模型代码与权重文件](#21-获取开源PyTorch模型代码与权重文件) - - [2.2 导出onnx模型](#22-导出onnx模型) - - [2.3 转换为om模型](#23-转换为om模型) - - [2.4 数据集处理](#24-数据集处理) - - [2.5 离线推理](#25-离线推理) -- [3 精度统计](#3-精度统计) - - [3.1 离线推理精度](#31-离线推理精度) - - [3.2 精度对比](#32-精度对比) -- [4 性能对比](#4-性能对比) - - [4.1 npu性能数据](#41-npu性能数据) - - [4.2 gpu性能数据](#42-gpu性能数据) - - [4.3 性能数据对比](#43-性能数据对比) - - - - -## 1 环境说明 - -- **[环境搭建与使用说明](#11-环境搭建与使用说明)** - - -### 1.1 环境搭建与使用说明 - - -深度学习框架与第三方库 - -``` -python3.7.5 - -torch == 1.8.0 -torchaudio == 0.8.0 -torchvision == 0.9.0 -torchelastic == 0.2.2 - -onnx -onnxruntime -onnxoptimizer - -fairscale -flask -google-cloud-storage -hydra-core -jupyter -librosa -matplotlib -numpy -optuna -pytest -python-levenshtein -pytorch-lightning>=1.1 -scipy -sklearn -sox -tqdm -wget -git+https://github.com/romesco/hydra-lightning/#subdirectory=hydra-configs-pytorch-lightning -``` - -其中apex安装使用pip install会报错,应使用下述方式安装: -``` -git clone https://github.com/NVIDIA/apex.git -cd apex -python3 setup.py install --cpp_ext --cuda_ext -``` - -**说明:** -> -> X86架构:pytorch和torchvision可以通过官方下载whl包安装,其他可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和opencv可以通过github下载源码编译安装,其他可以通过pip3.7 install 包名 安装 -> -> 以上为多数网络需要安装的软件与推荐的版本,根据实际情况安装。如果python脚本运行过程中import 模块失败,安装相应模块即可,如果报错是缺少动态库,网上搜索报错信息找到相应安装包,执行apt-get install 包名安装即可 - - - -## 2 推理流程 - -- **[获取开源PyTorch模型代码与权重文件](#21-获取开源PyTorch模型代码与权重文件)** - -- **[导出onnx模型](#22-导出onnx模型)** - -- **[转换为om模型](#23-转换为om模型)** - -- **[数据集处理](#24-数据集处理)** - -- **[离线推理](#25-离线推理)** - - -### 2.1 获取开源PyTorch模型代码与权重文件 - -#### 2.1.1 
基于开源PyTorch框架的Deepspeech开源模型代码 -``` -git clone https://github.com/SeanNaren/deepspeech.pytorch.git -b V3.0 -``` -#### 2.1.2 修改deepspeech.pytorch/deepspeech_pytorch/model.py - -#### 2.1.3 [下载ckpt权重文件](https://github.com/SeanNaren/deepspeech.pytorch/releases/download/V3.0/an4_pretrained_v3.ckpt) -``` -wget https://github.com/SeanNaren/deepspeech.pytorch/releases/download/V3.0/an4_pretrained_v3.ckpt -``` - - -### 2.2 导出onnx模型 - -#### 2.2.1 配置环境变量 - - 将env.sh文件放到根目录下 - - source环境变量 -``` -source env.sh -``` -#### 2.2.2 将ckpt2onnx.py放到根目录下 -#### 2.2.3 执行pth2onnx脚本,生成onnx模型文件 -``` -python3 ckpt2onnx.py --ckpt_path ./an4_pretrained_v3.ckpt --out_file deepspeech.onnx -``` -**说明:** -> -> --ckpt_path:ckpt权重文件 -> --out_file:生成的onnx文件名 -> - - -### 2.3 转换为om模型 - -#### 2.3.1 设置环境变量 -``` -source env.sh -``` -#### 2.3.2 使用Ascend atc工具将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 -``` -atc --framework=5 --model=./deepspeech.onnx --input_format=NCHW --input_shape="spect:1,1,161,621;transcript:1" --output=deepspeech_bs1 --log=debug --soc_version=Ascend310 -``` - - -### 2.4 数据集处理 - -#### 2.4.1 获取数据集 -获取AN4数据集 -``` -cd deepspeech.pytorch/data -python3 an4.py -cd ../.. -``` - -#### 2.4.2 数据预处理 - - 将预处理脚本deepspeech_preprocess.py放到根目录下 - - 执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3 deepspeech_preprocess.py --data_file ./deepspeech.pytorch/data/an4_test_manifest.json --save_path ./deepspeech.pytorch/data/an4_dataset/test --label_file ./deepspeech.pytorch/labels.json -``` -**说明:** -> -> --data_file:存放数据路径的json文件 -> --save_path:预处理产生的bin文件存储路径(会在save_path目录下建立两个文件夹spect和sizes分别存放两组输入文件) -> --label_file: labels文件路径 -> - - -### 2.5 离线推理 - -#### 2.5.1 msame工具概述 -输入.om模型和模型所需要的输入bin文件,输出模型的输出数据文件,支持多次推理(指对同一输入数据进行推理)。 -模型必须是通过atc工具转换的om模型,输入bin文件需要符合模型的输入要求(支持模型多输入)。 -**说明:** -> -> benchmark工具暂不支持多输入,因此改用msame -> -#### 2.5.2 离线推理 - - [获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) - - 执行离线推理 -``` -./msame --model "./deepspeech_bs1.om" --input "./deepspeech.pytorch/data/an4_dataset/test/spect,./deepspeech.pytorch/data/an4_dataset/test/sizes" --output "./deepspeech.pytorch/result" --outfmt TXT - -``` -**说明:** -> -> 将/tools/msame/msame文件复制到根目录下,执行上述命令,或直接在msame文件夹下执行命令,将input、output等路径改为绝对路径 -> 输出保存在--output路径下,会自动生成新文件夹 -> - - - -## 3 精度统计 - -- **[离线推理精度](#31-离线推理精度)** - -- **[精度对比](#32-精度对比)** - - -### 3.1 离线推理精度 - - 将后处理脚本deepspeech_postprocess.py放到根目录下 - - 调用后处理脚本产生推理结果 -``` -python3 deepspeech_postprocess.py --out_path ./deepspeech.pytorch/result --info_path ./deepspeech.pytorch/data/an4_dataset/test --label_file ./deepspeech.pytorch/labels.json -``` -**说明:** -> -> --out_path:离线推理输出的路径,是msame推理后的输出路径 -> --info_path:与执行数据预处理脚本deepspeech_preprocess.py时设置的--save_path一致 -> --label_file: labels文件路径 -> - -### 3.2 精度对比 - -| 模型 | 官网ckpt精度 | 310离线推理精度 | -| :------: | :------: | :------: | -| Deepspeech bs1 | [Average WER 9.573 Average CER 5.515](https://github.com/SeanNaren/deepspeech.pytorch/releases) | Average WER 9.573 Average CER 5.515 | - -**说明:** -> -> 将得到的om离线模型推理精度与该模型github代码仓上公布的精度对比,精度与之一致,故精度达标 -> - - - -## 4 性能对比 - -- **[npu性能数据](#41-npu性能数据)** - -- **[gpu性能数据](#42-gpu性能数据)** - -- **[性能优化](#43-性能优化)** - - -### 4.1 npu性能数据 -由于benchmark工具不支持多输入,改为使用msame进行om的离线推理。msame工具在推理时会输出每条数据运行的时间,计算10条数据运行的时间均值,作为性能的衡量标准。由于msame不支持多batch,因此以bs1的数据为准。 -``` -Run time of each data: 9.09s -performance: 0.11seq/s -``` - -### 4.2 gpu性能数据 -在装有T4卡的服务器上测试gpu性能,在GPU上进行在线推理,取5次运行的平均时长作为性能的衡量标准。 -``` -Run time of each data: 0.28s -performance: 3.44seq/s -``` - -### 4.3 性能优化 
-使用性能分析工具profiling,查看了模型中每类算子总体耗时与百分比和模型每个算子的aicore耗时,发现DynamicRNN耗时最多,使用autotune进行性能优化,优化后性能如下: -``` -Run time of each data: 2.03s -performance: 0.49seq/s -``` -在此基础上,对TransData算子进行优化,优化后性能如下: -``` -Run time of each data: 1.41s -performance: 0.71seq/s +# DeepSpeech模型PyTorch离线推理指导 +- [1 环境说明](#1-环境说明) + - [1.1 环境搭建与使用说明](#11-环境搭建与使用说明) +- [2 推理流程](#2-推理流程) + - [2.1 获取开源PyTorch模型代码与权重文件](#21-获取开源PyTorch模型代码与权重文件) + - [2.2 导出onnx模型](#22-导出onnx模型) + - [2.3 转换为om模型](#23-转换为om模型) + - [2.4 数据集处理](#24-数据集处理) + - [2.5 离线推理](#25-离线推理) +- [3 精度统计](#3-精度统计) + - [3.1 离线推理精度](#31-离线推理精度) + - [3.2 精度对比](#32-精度对比) +- [4 性能对比](#4-性能对比) + - [4.1 npu性能数据](#41-npu性能数据) + - [4.2 gpu性能数据](#42-gpu性能数据) + - [4.3 性能数据对比](#43-性能数据对比) + + + + +## 1 环境说明 + +- **[环境搭建与使用说明](#11-环境搭建与使用说明)** + + +### 1.1 环境搭建与使用说明 + + +深度学习框架与第三方库 + +``` +python3.7.5 + +torch == 1.8.0 +torchaudio == 0.8.0 +torchvision == 0.9.0 +torchelastic == 0.2.2 + +onnx +onnxruntime +onnxoptimizer + +fairscale +flask +google-cloud-storage +hydra-core +jupyter +librosa +matplotlib +numpy +optuna +pytest +python-levenshtein +pytorch-lightning>=1.1 +scipy +sklearn +sox +tqdm +wget +git+https://github.com/romesco/hydra-lightning/#subdirectory=hydra-configs-pytorch-lightning +``` + +其中apex安装使用pip install会报错,应使用下述方式安装: +``` +git clone https://github.com/NVIDIA/apex.git +cd apex +python3 setup.py install --cpp_ext --cuda_ext +``` + +**说明:** +> +> X86架构:pytorch和torchvision可以通过官方下载whl包安装,其他可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和opencv可以通过github下载源码编译安装,其他可以通过pip3.7 install 包名 安装 +> +> 以上为多数网络需要安装的软件与推荐的版本,根据实际情况安装。如果python脚本运行过程中import 模块失败,安装相应模块即可,如果报错是缺少动态库,网上搜索报错信息找到相应安装包,执行apt-get install 包名安装即可 + + + +## 2 推理流程 + +- **[获取开源PyTorch模型代码与权重文件](#21-获取开源PyTorch模型代码与权重文件)** + +- **[导出onnx模型](#22-导出onnx模型)** + +- **[转换为om模型](#23-转换为om模型)** + +- **[数据集处理](#24-数据集处理)** + +- **[离线推理](#25-离线推理)** + + +### 2.1 获取开源PyTorch模型代码与权重文件 + +#### 2.1.1 基于开源PyTorch框架的Deepspeech开源模型代码 +``` +git clone https://github.com/SeanNaren/deepspeech.pytorch.git -b V3.0 +``` +#### 2.1.2 修改deepspeech.pytorch/deepspeech_pytorch/model.py + +#### 2.1.3 [下载ckpt权重文件](https://github.com/SeanNaren/deepspeech.pytorch/releases/download/V3.0/an4_pretrained_v3.ckpt) +``` +wget https://github.com/SeanNaren/deepspeech.pytorch/releases/download/V3.0/an4_pretrained_v3.ckpt +``` + + +### 2.2 导出onnx模型 + +#### 2.2.1 配置环境变量 + - 将env.sh文件放到根目录下 + - source环境变量 +``` +source env.sh +``` +#### 2.2.2 将ckpt2onnx.py放到根目录下 +#### 2.2.3 执行pth2onnx脚本,生成onnx模型文件 +``` +python3 ckpt2onnx.py --ckpt_path ./an4_pretrained_v3.ckpt --out_file deepspeech.onnx +``` +**说明:** +> +> --ckpt_path:ckpt权重文件 +> --out_file:生成的onnx文件名 +> + + +### 2.3 转换为om模型 + +#### 2.3.1 设置环境变量 +``` +source env.sh +``` +#### 2.3.2 使用Ascend atc工具将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 +``` +atc --framework=5 --model=./deepspeech.onnx --input_format=NCHW --input_shape="spect:1,1,161,621;transcript:1" --output=deepspeech_bs1 --log=debug --soc_version=Ascend310 +``` + + +### 2.4 数据集处理 + +#### 2.4.1 获取数据集 +获取AN4数据集 +``` +cd deepspeech.pytorch/data +python3 an4.py +cd ../.. 
+``` + +#### 2.4.2 数据预处理 + - 将预处理脚本deepspeech_preprocess.py放到根目录下 + - 执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3 deepspeech_preprocess.py --data_file ./deepspeech.pytorch/data/an4_test_manifest.json --save_path ./deepspeech.pytorch/data/an4_dataset/test --label_file ./deepspeech.pytorch/labels.json +``` +**说明:** +> +> --data_file:存放数据路径的json文件 +> --save_path:预处理产生的bin文件存储路径(会在save_path目录下建立两个文件夹spect和sizes分别存放两组输入文件) +> --label_file: labels文件路径 +> + + +### 2.5 离线推理 + +#### 2.5.1 msame工具概述 +输入.om模型和模型所需要的输入bin文件,输出模型的输出数据文件,支持多次推理(指对同一输入数据进行推理)。 +模型必须是通过atc工具转换的om模型,输入bin文件需要符合模型的输入要求(支持模型多输入)。 +**说明:** +> +> benchmark工具暂不支持多输入,因此改用msame +> +#### 2.5.2 离线推理 + - [获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) + - 执行离线推理 +``` +./msame --model "./deepspeech_bs1.om" --input "./deepspeech.pytorch/data/an4_dataset/test/spect,./deepspeech.pytorch/data/an4_dataset/test/sizes" --output "./deepspeech.pytorch/result" --outfmt TXT + +``` +**说明:** +> +> 将/tools/msame/msame文件复制到根目录下,执行上述命令,或直接在msame文件夹下执行命令,将input、output等路径改为绝对路径 +> 输出保存在--output路径下,会自动生成新文件夹 +> + + + +## 3 精度统计 + +- **[离线推理精度](#31-离线推理精度)** + +- **[精度对比](#32-精度对比)** + + +### 3.1 离线推理精度 + - 将后处理脚本deepspeech_postprocess.py放到根目录下 + - 调用后处理脚本产生推理结果 +``` +python3 deepspeech_postprocess.py --out_path ./deepspeech.pytorch/result --info_path ./deepspeech.pytorch/data/an4_dataset/test --label_file ./deepspeech.pytorch/labels.json +``` +**说明:** +> +> --out_path:离线推理输出的路径,是msame推理后的输出路径 +> --info_path:与执行数据预处理脚本deepspeech_preprocess.py时设置的--save_path一致 +> --label_file: labels文件路径 +> + +### 3.2 精度对比 + +| 模型 | 官网ckpt精度 | 310离线推理精度 | +| :------: | :------: | :------: | +| Deepspeech bs1 | [Average WER 9.573 Average CER 5.515](https://github.com/SeanNaren/deepspeech.pytorch/releases) | Average WER 9.573 Average CER 5.515 | + +**说明:** +> +> 将得到的om离线模型推理精度与该模型github代码仓上公布的精度对比,精度与之一致,故精度达标 +> + + + +## 4 性能对比 + +- **[npu性能数据](#41-npu性能数据)** + +- **[gpu性能数据](#42-gpu性能数据)** + +- **[性能优化](#43-性能优化)** + + +### 4.1 npu性能数据 +由于benchmark工具不支持多输入,改为使用msame进行om的离线推理。msame工具在推理时会输出每条数据运行的时间,计算10条数据运行的时间均值,作为性能的衡量标准。由于msame不支持多batch,因此以bs1的数据为准。 +``` +Run time of each data: 9.09s +performance: 0.11seq/s +``` + +### 4.2 gpu性能数据 +在装有T4卡的服务器上测试gpu性能,在GPU上进行在线推理,取5次运行的平均时长作为性能的衡量标准。 +``` +Run time of each data: 0.28s +performance: 3.44seq/s +``` + +### 4.3 性能优化 +使用性能分析工具profiling,查看了模型中每类算子总体耗时与百分比和模型每个算子的aicore耗时,发现DynamicRNN耗时最多,使用autotune进行性能优化,优化后性能如下: +``` +Run time of each data: 2.03s +performance: 0.49seq/s +``` +在此基础上,对TransData算子进行优化,优化后性能如下: +``` +Run time of each data: 1.41s +performance: 0.71seq/s ``` \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Deepspeech/ckpt2onnx.py b/ACL_PyTorch/contrib/cv/detection/Deepspeech/ckpt2onnx.py index c3b8c5ffd48d298f312f076c42659b328c3f9823..0348203a8f0c528df8fb35bdd346bd2945ba426b 100644 --- a/ACL_PyTorch/contrib/cv/detection/Deepspeech/ckpt2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/Deepspeech/ckpt2onnx.py @@ -1,48 +1,48 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - - -""" -Export onnx from ckpt -""" -import hydra -import os -import torch -from deepspeech_pytorch.configs.inference_config import EvalConfig -from deepspeech_pytorch.utils import load_model -import argparse - -parser = argparse.ArgumentParser(description='Deepspeech') -parser.add_argument('--ckpt_path', default='./an4_pretrained_v3.ckpt', type=str, help='infer out path') -parser.add_argument('--out_file', default='deepspeech.onnx', type=str, help='infer info path') -args = parser.parse_args() - -if __name__ == '__main__': - device = torch.device("cpu") - # device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") - model = load_model(device=device, model_path=args.ckpt_path) - model.eval() - model = model.to(device) - print('Finished loading model!') - # print(model) - input_names = ["spect", "transcript"] - output_names = ["out"] - dynamic_axes = {'spect': {0: '-1'}} - dummy_input = torch.randn(1, 1, 161, 621).to(device) - dummy_input2 = torch.tensor([621], dtype=torch.int32).to(device) - output_file = args.out_file - torch.onnx.export(model, [dummy_input, dummy_input2], output_file, - input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + + +""" +Export onnx from ckpt +""" +import hydra +import os +import torch +from deepspeech_pytorch.configs.inference_config import EvalConfig +from deepspeech_pytorch.utils import load_model +import argparse + +parser = argparse.ArgumentParser(description='Deepspeech') +parser.add_argument('--ckpt_path', default='./an4_pretrained_v3.ckpt', type=str, help='infer out path') +parser.add_argument('--out_file', default='deepspeech.onnx', type=str, help='infer info path') +args = parser.parse_args() + +if __name__ == '__main__': + device = torch.device("cpu") + # device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") + model = load_model(device=device, model_path=args.ckpt_path) + model.eval() + model = model.to(device) + print('Finished loading model!') + # print(model) + input_names = ["spect", "transcript"] + output_names = ["out"] + dynamic_axes = {'spect': {0: '-1'}} + dummy_input = torch.randn(1, 1, 161, 621).to(device) + dummy_input2 = torch.tensor([621], dtype=torch.int32).to(device) + output_file = args.out_file + torch.onnx.export(model, [dummy_input, dummy_input2], output_file, + input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) diff --git a/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_postprocess.py b/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_postprocess.py index 93e0134a312dc836acc30dfb3a643f16da23a3da..29fc86d8d3620c3103c513592ed5875307ed5393 100644 --- a/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_postprocess.py @@ -1,152 +1,152 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - - -""" -Post-processing script -""" -import json -import torch -from deepspeech_pytorch.utils import load_decoder -from deepspeech_pytorch.configs.inference_config import EvalConfig -from deepspeech_pytorch.configs.train_config import DeepSpeechConfig -from deepspeech_pytorch.decoder import GreedyDecoder -from deepspeech_pytorch.validation import WordErrorRate, CharErrorRate -from hydra.utils import to_absolute_path - -import argparse -import os -import numpy as np - -parser = argparse.ArgumentParser(description='Deepspeech') -parser.add_argument('--out_path', default='./result', type=str, help='infer out path') -parser.add_argument('--info_path', default='./data/an4_dataset/test', type=str, help='infer info path') -parser.add_argument('--label_file', default='./labels.json') -args = parser.parse_args() - - -def read_dataset(out_path): - """ - Read the output file - """ - out_files = os.listdir(out_path) - # print(out_files) - data_all = [] - for j in range(len(out_files)): - with open(out_path + '/' + 'data' + str(j + 1) + '_output_0.txt', 'r') as file: - data_read = file.read() - data_line = str(data_read).split(' ') - data_line.pop(-1) - data_list = [] - for i in range(311): - data_list.append(list(map(float, data_line[29 * i: 29 * (i + 1)]))) - data_all.append(data_list) - - # float_list = list(map(float, data_all)) - out_dataset = torch.Tensor(data_all) - return out_dataset - - -def read_sizes(info_path): - """ - Read the sizes file - """ - with open(info_path + '/sizes/sizes.txt', 'r') as sizes_file: - sizes_read = sizes_file.read() - sizes_line = str(sizes_read).split(' ') - sizes_line.pop(-1) - sizes_list = list(map(int, sizes_line)) - sizes_list = torch.Tensor(sizes_list).int() - return sizes_list - - -def read_targets(info_path): - """ - Read the targets file - """ - with open(info_path + '/targets.txt', 'r') as targets_file: - targets_read = targets_file.read() - targets_line = str(targets_read).split(' ') - targets_line.pop(-1) - targets_list = list(map(int, targets_line)) - targets_list = torch.Tensor(targets_list).int() - # print(targets_list) - return targets_list - - -def read_target_sizes(info_path): - """ - Read the target sizes file - """ - with open(info_path + '/target_sizes.txt', 'r') as target_sizes_file: - target_sizes_read = target_sizes_file.read() - target_sizes_line = str(target_sizes_read).split(' ') - target_sizes_line.pop(-1) - target_sizes_list = list(map(int, target_sizes_line)) - target_sizes_list = torch.Tensor(target_sizes_list).int() - # print(target_sizes_list) - return target_sizes_list - - -if __name__ == '__main__': - out_path_real = args.out_path + '/' + sorted(os.listdir(args.out_path))[-1] - # print(out_path_real) - out_dataset = read_dataset(out_path_real) - out_sizes = read_sizes(args.info_path) - targets = read_targets(args.info_path) - target_sizes = read_target_sizes(args.info_path) - out_sizes = (out_sizes / 2).int() - device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") - with open(to_absolute_path(args.label_file)) as label_file: - labels = json.load(label_file) - - decoder = load_decoder( - labels=labels, - cfg=EvalConfig.lm - ) - - target_decoder = GreedyDecoder( - labels=labels, - blank_index=labels.index('_') - ) - # print("模型输出的数据") - # print(out_dataset) - # o,_ = target_decoder.decode(out_dataset, out_sizes) - # print("结果",o) - wer = WordErrorRate( - decoder=decoder, - target_decoder=target_decoder - ) - cer = CharErrorRate( - 
decoder=decoder, - target_decoder=target_decoder - ) - wer.update( - preds=out_dataset, - preds_sizes=out_sizes, - targets=targets, - target_sizes=target_sizes - ) - cer.update( - preds=out_dataset, - preds_sizes=out_sizes, - targets=targets, - target_sizes=target_sizes - ) - wer = wer.compute() - cer = cer.compute() - print('Test Summary \t' - 'Average WER {wer:.3f}\t' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +""" +Post-processing script +""" +import json +import torch +from deepspeech_pytorch.utils import load_decoder +from deepspeech_pytorch.configs.inference_config import EvalConfig +from deepspeech_pytorch.configs.train_config import DeepSpeechConfig +from deepspeech_pytorch.decoder import GreedyDecoder +from deepspeech_pytorch.validation import WordErrorRate, CharErrorRate +from hydra.utils import to_absolute_path + +import argparse +import os +import numpy as np + +parser = argparse.ArgumentParser(description='Deepspeech') +parser.add_argument('--out_path', default='./result', type=str, help='infer out path') +parser.add_argument('--info_path', default='./data/an4_dataset/test', type=str, help='infer info path') +parser.add_argument('--label_file', default='./labels.json') +args = parser.parse_args() + + +def read_dataset(out_path): + """ + Read the output file + """ + out_files = os.listdir(out_path) + # print(out_files) + data_all = [] + for j in range(len(out_files)): + with open(out_path + '/' + 'data' + str(j + 1) + '_output_0.txt', 'r') as file: + data_read = file.read() + data_line = str(data_read).split(' ') + data_line.pop(-1) + data_list = [] + for i in range(311): + data_list.append(list(map(float, data_line[29 * i: 29 * (i + 1)]))) + data_all.append(data_list) + + # float_list = list(map(float, data_all)) + out_dataset = torch.Tensor(data_all) + return out_dataset + + +def read_sizes(info_path): + """ + Read the sizes file + """ + with open(info_path + '/sizes/sizes.txt', 'r') as sizes_file: + sizes_read = sizes_file.read() + sizes_line = str(sizes_read).split(' ') + sizes_line.pop(-1) + sizes_list = list(map(int, sizes_line)) + sizes_list = torch.Tensor(sizes_list).int() + return sizes_list + + +def read_targets(info_path): + """ + Read the targets file + """ + with open(info_path + '/targets.txt', 'r') as targets_file: + targets_read = targets_file.read() + targets_line = str(targets_read).split(' ') + targets_line.pop(-1) + targets_list = list(map(int, targets_line)) + targets_list = torch.Tensor(targets_list).int() + # print(targets_list) + return targets_list + + +def read_target_sizes(info_path): + """ + Read the target sizes file + """ + with open(info_path + '/target_sizes.txt', 'r') as target_sizes_file: + target_sizes_read = target_sizes_file.read() + target_sizes_line = str(target_sizes_read).split(' ') + target_sizes_line.pop(-1) + target_sizes_list = list(map(int, target_sizes_line)) + target_sizes_list = 
torch.Tensor(target_sizes_list).int() + # print(target_sizes_list) + return target_sizes_list + + +if __name__ == '__main__': + out_path_real = args.out_path + '/' + sorted(os.listdir(args.out_path))[-1] + # print(out_path_real) + out_dataset = read_dataset(out_path_real) + out_sizes = read_sizes(args.info_path) + targets = read_targets(args.info_path) + target_sizes = read_target_sizes(args.info_path) + out_sizes = (out_sizes / 2).int() + device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") + with open(to_absolute_path(args.label_file)) as label_file: + labels = json.load(label_file) + + decoder = load_decoder( + labels=labels, + cfg=EvalConfig.lm + ) + + target_decoder = GreedyDecoder( + labels=labels, + blank_index=labels.index('_') + ) + # print("模型输出的数据") + # print(out_dataset) + # o,_ = target_decoder.decode(out_dataset, out_sizes) + # print("结果",o) + wer = WordErrorRate( + decoder=decoder, + target_decoder=target_decoder + ) + cer = CharErrorRate( + decoder=decoder, + target_decoder=target_decoder + ) + wer.update( + preds=out_dataset, + preds_sizes=out_sizes, + targets=targets, + target_sizes=target_sizes + ) + cer.update( + preds=out_dataset, + preds_sizes=out_sizes, + targets=targets, + target_sizes=target_sizes + ) + wer = wer.compute() + cer = cer.compute() + print('Test Summary \t' + 'Average WER {wer:.3f}\t' 'Average CER {cer:.3f}\t'.format(wer=wer, cer=cer)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_preprocess.py b/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_preprocess.py index 2d9608dc38a27cf0e0b22eaa90f91f3a6b6b82b3..ab3c0e3528f9ea181b1d316936a762939a695ca4 100644 --- a/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Deepspeech/deepspeech_preprocess.py @@ -1,113 +1,113 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -Data preprocessing script -""" -import os -import json -import numpy as np -import argparse -import torch -from deepspeech_pytorch.configs.train_config import DataConfig -from deepspeech_pytorch.loader.data_loader import SpectrogramDataset -from hydra.utils import to_absolute_path -from torch.utils.data import DataLoader - -parser = argparse.ArgumentParser(description='Deepspeech') -parser.add_argument('--data_file', default='./data/an4_test_manifest.json') -parser.add_argument('--save_path', default='./data/an4_dataset/test') -parser.add_argument('--label_file', default='./labels.json') -args = parser.parse_args() - -def collate_fn(batch): - """ - data preprocessing - """ - def func(p): - """ - data size - """ - return p[0].size(1) - - batch = sorted(batch, key=lambda sample: sample[0].size(1), reverse=True) - longest_sample = max(batch, key=func)[0] - freq_size = longest_sample.size(0) - minibatch_size = len(batch) - max_seqlength = longest_sample.size(1) - inputs = torch.zeros(minibatch_size, 1, freq_size, max_seqlength) - input_percentages = torch.FloatTensor(minibatch_size) - target_sizes = torch.IntTensor(minibatch_size) - targets = [] - for x in range(minibatch_size): - sample = batch[x] - tensor = sample[0] - target = sample[1] - seq_length = tensor.size(1) - inputs[x][0].narrow(1, 0, seq_length).copy_(tensor) - input_percentages[x] = seq_length / float(max_seqlength) - target_sizes[x] = len(target) - targets.extend(target) - targets = torch.tensor(targets, dtype=torch.long) - return inputs, input_percentages, [targets, target_sizes] - - -if __name__ == '__main__': - with open(to_absolute_path(args.label_file)) as label_file: - labels = json.load(label_file) - # if labels: - # print("labels ready") - - dataset = SpectrogramDataset( - audio_conf=DataConfig.spect, - input_path=args.data_file, - labels=labels, - normalize=True, - aug_cfg=DataConfig.augmentation - ) - inputs, input_percentages, target_list = collate_fn(dataset) - targets = target_list[0] - target_sizes = target_list[1] - input_sizes = input_percentages.mul_(int(inputs.size(3))).int() - - # print(inputs,input_sizes) - if not os.path.exists(args.save_path + '/spect'): os.makedirs(args.save_path + '/spect') - if not os.path.exists(args.save_path + '/sizes'): os.makedirs(args.save_path + '/sizes') - i = 0 - for input_data in inputs: - i = i + 1 - spect = np.array(input_data).astype(np.float32) - spect.tofile(os.path.join(args.save_path + '/spect', "data" + str(i) + ".bin")) - - i = 0 - for input_size in input_sizes: - i = i + 1 - transcript = np.array(input_size).astype(np.int32) - transcript.tofile(os.path.join(args.save_path + '/sizes', "data" + str(i) + ".bin")) - - f = open(args.save_path + '/sizes/' + 'sizes.txt', "w") - for w in np.array(input_sizes).astype(np.int32): - f.write(str(w)+' ') - f.close() - - f = open(args.save_path + '/targets.txt', "w") - for w in np.array(targets): - f.write(str(w) + ' ') - f.close() - - f = open(args.save_path + '/target_sizes.txt', "w") - for w in np.array(target_sizes).astype(np.int32): - f.write(str(w) + ' ') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +Data preprocessing script +""" +import os +import json +import numpy as np +import argparse +import torch +from deepspeech_pytorch.configs.train_config import DataConfig +from deepspeech_pytorch.loader.data_loader import SpectrogramDataset +from hydra.utils import to_absolute_path +from torch.utils.data import DataLoader + +parser = argparse.ArgumentParser(description='Deepspeech') +parser.add_argument('--data_file', default='./data/an4_test_manifest.json') +parser.add_argument('--save_path', default='./data/an4_dataset/test') +parser.add_argument('--label_file', default='./labels.json') +args = parser.parse_args() + +def collate_fn(batch): + """ + data preprocessing + """ + def func(p): + """ + data size + """ + return p[0].size(1) + + batch = sorted(batch, key=lambda sample: sample[0].size(1), reverse=True) + longest_sample = max(batch, key=func)[0] + freq_size = longest_sample.size(0) + minibatch_size = len(batch) + max_seqlength = longest_sample.size(1) + inputs = torch.zeros(minibatch_size, 1, freq_size, max_seqlength) + input_percentages = torch.FloatTensor(minibatch_size) + target_sizes = torch.IntTensor(minibatch_size) + targets = [] + for x in range(minibatch_size): + sample = batch[x] + tensor = sample[0] + target = sample[1] + seq_length = tensor.size(1) + inputs[x][0].narrow(1, 0, seq_length).copy_(tensor) + input_percentages[x] = seq_length / float(max_seqlength) + target_sizes[x] = len(target) + targets.extend(target) + targets = torch.tensor(targets, dtype=torch.long) + return inputs, input_percentages, [targets, target_sizes] + + +if __name__ == '__main__': + with open(to_absolute_path(args.label_file)) as label_file: + labels = json.load(label_file) + # if labels: + # print("labels ready") + + dataset = SpectrogramDataset( + audio_conf=DataConfig.spect, + input_path=args.data_file, + labels=labels, + normalize=True, + aug_cfg=DataConfig.augmentation + ) + inputs, input_percentages, target_list = collate_fn(dataset) + targets = target_list[0] + target_sizes = target_list[1] + input_sizes = input_percentages.mul_(int(inputs.size(3))).int() + + # print(inputs,input_sizes) + if not os.path.exists(args.save_path + '/spect'): os.makedirs(args.save_path + '/spect') + if not os.path.exists(args.save_path + '/sizes'): os.makedirs(args.save_path + '/sizes') + i = 0 + for input_data in inputs: + i = i + 1 + spect = np.array(input_data).astype(np.float32) + spect.tofile(os.path.join(args.save_path + '/spect', "data" + str(i) + ".bin")) + + i = 0 + for input_size in input_sizes: + i = i + 1 + transcript = np.array(input_size).astype(np.int32) + transcript.tofile(os.path.join(args.save_path + '/sizes', "data" + str(i) + ".bin")) + + f = open(args.save_path + '/sizes/' + 'sizes.txt', "w") + for w in np.array(input_sizes).astype(np.int32): + f.write(str(w)+' ') + f.close() + + f = open(args.save_path + '/targets.txt', "w") + for w in np.array(targets): + f.write(str(w) + ' ') + f.close() + + f = open(args.save_path + '/target_sizes.txt', "w") + for w in np.array(target_sizes).astype(np.int32): 
+ f.write(str(w) + ' ') f.close() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Deepspeech/test/t4_perf.py b/ACL_PyTorch/contrib/cv/detection/Deepspeech/test/t4_perf.py index 7f0120a740bfed1da1872528a5b69c6e862c1d70..a150520144a56de437a2893a1c8aefb571f8acd7 100644 --- a/ACL_PyTorch/contrib/cv/detection/Deepspeech/test/t4_perf.py +++ b/ACL_PyTorch/contrib/cv/detection/Deepspeech/test/t4_perf.py @@ -1,85 +1,85 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import hydra -import os -import torch -from deepspeech_pytorch.configs.inference_config import EvalConfig -from deepspeech_pytorch.utils import load_model - -from deepspeech_pytorch.decoder import GreedyDecoder -from deepspeech_pytorch.utils import load_decoder - -import os -import json -import numpy as np -import argparse - -from deepspeech_pytorch.loader.data_module import DeepSpeechDataModule -from deepspeech_pytorch.configs.train_config import DeepSpeechConfig -from deepspeech_pytorch.loader.data_loader import AudioDataLoader -from hydra.utils import to_absolute_path -import time - -parser = argparse.ArgumentParser(description='Deepspeech') -# The data file to read -parser.add_argument('--data_file', default='./data/an4_test_manifest.json') -# The location the generated 'bin' file to save -parser.add_argument('--save_path', default='./data/an4_dataset/test') -args = parser.parse_args() - - - -if __name__ == '__main__': - - device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") - with open(to_absolute_path(DeepSpeechConfig.data.labels_path)) as label_file: - labels = json.load(label_file) - # if labels: - # print("labels ready") - data_module = DeepSpeechDataModule( - labels=labels, - data_cfg=DeepSpeechConfig.data, - normalize=True, - is_distributed=False # DeepSpeechConfig.trainer.gpus > 1 - ) - dataset = data_module._create_dataset(args.data_file) - - data_loader = AudioDataLoader( - dataset=dataset, - num_workers=data_module.data_cfg.num_workers, - batch_size=data_module.data_cfg.batch_size - ) - - inputs, targets, input_percentages, target_sizes = data_loader.collate_fn(data_loader.dataset) - - input_sizes = input_percentages.mul_(int(inputs.size(3))).int() - inputs = inputs.to(device) - - - - device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") - model = load_model(device=device, model_path='an4_pretrained_v3.ckpt') - model.eval() - model = model.to(device) - print('Finished loading model!') - s_time = time.time() - for i in range(5): - out, output_sizes = model(inputs[:1], input_sizes[:1]) - e_time = time.time() - t = (e_time - s_time)/5 - print('Finished testing data!') - print('Run time of each data: ', t) - print('performance: ', 1/t, 'seq/s') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import hydra +import os +import torch +from deepspeech_pytorch.configs.inference_config import EvalConfig +from deepspeech_pytorch.utils import load_model + +from deepspeech_pytorch.decoder import GreedyDecoder +from deepspeech_pytorch.utils import load_decoder + +import os +import json +import numpy as np +import argparse + +from deepspeech_pytorch.loader.data_module import DeepSpeechDataModule +from deepspeech_pytorch.configs.train_config import DeepSpeechConfig +from deepspeech_pytorch.loader.data_loader import AudioDataLoader +from hydra.utils import to_absolute_path +import time + +parser = argparse.ArgumentParser(description='Deepspeech') +# The data file to read +parser.add_argument('--data_file', default='./data/an4_test_manifest.json') +# The location the generated 'bin' file to save +parser.add_argument('--save_path', default='./data/an4_dataset/test') +args = parser.parse_args() + + + +if __name__ == '__main__': + + device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") + with open(to_absolute_path(DeepSpeechConfig.data.labels_path)) as label_file: + labels = json.load(label_file) + # if labels: + # print("labels ready") + data_module = DeepSpeechDataModule( + labels=labels, + data_cfg=DeepSpeechConfig.data, + normalize=True, + is_distributed=False # DeepSpeechConfig.trainer.gpus > 1 + ) + dataset = data_module._create_dataset(args.data_file) + + data_loader = AudioDataLoader( + dataset=dataset, + num_workers=data_module.data_cfg.num_workers, + batch_size=data_module.data_cfg.batch_size + ) + + inputs, targets, input_percentages, target_sizes = data_loader.collate_fn(data_loader.dataset) + + input_sizes = input_percentages.mul_(int(inputs.size(3))).int() + inputs = inputs.to(device) + + + + device = torch.device("cuda" if EvalConfig.model.cuda else "cpu") + model = load_model(device=device, model_path='an4_pretrained_v3.ckpt') + model.eval() + model = model.to(device) + print('Finished loading model!') + s_time = time.time() + for i in range(5): + out, output_sizes = model(inputs[:1], input_sizes[:1]) + e_time = time.time() + t = (e_time - s_time)/5 + print('Finished testing data!') + print('Run time of each data: ', t) + print('performance: ', 1/t, 'seq/s') diff --git a/ACL_PyTorch/contrib/cv/detection/Detr/FPS.py b/ACL_PyTorch/contrib/cv/detection/Detr/FPS.py index 72a5ee53fbe24f2a7d86720d9fbb669acbac0dbf..3f66e22cabb039f13fb085d8670e9f9638ab4d31 100644 --- a/ACL_PyTorch/contrib/cv/detection/Detr/FPS.py +++ b/ACL_PyTorch/contrib/cv/detection/Detr/FPS.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import argparse - -parser = argparse.ArgumentParser('Calculation FPS', add_help=False) -parser.add_argument('--log_path', default='bs1_time.log') -parser.add_argument('--batch_size', default=1,type=int) -args = parser.parse_args() - -weight = [0.17, 0.06, 0.53, 0.18, 0.05, 0.009, 0.0014, 0.0006, 0.005] -weight = np.array(weight) -val_times = [] -with open(args.log_path, 'r') as l: - for line in l.readlines(): - if line.startswith('Inference average time without first time: '): - val_time = float(line.split(':')[1].replace('ms', '')) / 1000 - val_times.append(val_time) -val_times = np.array(val_times) -fps = 1 / sum(val_times * weight) * args.batch_size * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse + +parser = argparse.ArgumentParser('Calculation FPS', add_help=False) +parser.add_argument('--log_path', default='bs1_time.log') +parser.add_argument('--batch_size', default=1,type=int) +args = parser.parse_args() + +weight = [0.17, 0.06, 0.53, 0.18, 0.05, 0.009, 0.0014, 0.0006, 0.005] +weight = np.array(weight) +val_times = [] +with open(args.log_path, 'r') as l: + for line in l.readlines(): + if line.startswith('Inference average time without first time: '): + val_time = float(line.split(':')[1].replace('ms', '')) / 1000 + val_times.append(val_time) +val_times = np.array(val_times) +fps = 1 / sum(val_times * weight) * args.batch_size * 4 print(fps) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Detr/excute_omval.py b/ACL_PyTorch/contrib/cv/detection/Detr/excute_omval.py index 75a85178b36c2b381db02ecb4d203885700def94..75c845b70c76fd5cd90f5a8c60c85366fc0bef90 100644 --- a/ACL_PyTorch/contrib/cv/detection/Detr/excute_omval.py +++ b/ACL_PyTorch/contrib/cv/detection/Detr/excute_omval.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import argparse - -parser = argparse.ArgumentParser('Set transformer detector', add_help=False) -parser.add_argument('--img_path', default='img_file') -parser.add_argument('--mask_path', default='mask_file') -parser.add_argument('--out_put', default='out_put') -parser.add_argument('--result', default='result') -parser.add_argument('--batch_size', default=1, type=int) -args = parser.parse_args() - -if not os.path.exists(args.out_put): - os.mkdir(args.out_put) -if not os.path.exists(args.result): - os.mkdir(args.result) - -shape_3 = [[768, 1280, 24, 40], [768, 768, 24, 24], [768, 1024, 24, 32]] -shape_6 = [[1024, 768, 32, 24], [1280, 768, 40, 24], [768, 1344, 24, 42], [1344, 768, 42, 24], [1344, 512, 32, 42], - [512, 1344, 16, 42]] -print(args) -if args.batch_size == 1: - for i in shape_3: - command = 'tools/msame/out/msame --model "auto_om/detr_gear_bs1_768.om" --input "{}/{}_{},{}/{}_{}_mask" --output "{}" ' \ - '--dymDims "inputs:1,3,{},{};mask:1,{},{}" --outfmt BIN'. \ - format(args.img_path, i[0], i[1], args.mask_path, i[0], i[1], args.out_put, i[0], i[1], int(i[0] / 32), - int(i[1] / 32)) - print(command) - os.system(command) - for i in shape_6: - command = 'tools/msame/out/msame --model "auto_om/detr_bs1_{}_{}.om" --input "{}/{}_{},{}/{}_{}_mask" --output "{}" --outfmt BIN'.format( - i[0], i[1], args.img_path, i[0], i[1], args.mask_path, i[0], i[1], args.out_put) - print(command) - os.system(command) - mv_command = 'mv {}/*/* {}'.format(args.out_put, args.result) - os.system(mv_command) -elif args.batch_size == 4: - print(4) - for i in shape_3: - command = 'tools/msame/out/msame --model "auto_om/detr_gear_bs4_768.om" --output "{}" --dymDims "inputs:4,3,{},{};mask:4,{},{}" ' \ - '--outfmt BIN --loop 20'.format(args.out_put, i[0], i[1], int(i[0] / 32), int(i[1] / 32)) - print(command) - os.system(command) - for i in shape_6: - command = 'tools/msame/out/msame --model "auto_om/detr_bs4_{}_{}.om" --output "{}" --outfmt BIN --loop 20'. \ - format(i[0], i[1], args.out_put) - print(command) - os.system(command) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import argparse + +parser = argparse.ArgumentParser('Set transformer detector', add_help=False) +parser.add_argument('--img_path', default='img_file') +parser.add_argument('--mask_path', default='mask_file') +parser.add_argument('--out_put', default='out_put') +parser.add_argument('--result', default='result') +parser.add_argument('--batch_size', default=1, type=int) +args = parser.parse_args() + +if not os.path.exists(args.out_put): + os.mkdir(args.out_put) +if not os.path.exists(args.result): + os.mkdir(args.result) + +shape_3 = [[768, 1280, 24, 40], [768, 768, 24, 24], [768, 1024, 24, 32]] +shape_6 = [[1024, 768, 32, 24], [1280, 768, 40, 24], [768, 1344, 24, 42], [1344, 768, 42, 24], [1344, 512, 32, 42], + [512, 1344, 16, 42]] +print(args) +if args.batch_size == 1: + for i in shape_3: + command = 'tools/msame/out/msame --model "auto_om/detr_gear_bs1_768.om" --input "{}/{}_{},{}/{}_{}_mask" --output "{}" ' \ + '--dymDims "inputs:1,3,{},{};mask:1,{},{}" --outfmt BIN'. \ + format(args.img_path, i[0], i[1], args.mask_path, i[0], i[1], args.out_put, i[0], i[1], int(i[0] / 32), + int(i[1] / 32)) + print(command) + os.system(command) + for i in shape_6: + command = 'tools/msame/out/msame --model "auto_om/detr_bs1_{}_{}.om" --input "{}/{}_{},{}/{}_{}_mask" --output "{}" --outfmt BIN'.format( + i[0], i[1], args.img_path, i[0], i[1], args.mask_path, i[0], i[1], args.out_put) + print(command) + os.system(command) + mv_command = 'mv {}/*/* {}'.format(args.out_put, args.result) + os.system(mv_command) +elif args.batch_size == 4: + print(4) + for i in shape_3: + command = 'tools/msame/out/msame --model "auto_om/detr_gear_bs4_768.om" --output "{}" --dymDims "inputs:4,3,{},{};mask:4,{},{}" ' \ + '--outfmt BIN --loop 20'.format(args.out_put, i[0], i[1], int(i[0] / 32), int(i[1] / 32)) + print(command) + os.system(command) + for i in shape_6: + command = 'tools/msame/out/msame --model "auto_om/detr_bs4_{}_{}.om" --output "{}" --outfmt BIN --loop 20'. \ + format(i[0], i[1], args.out_put) + print(command) + os.system(command) diff --git a/ACL_PyTorch/contrib/cv/detection/Detr/onnx2om.py b/ACL_PyTorch/contrib/cv/detection/Detr/onnx2om.py index 3bc36c657ec1ed3698e3f7a781ba79d12c8187b9..8cd2294743794ddcf4607130f28c26043523e3ed 100644 --- a/ACL_PyTorch/contrib/cv/detection/Detr/onnx2om.py +++ b/ACL_PyTorch/contrib/cv/detection/Detr/onnx2om.py @@ -1,44 +1,44 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import argparse - -parser = argparse.ArgumentParser('Set transformer detector', add_help=False) -parser.add_argument('--batch_size', default=1) -parser.add_argument('--auto_tune', default=False) -args = parser.parse_args() - -input_shape = [['768,1280,24,40;768,768,24,24;768,1024,24,32'], [1024, 768, 32, 24], [1280, 768, 40, 24], - [768, 1344, 24, 42], [1344, 768, 42, 24], [1344, 512, 42, 16], [512, 1344, 16, 42]] - -to_om = 'atc --framework=5 --model=model/detr_bs{}.onnx -output=auto_om/detr_bs{}_{}_{} ' \ - '--input_shape="inputs:{},3,{},{};mask:{},{},{}" --input_format=ND --soc_version=Ascend310' -to_dyom = 'atc --framework=5 --model=model/detr_bs{}.onnx -output=auto_om/detr_gear_bs{}_{} ' \ - '--input_shape="inputs:{},3,-1,-1;mask:{},-1,-1" --dynamic_dims="{}" --input_format=ND --soc_version=Ascend310' - -if args.auto_tune == True: - to_om = to_om + ' --auto_tune_mode="RL,GA"' - to_dyom = to_dyom + ' --auto_tune_mode="RL,GA"' - -for i in input_shape: - if len(i) == 4: - command = to_om.format(args.batch_size, args.batch_size, i[0], i[1], args.batch_size, i[0], i[1], - args.batch_size, i[2], i[3]) - print(command) - os.system(command) - else: - command = to_dyom.format(args.batch_size, args.batch_size, - i[0].split(',')[0], args.batch_size, args.batch_size, i[0]) - print(command) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import argparse + +parser = argparse.ArgumentParser('Set transformer detector', add_help=False) +parser.add_argument('--batch_size', default=1) +parser.add_argument('--auto_tune', default=False) +args = parser.parse_args() + +input_shape = [['768,1280,24,40;768,768,24,24;768,1024,24,32'], [1024, 768, 32, 24], [1280, 768, 40, 24], + [768, 1344, 24, 42], [1344, 768, 42, 24], [1344, 512, 42, 16], [512, 1344, 16, 42]] + +to_om = 'atc --framework=5 --model=model/detr_bs{}.onnx -output=auto_om/detr_bs{}_{}_{} ' \ + '--input_shape="inputs:{},3,{},{};mask:{},{},{}" --input_format=ND --soc_version=Ascend310' +to_dyom = 'atc --framework=5 --model=model/detr_bs{}.onnx -output=auto_om/detr_gear_bs{}_{} ' \ + '--input_shape="inputs:{},3,-1,-1;mask:{},-1,-1" --dynamic_dims="{}" --input_format=ND --soc_version=Ascend310' + +if args.auto_tune == True: + to_om = to_om + ' --auto_tune_mode="RL,GA"' + to_dyom = to_dyom + ' --auto_tune_mode="RL,GA"' + +for i in input_shape: + if len(i) == 4: + command = to_om.format(args.batch_size, args.batch_size, i[0], i[1], args.batch_size, i[0], i[1], + args.batch_size, i[2], i[3]) + print(command) + os.system(command) + else: + command = to_dyom.format(args.batch_size, args.batch_size, + i[0].split(',')[0], args.batch_size, args.batch_size, i[0]) + print(command) os.system(command) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/get_info.py b/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/modify_onnx.py b/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/modify_onnx.py index 3e217e67033d8725e89a76ef9ee442bcdbb84e53..0995b070b7b859e3cf5b4d3c634136475d96d7b1 100644 --- a/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/modify_onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/modify_onnx.py @@ -1,35 +1,35 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append(r'onnx_tools/OXInterface') -from OXInterface import OXGraph -import numpy as np -import argparse - -parser = argparse.ArgumentParser(description='pth to onnx') -parser.add_argument('--model', type=str, default='d0_bs8_sim.onnx', metavar='N', - help='onnx model') -parser.add_argument('--node', type=str, default='1532', metavar='N', - help='need to modify pad node number') -parser.add_argument('--out', type=str, default='d0_bs8_modify.onnx', metavar='N', - help='modified onnx') - - -args = parser.parse_args() -oxgraph = OXGraph(args.model) -oxinitializer_node = oxgraph.get_oxinitializer_by_name(args.node) -new_data = np.array(0, dtype=np.float32) -oxinitializer_node.set_data(new_data) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append(r'onnx_tools/OXInterface') +from OXInterface import OXGraph +import numpy as np +import argparse + +parser = argparse.ArgumentParser(description='pth to onnx') +parser.add_argument('--model', type=str, default='d0_bs8_sim.onnx', metavar='N', + help='onnx model') +parser.add_argument('--node', type=str, default='1532', metavar='N', + help='need to modify pad node number') +parser.add_argument('--out', type=str, default='d0_bs8_modify.onnx', metavar='N', + help='modified onnx') + + +args = parser.parse_args() +oxgraph = OXGraph(args.model) +oxinitializer_node = oxgraph.get_oxinitializer_by_name(args.node) +new_data = np.array(0, dtype=np.float32) +oxinitializer_node.set_data(new_data) oxgraph.save_new_model(args.out) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/test/pth2om.sh b/ACL_PyTorch/contrib/cv/detection/EfficientDetD0/test/pth2om.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/get_info.py b/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/modify_onnx.py b/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/modify_onnx.py index 8066bcf7e7637fb5ffb2c533099fa8023bb30f08..2f6e1e0db62ea98b461a0cd07bb940e48096b2f9 100644 --- a/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/modify_onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/EfficientDetD7/modify_onnx.py @@ -1,35 +1,35 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append(r'onnx_tools/OXInterface') -from OXInterface import OXGraph -import numpy as np -import argparse - -parser = argparse.ArgumentParser(description='pth to onnx') -parser.add_argument('--model', type=str, default='d7.onnx', metavar='N', - help='onnx model') -parser.add_argument('--node', type=str, default='3080', metavar='N', - help='need to modify pad node number') -parser.add_argument('--out', type=str, default='d7_modify.onnx', metavar='N', - help='modified onnx') - - -args = parser.parse_args() -oxgraph = OXGraph(args.model) -oxinitializer_node = oxgraph.get_oxinitializer_by_name(args.node) -new_data = np.array(0, dtype=np.float32) -oxinitializer_node.set_data(new_data) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +sys.path.append(r'onnx_tools/OXInterface') +from OXInterface import OXGraph +import numpy as np +import argparse + +parser = argparse.ArgumentParser(description='pth to onnx') +parser.add_argument('--model', type=str, default='d7.onnx', metavar='N', + help='onnx model') +parser.add_argument('--node', type=str, default='3080', metavar='N', + help='need to modify pad node number') +parser.add_argument('--out', type=str, default='d7_modify.onnx', metavar='N', + help='modified onnx') + + +args = parser.parse_args() +oxgraph = OXGraph(args.model) +oxinitializer_node = oxgraph.get_oxinitializer_by_name(args.node) +new_data = np.array(0, dtype=np.float32) +oxinitializer_node.set_data(new_data) oxgraph.save_new_model(args.out) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/FOTS_pth2onnx_8.py b/ACL_PyTorch/contrib/cv/detection/FOTS/FOTS_pth2onnx_8.py index 6cd3da24add616fb846447db3a8ebb65d6c8fc3a..c3f7a592ba94b9524a8a20d2dab3c4257211dcd4 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/FOTS_pth2onnx_8.py +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/FOTS_pth2onnx_8.py @@ -1,37 +1,37 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -import torch.onnx -import torchvision.models as models -from model import FOTSModel - -def pth2onnx(input_file, output_file): - - model = FOTSModel() - checkpoint = torch.load(input_file, map_location='cpu') - model.load_state_dict(checkpoint['model_state_dict']) - - model.eval() - input_names = ["image"] - output_names = ["location"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(8, 3, 1248, 2240) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file = sys.argv[2] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import torch +import torch.onnx +import torchvision.models as models +from model import FOTSModel + +def pth2onnx(input_file, output_file): + + model = FOTSModel() + checkpoint = torch.load(input_file, map_location='cpu') + model.load_state_dict(checkpoint['model_state_dict']) + + model.eval() + input_names = ["image"] + output_names = ["location"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(8, 3, 1248, 2240) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, verbose=True, opset_version=11) + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file = sys.argv[2] pth2onnx(input_file, output_file) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/LICENSE b/ACL_PyTorch/contrib/cv/detection/FOTS/LICENSE index 185404d5515c393add9ecfbdd7cd83596e8a4b26..5b4cf39445b7b24f2e5d38062c3b9cca89ad8a90 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/LICENSE @@ -1,204 +1,204 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. -Copyright 2021 Huawei Technologies Co., Ltd - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. +Copyright 2021 Huawei Technologies Co., Ltd + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
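A side note on the FOTS_pth2onnx_8.py hunk above (before this LICENSE file): `torch.onnx.export` only honours `dynamic_axes` keys that match the declared input/output names, so the `'class'` key in that script has no effect and PyTorch will typically warn about it; only the `'image'` axis is actually marked dynamic. A hedged sketch with matching keys is shown below; the `FOTSModel` import and checkpoint handling are taken from the script itself, while the `'batch'` axis label is illustrative.

```py
# Hedged sketch: same export call, but with dynamic_axes keys that match the
# declared names ('image' / 'location'). Not part of the patch.
import torch
from model import FOTSModel  # assumption: same import path as FOTS_pth2onnx_8.py

model = FOTSModel()
model.eval()
dummy_input = torch.randn(8, 3, 1248, 2240)
torch.onnx.export(
    model, dummy_input, 'fots_bs8.onnx',
    input_names=['image'], output_names=['location'],
    dynamic_axes={'image': {0: 'batch'}, 'location': {0: 'batch'}},  # 'batch' label is illustrative
    opset_version=11)
```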
diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/FOTS/gen_dataset_info.py index 710bbd708c7417a9895e42560db50ce2cd03c432..edb8df558bbe1f18ec8ab9353ec1097c28c60853 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/gen_dataset_info.py @@ -1,60 +1,60 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/FOTS/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/preprocess.py b/ACL_PyTorch/contrib/cv/detection/FOTS/preprocess.py index 9ca995e15387351dcde0181dc353782047ead82c..d6d94222e488fbcdaf1186745b2f4888baac8e6d 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/preprocess.py @@ -1,44 +1,44 @@ -import argparse -import os - -import cv2 -import numpy as np -import torch - - - -import re -import tqdm - - -def preprocess(images_folder, output_folder): - pbar = tqdm.tqdm(os.listdir(images_folder), desc='Test', ncols=80) - for image_name in pbar: - # prefix = image_name[:image_name.rfind('.')] - image = cv2.imread(os.path.join(images_folder, image_name), cv2.IMREAD_COLOR) - # due to bad net arch sizes have to be mult of 32, so hardcode it - scale_x = 2240 / image.shape[1] # 2240 # 1280 1.75 - scale_y = 1248 / image.shape[0] # 1248 # 720 1.73333 - scaled_image = cv2.resize(image, dsize=(0, 0), fx=scale_x, fy=scale_y, interpolation=cv2.INTER_CUBIC) - # orig_scaled_image = scaled_image.copy() - - scaled_image = scaled_image[:, :, ::-1].astype(np.float32) - scaled_image = (scaled_image / 255 - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225]) - image_tensor = torch.from_numpy(np.expand_dims(np.transpose(scaled_image, axes=(2, 0, 1)), axis=0)).float() - - img = np.array(image_tensor).astype(np.float32) - - img.tofile(os.path.join(output_folder, image_name.split('.')[0] + ".bin")) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--images-folder', type=str, required=True, help='path to the folder with test images') - parser.add_argument('--output-folder', type=str, default='fots_test_results', - help='path to the output folder with result labels') - - args = parser.parse_args() - - preprocess(args.images_folder, args.output_folder) - - +import argparse +import os + +import cv2 +import numpy as np +import torch + + + +import re +import tqdm + + +def preprocess(images_folder, output_folder): + pbar = tqdm.tqdm(os.listdir(images_folder), desc='Test', ncols=80) + for image_name in pbar: + # prefix = image_name[:image_name.rfind('.')] + image = 
cv2.imread(os.path.join(images_folder, image_name), cv2.IMREAD_COLOR) + # due to bad net arch sizes have to be mult of 32, so hardcode it + scale_x = 2240 / image.shape[1] # 2240 # 1280 1.75 + scale_y = 1248 / image.shape[0] # 1248 # 720 1.73333 + scaled_image = cv2.resize(image, dsize=(0, 0), fx=scale_x, fy=scale_y, interpolation=cv2.INTER_CUBIC) + # orig_scaled_image = scaled_image.copy() + + scaled_image = scaled_image[:, :, ::-1].astype(np.float32) + scaled_image = (scaled_image / 255 - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225]) + image_tensor = torch.from_numpy(np.expand_dims(np.transpose(scaled_image, axes=(2, 0, 1)), axis=0)).float() + + img = np.array(image_tensor).astype(np.float32) + + img.tofile(os.path.join(output_folder, image_name.split('.')[0] + ".bin")) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--images-folder', type=str, required=True, help='path to the folder with test images') + parser.add_argument('--output-folder', type=str, default='fots_test_results', + help='path to the output folder with result labels') + + args = parser.parse_args() + + preprocess(args.images_folder, args.output_folder) + + diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/readme.md b/ACL_PyTorch/contrib/cv/detection/FOTS/readme.md index 36cf5bc66656c6c57fcd539f8af54617581cc022..e531e311ac445f7d74276c1461490e1f46d8ac03 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/readme.md +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/readme.md @@ -1,142 +1,142 @@ -## FOTS Onnx 模型 PyTorch 离线推理 - -### 1 模型概述 - -- 论文地址 - -``` -https://arxiv.org/abs/1801.01671 -``` - -- 代码地址 - -``` -https://github.com/Wovchena/text-detection-fots.pytorch -``` - -- 数据集 - -``` -下载使用ICDAR2015数据集: -解压后将ch4_test_images文件夹和gt.zip压缩标签文件放到根目录下 -``` - -### 2 环境说明 - -``` -CANN = 5.0.3 -pytorch = 1.5.0 -torchvision = 0.6.0 -onnx = 1.7.0 -numpy = 1.21.2 -shapely = 1.6.4.post2(重要) -polygon3 = 3.0.9.1 -opencv-python = 3.4.10.37(重要) -``` - -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip install 包名 安装 - - - -### 3 pth 转 om 模型 - -- pth 权重文件默认路径为根目录 -- 进入根目录下执行 `./test/pth2onnx.sh` 脚本,自动生成生成 onnx 模型文件 -- (执行./pth2onnx_8.sh脚本,生成batchsize=8的onnx模型文件) - -```py -bash ./test/pth2onnx.sh -``` - -- 执行 `./onnx2om.sh` 脚本,自动生成生成 om 模型文件 -- (执行./onnx2om_8.sh脚本,生成batchsize=8的om模型文件) - -```py -bash ./test/onnx2om.sh -``` - - -### 4 生成输入数据并保存为.bin文件 - -- 数据集默认路径为 `./ch4_test_images.zip` , 解压此数据集,在源码根目录下建立空文件夹用来保存预处理后的二进制图片,命名为res - - - -- 使用脚本 `preprocess.sh`和`gen_dataset_info.sh` 获得预处理图片、二进制 bin 文件及其对应的路径信息 - -``` -bash ./test/preprocess.sh -bash ./test/gen_dataset_info.sh -``` - - -### 5 离线推理 - -#### 5.1 benchmark工具概述 - -benchmark工具提供离线推理功能,输入 om 模型和模型所需要的输入 bin 文件,输出模型的输出数据文件。模型必须是通过 atc 工具转换的 om 模型,输入 bin 文件需要符合模型的输入要求。 - - -#### 5.2 离线推理 - -``` -bash ./test/inference.sh -``` -- (执行bash ./test/inference_8.sh脚本生成batchsize=8的二进制推理文件) - - -输出数据默认保存在根目录的 `./result/pref_visionbatchsize_1_device_0.txt` 中,可以看到时延和 FPS。输出图片默认保存在根目录的 `./result/dumpOutput_device0` 下. 
- - -### 6 精度对比 - -进入根目录下建立空文件夹用来保存后处理的坐标信息,命名为outPost。调用 ` postprocess.py` 来进行后处理,把输出的 bin 文件转换为对应坐标信息的txt文件。 - -``` -python postprocess.py - -``` - - -- (执行 python postprocess_8.py输出batchsize=8推理的后处理文件) - -详细的坐标信息结果在根目录的outPost/目录下,在根目录下建立空文件夹runs。调用 ` script.py` 来进行精度计算,将输出结果与真实标签比对。 - - -``` -zip -jmq runs/u.zip outPost/* && python3 script.py -g=gt.zip -s=runs/u.zip -``` - -### 7 性能对比 - -#### 7.1 NPU 310 性能数据 -``` -(310 bs1) Inference average time: 9.9045 ms -(310 bs1) FPS:39.618 -``` - -根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 39.618 FPS - -``` -(310 bs8) Inference average time: 9.3025 ms -(310 bs8) FPS:37.210 -``` - -根据时延和核心数,计算得到 Batchsize = 8 时单卡吞吐率 37.210 FPS - -#### 7.2 GPU T4 性能数据 - - -根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 44.704 FPS - - -根据时延和核心数,计算得到 Batchsize = 8 时单卡吞吐率 47.271 FPS - -#### 7.3 性能对比 - -| Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | -| ---------- | -------------- | ------------- | -------- | -| 1 | *39.618* | *44.704* | *88.62%* | -| 8 | *37.210* | *47.271* | *78.71%* | +## FOTS Onnx 模型 PyTorch 离线推理 + +### 1 模型概述 + +- 论文地址 + +``` +https://arxiv.org/abs/1801.01671 +``` + +- 代码地址 + +``` +https://github.com/Wovchena/text-detection-fots.pytorch +``` + +- 数据集 + +``` +下载使用ICDAR2015数据集: +解压后将ch4_test_images文件夹和gt.zip压缩标签文件放到根目录下 +``` + +### 2 环境说明 + +``` +CANN = 5.0.3 +pytorch = 1.5.0 +torchvision = 0.6.0 +onnx = 1.7.0 +numpy = 1.21.2 +shapely = 1.6.4.post2(重要) +polygon3 = 3.0.9.1 +opencv-python = 3.4.10.37(重要) +``` + +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip install 包名 安装 + + + +### 3 pth 转 om 模型 + +- pth 权重文件默认路径为根目录 +- 进入根目录下执行 `./test/pth2onnx.sh` 脚本,自动生成生成 onnx 模型文件 +- (执行./pth2onnx_8.sh脚本,生成batchsize=8的onnx模型文件) + +```py +bash ./test/pth2onnx.sh +``` + +- 执行 `./onnx2om.sh` 脚本,自动生成生成 om 模型文件 +- (执行./onnx2om_8.sh脚本,生成batchsize=8的om模型文件) + +```py +bash ./test/onnx2om.sh +``` + + +### 4 生成输入数据并保存为.bin文件 + +- 数据集默认路径为 `./ch4_test_images.zip` , 解压此数据集,在源码根目录下建立空文件夹用来保存预处理后的二进制图片,命名为res + + + +- 使用脚本 `preprocess.sh`和`gen_dataset_info.sh` 获得预处理图片、二进制 bin 文件及其对应的路径信息 + +``` +bash ./test/preprocess.sh +bash ./test/gen_dataset_info.sh +``` + + +### 5 离线推理 + +#### 5.1 benchmark工具概述 + +benchmark工具提供离线推理功能,输入 om 模型和模型所需要的输入 bin 文件,输出模型的输出数据文件。模型必须是通过 atc 工具转换的 om 模型,输入 bin 文件需要符合模型的输入要求。 + + +#### 5.2 离线推理 + +``` +bash ./test/inference.sh +``` +- (执行bash ./test/inference_8.sh脚本生成batchsize=8的二进制推理文件) + + +输出数据默认保存在根目录的 `./result/pref_visionbatchsize_1_device_0.txt` 中,可以看到时延和 FPS。输出图片默认保存在根目录的 `./result/dumpOutput_device0` 下. 
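Editor's note: before running the post-processing described in the next section of this README, it can help to sanity-check a single raw benchmark output. A minimal hedged sketch follows, assuming the dumps under `./result/dumpOutput_device0` are float32 `.bin` files; the file name is made up and should be replaced with an actual dump file.

```py
# Hedged sketch: inspect one raw benchmark output before post-processing.
# File name and float32 dtype are assumptions; adjust to the actual dump files.
import numpy as np

out = np.fromfile('result/dumpOutput_device0/img_1_output_0.bin', dtype=np.float32)
print(out.size, out.min(), out.max())  # element count and value range as a quick check
```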
+ + +### 6 精度对比 + +进入根目录下建立空文件夹用来保存后处理的坐标信息,命名为outPost。调用 ` postprocess.py` 来进行后处理,把输出的 bin 文件转换为对应坐标信息的txt文件。 + +``` +python postprocess.py + +``` + + +- (执行 python postprocess_8.py输出batchsize=8推理的后处理文件) + +详细的坐标信息结果在根目录的outPost/目录下,在根目录下建立空文件夹runs。调用 ` script.py` 来进行精度计算,将输出结果与真实标签比对。 + + +``` +zip -jmq runs/u.zip outPost/* && python3 script.py -g=gt.zip -s=runs/u.zip +``` + +### 7 性能对比 + +#### 7.1 NPU 310 性能数据 +``` +(310 bs1) Inference average time: 9.9045 ms +(310 bs1) FPS:39.618 +``` + +根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 39.618 FPS + +``` +(310 bs8) Inference average time: 9.3025 ms +(310 bs8) FPS:37.210 +``` + +根据时延和核心数,计算得到 Batchsize = 8 时单卡吞吐率 37.210 FPS + +#### 7.2 GPU T4 性能数据 + + +根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 44.704 FPS + + +根据时延和核心数,计算得到 Batchsize = 8 时单卡吞吐率 47.271 FPS + +#### 7.3 性能对比 + +| Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | +| ---------- | -------------- | ------------- | -------- | +| 1 | *39.618* | *44.704* | *88.62%* | +| 8 | *37.210* | *47.271* | *78.71%* | diff --git a/ACL_PyTorch/contrib/cv/detection/FOTS/requirments.txt b/ACL_PyTorch/contrib/cv/detection/FOTS/requirments.txt index bd61e6482dfef890063a792a0df534556e61243a..f53442ac04e9592cf6dbbbfd436ca9a6fedacfd3 100644 --- a/ACL_PyTorch/contrib/cv/detection/FOTS/requirments.txt +++ b/ACL_PyTorch/contrib/cv/detection/FOTS/requirments.txt @@ -1,12 +1,12 @@ -numpy=1.21.2 -onnx=1.7.0 -opencv-python=3.4.10.37 -opencv-python3=1.0 -Pillow=8.3.2 -Polygon3=3.0.9.1 -Shapely=1.6.4.post2 -torch=1.5.0 -torchvision=0.6.0 - - - +numpy=1.21.2 +onnx=1.7.0 +opencv-python=3.4.10.37 +opencv-python3=1.0 +Pillow=8.3.2 +Polygon3=3.0.9.1 +Shapely=1.6.4.post2 +torch=1.5.0 +torchvision=0.6.0 + + + diff --git a/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN/README.md b/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN/README.md index be4a4f60efe35fd7d62da530181c7f057094e934..a47a439a37678806cca3acb0fb5635166ec267ea 100644 --- a/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN/README.md +++ b/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN/README.md @@ -1,79 +1,79 @@ -# FasterRCNN-FPN-DCN模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - 说明:PyTorch选用开源1.8.0版本 -2.获取,修改与安装开源模型代码 - -``` -git clone https://github.com/open-mmlab/mmcv -b master -cd mmcv -git checkout v1.2.7 -MMCV_WITH_OPS=1 pip3.7 install -e . -patch -p1 < ../mmcv.patch -cd .. 
-git clone https://github.com/open-mmlab/mmdetection -b master -cd mmdetection -git reset --hard a21eb25535f31634cef332b09fc27d28956fb24b -patch -p1 < ../dcn.patch -pip3.7 install -r requirements/build.txt -python3.7 setup.py develop -``` -3.获取权重文件 - -``` -cd mmdetection -mkdir checkpoints -cd checkpoints - -``` - -[faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth](参照指导书文档) -4.数据集 - -[测试集]参照指导书文档 -[标签]参照指导书文档 - -5.[获取benchmark工具](参照指导书文档) - 将benchmark.x86_64或benchmark.aarch64放到当前目录 - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -#OM model generation -bash test/pth2onnx.sh -bash test/onnx2om.sh - -#COCO dataset preprocess -python3.7 FasterRCNN+FPN+DCN_preprocess.py --image_folder_path coco/val2017 --bin_folder_path coco2017_bin -python3.7 gen_dataset_info.py bin coco2017_bin coco2017_bin.info 1216 1216 -python3.7 gen_dataset_info.py jpg coco/val2017 coco2017_jpg.info - -#OM model inference -bash test/inf.sh - -#Inference result postprocess -python3.7 FasterRCNN+FPN+DCN_postprocess.py --test_annotation coco2017_jpg.info --bin_data_path result/dumpOutput_device0 - -#COCO eval -python3.7 txt2json.py --npu_txt_path detection-results --json_output_file coco_detection_result -python3.7 coco_eval.py --groud_truth coco/annotations/instances_val2017.json --detection_result coco_detection_result.json - -#FrameRate eval -bash test/framerate.sh -``` - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | -| faster_rcnn_r50_fpn_dcn | [box AP:41.3%](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) | box AP:41.2% | 5.2fps | 2.8fps | - +# FasterRCNN-FPN-DCN模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + 说明:PyTorch选用开源1.8.0版本 +2.获取,修改与安装开源模型代码 + +``` +git clone https://github.com/open-mmlab/mmcv -b master +cd mmcv +git checkout v1.2.7 +MMCV_WITH_OPS=1 pip3.7 install -e . +patch -p1 < ../mmcv.patch +cd .. 
+git clone https://github.com/open-mmlab/mmdetection -b master +cd mmdetection +git reset --hard a21eb25535f31634cef332b09fc27d28956fb24b +patch -p1 < ../dcn.patch +pip3.7 install -r requirements/build.txt +python3.7 setup.py develop +``` +3.获取权重文件 + +``` +cd mmdetection +mkdir checkpoints +cd checkpoints + +``` + +[faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth](参照指导书文档) +4.数据集 + +[测试集]参照指导书文档 +[标签]参照指导书文档 + +5.[获取benchmark工具](参照指导书文档) + 将benchmark.x86_64或benchmark.aarch64放到当前目录 + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +#OM model generation +bash test/pth2onnx.sh +bash test/onnx2om.sh + +#COCO dataset preprocess +python3.7 FasterRCNN+FPN+DCN_preprocess.py --image_folder_path coco/val2017 --bin_folder_path coco2017_bin +python3.7 gen_dataset_info.py bin coco2017_bin coco2017_bin.info 1216 1216 +python3.7 gen_dataset_info.py jpg coco/val2017 coco2017_jpg.info + +#OM model inference +bash test/inf.sh + +#Inference result postprocess +python3.7 FasterRCNN+FPN+DCN_postprocess.py --test_annotation coco2017_jpg.info --bin_data_path result/dumpOutput_device0 + +#COCO eval +python3.7 txt2json.py --npu_txt_path detection-results --json_output_file coco_detection_result +python3.7 coco_eval.py --groud_truth coco/annotations/instances_val2017.json --detection_result coco_detection_result.json + +#FrameRate eval +bash test/framerate.sh +``` + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | +| faster_rcnn_r50_fpn_dcn | [box AP:41.3%](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn) | box AP:41.2% | 5.2fps | 2.8fps | + diff --git a/ACL_PyTorch/contrib/cv/detection/Fsaf/Fsaf_preprocess.py b/ACL_PyTorch/contrib/cv/detection/Fsaf/Fsaf_preprocess.py index 8192a5338abae754193ea8826a63cedf09555b8f..373e2362accb87644c0b7433e8ac02bf887ce6b9 100644 --- a/ACL_PyTorch/contrib/cv/detection/Fsaf/Fsaf_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/Fsaf/Fsaf_preprocess.py @@ -1,73 +1,73 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import argparse -import numpy as np -import cv2 -import mmcv -import torch -import multiprocessing - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') - return resized_img - -def gen_input_bin(file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - image = mmcv.imread(os.path.join(flags.image_src_path, file), backend='cv2') - image = resize(image, (flags.model_input_width, flags.model_input_height)) - mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) - std = np.array([58.395, 57.12, 57.375], dtype=np.float32) - image = mmcv.imnormalize(image, mean, std, to_rgb=True) - h = image.shape[0] - w = image.shape[1] - pad_left = (flags.model_input_width - w) // 2 - pad_top = (flags.model_input_height - h) // 2 - pad_right = flags.model_input_width - pad_left - w - pad_bottom = flags.model_input_height - pad_top - h - image = mmcv.impad(image, shape=(flags.model_input_height, flags.model_input_width), pad_val=0) - #mmcv.imwrite(image, './paded_jpg/' + file.split('.')[0] + '.jpg') - image = image.transpose(2, 0, 1) - image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) - -def preprocess(src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') - parser.add_argument("--image_src_path", default="./coco2017/", help='image of dataset') - parser.add_argument("--bin_file_path", default="./coco2017_bin/", help='Preprocessed image buffer') - parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') - parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
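Editor's note: the body of Fsaf_preprocess.py re-added below resizes each image with a keep-ratio scale and then pads it up to the 800x1216 model input. A small worked sketch of that arithmetic follows; the 640x427 input size is purely illustrative.

```py
# Worked example of the keep-ratio resize used in Fsaf_preprocess.py below
# (640x427 input is illustrative; default model input is 1216 (w) x 800 (h)).
import numpy as np

old_w, old_h = 640, 427
model_w, model_h = 1216, 800
scale = min(model_w / old_w, model_h / old_h)   # min(1.9, ~1.8735): height is the binding side
new_w = int(np.floor(old_w * scale))            # 1199
new_h = int(np.floor(old_h * scale))            # 800 on the binding side, give or take floor rounding
# The script then normalizes the resized image and pads it to model_h x model_w;
# note that mmcv.impad(img, shape=(model_h, model_w)) pads on the bottom/right.
print(new_w, new_h)
```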
+ +import os +import argparse +import numpy as np +import cv2 +import mmcv +import torch +import multiprocessing + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') + return resized_img + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + image = mmcv.imread(os.path.join(flags.image_src_path, file), backend='cv2') + image = resize(image, (flags.model_input_width, flags.model_input_height)) + mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) + std = np.array([58.395, 57.12, 57.375], dtype=np.float32) + image = mmcv.imnormalize(image, mean, std, to_rgb=True) + h = image.shape[0] + w = image.shape[1] + pad_left = (flags.model_input_width - w) // 2 + pad_top = (flags.model_input_height - h) // 2 + pad_right = flags.model_input_width - pad_left - w + pad_bottom = flags.model_input_height - pad_top - h + image = mmcv.impad(image, shape=(flags.model_input_height, flags.model_input_width), pad_val=0) + #mmcv.imwrite(image, './paded_jpg/' + file.split('.')[0] + '.jpg') + image = image.transpose(2, 0, 1) + image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + +def preprocess(src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="./coco2017/", help='image of dataset') + parser.add_argument("--bin_file_path", default="./coco2017_bin/", help='Preprocessed image buffer') + parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') + parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) preprocess(flags.image_src_path, flags.bin_file_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Fsaf/README.md b/ACL_PyTorch/contrib/cv/detection/Fsaf/README.md index 5e40863e926ddbec695e5de02e12127833b46c24..3752277155f21ad5467b7bb2304a5ab442f5def3 100644 --- a/ACL_PyTorch/contrib/cv/detection/Fsaf/README.md +++ b/ACL_PyTorch/contrib/cv/detection/Fsaf/README.md @@ -1,54 +1,54 @@ -# Fsaf模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - 说明:PyTorch选用开源1.8.0版本 -2.获取,修改与安装开源模型代码 - -``` -git clone https://github.com/open-mmlab/mmcv -b master -git reset --hard 04daea425bcb0a104d8b4acbbc16bd31304cf168 -cd mmcv -MMCV_WITH_OPS=1 pip3.7 install -e . -cd .. 
-git clone https://github.com/open-mmlab/mmdetection -b master -git reset --hard 604bfe9618533949c74002a4e54f972e57ad0a7a -cd mmdetection -patch -p1 < ../fsaf.diff -pip3.7 install -r requirements/build.txt -python3.7 setup.py develop -``` -3.获取权重文件 - -[fsaf_r50_fpn_1x_coco-94ccc51f.pth](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) - -4.数据集 - -[测试集](http://images.cocodataset.org/zips/val2017.zip):coco/val2017/ -[标签](http://images.cocodataset.org/annotations/annotations_trainval2017.zip):coco/annotations/instances_val2017.json - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) - 将benchmark.x86_64或benchmark.aarch64放到当前目录 - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | -| Fsaf | [box AP:37.4%](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | box AP:37.1% | 8.9fps | 40.0fps | -| Fsaf | [box AP:37.4%](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | box AP:37.1% | 6.9fps | 40.0fps | - +# Fsaf模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + 说明:PyTorch选用开源1.8.0版本 +2.获取,修改与安装开源模型代码 + +``` +git clone https://github.com/open-mmlab/mmcv -b master +git reset --hard 04daea425bcb0a104d8b4acbbc16bd31304cf168 +cd mmcv +MMCV_WITH_OPS=1 pip3.7 install -e . +cd .. +git clone https://github.com/open-mmlab/mmdetection -b master +git reset --hard 604bfe9618533949c74002a4e54f972e57ad0a7a +cd mmdetection +patch -p1 < ../fsaf.diff +pip3.7 install -r requirements/build.txt +python3.7 setup.py develop +``` +3.获取权重文件 + +[fsaf_r50_fpn_1x_coco-94ccc51f.pth](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) + +4.数据集 + +[测试集](http://images.cocodataset.org/zips/val2017.zip):coco/val2017/ +[标签](http://images.cocodataset.org/annotations/annotations_trainval2017.zip):coco/annotations/instances_val2017.json + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) + 将benchmark.x86_64或benchmark.aarch64放到当前目录 + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| ---- | ------------------------------------------------------------ | --------------- | -------- | ------- | +| Fsaf | [box AP:37.4%](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | box AP:37.1% | 8.9fps | 40.0fps | +| Fsaf | [box AP:37.4%](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | box AP:37.1% | 6.9fps | 40.0fps | + diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/LICENSE b/ACL_PyTorch/contrib/cv/detection/GFocalV2/LICENSE index df2c2f2c3e55bfbad1aebe53321a94ee5a3854bc..c8ec075d5b892f823d0b485ad4fdd01355c57b3e 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. 
- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Copyright 2018-2019 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/README.md b/ACL_PyTorch/contrib/cv/detection/GFocalV2/README.md index 7b2c3f63d1fc4ffbe8e6217c6b92230c84a1de17..4c92474202f8af5adf3f2cf99d38404478beedc0 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/README.md +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/README.md @@ -1,60 +1,60 @@ -# GFocalV2模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` -说明:PyTorch选用开源1.8.0版本 - - - -2.获取,修改与安装开源模型代码 -安装mmcv -```shell -git clone https://github.com/open-mmlab/mmcv -b v1.2.7 -cd mmcv -MMCV_WITH_OPS=1 pip3.7 install -e . -cd .. -``` -获取GFocalV2代码 -``` -git clone https://github.com/implus/GFocalV2.git -b master -cd GFocalV2 -git reset --hard b7b355631daaf776e097a6e137501aa27ff7e757 -patch -p1 < ../GFocalV2.diff -python3.7 setup.py develop -cd .. -``` - -3.获取权重文件 - -[gfocalv2预训练的pth权重文件](https://drive.google.com/file/d/1wSE9-c7tcQwIDPC6Vm_yfOokdPfmYmy7/view?usp=sharing) - -4.数据集 -[coco2017](https://cocodataset.org/#download),下载其中val2017图片及其标注文件,放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: -``` -root -├── dataset -│ ├── coco -│ │ ├── annotations -│ │ ├── val2017 -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| GFocalV2 bs1 | mAP:41.0% | mAP:40.6% | 7.9fps | 12.071fps | - +# GFocalV2模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` +说明:PyTorch选用开源1.8.0版本 + + + +2.获取,修改与安装开源模型代码 +安装mmcv +```shell +git clone https://github.com/open-mmlab/mmcv -b v1.2.7 +cd mmcv +MMCV_WITH_OPS=1 pip3.7 install -e . +cd .. +``` +获取GFocalV2代码 +``` +git clone https://github.com/implus/GFocalV2.git -b master +cd GFocalV2 +git reset --hard b7b355631daaf776e097a6e137501aa27ff7e757 +patch -p1 < ../GFocalV2.diff +python3.7 setup.py develop +cd .. 
+``` + +3.获取权重文件 + +[gfocalv2预训练的pth权重文件](https://drive.google.com/file/d/1wSE9-c7tcQwIDPC6Vm_yfOokdPfmYmy7/view?usp=sharing) + +4.数据集 +[coco2017](https://cocodataset.org/#download),下载其中val2017图片及其标注文件,放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: +``` +root +├── dataset +│ ├── coco +│ │ ├── annotations +│ │ ├── val2017 +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| GFocalV2 bs1 | mAP:41.0% | mAP:40.6% | 7.9fps | 12.071fps | + 备注:离线模型不支持多batch。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/get_info.py b/ACL_PyTorch/contrib/cv/detection/GFocalV2/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_postprocess.py b/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_postprocess.py index 49b2e77560ba1c725db42173196d0de961be9be6..6a779abac4db61911c08eaed3c240d3477c3e955 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_postprocess.py @@ -1,89 +1,89 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import argparse -import glob -import numpy as np -import cv2 -import torch -from mmdet.core import bbox2result -from mmdet.datasets import CocoDataset - - -def postprocess_bboxes(bboxes, image_size, net_input_width, net_input_height): - org_w = image_size[0] - org_h = image_size[1] - scale = min(net_input_width / org_w, net_input_height / org_h) - bboxes[:, 0] = (bboxes[:, 0]) / scale - bboxes[:, 1] = (bboxes[:, 1]) / scale - bboxes[:, 2] = (bboxes[:, 2]) / scale - bboxes[:, 3] = (bboxes[:, 3]) / scale - return bboxes - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--test_annotation", default="./origin_pictures.info") - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--net_out_num", type=int, default=3) - parser.add_argument("--net_input_width", type=int, default=1216) - parser.add_argument("--net_input_height", type=int, default=800) - parser.add_argument("--annotations_path", default="/root/datasets") - flags = parser.parse_args() - - img_size_dict = dict() - with open(flags.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - bin_path = flags.bin_data_path - - coco_dataset = CocoDataset(ann_file='{}/coco/annotations/instances_val2017.json'.format(flags.annotations_path), pipeline=[]) - coco_class_map = {id:name for id, name in enumerate(coco_dataset.CLASSES)} - results = [] - cnt = 0 - for ids in coco_dataset.img_ids: - cnt = cnt + 1 - bin_file = glob.glob(bin_path + '/*0' + str(ids) + '_1.bin')[0] - bin_file = bin_file[bin_file.rfind('/') + 1:] - bin_file = bin_file[:bin_file.rfind('_')] - print(cnt - 1, bin_file) - path_base = os.path.join(bin_path, bin_file) - - res_buff = [] - bbox_results = [] - cls_segms = [] - if os.path.exists(path_base + "_" + "1" + ".bin") and os.path.exists(path_base + "_" + "2" + ".bin"): - bboxes = np.fromfile(path_base + "_" + str(flags.net_out_num - 1) + ".bin", dtype="float32") - bboxes = np.reshape(bboxes, [100, 5]) - bboxes = torch.from_numpy(bboxes) - labels = np.fromfile(path_base + "_" + str(flags.net_out_num - 2) + ".bin", dtype="int64") - labels = np.reshape(labels, [100, 1]) - labels = torch.from_numpy(labels) - - img_shape = (flags.net_input_height, flags.net_input_width) - bboxes = postprocess_bboxes(bboxes, img_size_dict[bin_file], flags.net_input_width, flags.net_input_height) - bbox_results = [bbox2result(bboxes, labels[:, 0], 80)] - else: - print("[ERROR] file not exist", path_base + "_" + str(1) + ".bin",path_base + "_" + str(2) + ".bin") - - result = bbox_results - results.extend(result) - - eval_results = coco_dataset.evaluate(results, metric=['bbox', ], classwise=True) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import argparse +import glob +import numpy as np +import cv2 +import torch +from mmdet.core import bbox2result +from mmdet.datasets import CocoDataset + + +def postprocess_bboxes(bboxes, image_size, net_input_width, net_input_height): + org_w = image_size[0] + org_h = image_size[1] + scale = min(net_input_width / org_w, net_input_height / org_h) + bboxes[:, 0] = (bboxes[:, 0]) / scale + bboxes[:, 1] = (bboxes[:, 1]) / scale + bboxes[:, 2] = (bboxes[:, 2]) / scale + bboxes[:, 3] = (bboxes[:, 3]) / scale + return bboxes + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--test_annotation", default="./origin_pictures.info") + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--net_out_num", type=int, default=3) + parser.add_argument("--net_input_width", type=int, default=1216) + parser.add_argument("--net_input_height", type=int, default=800) + parser.add_argument("--annotations_path", default="/root/datasets") + flags = parser.parse_args() + + img_size_dict = dict() + with open(flags.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + bin_path = flags.bin_data_path + + coco_dataset = CocoDataset(ann_file='{}/coco/annotations/instances_val2017.json'.format(flags.annotations_path), pipeline=[]) + coco_class_map = {id:name for id, name in enumerate(coco_dataset.CLASSES)} + results = [] + cnt = 0 + for ids in coco_dataset.img_ids: + cnt = cnt + 1 + bin_file = glob.glob(bin_path + '/*0' + str(ids) + '_1.bin')[0] + bin_file = bin_file[bin_file.rfind('/') + 1:] + bin_file = bin_file[:bin_file.rfind('_')] + print(cnt - 1, bin_file) + path_base = os.path.join(bin_path, bin_file) + + res_buff = [] + bbox_results = [] + cls_segms = [] + if os.path.exists(path_base + "_" + "1" + ".bin") and os.path.exists(path_base + "_" + "2" + ".bin"): + bboxes = np.fromfile(path_base + "_" + str(flags.net_out_num - 1) + ".bin", dtype="float32") + bboxes = np.reshape(bboxes, [100, 5]) + bboxes = torch.from_numpy(bboxes) + labels = np.fromfile(path_base + "_" + str(flags.net_out_num - 2) + ".bin", dtype="int64") + labels = np.reshape(labels, [100, 1]) + labels = torch.from_numpy(labels) + + img_shape = (flags.net_input_height, flags.net_input_width) + bboxes = postprocess_bboxes(bboxes, img_size_dict[bin_file], flags.net_input_width, flags.net_input_height) + bbox_results = [bbox2result(bboxes, labels[:, 0], 80)] + else: + print("[ERROR] file not exist", path_base + "_" + str(1) + ".bin",path_base + "_" + str(2) + ".bin") + + result = bbox_results + results.extend(result) + + eval_results = coco_dataset.evaluate(results, metric=['bbox', ], classwise=True) diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_preprocess.py b/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_preprocess.py index 8192a5338abae754193ea8826a63cedf09555b8f..373e2362accb87644c0b7433e8ac02bf887ce6b9 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/gfocal_preprocess.py @@ -1,73 +1,73 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse -import numpy as np -import cv2 -import mmcv -import torch -import multiprocessing - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') - return resized_img - -def gen_input_bin(file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - image = mmcv.imread(os.path.join(flags.image_src_path, file), backend='cv2') - image = resize(image, (flags.model_input_width, flags.model_input_height)) - mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) - std = np.array([58.395, 57.12, 57.375], dtype=np.float32) - image = mmcv.imnormalize(image, mean, std, to_rgb=True) - h = image.shape[0] - w = image.shape[1] - pad_left = (flags.model_input_width - w) // 2 - pad_top = (flags.model_input_height - h) // 2 - pad_right = flags.model_input_width - pad_left - w - pad_bottom = flags.model_input_height - pad_top - h - image = mmcv.impad(image, shape=(flags.model_input_height, flags.model_input_width), pad_val=0) - #mmcv.imwrite(image, './paded_jpg/' + file.split('.')[0] + '.jpg') - image = image.transpose(2, 0, 1) - image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) - -def preprocess(src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') - parser.add_argument("--image_src_path", default="./coco2017/", help='image of dataset') - parser.add_argument("--bin_file_path", default="./coco2017_bin/", help='Preprocessed image buffer') - parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') - parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import numpy as np +import cv2 +import mmcv +import torch +import multiprocessing + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') + return resized_img + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + image = mmcv.imread(os.path.join(flags.image_src_path, file), backend='cv2') + image = resize(image, (flags.model_input_width, flags.model_input_height)) + mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) + std = np.array([58.395, 57.12, 57.375], dtype=np.float32) + image = mmcv.imnormalize(image, mean, std, to_rgb=True) + h = image.shape[0] + w = image.shape[1] + pad_left = (flags.model_input_width - w) // 2 + pad_top = (flags.model_input_height - h) // 2 + pad_right = flags.model_input_width - pad_left - w + pad_bottom = flags.model_input_height - pad_top - h + image = mmcv.impad(image, shape=(flags.model_input_height, flags.model_input_width), pad_val=0) + #mmcv.imwrite(image, './paded_jpg/' + file.split('.')[0] + '.jpg') + image = image.transpose(2, 0, 1) + image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + +def preprocess(src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="./coco2017/", help='image of dataset') + parser.add_argument("--bin_file_path", default="./coco2017_bin/", help='Preprocessed image buffer') + parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') + parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) preprocess(flags.image_src_path, flags.bin_file_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/GFocalV2/modelzoo_level.txt index bdc5dd889d3e2c5450f8df13820f5d359f1a7830..5a90c7c76ee637d956ad5517b60434e8838a0ce6 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus: PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/requirements.txt b/ACL_PyTorch/contrib/cv/detection/GFocalV2/requirements.txt index 7609cac2ce76e9b4064bd75d9aa284692c4cd1b8..689f4900b87ed60a3ee74f326e95c751e36b6944 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.19.4 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.19.4 opencv-python == 4.4.0.46 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/GFocalV2/test/parse.py b/ACL_PyTorch/contrib/cv/detection/GFocalV2/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/detection/GFocalV2/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/GFocalV2/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/LICENSE b/ACL_PyTorch/contrib/cv/detection/M2Det/LICENSE index 8904c8516082056802ee732a4213ceab8c4a93af..5f7aa69fea22dade3f519868400025de434ae8ca 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det.patch b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det.patch index c6d220a060b1615014304df2af943e0f83672e92..d3562619d81843a527cd080c977b771c440d479e 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det.patch +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det.patch @@ -3,14 +3,14 @@ index 01d21b1..df49100 100644 --- a/configs/m2det512_vgg.py +++ b/configs/m2det512_vgg.py @@ -2,7 +2,7 @@ model = dict( - type = 'm2det', - input_size = 512, - init_net = True, -- pretrained = 'weights/vgg16_reducedfc.pth', -+ pretrained = 'M2Det/weights/vgg16_reducedfc.pth', - m2det_config = dict( - backbone = 'vgg16', - net_family = 'vgg', # vgg includes ['vgg16','vgg19'], res includes ['resnetxxx','resnextxxx'] + type = 'm2det', + input_size = 512, + init_net = True, +- pretrained = 'weights/vgg16_reducedfc.pth', ++ pretrained = 'M2Det/weights/vgg16_reducedfc.pth', + m2det_config = dict( + backbone = 'vgg16', + net_family = 'vgg', # vgg includes ['vgg16','vgg19'], res includes ['resnetxxx','resnextxxx'] diff --git a/data/coco.py b/data/coco.py index 3784d65..9d07625 100644 --- a/data/coco.py diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_preprocess.py b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_preprocess.py index d0f0834ae64a95c2a2796bf3f8b5692af37ac490..c887892e250f27418fdc45c5e65d42aede99a716 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_preprocess.py @@ -1,58 +1,58 @@ -''' -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-''' - -import os -import sys -sys.path.insert(0, './M2Det') -import warnings -warnings.filterwarnings('ignore') -import torch -import argparse -import numpy as np -from layers.functions import Detect, PriorBox -from data import BaseTransform -from configs.CC import Config -from utils.core import get_dataloader, print_info - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='M2Det Preprocess') - parser.add_argument('-c', '--config', default='../configs/m2det512_vgg.py', type=str) - parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO version') - parser.add_argument('--test', action='store_true', help='to submit a test file') - parser.add_argument("--save_folder", default="./pre_dataset") - parser.add_argument('--COCO_imgs', default="~/data/coco/images", help='COCO images root') - parser.add_argument('--COCO_anns', default="~/data/coco/annotations", help='COCO annotations root') - args = parser.parse_args() - - cfg = Config.fromfile(args.config) - if not os.path.exists(args.save_folder): - os.mkdir(args.save_folder) - - _set = 'eval_sets' if not args.test else 'test_sets' - testset = get_dataloader(args, cfg, args.dataset, _set) - - _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) - num_images = len(testset) - print_info('=> Total {} images to test.'.format(num_images), ['yellow', 'bold']) - - for i in range(num_images): - input_image, img_id= testset.pull_image(i) - img_name = img_id.split('/')[-1] - print(img_name, "===", i) - input_tensor = _preprocess(input_image).unsqueeze(0) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(args.save_folder, img_name.split('.')[0] + ".bin")) - +''' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+''' + +import os +import sys +sys.path.insert(0, './M2Det') +import warnings +warnings.filterwarnings('ignore') +import torch +import argparse +import numpy as np +from layers.functions import Detect, PriorBox +from data import BaseTransform +from configs.CC import Config +from utils.core import get_dataloader, print_info + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='M2Det Preprocess') + parser.add_argument('-c', '--config', default='../configs/m2det512_vgg.py', type=str) + parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO version') + parser.add_argument('--test', action='store_true', help='to submit a test file') + parser.add_argument("--save_folder", default="./pre_dataset") + parser.add_argument('--COCO_imgs', default="~/data/coco/images", help='COCO images root') + parser.add_argument('--COCO_anns', default="~/data/coco/annotations", help='COCO annotations root') + args = parser.parse_args() + + cfg = Config.fromfile(args.config) + if not os.path.exists(args.save_folder): + os.mkdir(args.save_folder) + + _set = 'eval_sets' if not args.test else 'test_sets' + testset = get_dataloader(args, cfg, args.dataset, _set) + + _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) + num_images = len(testset) + print_info('=> Total {} images to test.'.format(num_images), ['yellow', 'bold']) + + for i in range(num_images): + input_image, img_id= testset.pull_image(i) + img_name = img_id.split('/')[-1] + print(img_name, "===", i) + input_tensor = _preprocess(input_image).unsqueeze(0) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(args.save_folder, img_name.split('.')[0] + ".bin")) + diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_pth2onnx.py index fb998a894bf109a1fb780aeeb83e7892775be043..b558597d06e2d17d9e560b4995a3530b22d6925c 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/M2Det_pth2onnx.py @@ -1,88 +1,88 @@ -''' -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-''' -import sys -sys.path.insert(0, './M2Det') -import torch -import torch.onnx -from collections import OrderedDict -import argparse -print(sys.path) -from m2det import build_net -import torch.utils.data as data -import torch.backends.cudnn as cudnn -from configs.CC import Config -from utils.core import init_net - - - -parser = argparse.ArgumentParser(description='pth2onnx') -parser.add_argument('-c', '--config', default='M2Det/configs/m2det512_vgg16.py') -parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO dataset') -parser.add_argument('--resume_net', default=None, help='resume net for retraining') -parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining') -parser.add_argument('-pth', '--pth_path', default='weights/m2det512_vgg.pth') -parser.add_argument('-onnx', '--onnx_path', default='m2det512.onnx') -Args = parser.parse_args() - -def proc_nodes_module(checkpoint): - ''' - Args: - checkpoint: Network parameters. - Returns: - Create a new dictionary, remove the unnecessary key value "module" - ''' - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(args, cfg): - ''' - Args: - args.pth_path: Weight file path - args.onnx_path: onnx file path - cfg: configs - ''' - print('pth:{}'.format(args.pth_path)) - print('onnx:{}'.format(args.onnx_path)) - net = build_net('test', - size = cfg.model.input_size, # Only 320, 512, 704 and 800 are supported - config = cfg.model.m2det_config) - init_net(net, cfg, args.resume_net) - model = net - - checkpoint = torch.load(args.pth_path, map_location='cpu') - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - - model.eval() - input_names = ["image"] - output_names = ["scores", "boxes"] - #dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dynamic_axes = {'image':{0:'-1'}, 'scores':{0:'-1'}, 'boxes':{0:'-1'}} - dummy_input = torch.randn(1, 3, 512, 512) - torch.onnx.export(model, dummy_input, args.onnx_path, input_names = input_names, - dynamic_axes = dynamic_axes, output_names = output_names, - verbose=True, opset_version=11) - -if __name__ == "__main__": - Cfg = Config.fromfile(Args.config) +''' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+''' +import sys +sys.path.insert(0, './M2Det') +import torch +import torch.onnx +from collections import OrderedDict +import argparse +print(sys.path) +from m2det import build_net +import torch.utils.data as data +import torch.backends.cudnn as cudnn +from configs.CC import Config +from utils.core import init_net + + + +parser = argparse.ArgumentParser(description='pth2onnx') +parser.add_argument('-c', '--config', default='M2Det/configs/m2det512_vgg16.py') +parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO dataset') +parser.add_argument('--resume_net', default=None, help='resume net for retraining') +parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining') +parser.add_argument('-pth', '--pth_path', default='weights/m2det512_vgg.pth') +parser.add_argument('-onnx', '--onnx_path', default='m2det512.onnx') +Args = parser.parse_args() + +def proc_nodes_module(checkpoint): + ''' + Args: + checkpoint: Network parameters. + Returns: + Create a new dictionary, remove the unnecessary key value "module" + ''' + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert(args, cfg): + ''' + Args: + args.pth_path: Weight file path + args.onnx_path: onnx file path + cfg: configs + ''' + print('pth:{}'.format(args.pth_path)) + print('onnx:{}'.format(args.onnx_path)) + net = build_net('test', + size = cfg.model.input_size, # Only 320, 512, 704 and 800 are supported + config = cfg.model.m2det_config) + init_net(net, cfg, args.resume_net) + model = net + + checkpoint = torch.load(args.pth_path, map_location='cpu') + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + + model.eval() + input_names = ["image"] + output_names = ["scores", "boxes"] + #dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dynamic_axes = {'image':{0:'-1'}, 'scores':{0:'-1'}, 'boxes':{0:'-1'}} + dummy_input = torch.randn(1, 3, 512, 512) + torch.onnx.export(model, dummy_input, args.onnx_path, input_names = input_names, + dynamic_axes = dynamic_axes, output_names = output_names, + verbose=True, opset_version=11) + +if __name__ == "__main__": + Cfg = Config.fromfile(Args.config) convert(Args, Cfg) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/M2Det/gen_dataset_info.py index 6469c60543cf783aa29dad98ca2fef563f467414..5f38ef84a7a1de0aec3d5db61afb7690aaef6e72 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/gen_dataset_info.py @@ -1,67 +1,67 @@ -''' -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-''' -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(fpath, info_n, width, height): - ''' - Describe - ''' - bin_images = glob(os.path.join(fpath, '*.bin')) - with open(info_n, 'w') as f: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - f.write(content) - f.write('\n') - - -def get_jpg_info(fpath, info_n): - ''' - Describe - ''' - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(fpath, '*.' + extension))) - with open(info_n, 'w') as f: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - f.write(content) - f.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - Width = sys.argv[4] - Height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, Width, Height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +''' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +''' +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(fpath, info_n, width, height): + ''' + Describe + ''' + bin_images = glob(os.path.join(fpath, '*.bin')) + with open(info_n, 'w') as f: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + f.write(content) + f.write('\n') + + +def get_jpg_info(fpath, info_n): + ''' + Describe + ''' + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(fpath, '*.' 
+ extension))) + with open(info_n, 'w') as f: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + f.write(content) + f.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + Width = sys.argv[4] + Height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, Width, Height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/M2Det/modelzoo_level.txt index 549f8ae7ed9eb5026c1162667887be52400d428c..a8de6fe278d971859535cbcbcdef13790c657615 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK -PerfStatus:Perfect +FuncStatus:OK +PrecisionStatus:OK +PerfStatus:Perfect diff --git a/ACL_PyTorch/contrib/cv/detection/M2Det/requirements.txt b/ACL_PyTorch/contrib/cv/detection/M2Det/requirements.txt index 69bd8849d492fb0074a6efe6c77792dc2257bdcf..af07e2c553ab82c93b2f884d1fb6b75d2897e525 100644 --- a/ACL_PyTorch/contrib/cv/detection/M2Det/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/M2Det/requirements.txt @@ -1,16 +1,16 @@ -torch == 1.8.1 -torchvision == 0.9.1 -onnx == 1.7.0 -numpy == 1.18.5 -Cython == 0.29.24 -opencv-python == 4.5.3.56 -setuptools == 41.2.0 -matplotlib == 2.2.5 -absl-py == 0.13.0 -addict == 2.4.0 -alabaster == 0.7.12 -antlr4-python3-runtime == 4.8 -appdirs == 1.4.4 -asn1crypto == 1.4.0 -astroid == 2.7.3 +torch == 1.8.1 +torchvision == 0.9.1 +onnx == 1.7.0 +numpy == 1.18.5 +Cython == 0.29.24 +opencv-python == 4.5.3.56 +setuptools == 41.2.0 +matplotlib == 2.2.5 +absl-py == 0.13.0 +addict == 2.4.0 +alabaster == 0.7.12 +antlr4-python3-runtime == 4.8 +appdirs == 1.4.4 +asn1crypto == 1.4.0 +astroid == 2.7.3 astropy == 4.3.1 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/README.md b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/README.md index 461df59cd3d0ace18de6d08d78fcf534d9a8558a..42191f22e87a6ed7e999a335beaba1670211b971 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/README.md +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/README.md @@ -1,57 +1,57 @@ -# NAS-FPN模型PyTorch离线推理指导 - -## 1 环境准备 - -1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - - ``` - pip install -r requirements.txt - ``` - -2. 获取,修改与安装开源模型代码 - - ``` - git clone https://github.com/open-mmlab/mmdetection.git - cd mmdetection - git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard - pip install -v -e . - patch -p1 < ../NAS_FPN.patch - cd .. - ``` - - 利用提供的change文件夹中的patch文件,完成补丁操作,命令参考如下示例,请用户根据安装包位置自行修改: - ``` - cd change - patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/deform_conv.py deform_conv.patch - patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/merge_cells.py merge_cells.patch - ``` - - -3. 获取权重文件 - - 从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn)下载nas_fpn模型权重文件 - -4. 数据集 - 本模型使用coco2017的验证集验证 - -5. 
[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=./ -``` - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | -| :---------------: | :---------: | :-------------: | :-----: | :------: | -| NASFPN bs1 | map:0.405 | map:0.404 | 12.7 task/s | 24.750fps | - - - +# NAS-FPN模型PyTorch离线推理指导 + +## 1 环境准备 + +1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + + ``` + pip install -r requirements.txt + ``` + +2. 获取,修改与安装开源模型代码 + + ``` + git clone https://github.com/open-mmlab/mmdetection.git + cd mmdetection + git reset a21eb25535f31634cef332b09fc27d28956fb24b --hard + pip install -v -e . + patch -p1 < ../NAS_FPN.patch + cd .. + ``` + + 利用提供的change文件夹中的patch文件,完成补丁操作,命令参考如下示例,请用户根据安装包位置自行修改: + ``` + cd change + patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/deform_conv.py deform_conv.patch + patch -p0 /usr/local/python3.7.5/lib/python3.7/site-packages/mmcv/ops/merge_cells.py merge_cells.patch + ``` + + +3. 获取权重文件 + + 从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn)下载nas_fpn模型权重文件 + +4. 数据集 + 本模型使用coco2017的验证集验证 + +5. [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=./ +``` + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | gpu性能 | 310性能 | +| :---------------: | :---------: | :-------------: | :-----: | :------: | +| NASFPN bs1 | map:0.405 | map:0.404 | 12.7 task/s | 24.750fps | + + + diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/coco_eval.py b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/coco_eval.py index 00c927f23d2031eae523b428361a7b04dccfc0b6..3eb70b7418ffca14458e5fa015eb2822e4a60a22 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/coco_eval.py +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/coco_eval.py @@ -1,92 +1,92 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import numpy as np -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') - -def coco_evaluation(annotation_json, result_json): - cocoGt = COCO(annotation_json) - cocoDt = cocoGt.loadRes(result_json) - iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) - iou_type = 'bbox' - - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) - cocoEval.params.imgIds = cocoGt.get_img_ids() - cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. - cocoEval.params.iouThrs = iou_thrs - - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - # mapping of cocoEval.stats - coco_metric_names = { - 'mAP': 0, - 'mAP_50': 1, - 'mAP_75': 2, - 'mAP_s': 3, - 'mAP_m': 4, - 'mAP_l': 5, - 'AR@100': 6, - 'AR@300': 7, - 'AR@1000': 8, - 'AR_s@1000': 9, - 'AR_m@1000': 10, - 'AR_l@1000': 11 - } - - metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] - eval_results = {} - - for metric_item in metric_items: - key = f'bbox_{metric_item}' - val = float( - f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' - ) - eval_results[key] = val - ap = cocoEval.stats[:6] - eval_results['bbox_mAP_copypaste'] = ( - f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' - f'{ap[4]:.3f} {ap[5]:.3f}') - - return eval_results - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--ground_truth", default="instances_val2017.json") - parser.add_argument("--detection_result", default="coco_detection_result.json") - args = parser.parse_args() - result = coco_evaluation(args.ground_truth, args.detection_result) - print(result) - with open('./coco_detection_result.txt', 'w') as f: - for key, value in result.items(): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') + +def coco_evaluation(annotation_json, result_json): + cocoGt = COCO(annotation_json) + cocoDt = cocoGt.loadRes(result_json) + iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + iou_type = 'bbox' + + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.catIds = cocoGt.get_cat_ids(cat_names=CLASSES) + cocoEval.params.imgIds = cocoGt.get_img_ids() + cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. + cocoEval.params.iouThrs = iou_thrs + + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # mapping of cocoEval.stats + coco_metric_names = { + 'mAP': 0, + 'mAP_50': 1, + 'mAP_75': 2, + 'mAP_s': 3, + 'mAP_m': 4, + 'mAP_l': 5, + 'AR@100': 6, + 'AR@300': 7, + 'AR@1000': 8, + 'AR_s@1000': 9, + 'AR_m@1000': 10, + 'AR_l@1000': 11 + } + + metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] + eval_results = {} + + for metric_item in metric_items: + key = f'bbox_{metric_item}' + val = float( + f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' + ) + eval_results[key] = val + ap = cocoEval.stats[:6] + eval_results['bbox_mAP_copypaste'] = ( + f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' + f'{ap[4]:.3f} {ap[5]:.3f}') + + return eval_results + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth", default="instances_val2017.json") + parser.add_argument("--detection_result", default="coco_detection_result.json") + args = parser.parse_args() + result = coco_evaluation(args.ground_truth, args.detection_result) + print(result) + with open('./coco_detection_result.txt', 'w') as f: + for key, value in result.items(): f.write(key + ': ' + str(value) + '\n') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/get_info.py b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/get_info.py index 8bf8552914c761ac9da2e1b234bd9df083051251..f31faf1bc9c7c53cd80d2a0f043364eefa8631be 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/get_info.py @@ -1,59 +1,59 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_postprocess.py b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_postprocess.py index cf9a8024efcae388546c99da291b94c87ff00580..1b64c8636623feadfb645907be57ecfb540913e6 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_postprocess.py @@ -1,157 +1,157 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import argparse -import cv2 - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -def coco_postprocess(bbox: np.ndarray, image_size, - net_input_width, net_input_height): - """ - This function is postprocessing for FasterRCNN output. - - Before calling this function, reshape the raw output of FasterRCNN to - following form - numpy.ndarray: - [x, y, width, height, confidence, probability of 80 classes] - shape: (100,) - The postprocessing restore the bounding rectangles of FasterRCNN output - to origin scale and filter with non-maximum suppression. 
- - :param bbox: a numpy array of the FasterRCNN output - :param image_path: a string of image path - :return: three list for best bound, class and score - """ - w = image_size[0] - h = image_size[1] - scale = min(net_input_width / w, net_input_height / h) - - pad_w = net_input_width - w * scale - pad_h = net_input_height - h * scale - pad_left = pad_w // 2 - pad_top = pad_h // 2 - - # cal predict box on the image src - pbox = bbox - pbox[:, 0] = (bbox[:, 0] - pad_left) / scale - pbox[:, 1] = (bbox[:, 1] - pad_top) / scale - pbox[:, 2] = (bbox[:, 2] - pad_left) / scale - pbox[:, 3] = (bbox[:, 3] - pad_top) / scale - - # make pbboxes value in valid range - pbox[:, 0] = np.maximum(pbox[:, 0], 0) - pbox[:, 1] = np.maximum(pbox[:, 1], 0) - pbox[:, 2] = np.minimum(pbox[:, 2], w) - pbox[:, 3] = np.minimum(pbox[:, 3], h) - return pbox - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--test_annotation", default="./coco2017_jpg.info") - parser.add_argument("--det_results_path", default="./detection-results") - parser.add_argument("--img_path", default="./val2017/") - parser.add_argument("--net_out_num", default=2) - parser.add_argument("--net_input_width", default=640) - parser.add_argument("--net_input_height", default=640) - parser.add_argument("--prob_thres", default=0.05) - parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") - flags = parser.parse_args() - print(flags.ifShowDetObj, type(flags.ifShowDetObj)) - # generate dict according to annotation file for query resolution - # load width and height of input images - img_size_dict = dict() - with open(flags.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - # read bin file for generate predict result - bin_path = flags.bin_data_path - det_results_path = flags.det_results_path - img_path = flags.img_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] - for name in os.listdir(bin_path) if "bin" in name]) - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - # load all detected output tensor - res_buff = [] - for num in range(1, flags.net_out_num + 1): - if os.path.exists(path_base + "_" + str(num) + ".bin"): - if num == 1: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") - buf = np.reshape(buf, [100, 5]) - elif num == 2: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") - buf = np.reshape(buf, [100, 1]) - res_buff.append(buf) - else: - print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") - res_tensor = np.concatenate(res_buff, axis=1) - current_img_size = img_size_dict[bin_file] - print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) - predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) - - if flags.ifShowDetObj == True: - pic = os.path.join(img_path, bin_file +'.jpg') - imgCur = cv2.imread(pic) - - det_results_str = '' - for idx, class_ind in enumerate(predbox[:,5]): - if float(predbox[idx][4]) < float(flags.prob_thres): - continue - # skip negative class index - if class_ind < 0 or class_ind > 80: - continue 
- - class_name = CLASSES[int(class_ind)] - det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], - predbox[idx][1], predbox[idx][2], predbox[idx][3]) - if flags.ifShowDetObj == True: - imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), - (int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) - imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), - (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) - # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 - - if flags.ifShowDetObj == True: - print(os.path.join(det_results_path, bin_file +'.jpg')) - cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) - - det_results_file = os.path.join(det_results_path, bin_file + ".txt") - with open(det_results_file, "w") as detf: - detf.write(det_results_str) - print(det_results_str) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import argparse +import cv2 + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +def coco_postprocess(bbox: np.ndarray, image_size, + net_input_width, net_input_height): + """ + This function is postprocessing for FasterRCNN output. + + Before calling this function, reshape the raw output of FasterRCNN to + following form + numpy.ndarray: + [x, y, width, height, confidence, probability of 80 classes] + shape: (100,) + The postprocessing restore the bounding rectangles of FasterRCNN output + to origin scale and filter with non-maximum suppression. 
+ + :param bbox: a numpy array of the FasterRCNN output + :param image_path: a string of image path + :return: three list for best bound, class and score + """ + w = image_size[0] + h = image_size[1] + scale = min(net_input_width / w, net_input_height / h) + + pad_w = net_input_width - w * scale + pad_h = net_input_height - h * scale + pad_left = pad_w // 2 + pad_top = pad_h // 2 + + # cal predict box on the image src + pbox = bbox + pbox[:, 0] = (bbox[:, 0] - pad_left) / scale + pbox[:, 1] = (bbox[:, 1] - pad_top) / scale + pbox[:, 2] = (bbox[:, 2] - pad_left) / scale + pbox[:, 3] = (bbox[:, 3] - pad_top) / scale + + # make pbboxes value in valid range + pbox[:, 0] = np.maximum(pbox[:, 0], 0) + pbox[:, 1] = np.maximum(pbox[:, 1], 0) + pbox[:, 2] = np.minimum(pbox[:, 2], w) + pbox[:, 3] = np.minimum(pbox[:, 3], h) + return pbox + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--test_annotation", default="./coco2017_jpg.info") + parser.add_argument("--det_results_path", default="./detection-results") + parser.add_argument("--img_path", default="./val2017/") + parser.add_argument("--net_out_num", default=2) + parser.add_argument("--net_input_width", default=640) + parser.add_argument("--net_input_height", default=640) + parser.add_argument("--prob_thres", default=0.05) + parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") + flags = parser.parse_args() + print(flags.ifShowDetObj, type(flags.ifShowDetObj)) + # generate dict according to annotation file for query resolution + # load width and height of input images + img_size_dict = dict() + with open(flags.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + # read bin file for generate predict result + bin_path = flags.bin_data_path + det_results_path = flags.det_results_path + img_path = flags.img_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] + for name in os.listdir(bin_path) if "bin" in name]) + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + # load all detected output tensor + res_buff = [] + for num in range(1, flags.net_out_num + 1): + if os.path.exists(path_base + "_" + str(num) + ".bin"): + if num == 1: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") + buf = np.reshape(buf, [100, 5]) + elif num == 2: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int64") + buf = np.reshape(buf, [100, 1]) + res_buff.append(buf) + else: + print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") + res_tensor = np.concatenate(res_buff, axis=1) + current_img_size = img_size_dict[bin_file] + print("[TEST]---------------------------concat{} imgsize{}".format(len(res_tensor), current_img_size)) + predbox = coco_postprocess(res_tensor, current_img_size, flags.net_input_width, flags.net_input_height) + + if flags.ifShowDetObj == True: + pic = os.path.join(img_path, bin_file +'.jpg') + imgCur = cv2.imread(pic) + + det_results_str = '' + for idx, class_ind in enumerate(predbox[:,5]): + if float(predbox[idx][4]) < float(flags.prob_thres): + continue + # skip negative class index + if class_ind < 0 or class_ind > 80: + continue 
+ + class_name = CLASSES[int(class_ind)] + det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], + predbox[idx][1], predbox[idx][2], predbox[idx][3]) + if flags.ifShowDetObj == True: + imgCur=cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), + (int(predbox[idx][2]), int(predbox[idx][3])), (0,255,0), 1) + imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), + (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + # 图像,文字内容, 坐标 ,字体,大小,颜色,字体厚度 + + if flags.ifShowDetObj == True: + print(os.path.join(det_results_path, bin_file +'.jpg')) + cv2.imwrite(os.path.join(det_results_path, bin_file +'.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY),70]) + + det_results_file = os.path.join(det_results_path, bin_file + ".txt") + with open(det_results_file, "w") as detf: + detf.write(det_results_str) + print(det_results_str) diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_preprocess.py b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_preprocess.py index db194cf1defda77b6eeae08e1fb971848453a217..d8bd19c453a882a1953cef54e65ea3c7c2493ccf 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/mmdetection_coco_preprocess.py @@ -1,68 +1,68 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import os -import cv2 -import argparse -import mmcv -import torch - -dataset_config = { - 'resize': (640, 640), - 'mean': [123.675, 116.28, 103.53], - 'std': [58.395, 57.12, 57.375], -} - -tensor_height = 640 -tensor_width = 640 - -def coco_preprocess(input_image, output_bin_path): - #define the output file name - img_name = input_image.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - bin_fl = os.path.join(output_bin_path, bin_name) - - one_img = mmcv.imread(os.path.join(input_image), backend='cv2') - one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) - # calculate padding - h = one_img.shape[0] - w = one_img.shape[1] - pad_left = (tensor_width - w) // 2 - pad_top = (tensor_height - h) // 2 - pad_right = tensor_width - pad_left - w - pad_bottom = tensor_height - pad_top - h - - mean = np.array(dataset_config['mean'], dtype=np.float32) - std = np.array(dataset_config['std'], dtype=np.float32) - one_img = mmcv.imnormalize(one_img, mean, std) - one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) - one_img = one_img.transpose(2, 0, 1) - one_img.tofile(bin_fl) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of NAS-FPN pytorch model') - parser.add_argument("--image_folder_path", default="./coco2017/", help='image of dataset') - parser.add_argument("--bin_folder_path", default="./coco2017_bin/", help='Preprocessed image buffer') - flags = parser.parse_args() - - if not os.path.exists(flags.bin_folder_path): - os.makedirs(flags.bin_folder_path) - images = os.listdir(flags.image_folder_path) - for image_name in images: - if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(flags.image_folder_path, image_name) - coco_preprocess(path_image, flags.bin_folder_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import os +import cv2 +import argparse +import mmcv +import torch + +dataset_config = { + 'resize': (640, 640), + 'mean': [123.675, 116.28, 103.53], + 'std': [58.395, 57.12, 57.375], +} + +tensor_height = 640 +tensor_width = 640 + +def coco_preprocess(input_image, output_bin_path): + #define the output file name + img_name = input_image.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + bin_fl = os.path.join(output_bin_path, bin_name) + + one_img = mmcv.imread(os.path.join(input_image), backend='cv2') + one_img = mmcv.imrescale(one_img, (tensor_height, tensor_width)) + # calculate padding + h = one_img.shape[0] + w = one_img.shape[1] + pad_left = (tensor_width - w) // 2 + pad_top = (tensor_height - h) // 2 + pad_right = tensor_width - pad_left - w + pad_bottom = tensor_height - pad_top - h + + mean = np.array(dataset_config['mean'], dtype=np.float32) + std = np.array(dataset_config['std'], dtype=np.float32) + one_img = mmcv.imnormalize(one_img, mean, std) + one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) + one_img = one_img.transpose(2, 0, 1) + one_img.tofile(bin_fl) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of NAS-FPN pytorch model') + parser.add_argument("--image_folder_path", default="./coco2017/", help='image of dataset') + parser.add_argument("--bin_folder_path", default="./coco2017_bin/", help='Preprocessed image buffer') + flags = parser.parse_args() + + if not os.path.exists(flags.bin_folder_path): + os.makedirs(flags.bin_folder_path) + images = os.listdir(flags.image_folder_path) + for image_name in images: + if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg")): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(flags.image_folder_path, image_name) + coco_preprocess(path_image, flags.bin_folder_path) diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/requirements.txt b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/requirements.txt index a9b2aff748d181f465deadd5f1eb2cef543efb5d..bbcf70abd1d45bdd4747a59f08891e33df32c837 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/requirements.txt @@ -1,10 +1,10 @@ -torch==1.7.0 -torchvision==0.8.0 -onnx==1.8.0 -numpy==1.20.0 -mmdet==2.8.0 -mmcv-full==1.2.4 -opencv-python==4.4.0.46 -mmpycocotools==12.0.3 -onnxruntime==1.9.0 - +torch==1.7.0 +torchvision==0.8.0 +onnx==1.8.0 +numpy==1.20.0 +mmdet==2.8.0 +mmcv-full==1.2.4 +opencv-python==4.4.0.46 +mmpycocotools==12.0.3 +onnxruntime==1.9.0 + diff --git a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/txt_to_json.py b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/txt_to_json.py index 9736c008810f62af011b84b0594cdd708341fadc..65c20258fa741930e85fd38046e1d32b7bc5db93 100644 --- a/ACL_PyTorch/contrib/cv/detection/NAS_FPN/txt_to_json.py +++ b/ACL_PyTorch/contrib/cv/detection/NAS_FPN/txt_to_json.py @@ -1,114 +1,114 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os -import sys -import argparse -import mmcv - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, -24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, -48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, -72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - -''' - 0,0 ------> x (width) - | - | (Left,Top) - | *_________ - | | | - | | - y |_________| - (height) * - (Right,Bottom) -''' - -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - - -def error(msg): - print(msg) - sys.exit(0) - - -def get_predict_list(file_path, gt_classes): - dr_files_list = glob.glob(file_path + '/*.txt') - dr_files_list.sort() - - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - sl = line.split() - if len(sl) > 6: - class_name = sl[0] + ' ' + sl[1] - scores, left, top, right, bottom = sl[2:] - else: - class_name, scores, left, top, right, bottom = sl - if float(scores) < 0.05: - continue - except ValueError: - error_msg = "Error: File " + txt_file + " wrong format.\n" - error_msg += " Expected: \n" - error_msg += " Received: " + line - error(error_msg) - - # bbox = left + " " + top + " " + right + " " + bottom - left = float(left) - right = float(right) - top = float(top) - bottom = float(bottom) - bbox = [left, top, right-left, bottom-top] - bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, - "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) - # sort detection-results by decreasing scores - # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) - return bounding_boxes - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('mAp calculate') - parser.add_argument('--npu_txt_path', default="detection-results", - help='the path of the predict result') - parser.add_argument("--json_output_file", default="coco_detection_result") - args = parser.parse_args() - - res_bbox = get_predict_list(args.npu_txt_path, CLASSES) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import sys +import argparse +import mmcv + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, +24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, +72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' + +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + + +def error(msg): + print(msg) + sys.exit(0) + + +def get_predict_list(file_path, gt_classes): + dr_files_list = glob.glob(file_path + '/*.txt') + dr_files_list.sort() + + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + sl = line.split() + if len(sl) > 6: + class_name = sl[0] + ' ' + sl[1] + scores, left, top, right, bottom = sl[2:] + else: + class_name, scores, left, top, right, bottom = sl + if float(scores) < 0.05: + continue + except ValueError: + error_msg = "Error: File " + txt_file + " wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + + # bbox = left + " " + top + " " + right + " " + bottom + left = float(left) + right = float(right) + top = float(top) + bottom = float(bottom) + bbox = [left, top, right-left, bottom-top] + bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, + "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) + # sort detection-results by decreasing scores + # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) + return bounding_boxes + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('mAp calculate') + parser.add_argument('--npu_txt_path', default="detection-results", + help='the path of the predict 
result') + parser.add_argument("--json_output_file", default="coco_detection_result") + args = parser.parse_args() + + res_bbox = get_predict_list(args.npu_txt_path, CLASSES) mmcv.dump(res_bbox, args.json_output_file + '.json') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Nasnetlarge/LICENSE b/ACL_PyTorch/contrib/cv/detection/Nasnetlarge/LICENSE index b7a7d6c0e55c437adc4f260d4dbff372c830acce..4c9ad980682246bd6ab0d2bae82232be6dbdcbd4 100644 --- a/ACL_PyTorch/contrib/cv/detection/Nasnetlarge/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/Nasnetlarge/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_postprocess.py b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_postprocess.py index a58f6c183285082660ddb7a78658c4ca5ae3492e..69230982ef0614fe1a52f128aef72a128a21aebb 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_postprocess.py @@ -1,119 +1,119 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
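As a companion to the detection-result conversion script earlier in this patch (the one that dumps bounding boxes with mmcv into coco_detection_result.json), a minimal scoring sketch with pycocotools is shown below. The ground-truth annotation path and the result file name are assumptions and are not part of the patched files.

```python
# Minimal sketch: score the bbox JSON produced by the conversion script with pycocotools.
# The annotation path below is an assumption; adjust it to the local COCO layout.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('/root/datasets/coco/annotations/instances_val2017.json')  # assumed GT path
coco_dt = coco_gt.loadRes('coco_detection_result.json')                   # default output of the script

coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()   # prints the standard COCO AP/AR table
```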
-""" -python3.7 OpenPose_postprocess.py ---benchmark_result_path ./result/dumpOutput_device0 ---detections_save_path ./output/result.json ---pad_txt_path ./output/pad.txt ---labels /root/datasets/coco/annotations/person_keypoints_val2017.json -""" -import argparse -import json -import os -import sys -import torch -import cv2 -import numpy as np -sys.path.append("./lightweight-human-pose-estimation.pytorch") -from modules.keypoints import group_keypoints, extract_keypoints -from val import run_coco_eval, convert_to_coco_format - - -def read_txt(txt_path, shape): - with open(txt_path, "r") as f: - line = f.readline() - line_split = line.strip().split(" ") - line_split = [eval(i) for i in line_split] - line_split = torch.Tensor(line_split) - heatmaps = line_split.view(shape) - return heatmaps - - -def transfer(heatmaps, pafs, height, width, top, bottom, left, right, stride=8): - heatmaps = np.transpose(heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) - heatmaps = cv2.resize(heatmaps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) - heatmaps = heatmaps[top:heatmaps.shape[0] - bottom, left:heatmaps.shape[1] - right:, :] - heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC) - pafs = np.transpose(pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) - pafs = cv2.resize(pafs, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) - pafs = pafs[top:pafs.shape[0] - bottom, left:pafs.shape[1] - right, :] - pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC) - return heatmaps, pafs - - -def post_process(args): - txt_folder = args.benchmark_result_path - json_path = args.detections_save_path - pad_path = args.pad_txt_path - pad_info = {} - with open(pad_path, "r") as f: - lines = f.readlines() - for line in lines: - line_split = line.strip().split(" ") - pad_info[line_split[0]] = [int(line_split[i]) for i in range(1, 7)] - txt_1, txt_2 = [], [] - for txt in os.listdir(txt_folder): - txt_pure_name = txt.split('.')[0] - index = txt_pure_name.rfind('_') - name_suffix = txt_pure_name[index + 1] - if name_suffix == "1": - txt_1.append(txt) - else: - txt_2.append(txt) - txt_1.sort() - txt_2.sort() - coco_result = [] - for txt1, txt2 in zip(txt_1, txt_2): - txt_pure_name = txt1.split('.')[0] - index = txt_pure_name.rfind('_') - img_name = txt_pure_name[0:index] + ".jpg" - txt1_path = os.path.join(txt_folder, txt1) - txt2_path = os.path.join(txt_folder, txt2) - print(txt1, txt2) - heatmaps = read_txt(txt1_path, (1, 19, 46, 80)) - pafs = read_txt(txt2_path, (1, 38, 46, 80)) - pad = pad_info[img_name] - height, width = pad[0], pad[1] - top, bottom, left, right = pad[2], pad[3], pad[4], pad[5] - heatmaps, pafs = transfer(heatmaps, pafs, height, width, top, bottom, left, right) - all_keypoints_num = 0 - all_keypoints_by_type = [] - for kpt_idx in range(18): # 19th for bg - all_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, all_keypoints_num) - pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs) - coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints) - image_id = int(img_name[0:img_name.rfind('.')]) - for idx in range(len(coco_keypoints)): - coco_result.append({'image_id': image_id, 'category_id': 1, 'keypoints': coco_keypoints[idx], - 'score': scores[idx]}) - with open(json_path, 'w') as f: - json.dump(coco_result, f, indent=4) - run_coco_eval(args.labels, json_path) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--benchmark_result_path", 
default="./result/dumpOutput_device0") - parser.add_argument("--detections_save_path", default="./output/result.json") - parser.add_argument("--pad_txt_path", default="./output/pad.txt", - help="padding around the image with 368*640") - parser.add_argument('--labels', type=str, default='/root/datasets/coco/annotations/person_keypoints_val2017.json', - help='path to json with keypoints val labels') - args = parser.parse_args() - - post_process(args) - - -if __name__ == '__main__': - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +python3.7 OpenPose_postprocess.py +--benchmark_result_path ./result/dumpOutput_device0 +--detections_save_path ./output/result.json +--pad_txt_path ./output/pad.txt +--labels /root/datasets/coco/annotations/person_keypoints_val2017.json +""" +import argparse +import json +import os +import sys +import torch +import cv2 +import numpy as np +sys.path.append("./lightweight-human-pose-estimation.pytorch") +from modules.keypoints import group_keypoints, extract_keypoints +from val import run_coco_eval, convert_to_coco_format + + +def read_txt(txt_path, shape): + with open(txt_path, "r") as f: + line = f.readline() + line_split = line.strip().split(" ") + line_split = [eval(i) for i in line_split] + line_split = torch.Tensor(line_split) + heatmaps = line_split.view(shape) + return heatmaps + + +def transfer(heatmaps, pafs, height, width, top, bottom, left, right, stride=8): + heatmaps = np.transpose(heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) + heatmaps = cv2.resize(heatmaps, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + heatmaps = heatmaps[top:heatmaps.shape[0] - bottom, left:heatmaps.shape[1] - right:, :] + heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC) + pafs = np.transpose(pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) + pafs = cv2.resize(pafs, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + pafs = pafs[top:pafs.shape[0] - bottom, left:pafs.shape[1] - right, :] + pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC) + return heatmaps, pafs + + +def post_process(args): + txt_folder = args.benchmark_result_path + json_path = args.detections_save_path + pad_path = args.pad_txt_path + pad_info = {} + with open(pad_path, "r") as f: + lines = f.readlines() + for line in lines: + line_split = line.strip().split(" ") + pad_info[line_split[0]] = [int(line_split[i]) for i in range(1, 7)] + txt_1, txt_2 = [], [] + for txt in os.listdir(txt_folder): + txt_pure_name = txt.split('.')[0] + index = txt_pure_name.rfind('_') + name_suffix = txt_pure_name[index + 1] + if name_suffix == "1": + txt_1.append(txt) + else: + txt_2.append(txt) + txt_1.sort() + txt_2.sort() + coco_result = [] + for txt1, txt2 in zip(txt_1, txt_2): + txt_pure_name = txt1.split('.')[0] + index = txt_pure_name.rfind('_') + img_name = txt_pure_name[0:index] + ".jpg" + txt1_path = os.path.join(txt_folder, txt1) + txt2_path = os.path.join(txt_folder, txt2) + print(txt1, 
txt2) + heatmaps = read_txt(txt1_path, (1, 19, 46, 80)) + pafs = read_txt(txt2_path, (1, 38, 46, 80)) + pad = pad_info[img_name] + height, width = pad[0], pad[1] + top, bottom, left, right = pad[2], pad[3], pad[4], pad[5] + heatmaps, pafs = transfer(heatmaps, pafs, height, width, top, bottom, left, right) + all_keypoints_num = 0 + all_keypoints_by_type = [] + for kpt_idx in range(18): # 19th for bg + all_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, all_keypoints_num) + pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs) + coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints) + image_id = int(img_name[0:img_name.rfind('.')]) + for idx in range(len(coco_keypoints)): + coco_result.append({'image_id': image_id, 'category_id': 1, 'keypoints': coco_keypoints[idx], + 'score': scores[idx]}) + with open(json_path, 'w') as f: + json.dump(coco_result, f, indent=4) + run_coco_eval(args.labels, json_path) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--benchmark_result_path", default="./result/dumpOutput_device0") + parser.add_argument("--detections_save_path", default="./output/result.json") + parser.add_argument("--pad_txt_path", default="./output/pad.txt", + help="padding around the image with 368*640") + parser.add_argument('--labels', type=str, default='/root/datasets/coco/annotations/person_keypoints_val2017.json', + help='path to json with keypoints val labels') + args = parser.parse_args() + + post_process(args) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_preprocess.py b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_preprocess.py index 08328966e4a1d44f6a05b89bb9e157fbd68e1e64..68c1d4e78fc6e0fc6f21820eb840806c46eb5c08 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_preprocess.py @@ -1,90 +1,90 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
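OpenPose_postprocess.py above rebuilds each (1, 19, 46, 80) heatmap and (1, 38, 46, 80) PAF tensor by calling eval() on every token of the benchmark output txt. A roughly equivalent, eval()-free loader using numpy is sketched below; it is an illustration under the same single-line file layout and is not part of the patch.

```python
# Sketch of an eval()-free loader for one benchmark output txt (same layout as
# read_txt() above: a single line of space-separated floats).
import numpy as np
import torch

def read_txt_np(txt_path, shape):
    values = np.loadtxt(txt_path, dtype=np.float32).reshape(shape)
    return torch.from_numpy(values)

# e.g. heatmaps = read_txt_np("000000000139_1.txt", (1, 19, 46, 80))  # file name is illustrative
```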
-""" -python3.7 OpenPose_preprocess.py ---src_path /root/datasets/coco/val2017 ---save_path /root/datasets/coco/prep_dataset ---pad_txt_path ./output/pad.txt -""" -import os -import sys -import math -import argparse -import torch -import numpy as np -import cv2 -sys.path.append("./lightweight-human-pose-estimation.pytorch") -from val import normalize - - -def pad_width(img, stride, pad_value, min_dims, name, height, width, pad_txt_path): - h, w, _ = img.shape - min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride - min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride - pad = [int(math.floor((min_dims[0] - h) / 2.0)), int(math.floor((min_dims[1] - w) / 2.0))] - pad.append(int(min_dims[0] - h - pad[0])) - pad.append(int(min_dims[1] - w - pad[1])) - padded_img = cv2.copyMakeBorder(img, pad[int(0)], pad[int(2)], pad[int(1)], pad[int(3)], - cv2.BORDER_CONSTANT, value=pad_value) - with open(pad_txt_path, "a") as f: - f.write(str(name) + " " + str(height) + " " + str(width) + " " + - str(pad[int(0)]) + " " + str(pad[int(2)]) + " " + str(pad[int(1)]) + " " + str(pad[int(3)]) + "\n") - print("padded_img's h w:", padded_img.shape[0], padded_img.shape[1]) - return padded_img, pad # top,bottom,left,right - - -def image_preprocess(img, name, pad_txt_path, base_height=368, base_width=640, stride=8, cpu=True, pad_value=(0, 0, 0), - img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1 / 256)): - norm_img = normalize(img, img_mean, img_scale) - height, width, _ = img.shape - height_scale = base_height / height - width_scale = base_width / width - scale = min(height_scale, width_scale) - scaled_img = cv2.resize(norm_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) - min_dims = [base_height, base_width] - padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims, name, height, width, pad_txt_path) - tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() - print("tensor_img shape:", tensor_img.shape) - if not cpu: - tensor_img = tensor_img.cuda() - return tensor_img - - -def preprocess(src_path, save_path, pad_txt_path): - in_files = os.listdir(src_path) - for i, file in enumerate(in_files): - print(file, "===", i) - img_path = os.path.join(src_path, file) - input_image = cv2.imread(img_path, cv2.IMREAD_COLOR) - input_tensor = image_preprocess(input_image, file, pad_txt_path) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--src_path', type=str, default='/root/datasets/coco/val2017', - help='the source path of images') - parser.add_argument('--save_path', type=str, default='/root/datasets/coco/prep_dataset', - help='the path of saving bin of each image') - parser.add_argument('--pad_txt_path', type=str, default='./output/pad.txt' - , help='the path of pad.txt saving the info of padding') - args = parser.parse_args() - with open(args.pad_txt_path, "a+") as f: - f.truncate(0) - preprocess(args.src_path, args.save_path, args.pad_txt_path) - - -if __name__ == "__main__": - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +python3.7 OpenPose_preprocess.py +--src_path /root/datasets/coco/val2017 +--save_path /root/datasets/coco/prep_dataset +--pad_txt_path ./output/pad.txt +""" +import os +import sys +import math +import argparse +import torch +import numpy as np +import cv2 +sys.path.append("./lightweight-human-pose-estimation.pytorch") +from val import normalize + + +def pad_width(img, stride, pad_value, min_dims, name, height, width, pad_txt_path): + h, w, _ = img.shape + min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride + min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride + pad = [int(math.floor((min_dims[0] - h) / 2.0)), int(math.floor((min_dims[1] - w) / 2.0))] + pad.append(int(min_dims[0] - h - pad[0])) + pad.append(int(min_dims[1] - w - pad[1])) + padded_img = cv2.copyMakeBorder(img, pad[int(0)], pad[int(2)], pad[int(1)], pad[int(3)], + cv2.BORDER_CONSTANT, value=pad_value) + with open(pad_txt_path, "a") as f: + f.write(str(name) + " " + str(height) + " " + str(width) + " " + + str(pad[int(0)]) + " " + str(pad[int(2)]) + " " + str(pad[int(1)]) + " " + str(pad[int(3)]) + "\n") + print("padded_img's h w:", padded_img.shape[0], padded_img.shape[1]) + return padded_img, pad # top,bottom,left,right + + +def image_preprocess(img, name, pad_txt_path, base_height=368, base_width=640, stride=8, cpu=True, pad_value=(0, 0, 0), + img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1 / 256)): + norm_img = normalize(img, img_mean, img_scale) + height, width, _ = img.shape + height_scale = base_height / height + width_scale = base_width / width + scale = min(height_scale, width_scale) + scaled_img = cv2.resize(norm_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) + min_dims = [base_height, base_width] + padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims, name, height, width, pad_txt_path) + tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() + print("tensor_img shape:", tensor_img.shape) + if not cpu: + tensor_img = tensor_img.cuda() + return tensor_img + + +def preprocess(src_path, save_path, pad_txt_path): + in_files = os.listdir(src_path) + for i, file in enumerate(in_files): + print(file, "===", i) + img_path = os.path.join(src_path, file) + input_image = cv2.imread(img_path, cv2.IMREAD_COLOR) + input_tensor = image_preprocess(input_image, file, pad_txt_path) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--src_path', type=str, default='/root/datasets/coco/val2017', + help='the source path of images') + parser.add_argument('--save_path', type=str, default='/root/datasets/coco/prep_dataset', + help='the path of saving bin of each image') + parser.add_argument('--pad_txt_path', type=str, default='./output/pad.txt' + , help='the path of pad.txt saving the info of padding') + args = parser.parse_args() + with open(args.pad_txt_path, "a+") as f: + f.truncate(0) + preprocess(args.src_path, args.save_path, args.pad_txt_path) + + +if __name__ == "__main__": + 
main() diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_pth2onnx.py index bf5e6c4022efe59d78ee5ab9dda2eaf536047ce9..9f01273cda1fc7e37ff97a3753ef7371f7753712 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/OpenPose_pth2onnx.py @@ -1,63 +1,63 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -python3.7 OpenPose_pth2onnx.py ---checkpoint-path ./weights/checkpoint_iter_370000.pth ---output-name ./output/human-pose-estimation.onnx -""" -import argparse -import os -import sys -import torch -sys.path.append("./lightweight-human-pose-estimation.pytorch") -from models.with_mobilenet import PoseEstimationWithMobileNet -from modules.load_state import load_state - - -def convert_to_onnx(network, output_name): - net_input = torch.randn(1, 3, 368, 640) - input_names = ['data'] - output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', - 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] - dynamic_axes = {'data': {0: '-1'}, 'stage_0_output_1_heatmaps': {0: '-1'}, 'stage_0_output_0_pafs': {0: '-1'}, - 'stage_1_output_1_heatmaps': {0: '-1'}, 'stage_1_output_0_pafs': {0: '-1'}} - torch.onnx.export(network, net_input, output_name, opset_version=11, verbose=True, - input_names=input_names, dynamic_axes=dynamic_axes, output_names=output_names) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--checkpoint_path', type=str, default='./weights/checkpoint_iter_370000.pth', - help='path to the checkpoint') - parser.add_argument('--output_name', type=str, default='./output/human-pose-estimation.onnx', - help='name of output model in ONNX format') - args = parser.parse_args() - - # mkdir - dir1, file1 = os.path.split(args.checkpoint_path) - dir2, file2 = os.path.split(args.output_name) - if not os.path.exists(dir1): - os.mkdir(dir1) - else: - print(dir1, "already exist") - if not os.path.exists(dir2): - os.mkdir(dir2) - else: - print(dir2, "already exist") - - net = PoseEstimationWithMobileNet() - checkpoint = torch.load(args.checkpoint_path, map_location=torch.device("cpu")) - load_state(net, checkpoint) - - convert_to_onnx(net, args.output_name) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
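A quick way to sanity-check the .bin files written by OpenPose_preprocess.py above is to reload one and confirm it matches the 1x3x368x640 float32 input layout the network expects. The sketch below is only an assumption-level check (the file name is an example), not part of the patch.

```python
# Reload one preprocessed .bin and verify the NCHW layout produced by image_preprocess().
import numpy as np

arr = np.fromfile("/root/datasets/coco/prep_dataset/000000000139.bin", dtype=np.float32)  # example file
arr = arr.reshape(1, 3, 368, 640)          # raises if the element count does not match
print(arr.shape, arr.min(), arr.max())
```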
+""" +python3.7 OpenPose_pth2onnx.py +--checkpoint-path ./weights/checkpoint_iter_370000.pth +--output-name ./output/human-pose-estimation.onnx +""" +import argparse +import os +import sys +import torch +sys.path.append("./lightweight-human-pose-estimation.pytorch") +from models.with_mobilenet import PoseEstimationWithMobileNet +from modules.load_state import load_state + + +def convert_to_onnx(network, output_name): + net_input = torch.randn(1, 3, 368, 640) + input_names = ['data'] + output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', + 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] + dynamic_axes = {'data': {0: '-1'}, 'stage_0_output_1_heatmaps': {0: '-1'}, 'stage_0_output_0_pafs': {0: '-1'}, + 'stage_1_output_1_heatmaps': {0: '-1'}, 'stage_1_output_0_pafs': {0: '-1'}} + torch.onnx.export(network, net_input, output_name, opset_version=11, verbose=True, + input_names=input_names, dynamic_axes=dynamic_axes, output_names=output_names) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--checkpoint_path', type=str, default='./weights/checkpoint_iter_370000.pth', + help='path to the checkpoint') + parser.add_argument('--output_name', type=str, default='./output/human-pose-estimation.onnx', + help='name of output model in ONNX format') + args = parser.parse_args() + + # mkdir + dir1, file1 = os.path.split(args.checkpoint_path) + dir2, file2 = os.path.split(args.output_name) + if not os.path.exists(dir1): + os.mkdir(dir1) + else: + print(dir1, "already exist") + if not os.path.exists(dir2): + os.mkdir(dir2) + else: + print(dir2, "already exist") + + net = PoseEstimationWithMobileNet() + checkpoint = torch.load(args.checkpoint_path, map_location=torch.device("cpu")) + load_state(net, checkpoint) + + convert_to_onnx(net, args.output_name) diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/README.md b/ACL_PyTorch/contrib/cv/detection/OpenPose/README.md index 480a0bcce94089864a4a394f1a4ac1503d88ec11..08eef2e02fb63578927499923484f8cce88655d4 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/README.md +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/README.md @@ -1,52 +1,52 @@ -# OpenPose模型PyTorch离线推理指导 -## 1 环境准备 -### 1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` -### 1.2 获取,安装开源模型代码 -``` -git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git -``` -### 1.3 获取权重文件 -[OpenPose预训练pth权重文件](https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth) -``` -wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth -P ./weights -``` -### 1.4 数据集 -310服务器上可能已经下载好该数据集,若无,参考以下方法下载。 -[coco2017官网](https://cocodataset.org/#download) -下载其中val2017图片及其标注文件,使用5000张验证集进行测试,图片与标注文件分别存放在/root/datasets/coco/val2017与/root/datasets/coco/annotations/person_keypoints_val2017.json。 -文件目录结构如下, -``` -root -├── datasets -│ ├── coco -│ │ ├── annotations -│ │ │   ├── captions_train2017.json -│ │ │   ├── captions_val2017.json -│ │ │   ├── instances_train2017.json -│ │ │   ├── instances_val2017.json -│ │ │   ├── person_keypoints_train2017.json -│ │ │   └── person_keypoints_val2017.json -│ │ ├── val2017 -│ │ ├── annotations_trainval2017.zip -``` -### 1.5 [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录,并更改权限 -``` -chmod 777 benchmark.x86_64 -``` -## 2 离线推理 -### 
2.1 测试 -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` -### 2.2 测评结果 -|模型|pth精度(AP,%)|310精度(AP,%)|性能基准|310性能| -|----|----|----|----|----| -|OpenPose bs1|40|40.4|224.660fps|303.276fps| -|OpenPose bs16|40|40.4|339.973fps|444.908fps| - +# OpenPose模型PyTorch离线推理指导 +## 1 环境准备 +### 1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` +### 1.2 获取,安装开源模型代码 +``` +git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git +``` +### 1.3 获取权重文件 +[OpenPose预训练pth权重文件](https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth) +``` +wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth -P ./weights +``` +### 1.4 数据集 +310服务器上可能已经下载好该数据集,若无,参考以下方法下载。 +[coco2017官网](https://cocodataset.org/#download) +下载其中val2017图片及其标注文件,使用5000张验证集进行测试,图片与标注文件分别存放在/root/datasets/coco/val2017与/root/datasets/coco/annotations/person_keypoints_val2017.json。 +文件目录结构如下, +``` +root +├── datasets +│ ├── coco +│ │ ├── annotations +│ │ │   ├── captions_train2017.json +│ │ │   ├── captions_val2017.json +│ │ │   ├── instances_train2017.json +│ │ │   ├── instances_val2017.json +│ │ │   ├── person_keypoints_train2017.json +│ │ │   └── person_keypoints_val2017.json +│ │ ├── val2017 +│ │ ├── annotations_trainval2017.zip +``` +### 1.5 [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录,并更改权限 +``` +chmod 777 benchmark.x86_64 +``` +## 2 离线推理 +### 2.1 测试 +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` +### 2.2 测评结果 +|模型|pth精度(AP,%)|310精度(AP,%)|性能基准|310性能| +|----|----|----|----|----| +|OpenPose bs1|40|40.4|224.660fps|303.276fps| +|OpenPose bs16|40|40.4|339.973fps|444.908fps| + diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/OpenPose/gen_dataset_info.py index 63d02f673c35e65691364a64fcc4af5c43529db3..3d48029d6147f38cc3ad4976d68f8d04c46f752d 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/gen_dataset_info.py @@ -1,72 +1,72 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
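Before converting the exported ONNX model to an OM file, it can help to smoke-test it once with onnxruntime and confirm the input name `data` and the four `stage_*` output heads declared in OpenPose_pth2onnx.py. The following is a hedged sketch (onnxruntime is assumed to be available; it is not listed in requirements.txt), not part of the patch.

```python
# Run the exported model once on random data and print the output head shapes.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("./output/human-pose-estimation.onnx",
                            providers=["CPUExecutionProvider"])
dummy = np.random.randn(1, 3, 368, 640).astype(np.float32)
outputs = sess.run(None, {"data": dummy})
for meta, out in zip(sess.get_outputs(), outputs):
    print(meta.name, out.shape)   # expect stage_*_heatmaps and stage_*_pafs
```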
-""" -python3.7 gen_dataset_info.py -bin -../../../../../root/datasets/coco/prep_dataset # Only relative paths of the info file can be used -./output/openpose_prep_bin.info -640 -368 # Sample width and height after preprocess -""" -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - bin_images.sort() - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -def main(): - file_type = sys.argv[1] - file_path = sys.argv[2] # Only relative paths of the info file can be used - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - - -if __name__ == '__main__': - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +python3.7 gen_dataset_info.py +bin +../../../../../root/datasets/coco/prep_dataset # Only relative paths of the info file can be used +./output/openpose_prep_bin.info +640 +368 # Sample width and height after preprocess +""" +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + bin_images.sort() + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +def main(): + file_type = sys.argv[1] + file_path = sys.argv[2] # Only relative paths of the info file can be used + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/OpenPose/modelzoo_level.txt index 246aac9fe92ba220e6fe80b3125018c78bdbec69..5d0cf16821d129aca2d4c4839f867eb4d7b3b409 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK -PerfStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:OK diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/requirements.txt b/ACL_PyTorch/contrib/cv/detection/OpenPose/requirements.txt index f1784bf064fd6d4e03025bcf4b30ae8669f5b018..a68dc2cb774e6528798ea1c91bf57c785a80138d 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/requirements.txt @@ -1,5 +1,5 @@ -torch>=0.4.1 -torchvision>=0.2.1 -pycocotools==2.0.0 -opencv-python>=3.4.0.14 +torch>=0.4.1 +torchvision>=0.2.1 +pycocotools==2.0.0 +opencv-python>=3.4.0.14 numpy>=1.14.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/OpenPose/test/parse.py b/ACL_PyTorch/contrib/cv/detection/OpenPose/test/parse.py index 6cdf1420bd3d3e7d14f5add67c57cfe2ad399407..64b47e3cff99e9e20539ae0c03b95d691d92aa1d 100644 --- a/ACL_PyTorch/contrib/cv/detection/OpenPose/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/OpenPose/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
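For reference, get_bin_info() in gen_dataset_info.py above writes one line per .bin file in the form `<index> <relative bin path> <width> <height>`. The snippet below only illustrates that layout with made-up file names; it is not part of the patch.

```python
# Illustrative .info content for the benchmark tool (file names are examples).
lines = [
    "0 ./prep_dataset/000000000139.bin 640 368",
    "1 ./prep_dataset/000000000285.bin 640 368",
]
with open("./output/openpose_prep_bin.info", "w") as f:
    f.write("\n".join(lines) + "\n")
```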
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) diff --git a/ACL_PyTorch/contrib/cv/detection/RFCN/env.sh b/ACL_PyTorch/contrib/cv/detection/RFCN/env.sh index a63ac98e8aac637432f623200e9735f656e732d0..f0619d3ad0d2fc9b1e62a2f02be28d6a6320be6f 100644 --- a/ACL_PyTorch/contrib/cv/detection/RFCN/env.sh +++ b/ACL_PyTorch/contrib/cv/detection/RFCN/env.sh @@ -1,7 +1,7 @@ -export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/ascend-toolkit/latest/compiler/lib64/plugin/opskernel:/usr/local/Ascend/ascend-toolkit/latest/compiler/lib64/plugin/nnengine:$LD_LIBRARY_PATH -export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:$PYTHONPATH -export PATH=/usr/local/Ascend/ascend-toolkit/latest/bin:/usr/local/Ascend/ascend-toolkit/latest/compiler/ccec_compiler/bin:$PATH -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp -export TOOLCHAIN_HOME=/usr/local/Ascend/ascend-toolkit/latest/toolkit +export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/ascend-toolkit/latest/compiler/lib64/plugin/opskernel:/usr/local/Ascend/ascend-toolkit/latest/compiler/lib64/plugin/nnengine:$LD_LIBRARY_PATH +export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:$PYTHONPATH +export 
PATH=/usr/local/Ascend/ascend-toolkit/latest/bin:/usr/local/Ascend/ascend-toolkit/latest/compiler/ccec_compiler/bin:$PATH +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp +export TOOLCHAIN_HOME=/usr/local/Ascend/ascend-toolkit/latest/toolkit export ASCEND_AUTOML_PATH=/usr/local/Ascend/ascend-toolkit/latest/tools \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RFCN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/RFCN/modelzoo_level.txt index 62ed12347c866db16fef5622af355734787e4ec9..def2f63f1b066cc94e8dd94ec2f0c60d1baa3608 100644 --- a/ACL_PyTorch/contrib/cv/detection/RFCN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/RFCN/modelzoo_level.txt @@ -1,6 +1,6 @@ -ModelConvert:OK -QuantStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +ModelConvert:OK +QuantStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RFCN/rfcn_postprocess.py b/ACL_PyTorch/contrib/cv/detection/RFCN/rfcn_postprocess.py index 5edc6bea58c0e49f5c5c98699766cc38996b3ca3..eac0ac7e9298c32ef472f3633286640a00e0a0ad 100644 --- a/ACL_PyTorch/contrib/cv/detection/RFCN/rfcn_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/RFCN/rfcn_postprocess.py @@ -1,249 +1,249 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import torch.nn.functional as F -import sys -sys.path.append('./RFCN-pytorch.1.0') -import _init_paths -import os - -import numpy as np -import argparse -import pprint -import pdb -import time -import cv2 -import torch -from torch.autograd import Variable -import torch.nn as nn -import torch.optim as optim -import pickle -from roi_data_layer.roidb import combined_roidb -from roi_data_layer.roibatchLoader import roibatchLoader -from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir -from model.rpn.bbox_transform import clip_boxes -# from model.nms.nms_wrapper import nms -from model.roi_layers import nms -from model.rpn.bbox_transform import bbox_transform_inv -from model.utils.net_utils import save_net, load_net, vis_detections -from model.rfcn.resnet_atrous import resnet -import pdb -try: - xrange # Python 2 -except NameError: - xrange = range # Python 3 - - -def parse_args(): - """ - Parse input arguments - """ - parser = argparse.ArgumentParser(description='test the accuracy of RFCN') - - parser.add_argument("--image_folder_path", dest="file_path", default="./RFCN-pytorch.1.0/data/VOCdevkit2007/VOC2007/JPEGImages/",help='image of dataset') - parser.add_argument("--input",dest="input", default="./result/dumpOutput_device0/") - parser.add_argument("--output",dest="output", default="./output") - parser.add_argument("--test_annotation", default="./demo.info") - parser.add_argument("--net_input_width", default=1344) - parser.add_argument("--net_input_height", default=1344) - parser.add_argument('--dataset', dest='dataset',help='training dataset',default='pascal_voc', type=str) - parser.add_argument('--cfg', dest='cfg_file', help='optional config file',default='cfgs/res16.yml', type=str) - parser.add_argument('--net', dest='net',help='vgg16, res50, res101, res152',default='res101', type=str) - parser.add_argument('--set', dest='set_cfgs',help='set config keys', default=None,nargs=argparse.REMAINDER) - parser.add_argument('--load_dir', dest='load_dir',help='directory to load models', default="models", type=str) - parser.add_argument('--ls', dest='large_scale',help='whether use large imag scale',action='store_true') - parser.add_argument('--cag', dest='class_agnostic',help='whether perform class_agnostic bbox regression',action='store_true') - parser.add_argument('--parallel_type', dest='parallel_type',help='which part of model to parallel, 0: all, 1: model before roi pooling',default=0, type=int) - parser.add_argument('--bs', dest='batch_size',help='batch_size', default=1, type=int) - parser.add_argument('--vis', dest='vis', help='visualization mode',action='store_true') - args = parser.parse_args() - return args - -lr = cfg.TRAIN.LEARNING_RATE -momentum = cfg.TRAIN.MOMENTUM -weight_decay = cfg.TRAIN.WEIGHT_DECAY - -if __name__ == '__main__': - - args = parse_args() - - print('Called with args:') - print(args) - - np.random.seed(cfg.RNG_SEED) - - args.imdbval_name = "voc_2007_test" - args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] - args.cfg_file = "./RFCN-pytorch.1.0/cfgs/{}_ls.yml".format(args.net) if args.large_scale else "./RFCN-pytorch.1.0/cfgs/{}.yml".format(args.net) - - if args.cfg_file is not None: - cfg_from_file(args.cfg_file) - if args.set_cfgs is not None: - cfg_from_list(args.set_cfgs) - - cfg.TRAIN.USE_FLIPPED = False - imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False) - 
imdb.competition_mode(on=True) - - im_data = torch.FloatTensor(1) - im_info = torch.FloatTensor(1) - num_boxes = torch.LongTensor(1) - gt_boxes = torch.FloatTensor(1) - - with torch.no_grad(): - im_data = Variable(im_data) - im_info = Variable(im_info) - num_boxes = Variable(num_boxes) - gt_boxes = Variable(gt_boxes) - - start = time.time() - max_per_image = 100 - - vis = args.vis - - if vis: - thresh = 0.05 - else: - thresh = 0.0 - - #save_name = 'RFCN' - num_images = len(imdb.image_index) - all_boxes = [[[] for _ in xrange(num_images)] - for _ in xrange(imdb.num_classes)] - - #output_dir = get_output_dir(imdb, save_name) - output=args.output - if not os.path.exists(output): - os.makedirs(output) - dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ - imdb.num_classes, training=False, normalize = False) - dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, - shuffle=False, num_workers=0, - pin_memory=True) - - data_iter = iter(dataloader) - - # _t = {'im_detect': time.time(), 'misc': time.time()} - det_file = os.path.join(output, 'detections.pkl') - # fasterRCNN.eval() - empty_array = np.transpose(np.array([[],[],[],[],[]]), (1,0)) - dataset.resize_batch() - npu_result = args.input - with open("./RFCN-pytorch.1.0/data/VOCdevkit2007/VOC2007/ImageSets/Main/test.txt") as f: - imglist = [x.strip() for x in f.readlines()] - num_images = len(imglist) - for i in range(num_images): - data = next(data_iter) - pad_value = 0 - batch_shape = (3, 1344, 1344) - padding_size = [0, batch_shape[-1] - data[0].shape[-1], - 0, batch_shape[-2] - data[0].shape[-2]] - #data[0] = F.pad(data[0], padding_size, value=pad_value) - #im_data.resize_(data[0].size()).copy_(data[0]) - # print(im_data.size()) - im_info.resize_(data[1].size()).copy_(data[1]) - gt_boxes.resize_(data[2].size()).copy_(data[2]) - num_boxes.resize_(data[3].size()).copy_(data[3]) - det_tic = time.time() - - def read_data(data_path, input_shape=None): - if data_path.endswith('.bin'): - data = np.fromfile(data_path, dtype=np.float32) - data = data.reshape(input_shape) - elif data_path.endswith('.npy'): - data = np.load(data_path) - return data - - rois = torch.from_numpy( - read_data(npu_result+'{}_1.bin'.format(imglist[i]), [1, 300, 5])) - cls_prob = torch.from_numpy( - read_data(npu_result+'{}_2.bin'.format(imglist[i]), [1, 300, 21])) - bbox_pred = torch.from_numpy( - read_data(npu_result+'{}_3.bin'.format(imglist[i]), [1, 300, 84])) - scores = cls_prob.data - boxes = rois.data[:, :, 1:5] - - box_deltas = bbox_pred.data - box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ - + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) - box_deltas = box_deltas.view(args.batch_size, -1, 4 * len(imdb.classes)) - pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) - pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) - pred_boxes /= data[1][0][2] - - scores = scores.squeeze() - pred_boxes = pred_boxes.squeeze() - det_toc = time.time() - detect_time = det_toc - det_tic - misc_tic = time.time() - if vis: - im = cv2.imread(imdb.image_path_at(i)) - im2show = np.copy(im) - for j in xrange(1, imdb.num_classes): - inds = torch.nonzero(scores[:, j] > thresh).view(-1) - # if there is det - if inds.numel() > 0: - cls_scores = scores[:, j][inds] - _, order = torch.sort(cls_scores, 0, True) - if args.class_agnostic: - cls_boxes = pred_boxes[inds, :] - else: - cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] - - cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) - # 
cls_dets = torch.cat((cls_boxes, cls_scores), 1) - cls_dets = cls_dets[order] - keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) - cls_dets = cls_dets[keep.view(-1).long()] - if vis: - im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) - all_boxes[j][i] = cls_dets.cpu().numpy() - else: - all_boxes[j][i] = empty_array - - # Limit to max_per_image detections *over all classes* - if max_per_image > 0: - image_scores = np.hstack([all_boxes[j][i][:, -1] - for j in xrange(1, imdb.num_classes)]) - if len(image_scores) > max_per_image: - image_thresh = np.sort(image_scores)[-max_per_image] - for j in xrange(1, imdb.num_classes): - keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] - all_boxes[j][i] = all_boxes[j][i][keep, :] - - misc_toc = time.time() - nms_time = misc_toc - misc_tic - - sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ - .format(i + 1, num_images, detect_time, nms_time)) - sys.stdout.flush() - - if vis: - cv2.imwrite('result.png', im2show) - pdb.set_trace() - # cv2.imshow('test', im2show) - # cv2.waitKey(0) - - with open(det_file, 'wb') as f: - pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) - - print('Evaluating detections') - imdb.evaluate_detections(all_boxes, output) - - end = time.time() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import torch.nn.functional as F +import sys +sys.path.append('./RFCN-pytorch.1.0') +import _init_paths +import os + +import numpy as np +import argparse +import pprint +import pdb +import time +import cv2 +import torch +from torch.autograd import Variable +import torch.nn as nn +import torch.optim as optim +import pickle +from roi_data_layer.roidb import combined_roidb +from roi_data_layer.roibatchLoader import roibatchLoader +from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir +from model.rpn.bbox_transform import clip_boxes +# from model.nms.nms_wrapper import nms +from model.roi_layers import nms +from model.rpn.bbox_transform import bbox_transform_inv +from model.utils.net_utils import save_net, load_net, vis_detections +from model.rfcn.resnet_atrous import resnet +import pdb +try: + xrange # Python 2 +except NameError: + xrange = range # Python 3 + + +def parse_args(): + """ + Parse input arguments + """ + parser = argparse.ArgumentParser(description='test the accuracy of RFCN') + + parser.add_argument("--image_folder_path", dest="file_path", default="./RFCN-pytorch.1.0/data/VOCdevkit2007/VOC2007/JPEGImages/",help='image of dataset') + parser.add_argument("--input",dest="input", default="./result/dumpOutput_device0/") + parser.add_argument("--output",dest="output", default="./output") + parser.add_argument("--test_annotation", default="./demo.info") + parser.add_argument("--net_input_width", default=1344) + parser.add_argument("--net_input_height", default=1344) + parser.add_argument('--dataset', dest='dataset',help='training dataset',default='pascal_voc', type=str) + parser.add_argument('--cfg', dest='cfg_file', help='optional config file',default='cfgs/res16.yml', type=str) + parser.add_argument('--net', dest='net',help='vgg16, res50, res101, res152',default='res101', type=str) + parser.add_argument('--set', dest='set_cfgs',help='set config keys', default=None,nargs=argparse.REMAINDER) + parser.add_argument('--load_dir', dest='load_dir',help='directory to load models', default="models", type=str) + parser.add_argument('--ls', dest='large_scale',help='whether use large imag scale',action='store_true') + parser.add_argument('--cag', dest='class_agnostic',help='whether perform class_agnostic bbox regression',action='store_true') + parser.add_argument('--parallel_type', dest='parallel_type',help='which part of model to parallel, 0: all, 1: model before roi pooling',default=0, type=int) + parser.add_argument('--bs', dest='batch_size',help='batch_size', default=1, type=int) + parser.add_argument('--vis', dest='vis', help='visualization mode',action='store_true') + args = parser.parse_args() + return args + +lr = cfg.TRAIN.LEARNING_RATE +momentum = cfg.TRAIN.MOMENTUM +weight_decay = cfg.TRAIN.WEIGHT_DECAY + +if __name__ == '__main__': + + args = parse_args() + + print('Called with args:') + print(args) + + np.random.seed(cfg.RNG_SEED) + + args.imdbval_name = "voc_2007_test" + args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] + args.cfg_file = "./RFCN-pytorch.1.0/cfgs/{}_ls.yml".format(args.net) if args.large_scale else "./RFCN-pytorch.1.0/cfgs/{}.yml".format(args.net) + + if args.cfg_file is not None: + cfg_from_file(args.cfg_file) + if args.set_cfgs is not None: + cfg_from_list(args.set_cfgs) + + cfg.TRAIN.USE_FLIPPED = False + imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False) + 
imdb.competition_mode(on=True) + + im_data = torch.FloatTensor(1) + im_info = torch.FloatTensor(1) + num_boxes = torch.LongTensor(1) + gt_boxes = torch.FloatTensor(1) + + with torch.no_grad(): + im_data = Variable(im_data) + im_info = Variable(im_info) + num_boxes = Variable(num_boxes) + gt_boxes = Variable(gt_boxes) + + start = time.time() + max_per_image = 100 + + vis = args.vis + + if vis: + thresh = 0.05 + else: + thresh = 0.0 + + #save_name = 'RFCN' + num_images = len(imdb.image_index) + all_boxes = [[[] for _ in xrange(num_images)] + for _ in xrange(imdb.num_classes)] + + #output_dir = get_output_dir(imdb, save_name) + output=args.output + if not os.path.exists(output): + os.makedirs(output) + dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ + imdb.num_classes, training=False, normalize = False) + dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, + shuffle=False, num_workers=0, + pin_memory=True) + + data_iter = iter(dataloader) + + # _t = {'im_detect': time.time(), 'misc': time.time()} + det_file = os.path.join(output, 'detections.pkl') + # fasterRCNN.eval() + empty_array = np.transpose(np.array([[],[],[],[],[]]), (1,0)) + dataset.resize_batch() + npu_result = args.input + with open("./RFCN-pytorch.1.0/data/VOCdevkit2007/VOC2007/ImageSets/Main/test.txt") as f: + imglist = [x.strip() for x in f.readlines()] + num_images = len(imglist) + for i in range(num_images): + data = next(data_iter) + pad_value = 0 + batch_shape = (3, 1344, 1344) + padding_size = [0, batch_shape[-1] - data[0].shape[-1], + 0, batch_shape[-2] - data[0].shape[-2]] + #data[0] = F.pad(data[0], padding_size, value=pad_value) + #im_data.resize_(data[0].size()).copy_(data[0]) + # print(im_data.size()) + im_info.resize_(data[1].size()).copy_(data[1]) + gt_boxes.resize_(data[2].size()).copy_(data[2]) + num_boxes.resize_(data[3].size()).copy_(data[3]) + det_tic = time.time() + + def read_data(data_path, input_shape=None): + if data_path.endswith('.bin'): + data = np.fromfile(data_path, dtype=np.float32) + data = data.reshape(input_shape) + elif data_path.endswith('.npy'): + data = np.load(data_path) + return data + + rois = torch.from_numpy( + read_data(npu_result+'{}_1.bin'.format(imglist[i]), [1, 300, 5])) + cls_prob = torch.from_numpy( + read_data(npu_result+'{}_2.bin'.format(imglist[i]), [1, 300, 21])) + bbox_pred = torch.from_numpy( + read_data(npu_result+'{}_3.bin'.format(imglist[i]), [1, 300, 84])) + scores = cls_prob.data + boxes = rois.data[:, :, 1:5] + + box_deltas = bbox_pred.data + box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) + box_deltas = box_deltas.view(args.batch_size, -1, 4 * len(imdb.classes)) + pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) + pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) + pred_boxes /= data[1][0][2] + + scores = scores.squeeze() + pred_boxes = pred_boxes.squeeze() + det_toc = time.time() + detect_time = det_toc - det_tic + misc_tic = time.time() + if vis: + im = cv2.imread(imdb.image_path_at(i)) + im2show = np.copy(im) + for j in xrange(1, imdb.num_classes): + inds = torch.nonzero(scores[:, j] > thresh).view(-1) + # if there is det + if inds.numel() > 0: + cls_scores = scores[:, j][inds] + _, order = torch.sort(cls_scores, 0, True) + if args.class_agnostic: + cls_boxes = pred_boxes[inds, :] + else: + cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] + + cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) + # 
cls_dets = torch.cat((cls_boxes, cls_scores), 1) + cls_dets = cls_dets[order] + keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) + cls_dets = cls_dets[keep.view(-1).long()] + if vis: + im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) + all_boxes[j][i] = cls_dets.cpu().numpy() + else: + all_boxes[j][i] = empty_array + + # Limit to max_per_image detections *over all classes* + if max_per_image > 0: + image_scores = np.hstack([all_boxes[j][i][:, -1] + for j in xrange(1, imdb.num_classes)]) + if len(image_scores) > max_per_image: + image_thresh = np.sort(image_scores)[-max_per_image] + for j in xrange(1, imdb.num_classes): + keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] + all_boxes[j][i] = all_boxes[j][i][keep, :] + + misc_toc = time.time() + nms_time = misc_toc - misc_tic + + sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ + .format(i + 1, num_images, detect_time, nms_time)) + sys.stdout.flush() + + if vis: + cv2.imwrite('result.png', im2show) + pdb.set_trace() + # cv2.imshow('test', im2show) + # cv2.waitKey(0) + + with open(det_file, 'wb') as f: + pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) + + print('Evaluating detections') + imdb.evaluate_detections(all_boxes, output) + + end = time.time() print("test time: %0.4fs" % (end - start)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RefineDet/README.md b/ACL_PyTorch/contrib/cv/detection/RefineDet/README.md index 301956149581ea84addddae27eb0f782d1c5e3aa..47adaaa6f0eae14bb872a59be8fd51e1d40dee08 100644 --- a/ACL_PyTorch/contrib/cv/detection/RefineDet/README.md +++ b/ACL_PyTorch/contrib/cv/detection/RefineDet/README.md @@ -1,66 +1,66 @@ -#RefineDet模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - -2.获取代码和[权重文件](https://drive.google.com/file/d/1RCCTaNeby0g-TFE1Cvjm3dYweBiyyPoq/view?usp=sharing),放到当前路径下 - -``` -git clone https://github.com/luuuyi/RefineDet.PyTorch.git -b master -cd RefineDet.PyTorch -git reset --hard 0e4b24ce07245fcb8c48292326a731729cc5746a -patch -p1 < ../refinedet.patch - -``` - -3.获取数据集,[VOC数据集](http://host.robots.ox.ac.uk/pascal/VOC),可以通过下面的命令下载 - - -``` -sh data/scripts/VOC2007.sh -cd ../ -``` -4.获取[benchamrk](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) - -把benchmark.x86_64 或者 benchmark.aarch64 放到当前目录 - - -## 2 离线推理 - -pth转换为om -``` -bash test/pth2om.sh -``` - - -精度,性能测试 - -``` - bash test/eval_acc_perf.sh --datasets_path=/root/datasets/VOCdevkit/ -``` - - - - -**评测结果:** -| 模型 | pth精度 | 310精度 | 基准性能 |310性能 | -| :------: | :------: | :------: | :------: | :------: | -| RefineDet bs1 | [mAP:79.81%](https://github.com/luuuyi/RefineDet.PyTorch) | mAP:79.56%| 63.94fps | 101.24fps | -| RefineDet bs16 | [mAP:79.81%](https://github.com/luuuyi/RefineDet.PyTorch) |mAP:79.56% | 72.77fps | 136.8fps | - - - - -备注: - -- nms放在后处理,在cpu上计算 -- onnx转om时,不能使用fp16,否则精度不达标 - ``` - --precision_mode allow_fp32_to_fp16 - ``` - +#RefineDet模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + +2.获取代码和[权重文件](https://drive.google.com/file/d/1RCCTaNeby0g-TFE1Cvjm3dYweBiyyPoq/view?usp=sharing),放到当前路径下 + +``` +git clone https://github.com/luuuyi/RefineDet.PyTorch.git -b master +cd RefineDet.PyTorch +git reset --hard 0e4b24ce07245fcb8c48292326a731729cc5746a +patch -p1 < ../refinedet.patch + +``` + 
+3.获取数据集,[VOC数据集](http://host.robots.ox.ac.uk/pascal/VOC),可以通过下面的命令下载 + + +``` +sh data/scripts/VOC2007.sh +cd ../ +``` +4.获取[benchamrk](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) + +把benchmark.x86_64 或者 benchmark.aarch64 放到当前目录 + + +## 2 离线推理 + +pth转换为om +``` +bash test/pth2om.sh +``` + + +精度,性能测试 + +``` + bash test/eval_acc_perf.sh --datasets_path=/root/datasets/VOCdevkit/ +``` + + + + +**评测结果:** +| 模型 | pth精度 | 310精度 | 基准性能 |310性能 | +| :------: | :------: | :------: | :------: | :------: | +| RefineDet bs1 | [mAP:79.81%](https://github.com/luuuyi/RefineDet.PyTorch) | mAP:79.56%| 63.94fps | 101.24fps | +| RefineDet bs16 | [mAP:79.81%](https://github.com/luuuyi/RefineDet.PyTorch) |mAP:79.56% | 72.77fps | 136.8fps | + + + + +备注: + +- nms放在后处理,在cpu上计算 +- onnx转om时,不能使用fp16,否则精度不达标 + ``` + --precision_mode allow_fp32_to_fp16 + ``` + diff --git a/ACL_PyTorch/contrib/cv/detection/RefineDet/env.sh b/ACL_PyTorch/contrib/cv/detection/RefineDet/env.sh index 7cf86a22a607ba18523498bcfb5617e97b28a0e0..e4c421dd8d10faa7f5e617cb1f91d34e8ee685bc 100644 --- a/ACL_PyTorch/contrib/cv/detection/RefineDet/env.sh +++ b/ACL_PyTorch/contrib/cv/detection/RefineDet/env.sh @@ -1,6 +1,6 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp export ASCEND_AICPU_PATH=${install_path} \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RefineDet/get_prior_data.py b/ACL_PyTorch/contrib/cv/detection/RefineDet/get_prior_data.py index abc627029e75094369666ebd920c356413e1330b..095256efd91591132dbec219dbbede0713d72060 100644 --- a/ACL_PyTorch/contrib/cv/detection/RefineDet/get_prior_data.py +++ b/ACL_PyTorch/contrib/cv/detection/RefineDet/get_prior_data.py @@ -1,28 +1,28 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -sys.path.append('./RefineDet.PyTorch') -from layers.functions.prior_box import PriorBox -from data import voc_refinedet -import numpy as np - -cfg = voc_refinedet['320'] - -prior_box = PriorBox(cfg) - -prior_data = prior_box.forward().numpy() - -np.savetxt('prior_data.txt', prior_data) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +sys.path.append('./RefineDet.PyTorch') +from layers.functions.prior_box import PriorBox +from data import voc_refinedet +import numpy as np + +cfg = voc_refinedet['320'] + +prior_box = PriorBox(cfg) + +prior_data = prior_box.forward().numpy() + +np.savetxt('prior_data.txt', prior_data) + print('Finish') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt index 83689985f26624b65a4c5ebb5f00a152618799ba..8c469d858afccf3026a8640799938f8de7b46fac 100644 --- a/ACL_PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/LICENSE b/ACL_PyTorch/contrib/cv/detection/Retinanet/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/get_info.py b/ACL_PyTorch/contrib/cv/detection/Retinanet/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/requirements.txt b/ACL_PyTorch/contrib/cv/detection/Retinanet/requirements.txt index 84cc44c3bbcb4d4e88f49dd0a3cb21c9493e22e0..03db083c861ce8283054386e3aa50683548b81a0 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.8.1 -torchvision == 0.9.0 -onnx == 1.7.0 -numpy == 1.18.5 +torch == 1.8.1 +torchvision == 0.9.0 +onnx == 1.7.0 +numpy == 1.18.5 opencv-python == 4.2.0.34 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_postprocess_detectron2.py b/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_postprocess_detectron2.py index 62b94f1fd42d5b32c06bee8782c6b8b497577918..15958efd02ccb1188d215bd047c3b52080044a26 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_postprocess_detectron2.py +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_postprocess_detectron2.py @@ -1,186 +1,186 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -import os -import argparse -import cv2 -import numpy as np -from pycocotools.cocoeval import COCOeval -from pycocotools.coco import COCO -import json -import pickle -import logging - -class CocoDataset: - """Coco dataset.""" - - def __init__(self, root_dir, set_name='train2017', transform=None): - """ - Args: - root_dir (string): COCO directory. - transform (callable, optional): Optional transform to be applied - on a sample. 
- """ - self.root_dir = root_dir - self.set_name = set_name - self.transform = transform - - self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json')) - self.image_ids = self.coco.getImgIds() - - self.load_classes() - - def load_classes(self): - # load class names (name -> label) - categories = self.coco.loadCats(self.coco.getCatIds()) - categories.sort(key=lambda x: x['id']) - - self.classes = {} - self.coco_labels = {} - self.coco_labels_inverse = {} - for c in categories: - self.coco_labels[len(self.classes)] = c['id'] - self.coco_labels_inverse[c['id']] = len(self.classes) - self.classes[c['name']] = len(self.classes) - - # also load the reverse (label -> name) - self.labels = {} - for key, value in self.classes.items(): - self.labels[value] = key - - def coco_label_to_label(self, coco_label): - return self.coco_labels_inverse[coco_label] - - def label_to_coco_label(self, label): - return self.coco_labels[label] - -def postprocess_bboxes(bboxes, image_size, net_input_width, net_input_height): - old_h = image_size[0] - old_w = image_size[1] - scale_ratio = 800 / min(old_w, old_h) - if old_h < old_w: - new_h, new_w = 800, int(np.floor(scale_ratio * old_w)) - else: - new_h, new_w = int(np.floor(scale_ratio * old_h)), 800 - if max(new_h, new_w) > 1333: - scale = 1333 / max(new_h, new_w) - new_h = new_h * scale - new_w = new_w * scale - new_w = int(new_w + 0.5) - new_h = int(new_h + 0.5) - scale = new_w/old_w - - bboxes[:, 0] = (bboxes[:, 0]) / scale - bboxes[:, 1] = (bboxes[:, 1]) / scale - bboxes[:, 2] = (bboxes[:, 2]) / scale - bboxes[:, 3] = (bboxes[:, 3]) / scale - - return bboxes - - -def save_variable(v, filename): - f = open(filename, 'wb') - pickle.dump(v, f) - f.close() -def load_variavle(filename): - f = open(filename, 'rb') - r = pickle.load(f) - f.close() - return r - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--test_annotation", default="./origin_image.info") - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0/") - parser.add_argument("--val2017_path", default="/root/datasets/coco/val2017/") - parser.add_argument("--det_results_path", default="./result/detection-results/") - parser.add_argument("--net_out_num", type=int, default=3) - parser.add_argument("--net_input_width", type=int, default=1344) - parser.add_argument("--net_input_height", type=int, default=1344) - parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") - flags = parser.parse_args() - - img_size_dict = dict() - with open(flags.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - bin_path = flags.bin_data_path - det_results_path = flags.det_results_path - os.makedirs(det_results_path, exist_ok=True) - total_img = set([name[:name.rfind('_')] for name in os.listdir(bin_path) if "bin" in name]) - - logging.basicConfig(level=logging.INFO) - coco_path = flags.val2017_path - dataloader_val = CocoDataset(coco_path, set_name='val2017') - results = [] - image_ids = [] - - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - res_buff = [] - for num in range(1, flags.net_out_num + 1): - if os.path.exists(path_base + "_" + str(num) + ".bin"): - if num == 1: - buf = np.fromfile(path_base + "_" + str(num) 
+ ".bin", dtype="float32") - boxes = np.reshape(buf, [100, 4]) - elif num == 2: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int32") - labels = np.reshape(buf, [100, 1]) - elif num == 3: - buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") - scores = np.reshape(buf, [100, 1]) - else: - print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") - - current_img_size = img_size_dict[bin_file] - boxes = postprocess_bboxes(boxes, current_img_size, flags.net_input_width, flags.net_input_height) - - if boxes.shape[0] > 0: - # change to (x, y, w, h) (MS COCO standard) - boxes[:, 2] -= boxes[:, 0] - boxes[:, 3] -= boxes[:, 1] - for box_id in range(boxes.shape[0]): - if scores[box_id] <0.05: - continue - score = float(scores[box_id]) - label = int(labels[box_id]) - box = boxes[box_id, :] - image_result = { - 'image_id': int(bin_file), - 'category_id': dataloader_val.label_to_coco_label(label), - 'score': float(score), - 'bbox': box.tolist(), - } - # append detection to results - results.append(image_result) - image_ids.append(int(bin_file)) - - json.dump(results, open('{}_bbox_results.json'.format(dataloader_val.set_name), 'w'), indent=4) - coco_true = dataloader_val.coco - coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataloader_val.set_name)) - - # run COCO evaluation - coco_eval = COCOeval(coco_true, coco_pred, 'bbox') - coco_eval.params.imgIds = image_ids - coco_eval.evaluate() - coco_eval.accumulate() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +import os +import argparse +import cv2 +import numpy as np +from pycocotools.cocoeval import COCOeval +from pycocotools.coco import COCO +import json +import pickle +import logging + +class CocoDataset: + """Coco dataset.""" + + def __init__(self, root_dir, set_name='train2017', transform=None): + """ + Args: + root_dir (string): COCO directory. + transform (callable, optional): Optional transform to be applied + on a sample. 
+ """ + self.root_dir = root_dir + self.set_name = set_name + self.transform = transform + + self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json')) + self.image_ids = self.coco.getImgIds() + + self.load_classes() + + def load_classes(self): + # load class names (name -> label) + categories = self.coco.loadCats(self.coco.getCatIds()) + categories.sort(key=lambda x: x['id']) + + self.classes = {} + self.coco_labels = {} + self.coco_labels_inverse = {} + for c in categories: + self.coco_labels[len(self.classes)] = c['id'] + self.coco_labels_inverse[c['id']] = len(self.classes) + self.classes[c['name']] = len(self.classes) + + # also load the reverse (label -> name) + self.labels = {} + for key, value in self.classes.items(): + self.labels[value] = key + + def coco_label_to_label(self, coco_label): + return self.coco_labels_inverse[coco_label] + + def label_to_coco_label(self, label): + return self.coco_labels[label] + +def postprocess_bboxes(bboxes, image_size, net_input_width, net_input_height): + old_h = image_size[0] + old_w = image_size[1] + scale_ratio = 800 / min(old_w, old_h) + if old_h < old_w: + new_h, new_w = 800, int(np.floor(scale_ratio * old_w)) + else: + new_h, new_w = int(np.floor(scale_ratio * old_h)), 800 + if max(new_h, new_w) > 1333: + scale = 1333 / max(new_h, new_w) + new_h = new_h * scale + new_w = new_w * scale + new_w = int(new_w + 0.5) + new_h = int(new_h + 0.5) + scale = new_w/old_w + + bboxes[:, 0] = (bboxes[:, 0]) / scale + bboxes[:, 1] = (bboxes[:, 1]) / scale + bboxes[:, 2] = (bboxes[:, 2]) / scale + bboxes[:, 3] = (bboxes[:, 3]) / scale + + return bboxes + + +def save_variable(v, filename): + f = open(filename, 'wb') + pickle.dump(v, f) + f.close() +def load_variavle(filename): + f = open(filename, 'rb') + r = pickle.load(f) + f.close() + return r + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--test_annotation", default="./origin_image.info") + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0/") + parser.add_argument("--val2017_path", default="/root/datasets/coco/val2017/") + parser.add_argument("--det_results_path", default="./result/detection-results/") + parser.add_argument("--net_out_num", type=int, default=3) + parser.add_argument("--net_input_width", type=int, default=1344) + parser.add_argument("--net_input_height", type=int, default=1344) + parser.add_argument("--ifShowDetObj", action="store_true", help="if input the para means True, neither False.") + flags = parser.parse_args() + + img_size_dict = dict() + with open(flags.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + bin_path = flags.bin_data_path + det_results_path = flags.det_results_path + os.makedirs(det_results_path, exist_ok=True) + total_img = set([name[:name.rfind('_')] for name in os.listdir(bin_path) if "bin" in name]) + + logging.basicConfig(level=logging.INFO) + coco_path = flags.val2017_path + dataloader_val = CocoDataset(coco_path, set_name='val2017') + results = [] + image_ids = [] + + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + res_buff = [] + for num in range(1, flags.net_out_num + 1): + if os.path.exists(path_base + "_" + str(num) + ".bin"): + if num == 1: + buf = np.fromfile(path_base + "_" + str(num) 
+ ".bin", dtype="float32") + boxes = np.reshape(buf, [100, 4]) + elif num == 2: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="int32") + labels = np.reshape(buf, [100, 1]) + elif num == 3: + buf = np.fromfile(path_base + "_" + str(num) + ".bin", dtype="float32") + scores = np.reshape(buf, [100, 1]) + else: + print("[ERROR] file not exist", path_base + "_" + str(num) + ".bin") + + current_img_size = img_size_dict[bin_file] + boxes = postprocess_bboxes(boxes, current_img_size, flags.net_input_width, flags.net_input_height) + + if boxes.shape[0] > 0: + # change to (x, y, w, h) (MS COCO standard) + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + for box_id in range(boxes.shape[0]): + if scores[box_id] <0.05: + continue + score = float(scores[box_id]) + label = int(labels[box_id]) + box = boxes[box_id, :] + image_result = { + 'image_id': int(bin_file), + 'category_id': dataloader_val.label_to_coco_label(label), + 'score': float(score), + 'bbox': box.tolist(), + } + # append detection to results + results.append(image_result) + image_ids.append(int(bin_file)) + + json.dump(results, open('{}_bbox_results.json'.format(dataloader_val.set_name), 'w'), indent=4) + coco_true = dataloader_val.coco + coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataloader_val.set_name)) + + # run COCO evaluation + coco_eval = COCOeval(coco_true, coco_pred, 'bbox') + coco_eval.params.imgIds = image_ids + coco_eval.evaluate() + coco_eval.accumulate() coco_eval.summarize() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_preprocess_detectron2.py b/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_preprocess_detectron2.py index 46fa64f920ce7cb1fa2af613021957bc7bc9dbd6..2700b4242ac1a99e29d6df3ab7271a621ed99b4c 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_preprocess_detectron2.py +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/retinanet_pth_preprocess_detectron2.py @@ -1,80 +1,80 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import argparse -import numpy as np -import cv2 -import torch -import multiprocessing -def resize(img, size): - - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = 800 / min(old_w, old_h) - if old_h < old_w: - new_h, new_w = 800, int(np.floor(scale_ratio * old_w)) - else: - new_h, new_w = int(np.floor(scale_ratio * old_h)), 800 - if max(new_h, new_w) > 1333: - scale = 1333 / max(new_h, new_w) - new_h = new_h * scale - new_w = new_w * scale - new_w = int(new_w + 0.5) - new_h = int(new_h + 0.5) - ret = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR) - return ret - - -def gen_input_bin(file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - image = cv2.imread(os.path.join(flags.image_src_path, file), cv2.IMREAD_COLOR) - image = resize(image, (800, 1333)) - mean = np.array([103.53, 116.28, 123.675], dtype=np.float32) - std = np.array([1., 1., 1.], dtype=np.float32) - img = image.copy().astype(np.float32) - mean = np.float64(mean.reshape(1, -1)) - std = 1 / np.float64(std.reshape(1, -1)) - cv2.subtract(img, mean, img) - cv2.multiply(img, std, img) - - img = cv2.copyMakeBorder(img, 0, flags.model_input_height - img.shape[0], 0, flags.model_input_width - img.shape[1], cv2.BORDER_CONSTANT, value=0) - img = img.transpose(2, 0, 1) - - - img.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) - -def preprocess(src_path, save_path): - files = os.listdir(src_path) - file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! please ensure bin files generated.") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') - parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') - parser.add_argument("--bin_file_path", default="./val2017_bin/", help='Preprocessed image buffer') - parser.add_argument("--model_input_height", default=1344, type=int, help='input tensor height') - parser.add_argument("--model_input_width", default=1344, type=int, help='input tensor width') - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
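The `resize()` helper in the Retinanet preprocess script above keeps the aspect ratio: the shorter side is scaled to 800 and, if that pushes the longer side past 1333, both sides are scaled back down; the resized image is then padded on the bottom and right to the 1344x1344 model input. The sketch below is an illustrative aside rather than part of the patch; it reproduces only that size rule (with a hypothetical `target_size` helper) so a concrete size can be checked by hand.

```python
# Illustrative aside (not part of the patch): the shorter-side-800 /
# longer-side-1333 resize rule used by resize() above. target_size is a
# hypothetical helper, not a function from the repository.
import numpy as np

def target_size(old_h, old_w, short=800, long_cap=1333):
    """Return (new_h, new_w) following the same rule as resize() above."""
    scale_ratio = short / min(old_w, old_h)
    if old_h < old_w:
        new_h, new_w = short, int(np.floor(scale_ratio * old_w))
    else:
        new_h, new_w = int(np.floor(scale_ratio * old_h)), short
    if max(new_h, new_w) > long_cap:
        scale = long_cap / max(new_h, new_w)
        new_h = int(new_h * scale + 0.5)
        new_w = int(new_w * scale + 0.5)
    return new_h, new_w

print(target_size(480, 640))    # (800, 1066): shorter side becomes 800
print(target_size(400, 1000))   # (533, 1333): longer side capped at 1333
```

With these sizes, the `cv2.copyMakeBorder` call above pads, for example, 800x1066 up to 1344x1344 without changing the detection scale, which is why the postprocess script can map boxes back to original-image coordinates with a single ratio.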
+ +import os +import argparse +import numpy as np +import cv2 +import torch +import multiprocessing +def resize(img, size): + + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = 800 / min(old_w, old_h) + if old_h < old_w: + new_h, new_w = 800, int(np.floor(scale_ratio * old_w)) + else: + new_h, new_w = int(np.floor(scale_ratio * old_h)), 800 + if max(new_h, new_w) > 1333: + scale = 1333 / max(new_h, new_w) + new_h = new_h * scale + new_w = new_w * scale + new_w = int(new_w + 0.5) + new_h = int(new_h + 0.5) + ret = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + return ret + + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + image = cv2.imread(os.path.join(flags.image_src_path, file), cv2.IMREAD_COLOR) + image = resize(image, (800, 1333)) + mean = np.array([103.53, 116.28, 123.675], dtype=np.float32) + std = np.array([1., 1., 1.], dtype=np.float32) + img = image.copy().astype(np.float32) + mean = np.float64(mean.reshape(1, -1)) + std = 1 / np.float64(std.reshape(1, -1)) + cv2.subtract(img, mean, img) + cv2.multiply(img, std, img) + + img = cv2.copyMakeBorder(img, 0, flags.model_input_height - img.shape[0], 0, flags.model_input_width - img.shape[1], cv2.BORDER_CONSTANT, value=0) + img = img.transpose(2, 0, 1) + + + img.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + +def preprocess(src_path, save_path): + files = os.listdir(src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! please ensure bin files generated.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') + parser.add_argument("--bin_file_path", default="./val2017_bin/", help='Preprocessed image buffer') + parser.add_argument("--model_input_height", default=1344, type=int, help='input tensor height') + parser.add_argument("--model_input_width", default=1344, type=int, help='input tensor width') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) preprocess(flags.image_src_path, flags.bin_file_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/test/README.md b/ACL_PyTorch/contrib/cv/detection/Retinanet/test/README.md index dc1c3eabd54f5cd5139c11f5865cec207009244e..d8a57f62bd92814b26ff6b4b45eb136bb7905da6 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/test/README.md +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/test/README.md @@ -1,49 +1,49 @@ -环境准备: - -1.数据集路径 通用的数据集统一放在/root/datasets/或/opt/npu/ 本模型数据集放在/root/datasets/ -该文件夹下要包含 - -./coco - ./val2017 - ./annotations - ./instances_val2017.json - -2.进入工作目录 cd Retinanet - -3.安装必要的依赖 pip3.7 install -r requirements.txt - -4.获取、修改和安装模型代码 a. 获取模型代码 git clone https://github.com/facebookresearch/detectron2 - -cd detectron2/ git reset --hard 13afb03 - -b.安装 cd .. 
rm -rf detectron2/build/ **/*.so python3.7 -m pip install -e detectron2 - -如果detectron2安装报错,请尝试下面这种方法安装: cd detectron2 -python3.7 setup.py build develop - -c.修改源代码 -若是基准环境中 -cd detectron2/ patch -p1 < ../retinanet_pth.diff cd .. - -若是npu环境中 -cd detectron2/ patch -p1 < ../retinanet_detectron2.diff cd .. - -5.获取权重文件 -wget https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl -将权重文件改名: mv model_final_5bd44e.pkl model_final.pkl - -6.修改pytorch代码去除导出onnx时进行检查 将/usr/local/python3.7.5/lib/python3.7/site-packages/torch/onnx/utils.py文件的_check_onnx_proto(proto)这一行改为pass - -7.获取benchmark工具 将benchmark.x86_64 benchmark.aarch64放在当前目录 - -推理步骤: - -8..修改detectron2/detectron2/data/datasets/builtin.py的258行为_root = os.getenv("DETECTRON2_DATASETS", "/root/datasets/")指定coco数据集所在的目录/root/datasets/ -运行命令,在output文件夹下生成model.onnx文件 - -python3.7 ./detectron2/tools/deploy/export_model.py --config-file ./detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml --output ./output --export-method tracing --format onnx MODEL.WEIGHTS model_final.pkl MODEL.DEVICE cpu - - -9.脚本转换om模型 bash test/pth2om.sh - -10.310上执行,执行时确保device空闲: bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 通用的数据集统一放在/root/datasets/或/opt/npu/ 本模型数据集放在/root/datasets/ +该文件夹下要包含 + +./coco + ./val2017 + ./annotations + ./instances_val2017.json + +2.进入工作目录 cd Retinanet + +3.安装必要的依赖 pip3.7 install -r requirements.txt + +4.获取、修改和安装模型代码 a. 获取模型代码 git clone https://github.com/facebookresearch/detectron2 + +cd detectron2/ git reset --hard 13afb03 + +b.安装 cd .. rm -rf detectron2/build/ **/*.so python3.7 -m pip install -e detectron2 + +如果detectron2安装报错,请尝试下面这种方法安装: cd detectron2 +python3.7 setup.py build develop + +c.修改源代码 +若是基准环境中 +cd detectron2/ patch -p1 < ../retinanet_pth.diff cd .. + +若是npu环境中 +cd detectron2/ patch -p1 < ../retinanet_detectron2.diff cd .. + +5.获取权重文件 +wget https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl +将权重文件改名: mv model_final_5bd44e.pkl model_final.pkl + +6.修改pytorch代码去除导出onnx时进行检查 将/usr/local/python3.7.5/lib/python3.7/site-packages/torch/onnx/utils.py文件的_check_onnx_proto(proto)这一行改为pass + +7.获取benchmark工具 将benchmark.x86_64 benchmark.aarch64放在当前目录 + +推理步骤: + +8..修改detectron2/detectron2/data/datasets/builtin.py的258行为_root = os.getenv("DETECTRON2_DATASETS", "/root/datasets/")指定coco数据集所在的目录/root/datasets/ +运行命令,在output文件夹下生成model.onnx文件 + +python3.7 ./detectron2/tools/deploy/export_model.py --config-file ./detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml --output ./output --export-method tracing --format onnx MODEL.WEIGHTS model_final.pkl MODEL.DEVICE cpu + + +9.脚本转换om模型 bash test/pth2om.sh + +10.310上执行,执行时确保device空闲: bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/detection/Retinanet/test/parse.py b/ACL_PyTorch/contrib/cv/detection/Retinanet/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/detection/Retinanet/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/Retinanet/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
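An aside on step 8 of the Retinanet test README above (this note is not part of the patch): the modified line in `detectron2/detectron2/data/datasets/builtin.py` reads the `DETECTRON2_DATASETS` environment variable with `/root/datasets/` as the fallback, so the dataset root can also be supplied through the environment before running `export_model.py`, which avoids re-editing the file when the dataset lives elsewhere. A minimal sketch, assuming the patched line shown in the README:

```python
# Point detectron2 at the COCO root via the environment instead of
# hard-coding it. Set this before detectron2.data.datasets is imported,
# or export the variable in the shell that launches export_model.py.
import os

os.environ["DETECTRON2_DATASETS"] = "/root/datasets/"

# Equivalent to what the patched builtin.py evaluates at import time:
_root = os.getenv("DETECTRON2_DATASETS", "/root/datasets/")
print(_root)
```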
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/LICENSE b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/SSD_MobileNet_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/SSD_MobileNet_pth2onnx.py index 795bae7b1dbc0adefc931bd222982df44483727a..46fafe35d69990f5881357225e058b938bc6b850 100644 --- a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/SSD_MobileNet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/SSD_MobileNet_pth2onnx.py @@ -1,46 +1,46 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.onnx -import sys -sys.path.append(r"./pytorch-ssd") -from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd - - -def pth2onx(model_path, out_path): - num_classes = 21 - net = create_mobilenetv1_ssd(num_classes, is_test=True) - print("begin to load model") - net.load(model_path) - net.eval() - - input_names = ["image"] - dynamic_axes = {'image': {0: '-1'}, 'scores':{0: '-1'}, 'boxes': {0: '-1'}} - output_names = ['scores', 'boxes'] - dummy_input = torch.randn(16, 3, 300, 300) - print("begin to export") - torch.onnx.export(net, dummy_input, out_path, input_names=input_names, - dynamic_axes=dynamic_axes, output_names=output_names, opset_version=11, verbose=True) - print("end export") - - -if __name__ == '__main__': - if len(sys.argv) < 2: - print('Usage: python SSD_MobileNet_pth2onnx.py ') - sys.exit(0) - - model_path = sys.argv[1] - out_path = sys.argv[2] - pth2onx(model_path, out_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import torch.onnx +import sys +sys.path.append(r"./pytorch-ssd") +from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd + + +def pth2onx(model_path, out_path): + num_classes = 21 + net = create_mobilenetv1_ssd(num_classes, is_test=True) + print("begin to load model") + net.load(model_path) + net.eval() + + input_names = ["image"] + dynamic_axes = {'image': {0: '-1'}, 'scores':{0: '-1'}, 'boxes': {0: '-1'}} + output_names = ['scores', 'boxes'] + dummy_input = torch.randn(16, 3, 300, 300) + print("begin to export") + torch.onnx.export(net, dummy_input, out_path, input_names=input_names, + dynamic_axes=dynamic_axes, output_names=output_names, opset_version=11, verbose=True) + print("end export") + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print('Usage: python SSD_MobileNet_pth2onnx.py ') + sys.exit(0) + + model_path = sys.argv[1] + out_path = sys.argv[2] + pth2onx(model_path, out_path) diff --git a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/get_info.py b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/get_info.py index 3073a9bd28b83b4eb7d320b3c63b37d0a835a573..000eccd3f3e95ea63fe123cc6f4567f67ae23bd4 100644 --- a/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/get_info.py +++ b/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1/get_info.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join( - [str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len( - sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len( - sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
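As a quick check of the `SSD_MobileNet_pth2onnx.py` export above, the ONNX graph can be loaded with onnxruntime and fed a dummy batch. The input name `image` and the output names `scores` and `boxes` come from the export call; the file name and the use of onnxruntime itself are assumptions of this aside, not part of the patch.

```python
# Sanity-check sketch for the exported SSD-MobileNetV1 ONNX model.
# Assumes onnxruntime is installed; the .onnx path is hypothetical.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("ssd_mobilenetv1.onnx")
dummy = np.random.randn(1, 3, 300, 300).astype(np.float32)

# The batch axis was exported as dynamic, so a batch of 1 should be
# accepted even though the dummy input used during export had batch 16.
scores, boxes = sess.run(["scores", "boxes"], {"image": dummy})
print(scores.shape, boxes.shape)
```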
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join( + [str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len( + sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len( + sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/README.md b/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/README.md index 5d1928802d83abff4c6c88fe683a435371b2bae2..c4208fe14548e80a613ac8ba745016c56d2082ea 100644 --- a/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/README.md +++ b/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/README.md @@ -1,450 +1,450 @@ -# SSD-Resnet34模型PyTorch离线推理 - -## 一. 
环境准备 - -### 1.通过requirements.txt 安装必要依赖 - -首先要先获取torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl,apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl和tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl这3个文件,获取方法如下: - -获取torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl - -x86架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_x86_64.whl -``` - -ARM架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_aarch64.whl -``` - -获取tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl - -x86架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20210423_TR5/whl_0423/tensor_fused_plugin-0.1%2Bascend-cp37-cp37m-linux_x86_64.whl -``` - -ARM架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_aarch64.whl -``` - -获取apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl - -x86架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/apex-0.1%2Bascend.20210930-cp37-cp37m-linux_x86_64.whl -``` - -ARM架构: - -``` -wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/apex-0.1%2Bascend.20210930-cp37-cp37m-linux_aarch64.whl -``` - -在获得这3个.whl文件之后就使用命令直接运行: - -x86架构: - -``` -pip install torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl -pip install apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl -pip install tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl -``` - -ARM架构: - -``` -pip install torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_aarch64.whl -pip install apex-0.1+ascend.20210930-cp37-cp37m-linux_aarch64.whl -pip install tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_aarch64.whl -``` - -在运行上面的这条命令时,确保torch,apex和tensor_fused_plugin这3个.whl文件和requirements.txt在同一个目录下。 - -之后运行如下指令: - -``` -pip install -r requirements.txt -``` - -在运行完这条命令后,如果error中出现te0.4.0和schedule-search0.0.1相关信息,不需要去看,因为运行这个代码不需要用到,与本代码无关。 - -``` -pip install -i https://pypi.tuna.tsinghua.edu.cn/simple opencv-python -apt update -apt install libgl1-mesa-glx -``` - -之后再运行如上3条命令,本代码所需环境即安装完毕。 - -### 2. 获取开源模型代码及开源权重 - -加载开源仓库: - -``` -git clone https://github.com/mlcommons/training_results_v0.7.git -``` - -进入开源代码仓,并打补丁,打补丁时确保补丁在开源代码仓路径的上一级: - -``` -cd training_results_v0.7/NVIDIA/benchmarks/ssd/implementations/pytorch/ -patch -p1 <../ssd.patch -``` - -下载训练后的SSD权重文件: - -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/SSD-Resnet34/iter_183250.pt -``` - -下载基于搭建SSD模型的Resnet34模型的权重文件 - -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/SSD-Resnet34/resnet34-333f7ec4.pth -``` - -对于pth权重文件,统一放在新建models文件夹下。 - -``` -├── models -│ ├── iter_183250.pt -│ ├── resnet34-333f7ec4.pth -``` - -### 3. 
获取测试数据集 - -本模型支持coco2017的val2017验证数据集,里面有5000张图片。用户可自行获取coco2017数据集中的annotations和val2017,上传数据集到服务器任意目录并解压(如:/home/HwHiAiUser/dataset),本模型将使用到coco2017数据集中的验证集及其标签文件instances_val2017.json, bbox_only_instances_val2017.json,标签文件bbox_only_instances_val2017.json是将coco2017中的原标签文件instances_val2017.json经过处理所得。 - -获得coco数据集的命令如下: - -``` -wget https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/infer/zip/coco_2017_ssd_infer.zip -``` - -在本代码中我统一使用了coco这个名字来命名数据: - -``` -mv coco_2017_ssd_infer coco -``` - -获得新json标签文件的命令如下: - -先给prepare-json.py增加权限,不然会出现权限不够的问题: - -``` -chmod -R 777 prepare-json.py -``` - -等增加完权限后再运行: - -``` -python3.7 prepare-json.py --keep-keys ${data_path}/coco/annotations/instances_val2017.json ${data_path}/coco/annotations/bbox_only_instances_val2017.json -``` - -第1部分${data_path}/coco/annotations/instances_val2017.json:这个是输入的json文件路径 - -第2部分${data_path}/coco/annotations/bbox_only_instances_val2017.json:这个是经过处理后输出的json文件路径。新的json文件命名一定要是bbox_only_instances_val2017.json,因为在代码中定义了运行json文件的名字。 - -${data_path}:代表数据集coco2017的路径 - -需要准备好的数据集部分: - -``` -├── coco -│ ├── val2017 -│ ├── annotations -│ ├──instances_val2017.json -│ ├──bbox_only_instances_val2017.json -``` - -## 二. 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -执行代码脚本请在本工程代码文件夹下运行。 - -执行如下脚本生成om模型 - -1-25行是pth2onnx - -29-43行是onnx2om - -``` -bash test/ssd_pth2om.sh -``` - -执行如下脚本进行数据预处理和后处理测试精度 - -``` -bash test/ssd_eval_acc_perf.sh --data_path=/home/yzc -``` - ---data_path:coco2017数据集的路径 - -1-16行是加载数据集路径部分 - -19行是解决mlperf_logging包的调用问题 - -23-30行是处理json文件部分 - -32-40行是数据预处理部分 - -42-48行是生成info文件 - -50-67行是使用benchmark进行离线推理的部分 - -70-84行是数据后处理评估精度部分 - -请用户在运行代码前,必须要先激活环境变量才能运行代码: - -``` -source env.sh -``` - -如果在运行代码的过程中,出现缺少.so库的问题,则需要再运行一遍上面输入的命令,再激活一次环境变量,即可解决问题。 - -另外,如果在运行过程中出现报出没有torchvision的错误,但实际已安装,请用户使用which python或者which python版本,查看python的路径是否在当前环境的路径下,请使用在当前环境路径下的相应python即可。 - -### 1.导出.onnx文件 - -− 使用iter_183250.pt导出onnx文件。 - -− 运行ssd_pth2onnx.sh可直接从pth转至om模型 - -运行ssd_pth2onnx.py脚本。 - -生成batchsize=1的onnx模型: - -``` -python3.7 ssd_pth2onnx.py --bs=1 --resnet34-model=./models/resnet34-333f7ec4.pth --pth-path=./models/iter_183250.pt --onnx-path=./ssd_bs1.onnx -``` - -生成batchsize=16的onnx模型: - -``` -python ssd_pth2onnx.py --bs=16 --resnet34-model=./models/resnet34-333f7ec4.pth --pth-path=./models/iter_183250.pt --onnx-path=./ssd_bs16.onnx -``` - ---bs:输入的batch_size大小 - ---resnet34-model:resnet34模型的pth权重文件路径 - ---pth-path:输入SSD模型的pth权重文件路径 - ---onnx-path:输出的onnx模型文件路径及onnx模型名字 - -执行上述步骤后,获得的输出有: - -``` -├── ssd_bs1.onnx -├── ssd_bs16.onnx -``` - -### 2.转om模型 - -设置环境变量 - -``` -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -``` - -生成batchsize为1的om模型的命令如下。 - -``` -atc --framework=5 --model=./ssd_bs1.onnx --output=./ssd_bs1 --input_format=NCHW --input_shape="image:1,3,300,300" --log=error --soc_version=Ascend310 -``` - -生成batchsize为16的om模型的命令如下。 - -``` -atc --framework=5 --model=./ssd_bs16.onnx --output=./ssd_bs16 --input_format=NCHW --input_shape="image:16,3,300,300" --log=error --soc_version=Ascend310 -``` - ---framework:5代表ONNX模型。 - ---model:为ONNX模型文件输入的路径。 - ---output:输出的OM模型的路径。 - ---input_format:输入数据的格式。 - 
---input_shape:输入数据的shape。 - ---log:日志级别。 - ---soc_version:处理器型号。 - -执行上述步骤后,获得的输出为: - -``` -├── ssd_bs1.om -├── ssd_bs16.om -``` - -### 3.数据预处理。 - -将原始数据集转换为模型输入的二进制数据。 - -在进行数据预处理时,虽然coco2017的val2017验证集有5000张图片,但是实际上输出的只有4952张图片,因为在这过程中代码会剔除其中的48张图片。这一点请用户注意。 - -在数据预处理之前先要声明mlperf_logging包的调用问题: - -``` -PYTHONPATH=../../../../../SIAT/benchmarks/resnet/implementations/tensorflow_open_src:$PYTHONPATH -``` - -具体命令讲解: - -``` -python3.7 ssd_preprocess.py --data=${data_path}/coco --bin-output=./ssd_bin -``` - ---data:coco2017数据集的路径 - ---bin-output:经过预处理得到的bin文件路径 - -${data_path}:coco2017数据集的路径 - -执行上述步骤后,获得的输出为: - -``` -├── ssd_bin -│ ├── tensor([139]).bin -│ ├── ... -│ ├── tensor([581781]).bin -``` - -### 4.生成数据集info文件 - -使用benchmark推理需要输入二进制数据集的info文件,用于获取数据集。使用get_info.py脚本,输入已经得到的二进制文件,输出生成二进制数据集的info文件。 - -具体命令讲解: - -``` -python3.7 get_info.py bin ./ssd_bin ssd.info 300 300 -``` - -第一个参数为生成的数据集文件格式, - -第二个参数为预处理后的数据文件相对路径, - -第三个参数为生成的数据集文件名, - -第四个和第五个参数分别为模型输入的宽度和高度。 - -执行上述步骤后,获得的输出为: - -``` -├── ssd.info -``` - -### 5.使用Benchmark工具进行推理 - -Benchmark模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。 - -先后步骤顺序为: - -− 增加执行权限 - -``` -chmod u+x benchmark.x86_64 -``` - -− 由对batchsize=1的om模型进行benchmark推理: - -``` -./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -input_text_path=./ssd.info -input_width=300 -input_height=300 -useDvpp=False -output_binary=true -om_path=./ssd_bs1.om -``` - -− 由对batchsize=16的om模型进行benchmark推理: - -``` -./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=1 -input_text_path=./ssd.info -input_width=300 -input_height=300 -useDvpp=False -output_binary=true -om_path=./ssd_bs16.om -``` - --model_type:为benchmark支持的模型类型,目前支持的有vision,nmt,widedeep,nlp,yolocaffe,bert,deepfm。ssd模型属于vision,所以选vision。 - --batch_size:om模型的batch大小,该值应与om模型的batch大小相同,否则报输入大小不一致的错误。 - --device_id:指运行在ascend 310的哪个device上,每张ascend 310卡有4个device。 - -input_text_path:包含数据集每个样本的路径与其相关信息的数据集信息文件路径。即之前生成的info文件路径。 - --input_width:输入宽度 - --input_height:输入高度 - --useDvpp:为是否使用aipp进行数据集预处理,我这里不用 - --output_binary:以预处理后的数据集为输入,benchmark工具推理om模型的输出数据保存为二进制还是txt。true为生成二进制bin文件,false为生成txt文件。 - --om_path:om模型文件路径。 - -执行./benchmark.x86_64工具请选择与运行环境架构相同的命令。参数详情请参见《 CANN 推理benchmark工具用户指南 》。 推理后的输出默认在当前目录result下。 - -batchsize=1的om模型进行benchmark推理得到的bin文件输出结果默认保存在当前目录result/dumpOutput_device0;性能数据默认保存在result/ perf_vision_batchsize_1_device_0.txt。 - -batchsize=16的om模型进行benchmark推理得到的bin文件输出结果默认保存在当前目录result/dumpOutput_device1;性能数据默认保存在result/ perf_vision_batchsize_16_device_1.txt。 - -该模型一个输入会对应两个输出,_1代表ploc的输出,_2代表plabel的输出。 - -执行以上命令后的输出: - -``` -├── result -│ ├── dumpOutput_device0 -│ │ ├── tensor([139])_1.bin -│ │ ├── tensor([139])_2.bin -│ │ ├── …… -│ ├── dumpOutput_device1 -│ │ ├── tensor([139])_1.bin -│ │ ├── tensor([139])_2.bin -│ │ ├── …… -│ ├── perf_vision_batchsize_1_device_0.txt -│ ├── perf_vision_batchsize_16_device_1.txt -``` - -### 6.数据后处理 - -进行数据后处理时,也是需要调用同数据预处理一样的mlperf_logging包。因为在前面进行数据预处理时已经声明过了,所以可以不需要再进行声明了。 - -调用ssd_postprocess.py评测模型的精度: - -batchsize=1的测试: - -``` -python ssd_postprocess.py --data=${data_path}/coco --bin-input=./result/dumpOutput_device0 -``` - -batchsize=16的测试: - -``` -python ssd_postprocess.py --data=${data_path}/coco --bin-input=./result/dumpOutput_device1 -``` - ---data:coco2017数据集的路径 - ---bin-input:数据预处理得到的bin文件。 - -${data_path}:coco2017数据集的路径 - -### 7.评测结果: - -| 模型 | 官网pth精度 | 310离线推理精度 | 性能基准 | 310性能 | -| ----------------- | ----------- | --------------- | ---------- | ---------- | -| SSD-Resnet34 bs1 | 23.000% | 23.030% | 482.627fps | 
634.576fps | -| SSD-Resnet34 bs16 | 23.000% | 23.030% | 774.477fps | 863.748fps | - +# SSD-Resnet34模型PyTorch离线推理 + +## 一. 环境准备 + +### 1.通过requirements.txt 安装必要依赖 + +首先要先获取torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl,apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl和tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl这3个文件,获取方法如下: + +获取torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl + +x86架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_x86_64.whl +``` + +ARM架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_aarch64.whl +``` + +获取tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl + +x86架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20210423_TR5/whl_0423/tensor_fused_plugin-0.1%2Bascend-cp37-cp37m-linux_x86_64.whl +``` + +ARM架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/torch-1.5.0%2Bascend.post3.20210930-cp37-cp37m-linux_aarch64.whl +``` + +获取apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl + +x86架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/apex-0.1%2Bascend.20210930-cp37-cp37m-linux_x86_64.whl +``` + +ARM架构: + +``` +wget https://ascend-pytorch-release.obs.cn-north-4.myhuaweicloud.com/run_pkg/20211018_FrameworkPTAdapter2.0.T308/apex-0.1%2Bascend.20210930-cp37-cp37m-linux_aarch64.whl +``` + +在获得这3个.whl文件之后就使用命令直接运行: + +x86架构: + +``` +pip install torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_x86_64.whl +pip install apex-0.1+ascend.20210930-cp37-cp37m-linux_x86_64.whl +pip install tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_x86_64.whl +``` + +ARM架构: + +``` +pip install torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_aarch64.whl +pip install apex-0.1+ascend.20210930-cp37-cp37m-linux_aarch64.whl +pip install tensor_fused_plugin-0.1+ascend-cp37-cp37m-linux_aarch64.whl +``` + +在运行上面的这条命令时,确保torch,apex和tensor_fused_plugin这3个.whl文件和requirements.txt在同一个目录下。 + +之后运行如下指令: + +``` +pip install -r requirements.txt +``` + +在运行完这条命令后,如果error中出现te0.4.0和schedule-search0.0.1相关信息,不需要去看,因为运行这个代码不需要用到,与本代码无关。 + +``` +pip install -i https://pypi.tuna.tsinghua.edu.cn/simple opencv-python +apt update +apt install libgl1-mesa-glx +``` + +之后再运行如上3条命令,本代码所需环境即安装完毕。 + +### 2. 获取开源模型代码及开源权重 + +加载开源仓库: + +``` +git clone https://github.com/mlcommons/training_results_v0.7.git +``` + +进入开源代码仓,并打补丁,打补丁时确保补丁在开源代码仓路径的上一级: + +``` +cd training_results_v0.7/NVIDIA/benchmarks/ssd/implementations/pytorch/ +patch -p1 <../ssd.patch +``` + +下载训练后的SSD权重文件: + +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/SSD-Resnet34/iter_183250.pt +``` + +下载基于搭建SSD模型的Resnet34模型的权重文件 + +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/detection/SSD-Resnet34/resnet34-333f7ec4.pth +``` + +对于pth权重文件,统一放在新建models文件夹下。 + +``` +├── models +│ ├── iter_183250.pt +│ ├── resnet34-333f7ec4.pth +``` + +### 3. 
获取测试数据集 + +本模型支持coco2017的val2017验证数据集,里面有5000张图片。用户可自行获取coco2017数据集中的annotations和val2017,上传数据集到服务器任意目录并解压(如:/home/HwHiAiUser/dataset),本模型将使用到coco2017数据集中的验证集及其标签文件instances_val2017.json, bbox_only_instances_val2017.json,标签文件bbox_only_instances_val2017.json是将coco2017中的原标签文件instances_val2017.json经过处理所得。 + +获得coco数据集的命令如下: + +``` +wget https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/infer/zip/coco_2017_ssd_infer.zip +``` + +在本代码中我统一使用了coco这个名字来命名数据: + +``` +mv coco_2017_ssd_infer coco +``` + +获得新json标签文件的命令如下: + +先给prepare-json.py增加权限,不然会出现权限不够的问题: + +``` +chmod -R 777 prepare-json.py +``` + +等增加完权限后再运行: + +``` +python3.7 prepare-json.py --keep-keys ${data_path}/coco/annotations/instances_val2017.json ${data_path}/coco/annotations/bbox_only_instances_val2017.json +``` + +第1部分${data_path}/coco/annotations/instances_val2017.json:这个是输入的json文件路径 + +第2部分${data_path}/coco/annotations/bbox_only_instances_val2017.json:这个是经过处理后输出的json文件路径。新的json文件命名一定要是bbox_only_instances_val2017.json,因为在代码中定义了运行json文件的名字。 + +${data_path}:代表数据集coco2017的路径 + +需要准备好的数据集部分: + +``` +├── coco +│ ├── val2017 +│ ├── annotations +│ ├──instances_val2017.json +│ ├──bbox_only_instances_val2017.json +``` + +## 二. 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +执行代码脚本请在本工程代码文件夹下运行。 + +执行如下脚本生成om模型 + +1-25行是pth2onnx + +29-43行是onnx2om + +``` +bash test/ssd_pth2om.sh +``` + +执行如下脚本进行数据预处理和后处理测试精度 + +``` +bash test/ssd_eval_acc_perf.sh --data_path=/home/yzc +``` + +--data_path:coco2017数据集的路径 + +1-16行是加载数据集路径部分 + +19行是解决mlperf_logging包的调用问题 + +23-30行是处理json文件部分 + +32-40行是数据预处理部分 + +42-48行是生成info文件 + +50-67行是使用benchmark进行离线推理的部分 + +70-84行是数据后处理评估精度部分 + +请用户在运行代码前,必须要先激活环境变量才能运行代码: + +``` +source env.sh +``` + +如果在运行代码的过程中,出现缺少.so库的问题,则需要再运行一遍上面输入的命令,再激活一次环境变量,即可解决问题。 + +另外,如果在运行过程中出现报出没有torchvision的错误,但实际已安装,请用户使用which python或者which python版本,查看python的路径是否在当前环境的路径下,请使用在当前环境路径下的相应python即可。 + +### 1.导出.onnx文件 + +− 使用iter_183250.pt导出onnx文件。 + +− 运行ssd_pth2onnx.sh可直接从pth转至om模型 + +运行ssd_pth2onnx.py脚本。 + +生成batchsize=1的onnx模型: + +``` +python3.7 ssd_pth2onnx.py --bs=1 --resnet34-model=./models/resnet34-333f7ec4.pth --pth-path=./models/iter_183250.pt --onnx-path=./ssd_bs1.onnx +``` + +生成batchsize=16的onnx模型: + +``` +python ssd_pth2onnx.py --bs=16 --resnet34-model=./models/resnet34-333f7ec4.pth --pth-path=./models/iter_183250.pt --onnx-path=./ssd_bs16.onnx +``` + +--bs:输入的batch_size大小 + +--resnet34-model:resnet34模型的pth权重文件路径 + +--pth-path:输入SSD模型的pth权重文件路径 + +--onnx-path:输出的onnx模型文件路径及onnx模型名字 + +执行上述步骤后,获得的输出有: + +``` +├── ssd_bs1.onnx +├── ssd_bs16.onnx +``` + +### 2.转om模型 + +设置环境变量 + +``` +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +``` + +生成batchsize为1的om模型的命令如下。 + +``` +atc --framework=5 --model=./ssd_bs1.onnx --output=./ssd_bs1 --input_format=NCHW --input_shape="image:1,3,300,300" --log=error --soc_version=Ascend310 +``` + +生成batchsize为16的om模型的命令如下。 + +``` +atc --framework=5 --model=./ssd_bs16.onnx --output=./ssd_bs16 --input_format=NCHW --input_shape="image:16,3,300,300" --log=error --soc_version=Ascend310 +``` + +--framework:5代表ONNX模型。 + +--model:为ONNX模型文件输入的路径。 + +--output:输出的OM模型的路径。 + +--input_format:输入数据的格式。 + 
+--input_shape:输入数据的shape。 + +--log:日志级别。 + +--soc_version:处理器型号。 + +执行上述步骤后,获得的输出为: + +``` +├── ssd_bs1.om +├── ssd_bs16.om +``` + +### 3.数据预处理。 + +将原始数据集转换为模型输入的二进制数据。 + +在进行数据预处理时,虽然coco2017的val2017验证集有5000张图片,但是实际上输出的只有4952张图片,因为在这过程中代码会剔除其中的48张图片。这一点请用户注意。 + +在数据预处理之前先要声明mlperf_logging包的调用问题: + +``` +PYTHONPATH=../../../../../SIAT/benchmarks/resnet/implementations/tensorflow_open_src:$PYTHONPATH +``` + +具体命令讲解: + +``` +python3.7 ssd_preprocess.py --data=${data_path}/coco --bin-output=./ssd_bin +``` + +--data:coco2017数据集的路径 + +--bin-output:经过预处理得到的bin文件路径 + +${data_path}:coco2017数据集的路径 + +执行上述步骤后,获得的输出为: + +``` +├── ssd_bin +│ ├── tensor([139]).bin +│ ├── ... +│ ├── tensor([581781]).bin +``` + +### 4.生成数据集info文件 + +使用benchmark推理需要输入二进制数据集的info文件,用于获取数据集。使用get_info.py脚本,输入已经得到的二进制文件,输出生成二进制数据集的info文件。 + +具体命令讲解: + +``` +python3.7 get_info.py bin ./ssd_bin ssd.info 300 300 +``` + +第一个参数为生成的数据集文件格式, + +第二个参数为预处理后的数据文件相对路径, + +第三个参数为生成的数据集文件名, + +第四个和第五个参数分别为模型输入的宽度和高度。 + +执行上述步骤后,获得的输出为: + +``` +├── ssd.info +``` + +### 5.使用Benchmark工具进行推理 + +Benchmark模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。 + +先后步骤顺序为: + +− 增加执行权限 + +``` +chmod u+x benchmark.x86_64 +``` + +− 由对batchsize=1的om模型进行benchmark推理: + +``` +./benchmark.x86_64 -model_type=vision -batch_size=1 -device_id=0 -input_text_path=./ssd.info -input_width=300 -input_height=300 -useDvpp=False -output_binary=true -om_path=./ssd_bs1.om +``` + +− 由对batchsize=16的om模型进行benchmark推理: + +``` +./benchmark.x86_64 -model_type=vision -batch_size=16 -device_id=1 -input_text_path=./ssd.info -input_width=300 -input_height=300 -useDvpp=False -output_binary=true -om_path=./ssd_bs16.om +``` + +-model_type:为benchmark支持的模型类型,目前支持的有vision,nmt,widedeep,nlp,yolocaffe,bert,deepfm。ssd模型属于vision,所以选vision。 + +-batch_size:om模型的batch大小,该值应与om模型的batch大小相同,否则报输入大小不一致的错误。 + +-device_id:指运行在ascend 310的哪个device上,每张ascend 310卡有4个device。 + +input_text_path:包含数据集每个样本的路径与其相关信息的数据集信息文件路径。即之前生成的info文件路径。 + +-input_width:输入宽度 + +-input_height:输入高度 + +-useDvpp:为是否使用aipp进行数据集预处理,我这里不用 + +-output_binary:以预处理后的数据集为输入,benchmark工具推理om模型的输出数据保存为二进制还是txt。true为生成二进制bin文件,false为生成txt文件。 + +-om_path:om模型文件路径。 + +执行./benchmark.x86_64工具请选择与运行环境架构相同的命令。参数详情请参见《 CANN 推理benchmark工具用户指南 》。 推理后的输出默认在当前目录result下。 + +batchsize=1的om模型进行benchmark推理得到的bin文件输出结果默认保存在当前目录result/dumpOutput_device0;性能数据默认保存在result/ perf_vision_batchsize_1_device_0.txt。 + +batchsize=16的om模型进行benchmark推理得到的bin文件输出结果默认保存在当前目录result/dumpOutput_device1;性能数据默认保存在result/ perf_vision_batchsize_16_device_1.txt。 + +该模型一个输入会对应两个输出,_1代表ploc的输出,_2代表plabel的输出。 + +执行以上命令后的输出: + +``` +├── result +│ ├── dumpOutput_device0 +│ │ ├── tensor([139])_1.bin +│ │ ├── tensor([139])_2.bin +│ │ ├── …… +│ ├── dumpOutput_device1 +│ │ ├── tensor([139])_1.bin +│ │ ├── tensor([139])_2.bin +│ │ ├── …… +│ ├── perf_vision_batchsize_1_device_0.txt +│ ├── perf_vision_batchsize_16_device_1.txt +``` + +### 6.数据后处理 + +进行数据后处理时,也是需要调用同数据预处理一样的mlperf_logging包。因为在前面进行数据预处理时已经声明过了,所以可以不需要再进行声明了。 + +调用ssd_postprocess.py评测模型的精度: + +batchsize=1的测试: + +``` +python ssd_postprocess.py --data=${data_path}/coco --bin-input=./result/dumpOutput_device0 +``` + +batchsize=16的测试: + +``` +python ssd_postprocess.py --data=${data_path}/coco --bin-input=./result/dumpOutput_device1 +``` + +--data:coco2017数据集的路径 + +--bin-input:数据预处理得到的bin文件。 + +${data_path}:coco2017数据集的路径 + +### 7.评测结果: + +| 模型 | 官网pth精度 | 310离线推理精度 | 性能基准 | 310性能 | +| ----------------- | ----------- | --------------- | ---------- | ---------- | +| SSD-Resnet34 bs1 | 23.000% | 23.030% | 482.627fps | 
634.576fps | +| SSD-Resnet34 bs16 | 23.000% | 23.030% | 774.477fps | 863.748fps | + diff --git a/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/ssd_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/ssd_pth2onnx.py index 7687bb7b0a3d062f7e27f361d90d6cfce810fa8d..9a88aff930e05f87714467c2dcfdc4b928690a94 100644 --- a/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/ssd_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34/ssd_pth2onnx.py @@ -1,46 +1,46 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import os -from ssd300 import SSD300 -import random -from argparse import ArgumentParser -from parse_config import parse_args - -def pth2onnx(batch_size,input_file, output_file): - model_options = { - 'use_nhwc' : False, - 'pad_input' : False, - 'bn_group' : 1, - } - ssd300_eval = SSD300(args, 81, **model_options) - - state_dict = torch.load(input_file, map_location="cpu") - ssd300_eval.load_state_dict(state_dict['model']) - - ssd300_eval.eval() - input_names = ["image"] - output_names=["ploc","plabel"] - dynamic_axes = {'image': {0: '-1'}, 'ploc': {0: '-1'}, 'plabel': {0: '-1'}} - dummy_input = torch.randn(batch_size, 3, 300, 300) - torch.onnx.export(ssd300_eval, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=False) - -if __name__ == "__main__": - args = parse_args() - batch_size=args.bs - input_file = args.pth_path - output_file = args.onnx_path - resnet_model=args.resnet34_model - pth2onnx(batch_size,input_file, output_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
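+# Export flow implemented in this script:
+#   * pth2onnx() builds SSD300 with 81 classes (use_nhwc/pad_input disabled, bn_group=1),
+#     loads the checkpoint's 'model' state dict on CPU and puts the network in eval() mode.
+#   * torch.onnx.export() runs with opset 11, input "image" and outputs "ploc"/"plabel";
+#     dynamic_axes marks the batch dimension of all three tensors as dynamic, so --bs
+#     only sets the dummy input shape used for tracing.
+#   * Arguments (--bs, --pth-path, --onnx-path, --resnet34-model) come from
+#     parse_config.parse_args(); the parsed namespace is passed whole to SSD300.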
+import torch +import os +from ssd300 import SSD300 +import random +from argparse import ArgumentParser +from parse_config import parse_args + +def pth2onnx(batch_size,input_file, output_file): + model_options = { + 'use_nhwc' : False, + 'pad_input' : False, + 'bn_group' : 1, + } + ssd300_eval = SSD300(args, 81, **model_options) + + state_dict = torch.load(input_file, map_location="cpu") + ssd300_eval.load_state_dict(state_dict['model']) + + ssd300_eval.eval() + input_names = ["image"] + output_names=["ploc","plabel"] + dynamic_axes = {'image': {0: '-1'}, 'ploc': {0: '-1'}, 'plabel': {0: '-1'}} + dummy_input = torch.randn(batch_size, 3, 300, 300) + torch.onnx.export(ssd300_eval, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=False) + +if __name__ == "__main__": + args = parse_args() + batch_size=args.bs + input_file = args.pth_path + output_file = args.onnx_path + resnet_model=args.resnet34_model + pth2onnx(batch_size,input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/LICENSE b/ACL_PyTorch/contrib/cv/detection/YOLOF/LICENSE index 0561f7dca719dc2718f1fdc146feda1dea36a9ef..cc87e8683f8accf92fb441738e981d6ab8ce7536 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/LICENSE +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2021 Megvii, Base Detection - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 Megvii, Base Detection + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_postprocess.py b/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_postprocess.py index 1260f6dabfef837c403ba50e01a5de4c76dc5b37..167582d4171acb9905af9e92cd94606d33508d8f 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_postprocess.py @@ -1,90 +1,90 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import argparse - -import torch -from cvpods.structures import Boxes, Instances -from cvpods.modeling.postprocessing import detector_postprocess -from cvpods.engine import RUNNERS -from cvpods.evaluation import build_evaluator - -import sys -import os - -sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x/".format(sys.path[0])) -from config import config - -const_shape = (608, 608) -dataset_name = "coco_2017_val" - - -def runner_decrator(cls): - def custom_build_evaluator(cls, cfg, dataset_name, dataset): - return build_evaluator(cfg, dataset_name, dataset, None, dump=True) - - cls.build_evaluator = classmethod(custom_build_evaluator) - return cls - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--pth_path', default="YOLOF_CSP_D_53_DC5_9x.pth") - parser.add_argument('--bin_data_path', default="result/") - parser.add_argument('--meta_info_path', default="yolof_meta.info") - parser.add_argument('--num_classes', default=81, type=int) - - args = parser.parse_args() - - opts = ['MODEL.WEIGHTS', args.pth_path, "MODEL.DEVICE", "cpu"] - config.merge_from_list(opts) - - cls = runner_decrator(RUNNERS.get(config.TRAINER.NAME)) - evaluator = cls.build_evaluator(config, dataset_name, cls.build_test_loader(config).dataset) - evaluator.reset() - bin_data_path = args.bin_data_path + os.listdir(args.bin_data_path)[0] + "/" - - with open(args.meta_info_path, "r") as fp: - for line in fp: - values = line.split() - file_name = values[0] - batch_size = (len(values) - 1) // 3 - nmsed_boxes_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 0), - dtype=np.float32).reshape(batch_size, -1, 4) - nmsed_scores_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 1), - dtype=np.float32).reshape(batch_size, -1) - nmsed_classes_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 2), - dtype=np.int64).reshape(batch_size, -1) - last_image = "" - for i in range(batch_size): - img_name = values[i * 3 + 1] - if img_name == last_image: - break - last_image = img_name - last_img_name = img_name - height = int(values[i * 3 + 2]) - width = int(values[i * 3 + 3]) - nmsed_boxes = nmsed_boxes_batch[i] - nmsed_scores = nmsed_scores_batch[i] - nmsed_classes = nmsed_classes_batch[i] - result = Instances(const_shape) - result.pred_boxes = Boxes(torch.tensor(nmsed_boxes)) - result.scores = torch.tensor(nmsed_scores) - result.pred_classes = torch.tensor(nmsed_classes) - r = detector_postprocess(result, 
height, width) - r = {"instances": r} - _input = {"image_id": int(img_name)} - evaluator.process([_input], [r]) - print(evaluator.evaluate()) - print(evaluator._dump_infos[0]['summary']) +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import argparse + +import torch +from cvpods.structures import Boxes, Instances +from cvpods.modeling.postprocessing import detector_postprocess +from cvpods.engine import RUNNERS +from cvpods.evaluation import build_evaluator + +import sys +import os + +sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x/".format(sys.path[0])) +from config import config + +const_shape = (608, 608) +dataset_name = "coco_2017_val" + + +def runner_decrator(cls): + def custom_build_evaluator(cls, cfg, dataset_name, dataset): + return build_evaluator(cfg, dataset_name, dataset, None, dump=True) + + cls.build_evaluator = classmethod(custom_build_evaluator) + return cls + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--pth_path', default="YOLOF_CSP_D_53_DC5_9x.pth") + parser.add_argument('--bin_data_path', default="result/") + parser.add_argument('--meta_info_path', default="yolof_meta.info") + parser.add_argument('--num_classes', default=81, type=int) + + args = parser.parse_args() + + opts = ['MODEL.WEIGHTS', args.pth_path, "MODEL.DEVICE", "cpu"] + config.merge_from_list(opts) + + cls = runner_decrator(RUNNERS.get(config.TRAINER.NAME)) + evaluator = cls.build_evaluator(config, dataset_name, cls.build_test_loader(config).dataset) + evaluator.reset() + bin_data_path = args.bin_data_path + os.listdir(args.bin_data_path)[0] + "/" + + with open(args.meta_info_path, "r") as fp: + for line in fp: + values = line.split() + file_name = values[0] + batch_size = (len(values) - 1) // 3 + nmsed_boxes_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 0), + dtype=np.float32).reshape(batch_size, -1, 4) + nmsed_scores_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 1), + dtype=np.float32).reshape(batch_size, -1) + nmsed_classes_batch = np.fromfile("{0}{1}_output_{2}.bin".format(bin_data_path, file_name, 2), + dtype=np.int64).reshape(batch_size, -1) + last_image = "" + for i in range(batch_size): + img_name = values[i * 3 + 1] + if img_name == last_image: + break + last_image = img_name + last_img_name = img_name + height = int(values[i * 3 + 2]) + width = int(values[i * 3 + 3]) + nmsed_boxes = nmsed_boxes_batch[i] + nmsed_scores = nmsed_scores_batch[i] + nmsed_classes = nmsed_classes_batch[i] + result = Instances(const_shape) + result.pred_boxes = Boxes(torch.tensor(nmsed_boxes)) + result.scores = torch.tensor(nmsed_scores) + result.pred_classes = torch.tensor(nmsed_classes) + r = detector_postprocess(result, height, width) + r = {"instances": r} + _input = {"image_id": int(img_name)} + evaluator.process([_input], [r]) + print(evaluator.evaluate()) + print(evaluator._dump_infos[0]['summary']) diff --git 
a/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_preprocess.py b/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_preprocess.py index 60344d400bb7528bfa8bd5ce0f80a8e8c18c1b73..4c30bf4dceafe9201147f007701434da84a7389b 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/YOLOF_preprocess.py @@ -1,83 +1,83 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse -import numpy as np -from PIL import Image -import pickle as pk -import multiprocessing - -flags = None -width = 608 -height = 608 -pixel_mean = np.array([103.5300, 116.2800, 123.6750], dtype=np.float32) - - -def gen_input_bin(file_batches, batch): - i = 0 - image_bag = [] - image_meta_bag = [] - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - src = Image.open(os.path.join(flags.image_src_path, file)).convert("RGB") - ori_shape = (src.size[1], src.size[0]) - image = src.resize((height, width), 2) - image = np.asarray(image) - image = image[..., ::-1] - image = image - pixel_mean - image = image.transpose(2, 0, 1) - image_meta = {'ori_shape': ori_shape, 'file_name': file.split('.')[0]} - image_bag.append(image) - image_meta_bag.append(image_meta) - if len(image_bag) % flags.batch_size == 0: - np.array(image_bag).tofile(os.path.join(flags.bin_file_path, "{}_{}.bin".format(batch, i))) - with open(os.path.join(flags.meta_file_path, "{}_{}.pk".format(batch, i)), "wb") as fp: - pk.dump(image_meta_bag, fp) - image_bag = [] - image_meta_bag = [] - if image_bag: - ext_img_bag = [image_bag[-1] for i in range(flags.batch_size-len(image_bag))] - image_bag += ext_img_bag - ext_img_meta_bag = [image_meta_bag[-1] for i in range(flags.batch_size - len(image_meta_bag))] - image_meta_bag += ext_img_meta_bag - np.array(image_bag).tofile(os.path.join(flags.bin_file_path, "{}_{}.bin".format(batch, i))) - with open(os.path.join(flags.meta_file_path, "{}_{}.pk".format(batch, i)), "wb") as fp: - pk.dump(image_meta_bag, fp) - - -def preprocess(): - step = 100 - (100 % flags.batch_size) - files = os.listdir(flags.image_src_path) - file_batches = [files[i:i + step] for i in range(0, 5000, step) if files[i:i + step] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of YOLOF PyTorch model') - parser.add_argument("--image_src_path", default="YOLOF/datasets/coco/val2017", help='image of dataset') - parser.add_argument("--bin_file_path", default="val2017_bin") - parser.add_argument("--meta_file_path", default="val2017_bin_meta") - parser.add_argument("--batch_size", default=1, type=int) - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) - if not os.path.exists(flags.meta_file_path): - 
os.makedirs(flags.meta_file_path) - preprocess() +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import numpy as np +from PIL import Image +import pickle as pk +import multiprocessing + +flags = None +width = 608 +height = 608 +pixel_mean = np.array([103.5300, 116.2800, 123.6750], dtype=np.float32) + + +def gen_input_bin(file_batches, batch): + i = 0 + image_bag = [] + image_meta_bag = [] + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + src = Image.open(os.path.join(flags.image_src_path, file)).convert("RGB") + ori_shape = (src.size[1], src.size[0]) + image = src.resize((height, width), 2) + image = np.asarray(image) + image = image[..., ::-1] + image = image - pixel_mean + image = image.transpose(2, 0, 1) + image_meta = {'ori_shape': ori_shape, 'file_name': file.split('.')[0]} + image_bag.append(image) + image_meta_bag.append(image_meta) + if len(image_bag) % flags.batch_size == 0: + np.array(image_bag).tofile(os.path.join(flags.bin_file_path, "{}_{}.bin".format(batch, i))) + with open(os.path.join(flags.meta_file_path, "{}_{}.pk".format(batch, i)), "wb") as fp: + pk.dump(image_meta_bag, fp) + image_bag = [] + image_meta_bag = [] + if image_bag: + ext_img_bag = [image_bag[-1] for i in range(flags.batch_size-len(image_bag))] + image_bag += ext_img_bag + ext_img_meta_bag = [image_meta_bag[-1] for i in range(flags.batch_size - len(image_meta_bag))] + image_meta_bag += ext_img_meta_bag + np.array(image_bag).tofile(os.path.join(flags.bin_file_path, "{}_{}.bin".format(batch, i))) + with open(os.path.join(flags.meta_file_path, "{}_{}.pk".format(batch, i)), "wb") as fp: + pk.dump(image_meta_bag, fp) + + +def preprocess(): + step = 100 - (100 % flags.batch_size) + files = os.listdir(flags.image_src_path) + file_batches = [files[i:i + step] for i in range(0, 5000, step) if files[i:i + step] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of YOLOF PyTorch model') + parser.add_argument("--image_src_path", default="YOLOF/datasets/coco/val2017", help='image of dataset') + parser.add_argument("--bin_file_path", default="val2017_bin") + parser.add_argument("--meta_file_path", default="val2017_bin_meta") + parser.add_argument("--batch_size", default=1, type=int) + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) + if not os.path.exists(flags.meta_file_path): + os.makedirs(flags.meta_file_path) + preprocess() diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/YOLOF/gen_dataset_info.py index de69f1132e110c24ada316149f2aa83f60f468be..970b1fd24071fa33561fe5d917ce91e4ba8511c8 100644 --- 
a/ACL_PyTorch/contrib/cv/detection/YOLOF/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/gen_dataset_info.py @@ -1,36 +1,36 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse -import pickle as pk - -const_img_shape = (608, 608) - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='generate dataset info file') - parser.add_argument("--meta_file_path", default="val2017_bin_meta") - parser.add_argument("--meta_info_file_name", default="yolof_meta.info") - args = parser.parse_args() - - with open(args.meta_info_file_name, "w") as fp1: - file_list = os.listdir(args.meta_file_path) - for file in file_list: - with open("{}/{}".format(args.meta_file_path, file), "rb") as fp2: - meta = pk.load(fp2) - fp1.write(file.split(".")[0]) - for dic in meta: - fp1.write(" {} {} {}".format(dic['file_name'], *dic['ori_shape'])) - fp1.write("\n") - print("Get info done!") +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
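+# Format of the generated info file:
+#   * each .pk file written by YOLOF_preprocess.py holds a list of per-image dicts
+#     ({'ori_shape': (height, width), 'file_name': image stem}) for one .bin batch;
+#   * one line is written per .pk file:
+#       <bin/pk file stem> <img_stem_1> <h_1> <w_1> <img_stem_2> <h_2> <w_2> ...
+#   * YOLOF_postprocess.py splits these fields to recover the batch size and the
+#     original image shapes passed to detector_postprocess().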
+ +import os +import argparse +import pickle as pk + +const_img_shape = (608, 608) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='generate dataset info file') + parser.add_argument("--meta_file_path", default="val2017_bin_meta") + parser.add_argument("--meta_info_file_name", default="yolof_meta.info") + args = parser.parse_args() + + with open(args.meta_info_file_name, "w") as fp1: + file_list = os.listdir(args.meta_file_path) + for file in file_list: + with open("{}/{}".format(args.meta_file_path, file), "rb") as fp2: + meta = pk.load(fp2) + fp1.write(file.split(".")[0]) + for dic in meta: + fp1.write(" {} {} {}".format(dic['file_name'], *dic['ori_shape'])) + fp1.write("\n") + print("Get info done!") diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/detection/YOLOF/modelzoo_level.txt index 7a41dd3e9f164a0414e50ad52a502c6d21b9b8ff..3901da7fbaa158ca7d805621545eabdadc18999f 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -ModelConvert:OK -PerfStatus=OK +FuncStatus:OK +PrecisionStatus:OK +ModelConvert:OK +PerfStatus=OK diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/pytorch2onnx.py b/ACL_PyTorch/contrib/cv/detection/YOLOF/pytorch2onnx.py index 4f111b8adf40ddc26875d3101cc6edceb097d981..742e8cb370779406d5b347e2fc321e125f68c1f3 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/pytorch2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/pytorch2onnx.py @@ -1,78 +1,78 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import argparse -import numpy as np -from cvpods.engine import default_setup -from cvpods.checkpoint import DefaultCheckpointer - -import sys - -sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x/".format(sys.path[0])) -sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/".format(sys.path[0])) -from net import build_model -from config import config - -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - -def convert_batchnorm(module, process_group=None): - module_output = module - if isinstance(module, torch.nn.SyncBatchNorm): - module_output = torch.nn.BatchNorm2d(num_features=module.num_features, - eps=module.eps, - momentum=module.momentum, - affine=module.affine, - track_running_stats=module.track_running_stats) - if module.affine: - with torch.no_grad(): - module_output.weight = module.weight - module_output.bias = module.bias - module_output.running_mean = module.running_mean - module_output.running_var = module.running_var - module_output.num_batches_tracked = module.num_batches_tracked - for name, child in module.named_children(): - module_output.add_module(name, convert_batchnorm(child, process_group)) - del module - return module_output - - -def pth2onnx(args, fake_input, opts): - config.merge_from_list(opts) - model = build_model(config) - model._batch_size = args.batch_size - model.forward = model.forward_onnx - model = convert_batchnorm(model) - model.eval() - DefaultCheckpointer(model, save_dir=config.OUTPUT_DIR).resume_or_load( - config.MODEL.WEIGHTS, resume=False - ) - torch.onnx.export(model, fake_input, args.out, verbose=True, opset_version=11, - input_names=['input'], enable_onnx_checker=False) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model_config', help='model config path', - default="YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x") - parser.add_argument('--out', help='onnx output name', default="yolof.onnx") - parser.add_argument('--pth_path', help='model pth path', default="./YOLOF_CSP_D_53_DC5_9x.pth") - parser.add_argument('--batch_size', type=int, default=1) - args = parser.parse_args() - - img_shape = (args.batch_size, 3, 608, 608) - fake_input = torch.randn(*img_shape) - opts = ['MODEL.WEIGHTS', args.pth_path, "MODEL.DEVICE", "cpu"] - pth2onnx(args, fake_input, opts) +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import argparse +import numpy as np +from cvpods.engine import default_setup +from cvpods.checkpoint import DefaultCheckpointer + +import sys + +sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x/".format(sys.path[0])) +sys.path.append("{0}/YOLOF/playground/detection/coco/yolof/".format(sys.path[0])) +from net import build_model +from config import config + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +def convert_batchnorm(module, process_group=None): + module_output = module + if isinstance(module, torch.nn.SyncBatchNorm): + module_output = torch.nn.BatchNorm2d(num_features=module.num_features, + eps=module.eps, + momentum=module.momentum, + affine=module.affine, + track_running_stats=module.track_running_stats) + if module.affine: + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + for name, child in module.named_children(): + module_output.add_module(name, convert_batchnorm(child, process_group)) + del module + return module_output + + +def pth2onnx(args, fake_input, opts): + config.merge_from_list(opts) + model = build_model(config) + model._batch_size = args.batch_size + model.forward = model.forward_onnx + model = convert_batchnorm(model) + model.eval() + DefaultCheckpointer(model, save_dir=config.OUTPUT_DIR).resume_or_load( + config.MODEL.WEIGHTS, resume=False + ) + torch.onnx.export(model, fake_input, args.out, verbose=True, opset_version=11, + input_names=['input'], enable_onnx_checker=False) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--model_config', help='model config path', + default="YOLOF/playground/detection/coco/yolof/yolof.cspdarknet53.DC5.9x") + parser.add_argument('--out', help='onnx output name', default="yolof.onnx") + parser.add_argument('--pth_path', help='model pth path', default="./YOLOF_CSP_D_53_DC5_9x.pth") + parser.add_argument('--batch_size', type=int, default=1) + args = parser.parse_args() + + img_shape = (args.batch_size, 3, 608, 608) + fake_input = torch.randn(*img_shape) + opts = ['MODEL.WEIGHTS', args.pth_path, "MODEL.DEVICE", "cpu"] + pth2onnx(args, fake_input, opts) diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/readme.md b/ACL_PyTorch/contrib/cv/detection/YOLOF/readme.md index 960872cef8254c43ebfd5b2a2906fa56c68c4d4f..1af4d238f4c179a442fb1eed55ec8eab38c4aa27 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/readme.md +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/readme.md @@ -1,47 +1,47 @@ -### YOLOF模型PyTorch离线推理指导 - -### 1. 环境准备 - -1. 安装依赖 - -```bash -pip install -r requirements.txt -``` - -2. 获取,修改与安装开源模型代码 - -``` -git clone -b main https://github.com/megvii-model/YOLOF.git -cd YOLOF -git reset 6189487b80601dfeda89302c22abac060f977785 --hard - -patch -p1 < ../YOLOF.patch -python3 setup.py develop -cd .. -``` - -3. 将权重文件YOLOF_CSP_D_53_DC5_9x.pth放到当前工作目录 - -4. 数据集 - - 获取COCO数据集,并重命名为coco,放到当前目录下的 YOLOF/datasets/ 文件夹内 - -5. [获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) - - 将msame文件放到当前工作目录 - -### 2. 
离线推理 - -710上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -```bash -bash test/pth2om.sh --batch_size=1 -bash test/eval_acc_perf.sh --batch_size=1 -``` - -**评测结果:** - -| 模型 | pth精度 | 710离线推理精度 | 710性能 | -| ---------- | ----------- | --------------- | ---------- | -| YOLOF bs1 | box AP:50.9 | box AP:51.0 | fps 27.697 | +### YOLOF模型PyTorch离线推理指导 + +### 1. 环境准备 + +1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +2. 获取,修改与安装开源模型代码 + +``` +git clone -b main https://github.com/megvii-model/YOLOF.git +cd YOLOF +git reset 6189487b80601dfeda89302c22abac060f977785 --hard + +patch -p1 < ../YOLOF.patch +python3 setup.py develop +cd .. +``` + +3. 将权重文件YOLOF_CSP_D_53_DC5_9x.pth放到当前工作目录 + +4. 数据集 + + 获取COCO数据集,并重命名为coco,放到当前目录下的 YOLOF/datasets/ 文件夹内 + +5. [获取msame工具](https://gitee.com/ascend/tools/tree/master/msame) + + 将msame文件放到当前工作目录 + +### 2. 离线推理 + +710上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +```bash +bash test/pth2om.sh --batch_size=1 +bash test/eval_acc_perf.sh --batch_size=1 +``` + +**评测结果:** + +| 模型 | pth精度 | 710离线推理精度 | 710性能 | +| ---------- | ----------- | --------------- | ---------- | +| YOLOF bs1 | box AP:50.9 | box AP:51.0 | fps 27.697 | | YOLOF bs16 | box AP:50.9 | box AP:51.0 | fps 38.069 | \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOF/test/parse.py b/ACL_PyTorch/contrib/cv/detection/YOLOF/test/parse.py index 3fbf812ce48df315f7d4d9982c731333577c2a8a..4e51797a87ad12ff1d7dc6c400e6a3d5dee540eb 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOF/test/parse.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOF/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7]) - print('710 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7]) + print('710 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/YOLOX_postprocess.py b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/YOLOX_postprocess.py index 29003ed851832ef6eb707b4258022e994daef334..a1a777ec04a400358b59bd17926c4aec101ec741 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/YOLOX_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/YOLOX_postprocess.py @@ -1,54 +1,54 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import mmcv -import numpy as np -import argparse -from mmdet.core import bbox2result -from mmdet.datasets import build_dataset - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--dataset_path', default="/opt/npu/coco") - parser.add_argument('--model_config', default="mmdetection/configs/yolox/yolox_s_8x8_300e_coco.py") - parser.add_argument('--bin_data_path', default="result/dumpOutput_device0/") - parser.add_argument('--meta_info_path', default="yolox_meta.info") - parser.add_argument('--num_classes', default=81) - - args = parser.parse_args() - - cfg = mmcv.Config.fromfile(args.model_config) - cfg.data.test.test_mode = True - cfg.data.test.ann_file = args.dataset_path + ann_file - cfg.data.test.img_prefix = args.dataset_path + img_prefix - dataset = build_dataset(cfg.data.test) - - num_classes = int(args.num_classes) - outputs = [] - with open(args.meta_info_path, "r") as fp: - for line in fp: - _, file_path, scalar = line.split() - scalar = float(scalar) - file_name = file_path.split("/")[1].replace(".bin", "") - result_list = [ - np.fromfile("{0}{1}_{2}.bin".format(args.bin_data_path, file_name, 1), dtype=np.float32).reshape(-1, 5), - np.fromfile("{0}{1}_{2}.bin".format(args.bin_data_path, file_name, 2), dtype=np.int64)] - result_list[0][..., :4] /= scalar - bbox_result = bbox2result(result_list[0], result_list[1], num_classes) - outputs.append(bbox_result) - eval_kwargs = {'metric': ['bbox']} - dataset.evaluate(outputs, **eval_kwargs) +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mmcv +import numpy as np +import argparse +from mmdet.core import bbox2result +from mmdet.datasets import build_dataset + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--dataset_path', default="/opt/npu/coco") + parser.add_argument('--model_config', default="mmdetection/configs/yolox/yolox_s_8x8_300e_coco.py") + parser.add_argument('--bin_data_path', default="result/dumpOutput_device0/") + parser.add_argument('--meta_info_path', default="yolox_meta.info") + parser.add_argument('--num_classes', default=81) + + args = parser.parse_args() + + cfg = mmcv.Config.fromfile(args.model_config) + cfg.data.test.test_mode = True + cfg.data.test.ann_file = args.dataset_path + ann_file + cfg.data.test.img_prefix = args.dataset_path + img_prefix + dataset = build_dataset(cfg.data.test) + + num_classes = int(args.num_classes) + outputs = [] + with open(args.meta_info_path, "r") as fp: + for line in fp: + _, file_path, scalar = line.split() + scalar = float(scalar) + file_name = file_path.split("/")[1].replace(".bin", "") + result_list = [ + np.fromfile("{0}{1}_{2}.bin".format(args.bin_data_path, file_name, 1), dtype=np.float32).reshape(-1, 5), + np.fromfile("{0}{1}_{2}.bin".format(args.bin_data_path, file_name, 2), dtype=np.int64)] + result_list[0][..., :4] /= scalar + bbox_result = bbox2result(result_list[0], result_list[1], num_classes) + outputs.append(bbox_result) + eval_kwargs = {'metric': ['bbox']} + dataset.evaluate(outputs, **eval_kwargs) diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/gen_dataset_info.py index dee00755558872b2746c53c9e96f24394dbbd417..877b0a026d48b4e4093af24351f8cb9a629d5ac1 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/gen_dataset_info.py @@ -1,53 +1,53 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import mmcv -from mmdet.datasets import build_dataset -import pickle as pk - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - -if __name__ == '__main__': - image_src_path = sys.argv[1] - config_path = sys.argv[2] - bin_path = sys.argv[3] - meta_path = sys.argv[4] - info_name = sys.argv[5] - info_meta_name = sys.argv[6] - width = int(sys.argv[7]) - height = int(sys.argv[8]) - - cfg = mmcv.Config.fromfile(config_path) - cfg.data.test.ann_file = image_src_path + ann_file - cfg.data.test.img_prefix = image_src_path + img_prefix - cfg.data.test.test_mode = True - - dataset = build_dataset(cfg.data.test) - - with open(info_name, "w") as fp1, open(info_meta_name, "w") as fp2: - for idx in range(5000): - img_id = dataset.img_ids[idx] - fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) - fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") - meta = pk.load(fp_meta) - fp_meta.close() - fp2.write("{} {}/{:0>12d}.bin {}\n".format( - idx, - meta_path, - img_id, - meta['scalar'] - )) - print("Get info done!") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import mmcv +from mmdet.datasets import build_dataset +import pickle as pk + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + +if __name__ == '__main__': + image_src_path = sys.argv[1] + config_path = sys.argv[2] + bin_path = sys.argv[3] + meta_path = sys.argv[4] + info_name = sys.argv[5] + info_meta_name = sys.argv[6] + width = int(sys.argv[7]) + height = int(sys.argv[8]) + + cfg = mmcv.Config.fromfile(config_path) + cfg.data.test.ann_file = image_src_path + ann_file + cfg.data.test.img_prefix = image_src_path + img_prefix + cfg.data.test.test_mode = True + + dataset = build_dataset(cfg.data.test) + + with open(info_name, "w") as fp1, open(info_meta_name, "w") as fp2: + for idx in range(5000): + img_id = dataset.img_ids[idx] + fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) + fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") + meta = pk.load(fp_meta) + fp_meta.close() + fp2.write("{} {}/{:0>12d}.bin {}\n".format( + idx, + meta_path, + img_id, + meta['scalar'] + )) + print("Get info done!") diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/readme.md b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/readme.md index 645cc073d17cc9c6290570fb32fdfdc09ecb5f4f..56601531ed18d4472b67c0277d446bac7aca531b 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/readme.md +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/readme.md @@ -1,51 +1,51 @@ -### YOLOX模型PyTorch离线推理指导 - -### 1. 环境准备 - -1. 安装依赖 - -```bash -pip install -r requirements.txt -``` - -2. 获取,修改与安装开源模型代码 - -``` -git clone -b master https://github.com/open-mmlab/mmdetection.git -cd mmdetection -git reset 6b87ac22b8d9dea8cc28b9ce84909e6c311e6268 --hard - -pip install -v -e . 
# or python3 setup.py develop -pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html -patch -p1 < ../YOLOX.patch -cd .. -``` - -3. 将权重文件[yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth)放到当前工作目录。 - -4. 数据集 - - 获取COCO数据集,并重命名为COCO,放到/root/datasets目录 - -5. [获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - - 将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - -### 2. 离线推理 - -710上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -```bash -bash test/pth2om.sh --batch_size=1 -bash test/eval_acc_perf.sh --datasets_path=/root/datasets --batch_size=1 -``` - -**评测结果:** - -| 模型 | pth精度 | 710离线推理精度 | 性能基准 | 710性能 | -| ----------- | --------- | --------------- | --------- | ------- | -| YOLOX bs1 | box AP:50.9 | box AP:51.0 | fps 11.828 | fps 27.697 | -| YOLOX bs16 | box AP:50.9 | box AP:51.0 | fps 14.480 | fps 38.069 | - - - +### YOLOX模型PyTorch离线推理指导 + +### 1. 环境准备 + +1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +2. 获取,修改与安装开源模型代码 + +``` +git clone -b master https://github.com/open-mmlab/mmdetection.git +cd mmdetection +git reset 6b87ac22b8d9dea8cc28b9ce84909e6c311e6268 --hard + +pip install -v -e . # or python3 setup.py develop +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html +patch -p1 < ../YOLOX.patch +cd .. +``` + +3. 将权重文件[yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth](https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth)放到当前工作目录。 + +4. 数据集 + + 获取COCO数据集,并重命名为COCO,放到/root/datasets目录 + +5. [获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + + 将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + +### 2. 离线推理 + +710上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +```bash +bash test/pth2om.sh --batch_size=1 +bash test/eval_acc_perf.sh --datasets_path=/root/datasets --batch_size=1 +``` + +**评测结果:** + +| 模型 | pth精度 | 710离线推理精度 | 性能基准 | 710性能 | +| ----------- | --------- | --------------- | --------- | ------- | +| YOLOX bs1 | box AP:50.9 | box AP:51.0 | fps 11.828 | fps 27.697 | +| YOLOX bs16 | box AP:50.9 | box AP:51.0 | fps 14.480 | fps 38.069 | + + + diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/requirements.txt b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/requirements.txt index 417d5e02a3b432ff1a034314f6975c4072f6f479..5b7b027fef93b38c539a8c1a0ddcb59fc6bfa307 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/requirements.txt +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX-mmdetection/requirements.txt @@ -1,7 +1,7 @@ -torch==1.7.0 -torchvision==0.8.0 -onnx -opencv-python -sympy -cython +torch==1.7.0 +torchvision==0.8.0 +onnx +opencv-python +sympy +cython numpy \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX/Yolox_postprocess.py b/ACL_PyTorch/contrib/cv/detection/YOLOX/Yolox_postprocess.py index 1eef050881cb251e6fbb0c534a2c57d6cfe75231..9367f270d33bd801837c379d37ddc16b50c8332b 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX/Yolox_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX/Yolox_postprocess.py @@ -1,139 +1,139 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import argparse -from tqdm import tqdm -import torch -import numpy as np -from yolox.data import COCODataset, ValTransform -from yolox.evaluators import COCOEvaluator -from yolox.utils.boxes import postprocess - -from yolox.utils.demo_utils import demo_postprocess -sys.path.append('./YOLOX') - - -def get_output_data(dump_dir, idx, dtype=np.float32): - output_shape_1 = [1, 4, 80, 80] - output_shape_2 = [1, 1, 80, 80] - output_shape_3 = [1, 80, 80, 80] - output_shape_4 = [1, 4, 40, 40] - output_shape_5 = [1, 1, 40, 40] - output_shape_6 = [1, 80, 40, 40] - output_shape_7 = [1, 4, 20, 20] - output_shape_8 = [1, 1, 20, 20] - output_shape_9 = [1, 80, 20, 20] - - input_file_1 = os.path.join(dump_dir, "{:0>12d}_1.bin".format(idx)) - input_file_2 = os.path.join(dump_dir, "{:0>12d}_2.bin".format(idx)) - input_file_3 = os.path.join(dump_dir, "{:0>12d}_3.bin".format(idx)) - input_file_4 = os.path.join(dump_dir, "{:0>12d}_4.bin".format(idx)) - input_file_5 = os.path.join(dump_dir, "{:0>12d}_5.bin".format(idx)) - input_file_6 = os.path.join(dump_dir, "{:0>12d}_6.bin".format(idx)) - input_file_7 = os.path.join(dump_dir, "{:0>12d}_7.bin".format(idx)) - input_file_8 = os.path.join(dump_dir, "{:0>12d}_8.bin".format(idx)) - input_file_9 = os.path.join(dump_dir, "{:0>12d}_9.bin".format(idx)) - - input_data_1 = np.fromfile(input_file_1, dtype=dtype).reshape(output_shape_1) - input_data_2 = np.fromfile(input_file_2, dtype=dtype).reshape(output_shape_2) - input_data_3 = np.fromfile(input_file_3, dtype=dtype).reshape(output_shape_3) - input_data_4 = np.fromfile(input_file_4, dtype=dtype).reshape(output_shape_4) - input_data_5 = np.fromfile(input_file_5, dtype=dtype).reshape(output_shape_5) - input_data_6 = np.fromfile(input_file_6, dtype=dtype).reshape(output_shape_6) - input_data_7 = np.fromfile(input_file_7, dtype=dtype).reshape(output_shape_7) - input_data_8 = np.fromfile(input_file_8, dtype=dtype).reshape(output_shape_8) - input_data_9 = np.fromfile(input_file_9, dtype=dtype).reshape(output_shape_9) - - lst = [] - lst.append(torch.from_numpy(input_data_1)) - lst.append(torch.from_numpy(input_data_2)) - lst.append(torch.from_numpy(input_data_3)) - lst.append(torch.from_numpy(input_data_4)) - lst.append(torch.from_numpy(input_data_5)) - lst.append(torch.from_numpy(input_data_6)) - lst.append(torch.from_numpy(input_data_7)) - lst.append(torch.from_numpy(input_data_8)) - lst.append(torch.from_numpy(input_data_9)) - - return lst - - -def main(): - parser = argparse.ArgumentParser(description='YOLOX Postprocess') - parser.add_argument('--dataroot', dest='dataroot', - help='data root dirname', default='/opt/npu/coco', - type=str) - parser.add_argument('--dump_dir', dest='dump_dir', - help='dump dir for bin files', default='./result/dumpOutput_device0/', - type=str) - - parser.add_argument('--batch', dest='batch', help='batch for dataloader', default=1, type=int) - opt = parser.parse_args() - - if os.path.exists(opt.dump_dir): - os.system("rm-rf " + opt.dump_dir) - else: - os.system("mkdir " + opt.dump_dir) - - valdataset = COCODataset( - data_dir=opt.dataroot, - 
json_file='instances_val2017.json', - name="val2017", - img_size = (640, 640), - preproc=ValTransform(legacy=False), - ) - sampler = torch.utils.data.SequentialSampler(valdataset) - - dataloader_kwargs = {"num_workers": 8, "pin_memory": True, "sampler": sampler, "batch_size": opt.batch} - - val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) - - data_list = [] - coco_evaluator = COCOEvaluator(val_loader, img_size=(640, 640), confthre=0.001, nmsthre=0.65, num_classes=80) - - for cur_iter, (imgs, _, info_imgs, ids) in enumerate(tqdm(val_loader)): - - opt1, opt2, opt3, opt4, opt5, opt6, opt7, opt8, opt9 = get_output_data(opt.dump_dir, cur_iter) - opt2 = opt2.sigmoid() - opt3 = opt3.sigmoid() - - opt5 = opt5.sigmoid() - opt6 = opt6.sigmoid() - - opt8 = opt8.sigmoid() - opt9 = opt9.sigmoid() - output1 = torch.cat((opt1, opt2, opt3), dim=1) - output2 = torch.cat((opt4, opt5, opt6), dim=1) - output3 = torch.cat((opt7, opt8, opt9), dim=1) - - output1 = output1.view(1, 85, -1) - output2 = output2.view(1, 85, -1) - output3 = output3.view(1, 85, -1) - - outputs = torch.cat((output1, output2, output3), dim=2) - outputs = outputs.transpose(2, 1) - - - outputs = demo_postprocess(outputs, [640, 640]) - - outputs = postprocess(outputs, num_classes=80, conf_thre=0.001, nms_thre=0.65) - data_list.extend(coco_evaluator.convert_to_coco_format(outputs, info_imgs, ids)) - - results = coco_evaluator.evaluate_prediction(data_list) - print(results) - - -if __name__ == "__main__": - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import argparse +from tqdm import tqdm +import torch +import numpy as np +from yolox.data import COCODataset, ValTransform +from yolox.evaluators import COCOEvaluator +from yolox.utils.boxes import postprocess + +from yolox.utils.demo_utils import demo_postprocess +sys.path.append('./YOLOX') + + +def get_output_data(dump_dir, idx, dtype=np.float32): + output_shape_1 = [1, 4, 80, 80] + output_shape_2 = [1, 1, 80, 80] + output_shape_3 = [1, 80, 80, 80] + output_shape_4 = [1, 4, 40, 40] + output_shape_5 = [1, 1, 40, 40] + output_shape_6 = [1, 80, 40, 40] + output_shape_7 = [1, 4, 20, 20] + output_shape_8 = [1, 1, 20, 20] + output_shape_9 = [1, 80, 20, 20] + + input_file_1 = os.path.join(dump_dir, "{:0>12d}_1.bin".format(idx)) + input_file_2 = os.path.join(dump_dir, "{:0>12d}_2.bin".format(idx)) + input_file_3 = os.path.join(dump_dir, "{:0>12d}_3.bin".format(idx)) + input_file_4 = os.path.join(dump_dir, "{:0>12d}_4.bin".format(idx)) + input_file_5 = os.path.join(dump_dir, "{:0>12d}_5.bin".format(idx)) + input_file_6 = os.path.join(dump_dir, "{:0>12d}_6.bin".format(idx)) + input_file_7 = os.path.join(dump_dir, "{:0>12d}_7.bin".format(idx)) + input_file_8 = os.path.join(dump_dir, "{:0>12d}_8.bin".format(idx)) + input_file_9 = os.path.join(dump_dir, "{:0>12d}_9.bin".format(idx)) + + input_data_1 = np.fromfile(input_file_1, dtype=dtype).reshape(output_shape_1) + input_data_2 = np.fromfile(input_file_2, dtype=dtype).reshape(output_shape_2) + input_data_3 = np.fromfile(input_file_3, dtype=dtype).reshape(output_shape_3) + input_data_4 = np.fromfile(input_file_4, dtype=dtype).reshape(output_shape_4) + input_data_5 = np.fromfile(input_file_5, dtype=dtype).reshape(output_shape_5) + input_data_6 = np.fromfile(input_file_6, dtype=dtype).reshape(output_shape_6) + input_data_7 = np.fromfile(input_file_7, dtype=dtype).reshape(output_shape_7) + input_data_8 = np.fromfile(input_file_8, dtype=dtype).reshape(output_shape_8) + input_data_9 = np.fromfile(input_file_9, dtype=dtype).reshape(output_shape_9) + + lst = [] + lst.append(torch.from_numpy(input_data_1)) + lst.append(torch.from_numpy(input_data_2)) + lst.append(torch.from_numpy(input_data_3)) + lst.append(torch.from_numpy(input_data_4)) + lst.append(torch.from_numpy(input_data_5)) + lst.append(torch.from_numpy(input_data_6)) + lst.append(torch.from_numpy(input_data_7)) + lst.append(torch.from_numpy(input_data_8)) + lst.append(torch.from_numpy(input_data_9)) + + return lst + + +def main(): + parser = argparse.ArgumentParser(description='YOLOX Postprocess') + parser.add_argument('--dataroot', dest='dataroot', + help='data root dirname', default='/opt/npu/coco', + type=str) + parser.add_argument('--dump_dir', dest='dump_dir', + help='dump dir for bin files', default='./result/dumpOutput_device0/', + type=str) + + parser.add_argument('--batch', dest='batch', help='batch for dataloader', default=1, type=int) + opt = parser.parse_args() + + if os.path.exists(opt.dump_dir): + os.system("rm-rf " + opt.dump_dir) + else: + os.system("mkdir " + opt.dump_dir) + + valdataset = COCODataset( + data_dir=opt.dataroot, + json_file='instances_val2017.json', + name="val2017", + img_size = (640, 640), + preproc=ValTransform(legacy=False), + ) + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = {"num_workers": 8, "pin_memory": True, "sampler": sampler, "batch_size": opt.batch} + + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + data_list = [] + coco_evaluator = COCOEvaluator(val_loader, 
img_size=(640, 640), confthre=0.001, nmsthre=0.65, num_classes=80) + + for cur_iter, (imgs, _, info_imgs, ids) in enumerate(tqdm(val_loader)): + + opt1, opt2, opt3, opt4, opt5, opt6, opt7, opt8, opt9 = get_output_data(opt.dump_dir, cur_iter) + opt2 = opt2.sigmoid() + opt3 = opt3.sigmoid() + + opt5 = opt5.sigmoid() + opt6 = opt6.sigmoid() + + opt8 = opt8.sigmoid() + opt9 = opt9.sigmoid() + output1 = torch.cat((opt1, opt2, opt3), dim=1) + output2 = torch.cat((opt4, opt5, opt6), dim=1) + output3 = torch.cat((opt7, opt8, opt9), dim=1) + + output1 = output1.view(1, 85, -1) + output2 = output2.view(1, 85, -1) + output3 = output3.view(1, 85, -1) + + outputs = torch.cat((output1, output2, output3), dim=2) + outputs = outputs.transpose(2, 1) + + + outputs = demo_postprocess(outputs, [640, 640]) + + outputs = postprocess(outputs, num_classes=80, conf_thre=0.001, nms_thre=0.65) + data_list.extend(coco_evaluator.convert_to_coco_format(outputs, info_imgs, ids)) + + results = coco_evaluator.evaluate_prediction(data_list) + print(results) + + +if __name__ == "__main__": + main() diff --git a/ACL_PyTorch/contrib/cv/detection/YOLOX/readme.md b/ACL_PyTorch/contrib/cv/detection/YOLOX/readme.md index 0ae6b915620c123373d1dad89d5ce43eaec1f8bf..0043fc950f365ef96e8574ab4608238d3aef070b 100644 --- a/ACL_PyTorch/contrib/cv/detection/YOLOX/readme.md +++ b/ACL_PyTorch/contrib/cv/detection/YOLOX/readme.md @@ -1,50 +1,50 @@ -### YOLOX模型PyTorch离线推理指导 - -### 1. 环境准备 - -1. 安装依赖 - -```bash -pip3.7 install -r requirements.txt -``` - -2. 获取,修改与安装开源模型代码 - -``` -git clone git@github.com:Megvii-BaseDetection/YOLOX.git -main -cd YOLOX -git reset 6880e3999eb5cf83037e1818ee63d589384587bd --hard -pip3.7 install -v -e . # or python3 setup.py develop -pip3.7 install cython -pip3.7 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' -patch -p1 < ../YOLOX-X.patch -cd .. -``` - -3. 将权重文件[yolox_x.pth](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth)放到当前工作目录。 - -4. 数据集 - - 获取COCO数据集,并重命名为COCO,放到/root/datasets目录 - -5. [获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) - - 将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - -### 2. 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -```bash -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - -**评测结果:** - -| 模型 | pth精度 | 310离线推理精度 | 性能基准 | 310性能 | -| ----------- | --------- | --------------- | --------- | ------- | -| yolox-x bs1 | map:51.2% | map:51.1% | 60.739fps | 37.72144fps | - - - +### YOLOX模型PyTorch离线推理指导 + +### 1. 环境准备 + +1. 安装依赖 + +```bash +pip3.7 install -r requirements.txt +``` + +2. 获取,修改与安装开源模型代码 + +``` +git clone git@github.com:Megvii-BaseDetection/YOLOX.git -main +cd YOLOX +git reset 6880e3999eb5cf83037e1818ee63d589384587bd --hard +pip3.7 install -v -e . # or python3 setup.py develop +pip3.7 install cython +pip3.7 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' +patch -p1 < ../YOLOX-X.patch +cd .. +``` + +3. 将权重文件[yolox_x.pth](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth)放到当前工作目录。 + +4. 数据集 + + 获取COCO数据集,并重命名为COCO,放到/root/datasets目录 + +5. [获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) + + 将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + +### 2. 
离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +```bash +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + +**评测结果:** + +| 模型 | pth精度 | 310离线推理精度 | 性能基准 | 310性能 | +| ----------- | --------- | --------------- | --------- | ------- | +| yolox-x bs1 | map:51.2% | map:51.1% | 60.739fps | 37.72144fps | + + + diff --git a/ACL_PyTorch/contrib/cv/detection/pyramidbox/README.md b/ACL_PyTorch/contrib/cv/detection/pyramidbox/README.md index 4284fc05decc2f23c229eb6dcbe9ca5ff9753c58..6b826043421d14ad967cbb1567408866acb74bd4 100644 --- a/ACL_PyTorch/contrib/cv/detection/pyramidbox/README.md +++ b/ACL_PyTorch/contrib/cv/detection/pyramidbox/README.md @@ -1,292 +1,292 @@ -# Pyramidbox Onnx模型端到端推理指导 - -- 1 模型概述 - - [1.1 论文地址]([[1803.07737\] PyramidBox: A Context-assisted Single Shot Face Detector (arxiv.org)](https://arxiv.org/abs/1803.07737)) - - [1.2 代码地址](https://gitee.com/kghhkhkljl/pyramidbox.git) -- 2 环境说明 - - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#21-深度学习框架) - - [2.2 python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#22-python第三方库) -- 3 模型转换 - - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#31-pth转onnx模型) - - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#32-onnx转om模型) -- 4 数据集预处理 - - [4.1 数据集获取](https://www.graviti.cn/open-datasets/WIDER_FACE) - - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#42-数据集预处理) - - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#43-生成数据集信息文件) -- 5 离线推理 - - [5.1 benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) - - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#52-离线推理) -- 6 精度对比 - - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#61-离线推理精度统计) - - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#62-开源精度) - - [6.3 精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#63-精度对比) -- 7 性能对比 - - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#71-npu性能数据) - - [7.2 T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#72-T4性能数据) - - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#73-性能对比) - -## 1 模型概述 - -- **论文地址** -- **代码地址** - -### 1.1 论文地址 - -[Pyramidbox论文](https://arxiv.org/abs/1803.07737) - -### 1.2 代码地址 - -https://gitee.com/kghhkhkljl/pyramidbox.git - -## 2 环境说明 - -- **深度学习框架** -- **python第三方库** - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.3 - -pytorch >= 1.5.0 -torchvision >= 0.10.0 -onnx >= 1.7.0 - -说明:若是在conda环境下,直接采用python,不用python3.7 -``` - -### 2.2 python第三方库 - -``` -torch == 1.9.0 -numpy == 1.20.3 -Pillow == 8.2.0 -opencv-python == 4.5.3.56 -scipy == 1.7.1 -easydict == 1.9 -six == 1.16.0 -pycocotools == 2.0.2 -``` - -## 3 模型转换 - -- 
**pth转onnx模型** -- **onnx转om模型** - -### 3.1 pth转onnx模型 - -1.拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) - -```shell -git clone https://gitee.com/kghhkhkljl/pyramidbox.git -``` - -克隆下来源代码之后将pr中的代码放到克隆下来的pyramidbox下面 - -2.下载pth权重文件 -权重文件从百度网盘上获取:[pyramidbox_120000_99.02.pth_免费高速下载|百度网盘-分享无限制 (baidu.com)](https://pan.baidu.com/s/1VtzgB9srkJY4SUtVM3n8tw?_at_=1631960039538) - -下载下来的权重文件也需要放在pyramidbox目录下面 - -3.使用pth2onnx.py进行onnx的转换 - -``` -方法二:cd pyramidbox/test -bash pth2onnx.sh -方法二:cd pyramidbox -python3.7 pyramidbox_pth2onnx.py ./pyramidbox_1000.onnx ./pyramidbox_120000_99.02.pth -第一个参数是onnx文件生成在当前目录的名字,第二个参数是当前目录下的权重文件 -``` - -### 3.2 onnx转om模型 - -1.设置环境变量 - -``` -source atc.sh -``` - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 - -``` -方法一:cd pyramidbox/test -bash onnxToom.sh -方法二:cd pyramidbox -atc --framework=5 --model=pyramidbox_1000.onnx --input_format=NCHW --input_shape="image:1,3,1000,1000" --output=pyramidbox_1000_bs1 --log=debug --soc_version=Ascend310 --precision_mode=force_fp32 - ---model是onnx的文件名,--input_shape是图片的shape,--output是输出on文件的文件名 -``` - -## 4 数据集预处理 - -- **数据集获取** -- **数据集预处理** -- **生成数据集信息文件** - -### 4.1 数据集获取 - -下载WIDER_FACE数据集: - -下载地址:https://www.graviti.cn/open-datasets/WIDER_FACE - -可以将数据集图片放在pyramidbox目录下的images下面,images目录需要自己创建(说明:images下面是个二级目录) - -``` -cd pyramidbox/images -``` - -### 4.2 数据集预处理 - -1.预处理脚本pyramidbox_pth_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -方法一:cd pyramidbox/test -bash pre_deal.sh -方法二:cd pyramidbox -python3.7 pyramidbox_pth_preprocess.py ./images ./data1000_1 ./data1000_2 -第一个参数是预处理文件,第二个参数是数据集所在目录,第三和第四个参数是预处理后的文件名(说明:由于预处理需要进行两次图片的不同处理,所以生成的文件有两个) -``` - -### 4.3 生成数据集信息文件 - -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -方法一:cd pyramidbox/test -bash to_info.sh -方法二:cd pyramidbox -python3.7 get_info.py bin ./data1000_1 ./pyramidbox_pre_bin_1000_1.info 1000 1000 -python3.7 get_info.py bin ./data1000_2 ./pyramidbox_pre_bin_1000_2.info 1000 1000 - -第一个是预处理后的数据集所在目录,第二个参数是生成的info文件名,后两个参数是图片的宽高。(说明:由于预处理会对图片进行两次处理,生成的文件有两个,所以会需要生成两个info文件) -``` - -## 5 离线推理 - -- **benchmark工具概述** -- **离线推理** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.3推理benchmark工具用户指南 - -### 5.2 离线推理 - -1.执行离线推理 - -执行前需要将benchmark.x86_64移动到执行目录下 - -(注:执行目录是/pyramidbox) - -然后运行如下命令: - -``` -方法一:cd pyramidbox/test -bash infer.sh -方法二:cd pyramidbox -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./pyramidbox_1000_bs1.om -input_text_path=./pyramidbox_pre_bin_1.info -input_width=1000 -input_height=1000 -output_binary=True -useDvpp=False --precision_mode=force_fp32 -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=./pyramidbox_1000_bs1.om -input_text_path=./pyramidbox_pre_bin_2.info -input_width=1000 -input_height=1000 -output_binary=True -useDvpp=False --precision_mode=force_fp32 - --om_path为om所在的路径,-input_text_path为预处理后的bin文件的整个info文件,-input_width为图片的宽,-input_height为图片的高。由于预处理后的数据集有两个,所以此脚本需要运行两次,第二次运行只需要改动-device_id=1和-input_text_path为相应的info文件即可(例如:pyramidbox_pre_bin_2.info)。 -``` - -输出结果默认保存在当前目录result/dumpOutput_device{0}以及result/dumpOutput_device{1}下,每个输入对应的输出对应2个_1.bin文件,我们只使用第一个。 - -2.处理目录result/dumpOutput_device{0}和result/dumpOutput_device{1}下的bin文件 - -将该目录下的文件分类别存放,以便于后处理 - -``` -方法一:cd pyramidbox/test -bash convert.sh -方法二:cd pyramidbox -python3.7 convert.py ./result/dumpOutput_device0/ ./result/result1 -python3.7 convert.py ./result/dumpOutput_device1/ ./result/result2 
-第一个参数是infer.sh脚本生成的文件,第二个参数是生成的二级目录所在的文件夹。 -``` - - - -## 6 精度对比 - -- **离线推理精度** -- **开源精度** -- **精度对比** - -### 6.1 离线推理精度统计 - -1.后处理 - -``` -cd ./pyramidbox -python3.7 pyramidbox_pth_postprocess.py -``` - -2.进行Ascend310上精度评估 - -``` -cd ./pyramidbox/evaluate -python3.7 evaluation.py -``` - -### 6.2 开源精度 - -pyramidbox在线推理精度: - -``` -Easy Val AP: 0.958986327388428 -Medium Val AP: 0.9504929578311708 -Hard Val AP: 0.907248372271328 -``` - -### 6.3 精度对比 - -``` -Easy Val AP: 0.9628280209085509 -Medium Val AP: 0.9538134269337523 -Hard Val AP: 0.8798007442124222 -``` - -### 6.3 精度对比 - -由于源码没有固定住shape,所以精度会有损失,因此和同一分辨率下的在线推理进行对比。对比方式:三个尺度求和取平均。 - -## 7 性能对比 - -- **npu性能数据** -- **T4性能数据** -- **性能对比** - -### 7.1 npu性能数据 - -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 0.609815, latency: 5.29013e+06 -[data read] throughputRate: 0.635586, moduleLatency: 1573.35 -[preprocess] throughputRate: 0.61536, moduleLatency: 1625.07 -[infer] throughputRate: 0.6099, Interface throughputRate: 0.620281, moduleLatency: 1638.44 -[post] throughputRate: 0.6099, moduleLatency: 1639.61 -``` - -Interface throughputRate: 0.620281,0.620281x4=2.48既是batch1 310单卡吞吐率 - - - -说明:由于bs2以上会导致爆显存,所以测不了性能,此处只测了bs1。 - -![1633688929248](C:\Users\Eiven\AppData\Roaming\Typora\typora-user-images\1633688929248.png) - -### 7.2 T4性能数据 - -batch1 t4单卡吞吐率的计算方法是通过计算平均每张图片的耗时t,然后用1/t即是batch1 t4的单卡吞吐率。此处的t=1.560808,所以吞吐率为0.6407 - -### 7.3 性能对比 - +# Pyramidbox Onnx模型端到端推理指导 + +- 1 模型概述 + - [1.1 论文地址]([[1803.07737\] PyramidBox: A Context-assisted Single Shot Face Detector (arxiv.org)](https://arxiv.org/abs/1803.07737)) + - [1.2 代码地址](https://gitee.com/kghhkhkljl/pyramidbox.git) +- 2 环境说明 + - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#21-深度学习框架) + - [2.2 python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#22-python第三方库) +- 3 模型转换 + - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#31-pth转onnx模型) + - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#32-onnx转om模型) +- 4 数据集预处理 + - [4.1 数据集获取](https://www.graviti.cn/open-datasets/WIDER_FACE) + - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#42-数据集预处理) + - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#43-生成数据集信息文件) +- 5 离线推理 + - [5.1 benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) + - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#52-离线推理) +- 6 精度对比 + - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#61-离线推理精度统计) + - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#62-开源精度) + - [6.3 精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#63-精度对比) +- 7 性能对比 + - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#71-npu性能数据) + - [7.2 
T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#72-T4性能数据) + - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#73-性能对比) + +## 1 模型概述 + +- **论文地址** +- **代码地址** + +### 1.1 论文地址 + +[Pyramidbox论文](https://arxiv.org/abs/1803.07737) + +### 1.2 代码地址 + +https://gitee.com/kghhkhkljl/pyramidbox.git + +## 2 环境说明 + +- **深度学习框架** +- **python第三方库** + +### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.3 + +pytorch >= 1.5.0 +torchvision >= 0.10.0 +onnx >= 1.7.0 + +说明:若是在conda环境下,直接采用python,不用python3.7 +``` + +### 2.2 python第三方库 + +``` +torch == 1.9.0 +numpy == 1.20.3 +Pillow == 8.2.0 +opencv-python == 4.5.3.56 +scipy == 1.7.1 +easydict == 1.9 +six == 1.16.0 +pycocotools == 2.0.2 +``` + +## 3 模型转换 + +- **pth转onnx模型** +- **onnx转om模型** + +### 3.1 pth转onnx模型 + +1.拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) + +```shell +git clone https://gitee.com/kghhkhkljl/pyramidbox.git +``` + +克隆下来源代码之后将pr中的代码放到克隆下来的pyramidbox下面 + +2.下载pth权重文件 +权重文件从百度网盘上获取:[pyramidbox_120000_99.02.pth_免费高速下载|百度网盘-分享无限制 (baidu.com)](https://pan.baidu.com/s/1VtzgB9srkJY4SUtVM3n8tw?_at_=1631960039538) + +下载下来的权重文件也需要放在pyramidbox目录下面 + +3.使用pth2onnx.py进行onnx的转换 + +``` +方法二:cd pyramidbox/test +bash pth2onnx.sh +方法二:cd pyramidbox +python3.7 pyramidbox_pth2onnx.py ./pyramidbox_1000.onnx ./pyramidbox_120000_99.02.pth +第一个参数是onnx文件生成在当前目录的名字,第二个参数是当前目录下的权重文件 +``` + +### 3.2 onnx转om模型 + +1.设置环境变量 + +``` +source atc.sh +``` + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 + +``` +方法一:cd pyramidbox/test +bash onnxToom.sh +方法二:cd pyramidbox +atc --framework=5 --model=pyramidbox_1000.onnx --input_format=NCHW --input_shape="image:1,3,1000,1000" --output=pyramidbox_1000_bs1 --log=debug --soc_version=Ascend310 --precision_mode=force_fp32 + +--model是onnx的文件名,--input_shape是图片的shape,--output是输出on文件的文件名 +``` + +## 4 数据集预处理 + +- **数据集获取** +- **数据集预处理** +- **生成数据集信息文件** + +### 4.1 数据集获取 + +下载WIDER_FACE数据集: + +下载地址:https://www.graviti.cn/open-datasets/WIDER_FACE + +可以将数据集图片放在pyramidbox目录下的images下面,images目录需要自己创建(说明:images下面是个二级目录) + +``` +cd pyramidbox/images +``` + +### 4.2 数据集预处理 + +1.预处理脚本pyramidbox_pth_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +方法一:cd pyramidbox/test +bash pre_deal.sh +方法二:cd pyramidbox +python3.7 pyramidbox_pth_preprocess.py ./images ./data1000_1 ./data1000_2 +第一个参数是预处理文件,第二个参数是数据集所在目录,第三和第四个参数是预处理后的文件名(说明:由于预处理需要进行两次图片的不同处理,所以生成的文件有两个) +``` + +### 4.3 生成数据集信息文件 + +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +方法一:cd pyramidbox/test +bash to_info.sh +方法二:cd pyramidbox +python3.7 get_info.py bin ./data1000_1 ./pyramidbox_pre_bin_1000_1.info 1000 1000 +python3.7 get_info.py bin ./data1000_2 ./pyramidbox_pre_bin_1000_2.info 1000 1000 + +第一个是预处理后的数据集所在目录,第二个参数是生成的info文件名,后两个参数是图片的宽高。(说明:由于预处理会对图片进行两次处理,生成的文件有两个,所以会需要生成两个info文件) +``` + +## 5 离线推理 + +- **benchmark工具概述** +- **离线推理** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.3推理benchmark工具用户指南 + +### 5.2 离线推理 + +1.执行离线推理 + +执行前需要将benchmark.x86_64移动到执行目录下 + +(注:执行目录是/pyramidbox) + +然后运行如下命令: + +``` +方法一:cd pyramidbox/test +bash infer.sh +方法二:cd pyramidbox +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./pyramidbox_1000_bs1.om -input_text_path=./pyramidbox_pre_bin_1.info -input_width=1000 -input_height=1000 -output_binary=True -useDvpp=False --precision_mode=force_fp32 +./benchmark.x86_64 -model_type=vision 
-device_id=1 -batch_size=1 -om_path=./pyramidbox_1000_bs1.om -input_text_path=./pyramidbox_pre_bin_2.info -input_width=1000 -input_height=1000 -output_binary=True -useDvpp=False --precision_mode=force_fp32 + +-om_path为om所在的路径,-input_text_path为预处理后的bin文件的整个info文件,-input_width为图片的宽,-input_height为图片的高。由于预处理后的数据集有两个,所以此脚本需要运行两次,第二次运行只需要改动-device_id=1和-input_text_path为相应的info文件即可(例如:pyramidbox_pre_bin_2.info)。 +``` + +输出结果默认保存在当前目录result/dumpOutput_device{0}以及result/dumpOutput_device{1}下,每个输入对应的输出对应2个_1.bin文件,我们只使用第一个。 + +2.处理目录result/dumpOutput_device{0}和result/dumpOutput_device{1}下的bin文件 + +将该目录下的文件分类别存放,以便于后处理 + +``` +方法一:cd pyramidbox/test +bash convert.sh +方法二:cd pyramidbox +python3.7 convert.py ./result/dumpOutput_device0/ ./result/result1 +python3.7 convert.py ./result/dumpOutput_device1/ ./result/result2 +第一个参数是infer.sh脚本生成的文件,第二个参数是生成的二级目录所在的文件夹。 +``` + + + +## 6 精度对比 + +- **离线推理精度** +- **开源精度** +- **精度对比** + +### 6.1 离线推理精度统计 + +1.后处理 + +``` +cd ./pyramidbox +python3.7 pyramidbox_pth_postprocess.py +``` + +2.进行Ascend310上精度评估 + +``` +cd ./pyramidbox/evaluate +python3.7 evaluation.py +``` + +### 6.2 开源精度 + +pyramidbox在线推理精度: + +``` +Easy Val AP: 0.958986327388428 +Medium Val AP: 0.9504929578311708 +Hard Val AP: 0.907248372271328 +``` + +### 6.3 精度对比 + +``` +Easy Val AP: 0.9628280209085509 +Medium Val AP: 0.9538134269337523 +Hard Val AP: 0.8798007442124222 +``` + +### 6.3 精度对比 + +由于源码没有固定住shape,所以精度会有损失,因此和同一分辨率下的在线推理进行对比。对比方式:三个尺度求和取平均。 + +## 7 性能对比 + +- **npu性能数据** +- **T4性能数据** +- **性能对比** + +### 7.1 npu性能数据 + +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 0.609815, latency: 5.29013e+06 +[data read] throughputRate: 0.635586, moduleLatency: 1573.35 +[preprocess] throughputRate: 0.61536, moduleLatency: 1625.07 +[infer] throughputRate: 0.6099, Interface throughputRate: 0.620281, moduleLatency: 1638.44 +[post] throughputRate: 0.6099, moduleLatency: 1639.61 +``` + +Interface throughputRate: 0.620281,0.620281x4=2.48既是batch1 310单卡吞吐率 + + + +说明:由于bs2以上会导致爆显存,所以测不了性能,此处只测了bs1。 + +![1633688929248](C:\Users\Eiven\AppData\Roaming\Typora\typora-user-images\1633688929248.png) + +### 7.2 T4性能数据 + +batch1 t4单卡吞吐率的计算方法是通过计算平均每张图片的耗时t,然后用1/t即是batch1 t4的单卡吞吐率。此处的t=1.560808,所以吞吐率为0.6407 + +### 7.3 性能对比 + batch1:0.620281x4=2.48>0.6407 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/pyramidbox/convert.py b/ACL_PyTorch/contrib/cv/detection/pyramidbox/convert.py index bb6bdfd5c6708236f5c1e7bcb5c5b1eeab95a276..ef9043a138a97d380fc8fe96056b3dba6ee974bf 100644 --- a/ACL_PyTorch/contrib/cv/detection/pyramidbox/convert.py +++ b/ACL_PyTorch/contrib/cv/detection/pyramidbox/convert.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import shutil - -if __name__ == '__main__': - bin_path = sys.argv[1] - resule_path = sys.argv[2] - if not os.path.exists(resule_path): - os.mkdir(resule_path) - f = os.listdir(bin_path) - for data in f: - data = data.strip('\n') - dir_name = data.split('_')[0] + '--' + data.split('_')[1] - dir_path = os.path.join(resule_path, dir_name) - if not os.path.exists(dir_path): - os.mkdir(dir_path) - file_list = os.listdir(resule_path) - for dir in file_list: - dir = dir.strip('\n') - cur_path = os.path.join(resule_path, dir) - for data in f: - data = data.strip('\n') - if data.split('_')[0] == dir.split('--')[0]: - shutil.copy(os.path.join(bin_path, data), - os.path.join(cur_path, data)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import shutil + +if __name__ == '__main__': + bin_path = sys.argv[1] + resule_path = sys.argv[2] + if not os.path.exists(resule_path): + os.mkdir(resule_path) + f = os.listdir(bin_path) + for data in f: + data = data.strip('\n') + dir_name = data.split('_')[0] + '--' + data.split('_')[1] + dir_path = os.path.join(resule_path, dir_name) + if not os.path.exists(dir_path): + os.mkdir(dir_path) + file_list = os.listdir(resule_path) + for dir in file_list: + dir = dir.strip('\n') + cur_path = os.path.join(resule_path, dir) + for data in f: + data = data.strip('\n') + if data.split('_')[0] == dir.split('--')[0]: + shutil.copy(os.path.join(bin_path, data), + os.path.join(cur_path, data)) diff --git a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth2onnx.py b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth2onnx.py index 7740e25cf5792fadbfe4bbdfe0dbc8eb341cbad7..d0faceedf538e336386b943c7afa497dc4faed67 100644 --- a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth2onnx.py @@ -1,35 +1,35 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from data.config import cfg -from pyramidbox import build_net -import numpy as np -import os -import sys - -def main(onnx_path,path): - input_names=["image"] - output_names = ["output"] - net = build_net('test',2) - net.eval() - net.load_state_dict(torch.load(path,map_location='cpu')) - # dynamic_axes = {'image': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1,3,1000,1000) - torch.onnx.export(net,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,enable_onnx_checker=False,opset_version=9) - -if __name__ =="__main__": - onnx_path = os.path.abspath(sys.argv[1]) - path = os.path.abspath(sys.argv[2]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from data.config import cfg +from pyramidbox import build_net +import numpy as np +import os +import sys + +def main(onnx_path,path): + input_names=["image"] + output_names = ["output"] + net = build_net('test',2) + net.eval() + net.load_state_dict(torch.load(path,map_location='cpu')) + # dynamic_axes = {'image': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1,3,1000,1000) + torch.onnx.export(net,dummy_input,onnx_path,input_names = input_names,output_names=output_names,verbose=True,enable_onnx_checker=False,opset_version=9) + +if __name__ =="__main__": + onnx_path = os.path.abspath(sys.argv[1]) + path = os.path.abspath(sys.argv[2]) main(onnx_path,path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_postprocess.py b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_postprocess.py index c6377cec09f9d13b3908137025c972cd33fc547d..6fa423728e0c479d5deed1c4f4b23036af5b51b3 100644 --- a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_postprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_postprocess.py @@ -1,227 +1,227 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division - -import sys -import os -import torch -import argparse -import numpy as np -import cv2 -import os.path as osp -import torch.backends.cudnn as cudnn - -from PIL import Image -import scipy.io as sio -from data.config import cfg -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr -from layers.bbox_utils import decode, nms - -parser = argparse.ArgumentParser(description='pyramidbox evaluatuon wider') -parser.add_argument('--thresh', default=0.05, type=float, - help='Final confidence threshold') -args = parser.parse_args() - -use_cuda = torch.cuda.is_available() -if use_cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -List1 = [] -List2 = [] -root_path = os.getcwd() -def pre_postprocess(i,path): - listt=[] - global t - if i==0: - path = os.path.join(root_path,path) - else: - path = os.path.join(root_path,'result/result2') - File = os.listdir(path) - for file in sorted(File): - Doc = [] #save no-repeated file name - os.chdir(os.path.join(path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in sorted(doc): - Doc.append(document[0:-6]) #grip end - Doc = list(set(Doc)) #grip repeated element - for ff in sorted(Doc): #deal after sorting - txt_file = np.fromfile(f'{path}/{file}/{ff}_1.bin', dtype=np.float16) - output = torch.tensor(txt_file.reshape(-1,1000,5)) - listt.append(output) - return listt - -def detect_face(img, counter,i): - h, w = img.shape[0], img.shape[1] - min_side = 1280 - scale = max(w, h) / float(min_side) - if i==0: - detections = List1[counter].data - else: - detections = List2[counter].data - detections = detections.cpu().numpy() - det_conf = detections[0, :, 0] - det_xmin = 1280 * detections[0, :, 1] * scale #x1 - det_ymin = 1280 * detections[0, :, 2] * scale #y1 - det_xmax = 1280 * detections[0, :, 3] * scale #x2 - det_ymax = 1280 * detections[0, :, 4] * scale #y2 - det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) - keep_index = np.where(det[:, 4] >= args.thresh)[0] - det = det[keep_index, :] - return det - -def flip_test(image,counter,i): - image_f = cv2.flip(image, 1) - det_f = detect_face(image_f,counter,1) - - det_t = np.zeros(det_f.shape) - det_t[:, 0] = image.shape[1] - det_f[:, 2] - det_t[:, 1] = det_f[:, 1] - det_t[:, 2] = image.shape[1] - det_f[:, 0] - det_t[:, 3] = det_f[:, 3] - det_t[:, 4] = det_f[:, 4] - return det_t - -def multi_scale_test(image, max_im_shrink,counter,i): - # shrink detecting and shrink only detect big face - st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink - det_s = detect_face(image,counter,i) - index = np.where(np.maximum( - det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] - det_s = det_s[index, :] - - # enlarge one times - bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( - st + max_im_shrink) / 2 - det_b = detect_face(image,counter,i) - - - # enlarge only detect small face - if bt > 1: - index = np.where(np.minimum( - det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] - det_b = det_b[index, :] - else: - index = np.where(np.maximum( - det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] - det_b = det_b[index, :] - - return det_s, det_b - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = 
np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - # get needed merge det and delete these det - merge_index = np.where(o >= 0.3)[0] - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - - if merge_index.shape[0] <= 1: - continue - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum( - det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - dets = dets[0:750, :] - return dets - -if __name__ == '__main__': - #mat_path = os.path.abspath(sys.argv[1]) #mat path './evaluate/ground_truth/wider_face_val.mat' - #img_path = os.path.abspath(sys.argv[2]) #image path './images' - #save_path = os.path.abspath(sys.argv[3]) #save path './output_0.01/widerface/' - #path1 = os.path.abspath(sys.argv[4]) #first data ---> result './result1/result1' - #path2 = os.path.abspath(sys.argv[5]) #second data ---> result './result2/result2' - wider_face = sio.loadmat('./evaluate/ground_truth/wider_face_val.mat') - event_list = wider_face['event_list'] - file_list = wider_face['file_list'] - del wider_face - imgs_path = root_path+'/images' - save_path = root_path+'/output_1000' - counter = 0 - if use_cuda: - cudnn.benckmark = True - path1 = './result/result1' - path2 = './result/result2/result2' - List1 = pre_postprocess(0,path1) - List2 = pre_postprocess(1,path2) - print(List1) - print(len(List1)) - print('-----------------------------------------------') - print(len(List2)) - print(List2) - i=0 - for index, event in enumerate(sorted(event_list)): - filelist = file_list[index][0] - path = os.path.join(save_path, str(event[0][0])) - if not os.path.exists(path): - os.makedirs(path) - i = i+1 - for num, file in enumerate(sorted(filelist)): - im_name = file[0][0] - print(im_name) - in_file = os.path.join(imgs_path, event[0][0], str(im_name[:]) + '.jpg') - img = Image.open(in_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - max_im_shrink = np.sqrt( - 1700 * 1000 / (img.shape[0] * img.shape[1])) - shrink = max_im_shrink if max_im_shrink < 1 else 1 - counter += 1 - det0 = detect_face(img,counter-1,0) - - det1 = flip_test(img,counter-1,1) # flip test - [det2, det3] = multi_scale_test( img, max_im_shrink,counter-1,0) - - det = np.row_stack((det0, det1, det2, det3)) - if det.shape[0] ==1: - dets =det - else: - dets = bbox_vote(det) - - fout = open(osp.join(save_path, str(event[0][ - 0]), im_name + '.txt'), 'w') - fout.write('{:s}\n'.format(str(event[0][0]) + '/' + im_name + '.jpg')) - fout.write('{:d}\n'.format(dets.shape[0])) - for i in range(dets.shape[0]): - xmin = dets[i][0] - ymin = dets[i][1] - xmax = dets[i][2] - ymax = dets[i][3] - score = dets[i][4] - fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
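bbox_vote in the postprocess script above repeatedly takes the highest-scoring box, merges every box whose IoU with it is >= 0.3, and score-weights the merged coordinates. A self-contained NumPy sketch of the IoU step it relies on (toy boxes only, using the same x2 - x1 + 1 pixel convention):

import numpy as np

def iou_with_first(dets):
    # IoU of every (x1, y1, x2, y2, score) row against the first (highest-scoring) row.
    area = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
    xx1 = np.maximum(dets[0, 0], dets[:, 0])
    yy1 = np.maximum(dets[0, 1], dets[:, 1])
    xx2 = np.minimum(dets[0, 2], dets[:, 2])
    yy2 = np.minimum(dets[0, 3], dets[:, 3])
    inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
    return inter / (area[0] + area - inter)

# Two heavily overlapping boxes and one distant box: only the first two clear the 0.3 threshold.
dets = np.array([[10., 10., 60., 60., 0.9],
                 [12., 12., 62., 62., 0.8],
                 [200., 200., 250., 250., 0.7]])
print(iou_with_first(dets))   # approximately [1.00, 0.86, 0.00]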
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division + +import sys +import os +import torch +import argparse +import numpy as np +import cv2 +import os.path as osp +import torch.backends.cudnn as cudnn + +from PIL import Image +import scipy.io as sio +from data.config import cfg +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr +from layers.bbox_utils import decode, nms + +parser = argparse.ArgumentParser(description='pyramidbox evaluatuon wider') +parser.add_argument('--thresh', default=0.05, type=float, + help='Final confidence threshold') +args = parser.parse_args() + +use_cuda = torch.cuda.is_available() +if use_cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +List1 = [] +List2 = [] +root_path = os.getcwd() +def pre_postprocess(i,path): + listt=[] + global t + if i==0: + path = os.path.join(root_path,path) + else: + path = os.path.join(root_path,'result/result2') + File = os.listdir(path) + for file in sorted(File): + Doc = [] #save no-repeated file name + os.chdir(os.path.join(path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in sorted(doc): + Doc.append(document[0:-6]) #grip end + Doc = list(set(Doc)) #grip repeated element + for ff in sorted(Doc): #deal after sorting + txt_file = np.fromfile(f'{path}/{file}/{ff}_1.bin', dtype=np.float16) + output = torch.tensor(txt_file.reshape(-1,1000,5)) + listt.append(output) + return listt + +def detect_face(img, counter,i): + h, w = img.shape[0], img.shape[1] + min_side = 1280 + scale = max(w, h) / float(min_side) + if i==0: + detections = List1[counter].data + else: + detections = List2[counter].data + detections = detections.cpu().numpy() + det_conf = detections[0, :, 0] + det_xmin = 1280 * detections[0, :, 1] * scale #x1 + det_ymin = 1280 * detections[0, :, 2] * scale #y1 + det_xmax = 1280 * detections[0, :, 3] * scale #x2 + det_ymax = 1280 * detections[0, :, 4] * scale #y2 + det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) + keep_index = np.where(det[:, 4] >= args.thresh)[0] + det = det[keep_index, :] + return det + +def flip_test(image,counter,i): + image_f = cv2.flip(image, 1) + det_f = detect_face(image_f,counter,1) + + det_t = np.zeros(det_f.shape) + det_t[:, 0] = image.shape[1] - det_f[:, 2] + det_t[:, 1] = det_f[:, 1] + det_t[:, 2] = image.shape[1] - det_f[:, 0] + det_t[:, 3] = det_f[:, 3] + det_t[:, 4] = det_f[:, 4] + return det_t + +def multi_scale_test(image, max_im_shrink,counter,i): + # shrink detecting and shrink only detect big face + st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink + det_s = detect_face(image,counter,i) + index = np.where(np.maximum( + det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] + det_s = det_s[index, :] + + # enlarge one times + bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( + st + max_im_shrink) / 2 + det_b = detect_face(image,counter,i) + + + # enlarge only detect small face + if bt > 1: + index = np.where(np.minimum( + det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - 
det_b[:, 1] + 1) < 100)[0] + det_b = det_b[index, :] + else: + index = np.where(np.maximum( + det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] + det_b = det_b[index, :] + + return det_s, det_b + +def bbox_vote(det): + order = det[:, 4].ravel().argsort()[::-1] + det = det[order, :] + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + # get needed merge det and delete these det + merge_index = np.where(o >= 0.3)[0] + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + + if merge_index.shape[0] <= 1: + continue + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + det_accu_sum[:, 0:4] = np.sum( + det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + dets = dets[0:750, :] + return dets + +if __name__ == '__main__': + #mat_path = os.path.abspath(sys.argv[1]) #mat path './evaluate/ground_truth/wider_face_val.mat' + #img_path = os.path.abspath(sys.argv[2]) #image path './images' + #save_path = os.path.abspath(sys.argv[3]) #save path './output_0.01/widerface/' + #path1 = os.path.abspath(sys.argv[4]) #first data ---> result './result1/result1' + #path2 = os.path.abspath(sys.argv[5]) #second data ---> result './result2/result2' + wider_face = sio.loadmat('./evaluate/ground_truth/wider_face_val.mat') + event_list = wider_face['event_list'] + file_list = wider_face['file_list'] + del wider_face + imgs_path = root_path+'/images' + save_path = root_path+'/output_1000' + counter = 0 + if use_cuda: + cudnn.benckmark = True + path1 = './result/result1' + path2 = './result/result2/result2' + List1 = pre_postprocess(0,path1) + List2 = pre_postprocess(1,path2) + print(List1) + print(len(List1)) + print('-----------------------------------------------') + print(len(List2)) + print(List2) + i=0 + for index, event in enumerate(sorted(event_list)): + filelist = file_list[index][0] + path = os.path.join(save_path, str(event[0][0])) + if not os.path.exists(path): + os.makedirs(path) + i = i+1 + for num, file in enumerate(sorted(filelist)): + im_name = file[0][0] + print(im_name) + in_file = os.path.join(imgs_path, event[0][0], str(im_name[:]) + '.jpg') + img = Image.open(in_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + max_im_shrink = np.sqrt( + 1700 * 1000 / (img.shape[0] * img.shape[1])) + shrink = max_im_shrink if max_im_shrink < 1 else 1 + counter += 1 + det0 = detect_face(img,counter-1,0) + + det1 = flip_test(img,counter-1,1) # flip test + [det2, det3] = multi_scale_test( img, max_im_shrink,counter-1,0) + + det = np.row_stack((det0, det1, det2, det3)) + if det.shape[0] ==1: + dets =det + else: + dets = bbox_vote(det) + + fout = open(osp.join(save_path, str(event[0][ + 0]), im_name + '.txt'), 'w') + fout.write('{:s}\n'.format(str(event[0][0]) + '/' + im_name + '.jpg')) + fout.write('{:d}\n'.format(dets.shape[0])) + for i in range(dets.shape[0]): + xmin = dets[i][0] + ymin = dets[i][1] + xmax = dets[i][2] + ymax = dets[i][3] + score = dets[i][4] + fout.write('{:.1f} {:.1f} {:.1f} {:.1f} 
{:.3f}\n'. format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_preprocess.py b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_preprocess.py index e844307d8d497f6498049849e46e7657b519097b..d70c40747f5d508e97a30b999b521dde46efdf5e 100644 --- a/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_preprocess.py +++ b/ACL_PyTorch/contrib/cv/detection/pyramidbox/pyramidbox_pth_preprocess.py @@ -1,109 +1,109 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division - -import sys -import os -import torch -import numpy as np -import cv2 - -from PIL import Image -from data.config import cfg -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr - -use_cuda = torch.cuda.is_available() - -if use_cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -def process_image(img, min_side): - h, w = img.shape[0], img.shape[1] - #长边缩放为min_side - scale = max(w, h) / float(min_side) - new_w, new_h = int(w/scale), int(h/scale) - resize_img = cv2.resize(img, (new_w, new_h)) - # 填充至min_side * min_side - bottom = min_side-new_h - right = min_side-new_w - img = cv2.copyMakeBorder(resize_img, 0, int(bottom), 0, int(right), cv2.BORDER_CONSTANT, value=[0,0,0]) #从图像边界向上,下,左,右扩的像素数目 - return img - -def preprocess(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - for file in sorted(in_files): - os.chdir(os.path.join(file_path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in sorted(doc): - in_file = os.path.join(cur_path, document) - img = Image.open(in_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - img = process_image(img,1000) #对图片进行放缩加padding - x = to_chw_bgr(img) - - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if use_cuda: - x = x.cuda() - if not os.path.exists(os.path.join(bin_path,file)): - os.makedirs(os.path.join(bin_path,file)) - des_path = os.path.join(bin_path,file) - x.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) - -def preprocess1(file_path, bin_path): - in_files = os.listdir(file_path) - if not os.path.exists(bin_path): - os.makedirs(bin_path) - for file in sorted(in_files): - os.chdir(os.path.join(file_path, file)) - cur_path = os.getcwd() - doc = os.listdir(cur_path) - for document in sorted(doc): - in_file = os.path.join(cur_path, document) - img = Image.open(in_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - img = cv2.flip(img, 1) - img = process_image(img,1000) - x = to_chw_bgr(img) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if 
use_cuda: - x = x.cuda() - if not os.path.exists(os.path.join(bin_path,file)): - os.makedirs(os.path.join(bin_path,file)) - des_path = os.path.join(bin_path,file) - x.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) - -if __name__ == "__main__": - file_path = os.path.abspath(sys.argv[1]) - bin_path1 = os.path.abspath(sys.argv[2]) - bin_path2 = os.path.abspath(sys.argv[3]) - preprocess(file_path, bin_path1) - preprocess1(file_path, bin_path2) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division + +import sys +import os +import torch +import numpy as np +import cv2 + +from PIL import Image +from data.config import cfg +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr + +use_cuda = torch.cuda.is_available() + +if use_cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +def process_image(img, min_side): + h, w = img.shape[0], img.shape[1] + #长边缩放为min_side + scale = max(w, h) / float(min_side) + new_w, new_h = int(w/scale), int(h/scale) + resize_img = cv2.resize(img, (new_w, new_h)) + # 填充至min_side * min_side + bottom = min_side-new_h + right = min_side-new_w + img = cv2.copyMakeBorder(resize_img, 0, int(bottom), 0, int(right), cv2.BORDER_CONSTANT, value=[0,0,0]) #从图像边界向上,下,左,右扩的像素数目 + return img + +def preprocess(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + for file in sorted(in_files): + os.chdir(os.path.join(file_path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in sorted(doc): + in_file = os.path.join(cur_path, document) + img = Image.open(in_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + img = process_image(img,1000) #对图片进行放缩加padding + x = to_chw_bgr(img) + + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_cuda: + x = x.cuda() + if not os.path.exists(os.path.join(bin_path,file)): + os.makedirs(os.path.join(bin_path,file)) + des_path = os.path.join(bin_path,file) + x.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) + +def preprocess1(file_path, bin_path): + in_files = os.listdir(file_path) + if not os.path.exists(bin_path): + os.makedirs(bin_path) + for file in sorted(in_files): + os.chdir(os.path.join(file_path, file)) + cur_path = os.getcwd() + doc = os.listdir(cur_path) + for document in sorted(doc): + in_file = os.path.join(cur_path, document) + img = Image.open(in_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + img = cv2.flip(img, 1) + img = process_image(img,1000) + x = to_chw_bgr(img) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_cuda: + x = x.cuda() + if not os.path.exists(os.path.join(bin_path,file)): + 
os.makedirs(os.path.join(bin_path,file)) + des_path = os.path.join(bin_path,file) + x.numpy().tofile(os.path.join(des_path,document.split('.')[0] +'.bin')) + +if __name__ == "__main__": + file_path = os.path.abspath(sys.argv[1]) + bin_path1 = os.path.abspath(sys.argv[2]) + bin_path2 = os.path.abspath(sys.argv[3]) + preprocess(file_path, bin_path1) + preprocess1(file_path, bin_path2) diff --git a/ACL_PyTorch/contrib/cv/face/AlignedReID/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/face/AlignedReID/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/face/AlignedReID/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/face/AlignedReID/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_postprocess.py b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_postprocess.py index c3d5cb168e902d263b9c380d75eefddd7f346977..af7e9d1533fc6e8935ea73ce7f786a5d33ab345c 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_postprocess.py @@ -1,231 +1,231 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import os -import sys -import json -import math -import numpy as np -from tqdm import tqdm -from scipy import interpolate -from sklearn.model_selection import KFold -from FaceNet_preprocess import read_pairs - - -def load_json(json_path): - with open(json_path) as f: - return json.load(f) - - -def add_extension(path): - if os.path.exists(path+'.jpg'): - return path+'.jpg' - elif os.path.exists(path+'.png'): - return path+'.png' - else: - raise RuntimeError('No file "%s" with extension png or jpg.' 
% path) - - -def get_paths(lfw_dir, pairs): - nrof_skipped_pairs = 0 - path_list = [] - issame_list = [] - for pair in pairs: - if len(pair) == 3: - path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) - path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) - issame = True - elif len(pair) == 4: - path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) - path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) - issame = False - if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist - path_list += (path0, path1) - issame_list.append(issame) - else: - nrof_skipped_pairs += 1 - if nrof_skipped_pairs > 0: - print('Skipped %d image pairs' % nrof_skipped_pairs) - - return path_list, issame_list - - -def face_postprocess(crop_paths, result_dir): - num_bins = len(os.listdir(result_dir)) - embeddings = [] - flag_file = os.path.join(result_dir, "{}_output_0.bin".format(0)) - for idx in tqdm(range(num_bins)): - if not os.path.exists(flag_file): - xb_path = os.path.join(result_dir, "{}_1.bin".format(idx)) - else: - xb_path = os.path.join(result_dir, "{}_output_0.bin".format(idx)) - xb_data = np.fromfile(xb_path, dtype=np.float32).reshape(-1, 512) - embeddings.extend(xb_data) - - embeddings_dict = dict(zip(crop_paths, embeddings)) - return embeddings_dict - - -def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): - # Calculate evaluation metrics - thresholds = np.arange(0, 4, 0.01) - embeddings1 = embeddings[0::2] - embeddings2 = embeddings[1::2] - tpr, fpr, accuracy, fp, fn = calculate_roc(thresholds, embeddings1, embeddings2, - np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) - thresholds = np.arange(0, 4, 0.001) - val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, - np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) - return tpr, fpr, accuracy, val, val_std, far, fp, fn - - -def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): - assert(embeddings1.shape[0] == embeddings2.shape[0]) - assert(embeddings1.shape[1] == embeddings2.shape[1]) - nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) - nrof_thresholds = len(thresholds) - k_fold = KFold(n_splits=nrof_folds, shuffle=False) - - tprs = np.zeros((nrof_folds,nrof_thresholds)) - fprs = np.zeros((nrof_folds,nrof_thresholds)) - accuracy = np.zeros((nrof_folds)) - - is_false_positive = [] - is_false_negative = [] - - indices = np.arange(nrof_pairs) - - for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): - if subtract_mean: - mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) - else: - mean = 0.0 - dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) - - # Find the best threshold for the fold - acc_train = np.zeros((nrof_thresholds)) - for threshold_idx, threshold in enumerate(thresholds): - _, _, acc_train[threshold_idx], _, _ = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) - best_threshold_index = np.argmax(acc_train) - for threshold_idx, threshold in enumerate(thresholds): - tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _, _, _ = calculate_accuracy(threshold, dist[test_set], 
actual_issame[test_set]) - _, _, accuracy[fold_idx], is_fp, is_fn = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) - - tpr = np.mean(tprs, 0) - fpr = np.mean(fprs, 0) - is_false_positive.extend(is_fp) - is_false_negative.extend(is_fn) - - return tpr, fpr, accuracy, is_false_positive, is_false_negative - - -def calculate_accuracy(threshold, dist, actual_issame): - predict_issame = np.less(dist, threshold) - tp = np.sum(np.logical_and(predict_issame, actual_issame)) - fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) - tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) - fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) - - is_fp = np.logical_and(predict_issame, np.logical_not(actual_issame)) - is_fn = np.logical_and(np.logical_not(predict_issame), actual_issame) - - tpr = 0 if (tp + fn == 0) else float(tp) / float(tp+fn) - fpr = 0 if (fp + tn == 0) else float(fp) / float(fp+tn) - acc = float(tp+tn)/dist.size - return tpr, fpr, acc, is_fp, is_fn - - -def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): - assert(embeddings1.shape[0] == embeddings2.shape[0]) - assert(embeddings1.shape[1] == embeddings2.shape[1]) - nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) - nrof_thresholds = len(thresholds) - k_fold = KFold(n_splits=nrof_folds, shuffle=False) - - val = np.zeros(nrof_folds) - far = np.zeros(nrof_folds) - - indices = np.arange(nrof_pairs) - - for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): - if subtract_mean: - mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) - else: - mean = 0.0 - dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) - - # Find the threshold that gives FAR = far_target - far_train = np.zeros(nrof_thresholds) - for threshold_idx, threshold in enumerate(thresholds): - _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) - if np.max(far_train)>=far_target: - f = interpolate.interp1d(far_train, thresholds, kind='slinear') - threshold = f(far_target) - else: - threshold = 0.0 - - val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) - - val_mean = np.mean(val) - far_mean = np.mean(far) - val_std = np.std(val) - return val_mean, val_std, far_mean - - -def distance(embeddings1, embeddings2, distance_metric=0): - if distance_metric==0: - # Euclidian distance - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff),1) - elif distance_metric==1: - # Distance based on cosine similarity - dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) - norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) - similarity = dot / norm - dist = np.arccos(similarity) / math.pi - else: - raise 'Undefined distance metric %d' % distance_metric - - return dist - - -def calculate_val_far(threshold, dist, actual_issame): - predict_issame = np.less(dist, threshold) - true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) - false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) - n_same = np.sum(actual_issame) - n_diff = np.sum(np.logical_not(actual_issame)) - val = float(true_accept) / float(n_same) - far = float(false_accept) / float(n_diff) - return val, far - - -if __name__ == '__main__': - pairs_path = 
'./data/pairs.txt' - parser = argparse.ArgumentParser() - parser.add_argument('--crop_dir', type=str, help='cropped image save path') - parser.add_argument('--test_dir', type=str, help='test file path') - parser.add_argument('--ONet_output_dir', type=str, help='preprocess bin files save path') - arg = parser.parse_args() - embedding_output_path = arg.test_dir - pairs = read_pairs(pairs_path) - crop_paths = load_json(arg.ONet_output_dir) - crop_dir = arg.crop_dir - path_list, _ = get_paths(crop_dir, pairs) - embeddings_dict = face_postprocess(crop_paths, embedding_output_path) - embeddings = np.array([embeddings_dict['./' + os.path.relpath(path)] for path in path_list]) - path_list, issame_list = get_paths(crop_dir, pairs) - tpr, fpr, accuracy, val, val_std, far, fp, fn = evaluate(embeddings, issame_list) - print("accuracy:", accuracy) - print("mean accuracy:", np.mean(accuracy)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import os +import sys +import json +import math +import numpy as np +from tqdm import tqdm +from scipy import interpolate +from sklearn.model_selection import KFold +from FaceNet_preprocess import read_pairs + + +def load_json(json_path): + with open(json_path) as f: + return json.load(f) + + +def add_extension(path): + if os.path.exists(path+'.jpg'): + return path+'.jpg' + elif os.path.exists(path+'.png'): + return path+'.png' + else: + raise RuntimeError('No file "%s" with extension png or jpg.' 
% path) + + +def get_paths(lfw_dir, pairs): + nrof_skipped_pairs = 0 + path_list = [] + issame_list = [] + for pair in pairs: + if len(pair) == 3: + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) + issame = True + elif len(pair) == 4: + path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) + path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) + issame = False + if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist + path_list += (path0, path1) + issame_list.append(issame) + else: + nrof_skipped_pairs += 1 + if nrof_skipped_pairs > 0: + print('Skipped %d image pairs' % nrof_skipped_pairs) + + return path_list, issame_list + + +def face_postprocess(crop_paths, result_dir): + num_bins = len(os.listdir(result_dir)) + embeddings = [] + flag_file = os.path.join(result_dir, "{}_output_0.bin".format(0)) + for idx in tqdm(range(num_bins)): + if not os.path.exists(flag_file): + xb_path = os.path.join(result_dir, "{}_1.bin".format(idx)) + else: + xb_path = os.path.join(result_dir, "{}_output_0.bin".format(idx)) + xb_data = np.fromfile(xb_path, dtype=np.float32).reshape(-1, 512) + embeddings.extend(xb_data) + + embeddings_dict = dict(zip(crop_paths, embeddings)) + return embeddings_dict + + +def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): + # Calculate evaluation metrics + thresholds = np.arange(0, 4, 0.01) + embeddings1 = embeddings[0::2] + embeddings2 = embeddings[1::2] + tpr, fpr, accuracy, fp, fn = calculate_roc(thresholds, embeddings1, embeddings2, + np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) + thresholds = np.arange(0, 4, 0.001) + val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, + np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) + return tpr, fpr, accuracy, val, val_std, far, fp, fn + + +def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): + assert(embeddings1.shape[0] == embeddings2.shape[0]) + assert(embeddings1.shape[1] == embeddings2.shape[1]) + nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) + nrof_thresholds = len(thresholds) + k_fold = KFold(n_splits=nrof_folds, shuffle=False) + + tprs = np.zeros((nrof_folds,nrof_thresholds)) + fprs = np.zeros((nrof_folds,nrof_thresholds)) + accuracy = np.zeros((nrof_folds)) + + is_false_positive = [] + is_false_negative = [] + + indices = np.arange(nrof_pairs) + + for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + if subtract_mean: + mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) + else: + mean = 0.0 + dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) + + # Find the best threshold for the fold + acc_train = np.zeros((nrof_thresholds)) + for threshold_idx, threshold in enumerate(thresholds): + _, _, acc_train[threshold_idx], _, _ = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) + best_threshold_index = np.argmax(acc_train) + for threshold_idx, threshold in enumerate(thresholds): + tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _, _, _ = calculate_accuracy(threshold, dist[test_set], 
actual_issame[test_set]) + _, _, accuracy[fold_idx], is_fp, is_fn = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) + + tpr = np.mean(tprs, 0) + fpr = np.mean(fprs, 0) + is_false_positive.extend(is_fp) + is_false_negative.extend(is_fn) + + return tpr, fpr, accuracy, is_false_positive, is_false_negative + + +def calculate_accuracy(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + tp = np.sum(np.logical_and(predict_issame, actual_issame)) + fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) + tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) + fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) + + is_fp = np.logical_and(predict_issame, np.logical_not(actual_issame)) + is_fn = np.logical_and(np.logical_not(predict_issame), actual_issame) + + tpr = 0 if (tp + fn == 0) else float(tp) / float(tp+fn) + fpr = 0 if (fp + tn == 0) else float(fp) / float(fp+tn) + acc = float(tp+tn)/dist.size + return tpr, fpr, acc, is_fp, is_fn + + +def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): + assert(embeddings1.shape[0] == embeddings2.shape[0]) + assert(embeddings1.shape[1] == embeddings2.shape[1]) + nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) + nrof_thresholds = len(thresholds) + k_fold = KFold(n_splits=nrof_folds, shuffle=False) + + val = np.zeros(nrof_folds) + far = np.zeros(nrof_folds) + + indices = np.arange(nrof_pairs) + + for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + if subtract_mean: + mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) + else: + mean = 0.0 + dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) + + # Find the threshold that gives FAR = far_target + far_train = np.zeros(nrof_thresholds) + for threshold_idx, threshold in enumerate(thresholds): + _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) + if np.max(far_train)>=far_target: + f = interpolate.interp1d(far_train, thresholds, kind='slinear') + threshold = f(far_target) + else: + threshold = 0.0 + + val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) + + val_mean = np.mean(val) + far_mean = np.mean(far) + val_std = np.std(val) + return val_mean, val_std, far_mean + + +def distance(embeddings1, embeddings2, distance_metric=0): + if distance_metric==0: + # Euclidian distance + diff = np.subtract(embeddings1, embeddings2) + dist = np.sum(np.square(diff),1) + elif distance_metric==1: + # Distance based on cosine similarity + dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) + norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) + similarity = dot / norm + dist = np.arccos(similarity) / math.pi + else: + raise 'Undefined distance metric %d' % distance_metric + + return dist + + +def calculate_val_far(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) + false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) + n_same = np.sum(actual_issame) + n_diff = np.sum(np.logical_not(actual_issame)) + val = float(true_accept) / float(n_same) + far = float(false_accept) / float(n_diff) + return val, far + + +if __name__ == '__main__': + pairs_path = 
'./data/pairs.txt' + parser = argparse.ArgumentParser() + parser.add_argument('--crop_dir', type=str, help='cropped image save path') + parser.add_argument('--test_dir', type=str, help='test file path') + parser.add_argument('--ONet_output_dir', type=str, help='preprocess bin files save path') + arg = parser.parse_args() + embedding_output_path = arg.test_dir + pairs = read_pairs(pairs_path) + crop_paths = load_json(arg.ONet_output_dir) + crop_dir = arg.crop_dir + path_list, _ = get_paths(crop_dir, pairs) + embeddings_dict = face_postprocess(crop_paths, embedding_output_path) + embeddings = np.array([embeddings_dict['./' + os.path.relpath(path)] for path in path_list]) + path_list, issame_list = get_paths(crop_dir, pairs) + tpr, fpr, accuracy, val, val_std, far, fp, fn = evaluate(embeddings, issame_list) + print("accuracy:", accuracy) + print("mean accuracy:", np.mean(accuracy)) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_preprocess.py b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_preprocess.py index 64a7a3aeedf856a1fa1446402c82f76d4c48aa35..fe26bba8c912ded7a8a35c2be6d7982276f96f99 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_preprocess.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_preprocess.py @@ -1,73 +1,73 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
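The distance helper in FaceNet_postprocess.py above supports two metrics: squared Euclidean distance (distance_metric=0) and an angular distance arccos(cosine)/pi (distance_metric=1). A toy check on hand-made embeddings (the np.clip guard is a small numerical-safety addition, not in the original):

import math
import numpy as np

e1 = np.array([[1.0, 0.0, 0.0, 0.0], [0.6, 0.8, 0.0, 0.0]])
e2 = np.array([[0.0, 1.0, 0.0, 0.0], [0.6, 0.8, 0.0, 0.0]])

sq_l2 = np.sum(np.square(e1 - e2), axis=1)                              # [2.0, 0.0]
cos = np.sum(e1 * e2, axis=1) / (np.linalg.norm(e1, axis=1) * np.linalg.norm(e2, axis=1))
angular = np.arccos(np.clip(cos, -1.0, 1.0)) / math.pi                  # [0.5, 0.0]
print(sq_l2, angular)   # orthogonal pair -> (2.0, 0.5); identical pair -> (0.0, 0.0)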
-import argparse -import sys -import os -import torch -import numpy as np -from tqdm import tqdm -from torchvision import datasets, transforms -from facenet_pytorch import fixed_image_standardization -from torch.utils.data import DataLoader, SequentialSampler - - -def read_pairs(pairs_filename): - pairs = [] - with open(pairs_filename, 'r') as f: - for line in f.readlines()[1:]: - pair = line.strip().split() - pairs.append(pair) - return np.array(pairs, dtype=object) - - -def face_preprocess(crop_dir, save_dir): - # create dataset and data loaders from cropped images output from MTCNN - trans = transforms.Compose([ - np.float32, - transforms.ToTensor(), - fixed_image_standardization - ]) - dataset = datasets.ImageFolder(crop_dir, transform=trans) - - embed_loader = DataLoader( - dataset, - num_workers=workers, - batch_size=batch_size, - sampler=SequentialSampler(dataset) - ) - - for idx, (xb, yb) in tqdm(enumerate(embed_loader)): - out_path_xb = os.path.join(save_dir, 'xb_results', '{}.bin'.format(idx)) - out_path_yb = os.path.join(save_dir, 'yb_results', '{}.bin'.format(idx)) - os.makedirs(os.path.dirname(out_path_xb), exist_ok=True) - os.makedirs(os.path.dirname(out_path_yb), exist_ok=True) - if xb.shape[0] < batch_size: - xb_zeros = np.zeros([batch_size - int(xb.shape[0]), int(xb.shape[1]), int(xb.shape[2]), int(xb.shape[3])]) - xb = np.concatenate([xb.numpy(), xb_zeros], axis=0) - xb = torch.from_numpy(xb) - xb.detach().cpu().numpy().tofile(out_path_xb) - yb.detach().cpu().numpy().tofile(out_path_yb) - - -if __name__ == '__main__': - pairs_path = './data/pairs.txt' - parser = argparse.ArgumentParser() - parser.add_argument('--crop_dir', type=str, help='cropped image save path') - parser.add_argument('--save_dir', type=str, help='preprocess bin files save path') - parser.add_argument('--batch_size', type=int, help='preprocess bin files save path') - arg = parser.parse_args() - batch_size = arg.batch_size - workers = 0 if os.name == 'nt' else 8 - pairs = read_pairs(pairs_path) - face_preprocess(arg.crop_dir, arg.save_dir) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
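face_preprocess above zero-pads the final, shorter batch up to the fixed batch size before dumping it with .tofile(), so every .bin has the same shape for the offline model. A standalone sketch of that padding step (here the dtype is preserved explicitly, unlike the original concatenation, where the default float64 np.zeros promotes the padded batch):

import numpy as np

def pad_batch(xb, batch_size):
    # Zero-pad a partial (N, C, H, W) batch along axis 0 up to `batch_size`, preserving dtype.
    n = xb.shape[0]
    if n >= batch_size:
        return xb
    pad = np.zeros((batch_size - n,) + xb.shape[1:], dtype=xb.dtype)
    return np.concatenate([xb, pad], axis=0)

# Example: the last batch holds 3 images but the exported model expects a fixed batch of 16.
last = np.random.randn(3, 3, 160, 160).astype(np.float32)
print(pad_batch(last, 16).shape, pad_batch(last, 16).dtype)   # (16, 3, 160, 160) float32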
+import argparse +import sys +import os +import torch +import numpy as np +from tqdm import tqdm +from torchvision import datasets, transforms +from facenet_pytorch import fixed_image_standardization +from torch.utils.data import DataLoader, SequentialSampler + + +def read_pairs(pairs_filename): + pairs = [] + with open(pairs_filename, 'r') as f: + for line in f.readlines()[1:]: + pair = line.strip().split() + pairs.append(pair) + return np.array(pairs, dtype=object) + + +def face_preprocess(crop_dir, save_dir): + # create dataset and data loaders from cropped images output from MTCNN + trans = transforms.Compose([ + np.float32, + transforms.ToTensor(), + fixed_image_standardization + ]) + dataset = datasets.ImageFolder(crop_dir, transform=trans) + + embed_loader = DataLoader( + dataset, + num_workers=workers, + batch_size=batch_size, + sampler=SequentialSampler(dataset) + ) + + for idx, (xb, yb) in tqdm(enumerate(embed_loader)): + out_path_xb = os.path.join(save_dir, 'xb_results', '{}.bin'.format(idx)) + out_path_yb = os.path.join(save_dir, 'yb_results', '{}.bin'.format(idx)) + os.makedirs(os.path.dirname(out_path_xb), exist_ok=True) + os.makedirs(os.path.dirname(out_path_yb), exist_ok=True) + if xb.shape[0] < batch_size: + xb_zeros = np.zeros([batch_size - int(xb.shape[0]), int(xb.shape[1]), int(xb.shape[2]), int(xb.shape[3])]) + xb = np.concatenate([xb.numpy(), xb_zeros], axis=0) + xb = torch.from_numpy(xb) + xb.detach().cpu().numpy().tofile(out_path_xb) + yb.detach().cpu().numpy().tofile(out_path_yb) + + +if __name__ == '__main__': + pairs_path = './data/pairs.txt' + parser = argparse.ArgumentParser() + parser.add_argument('--crop_dir', type=str, help='cropped image save path') + parser.add_argument('--save_dir', type=str, help='preprocess bin files save path') + parser.add_argument('--batch_size', type=int, help='preprocess bin files save path') + arg = parser.parse_args() + batch_size = arg.batch_size + workers = 0 if os.name == 'nt' else 8 + pairs = read_pairs(pairs_path) + face_preprocess(arg.crop_dir, arg.save_dir) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_pth2onnx.py b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_pth2onnx.py index 1d2b916c3c155e3ba83ffb9ad6d36884f5b294f3..6d886b0965533ce75059758fc376e3feb823e3ea 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/FaceNet_pth2onnx.py @@ -1,46 +1,46 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
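Reading one of the dumped batches back is a quick way to sanity-check what FaceNet_preprocess.py writes; the paths and dtypes below are assumptions for illustration (float32 inputs of shape batch_size x 3 x 160 x 160, int64 labels from the default DataLoader collation), not part of the original scripts:

import numpy as np

batch_size = 16
# 'save_dir/...' is a hypothetical placeholder for the --save_dir passed to FaceNet_preprocess.py.
xb = np.fromfile('save_dir/xb_results/0.bin', dtype=np.float32).reshape(batch_size, 3, 160, 160)
yb = np.fromfile('save_dir/yb_results/0.bin', dtype=np.int64)
print(xb.shape, xb.dtype, yb[:5])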
- -from facenet_pytorch import InceptionResnetV1 -import torch -import argparse - - -def FaceNet_pth2onnx(opt): - model = InceptionResnetV1(pretrained=opt.pretrain) - # if opt.model != '': - # model.load_state_dict(torch.load(opt.model, map_location='cpu')) - # else: - # print("Error network") - # return -1 - model.eval() - input_names = ["image"] - output_names = ["class"] - output_file = opt.output_file - if opt.output_file == '.': - output_file = opt.output_file - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(16, 3, 160, 160) - - torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, verbose=True, opset_version=10) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--pretrain', type=str, default='vggface2', help='[casia-webface, vggface2]') - parser.add_argument('--model', type=str, help='model path') - parser.add_argument('--output_file', type=str, default='.', help='output path') - arg = parser.parse_args() - FaceNet_pth2onnx(arg) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from facenet_pytorch import InceptionResnetV1 +import torch +import argparse + + +def FaceNet_pth2onnx(opt): + model = InceptionResnetV1(pretrained=opt.pretrain) + # if opt.model != '': + # model.load_state_dict(torch.load(opt.model, map_location='cpu')) + # else: + # print("Error network") + # return -1 + model.eval() + input_names = ["image"] + output_names = ["class"] + output_file = opt.output_file + if opt.output_file == '.': + output_file = opt.output_file + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(16, 3, 160, 160) + + torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, verbose=True, opset_version=10) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--pretrain', type=str, default='vggface2', help='[casia-webface, vggface2]') + parser.add_argument('--model', type=str, help='model path') + parser.add_argument('--output_file', type=str, default='.', help='output path') + arg = parser.parse_args() + FaceNet_pth2onnx(arg) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/LICENSE b/ACL_PyTorch/contrib/cv/face/FaceNet/LICENSE index 2d284555aae8b0e5435b84a5f28e1ba0ee8e0482..2e03f7211ca08e3dcb4acf60609c23fed7b78a75 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/LICENSE +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/LICENSE @@ -1,55 +1,55 @@ -MIT License - -Copyright (c) 2019 Timothy Esler - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the 
Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - - +MIT License + +Copyright (c) 2019 Timothy Esler + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + + diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_preprocess.py b/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_preprocess.py index 520e8d0a769e7de8734080ecb946c9cee138a58d..f75863254578ee8efd290770a49b6a012b2ff606 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_preprocess.py @@ -1,367 +1,367 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
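The MTCNN_preprocess.py hunk that follows runs the P-Net / R-Net / O-Net cascade stage by stage. As a sketch of the image-pyramid schedule its P-Net stage builds (min face size 20 and scale factor 0.709 are the values used below; the helper itself is illustrative, not part of the patch):

def pnet_scales(height, width, min_face_size=20, factor=0.709):
    # Shrink by `factor` until the shorter side, mapped to the 12x12 P-Net window, falls below 12 px.
    m = 12.0 / min_face_size
    minl = min(height, width) * m
    scales = []
    scale = m
    while minl >= 12:
        scales.append(scale)
        scale *= factor
        minl *= factor
    return scales

print(len(pnet_scales(250, 250)), pnet_scales(250, 250)[:3])   # 8 scales, starting at 0.6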
- -import os -import sys -import json -import torch -import argparse -import numpy as np -from easydict import EasyDict -from torch.utils.data import DataLoader -from torchvision import datasets -from tqdm import tqdm -sys.path.append('./models') -from mtcnn import PNet, RNet, ONet -from facenet_pytorch import MTCNN -from facenet_pytorch.models.utils.detect_face import imresample, generateBoundingBox, batched_nms, rerec, pad, bbreg, batched_nms_numpy - - -NET_MAP = { - 'pnet': PNet, - 'rnet': RNet, - 'onet': ONet -} - -################################################################################### -# basic function # -################################################################################### - -def build_dataset(config): - orig_img_ds = datasets.ImageFolder(config.data_dir, transform=None) - orig_img_ds.samples = [(p, p)for p, _ in orig_img_ds.samples] - def collate_fn(x): - out_x, out_y = [], [] - for xx, yy in x: - out_x.append(xx) - out_y.append(yy) - return out_x, out_y - loader = DataLoader( - orig_img_ds, - num_workers=config.num_workers, - batch_size=config.batch_size, - collate_fn=collate_fn - ) - return loader - - -def dump_to_json(content, outpath): - os.makedirs(os.path.dirname(outpath), exist_ok=True) - with open(outpath, 'w') as f: - json.dump(content, f) - - -def load_json(json_path): - with open(json_path) as f: - return json.load(f) - - -################################################################################### -# main class # -################################################################################### -class MTCNNPreprocessor(): - def __init__(self, config): - self.net_name = config.net - self.net = NET_MAP[self.net_name](config) - self.threshold = [0.6, 0.7, 0.7] - self.data_device = torch.device('cpu') - - def pnet_process(self, imgs): - if self.net_name != 'pnet': - raise ValueError('Pnet process not support for {} !'.format(self.net)) - - factor = 0.709 - minsize = 20 - - imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) - batch_size = len(imgs) - h, w = imgs.shape[2:4] - m = 12.0 / minsize - minl = min(h, w) - minl = minl * m - - scale_i = m - scales = [] - while minl >= 12: - scales.append(scale_i) - scale_i = scale_i * factor - minl = minl * factor - # First stage - boxes = [] - image_inds = [] - scale_picks = [] - all_i = 0 - offset = 0 - for scale in scales: - im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1))) - im_data = (im_data - 127.5) * 0.0078125 - reg, probs = self.net.forward(im_data.cpu().numpy()) - reg = torch.from_numpy(reg) - probs = torch.from_numpy(probs) - boxes_scale, image_inds_scale = generateBoundingBox(reg, probs[:, 1], scale, self.threshold[0]) - boxes.append(boxes_scale) - image_inds.append(image_inds_scale) - pick = batched_nms(boxes_scale[:, :4], boxes_scale[:, 4], image_inds_scale, 0.5) - scale_picks.append(pick + offset) - offset += boxes_scale.shape[0] - boxes = torch.cat(boxes, dim=0) - image_inds = torch.cat(image_inds, dim=0) - scale_picks = torch.cat(scale_picks, dim=0) - # NMS within each scale + image - boxes, image_inds = boxes[scale_picks], image_inds[scale_picks] - # NMS within each image - pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) - boxes, image_inds = boxes[pick], image_inds[pick] - regw = boxes[:, 2] - boxes[:, 0] - regh = boxes[:, 3] - boxes[:, 1] - qq1 = boxes[:, 0] + boxes[:, 5] * regw - qq2 = boxes[:, 1] + boxes[:, 6] * regh - qq3 = boxes[:, 2] + boxes[:, 7] * regw - qq4 = boxes[:, 3] + boxes[:, 8] * regh - boxes = torch.stack([qq1, qq2, qq3, qq4, 
boxes[:, 4]]).permute(1, 0) - boxes = rerec(boxes) - return boxes, image_inds - - def rnet_process(self, imgs, boxes, image_inds): - if self.net_name != 'rnet': - raise ValueError('Rnet process not support for {} !'.format(self.net)) - imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) - h, w = imgs.shape[2:4] - y, ey, x, ex = pad(boxes, w, h) - if len(boxes) > 0: - im_data = [] - for k in range(len(y)): - if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): - img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) - im_data.append(imresample(img_k, (24, 24))) - im_data = torch.cat(im_data, dim=0) - im_data = (im_data - 127.5) * 0.0078125 - out = self.net.forward(im_data.cpu().numpy()) - out = [torch.from_numpy(o) for o in out] - out0 = out[0].permute(1, 0) - out1 = out[1].permute(1, 0) - score = out1[1, :] - ipass = score > self.threshold[1] - boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) - image_inds = image_inds[ipass] - mv = out0[:, ipass].permute(1, 0) - # NMS within each image - pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) - boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick] - boxes = bbreg(boxes, mv) - boxes = rerec(boxes) - return boxes, image_inds - - def onet_process(self, imgs, boxes, image_inds): - if self.net_name != 'onet': - raise ValueError('Onet process not support for {} !'.format(self.net)) - imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) - h, w = imgs.shape[2:4] - points = torch.zeros(0, 5, 2, device=self.data_device) - if len(boxes) > 0: - y, ey, x, ex = pad(boxes, w, h) - im_data = [] - for k in range(len(y)): - if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): - img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) - im_data.append(imresample(img_k, (48, 48))) - im_data = torch.cat(im_data, dim=0) - im_data = (im_data - 127.5) * 0.0078125 - out = self.net.forward(im_data.cpu().numpy()) - out = [torch.from_numpy(o) for o in out] - out0 = out[0].permute(1, 0) - out1 = out[1].permute(1, 0) - out2 = out[2].permute(1, 0) - score = out2[1, :] - points = out1 - ipass = score > self.threshold[2] - points = points[:, ipass] - boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) - image_inds = image_inds[ipass] - mv = out0[:, ipass].permute(1, 0) - w_i = boxes[:, 2] - boxes[:, 0] + 1 - h_i = boxes[:, 3] - boxes[:, 1] + 1 - points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5, 1) - 1 - points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(5, 1) - 1 - points = torch.stack((points_x, points_y)).permute(2, 1, 0) - boxes = bbreg(boxes, mv) - # NMS within each image using "Min" strategy - # pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) - pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min') - boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick] - - boxes = boxes.cpu().numpy() - points = points.cpu().numpy() - image_inds = image_inds.cpu() - batch_boxes = [] - batch_points = [] - for b_i in range(config.batch_size): - b_i_inds = np.where(image_inds == b_i) - batch_boxes.append(boxes[b_i_inds].copy()) - batch_points.append(points[b_i_inds].copy()) - batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points) - return batch_boxes, batch_points - - -################################################################################### -# main function # -################################################################################### -def process_pnet(config): - loader = 
build_dataset(config) - processor = MTCNNPreprocessor(config) - out_json = {} - for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): - imgs = np.stack([np.uint8(x) for x in xs]) - imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) - boxes, image_inds = processor.pnet_process(imgs) - out_json[str(idx)] = { - 'boxes': boxes.tolist(), - 'image_inds': image_inds.tolist() - } - save_path = os.path.join(config.output_path, 'pnet.json') - os.makedirs(os.path.dirname(save_path), exist_ok=True) - dump_to_json(out_json, save_path) - - -def process_rnet(config): - loader = build_dataset(config) - processor = MTCNNPreprocessor(config) - out_json = {} - pnet_data = load_json(config.input_path) - for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): - imgs = np.stack([np.uint8(x) for x in xs]) - imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) - boxes = torch.from_numpy(np.array(pnet_data[str(idx)]['boxes'])) - image_inds = torch.from_numpy(np.array(pnet_data[str(idx)]['image_inds'])) - boxes, image_inds = processor.rnet_process(imgs, boxes, image_inds) - out_json[str(idx)] = { - 'boxes': boxes.tolist(), - 'image_inds': image_inds.tolist() - } - save_path = os.path.join(config.output_path, 'rnet.json') - os.makedirs(os.path.dirname(save_path), exist_ok=True) - dump_to_json(out_json, save_path) - - -def process_onet(config): - data_dir = config.data_dir - loader = build_dataset(config) - processor = MTCNNPreprocessor(config) - pnet_data = load_json(config.input_path) - crop_paths = [] - for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): - imgs = np.stack([np.uint8(x) for x in xs]) - imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) - boxes = torch.from_numpy(np.array(pnet_data[str(idx)]['boxes'])) - image_inds = torch.from_numpy(np.array(pnet_data[str(idx)]['image_inds'])) - batch_boxes, batch_points = processor.onet_process(imgs, boxes, image_inds) - # save crop imgs - save_paths = [p.replace(data_dir, data_dir + '_split_om_cropped_{}'.format(config.batch_size)) for p in b_paths] - save_crop_imgs(batch_boxes, batch_points, xs, save_paths) - crop_paths.extend(save_paths) - save_path = os.path.join(config.output_path, 'onet.json') - os.makedirs(os.path.dirname(save_path), exist_ok=True) - dump_to_json(crop_paths, save_path) - - -def save_crop_imgs(batch_boxes, batch_points, img, save_path): - mtcnn = MTCNN( - image_size=160, margin=14, min_face_size=20, - thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, - selection_method='center_weighted_size' - ) - boxes, probs, points = [], [], [] - for box, point in zip(batch_boxes, batch_points): - box = np.array(box) - point = np.array(point) - if len(box) == 0: - boxes.append(None) - probs.append([None]) - points.append(None) - elif mtcnn.select_largest: - box_order = np.argsort((box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1] - box = box[box_order] - point = point[box_order] - boxes.append(box[:, :4]) - probs.append(box[:, 4]) - points.append(point) - else: - boxes.append(box[:, :4]) - probs.append(box[:, 4]) - points.append(point) - batch_boxes = np.array(boxes) - batch_probs = np.array(probs) - batch_points = np.array(points) - - batch_boxes, batch_probs, batch_points = mtcnn.select_boxes( - batch_boxes, batch_probs, batch_points, img, method=mtcnn.selection_method - ) - # Extract faces - faces = mtcnn.extract(img, batch_boxes, save_path) - return faces - - -def parser_args(): - pass - - -def build_config(arg): - pnet_config = { - 'net': 'pnet', - 
'device_id': 1, - 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', - 'model_path': './weights/PNet_dynamic.om', - 'data_dir': './data/lfw', - 'num_workers': 8, - 'batch_size': arg.batch_size - } - rnet_config = { - 'net': 'rnet', - 'device_id': 1, - 'input_path': './data/output/split_bs' + str(arg.batch_size) + '/pnet.json', - 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', - 'model_path': './weights/RNet_dynamic.om', - 'data_dir': './data/lfw', - 'num_workers': 8, - 'batch_size': arg.batch_size - } - onet_config = { - 'net': 'onet', - 'device_id': 1, - 'input_path': './data/output/split_bs' + str(arg.batch_size) + '/rnet.json', - 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', - 'model_path': './weights/ONet_dynamic.om', - 'data_dir': './data/lfw', - 'num_workers': 8, - 'batch_size': arg.batch_size - } - return EasyDict(pnet_config), EasyDict(rnet_config), EasyDict(onet_config) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model', type=str, help='[PNet/RNet/ONet]') - parser.add_argument('--data_dir', type=str, help='the absolute files path of lfw dataset') - parser.add_argument('--batch_size', type=int, help='[1/16]') - arg = parser.parse_args() - pnet_config, rnet_config, onet_config = build_config(arg) - if arg.model == 'Pnet': - config = pnet_config - process_pnet(config) - elif arg.model == 'Rnet': - config = rnet_config - process_rnet(config) - elif arg.model == 'Onet': - config = onet_config - process_onet(config) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import json +import torch +import argparse +import numpy as np +from easydict import EasyDict +from torch.utils.data import DataLoader +from torchvision import datasets +from tqdm import tqdm +sys.path.append('./models') +from mtcnn import PNet, RNet, ONet +from facenet_pytorch import MTCNN +from facenet_pytorch.models.utils.detect_face import imresample, generateBoundingBox, batched_nms, rerec, pad, bbreg, batched_nms_numpy + + +NET_MAP = { + 'pnet': PNet, + 'rnet': RNet, + 'onet': ONet +} + +################################################################################### +# basic function # +################################################################################### + +def build_dataset(config): + orig_img_ds = datasets.ImageFolder(config.data_dir, transform=None) + orig_img_ds.samples = [(p, p)for p, _ in orig_img_ds.samples] + def collate_fn(x): + out_x, out_y = [], [] + for xx, yy in x: + out_x.append(xx) + out_y.append(yy) + return out_x, out_y + loader = DataLoader( + orig_img_ds, + num_workers=config.num_workers, + batch_size=config.batch_size, + collate_fn=collate_fn + ) + return loader + + +def dump_to_json(content, outpath): + os.makedirs(os.path.dirname(outpath), exist_ok=True) + with open(outpath, 'w') as f: + json.dump(content, f) + + +def load_json(json_path): + with open(json_path) as f: + return json.load(f) + + +################################################################################### +# main class # +################################################################################### +class MTCNNPreprocessor(): + def __init__(self, config): + self.net_name = config.net + self.net = NET_MAP[self.net_name](config) + self.threshold = [0.6, 0.7, 0.7] + self.data_device = torch.device('cpu') + + def pnet_process(self, imgs): + if self.net_name != 'pnet': + raise ValueError('Pnet process not support for {} !'.format(self.net)) + + factor = 0.709 + minsize = 20 + + imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) + batch_size = len(imgs) + h, w = imgs.shape[2:4] + m = 12.0 / minsize + minl = min(h, w) + minl = minl * m + + scale_i = m + scales = [] + while minl >= 12: + scales.append(scale_i) + scale_i = scale_i * factor + minl = minl * factor + # First stage + boxes = [] + image_inds = [] + scale_picks = [] + all_i = 0 + offset = 0 + for scale in scales: + im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1))) + im_data = (im_data - 127.5) * 0.0078125 + reg, probs = self.net.forward(im_data.cpu().numpy()) + reg = torch.from_numpy(reg) + probs = torch.from_numpy(probs) + boxes_scale, image_inds_scale = generateBoundingBox(reg, probs[:, 1], scale, self.threshold[0]) + boxes.append(boxes_scale) + image_inds.append(image_inds_scale) + pick = batched_nms(boxes_scale[:, :4], boxes_scale[:, 4], image_inds_scale, 0.5) + scale_picks.append(pick + offset) + offset += boxes_scale.shape[0] + boxes = torch.cat(boxes, dim=0) + image_inds = torch.cat(image_inds, dim=0) + scale_picks = torch.cat(scale_picks, dim=0) + # NMS within each scale + image + boxes, image_inds = boxes[scale_picks], image_inds[scale_picks] + # NMS within each image + pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) + boxes, image_inds = boxes[pick], image_inds[pick] + regw = boxes[:, 2] - boxes[:, 0] + regh = boxes[:, 3] - boxes[:, 1] + qq1 = boxes[:, 0] + boxes[:, 5] * regw + qq2 = boxes[:, 1] + boxes[:, 6] * regh + qq3 = boxes[:, 2] + boxes[:, 7] * regw + qq4 = boxes[:, 3] + boxes[:, 8] * regh + boxes = torch.stack([qq1, qq2, qq3, qq4, 
boxes[:, 4]]).permute(1, 0) + boxes = rerec(boxes) + return boxes, image_inds + + def rnet_process(self, imgs, boxes, image_inds): + if self.net_name != 'rnet': + raise ValueError('Rnet process not support for {} !'.format(self.net)) + imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) + h, w = imgs.shape[2:4] + y, ey, x, ex = pad(boxes, w, h) + if len(boxes) > 0: + im_data = [] + for k in range(len(y)): + if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): + img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) + im_data.append(imresample(img_k, (24, 24))) + im_data = torch.cat(im_data, dim=0) + im_data = (im_data - 127.5) * 0.0078125 + out = self.net.forward(im_data.cpu().numpy()) + out = [torch.from_numpy(o) for o in out] + out0 = out[0].permute(1, 0) + out1 = out[1].permute(1, 0) + score = out1[1, :] + ipass = score > self.threshold[1] + boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) + image_inds = image_inds[ipass] + mv = out0[:, ipass].permute(1, 0) + # NMS within each image + pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) + boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick] + boxes = bbreg(boxes, mv) + boxes = rerec(boxes) + return boxes, image_inds + + def onet_process(self, imgs, boxes, image_inds): + if self.net_name != 'onet': + raise ValueError('Onet process not support for {} !'.format(self.net)) + imgs = imgs.permute(0, 3, 1, 2).type(torch.float32) + h, w = imgs.shape[2:4] + points = torch.zeros(0, 5, 2, device=self.data_device) + if len(boxes) > 0: + y, ey, x, ex = pad(boxes, w, h) + im_data = [] + for k in range(len(y)): + if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): + img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) + im_data.append(imresample(img_k, (48, 48))) + im_data = torch.cat(im_data, dim=0) + im_data = (im_data - 127.5) * 0.0078125 + out = self.net.forward(im_data.cpu().numpy()) + out = [torch.from_numpy(o) for o in out] + out0 = out[0].permute(1, 0) + out1 = out[1].permute(1, 0) + out2 = out[2].permute(1, 0) + score = out2[1, :] + points = out1 + ipass = score > self.threshold[2] + points = points[:, ipass] + boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) + image_inds = image_inds[ipass] + mv = out0[:, ipass].permute(1, 0) + w_i = boxes[:, 2] - boxes[:, 0] + 1 + h_i = boxes[:, 3] - boxes[:, 1] + 1 + points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5, 1) - 1 + points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(5, 1) - 1 + points = torch.stack((points_x, points_y)).permute(2, 1, 0) + boxes = bbreg(boxes, mv) + # NMS within each image using "Min" strategy + # pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) + pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min') + boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick] + + boxes = boxes.cpu().numpy() + points = points.cpu().numpy() + image_inds = image_inds.cpu() + batch_boxes = [] + batch_points = [] + for b_i in range(config.batch_size): + b_i_inds = np.where(image_inds == b_i) + batch_boxes.append(boxes[b_i_inds].copy()) + batch_points.append(points[b_i_inds].copy()) + batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points) + return batch_boxes, batch_points + + +################################################################################### +# main function # +################################################################################### +def process_pnet(config): + loader = 
build_dataset(config) + processor = MTCNNPreprocessor(config) + out_json = {} + for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): + imgs = np.stack([np.uint8(x) for x in xs]) + imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) + boxes, image_inds = processor.pnet_process(imgs) + out_json[str(idx)] = { + 'boxes': boxes.tolist(), + 'image_inds': image_inds.tolist() + } + save_path = os.path.join(config.output_path, 'pnet.json') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + dump_to_json(out_json, save_path) + + +def process_rnet(config): + loader = build_dataset(config) + processor = MTCNNPreprocessor(config) + out_json = {} + pnet_data = load_json(config.input_path) + for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): + imgs = np.stack([np.uint8(x) for x in xs]) + imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) + boxes = torch.from_numpy(np.array(pnet_data[str(idx)]['boxes'])) + image_inds = torch.from_numpy(np.array(pnet_data[str(idx)]['image_inds'])) + boxes, image_inds = processor.rnet_process(imgs, boxes, image_inds) + out_json[str(idx)] = { + 'boxes': boxes.tolist(), + 'image_inds': image_inds.tolist() + } + save_path = os.path.join(config.output_path, 'rnet.json') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + dump_to_json(out_json, save_path) + + +def process_onet(config): + data_dir = config.data_dir + loader = build_dataset(config) + processor = MTCNNPreprocessor(config) + pnet_data = load_json(config.input_path) + crop_paths = [] + for idx, (xs, b_paths) in tqdm(enumerate(loader), total=len(loader)): + imgs = np.stack([np.uint8(x) for x in xs]) + imgs = torch.as_tensor(imgs.copy(), device=torch.device('cpu')) + boxes = torch.from_numpy(np.array(pnet_data[str(idx)]['boxes'])) + image_inds = torch.from_numpy(np.array(pnet_data[str(idx)]['image_inds'])) + batch_boxes, batch_points = processor.onet_process(imgs, boxes, image_inds) + # save crop imgs + save_paths = [p.replace(data_dir, data_dir + '_split_om_cropped_{}'.format(config.batch_size)) for p in b_paths] + save_crop_imgs(batch_boxes, batch_points, xs, save_paths) + crop_paths.extend(save_paths) + save_path = os.path.join(config.output_path, 'onet.json') + os.makedirs(os.path.dirname(save_path), exist_ok=True) + dump_to_json(crop_paths, save_path) + + +def save_crop_imgs(batch_boxes, batch_points, img, save_path): + mtcnn = MTCNN( + image_size=160, margin=14, min_face_size=20, + thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, + selection_method='center_weighted_size' + ) + boxes, probs, points = [], [], [] + for box, point in zip(batch_boxes, batch_points): + box = np.array(box) + point = np.array(point) + if len(box) == 0: + boxes.append(None) + probs.append([None]) + points.append(None) + elif mtcnn.select_largest: + box_order = np.argsort((box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1] + box = box[box_order] + point = point[box_order] + boxes.append(box[:, :4]) + probs.append(box[:, 4]) + points.append(point) + else: + boxes.append(box[:, :4]) + probs.append(box[:, 4]) + points.append(point) + batch_boxes = np.array(boxes) + batch_probs = np.array(probs) + batch_points = np.array(points) + + batch_boxes, batch_probs, batch_points = mtcnn.select_boxes( + batch_boxes, batch_probs, batch_points, img, method=mtcnn.selection_method + ) + # Extract faces + faces = mtcnn.extract(img, batch_boxes, save_path) + return faces + + +def parser_args(): + pass + + +def build_config(arg): + pnet_config = { + 'net': 'pnet', + 
'device_id': 1, + 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', + 'model_path': './weights/PNet_dynamic.om', + 'data_dir': './data/lfw', + 'num_workers': 8, + 'batch_size': arg.batch_size + } + rnet_config = { + 'net': 'rnet', + 'device_id': 1, + 'input_path': './data/output/split_bs' + str(arg.batch_size) + '/pnet.json', + 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', + 'model_path': './weights/RNet_dynamic.om', + 'data_dir': './data/lfw', + 'num_workers': 8, + 'batch_size': arg.batch_size + } + onet_config = { + 'net': 'onet', + 'device_id': 1, + 'input_path': './data/output/split_bs' + str(arg.batch_size) + '/rnet.json', + 'output_path': './data/output/split_bs' + str(arg.batch_size) + '/', + 'model_path': './weights/ONet_dynamic.om', + 'data_dir': './data/lfw', + 'num_workers': 8, + 'batch_size': arg.batch_size + } + return EasyDict(pnet_config), EasyDict(rnet_config), EasyDict(onet_config) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, help='[PNet/RNet/ONet]') + parser.add_argument('--data_dir', type=str, help='the absolute files path of lfw dataset') + parser.add_argument('--batch_size', type=int, help='[1/16]') + arg = parser.parse_args() + pnet_config, rnet_config, onet_config = build_config(arg) + if arg.model == 'Pnet': + config = pnet_config + process_pnet(config) + elif arg.model == 'Rnet': + config = rnet_config + process_rnet(config) + elif arg.model == 'Onet': + config = onet_config + process_onet(config) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_pth2onnx.py b/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_pth2onnx.py index 890b3f9073d603dea00d37d03cef79134df19fa3..b94604da8f6413f9d35a58eae7785290dbb406a4 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/MTCNN_pth2onnx.py @@ -1,57 +1,57 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import torch -import argparse -sys.path.append('./models') -from mtcnn import PNet_truncated, RNet_truncated, ONet_truncated - - -def MTCNN_pth2onnx(opt): - if opt.model == 'PNet': - model = PNet_truncated() - elif opt.model == 'RNet': - model = RNet_truncated() - elif opt.model == 'ONet': - model = ONet_truncated() - else: - print("Error network") - return -1 - model = model.eval() - input_names = ['image'] - if opt.model == 'PNet': - output_names = ["probs", "reg"] - dynamic_axes = {'image': {0: '-1', 2: '-1', 3: '-1'}, 'probs': {0: '-1', 2: '-1', 3: '-1'}, - 'reg': {0: '-1', 2: '-1', 3: '-1'}} - dummy_input = torch.randn(1, 3, 1229, 1000) - elif opt.model == 'RNet': - output_names = ['regs', 'cls'] - dynamic_axes = {'image': {0: '-1'}, 'regs': {0: '-1'}, 'cls': {0: '-1'}} - dummy_input = torch.randn(20, 3, 24, 24) - else: - output_names = ['landmark', 'regs', 'cls'] - dynamic_axes = {'image': {0: '-1'}, 'landmark': {0: '-1'}, 'regs': {0: '-1'}, 'cls': {0: '-1'}} - dummy_input = torch.randn(20, 3, 48, 48) - - torch.onnx.export(model, dummy_input, opt.output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, verbose=True, opset_version=11) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model', type=str, help='[PNet/RNet/ONet]') - parser.add_argument('--output_file', type=str, default='.', help='output path') - arg = parser.parse_args() - MTCNN_pth2onnx(arg) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import torch +import argparse +sys.path.append('./models') +from mtcnn import PNet_truncated, RNet_truncated, ONet_truncated + + +def MTCNN_pth2onnx(opt): + if opt.model == 'PNet': + model = PNet_truncated() + elif opt.model == 'RNet': + model = RNet_truncated() + elif opt.model == 'ONet': + model = ONet_truncated() + else: + print("Error network") + return -1 + model = model.eval() + input_names = ['image'] + if opt.model == 'PNet': + output_names = ["probs", "reg"] + dynamic_axes = {'image': {0: '-1', 2: '-1', 3: '-1'}, 'probs': {0: '-1', 2: '-1', 3: '-1'}, + 'reg': {0: '-1', 2: '-1', 3: '-1'}} + dummy_input = torch.randn(1, 3, 1229, 1000) + elif opt.model == 'RNet': + output_names = ['regs', 'cls'] + dynamic_axes = {'image': {0: '-1'}, 'regs': {0: '-1'}, 'cls': {0: '-1'}} + dummy_input = torch.randn(20, 3, 24, 24) + else: + output_names = ['landmark', 'regs', 'cls'] + dynamic_axes = {'image': {0: '-1'}, 'landmark': {0: '-1'}, 'regs': {0: '-1'}, 'cls': {0: '-1'}} + dummy_input = torch.randn(20, 3, 48, 48) + + torch.onnx.export(model, dummy_input, opt.output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, verbose=True, opset_version=11) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, help='[PNet/RNet/ONet]') + parser.add_argument('--output_file', type=str, default='.', help='output path') + arg = parser.parse_args() + MTCNN_pth2onnx(arg) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/face/FaceNet/gen_dataset_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/gen_dataset_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/models/acl_net.py b/ACL_PyTorch/contrib/cv/face/FaceNet/models/acl_net.py index b3504ad01e3e4ac213756eee32dadd9c72e03e1d..9104c53cf90f8f2009ec94156e258abf2e942087 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/models/acl_net.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/models/acl_net.py @@ -1,283 +1,283 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import acl - -# error code -ACL_ERROR_NONE = 0 - -# memory malloc code -ACL_MEM_MALLOC_HUGE_FIRST = 0 -ACL_MEM_MALLOC_HUGE_ONLY = 1 -ACL_MEM_MALLOC_NORMAL_ONLY = 2 - -# memory copy code -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -# format -ACL_FORMAT_NCHW = 0 -ACL_FLOAT32 = 0 -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception("{} failed ret = {}".format(message, ret)) - - -class Net(object): - def __init__(self, context, device_id, model_path, input_dtype=ACL_FLOAT32, output_dtype=ACL_FLOAT32): - self.device_id = device_id - self.model_path = model_path - self.model_id = None - self.context = context - self.buffer_method = { - "in": acl.mdl.get_input_size_by_index, - "out": acl.mdl.get_output_size_by_index, - "outhost": acl.mdl.get_output_size_by_index - } - - self.input_data = [] - self.output_data = [] - self.output_data_host = [] - self.model_desc = None - self.load_input_dataset = None - self.load_output_dataset = None - self.input_size = None - self.output_size = None - - self.input_dtype = ACL_FLOAT32 - self.output_dtype = ACL_FLOAT32 - - self._init_resource() - - def __call__(self, ori_data, out_size): - return self.forward(ori_data, out_size) - - def __del__(self): - ret = acl.mdl.unload(self.model_id) - check_ret("acl.mdl.unload", ret) - if self.model_desc: - acl.mdl.destroy_desc(self.model_desc) - self.model_desc = None - - def _release_data_buffer(self): - while self.input_data: - item = self.input_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data: - item = self.output_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data_host: - item = self.output_data_host.pop() - ret = acl.rt.free_host(item["buffer"]) - check_ret("acl.rt.free_host", ret) - - def _init_resource(self): - # load_model - self.model_id, ret = acl.mdl.load_from_file(self.model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - self._get_model_info() - - def _get_model_info(self): - ret = acl.mdl.get_desc(self.model_desc, self.model_id) - check_ret("acl.mdl.get_desc", ret) - self.input_size = acl.mdl.get_num_inputs(self.model_desc) - self.output_size = acl.mdl.get_num_outputs(self.model_desc) - - def _gen_data_buffer(self, size, des, data=None, out_size_list=None): - func = self.buffer_method[des] - for i in range(size): - if out_size_list is None and data is None: - temp_buffer_size = func(self.model_desc, i) - else: - if des == "in": - input_size = np.prod(np.array(data).shape) - temp_buffer_size = Net.gen_data_size(input_size, dtype=ACL_DTYPE.get(self.input_dtype)) - elif des == "out": - out_size = out_size_list[i] - temp_buffer_size = Net.gen_data_size(out_size, dtype=ACL_DTYPE.get(self.output_dtype)) - - temp_buffer, ret = 
acl.rt.malloc(temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) - check_ret("acl.rt.malloc", ret) - - if des == "in": - self.input_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - elif des == "out": - self.output_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _gen_dataset_output_host(self, size, des, out_size_list=None): - func = self.buffer_method[des] - for i in range(size): - if out_size_list is None: - temp_buffer_size = func(self.model_desc, i) - else: - out_size = out_size_list[i] - temp_buffer_size = Net.gen_data_size(out_size, dtype=ACL_DTYPE.get(self.output_dtype)) - temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) - check_ret("acl.rt.malloc_host", ret) - - self.output_data_host.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): - temp_data_buffer = self.input_data \ - if policy == ACL_MEMCPY_HOST_TO_DEVICE \ - else self.output_data - - if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: - dataset = self.output_data_host - - for i, item in enumerate(temp_data_buffer): - if policy == ACL_MEMCPY_HOST_TO_DEVICE: - ptr, _ = acl.util.numpy_contiguous_to_ptr(dataset[i]) - ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - else: - ptr = dataset[i]["buffer"] - ret = acl.rt.memcpy(ptr, item["size"], item["buffer"], item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - def _gen_dataset(self, type_str="input", input_shapes=None): - dataset = acl.mdl.create_dataset() - temp_dataset = None - if type_str == "in": - self.load_input_dataset = dataset - temp_dataset = self.input_data - else: - self.load_output_dataset = dataset - temp_dataset = self.output_data - - for i, item in enumerate(temp_dataset): - data = acl.create_data_buffer(item["buffer"], item["size"]) - if data is None: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - _, ret = acl.mdl.add_dataset_buffer(dataset, data) - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - if type_str == "in": - # set dynamic dataset tensor desc - input_shape = input_shapes[i] - input_desc = acl.create_tensor_desc(self.input_dtype, input_shape, ACL_FORMAT_NCHW) - dataset, ret = acl.mdl.set_dataset_tensor_desc(dataset, input_desc, i) - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - def _data_from_host_to_device(self, images): - self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) - input_shapes = [list(data.shape) for data in images] - self._gen_dataset("in", input_shapes) - self._gen_dataset("out") - - def _data_from_device_to_host(self, input_data, out_size_list): - res = [] - self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) - output = self.get_result(self.output_data_host, input_data, out_size_list) - return output - - def _destroy_databuffer(self): - for dataset in [self.load_input_dataset, self.load_output_dataset]: - if not dataset: - continue - num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(num): - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf: - ret = acl.destroy_data_buffer(data_buf) - check_ret("acl.destroy_data_buffer", ret) - ret = acl.mdl.destroy_dataset(dataset) - check_ret("acl.mdl.destroy_dataset", ret) - - def _prepare_data_buffer(self, input_data=None, out_size_list=None): - self._gen_data_buffer(self.input_size, 
des="in", data=input_data) - self._gen_data_buffer(self.output_size, des="out", out_size_list=out_size_list) - self._gen_dataset_output_host(self.output_size, des="outhost", out_size_list=out_size_list) - - def forward(self, input_data, out_size_list): - if not isinstance(input_data, (list, tuple)): - input_data = [input_data] - - self._prepare_data_buffer(input_data=input_data, out_size_list=out_size_list) - self._data_from_host_to_device(input_data) - ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset) - check_ret("acl.mdl.execute", ret) - self._destroy_databuffer() - result = self._data_from_device_to_host(input_data=input_data, out_size_list=out_size_list) - self._release_data_buffer() - return result - - def get_result(self, output_data, data, out_size_list): - dataset = [] - batch_size = data[0].shape[0] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_output_dims(self.model_desc, i) - check_ret("acl.mdl.get_output_dims", ret) - - data_shape = dims.get("dims") - # fix dynamic batch size - # data_shape[0] = batch_size - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - # data_len = functools.reduce(lambda x, y: x * y, data_shape) - data_len = out_size_list[i] - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_array = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - dataset.append(np_array) - return dataset - - @staticmethod - def gen_data_size(size, dtype): - dtype = np.dtype(dtype) - return int(size * dtype.itemsize) - - @staticmethod - def fix_static_shape(input_shape, idx, value): - if not isinstance(input_shape, list): - input_shape = list(input_shape) - input_shape[idx] = value - return input_shape +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import acl + +# error code +ACL_ERROR_NONE = 0 + +# memory malloc code +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_HUGE_ONLY = 1 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 + +# memory copy code +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +# format +ACL_FORMAT_NCHW = 0 +ACL_FLOAT32 = 0 +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception("{} failed ret = {}".format(message, ret)) + + +class Net(object): + def __init__(self, context, device_id, model_path, input_dtype=ACL_FLOAT32, output_dtype=ACL_FLOAT32): + self.device_id = device_id + self.model_path = model_path + self.model_id = None + self.context = context + self.buffer_method = { + "in": acl.mdl.get_input_size_by_index, + "out": acl.mdl.get_output_size_by_index, + "outhost": acl.mdl.get_output_size_by_index + } + + self.input_data = [] + self.output_data = [] + self.output_data_host = [] + self.model_desc = None + self.load_input_dataset = None + self.load_output_dataset = None + self.input_size = None + self.output_size = None + + self.input_dtype = ACL_FLOAT32 + self.output_dtype = ACL_FLOAT32 + + self._init_resource() + + def __call__(self, ori_data, out_size): + return self.forward(ori_data, out_size) + + def __del__(self): + ret = acl.mdl.unload(self.model_id) + check_ret("acl.mdl.unload", ret) + if self.model_desc: + acl.mdl.destroy_desc(self.model_desc) + self.model_desc = None + + def _release_data_buffer(self): + while self.input_data: + item = self.input_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data: + item = self.output_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data_host: + item = self.output_data_host.pop() + ret = acl.rt.free_host(item["buffer"]) + check_ret("acl.rt.free_host", ret) + + def _init_resource(self): + # load_model + self.model_id, ret = acl.mdl.load_from_file(self.model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + self._get_model_info() + + def _get_model_info(self): + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + check_ret("acl.mdl.get_desc", ret) + self.input_size = acl.mdl.get_num_inputs(self.model_desc) + self.output_size = acl.mdl.get_num_outputs(self.model_desc) + + def _gen_data_buffer(self, size, des, data=None, out_size_list=None): + func = self.buffer_method[des] + for i in range(size): + if out_size_list is None and data is None: + temp_buffer_size = func(self.model_desc, i) + else: + if des == "in": + input_size = np.prod(np.array(data).shape) + temp_buffer_size = Net.gen_data_size(input_size, dtype=ACL_DTYPE.get(self.input_dtype)) + elif des == "out": + out_size = out_size_list[i] + temp_buffer_size = Net.gen_data_size(out_size, dtype=ACL_DTYPE.get(self.output_dtype)) + + temp_buffer, ret = acl.rt.malloc(temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) + check_ret("acl.rt.malloc", ret) + + if des == "in": + self.input_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + elif des == "out": + self.output_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _gen_dataset_output_host(self, size, des, out_size_list=None): + func = self.buffer_method[des] + for i in range(size): + 
if out_size_list is None: + temp_buffer_size = func(self.model_desc, i) + else: + out_size = out_size_list[i] + temp_buffer_size = Net.gen_data_size(out_size, dtype=ACL_DTYPE.get(self.output_dtype)) + temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) + check_ret("acl.rt.malloc_host", ret) + + self.output_data_host.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): + temp_data_buffer = self.input_data \ + if policy == ACL_MEMCPY_HOST_TO_DEVICE \ + else self.output_data + + if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: + dataset = self.output_data_host + + for i, item in enumerate(temp_data_buffer): + if policy == ACL_MEMCPY_HOST_TO_DEVICE: + ptr, _ = acl.util.numpy_contiguous_to_ptr(dataset[i]) + ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + else: + ptr = dataset[i]["buffer"] + ret = acl.rt.memcpy(ptr, item["size"], item["buffer"], item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + def _gen_dataset(self, type_str="input", input_shapes=None): + dataset = acl.mdl.create_dataset() + temp_dataset = None + if type_str == "in": + self.load_input_dataset = dataset + temp_dataset = self.input_data + else: + self.load_output_dataset = dataset + temp_dataset = self.output_data + + for i, item in enumerate(temp_dataset): + data = acl.create_data_buffer(item["buffer"], item["size"]) + if data is None: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + _, ret = acl.mdl.add_dataset_buffer(dataset, data) + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + if type_str == "in": + # set dynamic dataset tensor desc + input_shape = input_shapes[i] + input_desc = acl.create_tensor_desc(self.input_dtype, input_shape, ACL_FORMAT_NCHW) + dataset, ret = acl.mdl.set_dataset_tensor_desc(dataset, input_desc, i) + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + def _data_from_host_to_device(self, images): + self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) + input_shapes = [list(data.shape) for data in images] + self._gen_dataset("in", input_shapes) + self._gen_dataset("out") + + def _data_from_device_to_host(self, input_data, out_size_list): + res = [] + self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) + output = self.get_result(self.output_data_host, input_data, out_size_list) + return output + + def _destroy_databuffer(self): + for dataset in [self.load_input_dataset, self.load_output_dataset]: + if not dataset: + continue + num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(num): + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf: + ret = acl.destroy_data_buffer(data_buf) + check_ret("acl.destroy_data_buffer", ret) + ret = acl.mdl.destroy_dataset(dataset) + check_ret("acl.mdl.destroy_dataset", ret) + + def _prepare_data_buffer(self, input_data=None, out_size_list=None): + self._gen_data_buffer(self.input_size, des="in", data=input_data) + self._gen_data_buffer(self.output_size, des="out", out_size_list=out_size_list) + self._gen_dataset_output_host(self.output_size, des="outhost", out_size_list=out_size_list) + + def forward(self, input_data, out_size_list): + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + + self._prepare_data_buffer(input_data=input_data, out_size_list=out_size_list) + 
self._data_from_host_to_device(input_data) + ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset) + check_ret("acl.mdl.execute", ret) + self._destroy_databuffer() + result = self._data_from_device_to_host(input_data=input_data, out_size_list=out_size_list) + self._release_data_buffer() + return result + + def get_result(self, output_data, data, out_size_list): + dataset = [] + batch_size = data[0].shape[0] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_output_dims(self.model_desc, i) + check_ret("acl.mdl.get_output_dims", ret) + + data_shape = dims.get("dims") + # fix dynamic batch size + # data_shape[0] = batch_size + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + # data_len = functools.reduce(lambda x, y: x * y, data_shape) + data_len = out_size_list[i] + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_array = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + dataset.append(np_array) + return dataset + + @staticmethod + def gen_data_size(size, dtype): + dtype = np.dtype(dtype) + return int(size * dtype.itemsize) + + @staticmethod + def fix_static_shape(input_shape, idx, value): + if not isinstance(input_shape, list): + input_shape = list(input_shape) + input_shape[idx] = value + return input_shape diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/models/mtcnn.patch b/ACL_PyTorch/contrib/cv/face/FaceNet/models/mtcnn.patch index f0c11301c300938cf6fc4f47b5158429eb040c89..3a974991ee38bfa330d5b179f4125061d1012cc5 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/models/mtcnn.patch +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/models/mtcnn.patch @@ -1,243 +1,243 @@ 1,239c1,519 -< # Copyright 2021 Huawei Technologies Co., Ltd -< # -< # Licensed under the Apache License, Version 2.0 (the "License"); -< # you may not use this file except in compliance with the License. -< # You may obtain a copy of the License at -< # -< # http://www.apache.org/licenses/LICENSE-2.0 -< # -< # Unless required by applicable law or agreed to in writing, software -< # distributed under the License is distributed on an "AS IS" BASIS, -< # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -< # See the License for the specific language governing permissions and -< # limitations under the License. 
-< -< import torch -< from torch import nn -< import numpy as np -< import os -< import math -< import acl -< from abc import ABC, abstractmethod -< from acl_net import Net, check_ret -< -< -< class PNet_truncated(nn.Module): -< def __init__(self, pretrained=True): -< super().__init__() -< -< self.conv1 = nn.Conv2d(3, 10, kernel_size=3) -< self.prelu1 = nn.PReLU(10) -< self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True) -< self.conv2 = nn.Conv2d(10, 16, kernel_size=3) -< self.prelu2 = nn.PReLU(16) -< self.conv3 = nn.Conv2d(16, 32, kernel_size=3) -< self.prelu3 = nn.PReLU(32) -< self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1) -< self.softmax4_1 = nn.Softmax(dim=1) -< self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1) -< self.training = False -< -< if pretrained: -< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/pnet.pt") -< state_dict = torch.load(state_dict_path) -< self.load_state_dict(state_dict) -< -< def forward(self, x): -< x = self.conv1(x) -< x = self.prelu1(x) -< x = self.pool1(x) -< x = self.conv2(x) -< x = self.prelu2(x) -< x = self.conv3(x) -< x = self.prelu3(x) -< a = self.conv4_1(x) -< # a = self.softmax4_1(a) -< b = self.conv4_2(x) -< return b, a -< -< -< class RNet_truncated(nn.Module): -< def __init__(self, pretrained=True): -< super().__init__() -< -< self.conv1 = nn.Conv2d(3, 28, kernel_size=3) -< self.prelu1 = nn.PReLU(28) -< self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) -< self.conv2 = nn.Conv2d(28, 48, kernel_size=3) -< self.prelu2 = nn.PReLU(48) -< self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) -< self.conv3 = nn.Conv2d(48, 64, kernel_size=2) -< self.prelu3 = nn.PReLU(64) -< self.dense4 = nn.Linear(576, 128) -< self.prelu4 = nn.PReLU(128) -< self.dense5_1 = nn.Linear(128, 2) -< self.softmax5_1 = nn.Softmax(dim=1) -< self.dense5_2 = nn.Linear(128, 4) -< -< self.training = False -< -< if pretrained: -< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/rnet.pt") -< state_dict = torch.load(state_dict_path) -< self.load_state_dict(state_dict) -< -< def forward(self, x): -< x = self.conv1(x) -< x = self.prelu1(x) -< x = self.pool1(x) -< x = self.conv2(x) -< x = self.prelu2(x) -< x = self.pool2(x) -< x = self.conv3(x) -< x = self.prelu3(x) -< x = x.permute(0, 3, 2, 1).contiguous() -< x = self.dense4(x.view(x.shape[0], -1)) -< x = self.prelu4(x) -< a = self.dense5_1(x) -< # a = self.softmax5_1(a) -< b = self.dense5_2(x) -< return b, a -< -< -< class ONet_truncated(nn.Module): -< def __init__(self, pretrained=True): -< super().__init__() -< -< self.conv1 = nn.Conv2d(3, 32, kernel_size=3) -< self.prelu1 = nn.PReLU(32) -< self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) -< self.conv2 = nn.Conv2d(32, 64, kernel_size=3) -< self.prelu2 = nn.PReLU(64) -< self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) -< self.conv3 = nn.Conv2d(64, 64, kernel_size=3) -< self.prelu3 = nn.PReLU(64) -< self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True) -< self.conv4 = nn.Conv2d(64, 128, kernel_size=2) -< self.prelu4 = nn.PReLU(128) -< self.dense5 = nn.Linear(1152, 256) -< self.prelu5 = nn.PReLU(256) -< self.dense6_1 = nn.Linear(256, 2) -< self.softmax6_1 = nn.Softmax(dim=1) -< self.dense6_2 = nn.Linear(256, 4) -< self.dense6_3 = nn.Linear(256, 10) -< -< self.training = False -< -< if pretrained: -< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/onet.pt") -< state_dict = torch.load(state_dict_path) -< self.load_state_dict(state_dict) -< -< def forward(self, x): -< x = self.conv1(x) -< x = self.prelu1(x) -< x = 
self.pool1(x) -< x = self.conv2(x) -< x = self.prelu2(x) -< x = self.pool2(x) -< x = self.conv3(x) -< x = self.prelu3(x) -< x = self.pool3(x) -< x = self.conv4(x) -< x = self.prelu4(x) -< x = x.permute(0, 3, 2, 1).contiguous() -< x = self.dense5(x.view(x.shape[0], -1)) -< x = self.prelu5(x) -< a = self.dense6_1(x) -< # a = self.softmax6_1(a) -< b = self.dense6_2(x) -< c = self.dense6_3(x) -< return b, c, a -< -< -< class OMNet(ABC): -< def __init__(self, args): -< self.device_id = args.device_id -< # 默认float32 -< self.item_size = 4 -< -< # init for acl -< ret = acl.init() -< check_ret('acl.init', ret) -< ret = acl.rt.set_device(args.device_id) -< check_ret('acl.rt.set_device', ret) -< context, ret = acl.rt.create_context(args.device_id) -< check_ret('acl.rt.create_context', ret) -< self.net_context = Net(context, model_path=args.model_path, -< device_id=args.device_id) -< -< @abstractmethod -< def forward(self, input_data, ): -< pass -< -< def __del__(self): -< del self.net_context -< ret = acl.rt.reset_device(self.device_id) -< check_ret('acl.rt.reset_device', ret) -< -< context, ret = acl.rt.get_context() -< check_ret('acl.rt.get_context', ret) -< ret = acl.rt.destroy_context(context) -< check_ret('acl.rt.destory_context', ret) -< ret = acl.finalize() -< check_ret('acl.finalize', ret) -< -< -< class PNet(OMNet): -< def __init__(self, args): -< super().__init__(args) -< -< def forward(self, input_data): -< if isinstance(input_data, np.ndarray): -< input_data = [input_data] -< h, w = input_data[0].shape[2:4] -< batch_size = input_data[0].shape[0] -< out_h = math.ceil((h - 2) /2) - 4 -< out_w = math.ceil((w - 2) /2) - 4 -< out_size = [batch_size*4*out_h*out_w, batch_size*2*out_h*out_w] -< output_data = self.net_context(input_data, out_size) -< # postprocess: softmax && reshape -< output_data[1] = output_data[1].reshape([batch_size, 2, out_h, out_w]) -< output_data[1] = torch.softmax(torch.tensor(output_data[1]), dim=1).numpy() -< output_data[0] = output_data[0].reshape([batch_size, 4, out_h, out_w]) -< return output_data -< -< -< class RNet(OMNet): -< def __init__(self, args): -< super().__init__(args) -< -< def forward(self, input_data): -< if isinstance(input_data, np.ndarray): -< input_data = [input_data] -< batch_size = input_data[0].shape[0] -< out_size = [batch_size*4, batch_size*2] -< output_data = self.net_context(input_data, out_size) -< # postprocess: softmax && reshape -< output_data[0] = output_data[0].reshape([batch_size, 4]) -< output_data[1] = output_data[1].reshape([batch_size, 2]) -< output_data[1] = torch.softmax(torch.tensor(output_data[1]), dim=1).numpy() -< return output_data -< -< -< class ONet(OMNet): -< def __init__(self, args): -< super().__init__(args) -< -< def forward(self, input_data): -< if isinstance(input_data, np.ndarray): -< input_data = [input_data] -< batch_size = input_data[0].shape[0] -< out_size = [batch_size*4, batch_size*10, batch_size*2] -< output_data = self.net_context(input_data, out_size) -< # postprocess: softmax && reshape -< output_data[0] = output_data[0].reshape([batch_size, 4]) -< output_data[1] = output_data[1].reshape([batch_size, 10]) -< output_data[2] = output_data[2].reshape([batch_size, 2]) -< output_data[2] = torch.softmax(torch.tensor(output_data[2]), dim=1).numpy() -< return output_data +< # Copyright 2021 Huawei Technologies Co., Ltd +< # +< # Licensed under the Apache License, Version 2.0 (the "License"); +< # you may not use this file except in compliance with the License. 
+< # You may obtain a copy of the License at +< # +< # http://www.apache.org/licenses/LICENSE-2.0 +< # +< # Unless required by applicable law or agreed to in writing, software +< # distributed under the License is distributed on an "AS IS" BASIS, +< # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +< # See the License for the specific language governing permissions and +< # limitations under the License. +< +< import torch +< from torch import nn +< import numpy as np +< import os +< import math +< import acl +< from abc import ABC, abstractmethod +< from acl_net import Net, check_ret +< +< +< class PNet_truncated(nn.Module): +< def __init__(self, pretrained=True): +< super().__init__() +< +< self.conv1 = nn.Conv2d(3, 10, kernel_size=3) +< self.prelu1 = nn.PReLU(10) +< self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True) +< self.conv2 = nn.Conv2d(10, 16, kernel_size=3) +< self.prelu2 = nn.PReLU(16) +< self.conv3 = nn.Conv2d(16, 32, kernel_size=3) +< self.prelu3 = nn.PReLU(32) +< self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1) +< self.softmax4_1 = nn.Softmax(dim=1) +< self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1) +< self.training = False +< +< if pretrained: +< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/pnet.pt") +< state_dict = torch.load(state_dict_path) +< self.load_state_dict(state_dict) +< +< def forward(self, x): +< x = self.conv1(x) +< x = self.prelu1(x) +< x = self.pool1(x) +< x = self.conv2(x) +< x = self.prelu2(x) +< x = self.conv3(x) +< x = self.prelu3(x) +< a = self.conv4_1(x) +< # a = self.softmax4_1(a) +< b = self.conv4_2(x) +< return b, a +< +< +< class RNet_truncated(nn.Module): +< def __init__(self, pretrained=True): +< super().__init__() +< +< self.conv1 = nn.Conv2d(3, 28, kernel_size=3) +< self.prelu1 = nn.PReLU(28) +< self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) +< self.conv2 = nn.Conv2d(28, 48, kernel_size=3) +< self.prelu2 = nn.PReLU(48) +< self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) +< self.conv3 = nn.Conv2d(48, 64, kernel_size=2) +< self.prelu3 = nn.PReLU(64) +< self.dense4 = nn.Linear(576, 128) +< self.prelu4 = nn.PReLU(128) +< self.dense5_1 = nn.Linear(128, 2) +< self.softmax5_1 = nn.Softmax(dim=1) +< self.dense5_2 = nn.Linear(128, 4) +< +< self.training = False +< +< if pretrained: +< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/rnet.pt") +< state_dict = torch.load(state_dict_path) +< self.load_state_dict(state_dict) +< +< def forward(self, x): +< x = self.conv1(x) +< x = self.prelu1(x) +< x = self.pool1(x) +< x = self.conv2(x) +< x = self.prelu2(x) +< x = self.pool2(x) +< x = self.conv3(x) +< x = self.prelu3(x) +< x = x.permute(0, 3, 2, 1).contiguous() +< x = self.dense4(x.view(x.shape[0], -1)) +< x = self.prelu4(x) +< a = self.dense5_1(x) +< # a = self.softmax5_1(a) +< b = self.dense5_2(x) +< return b, a +< +< +< class ONet_truncated(nn.Module): +< def __init__(self, pretrained=True): +< super().__init__() +< +< self.conv1 = nn.Conv2d(3, 32, kernel_size=3) +< self.prelu1 = nn.PReLU(32) +< self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) +< self.conv2 = nn.Conv2d(32, 64, kernel_size=3) +< self.prelu2 = nn.PReLU(64) +< self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) +< self.conv3 = nn.Conv2d(64, 64, kernel_size=3) +< self.prelu3 = nn.PReLU(64) +< self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True) +< self.conv4 = nn.Conv2d(64, 128, kernel_size=2) +< self.prelu4 = nn.PReLU(128) +< self.dense5 = nn.Linear(1152, 256) +< self.prelu5 = nn.PReLU(256) +< self.dense6_1 = 
nn.Linear(256, 2) +< self.softmax6_1 = nn.Softmax(dim=1) +< self.dense6_2 = nn.Linear(256, 4) +< self.dense6_3 = nn.Linear(256, 10) +< +< self.training = False +< +< if pretrained: +< state_dict_path=os.path.join(os.path.dirname(os.path.dirname(__file__)), "weights/onet.pt") +< state_dict = torch.load(state_dict_path) +< self.load_state_dict(state_dict) +< +< def forward(self, x): +< x = self.conv1(x) +< x = self.prelu1(x) +< x = self.pool1(x) +< x = self.conv2(x) +< x = self.prelu2(x) +< x = self.pool2(x) +< x = self.conv3(x) +< x = self.prelu3(x) +< x = self.pool3(x) +< x = self.conv4(x) +< x = self.prelu4(x) +< x = x.permute(0, 3, 2, 1).contiguous() +< x = self.dense5(x.view(x.shape[0], -1)) +< x = self.prelu5(x) +< a = self.dense6_1(x) +< # a = self.softmax6_1(a) +< b = self.dense6_2(x) +< c = self.dense6_3(x) +< return b, c, a +< +< +< class OMNet(ABC): +< def __init__(self, args): +< self.device_id = args.device_id +< # 默认float32 +< self.item_size = 4 +< +< # init for acl +< ret = acl.init() +< check_ret('acl.init', ret) +< ret = acl.rt.set_device(args.device_id) +< check_ret('acl.rt.set_device', ret) +< context, ret = acl.rt.create_context(args.device_id) +< check_ret('acl.rt.create_context', ret) +< self.net_context = Net(context, model_path=args.model_path, +< device_id=args.device_id) +< +< @abstractmethod +< def forward(self, input_data, ): +< pass +< +< def __del__(self): +< del self.net_context +< ret = acl.rt.reset_device(self.device_id) +< check_ret('acl.rt.reset_device', ret) +< +< context, ret = acl.rt.get_context() +< check_ret('acl.rt.get_context', ret) +< ret = acl.rt.destroy_context(context) +< check_ret('acl.rt.destory_context', ret) +< ret = acl.finalize() +< check_ret('acl.finalize', ret) +< +< +< class PNet(OMNet): +< def __init__(self, args): +< super().__init__(args) +< +< def forward(self, input_data): +< if isinstance(input_data, np.ndarray): +< input_data = [input_data] +< h, w = input_data[0].shape[2:4] +< batch_size = input_data[0].shape[0] +< out_h = math.ceil((h - 2) /2) - 4 +< out_w = math.ceil((w - 2) /2) - 4 +< out_size = [batch_size*4*out_h*out_w, batch_size*2*out_h*out_w] +< output_data = self.net_context(input_data, out_size) +< # postprocess: softmax && reshape +< output_data[1] = output_data[1].reshape([batch_size, 2, out_h, out_w]) +< output_data[1] = torch.softmax(torch.tensor(output_data[1]), dim=1).numpy() +< output_data[0] = output_data[0].reshape([batch_size, 4, out_h, out_w]) +< return output_data +< +< +< class RNet(OMNet): +< def __init__(self, args): +< super().__init__(args) +< +< def forward(self, input_data): +< if isinstance(input_data, np.ndarray): +< input_data = [input_data] +< batch_size = input_data[0].shape[0] +< out_size = [batch_size*4, batch_size*2] +< output_data = self.net_context(input_data, out_size) +< # postprocess: softmax && reshape +< output_data[0] = output_data[0].reshape([batch_size, 4]) +< output_data[1] = output_data[1].reshape([batch_size, 2]) +< output_data[1] = torch.softmax(torch.tensor(output_data[1]), dim=1).numpy() +< return output_data +< +< +< class ONet(OMNet): +< def __init__(self, args): +< super().__init__(args) +< +< def forward(self, input_data): +< if isinstance(input_data, np.ndarray): +< input_data = [input_data] +< batch_size = input_data[0].shape[0] +< out_size = [batch_size*4, batch_size*10, batch_size*2] +< output_data = self.net_context(input_data, out_size) +< # postprocess: softmax && reshape +< output_data[0] = output_data[0].reshape([batch_size, 4]) +< output_data[1] = 
output_data[1].reshape([batch_size, 10]) +< output_data[2] = output_data[2].reshape([batch_size, 2]) +< output_data[2] = torch.softmax(torch.tensor(output_data[2]), dim=1).numpy() +< return output_data --- > import torch > from torch import nn diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/face/FaceNet/modelzoo_level.txt index 39175166a4b66dd0ead8d95dcb7ea49c56a868e3..119ddfc69182d1e11b6ce03723be060336966991 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/modelzoo_level.txt @@ -1,2 +1,2 @@ -ModelConvert:OK +ModelConvert:OK QuantStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/requirements.txt b/ACL_PyTorch/contrib/cv/face/FaceNet/requirements.txt index 8ff8888811ffbc725e4e497cdf2c15216daaabd5..475e994ace191b59b8e191a627a79d81fc88f683 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/requirements.txt +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/requirements.txt @@ -1,11 +1,11 @@ -numpy==1.21.3 -pillow==8.4.0 -torch==1.10.0 -torchvision==0.11.1 -facenet_pytorch==2.5.2 -scikit-learn==1.0.1 -scipy==1.7.3 -tqdm==4.62.3 -easydict==1.9 -onnx==1.10.2 - +numpy==1.21.3 +pillow==8.4.0 +torch==1.10.0 +torchvision==0.11.1 +facenet_pytorch==2.5.2 +scikit-learn==1.0.1 +scipy==1.7.3 +tqdm==4.62.3 +easydict==1.9 +onnx==1.10.2 + diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/batch_utils.py b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/batch_utils.py index dccca4ccd9fa758a3738ca631845ffbe967f1b80..213eb9fabf4f586b6b92c7530fff6b688f4b3e2a 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/batch_utils.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/batch_utils.py @@ -1,48 +1,48 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import numpy as np -import os -import argparse - - -def np2bin(save_np, save_name, save_path): - save_np.tofile(save_path + '/' + save_name + '_output_0.bin') - - -def bin2np(bin_path): - return np.fromfile(bin_path, dtype=np.float32) - - -def general_data(batch_size, data_root_path, save_root_path): - in_files = os.listdir(data_root_path) - for file_name in in_files: - file_index = file_name.split('_')[0] - bin_file = bin2np(data_root_path + '/' + file_name) - img_n = bin_file.shape[0] // 512 - bin_file = bin_file.reshape([img_n, 512]) - file_index_i = int(file_index) - for i in range(img_n): - if file_index_i * batch_size + i < 13233: - np2bin(bin_file[i], str(file_index_i * batch_size + i), save_root_path) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--batch_size', type=int, help='batch size') - parser.add_argument('--data_root_path', type=str, help='data path') - parser.add_argument('--save_root_path', type=str, help='save path') - arg = parser.parse_args() - general_data(arg.batch_size, arg.data_root_path, arg.save_root_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import numpy as np +import os +import argparse + + +def np2bin(save_np, save_name, save_path): + save_np.tofile(save_path + '/' + save_name + '_output_0.bin') + + +def bin2np(bin_path): + return np.fromfile(bin_path, dtype=np.float32) + + +def general_data(batch_size, data_root_path, save_root_path): + in_files = os.listdir(data_root_path) + for file_name in in_files: + file_index = file_name.split('_')[0] + bin_file = bin2np(data_root_path + '/' + file_name) + img_n = bin_file.shape[0] // 512 + bin_file = bin_file.reshape([img_n, 512]) + file_index_i = int(file_index) + for i in range(img_n): + if file_index_i * batch_size + i < 13233: + np2bin(bin_file[i], str(file_index_i * batch_size + i), save_root_path) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--batch_size', type=int, help='batch size') + parser.add_argument('--data_root_path', type=str, help='data path') + parser.add_argument('--save_root_path', type=str, help='save path') + arg = parser.parse_args() + general_data(arg.batch_size, arg.data_root_path, arg.save_root_path) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/fix_prelu.py b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/fix_prelu.py index 6298e2bdc2b5640d1a05402f00ed053ceed90d99..b1f5bc239d19cc685bb7b4b899cd74b307b641a5 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/fix_prelu.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/fix_prelu.py @@ -1,35 +1,35 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import sys -import numpy as np -from magiconnx import OnnxGraph - - -def fix_prelu(graph): - prelu_nodes = graph.get_nodes(op_type='PRelu') - for node in prelu_nodes: - slope_para = graph[node.inputs[1]] - fix_value = np.expand_dims(slope_para.value, axis=0) - slope_para.value = fix_value - return graph - - -if __name__ == '__main__': - input_model = sys.argv[1] - out_model = sys.argv[2] - onnx_graph = OnnxGraph(input_model) - onnx_graph = fix_prelu(onnx_graph) - onnx_graph.save(out_model) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import sys +import numpy as np +from magiconnx import OnnxGraph + + +def fix_prelu(graph): + prelu_nodes = graph.get_nodes(op_type='PRelu') + for node in prelu_nodes: + slope_para = graph[node.inputs[1]] + fix_value = np.expand_dims(slope_para.value, axis=0) + slope_para.value = fix_value + return graph + + +if __name__ == '__main__': + input_model = sys.argv[1] + out_model = sys.argv[2] + onnx_graph = OnnxGraph(input_model) + onnx_graph = fix_prelu(onnx_graph) + onnx_graph.save(out_model) diff --git a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/gen_test_data.py b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/gen_test_data.py index 85cd5a02599cc3e6dddc65e1a9f709fbb512e9f2..eac855cf4af764a675b6b4beb3e7ef7f325a4fd0 100644 --- a/ACL_PyTorch/contrib/cv/face/FaceNet/utils/gen_test_data.py +++ b/ACL_PyTorch/contrib/cv/face/FaceNet/utils/gen_test_data.py @@ -1,30 +1,30 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import sys -import numpy as np - - -def gen_bin(save_name, data_shape): - data_bin = np.random.random(data_shape) - data_bin.tofile(os.path.join(save_name)) - - -if __name__ == '__main__': - save_path = sys.argv[1] - shape = sys.argv[2] - shape = list(map(int, shape.split(','))) - gen_bin(save_path, shape) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import sys +import numpy as np + + +def gen_bin(save_name, data_shape): + data_bin = np.random.random(data_shape) + data_bin.tofile(os.path.join(save_name)) + + +if __name__ == '__main__': + save_path = sys.argv[1] + shape = sys.argv[2] + shape = list(map(int, shape.split(','))) + gen_bin(save_path, shape) diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/LICENSE b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/LICENSE +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/postprocess_MGN.py b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/postprocess_MGN.py index 7e0f6a4b7e2aaae353799cc3b3f6708d98d62f4f..3ffd64386d5c35c37a4adc4db093bbf47e2088bb 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/postprocess_MGN.py +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/postprocess_MGN.py @@ -1,163 +1,163 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.jj - -import os -import sys -import numpy as np -import torch -import argparse -from scipy.spatial.distance import cdist -from tqdm import tqdm -sys.path.append('./MGN') -from MGN.data import Data -from MGN.utils.metrics import mean_ap, cmc, re_ranking - - -def save_batch_imgs(save_file_name, dataset_type, loader, need_flip=False): - ind = 0 - for (inputs, labels) in loader: - if need_flip == True: - inputs = inputs.index_select(3, torch.arange(inputs.size(3) - 1, -1, -1)) - for i in range(len(inputs)): - img_name = dataset_type + '/' + "{:0>5d}".format(ind) - save_path = opt.data_path - if(opt.data_path[-1] != '/'): - save_path += '/' - save_path += save_file_name - inputs[i].numpy().tofile(save_path + '/' + img_name + '.bin') - ind += 1 - - -def extract_feature_om(prediction_file_path, prediction_file_path_flip): - # make the list of files first - file_names, file_names_flip = [], [] - for file_name in os.listdir(prediction_file_path): - suffix = file_name.split('_')[-1] - if suffix == '1.txt': - file_names.append(file_name) - file_names.sort() - print("first 5 txt files: \n",file_names[:10]) - for file_name in os.listdir(prediction_file_path_flip): - suffix = file_name.split('_')[-1] - if suffix == '1.txt': - file_names_flip.append(file_name) - file_names_flip.sort() - if len(file_names) != len(file_names_flip): - print('num of filp features doesnt match that of orig') - features = torch.FloatTensor() - for i in range(len(file_names)): - fea_path = os.path.join(prediction_file_path, file_names[i]) - fea_path_f = os.path.join(prediction_file_path_flip, file_names_flip[i]) - f1 = torch.from_numpy(np.loadtxt(fea_path, dtype=np.float32)) - f2 = torch.from_numpy(np.loadtxt(fea_path_f, dtype=np.float32)) - ff = f1 + f2 - ff = torch.unsqueeze(ff, 0) - fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) - ff = ff.div(fnorm.expand_as(ff)) - features = torch.cat((features, ff), 0) - if i < 8: - print(i, "th f1: \n", f1.shape, f1) - print(i, "th f2: \n", f2.shape, f2) - print(i, "th ff: \n", ff.shape, ff) - if i % 100 == 0: - print("the " + str(i) + "th image file is extracted.") - return features - - -class Main(): - def __init__(self, data): - self.train_loader = data.train_loader - self.test_loader = data.test_loader - self.query_loader = data.query_loader - self.testset = data.testset - self.queryset = data.queryset - - def evaluate_om(self): - query_prediction_file_path, query_prediction_file_path_flip = './result/q_bin/dumpOutput_device0/', \ - './result/q_bin_flip/dumpOutput_device0/' - 
gallery_prediction_file_path, gallery_prediction_file_path_flip = './result/g_bin/dumpOutput_device0/', \ - './result/g_bin_flip/dumpOutput_device0/' - print('extract features, this may take a few minutes') - qf = extract_feature_om(query_prediction_file_path, query_prediction_file_path_flip).numpy() - gf = extract_feature_om(gallery_prediction_file_path, gallery_prediction_file_path_flip).numpy() - print("shape of features, qf: " + str(qf.shape) + "gf: " + str(gf.shape)) - print("arr qf: \n", qf[:10, :10]) - print("arr gf: \n", gf[:10, :10]) - - def rank(dist): - r = cmc(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras, - separate_camera_set=False, - single_gallery_shot=False, - first_match_break=True) - m_ap = mean_ap(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras) - return r, m_ap - ######################### re rank########################## - q_g_dist = np.dot(qf, np.transpose(gf)) - q_q_dist = np.dot(qf, np.transpose(qf)) - g_g_dist = np.dot(gf, np.transpose(gf)) - dist = re_ranking(q_g_dist, q_q_dist, g_g_dist) - r, m_ap = rank(dist) - print('[With Re-Ranking] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f}' - .format(m_ap, r[0], r[2], r[4], r[9])) - #########################no re rank########################## - dist = cdist(qf, gf) - r, m_ap = rank(dist) - print('[Without Re-Ranking] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f}' - .format(m_ap, r[0], r[2], r[4], r[9])) - - def save_data(self): - save_file_name = 'bin_data' - save_file_name_flip = 'bin_data_flip' - print('saving images, this may take a few minutes') - save_batch_imgs(save_file_name, 'q', tqdm(self.query_loader)) - save_batch_imgs(save_file_name, 'g', tqdm(self.test_loader)) - save_batch_imgs(save_file_name_flip, 'q', tqdm(self.query_loader), need_flip=True) - save_batch_imgs(save_file_name_flip, 'g', tqdm(self.test_loader), need_flip=True) - - -def parse_func(): - parser = argparse.ArgumentParser() - parser.add_argument('--data_path', - default="Market-1501-v15.09.15", - help='path of Market-1501-v15.09.15') - parser.add_argument('--mode', - default='train', choices=['train', 'evaluate', 'evaluate_om', 'save_bin', 'vis'], - help='train or evaluate ') - parser.add_argument('--query_image', - default='0001_c1s1_001051_00.jpg', - help='path to the image you want to query') - parser.add_argument("--batchid", - default=4, - help='the batch for id') - parser.add_argument("--batchimage", - default=4, - help='the batch of per id') - parser.add_argument("--batchtest", - default=8, - help='the batch size for test') - return parser.parse_args() - - -if __name__ == '__main__': - opt = parse_func() - data = Data(opt) - main = Main(data) - if opt.mode == 'evaluate_om': - print('start evaluate om') - main.evaluate_om() - elif opt.mode == 'save_bin': - print('start evaluate') - main.save_data() - else: +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License.jj + +import os +import sys +import numpy as np +import torch +import argparse +from scipy.spatial.distance import cdist +from tqdm import tqdm +sys.path.append('./MGN') +from MGN.data import Data +from MGN.utils.metrics import mean_ap, cmc, re_ranking + + +def save_batch_imgs(save_file_name, dataset_type, loader, need_flip=False): + ind = 0 + for (inputs, labels) in loader: + if need_flip == True: + inputs = inputs.index_select(3, torch.arange(inputs.size(3) - 1, -1, -1)) + for i in range(len(inputs)): + img_name = dataset_type + '/' + "{:0>5d}".format(ind) + save_path = opt.data_path + if(opt.data_path[-1] != '/'): + save_path += '/' + save_path += save_file_name + inputs[i].numpy().tofile(save_path + '/' + img_name + '.bin') + ind += 1 + + +def extract_feature_om(prediction_file_path, prediction_file_path_flip): + # make the list of files first + file_names, file_names_flip = [], [] + for file_name in os.listdir(prediction_file_path): + suffix = file_name.split('_')[-1] + if suffix == '1.txt': + file_names.append(file_name) + file_names.sort() + print("first 5 txt files: \n",file_names[:10]) + for file_name in os.listdir(prediction_file_path_flip): + suffix = file_name.split('_')[-1] + if suffix == '1.txt': + file_names_flip.append(file_name) + file_names_flip.sort() + if len(file_names) != len(file_names_flip): + print('num of filp features doesnt match that of orig') + features = torch.FloatTensor() + for i in range(len(file_names)): + fea_path = os.path.join(prediction_file_path, file_names[i]) + fea_path_f = os.path.join(prediction_file_path_flip, file_names_flip[i]) + f1 = torch.from_numpy(np.loadtxt(fea_path, dtype=np.float32)) + f2 = torch.from_numpy(np.loadtxt(fea_path_f, dtype=np.float32)) + ff = f1 + f2 + ff = torch.unsqueeze(ff, 0) + fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) + ff = ff.div(fnorm.expand_as(ff)) + features = torch.cat((features, ff), 0) + if i < 8: + print(i, "th f1: \n", f1.shape, f1) + print(i, "th f2: \n", f2.shape, f2) + print(i, "th ff: \n", ff.shape, ff) + if i % 100 == 0: + print("the " + str(i) + "th image file is extracted.") + return features + + +class Main(): + def __init__(self, data): + self.train_loader = data.train_loader + self.test_loader = data.test_loader + self.query_loader = data.query_loader + self.testset = data.testset + self.queryset = data.queryset + + def evaluate_om(self): + query_prediction_file_path, query_prediction_file_path_flip = './result/q_bin/dumpOutput_device0/', \ + './result/q_bin_flip/dumpOutput_device0/' + gallery_prediction_file_path, gallery_prediction_file_path_flip = './result/g_bin/dumpOutput_device0/', \ + './result/g_bin_flip/dumpOutput_device0/' + print('extract features, this may take a few minutes') + qf = extract_feature_om(query_prediction_file_path, query_prediction_file_path_flip).numpy() + gf = extract_feature_om(gallery_prediction_file_path, gallery_prediction_file_path_flip).numpy() + print("shape of features, qf: " + str(qf.shape) + "gf: " + str(gf.shape)) + print("arr qf: \n", qf[:10, :10]) + print("arr gf: \n", gf[:10, :10]) + + def rank(dist): + r = cmc(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras, + separate_camera_set=False, + single_gallery_shot=False, + first_match_break=True) + m_ap = mean_ap(dist, self.queryset.ids, self.testset.ids, self.queryset.cameras, self.testset.cameras) + return r, m_ap + ######################### re 
rank########################## + q_g_dist = np.dot(qf, np.transpose(gf)) + q_q_dist = np.dot(qf, np.transpose(qf)) + g_g_dist = np.dot(gf, np.transpose(gf)) + dist = re_ranking(q_g_dist, q_q_dist, g_g_dist) + r, m_ap = rank(dist) + print('[With Re-Ranking] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f}' + .format(m_ap, r[0], r[2], r[4], r[9])) + #########################no re rank########################## + dist = cdist(qf, gf) + r, m_ap = rank(dist) + print('[Without Re-Ranking] mAP: {:.4f} rank1: {:.4f} rank3: {:.4f} rank5: {:.4f} rank10: {:.4f}' + .format(m_ap, r[0], r[2], r[4], r[9])) + + def save_data(self): + save_file_name = 'bin_data' + save_file_name_flip = 'bin_data_flip' + print('saving images, this may take a few minutes') + save_batch_imgs(save_file_name, 'q', tqdm(self.query_loader)) + save_batch_imgs(save_file_name, 'g', tqdm(self.test_loader)) + save_batch_imgs(save_file_name_flip, 'q', tqdm(self.query_loader), need_flip=True) + save_batch_imgs(save_file_name_flip, 'g', tqdm(self.test_loader), need_flip=True) + + +def parse_func(): + parser = argparse.ArgumentParser() + parser.add_argument('--data_path', + default="Market-1501-v15.09.15", + help='path of Market-1501-v15.09.15') + parser.add_argument('--mode', + default='train', choices=['train', 'evaluate', 'evaluate_om', 'save_bin', 'vis'], + help='train or evaluate ') + parser.add_argument('--query_image', + default='0001_c1s1_001051_00.jpg', + help='path to the image you want to query') + parser.add_argument("--batchid", + default=4, + help='the batch for id') + parser.add_argument("--batchimage", + default=4, + help='the batch of per id') + parser.add_argument("--batchtest", + default=8, + help='the batch size for test') + return parser.parse_args() + + +if __name__ == '__main__': + opt = parse_func() + data = Data(opt) + main = Main(data) + if opt.mode == 'evaluate_om': + print('start evaluate om') + main.evaluate_om() + elif opt.mode == 'save_bin': + print('start evaluate') + main.save_data() + else: raise NotImplementedError() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/preprocess_MGN.py b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/preprocess_MGN.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/preprocess_MGN.py +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/preprocess_MGN.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/pth2onnx.py b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/pth2onnx.py index 9a550c9fe93d983a6fd92e5a4282b0a010da2bcb..9cf592a866cee79048bc34c03b252b518f55596b 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/pth2onnx.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import torch -sys.path.append("./MGN") -from MGN.network import MGN -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - - -def pth2onnx(input_file, output_file, batch_size): - model = MGN() - model = model.to('cpu') - model.load_state_dict(torch.load(input_file, map_location=torch.device('cpu'))) - model.eval() - input_names = ["image"] - output_names = ["features"] - dynamic_axes = {'image': {0: '-1'}, 'features': {0: '-1'}} - dummy_input = torch.randn(batch_size, 3, 384, 128) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, - dynamic_axes = dynamic_axes, output_names = output_names, - opset_version=11, verbose=True) - print("***********************************Convert to ONNX model file SUCCESS!***" - "*******************************************") - - -if __name__ == '__main__': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
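For readers of `get_bin_info`/`get_jpg_info` above: each record written to the info file is a single space-joined line of index, file path, width and height. Below is a minimal round-trip of that format; the file name `demo.info`, the sample .bin path and the 128x384 size are illustrative assumptions, not values mandated by the script.
```
# Minimal round-trip of the info-file record format produced above:
# "<index> <path> <width> <height>", one record per line.
record = ' '.join([str(0), './bin_data/q/0001_c1s1_001051_00.bin', '128', '384'])
with open('demo.info', 'w') as f:
    f.write(record + '\n')

with open('demo.info') as f:
    index, path, width, height = f.readline().split()
print(index, path, width, height)
```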
+ +import os +import sys +import torch +sys.path.append("./MGN") +from MGN.network import MGN +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +def pth2onnx(input_file, output_file, batch_size): + model = MGN() + model = model.to('cpu') + model.load_state_dict(torch.load(input_file, map_location=torch.device('cpu'))) + model.eval() + input_names = ["image"] + output_names = ["features"] + dynamic_axes = {'image': {0: '-1'}, 'features': {0: '-1'}} + dummy_input = torch.randn(batch_size, 3, 384, 128) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, + dynamic_axes = dynamic_axes, output_names = output_names, + opset_version=11, verbose=True) + print("***********************************Convert to ONNX model file SUCCESS!***" + "*******************************************") + + +if __name__ == '__main__': pth2onnx(sys.argv[1], sys.argv[2], int(sys.argv[3])) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/requirements.txt b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/requirements.txt index 71bce80b95e60048b41a84f2fa5a6f59814087c9..7a28b67e6842ef317dfa0b09c5cfb0cd2db84342 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/requirements.txt +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/requirements.txt @@ -1,4 +1,4 @@ -torchvision>=0.6.0 -onnx>=1.7.0 -torch==1.7.0 -albumentations == 1.0.0 +torchvision>=0.6.0 +onnx>=1.7.0 +torch==1.7.0 +albumentations == 1.0.0 diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/README.md b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/README.md index ca7c62c2cc89353020f6ec57718ccbed3beaef6e..6e58e8cb1409740eb067f283a02c30d59069ea3a 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/README.md +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/README.md @@ -1,54 +1,54 @@ -环境准备: - -1.数据集路径 -- [Market1501数据集获取路径](https://pan.baidu.com/s/1ntIi2Op?_at_=1624593258681) -- 原始数据集已经放在/opt/npu/Market1501/下,应在./ReID-MGN-master/data/下新建Market1501目录,将/opt/npu/Market1501/下的文件拷贝到./ReID-MGN-master/data/Market1501下 -- ./data/Market1501/路径下,需要新建bin_data和bin_data_flip两个路径,bin_data和bin_data_flip两个路径下分别新建q和g两个路径 -- 需要新建model路径,预训练文件model.pt放在该路径下 -- 具体命令参考下文 - - -2.进入工作目录 -``` -cd /ReID-MGN-master -mkdir -p ./data/Market1501 -cp -r /opt/npu/Market1501/* ./data/Market1501/ -mkdir -p ./data/Market1501/bin_data/q -mkdir -p ./data/Market1501/bin_data/p -mkdir -p ./data/Market1501/bin_data_flip/q -mkdir -p ./data/Market1501/bin_data_flip/p -mkdir model -``` - -3.安装必要的依赖 -``` -pip3.7 install -r requirements.txt -``` - -4.获取模型代码 -``` -git clone https://github.com/GNAYUOHZ/ReID-MGN.git MGN -cd MGN && git checkout f0251e9e6003ec6f2c3fbc8ce5741d21436c20cf && cd - -patch -R MGN/data.py < module.patch -``` - -5.获取权重文件 -``` -(https://pan.baidu.com/s/12AkumLX10hLx9vh_SQwdyw) password:mrl5 -cp ${model.pt} ./model -``` - -6.获取benchmark工具 -``` -将benchmark.x86_64放在当前目录 -``` - -7.310上执行,执行时确保device空闲 -``` -source env.sh -apt install dos2unix -dos2unix test/pth2om.sh -bash test/pth2om.sh -dos2unix test/eval_acc_perf.sh -bash test/eval_acc_perf.sh -``` +环境准备: + +1.数据集路径 +- [Market1501数据集获取路径](https://pan.baidu.com/s/1ntIi2Op?_at_=1624593258681) +- 原始数据集已经放在/opt/npu/Market1501/下,应在./ReID-MGN-master/data/下新建Market1501目录,将/opt/npu/Market1501/下的文件拷贝到./ReID-MGN-master/data/Market1501下 +- ./data/Market1501/路径下,需要新建bin_data和bin_data_flip两个路径,bin_data和bin_data_flip两个路径下分别新建q和g两个路径 +- 需要新建model路径,预训练文件model.pt放在该路径下 +- 具体命令参考下文 + + +2.进入工作目录 +``` +cd /ReID-MGN-master +mkdir -p ./data/Market1501 +cp -r 
/opt/npu/Market1501/* ./data/Market1501/ +mkdir -p ./data/Market1501/bin_data/q +mkdir -p ./data/Market1501/bin_data/p +mkdir -p ./data/Market1501/bin_data_flip/q +mkdir -p ./data/Market1501/bin_data_flip/p +mkdir model +``` + +3.安装必要的依赖 +``` +pip3.7 install -r requirements.txt +``` + +4.获取模型代码 +``` +git clone https://github.com/GNAYUOHZ/ReID-MGN.git MGN +cd MGN && git checkout f0251e9e6003ec6f2c3fbc8ce5741d21436c20cf && cd - +patch -R MGN/data.py < module.patch +``` + +5.获取权重文件 +``` +(https://pan.baidu.com/s/12AkumLX10hLx9vh_SQwdyw) password:mrl5 +cp ${model.pt} ./model +``` + +6.获取benchmark工具 +``` +将benchmark.x86_64放在当前目录 +``` + +7.310上执行,执行时确保device空闲 +``` +source env.sh +apt install dos2unix +dos2unix test/pth2om.sh +bash test/pth2om.sh +dos2unix test/eval_acc_perf.sh +bash test/eval_acc_perf.sh +``` diff --git a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/parse.py b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/parse.py +++ b/ACL_PyTorch/contrib/cv/face/ReId-MGN-master/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
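To clarify the throughput parsing that `test/parse.py` performs on the benchmark summary: it pulls the `samples/s` figure out of the text report and multiplies it by 4, which I read (as an assumption) as scaling single-die throughput up to the four-die Ascend 310 card. The sketch below is a simplified variant using a direct regex on one synthetic line rather than the script's positional field indexing; the sample line is invented, not real benchmark output.
```
# Simplified, hedged variant of the fps extraction in test/parse.py.
# The summary line below is synthetic; the real script indexes the
# colon-separated fields of the whole report instead.
import re

line = '[e2e] throughputRate: 125.4samples/s, latency: 7974ms'
match = re.search(r'throughputRate:\s*([\d.]+)samples/s', line)
if match:
    fps = float(match.group(1)) * 4   # assumed four-die scaling
    print('estimated card fps: {:.1f}'.format(fps))
```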
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/Retinaface/aipp.cfg b/ACL_PyTorch/contrib/cv/face/Retinaface/aipp.cfg index 57715b3180c177204da7ebbf612fa2564aff3453..a19ab6ac2d74bab64e4d80d969b3b342d25f27b1 100644 --- a/ACL_PyTorch/contrib/cv/face/Retinaface/aipp.cfg +++ b/ACL_PyTorch/contrib/cv/face/Retinaface/aipp.cfg @@ -1,15 +1,15 @@ -aipp_op { -aipp_mode: static - -input_format: RGB888_U8 -src_image_size_w: 1000 -src_image_size_h: 1000 - -mean_chn_0: 104 -mean_chn_1: 117 -mean_chn_2: 123 - -var_reci_chn_0: 1 -var_reci_chn_1: 1 -var_reci_chn_2: 1 +aipp_op { +aipp_mode: static + +input_format: RGB888_U8 +src_image_size_w: 1000 +src_image_size_h: 1000 + +mean_chn_0: 104 +mean_chn_1: 117 +mean_chn_2: 123 + +var_reci_chn_0: 1 +var_reci_chn_1: 1 +var_reci_chn_2: 1 } \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/LICENSE b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/LICENSE +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_postprocess.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_postprocess.py index adc013a082d39bfaee5b880fe239b9e32d03c924..873e88ad7665a6f90644c991a65bd450008fa403 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_postprocess.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_postprocess.py @@ -1,264 +1,264 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os.path as osp -import os -import numpy as np -import torch -import datasets -from sklearn.metrics import average_precision_score -from collections import OrderedDict -from collections import defaultdict -import json - - -def get_data(name, data_dir): - root = osp.join(data_dir, name) - root = data_dir - dataset = datasets.create(name, root) - return dataset - - -def pairwise_distance(query_features, gallery_features, query=None, gallery=None): - x = torch.cat([query_features[f].unsqueeze(0) for f, _, _ in query], 0) - y = torch.cat([gallery_features[f].unsqueeze(0) for f, _, _ in gallery], 0) - m, n = x.size(0), y.size(0) - x = x.view(m, -1) - y = y.view(n, -1) - dist = torch.pow(x, 2).sum(1).unsqueeze(1).expand(m, n) + \ - torch.pow(y, 2).sum(1).unsqueeze(1).expand(n, m).t() - dist.addmm_(1, -2, x, y.t()) - return dist - - -def _unique_sample(ids_dict, num): - mask = np.zeros(num, dtype=np.bool) - for _, indices in ids_dict.items(): - i = np.random.choice(indices) - mask[i] = True - return mask - - -def cmc(distmat, query_ids=None, gallery_ids=None, - query_cams=None, gallery_cams=None, topk=100, - separate_camera_set=False, - single_gallery_shot=False, - first_match_break=False): - distmat = to_numpy(distmat) - m, n = distmat.shape - # Fill up default values - if query_ids is None: - query_ids = np.arange(m) - if gallery_ids is None: - gallery_ids = np.arange(n) - if query_cams is None: - query_cams = np.zeros(m).astype(np.int32) - if gallery_cams is None: - gallery_cams = np.ones(n).astype(np.int32) - # Ensure numpy array - query_ids = np.asarray(query_ids) - gallery_ids = np.asarray(gallery_ids) - query_cams = np.asarray(query_cams) - gallery_cams = np.asarray(gallery_cams) - # Sort and find correct matches - indices = np.argsort(distmat, axis=1) - matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) - # Compute CMC for each query - ret = np.zeros(topk) - num_valid_queries = 0 - for i in range(m): - # Filter out the same id and same camera - valid = ((gallery_ids[indices[i]] != query_ids[i]) | - (gallery_cams[indices[i]] != query_cams[i])) - if separate_camera_set: - # Filter out samples from same camera - valid &= (gallery_cams[indices[i]] != query_cams[i]) - if not np.any(matches[i, valid]): continue - if single_gallery_shot: - repeat = 10 - gids = gallery_ids[indices[i][valid]] - inds = np.where(valid)[0] - ids_dict = defaultdict(list) - for j, x in zip(inds, gids): - ids_dict[x].append(j) - else: - repeat = 1 - for _ 
in range(repeat): - if single_gallery_shot: - # Randomly choose one instance for each id - sampled = (valid & _unique_sample(ids_dict, len(valid))) - index = np.nonzero(matches[i, sampled])[0] - else: - index = np.nonzero(matches[i, valid])[0] - delta = 1. / (len(index) * repeat) - for j, k in enumerate(index): - if k - j >= topk: break - if first_match_break: - ret[k - j] += 1 - break - ret[k - j] += delta - num_valid_queries += 1 - if num_valid_queries == 0: - raise RuntimeError("No valid query") - return ret.cumsum() / num_valid_queries - - -def to_numpy(tensor): - if torch.is_tensor(tensor): - return tensor.cpu().numpy() - elif type(tensor).__module__ != 'numpy': - raise ValueError("Cannot convert {} to numpy array" - .format(type(tensor))) - return tensor - - -def mean_ap(distmat, query_ids=None, gallery_ids=None, - query_cams=None, gallery_cams=None): - distmat = to_numpy(distmat) - m, n = distmat.shape - # Fill up default values - if query_ids is None: - query_ids = np.arange(m) - if gallery_ids is None: - gallery_ids = np.arange(n) - if query_cams is None: - query_cams = np.zeros(m).astype(np.int32) - if gallery_cams is None: - gallery_cams = np.ones(n).astype(np.int32) - # Ensure numpy array - query_ids = np.asarray(query_ids) - gallery_ids = np.asarray(gallery_ids) - query_cams = np.asarray(query_cams) - gallery_cams = np.asarray(gallery_cams) - # Sort and find correct matches - indices = np.argsort(distmat, axis=1) - matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) - # Compute AP for each query - aps = [] - for i in range(m): - # Filter out the same id and same camera - valid = ((gallery_ids[indices[i]] != query_ids[i]) | - (gallery_cams[indices[i]] != query_cams[i])) - y_true = matches[i, valid] - y_score = -distmat[i][indices[i]][valid] - if not np.any(y_true): continue - aps.append(average_precision_score(y_true, y_score)) - if len(aps) == 0: - raise RuntimeError("No valid query") - return np.mean(aps) - - -def evaluate_all(distmat, query=None, gallery=None, - query_ids=None, gallery_ids=None, - query_cams=None, gallery_cams=None, - cmc_topk=(1, 5, 10)): - if query is not None and gallery is not None: - query_ids = [pid for _, pid, _ in query] - gallery_ids = [pid for _, pid, _ in gallery] - query_cams = [cam for _, _, cam in query] - gallery_cams = [cam for _, _, cam in gallery] - else: - assert (query_ids is not None and gallery_ids is not None - and query_cams is not None and gallery_cams is not None) - # Compute mean AP - mAP = mean_ap(distmat, query_ids, gallery_ids, query_cams, gallery_cams) - print('Mean AP: {:4.1%}'.format(mAP)) - # Compute all kinds of CMC scores - cmc_configs = { - 'allshots': dict(separate_camera_set=False, - single_gallery_shot=False, - first_match_break=False), - 'cuhk03': dict(separate_camera_set=True, - single_gallery_shot=True, - first_match_break=False), - 'market1501': dict(separate_camera_set=False, - single_gallery_shot=False, - first_match_break=True)} - cmc_scores = {name: cmc(distmat, query_ids, gallery_ids, - query_cams, gallery_cams, **params) - for name, params in cmc_configs.items()} - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - table_dict["value"].extend( - [{"key": "Number of images", "value": str(15913)}, - {"key": "Number of classes", "value": str(751)}]) - for k in cmc_topk: - table_dict["value"].append({"key": "Top-" + str(k) + " accuracy", - "value": str('{:.1%}'.format(cmc_scores['market1501'][k - 1]))}) - print('CMC Scores{:>12}' - 
.format('market1501')) - for k in cmc_topk: - print(' top-{:<4}{:12.1%}' - .format(k, cmc_scores['market1501'][k - 1])) - - print(table_dict) - writer = open('PCB_inference_result.json', 'w') - json.dump(table_dict, writer) - writer.close() - # Use the allshots cmc top-1 score for validation criterion - return cmc_scores['allshots'][0] - - -def load_result(filepath): - count = 0 - features = OrderedDict() - for root, dirs, files in os.walk(filepath): - for file in files: - file_tmp = file.split('.', 2)[0] - list_file = file_tmp.split('_') - if list_file[4] == '1': - file = filepath + '/' + file - output = np.fromfile(file, dtype='float32') - output = torch.from_numpy(output) - output = output.reshape(2048, 6, 1) - filename = list_file[0] + '_' + list_file[1] + '_' + list_file[2] + '_' + list_file[3] + '.jpg' - if list_file[0] == '1488' or filename == '0000_c6s3_094992_01.jpg' \ - or filename == '0000_c4s6_022316_04.jpg' or filename == '0000_c1s6_023071_04.jpg': - filename = filename + '.jpg' - features[filename] = output - count = count + 1 - return features - - -def evaluate_Ascend310(query_filepath, gallery_filepath, query, gallery): - print('extracting query features\n') - query_features_0 = load_result(query_filepath) - print('extracting gallery features\n') - gallery_features_0 = load_result(gallery_filepath) - distmat = pairwise_distance(query_features_0, gallery_features_0, query, gallery) - return evaluate_all(distmat, query=query, gallery=gallery) - - -def main(args): - dataset = get_data(args.dataset, args.data_dir) - evaluate_Ascend310(args.query, args.gallery, dataset.query, dataset.gallery) - return - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Softmax loss classification") - - parser.add_argument('-q', '--query', type=str, default='./dumpOutput_device0_query') - - parser.add_argument('-g', '--gallery', type=str, default='./dumpOutput_device0_gallery') - parser.add_argument('-d', '--dataset', type=str, default='market', - choices=datasets.names()) - - parser.add_argument('--data-dir', type=str, metavar='PATH', - default='./datasets/Market-1501/') - - main(parser.parse_args()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
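To make the ranking metrics in the PCB post-processing above easier to follow, here is a toy illustration of the core of `mean_ap`: sort one query's gallery distances, turn id equality into a binary match vector, and score it with sklearn's `average_precision_score`. Every number is invented, and the real function additionally filters out same-id, same-camera gallery entries, which this sketch omits.
```
# Toy, camera-filter-free illustration of the mean_ap() logic above.
import numpy as np
from sklearn.metrics import average_precision_score

distmat = np.array([[0.2, 0.9, 0.4, 0.7]])      # 1 query x 4 gallery (invented)
query_ids = np.array([5])
gallery_ids = np.array([5, 3, 5, 7])            # two true matches for id 5

indices = np.argsort(distmat, axis=1)           # closest gallery items first
matches = (gallery_ids[indices] == query_ids[:, np.newaxis])

y_true = matches[0]                              # [True, True, False, False] after sorting
y_score = -distmat[0][indices[0]]                # higher score means closer
print('AP for the toy query: {:.3f}'.format(average_precision_score(y_true, y_score)))
```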
+ +import argparse +import os.path as osp +import os +import numpy as np +import torch +import datasets +from sklearn.metrics import average_precision_score +from collections import OrderedDict +from collections import defaultdict +import json + + +def get_data(name, data_dir): + root = osp.join(data_dir, name) + root = data_dir + dataset = datasets.create(name, root) + return dataset + + +def pairwise_distance(query_features, gallery_features, query=None, gallery=None): + x = torch.cat([query_features[f].unsqueeze(0) for f, _, _ in query], 0) + y = torch.cat([gallery_features[f].unsqueeze(0) for f, _, _ in gallery], 0) + m, n = x.size(0), y.size(0) + x = x.view(m, -1) + y = y.view(n, -1) + dist = torch.pow(x, 2).sum(1).unsqueeze(1).expand(m, n) + \ + torch.pow(y, 2).sum(1).unsqueeze(1).expand(n, m).t() + dist.addmm_(1, -2, x, y.t()) + return dist + + +def _unique_sample(ids_dict, num): + mask = np.zeros(num, dtype=np.bool) + for _, indices in ids_dict.items(): + i = np.random.choice(indices) + mask[i] = True + return mask + + +def cmc(distmat, query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None, topk=100, + separate_camera_set=False, + single_gallery_shot=False, + first_match_break=False): + distmat = to_numpy(distmat) + m, n = distmat.shape + # Fill up default values + if query_ids is None: + query_ids = np.arange(m) + if gallery_ids is None: + gallery_ids = np.arange(n) + if query_cams is None: + query_cams = np.zeros(m).astype(np.int32) + if gallery_cams is None: + gallery_cams = np.ones(n).astype(np.int32) + # Ensure numpy array + query_ids = np.asarray(query_ids) + gallery_ids = np.asarray(gallery_ids) + query_cams = np.asarray(query_cams) + gallery_cams = np.asarray(gallery_cams) + # Sort and find correct matches + indices = np.argsort(distmat, axis=1) + matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) + # Compute CMC for each query + ret = np.zeros(topk) + num_valid_queries = 0 + for i in range(m): + # Filter out the same id and same camera + valid = ((gallery_ids[indices[i]] != query_ids[i]) | + (gallery_cams[indices[i]] != query_cams[i])) + if separate_camera_set: + # Filter out samples from same camera + valid &= (gallery_cams[indices[i]] != query_cams[i]) + if not np.any(matches[i, valid]): continue + if single_gallery_shot: + repeat = 10 + gids = gallery_ids[indices[i][valid]] + inds = np.where(valid)[0] + ids_dict = defaultdict(list) + for j, x in zip(inds, gids): + ids_dict[x].append(j) + else: + repeat = 1 + for _ in range(repeat): + if single_gallery_shot: + # Randomly choose one instance for each id + sampled = (valid & _unique_sample(ids_dict, len(valid))) + index = np.nonzero(matches[i, sampled])[0] + else: + index = np.nonzero(matches[i, valid])[0] + delta = 1. 
/ (len(index) * repeat) + for j, k in enumerate(index): + if k - j >= topk: break + if first_match_break: + ret[k - j] += 1 + break + ret[k - j] += delta + num_valid_queries += 1 + if num_valid_queries == 0: + raise RuntimeError("No valid query") + return ret.cumsum() / num_valid_queries + + +def to_numpy(tensor): + if torch.is_tensor(tensor): + return tensor.cpu().numpy() + elif type(tensor).__module__ != 'numpy': + raise ValueError("Cannot convert {} to numpy array" + .format(type(tensor))) + return tensor + + +def mean_ap(distmat, query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None): + distmat = to_numpy(distmat) + m, n = distmat.shape + # Fill up default values + if query_ids is None: + query_ids = np.arange(m) + if gallery_ids is None: + gallery_ids = np.arange(n) + if query_cams is None: + query_cams = np.zeros(m).astype(np.int32) + if gallery_cams is None: + gallery_cams = np.ones(n).astype(np.int32) + # Ensure numpy array + query_ids = np.asarray(query_ids) + gallery_ids = np.asarray(gallery_ids) + query_cams = np.asarray(query_cams) + gallery_cams = np.asarray(gallery_cams) + # Sort and find correct matches + indices = np.argsort(distmat, axis=1) + matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) + # Compute AP for each query + aps = [] + for i in range(m): + # Filter out the same id and same camera + valid = ((gallery_ids[indices[i]] != query_ids[i]) | + (gallery_cams[indices[i]] != query_cams[i])) + y_true = matches[i, valid] + y_score = -distmat[i][indices[i]][valid] + if not np.any(y_true): continue + aps.append(average_precision_score(y_true, y_score)) + if len(aps) == 0: + raise RuntimeError("No valid query") + return np.mean(aps) + + +def evaluate_all(distmat, query=None, gallery=None, + query_ids=None, gallery_ids=None, + query_cams=None, gallery_cams=None, + cmc_topk=(1, 5, 10)): + if query is not None and gallery is not None: + query_ids = [pid for _, pid, _ in query] + gallery_ids = [pid for _, pid, _ in gallery] + query_cams = [cam for _, _, cam in query] + gallery_cams = [cam for _, _, cam in gallery] + else: + assert (query_ids is not None and gallery_ids is not None + and query_cams is not None and gallery_cams is not None) + # Compute mean AP + mAP = mean_ap(distmat, query_ids, gallery_ids, query_cams, gallery_cams) + print('Mean AP: {:4.1%}'.format(mAP)) + # Compute all kinds of CMC scores + cmc_configs = { + 'allshots': dict(separate_camera_set=False, + single_gallery_shot=False, + first_match_break=False), + 'cuhk03': dict(separate_camera_set=True, + single_gallery_shot=True, + first_match_break=False), + 'market1501': dict(separate_camera_set=False, + single_gallery_shot=False, + first_match_break=True)} + cmc_scores = {name: cmc(distmat, query_ids, gallery_ids, + query_cams, gallery_cams, **params) + for name, params in cmc_configs.items()} + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + table_dict["value"].extend( + [{"key": "Number of images", "value": str(15913)}, + {"key": "Number of classes", "value": str(751)}]) + for k in cmc_topk: + table_dict["value"].append({"key": "Top-" + str(k) + " accuracy", + "value": str('{:.1%}'.format(cmc_scores['market1501'][k - 1]))}) + print('CMC Scores{:>12}' + .format('market1501')) + for k in cmc_topk: + print(' top-{:<4}{:12.1%}' + .format(k, cmc_scores['market1501'][k - 1])) + + print(table_dict) + writer = open('PCB_inference_result.json', 'w') + json.dump(table_dict, writer) + writer.close() + # Use the allshots cmc top-1 score 
for validation criterion + return cmc_scores['allshots'][0] + + +def load_result(filepath): + count = 0 + features = OrderedDict() + for root, dirs, files in os.walk(filepath): + for file in files: + file_tmp = file.split('.', 2)[0] + list_file = file_tmp.split('_') + if list_file[4] == '1': + file = filepath + '/' + file + output = np.fromfile(file, dtype='float32') + output = torch.from_numpy(output) + output = output.reshape(2048, 6, 1) + filename = list_file[0] + '_' + list_file[1] + '_' + list_file[2] + '_' + list_file[3] + '.jpg' + if list_file[0] == '1488' or filename == '0000_c6s3_094992_01.jpg' \ + or filename == '0000_c4s6_022316_04.jpg' or filename == '0000_c1s6_023071_04.jpg': + filename = filename + '.jpg' + features[filename] = output + count = count + 1 + return features + + +def evaluate_Ascend310(query_filepath, gallery_filepath, query, gallery): + print('extracting query features\n') + query_features_0 = load_result(query_filepath) + print('extracting gallery features\n') + gallery_features_0 = load_result(gallery_filepath) + distmat = pairwise_distance(query_features_0, gallery_features_0, query, gallery) + return evaluate_all(distmat, query=query, gallery=gallery) + + +def main(args): + dataset = get_data(args.dataset, args.data_dir) + evaluate_Ascend310(args.query, args.gallery, dataset.query, dataset.gallery) + return + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Softmax loss classification") + + parser.add_argument('-q', '--query', type=str, default='./dumpOutput_device0_query') + + parser.add_argument('-g', '--gallery', type=str, default='./dumpOutput_device0_gallery') + parser.add_argument('-d', '--dataset', type=str, default='market', + choices=datasets.names()) + + parser.add_argument('--data-dir', type=str, metavar='PATH', + default='./datasets/Market-1501/') + + main(parser.parse_args()) diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_preprocess.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_preprocess.py index b1ccc34faf0a22ff64a155f9f5d5144ae12545a4..26f765fd6ec01428a8f3d357e6e8f65c4fc49239 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_preprocess.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/PCB_pth_preprocess.py @@ -1,158 +1,158 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
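One detail worth spelling out about the .bin convention shared by `data_preprocess` here and `load_result` in the post-processing script: `tofile()` stores only raw float32 values with no shape metadata, so the reader must re-impose the shape itself. The round-trip below demonstrates this; the 3x384x128 shape is illustrative (one preprocessed image as in the batch-size-1 invocation from the README), not something the scripts enforce.
```
# Hedged round-trip of the raw-float32 .bin convention used above.
import numpy as np

img = np.random.rand(3, 384, 128).astype(np.float32)   # illustrative shape
img.tofile('demo.bin')

restored = np.fromfile('demo.bin', dtype='float32').reshape(3, 384, 128)
print(np.allclose(img, restored))   # True: values survive, shape is re-imposed
```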
- -import argparse -import os.path as osp -import os -import numpy as np -from torch.utils.data import DataLoader -import datasets -from torchvision.transforms import Normalize, RandomHorizontalFlip, ToTensor, Compose -from PIL import Image - - -class Preprocessor(object): - def __init__(self, dataset, root=None, transform=None): - super(Preprocessor, self).__init__() - self.dataset = dataset - self.root = root - self.transform = transform - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, indices): - if isinstance(indices, (tuple, list)): - return [self._get_single_item(index) for index in indices] - return self._get_single_item(indices) - - def _get_single_item(self, index): - fname, pid, camid = self.dataset[index] - fpath = fname - if self.root is not None: - fpath = osp.join(self.root, fname) - img = Image.open(fpath).convert('RGB') - if self.transform is not None: - img = self.transform(img) - return img, fname, pid, camid - - -class RectScale(object): - def __init__(self, height, width, interpolation=Image.BILINEAR): - self.height = height - self.width = width - self.interpolation = interpolation - - def __call__(self, img): - w, h = img.size - if h == self.height and w == self.width: - return img - return img.resize((self.width, self.height), self.interpolation) - - -def get_data(name, data_dir, height, width, batch_size, workers): - root = osp.join(data_dir, name) - root = data_dir - dataset = datasets.create(name, root) - - normalizer = Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - num_classes = dataset.num_train_ids - - train_transformer = Compose([ - RectScale(height, width), - RandomHorizontalFlip(), - ToTensor(), - normalizer, - ]) - - test_transformer = Compose([ - RectScale(height, width), - ToTensor(), - normalizer, - ]) - - - query_loader = DataLoader( - Preprocessor(dataset.query, root=osp.join(dataset.images_dir, dataset.query_path), - transform=test_transformer), - batch_size=batch_size, num_workers=workers, - shuffle=False, pin_memory=True) - - gallery_loader = DataLoader( - Preprocessor(dataset.gallery, root=osp.join(dataset.images_dir, dataset.gallery_path), - transform=test_transformer), - batch_size=batch_size, num_workers=workers, - shuffle=False, pin_memory=True) - - - return query_loader, gallery_loader - - -def data_preprocess(bin_filepath, dataloader): - if os.path.exists(bin_filepath) == False: - os.mkdir(bin_filepath) - else: - print('dir exist!') - - count = 0 - for i, (img, fname, pid, _) in enumerate(dataloader): - for fn, pi in zip(fname, pid): - fname_1 = bin_filepath + '/' + fn.split('.', 2)[0] + '.bin' - img = np.array(img).astype(np.float32) - img.tofile(fname_1) - count = count + 1 - return count - - -def main(args): - - # Create data loaders - if args.height is None or args.width is None: - args.height, args.width = (144, 56) if args.arch == 'inception' else \ - (256, 128) - query_loader, gallery_loader = \ - get_data(args.dataset, args.data_dir, args.height, - args.width, args.batch_size, args.workers, - ) - - count = data_preprocess('./gallery_preproc_data_Ascend310', gallery_loader) - print('number of images(gallery):') - print(count) - - count = data_preprocess('./query_preproc_data_Ascend310', query_loader) - print('number of images(query):') - print(count) - return - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Softmax loss classification") - # data - parser.add_argument('-d', '--dataset', type=str, default='cuhk03', - choices=datasets.names()) - 
parser.add_argument('-b', '--batch-size', type=int, default=256) - parser.add_argument('-j', '--workers', type=int, default=4) - parser.add_argument('--height', type=int, - help="input height, default: 256 for resnet*, " - "144 for inception") - parser.add_argument('--width', type=int, - help="input width, default: 128 for resnet*, " - "56 for inception") - # misc - working_dir = osp.dirname(osp.abspath(__file__)) - parser.add_argument('--data-dir', type=str, metavar='PATH', - default=osp.join(working_dir, 'data')) - main(parser.parse_args()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os.path as osp +import os +import numpy as np +from torch.utils.data import DataLoader +import datasets +from torchvision.transforms import Normalize, RandomHorizontalFlip, ToTensor, Compose +from PIL import Image + + +class Preprocessor(object): + def __init__(self, dataset, root=None, transform=None): + super(Preprocessor, self).__init__() + self.dataset = dataset + self.root = root + self.transform = transform + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, indices): + if isinstance(indices, (tuple, list)): + return [self._get_single_item(index) for index in indices] + return self._get_single_item(indices) + + def _get_single_item(self, index): + fname, pid, camid = self.dataset[index] + fpath = fname + if self.root is not None: + fpath = osp.join(self.root, fname) + img = Image.open(fpath).convert('RGB') + if self.transform is not None: + img = self.transform(img) + return img, fname, pid, camid + + +class RectScale(object): + def __init__(self, height, width, interpolation=Image.BILINEAR): + self.height = height + self.width = width + self.interpolation = interpolation + + def __call__(self, img): + w, h = img.size + if h == self.height and w == self.width: + return img + return img.resize((self.width, self.height), self.interpolation) + + +def get_data(name, data_dir, height, width, batch_size, workers): + root = osp.join(data_dir, name) + root = data_dir + dataset = datasets.create(name, root) + + normalizer = Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + num_classes = dataset.num_train_ids + + train_transformer = Compose([ + RectScale(height, width), + RandomHorizontalFlip(), + ToTensor(), + normalizer, + ]) + + test_transformer = Compose([ + RectScale(height, width), + ToTensor(), + normalizer, + ]) + + + query_loader = DataLoader( + Preprocessor(dataset.query, root=osp.join(dataset.images_dir, dataset.query_path), + transform=test_transformer), + batch_size=batch_size, num_workers=workers, + shuffle=False, pin_memory=True) + + gallery_loader = DataLoader( + Preprocessor(dataset.gallery, root=osp.join(dataset.images_dir, dataset.gallery_path), + transform=test_transformer), + batch_size=batch_size, num_workers=workers, + shuffle=False, pin_memory=True) + + + return query_loader, gallery_loader + + +def data_preprocess(bin_filepath, dataloader): + if 
os.path.exists(bin_filepath) == False: + os.mkdir(bin_filepath) + else: + print('dir exist!') + + count = 0 + for i, (img, fname, pid, _) in enumerate(dataloader): + for fn, pi in zip(fname, pid): + fname_1 = bin_filepath + '/' + fn.split('.', 2)[0] + '.bin' + img = np.array(img).astype(np.float32) + img.tofile(fname_1) + count = count + 1 + return count + + +def main(args): + + # Create data loaders + if args.height is None or args.width is None: + args.height, args.width = (144, 56) if args.arch == 'inception' else \ + (256, 128) + query_loader, gallery_loader = \ + get_data(args.dataset, args.data_dir, args.height, + args.width, args.batch_size, args.workers, + ) + + count = data_preprocess('./gallery_preproc_data_Ascend310', gallery_loader) + print('number of images(gallery):') + print(count) + + count = data_preprocess('./query_preproc_data_Ascend310', query_loader) + print('number of images(query):') + print(count) + return + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Softmax loss classification") + # data + parser.add_argument('-d', '--dataset', type=str, default='cuhk03', + choices=datasets.names()) + parser.add_argument('-b', '--batch-size', type=int, default=256) + parser.add_argument('-j', '--workers', type=int, default=4) + parser.add_argument('--height', type=int, + help="input height, default: 256 for resnet*, " + "144 for inception") + parser.add_argument('--width', type=int, + help="input width, default: 128 for resnet*, " + "56 for inception") + # misc + working_dir = osp.dirname(osp.abspath(__file__)) + parser.add_argument('--data-dir', type=str, metavar='PATH', + default=osp.join(working_dir, 'data')) + main(parser.parse_args()) diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/Readme.md b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/Readme.md index 49116bcbe13a9158aac0c2ab1d651a72e71774fe..f963242e8b60fff1a7b5927d2793e5914083be14 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/Readme.md +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/Readme.md @@ -1,285 +1,285 @@ -# PCB Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 开源TopN精度](#62-开源TopN精度) - - [6.2 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[PCB论文](https://arxiv.org/pdf/1711.09349.pdf) - -分支为 : master - -commit ID : e29cf54486427d1423277d4c793e39ac0eeff87c - -### 1.2 代码地址 -[PCB开源仓代码](https://github.com/syfafterzy/PCB_RPP_for_reID) - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -### 2.1 深度学习框架 -``` -python==3.6.7 -pytorch==1.8.1 -torchvision==0.2.1 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -scikit-learn == 0.24.1 -opencv-python == 4.5.2.54 -pillow == 8.2.0 -onnx == 1.9.0 -pillow == 8.2.0 -skl2onnx == 1.8.0 -h5py == 3.3.0 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - 
-1.下载pth权重文件 -[PCB预训练pth权重文件](https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt) -``` -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt - -``` - **说明:模型文件名为:PCB_3_7.pt 其md5sum值为:c5bc5ddabcbcc45f127ead797fe8cb35 PCB_3_7.pt** ->获取的预训练模型放在本仓根目录下 - -2.编写pth2onnx脚本pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -3.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 pth2onnx.py #将PCB_3_7.pt模型转为PCB.onnx模型 -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) -``` -atc --framework=5 --model=PCB.onnx --output=PCB --input_format=NCHW --input_shape="input_1:1,3,384,128" --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[Market数据集](https://pan.baidu.com/s/1ntIi2Op?_at_=1622802619466)的19732张验证集进行测试。数据集下载后,解压放到./datasets目录下。 - -### 4.2 数据集预处理 -1.预处理脚本PCB_pth_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -``` -python3.7 PCB_pth_preprocess.py -d market -b 1 --height 384 --width 128 --data-dir ./datasets/Market-1501/ -j 4 -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info_Ascend310.sh - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -sh get_info_Ascend310.sh -``` -在get_info_Ascend310.sh文件中调用华为提供的开源工具获取bin文件的路径和尺寸信息,该工具的第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -sudo ./benchmark_tools/benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./PCB.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False -sudo mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery -sudo mv ./result/perf_vision_batchsize_1_device_0.txt ./result/gallery_perf_vision_batchsize_1_device_0.txt -``` -``` -sudo ./benchmark_tools/benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./PCB.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False -sudo mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query -sudo mv ./result/perf_vision_batchsize_1_device_0.txt ./result/query_perf_vision_batchsize_1_device_0.txt -``` -输出结果默认保存在当前目录result/dumpOutput_device0下,由于需要通过om模型提取两组特征,因此根据输入图片类型(querry或gallery)分别重命名文件夹。 -3.特征图后处理 - -``` -python ./PCB_pth_postprocess.py -q ./result/dumpOutput_device0_query -g ./result/dumpOutput_device0_gallery -d market --data-dir ./datasets/Market-1501/ -``` -对om模型提取的特征做后处理并统计精度,结果如下: -``` -{'title': 'Overall statistical evaluation', 'value': [{'key': 'Number of images', 'value': '15913'}, {'key': 'Number of classes', 'value': '751'}, {'key': 'Top-1 accuracy', 'value': '92.1%'}, {'key': 'Top-5 accuracy', 'value': '96.9%'}, {'key': 'Top-10 accuracy', 'value': '98.1%'}]} -``` -## 6 精度对比 - -- **[开源TopN精度](#61-开源TopN精度)** -- 
**[精度对比](#62-精度对比)** - -### 6.1 开源TopN精度 -``` -CMC Scores market1501 - top-1 92.1% - top-5 96.9% - top-10 98.1% -``` -### 6.2 精度对比 -将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 - -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_1_device_0.txt.txt: -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 164.729, latency: 20445.7 -[data read] throughputRate: 184.812, moduleLatency: 5.41092 -[preprocess] throughputRate: 182.347, moduleLatency: 5.48405 -[infer] throughputRate: 175.577, Interface throughputRate: 253.855, moduleLatency: 4.91128 -[post] throughputRate: 175.573, moduleLatency: 5.69565 -``` -Interface throughputRate: 253.855,253.855* 4 = 1015.42既是batch1 310单卡吞吐率 - - -batch4的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_4_device_0.txt.txt: -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 157.081, latency: 21441.2 -[data read] throughputRate: 173.63, moduleLatency: 5.75937 -[preprocess] throughputRate: 171.283, moduleLatency: 5.83829 -[infer] throughputRate: 167.102, Interface throughputRate: 353.841, moduleLatency: 4.32693 -[post] throughputRate: 41.7725, moduleLatency: 23.9392 -``` -Interface throughputRate: 353.841,353.841* 4 = 1415.364既是batch4 310单卡吞吐率 - - -batch8的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_8_device_0.txt.txt: -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 132.514, latency: 25416.1 -[data read] throughputRate: 139.993, moduleLatency: 7.14319 -[preprocess] throughputRate: 139.054, moduleLatency: 7.19145 -[infer] throughputRate: 139.615, Interface throughputRate: 366.98, moduleLatency: 4.21507 -[post] throughputRate: 17.4505, moduleLatency: 57.305 -``` -Interface throughputRate: 366.98,366.98 * 4 = 1467.92既是batch8 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_16_device_0.txt.txt: -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 143.582, latency: 23457 -[data read] throughputRate: 150.172, moduleLatency: 6.65904 -[preprocess] throughputRate: 148.372, moduleLatency: 6.73981 -[infer] throughputRate: 147.201, Interface throughputRate: 362.414, moduleLatency: 4.28791 -[post] throughputRate: 9.22071, moduleLatency: 108.452 -``` -Interface throughputRate: 362.414,362.414 * 4 = 1449.656既是batch16 310单卡吞吐率 - - -batch32的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_32_device_0.txt.txt: -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 118.266, latency: 28478.2 -[data read] throughputRate: 126.885, moduleLatency: 7.88113 -[preprocess] throughputRate: 125.442, moduleLatency: 7.97179 -[infer] throughputRate: 124.065, Interface throughputRate: 354.632, moduleLatency: 4.30699 -[post] throughputRate: 3.90409, moduleLatency: 256.141 -``` -Interface throughputRate: 354.632,354.632 * 4 = 1418.528既是batch32 310单卡吞吐率 - -### 7.2 性能优化 -原始模型性能不达标原因分析: -根据profiling性能分析的表格,OM模型完成一次离线推理的总耗时中卷积计算(54次)、数据下采样(1次)和数据上采样(1次)这三类操作占总耗时的71%(36%+21%+19%)左右。再往细分,Task ID 
95~101总耗时的53.6%,及7%的任务数占了一半以上的耗时。查看对应任务的算子类型,大多为数据转换类:向量尺寸变换和数据类型转换,推测与npu中的算子硬件实现相关。(详见性能分析报告) - -原始模型性能与优化后模型性能对比: -batch1:441.128fps(Ascend310) < 1015.42fps(Ascend310) -batch16:1024.56(Ascend310) < 1449.656fps(Ascend310) - - -#### 7.2.1固定模型输入的batch size,并结合onnxsim工具对onnx模型进行优化 -优化动机:通过Netron查看onnx的模型结构图发现有一些常量算子可以折叠 - -优化样例: - - python -m onnxsim --input-shape="16,3,384,128" ./PCB.onnx ./PCB_sim_bs16.onnx - -#### 7.42.2.把ReduceL2算子拆分为mul+sum+sqrt算子(无损) -优化动机:Profilingdata可以看到ReduceL2这个算子耗时占比较大,原因是ReduceL2这个算子缺少优化,但是拆分后的算子是经过优化的,且拆分算子后模型的精度保持不变,因此选择拆分ReduceL2算子 - -优化样例: - - python ../scripts/split_reducelp.py ./PCB_sim_bs16.onnx ./PCB_sim_split_bs16.onnx - -#### 7.2.3.atc自动优化选项——autotune -优化动机:atc工具提供的自动优化选项 - -优化样例: - - atc --framework=5 --model=./PCB_sim_bs4.onnx --output=./PCB_sim_autotune_bs4 --input_format=NCHW --input_shape="input_1:4,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" +# PCB Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 开源TopN精度](#62-开源TopN精度) + - [6.2 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[PCB论文](https://arxiv.org/pdf/1711.09349.pdf) + +分支为 : master + +commit ID : e29cf54486427d1423277d4c793e39ac0eeff87c + +### 1.2 代码地址 +[PCB开源仓代码](https://github.com/syfafterzy/PCB_RPP_for_reID) + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +### 2.1 深度学习框架 +``` +python==3.6.7 +pytorch==1.8.1 +torchvision==0.2.1 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +scikit-learn == 0.24.1 +opencv-python == 4.5.2.54 +pillow == 8.2.0 +onnx == 1.9.0 +pillow == 8.2.0 +skl2onnx == 1.8.0 +h5py == 3.3.0 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.下载pth权重文件 +[PCB预训练pth权重文件](https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt) +``` +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt + +``` + **说明:模型文件名为:PCB_3_7.pt 其md5sum值为:c5bc5ddabcbcc45f127ead797fe8cb35 PCB_3_7.pt** +>获取的预训练模型放在本仓根目录下 + +2.编写pth2onnx脚本pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +3.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 pth2onnx.py #将PCB_3_7.pt模型转为PCB.onnx模型 +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) +``` +atc --framework=5 --model=PCB.onnx --output=PCB --input_format=NCHW --input_shape="input_1:1,3,384,128" --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 
+该模型使用[Market数据集](https://pan.baidu.com/s/1ntIi2Op?_at_=1622802619466)的19732张验证集进行测试。数据集下载后,解压放到./datasets目录下。 + +### 4.2 数据集预处理 +1.预处理脚本PCB_pth_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +``` +python3.7 PCB_pth_preprocess.py -d market -b 1 --height 384 --width 128 --data-dir ./datasets/Market-1501/ -j 4 +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info_Ascend310.sh + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +sh get_info_Ascend310.sh +``` +在get_info_Ascend310.sh文件中调用华为提供的开源工具获取bin文件的路径和尺寸信息,该工具的第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN V100R020C10 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +sudo ./benchmark_tools/benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./PCB.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False +sudo mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery +sudo mv ./result/perf_vision_batchsize_1_device_0.txt ./result/gallery_perf_vision_batchsize_1_device_0.txt +``` +``` +sudo ./benchmark_tools/benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./PCB.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False +sudo mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query +sudo mv ./result/perf_vision_batchsize_1_device_0.txt ./result/query_perf_vision_batchsize_1_device_0.txt +``` +输出结果默认保存在当前目录result/dumpOutput_device0下,由于需要通过om模型提取两组特征,因此根据输入图片类型(querry或gallery)分别重命名文件夹。 +3.特征图后处理 + +``` +python ./PCB_pth_postprocess.py -q ./result/dumpOutput_device0_query -g ./result/dumpOutput_device0_gallery -d market --data-dir ./datasets/Market-1501/ +``` +对om模型提取的特征做后处理并统计精度,结果如下: +``` +{'title': 'Overall statistical evaluation', 'value': [{'key': 'Number of images', 'value': '15913'}, {'key': 'Number of classes', 'value': '751'}, {'key': 'Top-1 accuracy', 'value': '92.1%'}, {'key': 'Top-5 accuracy', 'value': '96.9%'}, {'key': 'Top-10 accuracy', 'value': '98.1%'}]} +``` +## 6 精度对比 + +- **[开源TopN精度](#61-开源TopN精度)** +- **[精度对比](#62-精度对比)** + +### 6.1 开源TopN精度 +``` +CMC Scores market1501 + top-1 92.1% + top-5 96.9% + top-10 98.1% +``` +### 6.2 精度对比 +将得到的om离线模型推理TopN精度与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 + +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_1_device_0.txt.txt: +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 164.729, latency: 20445.7 +[data read] throughputRate: 184.812, moduleLatency: 5.41092 +[preprocess] throughputRate: 182.347, moduleLatency: 5.48405 +[infer] throughputRate: 175.577, Interface throughputRate: 253.855, moduleLatency: 4.91128 +[post] throughputRate: 175.573, moduleLatency: 5.69565 +``` 
+Interface throughputRate: 253.855,253.855* 4 = 1015.42既是batch1 310单卡吞吐率 + + +batch4的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_4_device_0.txt.txt: +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 157.081, latency: 21441.2 +[data read] throughputRate: 173.63, moduleLatency: 5.75937 +[preprocess] throughputRate: 171.283, moduleLatency: 5.83829 +[infer] throughputRate: 167.102, Interface throughputRate: 353.841, moduleLatency: 4.32693 +[post] throughputRate: 41.7725, moduleLatency: 23.9392 +``` +Interface throughputRate: 353.841,353.841* 4 = 1415.364既是batch4 310单卡吞吐率 + + +batch8的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_8_device_0.txt.txt: +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 132.514, latency: 25416.1 +[data read] throughputRate: 139.993, moduleLatency: 7.14319 +[preprocess] throughputRate: 139.054, moduleLatency: 7.19145 +[infer] throughputRate: 139.615, Interface throughputRate: 366.98, moduleLatency: 4.21507 +[post] throughputRate: 17.4505, moduleLatency: 57.305 +``` +Interface throughputRate: 366.98,366.98 * 4 = 1467.92既是batch8 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_16_device_0.txt.txt: +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 143.582, latency: 23457 +[data read] throughputRate: 150.172, moduleLatency: 6.65904 +[preprocess] throughputRate: 148.372, moduleLatency: 6.73981 +[infer] throughputRate: 147.201, Interface throughputRate: 362.414, moduleLatency: 4.28791 +[post] throughputRate: 9.22071, moduleLatency: 108.452 +``` +Interface throughputRate: 362.414,362.414 * 4 = 1449.656既是batch16 310单卡吞吐率 + + +batch32的性能,benchmark工具在整个数据集上推理后生成result/query_perf_vision_batchsize_32_device_0.txt.txt: +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 118.266, latency: 28478.2 +[data read] throughputRate: 126.885, moduleLatency: 7.88113 +[preprocess] throughputRate: 125.442, moduleLatency: 7.97179 +[infer] throughputRate: 124.065, Interface throughputRate: 354.632, moduleLatency: 4.30699 +[post] throughputRate: 3.90409, moduleLatency: 256.141 +``` +Interface throughputRate: 354.632,354.632 * 4 = 1418.528既是batch32 310单卡吞吐率 + +### 7.2 性能优化 +原始模型性能不达标原因分析: +根据profiling性能分析的表格,OM模型完成一次离线推理的总耗时中卷积计算(54次)、数据下采样(1次)和数据上采样(1次)这三类操作占总耗时的71%(36%+21%+19%)左右。再往细分,Task ID 95~101总耗时的53.6%,及7%的任务数占了一半以上的耗时。查看对应任务的算子类型,大多为数据转换类:向量尺寸变换和数据类型转换,推测与npu中的算子硬件实现相关。(详见性能分析报告) + +原始模型性能与优化后模型性能对比: +batch1:441.128fps(Ascend310) < 1015.42fps(Ascend310) +batch16:1024.56(Ascend310) < 1449.656fps(Ascend310) + + +#### 7.2.1固定模型输入的batch size,并结合onnxsim工具对onnx模型进行优化 +优化动机:通过Netron查看onnx的模型结构图发现有一些常量算子可以折叠 + +优化样例: + + python -m onnxsim --input-shape="16,3,384,128" ./PCB.onnx ./PCB_sim_bs16.onnx + +#### 7.42.2.把ReduceL2算子拆分为mul+sum+sqrt算子(无损) +优化动机:Profilingdata可以看到ReduceL2这个算子耗时占比较大,原因是ReduceL2这个算子缺少优化,但是拆分后的算子是经过优化的,且拆分算子后模型的精度保持不变,因此选择拆分ReduceL2算子 + +优化样例: + + python ../scripts/split_reducelp.py ./PCB_sim_bs16.onnx ./PCB_sim_split_bs16.onnx + +#### 7.2.3.atc自动优化选项——autotune +优化动机:atc工具提供的自动优化选项 + +优化样例: + + atc --framework=5 --model=./PCB_sim_bs4.onnx --output=./PCB_sim_autotune_bs4 --input_format=NCHW --input_shape="input_1:4,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/env.sh b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/env.sh index 
2067d9b3a827d3fba047c1138a7ced0633d92fa0..9bc1cb3e52e2106d2eb923d1c61ac18c4ac93673 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/env.sh +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/env.sh @@ -1,6 +1,6 @@ -#! /bin/bash -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp +#! /bin/bash +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/get_info.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/get_info.py index 5af675c6b0ae3f28a6f191d139ebb44e775e6c1c..fc6cdebb5b4417a3651c1e6e9663d8d1299a0ef5 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/get_info.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/modelzoo_level.txt index 39175166a4b66dd0ead8d95dcb7ea49c56a868e3..119ddfc69182d1e11b6ce03723be060336966991 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/modelzoo_level.txt @@ -1,2 +1,2 @@ -ModelConvert:OK +ModelConvert:OK QuantStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/pth2onnx.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/pth2onnx.py index 78a1efa792d938c9fecb0d80f4bc3229e402e490..3c26b154e1169fe591ef39c65d716181a2ea2cfd 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/pth2onnx.py @@ -1,37 +1,37 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import argparse -from torch.autograd import Variable - - -def main(args): - model = torch.load(args.pth) - x = torch.randn(1, 3, 384, 128) - model.eval() - input_names=["input_1"] - output_names=["output_1"] - dynamic_axes = {'input_1': {0: '-1'}, 'output_1': {0: '-1'}} - x = Variable(x, volatile=True) - # Export the model - torch.onnx.export(model, x, "./models/PCB.onnx", input_names=input_names, output_names=output_names, \ - dynamic_axes=dynamic_axes, opset_version=11, verbose=True, do_constant_folding=True, export_params=True) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Softmax loss classification") - # data - parser.add_argument('-p', '--pth', type=str, default='./models/PCB_3_7.pt',) - main(parser.parse_args()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import argparse +from torch.autograd import Variable + + +def main(args): + model = torch.load(args.pth) + x = torch.randn(1, 3, 384, 128) + model.eval() + input_names=["input_1"] + output_names=["output_1"] + dynamic_axes = {'input_1': {0: '-1'}, 'output_1': {0: '-1'}} + x = Variable(x, volatile=True) + # Export the model + torch.onnx.export(model, x, "./models/PCB.onnx", input_names=input_names, output_names=output_names, \ + dynamic_axes=dynamic_axes, opset_version=11, verbose=True, do_constant_folding=True, export_params=True) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Softmax loss classification") + # data + parser.add_argument('-p', '--pth', type=str, default='./models/PCB_3_7.pt',) + main(parser.parse_args()) diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/requirements.txt b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/requirements.txt index 37e7fabb8cbf1d08132728daaa1cd18e32195f59..84c915b351ec8b38ac89852e22a8fb008a4cf2db 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/requirements.txt +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/requirements.txt @@ -1,9 +1,9 @@ -torch == 1.8.1 -torchvision == 0.2.1 -numpy == 1.19.2 -opencv-python == 4.5.2.54 -onnx == 1.9.0 -skl2onnx == 1.8.0 -scikit-learn -h5py -onnx-simplifier +torch == 1.8.1 +torchvision == 0.2.1 +numpy == 1.19.2 +opencv-python == 4.5.2.54 +onnx == 1.9.0 +skl2onnx == 1.8.0 +scikit-learn +h5py +onnx-simplifier diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/Readme.md b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/Readme.md index 59da2c434c5b1c471f2b8290e53baa57bd7861d8..0965701b4617f5a1acb4579ec215aa8edd35a22e 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/Readme.md +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/Readme.md @@ -1,69 +1,69 @@ -环境准备: - - -1.获取开源仓代码 - - git clone https://gitee.com/hu-zongqi/modelzoo.git - -2.数据集路径 - -通用的数据集统一放在/root/datasets/或/opt/npu/ - -本模型使用的数据集为Market_1501,放在目录/opt/npu/下 - - -3.进入工作目录 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline - 
-4.下载PCB模型论文开源仓代码,并将PCB_RPP_for_reID/reid目录下的datasets文件夹移动到当前目录 - - git clone https://github.com/syfafterzy/PCB_RPP_for_reID.git - cd PCB_RPP_for_reID - git checkout e29cf54486427d1423277d4c793e39ac0eeff87c - cd .. - cp -r PCB_RPP_for_reID/reid/datasets ./ - -5.合并补丁 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline - cp ./test/resnet.diff PCB_RPP_for_reID/reid/models - cd PCB_RPP_for_reID/reid/models - patch -p0 < resnet.diff - -6.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline - pip3.7 install -r requirements.txt - -下载onnx工具: - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test - git clone https://gitee.com/zheng-wengang1/onnx_tools.git scripts/utils - cd scripts/utils - git checkout cbb099e5f2cef3d76c7630bffe0ee8250b03d921 - -7.获取benchmark工具 - -将benchmark.x86_64 放在modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline目录下 - -8.移动PCB_RPP_for_reID/reid到modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline下 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline - mv PCB_RPP_for_reID/reid ./ - -9.下载预训练模型文件 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test - mkdir models - wget -P ./models/ https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt - - -10.310上执行,执行时确保device空闲 - - cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test - dos2unix *.sh - dos2unix ../env.sh - source ../env.sh - sudo sh pth2om.sh - sudo sh eval_acc_perf.sh +环境准备: + + +1.获取开源仓代码 + + git clone https://gitee.com/hu-zongqi/modelzoo.git + +2.数据集路径 + +通用的数据集统一放在/root/datasets/或/opt/npu/ + +本模型使用的数据集为Market_1501,放在目录/opt/npu/下 + + +3.进入工作目录 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline + +4.下载PCB模型论文开源仓代码,并将PCB_RPP_for_reID/reid目录下的datasets文件夹移动到当前目录 + + git clone https://github.com/syfafterzy/PCB_RPP_for_reID.git + cd PCB_RPP_for_reID + git checkout e29cf54486427d1423277d4c793e39ac0eeff87c + cd .. 
+ cp -r PCB_RPP_for_reID/reid/datasets ./ + +5.合并补丁 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline + cp ./test/resnet.diff PCB_RPP_for_reID/reid/models + cd PCB_RPP_for_reID/reid/models + patch -p0 < resnet.diff + +6.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline + pip3.7 install -r requirements.txt + +下载onnx工具: + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test + git clone https://gitee.com/zheng-wengang1/onnx_tools.git scripts/utils + cd scripts/utils + git checkout cbb099e5f2cef3d76c7630bffe0ee8250b03d921 + +7.获取benchmark工具 + +将benchmark.x86_64 放在modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline目录下 + +8.移动PCB_RPP_for_reID/reid到modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline下 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline + mv PCB_RPP_for_reID/reid ./ + +9.下载预训练模型文件 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test + mkdir models + wget -P ./models/ https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/face/PCB/PCB_3_7.pt + + +10.310上执行,执行时确保device空闲 + + cd modelzoo/contrib/ACL_PyTorch/Research/cv/face/reid_PCB_baseline/test + dos2unix *.sh + dos2unix ../env.sh + source ../env.sh + sudo sh pth2om.sh + sudo sh eval_acc_perf.sh diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/eval_acc_perf.sh b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/eval_acc_perf.sh index 0f11595c873e527432e7ef08e80039e088119cf3..796bed2ed2a686d4b3ab0463804b4f1e5473cb5c 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/eval_acc_perf.sh +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/eval_acc_perf.sh @@ -1,53 +1,53 @@ -#! /bin/bash - -#test - -#清除上次运行数据 -rm -r ./result/* -rm -r ./query_preproc_data_Ascend310 -rm -r ./gallery_preproc_data_Ascend310 -#数据预处理 -echo "preprocess......" -python3.7 ../PCB_pth_preprocess.py -d market -b 1 --height 384 --width 128 --data-dir /opt/npu/Market_1501/ -j 4 -#生成数据集信息文件 -echo "get_info......" -python3.7 ../get_info.py bin ./query_preproc_data_Ascend310 ./query_preproc_data_Ascend310.info 128 384 -python3.7 ../get_info.py bin ./gallery_preproc_data_Ascend310 ./gallery_preproc_data_Ascend310.info 128 384 -#离线推理 bs = 1 -echo "off-line inference bs = 1......" -#gallery -../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./models/PCB_sim_split_bs1_autotune.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> gallary_bs1.log -mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery_bs1 -mv ./result/perf_vision_batchsize_1_device_0.txt ./result/gallery_perf_vision_batchsize_1_device_0.txt -#query -../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./models/PCB_sim_split_bs1_autotune.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> query_bs1.log -mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query_bs1 -mv ./result/perf_vision_batchsize_1_device_0.txt ./result/query_perf_vision_batchsize_1_device_0.txt -#离线推理 bs = 16 -echo "off-line inference bs = 16......" 
-#gallery -../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./models/PCB_sim_split_bs16_autotune.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> gallary_bs16.log -mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery_bs16 -mv ./result/perf_vision_batchsize_16_device_0.txt ./result/gallery_perf_vision_batchsize_16_device_0.txt -#query -../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./models/PCB_sim_split_bs16_autotune.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> query_bs16.log -mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query_bs16 -mv ./result/perf_vision_batchsize_16_device_0.txt ./result/query_perf_vision_batchsize_16_device_0.txt -###数据后处理 -echo "postprocess......" -python3.7 ../PCB_pth_postprocess.py -q ./result/dumpOutput_device0_query_bs1 -g ./result/dumpOutput_device0_gallery_bs1 -d market --data-dir /opt/npu/Market_1501/ -echo "====performance data====" -echo "bs1 : " -python3.7 parse.py ./result/gallery_perf_vision_batchsize_1_device_0.txt -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -echo "bs16 : " -python3.7 parse.py ./result/gallery_perf_vision_batchsize_16_device_0.txt -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi - -echo "success" +#! /bin/bash + +#test + +#清除上次运行数据 +rm -r ./result/* +rm -r ./query_preproc_data_Ascend310 +rm -r ./gallery_preproc_data_Ascend310 +#数据预处理 +echo "preprocess......" +python3.7 ../PCB_pth_preprocess.py -d market -b 1 --height 384 --width 128 --data-dir /opt/npu/Market_1501/ -j 4 +#生成数据集信息文件 +echo "get_info......" +python3.7 ../get_info.py bin ./query_preproc_data_Ascend310 ./query_preproc_data_Ascend310.info 128 384 +python3.7 ../get_info.py bin ./gallery_preproc_data_Ascend310 ./gallery_preproc_data_Ascend310.info 128 384 +#离线推理 bs = 1 +echo "off-line inference bs = 1......" +#gallery +../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./models/PCB_sim_split_bs1_autotune.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> gallary_bs1.log +mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery_bs1 +mv ./result/perf_vision_batchsize_1_device_0.txt ./result/gallery_perf_vision_batchsize_1_device_0.txt +#query +../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./models/PCB_sim_split_bs1_autotune.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> query_bs1.log +mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query_bs1 +mv ./result/perf_vision_batchsize_1_device_0.txt ./result/query_perf_vision_batchsize_1_device_0.txt +#离线推理 bs = 16 +echo "off-line inference bs = 16......" 
+#gallery +../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./models/PCB_sim_split_bs16_autotune.om -input_text_path=./gallery_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> gallary_bs16.log +mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_gallery_bs16 +mv ./result/perf_vision_batchsize_16_device_0.txt ./result/gallery_perf_vision_batchsize_16_device_0.txt +#query +../benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=16 -om_path=./models/PCB_sim_split_bs16_autotune.om -input_text_path=./query_preproc_data_Ascend310.info -input_width=128 -input_height=384 -output_binary=True -useDvpp=False >> query_bs16.log +mv ./result/dumpOutput_device0 ./result/dumpOutput_device0_query_bs16 +mv ./result/perf_vision_batchsize_16_device_0.txt ./result/query_perf_vision_batchsize_16_device_0.txt +###数据后处理 +echo "postprocess......" +python3.7 ../PCB_pth_postprocess.py -q ./result/dumpOutput_device0_query_bs1 -g ./result/dumpOutput_device0_gallery_bs1 -d market --data-dir /opt/npu/Market_1501/ +echo "====performance data====" +echo "bs1 : " +python3.7 parse.py ./result/gallery_perf_vision_batchsize_1_device_0.txt +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +echo "bs16 : " +python3.7 parse.py ./result/gallery_perf_vision_batchsize_16_device_0.txt +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi + +echo "success" diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/parse.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/parse.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/pth2om.sh b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/pth2om.sh index 65dbe5abd5dcfb9e4f4f0feb5a8fb1cdefbe8de1..8c1c58941dabfbade3a0d486b45c2bed50e69a6b 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/pth2om.sh +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/pth2om.sh @@ -1,19 +1,19 @@ -#! /bin/bash -echo "--------------------------------------------" -python3.7 ../pth2onnx.py -echo "--------------------------------------------" -python3.7 -m onnxsim --input-shape="1,3,384,128" ./models/PCB.onnx ./models/PCB_sim_bs1.onnx -echo "--------------------------------------------" -python3.7 -m onnxsim --input-shape="16,3,384,128" ./models/PCB.onnx ./models/PCB_sim_bs16.onnx -#bs1 -echo "--------------------------------------------" -python3.7 ./scripts/split_reducelp.py ./models/PCB_sim_bs1.onnx ./models/PCB_sim_split_bs1.onnx -#bs16 -echo "--------------------------------------------" -python3.7 ./scripts/split_reducelp.py ./models/PCB_sim_bs16.onnx ./models/PCB_sim_split_bs16.onnx -#转OM模型 bs=1 -echo "--------------------------------------------" -atc --framework=5 --model=./models/PCB_sim_split_bs1.onnx --output=./models/PCB_sim_split_bs1_autotune --input_format=NCHW --input_shape="input_1:1,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" --out_nodes='Div_126:0;Gemm_191:0;Gemm_192:0;Gemm_193:0;Gemm_194:0;Gemm_195:0;Gemm_196:0' -#转OM模型 bs=16 -echo "--------------------------------------------" -atc --framework=5 --model=./models/PCB_sim_split_bs16.onnx --output=./models/PCB_sim_split_bs16_autotune --input_format=NCHW --input_shape="input_1:16,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" --out_nodes='Div_126:0;Gemm_191:0;Gemm_192:0;Gemm_193:0;Gemm_194:0;Gemm_195:0;Gemm_196:0' +#! 
/bin/bash +echo "--------------------------------------------" +python3.7 ../pth2onnx.py +echo "--------------------------------------------" +python3.7 -m onnxsim --input-shape="1,3,384,128" ./models/PCB.onnx ./models/PCB_sim_bs1.onnx +echo "--------------------------------------------" +python3.7 -m onnxsim --input-shape="16,3,384,128" ./models/PCB.onnx ./models/PCB_sim_bs16.onnx +#bs1 +echo "--------------------------------------------" +python3.7 ./scripts/split_reducelp.py ./models/PCB_sim_bs1.onnx ./models/PCB_sim_split_bs1.onnx +#bs16 +echo "--------------------------------------------" +python3.7 ./scripts/split_reducelp.py ./models/PCB_sim_bs16.onnx ./models/PCB_sim_split_bs16.onnx +#转OM模型 bs=1 +echo "--------------------------------------------" +atc --framework=5 --model=./models/PCB_sim_split_bs1.onnx --output=./models/PCB_sim_split_bs1_autotune --input_format=NCHW --input_shape="input_1:1,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" --out_nodes='Div_126:0;Gemm_191:0;Gemm_192:0;Gemm_193:0;Gemm_194:0;Gemm_195:0;Gemm_196:0' +#转OM模型 bs=16 +echo "--------------------------------------------" +atc --framework=5 --model=./models/PCB_sim_split_bs16.onnx --output=./models/PCB_sim_split_bs16_autotune --input_format=NCHW --input_shape="input_1:16,3,384,128" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" --out_nodes='Div_126:0;Gemm_191:0;Gemm_192:0;Gemm_193:0;Gemm_194:0;Gemm_195:0;Gemm_196:0' diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/resnet.diff b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/resnet.diff index f3ed9282524756355dd30b39b211c3562d4d6647..77d06ca8a0d543e6796e68aa8cb31eb71f0340e9 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/resnet.diff +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/resnet.diff @@ -1,11 +1,11 @@ ---- resnet.py 2021-06-30 18:13:34.165260365 +0800 -+++ resnet_modified.py 2021-06-30 18:14:03.373647350 +0800 -@@ -128,7 +128,7 @@ class ResNet(nn.Module): - y = F.avg_pool3d(x,(16,1,1)).squeeze(1) - sx = x.size(2)/6 - kx = x.size(2)-sx*5 -- x = F.avg_pool2d(x,kernel_size=(kx,x.size(3)),stride=(sx,x.size(3))) # H4 W8 -+ x = F.avg_pool2d(x,kernel_size=(4,8),stride=(4,8)) # H4 W8 - #========================================================================# - - out0 = x.view(x.size(0),-1) +--- resnet.py 2021-06-30 18:13:34.165260365 +0800 ++++ resnet_modified.py 2021-06-30 18:14:03.373647350 +0800 +@@ -128,7 +128,7 @@ class ResNet(nn.Module): + y = F.avg_pool3d(x,(16,1,1)).squeeze(1) + sx = x.size(2)/6 + kx = x.size(2)-sx*5 +- x = F.avg_pool2d(x,kernel_size=(kx,x.size(3)),stride=(sx,x.size(3))) # H4 W8 ++ x = F.avg_pool2d(x,kernel_size=(4,8),stride=(4,8)) # H4 W8 + #========================================================================# + + out0 = x.view(x.size(0),-1) diff --git a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/scripts/split_reducelp.py b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/scripts/split_reducelp.py index dcbba7b5e79db1fa6dd18235ff87db42f77989ec..fcf1f4e7a48c234d0bd2a8a83b187f96f85c6810 100644 --- a/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/scripts/split_reducelp.py +++ b/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline/test/scripts/split_reducelp.py @@ -1,51 +1,51 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from utils.OXInterface.OXInterface import OXGraph - - -def main(input_model, out_path): - oxgraph = OXGraph(input_model) - # ReduceL2->ReduceSum - onnx_node = oxgraph.get_oxnode_by_name('ReduceL2_122') - onnx_node.set_op_type('ReduceSum') - onnx_node.set_name('ReduceSum1') - - # 插入mul+sqrt节点 - oxgraph.insert_node( - bef_node_info_list=['AveragePool_121:0', 'AveragePool_121:0'], - aft_node_info_list=['ReduceSum1'], - op_type='Mul', - op_name='Mul1' - ) - oxgraph.insert_node( - bef_node_info_list=['ReduceSum1'], - aft_node_info_list=['Expand_125'], - op_type='Sqrt', - op_name='Sqrt1' - ) - - oxgraph.save_new_model(out_path) - - -if __name__ == '__main__': - input_model = sys.argv[1] - out_path = sys.argv[2] - out_path = os.path.abspath(out_path) - print(input_model) - print(out_path) - os.makedirs(os.path.dirname(out_path), exist_ok=True) - main(input_model, out_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from utils.OXInterface.OXInterface import OXGraph + + +def main(input_model, out_path): + oxgraph = OXGraph(input_model) + # ReduceL2->ReduceSum + onnx_node = oxgraph.get_oxnode_by_name('ReduceL2_122') + onnx_node.set_op_type('ReduceSum') + onnx_node.set_name('ReduceSum1') + + # 插入mul+sqrt节点 + oxgraph.insert_node( + bef_node_info_list=['AveragePool_121:0', 'AveragePool_121:0'], + aft_node_info_list=['ReduceSum1'], + op_type='Mul', + op_name='Mul1' + ) + oxgraph.insert_node( + bef_node_info_list=['ReduceSum1'], + aft_node_info_list=['Expand_125'], + op_type='Sqrt', + op_name='Sqrt1' + ) + + oxgraph.save_new_model(out_path) + + +if __name__ == '__main__': + input_model = sys.argv[1] + out_path = sys.argv[2] + out_path = os.path.abspath(out_path) + print(input_model) + print(out_path) + os.makedirs(os.path.dirname(out_path), exist_ok=True) + main(input_model, out_path) diff --git a/ACL_PyTorch/contrib/cv/gan/BigGAN/LICENSE b/ACL_PyTorch/contrib/cv/gan/BigGAN/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/gan/BigGAN/LICENSE +++ b/ACL_PyTorch/contrib/cv/gan/BigGAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/BigGAN/README.md b/ACL_PyTorch/contrib/cv/gan/BigGAN/README.md index 78a0b1f9d517b95bcdf1b8fe82137d21def1f759..b29bf3262272b433b882046268e151daf2b42906 100644 --- a/ACL_PyTorch/contrib/cv/gan/BigGAN/README.md +++ b/ACL_PyTorch/contrib/cv/gan/BigGAN/README.md @@ -1,225 +1,225 @@ -# BigGAN ONNX模型端到端推理指导 -- [1. 模型概述](#1) - - [论文地址](#11) - - [代码地址](#12) -- [2. 环境说明](#2) - - [深度学习框架](#21) - - [python第三方库](#22) -- [3. 模型转换](#3) - - [pth转onnx模型](#31) - - [onnx转om模型](#32) -- [4. 输入数据生成](#4) - - [数据生成](#41) -- [5. 离线推理](#5) - - [msame工具概述](#51) - - [离线推理](#52) -- [6. 精度对比](#6) - - [模型后处理](#61) - - [精度计算](#62) - -## 1. 模型概述 -### 1.1 论文地址 -[BigGAN论文](https://arxiv.org/pdf/1809.11096.pdf) -### 1.2 代码地址 -[BigGAN代码](https://github.com/ajbrock/BigGAN-PyTorch) - -修改源码中的BigGAN.py、layers.py和inception_utils.py,并移至本项目中: -``` -git clone https://github.com/ajbrock/BigGAN-PyTorch.git -mv biggan.patch BigGAN-PyTorch -cd BigGAN-PyTorch -git apply biggan.patch -scp BigGAN.py .. -scp layers.py .. -scp inception_utils.py .. -cd .. -``` - -## 2. 环境说明 -### 2.1 深度学习框架 - -``` -CANN 5.0.3 -torch==1.8.0 -torchvision==0.9.0 -onnx==1.9.0 -``` -### 2.2 python第三方库 - -``` -numpy -onnxruntime -scipy==1.7.1 -onnx-simplifier==0.3.6 -onnxoptimizer==0.2.6 -``` - - **说明:** -> PyTorch版本: 请不要低于1.6.0,否则在.pth文件转.onnx文件的过程中会产生报错 -> pytorch,torchvision和onnx:(X86架构)可以通过官方下载whl包安装; (Arm架构)可以通过源码编译安装 -> 其他第三方库: 可以通过 pip3.7 install -r requirements.txt 进行安装 - -## 3. 模型转换 -一步式从pth权重文件转om模型的脚本,能够由pth权重文件生成bacth分别为1和16的om模型: -```bash -bash ./test/pth2om.sh -``` - **说明:** -> pth2om.sh中的6-14行: 完成pth转原始onnx模型 -> pth2om.sh中的18-29行: 完成onnx模型的简化,以及简化的onnx模型转om模型 - -运行后会生成如下文件: -```bash -├── biggan.onnx -├── biggan_sim_bs1.onnx -├── biggan_sim_bs16.onnx -├── biggan_sim_bs1.om -├── biggan_sim_bs16.om -``` - -### 3.1 pth转onnx模型 -1. 下载pth权重文件 - -[BigGAN预训练pth权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/G_ema.pth) -> **说明** -> 模型使用的权重文件名为G_ema.pth - -[Inception_v3预训练pth权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/inception_v3_google.pth) -> **说明** -> 下载的权重文件名为inception_v3_google.pth,此模型权重用于IS评价指标的计算,若仅进行图像生成,无需下载此权重文件 - -[ImageNet采样的npz数据](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/I128_inception_moments.npz) -> **说明** -> 采样数据名为I128_inception_moments.npz,此数据用于FID评价指标的计算,若仅进行图像生成,无需下载此数据文件 - -2. 执行biggan_pth2onnx.py脚本,生成onnx模型文件 -```bash -python3.7 biggan_pth2onnx.py --source "./G_ema.pth" --target "./biggan.onnx" -``` -若需要修改pth2onnx部分,请注意目前ATC支持的onnx算子版本为11 - -3. 执行clip_edit.py脚本,通过"input-model"和"output-model"参数指定输入和输出的onnx模型,默认输入输出均为"./biggan.onnx" -```bash -python3.7 clip_edit.py -``` -> **说明** -> 执行clip_edit.py目的在于初始化onnx模型中Clip节点中的"max"输入,便于后续onnx模型的简化 - -### 3.2 onnx转om模型 -1. 使用onnx-simplifier简化onnx模型 -生成batch size为1的简化onnx模型,对应的命令为: -```bash -python3.7 -m onnxsim './biggan.onnx' './biggan_sim_bs1.onnx' --input-shape "noise:1,1,20" "label:1,5,148" -``` - -2. 
设置环境变量 - -```bash -source env.sh -``` - -3. 使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) - -```bash -atc --framework=5 --model=./biggan_sim_bs1.onnx --output=./biggan_sim_bs1 --input_format=ND --input_shape="noise:1,1,20;label:1,5,148" --log=error --soc_version=Ascend310 -``` - -## 4. 数据预处理 -- [输入数据生成](#41) -### 4.1 数据生成 -1. BigGAN模型的输入数据是由噪声数据和标签数据组成,其中噪声数据是由均值为0,方差为1的正态分布中采样,标签数据是由0至类别总数中随机采样一个整数 -2. 执行输入数据的生成脚本,生成模型输入的bin文件 - -```bash -#注:针对不同batch size的om模型需要生成不同的输入数据 -python3.7 biggan_preprocess.py --batch-size 1 --num-inputs 50000 -``` -运行后,将会得到如下形式的文件夹: - -``` -├── prep_label_bs1 -│ ├──input_00000.bin -│ ├──...... -│ -├── prep_noise_bs1 -│ ├──input_00000.bin -│ ├──...... -``` - -## 5. 离线推理 -执行一步式推理前,请先按照5.1节所示准备msame离线推理工具 -一步式进行输入数据的准备,模型离线推理和NPU性能数据的获取(针对batch1和batch16): -```bash -bash ./test/eval_perf.sh -``` -运行后会生成如下文件/文件夹: -```bash -├── prep_label_bs1 # 模型的标签输入(文件夹) -├── prep_label_bs16 -├── prep_noise_bs1 # 模型的噪声输入(文件夹) -├── prep_noise_bs16 -├── outputs_bs1_om # 模型的输出(文件夹) -├── outputs_bs16_om -├── gen_y_bs1.npz # 类别采样的npz数据 -├── gen_y_bs16.npz -├── msame_bs1.txt # msame推理过程的输出 -├── msame_bs16.txt -├── bs1_perf.log # 性能数据 -├── bs16_perf.log -``` -### 5.1 msame工具概述 -msame模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。获取工具及使用方法可以参考[msame模型推理工具指南](https://gitee.com/ascend/tools/tree/master/msame) -### 5.2 离线推理 -1. 设置环境变量 -```bash -source env.sh -``` -2. 执行离线推理 -运行如下命令进行离线推理: -```bash -./msame --model "./biggan_sim_bs1.om" --input "./prep_noise_bs1,./prep_label_bs1" --output "./outputs_bs1_om" --outfmt BIN > ./msame_bs1.txt -``` -模型输出格式是bin,输出保存在"output"参数指定的文件夹中,同时会生成推理的日志文件msame_bs1.txt -3. 性能数据的获取 -通过给test/parser.py指定推理后的日志文件,可以得到离线推理的性能数据 -```bash -python3.7 ./test/parse.py --txt-file "./msame_bs1.txt" --batch-size 1 > bs1_perf.log -``` -|模型|t4性能|310性能| -|----|----|----| -|BigGAN bs1|239.249fps|227.144fps| -|BigGAN bs16|344.900fps|282.898fps| - -## 6. 精度对比 -一步式进行输出数据的后处理和生成图像的评价指标(针对batch1和batch16): -```bash -bash ./test/eval_acc.sh -``` -运行后会生成如下文件/文件夹: -```bash -├── postprocess_img # 转换后的模型输出(文件夹) -├── gen_img_bs1.npz # 模型输出的npz数据 -├── gen_img_bs16.npz -├── biggan_acc_eval_bs1.log # 精度测量结果 -├── biggan_acc_eval_bs16.log -``` -### 6.1 模型后处理 -模型后处理将离线推理得到的bin文件转换为jpg图像文件,并将原始输出保存至npz文件中,用于精度数据的获取 -``` -python3.7 biggan_postprocess.py --result-path "./outputs_bs1_om" --save-path "./postprocess_img" --batch-size 1 --save-img --save-npz -``` -其中"result-path"表示离线推理输出所在的文件夹,"save-path"表示转换后图像文件的存储地址 -### 6.2 精度计算 -精度计算利用biggan_eval_acc.py脚本: -```bash -python3.7 biggan_eval_acc.py --num-inception-images 50000 --batch-size 1 --dataset 'I128' > biggan_acc_eval_bs1.log -``` -其中"num-inception-images"表示用于进行精度测量的输出数量,"dataset"指定用于对比分布所采用的数据集,I128表示ImageNet数据集在train上的采样 -> **说明** -> IS是生成图像的清晰度和多样性指标,其值越大说明越优 -> FID是生成图像集与真实图像集间的相似度指标,其值越小说明越优 - -| 模型 | IS | FID | -|-------|-------|-------| -|pth模型推理结果|94.323+/-2.395|9.9532| +# BigGAN ONNX模型端到端推理指导 +- [1. 模型概述](#1) + - [论文地址](#11) + - [代码地址](#12) +- [2. 环境说明](#2) + - [深度学习框架](#21) + - [python第三方库](#22) +- [3. 模型转换](#3) + - [pth转onnx模型](#31) + - [onnx转om模型](#32) +- [4. 输入数据生成](#4) + - [数据生成](#41) +- [5. 离线推理](#5) + - [msame工具概述](#51) + - [离线推理](#52) +- [6. 精度对比](#6) + - [模型后处理](#61) + - [精度计算](#62) + +## 1. 
模型概述 +### 1.1 论文地址 +[BigGAN论文](https://arxiv.org/pdf/1809.11096.pdf) +### 1.2 代码地址 +[BigGAN代码](https://github.com/ajbrock/BigGAN-PyTorch) + +修改源码中的BigGAN.py、layers.py和inception_utils.py,并移至本项目中: +``` +git clone https://github.com/ajbrock/BigGAN-PyTorch.git +mv biggan.patch BigGAN-PyTorch +cd BigGAN-PyTorch +git apply biggan.patch +scp BigGAN.py .. +scp layers.py .. +scp inception_utils.py .. +cd .. +``` + +## 2. 环境说明 +### 2.1 深度学习框架 + +``` +CANN 5.0.3 +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +``` +### 2.2 python第三方库 + +``` +numpy +onnxruntime +scipy==1.7.1 +onnx-simplifier==0.3.6 +onnxoptimizer==0.2.6 +``` + + **说明:** +> PyTorch版本: 请不要低于1.6.0,否则在.pth文件转.onnx文件的过程中会产生报错 +> pytorch,torchvision和onnx:(X86架构)可以通过官方下载whl包安装; (Arm架构)可以通过源码编译安装 +> 其他第三方库: 可以通过 pip3.7 install -r requirements.txt 进行安装 + +## 3. 模型转换 +一步式从pth权重文件转om模型的脚本,能够由pth权重文件生成bacth分别为1和16的om模型: +```bash +bash ./test/pth2om.sh +``` + **说明:** +> pth2om.sh中的6-14行: 完成pth转原始onnx模型 +> pth2om.sh中的18-29行: 完成onnx模型的简化,以及简化的onnx模型转om模型 + +运行后会生成如下文件: +```bash +├── biggan.onnx +├── biggan_sim_bs1.onnx +├── biggan_sim_bs16.onnx +├── biggan_sim_bs1.om +├── biggan_sim_bs16.om +``` + +### 3.1 pth转onnx模型 +1. 下载pth权重文件 + +[BigGAN预训练pth权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/G_ema.pth) +> **说明** +> 模型使用的权重文件名为G_ema.pth + +[Inception_v3预训练pth权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/inception_v3_google.pth) +> **说明** +> 下载的权重文件名为inception_v3_google.pth,此模型权重用于IS评价指标的计算,若仅进行图像生成,无需下载此权重文件 + +[ImageNet采样的npz数据](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/gan/BigGAN/I128_inception_moments.npz) +> **说明** +> 采样数据名为I128_inception_moments.npz,此数据用于FID评价指标的计算,若仅进行图像生成,无需下载此数据文件 + +2. 执行biggan_pth2onnx.py脚本,生成onnx模型文件 +```bash +python3.7 biggan_pth2onnx.py --source "./G_ema.pth" --target "./biggan.onnx" +``` +若需要修改pth2onnx部分,请注意目前ATC支持的onnx算子版本为11 + +3. 执行clip_edit.py脚本,通过"input-model"和"output-model"参数指定输入和输出的onnx模型,默认输入输出均为"./biggan.onnx" +```bash +python3.7 clip_edit.py +``` +> **说明** +> 执行clip_edit.py目的在于初始化onnx模型中Clip节点中的"max"输入,便于后续onnx模型的简化 + +### 3.2 onnx转om模型 +1. 使用onnx-simplifier简化onnx模型 +生成batch size为1的简化onnx模型,对应的命令为: +```bash +python3.7 -m onnxsim './biggan.onnx' './biggan_sim_bs1.onnx' --input-shape "noise:1,1,20" "label:1,5,148" +``` + +2. 设置环境变量 + +```bash +source env.sh +``` + +3. 使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN V100R020C10 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) + +```bash +atc --framework=5 --model=./biggan_sim_bs1.onnx --output=./biggan_sim_bs1 --input_format=ND --input_shape="noise:1,1,20;label:1,5,148" --log=error --soc_version=Ascend310 +``` + +## 4. 数据预处理 +- [输入数据生成](#41) +### 4.1 数据生成 +1. BigGAN模型的输入数据是由噪声数据和标签数据组成,其中噪声数据是由均值为0,方差为1的正态分布中采样,标签数据是由0至类别总数中随机采样一个整数 +2. 执行输入数据的生成脚本,生成模型输入的bin文件 + +```bash +#注:针对不同batch size的om模型需要生成不同的输入数据 +python3.7 biggan_preprocess.py --batch-size 1 --num-inputs 50000 +``` +运行后,将会得到如下形式的文件夹: + +``` +├── prep_label_bs1 +│ ├──input_00000.bin +│ ├──...... +│ +├── prep_noise_bs1 +│ ├──input_00000.bin +│ ├──...... +``` + +## 5. 
离线推理 +执行一步式推理前,请先按照5.1节所示准备msame离线推理工具 +一步式进行输入数据的准备,模型离线推理和NPU性能数据的获取(针对batch1和batch16): +```bash +bash ./test/eval_perf.sh +``` +运行后会生成如下文件/文件夹: +```bash +├── prep_label_bs1 # 模型的标签输入(文件夹) +├── prep_label_bs16 +├── prep_noise_bs1 # 模型的噪声输入(文件夹) +├── prep_noise_bs16 +├── outputs_bs1_om # 模型的输出(文件夹) +├── outputs_bs16_om +├── gen_y_bs1.npz # 类别采样的npz数据 +├── gen_y_bs16.npz +├── msame_bs1.txt # msame推理过程的输出 +├── msame_bs16.txt +├── bs1_perf.log # 性能数据 +├── bs16_perf.log +``` +### 5.1 msame工具概述 +msame模型推理工具,其输入是om模型以及模型所需要的输入bin文件,其输出是模型根据相应输入产生的输出文件。获取工具及使用方法可以参考[msame模型推理工具指南](https://gitee.com/ascend/tools/tree/master/msame) +### 5.2 离线推理 +1. 设置环境变量 +```bash +source env.sh +``` +2. 执行离线推理 +运行如下命令进行离线推理: +```bash +./msame --model "./biggan_sim_bs1.om" --input "./prep_noise_bs1,./prep_label_bs1" --output "./outputs_bs1_om" --outfmt BIN > ./msame_bs1.txt +``` +模型输出格式是bin,输出保存在"output"参数指定的文件夹中,同时会生成推理的日志文件msame_bs1.txt +3. 性能数据的获取 +通过给test/parser.py指定推理后的日志文件,可以得到离线推理的性能数据 +```bash +python3.7 ./test/parse.py --txt-file "./msame_bs1.txt" --batch-size 1 > bs1_perf.log +``` +|模型|t4性能|310性能| +|----|----|----| +|BigGAN bs1|239.249fps|227.144fps| +|BigGAN bs16|344.900fps|282.898fps| + +## 6. 精度对比 +一步式进行输出数据的后处理和生成图像的评价指标(针对batch1和batch16): +```bash +bash ./test/eval_acc.sh +``` +运行后会生成如下文件/文件夹: +```bash +├── postprocess_img # 转换后的模型输出(文件夹) +├── gen_img_bs1.npz # 模型输出的npz数据 +├── gen_img_bs16.npz +├── biggan_acc_eval_bs1.log # 精度测量结果 +├── biggan_acc_eval_bs16.log +``` +### 6.1 模型后处理 +模型后处理将离线推理得到的bin文件转换为jpg图像文件,并将原始输出保存至npz文件中,用于精度数据的获取 +``` +python3.7 biggan_postprocess.py --result-path "./outputs_bs1_om" --save-path "./postprocess_img" --batch-size 1 --save-img --save-npz +``` +其中"result-path"表示离线推理输出所在的文件夹,"save-path"表示转换后图像文件的存储地址 +### 6.2 精度计算 +精度计算利用biggan_eval_acc.py脚本: +```bash +python3.7 biggan_eval_acc.py --num-inception-images 50000 --batch-size 1 --dataset 'I128' > biggan_acc_eval_bs1.log +``` +其中"num-inception-images"表示用于进行精度测量的输出数量,"dataset"指定用于对比分布所采用的数据集,I128表示ImageNet数据集在train上的采样 +> **说明** +> IS是生成图像的清晰度和多样性指标,其值越大说明越优 +> FID是生成图像集与真实图像集间的相似度指标,其值越小说明越优 + +| 模型 | IS | FID | +|-------|-------|-------| +|pth模型推理结果|94.323+/-2.395|9.9532| |om模型离线推理结果|94.009+/-1.626|10.0411| \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/BigGAN/biggan_pth2onnx.py b/ACL_PyTorch/contrib/cv/gan/BigGAN/biggan_pth2onnx.py index 87fc7ce99ce00295ad02c943ca01e2d227ac2b6c..060de082565b7c6077df3cf033a0d333a8a8b01e 100644 --- a/ACL_PyTorch/contrib/cv/gan/BigGAN/biggan_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/gan/BigGAN/biggan_pth2onnx.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
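For orientation, the post-processing step described in section 6.1 of the BigGAN README boils down to reading each msame BIN output back as float32, rescaling it to 8-bit pixels, and saving an image. Below is a minimal sketch of that idea; the output file name, the 3x128x128 layout and the [-1, 1] value range are assumptions for illustration, not guarantees of biggan_postprocess.py (which should be used for the actual accuracy flow).

```python
# Illustrative sketch only: turn one msame BIN output of the BigGAN om model into a JPG.
# The file name, the 3x128x128 layout and the [-1, 1] value range are assumptions here.
import os
import numpy as np
from PIL import Image  # Pillow is assumed to be installed

img = np.fromfile("outputs_bs1_om/output_0.bin", dtype=np.float32)
img = img.reshape(3, 128, 128).transpose(1, 2, 0)            # CHW -> HWC
img = ((img + 1) / 2 * 255).clip(0, 255).astype(np.uint8)    # [-1, 1] -> [0, 255]
os.makedirs("postprocess_img", exist_ok=True)
Image.fromarray(img).save("postprocess_img/sample.jpg")
```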
-# ============================================================================ - -import onnx -import torch -import argparse - -from BigGAN import Generator -from collections import OrderedDict - - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if (k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def pth2onnx(input_file, output_file): - checkpoint = torch.load(input_file, map_location=torch.device('cpu')) - checkpoint = proc_nodes_module(checkpoint) - - model = Generator(**{'G_lr':1e-4, 'SN_eps':1e-6, 'adam_eps':1e-6, - 'G_ch':96, 'shared_dim':128, - 'skip_init':True, 'no_optim': True, - 'hier':True, 'dim_z':120}) - model.load_state_dict(checkpoint) - model.eval() - - input_names = ['noise', 'label'] - output_names = ['image'] - dynamic_axes = {'noise': {0: '-1'}, 'label': {0: '-1'}, 'image': {0: '-1'}} - - dummy_z = torch.randn((1, 1, 20)) - dummy_y = torch.randn((1, 5, 148)) - - torch.onnx.export(model, (dummy_z, dummy_y), output_file, dynamic_axes=dynamic_axes, - verbose=True, input_names=input_names, output_names=output_names, opset_version=11) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--source', type=str, default="./G_ema.pth") - parser.add_argument('--target', type=str, default="./biggan.onnx") - args = parser.parse_args() - - pth2onnx(args.source, args.target) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import onnx +import torch +import argparse + +from BigGAN import Generator +from collections import OrderedDict + + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def pth2onnx(input_file, output_file): + checkpoint = torch.load(input_file, map_location=torch.device('cpu')) + checkpoint = proc_nodes_module(checkpoint) + + model = Generator(**{'G_lr':1e-4, 'SN_eps':1e-6, 'adam_eps':1e-6, + 'G_ch':96, 'shared_dim':128, + 'skip_init':True, 'no_optim': True, + 'hier':True, 'dim_z':120}) + model.load_state_dict(checkpoint) + model.eval() + + input_names = ['noise', 'label'] + output_names = ['image'] + dynamic_axes = {'noise': {0: '-1'}, 'label': {0: '-1'}, 'image': {0: '-1'}} + + dummy_z = torch.randn((1, 1, 20)) + dummy_y = torch.randn((1, 5, 148)) + + torch.onnx.export(model, (dummy_z, dummy_y), output_file, dynamic_axes=dynamic_axes, + verbose=True, input_names=input_names, output_names=output_names, opset_version=11) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--source', type=str, default="./G_ema.pth") + parser.add_argument('--target', type=str, default="./biggan.onnx") + args = parser.parse_args() + + pth2onnx(args.source, args.target) diff --git a/ACL_PyTorch/contrib/cv/gan/BigGAN/requirements.txt b/ACL_PyTorch/contrib/cv/gan/BigGAN/requirements.txt index 71eeab2b4d21acf9f69ba601d5f082aba8101f73..ac7e94fc98f48ca1b22db28c0fde37d54dff028f 100644 --- a/ACL_PyTorch/contrib/cv/gan/BigGAN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/gan/BigGAN/requirements.txt @@ -1,5 +1,5 @@ -numpy -onnxruntime==1.9.0 -scipy==1.7.1 -onnx-simplifier==0.3.6 +numpy +onnxruntime==1.9.0 +scipy==1.7.1 +onnx-simplifier==0.3.6 onnxoptimizer==0.2.6 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_postprocess.py b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_postprocess.py index 337d3a4ff0108f6a32982fc52dd1983c51b7ffab..7f6d58130451458433758e8de696e4b58e28129c 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_postprocess.py +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_postprocess.py @@ -1,76 +1,76 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from CGAN import generator as G -import os -import numpy as np -import torch -import struct -import glob -import sys -import utils -import argparse - - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--bin_out_path', type=str, default='', help="the output inferenced") - parser.add_argument('--save_path', type=str, default='result', help="the generated image path") - return parser.parse_args() - - -def get_save_path(bin_folder): - result_paths = [] - files_source = glob.glob(os.path.join(bin_folder,'*.bin')) - files_source.sort() - for file in files_source: - if file.endswith('.bin'): - result_path = file - result_paths.append(result_path) - return result_paths - - -def file2tensor(output_bin): - size = os.path.getsize(output_bin) - res1 = [] - L = int(size / 4) - binfile = open(output_bin, 'rb') - for i in range(L): - data = binfile.read(4) - num = struct.unpack('f', data) - res1.append(num[0]) - binfile.close() - dim_res = np.array(res1).reshape(100,3,28,28) - tensor_res = torch.tensor(dim_res, dtype=torch.float32) - return tensor_res - - -def post_process(args): - result_paths = get_save_path(args.bin_out_path) - for i in range(len(result_paths)): - result = file2tensor(result_paths[i]) - result = result.data.numpy().transpose(0, 2, 3, 1) - result = (result + 1)/2 - sample_num = 100 - image_frame_dim = int(np.floor(np.sqrt(sample_num))) - if not os.path.exists(os.path.join(args.save_path)): - os.makedirs(os.path.join(args.save_path)) - utils.save_images(result[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], - os.path.join(args.save_path,'result.png')) - print("postprocess image stored in:", os.path.join(args.save_path,'result.png')) - -if __name__ == "__main__": - args = parse_args() - post_process(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +from CGAN import generator as G +import os +import numpy as np +import torch +import struct +import glob +import sys +import utils +import argparse + + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--bin_out_path', type=str, default='', help="the output inferenced") + parser.add_argument('--save_path', type=str, default='result', help="the generated image path") + return parser.parse_args() + + +def get_save_path(bin_folder): + result_paths = [] + files_source = glob.glob(os.path.join(bin_folder,'*.bin')) + files_source.sort() + for file in files_source: + if file.endswith('.bin'): + result_path = file + result_paths.append(result_path) + return result_paths + + +def file2tensor(output_bin): + size = os.path.getsize(output_bin) + res1 = [] + L = int(size / 4) + binfile = open(output_bin, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res1.append(num[0]) + binfile.close() + dim_res = np.array(res1).reshape(100,3,28,28) + tensor_res = torch.tensor(dim_res, dtype=torch.float32) + return tensor_res + + +def post_process(args): + result_paths = get_save_path(args.bin_out_path) + for i in range(len(result_paths)): + result = file2tensor(result_paths[i]) + result = result.data.numpy().transpose(0, 2, 3, 1) + result = (result + 1)/2 + sample_num = 100 + image_frame_dim = int(np.floor(np.sqrt(sample_num))) + if not os.path.exists(os.path.join(args.save_path)): + os.makedirs(os.path.join(args.save_path)) + utils.save_images(result[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], + os.path.join(args.save_path,'result.png')) + print("postprocess image stored in:", os.path.join(args.save_path,'result.png')) + +if __name__ == "__main__": + args = parse_args() + post_process(args) diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_preprocess.py b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_preprocess.py index 35c336b7791ebbf2b51e62f2fa78d37c3a0ffb60..a2bf65dca850258f97bafb5b1b2f43a1593ba649 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_preprocess.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
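For reference, the float-by-float struct.unpack loop used by CGAN_postprocess.py's file2tensor can be reproduced more compactly with numpy. A minimal sketch, assuming the same float32 output of shape 100x3x28x28:

```python
# Equivalent of file2tensor using numpy: read one msame BIN output of the CGAN om model.
import numpy as np
import torch

def bin_to_tensor(output_bin):
    # float32 little-endian buffer, reshaped to (batch, channels, height, width)
    data = np.fromfile(output_bin, dtype=np.float32).reshape(100, 3, 28, 28)
    return torch.tensor(data, dtype=torch.float32)
```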
-import os -import torch -import numpy as np -import os -import argparse - - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") - parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") - parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") - parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") - parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') - parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') - parser.add_argument('--save_path', type=str, default="data", help='processed data path') - return parser.parse_args() - - -# fixed noise & condition -def prep_preocess(args): - sample_num = args.class_num**2 - z_dim = args.input_dim - sample_z_ = torch.zeros((sample_num, z_dim)) - for i in range(args.class_num): - sample_z_[i * args.class_num] = torch.rand(1,z_dim) - for j in range(1, args.class_num): - sample_z_[i * args.class_num + j] = sample_z_[i * args.class_num] - - if not os.path.exists(os.path.join(args.save_path)): - os.makedirs(os.path.join(args.save_path)) - - temp = torch.zeros((args.class_num, 1)) - for i in range(args.class_num): - temp[i, 0] = i - - temp_y = torch.zeros((sample_num, 1)) - for i in range(args.class_num): - temp_y[i * args.class_num: (i + 1) * args.class_num] = temp - - sample_y_ = torch.zeros((sample_num, args.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) - - input = torch.cat([sample_z_, sample_y_], 1) - input = np.array(input).astype(np.float32) - input.tofile(os.path.join(args.save_path, 'input' + '.bin')) -if __name__ == "__main__": - args = parse_args() - prep_preocess(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import torch +import numpy as np +import os +import argparse + + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") + parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") + parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") + parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") + parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') + parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') + parser.add_argument('--save_path', type=str, default="data", help='processed data path') + return parser.parse_args() + + +# fixed noise & condition +def prep_preocess(args): + sample_num = args.class_num**2 + z_dim = args.input_dim + sample_z_ = torch.zeros((sample_num, z_dim)) + for i in range(args.class_num): + sample_z_[i * args.class_num] = torch.rand(1,z_dim) + for j in range(1, args.class_num): + sample_z_[i * args.class_num + j] = sample_z_[i * args.class_num] + + if not os.path.exists(os.path.join(args.save_path)): + os.makedirs(os.path.join(args.save_path)) + + temp = torch.zeros((args.class_num, 1)) + for i in range(args.class_num): + temp[i, 0] = i + + temp_y = torch.zeros((sample_num, 1)) + for i in range(args.class_num): + temp_y[i * args.class_num: (i + 1) * args.class_num] = temp + + sample_y_ = torch.zeros((sample_num, args.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) + + input = torch.cat([sample_z_, sample_y_], 1) + input = np.array(input).astype(np.float32) + input.tofile(os.path.join(args.save_path, 'input' + '.bin')) +if __name__ == "__main__": + args = parse_args() + prep_preocess(args) print("data preprocessed stored in",args.save_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_pth2onnx.py b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_pth2onnx.py index 2da7ae2176242e91df33961ff720e331b3f21278..0ee964ab8aba21cdbc0694b9be36ebaf8def4a5d 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/CGAN_pth2onnx.py @@ -1,66 +1,66 @@ - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
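Before moving on to the model export, it can be worth confirming that the input.bin written by CGAN_preprocess.py has the expected layout: 100 samples, each consisting of 62 noise values followed by a 10-way one-hot class label. A minimal check with numpy:

```python
# Sanity check for the preprocessed CGAN input written to ./data/input.bin:
# 100 samples x (62 noise dims + 10 one-hot label dims) = shape (100, 72).
import numpy as np

data = np.fromfile("data/input.bin", dtype=np.float32).reshape(100, 72)
print(data.shape)                  # (100, 72)
print(data[:, 62:].sum(axis=1))    # each label row is one-hot, so every sum should be 1.0
```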
-# ============================================================================ -from CGAN import generator -import torch -import torch.onnx -import sys -from collections import OrderedDict -import argparse - - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") - parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") - parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") - parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") - parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') - parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') - return parser.parse_args() - - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(): - args = parse_args() - net = generator(input_dim=args.input_dim, output_dim=args.output_dim, - input_size=args.input_size, class_num=args.class_num) - model = net - checkpoint = torch.load(args.pth_path, map_location='cpu') - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - model.eval() - input_names = ["image"] - output_names = ["output1"] - #dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} - dummy_input1 = torch.randn(100, 72) - torch.onnx.export(model, dummy_input1, args.onnx_path, input_names=input_names, - output_names=output_names, opset_version=11, verbose=True) - print("this model could generete pictures, specifically digits") - print('onnx export done.') - - -if __name__ == "__main__": + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from CGAN import generator +import torch +import torch.onnx +import sys +from collections import OrderedDict +import argparse + + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") + parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") + parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") + parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") + parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') + parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') + return parser.parse_args() + + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." 
in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(): + args = parse_args() + net = generator(input_dim=args.input_dim, output_dim=args.output_dim, + input_size=args.input_size, class_num=args.class_num) + model = net + checkpoint = torch.load(args.pth_path, map_location='cpu') + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + model.eval() + input_names = ["image"] + output_names = ["output1"] + #dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} + dummy_input1 = torch.randn(100, 72) + torch.onnx.export(model, dummy_input1, args.onnx_path, input_names=input_names, + output_names=output_names, opset_version=11, verbose=True) + print("this model could generete pictures, specifically digits") + print('onnx export done.') + + +if __name__ == "__main__": pth2onnx() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/README.md b/ACL_PyTorch/contrib/cv/gan/CGAN/README.md index 659e1d55f7f93ddf11181bf67e40ed28ad4539f2..dd932ea1ad4fe0d8e711e1d7e81730fcf081f408 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/README.md +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/README.md @@ -1,265 +1,265 @@ -# CGAN推理说明 - -## 1 模型概述 - -- **[论文地址](https://arxiv.org/abs/1411.1784)** -- **[代码地址](https://github.com/znxlwm/pytorch-generative-model-collections/)** - -### 1.1 论文地址 - -[CGAN论文](https://github.com/znxlwm/pytorch-generative-model-collections/) - -### 1.2 代码地址 - -[CGAN代码](https://github.com/znxlwm/pytorch-generative-model-collections/) - -branch:master - -commitid:0d183bb5ea2fbe069e1c6806c4a9a1fd8e81656f - - -## 2 环境说明 - -- 深度学习框架 -- python第三方库 - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.3 - -pytorch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.10.2 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.21.2 -Pillow == 8.4.0 -imageio == 2.9.0 -scipy == 1.7.1 -matplotlib==3.4.3 -``` - -## 3 模型转换 - -- pth转om模型 - -### 3.1 pth转om模型 - -1.获取pth权重文件 - -pth权重文件随附件一起打包 - -2.下载CGAN推理代码 - -``` -git clone https://gitee.com/wang-chaojiemayj/modelzoo.git -cd modelzoo -git checkout tuili -``` - -进入CGANt目录 - -``` -cd ./contrib/ACL_PyTorch/Research/cv/GAN/CGAN -``` - -3.pth模型转onnx模型,onnx转成om模型 - -pth模型转onnx模型 - -``` -python3.7 CGAN_pth2onnx.py --pth_path CGAN_G.pth --onnx_path CGAN.onnx -python3.7 -m onnxsim --input-shape="100,72" CGAN.onnx CGAN_sim.onnx -``` - -onnx转出om,并使用autotune优化om模型,这将耗费大量时间 - -``` -source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.3版本的CANN包) -# 生成器一次只能生成一张图,由于模型输入是两维的,不是常用的NCHW格式,input_format采用ND形式 -atc --framework=5 --model=CGAN_sim.onnx --output=CGAN_bs1 --input_format=ND --output_type=FP32 --input_shape="image:100,72" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" -``` - -## 4 数据集预处理 - -- 数据集获取 -- 数据预处理 -- 生成数据集信息文件 - -### 4.1 数据集获取 - -本模型的输入数据由随机数以及标签生成,在CGAN_preprocess.py中会生成数据并转成二进制文件,并保存在。’./prep_dataset‘目录下。 - -文件结构如下 - -``` -|CGAN--test -| | |--pth2om.sh -| | |--eval_acc_perf.sh -| | |--perf_t4.sh -| |--util.py -| |--CGAN.py -| |--gen_dataset_info.py -| |--env.sh -| |--CGAN_pth2onnx.py -| |--CGAN_preprocess.py -| |--CGAN_postprocess.py -| |--requirements.txt -| |--LICENCE -| |--modelzoo_level.txt -| |--README.md -``` - - -### 4.2 数据集预处理 - -运行CGAN_preprocess.py - -``` -python3.7 CGAN_preprocess.py --save_path ./prep_dataset -``` - -二进制文件将保存在./prep_dataset目录下 - -### 4.3 生成数据集信息文件 - -1.执行生成数据集信息脚本gen_dataset_info.py,生成数据集信息文件 - -``` -python3.7 gen_dataset_info.py --dataset_bin ./prep_dataset --info_name CGAN_prep_bin.info --width 72 
--height 100 -``` - -## 5 离线推理 - -- msame概述 -- 离线推理 - -### 5.1 msame工具概述 - -msame工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理msame工具用户指南 - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -``` - -2.执行离线推理 - -``` -./msame --model "./CGAN_bs1.om" --input "./prep_dataset/input.bin" --output "./out" --outfmt BIN --loop 1 -``` - -输出结果保存在'./out'目录下 - -## 6 精度对比 - -- 离线推理精度 -- 开源精度 -- 开源精度对比 - -### 6.1 离线推理精度统计 - -将msame推理获得的输出结果进行后处理,保存为图片 - -``` -python3.7 CGAN_postprocess.py --bin_out_path ./out/20211124_090506 --save_path ./result -``` - -第一个参数为msame输出目录,’/20211113_073952‘是离线推理时根据时间自动生成的目录,请根据实际情况改变,第二个参数为保存后处理产生的图片的目录。 - -### 6.2 开源精度 - -![](README.assets/CGAN_epoch050-16371406300071.png) - -### 6.3 精度对比 - -![](README.assets/result.png) - -om模型可以正常生成数字,与pth模型生成的图片大致一致。 - -## 7 性能对比 - -- NPU性能数据 -- T4性能数据 -- 性能对比 - -### 7.1 npu性能数据 - -1.使用msame工具执行以下指令通过纯推理获得性能数据 - -``` -./msame --model "CGAN_bs1.om" --output "./out" --outfmt BIN --loop 20 -``` - -结果如下: - -``` -[INFO] get max dynamic batch size success -[INFO] output data success -Inference average time: 2.554200 ms -Inference average time without first time: 2.547842 ms -[INFO] destroy model input success. -``` - -310单卡吞吐率:1000*(1/2.547842)*4=1568fps - - -### 7.2 T4性能数据 - -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2。 -执行以下命令获取T4性能数据 - -``` -trtexec --onnx=CGAN.onnx --fp16 --shapes=image:100,72 --threads -``` - -``` -[11/14/2021-09:17:40] [I] GPU Compute -[11/14/2021-09:17:40] [I] min: 0.407471 ms -[11/14/2021-09:17:40] [I] max: 2.23047 ms -[11/14/2021-09:17:40] [I] mean: 0.427789 ms -[11/14/2021-09:17:40] [I] median: 0.428223 ms -[11/14/2021-09:17:40] [I] percentile: 0.4552 ms at 99% -[11/14/2021-09:17:40] [I] total compute time: 2.96629 s -``` - -T4单卡吞吐率:1000/(0.428223/1)=2337fps - -### 7.3 性能对比 - -310性能:1000*(1/2.547842)*4=1568fps - -T4性能:1000/(0.428223/1)=2337fps - -310性能低于T4性能。 - -### 7.4 性能优化 - -autotune优化,结果如下: - -![img](README.assets/wps8587.tmp-16378261403441.jpg) - -优化TransData,TransPose,结果如下: - -![img](README.assets/wps229E.tmp.jpg) - -onnxsim优化onnx,结果如下: - -![img](README.assets/wps4092.tmp.jpg) - -最终经过autotune优化,优化TransData、TransPose,onnxsim优化onnx之后,最终的结果如下: - -![image-20211125154623271](README.assets/image-20211125154623271.png) - -最终的性能为:1000/0.065243*4=1936FPS - - - +# CGAN推理说明 + +## 1 模型概述 + +- **[论文地址](https://arxiv.org/abs/1411.1784)** +- **[代码地址](https://github.com/znxlwm/pytorch-generative-model-collections/)** + +### 1.1 论文地址 + +[CGAN论文](https://github.com/znxlwm/pytorch-generative-model-collections/) + +### 1.2 代码地址 + +[CGAN代码](https://github.com/znxlwm/pytorch-generative-model-collections/) + +branch:master + +commitid:0d183bb5ea2fbe069e1c6806c4a9a1fd8e81656f + + +## 2 环境说明 + +- 深度学习框架 +- python第三方库 + +### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.3 + +pytorch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.10.2 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.21.2 +Pillow == 8.4.0 +imageio == 2.9.0 +scipy == 1.7.1 +matplotlib==3.4.3 +``` + +## 3 模型转换 + +- pth转om模型 + +### 3.1 pth转om模型 + +1.获取pth权重文件 + +pth权重文件随附件一起打包 + +2.下载CGAN推理代码 + +``` +git clone https://gitee.com/wang-chaojiemayj/modelzoo.git +cd modelzoo +git checkout tuili +``` + +进入CGANt目录 + +``` +cd ./contrib/ACL_PyTorch/Research/cv/GAN/CGAN +``` + +3.pth模型转onnx模型,onnx转成om模型 + +pth模型转onnx模型 + +``` +python3.7 CGAN_pth2onnx.py --pth_path CGAN_G.pth --onnx_path CGAN.onnx +python3.7 -m onnxsim --input-shape="100,72" CGAN.onnx CGAN_sim.onnx +``` + 
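A quick way to sanity-check the exported and simplified CGAN ONNX model before converting it to OM is to run it once with onnxruntime on a random (100, 72) input, the shape used throughout this guide. onnxruntime is not part of the CGAN requirements list, so this is an optional sketch that assumes it is installed:

```python
# Optional sanity check for CGAN_sim.onnx (assumes onnxruntime is installed).
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("CGAN_sim.onnx")
# "image" is the input name used at export time; 100 samples of 62-dim noise + 10-dim one-hot label.
dummy = np.random.randn(100, 72).astype(np.float32)
out = sess.run(None, {"image": dummy})[0]
print(out.shape)  # expected: (100, 3, 28, 28)
```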
+onnx转出om,并使用autotune优化om模型,这将耗费大量时间 + +``` +source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.3版本的CANN包) +# 生成器一次只能生成一张图,由于模型输入是两维的,不是常用的NCHW格式,input_format采用ND形式 +atc --framework=5 --model=CGAN_sim.onnx --output=CGAN_bs1 --input_format=ND --output_type=FP32 --input_shape="image:100,72" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA" +``` + +## 4 数据集预处理 + +- 数据集获取 +- 数据预处理 +- 生成数据集信息文件 + +### 4.1 数据集获取 + +本模型的输入数据由随机数以及标签生成,在CGAN_preprocess.py中会生成数据并转成二进制文件,并保存在。’./prep_dataset‘目录下。 + +文件结构如下 + +``` +|CGAN--test +| | |--pth2om.sh +| | |--eval_acc_perf.sh +| | |--perf_t4.sh +| |--util.py +| |--CGAN.py +| |--gen_dataset_info.py +| |--env.sh +| |--CGAN_pth2onnx.py +| |--CGAN_preprocess.py +| |--CGAN_postprocess.py +| |--requirements.txt +| |--LICENCE +| |--modelzoo_level.txt +| |--README.md +``` + + +### 4.2 数据集预处理 + +运行CGAN_preprocess.py + +``` +python3.7 CGAN_preprocess.py --save_path ./prep_dataset +``` + +二进制文件将保存在./prep_dataset目录下 + +### 4.3 生成数据集信息文件 + +1.执行生成数据集信息脚本gen_dataset_info.py,生成数据集信息文件 + +``` +python3.7 gen_dataset_info.py --dataset_bin ./prep_dataset --info_name CGAN_prep_bin.info --width 72 --height 100 +``` + +## 5 离线推理 + +- msame概述 +- 离线推理 + +### 5.1 msame工具概述 + +msame工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理msame工具用户指南 + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +``` + +2.执行离线推理 + +``` +./msame --model "./CGAN_bs1.om" --input "./prep_dataset/input.bin" --output "./out" --outfmt BIN --loop 1 +``` + +输出结果保存在'./out'目录下 + +## 6 精度对比 + +- 离线推理精度 +- 开源精度 +- 开源精度对比 + +### 6.1 离线推理精度统计 + +将msame推理获得的输出结果进行后处理,保存为图片 + +``` +python3.7 CGAN_postprocess.py --bin_out_path ./out/20211124_090506 --save_path ./result +``` + +第一个参数为msame输出目录,’/20211113_073952‘是离线推理时根据时间自动生成的目录,请根据实际情况改变,第二个参数为保存后处理产生的图片的目录。 + +### 6.2 开源精度 + +![](README.assets/CGAN_epoch050-16371406300071.png) + +### 6.3 精度对比 + +![](README.assets/result.png) + +om模型可以正常生成数字,与pth模型生成的图片大致一致。 + +## 7 性能对比 + +- NPU性能数据 +- T4性能数据 +- 性能对比 + +### 7.1 npu性能数据 + +1.使用msame工具执行以下指令通过纯推理获得性能数据 + +``` +./msame --model "CGAN_bs1.om" --output "./out" --outfmt BIN --loop 20 +``` + +结果如下: + +``` +[INFO] get max dynamic batch size success +[INFO] output data success +Inference average time: 2.554200 ms +Inference average time without first time: 2.547842 ms +[INFO] destroy model input success. 
+``` + +310单卡吞吐率:1000*(1/2.547842)*4=1568fps + + +### 7.2 T4性能数据 + +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2。 +执行以下命令获取T4性能数据 + +``` +trtexec --onnx=CGAN.onnx --fp16 --shapes=image:100,72 --threads +``` + +``` +[11/14/2021-09:17:40] [I] GPU Compute +[11/14/2021-09:17:40] [I] min: 0.407471 ms +[11/14/2021-09:17:40] [I] max: 2.23047 ms +[11/14/2021-09:17:40] [I] mean: 0.427789 ms +[11/14/2021-09:17:40] [I] median: 0.428223 ms +[11/14/2021-09:17:40] [I] percentile: 0.4552 ms at 99% +[11/14/2021-09:17:40] [I] total compute time: 2.96629 s +``` + +T4单卡吞吐率:1000/(0.428223/1)=2337fps + +### 7.3 性能对比 + +310性能:1000*(1/2.547842)*4=1568fps + +T4性能:1000/(0.428223/1)=2337fps + +310性能低于T4性能。 + +### 7.4 性能优化 + +autotune优化,结果如下: + +![img](README.assets/wps8587.tmp-16378261403441.jpg) + +优化TransData,TransPose,结果如下: + +![img](README.assets/wps229E.tmp.jpg) + +onnxsim优化onnx,结果如下: + +![img](README.assets/wps4092.tmp.jpg) + +最终经过autotune优化,优化TransData、TransPose,onnxsim优化onnx之后,最终的结果如下: + +![image-20211125154623271](README.assets/image-20211125154623271.png) + +最终的性能为:1000/0.065243*4=1936FPS + + + diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/gan/CGAN/gen_dataset_info.py index 2d8c7b348a6e704d53ac6f36dde91b03a3196234..1669ffd408998c58af8608f95ba2a54feb991ce3 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/gen_dataset_info.py @@ -1,44 +1,44 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import glob -import argparse - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--dataset_bin', type=str, default='./prep_dataset', help="The input_dim") - parser.add_argument('--info_name', type=str, default='CGAN_prep_bin.info', help="The output_dim") - parser.add_argument('--width', type=str, default='78', help="The width of input ") - parser.add_argument('--height', type=str, default='100', help="The height of input") - return parser.parse_args() - -def get_bin_info(img_root_path='./data', info_name='CGAN_prep_bin.info', width='72', height='100'): - img_path = [] - files_source = glob.glob(os.path.join(img_root_path,'*.bin')) - files_source.sort() - for file in files_source: - if file.endswith('.bin'): - imgpath = file - img_path.append(imgpath) - with open(info_name, 'w') as fp: - for index in range(len(img_path)): - content = ' '.join([str(index), img_path[index], width, height]) - fp.write(content) - fp.write('\n') - - -if __name__ == '__main__': - args = parse_args() - get_bin_info(img_root_path=args.dataset_bin, info_name=args.info_name, width=args.width, height=args.height) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import glob +import argparse + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--dataset_bin', type=str, default='./prep_dataset', help="The input_dim") + parser.add_argument('--info_name', type=str, default='CGAN_prep_bin.info', help="The output_dim") + parser.add_argument('--width', type=str, default='78', help="The width of input ") + parser.add_argument('--height', type=str, default='100', help="The height of input") + return parser.parse_args() + +def get_bin_info(img_root_path='./data', info_name='CGAN_prep_bin.info', width='72', height='100'): + img_path = [] + files_source = glob.glob(os.path.join(img_root_path,'*.bin')) + files_source.sort() + for file in files_source: + if file.endswith('.bin'): + imgpath = file + img_path.append(imgpath) + with open(info_name, 'w') as fp: + for index in range(len(img_path)): + content = ' '.join([str(index), img_path[index], width, height]) + fp.write(content) + fp.write('\n') + + +if __name__ == '__main__': + args = parse_args() + get_bin_info(img_root_path=args.dataset_bin, info_name=args.info_name, width=args.width, height=args.height) diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/gan/CGAN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CGAN/test/parse.py b/ACL_PyTorch/contrib/cv/gan/CGAN/test/parse.py index c015e8771f60afd68ab3bb62d130ea9b59b75d59..94857ecd74da1f19b5a4c67d8271e8d37e07c607 100644 --- a/ACL_PyTorch/contrib/cv/gan/CGAN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/gan/CGAN/test/parse.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import re - -def get_acc(filename): - with open(filename, 'r') as f: - lines = f.readlines() - last_line = lines[-1] - psnr = last_line.split(" ")[2] - print(filename.split('.')[0],"Average PSNR:", psnr) - - -def get_perf(filename): - with open(filename, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = 1000/float((txt_data_list[2].split(' '))[0]) * 4 - print('310 fps:{}'.format(fps)) - -if __name__ == "__main__": - - filename = sys.argv[1] - - if filename.endswith(".log"): - get_acc(filename) - elif filename.endswith(".txt"): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import re + +def get_acc(filename): + with open(filename, 'r') as f: + lines = f.readlines() + last_line = lines[-1] + psnr = last_line.split(" ")[2] + print(filename.split('.')[0],"Average PSNR:", psnr) + + +def get_perf(filename): + with open(filename, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = 1000/float((txt_data_list[2].split(' '))[0]) * 4 + print('310 fps:{}'.format(fps)) + +if __name__ == "__main__": + + filename = sys.argv[1] + + if filename.endswith(".log"): + get_acc(filename) + elif filename.endswith(".txt"): get_perf(filename) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_NetLoad.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_NetLoad.py index 958711ae985c3fd8440cbdb474786215b9544f44..f559cbf24021a3900a0cade6a63ce3ffbd2fce64 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_NetLoad.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_NetLoad.py @@ -1,90 +1,90 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from collections import OrderedDict -import torch -from models import networks_adapt as networks - - -class load_networks(): - def __init__(self, opt): - self.opt = opt - self.gpu = 0 - self.netG_A = networks.define_G(self.opt.input_nc, self.opt.output_nc, self.opt.ngf, self.opt.netG, - self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, - self.gpu) - self.netG_B = networks.define_G(self.opt.output_nc, self.opt.input_nc, self.opt.ngf, self.opt.netG, - self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, - self.gpu) - if (opt.npu == False): - self.device = torch.device('cuda:{}'.format(self.gpu)) - else: - self.device = torch.device("cpu") - - def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): - """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" - key = keys[i] - if i + 1 == len(keys): # at the end, pointing to a parameter/buffer - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'running_mean' or key == 'running_var'): - if getattr(module, key) is None: - state_dict.pop('.'.join(keys)) - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'num_batches_tracked'): - state_dict.pop('.'.join(keys)) - else: - self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) - - def proc_nodes_module(self, checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - - def loadnetworks(self, net, load_path): - state_dict = torch.load(load_path, map_location=torch.device('cpu')) - state_dict = self.proc_nodes_module(state_dict) - if hasattr(state_dict, '_metadata'): - del state_dict._metadata - # patch InstanceNorm checkpoints prior to 0.4 - for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop - self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) - net.load_state_dict(state_dict) - return net - - def get_networks(self, load_patha, load_pathb): - model_Ga = self.loadnetworks(self.netG_A, load_patha) - model_Gb = self.loadnetworks(self.netG_B, load_pathb) - return model_Ga, model_Gb +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from collections import OrderedDict +import torch +from models import networks_adapt as networks + + +class load_networks(): + def __init__(self, opt): + self.opt = opt + self.gpu = 0 + self.netG_A = networks.define_G(self.opt.input_nc, self.opt.output_nc, self.opt.ngf, self.opt.netG, + self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, + self.gpu) + self.netG_B = networks.define_G(self.opt.output_nc, self.opt.input_nc, self.opt.ngf, self.opt.netG, + self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, + self.gpu) + if (opt.npu == False): + self.device = torch.device('cuda:{}'.format(self.gpu)) + else: + self.device = torch.device("cpu") + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def proc_nodes_module(self, checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." 
in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + + def loadnetworks(self, net, load_path): + state_dict = torch.load(load_path, map_location=torch.device('cpu')) + state_dict = self.proc_nodes_module(state_dict) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + # patch InstanceNorm checkpoints prior to 0.4 + for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop + self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) + net.load_state_dict(state_dict) + return net + + def get_networks(self, load_patha, load_pathb): + model_Ga = self.loadnetworks(self.netG_A, load_patha) + model_Gb = self.loadnetworks(self.netG_B, load_pathb) + return model_Ga, model_Gb diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_ReflectpadDeal.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_ReflectpadDeal.py index d5f7cd809cb3a1ced445be92267cad68173bfb98..c39ee6239298d3e01934d7f6dc9d0e3b1ca6b2c0 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_ReflectpadDeal.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_ReflectpadDeal.py @@ -1,66 +1,66 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import onnx -from parse import parse_args - - -def main(): - paser = parse_args(True, True) - opt = paser.initialize() - # Mode attr of Pad only supports constant, current is reflect ." 
- model = onnx.load(opt.onnx_path + opt.model_ga_onnx_name) - max_idx = len(model.graph.node) - for i in range(max_idx): - for k in range(len(model.graph.node[i].attribute)): - - if (model.graph.node[i].attribute[k].name == 'mode'): - model.graph.node[i].attribute[k].s = b'constant' - print(model.graph.node[i].attribute[k].s) - - onnx.checker.check_model(model) - onnx.save(model, opt.onnx_path + opt.model_gb_onnx_name) - - model = onnx.load(opt.onnx_path + opt.model_gb_onnx_name) - max_idx = len(model.graph.node) - for i in range(max_idx): - # if(model.graph.node[i].attribute[0].name=='Pad'): - for k in range(len(model.graph.node[i].attribute)): - - if (model.graph.node[i].attribute[k].name == 'mode'): - model.graph.node[i].attribute[k].s = b'constant' - print(model.graph.node[i].attribute[k].s) - onnx.checker.check_model(model) - onnx.save(model, opt.onnx_path + opt.model_ga_onnx_name) - - -if __name__ == '__main__': - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import onnx +from parse import parse_args + + +def main(): + paser = parse_args(True, True) + opt = paser.initialize() + # Mode attr of Pad only supports constant, current is reflect ." 
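+    # Walk every node attribute in the loaded graph and overwrite any attribute
+    # named 'mode' with b'constant': the ATC Pad operator only supports constant
+    # padding, so reflect padding is dropped here (the README notes this can blur
+    # image borders at inference time).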
+ model = onnx.load(opt.onnx_path + opt.model_ga_onnx_name) + max_idx = len(model.graph.node) + for i in range(max_idx): + for k in range(len(model.graph.node[i].attribute)): + + if (model.graph.node[i].attribute[k].name == 'mode'): + model.graph.node[i].attribute[k].s = b'constant' + print(model.graph.node[i].attribute[k].s) + + onnx.checker.check_model(model) + onnx.save(model, opt.onnx_path + opt.model_gb_onnx_name) + + model = onnx.load(opt.onnx_path + opt.model_gb_onnx_name) + max_idx = len(model.graph.node) + for i in range(max_idx): + # if(model.graph.node[i].attribute[0].name=='Pad'): + for k in range(len(model.graph.node[i].attribute)): + + if (model.graph.node[i].attribute[k].name == 'mode'): + model.graph.node[i].attribute[k].s = b'constant' + print(model.graph.node[i].attribute[k].s) + onnx.checker.check_model(model) + onnx.save(model, opt.onnx_path + opt.model_ga_onnx_name) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_onnx_export.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_onnx_export.py index f6d3207e1ea99f0a19c72fe29d7509ea22d9f54e..d2faf855278b0d08b8f3aa755b14e5a6f3feb4b6 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_onnx_export.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/CycleGAN_onnx_export.py @@ -1,62 +1,62 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -import os -import torch.onnx -from CycleGAN_NetLoad import load_networks -from parse import parse_args - -def main(): - paser = parse_args(True, True) - opt = paser.initialize() - lnetworks = load_networks(opt) - model_Ga, model_Gb = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) - device_cpu = torch.device("cpu") - model_Ga = model_Ga.to(device_cpu) - model_Gb = model_Gb.to(device_cpu) - dummy_input = torch.randn(1, 3, 256, 256) - input_names = ["img_sat_maps"] - output_names = ["maps"] - dynamic_axes = {'img_sat_maps': {0: '-1'}, 'maps': {0: '-1'}} - input_names1 = ["img_maps_sat"] - output_names1 = ["sat"] - dynamic_axes1 = {'img_maps_sat': {0: '-1'}, 'sat': {0: '-1'}} - if (os.path.exists(opt.onnx_path) == False): - os.makedirs(opt.onnx_path) - torch.onnx.export(model_Ga, dummy_input, f=opt.onnx_path + opt.model_ga_onnx_name, verbose=True, training=False, \ - dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names, opset_version=11) - torch.onnx.export(model_Gb, dummy_input, f=opt.onnx_path + opt.model_gb_onnx_name, verbose=True, training=False, \ - dynamic_axes=dynamic_axes1, input_names=input_names1, output_names=output_names1, - opset_version=11) - - -if __name__ == '__main__': - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
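+#
+# Module note: this script loads the trained G_A/G_B generator weights via
+# load_networks, moves them to CPU, and exports each one to ONNX (opset 11)
+# with a dynamic batch dimension, writing the files under opt.onnx_path.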
+# ============================================================================ +import os +import torch.onnx +from CycleGAN_NetLoad import load_networks +from parse import parse_args + +def main(): + paser = parse_args(True, True) + opt = paser.initialize() + lnetworks = load_networks(opt) + model_Ga, model_Gb = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) + device_cpu = torch.device("cpu") + model_Ga = model_Ga.to(device_cpu) + model_Gb = model_Gb.to(device_cpu) + dummy_input = torch.randn(1, 3, 256, 256) + input_names = ["img_sat_maps"] + output_names = ["maps"] + dynamic_axes = {'img_sat_maps': {0: '-1'}, 'maps': {0: '-1'}} + input_names1 = ["img_maps_sat"] + output_names1 = ["sat"] + dynamic_axes1 = {'img_maps_sat': {0: '-1'}, 'sat': {0: '-1'}} + if (os.path.exists(opt.onnx_path) == False): + os.makedirs(opt.onnx_path) + torch.onnx.export(model_Ga, dummy_input, f=opt.onnx_path + opt.model_ga_onnx_name, verbose=True, training=False, \ + dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names, opset_version=11) + torch.onnx.export(model_Gb, dummy_input, f=opt.onnx_path + opt.model_gb_onnx_name, verbose=True, training=False, \ + dynamic_axes=dynamic_axes1, input_names=input_names1, output_names=output_names1, + opset_version=11) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/LICENSE b/ACL_PyTorch/contrib/cv/gan/CycleGAN/LICENSE index 4e1ad12a819e98036586f198d3873933f1892331..eb1309d6c1e79cfb4dad830ae04bfca945f5568e 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/LICENSE +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/LICENSE @@ -1,31 +1,31 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ============================================================================ \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/PerformanceForGPU.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/PerformanceForGPU.py index a397087a14fe47a4ab3836069cef2e062b46521a..3e68d420e79a2ef3a7037c32cb536fb46dbdbf9b 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/PerformanceForGPU.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/PerformanceForGPU.py @@ -1,173 +1,173 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import os -import time -import torchvision.transforms as transforms -from PIL import Image -import torch.onnx -from torch.utils.data import Dataset -from torchvision.datasets.folder import IMG_EXTENSIONS -from parse import parse_args -import numpy as np -from CycleGAN_NetLoad import load_networks - - -def make_power(img, base): - ow, oh = img.size - h = int(round(oh / base) * base) - w = int(round(ow / base) * base) - if h == oh and w == ow: - return img - - -def preprocess(image_shape): - process = transforms.Compose([ - transforms.Lambda(lambda img: make_power(img, base=4)), - transforms.Resize(image_shape), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - return process - - -def postprocess(img_tensor): - inv_normalize = transforms.Normalize( - mean=(-1, -1, -1), - std=(2.0, 2.0, 2.0)) - to_PIL_image = transforms.ToPILImage() - return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) - - -def make_dataset(dir, max_dataset_size=float("inf")): - images = [] - assert os.path.isdir(dir), '%s is not a valid directory' % dir - - for root, _, fnames in sorted(os.walk(dir)): - for fname in fnames: - path = os.path.join(root, fname) - images.append(path) - return images[:min(max_dataset_size, len(images))] - - -def default_loader(path): - return Image.open(path).convert('RGB') - - -class ImageFolder(Dataset): - def __init__(self, root, transform=None, return_paths=False, - loader=default_loader): - imgs = make_dataset(root) - if len(imgs) == 0: - raise (RuntimeError("Found 0 images in: " + root + "\n" + - "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) - self.root = root - self.imgs = imgs - self.transform = transform - self.return_paths = return_paths - self.loader = loader - - def __getitem__(self, index): - path = self.imgs[index] - img = self.loader(path) - if self.transform is not None: - img = self.transform(img) - if self.return_paths: - return img, path - else: - return img - - def __len__(self): - return len(self.imgs) - - -def main(): - paser = parse_args(True, True) - opt = paser.initialize() - lnetworks = load_networks(opt) - bachsize = opt.batch_size - # whether to use fp16 to farword - half_data_model = True - transform = preprocess((256, 256)) - model_Ga, model_Gb = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) - device_cuda = torch.device("cuda:%s" % (str(opt.pu_ids))) - model_Ga = model_Ga.to(device_cuda) - if (half_data_model): - model_Ga = model_Ga.half() - datasets = ImageFolder(opt.dataroot, transform) - dataloader = torch.utils.data.DataLoader(datasets, batch_size=bachsize, shuffle=True, num_workers=4) - filename = opt.gpuPerformance + 'GPU_perf_of_cycle_gan-b0_bs' + str(bachsize) + '_in_device_' + str( - opt.pu_ids) + '.txt' - f = None - if (os.path.exists(opt.gpuPerformance) == False): - os.mkdir(opt.gpuPerformance) - f = open(filename, mode='w') - else: - f = open(filename, mode='w') - 
timelist = [] - for i, data in enumerate(dataloader): - start_time = time.time() - data = data.to(device_cuda) - if (half_data_model): - data = data.half() - model_Ga.forward(data) - end_time = time.time() - if (i > 10): - timelist.append((end_time - start_time) * 1000) - a_time = time.asctime(time.localtime(time.time())) - timelist = np.array(timelist) - mintime = timelist.argmin() - maxtime = timelist.argmax() - meantime = np.mean(timelist) - mediantime = np.median(timelist) - alltime = np.sum(timelist) / 1000 - message = ''' - [%s],[I] GPU Compute - [%s],[I] min:%.5f ms - [%s],[I] max:%.5f ms - [%s],[I] mean:%.5f ms - [%s],[I] median:%.5f ms - [%s],[I] total compute time:%.5f s - [%s],[I] CardFPS:1000/(%f/%f)=%.2f fps - ''' % (a_time, \ - a_time, mintime, \ - a_time, maxtime, \ - a_time, meantime, \ - a_time, mediantime, \ - a_time, alltime, \ - a_time, meantime, bachsize, 1000 / (meantime / bachsize)) - - print(message) - f.write(message) - f.close() - - -if __name__ == '__main__': - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
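+#
+# Module note: this script times FP16 forward passes of the CycleGAN generator
+# G_A on a GPU over the images under opt.dataroot (half_data_model is hard-coded
+# to True), skips the first iterations as warm-up, and writes a latency and FPS
+# summary to a report file under opt.gpuPerformance.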
+# ============================================================================ +import os +import time +import torchvision.transforms as transforms +from PIL import Image +import torch.onnx +from torch.utils.data import Dataset +from torchvision.datasets.folder import IMG_EXTENSIONS +from parse import parse_args +import numpy as np +from CycleGAN_NetLoad import load_networks + + +def make_power(img, base): + ow, oh = img.size + h = int(round(oh / base) * base) + w = int(round(ow / base) * base) + if h == oh and w == ow: + return img + + +def preprocess(image_shape): + process = transforms.Compose([ + transforms.Lambda(lambda img: make_power(img, base=4)), + transforms.Resize(image_shape), + transforms.ToTensor(), + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) + return process + + +def postprocess(img_tensor): + inv_normalize = transforms.Normalize( + mean=(-1, -1, -1), + std=(2.0, 2.0, 2.0)) + to_PIL_image = transforms.ToPILImage() + return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) + + +def make_dataset(dir, max_dataset_size=float("inf")): + images = [] + assert os.path.isdir(dir), '%s is not a valid directory' % dir + + for root, _, fnames in sorted(os.walk(dir)): + for fname in fnames: + path = os.path.join(root, fname) + images.append(path) + return images[:min(max_dataset_size, len(images))] + + +def default_loader(path): + return Image.open(path).convert('RGB') + + +class ImageFolder(Dataset): + def __init__(self, root, transform=None, return_paths=False, + loader=default_loader): + imgs = make_dataset(root) + if len(imgs) == 0: + raise (RuntimeError("Found 0 images in: " + root + "\n" + + "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) + self.root = root + self.imgs = imgs + self.transform = transform + self.return_paths = return_paths + self.loader = loader + + def __getitem__(self, index): + path = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + if self.return_paths: + return img, path + else: + return img + + def __len__(self): + return len(self.imgs) + + +def main(): + paser = parse_args(True, True) + opt = paser.initialize() + lnetworks = load_networks(opt) + bachsize = opt.batch_size + # whether to use fp16 to farword + half_data_model = True + transform = preprocess((256, 256)) + model_Ga, model_Gb = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) + device_cuda = torch.device("cuda:%s" % (str(opt.pu_ids))) + model_Ga = model_Ga.to(device_cuda) + if (half_data_model): + model_Ga = model_Ga.half() + datasets = ImageFolder(opt.dataroot, transform) + dataloader = torch.utils.data.DataLoader(datasets, batch_size=bachsize, shuffle=True, num_workers=4) + filename = opt.gpuPerformance + 'GPU_perf_of_cycle_gan-b0_bs' + str(bachsize) + '_in_device_' + str( + opt.pu_ids) + '.txt' + f = None + if (os.path.exists(opt.gpuPerformance) == False): + os.mkdir(opt.gpuPerformance) + f = open(filename, mode='w') + else: + f = open(filename, mode='w') + timelist = [] + for i, data in enumerate(dataloader): + start_time = time.time() + data = data.to(device_cuda) + if (half_data_model): + data = data.half() + model_Ga.forward(data) + end_time = time.time() + if (i > 10): + timelist.append((end_time - start_time) * 1000) + a_time = time.asctime(time.localtime(time.time())) + timelist = np.array(timelist) + mintime = timelist.argmin() + maxtime = timelist.argmax() + meantime = np.mean(timelist) + mediantime = np.median(timelist) + alltime = np.sum(timelist) / 1000 + message = ''' + 
[%s],[I] GPU Compute + [%s],[I] min:%.5f ms + [%s],[I] max:%.5f ms + [%s],[I] mean:%.5f ms + [%s],[I] median:%.5f ms + [%s],[I] total compute time:%.5f s + [%s],[I] CardFPS:1000/(%f/%f)=%.2f fps + ''' % (a_time, \ + a_time, mintime, \ + a_time, maxtime, \ + a_time, meantime, \ + a_time, mediantime, \ + a_time, alltime, \ + a_time, meantime, bachsize, 1000 / (meantime / bachsize)) + + print(message) + f.write(message) + f.close() + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/Readme.md b/ACL_PyTorch/contrib/cv/gan/CycleGAN/Readme.md index eb51b624e3540f9840f24e876cd568b48c1ae295..8b8ea739ba971eee5560ecc07cd59a1b21522167 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/Readme.md +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/Readme.md @@ -1,619 +1,619 @@ -\# CycleGAN模型端到端推理指导 - -\- [1 模型概述](#1-模型概述) - -​ \- [1.1 论文地址](#11-论文地址) - -​ \- [1.2 代码地址](#12-代码地址) - -\- [2 环境说明](#2-环境说明) - -​ \- [2.1 深度学习框架](#21-深度学习框架) - -​ \- [2.2 python第三方库](#22-python第三方库) - -\- [3 模型转换](#3-模型转换) - -​ \- [3.1 pth转onnx模型](#31-pth转onnx模型) - -​ \- [3.2 onnx转om模型](#32-onnx转om模型) - -\- [4 数据集预处理](#4-数据集预处理) - -​ \- [4.1 数据集获取](#41-数据集获取) - -​ \- [4.2 数据集预处理](#42-数据集预处理) - -​ \- [4.3 生成数据集信息文件](#43-生成数据集信息文件) - -\- [5 离线推理](#5-离线推理) - -​ \- [5.1 benchmark工具概述](#51-benchmark工具概述) - -​ \- [5.2 离线推理](#52-离线推理) - -\- [6 精度对比](#6-精度对比) - -​ \- [6.1 离线推理精度统计](#61-离线推理精度统计) - -​ \- [6.2 在线推理精度](#62-在线推理精度) - -​ \- [6.3 精度对比](#63-精度对比) - -\- [7 性能对比](#7-性能对比) - -​ \- [7.1 npu性能数据](#71-npu性能数据) - -​ \- [7.2 性能优化](#73-性能优化) - -​ \- [7.2.1 优化TransData,修改five_2_four.py](#731-优化TransData,修改five_2_four.py) - -\## 1 模型概述 - - - -\- **[论文地址](#11-论文地址)** - - - -\- **[代码地址](#12-代码地址)** - - - -\### 1.1 论文地址 - - - -[CycleGAN论文]( https://arxiv.org/pdf/1703.10593v7.pdf) - -我们专注于本文中风格转换中的地图转换。它通过一种无监督的少样本的学习方式,能够实现航拍地图和卫星地图之间的相互转换。 - -\### 1.2 代码地址 - -[CycleGAN代码]( https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) - -branch:master - -commit_id:略 - -备注:commit_id是指基于该次提交时的模型代码做推理,通常选择稳定版本的最后一次提交,或代码仓最新的一次提交 - - - -\## 2 环境说明 - - - -\- **[深度学习框架](#21-深度学习框架)** - - - -\- **[python第三方库](#22-python第三方库)** - - - -\### 2.1 深度学习框架 - -\``` - -``` -CANN 5.0.2.alpha003 - -torch == 1.5.0 - -torchvision == 0.9.0 - -onnx==1.7.0 - -onnx-simplifier==0.3.6 - -onnxconverter-common==1.6.1 - -onnxoptimizer==0.2.6 - -onnxruntime==1.6.0 - -tensorboard==1.15.0 - -tensorflow==1.15.0 - -tensorflow-estimator ==1.15.1 - -termcolor==1.1.0 -``` - -\``` - - - -\### 2.2 python第三方库 - -\``` - -``` -numpy == 1.16.6 - -Pillow == 8.2.0 - -opencv-python == 4.5.2.52 - -sympy == 1.4 - -decorator == 4.4.2 - -requests == 2.22.0 - -tqdm == 4.61.0 - -PyYAML == 5.4.1 -``` - -\``` - - - -**说明:** - -\> X86架构:pytorch torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3 install 包名 安装 - -\> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3 install 包名 安装 - - - -\## 3 模型转换 - - - -\- **[pth转onnx模型](#31-pth转onnx模型)** - - - -\- **[onnx转om模型](#32-onnx转om模型)** - - - -\### 3.1 pth转onnx模型 - -1.下载开源模型代码,安装必要的依赖库,并修改模型代码后安装 - -\``` - -``` -git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix -cd pytorch-CycleGAN-and-pix2pix -pip3 install -r requirements.txt -``` - -\``` - - - -2.下载pth权重文件 - - - -\- [官方CycleGAN pth权重文件](http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models/) - -\- [获取A800-9000训练的pth文件,该链接为百度网盘链接,提取码为:1234](https://pan.baidu.com/s/1YqHkce2wUw-W8_VY9dYD_w)) - -3.编写pth2onnx脚本*CycleGAN_onnx_export.py* - - **说明:** - -\>注意目前ATC支持的onnx算子版本为11 - - - -4.执行*CycleGAN_onnx_export.py*脚本,生成onnx模型文件 - -\``` - -``` -python3 
CycleGAN_onnx_export.py \ - ---model_ga_path=./checkpoints/maps_cycle_gan/latest_net_G_A.pth\ - ---model_gb_path=./checkpoints/maps_cycle_gan/latest_net_G_B.pth\ - ---onnx_path=./onnxmodel/ \ - ---model_ga_onnx_name=model_Ga.onnx \ - ---model_gb_onnx_name=model_Gb.onnx \ -``` - -\``` - - **模型转换要点:** - -\- 开源仓中的生成器采用的padding类型为ReflectionPad2d,由于在转om格式模型的时候,会出现算子不兼容问题导致om模型转换失败,这里我们将改padding类型替换为ZeroPad2d。如果您任然坚持使用ReflectionPad2d,请在转换Onnx格式后运行 - - ' ' ' - -``` -python3 CycleGAN_ReflectpadDeal.py \ - ---onnx_path=./onnxmodel/ \ - ---model_ga_onnx_name=model_Ga.onnx \ - ---model_gb_onnx_name=model_Gb.onnx \ -``` - -' ' ' - -该脚本会将ReflectionPad2d中的属性替换为constant,这样做的结果会导致模型执行推理时会出现边缘模糊,详情请见issue链接https://e.gitee.com/HUAWEI-ASCEND/issues/list?issue=I4467L#note_6141945 - - - -\### 3.2 onnx转om模型 - - - -1.设置环境变量 - -\``` - -``` -source env.sh -``` - -\``` - -\- 根据实际情况修改env.sh中的install_path=/usr/local/Ascend/ascend-toolkit/latest变量 - -\- 执行脚本前先执行指令 dos2unix * - - - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.1 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) - -\``` - -``` -atc --framework=5 --model=./onnxmodel/model_Ga.onnx --output=Cons_Ga_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_sat_maps:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config - -atc --framework=5 --model=./onnxmodel/model_Gb.onnx --output=Cons_Gb_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_maps_sat:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config -``` - -\``` - -\- 说明 - - \- input_shape参数可通过Netron工具查看输入节点的名称和shape, 与pth转onnx步骤中的参数一致 - - \- out_nodes为指定输出节点, 通过Netron可以看到onnx文件有四个输出, 以自测转换的onnx为例 - - 如果在转onnx时使用的不是默认路径,请将—model中的参数设置为onnx格式模型所在的路径 - - - - - -\## 4 数据集预处理 - - - -\- **[数据集获取](#41-数据集获取)** - - - -\- **[数据集预处理](#42-数据集预处理)** - - - -\- **[生成数据集信息文件](#43-生成数据集信息文件)** - - - -\### 4.1 数据集获取 - -该模型使用[maps数据集](http://efrosgans.eecs.berkeley.edu/cyclegan/datasets/maps.zip)的testA和testB各1098张验证集进行测试,因为航拍地图和卫星地图之间的相互转换的两个生成器模型结构一样,这里我们只需要保证其中一个生辰器精度和性能跟上就行,这里我们以model_Ga.onnx和testA为推理的模型和测试数据集。 - - - -\### 4.2 数据集预处理 - -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -\``` - -``` -python3 gen_dataset_info.py \ - ---src_path_testA=./datasets/maps/testA/ \ - ---save_pathTestA_dst=datasetsDst/maps/testA/ \ - ---dataTestA_infoName=testA_prep.info \ - ---src_path_testB=./datasets/maps/testB/ \ - ---save_pathTestB_dst=./datasetsDst/maps/testB/ \ - ---dataTestB_infoName=testB_prep.info -``` - -' ' ' - -\## 5 离线推理 - - - -\- **[benchmark工具概述](#51-benchmark工具概述)** - - - -\- **[离线推理](#52-离线推理)** - - - -\### 5.1 benchmark工具概述 - - - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.1 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) - -\### 5.2 离线推理 - -1.设置环境变量 - -\``` - -source env.sh - -\``` - -2.执行离线推理 - -\- benchmark工具区分arm64和x86_64, 对应分别为./benchmark.aarch64和./benchmark.x86_64, 示例中均以x86_64环境为例 - -\- 将benchmark工具去相应路径获取后放到env.sh同级目录下,加上执行权限chmod +x benchmark.XX - -''' - -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true - -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 
-om_path=Cons_Gb_aipp512_b0_bs1.om -input_text_path=testB_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true -``` - -输出结果默认保存在当前目录result/dumpOutput_devicex,每个输入对应的输出对应一个_x.bin文件。 - -''' - -\## 6 精度对比 - -\### 6.1 离线推理精度统计 - -由于该模型的精度在论文中是由人眼分辨,所以这里我们那Onnx和om模型输出的平均余弦相似度来替代精度,只需要保证Onnx格式模型的效果和论文中的一致并且om和onnx格式模型的余弦相似度在99%左右就精度达标。执行eval_acc_py.py脚本计算平均余弦相似度 : - -\``` - -``` - -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true #如果已经执行这一步请忽略 -python3 eval_acc.py \ ---dataroot=./datasets/maps\ ---npu_bin_file=./result/dumpOutput_device0/ -``` - -\``` - -\### 6.2精度对比 - -![1](C:\Users\Administrator\Desktop\1.png) - -将得到的om离线模型推理精度与在线推理精度对比,推理精度与在线推理精度一致,精度达标。 - - **精度调试:** - -使用onnxruntime测试onnx离线推理精度与om一致。 - -\## 7 性能对比 - -\- **[npu性能数据](#71-npu性能数据)** - -\- **[性能优化](#73-性能优化)** - -\### 7.1 npu性能数据 - -这里用batch1和batch16做示例 - - - -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,模型的测试脚本使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 - - - -1.benchmark工具在整个数据集上推理获得性能数据 - - - -以batch1为例,benchmark工具在整个数据集上推理,执行下面命令。 - -``` -atc --framework=5 --model=./onnxmodel/model_Ga.onnx --output=Cons_Ga_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_sat_maps:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config #如果已经转换,请忽略 -python3.7 gen_dataset_info.py #如果这一步已经执行,可直接执行下一步推理 -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true -``` - -\``` - - ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121624_f45173ef_9486012.png "屏幕截图.png") - - - -Interface throughputRate: 10.7,10.7乘以4,是310单卡吞吐率 - - \``` - -2.benchmark纯推理功能测得性能数据 - - - -batch1的性能: - - 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 - -\``` - -``` -./benchmark.x86_64 -round=20 -om_path=Cons_Ga_aipp512_b0_bs1.om -device_id=0 -batch_size=1 -``` - -``` - -执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 - - ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121641_4ed82b8d_9486012.png "屏幕截图.png") -``` - - -Batch16的性能: - -``` -./benchmark.x86_64 -round=20 -om_path=model_Ga-b0_bs16.om -device_id=1 -batch_size=16 -``` - -![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121659_6331aa3d_9486012.png "屏幕截图.png") - -\### 7.2 性能优化 - -``` -**性能优化** - -\- profiling性能分析方法 - -​ CANN C20及以后的版本profiling使用方法 - -新建/home/zlz/CycleGan_deal/perProblem_detec/run文件,内容如下: - -``` -``` -# /usr/local/Ascend/ascend-toolkit/ /usr/local/Ascend/ascend-toolkit/ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -./benchmark.x86_64 -round=20 -om_path=/home/zlz/cyclegan/model_Ga1-b0_bs16.om -device_id=0 -batch_size=16 -``` - -然后执行如下命令: -``` -chmod 777 /home/zlz/CycleGan_deal/perProblem_detec/run -cd /usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/toolkit/tools/profiler/bin -./msprof 
--output=/home/zlz/CycleGan_deal/perProblem_detec/perPro/ --application=/home/zlz/CycleGan_deal/perProblem_detec/run --sys-hardware-mem=on --sys-cpu-profiling=on --sys-profiling=on --sys-pid-profiling=on --sys-io-profiling=on --dvpp-profiling=on -cd /usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/toolkit/tools/profiler/profiler_tool/analysis/msprof/ -# 生成的profiling目录 -python3.7 msprof.py import -dir/home/zlz/CycleGan_deal/perProblem_detec/perPro/ -python3.7 msprof.py export summary -dir /home/zlz/CycleGan_deal/perProblem_detec/perPro/ -#生成的profiling目录 --iteration-id 1 -python3.7 msprof.py export timeline -dir /home/zlz/CycleGan_deal/perProblem_detec/perPro/ - -``` -目录 - -\- 性能调优测试版本:CANN 5.0.2.alpha003 - -\- 性能优化过程主要对trans_Data算子进行优化,结合profiling分析,性能有提升: - -\#### 7.3.1 five_2_four.py优化方法 - - 在环境变量env.sh中export install_path=/usr/local/Ascend/ascend-toolkit/latest路径下查找five_2_four.py文件,路径一般为 - -\``` - -/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/five_2_four.py - -\``` - -修改five_2_four.py文件,将TransData算子的output shape加入five_2_four函数行中,示例如下: - -\``` - - ... - from impl import trans_data_negative_target_ntc - - @util.check_input_type(dict, dict, str, str, str) - - def five_2_four(src, dst, src_format, dst_format, kernel_name='five_2_four'): - elif dst_format.lower() == "nhwc" and dst_shape in [[10000, 63, 63, 1], [10000, 127, 127, 1], [16, 19, 19, 486], - - [16, 10, 10, 486], [16, 38, 38, 324], [16, 5, 5, 486], - - [16, 3, 3, 324], [8, 19, 19, 486], [8, 10, 10, 486], - - [8, 38, 38, 324], [8, 5, 5, 486], [8, 3, 3, 324], - - [100, 28, 28, 91]]: - - trans_data_negative_target_tc.trans_data_negative_target_tc(src, dst, src_format, dst_format, kernel_name) - - elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25], - - [16, 240, 7, 7], [16, 120, 14, 14], [1,19,1024,2048], [4,19,1024,2048]]: - - print("=================================") - - print("ntc dst shape:", dst_shape) - - print("=================================") - - trans_data_negative_target_ntc.trans_data_negative_target_ntc(src, dst, src_format, dst_format, kernel_name) - ... 
- -\``` - -\- 不同的batch_size,添加的shape不一样,shape大小为[*,19,256,256 ] ,以本模型为例,只测试batch1和batch16,因此添加的shape为[1,19,256,256],[4,19,256,256] - -修改完成后,重新转换生成om文件,atc转换过程会打印添加的日志,如下: - -\```![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121715_d94592ad_9486012.png "屏幕截图.png") - - \``` - -纯推理测试结果: - -\``` - -bs1: - - ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121721_50c95bdd_9486012.png "屏幕截图.png") - -Bs16: - -![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/122022_a16e9ff5_9486012.png "屏幕截图.png") - -\``` - - - -用生成的om文件做精度后处理,测得bs1和bs16与之前的Onnx模型做余弦相似度高于99%,精度无损失、 - -\``` - -\#### 7.3.1 总结 - -优化方案共包括五种: - -(1)优化TransData,修改five_2_four.py - -(2)输出节点由float32改为float16 - -(3)模型中Resize节点的mode由双线性为最近邻 - -(4)将PadV3D进行算子融合 - -(5)优化FrameworkOP框架 - -由于在蓝区测试的版本CANN 5.0.2.alpha003中,已经实现了PadV3D算子融合,因此测试过程默认已经优化。同时方案(5)暂时无法实现,因此也无法比对性能。 - -结论: - -\- 因为关键算子性能差,性能暂时无法达标。 - +\# CycleGAN模型端到端推理指导 + +\- [1 模型概述](#1-模型概述) + +​ \- [1.1 论文地址](#11-论文地址) + +​ \- [1.2 代码地址](#12-代码地址) + +\- [2 环境说明](#2-环境说明) + +​ \- [2.1 深度学习框架](#21-深度学习框架) + +​ \- [2.2 python第三方库](#22-python第三方库) + +\- [3 模型转换](#3-模型转换) + +​ \- [3.1 pth转onnx模型](#31-pth转onnx模型) + +​ \- [3.2 onnx转om模型](#32-onnx转om模型) + +\- [4 数据集预处理](#4-数据集预处理) + +​ \- [4.1 数据集获取](#41-数据集获取) + +​ \- [4.2 数据集预处理](#42-数据集预处理) + +​ \- [4.3 生成数据集信息文件](#43-生成数据集信息文件) + +\- [5 离线推理](#5-离线推理) + +​ \- [5.1 benchmark工具概述](#51-benchmark工具概述) + +​ \- [5.2 离线推理](#52-离线推理) + +\- [6 精度对比](#6-精度对比) + +​ \- [6.1 离线推理精度统计](#61-离线推理精度统计) + +​ \- [6.2 在线推理精度](#62-在线推理精度) + +​ \- [6.3 精度对比](#63-精度对比) + +\- [7 性能对比](#7-性能对比) + +​ \- [7.1 npu性能数据](#71-npu性能数据) + +​ \- [7.2 性能优化](#73-性能优化) + +​ \- [7.2.1 优化TransData,修改five_2_four.py](#731-优化TransData,修改five_2_four.py) + +\## 1 模型概述 + + + +\- **[论文地址](#11-论文地址)** + + + +\- **[代码地址](#12-代码地址)** + + + +\### 1.1 论文地址 + + + +[CycleGAN论文]( https://arxiv.org/pdf/1703.10593v7.pdf) + +我们专注于本文中风格转换中的地图转换。它通过一种无监督的少样本的学习方式,能够实现航拍地图和卫星地图之间的相互转换。 + +\### 1.2 代码地址 + +[CycleGAN代码]( https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) + +branch:master + +commit_id:略 + +备注:commit_id是指基于该次提交时的模型代码做推理,通常选择稳定版本的最后一次提交,或代码仓最新的一次提交 + + + +\## 2 环境说明 + + + +\- **[深度学习框架](#21-深度学习框架)** + + + +\- **[python第三方库](#22-python第三方库)** + + + +\### 2.1 深度学习框架 + +\``` + +``` +CANN 5.0.2.alpha003 + +torch == 1.5.0 + +torchvision == 0.9.0 + +onnx==1.7.0 + +onnx-simplifier==0.3.6 + +onnxconverter-common==1.6.1 + +onnxoptimizer==0.2.6 + +onnxruntime==1.6.0 + +tensorboard==1.15.0 + +tensorflow==1.15.0 + +tensorflow-estimator ==1.15.1 + +termcolor==1.1.0 +``` + +\``` + + + +\### 2.2 python第三方库 + +\``` + +``` +numpy == 1.16.6 + +Pillow == 8.2.0 + +opencv-python == 4.5.2.52 + +sympy == 1.4 + +decorator == 4.4.2 + +requests == 2.22.0 + +tqdm == 4.61.0 + +PyYAML == 5.4.1 +``` + +\``` + + + +**说明:** + +\> X86架构:pytorch torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3 install 包名 安装 + +\> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3 install 包名 安装 + + + +\## 3 模型转换 + + + +\- **[pth转onnx模型](#31-pth转onnx模型)** + + + +\- **[onnx转om模型](#32-onnx转om模型)** + + + +\### 3.1 pth转onnx模型 + +1.下载开源模型代码,安装必要的依赖库,并修改模型代码后安装 + +\``` + +``` +git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix +cd pytorch-CycleGAN-and-pix2pix +pip3 install -r requirements.txt +``` + +\``` + + + +2.下载pth权重文件 + + + +\- [官方CycleGAN pth权重文件](http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models/) + +\- [获取A800-9000训练的pth文件,该链接为百度网盘链接,提取码为:1234](https://pan.baidu.com/s/1YqHkce2wUw-W8_VY9dYD_w)) + +3.编写pth2onnx脚本*CycleGAN_onnx_export.py* + + **说明:** + 
+\>注意目前ATC支持的onnx算子版本为11 + + + +4.执行*CycleGAN_onnx_export.py*脚本,生成onnx模型文件 + +\``` + +``` +python3 CycleGAN_onnx_export.py \ + +--model_ga_path=./checkpoints/maps_cycle_gan/latest_net_G_A.pth\ + +--model_gb_path=./checkpoints/maps_cycle_gan/latest_net_G_B.pth\ + +--onnx_path=./onnxmodel/ \ + +--model_ga_onnx_name=model_Ga.onnx \ + +--model_gb_onnx_name=model_Gb.onnx \ +``` + +\``` + + **模型转换要点:** + +\- 开源仓中的生成器采用的padding类型为ReflectionPad2d,由于在转om格式模型的时候,会出现算子不兼容问题导致om模型转换失败,这里我们将改padding类型替换为ZeroPad2d。如果您任然坚持使用ReflectionPad2d,请在转换Onnx格式后运行 + + ' ' ' + +``` +python3 CycleGAN_ReflectpadDeal.py \ + +--onnx_path=./onnxmodel/ \ + +--model_ga_onnx_name=model_Ga.onnx \ + +--model_gb_onnx_name=model_Gb.onnx \ +``` + +' ' ' + +该脚本会将ReflectionPad2d中的属性替换为constant,这样做的结果会导致模型执行推理时会出现边缘模糊,详情请见issue链接https://e.gitee.com/HUAWEI-ASCEND/issues/list?issue=I4467L#note_6141945 + + + +\### 3.2 onnx转om模型 + + + +1.设置环境变量 + +\``` + +``` +source env.sh +``` + +\``` + +\- 根据实际情况修改env.sh中的install_path=/usr/local/Ascend/ascend-toolkit/latest变量 + +\- 执行脚本前先执行指令 dos2unix * + + + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.1 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373) + +\``` + +``` +atc --framework=5 --model=./onnxmodel/model_Ga.onnx --output=Cons_Ga_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_sat_maps:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config + +atc --framework=5 --model=./onnxmodel/model_Gb.onnx --output=Cons_Gb_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_maps_sat:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config +``` + +\``` + +\- 说明 + + \- input_shape参数可通过Netron工具查看输入节点的名称和shape, 与pth转onnx步骤中的参数一致 + + \- out_nodes为指定输出节点, 通过Netron可以看到onnx文件有四个输出, 以自测转换的onnx为例 + + 如果在转onnx时使用的不是默认路径,请将—model中的参数设置为onnx格式模型所在的路径 + + + + + +\## 4 数据集预处理 + + + +\- **[数据集获取](#41-数据集获取)** + + + +\- **[数据集预处理](#42-数据集预处理)** + + + +\- **[生成数据集信息文件](#43-生成数据集信息文件)** + + + +\### 4.1 数据集获取 + +该模型使用[maps数据集](http://efrosgans.eecs.berkeley.edu/cyclegan/datasets/maps.zip)的testA和testB各1098张验证集进行测试,因为航拍地图和卫星地图之间的相互转换的两个生成器模型结构一样,这里我们只需要保证其中一个生辰器精度和性能跟上就行,这里我们以model_Ga.onnx和testA为推理的模型和测试数据集。 + + + +\### 4.2 数据集预处理 + +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +\``` + +``` +python3 gen_dataset_info.py \ + +--src_path_testA=./datasets/maps/testA/ \ + +--save_pathTestA_dst=datasetsDst/maps/testA/ \ + +--dataTestA_infoName=testA_prep.info \ + +--src_path_testB=./datasets/maps/testB/ \ + +--save_pathTestB_dst=./datasetsDst/maps/testB/ \ + +--dataTestB_infoName=testB_prep.info +``` + +' ' ' + +\## 5 离线推理 + + + +\- **[benchmark工具概述](#51-benchmark工具概述)** + + + +\- **[离线推理](#52-离线推理)** + + + +\### 5.1 benchmark工具概述 + + + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.1 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +\### 5.2 离线推理 + +1.设置环境变量 + +\``` + +source env.sh + +\``` + +2.执行离线推理 + +\- benchmark工具区分arm64和x86_64, 对应分别为./benchmark.aarch64和./benchmark.x86_64, 示例中均以x86_64环境为例 + +\- 将benchmark工具去相应路径获取后放到env.sh同级目录下,加上执行权限chmod +x benchmark.XX + +''' + +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 
-output_binary=true -useDvpp=true + +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=Cons_Gb_aipp512_b0_bs1.om -input_text_path=testB_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true +``` + +输出结果默认保存在当前目录result/dumpOutput_devicex,每个输入对应的输出对应一个_x.bin文件。 + +''' + +\## 6 精度对比 + +\### 6.1 离线推理精度统计 + +由于该模型的精度在论文中是由人眼分辨,所以这里我们那Onnx和om模型输出的平均余弦相似度来替代精度,只需要保证Onnx格式模型的效果和论文中的一致并且om和onnx格式模型的余弦相似度在99%左右就精度达标。执行eval_acc_py.py脚本计算平均余弦相似度 : + +\``` + +``` + +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true #如果已经执行这一步请忽略 +python3 eval_acc.py \ +--dataroot=./datasets/maps\ +--npu_bin_file=./result/dumpOutput_device0/ +``` + +\``` + +\### 6.2精度对比 + +![1](C:\Users\Administrator\Desktop\1.png) + +将得到的om离线模型推理精度与在线推理精度对比,推理精度与在线推理精度一致,精度达标。 + + **精度调试:** + +使用onnxruntime测试onnx离线推理精度与om一致。 + +\## 7 性能对比 + +\- **[npu性能数据](#71-npu性能数据)** + +\- **[性能优化](#73-性能优化)** + +\### 7.1 npu性能数据 + +这里用batch1和batch16做示例 + + + +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,模型的测试脚本使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准。 + + + +1.benchmark工具在整个数据集上推理获得性能数据 + + + +以batch1为例,benchmark工具在整个数据集上推理,执行下面命令。 + +``` +atc --framework=5 --model=./onnxmodel/model_Ga.onnx --output=Cons_Ga_aipp512_b0_bs1 --input_format=NCHW --input_shape="img_sat_maps:1,3,256,256" --out_nodes="Tanh_156:0" --log=debug --soc_version=Ascend310 --insert_op_conf=aipp_CycleGAN_pth.config #如果已经转换,请忽略 +python3.7 gen_dataset_info.py #如果这一步已经执行,可直接执行下一步推理 +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=Cons_Ga_aipp512_b0_bs1.om -input_text_path=testA_prep.info -input_width=512 -input_height=512 -output_binary=true -useDvpp=true +``` + +\``` + + ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121624_f45173ef_9486012.png "屏幕截图.png") + + + +Interface throughputRate: 10.7,10.7乘以4,是310单卡吞吐率 + + \``` + +2.benchmark纯推理功能测得性能数据 + + + +batch1的性能: + + 测试npu性能要确保device空闲,使用npu-smi info命令可查看device是否在运行其它推理任务 + +\``` + +``` +./benchmark.x86_64 -round=20 -om_path=Cons_Ga_aipp512_b0_bs1.om -device_id=0 -batch_size=1 +``` + +``` + +执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 + + ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121641_4ed82b8d_9486012.png "屏幕截图.png") +``` + + +Batch16的性能: + +``` +./benchmark.x86_64 -round=20 -om_path=model_Ga-b0_bs16.om -device_id=1 -batch_size=16 +``` + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121659_6331aa3d_9486012.png "屏幕截图.png") + +\### 7.2 性能优化 + +``` +**性能优化** + +\- profiling性能分析方法 + +​ CANN C20及以后的版本profiling使用方法 + +新建/home/zlz/CycleGan_deal/perProblem_detec/run文件,内容如下: + +``` +``` +# /usr/local/Ascend/ascend-toolkit/ /usr/local/Ascend/ascend-toolkit/ +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +./benchmark.x86_64 -round=20 -om_path=/home/zlz/cyclegan/model_Ga1-b0_bs16.om -device_id=0 -batch_size=16 +``` + +然后执行如下命令: +``` +chmod 777 /home/zlz/CycleGan_deal/perProblem_detec/run +cd 
/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/toolkit/tools/profiler/bin +./msprof --output=/home/zlz/CycleGan_deal/perProblem_detec/perPro/ --application=/home/zlz/CycleGan_deal/perProblem_detec/run --sys-hardware-mem=on --sys-cpu-profiling=on --sys-profiling=on --sys-pid-profiling=on --sys-io-profiling=on --dvpp-profiling=on +cd /usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/toolkit/tools/profiler/profiler_tool/analysis/msprof/ +# 生成的profiling目录 +python3.7 msprof.py import -dir/home/zlz/CycleGan_deal/perProblem_detec/perPro/ +python3.7 msprof.py export summary -dir /home/zlz/CycleGan_deal/perProblem_detec/perPro/ +#生成的profiling目录 --iteration-id 1 +python3.7 msprof.py export timeline -dir /home/zlz/CycleGan_deal/perProblem_detec/perPro/ + +``` +目录 + +\- 性能调优测试版本:CANN 5.0.2.alpha003 + +\- 性能优化过程主要对trans_Data算子进行优化,结合profiling分析,性能有提升: + +\#### 7.3.1 five_2_four.py优化方法 + + 在环境变量env.sh中export install_path=/usr/local/Ascend/ascend-toolkit/latest路径下查找five_2_four.py文件,路径一般为 + +\``` + +/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/five_2_four.py + +\``` + +修改five_2_four.py文件,将TransData算子的output shape加入five_2_four函数行中,示例如下: + +\``` + + ... + from impl import trans_data_negative_target_ntc + + @util.check_input_type(dict, dict, str, str, str) + + def five_2_four(src, dst, src_format, dst_format, kernel_name='five_2_four'): + elif dst_format.lower() == "nhwc" and dst_shape in [[10000, 63, 63, 1], [10000, 127, 127, 1], [16, 19, 19, 486], + + [16, 10, 10, 486], [16, 38, 38, 324], [16, 5, 5, 486], + + [16, 3, 3, 324], [8, 19, 19, 486], [8, 10, 10, 486], + + [8, 38, 38, 324], [8, 5, 5, 486], [8, 3, 3, 324], + + [100, 28, 28, 91]]: + + trans_data_negative_target_tc.trans_data_negative_target_tc(src, dst, src_format, dst_format, kernel_name) + + elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25], + + [16, 240, 7, 7], [16, 120, 14, 14], [1,19,1024,2048], [4,19,1024,2048]]: + + print("=================================") + + print("ntc dst shape:", dst_shape) + + print("=================================") + + trans_data_negative_target_ntc.trans_data_negative_target_ntc(src, dst, src_format, dst_format, kernel_name) + ... 
+ +\``` + +\- 不同的batch_size,添加的shape不一样,shape大小为[*,19,256,256 ] ,以本模型为例,只测试batch1和batch16,因此添加的shape为[1,19,256,256],[4,19,256,256] + +修改完成后,重新转换生成om文件,atc转换过程会打印添加的日志,如下: + +\```![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121715_d94592ad_9486012.png "屏幕截图.png") + + \``` + +纯推理测试结果: + +\``` + +bs1: + + ![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/121721_50c95bdd_9486012.png "屏幕截图.png") + +Bs16: + +![输入图片说明](https://images.gitee.com/uploads/images/2021/0914/122022_a16e9ff5_9486012.png "屏幕截图.png") + +\``` + + + +用生成的om文件做精度后处理,测得bs1和bs16与之前的Onnx模型做余弦相似度高于99%,精度无损失、 + +\``` + +\#### 7.3.1 总结 + +优化方案共包括五种: + +(1)优化TransData,修改five_2_four.py + +(2)输出节点由float32改为float16 + +(3)模型中Resize节点的mode由双线性为最近邻 + +(4)将PadV3D进行算子融合 + +(5)优化FrameworkOP框架 + +由于在蓝区测试的版本CANN 5.0.2.alpha003中,已经实现了PadV3D算子融合,因此测试过程默认已经优化。同时方案(5)暂时无法实现,因此也无法比对性能。 + +结论: + +\- 因为关键算子性能差,性能暂时无法达标。 + \- 最终精度测试,Om模型输出效果达到论文效果,转Om后无精度损失。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/eval_acc.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/eval_acc.py index 875097d6fec5e85990734ace24103064b174d01f..e25f08a55a97c24b7299e5715aaa5d022fbd069b 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/eval_acc.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/eval_acc.py @@ -1,110 +1,110 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import os -import glob -import numpy as np -import onnxruntime -import torch -from PIL import Image -from torchvision import transforms -import parse -import PIL.Image as pil -import matplotlib.pyplot as plt - - -def make_power(img, base, method=Image.BICUBIC): - ow, oh = img.size - h = int(round(oh / base) * base) - w = int(round(ow / base) * base) - if h == oh and w == ow: - return img - return img.resize((w, h), method) - - -def preprocess(PIL_img, image_shape): - process = transforms.Compose([ - transforms.Lambda(lambda img: make_power(img, base=4, method=Image.BICUBIC)), - transforms.Resize(image_shape), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - return process(PIL_img).unsqueeze(dim=0) # (batch_size, 3, H, W) - - -def postprocess(img_tensor): - inv_normalize = transforms.Normalize( - mean=(-1, -1, -1), - std=(2.0, 2.0, 2.0)) - to_PIL_image = transforms.ToPILImage() - return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) - - -def bin2img_tensor(bin_src): - # read bin - with open(bin_src, 'rb') as f: - imageBin = f.read() - # What is stored in the bin file is a half-precision file, so we need to convert - # the binary to half-precision, and restore the model output shape 1*3*256*256 - img_tensor = torch.tensor(np.reshape(np.frombuffer(imageBin, 'f4'), (1, 3, 256, 256))) - return img_tensor - - -def main(): - opt = parse.parse_args().initialize() - if (os.path.exists(opt.bin2img_fie) == False): - os.makedirs(opt.bin2img_fie) - npu_bin = glob.glob(opt.npu_bin_file + '*.bin') - onnxTestImage_path = glob.glob(opt.dataroot + '/testA/*.*') - model_Ga = onnxruntime.InferenceSession(opt.onnx_path + opt.model_ga_onnx_name) - cossimis = [] - for i in onnxTestImage_path: - temp = i.split('/')[4].split('.')[0] - bin_name = temp + '_1.bin' - bin_path = opt.npu_bin_file + bin_name - check = os.path.exists(bin_path) - if check == True: - b2imtensor = bin2img_tensor(bin_path) - pil_image = pil.open(i).convert('RGB') - tensorData = preprocess(pil_image, 256) - outputs = model_Ga.run(['maps'], {'img_sat_maps': tensorData.numpy()}) - outputs = torch.tensor(outputs[0]) - cosSimi = torch.mean(torch.cosine_similarity(outputs, b2imtensor)) - cossimis.append(cosSimi.numpy()) - print('average cosine_similarity:') - print(np.mean(cossimis)) - plt.plot(cossimis) - plt.xlabel("samples") - plt.ylabel("cosine_similarity") - plt.savefig('cosine_similarity.jpg') - plt.show() - - -if __name__ == '__main__': - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os +import glob +import numpy as np +import onnxruntime +import torch +from PIL import Image +from torchvision import transforms +import parse +import PIL.Image as pil +import matplotlib.pyplot as plt + + +def make_power(img, base, method=Image.BICUBIC): + ow, oh = img.size + h = int(round(oh / base) * base) + w = int(round(ow / base) * base) + if h == oh and w == ow: + return img + return img.resize((w, h), method) + + +def preprocess(PIL_img, image_shape): + process = transforms.Compose([ + transforms.Lambda(lambda img: make_power(img, base=4, method=Image.BICUBIC)), + transforms.Resize(image_shape), + transforms.ToTensor(), + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) + return process(PIL_img).unsqueeze(dim=0) # (batch_size, 3, H, W) + + +def postprocess(img_tensor): + inv_normalize = transforms.Normalize( + mean=(-1, -1, -1), + std=(2.0, 2.0, 2.0)) + to_PIL_image = transforms.ToPILImage() + return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) + + +def bin2img_tensor(bin_src): + # read bin + with open(bin_src, 'rb') as f: + imageBin = f.read() + # What is stored in the bin file is a half-precision file, so we need to convert + # the binary to half-precision, and restore the model output shape 1*3*256*256 + img_tensor = torch.tensor(np.reshape(np.frombuffer(imageBin, 'f4'), (1, 3, 256, 256))) + return img_tensor + + +def main(): + opt = parse.parse_args().initialize() + if (os.path.exists(opt.bin2img_fie) == False): + os.makedirs(opt.bin2img_fie) + npu_bin = glob.glob(opt.npu_bin_file + '*.bin') + onnxTestImage_path = glob.glob(opt.dataroot + '/testA/*.*') + model_Ga = onnxruntime.InferenceSession(opt.onnx_path + opt.model_ga_onnx_name) + cossimis = [] + for i in onnxTestImage_path: + temp = i.split('/')[4].split('.')[0] + bin_name = temp + '_1.bin' + bin_path = opt.npu_bin_file + bin_name + check = os.path.exists(bin_path) + if check == True: + b2imtensor = bin2img_tensor(bin_path) + pil_image = pil.open(i).convert('RGB') + tensorData = preprocess(pil_image, 256) + outputs = model_Ga.run(['maps'], {'img_sat_maps': tensorData.numpy()}) + outputs = torch.tensor(outputs[0]) + cosSimi = torch.mean(torch.cosine_similarity(outputs, b2imtensor)) + cossimis.append(cosSimi.numpy()) + print('average cosine_similarity:') + print(np.mean(cossimis)) + plt.plot(cossimis) + plt.xlabel("samples") + plt.ylabel("cosine_similarity") + plt.savefig('cosine_similarity.jpg') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/gen_dataset_info.py index 51f2d64c68399b24fcee22467d6d7b15f7bd56d2..313e6529d450f6344b263b33826571f95b13285b 100644 --- 
a/ACL_PyTorch/contrib/cv/gan/CycleGAN/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/gen_dataset_info.py @@ -1,89 +1,89 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import argparse -import os -from PIL import Image - - -def parse(): - """Define the common options that are used in both training and test.""" - # basic parameters - parser = argparse.ArgumentParser(description='cyclegan test for image preprocess') - parser.add_argument('--src_path_testA', required=False, default='datasets/maps/testA/', - help='path to images testA)') - parser.add_argument('--save_pathTestA_dst', required=False, default='datasetsDst/maps/testA/', - help='path to images testA)') - parser.add_argument('--dataTestA_infoName', default='testA_prep.info', help='name of the ..') - - parser.add_argument('--src_path_testB', required=False, default='datasets/maps/testB/', - help='path to images testB)') - parser.add_argument('--save_pathTestB_dst', required=False, default='datasetsDst/maps/testB/', - help='path to images testA)') - parser.add_argument('--dataTestB_infoName', required=False, default='testB_prep.info', help='name of the ..') - opt = parser.parse_args() - if (os.path.exists(opt.save_pathTestA_dst) == False): - os.makedirs(opt.save_pathTestA_dst) - if (os.path.exists(opt.save_pathTestB_dst) == False): - os.makedirs(opt.save_pathTestB_dst) - return opt - - -def rs_img_bin(src_path, savepath, data_list_path): - i = 0 - in_files = os.listdir(src_path) - listfile = open(data_list_path, 'w') - for file in in_files: - # print(file, "===", i) - image_path = src_path + '/' + file - input_image = Image.open(image_path) - imgsavepath = savepath + str(file).split('.')[0] + '.jpeg' - input_image.thumbnail((512, 512), Image.ANTIALIAS) - input_image.save(imgsavepath) - w, h = input_image.size - temp = str(i) + ' ' + savepath + '/' + str(file).split('.')[0] + \ - '.jpeg' + ' ' + 
str(w) + ' ' + str(h) + '\n' - listfile.write(temp) - i = i + 1 - listfile.close() - - -def main(opt): - # deal testA and save img data to bin - rs_img_bin(opt.src_path_testA, opt.save_pathTestA_dst, opt.dataTestA_infoName) - # deal testB and save img data to bin - rs_img_bin(opt.src_path_testB, opt.save_pathTestB_dst, opt.dataTestB_infoName) - return 0 - - -if __name__ == '__main__': - opt = parse() - main(opt) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
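+# gen_dataset_info.py builds the image list consumed by the benchmark tool:
+# each image under testA/testB is shrunk in place to fit within 512x512
+# (Image.thumbnail keeps the aspect ratio), re-saved as JPEG under the *_dst
+# directory, and recorded in the .info file as one "index path width height"
+# line per sample.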
+# ============================================================================ +import argparse +import os +from PIL import Image + + +def parse(): + """Define the common options that are used in both training and test.""" + # basic parameters + parser = argparse.ArgumentParser(description='cyclegan test for image preprocess') + parser.add_argument('--src_path_testA', required=False, default='datasets/maps/testA/', + help='path to images testA)') + parser.add_argument('--save_pathTestA_dst', required=False, default='datasetsDst/maps/testA/', + help='path to images testA)') + parser.add_argument('--dataTestA_infoName', default='testA_prep.info', help='name of the ..') + + parser.add_argument('--src_path_testB', required=False, default='datasets/maps/testB/', + help='path to images testB)') + parser.add_argument('--save_pathTestB_dst', required=False, default='datasetsDst/maps/testB/', + help='path to images testA)') + parser.add_argument('--dataTestB_infoName', required=False, default='testB_prep.info', help='name of the ..') + opt = parser.parse_args() + if (os.path.exists(opt.save_pathTestA_dst) == False): + os.makedirs(opt.save_pathTestA_dst) + if (os.path.exists(opt.save_pathTestB_dst) == False): + os.makedirs(opt.save_pathTestB_dst) + return opt + + +def rs_img_bin(src_path, savepath, data_list_path): + i = 0 + in_files = os.listdir(src_path) + listfile = open(data_list_path, 'w') + for file in in_files: + # print(file, "===", i) + image_path = src_path + '/' + file + input_image = Image.open(image_path) + imgsavepath = savepath + str(file).split('.')[0] + '.jpeg' + input_image.thumbnail((512, 512), Image.ANTIALIAS) + input_image.save(imgsavepath) + w, h = input_image.size + temp = str(i) + ' ' + savepath + '/' + str(file).split('.')[0] + \ + '.jpeg' + ' ' + str(w) + ' ' + str(h) + '\n' + listfile.write(temp) + i = i + 1 + listfile.close() + + +def main(opt): + # deal testA and save img data to bin + rs_img_bin(opt.src_path_testA, opt.save_pathTestA_dst, opt.dataTestA_infoName) + # deal testB and save img data to bin + rs_img_bin(opt.src_path_testB, opt.save_pathTestB_dst, opt.dataTestB_infoName) + return 0 + + +if __name__ == '__main__': + opt = parse() + main(opt) diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/maps_torch_preprocess.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/maps_torch_preprocess.py index f7f45d6b8d113300f93535202515c27c9f9c72dc..cacbde78fe5a58505066bb7c5225869c43ca3630 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/maps_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/maps_torch_preprocess.py @@ -1,109 +1,109 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import argparse -import os -import torchvision.transforms as transforms -from PIL import Image -import numpy as np - - -def make_power(img, base, method=Image.BICUBIC): - ow, oh = img.size - h = int(round(oh / base) * base) - w = int(round(ow / base) * base) - if h == oh and w == ow: - return img - return img.resize((w, h), method) - - -def preprocess(PIL_img, image_shape=256): - process=transforms.Compose([ - transforms.Lambda(lambda img: make_power(img, base=4, method=Image.BICUBIC)), - transforms.Resize(image_shape), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))]) - return process(PIL_img) - - -def postprocess(img_tensor): - inv_normalize = transforms.Normalize( - mean= (-1,-1,-1), - std= (2.0,2.0,2.0)) - to_PIL_image = transforms.ToPILImage().convert('RGB') - return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) - - -def parse(): - """Define the common options that are used in both training and test.""" - # basic parameters - parser = argparse.ArgumentParser(description='cyclegan test for image preprocess') - parser.add_argument('--src_path_testA', required=False,default='datasets/maps/testA/', help='path to images testA)') - parser.add_argument('--save_path_testA_bin', type=str, default='nputest/testa', help='name of the ..') - parser.add_argument('--path_testA_binName', type=str, default='testA_prep_bin.info', help='name of the ..') - parser.add_argument('--src_path_testB', required=False, default='datasets/maps/testB/', help='path to images testB)') - parser.add_argument('--save_path_testB_bin', type=str, default='nputest/testb', help='name of the ..') - parser.add_argument('--path_testB_binName', type=str, default='testB_prep_bin.info', help='name of the ..') - opt=parser.parse_args() - if(os.path.exists(opt.save_path_testA_bin)==False): - os.makedirs(opt.save_path_testA_bin) - if(os.path.exists(opt.save_path_testB_bin)==False): - os.makedirs(opt.save_path_testB_bin) - return opt - - -def rs_img_bin(src_path,save_path,data_list_path): - i = 0 - in_files = os.listdir(src_path) - listfile = open(data_list_path, 'w') - for file in in_files: - #print(file, "===", i) - input_image = Image.open(src_path + '/' + file) - input_tensor = preprocess(input_image) - img = np.array(input_tensor).astype(np.float32) - img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) - temp = str(str(i) + ' ./' + os.path.join(save_path, file.split('.')[0] + ".bin") + ' ' + '256 256\n') - listfile.write(temp) - i = i + 1 - listfile.close() - - -def main(opt): - # deal testA and save img data to bin - rs_img_bin(opt.src_path_testA, opt.save_path_testA_bin, opt.path_testA_binName) - # deal testB and 
save img data to bin - rs_img_bin(opt.src_path_testB, opt.save_path_testB_bin, opt.path_testB_binName) - return 0 - - -if __name__=='__main__': - opt=parse() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
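+# maps_torch_preprocess.py converts the maps testA/testB images into the .bin
+# inputs used for offline inference: make_power() rounds height and width to a
+# multiple of 4, the image is resized so its shorter edge is 256, converted to
+# a tensor and normalized to [-1, 1] (mean=std=0.5), and the float32 result is
+# dumped with numpy tofile(); an accompanying .info file lists
+# "index bin_path 256 256" for each sample.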
+# ============================================================================ +import argparse +import os +import torchvision.transforms as transforms +from PIL import Image +import numpy as np + + +def make_power(img, base, method=Image.BICUBIC): + ow, oh = img.size + h = int(round(oh / base) * base) + w = int(round(ow / base) * base) + if h == oh and w == ow: + return img + return img.resize((w, h), method) + + +def preprocess(PIL_img, image_shape=256): + process=transforms.Compose([ + transforms.Lambda(lambda img: make_power(img, base=4, method=Image.BICUBIC)), + transforms.Resize(image_shape), + transforms.ToTensor(), + transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))]) + return process(PIL_img) + + +def postprocess(img_tensor): + inv_normalize = transforms.Normalize( + mean= (-1,-1,-1), + std= (2.0,2.0,2.0)) + to_PIL_image = transforms.ToPILImage().convert('RGB') + return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) + + +def parse(): + """Define the common options that are used in both training and test.""" + # basic parameters + parser = argparse.ArgumentParser(description='cyclegan test for image preprocess') + parser.add_argument('--src_path_testA', required=False,default='datasets/maps/testA/', help='path to images testA)') + parser.add_argument('--save_path_testA_bin', type=str, default='nputest/testa', help='name of the ..') + parser.add_argument('--path_testA_binName', type=str, default='testA_prep_bin.info', help='name of the ..') + parser.add_argument('--src_path_testB', required=False, default='datasets/maps/testB/', help='path to images testB)') + parser.add_argument('--save_path_testB_bin', type=str, default='nputest/testb', help='name of the ..') + parser.add_argument('--path_testB_binName', type=str, default='testB_prep_bin.info', help='name of the ..') + opt=parser.parse_args() + if(os.path.exists(opt.save_path_testA_bin)==False): + os.makedirs(opt.save_path_testA_bin) + if(os.path.exists(opt.save_path_testB_bin)==False): + os.makedirs(opt.save_path_testB_bin) + return opt + + +def rs_img_bin(src_path,save_path,data_list_path): + i = 0 + in_files = os.listdir(src_path) + listfile = open(data_list_path, 'w') + for file in in_files: + #print(file, "===", i) + input_image = Image.open(src_path + '/' + file) + input_tensor = preprocess(input_image) + img = np.array(input_tensor).astype(np.float32) + img.tofile(os.path.join(save_path, file.split('.')[0] + ".bin")) + temp = str(str(i) + ' ./' + os.path.join(save_path, file.split('.')[0] + ".bin") + ' ' + '256 256\n') + listfile.write(temp) + i = i + 1 + listfile.close() + + +def main(opt): + # deal testA and save img data to bin + rs_img_bin(opt.src_path_testA, opt.save_path_testA_bin, opt.path_testA_binName) + # deal testB and save img data to bin + rs_img_bin(opt.src_path_testB, opt.save_path_testB_bin, opt.path_testB_binName) + return 0 + + +if __name__=='__main__': + opt=parse() main(opt) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/CycleGAN/parse.py b/ACL_PyTorch/contrib/cv/gan/CycleGAN/parse.py index a8142636cdd7cb86da9977f4b06544fe35c93a14..cc4b43d67680d1913c6137117cf38209dcb84448 100644 --- a/ACL_PyTorch/contrib/cv/gan/CycleGAN/parse.py +++ b/ACL_PyTorch/contrib/cv/gan/CycleGAN/parse.py @@ -1,141 +1,141 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. 
-# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import argparse -import torch -import os - - -class parse_args(): - def __init__(self, isTrain=True, isTest=False): - self.isTrain = isTrain - self.isTest = isTest - self.parser = argparse.ArgumentParser(description='Pytorch CycleGAN training') - - def initialize(self): - parser = self.parser - parser.add_argument('--npu', default=False, help='whether to use npu to fastern training') - parser.add_argument('--pu_ids', type=str, default='1', - help='gpu ids(npu ids): e.g. 0 0,1,2, 0,2. use -1 for CPU') - parser.add_argument('--dataroot', type=str, default='./datasets/maps', - help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') - parser.add_argument('--name', type=str, default='maps_cycle_gan', - help='name of the experiment. It decides where to store samples and models') - - parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') - # model parameters - parser.add_argument('--model', type=str, default='cycle_gan', - help='chooses which model to use. [cycle_gan]') - parser.add_argument('--input_nc', type=int, default=3, - help='# of input image channels: 3 for RGB and 1 for grayscale') - parser.add_argument('--output_nc', type=int, default=3, - help='# of output image channels: 3 for RGB and 1 for grayscale') - parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer') - parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer') - parser.add_argument('--netD', type=str, default='basic', - help='specify discriminator architecture [basic | n_layers | pixel]. ' - 'The basic model is a 70x70 PatchGAN. 
n_layers allows you to' - ' specify the layers in the discriminator') - parser.add_argument('--netG', type=str, default='resnet_9blocks', - help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]') - parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers') - parser.add_argument('--norm', type=str, default='instance', - help='instance normalization or batch normalization [instance | batch | none]') - parser.add_argument('--init_type', type=str, default='normal', - help='network initialization [normal | xavier | kaiming | orthogonal]') - parser.add_argument('--init_gain', type=float, default=0.02, - help='scaling factor for normal, xavier and orthogonal.') - parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') - parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA') - parser.add_argument('--batch_size', type=int, default=1, - help='batch_size') - # additional parameters - parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout - parser.add_argument('--model_ga_path', type=str, - default='./checkpoints/maps_cycle_gan/latest_net_G_A.pth', - help='path for modelga') - parser.add_argument('--model_gb_path', type=str, - default='./checkpoints/maps_cycle_gan/latest_net_G_B.pth', - help='path for modelga') - parser.add_argument('--onnx_path', type=str, - default='./onnxmodel/', - help='path for modelga') - parser.add_argument('--model_ga_onnx_name', type=str, - default='model_Ga.onnx', - help='onnx name for modelga') - parser.add_argument('--model_gb_onnx_name', type=str, - default='model_Gb.onnx', - help='onnx for modelgb') - parser.add_argument('--gpuPerformance', type=str, - default='./gpuPerformance/', - help='file for t4 test result ') - parser.add_argument('--npu_bin_file', type=str, - default='./result/dumpOutput_device0/', - help='npu bin ') - parser.add_argument('--bin2img_fie', type=str, - default='./bin2imgfile/', - help='save bin2img ') - # rewrite devalue values - parser.set_defaults(model='test') - # To avoid cropping, the load_size should be the same as crop_size - parser.set_defaults(load_size=parser.get_default('crop_size')) - parser = parser.parse_args() - parser.process_device_map = self.device_id_to_process_device_map(parser.pu_ids) - return parser - - def device_id_to_process_device_map(self, device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - return process_device_map - - def change_parser(self, isTrain=True, isTest=False): - self.isTest = isTest - self.isTrain = isTrain - self.parser = None - return self.initialize() - - def printParser(self): - pasers = self.parser.parse_args() - message = '' - message += '----------------- Options ---------------\n' - for k, v in sorted(vars(pasers).items()): - comment = '' - default = self.parser.get_default(k) - # if v != default: - # comment = '\t[default: %s]' % str(default) - message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) - message += '----------------- End -------------------' - print(message) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import argparse +import torch +import os + + +class parse_args(): + def __init__(self, isTrain=True, isTest=False): + self.isTrain = isTrain + self.isTest = isTest + self.parser = argparse.ArgumentParser(description='Pytorch CycleGAN training') + + def initialize(self): + parser = self.parser + parser.add_argument('--npu', default=False, help='whether to use npu to fastern training') + parser.add_argument('--pu_ids', type=str, default='1', + help='gpu ids(npu ids): e.g. 0 0,1,2, 0,2. use -1 for CPU') + parser.add_argument('--dataroot', type=str, default='./datasets/maps', + help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') + parser.add_argument('--name', type=str, default='maps_cycle_gan', + help='name of the experiment. It decides where to store samples and models') + + parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') + # model parameters + parser.add_argument('--model', type=str, default='cycle_gan', + help='chooses which model to use. [cycle_gan]') + parser.add_argument('--input_nc', type=int, default=3, + help='# of input image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--output_nc', type=int, default=3, + help='# of output image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer') + parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer') + parser.add_argument('--netD', type=str, default='basic', + help='specify discriminator architecture [basic | n_layers | pixel]. ' + 'The basic model is a 70x70 PatchGAN. 
n_layers allows you to' + ' specify the layers in the discriminator') + parser.add_argument('--netG', type=str, default='resnet_9blocks', + help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]') + parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers') + parser.add_argument('--norm', type=str, default='instance', + help='instance normalization or batch normalization [instance | batch | none]') + parser.add_argument('--init_type', type=str, default='normal', + help='network initialization [normal | xavier | kaiming | orthogonal]') + parser.add_argument('--init_gain', type=float, default=0.02, + help='scaling factor for normal, xavier and orthogonal.') + parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') + parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA') + parser.add_argument('--batch_size', type=int, default=1, + help='batch_size') + # additional parameters + parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout + parser.add_argument('--model_ga_path', type=str, + default='./checkpoints/maps_cycle_gan/latest_net_G_A.pth', + help='path for modelga') + parser.add_argument('--model_gb_path', type=str, + default='./checkpoints/maps_cycle_gan/latest_net_G_B.pth', + help='path for modelga') + parser.add_argument('--onnx_path', type=str, + default='./onnxmodel/', + help='path for modelga') + parser.add_argument('--model_ga_onnx_name', type=str, + default='model_Ga.onnx', + help='onnx name for modelga') + parser.add_argument('--model_gb_onnx_name', type=str, + default='model_Gb.onnx', + help='onnx for modelgb') + parser.add_argument('--gpuPerformance', type=str, + default='./gpuPerformance/', + help='file for t4 test result ') + parser.add_argument('--npu_bin_file', type=str, + default='./result/dumpOutput_device0/', + help='npu bin ') + parser.add_argument('--bin2img_fie', type=str, + default='./bin2imgfile/', + help='save bin2img ') + # rewrite devalue values + parser.set_defaults(model='test') + # To avoid cropping, the load_size should be the same as crop_size + parser.set_defaults(load_size=parser.get_default('crop_size')) + parser = parser.parse_args() + parser.process_device_map = self.device_id_to_process_device_map(parser.pu_ids) + return parser + + def device_id_to_process_device_map(self, device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + return process_device_map + + def change_parser(self, isTrain=True, isTest=False): + self.isTest = isTest + self.isTrain = isTrain + self.parser = None + return self.initialize() + + def printParser(self): + pasers = self.parser.parse_args() + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(pasers).items()): + comment = '' + default = self.parser.get_default(k) + # if v != default: + # comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) diff --git a/ACL_PyTorch/contrib/cv/gan/DCGAN/README.md b/ACL_PyTorch/contrib/cv/gan/DCGAN/README.md index 51a5bcdee7180c0afc62a00c6bcf03714b1f42cb..bc1071da1cb9209f2ae2de0e93466dc6c63c389b 100644 --- a/ACL_PyTorch/contrib/cv/gan/DCGAN/README.md +++ 
b/ACL_PyTorch/contrib/cv/gan/DCGAN/README.md @@ -1,58 +1,58 @@ -# DCGAN模型PyTorch离线推理指导 -## 1 环境准备 -### 1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` -### 1.2 获取,安装开源模型代码 -``` -git clone https://github.com/eriklindernoren/PyTorch-GAN.git -``` -使用patch文件更改开源代码仓源码 -``` -mv dcgan.patch PyTorch-GAN/ -cd PyTorch-GAN/ -git apply dcgan.patch -cd .. -``` -### 1.3 获取权重文件 -[DCGAN预训练权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/GAN/DCGan/checkpoint-amp-epoch_200.pth) -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/GAN/DCGan/checkpoint-amp-epoch_200.pth -``` -### 1.4 数据集 -DCGAN的输入是随机噪声。当前目录下的`dcgan_preprocess.py`文件会随机生成输入噪声作为数据集。 -此脚本无需主动运行。 - -默认设置下,生成8192个噪声样本。 -### 1.5 [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录,并更改权限 -``` -chmod 777 benchmark.x86_64 -``` -## 2 离线推理 -### 2.1 性能测试 -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_perf.sh -``` -### 2.2 精度测试 -由于开源代码仓并未提供合适的精度指标来衡量模型的生成精度。 -我们提供了图像像素差均值(mean)和图像余弦相似度(consine)作为精度指标以供参考。 - -因为npu的推理结果是以pth的生成结果为基准。 -所以两个指标的计算对象分别是pth模型在cpu上的生成集合和om模型在npu上的生成集合。 -除却均值指标与相似度指标外,还提供了一个精度指标(acc)。`acc=(cosine+1)/2`。目的是为了获得一个百分比值便于直观理解精度。 -``` -#直接执行acc验证脚本 -bash test/eval_acc.sh -``` - -结果分别保存在当前目录的`dcgan_acc_eval_bs1.log`与`dcgan_acc_eval_bs16.log`中。 -### 2.3 测评结果 -|模型|精度(mean)|精度(cosine)|精度(acc)|性能基准|310性能| -|----|----|----|----|----|----| -|DCGAN bs1|0.0004|1.0|100.0%|10174.65fps|11429.32fps| -|DCGAN bs16|0.0004|1.0|100.0%|46711.51fps|63607.60fps| - +# DCGAN模型PyTorch离线推理指导 +## 1 环境准备 +### 1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` +### 1.2 获取,安装开源模型代码 +``` +git clone https://github.com/eriklindernoren/PyTorch-GAN.git +``` +使用patch文件更改开源代码仓源码 +``` +mv dcgan.patch PyTorch-GAN/ +cd PyTorch-GAN/ +git apply dcgan.patch +cd .. 
+``` +### 1.3 获取权重文件 +[DCGAN预训练权重文件](https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/GAN/DCGan/checkpoint-amp-epoch_200.pth) +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/GAN/DCGan/checkpoint-amp-epoch_200.pth +``` +### 1.4 数据集 +DCGAN的输入是随机噪声。当前目录下的`dcgan_preprocess.py`文件会随机生成输入噪声作为数据集。 +此脚本无需主动运行。 + +默认设置下,生成8192个噪声样本。 +### 1.5 [获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录,并更改权限 +``` +chmod 777 benchmark.x86_64 +``` +## 2 离线推理 +### 2.1 性能测试 +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_perf.sh +``` +### 2.2 精度测试 +由于开源代码仓并未提供合适的精度指标来衡量模型的生成精度。 +我们提供了图像像素差均值(mean)和图像余弦相似度(consine)作为精度指标以供参考。 + +因为npu的推理结果是以pth的生成结果为基准。 +所以两个指标的计算对象分别是pth模型在cpu上的生成集合和om模型在npu上的生成集合。 +除却均值指标与相似度指标外,还提供了一个精度指标(acc)。`acc=(cosine+1)/2`。目的是为了获得一个百分比值便于直观理解精度。 +``` +#直接执行acc验证脚本 +bash test/eval_acc.sh +``` + +结果分别保存在当前目录的`dcgan_acc_eval_bs1.log`与`dcgan_acc_eval_bs16.log`中。 +### 2.3 测评结果 +|模型|精度(mean)|精度(cosine)|精度(acc)|性能基准|310性能| +|----|----|----|----|----|----| +|DCGAN bs1|0.0004|1.0|100.0%|10174.65fps|11429.32fps| +|DCGAN bs16|0.0004|1.0|100.0%|46711.51fps|63607.60fps| + diff --git a/ACL_PyTorch/contrib/cv/gan/DCGAN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/gan/DCGAN/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/gan/DCGAN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/gan/DCGAN/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_pth2onnx.py b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_pth2onnx.py index 790cd62c5e5f621069ffa8594b5e6497a01f550d..457cb19bd697496e8d19b0d135bfbfb04c001c63 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_pth2onnx.py @@ -1,54 +1,54 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from models import Generator -from torch.autograd import Variable -import argparse -import numpy as np -from collections import OrderedDict - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." 
in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(input_file, output_file): - generator = Generator() - checkpoint = torch.load(input_file, map_location=torch.device('cpu')) - checkpoint = proc_nodes_module(checkpoint) - generator.load_state_dict(checkpoint) - input_names = ["Z"] - output_names = ["generateimg"] - dynamic_axes = {'Z': {0: '-1'}, 'generateimg': {0: '-1'}} - - Tensor = torch.FloatTensor - dummy_input = Variable(Tensor(np.random.normal(0, 1, (16, 100)))) - torch.onnx.export(generator, dummy_input, output_file, input_names = input_names, - output_names = output_names,dynamic_axes = dynamic_axes,opset_version=11, verbose=True) - - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--input_file', type=str, required=True) - parser.add_argument('--output_file', type=str, required=True) - args = parser.parse_args() - - pth2onnx(args.input_file, args.output_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from models import Generator +from torch.autograd import Variable +import argparse +import numpy as np +from collections import OrderedDict + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(input_file, output_file): + generator = Generator() + checkpoint = torch.load(input_file, map_location=torch.device('cpu')) + checkpoint = proc_nodes_module(checkpoint) + generator.load_state_dict(checkpoint) + input_names = ["Z"] + output_names = ["generateimg"] + dynamic_axes = {'Z': {0: '-1'}, 'generateimg': {0: '-1'}} + + Tensor = torch.FloatTensor + dummy_input = Variable(Tensor(np.random.normal(0, 1, (16, 100)))) + torch.onnx.export(generator, dummy_input, output_file, input_names = input_names, + output_names = output_names,dynamic_axes = dynamic_axes,opset_version=11, verbose=True) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--input_file', type=str, required=True) + parser.add_argument('--output_file', type=str, required=True) + args = parser.parse_args() + + pth2onnx(args.input_file, args.output_file) diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_testdata.py b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_testdata.py index 6b33ec6b2daf0377fa081f118b38b2fd8e7e9638..86b6d85bdfdb126480cc870ef0d41ca5e62249a9 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_testdata.py +++ b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_testdata.py @@ -1,58 +1,58 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch -from models import Generator -from torch.autograd import Variable -from torchvision.utils import save_image -import numpy as np -import argparse - -def main(args): - os.makedirs(args.online_path, exist_ok=True) - os.makedirs(args.offline_path, exist_ok=True) - generator = Generator() - pre = torch.load(args.pth_path,map_location='cpu') - - from collections import OrderedDict - - new_state_dict = OrderedDict() - for k, v in pre.items(): - name = k.replace("module.", "") - new_state_dict[name] = v - # load params - generator.load_state_dict(new_state_dict) - Tensor = torch.FloatTensor - for i in range(args.iters): - z = Variable(Tensor(np.random.normal(0, 1, (args.batch_size,100)))) - - if args.batch_size != 1: - gen = generator(z) - save_image(gen, args.online_path+"/%d.jpg" % i,normalize=True) - - z = z.numpy() - z.tofile(args.offline_path+"/%d.bin"% i) - - print("done!") - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--online_path', type=str, required=True) - parser.add_argument('--offline_path', type=str, required=True) - parser.add_argument('--pth_path', type=str, required=True) - parser.add_argument('--iters', type=int, default=1) - parser.add_argument('--batch_size', type=int, default=1) - args = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
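+# GAN_testdata.py prepares matched inputs for the online/offline comparison:
+# it loads the trained Generator from --pth_path (stripping any "module."
+# prefix left by DataParallel), draws standard-normal noise of shape
+# (batch_size, 100) for --iters iterations, saves the PyTorch-generated images
+# under --online_path (when batch_size != 1), and dumps the same noise as
+# float32 .bin files under --offline_path for om inference.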
+ +import os +import torch +from models import Generator +from torch.autograd import Variable +from torchvision.utils import save_image +import numpy as np +import argparse + +def main(args): + os.makedirs(args.online_path, exist_ok=True) + os.makedirs(args.offline_path, exist_ok=True) + generator = Generator() + pre = torch.load(args.pth_path,map_location='cpu') + + from collections import OrderedDict + + new_state_dict = OrderedDict() + for k, v in pre.items(): + name = k.replace("module.", "") + new_state_dict[name] = v + # load params + generator.load_state_dict(new_state_dict) + Tensor = torch.FloatTensor + for i in range(args.iters): + z = Variable(Tensor(np.random.normal(0, 1, (args.batch_size,100)))) + + if args.batch_size != 1: + gen = generator(z) + save_image(gen, args.online_path+"/%d.jpg" % i,normalize=True) + + z = z.numpy() + z.tofile(args.offline_path+"/%d.bin"% i) + + print("done!") + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--online_path', type=str, required=True) + parser.add_argument('--offline_path', type=str, required=True) + parser.add_argument('--pth_path', type=str, required=True) + parser.add_argument('--iters', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=1) + args = parser.parse_args() main(args) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_txt2jpg.py b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_txt2jpg.py index 24ce1e8adbe15f660124cd57c28f7e689e0c2bce..a345e5fbf34bafc0a70653076261daed443fc9c9 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/GAN_txt2jpg.py +++ b/ACL_PyTorch/contrib/cv/gan/GAN/GAN_txt2jpg.py @@ -1,50 +1,50 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import torch -import numpy as np -import os -from torchvision.utils import save_image - -def read_bin(filename): - - data = np.fromfile(filename,dtype=np.float32) - data = torch.Tensor(data) - data = data.view(-1,1,28,28) - return data - -def main(args): - old_path = os.listdir(args.txt_path) - os.makedirs(args.infer_results_path, exist_ok=True) - old_path.sort(reverse=True) - new_path = args.txt_path+'/'+old_path[0] - files = os.listdir(new_path) - for file in files: - filename = new_path + '/' + file - data = read_bin(filename) - if file[1]!='_': - save_path = args.infer_results_path + '/' + file[:2] + ".jpg" - else: - save_path = args.infer_results_path + '/' + file[0] + ".jpg" - - save_image(data, save_path,normalize=True) - print("done!") - -if __name__=='__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--txt_path', type=str, required=True) - parser.add_argument('--infer_results_path', type=str, required=True) - args = parser.parse_args() - main(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import torch +import numpy as np +import os +from torchvision.utils import save_image + +def read_bin(filename): + + data = np.fromfile(filename,dtype=np.float32) + data = torch.Tensor(data) + data = data.view(-1,1,28,28) + return data + +def main(args): + old_path = os.listdir(args.txt_path) + os.makedirs(args.infer_results_path, exist_ok=True) + old_path.sort(reverse=True) + new_path = args.txt_path+'/'+old_path[0] + files = os.listdir(new_path) + for file in files: + filename = new_path + '/' + file + data = read_bin(filename) + if file[1]!='_': + save_path = args.infer_results_path + '/' + file[:2] + ".jpg" + else: + save_path = args.infer_results_path + '/' + file[0] + ".jpg" + + save_image(data, save_path,normalize=True) + print("done!") + +if __name__=='__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--txt_path', type=str, required=True) + parser.add_argument('--infer_results_path', type=str, required=True) + args = parser.parse_args() + main(args) diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/LICENSE b/ACL_PyTorch/contrib/cv/gan/GAN/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/LICENSE +++ b/ACL_PyTorch/contrib/cv/gan/GAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/README.md b/ACL_PyTorch/contrib/cv/gan/GAN/README.md index db252fc46044ddf65819fd20896f31995970b10a..0d61509917c82c566e0433a3f6a309b0715a276d 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/README.md +++ b/ACL_PyTorch/contrib/cv/gan/GAN/README.md @@ -1,222 +1,222 @@ -## GAN Onnx模型PyTorch端到端推理指导 - -### 1 模型概述 - -#### 1.1 论文地址 - -[GAN论文](https://arxiv.org/abs/1406.2661) - - - -#### 1.2 代码地址 - -[GAN代码](https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py) - - - -### 2 环境说明 - -#### 2.1 深度学习框架 - -``` -CANN 5.0.2 -pytorch = 1.6.0 -torchvision = 0.6.0 -onnx = 1.8.0 -``` - - - -#### 2.2 python第三方库 - -``` -numpy == 1.21.1 -``` - -**说明:** - -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - - - -### 3 模型转换 - -#### 3.1 pth转onnx模型 - -1. 下载pth权重文件 - - [GAN预训练pth权重文件](https://wws.lanzoui.com/ikXFJvljkab) - 解压至当前工作目录 - - - - -2. 编写pth2onnx脚本GAN_pth2onnx.py - - -3. 执行pth2onnx脚本,生成onnx模型文件 - - ```py - python3.7 GAN_pth2onnx.py --input_file=generator_8p_0.0008_128.pth --output_file=GAN.onnx - ``` - - - -#### 3.2 onnx转om模型 - -1. 设置环境变量 - - ``` - source set_env.sh - ``` - -2. 使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 - - ``` - atc --model=GAN.onnx --framework=5 --output=GAN_bs1 --input_format=NCHW --input_shape="Z:1,100" --log=error --soc_version=Ascend310 - ``` - 通过调节input_shape的第一个参数为16,64可以生成bs为16,64的om文件 - -### 4 数据准备 - -#### 4.1 生成输入数据并保存为.bin文件 -由于源代码中未提供测试数据,这里调用GAN_testdata.py来生成测试数据,保存在/vectors文件夹下 - ``` - python3.7 GAN_testdata.py --online_path=images --offline_path=vectors --pth_path=generator_8p_0.0008_128.pth --iters 100 --batch_size 64 - ``` - - - -### 5 离线推理 - -#### 5.1 msame工具概述 - -msame工具为华为自研的模型推理工具,输入.om模型和模型所需要的输入bin文件,输出模型的输出数据文件,支持多次推理(指对同一输入数据进行推理)。 - -模型必须是通过atc工具转换的om模型,输入bin文件需要符合模型的输入要求(支持模型多输入)。 - - - -#### 5.2 离线推理 - -``` -./msame --model "GAN_bs64.om" --input "./vectors" --output "out" -``` - -输出结果默认保存在当前目录out/下,为保存模型输入tensor数据的txt文件 - - - -### 6 精度对比 - -#### 6.1 离线推理精度 - -调用GAN_txt2jpg.py来进行后处理 - -```python -python3.7 GAN_txt2jpg.py --txt_path=out --infer_results_path=genimg -``` - -详细的结果输出在genimg文件夹中,可以和images文件夹下的在线推理结果做对比,看得出离线推理生成的图片质量更好 - - -#### 6.2 精度对比 - -源码中未有精度对比部分,这里以两种不同的方式对同一输入的输出结果对比为准。 - - - -### 7 性能对比 - -#### 7.1 npu性能数据 -运行下列命令 - -``` -source env.sh -atc --model=GAN.onnx --framework=5 --output=GAN_bs1 --input_format=NCHW --input_shape="Z:1,100" --log=error --soc_version=Ascend310 -``` - -得到size为1*100的om模型 - - - -**msame工具在整个数据集上推理获得性能数据** - -batch1的性能 - -``` -Inference average time : 0.43 ms -Inference average time without first time: 0.43 ms -``` - -Inference average time : 0.43 ms,1000/(0.43/4)既是batch1 310单卡吞吐率 - -bs1 310单卡吞吐率:9302.326fps - -batch16的性能 - -``` -Inference average time : 0.47 ms -Inference average time without first time: 0.47 ms -``` - -Inference average time : 0.51 ms,1000/(0.45/64)既是batch16 310单卡吞吐率 - -bs16 310单卡吞吐率:136170.213fps - -#### 7.2 T4性能数据 - -在装有T4卡的服务器上使用TensorRT测试gpu性能,测试过程请确保卡没有运行其他任务。 - -batch1性能: - -``` -./trtexec --onnx=GAN.onnx --fp16 --shapes=image:1x100 -``` - -gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch - -``` -[11/11/2021-13:11:22] [I] min: 0.048584 ms -[11/11/2021-13:11:22] [I] max: 4.11572 ms -[11/11/2021-13:11:22] [I] median: 0.0817871 ms -[11/11/2021-13:11:22] [I] GPU Compute -[11/11/2021-13:11:22] [I] min: 0.048584 ms -[11/11/2021-13:11:22] [I] max: 4.13281 ms -[11/11/2021-13:11:22] [I] mean: 0.0826078 
ms -[11/11/2021-13:11:22] [I] median: 0.0856934 ms -[11/11/2021-13:11:22] [I] percentile: 0.118164 ms at 99% -[11/11/2021-13:11:22] [I] total compute time: 1.82233 s -``` - -batch1 t4单卡吞吐率:1000/(0.0826078/1)=12105.394fps - -batch16性能: -``` -./trtexec --onnx=GAN.onnx --fp16 --shapes=image:1x100 -``` - -``` -[11/11/2021-13:18:27] [I] min: 0.0540771 ms -[11/11/2021-13:18:27] [I] max: 5.42334 ms -[11/11/2021-13:18:27] [I] median: 0.0800781 ms -[11/11/2021-13:18:27] [I] GPU Compute -[11/11/2021-13:18:27] [I] min: 0.0499878 ms -[11/11/2021-13:18:27] [I] max: 5.44055 ms -[11/11/2021-13:18:27] [I] mean: 0.0887248 ms -[11/11/2021-13:18:27] [I] median: 0.0830078 ms -[11/11/2021-13:18:27] [I] percentile: 0.145508 ms at 99% -[11/11/2021-13:18:27] [I] total compute time: 1.91122 s -``` - -batch16 t4单卡吞吐率:1000/(0.0887248/1)=180332.895fps - -#### 7.3 性能对比 - -batch1:8510.638fps > 12105.394×0.5 fps - -batch16:125490.196fps > 180332.895×0.5 fps - +## GAN Onnx模型PyTorch端到端推理指导 + +### 1 模型概述 + +#### 1.1 论文地址 + +[GAN论文](https://arxiv.org/abs/1406.2661) + + + +#### 1.2 代码地址 + +[GAN代码](https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py) + + + +### 2 环境说明 + +#### 2.1 深度学习框架 + +``` +CANN 5.0.2 +pytorch = 1.6.0 +torchvision = 0.6.0 +onnx = 1.8.0 +``` + + + +#### 2.2 python第三方库 + +``` +numpy == 1.21.1 +``` + +**说明:** + +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + + + +### 3 模型转换 + +#### 3.1 pth转onnx模型 + +1. 下载pth权重文件 + + [GAN预训练pth权重文件](https://wws.lanzoui.com/ikXFJvljkab) + 解压至当前工作目录 + + + + +2. 编写pth2onnx脚本GAN_pth2onnx.py + + +3. 执行pth2onnx脚本,生成onnx模型文件 + + ```py + python3.7 GAN_pth2onnx.py --input_file=generator_8p_0.0008_128.pth --output_file=GAN.onnx + ``` + + + +#### 3.2 onnx转om模型 + +1. 设置环境变量 + + ``` + source set_env.sh + ``` + +2. 
使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.1 开发辅助工具指南 (推理) 01 + + ``` + atc --model=GAN.onnx --framework=5 --output=GAN_bs1 --input_format=NCHW --input_shape="Z:1,100" --log=error --soc_version=Ascend310 + ``` + 通过调节input_shape的第一个参数为16,64可以生成bs为16,64的om文件 + +### 4 数据准备 + +#### 4.1 生成输入数据并保存为.bin文件 +由于源代码中未提供测试数据,这里调用GAN_testdata.py来生成测试数据,保存在/vectors文件夹下 + ``` + python3.7 GAN_testdata.py --online_path=images --offline_path=vectors --pth_path=generator_8p_0.0008_128.pth --iters 100 --batch_size 64 + ``` + + + +### 5 离线推理 + +#### 5.1 msame工具概述 + +msame工具为华为自研的模型推理工具,输入.om模型和模型所需要的输入bin文件,输出模型的输出数据文件,支持多次推理(指对同一输入数据进行推理)。 + +模型必须是通过atc工具转换的om模型,输入bin文件需要符合模型的输入要求(支持模型多输入)。 + + + +#### 5.2 离线推理 + +``` +./msame --model "GAN_bs64.om" --input "./vectors" --output "out" +``` + +输出结果默认保存在当前目录out/下,为保存模型输入tensor数据的txt文件 + + + +### 6 精度对比 + +#### 6.1 离线推理精度 + +调用GAN_txt2jpg.py来进行后处理 + +```python +python3.7 GAN_txt2jpg.py --txt_path=out --infer_results_path=genimg +``` + +详细的结果输出在genimg文件夹中,可以和images文件夹下的在线推理结果做对比,看得出离线推理生成的图片质量更好 + + +#### 6.2 精度对比 + +源码中未有精度对比部分,这里以两种不同的方式对同一输入的输出结果对比为准。 + + + +### 7 性能对比 + +#### 7.1 npu性能数据 +运行下列命令 + +``` +source env.sh +atc --model=GAN.onnx --framework=5 --output=GAN_bs1 --input_format=NCHW --input_shape="Z:1,100" --log=error --soc_version=Ascend310 +``` + +得到size为1*100的om模型 + + + +**msame工具在整个数据集上推理获得性能数据** + +batch1的性能 + +``` +Inference average time : 0.43 ms +Inference average time without first time: 0.43 ms +``` + +Inference average time : 0.43 ms,1000/(0.43/4)既是batch1 310单卡吞吐率 + +bs1 310单卡吞吐率:9302.326fps + +batch16的性能 + +``` +Inference average time : 0.47 ms +Inference average time without first time: 0.47 ms +``` + +Inference average time : 0.51 ms,1000/(0.45/64)既是batch16 310单卡吞吐率 + +bs16 310单卡吞吐率:136170.213fps + +#### 7.2 T4性能数据 + +在装有T4卡的服务器上使用TensorRT测试gpu性能,测试过程请确保卡没有运行其他任务。 + +batch1性能: + +``` +./trtexec --onnx=GAN.onnx --fp16 --shapes=image:1x100 +``` + +gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch + +``` +[11/11/2021-13:11:22] [I] min: 0.048584 ms +[11/11/2021-13:11:22] [I] max: 4.11572 ms +[11/11/2021-13:11:22] [I] median: 0.0817871 ms +[11/11/2021-13:11:22] [I] GPU Compute +[11/11/2021-13:11:22] [I] min: 0.048584 ms +[11/11/2021-13:11:22] [I] max: 4.13281 ms +[11/11/2021-13:11:22] [I] mean: 0.0826078 ms +[11/11/2021-13:11:22] [I] median: 0.0856934 ms +[11/11/2021-13:11:22] [I] percentile: 0.118164 ms at 99% +[11/11/2021-13:11:22] [I] total compute time: 1.82233 s +``` + +batch1 t4单卡吞吐率:1000/(0.0826078/1)=12105.394fps + +batch16性能: +``` +./trtexec --onnx=GAN.onnx --fp16 --shapes=image:1x100 +``` + +``` +[11/11/2021-13:18:27] [I] min: 0.0540771 ms +[11/11/2021-13:18:27] [I] max: 5.42334 ms +[11/11/2021-13:18:27] [I] median: 0.0800781 ms +[11/11/2021-13:18:27] [I] GPU Compute +[11/11/2021-13:18:27] [I] min: 0.0499878 ms +[11/11/2021-13:18:27] [I] max: 5.44055 ms +[11/11/2021-13:18:27] [I] mean: 0.0887248 ms +[11/11/2021-13:18:27] [I] median: 0.0830078 ms +[11/11/2021-13:18:27] [I] percentile: 0.145508 ms at 99% +[11/11/2021-13:18:27] [I] total compute time: 1.91122 s +``` + +batch16 t4单卡吞吐率:1000/(0.0887248/1)=180332.895fps + +#### 7.3 性能对比 + +batch1:8510.638fps > 12105.394×0.5 fps + +batch16:125490.196fps > 180332.895×0.5 fps + 性能达到基准线一半 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/models.py b/ACL_PyTorch/contrib/cv/gan/GAN/models.py index 54b22a3456bbc39407d28cb98138e0b8ea4c47cd..27da51bc8253f9a498ebb4c04c2f9ab017d16a08 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/models.py +++ 
b/ACL_PyTorch/contrib/cv/gan/GAN/models.py @@ -1,67 +1,67 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch.nn as nn -import numpy as np - -channels = 1 -image_size = 28 -img_shape =(channels,image_size,image_size) -latent_dim = 100 - -class Generator(nn.Module): - def __init__(self): - super(Generator, self).__init__() - - def block(in_feat, out_feat, normalize=True): - layers = [nn.Linear(in_feat, out_feat)] - if normalize: - layers.append(nn.BatchNorm1d(out_feat, 0.8)) - layers.append(nn.LeakyReLU(0.2, inplace=True)) - return layers - - self.model = nn.Sequential( - *block(latent_dim, 128, normalize=False), - *block(128, 256), - *block(256, 512), - *block(512, 1024), - nn.Linear(1024, int(np.prod(img_shape))), - nn.Tanh() - ) - - def forward(self, z): - img = self.model(z) - img = img.view(img.size(0), *img_shape) - return img - -class Discriminator(nn.Module): - def __init__(self): - super(Discriminator, self).__init__() - - self.model = nn.Sequential( - nn.Linear(int(np.prod(img_shape)), 512), - nn.LeakyReLU(0.2, inplace=True), - nn.Linear(512, 256), - nn.LeakyReLU(0.2, inplace=True), - nn.Linear(256, 1), - nn.Sigmoid(), - ) - - def forward(self, img): - img_flat = img.view(img.size(0), -1) - validity = self.model(img_flat) - - return validity - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch.nn as nn +import numpy as np + +channels = 1 +image_size = 28 +img_shape =(channels,image_size,image_size) +latent_dim = 100 + +class Generator(nn.Module): + def __init__(self): + super(Generator, self).__init__() + + def block(in_feat, out_feat, normalize=True): + layers = [nn.Linear(in_feat, out_feat)] + if normalize: + layers.append(nn.BatchNorm1d(out_feat, 0.8)) + layers.append(nn.LeakyReLU(0.2, inplace=True)) + return layers + + self.model = nn.Sequential( + *block(latent_dim, 128, normalize=False), + *block(128, 256), + *block(256, 512), + *block(512, 1024), + nn.Linear(1024, int(np.prod(img_shape))), + nn.Tanh() + ) + + def forward(self, z): + img = self.model(z) + img = img.view(img.size(0), *img_shape) + return img + +class Discriminator(nn.Module): + def __init__(self): + super(Discriminator, self).__init__() + + self.model = nn.Sequential( + nn.Linear(int(np.prod(img_shape)), 512), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(512, 256), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(256, 1), + nn.Sigmoid(), + ) + + def forward(self, img): + img_flat = img.view(img.size(0), -1) + validity = self.model(img_flat) + + return validity + + diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/gan/GAN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/gan/GAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/requirements.txt b/ACL_PyTorch/contrib/cv/gan/GAN/requirements.txt index 2b2fefefa7e4adf1644dcfecc8ad2aeaef1b4656..63dcbcfad8c69ebc898a4dad8759901abf26fd31 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/gan/GAN/requirements.txt @@ -1,4 +1,4 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.8.0 -numpy == 1.21.1 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.8.0 +numpy == 1.21.1 diff --git a/ACL_PyTorch/contrib/cv/gan/GAN/test/README.md b/ACL_PyTorch/contrib/cv/gan/GAN/test/README.md index d86591ab67273f688a1c9f2d1d351b356d8d3d0f..85ca35df0486c6a24fac7731f32ae7d04d839e93 100644 --- a/ACL_PyTorch/contrib/cv/gan/GAN/test/README.md +++ b/ACL_PyTorch/contrib/cv/gan/GAN/test/README.md @@ -1,38 +1,38 @@ -## GAN模型PyTorch离线推理指导 - -### 1 环境准备 - -1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - - ```python - pip3.7 install -r requirements.txt - ``` - -2. 数据集获取 - - 开源代码仓[点此进入](https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py)没有提供模型测试相关的数据集和代码,这里采用自己设置的随机张量来测试模型的生成精度。 - - -3. 获取msame工具 - - 将编译好的msame工具放到当前目录 - -### 2 离线推理 - -310上执行,执行时使用npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc.sh -bash test/eval_bs1_perf.sh -bash test/eval_bs16_perf.sh -``` - - - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :-----: | :---------: | :-------------: | :--------: | :-------: | -| GAN bs1 | - | - | fps:12105.394 | fps: 9302.326| -| GAN bs16 |- | - | fps:180332.895|fps: 136170.213| +## GAN模型PyTorch离线推理指导 + +### 1 环境准备 + +1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + + ```python + pip3.7 install -r requirements.txt + ``` + +2. 数据集获取 + + 开源代码仓[点此进入](https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py)没有提供模型测试相关的数据集和代码,这里采用自己设置的随机张量来测试模型的生成精度。 + + +3. 
获取msame工具 + + 将编译好的msame工具放到当前目录 + +### 2 离线推理 + +310上执行,执行时使用npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc.sh +bash test/eval_bs1_perf.sh +bash test/eval_bs16_perf.sh +``` + + + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :-----: | :---------: | :-------------: | :--------: | :-------: | +| GAN bs1 | - | - | fps:12105.394 | fps: 9302.326| +| GAN bs16 |- | - | fps:180332.895|fps: 136170.213| diff --git a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/README.md b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/README.md index 7708fbc2e51018948b705daa8869f6f637080465..e2a449f73c961ecb507bf0db2581e8dc54eaf1ef 100644 --- a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/README.md +++ b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/README.md @@ -1,35 +1,35 @@ -# Pix2Pix - - -# 精度性能 - - | 模型 | 性能基准 | 310性能 | - | :------: | :------: | :------: | - | fsaf bs1 | 556 | 402 | - | fsaf bs16| 359 | 464 | -精度直接看生成效果 - - -# 自验报告 - - # 第1次验收测试 - # 验收结果 OK - # 验收环境: A + K / CANN 5.0.2 - - - # pth是否能正确转换为om - bash ./test/pth2om.sh --pth_path=./checkpoints/facades_label2photo_pretrained - # 验收结果: OK - # 备注: 成功生成om,无运行报错,报错日志xx 等 - - # 精度数据是否达标(需要显示官网pth精度与om模型的精度) - # npu性能数据(确保device空闲时测试,如果模型支持多batch,测试bs1与bs16,否则只测试bs1,性能数据以单卡吞吐率为标准) - bash ./test/eval_acc_perf.sh --datasets_path='./datasets/facades' - # 验收结果: 是 - # 备注: 验收310测试性能bs1:402FPS bs16:464FPS;无运行报错,报错日志xx 等 - - - # 310性能是否超过基准: 是 - bs1:310=402/556=0.723倍基准 - bs16:310=464/359=1.292倍基准 - +# Pix2Pix + + +# 精度性能 + + | 模型 | 性能基准 | 310性能 | + | :------: | :------: | :------: | + | fsaf bs1 | 556 | 402 | + | fsaf bs16| 359 | 464 | +精度直接看生成效果 + + +# 自验报告 + + # 第1次验收测试 + # 验收结果 OK + # 验收环境: A + K / CANN 5.0.2 + + + # pth是否能正确转换为om + bash ./test/pth2om.sh --pth_path=./checkpoints/facades_label2photo_pretrained + # 验收结果: OK + # 备注: 成功生成om,无运行报错,报错日志xx 等 + + # 精度数据是否达标(需要显示官网pth精度与om模型的精度) + # npu性能数据(确保device空闲时测试,如果模型支持多batch,测试bs1与bs16,否则只测试bs1,性能数据以单卡吞吐率为标准) + bash ./test/eval_acc_perf.sh --datasets_path='./datasets/facades' + # 验收结果: 是 + # 备注: 验收310测试性能bs1:402FPS bs16:464FPS;无运行报错,报错日志xx 等 + + + # 310性能是否超过基准: 是 + bs1:310=402/556=0.723倍基准 + bs16:310=464/359=1.292倍基准 + diff --git a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/precision.py b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/precision.py index c14e8e8abc80d21f8af8667757ddafbdc0e3623f..99dc5002230f276cde25dc3bf83c514a26bb4c6e 100644 --- a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/precision.py +++ b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/precision.py @@ -1,198 +1,198 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Copyright (c) Soumith Chintala 2016, -# All rights reserved -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""用于精度比对 -""" - -import torch -import torch.nn as nn -import torchvision -import apex -from apex import amp -import copy -from models import networks - -##### 需自行改写部分 start ##### -# 获得模型 -def get_model(): - model = networks.define_G(3, 3, 64, 'unet_256', 'instance', - True, 'normal', 0.02, '[0]') - # model = networks.define_D(6, 64, 'basic', - # 3, 'instance','normal', 0.02, '[0]') - # 用于避免BN或者Dropout带来的影响,如果遇到无法evalbackward的现象,请注掉该行 - # model.eval() - - return model - -# 获得输入tensor -input_tensor = torch.randn(1, 3, 256, 256) -# input_tensor = torch.randn(1, 6, 256, 256) - -# 设置npu_device -npu_device = 'npu:0' - -# 设置amp -AMP_MODE = True - -# 设置NPU prof 文件输出 -NPU_PROF = True - -##### 需自行改写部分 end ##### - -def cri_func(x): - base_func = nn.CrossEntropyLoss() - shape_list = x.shape - N = shape_list[0] - R = 1 - if len(shape_list) > 1: - for r in shape_list[1:]: - R *= r - T = torch.randint(0,R, size=(N,)).to(x.device) - if str(T.device).startswith('npu'): - T = T.int() - return base_func(x.reshape(N, -1), T) - -# 设置hook -def hook_func(name, save_dict, module): - def hook_function(module, inputs, outputs): - inputs_key = name + '_inputs' - idx = 0 - while inputs_key in save_dict: - inputs_key = inputs_key.split('-')[0] + '-%d'%idx - idx +=1 - save_dict[inputs_key] = inputs - - outputs_key = name + '_outputs' - idx = 0 - while outputs_key in save_dict: - outputs_key = outputs_key.split('-')[0] + '-%d'%idx - idx +=1 - save_dict[outputs_key] = outputs - return hook_function - -##### CPU ##### -# CPU固定输入和权重 -model = get_model() -optimizer = torch.optim.SGD(model.parameters(), 0.1) -state_dict = copy.deepcopy(model.state_dict()) - -# CPU注册hook,cpu_dict用于存储对比对象 -cpu_dict = {} -for name, module in model.named_modules(): - module.register_forward_hook(hook_func('[forward]:' + name, cpu_dict, module)) - module.register_backward_hook(hook_func('[backward]:' + name, cpu_dict, module)) - -# CPU运行正反向,获取正反向每个module的输入输出和所有参数的grad -out = model(input_tensor) -loss = cri_func(out) -optimizer.zero_grad() -loss.backward() -optimizer.step() -for name, param in model.named_parameters(): - cpu_dict["[grad]:" + name] = param.grad - -##### NPU ##### -# 重新定义模型,清理模型状态,并加装权重,保持初始化一致 -model = get_model() -optimizer = torch.optim.SGD(model.parameters(), 0.1) -model.load_state_dict(state_dict) - -# NPU注册hook,npu_dict用于存储对比对象 -npu_dict = {} -for name, module in model.named_modules(): - module.register_forward_hook(hook_func('[forward]:' + name, npu_dict, module)) - module.register_backward_hook(hook_func('[backward]:' + name, npu_dict, module)) - -# 将model和input_tensor放到npu -torch.npu.set_device(npu_device) -model = model.npu() -input_tensor = input_tensor.npu() - -# amp可选项,不适用请注释 -if AMP_MODE: - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), 0.1) - model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=1.0, 
combine_grad=True) - -# NPU运行正反向,获取正反向每个module的输入输出和所有参数的grad -out = model(input_tensor) -loss = cri_func(out) -optimizer.zero_grad() -if AMP_MODE: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() -else: - loss.backward() -optimizer.step() -for name, param in model.named_parameters(): - npu_dict["[grad]:" + name] = param.grad - - -##### ComPare ##### -# 递归得到对比值 -def compare(x1, x2, prefix=''): - if isinstance(x1, tuple): - if x1: - for idx in range(len(x1)): - try: - compare(x1[idx], x2[idx], prefix=prefix + '.%d' % idx) - except Exception as e: - # print(str(e)) - print(prefix, 'failed.') - elif isinstance(x1, torch.Tensor) and isinstance(x2, torch.Tensor): - try: - l1_error = (x1.half().float() - x2.cpu()).abs().mean() - rel_error = l1_error / (x1.abs().mean()) - print(prefix, 'l1_error: ', l1_error, 'rel_error', rel_error) - if l1_error * rel_error > 10 : - print('\n###\n',prefix, 'should checked!','\n###\n') - except Exception as e: - # print(str(e)) - print(prefix, 'failed.') - -for k in cpu_dict: - compare(cpu_dict[k], npu_dict[k], prefix=k) - -# 需要profiling的时候额外输出一次 -if NPU_PROF: - with torch.autograd.profiler.profile(use_npu=True) as prof: - out = model(input_tensor) - loss = cri_func(out) - optimizer.zero_grad() - if AMP_MODE: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - prof.export_chrome_trace("netD output.prof") # "output.prof"为输出文件地址 - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding: utf-8 -*- +"""用于精度比对 +""" + +import torch +import torch.nn as nn +import torchvision +import apex +from apex import amp +import copy +from models import networks + +##### 需自行改写部分 start ##### +# 获得模型 +def get_model(): + model = networks.define_G(3, 3, 64, 'unet_256', 'instance', + True, 'normal', 0.02, '[0]') + # model = networks.define_D(6, 64, 'basic', + # 3, 'instance','normal', 0.02, '[0]') + # 用于避免BN或者Dropout带来的影响,如果遇到无法evalbackward的现象,请注掉该行 + # model.eval() + + return model + +# 获得输入tensor +input_tensor = torch.randn(1, 3, 256, 256) +# input_tensor = torch.randn(1, 6, 256, 256) + +# 设置npu_device +npu_device = 'npu:0' + +# 设置amp +AMP_MODE = True + +# 设置NPU prof 文件输出 +NPU_PROF = True + +##### 需自行改写部分 end ##### + +def cri_func(x): + base_func = nn.CrossEntropyLoss() + shape_list = x.shape + N = shape_list[0] + R = 1 + if len(shape_list) > 1: + for r in shape_list[1:]: + R *= r + T = torch.randint(0,R, size=(N,)).to(x.device) + if str(T.device).startswith('npu'): + T = T.int() + return base_func(x.reshape(N, -1), T) + +# 设置hook +def hook_func(name, save_dict, module): + def hook_function(module, inputs, outputs): + inputs_key = name + '_inputs' + idx = 0 + while inputs_key in save_dict: + inputs_key = inputs_key.split('-')[0] + '-%d'%idx + idx +=1 + save_dict[inputs_key] = inputs + + outputs_key = name + '_outputs' + idx = 0 + while outputs_key in save_dict: + outputs_key = outputs_key.split('-')[0] + '-%d'%idx + idx +=1 + save_dict[outputs_key] = outputs + return hook_function + +##### CPU ##### +# CPU固定输入和权重 +model = get_model() +optimizer = torch.optim.SGD(model.parameters(), 0.1) +state_dict = copy.deepcopy(model.state_dict()) + +# CPU注册hook,cpu_dict用于存储对比对象 +cpu_dict = {} +for name, module in model.named_modules(): + module.register_forward_hook(hook_func('[forward]:' + name, cpu_dict, module)) + module.register_backward_hook(hook_func('[backward]:' + name, cpu_dict, module)) + +# CPU运行正反向,获取正反向每个module的输入输出和所有参数的grad +out = model(input_tensor) +loss = cri_func(out) +optimizer.zero_grad() +loss.backward() +optimizer.step() +for name, param in model.named_parameters(): + cpu_dict["[grad]:" + name] = param.grad + +##### NPU ##### +# 重新定义模型,清理模型状态,并加装权重,保持初始化一致 +model = get_model() +optimizer = torch.optim.SGD(model.parameters(), 0.1) +model.load_state_dict(state_dict) + +# NPU注册hook,npu_dict用于存储对比对象 +npu_dict = {} +for name, module in model.named_modules(): + module.register_forward_hook(hook_func('[forward]:' + name, npu_dict, module)) + module.register_backward_hook(hook_func('[backward]:' + name, npu_dict, module)) + +# 将model和input_tensor放到npu +torch.npu.set_device(npu_device) +model = model.npu() +input_tensor = input_tensor.npu() + +# amp可选项,不适用请注释 +if AMP_MODE: + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), 0.1) + model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=1.0, combine_grad=True) + +# NPU运行正反向,获取正反向每个module的输入输出和所有参数的grad +out = model(input_tensor) +loss = cri_func(out) +optimizer.zero_grad() +if AMP_MODE: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() +else: + loss.backward() +optimizer.step() +for name, param in model.named_parameters(): + npu_dict["[grad]:" + name] = param.grad + + +##### ComPare ##### +# 递归得到对比值 +def compare(x1, x2, prefix=''): + if isinstance(x1, tuple): + if x1: + for idx in range(len(x1)): + try: + compare(x1[idx], x2[idx], prefix=prefix + '.%d' % idx) + except Exception as e: + # print(str(e)) + print(prefix, 'failed.') + elif isinstance(x1, torch.Tensor) 
and isinstance(x2, torch.Tensor): + try: + l1_error = (x1.half().float() - x2.cpu()).abs().mean() + rel_error = l1_error / (x1.abs().mean()) + print(prefix, 'l1_error: ', l1_error, 'rel_error', rel_error) + if l1_error * rel_error > 10 : + print('\n###\n',prefix, 'should checked!','\n###\n') + except Exception as e: + # print(str(e)) + print(prefix, 'failed.') + +for k in cpu_dict: + compare(cpu_dict[k], npu_dict[k], prefix=k) + +# 需要profiling的时候额外输出一次 +if NPU_PROF: + with torch.autograd.profiler.profile(use_npu=True) as prof: + out = model(input_tensor) + loss = cri_func(out) + optimizer.zero_grad() + if AMP_MODE: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + prof.export_chrome_trace("netD output.prof") # "output.prof"为输出文件地址 + + diff --git a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/pytorch_prof.py b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/pytorch_prof.py index afb06b4b978436cd03d61ba2f71e041e20d7dba7..d79a619de98089f04918148bd6f1d14c2b1eaa02 100644 --- a/ACL_PyTorch/contrib/cv/gan/Pix2Pix/pytorch_prof.py +++ b/ACL_PyTorch/contrib/cv/gan/Pix2Pix/pytorch_prof.py @@ -1,141 +1,141 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) Soumith Chintala 2016, -# All rights reserved -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""pytorch_prof.py -""" - -import torch -import torch.optim as optim -import torch.nn as nn -import time -import argparse -from models import networks - -def build_model(): - # 请自定义模型并加载预训练模型 - # import torchvision - # model = torchvision.models.resnet50(pretrained=True) - model = networks.define_G(3, 3, 64, 'unet_256', 'instance', - True, 'normal', 0.02, '[0]') - return model - - -def get_raw_data(): - # input_tensor = torch.randn(2, 3, 224, 224) - input_tensor = torch.randn(1, 3, 256, 256) - return input_tensor - - -def criterion(x): - base_func = nn.CrossEntropyLoss() - shape_list = x.shape - N = shape_list[0] - R = 1 - if len(shape_list) > 1: - for r in shape_list[1:]: - R *= r - T = torch.randint(0,R, size=(N,)).to(x.device) - if str(T.device).startswith('npu'): - T = T.int() - return base_func(x.reshape(N, -1), T) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='PyTorch Prof') - parser.add_argument('--device', type=str, default='cpu', - help='set which type of device used. 
Support cuda:0(device_id), npu:0(device_id).') - parser.add_argument('--amp', default=False, action='store_true', - help='use amp during prof') - parser.add_argument('--loss-scale', default=64.0, type=float, - help='loss scale using in amp, default 64.0, -1 means dynamic') - parser.add_argument('--opt-level', default='O2', type=str, - help='opt-level using in amp, default O2') - parser.add_argument('--FusedSGD', default=False, action='store_true', - help='use FusedSGD during prof') - - args = parser.parse_args() - - # 1.准备工作 - if args.device.startswith('cuda'): - torch.cuda.set_device(args.device) - prof_kwargs = {'use_cuda': True} - elif args.device.startswith('npu'): - torch.npu.set_device(args.device) - prof_kwargs = {'use_npu': True} - else: - prof_kwargs = {} - - # 2.构建模型 - model = build_model() - if args.FusedSGD: - from apex.optimizers import NpuFusedSGD - optimizer = NpuFusedSGD(model.parameters(), lr=0.01) - model = model.to(args.device) - if args.amp: - from apex import amp - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, - loss_scale=None if args.loss_scale == -1 else args.loss_scale, - combine_grad=True) - else: - optimizer = optim.SGD(model.parameters(), lr=0.01) - model = model.to(args.device) - if args.amp: - from apex import amp - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, - loss_scale=None if args.loss_scale == -1 else args.loss_scale) - - # 3.生成input - input_tensor = get_raw_data() - input_tensor = input_tensor.to(args.device) - - # 先运行一次,保证prof得到的性能是正确的 - def run(): - output_tensor = model(input_tensor) - loss = criterion(output_tensor) - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - return loss - for i in range(5): - start_time = time.time() - loss = run() - print('iter: %d, loss: %.2f, time: %.2f'%(i, loss, (time.time() - start_time)*1000)) - - # 4. 执行forward+profiling - with torch.autograd.profiler.profile(**prof_kwargs) as prof: - run() - print(prof.key_averages().table()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding: utf-8 -*- +"""pytorch_prof.py +""" + +import torch +import torch.optim as optim +import torch.nn as nn +import time +import argparse +from models import networks + +def build_model(): + # 请自定义模型并加载预训练模型 + # import torchvision + # model = torchvision.models.resnet50(pretrained=True) + model = networks.define_G(3, 3, 64, 'unet_256', 'instance', + True, 'normal', 0.02, '[0]') + return model + + +def get_raw_data(): + # input_tensor = torch.randn(2, 3, 224, 224) + input_tensor = torch.randn(1, 3, 256, 256) + return input_tensor + + +def criterion(x): + base_func = nn.CrossEntropyLoss() + shape_list = x.shape + N = shape_list[0] + R = 1 + if len(shape_list) > 1: + for r in shape_list[1:]: + R *= r + T = torch.randint(0,R, size=(N,)).to(x.device) + if str(T.device).startswith('npu'): + T = T.int() + return base_func(x.reshape(N, -1), T) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='PyTorch Prof') + parser.add_argument('--device', type=str, default='cpu', + help='set which type of device used. Support cuda:0(device_id), npu:0(device_id).') + parser.add_argument('--amp', default=False, action='store_true', + help='use amp during prof') + parser.add_argument('--loss-scale', default=64.0, type=float, + help='loss scale using in amp, default 64.0, -1 means dynamic') + parser.add_argument('--opt-level', default='O2', type=str, + help='opt-level using in amp, default O2') + parser.add_argument('--FusedSGD', default=False, action='store_true', + help='use FusedSGD during prof') + + args = parser.parse_args() + + # 1.准备工作 + if args.device.startswith('cuda'): + torch.cuda.set_device(args.device) + prof_kwargs = {'use_cuda': True} + elif args.device.startswith('npu'): + torch.npu.set_device(args.device) + prof_kwargs = {'use_npu': True} + else: + prof_kwargs = {} + + # 2.构建模型 + model = build_model() + if args.FusedSGD: + from apex.optimizers import NpuFusedSGD + optimizer = NpuFusedSGD(model.parameters(), lr=0.01) + model = model.to(args.device) + if args.amp: + from apex import amp + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, + loss_scale=None if args.loss_scale == -1 else args.loss_scale, + combine_grad=True) + else: + optimizer = optim.SGD(model.parameters(), lr=0.01) + model = model.to(args.device) + if args.amp: + from apex import amp + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, + loss_scale=None if args.loss_scale == -1 else args.loss_scale) + + # 3.生成input + input_tensor = get_raw_data() + input_tensor = input_tensor.to(args.device) + + # 先运行一次,保证prof得到的性能是正确的 + def run(): + output_tensor = model(input_tensor) + loss = criterion(output_tensor) + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + return loss + for i in range(5): + start_time = time.time() + loss = run() + print('iter: %d, loss: %.2f, time: %.2f'%(i, loss, (time.time() - start_time)*1000)) + + # 4. 
执行forward+profiling + with torch.autograd.profiler.profile(**prof_kwargs) as prof: + run() + print(prof.key_averages().table()) prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/Pix2pixHD/datasets_deal.py b/ACL_PyTorch/contrib/cv/gan/Pix2pixHD/datasets_deal.py index 348f33470166b96df4fc37ff38e81dfcd1da9e5c..1d8b167ef25fe42959166905dece49d32bfcc27f 100644 --- a/ACL_PyTorch/contrib/cv/gan/Pix2pixHD/datasets_deal.py +++ b/ACL_PyTorch/contrib/cv/gan/Pix2pixHD/datasets_deal.py @@ -1,44 +1,44 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import glob -import shutil - -if __name__ == "__main__": - test_inst_dir = sys.argv[1] - test_label_dir = sys.argv[2] - test_original_gtfine_dir = sys.argv[3] - - if not os.path.exists(test_inst_dir): - os.mkdir(test_inst_dir) - if not os.path.exists(test_label_dir): - os.mkdir(test_label_dir) - - city_name_dir = os.listdir(test_original_gtfine_dir) - img_inst_number = 0 - img_label_number = 0 - - for city_name in city_name_dir: - temp_city_dir = os.path.join(test_original_gtfine_dir, city_name) - test_gtfine_list = glob.glob(os.path.join(temp_city_dir, "*.png")) - - for img in test_gtfine_list: - if img[-9:] == "ceIds.png": - img_inst_number += 1 - shutil.copy(img, test_inst_dir) - elif img[-9:] == "elIds.png": - img_label_number += 1 - shutil.copy(img, test_label_dir) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
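+
+# sys is needed for the sys.argv command-line arguments read below; the script
+# uses sys.argv[1..3] but did not import sys, so the import is added here.
+import sys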
+ +import os +import glob +import shutil + +if __name__ == "__main__": + test_inst_dir = sys.argv[1] + test_label_dir = sys.argv[2] + test_original_gtfine_dir = sys.argv[3] + + if not os.path.exists(test_inst_dir): + os.mkdir(test_inst_dir) + if not os.path.exists(test_label_dir): + os.mkdir(test_label_dir) + + city_name_dir = os.listdir(test_original_gtfine_dir) + img_inst_number = 0 + img_label_number = 0 + + for city_name in city_name_dir: + temp_city_dir = os.path.join(test_original_gtfine_dir, city_name) + test_gtfine_list = glob.glob(os.path.join(temp_city_dir, "*.png")) + + for img in test_gtfine_list: + if img[-9:] == "ceIds.png": + img_inst_number += 1 + shutil.copy(img, test_inst_dir) + elif img[-9:] == "elIds.png": + img_label_number += 1 + shutil.copy(img, test_label_dir) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/StarGAN/LICENSE b/ACL_PyTorch/contrib/cv/gan/StarGAN/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/gan/StarGAN/LICENSE +++ b/ACL_PyTorch/contrib/cv/gan/StarGAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/cv/gan/StarGAN/README.md b/ACL_PyTorch/contrib/cv/gan/StarGAN/README.md index 36a385956b2d7dd1006ec75aa62b61d8671c315b..f7f3a13e8fbddce3a28222f78ee81bec8ec97300 100644 --- a/ACL_PyTorch/contrib/cv/gan/StarGAN/README.md +++ b/ACL_PyTorch/contrib/cv/gan/StarGAN/README.md @@ -1,160 +1,160 @@ -## StarGAN Onnx 模型 PyTorch 端到端推理指导 - -### 1 模型概述 - -- 论文地址 - -``` -https://arxiv.org/abs/1711.09020 -``` - -- 代码地址 - -``` -https://github.com/yunjey/stargan -``` - -- 数据集地址 - -``` -wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/dataset/celeba.zip -``` - - - -### 2 环境说明 - -``` -CANN = 5.0.2 -pytorch = 1.5.0 -torchvision = 0.6.0 -onnx = 1.8.0 -numpy = 1.21.1 -``` - -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - - - -### 3 pth 转 om 模型 - -- pth 权重文件默认路径为 `./models/200000-G.pth` -- 进入根目录 `./` 执行 `./test/pth2om` 脚本,自动生成生成 onnx 模型文件和om文件 - -```py -bash ./test/pth2om.sh './models/200000-G.pth' -``` - - - -### 4 生成输入数据并保存为.bin文件 - -- 数据集默认路径为 `./celeba.zip` ,使用脚本 `unzip_dataset.sh` 解压数据集。 - -``` -bash unzip_dataset.sh -``` - -- 使用脚本 `StarGAN_pre_processing.py` 获得二进制 bin 文件和基准的图片结果。 - -``` -source ./test/env_npu.sh -python3.7 StarGAN_pre_processing.py --mode test --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \ - --model_save_dir './models' --result_dir './result_baseline' \ - --attr_path './data/celeba/images' --celeba_image_dir './data/celeba/list_attr_celeba.txt' -``` - - - -### 5 离线推理 - -#### 5.1 msame工具概述 - -msame 工具为华为自研的模型推理工具,输入 om 模型和模型所需要的输入 bin 文件,输出模型的输出数据文件。模型必须是通过 atc 工具转换的 om 模型,输入 bin 文件需要符合模型的输入要求,且支持模型多输入。 - -``` -chmod 777 msame -``` - -#### 5.2 离线推理 - -``` -bash ./test/eval_bs1_perf.sh -bash ./test/eval_bs16_perf.sh -``` - -输出数据默认保存在根目录的 `./StarGAN_[yourBatchSize].log` 中,可以看到时延和 FPS。输出图片默认保存在当前目录 `output_[yourBatchSize]/` 下,为保存模型输入高维张量数据的 txt 文件。 - - - -### 6 精度对比 - -调用 ` StarGAN_post_processing.py` 来进行后处理,把输出的 txt 文件转换为输出图像。 - -```python -python3.7 StarGAN_post_processing.py --folder_path './output_bs1/[YYYYMMDD_HHMMSS]' --batch_size 1 -python3.7 StarGAN_post_processing.py --folder_path './output_bs16/[YYYYMMDD_HHMMSS]' --batch_size 16 -``` - -详细的结果输出在 `./output_[yourBatchSize]/jpg` 文件夹中,可以和 `result_baseline` 文件夹下的在线推理结果做对比。可以发现各个 batchsize 的离线推理生成的图片与基准基本一致。 - - - -### 7 性能对比 - -#### 7.1 NPU 310 性能数据 -``` -(310 bs1) Inference average time: 21.04 ms -(310 bs1) FPS:190.114 -``` - -根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 190.114 FPS - -``` -(310 bs16) Inference average time: 313.39 ms -(310 bs16) FPS:204.218 -``` - -根据时延和核心数,计算得到 Batchsize = 16 时单卡吞吐率 204.218 FPS - -#### 7.2 GPU T4 性能数据 - -``` -&&&& RUNNING TensorRT.trtexec # trtexec --onnx=StarGAN.onnx --shapes=real_img:1x3x128x128,attr:1x5 -... -[11/10/2021-07:45:57] [I] GPU Compute -[11/10/2021-07:45:57] [I] min: 4.5766 ms -[11/10/2021-07:45:57] [I] max: 8.12921 ms -[11/10/2021-07:45:57] [I] mean: 5.34373 ms -[11/10/2021-07:45:57] [I] median: 5.32825 ms -[11/10/2021-07:45:57] [I] percentile: 6.91772 ms at 99% -[11/10/2021-07:45:57] [I] total compute time: 2.93371 s -``` - -根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 187.135 FPS - -``` -&&&& RUNNING TensorRT.trtexec # trtexec --onnx=StarGAN.onnx --shapes=real_img:16x3x128x128,attr:16x5 -... 
-[11/10/2021-08:03:49] [I] GPU Compute -[11/10/2021-08:03:49] [I] min: 65.5917 ms -[11/10/2021-08:03:49] [I] max: 76.011 ms -[11/10/2021-08:03:49] [I] mean: 67.8021 ms -[11/10/2021-08:03:49] [I] median: 67.15 ms -[11/10/2021-08:03:49] [I] percentile: 76.011 ms at 99% -[11/10/2021-08:03:49] [I] total compute time: 3.1189 s -``` - -根据时延和核心数,计算得到 Batchsize = 16 时单卡吞吐率 235.980 FPS - -#### 7.3 性能对比 - -| Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | -| ---------- | -------------- | ------------- | -------- | -| 1 | *189.753* | *187.135* | *101.4%* | -| 4 | *201.207* | *203.666* | *98.80%* | -| 8 | *199.913* | *219.700* | *91.00%* | -| 16 | *200.986* | *235.980* | *85.17%* | - +## StarGAN Onnx 模型 PyTorch 端到端推理指导 + +### 1 模型概述 + +- 论文地址 + +``` +https://arxiv.org/abs/1711.09020 +``` + +- 代码地址 + +``` +https://github.com/yunjey/stargan +``` + +- 数据集地址 + +``` +wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/dataset/celeba.zip +``` + + + +### 2 环境说明 + +``` +CANN = 5.0.2 +pytorch = 1.5.0 +torchvision = 0.6.0 +onnx = 1.8.0 +numpy = 1.21.1 +``` + +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + + + +### 3 pth 转 om 模型 + +- pth 权重文件默认路径为 `./models/200000-G.pth` +- 进入根目录 `./` 执行 `./test/pth2om` 脚本,自动生成生成 onnx 模型文件和om文件 + +```py +bash ./test/pth2om.sh './models/200000-G.pth' +``` + + + +### 4 生成输入数据并保存为.bin文件 + +- 数据集默认路径为 `./celeba.zip` ,使用脚本 `unzip_dataset.sh` 解压数据集。 + +``` +bash unzip_dataset.sh +``` + +- 使用脚本 `StarGAN_pre_processing.py` 获得二进制 bin 文件和基准的图片结果。 + +``` +source ./test/env_npu.sh +python3.7 StarGAN_pre_processing.py --mode test --selected_attrs Black_Hair Blond_Hair Brown_Hair Male Young \ + --model_save_dir './models' --result_dir './result_baseline' \ + --attr_path './data/celeba/images' --celeba_image_dir './data/celeba/list_attr_celeba.txt' +``` + + + +### 5 离线推理 + +#### 5.1 msame工具概述 + +msame 工具为华为自研的模型推理工具,输入 om 模型和模型所需要的输入 bin 文件,输出模型的输出数据文件。模型必须是通过 atc 工具转换的 om 模型,输入 bin 文件需要符合模型的输入要求,且支持模型多输入。 + +``` +chmod 777 msame +``` + +#### 5.2 离线推理 + +``` +bash ./test/eval_bs1_perf.sh +bash ./test/eval_bs16_perf.sh +``` + +输出数据默认保存在根目录的 `./StarGAN_[yourBatchSize].log` 中,可以看到时延和 FPS。输出图片默认保存在当前目录 `output_[yourBatchSize]/` 下,为保存模型输入高维张量数据的 txt 文件。 + + + +### 6 精度对比 + +调用 ` StarGAN_post_processing.py` 来进行后处理,把输出的 txt 文件转换为输出图像。 + +```python +python3.7 StarGAN_post_processing.py --folder_path './output_bs1/[YYYYMMDD_HHMMSS]' --batch_size 1 +python3.7 StarGAN_post_processing.py --folder_path './output_bs16/[YYYYMMDD_HHMMSS]' --batch_size 16 +``` + +详细的结果输出在 `./output_[yourBatchSize]/jpg` 文件夹中,可以和 `result_baseline` 文件夹下的在线推理结果做对比。可以发现各个 batchsize 的离线推理生成的图片与基准基本一致。 + + + +### 7 性能对比 + +#### 7.1 NPU 310 性能数据 +``` +(310 bs1) Inference average time: 21.04 ms +(310 bs1) FPS:190.114 +``` + +根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 190.114 FPS + +``` +(310 bs16) Inference average time: 313.39 ms +(310 bs16) FPS:204.218 +``` + +根据时延和核心数,计算得到 Batchsize = 16 时单卡吞吐率 204.218 FPS + +#### 7.2 GPU T4 性能数据 + +``` +&&&& RUNNING TensorRT.trtexec # trtexec --onnx=StarGAN.onnx --shapes=real_img:1x3x128x128,attr:1x5 +... 
+[11/10/2021-07:45:57] [I] GPU Compute +[11/10/2021-07:45:57] [I] min: 4.5766 ms +[11/10/2021-07:45:57] [I] max: 8.12921 ms +[11/10/2021-07:45:57] [I] mean: 5.34373 ms +[11/10/2021-07:45:57] [I] median: 5.32825 ms +[11/10/2021-07:45:57] [I] percentile: 6.91772 ms at 99% +[11/10/2021-07:45:57] [I] total compute time: 2.93371 s +``` + +根据时延和核心数,计算得到 Batchsize = 1 时单卡吞吐率 187.135 FPS + +``` +&&&& RUNNING TensorRT.trtexec # trtexec --onnx=StarGAN.onnx --shapes=real_img:16x3x128x128,attr:16x5 +... +[11/10/2021-08:03:49] [I] GPU Compute +[11/10/2021-08:03:49] [I] min: 65.5917 ms +[11/10/2021-08:03:49] [I] max: 76.011 ms +[11/10/2021-08:03:49] [I] mean: 67.8021 ms +[11/10/2021-08:03:49] [I] median: 67.15 ms +[11/10/2021-08:03:49] [I] percentile: 76.011 ms at 99% +[11/10/2021-08:03:49] [I] total compute time: 3.1189 s +``` + +根据时延和核心数,计算得到 Batchsize = 16 时单卡吞吐率 235.980 FPS + +#### 7.3 性能对比 + +| Batch Size | 310 (FPS/Card) | T4 (FPS/Card) | 310/T4 | +| ---------- | -------------- | ------------- | -------- | +| 1 | *189.753* | *187.135* | *101.4%* | +| 4 | *201.207* | *203.666* | *98.80%* | +| 8 | *199.913* | *219.700* | *91.00%* | +| 16 | *200.986* | *235.980* | *85.17%* | + diff --git a/ACL_PyTorch/contrib/cv/gan/StarGAN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/gan/StarGAN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/gan/StarGAN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/gan/StarGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/gan/StarGAN/requirements.txt b/ACL_PyTorch/contrib/cv/gan/StarGAN/requirements.txt index bf70cf0b5eca0d38bde555dcb760526a311ceea7..1f9481f7e6095c34c9ff6994f10b82cc431f6d77 100644 --- a/ACL_PyTorch/contrib/cv/gan/StarGAN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/gan/StarGAN/requirements.txt @@ -1,4 +1,4 @@ -torch==1.5.0 -torchvision==0.6.0 -onnx==1.8.0 -numpy==1.21.1 +torch==1.5.0 +torchvision==0.6.0 +onnx==1.8.0 +numpy==1.21.1 diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_postprocess.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_postprocess.py index 3738c2259fdacab35e2e40f153a20207115097e5..5eb29d03a0f6a8b7a3eb3af28e46e404b9d8e145 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_postprocess.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_postprocess.py @@ -1,165 +1,165 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import numpy as np -import os -import argparse -import json -import math -import imageio - -parser = argparse.ArgumentParser(description='CSNLN post process script') -parser.add_argument('--res', default='', type=str, metavar='PATH', - help='om result path') -parser.add_argument('--hr', default='', type=str, metavar='PATH', - help='high res path') -parser.add_argument('--save_path', default='', type=str, metavar='PATH', - help='result image save path') -args = parser.parse_args() - - -with open("pad_info_56.json") as f: - pad_info = json.load(f) -scale = 4 - -def postprocess(hr_src_path, bin_path, save_path): - data = [] - if not os.path.isdir(save_path): - os.makedirs(save_path) - sr_list = merge(bin_path) - files = os.listdir(hr_src_path) - files.sort() - for i, img_file in enumerate(files): - img = sr_list[i] - img = quantize(img, 1) - hr = imageio.imread(os.path.join(hr_src_path, img_file)) - hr = torch.from_numpy(hr) - hr = hr / 255 - psnr = calc_psnr(img, hr, scale, 1) - data.append({"file": img_file, "psnr": psnr}) - - img = (img * 255).byte().cpu() - imageio.imwrite(os.path.join(save_path, img_file+".png"), img.numpy().astype(np.uint8).transpose(1, 2, 0)) - - data = eval_acc(data) - json_data = json.dumps( - data, indent=4, separators=(',', ': ')) - with open("result.json", 'w') as f: - f.write(json_data) - -def eval_acc(data): - acc = 0 - for item in data: - acc += item["psnr"] - acc /= len(data) - print("accuracy: ",acc) - return { - "accuracy": acc, - "data": data - } - - -def quantize(img, rgb_range): - pixel_range = 255 / rgb_range - return img.mul(pixel_range).clamp(0, 255).round().div(pixel_range) - - -def calc_psnr(sr, hr, scale, rgb_range): - sr = sr.unsqueeze(0) - hr = hr.permute(2, 0, 1).unsqueeze(0) - if hr.nelement() == 1: - return 0 - - diff = (sr - hr) / rgb_range - shave = 4 - if diff.size(1) > 1: - gray_coeffs = [65.738, 129.057, 25.064] - convert = diff.new_tensor(gray_coeffs).view(1, 3, 1, 1) / 256 - diff = diff.mul(convert).sum(dim=1) - - valid = diff[..., shave:-shave, shave:-shave] - mse = valid.pow(2).mean() - - return -10 * math.log10(mse) - -def merge(src_path): - min_list = [] - max_list = [] - for i, pad_meta in enumerate(pad_info): - if i % 5 == 0 and i < 16: - max_list.append(pad_meta) - else: - min_list.append(pad_meta) - h_half, w_half = -1, -1 - h_size, w_size = -1, -1 - h, w = -1, -1 - temp_img = None - sr_list = [] - sr = [] - files = os.listdir(src_path) - files.sort() - for i, file in enumerate(files): - array = np.fromfile(os.path.join(src_path, file), dtype=np.float32) - array = array.reshape( - 3, 56*4, 56*4) - img = torch.from_numpy(array) - pad_h, pad_w = min_list[i]['pad_h'], min_list[i]['pad_w'] - if pad_h == 0 and pad_w == 0: - img = img - elif pad_h == 0: - img = img[:, :, 0:-pad_w] - elif pad_w == 0: - img = img[:, 0:-pad_h, :] - else: - img = img[:, 0:-pad_h, 0:-pad_w] - if i % 4 == 0: - h_half, w_half = int(min_list[i]['h_half'] * scale), int(min_list[i]['w_half'] * scale) - h_size, w_size = min_list[i]['h_size'] * scale, min_list[i]['w_size'] * scale - h, w = h_half * 2, w_half * 2 - temp_img = torch.zeros(3, h, w) - temp_img[:, 0:h_half, 0:w_half] = img[:, 0:h_half, 0:w_half] - elif i % 4 == 1: - temp_img[:, 0:h_half, w_half:w] = img[:, 0:h_half, (w_size - w + w_half):w_size] - elif i % 4 == 2: - temp_img[:, h_half:h, 0:w_half] = img[:, (h_size - h + h_half):h_size, 0:w_half] - elif i % 4 == 3: - temp_img[:, h_half:h, w_half:w] = img[:, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size] - 
sr_list.append(temp_img) - - h_half, w_half = max_list[0]['h_half'] * scale, max_list[0]['w_half'] * scale - h_size, w_size = max_list[0]['h_size'] * scale, max_list[0]['w_size'] * scale - h, w = h_half * 2, w_half * 2 - output = torch.zeros(3, h, w) - output[:, 0:h_half, 0:w_half] \ - = sr_list[0][:, 0:h_half, 0:w_half] - output[:, 0:h_half, w_half:w] \ - = sr_list[1][:, 0:h_half, (w_size - w + w_half):w_size] - output[:, h_half:h, 0:w_half] \ - = sr_list[2][:, (h_size - h + h_half):h_size, 0:w_half] - output[:, h_half:h, w_half:w] \ - = sr_list[3][:, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size] - sr.append(output) - sr.append(sr_list[4]) - sr.append(sr_list[5]) - sr.append(sr_list[6]) - sr.append(sr_list[7]) - return sr - - -if __name__ == '__main__': - res = args.res - hr = args.hr - save_path = args.save_path +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import numpy as np +import os +import argparse +import json +import math +import imageio + +parser = argparse.ArgumentParser(description='CSNLN post process script') +parser.add_argument('--res', default='', type=str, metavar='PATH', + help='om result path') +parser.add_argument('--hr', default='', type=str, metavar='PATH', + help='high res path') +parser.add_argument('--save_path', default='', type=str, metavar='PATH', + help='result image save path') +args = parser.parse_args() + + +with open("pad_info_56.json") as f: + pad_info = json.load(f) +scale = 4 + +def postprocess(hr_src_path, bin_path, save_path): + data = [] + if not os.path.isdir(save_path): + os.makedirs(save_path) + sr_list = merge(bin_path) + files = os.listdir(hr_src_path) + files.sort() + for i, img_file in enumerate(files): + img = sr_list[i] + img = quantize(img, 1) + hr = imageio.imread(os.path.join(hr_src_path, img_file)) + hr = torch.from_numpy(hr) + hr = hr / 255 + psnr = calc_psnr(img, hr, scale, 1) + data.append({"file": img_file, "psnr": psnr}) + + img = (img * 255).byte().cpu() + imageio.imwrite(os.path.join(save_path, img_file+".png"), img.numpy().astype(np.uint8).transpose(1, 2, 0)) + + data = eval_acc(data) + json_data = json.dumps( + data, indent=4, separators=(',', ': ')) + with open("result.json", 'w') as f: + f.write(json_data) + +def eval_acc(data): + acc = 0 + for item in data: + acc += item["psnr"] + acc /= len(data) + print("accuracy: ",acc) + return { + "accuracy": acc, + "data": data + } + + +def quantize(img, rgb_range): + pixel_range = 255 / rgb_range + return img.mul(pixel_range).clamp(0, 255).round().div(pixel_range) + + +def calc_psnr(sr, hr, scale, rgb_range): + sr = sr.unsqueeze(0) + hr = hr.permute(2, 0, 1).unsqueeze(0) + if hr.nelement() == 1: + return 0 + + diff = (sr - hr) / rgb_range + shave = 4 + if diff.size(1) > 1: + gray_coeffs = [65.738, 129.057, 25.064] + convert = diff.new_tensor(gray_coeffs).view(1, 3, 1, 1) / 256 + diff = diff.mul(convert).sum(dim=1) + + valid = diff[..., shave:-shave, shave:-shave] + mse = valid.pow(2).mean() + + return -10 * 
math.log10(mse) + +def merge(src_path): + min_list = [] + max_list = [] + for i, pad_meta in enumerate(pad_info): + if i % 5 == 0 and i < 16: + max_list.append(pad_meta) + else: + min_list.append(pad_meta) + h_half, w_half = -1, -1 + h_size, w_size = -1, -1 + h, w = -1, -1 + temp_img = None + sr_list = [] + sr = [] + files = os.listdir(src_path) + files.sort() + for i, file in enumerate(files): + array = np.fromfile(os.path.join(src_path, file), dtype=np.float32) + array = array.reshape( + 3, 56*4, 56*4) + img = torch.from_numpy(array) + pad_h, pad_w = min_list[i]['pad_h'], min_list[i]['pad_w'] + if pad_h == 0 and pad_w == 0: + img = img + elif pad_h == 0: + img = img[:, :, 0:-pad_w] + elif pad_w == 0: + img = img[:, 0:-pad_h, :] + else: + img = img[:, 0:-pad_h, 0:-pad_w] + if i % 4 == 0: + h_half, w_half = int(min_list[i]['h_half'] * scale), int(min_list[i]['w_half'] * scale) + h_size, w_size = min_list[i]['h_size'] * scale, min_list[i]['w_size'] * scale + h, w = h_half * 2, w_half * 2 + temp_img = torch.zeros(3, h, w) + temp_img[:, 0:h_half, 0:w_half] = img[:, 0:h_half, 0:w_half] + elif i % 4 == 1: + temp_img[:, 0:h_half, w_half:w] = img[:, 0:h_half, (w_size - w + w_half):w_size] + elif i % 4 == 2: + temp_img[:, h_half:h, 0:w_half] = img[:, (h_size - h + h_half):h_size, 0:w_half] + elif i % 4 == 3: + temp_img[:, h_half:h, w_half:w] = img[:, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size] + sr_list.append(temp_img) + + h_half, w_half = max_list[0]['h_half'] * scale, max_list[0]['w_half'] * scale + h_size, w_size = max_list[0]['h_size'] * scale, max_list[0]['w_size'] * scale + h, w = h_half * 2, w_half * 2 + output = torch.zeros(3, h, w) + output[:, 0:h_half, 0:w_half] \ + = sr_list[0][:, 0:h_half, 0:w_half] + output[:, 0:h_half, w_half:w] \ + = sr_list[1][:, 0:h_half, (w_size - w + w_half):w_size] + output[:, h_half:h, 0:w_half] \ + = sr_list[2][:, (h_size - h + h_half):h_size, 0:w_half] + output[:, h_half:h, w_half:w] \ + = sr_list[3][:, (h_size - h + h_half):h_size, (w_size - w + w_half):w_size] + sr.append(output) + sr.append(sr_list[4]) + sr.append(sr_list[5]) + sr.append(sr_list[6]) + sr.append(sr_list[7]) + return sr + + +if __name__ == '__main__': + res = args.res + hr = args.hr + save_path = args.save_path postprocess(hr, res, save_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_preprocess.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_preprocess.py index 36a6c15a222ac32a9d95bc717c77396150af476a..f9b24a91eabd0c76c4d53e1b62c4df96b919e67a 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_preprocess.py @@ -1,106 +1,106 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import PIL.Image as pil_image -from torch.serialization import save -import torchvision.transforms as transforms -import os -import argparse -import torch -import json -import math -import imageio - -parser = argparse.ArgumentParser(description='CSNLN preprocess script') -parser.add_argument('--s', default='', type=str, metavar='PATH', - help='path of source image files (default: none)') -parser.add_argument('--d', default='', type=str, metavar='PATH', - help='path of output (default: none)') -args = parser.parse_args() - - - -pad_info = [] -def chop(x, file_name="", save_path="", shave=10, min_size=3600): - scale = 4 - c, h, w = x.size() - h_half, w_half = h // 2, w // 2 - if h % 2 != 0: - temp_h_half = h_half + 0.5 - else: - temp_h_half = h_half - if w % 2 != 0: - temp_w_half = w_half + 0.5 - else: - temp_w_half = w_half - h_size, w_size = h_half + shave, w_half + shave - h_size += scale-h_size%scale - w_size += scale-w_size%scale - lr_list = [ - x[:, 0:h_size, 0:w_size], - x[:, 0:h_size, (w - w_size):w], - x[:, (h - h_size):h, 0:w_size], - x[:, (h - h_size):h, (w - w_size):w]] - if w_size * h_size < min_size: - for i in range(0, 4, 1): - final_fileName = file_name.split('.')[0] + "_" + str(i) - lr_batch = torch.cat(lr_list[i:(i + 1)], dim=0) - pad_h = 56-h_size - pad_w = 56-w_size - lr_batch = transforms.Compose([ - transforms.Pad(padding=(0, 0, 56-w_size, 56-h_size), padding_mode='edge') - ])(lr_batch) - - imageio.imsave(os.path.join(save_path, "png", final_fileName + ".png"), np.array( - lr_batch).astype(np.uint8).transpose(1, 2, 0)) - lr_batch = np.array(lr_batch).astype(np.float32)/255 - lr_batch.tofile(os.path.join( - save_path, "bin_56", final_fileName + ".bin")) - pad_info.append( - {"name":final_fileName, "h_half": temp_h_half, "w_half": temp_w_half, "h_size":h_size, "w_size":w_size, "pad_h":pad_h, "pad_w":pad_w}) - with open("pad_info_56.json", "w") as f: - f.write(json.dumps(pad_info, indent=4, separators=(',', ': '))) - - else: - count = 0 - for patch in lr_list: - temp_fileName = file_name.split('.')[0] + "_" + str(count) + ".png" - pad_info.append( - {"name":temp_fileName.split('.')[0], "h_half": h_half, "w_half": w_half, "h_size":h_size, "w_size":w_size}) - count = count + 1 - chop(patch, file_name=temp_fileName, save_path=save_path, shave=shave, min_size=min_size) - -def preprocess(src_path, save_path): - if not os.path.isdir(src_path): - os.makedirs(src_path) - if not os.path.isdir(save_path): - os.makedirs(save_path) - if not os.path.isdir(os.path.join(save_path, "bin_56")): - os.makedirs(os.path.join(save_path, "bin_56")) - if not os.path.isdir(os.path.join(save_path, "png")): - os.makedirs(os.path.join(save_path, "png")) - files = os.listdir(src_path) - files.sort() - for image_file in files: - image = imageio.imread(os.path.join( - src_path, image_file)) - np_transpose = np.ascontiguousarray(image.transpose((2, 0, 1))) - image = torch.from_numpy(np_transpose).float() - image.mul_(255 / 255) - chop(image, file_name=image_file, save_path=save_path) - - -if __name__ == '__main__': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import PIL.Image as pil_image +from torch.serialization import save +import torchvision.transforms as transforms +import os +import argparse +import torch +import json +import math +import imageio + +parser = argparse.ArgumentParser(description='CSNLN preprocess script') +parser.add_argument('--s', default='', type=str, metavar='PATH', + help='path of source image files (default: none)') +parser.add_argument('--d', default='', type=str, metavar='PATH', + help='path of output (default: none)') +args = parser.parse_args() + + + +pad_info = [] +def chop(x, file_name="", save_path="", shave=10, min_size=3600): + scale = 4 + c, h, w = x.size() + h_half, w_half = h // 2, w // 2 + if h % 2 != 0: + temp_h_half = h_half + 0.5 + else: + temp_h_half = h_half + if w % 2 != 0: + temp_w_half = w_half + 0.5 + else: + temp_w_half = w_half + h_size, w_size = h_half + shave, w_half + shave + h_size += scale-h_size%scale + w_size += scale-w_size%scale + lr_list = [ + x[:, 0:h_size, 0:w_size], + x[:, 0:h_size, (w - w_size):w], + x[:, (h - h_size):h, 0:w_size], + x[:, (h - h_size):h, (w - w_size):w]] + if w_size * h_size < min_size: + for i in range(0, 4, 1): + final_fileName = file_name.split('.')[0] + "_" + str(i) + lr_batch = torch.cat(lr_list[i:(i + 1)], dim=0) + pad_h = 56-h_size + pad_w = 56-w_size + lr_batch = transforms.Compose([ + transforms.Pad(padding=(0, 0, 56-w_size, 56-h_size), padding_mode='edge') + ])(lr_batch) + + imageio.imsave(os.path.join(save_path, "png", final_fileName + ".png"), np.array( + lr_batch).astype(np.uint8).transpose(1, 2, 0)) + lr_batch = np.array(lr_batch).astype(np.float32)/255 + lr_batch.tofile(os.path.join( + save_path, "bin_56", final_fileName + ".bin")) + pad_info.append( + {"name":final_fileName, "h_half": temp_h_half, "w_half": temp_w_half, "h_size":h_size, "w_size":w_size, "pad_h":pad_h, "pad_w":pad_w}) + with open("pad_info_56.json", "w") as f: + f.write(json.dumps(pad_info, indent=4, separators=(',', ': '))) + + else: + count = 0 + for patch in lr_list: + temp_fileName = file_name.split('.')[0] + "_" + str(count) + ".png" + pad_info.append( + {"name":temp_fileName.split('.')[0], "h_half": h_half, "w_half": w_half, "h_size":h_size, "w_size":w_size}) + count = count + 1 + chop(patch, file_name=temp_fileName, save_path=save_path, shave=shave, min_size=min_size) + +def preprocess(src_path, save_path): + if not os.path.isdir(src_path): + os.makedirs(src_path) + if not os.path.isdir(save_path): + os.makedirs(save_path) + if not os.path.isdir(os.path.join(save_path, "bin_56")): + os.makedirs(os.path.join(save_path, "bin_56")) + if not os.path.isdir(os.path.join(save_path, "png")): + os.makedirs(os.path.join(save_path, "png")) + files = os.listdir(src_path) + files.sort() + for image_file in files: + image = imageio.imread(os.path.join( + src_path, image_file)) + np_transpose = np.ascontiguousarray(image.transpose((2, 0, 1))) + image = torch.from_numpy(np_transpose).float() + image.mul_(255 / 255) + chop(image, file_name=image_file, save_path=save_path) + + +if __name__ == '__main__': preprocess(args.s, args.d) \ No newline at end of file diff 
--git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_pth2onnx.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_pth2onnx.py index bb065db2a5e71addc97d06c1f76db0d284083d87..016ba2f0883e3224707d9d21184bd80a72716971 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/CSNLN_pth2onnx.py @@ -1,37 +1,37 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import torch -sys.path.append(r"./Cross-Scale-Non-Local-Attention/src/") -from model.csnln import CSNLN -from option import args -from collections import OrderedDict - - -def pth2onnx(input_file, output_file): - model = CSNLN(args) - model.load_state_dict(torch.load( - input_file, map_location=torch.device('cpu')), strict=False) - - model.eval() - dummy_input = torch.randn(1, 3, 56, 56) - - torch.onnx.export(model, dummy_input, output_file, opset_version=11, verbose=False) - - -if __name__ == "__main__": - input_file = args.pre_train - output_file = args.save - pth2onnx(input_file, output_file) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +sys.path.append(r"./Cross-Scale-Non-Local-Attention/src/") +from model.csnln import CSNLN +from option import args +from collections import OrderedDict + + +def pth2onnx(input_file, output_file): + model = CSNLN(args) + model.load_state_dict(torch.load( + input_file, map_location=torch.device('cpu')), strict=False) + + model.eval() + dummy_input = torch.randn(1, 3, 56, 56) + + torch.onnx.export(model, dummy_input, output_file, opset_version=11, verbose=False) + + +if __name__ == "__main__": + input_file = args.pre_train + output_file = args.save + pth2onnx(input_file, output_file) diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/LICENSE b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/LICENSE +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/get_info.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/get_info.py index 63799abe3d24d5b0add9f52fa18313d9dd61168f..5f5fac8622872fbb75609df3b39c44d5c0e25390 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/get_info.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/get_info.py @@ -1,58 +1,58 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = '56' - height = '56' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = '56' + height = '56' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/modelzoo_level.txt index 74147d2f27a896292d82f166ea631cb4937a8231..0541ac3f6dfa3a88fa0b725ee2f0d125d8d49ec7 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/modelzoo_level.txt @@ -1,3 +1,3 @@ -FunStatus:OK -PrecisionStatus:OK +FunStatus:OK +PrecisionStatus:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/perf_softmax_transpose.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/perf_softmax_transpose.py index 888c3b93505f9f44db7d36af96ed8ebeda332d21..527104a539222e6d7a843c394b856ec210c17f1a 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/perf_softmax_transpose.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/perf_softmax_transpose.py @@ -1,22 +1,22 @@ -import sys -import onnx - -if __name__ == '__main__': - model = onnx.load(sys.argv[1]) - graph = model.graph - node = graph.node - softmax_node_index = [] - del_group = [] - for i in range(len(node)): - if node[i].op_type == 'Softmax' and node[i].attribute[0].i == 3: - del_group.append((node[i-1], node[i], node[i+1], i)) - for g in del_group: - new_input = g[0].input - new_output = g[2].output - new_name = g[1].name - new_index = g[3] - new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) - for n in g[:-1]: - graph.node.remove(n) - graph.node.insert(new_index, new_node) - onnx.save(model, sys.argv[2]) +import sys +import onnx + +if __name__ == '__main__': + model = onnx.load(sys.argv[1]) + graph = model.graph + node = graph.node + softmax_node_index = [] + del_group = [] + for i in range(len(node)): + if node[i].op_type == 'Softmax' and node[i].attribute[0].i == 3: + del_group.append((node[i-1], node[i], node[i+1], i)) + for g in del_group: + new_input = g[0].input + new_output = g[2].output + new_name = g[1].name + new_index = g[3] + new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) + for n in g[:-1]: + graph.node.remove(n) 
+ graph.node.insert(new_index, new_node) + onnx.save(model, sys.argv[2]) diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/requirements.txt b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/requirements.txt index cb70277708e1ea983e2ab0d1e7c4f3fd9a973e6b..94ae69e3e3cc50417da3fbe085c25d0d200647d3 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/requirements.txt +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/requirements.txt @@ -1,9 +1,9 @@ -torch==1.8.0 -torchvision==0.9.0 -onnx==1.9.0 -onnx-simplifier==0.3.6 -numpy==1.21.1 -Pillow == 7.2.0 -opencv-python==4.2.0.34 -pyyaml==5.3.1 +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +onnx-simplifier==0.3.6 +numpy==1.21.1 +Pillow == 7.2.0 +opencv-python==4.2.0.34 +pyyaml==5.3.1 scikit-image==0.18.1 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/test/parse.py b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/test/parse.py index 2a25918902bf0ccaaefa729646113f01890ae787..7a52798e543a1abb477527632a12e457bd17c42d 100644 --- a/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/test/parse.py +++ b/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention/test/parse.py @@ -1,30 +1,30 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.log'): - result_log = sys.argv[1] - with open(result_log, 'r') as f: - content = f.read() - print(result_log[:-4].split('_')[1], content, end="") - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.log'): + result_log = sys.argv[1] + with open(result_log, 'r') as f: + content = f.read() + print(result_log[:-4].split('_')[1], content, end="") + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index 592f7f9936ff3c5bcb93ad47f60fcb136bf32842..992c40398a47b887186251c3c00d8467141d06ef 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -1,267 +1,267 @@ -# DnCNN ONNX模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[DnCNN论文](https://ieeexplore.ieee.org/document/7839189) - -### 1.2 代码地址 - -brach:master - -commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b - -备注:commitid指的是值模型基于此版本代码做的推理 - -[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch) - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 -torch==1.8.0 -torchvision==0.9.0 -onnx==1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy==1.20.2 -opencv-python==4.5.2.52 -scikit-image==0.16.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.DnCNN模型代码下载 -``` -git clone https://github.com/SaoYan/DnCNN-PyTorch -cd DnCNN-PyTorch -``` -2.获取源码pth权重文件 -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth -文件的MD5sum值是: 5703a29b082cc03401fa9d9fee12cb71 - -3.获取NPU训练pth文件,将net.pth文件移动到DnCNN目录下 - -4.编写pth2onnx脚本DnCNN_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -5.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件 -``` -atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 推理数据集获取 -存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录 - -### 4.2 数据集预处理 -1.预处理脚本data_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -python3.7 data_preprocess.py 
data ISource INoisy -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程 -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true -``` -输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id),每个输入对应的输出对应一个_X.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用postprocess.py脚本推理结果进行PSRN计算,结果会打印在屏幕上 -``` -python3.7 postprocess.py result/dumpOutput_device0/ -``` -第一个参数为benchmark输出目录 -查看输出结果: -``` -ISource/test064.bin PSNR 29.799832 -infering... -ISource/test065.bin PSNR 31.486418 -infering... -ISource/test066.bin PSNR 35.676752 -infering... -ISource/test067.bin PSNR 28.577475 -infering... -ISource/test068.bin PSNR 29.709767 - -PSNR on test data 31.526892 -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源PSNR精度 -``` -| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch | -|:-----------:|:-------:|:-------:|:---------------:|:---------------:| -| 15 | 31.73 | 31.61 | 31.71 | 31.60 | -| 25 | 29.23 | 29.16 | 29.21 | 29.15 | -| 50 | 26.23 | 26.23 | 26.22 | 26.20 | -``` -### 6.3 精度对比 -将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** - ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 15.0465, latency: 4519.32 -[data read] throughputRate: 966.417, moduleLatency: 1.03475 -[preprocess] throughputRate: 525.539, moduleLatency: 1.90281 -[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903 -[post] throughputRate: 22.615, moduleLatency: 44.2185 -``` -Interface throughputRate: 23.7919,23.7919x4=95.176既是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 15.3818, latency: 4420.81 -[data read] throughputRate: 1484.65, moduleLatency: 0.673559 -[preprocess] throughputRate: 316.273, moduleLatency: 3.16182 -[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179 -[post] throughputRate: 1.56798, moduleLatency: 637.764 -``` -Interface throughputRate: 22.2853,22.2853x4=89.1412既是batch16 310单卡吞吐率 - -batch4性能: -``` -[e2e] throughputRate: 15.5641, latency: 4369.02 -[data read] throughputRate: 1898.17, moduleLatency: 0.526824 -[preprocess] throughputRate: 523.883, moduleLatency: 1.90882 -[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192 -[post] throughputRate: 5.50981, moduleLatency: 181.495 -``` -batch4 310单卡吞吐率 23.9045x4=95.618 - -batch8性能: -``` -[e2e] throughputRate: 
15.5035, latency: 4386.1 -[data read] throughputRate: 1863.93, moduleLatency: 0.5365 -[preprocess] throughputRate: 461.471, moduleLatency: 2.16699 -[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831 -[post] throughputRate: 2.74035, moduleLatency: 364.917 -``` -batch8 310单卡吞吐率 22.2652x4=89.0608 - -batch32性能: -``` -[e2e] throughputRate: 12.4075, latency: 5480.54 -[data read] throughputRate: 1770.65, moduleLatency: 0.564765 -[preprocess] throughputRate: 242.944, moduleLatency: 4.11618 -[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386 -[post] throughputRate: 0.68503, moduleLatency: 1459.79 -``` -batch32 310单卡吞吐率 13.2648x4=53.0592 - -**性能优化:** - +# DnCNN ONNX模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[DnCNN论文](https://ieeexplore.ieee.org/document/7839189) + +### 1.2 代码地址 + +brach:master + +commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b + +备注:commitid指的是值模型基于此版本代码做的推理 + +[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch) + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy==1.20.2 +opencv-python==4.5.2.52 +scikit-image==0.16.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.DnCNN模型代码下载 +``` +git clone https://github.com/SaoYan/DnCNN-PyTorch +cd DnCNN-PyTorch +``` +2.获取源码pth权重文件 +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth +文件的MD5sum值是: 5703a29b082cc03401fa9d9fee12cb71 + +3.获取NPU训练pth文件,将net.pth文件移动到DnCNN目录下 + +4.编写pth2onnx脚本DnCNN_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +5.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件 +``` +atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 推理数据集获取 +存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录 + +### 4.2 数据集预处理 +1.预处理脚本data_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +python3.7 data_preprocess.py data ISource INoisy +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 
+ +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程 +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true +``` +输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id),每个输入对应的输出对应一个_X.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用postprocess.py脚本推理结果进行PSRN计算,结果会打印在屏幕上 +``` +python3.7 postprocess.py result/dumpOutput_device0/ +``` +第一个参数为benchmark输出目录 +查看输出结果: +``` +ISource/test064.bin PSNR 29.799832 +infering... +ISource/test065.bin PSNR 31.486418 +infering... +ISource/test066.bin PSNR 35.676752 +infering... +ISource/test067.bin PSNR 28.577475 +infering... +ISource/test068.bin PSNR 29.709767 + +PSNR on test data 31.526892 +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源PSNR精度 +``` +| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch | +|:-----------:|:-------:|:-------:|:---------------:|:---------------:| +| 15 | 31.73 | 31.61 | 31.71 | 31.60 | +| 25 | 29.23 | 29.16 | 29.21 | 29.15 | +| 50 | 26.23 | 26.23 | 26.22 | 26.20 | +``` +### 6.3 精度对比 +将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** + +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 15.0465, latency: 4519.32 +[data read] throughputRate: 966.417, moduleLatency: 1.03475 +[preprocess] throughputRate: 525.539, moduleLatency: 1.90281 +[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903 +[post] throughputRate: 22.615, moduleLatency: 44.2185 +``` +Interface throughputRate: 23.7919,23.7919x4=95.176既是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 15.3818, latency: 4420.81 +[data read] throughputRate: 1484.65, moduleLatency: 0.673559 +[preprocess] throughputRate: 316.273, moduleLatency: 3.16182 +[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179 +[post] throughputRate: 1.56798, moduleLatency: 637.764 +``` +Interface throughputRate: 22.2853,22.2853x4=89.1412既是batch16 310单卡吞吐率 + +batch4性能: +``` +[e2e] throughputRate: 15.5641, latency: 4369.02 +[data read] throughputRate: 1898.17, moduleLatency: 0.526824 +[preprocess] throughputRate: 523.883, moduleLatency: 1.90882 +[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192 +[post] throughputRate: 5.50981, moduleLatency: 181.495 +``` +batch4 310单卡吞吐率 23.9045x4=95.618 + +batch8性能: +``` +[e2e] throughputRate: 15.5035, latency: 4386.1 +[data read] throughputRate: 1863.93, moduleLatency: 0.5365 +[preprocess] throughputRate: 461.471, moduleLatency: 2.16699 +[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 
47.2831 +[post] throughputRate: 2.74035, moduleLatency: 364.917 +``` +batch8 310单卡吞吐率 22.2652x4=89.0608 + +batch32性能: +``` +[e2e] throughputRate: 12.4075, latency: 5480.54 +[data read] throughputRate: 1770.65, moduleLatency: 0.564765 +[preprocess] throughputRate: 242.944, moduleLatency: 4.11618 +[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386 +[post] throughputRate: 0.68503, moduleLatency: 1459.79 +``` +batch32 310单卡吞吐率 13.2648x4=53.0592 + +**性能优化:** + >batch32纯推理性能达标。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/test/README.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/test/README.md index ab5da7a949f5f38bfdfd01e7cd89ac3d05d90372..7991250288dd3f4ee1e2783621c413779f6a4155 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/test/README.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/test/README.md @@ -1,31 +1,31 @@ -环境准备: - -1.获取数据集 -``` -git clone https://github.com/SaoYan/DnCNN-PyTorch -``` -开源代码仓的data目录下有数据集,将data复制到DnCNN目录下 - -2.进入工作目录 -``` -cd DnCNN -``` - -3.安装必要的依赖 -``` -pip3.7 install -r requirements.txt -``` - -4.获取训练提供的权重文件 -``` -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth -``` - -5.获取benchmark工具 -将benchmark.x86_64 benchmark.aarch64放在当前目录 - -6.310上执行,执行时确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=./data -``` +环境准备: + +1.获取数据集 +``` +git clone https://github.com/SaoYan/DnCNN-PyTorch +``` +开源代码仓的data目录下有数据集,将data复制到DnCNN目录下 + +2.进入工作目录 +``` +cd DnCNN +``` + +3.安装必要的依赖 +``` +pip3.7 install -r requirements.txt +``` + +4.获取训练提供的权重文件 +``` +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth +``` + +5.获取benchmark工具 +将benchmark.x86_64 benchmark.aarch64放在当前目录 + +6.310上执行,执行时确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=./data +``` diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_postprocess.py b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_postprocess.py index dcd38a9917dec3697dab6e25a2e8b17bc2180dd5..e844f8baedd69609b140c3e6c59f8b05fab886a9 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_postprocess.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_postprocess.py @@ -1,50 +1,50 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import argparse -from pathlib import Path -import numpy as np -sys.path.append(r"./3DMPPE_ROOTNET_RELEASE") -from data.MuPoTS.MuPoTS import MuPoTS - -def evaluate(result_path, result_file, img_path, ann_path): - print('postprocessing') - bin_path = os.listdir(result_path)[0] - result_path = os.path.join(result_path, bin_path) - bin_list = os.listdir(result_path) - bin_list.sort(key=lambda x: int(x[:-13])) - preds = [] - for i,f in enumerate(bin_list): - bin_path = os.path.join(result_path, f) - coord_out = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 3) - preds.append(coord_out) - # evaluate - preds = np.concatenate(preds, axis=0) - testset = MuPoTS('test', img_path, ann_path) - if not os.path.exists(result_file): - os.makedirs(result_file) - testset.evaluate(preds, result_file) - print('postprocess finised') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of 3D-ResNets') - parser.add_argument('--img_path', default='MuPoTS/MultiPersonTestSet',type=Path, help='Directory path of videos') - parser.add_argument('--ann_path', default='MuPoTS/MuPoTS-3D.json', type=Path, help='Annotation file path') - parser.add_argument('--input_path', default='out_bs1', type=Path, help='Directory path of videos') - parser.add_argument('--result_file', default='result_bs1', type=Path, help='Directory path of binary output data') - opt = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import argparse +from pathlib import Path +import numpy as np +sys.path.append(r"./3DMPPE_ROOTNET_RELEASE") +from data.MuPoTS.MuPoTS import MuPoTS + +def evaluate(result_path, result_file, img_path, ann_path): + print('postprocessing') + bin_path = os.listdir(result_path)[0] + result_path = os.path.join(result_path, bin_path) + bin_list = os.listdir(result_path) + bin_list.sort(key=lambda x: int(x[:-13])) + preds = [] + for i,f in enumerate(bin_list): + bin_path = os.path.join(result_path, f) + coord_out = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 3) + preds.append(coord_out) + # evaluate + preds = np.concatenate(preds, axis=0) + testset = MuPoTS('test', img_path, ann_path) + if not os.path.exists(result_file): + os.makedirs(result_file) + testset.evaluate(preds, result_file) + print('postprocess finised') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of 3D-ResNets') + parser.add_argument('--img_path', default='MuPoTS/MultiPersonTestSet',type=Path, help='Directory path of videos') + parser.add_argument('--ann_path', default='MuPoTS/MuPoTS-3D.json', type=Path, help='Annotation file path') + parser.add_argument('--input_path', default='out_bs1', type=Path, help='Directory path of videos') + parser.add_argument('--result_file', default='result_bs1', type=Path, help='Directory path of binary output data') + opt = parser.parse_args() evaluate(opt.input_path, opt.result_file, opt.img_path, opt.ann_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_preprocess.py b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_preprocess.py index 7f7ff43b73495a8026ac8efd00b9982fcbed7531..a87187843be5267bc7d0047c6c2338fcdf404dfd 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_preprocess.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_preprocess.py @@ -1,61 +1,61 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import argparse -from pathlib import Path -from tqdm import tqdm -import torch -import torchvision.transforms as transforms -from torch.utils.data import DataLoader -sys.path.append(r"./3DMPPE_ROOTNET_RELEASE") -from data.dataset import DatasetLoader -from data.MuPoTS.MuPoTS import MuPoTS - -def preprocess(inference_batch_size, save_path_imge, save_path_cam, img_path, ann_path): - print('preprocessing') - testset = MuPoTS('test', img_path, ann_path) - - testset_loader = DatasetLoader(testset, False, transforms.Compose([transforms.ToTensor(), - transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])) - batch_generator = DataLoader(dataset=testset_loader, batch_size=inference_batch_size, - shuffle=False, num_workers=8, pin_memory=True) - - if not os.path.exists(save_path_imge): - os.makedirs(save_path_imge) - if not os.path.exists(save_path_cam): - os.makedirs(save_path_cam) - cid = 0 - with torch.no_grad(): - for itr, (input_img, cam_param) in enumerate(tqdm(batch_generator)): - if(len(input_img) == inference_batch_size): - path_bin_image = str(save_path_imge) + '/' + str(cid) + '.bin' - path_bin_cam = str(save_path_cam) + '/' + str(cid) + '.bin' - cid = cid + 1 - input_img.cpu().numpy().tofile(path_bin_image) - cam_param.cpu().numpy().tofile(path_bin_cam) - print('preprocess finished') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--img_path', default='MuPoTS/MultiPersonTestSet', - type=Path, help='Directory path of videos') - parser.add_argument('--ann_path', default='MuPoTS/MuPoTS-3D.json', type=Path, help='Annotation file path') - parser.add_argument('--inference_batch_size', default=1, type=int, help='Batch Size for inference. 0 means this is the same as batch_size.') - parser.add_argument('--save_path_image', default='0data_imge_bs1', type=Path, help='Directory path of binary output data') - parser.add_argument('--save_path_cam', default='0data_cam_bs1', type=Path, help='Directory path of binary output data') - opt = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import sys
+import argparse
+from pathlib import Path
+from tqdm import tqdm
+import torch
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader
+sys.path.append(r"./3DMPPE_ROOTNET_RELEASE")
+from data.dataset import DatasetLoader
+from data.MuPoTS.MuPoTS import MuPoTS
+
+def preprocess(inference_batch_size, save_path_image, save_path_cam, img_path, ann_path):
+    print('preprocessing')
+    testset = MuPoTS('test', img_path, ann_path)
+
+    testset_loader = DatasetLoader(testset, False, transforms.Compose([transforms.ToTensor(),
+                                   transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))]))
+    batch_generator = DataLoader(dataset=testset_loader, batch_size=inference_batch_size,
+                                 shuffle=False, num_workers=8, pin_memory=True)
+
+    if not os.path.exists(save_path_image):
+        os.makedirs(save_path_image)
+    if not os.path.exists(save_path_cam):
+        os.makedirs(save_path_cam)
+    cid = 0
+    with torch.no_grad():
+        for itr, (input_img, cam_param) in enumerate(tqdm(batch_generator)):
+            if(len(input_img) == inference_batch_size):
+                path_bin_image = str(save_path_image) + '/' + str(cid) + '.bin'
+                path_bin_cam = str(save_path_cam) + '/' + str(cid) + '.bin'
+                cid = cid + 1
+                input_img.cpu().numpy().tofile(path_bin_image)
+                cam_param.cpu().numpy().tofile(path_bin_cam)
+    print('preprocess finished')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--img_path', default='MuPoTS/MultiPersonTestSet',
+                        type=Path, help='Directory path of MuPoTS test images')
+    parser.add_argument('--ann_path', default='MuPoTS/MuPoTS-3D.json', type=Path, help='Annotation file path')
+    parser.add_argument('--inference_batch_size', default=1, type=int, help='Batch size for inference.')
+    parser.add_argument('--save_path_image', default='0data_imge_bs1', type=Path, help='Directory path of binary image data')
+    parser.add_argument('--save_path_cam', default='0data_cam_bs1', type=Path, help='Directory path of binary camera data')
+    opt = parser.parse_args()
     preprocess(opt.inference_batch_size, opt.save_path_image, opt.save_path_cam, opt.img_path, opt.ann_path)
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_pth2onnx.py b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_pth2onnx.py
index 8da7b5791d0d4342575ce3e8b7bca9a790db1b1e..fad0ab3a3d89ed5f37760d265d0b4ec5f21d6e91 100644
--- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_pth2onnx.py
+++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/3DMPPE-ROOTNET_pth2onnx.py
@@ -1,47 +1,47 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import sys -import torch -from collections import OrderedDict -sys.path.append(r"./3DMPPE_ROOTNET_RELEASE") -from main.model import get_pose_net -from main.config import cfg - -def convert(pth_file_path,onnx_file_path): - model = get_pose_net(cfg, False) - ckpt = torch.load(pth_file_path, map_location=torch.device('cpu')) - new_state_dict = OrderedDict() - for k, v in ckpt['network'].items(): - if k[0:7] == "module.": - name = k[7:] # remove module. - else: - name = k[0:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - - input_names = ["image", "cam_param"] - output_names = ["score"] - dynamic_axes = {'image': {0: '-1'}, 'cam_param': {0: '-1'}, 'score': {0: '-1'}} - dummy_input1 = torch.randn(1, 3, 256, 256) - dummy_input2 = torch.randn(1, 1) - torch.onnx.export(model, (dummy_input1, dummy_input2), onnx_file_path, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) - -if __name__ == "__main__": - # convert("snapshot_6.pth.tar", "3DMPPE-ROOTNET.onnx") - convert(sys.argv[1], sys.argv[2]) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +from collections import OrderedDict +sys.path.append(r"./3DMPPE_ROOTNET_RELEASE") +from main.model import get_pose_net +from main.config import cfg + +def convert(pth_file_path,onnx_file_path): + model = get_pose_net(cfg, False) + ckpt = torch.load(pth_file_path, map_location=torch.device('cpu')) + new_state_dict = OrderedDict() + for k, v in ckpt['network'].items(): + if k[0:7] == "module.": + name = k[7:] # remove module. + else: + name = k[0:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + + input_names = ["image", "cam_param"] + output_names = ["score"] + dynamic_axes = {'image': {0: '-1'}, 'cam_param': {0: '-1'}, 'score': {0: '-1'}} + dummy_input1 = torch.randn(1, 3, 256, 256) + dummy_input2 = torch.randn(1, 1) + torch.onnx.export(model, (dummy_input1, dummy_input2), onnx_file_path, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) + +if __name__ == "__main__": + # convert("snapshot_6.pth.tar", "3DMPPE-ROOTNET.onnx") + convert(sys.argv[1], sys.argv[2]) + + diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/LICENSE b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/LICENSE index df2c2f2c3e55bfbad1aebe53321a94ee5a3854bc..c8ec075d5b892f823d0b485ad4fdd01355c57b3e 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/LICENSE +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Copyright 2018-2019 Open-MMLab. All rights reserved. 
+ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/README.md b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/README.md index d2b51af3aceb3943a9b22e43ac7fe84182952422..51e63ccddbc4d80539e9d27608d8463019917c85 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/README.md +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/README.md @@ -1,43 +1,43 @@ -# 3DMPPE-ROOTNET模型PyTorch离线推理指导 - -### 环境准备 - -安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - -### 安装开源模型代码 -``` -git clone https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE.git -cd 3DMPPE_ROOTNET_RELEASE -patch -p1 < ../3DMPPE_ROOTNET.patch -cd .. -``` -> branch: master - -> commit id: a199d50be5b0a9ba348679ad4d010130535a631d - -### 获取MuPoTS数据集 -下载 MuPoTS 解析数据 [[MuPoTS](https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE)] - - -### 获取推理工具 -获取msame和benchmark工具 [[msame](https://gitee.com/ascend/tools/tree/master/msame)][[benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer)] - -将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 - -### 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets/ -``` - **评测结果:** -| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| 3DMPPE-ROOTNET bs1 | AP_root: 31.87 | AP_root: 31.90 | 639.656fps | 664.718fps | -| 3DMPPE-ROOTNET bs16 | AP_root: 31.87 | AP_root: 31.88 | 467.282fps | 817.480fps | - - +# 3DMPPE-ROOTNET模型PyTorch离线推理指导 + +### 环境准备 + +安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + +### 安装开源模型代码 +``` +git clone https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE.git +cd 3DMPPE_ROOTNET_RELEASE +patch -p1 < ../3DMPPE_ROOTNET.patch +cd .. 
+``` +> branch: master + +> commit id: a199d50be5b0a9ba348679ad4d010130535a631d + +### 获取MuPoTS数据集 +下载 MuPoTS 解析数据 [[MuPoTS](https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE)] + + +### 获取推理工具 +获取msame和benchmark工具 [[msame](https://gitee.com/ascend/tools/tree/master/msame)][[benchmark](https://gitee.com/ascend/cann-benchmark/tree/master/infer)] + +将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 + +### 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets/ +``` + **评测结果:** +| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| 3DMPPE-ROOTNET bs1 | AP_root: 31.87 | AP_root: 31.90 | 639.656fps | 664.718fps | +| 3DMPPE-ROOTNET bs16 | AP_root: 31.87 | AP_root: 31.88 | 467.282fps | 817.480fps | + + diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/modelzoo_level.txt index 83689985f26624b65a4c5ebb5f00a152618799ba..8c469d858afccf3026a8640799938f8de7b46fac 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/requirements.txt b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/requirements.txt index a7d74b3118657a44e6df8b9ea1af1f40d5266528..fd36c964001617acdbc9f5f6def6a90ad15c4e28 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/requirements.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/requirements.txt @@ -1,9 +1,9 @@ -torch==1.5.0 -torchvision==0.6.0 -onnx == 1.7.0 -numpy==1.20.3 -opencv-python==4.5.3.56 -tqdm==4.62.1 -scipy==1.6.2 -pycocotools==2.0 +torch==1.5.0 +torchvision==0.6.0 +onnx == 1.7.0 +numpy==1.20.3 +opencv-python==4.5.3.56 +tqdm==4.62.1 +scipy==1.6.2 +pycocotools==2.0 scikit-learn==0.24.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/test/parse.py b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/test/parse.py index a35b48d42a91a041e232bf9f22f9f2896b8bbfe3..6b1760f355d2819933ca75844c15ba8c5a561ec5 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/test/parse.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET/test/parse.py @@ -1,31 +1,31 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - result_json = result_json.split('_')[1] - result_json = result_json.split('/')[0] - print('om {} accuracy {}'.format(result_json, content)) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = content.split(' ') - fps = float(txt_data_list[2].replace('samples/s,', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + result_json = result_json.split('_')[1] + result_json = result_json.split('/')[0] + print('om {} accuracy {}'.format(result_json, content)) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = content.split(' ') + fps = float(txt_data_list[2].replace('samples/s,', '')) * 4 print('310 {} fps:{}'.format(result_txt.split('_')[4], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt index c80d660077532ade40fc6ae69b87f5e9c8462217..d5130dbf735d1a55ec0d55f24466fcaf4b7bf730 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +ModelConvert:OK PerfStatus=OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_postprocess.py b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_postprocess.py index 292c34384e02a1ff6180195fe9ab90b7fe5cc742..6d3f896bbf88f7793de2a1971d279a90e98b9431 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_postprocess.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_postprocess.py @@ -1,194 +1,194 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -sys.path.append('./exps/mspn.2xstg.coco/') -import argparse -from tqdm import tqdm -import numpy as np -import cv2 -import json -import time - -import torch -import torch.distributed as dist - -from cvpack.utils.logger import get_logger - -from config import cfg -from network import MSPN -from lib.utils.dataloader import get_test_loader -from lib.utils.comm import is_main_process, synchronize, all_gather -from lib.utils.transforms import flip_back -from dataset.COCO.coco import COCODataset - -def get_results(outputs, centers, scales, kernel=11, shifts=[0.25]): - scales *= 200 - nr_img = outputs.shape[0] - preds = np.zeros((nr_img, cfg.DATASET.KEYPOINT.NUM, 2)) - maxvals = np.zeros((nr_img, cfg.DATASET.KEYPOINT.NUM, 1)) - for i in range(nr_img): - score_map = outputs[i].copy() - score_map = score_map / 255 + 0.5 - kps = np.zeros((cfg.DATASET.KEYPOINT.NUM, 2)) - scores = np.zeros((cfg.DATASET.KEYPOINT.NUM, 1)) - border = 10 - dr = np.zeros((cfg.DATASET.KEYPOINT.NUM, - cfg.OUTPUT_SHAPE[0] + 2 * border, cfg.OUTPUT_SHAPE[1] + 2 * border)) - dr[:, border: -border, border: -border] = outputs[i].copy() - for w in range(cfg.DATASET.KEYPOINT.NUM): - dr[w] = cv2.GaussianBlur(dr[w], (kernel, kernel), 0) - for w in range(cfg.DATASET.KEYPOINT.NUM): - for j in range(len(shifts)): - if j == 0: - lb = dr[w].argmax() - y, x = np.unravel_index(lb, dr[w].shape) - dr[w, y, x] = 0 - x -= border - y -= border - lb = dr[w].argmax() - py, px = np.unravel_index(lb, dr[w].shape) - dr[w, py, px] = 0 - px -= border + x - py -= border + y - ln = (px ** 2 + py ** 2) ** 0.5 - if ln > 1e-3: - x += shifts[j] * px / ln - y += shifts[j] * py / ln - x = max(0, min(x, cfg.OUTPUT_SHAPE[1] - 1)) - y = max(0, min(y, cfg.OUTPUT_SHAPE[0] - 1)) - kps[w] = np.array([x * 4 + 2, y * 4 + 2]) - scores[w, 0] = score_map[w, int(round(y) + 1e-9), \ - int(round(x) + 1e-9)] - # aligned or not ... 
- kps[:, 0] = kps[:, 0] / cfg.INPUT_SHAPE[1] * scales[i][0] + \ - centers[i][0] - scales[i][0] * 0.5 - kps[:, 1] = kps[:, 1] / cfg.INPUT_SHAPE[0] * scales[i][1] + \ - centers[i][1] - scales[i][1] * 0.5 - preds[i] = kps - maxvals[i] = scores - - return preds, maxvals - - -def compute_on_dataset(data_loader, device="cpu"): - results = list() - cpu_device = torch.device("cpu") - - results = list() - data = tqdm(data_loader) if is_main_process() else data_loader - k = 0 - for _, batch in enumerate(data): - imgs, scores, centers, scales, img_ids = batch - output_name='img_%d_%d_1.bin' %(int(img_ids[0]), k) - output_path=os.path.join('result/dumpOutput_device0/',output_name) - outputs = np.fromfile(output_path, dtype=np.float32).reshape(1,17,64,48) - k += 1 - - centers = np.array(centers) - scales = np.array(scales) - preds, maxvals = get_results(outputs, centers, scales, - cfg.TEST.GAUSSIAN_KERNEL, cfg.TEST.SHIFT_RATIOS) - - kp_scores = maxvals.squeeze(-1).mean(axis=1) - preds = np.concatenate((preds, maxvals), axis=2) - - for i in range(preds.shape[0]): - keypoints = preds[i].reshape(-1).tolist() - score = scores[i] * kp_scores[i] - image_id = img_ids[i] - - results.append(dict(image_id=image_id, - category_id=1, - keypoints=keypoints, - score=score)) - return results - - -def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu, logger): - if is_main_process(): - logger.info("Accumulating ...") - all_predictions = all_gather(predictions_per_gpu) - - if not is_main_process(): - return - - predictions = list() - for p in all_predictions: - predictions.extend(p) - - return predictions - - -def inference(data_loader, logger, device="cpu"): - predictions = compute_on_dataset(data_loader, device) - synchronize() - predictions = _accumulate_predictions_from_multiple_gpus( - predictions, logger) - - if not is_main_process(): - return - - return predictions - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--local_rank", type=int, default=0) - parser.add_argument("--iter", "-i", type=int, default=-1) - parser.add_argument("--datasets_path",default="$MSPN_HOME/dataset/COCO") - args = parser.parse_args() - COCODataset.cur_dir=os.path.join(args.datasets_path) - num_gpus = int( - os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 - distributed = num_gpus > 1 - - if distributed: - torch.cuda.set_device(args.local_rank) - dist.init_process_group(backend="nccl", init_method="env://") - synchronize() - - if is_main_process() and not os.path.exists(cfg.TEST_DIR): - os.mkdir(cfg.TEST_DIR) - logger = get_logger( - cfg.DATASET.NAME, cfg.TEST_DIR, args.local_rank, 'test_log.txt') - - if args.iter == -1: - logger.info("Please designate one iteration.") - - data_loader = get_test_loader(cfg, num_gpus, args.local_rank, 'val', - is_dist=distributed) - - device = 'cpu' - results = inference(data_loader, logger, device) - synchronize() - - if is_main_process(): - logger.info("Dumping results ...") - results.sort( - key=lambda res:(res['image_id'], res['score']), reverse=True) - results_path = os.path.join(cfg.TEST_DIR, 'results.json') - with open(results_path, 'w') as f: - json.dump(results, f) - logger.info("Get all results.") - - data_loader.ori_dataset.evaluate(results_path) - - -if __name__ == '__main__': - begin = time.time() - main() - end = time.time() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +sys.path.append('./exps/mspn.2xstg.coco/') +import argparse +from tqdm import tqdm +import numpy as np +import cv2 +import json +import time + +import torch +import torch.distributed as dist + +from cvpack.utils.logger import get_logger + +from config import cfg +from network import MSPN +from lib.utils.dataloader import get_test_loader +from lib.utils.comm import is_main_process, synchronize, all_gather +from lib.utils.transforms import flip_back +from dataset.COCO.coco import COCODataset + +def get_results(outputs, centers, scales, kernel=11, shifts=[0.25]): + scales *= 200 + nr_img = outputs.shape[0] + preds = np.zeros((nr_img, cfg.DATASET.KEYPOINT.NUM, 2)) + maxvals = np.zeros((nr_img, cfg.DATASET.KEYPOINT.NUM, 1)) + for i in range(nr_img): + score_map = outputs[i].copy() + score_map = score_map / 255 + 0.5 + kps = np.zeros((cfg.DATASET.KEYPOINT.NUM, 2)) + scores = np.zeros((cfg.DATASET.KEYPOINT.NUM, 1)) + border = 10 + dr = np.zeros((cfg.DATASET.KEYPOINT.NUM, + cfg.OUTPUT_SHAPE[0] + 2 * border, cfg.OUTPUT_SHAPE[1] + 2 * border)) + dr[:, border: -border, border: -border] = outputs[i].copy() + for w in range(cfg.DATASET.KEYPOINT.NUM): + dr[w] = cv2.GaussianBlur(dr[w], (kernel, kernel), 0) + for w in range(cfg.DATASET.KEYPOINT.NUM): + for j in range(len(shifts)): + if j == 0: + lb = dr[w].argmax() + y, x = np.unravel_index(lb, dr[w].shape) + dr[w, y, x] = 0 + x -= border + y -= border + lb = dr[w].argmax() + py, px = np.unravel_index(lb, dr[w].shape) + dr[w, py, px] = 0 + px -= border + x + py -= border + y + ln = (px ** 2 + py ** 2) ** 0.5 + if ln > 1e-3: + x += shifts[j] * px / ln + y += shifts[j] * py / ln + x = max(0, min(x, cfg.OUTPUT_SHAPE[1] - 1)) + y = max(0, min(y, cfg.OUTPUT_SHAPE[0] - 1)) + kps[w] = np.array([x * 4 + 2, y * 4 + 2]) + scores[w, 0] = score_map[w, int(round(y) + 1e-9), \ + int(round(x) + 1e-9)] + # aligned or not ... 
+ kps[:, 0] = kps[:, 0] / cfg.INPUT_SHAPE[1] * scales[i][0] + \ + centers[i][0] - scales[i][0] * 0.5 + kps[:, 1] = kps[:, 1] / cfg.INPUT_SHAPE[0] * scales[i][1] + \ + centers[i][1] - scales[i][1] * 0.5 + preds[i] = kps + maxvals[i] = scores + + return preds, maxvals + + +def compute_on_dataset(data_loader, device="cpu"): + results = list() + cpu_device = torch.device("cpu") + + results = list() + data = tqdm(data_loader) if is_main_process() else data_loader + k = 0 + for _, batch in enumerate(data): + imgs, scores, centers, scales, img_ids = batch + output_name='img_%d_%d_1.bin' %(int(img_ids[0]), k) + output_path=os.path.join('result/dumpOutput_device0/',output_name) + outputs = np.fromfile(output_path, dtype=np.float32).reshape(1,17,64,48) + k += 1 + + centers = np.array(centers) + scales = np.array(scales) + preds, maxvals = get_results(outputs, centers, scales, + cfg.TEST.GAUSSIAN_KERNEL, cfg.TEST.SHIFT_RATIOS) + + kp_scores = maxvals.squeeze(-1).mean(axis=1) + preds = np.concatenate((preds, maxvals), axis=2) + + for i in range(preds.shape[0]): + keypoints = preds[i].reshape(-1).tolist() + score = scores[i] * kp_scores[i] + image_id = img_ids[i] + + results.append(dict(image_id=image_id, + category_id=1, + keypoints=keypoints, + score=score)) + return results + + +def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu, logger): + if is_main_process(): + logger.info("Accumulating ...") + all_predictions = all_gather(predictions_per_gpu) + + if not is_main_process(): + return + + predictions = list() + for p in all_predictions: + predictions.extend(p) + + return predictions + + +def inference(data_loader, logger, device="cpu"): + predictions = compute_on_dataset(data_loader, device) + synchronize() + predictions = _accumulate_predictions_from_multiple_gpus( + predictions, logger) + + if not is_main_process(): + return + + return predictions + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--local_rank", type=int, default=0) + parser.add_argument("--iter", "-i", type=int, default=-1) + parser.add_argument("--datasets_path",default="$MSPN_HOME/dataset/COCO") + args = parser.parse_args() + COCODataset.cur_dir=os.path.join(args.datasets_path) + num_gpus = int( + os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + distributed = num_gpus > 1 + + if distributed: + torch.cuda.set_device(args.local_rank) + dist.init_process_group(backend="nccl", init_method="env://") + synchronize() + + if is_main_process() and not os.path.exists(cfg.TEST_DIR): + os.mkdir(cfg.TEST_DIR) + logger = get_logger( + cfg.DATASET.NAME, cfg.TEST_DIR, args.local_rank, 'test_log.txt') + + if args.iter == -1: + logger.info("Please designate one iteration.") + + data_loader = get_test_loader(cfg, num_gpus, args.local_rank, 'val', + is_dist=distributed) + + device = 'cpu' + results = inference(data_loader, logger, device) + synchronize() + + if is_main_process(): + logger.info("Dumping results ...") + results.sort( + key=lambda res:(res['image_id'], res['score']), reverse=True) + results_path = os.path.join(cfg.TEST_DIR, 'results.json') + with open(results_path, 'w') as f: + json.dump(results, f) + logger.info("Get all results.") + + data_loader.ori_dataset.evaluate(results_path) + + +if __name__ == '__main__': + begin = time.time() + main() + end = time.time() print('postprocess finished in', str(end - begin), 'seconds') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_preprocess.py 
b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_preprocess.py index 616cf39bf5719fde0318acfc1944507b6ef611d1..cdc9cbb8987a27505944c156d5ff7acacb42fa7d 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_preprocess.py @@ -1,90 +1,90 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -sys.path.append('./exps/mspn.2xstg.coco/') -import numpy as np -import torch -import torchvision.transforms as transforms -from dataset.attribute import load_dataset -from config import cfg - -from dataset.attribute import load_dataset -from dataset.COCO.coco import COCODataset - - -def preprocess(save_path: str): - cpu_device = torch.device("cpu") - normalize = transforms.Normalize(mean=cfg.INPUT.MEANS, std=cfg.INPUT.STDS) - transform = transforms.Compose([transforms.ToTensor(), normalize]) - attr = load_dataset(cfg.DATASET.NAME) - stage='val' - if cfg.DATASET.NAME == 'COCO': - Dataset = COCODataset - dataset = Dataset(attr, stage, transform) - # -------- make data_loader -------- # - class BatchCollator(object): - def __init__(self, size_divisible): - self.size_divisible = size_divisible - def __call__(self, batch): - transposed_batch = list(zip(*batch)) - images = torch.stack(transposed_batch[0], dim=0) - scores = list(transposed_batch[1]) - centers = list(transposed_batch[2]) - scales = list(transposed_batch[3]) - image_ids = list(transposed_batch[4]) - - return images, scores, centers, scales, image_ids - - data_loader = torch.utils.data.DataLoader( - dataset,batch_size=1,collate_fn=BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY), ) - data_loader.ori_dataset = dataset - - data=data_loader - i = 0 - for _, batch in enumerate(data): - imgs, scores, centers, scales, img_ids = batch - print("=========",img_ids) - id=[str(x)for x in img_ids] - idx="".join(id) - imgs = imgs.to(cpu_device).numpy() - imgs.tofile(os.path.join(save_path,'img_' + idx + '_' + str(i)+ ".bin")) - i += 1 - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--datasets_path",default="$MSPN_HOME/dataset/COCO") - args = parser.parse_args() - COCODataset.cur_dir=os.path.join(args.datasets_path) - save_path = "./pre_dataset" - if not os.path.exists(save_path): - os.makedirs(save_path) - preprocess(save_path) - - - - - - - - - - - - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import sys
+sys.path.append('./exps/mspn.2xstg.coco/')
+import argparse
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from config import cfg
+
+from dataset.attribute import load_dataset
+from dataset.COCO.coco import COCODataset
+
+
+def preprocess(save_path: str):
+    cpu_device = torch.device("cpu")
+    normalize = transforms.Normalize(mean=cfg.INPUT.MEANS, std=cfg.INPUT.STDS)
+    transform = transforms.Compose([transforms.ToTensor(), normalize])
+    attr = load_dataset(cfg.DATASET.NAME)
+    stage='val'
+    if cfg.DATASET.NAME == 'COCO':
+        Dataset = COCODataset
+    dataset = Dataset(attr, stage, transform)
+    # -------- make data_loader -------- #
+    class BatchCollator(object):
+        def __init__(self, size_divisible):
+            self.size_divisible = size_divisible
+        def __call__(self, batch):
+            transposed_batch = list(zip(*batch))
+            images = torch.stack(transposed_batch[0], dim=0)
+            scores = list(transposed_batch[1])
+            centers = list(transposed_batch[2])
+            scales = list(transposed_batch[3])
+            image_ids = list(transposed_batch[4])
+
+            return images, scores, centers, scales, image_ids
+
+    data_loader = torch.utils.data.DataLoader(
+        dataset,batch_size=1,collate_fn=BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY), )
+    data_loader.ori_dataset = dataset
+
+    data=data_loader
+    i = 0
+    for _, batch in enumerate(data):
+        imgs, scores, centers, scales, img_ids = batch
+        print("=========",img_ids)
+        id=[str(x)for x in img_ids]
+        idx="".join(id)
+        imgs = imgs.to(cpu_device).numpy()
+        imgs.tofile(os.path.join(save_path,'img_' + idx + '_' + str(i)+ ".bin"))
+        i += 1
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--datasets_path",default="$MSPN_HOME/dataset/COCO")
+    args = parser.parse_args()
+    COCODataset.cur_dir=os.path.join(args.datasets_path)
+    save_path = "./pre_dataset"
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    preprocess(save_path)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_pth2onnx.py b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_pth2onnx.py
index 3624abdf8a083a3f79dee931c884e6e0e56045f3..86e36f5f6cad3ffdcb6993d84e30d68e5ece180d 100644
--- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_pth2onnx.py
+++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/MSPN_pth2onnx.py
@@ -1,53 +1,53 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import sys -sys.path.append('./exps/mspn.2xstg.coco/') -import torch -from config import cfg -from network import MSPN -import torch.onnx - - -def main(): - - model = MSPN(cfg) - - model_file = os.path.join(cfg.OUTPUT_DIR, "mspn_2xstg_coco.pth") - if os.path.exists(model_file): - print('MSPN loaded') - state_dict = torch.load( - model_file, map_location=lambda storage, loc: storage) - state_dict = state_dict['model'] - model.load_state_dict(state_dict) - model.eval() - - dummy_input= torch.randn(32, 3, 256, 192) - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - export_onnx_file = "MSPN.onnx" - torch.onnx.export(model, # model being run - dummy_input, # model input (or a tuple for multiple inputs) - export_onnx_file, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=11, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names - output_names = ['output'], # the model's output names - dynamic_axes=dynamic_axes, # variable lenght axes - ) - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +sys.path.append('./exps/mspn.2xstg.coco/') +import torch +from config import cfg +from network import MSPN +import torch.onnx + + +def main(): + + model = MSPN(cfg) + + model_file = os.path.join(cfg.OUTPUT_DIR, "mspn_2xstg_coco.pth") + if os.path.exists(model_file): + print('MSPN loaded') + state_dict = torch.load( + model_file, map_location=lambda storage, loc: storage) + state_dict = state_dict['model'] + model.load_state_dict(state_dict) + model.eval() + + dummy_input= torch.randn(32, 3, 256, 192) + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + export_onnx_file = "MSPN.onnx" + torch.onnx.export(model, # model being run + dummy_input, # model input (or a tuple for multiple inputs) + export_onnx_file, # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=11, # the ONNX version to export the model to + do_constant_folding=True, # whether to execute constant folding for optimization + input_names = ['input'], # the model's input names + output_names = ['output'], # the model's output names + dynamic_axes=dynamic_axes, # variable lenght axes + ) + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/README.md b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/README.md index bbfc1e7e0c477c1f21d70ede1293a8dfe48417e4..230f66a204555f0a6c4a421b60a37f1eb63c2aa8 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/README.md +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/README.md @@ -1,64 +1,64 @@ -# MSPN模型PyTorch离线推理指导 - -## 1 环境准备 - -1. 
下载源代码 - -``` -git clone https://github.com/megvii-research/MSPN -b master -cd MSPN -``` - -1. 设置环境变量,将当前目录设置为程序运行的主目录 - - ``` - export MSPN_HOME=$(pwd) - export PYTHONPATH=$PYTHONPATH:$MSPN_HOME - ``` - -3. 配置环境要求 - - ``` - pip3 install -r requirements.txt - ``` - -4. 下载COCOAPI - - ``` - git clone https://github.com/cocodataset/cocoapi.git $MSPN_HOME/lib/COCOAPI - cd $MSPN_HOME/lib/COCOAPI/PythonAPI - make install - ``` - -5. 下载数据集 - -(1)下载COCO2014数据集[COCO website][1], 将 train2014/val2014文件夹分别放在“$MSPN_HOME/dataset/COCO/images/” 目录下. - -(2)下载测试结果[Detection result][2], 把它放在“ $MSPN_HOME/dataset/COCO/det_json/”目录下. - -(3)将预训练好的权重文件 “mspn_2xstg_coco.pth”放在当前目录 - -6.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer),将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/test.sh -bash test/performance_test.sh -``` - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| --------- | ----------- | --------------- | ---------- | ---------- | -| MSPN bs1 | 74.5 | 74.1 | 372.45fps | 401.74fps | -| MSPN bs16 | 74.5 | 74.1 | 712.271fps | 436.868fps | - -## 相关链接 - -[1]: http://cocodataset.org/#download -[2]: https://drive.google.com/open?id=1MW27OY_4YetEZ4JiD4PltFGL_1-caECy - +# MSPN模型PyTorch离线推理指导 + +## 1 环境准备 + +1. 下载源代码 + +``` +git clone https://github.com/megvii-research/MSPN -b master +cd MSPN +``` + +1. 设置环境变量,将当前目录设置为程序运行的主目录 + + ``` + export MSPN_HOME=$(pwd) + export PYTHONPATH=$PYTHONPATH:$MSPN_HOME + ``` + +3. 配置环境要求 + + ``` + pip3 install -r requirements.txt + ``` + +4. 下载COCOAPI + + ``` + git clone https://github.com/cocodataset/cocoapi.git $MSPN_HOME/lib/COCOAPI + cd $MSPN_HOME/lib/COCOAPI/PythonAPI + make install + ``` + +5. 下载数据集 + +(1)下载COCO2014数据集[COCO website][1], 将 train2014/val2014文件夹分别放在“$MSPN_HOME/dataset/COCO/images/” 目录下. + +(2)下载测试结果[Detection result][2], 把它放在“ $MSPN_HOME/dataset/COCO/det_json/”目录下. + +(3)将预训练好的权重文件 “mspn_2xstg_coco.pth”放在当前目录 + +6.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer),将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/test.sh +bash test/performance_test.sh +``` + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| --------- | ----------- | --------------- | ---------- | ---------- | +| MSPN bs1 | 74.5 | 74.1 | 372.45fps | 401.74fps | +| MSPN bs16 | 74.5 | 74.1 | 712.271fps | 436.868fps | + +## 相关链接 + +[1]: http://cocodataset.org/#download +[2]: https://drive.google.com/open?id=1MW27OY_4YetEZ4JiD4PltFGL_1-caECy + diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/gen_dataset_info.py index 85c04dfdfca89e4e96277b5f86aaa34a63f2a818..72ba5e05c55918b06e9d5f235b9aae341ccbd87f 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/gen_dataset_info.py @@ -1,37 +1,37 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
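Because MSPN_pth2onnx.py above exports MSPN.onnx with a dynamic batch axis (`{0: '-1'}` on both `input` and `output`), a quick sanity check before converting to om is to run the ONNX model at a batch size other than the 32 used for the dummy input. A small sketch with onnxruntime, which is not part of the original scripts; the input name `input` and the 3x256x192 shape are taken from the export code above:

```
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("MSPN.onnx", providers=["CPUExecutionProvider"])
x = np.random.randn(4, 3, 256, 192).astype(np.float32)   # batch 4 instead of 32
out = sess.run(None, {"input": x})[0]
print(out.shape)   # the batch dimension should follow the input if the dynamic axis works
```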
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_info = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, info in enumerate(bin_info): - content = ' '.join([str(index), info, width, height]) - file.write(content) - file.write('\n') - - - - -if __name__ == '__main__': - file_path = sys.argv[1] - info_name = sys.argv[2] - width =sys.argv[3] - height =sys.argv[4] - get_bin_info(file_path, info_name, width, height) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_info = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, info in enumerate(bin_info): + content = ' '.join([str(index), info, width, height]) + file.write(content) + file.write('\n') + + + + +if __name__ == '__main__': + file_path = sys.argv[1] + info_name = sys.argv[2] + width =sys.argv[3] + height =sys.argv[4] + get_bin_info(file_path, info_name, width, height) diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/test/perf_g.sh b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/test/perf_g.sh index 0ebe93e395b2aa8a6c63b0f3c99fbf8df80c7e9f..047e1c667012b750e3425659c847e31aecae435f 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/test/perf_g.sh +++ b/ACL_PyTorch/contrib/cv/pose_estimation/MSPN/test/perf_g.sh @@ -1,23 +1,23 @@ -#!/bin/bash - -rm -rf perf_bs1.log -trtexec --onnx=MSPN.onnx --fp16 --shapes='input:1x3x256x192' –-fp16 --dumpProfile > perf_bs1.log -perf_str=`grep "GPU.* mean.*ms$" perf_bs1.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" perf_bs1.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "t4 bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' - -rm -rf perf_bs16.log -trtexec --onnx=MSPN.onnx --fp16 --shapes='input:16x3x256x192' > perf_bs16.log -perf_str=`grep "GPU.* mean.*ms$" perf_bs16.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" perf_bs16.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "t4 bs16 fps:%.3f\n", 1000*1/('$perf_num'/16)}' +#!/bin/bash + +rm -rf 
perf_bs1.log +trtexec --onnx=MSPN.onnx --fp16 --shapes='input:1x3x256x192' –-fp16 --dumpProfile > perf_bs1.log +perf_str=`grep "GPU.* mean.*ms$" perf_bs1.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" perf_bs1.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "t4 bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' + +rm -rf perf_bs16.log +trtexec --onnx=MSPN.onnx --fp16 --shapes='input:16x3x256x192' > perf_bs16.log +perf_str=`grep "GPU.* mean.*ms$" perf_bs16.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" perf_bs16.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "t4 bs16 fps:%.3f\n", 1000*1/('$perf_num'/16)}' diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/fusion_switch.cfg b/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/fusion_switch.cfg index 7e5b78e80e217d17734f1dbf689e8a3265689480..7635898ef0fc6b1361d848c5d2493f0c5a87f91d 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/fusion_switch.cfg +++ b/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/fusion_switch.cfg @@ -1,2 +1,2 @@ -ReshapeTransposeFusionPass:off +ReshapeTransposeFusionPass:off TransposeReshapeFusionPass:off \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/modelzoo_level.txt index c80d660077532ade40fc6ae69b87f5e9c8462217..d5130dbf735d1a55ec0d55f24466fcaf4b7bf730 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/TransPose/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +ModelConvert:OK PerfStatus=OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/LISCENCE b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/LISCENCE index df2c2f2c3e55bfbad1aebe53321a94ee5a3854bc..c8ec075d5b892f823d0b485ad4fdd01355c57b3e 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/LISCENCE +++ b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/LISCENCE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Copyright 2018-2019 Open-MMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/README.md b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/README.md index 41d22e57ece5533d3e67f99c141096ff34a39f3c..19035b4f89975a46457d45ca82d1ecbfb6c77026 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/README.md +++ b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/README.md @@ -1,39 +1,39 @@ -# VideoPose3D 模型 Pytorch 离线推理 -## 1. 环境准备 -1. 必要的软件依赖 - - Pytorch == 1.5.0 - - torchvision == 0.5.0 - - msame 软件,安装在当前目录下 - - numpy -2. 获取、修改与安装开源软件代码 -在当前目录下,进行以下操作 -``` -git clone https://github.com/facebookresearch/VideoPose3D.git -cd VideoPose3D -git reset 1afb1ca0f1237776518469876342fc8669d3f6a9 --hard -patch -p1 < ../vp3d.patch -mkdir checkpoint -cd .. -``` -3. 获取权重文件 -将提供的 `model_best.bin` 文件放在 `.\VideoPose3D\checkpoint` 目录下 -4. 获取数据集 -将提供的 `data` 文件夹放在 `.\VideoPose3D` 目录下 -## 2. 离线推理 -310上执行,执行时使 `npu-smi info` 查看设备状态,确保 `device` 空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh -``` -### 评测结果 -| 模型 | 官网 pth 精度 | 310 离线推理精度 | 基准性能 | 310 性能 | -|:----:|:----:|:----:|:----:|:----:| -|VideoPose3D conv1d bs1| 46.8 mm| 46.5 mm | 584834 fps | 409776 fps | -|VideoPose3D conv2d bs1| - | 46.6 mm | 605179 fps | 580903 fps | - -备注: -- 310 离线推理使用的是我用单卡自行训练的 Model,效果好于官网 -- VideoPose3D 原本代码中全程使用 conv1d(以及相应的 batchnorm1d)。考虑到转 om 后 -的 conv1d 均由 conv2d 算子实现,因此我将源码中的 conv1d 以及相应操作全部替换为 conv2d -及其相应操作。这个修改使得在 Ascend 310 上的推理性能与原本代码在 GPU 上的推理性能持平 +# VideoPose3D 模型 Pytorch 离线推理 +## 1. 环境准备 +1. 必要的软件依赖 + - Pytorch == 1.5.0 + - torchvision == 0.5.0 + - msame 软件,安装在当前目录下 + - numpy +2. 获取、修改与安装开源软件代码 +在当前目录下,进行以下操作 +``` +git clone https://github.com/facebookresearch/VideoPose3D.git +cd VideoPose3D +git reset 1afb1ca0f1237776518469876342fc8669d3f6a9 --hard +patch -p1 < ../vp3d.patch +mkdir checkpoint +cd .. +``` +3. 获取权重文件 +将提供的 `model_best.bin` 文件放在 `.\VideoPose3D\checkpoint` 目录下 +4. 获取数据集 +将提供的 `data` 文件夹放在 `.\VideoPose3D` 目录下 +## 2. 
离线推理 +310上执行,执行时使 `npu-smi info` 查看设备状态,确保 `device` 空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh +``` +### 评测结果 +| 模型 | 官网 pth 精度 | 310 离线推理精度 | 基准性能 | 310 性能 | +|:----:|:----:|:----:|:----:|:----:| +|VideoPose3D conv1d bs1| 46.8 mm| 46.5 mm | 584834 fps | 409776 fps | +|VideoPose3D conv2d bs1| - | 46.6 mm | 605179 fps | 580903 fps | + +备注: +- 310 离线推理使用的是我用单卡自行训练的 Model,效果好于官网 +- VideoPose3D 原本代码中全程使用 conv1d(以及相应的 batchnorm1d)。考虑到转 om 后 +的 conv1d 均由 conv2d 算子实现,因此我将源码中的 conv1d 以及相应操作全部替换为 conv2d +及其相应操作。这个修改使得在 Ascend 310 上的推理性能与原本代码在 GPU 上的推理性能持平 - 即便考虑到比较 conv2d 版本在 GPU 与 Acend310 上的性能,差距也小于二者在 conv1d 下的性能 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_postprocess.py b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_postprocess.py index 811f555e4e36c8e17b0edd0b65443b7cec32c6ee..f21d0790ac36b50f72471453335c6e6b7bee5aa8 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_postprocess.py @@ -1,91 +1,91 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
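The VideoPose3D notes above state that every conv1d (and batchnorm1d) was replaced with its conv2d counterpart because the om conversion implements conv1d through conv2d anyway. The replacement is numerically equivalent when the 2-D kernel is 1×k and the temporal axis is given a dummy height of 1. A self-contained check of that claim (a sketch, not code from the repository):

```
import torch
import torch.nn as nn

torch.manual_seed(0)
c_in, c_out, k, t = 8, 16, 3, 50
conv1d = nn.Conv1d(c_in, c_out, k)
conv2d = nn.Conv2d(c_in, c_out, (1, k))
# copy the 1-D weights into the (1, k) kernels so both layers compute the same mapping
conv2d.weight.data.copy_(conv1d.weight.data.unsqueeze(2))
conv2d.bias.data.copy_(conv1d.bias.data)

x = torch.randn(1, c_in, t)
y1 = conv1d(x)                            # (1, c_out, t - k + 1)
y2 = conv2d(x.unsqueeze(2)).squeeze(2)    # insert a height-1 axis, then drop it again
print(torch.allclose(y1, y2, atol=1e-6))  # expected: True
```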
-import math -import torch -import torch.nn as nn -import numpy as np -from skimage.measure.simple_metrics import compare_psnr -import os -import struct -from torch.autograd import Variable -import glob -import sys - -def batch_PSNR(img, imclean, data_range): - Img = img.data.cpu().numpy().astype(np.float32) - Iclean = imclean.data.cpu().numpy().astype(np.float32) - PSNR = 0 - for i in range(Img.shape[0]): - PSNR += compare_psnr(Iclean[i,:,:,:], Img[i,:,:,:], data_range=data_range) - return (PSNR/Img.shape[0]) - -def get_output_path(bin_folder,label_path): - result_paths = [] - target_paths = [] - print("result_bin_folder:", bin_folder) - files_source = glob.glob(os.path.join(bin_folder,'*.bin')) - files_source.sort() - for file in files_source: - if file.endswith('.bin'): - result_path = file - result_paths.append(result_path) - name = (result_path.split('/')[3]).split('_')[0] - target_path = os.path.join(label_path,name+'.bin') - target_paths.append(target_path) - return result_paths,target_paths - -def file2tensor(output_bin,target_bin): - size = os.path.getsize(output_bin) - res1 = [] - L = int(size / 4) - binfile = open(output_bin, 'rb') - for i in range(L): - data = binfile.read(4) - num = struct.unpack('f', data) - res1.append(num[0]) - binfile.close() - dim_res = np.array(res1).reshape(1, 1, 321, 481) - tensor_res = torch.tensor(dim_res, dtype=torch.float32) - - size = os.path.getsize(target_bin) - res2 = [] - L = int(size / 4) - binfile = open(target_bin, 'rb') - for i in range(L): - data = binfile.read(4) - num = struct.unpack('f', data) - res2.append(num[0]) - binfile.close() - dim_res = np.array(res2).reshape(1, 1, 321, 481) - tensor_tar = torch.tensor(dim_res, dtype=torch.float32) - return tensor_res,tensor_tar - -def post_process(result_path,target_path): - output_path, target_path= get_output_path(bin_folder=result_path,label_path=label_path) - psnr_val = 0 - for i in range(len(output_path)): - output,target = file2tensor(output_path[i],target_path[i]) - Out = torch.clamp(output, 0., 1.) - psnr = batch_PSNR(Out, target, 1.) - name = (output_path[i].split('/')[3]).split('_')[0] - print(name,batch_PSNR(output, target, 1.)) - psnr_val += psnr - psnr_val /= i - print('average psnr_val:',psnr_val) - -if __name__ == "__main__": - result_path = sys.argv[1] - label_path = sys.argv[2] - post_process(result_path = result_path, target_path = label_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import math +import torch +import torch.nn as nn +import numpy as np +from skimage.measure.simple_metrics import compare_psnr +import os +import struct +from torch.autograd import Variable +import glob +import sys + +def batch_PSNR(img, imclean, data_range): + Img = img.data.cpu().numpy().astype(np.float32) + Iclean = imclean.data.cpu().numpy().astype(np.float32) + PSNR = 0 + for i in range(Img.shape[0]): + PSNR += compare_psnr(Iclean[i,:,:,:], Img[i,:,:,:], data_range=data_range) + return (PSNR/Img.shape[0]) + +def get_output_path(bin_folder,label_path): + result_paths = [] + target_paths = [] + print("result_bin_folder:", bin_folder) + files_source = glob.glob(os.path.join(bin_folder,'*.bin')) + files_source.sort() + for file in files_source: + if file.endswith('.bin'): + result_path = file + result_paths.append(result_path) + name = (result_path.split('/')[3]).split('_')[0] + target_path = os.path.join(label_path,name+'.bin') + target_paths.append(target_path) + return result_paths,target_paths + +def file2tensor(output_bin,target_bin): + size = os.path.getsize(output_bin) + res1 = [] + L = int(size / 4) + binfile = open(output_bin, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res1.append(num[0]) + binfile.close() + dim_res = np.array(res1).reshape(1, 1, 321, 481) + tensor_res = torch.tensor(dim_res, dtype=torch.float32) + + size = os.path.getsize(target_bin) + res2 = [] + L = int(size / 4) + binfile = open(target_bin, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res2.append(num[0]) + binfile.close() + dim_res = np.array(res2).reshape(1, 1, 321, 481) + tensor_tar = torch.tensor(dim_res, dtype=torch.float32) + return tensor_res,tensor_tar + +def post_process(result_path,target_path): + output_path, target_path= get_output_path(bin_folder=result_path,label_path=label_path) + psnr_val = 0 + for i in range(len(output_path)): + output,target = file2tensor(output_path[i],target_path[i]) + Out = torch.clamp(output, 0., 1.) + psnr = batch_PSNR(Out, target, 1.) + name = (output_path[i].split('/')[3]).split('_')[0] + print(name,batch_PSNR(output, target, 1.)) + psnr_val += psnr + psnr_val /= i + print('average psnr_val:',psnr_val) + +if __name__ == "__main__": + result_path = sys.argv[1] + label_path = sys.argv[2] + post_process(result_path = result_path, target_path = label_path) #get_output_path(bin_folder = 'result/dumpOutput_device0') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_preprocess.py b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_preprocess.py index 39aca261a2b0ffb6c91ba977673fa639b0603cda..edfcfaae983861844f8bbda3fefdc3f71c38c41b 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_preprocess.py +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_preprocess.py @@ -1,65 +1,65 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
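One observation on `post_process` in the ADNet_postprocess.py hunk above: after the loop, `psnr_val` is divided by the last loop index (`psnr_val /= i`) rather than by the number of result files, so the reported average PSNR is overstated by a factor of N/(N-1), roughly 1.5% for the 68 BSD68 images. A minimal sketch of the intended average, reusing `file2tensor` and `batch_PSNR` from the script above:

```
import torch

def average_psnr(output_paths, target_paths):
    # average over all pairs instead of dividing by the last loop index
    total = 0.0
    for out_bin, tgt_bin in zip(output_paths, target_paths):
        output, target = file2tensor(out_bin, tgt_bin)   # helpers defined in ADNet_postprocess.py
        total += batch_PSNR(torch.clamp(output, 0., 1.), target, 1.)
    return total / len(output_paths)
```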
-import os -import os.path -import numpy as np -import random -import torch -import cv2 -import glob -import torch.utils.data as udata -from utils import data_augmentation -from torch.autograd import Variable -import sys - -def normalize(data): - return data/255. - -def preprocess(data_path = './data/BSD68',save_path='./prep_dataset'): - files_source = glob.glob(os.path.join(data_path, '*.png')) - files_source.sort() - # process data - psnr_test = 0 - for f in files_source: - # image - Img = cv2.imread(f) - H = Img.shape[0] - W = Img.shape[1] - if H > W: - Img= cv2.flip(cv2.transpose(Img), 1) - Img = normalize(np.float32(Img[:,:,0])) - Img = np.expand_dims(Img, 0) - Img = np.expand_dims(Img, 1) - ISource = torch.Tensor(Img) - # noise - torch.manual_seed(0) #set the seed - noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=25/255.) - # noisy image - INoisy = ISource + noise - ISource = Variable(ISource) - INoisy = Variable(INoisy) - print(f,'has benn transformed into binary file') - name = (f.split('/')[3]).split('.')[0] - ISource = np.array(ISource).astype(np.float32) - if not os.path.exists(os.path.join(save_path,'ISoure')): - os.makedirs(os.path.join(save_path,'ISoure')) - if not os.path.exists(os.path.join(save_path,'INoisy')): - os.makedirs(os.path.join(save_path,'INoisy')) - ISource.tofile(os.path.join(save_path,'ISoure',name+'.bin')) - INoisy = np.array(INoisy).astype(np.float32) - INoisy.tofile(os.path.join(save_path,'INoisy',name+'.bin')) -if __name__ == '__main__': - data_path = sys.argv[1] - save_path = sys.argv[2] - preprocess(data_path,save_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import os.path +import numpy as np +import random +import torch +import cv2 +import glob +import torch.utils.data as udata +from utils import data_augmentation +from torch.autograd import Variable +import sys + +def normalize(data): + return data/255. + +def preprocess(data_path = './data/BSD68',save_path='./prep_dataset'): + files_source = glob.glob(os.path.join(data_path, '*.png')) + files_source.sort() + # process data + psnr_test = 0 + for f in files_source: + # image + Img = cv2.imread(f) + H = Img.shape[0] + W = Img.shape[1] + if H > W: + Img= cv2.flip(cv2.transpose(Img), 1) + Img = normalize(np.float32(Img[:,:,0])) + Img = np.expand_dims(Img, 0) + Img = np.expand_dims(Img, 1) + ISource = torch.Tensor(Img) + # noise + torch.manual_seed(0) #set the seed + noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=25/255.) 
+ # noisy image + INoisy = ISource + noise + ISource = Variable(ISource) + INoisy = Variable(INoisy) + print(f,'has benn transformed into binary file') + name = (f.split('/')[3]).split('.')[0] + ISource = np.array(ISource).astype(np.float32) + if not os.path.exists(os.path.join(save_path,'ISoure')): + os.makedirs(os.path.join(save_path,'ISoure')) + if not os.path.exists(os.path.join(save_path,'INoisy')): + os.makedirs(os.path.join(save_path,'INoisy')) + ISource.tofile(os.path.join(save_path,'ISoure',name+'.bin')) + INoisy = np.array(INoisy).astype(np.float32) + INoisy.tofile(os.path.join(save_path,'INoisy',name+'.bin')) +if __name__ == '__main__': + data_path = sys.argv[1] + save_path = sys.argv[2] + preprocess(data_path,save_path) diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_pth2onnx.py b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_pth2onnx.py index 6c9de26ee21c4684afe6a1a0776d180e3a317de3..53f032df8e8a66043949284f819d2c44d250de8f 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/ADNet_pth2onnx.py @@ -1,49 +1,49 @@ - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from models import ADNet -import torch -import torch.onnx -import sys -from collections import OrderedDict -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(path, output_file1): - net = ADNet(channels=1, num_of_layers=17) - model = net #model = nn.DataParallel(net, device_ids=device_ids).cuda() - checkpoint = torch.load(path, map_location='cpu') - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - model.eval() - input_names = ["image"] - output_names = ["output1"] - dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} - dummy_input1 = torch.randn(1, 1, 321, 481) - torch.onnx.export(model, dummy_input1, output_file1, input_names = input_names, dynamic_axes = dynamic_axes,output_names = output_names, opset_version=11, verbose=True) - print("ADNET onnx has transformed successfully and this model supports dynamic axes") - print('onnx export done.') - -if __name__ == "__main__": - path = sys.argv[1] - onnx_path = sys.argv[2] + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
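A short note on `proc_nodes_module` in the ADNet_pth2onnx.py hunk here: it strips the `module.` prefix that `nn.DataParallel` adds to parameter names, so a checkpoint saved from a multi-GPU run can be loaded into a plain `ADNet` instance on CPU. A compact equivalent, shown only as a sketch (the checkpoint name `model_70.pth` and the model arguments are the ones used elsewhere in this patch):

```
import torch
from models import ADNet

model = ADNet(channels=1, num_of_layers=17)
checkpoint = torch.load("model_70.pth", map_location="cpu")
# drop the first "module." in each key; the original helper uses str.replace on every occurrence
state_dict = {k.replace("module.", "", 1): v for k, v in checkpoint.items()}
model.load_state_dict(state_dict)
model.eval()
```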
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from models import ADNet +import torch +import torch.onnx +import sys +from collections import OrderedDict +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(path, output_file1): + net = ADNet(channels=1, num_of_layers=17) + model = net #model = nn.DataParallel(net, device_ids=device_ids).cuda() + checkpoint = torch.load(path, map_location='cpu') + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + model.eval() + input_names = ["image"] + output_names = ["output1"] + dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} + dummy_input1 = torch.randn(1, 1, 321, 481) + torch.onnx.export(model, dummy_input1, output_file1, input_names = input_names, dynamic_axes = dynamic_axes,output_names = output_names, opset_version=11, verbose=True) + print("ADNET onnx has transformed successfully and this model supports dynamic axes") + print('onnx export done.') + +if __name__ == "__main__": + path = sys.argv[1] + onnx_path = sys.argv[2] pth2onnx(path, onnx_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/README.md b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/README.md index 53bb64a0106b4004a65037b5af9d87c573974d03..e01ab9524216a826503c326e0a49e7df4ab027ab 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/README.md +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/README.md @@ -1,296 +1,296 @@ -# ADNet推理说明 - -## 1 模型概述 - -- **[论文地址](https://www.sciencedirect.com/science/article/pii/S0893608019304241)** -- **[代码地址](https://github.com/hellloxiaotian/ADNet)** - -### 1.1 论文地址 - -[ADNet论文](https://www.sciencedirect.com/science/article/pii/S0893608019304241) - -### 1.2 代码地址 - -[ADNet代码](https://github.com/hellloxiaotian/ADNet) - -branch:master - -commitid:commit 997df8f0cd5cebe2d26a1468c866dd927512686f - - -## 2 环境说明 - -- 深度学习框架 -- python第三方库 - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.2 - -pytorch == 1.5.0 -torchvision == 0.5.0 -onnx == 1.7.0 -onnx-simplifier == 0.3.6 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.21.2 -Pillow == 8.3.0 -opencv-python == 4.5.3.56 -scikit-image==0.16.2 -``` - -## 3 模型转换 - -- pth转om模型 - -### 3.1 pth转om模型 - -1.获取pth权重文件 - -[pth权重文件](https://github.com/hellloxiaotian/ADNet/blob/master/gray/g25/model_70.pth -) md5sum:7a93fb1f437cbce0fd235daaa7b9cffd - -2.下载ADNet推理代码 - -``` -git clone https://gitee.com/wang-chaojiemayj/modelzoo.git -cd modelzoo -git checkout tuili -``` -进入ADNet目录 -``` -cd ./contrib/ACL_PyTorch/Research/cv/quality_enhancement/ADnet -``` -3.pth模型转onnx模型,onnx转成om模型 - -pth模型转onnx模型 -``` -python3.7.5 ADNet_pth2onnx.py model_70.pth ADNet.onnx -``` -onnx转出om -``` -source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) -# bs1 -atc --framework=5 --model=ADNet.onnx --output=ADNet_bs1 --input_format=NCHW --input_shape="image:1,1,321,481" --log=debug --soc_version=Ascend310 -#bs16 -atc --framework=5 --model=ADNet.onnx --output=ADNet_bs16 --input_format=NCHW --input_shape="image:16,1,321,481" --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- 数据集获取 -- 数据预处理 -- 生成数据集信息文件 - -### 4.1 数据集获取 - -本模型支持BSD68数据集共68张数据集,可从百度云盘下载 - 
-链接:https://pan.baidu.com/s/1XiePOuutbAuKRRTV949FlQ -提取码:0315 - -文件结构如下 - -``` -|ADNet--test -| | |--pth2om.sh -| | |--perf_t4.sh -| | |--parse.py -| | |--eval_acc_perf.sh -| |--datset -| | |--BSD68 -| |--prep_dataset -| | |--ISoure -| | |--INoisy -| |--util.py -| |--requirements.tx -| |--models.py -| |--gen_dataset_info.py -| |--env.sh -| |--ADNet_pth2onnx.py -| |--ADNet_preprocess.py -| |--ADNet_postprocess.py -``` - - -### 4.2 数据集预处理 - -运行ADNet_preprocess.py -``` -python3.7.5 ADNet_preprocess.py ./dataset/BSD68 ./prep_dataset -``` -二进制文件将保存在./prep_dataset目录下 - -### 4.3 生成数据集信息文件 - -1.执行生成数据集信息脚本gen_dataset_info.py,生成数据集信息文件 - -``` -python3.7.5 gen_dataset_info.py ./prep_dataset/INoisy ADNet_prep_bin.info 481 321 -``` - -## 5 离线推理 - -- benchmark工具概述 -- 离线推理 - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -``` - -2.执行离线推理 - -``` -bs1: -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./ADNet_bs1.om -input_text_path=./ADNet_prep_bin.info -input_width=481 -input_height=321 -output_binary=True -useDvpp=False -bs16: -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=16 -om_path=./ADNet_bs16.om -input_text_path=./ADNet_prep_bin.info -input_width=481 -input_height=321 -output_binary=True -useDvpp=False -``` - -输出结果分别保存在当前目录result/dumpOutput_device0和result/dumpOutput_device1中,模型的输出有三个,其中需要的是名为output1的输出,shape为(1,19,1024,2048)(NCHW),数据类型为FP16,每个输入对应的输出对应三个_x.bin(x代表1,2,3)文件。 - -## 6 精度对比 - -- 离线推理精度 -- 开源精度 -- 开源精度对比 - -### 6.1 离线推理精度统计 - -后处理统计PSNR精度 - -调用ADNet_postprocess.py脚本推理结果与label比对,获取PSNRj精度数据,结果保存在ADNet_bs1.log和ADNet_bs4.log - -``` -python3.7.5 -u ADNet_postprocess.py ./result/dumpOutput_device0 ./prep_dataset/ISoure ./out >ADNet_bs1.log -python3.7.5 -u ADNet_postprocess.py ./result/dumpOutput_device1 ./prep_dataset/ISoure ./out >ADNet_bs16.log -``` - -第一个为benchmark输出目录,第二个标签目录,第三个为重定向输出目录 - -``` -PSNR:29.68 -``` - -经过对bs1与bs6的om测试,本模型batch1的精度与batch4的精度一致,精度数据如上 -### 6.2 开源精度 - -pth精度 - -``` -Model 论文 开源pth文件 -ADNet 29.27 29.25 -``` - -### 6.3 精度对比 - -将得到的om模型离线推理精度与pth精度作比较,om模型精度高于pth模型精度,精度达标。 - -## 7 性能对比 - -- NPU性能数据 -- T4性能数据 -- 性能对比 - -### 7.1 npu性能数据 - -1.benchmark工具在整个数据集上推理获得性能数据。 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 21.1584, latency: 3213.85 -[data read] throughputRate: 2267.5, moduleLatency: 0.441015 -[preprocess] throughputRate: 613.431, moduleLatency: 1.63018 -[inference] throughputRate: 33.8299, Interface throughputRate: 35.7852, moduleLatency: 29.1051 -[postprocess] throughputRate: 34.309, moduleLatency: 29.1469 - ------------------------------------------------------------ -``` - -Interface throughputRate: 35.7852,35.7852x4=143.1408即是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: - -``` ------------------Performance Summary------------------ -[e2e] throughputRate: 19.8971, latency: 3417.58 -[data read] throughputRate: 2382.7, moduleLatency: 0.419691 -[preprocess] throughputRate: 405.505, moduleLatency: 2.46606 -[inference] throughputRate: 27.4387, Interface throughputRate: 29.3584, moduleLatency: 35.5952 -[postprocess] throughputRate: 2.40737, moduleLatency: 415.392 - ------------------------------------------------------------ -``` - -Interface throughputRate: 
29.3584,29.3584x4=117.4336即是batch16 310单卡吞吐率 - - -### 7.2 T4性能数据 - -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 -batch1性能: - -``` -trtexec --onnx=ADNet.onnx --fp16 --shapes=image:1x1x321x481 --threads -``` - -``` -[09/27/2021-11:20:55] [I] GPU Compute -[09/27/2021-11:20:55] [I] min: 7.94897 ms -[09/27/2021-11:20:55] [I] max: 12.2207 ms -[09/27/2021-11:20:55] [I] mean: 8.39391 ms -[09/27/2021-11:20:55] [I] median: 8.30371 ms -[09/27/2021-11:20:55] [I] percentile: 11.1882 ms at 99% -[09/27/2021-11:20:55] [I] total compute time: 3.01341 s -``` -batch1 t4单卡吞吐率:1000/(8.39391/1)=119.134fps - -batch16性能: - -``` -trtexec --onnx=ADNet.onnx --fp16 --shapes=image:16x1x321x481 --threads -``` - -``` -[09/27/2021-11:28:53] [I] GPU Compute -[09/27/2021-11:28:53] [I] min: 125.424 ms -[09/27/2021-11:28:53] [I] max: 138.322 ms -[09/27/2021-11:28:53] [I] mean: 128.206 ms -[09/27/2021-11:28:53] [I] median: 126.907 ms -[09/27/2021-11:28:53] [I] percentile: 138.322 ms at 99% -[09/27/2021-11:28:53] [I] total compute time: 3.33335 s -``` - -batch4 t4单卡吞吐率:1000/(128.206/16)=124.799fps - -### 7.3 性能对比 - -batch1:35.7852x4 > 1000/(8.39391/1) -batch16:29.3584x4 < 000/(128.206/16) -310单个device的吞吐率乘4即单卡吞吐率与比T4单卡相比,batch1的性能:310高于T4,batch16的性能:310是T4的0.954倍,略低于T4。该模型放在contrib/ACL_PyTorch/Research目录下。 - -310与T4同时使用纯推理对batch16进行性能测试,310性能如下: - -``` ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 36.1295samples/s, ave_latency: 27.6788ms ----------------------------------------------------------------- -``` - -batch16纯推理的性能为:36.1295x4=144.518fps - -144.518>124.799,在纯推理测试性能的情况下,310性能优于T4性能。 +# ADNet推理说明 + +## 1 模型概述 + +- **[论文地址](https://www.sciencedirect.com/science/article/pii/S0893608019304241)** +- **[代码地址](https://github.com/hellloxiaotian/ADNet)** + +### 1.1 论文地址 + +[ADNet论文](https://www.sciencedirect.com/science/article/pii/S0893608019304241) + +### 1.2 代码地址 + +[ADNet代码](https://github.com/hellloxiaotian/ADNet) + +branch:master + +commitid:commit 997df8f0cd5cebe2d26a1468c866dd927512686f + + +## 2 环境说明 + +- 深度学习框架 +- python第三方库 + +### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.2 + +pytorch == 1.5.0 +torchvision == 0.5.0 +onnx == 1.7.0 +onnx-simplifier == 0.3.6 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.21.2 +Pillow == 8.3.0 +opencv-python == 4.5.3.56 +scikit-image==0.16.2 +``` + +## 3 模型转换 + +- pth转om模型 + +### 3.1 pth转om模型 + +1.获取pth权重文件 + +[pth权重文件](https://github.com/hellloxiaotian/ADNet/blob/master/gray/g25/model_70.pth +) md5sum:7a93fb1f437cbce0fd235daaa7b9cffd + +2.下载ADNet推理代码 + +``` +git clone https://gitee.com/wang-chaojiemayj/modelzoo.git +cd modelzoo +git checkout tuili +``` +进入ADNet目录 +``` +cd ./contrib/ACL_PyTorch/Research/cv/quality_enhancement/ADnet +``` +3.pth模型转onnx模型,onnx转成om模型 + +pth模型转onnx模型 +``` +python3.7.5 ADNet_pth2onnx.py model_70.pth ADNet.onnx +``` +onnx转出om +``` +source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) +# bs1 +atc --framework=5 --model=ADNet.onnx --output=ADNet_bs1 --input_format=NCHW --input_shape="image:1,1,321,481" --log=debug --soc_version=Ascend310 +#bs16 +atc --framework=5 --model=ADNet.onnx --output=ADNet_bs16 --input_format=NCHW --input_shape="image:16,1,321,481" --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- 数据集获取 +- 数据预处理 +- 生成数据集信息文件 + +### 4.1 数据集获取 + +本模型支持BSD68数据集共68张数据集,可从百度云盘下载 + +链接:https://pan.baidu.com/s/1XiePOuutbAuKRRTV949FlQ +提取码:0315 + +文件结构如下 + +``` +|ADNet--test +| | |--pth2om.sh +| | |--perf_t4.sh +| | |--parse.py +| | 
|--eval_acc_perf.sh +| |--datset +| | |--BSD68 +| |--prep_dataset +| | |--ISoure +| | |--INoisy +| |--util.py +| |--requirements.tx +| |--models.py +| |--gen_dataset_info.py +| |--env.sh +| |--ADNet_pth2onnx.py +| |--ADNet_preprocess.py +| |--ADNet_postprocess.py +``` + + +### 4.2 数据集预处理 + +运行ADNet_preprocess.py +``` +python3.7.5 ADNet_preprocess.py ./dataset/BSD68 ./prep_dataset +``` +二进制文件将保存在./prep_dataset目录下 + +### 4.3 生成数据集信息文件 + +1.执行生成数据集信息脚本gen_dataset_info.py,生成数据集信息文件 + +``` +python3.7.5 gen_dataset_info.py ./prep_dataset/INoisy ADNet_prep_bin.info 481 321 +``` + +## 5 离线推理 + +- benchmark工具概述 +- 离线推理 + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +``` + +2.执行离线推理 + +``` +bs1: +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=./ADNet_bs1.om -input_text_path=./ADNet_prep_bin.info -input_width=481 -input_height=321 -output_binary=True -useDvpp=False +bs16: +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=16 -om_path=./ADNet_bs16.om -input_text_path=./ADNet_prep_bin.info -input_width=481 -input_height=321 -output_binary=True -useDvpp=False +``` + +输出结果分别保存在当前目录result/dumpOutput_device0和result/dumpOutput_device1中,模型的输出有三个,其中需要的是名为output1的输出,shape为(1,19,1024,2048)(NCHW),数据类型为FP16,每个输入对应的输出对应三个_x.bin(x代表1,2,3)文件。 + +## 6 精度对比 + +- 离线推理精度 +- 开源精度 +- 开源精度对比 + +### 6.1 离线推理精度统计 + +后处理统计PSNR精度 + +调用ADNet_postprocess.py脚本推理结果与label比对,获取PSNRj精度数据,结果保存在ADNet_bs1.log和ADNet_bs4.log + +``` +python3.7.5 -u ADNet_postprocess.py ./result/dumpOutput_device0 ./prep_dataset/ISoure ./out >ADNet_bs1.log +python3.7.5 -u ADNet_postprocess.py ./result/dumpOutput_device1 ./prep_dataset/ISoure ./out >ADNet_bs16.log +``` + +第一个为benchmark输出目录,第二个标签目录,第三个为重定向输出目录 + +``` +PSNR:29.68 +``` + +经过对bs1与bs6的om测试,本模型batch1的精度与batch4的精度一致,精度数据如上 +### 6.2 开源精度 + +pth精度 + +``` +Model 论文 开源pth文件 +ADNet 29.27 29.25 +``` + +### 6.3 精度对比 + +将得到的om模型离线推理精度与pth精度作比较,om模型精度高于pth模型精度,精度达标。 + +## 7 性能对比 + +- NPU性能数据 +- T4性能数据 +- 性能对比 + +### 7.1 npu性能数据 + +1.benchmark工具在整个数据集上推理获得性能数据。 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 21.1584, latency: 3213.85 +[data read] throughputRate: 2267.5, moduleLatency: 0.441015 +[preprocess] throughputRate: 613.431, moduleLatency: 1.63018 +[inference] throughputRate: 33.8299, Interface throughputRate: 35.7852, moduleLatency: 29.1051 +[postprocess] throughputRate: 34.309, moduleLatency: 29.1469 + +----------------------------------------------------------- +``` + +Interface throughputRate: 35.7852,35.7852x4=143.1408即是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: + +``` +-----------------Performance Summary------------------ +[e2e] throughputRate: 19.8971, latency: 3417.58 +[data read] throughputRate: 2382.7, moduleLatency: 0.419691 +[preprocess] throughputRate: 405.505, moduleLatency: 2.46606 +[inference] throughputRate: 27.4387, Interface throughputRate: 29.3584, moduleLatency: 35.5952 +[postprocess] throughputRate: 2.40737, moduleLatency: 415.392 + +----------------------------------------------------------- +``` + +Interface throughputRate: 29.3584,29.3584x4=117.4336即是batch16 310单卡吞吐率 + + +### 7.2 T4性能数据 + +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 +batch1性能: + +``` +trtexec 
--onnx=ADNet.onnx --fp16 --shapes=image:1x1x321x481 --threads +``` + +``` +[09/27/2021-11:20:55] [I] GPU Compute +[09/27/2021-11:20:55] [I] min: 7.94897 ms +[09/27/2021-11:20:55] [I] max: 12.2207 ms +[09/27/2021-11:20:55] [I] mean: 8.39391 ms +[09/27/2021-11:20:55] [I] median: 8.30371 ms +[09/27/2021-11:20:55] [I] percentile: 11.1882 ms at 99% +[09/27/2021-11:20:55] [I] total compute time: 3.01341 s +``` +batch1 t4单卡吞吐率:1000/(8.39391/1)=119.134fps + +batch16性能: + +``` +trtexec --onnx=ADNet.onnx --fp16 --shapes=image:16x1x321x481 --threads +``` + +``` +[09/27/2021-11:28:53] [I] GPU Compute +[09/27/2021-11:28:53] [I] min: 125.424 ms +[09/27/2021-11:28:53] [I] max: 138.322 ms +[09/27/2021-11:28:53] [I] mean: 128.206 ms +[09/27/2021-11:28:53] [I] median: 126.907 ms +[09/27/2021-11:28:53] [I] percentile: 138.322 ms at 99% +[09/27/2021-11:28:53] [I] total compute time: 3.33335 s +``` + +batch4 t4单卡吞吐率:1000/(128.206/16)=124.799fps + +### 7.3 性能对比 + +batch1:35.7852x4 > 1000/(8.39391/1) +batch16:29.3584x4 < 000/(128.206/16) +310单个device的吞吐率乘4即单卡吞吐率与比T4单卡相比,batch1的性能:310高于T4,batch16的性能:310是T4的0.954倍,略低于T4。该模型放在contrib/ACL_PyTorch/Research目录下。 + +310与T4同时使用纯推理对batch16进行性能测试,310性能如下: + +``` +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 36.1295samples/s, ave_latency: 27.6788ms +---------------------------------------------------------------- +``` + +batch16纯推理的性能为:36.1295x4=144.518fps + +144.518>124.799,在纯推理测试性能的情况下,310性能优于T4性能。 diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/gen_dataset_info.py index 4230a0ef58290160bf2302a57fc9950d2ec40892..b3349fd4786e00f5480c6ff074f5aa9c5f9876be 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/gen_dataset_info.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import argparse -import glob -import sys - -def get_bin_info(img_root_path='./prep_dataset/INoisy', - info_name='ADNet_prep_bin.info', width='481', height='321'): - img_path = [] - files_source = glob.glob(os.path.join(img_root_path,'*.bin')) - files_source.sort() - for file in files_source: - if file.endswith('.bin'): - imgpath = file - img_path.append(imgpath) - with open(info_name, 'w') as fp: - for index in range(len(img_path)): - content = ' '.join([str(index), img_path[index], width, height]) - fp.write(content) - fp.write('\n') - -if __name__ == '__main__': - dataset_bin = sys.argv[1] - info_name = sys.argv[2] - width = sys.argv[3] - height = sys.argv[4] - get_bin_info(img_root_path=dataset_bin, info_name=info_name, width=width, height=height) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import argparse +import glob +import sys + +def get_bin_info(img_root_path='./prep_dataset/INoisy', + info_name='ADNet_prep_bin.info', width='481', height='321'): + img_path = [] + files_source = glob.glob(os.path.join(img_root_path,'*.bin')) + files_source.sort() + for file in files_source: + if file.endswith('.bin'): + imgpath = file + img_path.append(imgpath) + with open(info_name, 'w') as fp: + for index in range(len(img_path)): + content = ' '.join([str(index), img_path[index], width, height]) + fp.write(content) + fp.write('\n') + +if __name__ == '__main__': + dataset_bin = sys.argv[1] + info_name = sys.argv[2] + width = sys.argv[3] + height = sys.argv[4] + get_bin_info(img_root_path=dataset_bin, info_name=info_name, width=width, height=height) diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/test/parse.py b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/test/parse.py index 1841a9747c1da500ef5f9e0e0368ec324be08595..e071d8fecf850996e9527d39d0e7d0f4d83ad14e 100644 --- a/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/test/parse.py +++ b/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet/test/parse.py @@ -1,39 +1,39 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re - -def get_acc(filename): - with open(filename, 'r') as f: - lines = f.readlines() - last_line = lines[-1] - psnr = last_line.split(" ")[2] - print(filename.split('.')[0],"Average PSNR:", psnr) - - -def get_perf(filename): - with open(filename, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) - -if __name__ == "__main__": - - filename = sys.argv[1] - - if filename.endswith(".log"): - get_acc(filename) - elif filename.endswith(".txt"): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import re + +def get_acc(filename): + with open(filename, 'r') as f: + lines = f.readlines() + last_line = lines[-1] + psnr = last_line.split(" ")[2] + print(filename.split('.')[0],"Average PSNR:", psnr) + + +def get_perf(filename): + with open(filename, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) + +if __name__ == "__main__": + + filename = sys.argv[1] + + if filename.endswith(".log"): + get_acc(filename) + elif filename.endswith(".txt"): get_perf(filename) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py index 023869fa0adbf04bb38e73feb28298d42d0f7f6c..08233a7d901937460f6c98175b3a16f3fcfa9333 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_postprocess.py @@ -1,39 +1,39 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# 3d_nested_unet_postprocess.py -import sys -import os -import time -import pdb -import argparse -from nnunet.inference import predict_simple2 - - -def main(): - # pdb.set_trace() - parser = argparse.ArgumentParser() - parser.add_argument('-fp', '--file_path', help='output bin files path', required=True) - args = parser.parse_args() - python_file = predict_simple2.__file__ # /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py - file_path = args.file_path - pre_mode = 2 - command = 'python3 ' + str(python_file) + ' --pre_mode ' + str(pre_mode) + ' --file_path ' + str(file_path) - os.system(command) - - -if __name__ == "__main__": - main() - print('main end') - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# 3d_nested_unet_postprocess.py +import sys +import os +import time +import pdb +import argparse +from nnunet.inference import predict_simple2 + + +def main(): + # pdb.set_trace() + parser = argparse.ArgumentParser() + parser.add_argument('-fp', '--file_path', help='output bin files path', required=True) + args = parser.parse_args() + python_file = predict_simple2.__file__ # /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py + file_path = args.file_path + pre_mode = 2 + command = 'python3 ' + str(python_file) + ' --pre_mode ' + str(pre_mode) + ' --file_path ' + str(file_path) + os.system(command) + + +if __name__ == "__main__": + main() + print('main end') + diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py index 143fb939b116e7d44c7dc316584fe84b9156a3f2..f20ba2697bee8eceff58b6385f9574b2f10ada70 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_preprocess.py @@ -1,39 +1,39 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# 3d_nested_unet_preprocess.py -import sys -import os -import time -import pdb -import argparse -from nnunet.inference import predict_simple2 - - -def main(): - # pdb.set_trace() - parser = argparse.ArgumentParser() - parser.add_argument('-fp', '--file_path', help='input bin files path', required=True) - args = parser.parse_args() - python_file = predict_simple2.__file__ # /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py - file_path = args.file_path - pre_mode = 1 - command = 'python3 ' + str(python_file) + ' --pre_mode ' + str(pre_mode) + ' --file_path ' + str(file_path) - os.system(command) - - -if __name__ == "__main__": - main() - print('main end') - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# 3d_nested_unet_preprocess.py +import sys +import os +import time +import pdb +import argparse +from nnunet.inference import predict_simple2 + + +def main(): + # pdb.set_trace() + parser = argparse.ArgumentParser() + parser.add_argument('-fp', '--file_path', help='input bin files path', required=True) + args = parser.parse_args() + python_file = predict_simple2.__file__ # /home/hyp/UNetPlusPlus/pytorch/nnunet/inference/predict_simple2.py + file_path = args.file_path + pre_mode = 1 + command = 'python3 ' + str(python_file) + ' --pre_mode ' + str(pre_mode) + ' --file_path ' + str(file_path) + os.system(command) + + +if __name__ == "__main__": + main() + print('main end') + diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py index 0d8e115f5bd1c2f6919069e2ed6807b4cf7819bb..4aaaa6d67a655c2d4c598e4a2d5d9d4c0b8c002f 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/3d_nested_unet_pth2onnx.py @@ -1,57 +1,57 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# 3d_nested_unet_pth2onnx.py -import sys -import os -import time -import pdb -import argparse -from batchgenerators.utilities.file_and_folder_operations import join, isdir -from nnunet.paths import default_plans_identifier, network_training_output_dir, default_cascade_trainer, default_trainer -from nnunet.training.model_restore import load_model_and_checkpoint_files -from nnunet.inference.predict2 import pth2onnx - - -def main(): - # pdb.set_trace() - parser = argparse.ArgumentParser() - parser.add_argument('-fp', '--file_path', help='output onnx file path', required=True) - args = parser.parse_args() - fp = args.file_path - model = '3d_fullres' - task_name = 'Task003_Liver' - trainer = 'nnUNetPlusPlusTrainerV2' - plans_identifier = 'nnUNetPlansv2.1' - model_folder_name = join(network_training_output_dir, model, task_name, trainer + "__" + plans_identifier) - model = model_folder_name - folds = None # 如果文件存放路径正确,会自动识别到教程中的fold 0 - mixed_precision = True - checkpoint_name = 'model_final_checkpoint' - trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) - pre_mode = -1 - if int(pre_mode) == -1: - p = params[0] - trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 - print('pth2onnx start') - pth2onnx(trainer.network, fp) - print('pth2onnx end') - print('onnx模型已经输出至:', fp) - - -if __name__ == "__main__": - main() - print('main end') - - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# 3d_nested_unet_pth2onnx.py +import sys +import os +import time +import pdb +import argparse +from batchgenerators.utilities.file_and_folder_operations import join, isdir +from nnunet.paths import default_plans_identifier, network_training_output_dir, default_cascade_trainer, default_trainer +from nnunet.training.model_restore import load_model_and_checkpoint_files +from nnunet.inference.predict2 import pth2onnx + + +def main(): + # pdb.set_trace() + parser = argparse.ArgumentParser() + parser.add_argument('-fp', '--file_path', help='output onnx file path', required=True) + args = parser.parse_args() + fp = args.file_path + model = '3d_fullres' + task_name = 'Task003_Liver' + trainer = 'nnUNetPlusPlusTrainerV2' + plans_identifier = 'nnUNetPlansv2.1' + model_folder_name = join(network_training_output_dir, model, task_name, trainer + "__" + plans_identifier) + model = model_folder_name + folds = None # 如果文件存放路径正确,会自动识别到教程中的fold 0 + mixed_precision = True + checkpoint_name = 'model_final_checkpoint' + trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) + pre_mode = -1 + if int(pre_mode) == -1: + p = params[0] + trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 + print('pth2onnx start') + pth2onnx(trainer.network, fp) + print('pth2onnx end') + print('onnx模型已经输出至:', fp) + + +if __name__ == "__main__": + main() + print('main end') + + diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/License @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py index a46e74d40f8dc8098a83065c29ab26a6ea59dc94..e030312b1751f3cf3fd0acf8f3c0c64c6a4647d3 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/change_infer_path.py @@ -1,61 +1,61 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# 3d_nested_unet_preprocess.py -import sys -import os -import time -import pdb -import argparse -from nnunet.inference import infer_path - - -def main(): - # pdb.set_trace() - parser = argparse.ArgumentParser() - parser.add_argument('-fp1', '--file_path1', help='INFERENCE_INPUT_FOLDER', required=True, default='/home/hyp/environment/input/') - parser.add_argument('-fp2', '--file_path2', help='INFERENCE_OUTPUT_FOLDER', required=True, default='/home/hyp/environment/output/') - parser.add_argument('-fp3', '--file_path3', help='INFERENCE_SHAPE_PATH', required=True, default='/home/hyp/environment/') - args = parser.parse_args() - python_file = infer_path.__file__ - fp1 = args.file_path1 - fp2 = args.file_path2 - fp3 = args.file_path3 - lines = [] - print('尝试读取:', python_file) - file = open(python_file, 'r', encoding='utf-8') - lines = file.readlines() - file.close() - print('尝试修改路径') - with open(python_file, 'w', encoding='utf-8') as f: - for line in lines: - if line.startswith('INFERENCE_INPUT_FOLDER'): - line = 'INFERENCE_INPUT_FOLDER = ' + '\'' + str(fp1) + '\'' + '\n' - if line.startswith('INFERENCE_OUTPUT_FOLDER'): - line = 'INFERENCE_OUTPUT_FOLDER = ' + '\'' + str(fp2) + '\'' + '\n' - if line.startswith('INFERENCE_SHAPE_PATH'): - line = 'INFERENCE_SHAPE_PATH = ' + '\'' + str(fp3) + '\'' + '\n' - f.write(line) - print('正在修改:', python_file) - print('INFERENCE_INPUT_FOLDER =', fp1) - print('INFERENCE_OUTPUT_FOLDER=', fp2) - print('INFERENCE_SHAPE_PATH =', fp3) - f.close() - print('修改完成') - - -if __name__ == "__main__": - main() - print('main end') - +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# 3d_nested_unet_preprocess.py +import sys +import os +import time +import pdb +import argparse +from nnunet.inference import infer_path + + +def main(): + # pdb.set_trace() + parser = argparse.ArgumentParser() + parser.add_argument('-fp1', '--file_path1', help='INFERENCE_INPUT_FOLDER', required=True, default='/home/hyp/environment/input/') + parser.add_argument('-fp2', '--file_path2', help='INFERENCE_OUTPUT_FOLDER', required=True, default='/home/hyp/environment/output/') + parser.add_argument('-fp3', '--file_path3', help='INFERENCE_SHAPE_PATH', required=True, default='/home/hyp/environment/') + args = parser.parse_args() + python_file = infer_path.__file__ + fp1 = args.file_path1 + fp2 = args.file_path2 + fp3 = args.file_path3 + lines = [] + print('尝试读取:', python_file) + file = open(python_file, 'r', encoding='utf-8') + lines = file.readlines() + file.close() + print('尝试修改路径') + with open(python_file, 'w', encoding='utf-8') as f: + for line in lines: + if line.startswith('INFERENCE_INPUT_FOLDER'): + line = 'INFERENCE_INPUT_FOLDER = ' + '\'' + str(fp1) + '\'' + '\n' + if line.startswith('INFERENCE_OUTPUT_FOLDER'): + line = 'INFERENCE_OUTPUT_FOLDER = ' + '\'' + str(fp2) + '\'' + '\n' + if line.startswith('INFERENCE_SHAPE_PATH'): + line = 'INFERENCE_SHAPE_PATH = ' + '\'' + str(fp3) + '\'' + '\n' + f.write(line) + print('正在修改:', python_file) + print('INFERENCE_INPUT_FOLDER =', fp1) + print('INFERENCE_OUTPUT_FOLDER=', fp2) + print('INFERENCE_SHAPE_PATH =', fp3) + f.close() + print('修改完成') + + +if __name__ == "__main__": + main() + print('main end') + diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py index 68f13f27c3ec643068e0a5662b610cc59325747b..54138faa7dc6f6b5ad40b7e0895c9271987edc72 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/gen_dataset_info.py @@ -1,80 +1,80 @@ -""" - Copyright 2020 Huawei Technologies Co., Ltd - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- Typical usage example: -""" -import os -import sys -from glob import glob -import pdb - - -def get_bin_info(file_path, info_name, shape, split4=True): - """ - @description: get given bin information - @param file_path bin file path - @param info_name given information name - @param shape image shape - @return - """ - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, shape[0], shape[1]]) - file.write(content) - file.write('\n') - print('共计.bin文件个数:', len(bin_images)) - print('info已写入:', os.path.abspath(info_name)) - if split4: # 是否切割为4卡的info - sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info'] - length = len(bin_images) - step = length // 4 - b1 = bin_images[0: step] - b2 = bin_images[step: 2*step] - b3 = bin_images[2*step: 3*step] - b4 = bin_images[3*step:] - with open(sths[0], 'w') as file: - for index, img in enumerate(b1): - content = ' '.join([str(index), img, shape[0], shape[1]]) - file.write(content) - file.write('\n') - with open(sths[1], 'w') as file: - for index, img in enumerate(b2): - content = ' '.join([str(index), img, shape[0], shape[1]]) - file.write(content) - file.write('\n') - with open(sths[2], 'w') as file: - for index, img in enumerate(b3): - content = ' '.join([str(index), img, shape[0], shape[1]]) - file.write(content) - file.write('\n') - with open(sths[3], 'w') as file: - for index, img in enumerate(b4): - content = ' '.join([str(index), img, shape[0], shape[1]]) - file.write(content) - file.write('\n') - print('成功切分为四个子集', sths) - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - shape1 = sys.argv[4] - shape2 = sys.argv[5] - shape = [shape1, shape2] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, shape) +""" + Copyright 2020 Huawei Technologies Co., Ltd + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ Typical usage example: +""" +import os +import sys +from glob import glob +import pdb + + +def get_bin_info(file_path, info_name, shape, split4=True): + """ + @description: get given bin information + @param file_path bin file path + @param info_name given information name + @param shape image shape + @return + """ + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, shape[0], shape[1]]) + file.write(content) + file.write('\n') + print('共计.bin文件个数:', len(bin_images)) + print('info已写入:', os.path.abspath(info_name)) + if split4: # 是否切割为4卡的info + sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info'] + length = len(bin_images) + step = length // 4 + b1 = bin_images[0: step] + b2 = bin_images[step: 2*step] + b3 = bin_images[2*step: 3*step] + b4 = bin_images[3*step:] + with open(sths[0], 'w') as file: + for index, img in enumerate(b1): + content = ' '.join([str(index), img, shape[0], shape[1]]) + file.write(content) + file.write('\n') + with open(sths[1], 'w') as file: + for index, img in enumerate(b2): + content = ' '.join([str(index), img, shape[0], shape[1]]) + file.write(content) + file.write('\n') + with open(sths[2], 'w') as file: + for index, img in enumerate(b3): + content = ' '.join([str(index), img, shape[0], shape[1]]) + file.write(content) + file.write('\n') + with open(sths[3], 'w') as file: + for index, img in enumerate(b4): + content = ' '.join([str(index), img, shape[0], shape[1]]) + file.write(content) + file.write('\n') + print('成功切分为四个子集', sths) + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + shape1 = sys.argv[4] + shape2 = sys.argv[5] + shape = [shape1, shape2] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, shape) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch index 5307bdd0c26a0cd03c7b0b06b901239d4c101087..bc6f3df5cc44d8c752f24f8fe2d7312e31478764 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch +++ b/ACL_PyTorch/contrib/cv/segmentation/3D_Nested_Unet/new.patch @@ -4,206 +4,206 @@ index 0000000..2a17e8a --- /dev/null +++ b/pytorch/nnunet/evaluation/model_selection/figure_out_want_to_submit2.py @@ -0,0 +1,200 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. 
-+ -+ -+from itertools import combinations -+import nnunet -+from batchgenerators.utilities.file_and_folder_operations import * -+from nnunet.evaluation.add_mean_dice_to_json import foreground_mean -+from nnunet.evaluation.model_selection.ensemble import ensemble -+from nnunet.paths import network_training_output_dir -+import numpy as np -+from subprocess import call -+from nnunet.postprocessing.consolidate_postprocessing import consolidate_folds -+from nnunet.utilities.folder_names import get_output_folder_name -+from nnunet.paths import default_cascade_trainer, default_trainer, default_plans_identifier -+ -+ -+def find_task_name(folder, task_id): -+ candidates = subdirs(folder, prefix="Task%03.0d_" % task_id, join=False) -+ assert len(candidates) > 0, "no candidate for Task id %d found in folder %s" % (task_id, folder) -+ assert len(candidates) == 1, "more than one candidate for Task id %d found in folder %s" % (task_id, folder) -+ return candidates[0] -+ -+ -+def get_mean_foreground_dice(json_file): -+ results = load_json(json_file) -+ return get_foreground_mean(results) -+ -+ -+def get_foreground_mean(results): -+ results_mean = results['results']['mean'] -+ dice_scores = [results_mean[i]['Dice'] for i in results_mean.keys() if i != "0" and i != 'mean'] -+ return np.mean(dice_scores) -+ -+ -+def main(): -+ import argparse -+ parser = argparse.ArgumentParser(usage="This is intended to identify the best model based on the five fold " -+ "cross-validation. Running this script requires all models to have been run " -+ "already. This script will summarize the results of the five folds of all " -+ "models in one json each for easy interpretability") -+ -+ parser.add_argument("-m", '--models', nargs="+", required=False, default=['3d_fullres']) -+ parser.add_argument("-t", '--task_ids', nargs="+", required=False, default='003') -+ -+ parser.add_argument("-tr", type=str, required=False, default=default_trainer, -+ help="nnUNetTrainer class. Default: %s" % default_trainer) -+ parser.add_argument("-ctr", type=str, required=False, default=default_cascade_trainer, -+ help="nnUNetTrainer class for cascade model. Default: %s" % default_cascade_trainer) -+ parser.add_argument("-pl", type=str, required=False, default=default_plans_identifier, -+ help="plans name, Default: %s" % default_plans_identifier) -+ parser.add_argument('-f', '--folds', nargs='+', default=(0, 1, 2, 3, 4), help="use this if you have non-standard folds") -+ parser.add_argument("--strict", required=False, default=True, action="store_true", -+ help="set this flag if you want this script to crash of one of the models is missing") -+ -+ args = parser.parse_args() -+ tasks = [int(i) for i in args.task_ids] -+ -+ models = args.models -+ tr = args.tr -+ trc = args.ctr -+ strict = args.strict -+ pl = args.pl -+ folds = tuple(int(i) for i in args.folds) -+ -+ validation_folder = "validation_raw" -+ -+ # this script now acts independently from the summary jsons. 
That was unnecessary -+ id_task_mapping = {} -+ # for each task, run ensembling using all combinations of two models -+ for t in tasks: -+ # first collect pure model performance (postprocessed) -+ results = {} -+ all_results = {} -+ valid_models = [] -+ for m in models: -+ try: -+ if m == "3d_cascade_fullres": -+ trainer = trc -+ else: -+ trainer = tr -+ -+ if t not in id_task_mapping.keys(): -+ task_name = find_task_name(get_output_folder_name(m), t) -+ id_task_mapping[t] = task_name -+ -+ output_folder = get_output_folder_name(m, id_task_mapping[t], trainer, pl) -+ assert isdir(output_folder), "Output folder for model %s is missing, expected: %s" % (m, output_folder) -+ -+ # we need a postprocessing_json for inference, so that must be present -+ postprocessing_json = join(output_folder, "postprocessing.json") -+ # we need cv_niftis_postprocessed to know the single model performance -+ cv_niftis_folder = join(output_folder, "cv_niftis_raw") -+ if not isfile(postprocessing_json) or not isdir(cv_niftis_folder): -+ print("running missing postprocessing for %s and model %s" % (id_task_mapping[t], m)) -+ consolidate_folds(output_folder, folds=folds) -+ assert isfile(postprocessing_json), "Postprocessing json missing, expected: %s" % postprocessing_json -+ assert isdir(cv_niftis_folder), "Folder with niftis from CV missing, expected: %s" % cv_niftis_folder -+ -+ # obtain mean foreground dice -+ summary_file = join(cv_niftis_folder, "summary.json") -+ results[m] = get_mean_foreground_dice(summary_file) -+ foreground_mean(summary_file) -+ all_results[m] = load_json(summary_file)['results']['mean'] -+ valid_models.append(m) -+ -+ except Exception as e: -+ if strict: -+ raise e -+ else: -+ print("WARNING!") -+ print(e) -+ -+ # now run ensembling and add ensembling to results -+ print("\nFound the following valid models:\n", valid_models) -+ if len(valid_models) > 1: -+ for m1, m2 in combinations(valid_models, 2): -+ -+ trainer_m1 = trc if m1 == "3d_cascade_fullres" else tr -+ trainer_m2 = trc if m2 == "3d_cascade_fullres" else tr -+ -+ ensemble_name = "ensemble_" + m1 + "__" + trainer_m1 + "__" + pl + "--" + m2 + "__" + trainer_m2 + "__" + pl -+ output_folder_base = join(network_training_output_dir, "ensembles", id_task_mapping[t], ensemble_name) -+ maybe_mkdir_p(output_folder_base) -+ -+ network1_folder = get_output_folder_name(m1, id_task_mapping[t], trainer_m1, pl) -+ network2_folder = get_output_folder_name(m2, id_task_mapping[t], trainer_m2, pl) -+ -+ print("ensembling", network1_folder, network2_folder) -+ ensemble(network1_folder, network2_folder, output_folder_base, id_task_mapping[t], validation_folder, folds) -+ # ensembling will automatically do postprocessingget_foreground_mean -+ -+ # now get result of ensemble -+ results[ensemble_name] = get_mean_foreground_dice(join(output_folder_base, "ensembled_raw", "summary.json")) -+ summary_file = join(output_folder_base, "ensembled_raw", "summary.json") -+ foreground_mean(summary_file) -+ all_results[ensemble_name] = load_json(summary_file)['results']['mean'] -+ -+ # now print all mean foreground dice and highlight the best -+ foreground_dices = list(results.values()) -+ best = np.max(foreground_dices) -+ for k, v in results.items(): -+ print(k, v) -+ -+ predict_str = "" -+ best_model = None -+ for k, v in results.items(): -+ if v == best: -+ print("%s submit model %s" % (id_task_mapping[t], k), v) -+ best_model = k -+ print("\nHere is how you should predict test cases. 
Run in sequential order and replace all input and output folder names with your personalized ones\n") -+ if k.startswith("ensemble"): -+ tmp = k[len("ensemble_"):] -+ model1, model2 = tmp.split("--") -+ m1, t1, pl1 = model1.split("__") -+ m2, t2, pl2 = model2.split("__") -+ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + m1 + " -p " + pl + " -t " + \ -+ id_task_mapping[t] + "\n" -+ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL2 -tr " + tr + " -ctr " + trc + " -m " + m2 + " -p " + pl + " -t " + \ -+ id_task_mapping[t] + "\n" -+ -+ predict_str += "nnUNet_ensemble -f OUTPUT_FOLDER_MODEL1 OUTPUT_FOLDER_MODEL2 -o OUTPUT_FOLDER -pp " + join(network_training_output_dir, "ensembles", id_task_mapping[t], k, "postprocessing.json") + "\n" -+ else: -+ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + k + " -p " + pl + " -t " + \ -+ id_task_mapping[t] + "\n" -+ print(predict_str) -+ -+ summary_folder = join(network_training_output_dir, "ensembles", id_task_mapping[t]) -+ maybe_mkdir_p(summary_folder) -+ with open(join(summary_folder, "prediction_commands.txt"), 'w') as f: -+ f.write(predict_str) -+ -+ num_classes = len([i for i in all_results[best_model].keys() if i != 'mean']) -+ with open(join(summary_folder, "summary.csv"), 'w') as f: -+ f.write("model") -+ for c in range(1, num_classes): -+ f.write(",class%d" % c) -+ f.write(",average") -+ f.write("\n") -+ for m in all_results.keys(): -+ f.write(m) -+ for c in range(1, num_classes): -+ f.write(",%01.4f" % all_results[m][str(c)]["Dice"]) -+ f.write(",%01.4f" % all_results[m]['mean']["Dice"]) -+ f.write("\n") -+ -+ -+if __name__ == "__main__": -+ main() ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
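figure_out_want_to_submit2.py ranks candidate models by the mean foreground Dice that get_foreground_mean() pulls out of each summary.json, skipping the background entry "0" and the aggregate 'mean' entry. A minimal sketch of that computation on a hand-written dictionary; all Dice values here are made up for illustration:

import numpy as np

# Hypothetical structure mirroring the summary.json layout the script reads.
summary = {
    'results': {
        'mean': {
            '0': {'Dice': 0.99},     # background, excluded
            '1': {'Dice': 0.95},     # foreground class 1
            '2': {'Dice': 0.60},     # foreground class 2
            'mean': {'Dice': 0.85},  # aggregate entry, excluded
        }
    }
}

results_mean = summary['results']['mean']
dice_scores = [results_mean[k]['Dice'] for k in results_mean if k not in ('0', 'mean')]
print('mean foreground Dice:', np.mean(dice_scores))  # (0.95 + 0.60) / 2 = 0.775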
++ ++ ++from itertools import combinations ++import nnunet ++from batchgenerators.utilities.file_and_folder_operations import * ++from nnunet.evaluation.add_mean_dice_to_json import foreground_mean ++from nnunet.evaluation.model_selection.ensemble import ensemble ++from nnunet.paths import network_training_output_dir ++import numpy as np ++from subprocess import call ++from nnunet.postprocessing.consolidate_postprocessing import consolidate_folds ++from nnunet.utilities.folder_names import get_output_folder_name ++from nnunet.paths import default_cascade_trainer, default_trainer, default_plans_identifier ++ ++ ++def find_task_name(folder, task_id): ++ candidates = subdirs(folder, prefix="Task%03.0d_" % task_id, join=False) ++ assert len(candidates) > 0, "no candidate for Task id %d found in folder %s" % (task_id, folder) ++ assert len(candidates) == 1, "more than one candidate for Task id %d found in folder %s" % (task_id, folder) ++ return candidates[0] ++ ++ ++def get_mean_foreground_dice(json_file): ++ results = load_json(json_file) ++ return get_foreground_mean(results) ++ ++ ++def get_foreground_mean(results): ++ results_mean = results['results']['mean'] ++ dice_scores = [results_mean[i]['Dice'] for i in results_mean.keys() if i != "0" and i != 'mean'] ++ return np.mean(dice_scores) ++ ++ ++def main(): ++ import argparse ++ parser = argparse.ArgumentParser(usage="This is intended to identify the best model based on the five fold " ++ "cross-validation. Running this script requires all models to have been run " ++ "already. This script will summarize the results of the five folds of all " ++ "models in one json each for easy interpretability") ++ ++ parser.add_argument("-m", '--models', nargs="+", required=False, default=['3d_fullres']) ++ parser.add_argument("-t", '--task_ids', nargs="+", required=False, default='003') ++ ++ parser.add_argument("-tr", type=str, required=False, default=default_trainer, ++ help="nnUNetTrainer class. Default: %s" % default_trainer) ++ parser.add_argument("-ctr", type=str, required=False, default=default_cascade_trainer, ++ help="nnUNetTrainer class for cascade model. Default: %s" % default_cascade_trainer) ++ parser.add_argument("-pl", type=str, required=False, default=default_plans_identifier, ++ help="plans name, Default: %s" % default_plans_identifier) ++ parser.add_argument('-f', '--folds', nargs='+', default=(0, 1, 2, 3, 4), help="use this if you have non-standard folds") ++ parser.add_argument("--strict", required=False, default=True, action="store_true", ++ help="set this flag if you want this script to crash of one of the models is missing") ++ ++ args = parser.parse_args() ++ tasks = [int(i) for i in args.task_ids] ++ ++ models = args.models ++ tr = args.tr ++ trc = args.ctr ++ strict = args.strict ++ pl = args.pl ++ folds = tuple(int(i) for i in args.folds) ++ ++ validation_folder = "validation_raw" ++ ++ # this script now acts independently from the summary jsons. 
That was unnecessary ++ id_task_mapping = {} ++ # for each task, run ensembling using all combinations of two models ++ for t in tasks: ++ # first collect pure model performance (postprocessed) ++ results = {} ++ all_results = {} ++ valid_models = [] ++ for m in models: ++ try: ++ if m == "3d_cascade_fullres": ++ trainer = trc ++ else: ++ trainer = tr ++ ++ if t not in id_task_mapping.keys(): ++ task_name = find_task_name(get_output_folder_name(m), t) ++ id_task_mapping[t] = task_name ++ ++ output_folder = get_output_folder_name(m, id_task_mapping[t], trainer, pl) ++ assert isdir(output_folder), "Output folder for model %s is missing, expected: %s" % (m, output_folder) ++ ++ # we need a postprocessing_json for inference, so that must be present ++ postprocessing_json = join(output_folder, "postprocessing.json") ++ # we need cv_niftis_postprocessed to know the single model performance ++ cv_niftis_folder = join(output_folder, "cv_niftis_raw") ++ if not isfile(postprocessing_json) or not isdir(cv_niftis_folder): ++ print("running missing postprocessing for %s and model %s" % (id_task_mapping[t], m)) ++ consolidate_folds(output_folder, folds=folds) ++ assert isfile(postprocessing_json), "Postprocessing json missing, expected: %s" % postprocessing_json ++ assert isdir(cv_niftis_folder), "Folder with niftis from CV missing, expected: %s" % cv_niftis_folder ++ ++ # obtain mean foreground dice ++ summary_file = join(cv_niftis_folder, "summary.json") ++ results[m] = get_mean_foreground_dice(summary_file) ++ foreground_mean(summary_file) ++ all_results[m] = load_json(summary_file)['results']['mean'] ++ valid_models.append(m) ++ ++ except Exception as e: ++ if strict: ++ raise e ++ else: ++ print("WARNING!") ++ print(e) ++ ++ # now run ensembling and add ensembling to results ++ print("\nFound the following valid models:\n", valid_models) ++ if len(valid_models) > 1: ++ for m1, m2 in combinations(valid_models, 2): ++ ++ trainer_m1 = trc if m1 == "3d_cascade_fullres" else tr ++ trainer_m2 = trc if m2 == "3d_cascade_fullres" else tr ++ ++ ensemble_name = "ensemble_" + m1 + "__" + trainer_m1 + "__" + pl + "--" + m2 + "__" + trainer_m2 + "__" + pl ++ output_folder_base = join(network_training_output_dir, "ensembles", id_task_mapping[t], ensemble_name) ++ maybe_mkdir_p(output_folder_base) ++ ++ network1_folder = get_output_folder_name(m1, id_task_mapping[t], trainer_m1, pl) ++ network2_folder = get_output_folder_name(m2, id_task_mapping[t], trainer_m2, pl) ++ ++ print("ensembling", network1_folder, network2_folder) ++ ensemble(network1_folder, network2_folder, output_folder_base, id_task_mapping[t], validation_folder, folds) ++ # ensembling will automatically do postprocessingget_foreground_mean ++ ++ # now get result of ensemble ++ results[ensemble_name] = get_mean_foreground_dice(join(output_folder_base, "ensembled_raw", "summary.json")) ++ summary_file = join(output_folder_base, "ensembled_raw", "summary.json") ++ foreground_mean(summary_file) ++ all_results[ensemble_name] = load_json(summary_file)['results']['mean'] ++ ++ # now print all mean foreground dice and highlight the best ++ foreground_dices = list(results.values()) ++ best = np.max(foreground_dices) ++ for k, v in results.items(): ++ print(k, v) ++ ++ predict_str = "" ++ best_model = None ++ for k, v in results.items(): ++ if v == best: ++ print("%s submit model %s" % (id_task_mapping[t], k), v) ++ best_model = k ++ print("\nHere is how you should predict test cases. 
Run in sequential order and replace all input and output folder names with your personalized ones\n") ++ if k.startswith("ensemble"): ++ tmp = k[len("ensemble_"):] ++ model1, model2 = tmp.split("--") ++ m1, t1, pl1 = model1.split("__") ++ m2, t2, pl2 = model2.split("__") ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + m1 + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL2 -tr " + tr + " -ctr " + trc + " -m " + m2 + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ ++ predict_str += "nnUNet_ensemble -f OUTPUT_FOLDER_MODEL1 OUTPUT_FOLDER_MODEL2 -o OUTPUT_FOLDER -pp " + join(network_training_output_dir, "ensembles", id_task_mapping[t], k, "postprocessing.json") + "\n" ++ else: ++ predict_str += "nnUNet_predict -i FOLDER_WITH_TEST_CASES -o OUTPUT_FOLDER_MODEL1 -tr " + tr + " -ctr " + trc + " -m " + k + " -p " + pl + " -t " + \ ++ id_task_mapping[t] + "\n" ++ print(predict_str) ++ ++ summary_folder = join(network_training_output_dir, "ensembles", id_task_mapping[t]) ++ maybe_mkdir_p(summary_folder) ++ with open(join(summary_folder, "prediction_commands.txt"), 'w') as f: ++ f.write(predict_str) ++ ++ num_classes = len([i for i in all_results[best_model].keys() if i != 'mean']) ++ with open(join(summary_folder, "summary.csv"), 'w') as f: ++ f.write("model") ++ for c in range(1, num_classes): ++ f.write(",class%d" % c) ++ f.write(",average") ++ f.write("\n") ++ for m in all_results.keys(): ++ f.write(m) ++ for c in range(1, num_classes): ++ f.write(",%01.4f" % all_results[m][str(c)]["Dice"]) ++ f.write(",%01.4f" % all_results[m]['mean']["Dice"]) ++ f.write("\n") ++ ++ ++if __name__ == "__main__": ++ main() diff --git a/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py b/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py index cf5285a..a0384f0 100644 --- a/pytorch/nnunet/experiment_planning/nnUNet_convert_decathlon_task.py @@ -269,41 +269,41 @@ index 0000000..5113f93 --- /dev/null +++ b/pytorch/nnunet/hyp_getnpz.py @@ -0,0 +1,36 @@ -+import numpy as np -+import os -+import nibabel as nib -+import pickle -+ -+ -+raw_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTr/liver_0_0000.nii.gz' -+crop_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_cropped_data/Task003_Liver/liver_0.npz' -+crop_data = '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetData_plans_v2.1_stage0/liver_0.npz' -+pickle_data = '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl' -+ -+print('start') -+ -+p_data = pickle.load(open(pickle_data, 'rb')) -+ -+ -+ -+c_data = np.load(crop_data) -+print(c_data.files) -+ -+r_data = nib.load(raw_data).get_data() -+r_data = r_data / np.amax(r_data) -+ -+min2 = min(r_data) -+ -+for i in range(512): -+ for j in range(512): -+ for k in range(75): -+ data1 = r_data[i][j][k] -+ data2 = c_data.f.data[0][k][i][j] -+ if data1 != data2: -+ print("wrong") -+ break -+ -+ ++import numpy as np ++import os ++import nibabel as nib ++import pickle ++ ++ ++raw_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTr/liver_0_0000.nii.gz' ++crop_data = '/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_cropped_data/Task003_Liver/liver_0.npz' ++crop_data = 
'/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetData_plans_v2.1_stage0/liver_0.npz' ++pickle_data = '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl' ++ ++print('start') ++ ++p_data = pickle.load(open(pickle_data, 'rb')) ++ ++ ++ ++c_data = np.load(crop_data) ++print(c_data.files) ++ ++r_data = nib.load(raw_data).get_data() ++r_data = r_data / np.amax(r_data) ++ ++min2 = min(r_data) ++ ++for i in range(512): ++ for j in range(512): ++ for k in range(75): ++ data1 = r_data[i][j][k] ++ data2 = c_data.f.data[0][k][i][j] ++ if data1 != data2: ++ print("wrong") ++ break ++ ++ +print('end') \ No newline at end of file diff --git a/pytorch/nnunet/inference/copy_val_to_test.py b/pytorch/nnunet/inference/copy_val_to_test.py @@ -312,183 +312,183 @@ index 0000000..405345b --- /dev/null +++ b/pytorch/nnunet/inference/copy_val_to_test.py @@ -0,0 +1,19 @@ -+import os -+import shutil -+ -+# fold = 0 -+val_folder = '/root/heyupeng/environment/Task03_Liver/imagesTr/' -+test_folder = '/root/heyupeng/environment/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/' -+val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77, -+ 82] -+print('val_list:', val_list) -+for val in val_list: -+ source_file = 'liver_' + str(val) + '.nii.gz' -+ source_path = os.path.join(val_folder, source_file) -+ target_file = 'liver_' + str(val) + '_0000.nii.gz' -+ target_path = os.path.join(test_folder, target_file) -+ print('copy: ', source_path, '->', target_path) -+ shutil.copyfile(source_path, target_path) -+print('done') -+ -+ ++import os ++import shutil ++ ++# fold = 0 ++val_folder = '/root/heyupeng/environment/Task03_Liver/imagesTr/' ++test_folder = '/root/heyupeng/environment/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/' ++val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77, ++ 82] ++print('val_list:', val_list) ++for val in val_list: ++ source_file = 'liver_' + str(val) + '.nii.gz' ++ source_path = os.path.join(val_folder, source_file) ++ target_file = 'liver_' + str(val) + '_0000.nii.gz' ++ target_path = os.path.join(test_folder, target_file) ++ print('copy: ', source_path, '->', target_path) ++ shutil.copyfile(source_path, target_path) ++print('done') ++ ++ diff --git a/pytorch/nnunet/inference/create_testset.py b/pytorch/nnunet/inference/create_testset.py new file mode 100644 index 0000000..cd13c1e --- /dev/null +++ b/pytorch/nnunet/inference/create_testset.py @@ -0,0 +1,28 @@ -+import os -+import pdb -+import sys -+import shutil -+ -+ -+def main(input_path): -+ if input_path is None: -+ raise Exception('Parameter need to be filled in: input_path') -+ env_dist = os.environ -+ p1 = env_dist.get('nnUNet_raw_data_base') -+ val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, -+ 77, 82] # 数据集的验证集部分 -+ p2 = 'nnUNet_raw_data/Task003_Liver/imagesTr/' -+ target_path = os.path.join(p1, p2) -+ for v in val_list: -+ file_name = 'liver_' + str(v) + '_0000.nii.gz' -+ file_path = os.path.join(target_path, file_name) -+ # pdb.set_trace() -+ print('copy file:[', file_path, '] to folder:', input_path) -+ shutil.copy(file_path, input_path) -+ print('done') -+ -+ -+ -+if __name__ == "__main__": -+ input_path = sys.argv[1] -+ main(input_path) ++import os ++import pdb ++import sys ++import shutil ++ ++ ++def main(input_path): ++ if input_path is None: ++ 
raise Exception('Parameter need to be filled in: input_path') ++ env_dist = os.environ ++ p1 = env_dist.get('nnUNet_raw_data_base') ++ val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, ++ 77, 82] # 数据集的验证集部分 ++ p2 = 'nnUNet_raw_data/Task003_Liver/imagesTr/' ++ target_path = os.path.join(p1, p2) ++ for v in val_list: ++ file_name = 'liver_' + str(v) + '_0000.nii.gz' ++ file_path = os.path.join(target_path, file_name) ++ # pdb.set_trace() ++ print('copy file:[', file_path, '] to folder:', input_path) ++ shutil.copy(file_path, input_path) ++ print('done') ++ ++ ++ ++if __name__ == "__main__": ++ input_path = sys.argv[1] ++ main(input_path) diff --git a/pytorch/nnunet/inference/delete_other_data.py b/pytorch/nnunet/inference/delete_other_data.py new file mode 100644 index 0000000..b58367f --- /dev/null +++ b/pytorch/nnunet/inference/delete_other_data.py @@ -0,0 +1,30 @@ -+import os -+import pdb -+ -+ -+def listdir(path, list_name): -+ for file in os.listdir(path): -+ file_path = os.path.join(path, file) -+ if os.path.isdir(file_path): -+ listdir(file_path, list_name) -+ elif os.path.splitext(file_path)[1] == '.gz': -+ list_name.append(file_path) -+ return list_name -+ -+val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77, -+ 82] -+target_folder = ['imagesTr', 'labelsTr', 'imagesTs'] -+for i in range(len(target_folder)): -+ t = target_folder[i] -+ if i == 2: -+ val_list = [132] -+ p = os.path.join('./Task03_Liver/', t) -+ files = [] -+ files = listdir(p, files) -+ files = set(files) -+ for e in val_list: -+ str_e = './Task03_Liver/' + t + '/liver_' + str(e) + '.nii.gz' -+ files.remove(str_e) -+ for f in files: -+ os.remove(f) -+print('end') ++import os ++import pdb ++ ++ ++def listdir(path, list_name): ++ for file in os.listdir(path): ++ file_path = os.path.join(path, file) ++ if os.path.isdir(file_path): ++ listdir(file_path, list_name) ++ elif os.path.splitext(file_path)[1] == '.gz': ++ list_name.append(file_path) ++ return list_name ++ ++val_list = [101, 11, 112, 115, 12, 120, 128, 17, 19, 24, 25, 27, 3, 38, 40, 41, 42, 44, 5, 51, 52, 58, 64, 70, 75, 77, ++ 82] ++target_folder = ['imagesTr', 'labelsTr', 'imagesTs'] ++for i in range(len(target_folder)): ++ t = target_folder[i] ++ if i == 2: ++ val_list = [132] ++ p = os.path.join('./Task03_Liver/', t) ++ files = [] ++ files = listdir(p, files) ++ files = set(files) ++ for e in val_list: ++ str_e = './Task03_Liver/' + t + '/liver_' + str(e) + '.nii.gz' ++ files.remove(str_e) ++ for f in files: ++ os.remove(f) ++print('end') diff --git a/pytorch/nnunet/inference/gen_dataset_info.py b/pytorch/nnunet/inference/gen_dataset_info.py new file mode 100644 index 0000000..d1cb265 --- /dev/null +++ b/pytorch/nnunet/inference/gen_dataset_info.py @@ -0,0 +1,83 @@ -+""" -+ Copyright 2020 Huawei Technologies Co., Ltd -+ -+ Licensed under the Apache License, Version 2.0 (the "License"); -+ you may not use this file except in compliance with the License. -+ You may obtain a copy of the License at -+ -+ http://www.apache.org/licenses/LICENSE-2.0 -+ -+ Unless required by applicable law or agreed to in writing, software -+ distributed under the License is distributed on an "AS IS" BASIS, -+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ See the License for the specific language governing permissions and -+ limitations under the License. 
-+ Typical usage example: -+""" -+import os -+import sys -+from glob import glob -+import pdb -+ -+ -+def get_bin_info(file_path, info_name, shape, split4=True): -+ """ -+ @description: get given bin information -+ @param file_path bin file path -+ @param info_name given information name -+ @param shape image shape -+ @return -+ """ -+ bin_images = glob(os.path.join(file_path, '*.bin')) -+ with open(info_name, 'w') as file: -+ for index, img in enumerate(bin_images): -+ content = ' '.join([str(index), img, shape[0], shape[1]]) -+ file.write(content) -+ file.write('\n') -+ print('info已写入:', info_name) -+ if split4: # 是否切割为4卡的info -+ sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info'] -+ for i in range(len(sths)): -+ s = sths[i] -+ s = os.path.join(info_name, '..', s) -+ sths[i] = s -+ length = len(bin_images) -+ step = length // 4 -+ b1 = bin_images[0: step] -+ b2 = bin_images[step: 2*step] -+ b3 = bin_images[2*step: 3*step] -+ b4 = bin_images[3*step:] -+ with open(sths[0], 'w') as file: -+ for index, img in enumerate(b1): -+ content = ' '.join([str(index), img, shape[0], shape[1]]) -+ file.write(content) -+ file.write('\n') -+ with open(sths[1], 'w') as file: -+ for index, img in enumerate(b2): -+ content = ' '.join([str(index), img, shape[0], shape[1]]) -+ file.write(content) -+ file.write('\n') -+ with open(sths[2], 'w') as file: -+ for index, img in enumerate(b3): -+ content = ' '.join([str(index), img, shape[0], shape[1]]) -+ file.write(content) -+ file.write('\n') -+ with open(sths[3], 'w') as file: -+ for index, img in enumerate(b4): -+ content = ' '.join([str(index), img, shape[0], shape[1]]) -+ file.write(content) -+ file.write('\n') -+ print('成功切分为四个子集', sths) -+ -+ -+if __name__ == '__main__': -+ file_type = sys.argv[1] -+ file_path = sys.argv[2] -+ info_name = sys.argv[3] -+ if file_type == 'bin': -+ shape1 = sys.argv[4] -+ shape2 = sys.argv[5] -+ shape = [shape1, shape2] -+ assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' -+ get_bin_info(file_path, info_name, shape) ++""" ++ Copyright 2020 Huawei Technologies Co., Ltd ++ ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ See the License for the specific language governing permissions and ++ limitations under the License. 
++ Typical usage example: ++""" ++import os ++import sys ++from glob import glob ++import pdb ++ ++ ++def get_bin_info(file_path, info_name, shape, split4=True): ++ """ ++ @description: get given bin information ++ @param file_path bin file path ++ @param info_name given information name ++ @param shape image shape ++ @return ++ """ ++ bin_images = glob(os.path.join(file_path, '*.bin')) ++ with open(info_name, 'w') as file: ++ for index, img in enumerate(bin_images): ++ content = ' '.join([str(index), img, shape[0], shape[1]]) ++ file.write(content) ++ file.write('\n') ++ print('info已写入:', info_name) ++ if split4: # 是否切割为4卡的info ++ sths = ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info'] ++ for i in range(len(sths)): ++ s = sths[i] ++ s = os.path.join(info_name, '..', s) ++ sths[i] = s ++ length = len(bin_images) ++ step = length // 4 ++ b1 = bin_images[0: step] ++ b2 = bin_images[step: 2*step] ++ b3 = bin_images[2*step: 3*step] ++ b4 = bin_images[3*step:] ++ with open(sths[0], 'w') as file: ++ for index, img in enumerate(b1): ++ content = ' '.join([str(index), img, shape[0], shape[1]]) ++ file.write(content) ++ file.write('\n') ++ with open(sths[1], 'w') as file: ++ for index, img in enumerate(b2): ++ content = ' '.join([str(index), img, shape[0], shape[1]]) ++ file.write(content) ++ file.write('\n') ++ with open(sths[2], 'w') as file: ++ for index, img in enumerate(b3): ++ content = ' '.join([str(index), img, shape[0], shape[1]]) ++ file.write(content) ++ file.write('\n') ++ with open(sths[3], 'w') as file: ++ for index, img in enumerate(b4): ++ content = ' '.join([str(index), img, shape[0], shape[1]]) ++ file.write(content) ++ file.write('\n') ++ print('成功切分为四个子集', sths) ++ ++ ++if __name__ == '__main__': ++ file_type = sys.argv[1] ++ file_path = sys.argv[2] ++ info_name = sys.argv[3] ++ if file_type == 'bin': ++ shape1 = sys.argv[4] ++ shape2 = sys.argv[5] ++ shape = [shape1, shape2] ++ assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' ++ get_bin_info(file_path, info_name, shape) + print('end main') \ No newline at end of file diff --git a/pytorch/nnunet/inference/infer_path.py b/pytorch/nnunet/inference/infer_path.py @@ -583,1302 +583,1302 @@ index 0000000..263dbd2 --- /dev/null +++ b/pytorch/nnunet/inference/predict2.py @@ -0,0 +1,845 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. 
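The invocation of gen_dataset_info.py above is purely positional (file type, bin folder, info file name, then the two shape values, six argv entries in total), and the four per-device split files are built with os.path.join(info_name, '..', s), which places them in the directory that holds the info file. A small sketch of both points, with placeholder paths:

import os

# Hypothetical invocation of the script above:
#   python gen_dataset_info.py bin ./prep_bins ./info/liver.info 128 128
info_name = './info/liver.info'
for s in ['sth1.info', 'sth2.info', 'sth3.info', 'sth4.info']:
    split_path = os.path.join(info_name, '..', s)   # same construction as the script
    print(split_path, '->', os.path.normpath(split_path))
# ./info/liver.info/../sth1.info -> info/sth1.info, and so on for sth2..sth4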
-+ -+ -+import argparse -+from copy import deepcopy -+from typing import Tuple, Union, List -+ -+import numpy as np -+from batchgenerators.augmentations.utils import resize_segmentation -+from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax, save_segmentation_nifti -+from batchgenerators.utilities.file_and_folder_operations import * -+from multiprocessing import Process, Queue -+import torch -+import SimpleITK as sitk -+import shutil -+from multiprocessing import Pool -+from nnunet.postprocessing.connected_components import load_remove_save, load_postprocessing -+from nnunet.training.model_restore import load_model_and_checkpoint_files -+from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer -+from nnunet.utilities.one_hot_encoding import to_one_hot -+from nnunet.utilities.to_torch import maybe_to_torch, to_cuda -+import pdb -+ -+ -+def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files, segs_from_prev_stage, classes, -+ transpose_forward): -+ # suppress output -+ # sys.stdout = open(os.devnull, 'w') -+ -+ errors_in = [] -+ for i, l in enumerate(list_of_lists): -+ try: -+ output_file = output_files[i] -+ print("preprocessing", output_file) -+ d, _, dct = preprocess_fn(l) -+ # print(output_file, dct) -+ if segs_from_prev_stage[i] is not None: -+ assert isfile(segs_from_prev_stage[i]) and segs_from_prev_stage[i].endswith( -+ ".nii.gz"), "segs_from_prev_stage" \ -+ " must point to a " \ -+ "segmentation file" -+ seg_prev = sitk.GetArrayFromImage(sitk.ReadImage(segs_from_prev_stage[i])) -+ # check to see if shapes match -+ img = sitk.GetArrayFromImage(sitk.ReadImage(l[0])) -+ assert all([i == j for i, j in zip(seg_prev.shape, img.shape)]), "image and segmentation from previous " \ -+ "stage don't have the same pixel array " \ -+ "shape! image: %s, seg_prev: %s" % \ -+ (l[0], segs_from_prev_stage[i]) -+ seg_prev = seg_prev.transpose(transpose_forward) -+ seg_reshaped = resize_segmentation(seg_prev, d.shape[1:], order=1, cval=0) -+ seg_reshaped = to_one_hot(seg_reshaped, classes) -+ d = np.vstack((d, seg_reshaped)).astype(np.float32) -+ """There is a problem with python process communication that prevents us from communicating obejcts -+ larger than 2 GB between processes (basically when the length of the pickle string that will be sent is -+ communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long -+ enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually -+ patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will -+ then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either -+ filename or np.ndarray and will handle this automatically""" -+ print(d.shape) -+ if np.prod(d.shape) > (2e9 / 4 * 0.85): # *0.85 just to be save, 4 because float32 is 4 bytes -+ print( -+ "This output is too large for python process-process communication. 
" -+ "Saving output temporarily to disk") -+ np.save(output_file[:-7] + ".npy", d) -+ d = output_file[:-7] + ".npy" -+ q.put((output_file, (d, dct))) -+ except KeyboardInterrupt: -+ raise KeyboardInterrupt -+ except Exception as e: -+ print("error in", l) -+ print(e) -+ q.put("end") -+ if len(errors_in) > 0: -+ print("There were some errors in the following cases:", errors_in) -+ print("These cases were ignored.") -+ else: -+ print("This worker has ended successfully, no errors to report") -+ # restore output -+ # sys.stdout = sys.__stdout__ -+ -+ -+def preprocess_multithreaded(trainer, list_of_lists, output_files, num_processes=2, segs_from_prev_stage=None): -+ if segs_from_prev_stage is None: -+ segs_from_prev_stage = [None] * len(list_of_lists) -+ -+ num_processes = min(len(list_of_lists), num_processes) -+ -+ classes = list(range(1, trainer.num_classes)) -+ assert isinstance(trainer, nnUNetTrainer) -+ q = Queue(1) -+ processes = [] -+ for i in range(num_processes): -+ pr = Process(target=preprocess_save_to_queue, args=(trainer.preprocess_patient, q, -+ list_of_lists[i::num_processes], -+ output_files[i::num_processes], -+ segs_from_prev_stage[i::num_processes], -+ classes, trainer.plans['transpose_forward'])) -+ pr.start() -+ processes.append(pr) -+ -+ try: -+ end_ctr = 0 -+ while end_ctr != num_processes: -+ item = q.get() -+ if item == "end": -+ end_ctr += 1 -+ continue -+ else: -+ yield item -+ -+ finally: -+ for p in processes: -+ if p.is_alive(): -+ p.terminate() # this should not happen but better safe than sorry right -+ p.join() -+ -+ q.close() -+ -+ -+def pth2onnx(model, output_file=r'/home/yupeng/HUAWEI/UNetPlusPlus/pytorch/nnunet/run/nnunetplusplus.onnx'): -+ # model = EfficientNet.from_pretrained('efficientnet-b0', weights_path=input_file) -+ # 调整模型为eval mode -+ model.eval() -+ # 输入节点名 -+ input_names = ["image"] -+ # 输出节点名 -+ output_names = ["class"] -+ dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} -+ dummy_input = torch.randn(1, 1, 128, 128, 128) -+ # dummy_input = to_cuda(dummy_input) -+ # verbose=True,支持打印onnx节点和对应的PyTorch代码行 -+ torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, -+ output_names=output_names, opset_version=11, verbose=True) -+ -+ -+def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, num_threads_preprocessing, -+ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, overwrite_existing=False, -+ all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint", -+ segmentation_export_kwargs: dict = None, pre_mode=None, fp=None): -+ """ -+ :param segmentation_export_kwargs: -+ :param model: folder where the model is saved, must contain fold_x subfolders -+ :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...] -+ :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...] -+ :param folds: default: (0, 1, 2, 3, 4) (but can also be 'all' or a subset of the five folds, for example use (0, ) -+ for using only fold_0 -+ :param save_npz: default: False -+ :param num_threads_preprocessing: -+ :param num_threads_nifti_save: -+ :param segs_from_prev_stage: -+ :param do_tta: default: True, can be set to False for a 8x speedup at the cost of a reduced segmentation quality -+ :param overwrite_existing: default: True -+ :param mixed_precision: if None then we take no action. 
If True/False we overwrite what the model has in its init -+ :return: -+ """ -+ assert len(list_of_lists) == len(output_filenames) -+ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) -+ -+ pool = Pool(num_threads_nifti_save) -+ results = [] -+ -+ cleaned_output_files = [] -+ for o in output_filenames: -+ dr, f = os.path.split(o) -+ if len(dr) > 0: -+ maybe_mkdir_p(dr) -+ if not f.endswith(".nii.gz"): -+ f, _ = os.path.splitext(f) -+ f = f + ".nii.gz" -+ cleaned_output_files.append(join(dr, f)) -+ -+ if not overwrite_existing: -+ print("number of cases:", len(list_of_lists)) -+ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] -+ -+ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] -+ list_of_lists = [list_of_lists[i] for i in not_done_idx] -+ if segs_from_prev_stage is not None: -+ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] -+ -+ print("number of cases that still need to be predicted:", len(cleaned_output_files)) -+ -+ print("emptying cuda cache") -+ torch.cuda.empty_cache() -+ ''' -+ model='/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1' -+ folds=None -+ mixed_precision=True -+ checkpoint_name='model_final_checkpoint' -+ trainer=class-nnUNetPlusPlusTrainerV2 -+ params=list 5 -> dict 6 -> epoch state_dict optimizer_state_dict lr_scheduler_state_dict plot_stuff amp_grad_scaler -+ ''' -+ print("loading parameters for folds,", folds) # 得到参数,实际还未加载进模型 -+ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) -+ -+ if segmentation_export_kwargs is None: -+ if 'segmentation_export_params' in trainer.plans.keys(): -+ force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z'] -+ interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order'] -+ interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z'] -+ else: # 走到这里 -+ force_separate_z = None -+ interpolation_order = 1 -+ interpolation_order_z = 0 -+ else: -+ force_separate_z = segmentation_export_kwargs['force_separate_z'] -+ interpolation_order = segmentation_export_kwargs['interpolation_order'] -+ interpolation_order_z = segmentation_export_kwargs['interpolation_order_z'] -+ -+ print("starting preprocessing generator") -+ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, -+ segs_from_prev_stage) -+ # unet++V2class, [['/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/liver_132_0000.nii.gz']] -+ # ['/data/yupeng/environment_variables/output/liver_132.nii.gz'], 6, None -+ print("starting prediction...") -+ if int(pre_mode) == -1: -+ p = params[0] -+ trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 -+ print('pth2onnx start') -+ pth2onnx(trainer.network, fp) -+ print('pth2onnx end') -+ print('onnx模型已经输出至:', fp) -+ import sys -+ sys.exit(0) -+ all_output_files = [] -+ for preprocessed in preprocessing: -+ output_filename, (d, dct) = preprocessed -+ print('output_filename, d, dct = ', output_filename, d, dct) -+ all_output_files.append(all_output_files) -+ if isinstance(d, str): -+ data = np.load(d) -+ os.remove(d) -+ d = data -+ print("predicting", output_filename) -+ softmax = [] -+ params = [params[0]] # 只求第一个模型的推理结果 -+ for p in params: -+ # 
trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 -+ # output_filename = '/data/yupeng/environment_variables/output/liver_132.nii.gz' -+ ttttt = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, trainer.data_aug_params[ -+ 'mirror_axes'], True, step_size=step_size, use_gaussian=True, all_in_gpu=all_in_gpu, -+ mixed_precision=mixed_precision, img_name=output_filename, pre_mode=pre_mode, fp=fp) # tuple(ndarray 489 500 500; 3 489 500 500) -+ softmax.append(ttttt[1][None]) # 扩充了1 3 489 500 500 -+ ''' -+ d= -+ do_tta= -+ step_size= -+ all_in_gpu= -+ mixed_precision= -+ softmax= -+ ''' -+ # softmax是list 5,每个元素是ndarray 1 3 489 500 500 -+ softmax = np.vstack(softmax) # 5 3 489 500 500 -+ softmax_mean = np.mean(softmax, 0) # 3 489 500 500 -+ -+ transpose_forward = trainer.plans.get('transpose_forward') # [0,1,2] -+ if transpose_forward is not None: -+ transpose_backward = trainer.plans.get('transpose_backward') -+ softmax_mean = softmax_mean.transpose([0] + [i + 1 for i in transpose_backward]) -+ -+ if save_npz: # False -+ npz_file = output_filename[:-7] + ".npz" -+ else: -+ npz_file = None -+ -+ if hasattr(trainer, 'regions_class_order'): # False -+ region_class_order = trainer.regions_class_order -+ else: -+ region_class_order = None -+ -+ """There is a problem with python process communication that prevents us from communicating obejcts -+ larger than 2 GB between processes (basically when the length of the pickle string that will be sent is -+ communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long -+ enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually -+ patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will -+ then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either -+ filename or np.ndarray and will handle this automatically""" -+ bytes_per_voxel = 4 -+ if all_in_gpu: -+ bytes_per_voxel = 2 # if all_in_gpu then the return value is half (float16) -+ if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85): # * 0.85 just to be save -+ print( -+ "This output is too large for python process-process communication. Saving output temporarily to disk") -+ np.save(output_filename[:-7] + ".npy", softmax_mean) -+ softmax_mean = output_filename[:-7] + ".npy" -+ -+ results.append(pool.starmap_async(save_segmentation_nifti_from_softmax, -+ ((softmax_mean, output_filename, dct, interpolation_order, region_class_order, -+ None, None, -+ npz_file, None, force_separate_z, interpolation_order_z),) -+ )) -+ -+ print("inference done. Now waiting for the segmentation export to finish...") -+ _ = [i.get() for i in results] -+ # now apply postprocessing -+ # first load the postprocessing properties if they are present. 
Else raise a well visible warning -+ results = [] -+ pp_file = join(model, "postprocessing.json") # '/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/postprocessing.json' -+ if isfile(pp_file): -+ print("postprocessing...") -+ shutil.copy(pp_file, os.path.abspath(os.path.dirname(output_filenames[0]))) -+ # for_which_classes stores for which of the classes everything but the largest connected component needs to be -+ # removed -+ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) -+ results.append(pool.starmap_async(load_remove_save, -+ zip(output_filenames, output_filenames, -+ [for_which_classes] * len(output_filenames), -+ [min_valid_obj_size] * len(output_filenames)))) -+ _ = [i.get() for i in results] -+ else: -+ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " -+ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " -+ "%s" % model) -+ -+ pool.close() -+ pool.join() -+ -+def predict_cases_fast(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, -+ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, -+ overwrite_existing=False, -+ all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint", -+ segmentation_export_kwargs: dict = None): -+ assert len(list_of_lists) == len(output_filenames) -+ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) -+ -+ pool = Pool(num_threads_nifti_save) -+ results = [] -+ -+ cleaned_output_files = [] -+ for o in output_filenames: -+ dr, f = os.path.split(o) -+ if len(dr) > 0: -+ maybe_mkdir_p(dr) -+ if not f.endswith(".nii.gz"): -+ f, _ = os.path.splitext(f) -+ f = f + ".nii.gz" -+ cleaned_output_files.append(join(dr, f)) -+ -+ if not overwrite_existing: -+ print("number of cases:", len(list_of_lists)) -+ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] -+ -+ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] -+ list_of_lists = [list_of_lists[i] for i in not_done_idx] -+ if segs_from_prev_stage is not None: -+ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] -+ -+ print("number of cases that still need to be predicted:", len(cleaned_output_files)) -+ -+ print("emptying cuda cache") -+ torch.cuda.empty_cache() -+ -+ print("loading parameters for folds,", folds) -+ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) -+ -+ if segmentation_export_kwargs is None: -+ if 'segmentation_export_params' in trainer.plans.keys(): -+ force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z'] -+ interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order'] -+ interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z'] -+ else: -+ force_separate_z = None -+ interpolation_order = 1 -+ interpolation_order_z = 0 -+ else: -+ force_separate_z = segmentation_export_kwargs['force_separate_z'] -+ interpolation_order = segmentation_export_kwargs['interpolation_order'] -+ interpolation_order_z = segmentation_export_kwargs['interpolation_order_z'] -+ -+ print("starting preprocessing generator") -+ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, -+ segs_from_prev_stage) -+ -+ 
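When predict2.py is run with pre_mode == -1, predict_cases() loads the first checkpoint into trainer.network and hands it to the pth2onnx() helper defined above, which exports it via torch.onnx.export with named inputs/outputs, a dynamic batch axis and opset 11. A self-contained sketch of that export pattern on a stand-in 3D module; the module, file name and input size below are placeholders, not the nnU-Net++ network:

import torch
import torch.nn as nn

class TinySeg3D(nn.Module):
    # Stand-in for trainer.network, just to make the sketch runnable.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv3d(1, 3, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)

model = TinySeg3D().eval()
dummy = torch.randn(1, 1, 32, 32, 32)  # pth2onnx() uses a 1x1x128x128x128 dummy input
torch.onnx.export(
    model, dummy, 'tiny_seg3d.onnx',
    input_names=['image'], output_names=['class'],
    dynamic_axes={'image': {0: 'batch'}, 'class': {0: 'batch'}},  # dynamic batch dimension
    opset_version=11,
)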
print("starting prediction...") -+ for preprocessed in preprocessing: -+ print("getting data from preprocessor") -+ output_filename, (d, dct) = preprocessed -+ print("got something") -+ if isinstance(d, str): -+ print("what I got is a string, so I need to load a file") -+ data = np.load(d) -+ os.remove(d) -+ d = data -+ -+ # preallocate the output arrays -+ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) -+ softmax_aggr = None # np.zeros((trainer.num_classes, *d.shape[1:]), dtype=np.float16) -+ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) -+ print("predicting", output_filename) -+ -+ for i, p in enumerate(params): -+ trainer.load_checkpoint_ram(p, False) -+ -+ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, -+ trainer.data_aug_params['mirror_axes'], True, -+ step_size=step_size, use_gaussian=True, -+ all_in_gpu=all_in_gpu, -+ mixed_precision=mixed_precision) -+ -+ if len(params) > 1: -+ # otherwise we dont need this and we can save ourselves the time it takes to copy that -+ print("aggregating softmax") -+ if softmax_aggr is None: -+ softmax_aggr = res[1] -+ else: -+ softmax_aggr += res[1] -+ all_seg_outputs[i] = res[0] -+ -+ print("obtaining segmentation map") -+ if len(params) > 1: -+ # we dont need to normalize the softmax by 1 / len(params) because this would not change the outcome of the argmax -+ seg = softmax_aggr.argmax(0) -+ else: -+ seg = all_seg_outputs[0] -+ -+ print("applying transpose_backward") -+ transpose_forward = trainer.plans.get('transpose_forward') -+ if transpose_forward is not None: -+ transpose_backward = trainer.plans.get('transpose_backward') -+ seg = seg.transpose([i for i in transpose_backward]) -+ -+ print("initializing segmentation export") -+ results.append(pool.starmap_async(save_segmentation_nifti, -+ ((seg, output_filename, dct, interpolation_order, force_separate_z, -+ interpolation_order_z),) -+ )) -+ print("done") -+ -+ print("inference done. Now waiting for the segmentation export to finish...") -+ _ = [i.get() for i in results] -+ # now apply postprocessing -+ # first load the postprocessing properties if they are present. Else raise a well visible warning -+ results = [] -+ pp_file = join(model, "postprocessing.json") -+ if isfile(pp_file): -+ print("postprocessing...") -+ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) -+ # for_which_classes stores for which of the classes everything but the largest connected component needs to be -+ # removed -+ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) -+ results.append(pool.starmap_async(load_remove_save, -+ zip(output_filenames, output_filenames, -+ [for_which_classes] * len(output_filenames), -+ [min_valid_obj_size] * len(output_filenames)))) -+ _ = [i.get() for i in results] -+ else: -+ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. 
Make sure to run " -+ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " -+ "%s" % model) -+ -+ pool.close() -+ pool.join() -+ -+ -+def predict_cases_fastest(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, -+ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, -+ overwrite_existing=False, all_in_gpu=True, step_size=0.5, -+ checkpoint_name="model_final_checkpoint"): -+ assert len(list_of_lists) == len(output_filenames) -+ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) -+ -+ pool = Pool(num_threads_nifti_save) -+ results = [] -+ -+ cleaned_output_files = [] -+ for o in output_filenames: -+ dr, f = os.path.split(o) -+ if len(dr) > 0: -+ maybe_mkdir_p(dr) -+ if not f.endswith(".nii.gz"): -+ f, _ = os.path.splitext(f) -+ f = f + ".nii.gz" -+ cleaned_output_files.append(join(dr, f)) -+ -+ if not overwrite_existing: -+ print("number of cases:", len(list_of_lists)) -+ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] -+ -+ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] -+ list_of_lists = [list_of_lists[i] for i in not_done_idx] -+ if segs_from_prev_stage is not None: -+ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] -+ -+ print("number of cases that still need to be predicted:", len(cleaned_output_files)) -+ -+ print("emptying cuda cache") -+ torch.cuda.empty_cache() -+ -+ print("loading parameters for folds,", folds) -+ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) -+ -+ print("starting preprocessing generator") -+ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, -+ segs_from_prev_stage) -+ -+ print("starting prediction...") -+ for preprocessed in preprocessing: -+ print("getting data from preprocessor") -+ output_filename, (d, dct) = preprocessed -+ print("got something") -+ if isinstance(d, str): -+ print("what I got is a string, so I need to load a file") -+ data = np.load(d) -+ os.remove(d) -+ d = data -+ -+ # preallocate the output arrays -+ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) -+ all_softmax_outputs = np.zeros((len(params), trainer.num_classes, *d.shape[1:]), dtype=np.float16) -+ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) -+ print("predicting", output_filename) -+ -+ for i, p in enumerate(params): -+ trainer.load_checkpoint_ram(p, False) -+ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, -+ trainer.data_aug_params['mirror_axes'], True, -+ step_size=step_size, use_gaussian=True, -+ all_in_gpu=all_in_gpu, -+ mixed_precision=mixed_precision) -+ if len(params) > 1: -+ # otherwise we dont need this and we can save ourselves the time it takes to copy that -+ all_softmax_outputs[i] = res[1] -+ all_seg_outputs[i] = res[0] -+ -+ print("aggregating predictions") -+ if len(params) > 1: -+ softmax_mean = np.mean(all_softmax_outputs, 0) -+ seg = softmax_mean.argmax(0) -+ else: -+ seg = all_seg_outputs[0] -+ -+ print("applying transpose_backward") -+ transpose_forward = trainer.plans.get('transpose_forward') -+ if transpose_forward is not None: -+ transpose_backward = trainer.plans.get('transpose_backward') -+ seg = seg.transpose([i for i in transpose_backward]) -+ -+ print("initializing segmentation export") -+ 
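Inside predict_cases_fastest() above, fold ensembling is just an average of the per-fold softmax volumes followed by an argmax over the class axis, and it is skipped entirely when only one set of parameters is loaded. A tiny numeric sketch of that step with made-up array sizes:

import numpy as np

# Shape (folds, classes, z, y, x); the sizes are arbitrary example values.
all_softmax_outputs = np.random.rand(5, 3, 8, 8, 8).astype(np.float16)
softmax_mean = np.mean(all_softmax_outputs, 0)  # average over the fold axis
seg = softmax_mean.argmax(0)                    # per-voxel class with the highest mean score
print(seg.shape)  # (8, 8, 8)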
results.append(pool.starmap_async(save_segmentation_nifti, -+ ((seg, output_filename, dct, 0, None),) -+ )) -+ print("done") -+ -+ print("inference done. Now waiting for the segmentation export to finish...") -+ _ = [i.get() for i in results] -+ # now apply postprocessing -+ # first load the postprocessing properties if they are present. Else raise a well visible warning -+ results = [] -+ pp_file = join(model, "postprocessing.json") -+ if isfile(pp_file): -+ print("postprocessing...") -+ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) -+ # for_which_classes stores for which of the classes everything but the largest connected component needs to be -+ # removed -+ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) -+ results.append(pool.starmap_async(load_remove_save, -+ zip(output_filenames, output_filenames, -+ [for_which_classes] * len(output_filenames), -+ [min_valid_obj_size] * len(output_filenames)))) -+ _ = [i.get() for i in results] -+ else: -+ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " -+ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " -+ "%s" % model) -+ -+ pool.close() -+ pool.join() -+ -+ -+def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities): -+ print("This model expects %d input modalities for each image" % expected_num_modalities) -+ files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) -+ -+ maybe_case_ids = np.unique([i[:-12] for i in files]) -+ -+ remaining = deepcopy(files) -+ missing = [] -+ -+ assert len(files) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)" -+ -+ # now check if all required files are present and that no unexpected files are remaining -+ for c in maybe_case_ids: -+ for n in range(expected_num_modalities): -+ expected_output_file = c + "_%04.0d.nii.gz" % n -+ if not isfile(join(input_folder, expected_output_file)): -+ missing.append(expected_output_file) -+ else: -+ remaining.remove(expected_output_file) -+ -+ print("Found %d unique case ids, here are some examples:" % len(maybe_case_ids), -+ np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10))) -+ print("If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc") -+ -+ if len(remaining) > 0: -+ print("found %d unexpected remaining files in the folder. 
Here are some examples:" % len(remaining), -+ np.random.choice(remaining, min(len(remaining), 10))) -+ -+ if len(missing) > 0: -+ print("Some files are missing:") -+ print(missing) -+ raise RuntimeError("missing files in input_folder") -+ -+ return maybe_case_ids -+ -+ -+def predict_from_folder(model: str, input_folder: str, output_folder: str, folds: Union[Tuple[int], List[int]], -+ save_npz: bool, num_threads_preprocessing: int, num_threads_nifti_save: int, -+ lowres_segmentations: Union[str, None], -+ part_id: int, num_parts: int, tta: bool, mixed_precision: bool = True, -+ overwrite_existing: bool = True, mode: str = 'normal', overwrite_all_in_gpu: bool = None, -+ step_size: float = 0.5, checkpoint_name: str = "model_final_checkpoint", -+ segmentation_export_kwargs: dict = None, pre_mode=None, fp=None): -+ """ -+ here we use the standard naming scheme to generate list_of_lists and output_files needed by predict_cases -+ -+ :param model: -+ :param input_folder: -+ :param output_folder: -+ :param folds: -+ :param save_npz: -+ :param num_threads_preprocessing: -+ :param num_threads_nifti_save: -+ :param lowres_segmentations: -+ :param part_id: -+ :param num_parts: -+ :param tta: -+ :param mixed_precision: -+ :param overwrite_existing: if not None then it will be overwritten with whatever is in there. None is default (no overwrite) -+ :return: -+ """ -+ maybe_mkdir_p(output_folder) -+ shutil.copy(join(model, 'plans.pkl'), output_folder) -+ -+ assert isfile(join(model, "plans.pkl")), "Folder with saved model weights must contain a plans.pkl file" -+ expected_num_modalities = load_pickle(join(model, "plans.pkl"))['num_modalities'] -+ -+ # check input folder integrity -+ case_ids = check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities) -+ -+ output_files = [join(output_folder, i + ".nii.gz") for i in case_ids] -+ all_files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) -+ list_of_lists = [[join(input_folder, i) for i in all_files if i[:len(j)].startswith(j) and -+ len(i) == (len(j) + 12)] for j in case_ids] -+ -+ if lowres_segmentations is not None: -+ assert isdir(lowres_segmentations), "if lowres_segmentations is not None then it must point to a directory" -+ lowres_segmentations = [join(lowres_segmentations, i + ".nii.gz") for i in case_ids] -+ assert all([isfile(i) for i in lowres_segmentations]), "not all lowres_segmentations files are present. 
" \ -+ "(I was searching for case_id.nii.gz in that folder)" -+ lowres_segmentations = lowres_segmentations[part_id::num_parts] -+ else: -+ lowres_segmentations = None -+ -+ if mode == "normal": # step this -+ if overwrite_all_in_gpu is None: # True -+ all_in_gpu = False -+ else: -+ all_in_gpu = overwrite_all_in_gpu -+ -+ return predict_cases(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, -+ save_npz, num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, tta, -+ mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, -+ step_size=step_size, checkpoint_name=checkpoint_name, -+ segmentation_export_kwargs=segmentation_export_kwargs, pre_mode=pre_mode, fp=fp) -+ elif mode == "fast": -+ if overwrite_all_in_gpu is None: -+ all_in_gpu = True -+ else: -+ all_in_gpu = overwrite_all_in_gpu -+ -+ assert save_npz is False -+ return predict_cases_fast(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, -+ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, -+ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, -+ step_size=step_size, checkpoint_name=checkpoint_name, -+ segmentation_export_kwargs=segmentation_export_kwargs) -+ elif mode == "fastest": -+ if overwrite_all_in_gpu is None: -+ all_in_gpu = True -+ else: -+ all_in_gpu = overwrite_all_in_gpu -+ -+ assert save_npz is False -+ return predict_cases_fastest(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, -+ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, -+ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, -+ step_size=step_size, checkpoint_name=checkpoint_name) -+ else: -+ raise ValueError("unrecognized mode. Must be normal, fast or fastest") -+ -+ -+if __name__ == "__main__": -+ parser = argparse.ArgumentParser() -+ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" -+ " order (same as training). Files must be named " -+ "CASENAME_XXXX.nii.gz where XXXX is the modality " -+ "identifier (0000, 0001, etc)", required=True) -+ parser.add_argument('-o', "--output_folder", required=True, help="folder for saving predictions") -+ parser.add_argument('-m', '--model_output_folder', -+ help='model output folder. Will automatically discover the folds ' -+ 'that were ' -+ 'run and use those as an ensemble', required=True) -+ parser.add_argument('-f', '--folds', nargs='+', default='None', help="folds to use for prediction. Default is None " -+ "which means that folds will be detected " -+ "automatically in the model output folder") -+ parser.add_argument('-z', '--save_npz', required=False, action='store_true', help="use this if you want to ensemble" -+ " these predictions with those of" -+ " other models. Softmax " -+ "probabilities will be saved as " -+ "compresed numpy arrays in " -+ "output_folder and can be merged " -+ "between output_folders with " -+ "merge_predictions.py") -+ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', help="if model is the highres " -+ "stage of the cascade then you need to use -l to specify where the segmentations of the " -+ "corresponding lowres unet are. 
Here they are required to do a prediction") -+ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of " -+ "the folder over several GPUs. If you " -+ "want to use n GPUs to predict this " -+ "folder you need to run this command " -+ "n times with --part_id=0, ... n-1 and " -+ "--num_parts=n (each with a different " -+ "GPU (for example via " -+ "CUDA_VISIBLE_DEVICES=X)") -+ parser.add_argument("--num_parts", type=int, required=False, default=1, -+ help="Used to parallelize the prediction of " -+ "the folder over several GPUs. If you " -+ "want to use n GPUs to predict this " -+ "folder you need to run this command " -+ "n times with --part_id=0, ... n-1 and " -+ "--num_parts=n (each with a different " -+ "GPU (via " -+ "CUDA_VISIBLE_DEVICES=X)") -+ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help= -+ "Determines many background processes will be used for data preprocessing. Reduce this if you " -+ "run into out of memory (RAM) problems. Default: 6") -+ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help= -+ "Determines many background processes will be used for segmentation export. Reduce this if you " -+ "run into out of memory (RAM) problems. Default: 2") -+ parser.add_argument("--tta", required=False, type=int, default=1, help="Set to 0 to disable test time data " -+ "augmentation (speedup of factor " -+ "4(2D)/8(3D)), " -+ "lower quality segmentations") -+ parser.add_argument("--overwrite_existing", required=False, type=int, default=1, help="Set this to 0 if you need " -+ "to resume a previous " -+ "prediction. Default: 1 " -+ "(=existing segmentations " -+ "in output_folder will be " -+ "overwritten)") -+ parser.add_argument("--mode", type=str, default="normal", required=False) -+ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True") -+ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch") -+ # parser.add_argument("--interp_order", required=False, default=3, type=int, -+ # help="order of interpolation for segmentations, has no effect if mode=fastest") -+ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, -+ # help="order of interpolation along z is z is done differently") -+ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, -+ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest") -+ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False, -+ help='Predictions are done with mixed precision by default. This improves speed and reduces ' -+ 'the required vram. If you want to disable mixed precision you can set this flag. 
Note ' -+ 'that yhis is not recommended (mixed precision is ~2x faster!)') -+ -+ args = parser.parse_args() -+ input_folder = args.input_folder -+ output_folder = args.output_folder -+ part_id = args.part_id -+ num_parts = args.num_parts -+ model = args.model_output_folder -+ folds = args.folds -+ save_npz = args.save_npz -+ lowres_segmentations = args.lowres_segmentations -+ num_threads_preprocessing = args.num_threads_preprocessing -+ num_threads_nifti_save = args.num_threads_nifti_save -+ tta = args.tta -+ step_size = args.step_size -+ -+ # interp_order = args.interp_order -+ # interp_order_z = args.interp_order_z -+ # force_separate_z = args.force_separate_z -+ -+ # if force_separate_z == "None": -+ # force_separate_z = None -+ # elif force_separate_z == "False": -+ # force_separate_z = False -+ # elif force_separate_z == "True": -+ # force_separate_z = True -+ # else: -+ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) -+ -+ overwrite = args.overwrite_existing -+ mode = args.mode -+ all_in_gpu = args.all_in_gpu -+ -+ if lowres_segmentations == "None": -+ lowres_segmentations = None -+ -+ if isinstance(folds, list): -+ if folds[0] == 'all' and len(folds) == 1: -+ pass -+ else: -+ folds = [int(i) for i in folds] -+ elif folds == "None": -+ folds = None -+ else: -+ raise ValueError("Unexpected value for argument folds") -+ -+ if tta == 0: -+ tta = False -+ elif tta == 1: -+ tta = True -+ else: -+ raise ValueError("Unexpected value for tta, Use 1 or 0") -+ -+ if overwrite == 0: -+ overwrite = False -+ elif overwrite == 1: -+ overwrite = True -+ else: -+ raise ValueError("Unexpected value for overwrite, Use 1 or 0") -+ -+ assert all_in_gpu in ['None', 'False', 'True'] -+ if all_in_gpu == "None": -+ all_in_gpu = None -+ elif all_in_gpu == "True": -+ all_in_gpu = True -+ elif all_in_gpu == "False": -+ all_in_gpu = False -+ -+ predict_from_folder(model, input_folder, output_folder, folds, save_npz, num_threads_preprocessing, -+ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, tta, mixed_precision=not args.disable_mixed_precision, -+ overwrite_existing=overwrite, mode=mode, overwrite_all_in_gpu=all_in_gpu, step_size=step_size) -diff --git a/pytorch/nnunet/inference/predict_simple2.py b/pytorch/nnunet/inference/predict_simple2.py -new file mode 100644 -index 0000000..2af423e ---- /dev/null -+++ b/pytorch/nnunet/inference/predict_simple2.py -@@ -0,0 +1,238 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. 
-+ -+ -+import argparse -+import torch -+ -+from nnunet.inference.predict2 import predict_from_folder -+from nnunet.paths import default_plans_identifier, network_training_output_dir, default_cascade_trainer, default_trainer -+from batchgenerators.utilities.file_and_folder_operations import join, isdir -+from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name -+from nnunet.inference.infer_path import INFERENCE_INPUT_FOLDER, INFERENCE_OUTPUT_FOLDER -+ -+ -+def main(): -+ parser = argparse.ArgumentParser() -+ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" -+ " order (same as training). Files must be named " -+ "CASENAME_XXXX.nii.gz where XXXX is the modality " -+ "identifier (0000, 0001, etc)", required=False, -+ default=INFERENCE_INPUT_FOLDER) -+ parser.add_argument('-o', "--output_folder", required=False, -+ default=INFERENCE_OUTPUT_FOLDER, help="folder for saving predictions") -+ parser.add_argument('-t', '--task_name', help='task name or task ID, required.', -+ default="Task003_Liver", required=False) -+ parser.add_argument('-pm', '--pre_mode', help='predict mode', required=False, default=-1) -+ parser.add_argument('-fp', '--file_path', help='input or output file path for npu bin files', required=True) -+ parser.add_argument('-tr', '--trainer_class_name', -+ help='Name of the nnUNetTrainer used for 2D U-Net, full resolution 3D U-Net and low resolution ' -+ 'U-Net. The default is %s. If you are running inference with the cascade and the folder ' -+ 'pointed to by --lowres_segmentations does not contain the segmentation maps generated by ' -+ 'the low resolution U-Net then the low resolution segmentation maps will be automatically ' -+ 'generated. For this case, make sure to set the trainer class here that matches your ' -+ '--cascade_trainer_class_name (this part can be ignored if defaults are used).' -+ % default_trainer, -+ required=False, -+ default="nnUNetPlusPlusTrainerV2") -+ parser.add_argument('-ctr', '--cascade_trainer_class_name', -+ help="Trainer class name used for predicting the 3D full resolution U-Net part of the cascade." -+ "Default is %s" % default_cascade_trainer, required=False, -+ default=default_cascade_trainer) -+ -+ parser.add_argument('-m', '--model', help="2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres. Default: 3d_fullres", -+ default="3d_fullres", required=False) -+ -+ parser.add_argument('-p', '--plans_identifier', help='do not touch this unless you know what you are doing', -+ default=default_plans_identifier, required=False) -+ -+ parser.add_argument('-f', '--folds', nargs='+', default="None", -+ help="folds to use for prediction. Default is None which means that folds will be detected " -+ "automatically in the model output folder") -+ -+ parser.add_argument('-z', '--save_npz', required=False, action='store_true', -+ help="use this if you want to ensemble these predictions with those of other models. Softmax " -+ "probabilities will be saved as compressed numpy arrays in output_folder and can be " -+ "merged between output_folders with nnUNet_ensemble_predictions") -+ -+ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', -+ help="if model is the highres stage of the cascade then you can use this folder to provide " -+ "predictions from the low resolution 3D U-Net. 
If this is left at default, the " -+ "predictions will be generated automatically (provided that the 3D low resolution U-Net " -+ "network weights are present") -+ -+ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of " -+ "the folder over several GPUs. If you " -+ "want to use n GPUs to predict this " -+ "folder you need to run this command " -+ "n times with --part_id=0, ... n-1 and " -+ "--num_parts=n (each with a different " -+ "GPU (for example via " -+ "CUDA_VISIBLE_DEVICES=X)") -+ -+ parser.add_argument("--num_parts", type=int, required=False, default=1, -+ help="Used to parallelize the prediction of " -+ "the folder over several GPUs. If you " -+ "want to use n GPUs to predict this " -+ "folder you need to run this command " -+ "n times with --part_id=0, ... n-1 and " -+ "--num_parts=n (each with a different " -+ "GPU (via " -+ "CUDA_VISIBLE_DEVICES=X)") -+ -+ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help= -+ "Determines many background processes will be used for data preprocessing. Reduce this if you " -+ "run into out of memory (RAM) problems. Default: 6") -+ -+ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help= -+ "Determines many background processes will be used for segmentation export. Reduce this if you " -+ "run into out of memory (RAM) problems. Default: 2") -+ -+ parser.add_argument("--disable_tta", required=False, default=False, action="store_true", -+ help="set this flag to disable test time data augmentation via mirroring. Speeds up inference " -+ "by roughly factor 4 (2D) or 8 (3D)") -+ -+ parser.add_argument("--overwrite_existing", required=False, default=False, action="store_true", -+ help="Set this flag if the target folder contains predictions that you would like to overwrite") -+ -+ parser.add_argument("--mode", type=str, default="normal", required=False, help="Hands off!") -+ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True. " -+ "Do not touch.") -+ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch") -+ # parser.add_argument("--interp_order", required=False, default=3, type=int, -+ # help="order of interpolation for segmentations, has no effect if mode=fastest. Do not touch this.") -+ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, -+ # help="order of interpolation along z is z is done differently. Do not touch this.") -+ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, -+ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest. " -+ # "Do not touch this.") -+ parser.add_argument('-chk', -+ help='checkpoint name, default: model_final_checkpoint', -+ required=False, -+ default='model_final_checkpoint') -+ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False, -+ help='Predictions are done with mixed precision by default. This improves speed and reduces ' -+ 'the required vram. If you want to disable mixed precision you can set this flag. 
Note ' -+ 'that yhis is not recommended (mixed precision is ~2x faster!)') -+ -+ args = parser.parse_args() -+ print(args) -+ -+ input_folder = args.input_folder -+ output_folder = args.output_folder -+ part_id = args.part_id -+ # 推理模式 -+ pre_mode = args.pre_mode -+ fp = args.file_path -+ num_parts = args.num_parts -+ folds = args.folds -+ save_npz = args.save_npz -+ lowres_segmentations = args.lowres_segmentations -+ num_threads_preprocessing = args.num_threads_preprocessing -+ num_threads_nifti_save = args.num_threads_nifti_save -+ disable_tta = args.disable_tta -+ step_size = args.step_size -+ # interp_order = args.interp_order -+ # interp_order_z = args.interp_order_z -+ # force_separate_z = args.force_separate_z -+ overwrite_existing = args.overwrite_existing -+ mode = args.mode -+ all_in_gpu = args.all_in_gpu -+ model = args.model -+ trainer_class_name = args.trainer_class_name -+ cascade_trainer_class_name = args.cascade_trainer_class_name -+ -+ task_name = args.task_name -+ -+ if not task_name.startswith("Task"): -+ task_id = int(task_name) -+ task_name = convert_id_to_task_name(task_id) -+ -+ assert model in ["2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"], "-m must be 2d, 3d_lowres, 3d_fullres or " \ -+ "3d_cascade_fullres" -+ -+ # if force_separate_z == "None": -+ # force_separate_z = None -+ # elif force_separate_z == "False": -+ # force_separate_z = False -+ # elif force_separate_z == "True": -+ # force_separate_z = True -+ # else: -+ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) -+ -+ if lowres_segmentations == "None": -+ lowres_segmentations = None -+ -+ if isinstance(folds, list): -+ if folds[0] == 'all' and len(folds) == 1: -+ pass -+ else: -+ folds = [int(i) for i in folds] -+ elif folds == "None": -+ folds = None -+ else: -+ raise ValueError("Unexpected value for argument folds") -+ -+ assert all_in_gpu in ['None', 'False', 'True'] -+ if all_in_gpu == "None": -+ all_in_gpu = None -+ elif all_in_gpu == "True": -+ all_in_gpu = True -+ elif all_in_gpu == "False": -+ all_in_gpu = False -+ -+ # we need to catch the case where model is 3d cascade fullres and the low resolution folder has not been set. -+ # In that case we need to try and predict with 3d low res first -+ if model == "3d_cascade_fullres" and lowres_segmentations is None: -+ print("lowres_segmentations is None. Attempting to predict 3d_lowres first...") -+ assert part_id == 0 and num_parts == 1, "if you don't specify a --lowres_segmentations folder for the " \ -+ "inference of the cascade, custom values for part_id and num_parts " \ -+ "are not supported. If you wish to have multiple parts, please " \ -+ "run the 3d_lowres inference first (separately)" -+ model_folder_name = join(network_training_output_dir, "3d_lowres", task_name, trainer_class_name + "__" + -+ args.plans_identifier) -+ assert isdir(model_folder_name), "model output folder not found. 
Expected: %s" % model_folder_name -+ lowres_output_folder = join(output_folder, "3d_lowres_predictions") -+ predict_from_folder(model_folder_name, input_folder, lowres_output_folder, folds, False, -+ num_threads_preprocessing, num_threads_nifti_save, None, part_id, num_parts, not disable_tta, -+ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, -+ mixed_precision=not args.disable_mixed_precision, -+ step_size=step_size) -+ lowres_segmentations = lowres_output_folder -+ torch.cuda.empty_cache() -+ print("3d_lowres done") -+ -+ if model == "3d_cascade_fullres": -+ trainer = cascade_trainer_class_name -+ else: -+ trainer = trainer_class_name -+ print(network_training_output_dir) -+ print(model) -+ print(task_name) -+ print(trainer) -+ print(args.plans_identifier) -+ model_folder_name = join(network_training_output_dir, model, task_name, trainer + "__" + -+ args.plans_identifier) -+ print("using model stored in ", model_folder_name) -+ assert isdir(model_folder_name), "model output folder not found. Expected: %s" % model_folder_name -+ -+ predict_from_folder(model_folder_name, input_folder, output_folder, folds, save_npz, num_threads_preprocessing, -+ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, not disable_tta, -+ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, -+ mixed_precision=not args.disable_mixed_precision, -+ step_size=step_size, checkpoint_name=args.chk, pre_mode=pre_mode, fp=fp) -+ -+ -+if __name__ == "__main__": -+ main() -diff --git a/pytorch/nnunet/inference/read_bin.py b/pytorch/nnunet/inference/read_bin.py -new file mode 100644 -index 0000000..972d940 ---- /dev/null -+++ b/pytorch/nnunet/inference/read_bin.py -@@ -0,0 +1,30 @@ -+import numpy -+import pdb -+import os -+ -+ -+def read_from_bin(file_name, folder_path='/root/heyupeng/result/dumpOutput_device0/'): -+ file = os.path.join(folder_path, file_name) -+ data = numpy.fromfile(file, dtype='float32') -+ data = data.reshape(3, 128, 128, 128) -+ return data -+ -+ -+def main(): -+ file = 'liver_132_0_128_0_128_0_128_1.bin' -+ print('ready to load:', file) -+ data = numpy.fromfile(file, dtype='float32') -+ data = data.reshape(3, 128, 128, 128) -+ pdb.set_trace() -+ print(data.shape) -+ for i in range(5): -+ print(data[0, 0, 0, i*7:(i+1)*7]) -+ print('-----') -+ for i in range(5): -+ print(data[0, 0, 0, i*7+50:(i+1)*7+50]) -+ pdb.set_trace() -+ print('end\n') -+ -+ -+if __name__ == "__main__": -+ main() -\ No newline at end of file -diff --git a/pytorch/nnunet/inference/read_pkl_file.py b/pytorch/nnunet/inference/read_pkl_file.py -new file mode 100644 -index 0000000..5dcc37b ---- /dev/null -+++ b/pytorch/nnunet/inference/read_pkl_file.py -@@ -0,0 +1,22 @@ -+import numpy -+import pdb -+import os -+import pickle -+ -+ -+def read_pkl(file_name, folder_path='/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/'): -+ file = os.path.join(folder_path, file_name) -+ data = open(file, 'rb') -+ data = pickle.load(data) -+ return data -+ -+ -+def main(): -+ file = 'dataset_properties.pkl' -+ print('ready to load:', file) -+ data = read_pkl(file) -+ print('end\n') -+ -+ -+if __name__ == "__main__": -+ main() -\ No newline at end of file -diff --git a/pytorch/nnunet/inference/read_txt.py b/pytorch/nnunet/inference/read_txt.py -new file mode 100644 -index 0000000..37c94aa ---- /dev/null -+++ b/pytorch/nnunet/inference/read_txt.py -@@ -0,0 +1,29 @@ -+import numpy -+import pdb -+import os -+ -+ -+def read_from_bin(file_name, 
folder_path='/root/heyupeng/result/dumpOutput_device0/'): -+ file = os.path.join(folder_path, file_name) -+ data = numpy.loadtxt(file) -+ data = data.reshape(3, 128, 128, 128) -+ return data -+ -+ -+def main(): -+ file = 'liver_132_0_128_0_128_0_128_1.txt' -+ print('ready to load:', file) -+ data = numpy.loadtxt(file) -+ data = data.reshape(3, 128, 128, 128) -+ pdb.set_trace() -+ print(data.shape) -+ for i in range(5): -+ print(data[0, 0, 0, i*7:(i+1)*7]) -+ print('-----') -+ for i in range(5): -+ print(data[0, 0, 0, i*7+50:(i+1)*7+50]) -+ pdb.set_trace() -+ print('end\n') -+ -+if __name__ == "__main__": -+ main() -diff --git a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py -index 5c2f816..5b831ea 100644 ---- a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py -+++ b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py -@@ -21,7 +21,8 @@ import numpy as np - from nnunet.network_architecture.initialization import InitWeights_He - from nnunet.network_architecture.neural_network import SegmentationNetwork - import torch.nn.functional -- -+import pdb -+# pdb.set_trace() - - class ConvDropoutNormNonlin(nn.Module): - """ -@@ -393,7 +394,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork): - - def forward(self, x): - # skips = [] -- seg_outputs = [] -+ seg_outputs = [] # x是五维的 - x0_0 = self.conv_blocks_context[0](x) - x1_0 = self.conv_blocks_context[1](x0_0) - x0_1 = self.loc4[0](torch.cat([x0_0, self.up4[0](x1_0)], 1)) -@@ -425,7 +426,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork): - x0_5 = self.loc0[4](torch.cat([x0_0, x0_1, x0_2, x0_3, x0_4, self.up0[4](x1_4)], 1)) - seg_outputs.append(self.final_nonlin(self.seg_outputs[-5](x0_5))) - -- if self._deep_supervision and self.do_ds: -+ if self._deep_supervision and self.do_ds: # False - return tuple([seg_outputs[-1]] + [i(j) for i, j in - zip(list(self.upscale_logits_ops)[::-1], seg_outputs[:-1][::-1])]) - else: -diff --git a/pytorch/nnunet/network_architecture/neural_network.py b/pytorch/nnunet/network_architecture/neural_network.py -index baa8a05..9425fe9 100644 ---- a/pytorch/nnunet/network_architecture/neural_network.py -+++ b/pytorch/nnunet/network_architecture/neural_network.py -@@ -21,8 +21,14 @@ from torch import nn - import torch - from scipy.ndimage.filters import gaussian_filter - from typing import Union, Tuple, List -+import os - - from torch.cuda.amp import autocast ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
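The read_bin.py / read_txt.py helpers added above, together with the neural_network.py hook, exchange each sliding-window patch with the NPU as raw float32 .bin dumps. Below is a minimal sketch of that round trip, assuming the dump folder, file name and 3 x 128 x 128 x 128 output layout used in the examples above; it mirrors read_from_bin, and the argmax step mirrors how predict_cases_fast collapses softmax into a label map (the helper name here is illustrative, not from the patch):

import os
import numpy as np

def read_patch_prediction(file_name, folder_path="/root/heyupeng/result/dumpOutput_device0/"):
    # raw float32 dump produced by the NPU for one 128^3 patch
    data = np.fromfile(os.path.join(folder_path, file_name), dtype="float32")
    return data.reshape(3, 128, 128, 128)  # (num_classes, z, y, x) for Task003_Liver

softmax_patch = read_patch_prediction("liver_132_0_128_0_128_0_128_1.bin")
label_patch = softmax_patch.argmax(0)  # collapse the class axis into a segmentation map
print(softmax_patch.shape, label_patch.shape)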
++ ++ ++import argparse ++from copy import deepcopy ++from typing import Tuple, Union, List ++ ++import numpy as np ++from batchgenerators.augmentations.utils import resize_segmentation ++from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax, save_segmentation_nifti ++from batchgenerators.utilities.file_and_folder_operations import * ++from multiprocessing import Process, Queue ++import torch ++import SimpleITK as sitk ++import shutil ++from multiprocessing import Pool ++from nnunet.postprocessing.connected_components import load_remove_save, load_postprocessing ++from nnunet.training.model_restore import load_model_and_checkpoint_files ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.utilities.one_hot_encoding import to_one_hot ++from nnunet.utilities.to_torch import maybe_to_torch, to_cuda +import pdb -+from glob import glob -+import time -+from nnunet.inference.read_bin import read_from_bin -+from nnunet.inference.infer_path import INFERENCE_SHAPE_PATH, INFERENCE_BIN_INPUT_FOLDER, INFERENCE_BIN_OUTPUT_FOLDER - - - class NeuralNetwork(nn.Module): -@@ -75,7 +81,8 @@ class SegmentationNetwork(NeuralNetwork): - step_size: float = 0.5, patch_size: Tuple[int, ...] = None, regions_class_order: Tuple[int, ...] = None, - use_gaussian: bool = False, pad_border_mode: str = "constant", - pad_kwargs: dict = None, all_in_gpu: bool = False, -- verbose: bool = True, mixed_precision: bool = True) -> Tuple[np.ndarray, np.ndarray]: -+ verbose: bool = True, mixed_precision: bool = True, img_name=None, -+ pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]: - """ - Use this function to predict a 3D image. It does not matter whether the network is a 2D or 3D U-Net, it will - detect that automatically and run the appropriate code. 
-@@ -133,7 +140,7 @@ class SegmentationNetwork(NeuralNetwork): - - assert len(x.shape) == 4, "data must have shape (c,x,y,z)" - -- if mixed_precision: -+ if mixed_precision: # True - context = autocast - else: - context = no_op -@@ -141,11 +148,11 @@ class SegmentationNetwork(NeuralNetwork): - with context(): - with torch.no_grad(): - if self.conv_op == nn.Conv3d: -- if use_sliding_window: -+ if use_sliding_window: # 走到这里 - res = self._internal_predict_3D_3Dconv_tiled(x, step_size, do_mirroring, mirror_axes, patch_size, - regions_class_order, use_gaussian, pad_border_mode, - pad_kwargs=pad_kwargs, all_in_gpu=all_in_gpu, -- verbose=verbose) -+ verbose=verbose, img_name=img_name, pre_mode=pre_mode, fp=fp) - else: - res = self._internal_predict_3D_3Dconv(x, patch_size, do_mirroring, mirror_axes, regions_class_order, - pad_border_mode, pad_kwargs=pad_kwargs, verbose=verbose) -@@ -284,19 +291,161 @@ class SegmentationNetwork(NeuralNetwork): - - return steps - -+ # def _internal_predict_3D_3Dconv_tiled(self, x: np.ndarray, step_size: float, do_mirroring: bool, mirror_axes: tuple, -+ # patch_size: tuple, regions_class_order: tuple, use_gaussian: bool, -+ # pad_border_mode: str, pad_kwargs: dict, all_in_gpu: bool, -+ # verbose: bool, img_name=None) -> Tuple[np.ndarray, np.ndarray]: -+ # # better safe than sorry -+ # assert len(x.shape) == 4, "x must be (c, x, y, z)" -+ # assert self.get_device() != "cpu" -+ # if verbose: print("step_size:", step_size) # 0.5 -+ # if verbose: print("do mirror:", do_mirroring) # True -+ # -+ # torch.cuda.empty_cache() -+ # -+ # assert patch_size is not None, "patch_size cannot be None for tiled prediction" # 128, 128, 128 -+ # -+ # # for sliding window inference the image must at least be as large as the patch size. It does not matter -+ # # whether the shape is divisible by 2**num_pool as long as the patch size is -+ # data, slicer = pad_nd_image(x, patch_size, pad_border_mode, pad_kwargs, True, None) -+ # data_shape = data.shape # still c, x, y, z ++ ++ ++def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files, segs_from_prev_stage, classes, ++ transpose_forward): ++ # suppress output ++ # sys.stdout = open(os.devnull, 'w') ++ ++ errors_in = [] ++ for i, l in enumerate(list_of_lists): ++ try: ++ output_file = output_files[i] ++ print("preprocessing", output_file) ++ d, _, dct = preprocess_fn(l) ++ # print(output_file, dct) ++ if segs_from_prev_stage[i] is not None: ++ assert isfile(segs_from_prev_stage[i]) and segs_from_prev_stage[i].endswith( ++ ".nii.gz"), "segs_from_prev_stage" \ ++ " must point to a " \ ++ "segmentation file" ++ seg_prev = sitk.GetArrayFromImage(sitk.ReadImage(segs_from_prev_stage[i])) ++ # check to see if shapes match ++ img = sitk.GetArrayFromImage(sitk.ReadImage(l[0])) ++ assert all([i == j for i, j in zip(seg_prev.shape, img.shape)]), "image and segmentation from previous " \ ++ "stage don't have the same pixel array " \ ++ "shape! 
image: %s, seg_prev: %s" % \ ++ (l[0], segs_from_prev_stage[i]) ++ seg_prev = seg_prev.transpose(transpose_forward) ++ seg_reshaped = resize_segmentation(seg_prev, d.shape[1:], order=1, cval=0) ++ seg_reshaped = to_one_hot(seg_reshaped, classes) ++ d = np.vstack((d, seg_reshaped)).astype(np.float32) ++ """There is a problem with python process communication that prevents us from communicating obejcts ++ larger than 2 GB between processes (basically when the length of the pickle string that will be sent is ++ communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long ++ enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually ++ patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will ++ then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either ++ filename or np.ndarray and will handle this automatically""" ++ print(d.shape) ++ if np.prod(d.shape) > (2e9 / 4 * 0.85): # *0.85 just to be save, 4 because float32 is 4 bytes ++ print( ++ "This output is too large for python process-process communication. " ++ "Saving output temporarily to disk") ++ np.save(output_file[:-7] + ".npy", d) ++ d = output_file[:-7] + ".npy" ++ q.put((output_file, (d, dct))) ++ except KeyboardInterrupt: ++ raise KeyboardInterrupt ++ except Exception as e: ++ print("error in", l) ++ print(e) ++ q.put("end") ++ if len(errors_in) > 0: ++ print("There were some errors in the following cases:", errors_in) ++ print("These cases were ignored.") ++ else: ++ print("This worker has ended successfully, no errors to report") ++ # restore output ++ # sys.stdout = sys.__stdout__ ++ ++ ++def preprocess_multithreaded(trainer, list_of_lists, output_files, num_processes=2, segs_from_prev_stage=None): ++ if segs_from_prev_stage is None: ++ segs_from_prev_stage = [None] * len(list_of_lists) ++ ++ num_processes = min(len(list_of_lists), num_processes) ++ ++ classes = list(range(1, trainer.num_classes)) ++ assert isinstance(trainer, nnUNetTrainer) ++ q = Queue(1) ++ processes = [] ++ for i in range(num_processes): ++ pr = Process(target=preprocess_save_to_queue, args=(trainer.preprocess_patient, q, ++ list_of_lists[i::num_processes], ++ output_files[i::num_processes], ++ segs_from_prev_stage[i::num_processes], ++ classes, trainer.plans['transpose_forward'])) ++ pr.start() ++ processes.append(pr) ++ ++ try: ++ end_ctr = 0 ++ while end_ctr != num_processes: ++ item = q.get() ++ if item == "end": ++ end_ctr += 1 ++ continue ++ else: ++ yield item ++ ++ finally: ++ for p in processes: ++ if p.is_alive(): ++ p.terminate() # this should not happen but better safe than sorry right ++ p.join() ++ ++ q.close() ++ ++ ++def pth2onnx(model, output_file=r'/home/yupeng/HUAWEI/UNetPlusPlus/pytorch/nnunet/run/nnunetplusplus.onnx'): ++ # model = EfficientNet.from_pretrained('efficientnet-b0', weights_path=input_file) ++ # 调整模型为eval mode ++ model.eval() ++ # 输入节点名 ++ input_names = ["image"] ++ # 输出节点名 ++ output_names = ["class"] ++ dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} ++ dummy_input = torch.randn(1, 1, 128, 128, 128) ++ # dummy_input = to_cuda(dummy_input) ++ # verbose=True,支持打印onnx节点和对应的PyTorch代码行 ++ torch.onnx.export(model, dummy_input, output_file, input_names=input_names, dynamic_axes=dynamic_axes, ++ output_names=output_names, opset_version=11, verbose=True) ++ ++ ++def predict_cases(model, list_of_lists, output_filenames, folds, save_npz, 
num_threads_preprocessing, ++ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, overwrite_existing=False, ++ all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint", ++ segmentation_export_kwargs: dict = None, pre_mode=None, fp=None): ++ """ ++ :param segmentation_export_kwargs: ++ :param model: folder where the model is saved, must contain fold_x subfolders ++ :param list_of_lists: [[case0_0000.nii.gz, case0_0001.nii.gz], [case1_0000.nii.gz, case1_0001.nii.gz], ...] ++ :param output_filenames: [output_file_case0.nii.gz, output_file_case1.nii.gz, ...] ++ :param folds: default: (0, 1, 2, 3, 4) (but can also be 'all' or a subset of the five folds, for example use (0, ) ++ for using only fold_0 ++ :param save_npz: default: False ++ :param num_threads_preprocessing: ++ :param num_threads_nifti_save: ++ :param segs_from_prev_stage: ++ :param do_tta: default: True, can be set to False for a 8x speedup at the cost of a reduced segmentation quality ++ :param overwrite_existing: default: True ++ :param mixed_precision: if None then we take no action. If True/False we overwrite what the model has in its init ++ :return: ++ """ ++ assert len(list_of_lists) == len(output_filenames) ++ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) ++ ++ pool = Pool(num_threads_nifti_save) ++ results = [] ++ ++ cleaned_output_files = [] ++ for o in output_filenames: ++ dr, f = os.path.split(o) ++ if len(dr) > 0: ++ maybe_mkdir_p(dr) ++ if not f.endswith(".nii.gz"): ++ f, _ = os.path.splitext(f) ++ f = f + ".nii.gz" ++ cleaned_output_files.append(join(dr, f)) ++ ++ if not overwrite_existing: ++ print("number of cases:", len(list_of_lists)) ++ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] ++ ++ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] ++ list_of_lists = [list_of_lists[i] for i in not_done_idx] ++ if segs_from_prev_stage is not None: ++ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] ++ ++ print("number of cases that still need to be predicted:", len(cleaned_output_files)) ++ ++ print("emptying cuda cache") ++ torch.cuda.empty_cache() ++ ''' ++ model='/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1' ++ folds=None ++ mixed_precision=True ++ checkpoint_name='model_final_checkpoint' ++ trainer=class-nnUNetPlusPlusTrainerV2 ++ params=list 5 -> dict 6 -> epoch state_dict optimizer_state_dict lr_scheduler_state_dict plot_stuff amp_grad_scaler ++ ''' ++ print("loading parameters for folds,", folds) # 得到参数,实际还未加载进模型 ++ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) ++ ++ if segmentation_export_kwargs is None: ++ if 'segmentation_export_params' in trainer.plans.keys(): ++ force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z'] ++ interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order'] ++ interpolation_order_z = trainer.plans['segmentation_export_params']['interpolation_order_z'] ++ else: # 走到这里 ++ force_separate_z = None ++ interpolation_order = 1 ++ interpolation_order_z = 0 ++ else: ++ force_separate_z = segmentation_export_kwargs['force_separate_z'] ++ interpolation_order = segmentation_export_kwargs['interpolation_order'] ++ interpolation_order_z = segmentation_export_kwargs['interpolation_order_z'] ++ ++ print("starting 
preprocessing generator") ++ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, ++ segs_from_prev_stage) ++ # unet++V2class, [['/data/yupeng/environment_variables/nnUNet_raw_data_base/nnUNet_raw_data/Task003_Liver/imagesTs/liver_132_0000.nii.gz']] ++ # ['/data/yupeng/environment_variables/output/liver_132.nii.gz'], 6, None ++ print("starting prediction...") ++ if int(pre_mode) == -1: ++ p = params[0] ++ trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 ++ print('pth2onnx start') ++ pth2onnx(trainer.network, fp) ++ print('pth2onnx end') ++ print('onnx模型已经输出至:', fp) ++ import sys ++ sys.exit(0) ++ all_output_files = [] ++ for preprocessed in preprocessing: ++ output_filename, (d, dct) = preprocessed ++ print('output_filename, d, dct = ', output_filename, d, dct) ++ all_output_files.append(all_output_files) ++ if isinstance(d, str): ++ data = np.load(d) ++ os.remove(d) ++ d = data ++ print("predicting", output_filename) ++ softmax = [] ++ params = [params[0]] # 只求第一个模型的推理结果 ++ for p in params: ++ # trainer.load_checkpoint_ram(p, False) # nnUnetPlusPlusTrainerV2,实际函数在network_trainer里 ++ # output_filename = '/data/yupeng/environment_variables/output/liver_132.nii.gz' ++ ttttt = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, trainer.data_aug_params[ ++ 'mirror_axes'], True, step_size=step_size, use_gaussian=True, all_in_gpu=all_in_gpu, ++ mixed_precision=mixed_precision, img_name=output_filename, pre_mode=pre_mode, fp=fp) # tuple(ndarray 489 500 500; 3 489 500 500) ++ softmax.append(ttttt[1][None]) # 扩充了1 3 489 500 500 ++ ''' ++ d= ++ do_tta= ++ step_size= ++ all_in_gpu= ++ mixed_precision= ++ softmax= ++ ''' ++ # softmax是list 5,每个元素是ndarray 1 3 489 500 500 ++ softmax = np.vstack(softmax) # 5 3 489 500 500 ++ softmax_mean = np.mean(softmax, 0) # 3 489 500 500 ++ ++ transpose_forward = trainer.plans.get('transpose_forward') # [0,1,2] ++ if transpose_forward is not None: ++ transpose_backward = trainer.plans.get('transpose_backward') ++ softmax_mean = softmax_mean.transpose([0] + [i + 1 for i in transpose_backward]) ++ ++ if save_npz: # False ++ npz_file = output_filename[:-7] + ".npz" ++ else: ++ npz_file = None ++ ++ if hasattr(trainer, 'regions_class_order'): # False ++ region_class_order = trainer.regions_class_order ++ else: ++ region_class_order = None ++ ++ """There is a problem with python process communication that prevents us from communicating obejcts ++ larger than 2 GB between processes (basically when the length of the pickle string that will be sent is ++ communicated by the multiprocessing.Pipe object then the placeholder (\%i I think) does not allow for long ++ enough strings (lol). This could be fixed by changing i to l (for long) but that would require manually ++ patching system python code. We circumvent that problem here by saving softmax_pred to a npy file that will ++ then be read (and finally deleted) by the Process. save_segmentation_nifti_from_softmax can take either ++ filename or np.ndarray and will handle this automatically""" ++ bytes_per_voxel = 4 ++ if all_in_gpu: ++ bytes_per_voxel = 2 # if all_in_gpu then the return value is half (float16) ++ if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85): # * 0.85 just to be save ++ print( ++ "This output is too large for python process-process communication. 
Saving output temporarily to disk") ++ np.save(output_filename[:-7] + ".npy", softmax_mean) ++ softmax_mean = output_filename[:-7] + ".npy" ++ ++ results.append(pool.starmap_async(save_segmentation_nifti_from_softmax, ++ ((softmax_mean, output_filename, dct, interpolation_order, region_class_order, ++ None, None, ++ npz_file, None, force_separate_z, interpolation_order_z),) ++ )) ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") # '/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/postprocessing.json' ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.abspath(os.path.dirname(output_filenames[0]))) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++def predict_cases_fast(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, ++ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, ++ overwrite_existing=False, ++ all_in_gpu=False, step_size=0.5, checkpoint_name="model_final_checkpoint", ++ segmentation_export_kwargs: dict = None): ++ assert len(list_of_lists) == len(output_filenames) ++ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) ++ ++ pool = Pool(num_threads_nifti_save) ++ results = [] ++ ++ cleaned_output_files = [] ++ for o in output_filenames: ++ dr, f = os.path.split(o) ++ if len(dr) > 0: ++ maybe_mkdir_p(dr) ++ if not f.endswith(".nii.gz"): ++ f, _ = os.path.splitext(f) ++ f = f + ".nii.gz" ++ cleaned_output_files.append(join(dr, f)) ++ ++ if not overwrite_existing: ++ print("number of cases:", len(list_of_lists)) ++ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] ++ ++ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] ++ list_of_lists = [list_of_lists[i] for i in not_done_idx] ++ if segs_from_prev_stage is not None: ++ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] ++ ++ print("number of cases that still need to be predicted:", len(cleaned_output_files)) ++ ++ print("emptying cuda cache") ++ torch.cuda.empty_cache() ++ ++ print("loading parameters for folds,", folds) ++ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) ++ ++ if segmentation_export_kwargs is None: ++ if 'segmentation_export_params' in trainer.plans.keys(): ++ force_separate_z = trainer.plans['segmentation_export_params']['force_separate_z'] ++ interpolation_order = trainer.plans['segmentation_export_params']['interpolation_order'] ++ interpolation_order_z = 
trainer.plans['segmentation_export_params']['interpolation_order_z'] ++ else: ++ force_separate_z = None ++ interpolation_order = 1 ++ interpolation_order_z = 0 ++ else: ++ force_separate_z = segmentation_export_kwargs['force_separate_z'] ++ interpolation_order = segmentation_export_kwargs['interpolation_order'] ++ interpolation_order_z = segmentation_export_kwargs['interpolation_order_z'] ++ ++ print("starting preprocessing generator") ++ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, ++ segs_from_prev_stage) ++ ++ print("starting prediction...") ++ for preprocessed in preprocessing: ++ print("getting data from preprocessor") ++ output_filename, (d, dct) = preprocessed ++ print("got something") ++ if isinstance(d, str): ++ print("what I got is a string, so I need to load a file") ++ data = np.load(d) ++ os.remove(d) ++ d = data ++ ++ # preallocate the output arrays ++ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) ++ softmax_aggr = None # np.zeros((trainer.num_classes, *d.shape[1:]), dtype=np.float16) ++ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) ++ print("predicting", output_filename) ++ ++ for i, p in enumerate(params): ++ trainer.load_checkpoint_ram(p, False) ++ ++ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, ++ trainer.data_aug_params['mirror_axes'], True, ++ step_size=step_size, use_gaussian=True, ++ all_in_gpu=all_in_gpu, ++ mixed_precision=mixed_precision) ++ ++ if len(params) > 1: ++ # otherwise we dont need this and we can save ourselves the time it takes to copy that ++ print("aggregating softmax") ++ if softmax_aggr is None: ++ softmax_aggr = res[1] ++ else: ++ softmax_aggr += res[1] ++ all_seg_outputs[i] = res[0] ++ ++ print("obtaining segmentation map") ++ if len(params) > 1: ++ # we dont need to normalize the softmax by 1 / len(params) because this would not change the outcome of the argmax ++ seg = softmax_aggr.argmax(0) ++ else: ++ seg = all_seg_outputs[0] ++ ++ print("applying transpose_backward") ++ transpose_forward = trainer.plans.get('transpose_forward') ++ if transpose_forward is not None: ++ transpose_backward = trainer.plans.get('transpose_backward') ++ seg = seg.transpose([i for i in transpose_backward]) ++ ++ print("initializing segmentation export") ++ results.append(pool.starmap_async(save_segmentation_nifti, ++ ((seg, output_filename, dct, interpolation_order, force_separate_z, ++ interpolation_order_z),) ++ )) ++ print("done") ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. 
Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++ ++def predict_cases_fastest(model, list_of_lists, output_filenames, folds, num_threads_preprocessing, ++ num_threads_nifti_save, segs_from_prev_stage=None, do_tta=True, mixed_precision=True, ++ overwrite_existing=False, all_in_gpu=True, step_size=0.5, ++ checkpoint_name="model_final_checkpoint"): ++ assert len(list_of_lists) == len(output_filenames) ++ if segs_from_prev_stage is not None: assert len(segs_from_prev_stage) == len(output_filenames) ++ ++ pool = Pool(num_threads_nifti_save) ++ results = [] ++ ++ cleaned_output_files = [] ++ for o in output_filenames: ++ dr, f = os.path.split(o) ++ if len(dr) > 0: ++ maybe_mkdir_p(dr) ++ if not f.endswith(".nii.gz"): ++ f, _ = os.path.splitext(f) ++ f = f + ".nii.gz" ++ cleaned_output_files.append(join(dr, f)) ++ ++ if not overwrite_existing: ++ print("number of cases:", len(list_of_lists)) ++ not_done_idx = [i for i, j in enumerate(cleaned_output_files) if not isfile(j)] ++ ++ cleaned_output_files = [cleaned_output_files[i] for i in not_done_idx] ++ list_of_lists = [list_of_lists[i] for i in not_done_idx] ++ if segs_from_prev_stage is not None: ++ segs_from_prev_stage = [segs_from_prev_stage[i] for i in not_done_idx] ++ ++ print("number of cases that still need to be predicted:", len(cleaned_output_files)) ++ ++ print("emptying cuda cache") ++ torch.cuda.empty_cache() ++ ++ print("loading parameters for folds,", folds) ++ trainer, params = load_model_and_checkpoint_files(model, folds, mixed_precision=mixed_precision, checkpoint_name=checkpoint_name) ++ ++ print("starting preprocessing generator") ++ preprocessing = preprocess_multithreaded(trainer, list_of_lists, cleaned_output_files, num_threads_preprocessing, ++ segs_from_prev_stage) ++ ++ print("starting prediction...") ++ for preprocessed in preprocessing: ++ print("getting data from preprocessor") ++ output_filename, (d, dct) = preprocessed ++ print("got something") ++ if isinstance(d, str): ++ print("what I got is a string, so I need to load a file") ++ data = np.load(d) ++ os.remove(d) ++ d = data ++ ++ # preallocate the output arrays ++ # same dtype as the return value in predict_preprocessed_data_return_seg_and_softmax (saves time) ++ all_softmax_outputs = np.zeros((len(params), trainer.num_classes, *d.shape[1:]), dtype=np.float16) ++ all_seg_outputs = np.zeros((len(params), *d.shape[1:]), dtype=int) ++ print("predicting", output_filename) ++ ++ for i, p in enumerate(params): ++ trainer.load_checkpoint_ram(p, False) ++ res = trainer.predict_preprocessed_data_return_seg_and_softmax(d, do_tta, ++ trainer.data_aug_params['mirror_axes'], True, ++ step_size=step_size, use_gaussian=True, ++ all_in_gpu=all_in_gpu, ++ mixed_precision=mixed_precision) ++ if len(params) > 1: ++ # otherwise we dont need this and we can save ourselves the time it takes to copy that ++ all_softmax_outputs[i] = res[1] ++ all_seg_outputs[i] = res[0] ++ ++ print("aggregating predictions") ++ if len(params) > 1: ++ softmax_mean = np.mean(all_softmax_outputs, 0) ++ seg = softmax_mean.argmax(0) ++ else: ++ seg = all_seg_outputs[0] ++ ++ print("applying transpose_backward") ++ transpose_forward = trainer.plans.get('transpose_forward') ++ if transpose_forward is not None: ++ transpose_backward = trainer.plans.get('transpose_backward') ++ seg = seg.transpose([i for i in transpose_backward]) ++ ++ print("initializing segmentation export") ++ 
results.append(pool.starmap_async(save_segmentation_nifti, ++ ((seg, output_filename, dct, 0, None),) ++ )) ++ print("done") ++ ++ print("inference done. Now waiting for the segmentation export to finish...") ++ _ = [i.get() for i in results] ++ # now apply postprocessing ++ # first load the postprocessing properties if they are present. Else raise a well visible warning ++ results = [] ++ pp_file = join(model, "postprocessing.json") ++ if isfile(pp_file): ++ print("postprocessing...") ++ shutil.copy(pp_file, os.path.dirname(output_filenames[0])) ++ # for_which_classes stores for which of the classes everything but the largest connected component needs to be ++ # removed ++ for_which_classes, min_valid_obj_size = load_postprocessing(pp_file) ++ results.append(pool.starmap_async(load_remove_save, ++ zip(output_filenames, output_filenames, ++ [for_which_classes] * len(output_filenames), ++ [min_valid_obj_size] * len(output_filenames)))) ++ _ = [i.get() for i in results] ++ else: ++ print("WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run " ++ "consolidate_folds in the output folder of the model first!\nThe folder you need to run this in is " ++ "%s" % model) ++ ++ pool.close() ++ pool.join() ++ ++ ++def check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities): ++ print("This model expects %d input modalities for each image" % expected_num_modalities) ++ files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) ++ ++ maybe_case_ids = np.unique([i[:-12] for i in files]) ++ ++ remaining = deepcopy(files) ++ missing = [] ++ ++ assert len(files) > 0, "input folder did not contain any images (expected to find .nii.gz file endings)" ++ ++ # now check if all required files are present and that no unexpected files are remaining ++ for c in maybe_case_ids: ++ for n in range(expected_num_modalities): ++ expected_output_file = c + "_%04.0d.nii.gz" % n ++ if not isfile(join(input_folder, expected_output_file)): ++ missing.append(expected_output_file) ++ else: ++ remaining.remove(expected_output_file) ++ ++ print("Found %d unique case ids, here are some examples:" % len(maybe_case_ids), ++ np.random.choice(maybe_case_ids, min(len(maybe_case_ids), 10))) ++ print("If they don't look right, make sure to double check your filenames. They must end with _0000.nii.gz etc") ++ ++ if len(remaining) > 0: ++ print("found %d unexpected remaining files in the folder. 
Here are some examples:" % len(remaining), ++ np.random.choice(remaining, min(len(remaining), 10))) ++ ++ if len(missing) > 0: ++ print("Some files are missing:") ++ print(missing) ++ raise RuntimeError("missing files in input_folder") ++ ++ return maybe_case_ids ++ ++ ++def predict_from_folder(model: str, input_folder: str, output_folder: str, folds: Union[Tuple[int], List[int]], ++ save_npz: bool, num_threads_preprocessing: int, num_threads_nifti_save: int, ++ lowres_segmentations: Union[str, None], ++ part_id: int, num_parts: int, tta: bool, mixed_precision: bool = True, ++ overwrite_existing: bool = True, mode: str = 'normal', overwrite_all_in_gpu: bool = None, ++ step_size: float = 0.5, checkpoint_name: str = "model_final_checkpoint", ++ segmentation_export_kwargs: dict = None, pre_mode=None, fp=None): ++ """ ++ here we use the standard naming scheme to generate list_of_lists and output_files needed by predict_cases ++ ++ :param model: ++ :param input_folder: ++ :param output_folder: ++ :param folds: ++ :param save_npz: ++ :param num_threads_preprocessing: ++ :param num_threads_nifti_save: ++ :param lowres_segmentations: ++ :param part_id: ++ :param num_parts: ++ :param tta: ++ :param mixed_precision: ++ :param overwrite_existing: if not None then it will be overwritten with whatever is in there. None is default (no overwrite) ++ :return: ++ """ ++ maybe_mkdir_p(output_folder) ++ shutil.copy(join(model, 'plans.pkl'), output_folder) ++ ++ assert isfile(join(model, "plans.pkl")), "Folder with saved model weights must contain a plans.pkl file" ++ expected_num_modalities = load_pickle(join(model, "plans.pkl"))['num_modalities'] ++ ++ # check input folder integrity ++ case_ids = check_input_folder_and_return_caseIDs(input_folder, expected_num_modalities) ++ ++ output_files = [join(output_folder, i + ".nii.gz") for i in case_ids] ++ all_files = subfiles(input_folder, suffix=".nii.gz", join=False, sort=True) ++ list_of_lists = [[join(input_folder, i) for i in all_files if i[:len(j)].startswith(j) and ++ len(i) == (len(j) + 12)] for j in case_ids] ++ ++ if lowres_segmentations is not None: ++ assert isdir(lowres_segmentations), "if lowres_segmentations is not None then it must point to a directory" ++ lowres_segmentations = [join(lowres_segmentations, i + ".nii.gz") for i in case_ids] ++ assert all([isfile(i) for i in lowres_segmentations]), "not all lowres_segmentations files are present. 
" \ ++ "(I was searching for case_id.nii.gz in that folder)" ++ lowres_segmentations = lowres_segmentations[part_id::num_parts] ++ else: ++ lowres_segmentations = None ++ ++ if mode == "normal": # step this ++ if overwrite_all_in_gpu is None: # True ++ all_in_gpu = False ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ return predict_cases(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ save_npz, num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, tta, ++ mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name, ++ segmentation_export_kwargs=segmentation_export_kwargs, pre_mode=pre_mode, fp=fp) ++ elif mode == "fast": ++ if overwrite_all_in_gpu is None: ++ all_in_gpu = True ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ assert save_npz is False ++ return predict_cases_fast(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, ++ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name, ++ segmentation_export_kwargs=segmentation_export_kwargs) ++ elif mode == "fastest": ++ if overwrite_all_in_gpu is None: ++ all_in_gpu = True ++ else: ++ all_in_gpu = overwrite_all_in_gpu ++ ++ assert save_npz is False ++ return predict_cases_fastest(model, list_of_lists[part_id::num_parts], output_files[part_id::num_parts], folds, ++ num_threads_preprocessing, num_threads_nifti_save, lowres_segmentations, ++ tta, mixed_precision=mixed_precision, overwrite_existing=overwrite_existing, all_in_gpu=all_in_gpu, ++ step_size=step_size, checkpoint_name=checkpoint_name) ++ else: ++ raise ValueError("unrecognized mode. Must be normal, fast or fastest") ++ ++ ++if __name__ == "__main__": ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" ++ " order (same as training). Files must be named " ++ "CASENAME_XXXX.nii.gz where XXXX is the modality " ++ "identifier (0000, 0001, etc)", required=True) ++ parser.add_argument('-o', "--output_folder", required=True, help="folder for saving predictions") ++ parser.add_argument('-m', '--model_output_folder', ++ help='model output folder. Will automatically discover the folds ' ++ 'that were ' ++ 'run and use those as an ensemble', required=True) ++ parser.add_argument('-f', '--folds', nargs='+', default='None', help="folds to use for prediction. Default is None " ++ "which means that folds will be detected " ++ "automatically in the model output folder") ++ parser.add_argument('-z', '--save_npz', required=False, action='store_true', help="use this if you want to ensemble" ++ " these predictions with those of" ++ " other models. Softmax " ++ "probabilities will be saved as " ++ "compresed numpy arrays in " ++ "output_folder and can be merged " ++ "between output_folders with " ++ "merge_predictions.py") ++ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', help="if model is the highres " ++ "stage of the cascade then you need to use -l to specify where the segmentations of the " ++ "corresponding lowres unet are. 
Here they are required to do a prediction") ++ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (for example via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ parser.add_argument("--num_parts", type=int, required=False, default=1, ++ help="Used to parallelize the prediction of " ++ "the folder over several GPUs. If you " ++ "want to use n GPUs to predict this " ++ "folder you need to run this command " ++ "n times with --part_id=0, ... n-1 and " ++ "--num_parts=n (each with a different " ++ "GPU (via " ++ "CUDA_VISIBLE_DEVICES=X)") ++ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help= ++ "Determines many background processes will be used for data preprocessing. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 6") ++ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help= ++ "Determines many background processes will be used for segmentation export. Reduce this if you " ++ "run into out of memory (RAM) problems. Default: 2") ++ parser.add_argument("--tta", required=False, type=int, default=1, help="Set to 0 to disable test time data " ++ "augmentation (speedup of factor " ++ "4(2D)/8(3D)), " ++ "lower quality segmentations") ++ parser.add_argument("--overwrite_existing", required=False, type=int, default=1, help="Set this to 0 if you need " ++ "to resume a previous " ++ "prediction. Default: 1 " ++ "(=existing segmentations " ++ "in output_folder will be " ++ "overwritten)") ++ parser.add_argument("--mode", type=str, default="normal", required=False) ++ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True") ++ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations, has no effect if mode=fastest") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z is z is done differently") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest") ++ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False, ++ help='Predictions are done with mixed precision by default. This improves speed and reduces ' ++ 'the required vram. If you want to disable mixed precision you can set this flag. 
Note '
++ 'that this is not recommended (mixed precision is ~2x faster!)')
++
++ args = parser.parse_args()
++ input_folder = args.input_folder
++ output_folder = args.output_folder
++ part_id = args.part_id
++ num_parts = args.num_parts
++ model = args.model_output_folder
++ folds = args.folds
++ save_npz = args.save_npz
++ lowres_segmentations = args.lowres_segmentations
++ num_threads_preprocessing = args.num_threads_preprocessing
++ num_threads_nifti_save = args.num_threads_nifti_save
++ tta = args.tta
++ step_size = args.step_size
++
++ # interp_order = args.interp_order
++ # interp_order_z = args.interp_order_z
++ # force_separate_z = args.force_separate_z
++
++ # if force_separate_z == "None":
++ # force_separate_z = None
++ # elif force_separate_z == "False":
++ # force_separate_z = False
++ # elif force_separate_z == "True":
++ # force_separate_z = True
++ # else:
++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z)
++
++ overwrite = args.overwrite_existing
++ mode = args.mode
++ all_in_gpu = args.all_in_gpu
++
++ if lowres_segmentations == "None":
++ lowres_segmentations = None
++
++ if isinstance(folds, list):
++ if folds[0] == 'all' and len(folds) == 1:
++ pass
++ else:
++ folds = [int(i) for i in folds]
++ elif folds == "None":
++ folds = None
++ else:
++ raise ValueError("Unexpected value for argument folds")
++
++ if tta == 0:
++ tta = False
++ elif tta == 1:
++ tta = True
++ else:
++ raise ValueError("Unexpected value for tta. Use 1 or 0")
++
++ if overwrite == 0:
++ overwrite = False
++ elif overwrite == 1:
++ overwrite = True
++ else:
++ raise ValueError("Unexpected value for overwrite. Use 1 or 0")
++
++ assert all_in_gpu in ['None', 'False', 'True']
++ if all_in_gpu == "None":
++ all_in_gpu = None
++ elif all_in_gpu == "True":
++ all_in_gpu = True
++ elif all_in_gpu == "False":
++ all_in_gpu = False
++
++ predict_from_folder(model, input_folder, output_folder, folds, save_npz, num_threads_preprocessing,
++ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, tta, mixed_precision=not args.disable_mixed_precision,
++ overwrite_existing=overwrite, mode=mode, overwrite_all_in_gpu=all_in_gpu, step_size=step_size)
+diff --git a/pytorch/nnunet/inference/predict_simple2.py b/pytorch/nnunet/inference/predict_simple2.py
+new file mode 100644
+index 0000000..2af423e
+--- /dev/null
++++ b/pytorch/nnunet/inference/predict_simple2.py
+@@ -0,0 +1,238 @@
++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++ ++ ++import argparse ++import torch ++ ++from nnunet.inference.predict2 import predict_from_folder ++from nnunet.paths import default_plans_identifier, network_training_output_dir, default_cascade_trainer, default_trainer ++from batchgenerators.utilities.file_and_folder_operations import join, isdir ++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++from nnunet.inference.infer_path import INFERENCE_INPUT_FOLDER, INFERENCE_OUTPUT_FOLDER ++ ++ ++def main(): ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-i", '--input_folder', help="Must contain all modalities for each patient in the correct" ++ " order (same as training). Files must be named " ++ "CASENAME_XXXX.nii.gz where XXXX is the modality " ++ "identifier (0000, 0001, etc)", required=False, ++ default=INFERENCE_INPUT_FOLDER) ++ parser.add_argument('-o', "--output_folder", required=False, ++ default=INFERENCE_OUTPUT_FOLDER, help="folder for saving predictions") ++ parser.add_argument('-t', '--task_name', help='task name or task ID, required.', ++ default="Task003_Liver", required=False) ++ parser.add_argument('-pm', '--pre_mode', help='predict mode', required=False, default=-1) ++ parser.add_argument('-fp', '--file_path', help='input or output file path for npu bin files', required=True) ++ parser.add_argument('-tr', '--trainer_class_name', ++ help='Name of the nnUNetTrainer used for 2D U-Net, full resolution 3D U-Net and low resolution ' ++ 'U-Net. The default is %s. If you are running inference with the cascade and the folder ' ++ 'pointed to by --lowres_segmentations does not contain the segmentation maps generated by ' ++ 'the low resolution U-Net then the low resolution segmentation maps will be automatically ' ++ 'generated. For this case, make sure to set the trainer class here that matches your ' ++ '--cascade_trainer_class_name (this part can be ignored if defaults are used).' ++ % default_trainer, ++ required=False, ++ default="nnUNetPlusPlusTrainerV2") ++ parser.add_argument('-ctr', '--cascade_trainer_class_name', ++ help="Trainer class name used for predicting the 3D full resolution U-Net part of the cascade." ++ "Default is %s" % default_cascade_trainer, required=False, ++ default=default_cascade_trainer) ++ ++ parser.add_argument('-m', '--model', help="2d, 3d_lowres, 3d_fullres or 3d_cascade_fullres. Default: 3d_fullres", ++ default="3d_fullres", required=False) ++ ++ parser.add_argument('-p', '--plans_identifier', help='do not touch this unless you know what you are doing', ++ default=default_plans_identifier, required=False) ++ ++ parser.add_argument('-f', '--folds', nargs='+', default="None", ++ help="folds to use for prediction. Default is None which means that folds will be detected " ++ "automatically in the model output folder") ++ ++ parser.add_argument('-z', '--save_npz', required=False, action='store_true', ++ help="use this if you want to ensemble these predictions with those of other models. Softmax " ++ "probabilities will be saved as compressed numpy arrays in output_folder and can be " ++ "merged between output_folders with nnUNet_ensemble_predictions") ++ ++ parser.add_argument('-l', '--lowres_segmentations', required=False, default='None', ++ help="if model is the highres stage of the cascade then you can use this folder to provide " ++ "predictions from the low resolution 3D U-Net. 
If this is left at default, the "
++ "predictions will be generated automatically (provided that the 3D low resolution U-Net "
++ "network weights are present)")
++
++ parser.add_argument("--part_id", type=int, required=False, default=0, help="Used to parallelize the prediction of "
++ "the folder over several GPUs. If you "
++ "want to use n GPUs to predict this "
++ "folder you need to run this command "
++ "n times with --part_id=0, ... n-1 and "
++ "--num_parts=n (each with a different "
++ "GPU (for example via "
++ "CUDA_VISIBLE_DEVICES=X))")
++
++ parser.add_argument("--num_parts", type=int, required=False, default=1,
++ help="Used to parallelize the prediction of "
++ "the folder over several GPUs. If you "
++ "want to use n GPUs to predict this "
++ "folder you need to run this command "
++ "n times with --part_id=0, ... n-1 and "
++ "--num_parts=n (each with a different "
++ "GPU (via "
++ "CUDA_VISIBLE_DEVICES=X))")
++
++ parser.add_argument("--num_threads_preprocessing", required=False, default=6, type=int, help=
++ "Determines how many background processes will be used for data preprocessing. Reduce this if you "
++ "run into out of memory (RAM) problems. Default: 6")
++
++ parser.add_argument("--num_threads_nifti_save", required=False, default=2, type=int, help=
++ "Determines how many background processes will be used for segmentation export. Reduce this if you "
++ "run into out of memory (RAM) problems. Default: 2")
++
++ parser.add_argument("--disable_tta", required=False, default=False, action="store_true",
++ help="set this flag to disable test time data augmentation via mirroring. Speeds up inference "
++ "by roughly factor 4 (2D) or 8 (3D)")
++
++ parser.add_argument("--overwrite_existing", required=False, default=False, action="store_true",
++ help="Set this flag if the target folder contains predictions that you would like to overwrite")
++
++ parser.add_argument("--mode", type=str, default="normal", required=False, help="Hands off!")
++ parser.add_argument("--all_in_gpu", type=str, default="None", required=False, help="can be None, False or True. "
++ "Do not touch.")
++ parser.add_argument("--step_size", type=float, default=0.5, required=False, help="don't touch")
++ # parser.add_argument("--interp_order", required=False, default=3, type=int,
++ # help="order of interpolation for segmentations, has no effect if mode=fastest. Do not touch this.")
++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int,
++ # help="order of interpolation along z if z is done differently. Do not touch this.")
++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str,
++ # help="force_separate_z resampling. Can be None, True or False, has no effect if mode=fastest. "
++ # "Do not touch this.")
++ parser.add_argument('-chk',
++ help='checkpoint name, default: model_final_checkpoint',
++ required=False,
++ default='model_final_checkpoint')
++ parser.add_argument('--disable_mixed_precision', default=False, action='store_true', required=False,
++ help='Predictions are done with mixed precision by default. This improves speed and reduces '
++ 'the required vram. If you want to disable mixed precision you can set this flag.
Note '
++ 'that this is not recommended (mixed precision is ~2x faster!)')
++
++ args = parser.parse_args()
++ print(args)
++
++ input_folder = args.input_folder
++ output_folder = args.output_folder
++ part_id = args.part_id
++ # inference mode
++ pre_mode = args.pre_mode
++ fp = args.file_path
++ num_parts = args.num_parts
++ folds = args.folds
++ save_npz = args.save_npz
++ lowres_segmentations = args.lowres_segmentations
++ num_threads_preprocessing = args.num_threads_preprocessing
++ num_threads_nifti_save = args.num_threads_nifti_save
++ disable_tta = args.disable_tta
++ step_size = args.step_size
++ # interp_order = args.interp_order
++ # interp_order_z = args.interp_order_z
++ # force_separate_z = args.force_separate_z
++ overwrite_existing = args.overwrite_existing
++ mode = args.mode
++ all_in_gpu = args.all_in_gpu
++ model = args.model
++ trainer_class_name = args.trainer_class_name
++ cascade_trainer_class_name = args.cascade_trainer_class_name
++
++ task_name = args.task_name
++
++ if not task_name.startswith("Task"):
++ task_id = int(task_name)
++ task_name = convert_id_to_task_name(task_id)
++
++ assert model in ["2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"], "-m must be 2d, 3d_lowres, 3d_fullres or " \
++ "3d_cascade_fullres"
++
++ # if force_separate_z == "None":
++ # force_separate_z = None
++ # elif force_separate_z == "False":
++ # force_separate_z = False
++ # elif force_separate_z == "True":
++ # force_separate_z = True
++ # else:
++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z)
++
++ if lowres_segmentations == "None":
++ lowres_segmentations = None
++
++ if isinstance(folds, list):
++ if folds[0] == 'all' and len(folds) == 1:
++ pass
++ else:
++ folds = [int(i) for i in folds]
++ elif folds == "None":
++ folds = None
++ else:
++ raise ValueError("Unexpected value for argument folds")
++
++ assert all_in_gpu in ['None', 'False', 'True']
++ if all_in_gpu == "None":
++ all_in_gpu = None
++ elif all_in_gpu == "True":
++ all_in_gpu = True
++ elif all_in_gpu == "False":
++ all_in_gpu = False
++
++ # we need to catch the case where model is 3d cascade fullres and the low resolution folder has not been set.
++ # In that case we need to try and predict with 3d low res first
++ if model == "3d_cascade_fullres" and lowres_segmentations is None:
++ print("lowres_segmentations is None. Attempting to predict 3d_lowres first...")
++ assert part_id == 0 and num_parts == 1, "if you don't specify a --lowres_segmentations folder for the " \
++ "inference of the cascade, custom values for part_id and num_parts " \
++ "are not supported. If you wish to have multiple parts, please " \
++ "run the 3d_lowres inference first (separately)"
++ model_folder_name = join(network_training_output_dir, "3d_lowres", task_name, trainer_class_name + "__" +
++ args.plans_identifier)
++ assert isdir(model_folder_name), "model output folder not found.
Expected: %s" % model_folder_name ++ lowres_output_folder = join(output_folder, "3d_lowres_predictions") ++ predict_from_folder(model_folder_name, input_folder, lowres_output_folder, folds, False, ++ num_threads_preprocessing, num_threads_nifti_save, None, part_id, num_parts, not disable_tta, ++ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, ++ mixed_precision=not args.disable_mixed_precision, ++ step_size=step_size) ++ lowres_segmentations = lowres_output_folder ++ torch.cuda.empty_cache() ++ print("3d_lowres done") ++ ++ if model == "3d_cascade_fullres": ++ trainer = cascade_trainer_class_name ++ else: ++ trainer = trainer_class_name ++ print(network_training_output_dir) ++ print(model) ++ print(task_name) ++ print(trainer) ++ print(args.plans_identifier) ++ model_folder_name = join(network_training_output_dir, model, task_name, trainer + "__" + ++ args.plans_identifier) ++ print("using model stored in ", model_folder_name) ++ assert isdir(model_folder_name), "model output folder not found. Expected: %s" % model_folder_name ++ ++ predict_from_folder(model_folder_name, input_folder, output_folder, folds, save_npz, num_threads_preprocessing, ++ num_threads_nifti_save, lowres_segmentations, part_id, num_parts, not disable_tta, ++ overwrite_existing=overwrite_existing, mode=mode, overwrite_all_in_gpu=all_in_gpu, ++ mixed_precision=not args.disable_mixed_precision, ++ step_size=step_size, checkpoint_name=args.chk, pre_mode=pre_mode, fp=fp) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/inference/read_bin.py b/pytorch/nnunet/inference/read_bin.py +new file mode 100644 +index 0000000..972d940 +--- /dev/null ++++ b/pytorch/nnunet/inference/read_bin.py +@@ -0,0 +1,30 @@ ++import numpy ++import pdb ++import os ++ ++ ++def read_from_bin(file_name, folder_path='/root/heyupeng/result/dumpOutput_device0/'): ++ file = os.path.join(folder_path, file_name) ++ data = numpy.fromfile(file, dtype='float32') ++ data = data.reshape(3, 128, 128, 128) ++ return data ++ ++ ++def main(): ++ file = 'liver_132_0_128_0_128_0_128_1.bin' ++ print('ready to load:', file) ++ data = numpy.fromfile(file, dtype='float32') ++ data = data.reshape(3, 128, 128, 128) ++ pdb.set_trace() ++ print(data.shape) ++ for i in range(5): ++ print(data[0, 0, 0, i*7:(i+1)*7]) ++ print('-----') ++ for i in range(5): ++ print(data[0, 0, 0, i*7+50:(i+1)*7+50]) ++ pdb.set_trace() ++ print('end\n') ++ ++ ++if __name__ == "__main__": ++ main() +\ No newline at end of file +diff --git a/pytorch/nnunet/inference/read_pkl_file.py b/pytorch/nnunet/inference/read_pkl_file.py +new file mode 100644 +index 0000000..5dcc37b +--- /dev/null ++++ b/pytorch/nnunet/inference/read_pkl_file.py +@@ -0,0 +1,22 @@ ++import numpy ++import pdb ++import os ++import pickle ++ ++ ++def read_pkl(file_name, folder_path='/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/'): ++ file = os.path.join(folder_path, file_name) ++ data = open(file, 'rb') ++ data = pickle.load(data) ++ return data ++ ++ ++def main(): ++ file = 'dataset_properties.pkl' ++ print('ready to load:', file) ++ data = read_pkl(file) ++ print('end\n') ++ ++ ++if __name__ == "__main__": ++ main() +\ No newline at end of file +diff --git a/pytorch/nnunet/inference/read_txt.py b/pytorch/nnunet/inference/read_txt.py +new file mode 100644 +index 0000000..37c94aa +--- /dev/null ++++ b/pytorch/nnunet/inference/read_txt.py +@@ -0,0 +1,29 @@ ++import numpy ++import pdb ++import os ++ ++ ++def read_from_bin(file_name, 
folder_path='/root/heyupeng/result/dumpOutput_device0/'): ++ file = os.path.join(folder_path, file_name) ++ data = numpy.loadtxt(file) ++ data = data.reshape(3, 128, 128, 128) ++ return data ++ ++ ++def main(): ++ file = 'liver_132_0_128_0_128_0_128_1.txt' ++ print('ready to load:', file) ++ data = numpy.loadtxt(file) ++ data = data.reshape(3, 128, 128, 128) ++ pdb.set_trace() ++ print(data.shape) ++ for i in range(5): ++ print(data[0, 0, 0, i*7:(i+1)*7]) ++ print('-----') ++ for i in range(5): ++ print(data[0, 0, 0, i*7+50:(i+1)*7+50]) ++ pdb.set_trace() ++ print('end\n') ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py +index 5c2f816..5b831ea 100644 +--- a/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py ++++ b/pytorch/nnunet/network_architecture/generic_UNetPlusPlus.py +@@ -21,7 +21,8 @@ import numpy as np + from nnunet.network_architecture.initialization import InitWeights_He + from nnunet.network_architecture.neural_network import SegmentationNetwork + import torch.nn.functional +- ++import pdb ++# pdb.set_trace() + + class ConvDropoutNormNonlin(nn.Module): + """ +@@ -393,7 +394,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork): + + def forward(self, x): + # skips = [] +- seg_outputs = [] ++ seg_outputs = [] # x是五维的 + x0_0 = self.conv_blocks_context[0](x) + x1_0 = self.conv_blocks_context[1](x0_0) + x0_1 = self.loc4[0](torch.cat([x0_0, self.up4[0](x1_0)], 1)) +@@ -425,7 +426,7 @@ class Generic_UNetPlusPlus(SegmentationNetwork): + x0_5 = self.loc0[4](torch.cat([x0_0, x0_1, x0_2, x0_3, x0_4, self.up0[4](x1_4)], 1)) + seg_outputs.append(self.final_nonlin(self.seg_outputs[-5](x0_5))) + +- if self._deep_supervision and self.do_ds: ++ if self._deep_supervision and self.do_ds: # False + return tuple([seg_outputs[-1]] + [i(j) for i, j in + zip(list(self.upscale_logits_ops)[::-1], seg_outputs[:-1][::-1])]) + else: +diff --git a/pytorch/nnunet/network_architecture/neural_network.py b/pytorch/nnunet/network_architecture/neural_network.py +index baa8a05..9425fe9 100644 +--- a/pytorch/nnunet/network_architecture/neural_network.py ++++ b/pytorch/nnunet/network_architecture/neural_network.py +@@ -21,8 +21,14 @@ from torch import nn + import torch + from scipy.ndimage.filters import gaussian_filter + from typing import Union, Tuple, List ++import os + + from torch.cuda.amp import autocast ++import pdb ++from glob import glob ++import time ++from nnunet.inference.read_bin import read_from_bin ++from nnunet.inference.infer_path import INFERENCE_SHAPE_PATH, INFERENCE_BIN_INPUT_FOLDER, INFERENCE_BIN_OUTPUT_FOLDER + + + class NeuralNetwork(nn.Module): +@@ -75,7 +81,8 @@ class SegmentationNetwork(NeuralNetwork): + step_size: float = 0.5, patch_size: Tuple[int, ...] = None, regions_class_order: Tuple[int, ...] = None, + use_gaussian: bool = False, pad_border_mode: str = "constant", + pad_kwargs: dict = None, all_in_gpu: bool = False, +- verbose: bool = True, mixed_precision: bool = True) -> Tuple[np.ndarray, np.ndarray]: ++ verbose: bool = True, mixed_precision: bool = True, img_name=None, ++ pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]: + """ + Use this function to predict a 3D image. It does not matter whether the network is a 2D or 3D U-Net, it will + detect that automatically and run the appropriate code. 
+@@ -133,7 +140,7 @@ class SegmentationNetwork(NeuralNetwork): + + assert len(x.shape) == 4, "data must have shape (c,x,y,z)" + +- if mixed_precision: ++ if mixed_precision: # True + context = autocast + else: + context = no_op +@@ -141,11 +148,11 @@ class SegmentationNetwork(NeuralNetwork): + with context(): + with torch.no_grad(): + if self.conv_op == nn.Conv3d: +- if use_sliding_window: ++ if use_sliding_window: # 走到这里 + res = self._internal_predict_3D_3Dconv_tiled(x, step_size, do_mirroring, mirror_axes, patch_size, + regions_class_order, use_gaussian, pad_border_mode, + pad_kwargs=pad_kwargs, all_in_gpu=all_in_gpu, +- verbose=verbose) ++ verbose=verbose, img_name=img_name, pre_mode=pre_mode, fp=fp) + else: + res = self._internal_predict_3D_3Dconv(x, patch_size, do_mirroring, mirror_axes, regions_class_order, + pad_border_mode, pad_kwargs=pad_kwargs, verbose=verbose) +@@ -284,19 +291,161 @@ class SegmentationNetwork(NeuralNetwork): + + return steps + ++ # def _internal_predict_3D_3Dconv_tiled(self, x: np.ndarray, step_size: float, do_mirroring: bool, mirror_axes: tuple, ++ # patch_size: tuple, regions_class_order: tuple, use_gaussian: bool, ++ # pad_border_mode: str, pad_kwargs: dict, all_in_gpu: bool, ++ # verbose: bool, img_name=None) -> Tuple[np.ndarray, np.ndarray]: ++ # # better safe than sorry ++ # assert len(x.shape) == 4, "x must be (c, x, y, z)" ++ # assert self.get_device() != "cpu" ++ # if verbose: print("step_size:", step_size) # 0.5 ++ # if verbose: print("do mirror:", do_mirroring) # True ++ # ++ # torch.cuda.empty_cache() ++ # ++ # assert patch_size is not None, "patch_size cannot be None for tiled prediction" # 128, 128, 128 ++ # ++ # # for sliding window inference the image must at least be as large as the patch size. It does not matter ++ # # whether the shape is divisible by 2**num_pool as long as the patch size is ++ # data, slicer = pad_nd_image(x, patch_size, pad_border_mode, pad_kwargs, True, None) ++ # data_shape = data.shape # still c, x, y, z + # + # # compute the steps for sliding window + # steps = self._compute_steps_for_sliding_window(patch_size, data_shape[1:], step_size) # 计算窗口 @@ -2277,1371 +2277,1371 @@ index baa8a05..9425fe9 100644 + if all_in_gpu: # False if verbose: print("copying results to CPU") - if regions_class_order is None: -@@ -419,7 +713,7 @@ class SegmentationNetwork(NeuralNetwork): + if regions_class_order is None: +@@ -419,7 +713,7 @@ class SegmentationNetwork(NeuralNetwork): + + class_probabilities = class_probabilities.detach().cpu().numpy() + +- if verbose: print("prediction done") ++ if verbose: print("prediction done") # True + return predicted_segmentation, class_probabilities + + def _internal_predict_2D_2Dconv(self, x: np.ndarray, min_size: Tuple[int, int], do_mirroring: bool, +@@ -504,54 +798,69 @@ class SegmentationNetwork(NeuralNetwork): + assert len(x.shape) == 5, 'x must be (b, c, x, y, z)' + # everything in here takes place on the GPU. If x and mult are not yet on GPU this will be taken care of here + # we now return a cuda tensor! Not numpy array! 
+- +- x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device()) ++ def print_mytensor(data): ++ shape = data.shape[0] ++ for s in range(shape): ++ for i in range(3): ++ print(data[s, 0, 0, i * 3:(i + 1) * 3]) ++ for i in range(3): ++ print(data[s, 0, 0, i * 3 + 50:(i + 1) * 3 + 50]) ++ print('-----') ++ x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device()) # ndarray, 1 1 128 128 128,之后变成tensor + result_torch = torch.zeros([1, self.num_classes] + list(x.shape[2:]), +- dtype=torch.float).cuda(self.get_device(), non_blocking=True) ++ dtype=torch.float).cuda(self.get_device(), non_blocking=True) # 1 3 128 128 128,全是0 + + if mult is not None: +- mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device()) ++ mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device()) # tensor, 128 128 128 + +- if do_mirroring: ++ if do_mirroring: # True + mirror_idx = 8 +- num_results = 2 ** len(mirror_axes) ++ num_results = 2 ** len(mirror_axes) # 8 + else: + mirror_idx = 1 + num_results = 1 + for m in range(mirror_idx): + if m == 0: +- pred = self.inference_apply_nonlin(self(x)) +- result_torch += 1 / num_results * pred ++ y = self(x) # tensor, 1 3 128 128 128 ++ pred = self.inference_apply_nonlin(y) # 1 3 128 128 128 ++ result_torch += 1 / num_results * pred # 1 3 128 128 128 + + if m == 1 and (2 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, )))) ++ y = self(torch.flip(x, (4, ))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (4,)) + + if m == 2 and (1 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (3, )))) ++ y = self(torch.flip(x, (3, ))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (3,)) + + if m == 3 and (2 in mirror_axes) and (1 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3)))) ++ y = self(torch.flip(x, (4, 3))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (4, 3)) + + if m == 4 and (0 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (2, )))) ++ y = self(torch.flip(x, (2, ))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (2,)) + + if m == 5 and (0 in mirror_axes) and (2 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 2)))) ++ y = self(torch.flip(x, (4, 2))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (4, 2)) + + if m == 6 and (0 in mirror_axes) and (1 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (3, 2)))) ++ y = self(torch.flip(x, (3, 2))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (3, 2)) + + if m == 7 and (0 in mirror_axes) and (1 in mirror_axes) and (2 in mirror_axes): +- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3, 2)))) ++ y = self(torch.flip(x, (4, 3, 2))) ++ pred = self.inference_apply_nonlin(y) + result_torch += 1 / num_results * torch.flip(pred, (4, 3, 2)) + +- if mult is not None: ++ if mult is not None: # True + result_torch[:, :] *= mult + + return result_torch +diff --git a/pytorch/nnunet/postprocessing/connected_components.py b/pytorch/nnunet/postprocessing/connected_components.py +index c69471e..45ff991 100644 +--- a/pytorch/nnunet/postprocessing/connected_components.py ++++ b/pytorch/nnunet/postprocessing/connected_components.py +@@ -175,7 +175,7 @@ def determine_postprocessing(base, gt_labels_folder, 
raw_subfolder_name="validat + pp_results['num_samples'] = len(validation_result_raw['all']) + validation_result_raw = validation_result_raw['mean'] + +- if advanced_postprocessing: ++ if advanced_postprocessing: # False + # first treat all foreground classes as one and remove all but the largest foreground connected component + results = [] + for f in fnames: +@@ -270,12 +270,12 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + if len(classes) > 1: + # now depending on whether we do remove all but the largest foreground connected component we define the source dir + # for the next one to be the raw or the temp dir +- if do_fg_cc: ++ if do_fg_cc: # True + source = folder_all_classes_as_fg + else: + source = join(base, raw_subfolder_name) + +- if advanced_postprocessing: ++ if advanced_postprocessing: # False + # now run this for each class separately + results = [] + for f in fnames: +@@ -325,7 +325,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + json_output_file=join(folder_per_class, "summary.json"), + json_author="Fabian", num_threads=processes) + +- if do_fg_cc: ++ if do_fg_cc: # True + old_res = deepcopy(validation_result_PP_test) + else: + old_res = validation_result_raw +@@ -350,7 +350,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat + else: + print("Only one class present, no need to do each class separately as this is covered in fg vs bg") + +- if not advanced_postprocessing: ++ if not advanced_postprocessing: # True + pp_results['min_valid_object_sizes'] = None + + print("done") +diff --git a/pytorch/nnunet/preprocessing/cropping.py b/pytorch/nnunet/preprocessing/cropping.py +index bb0a92a..95d07bc 100644 +--- a/pytorch/nnunet/preprocessing/cropping.py ++++ b/pytorch/nnunet/preprocessing/cropping.py +@@ -39,6 +39,7 @@ def get_bbox_from_mask(mask, outside_value=0): + maxxidx = int(np.max(mask_voxel_coords[1])) + 1 + minyidx = int(np.min(mask_voxel_coords[2])) + maxyidx = int(np.max(mask_voxel_coords[2])) + 1 ++ print(mask.shape, minzidx, maxzidx, minxidx, maxxidx, minyidx, maxyidx) + return [[minzidx, maxzidx], [minxidx, maxxidx], [minyidx, maxyidx]] + + +@@ -202,6 +203,7 @@ class ImageCropper(object): + list_of_args.append((case, case_identifier, overwrite_existing)) + + p = Pool(self.num_threads) ++ print('Pool', self.num_threads) + p.starmap(self.load_crop_save, list_of_args) + p.close() + p.join() +diff --git a/pytorch/nnunet/run/look_pkl.py b/pytorch/nnunet/run/look_pkl.py +new file mode 100644 +index 0000000..1a9d78a +--- /dev/null ++++ b/pytorch/nnunet/run/look_pkl.py +@@ -0,0 +1,18 @@ ++import numpy as np ++import pickle ++ ++inputfile = u'/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver' \ ++ u'/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model.pkl' ++# test = np.load('labels.npy', encoding = "latin1") ++# doc = open('1.txt', 'a') ++# print(test, file=doc) ++ ++ ++ ++fr = open(inputfile, 'rb') ++inf = pickle.load(fr) ++print('done') ++ ++ ++ ++print('end') +\ No newline at end of file +diff --git a/pytorch/nnunet/run/model_prof.py b/pytorch/nnunet/run/model_prof.py +new file mode 100644 +index 0000000..013df26 +--- /dev/null ++++ b/pytorch/nnunet/run/model_prof.py +@@ -0,0 +1,124 @@ ++# Copyright (c) Soumith Chintala 2016, ++# All rights reserved ++# ++# Copyright 2020 Huawei Technologies Co., Ltd ++# ++# Licensed under the BSD 3-Clause License (the "License"); ++# you may not use this file except in 
compliance with the License.
++# You may obtain a copy of the License at
++#
++# https://spdx.org/licenses/BSD-3-Clause.html
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++
++# -*- coding: utf-8 -*-
++"""pytorch_prof.py
++"""
++
++import torch
++import torch.optim as optim
++import torch.nn as nn
++import time
++import argparse
++
++
++def build_model():
++ # define your own model here and load the pretrained weights
++ import torchvision
++ model = torchvision.models.resnet50(pretrained=True)
++ return model
++
++
++def get_raw_data():
++ input_tensor = torch.randn(2, 3, 224, 224)
++ return input_tensor
++
++
++def criterion(x):
++ base_func = nn.CrossEntropyLoss()
++ shape_list = x.shape
++ N = shape_list[0]
++ R = 1
++ if len(shape_list) > 1:
++ for r in shape_list[1:]:
++ R *= r
++ T = torch.randint(0,R, size=(N,)).to(x.device)
++ if str(T.device).startswith('npu'):
++ T = T.int()
++ return base_func(x.reshape(N, -1), T)
++
++
++if __name__ == '__main__':
++ parser = argparse.ArgumentParser(description='PyTorch Prof')
++ parser.add_argument('--device', type=str, default='cpu',
++ help='set which type of device is used. Support cuda:0(device_id), npu:0(device_id).')
++ parser.add_argument('--amp', default=False, action='store_true',
++ help='use amp during prof')
++ parser.add_argument('--loss-scale', default=64.0, type=float,
++ help='loss scale used in amp, default 64.0, -1 means dynamic')
++ parser.add_argument('--opt-level', default='O2', type=str,
++ help='opt-level used in amp, default O2')
++ parser.add_argument('--FusedSGD', default=False, action='store_true',
++ help='use FusedSGD during prof')
++
++ args = parser.parse_args()
++
++ # 1. preparation
++ if args.device.startswith('cuda'):
++ torch.cuda.set_device(args.device)
++ prof_kwargs = {'use_cuda': True}
++ elif args.device.startswith('npu'):
++ torch.npu.set_device(args.device)
++ prof_kwargs = {'use_npu': True}
++ else:
++ prof_kwargs = {}
++
++ # 2. build the model
++ model = build_model()
++ if args.FusedSGD:
++ from apex.optimizers import NpuFusedSGD
++ optimizer = NpuFusedSGD(model.parameters(), lr=0.01)
++ model = model.to(args.device)
++ if args.amp:
++ from apex import amp
++ model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
++ loss_scale=None if args.loss_scale == -1 else args.loss_scale,
++ combine_grad=True)
++ else:
++ optimizer = optim.SGD(model.parameters(), lr=0.01)
++ model = model.to(args.device)
++ if args.amp:
++ from apex import amp
++ model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
++ loss_scale=None if args.loss_scale == -1 else args.loss_scale)
++
++ # 3. generate the input
++ input_tensor = get_raw_data()
++ input_tensor = input_tensor.to(args.device)
++
++ # run a few warm-up iterations first so that the profiled performance is accurate
++ def run():
++ output_tensor = model(input_tensor)
++ optimizer.zero_grad()
++ loss = criterion(output_tensor)
++ if args.amp:
++ with amp.scale_loss(loss, optimizer) as scaled_loss:
++ scaled_loss.backward()
++ else:
++ loss.backward()
++ optimizer.step()
++ return loss
++ for i in range(5):
++ start_time = time.time()
++ loss = run()
++ print('iter: %d, loss: %.2f, time: %.2f' % (i, loss, (time.time() - start_time)*1000))
++
++ # 4.
run forward + profiling
++ with torch.autograd.profiler.profile(**prof_kwargs) as prof:
++ run()
++ print(prof.key_averages().table())
++ prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device)
+\ No newline at end of file
+diff --git a/pytorch/nnunet/run/run_training.py b/pytorch/nnunet/run/run_training.py
+index eb7ca2f..08214d6 100644
+--- a/pytorch/nnunet/run/run_training.py
++++ b/pytorch/nnunet/run/run_training.py
+@@ -31,7 +31,7 @@ def main():
+ parser.add_argument("task", help="can be task name or task id")
+ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'')
+ parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation",
+- action="store_true")
++ action="store_true", default=True)
+ parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis")
+ parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training",
+ action="store_true")
+@@ -134,8 +134,8 @@ def main():
+ fp16=run_mixed_precision)
+
+ trainer.initialize(not validation_only)
+-
+- if weights != None:
++
++ if weights != None:
+ trainer.load_pretrained_encoder_weights(weights)
+ sys.stdout.flush()
+
+diff --git a/pytorch/nnunet/run/run_training2.py b/pytorch/nnunet/run/run_training2.py
+new file mode 100644
+index 0000000..372a4d4
+--- /dev/null
++++ b/pytorch/nnunet/run/run_training2.py
+@@ -0,0 +1,172 @@
++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
++#
++# Licensed under the Apache License, Version 2.0 (the "License");
++# you may not use this file except in compliance with the License.
++# You may obtain a copy of the License at
++#
++# http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing, software
++# distributed under the License is distributed on an "AS IS" BASIS,
++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++# See the License for the specific language governing permissions and
++# limitations under the License.
++import os ++import sys ++import argparse ++from batchgenerators.utilities.file_and_folder_operations import * ++from nnunet.run.default_configuration import get_default_configuration ++from nnunet.paths import default_plans_identifier ++from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes ++from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes ++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++ ++ ++# import pdb ++# pdb.set_trace() ++ ++def main(): ++ parser = argparse.ArgumentParser() ++ parser.add_argument("-network", default="3d_fullres") ++ parser.add_argument("-network_trainer", default="nnUNetPlusPlusTrainerV2") ++ parser.add_argument("-task", default="003", help="can be task name or task id") ++ parser.add_argument("-fold", default="0", help='0, 1, ..., 5 or \'all\'') ++ parser.add_argument("-val", "--validation_only", default=False, ++ help="use this if you want to only run the validation", ++ action="store_true") ++ parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis") ++ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", ++ action="store_true") ++ parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", ++ default=default_plans_identifier, required=False) ++ parser.add_argument("--use_compressed_data", default=False, action="store_true", ++ help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data " ++ "is much more CPU and RAM intensive and should only be used if you know what you are " ++ "doing", required=False) ++ parser.add_argument("--deterministic", ++ help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think " ++ "this is not necessary. Deterministic training will make you overfit to some random seed. " ++ "Don't use that.", ++ required=False, default=False, action="store_true") ++ parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will " ++ "export npz files of " ++ "predicted segmentations " ++ "in the validation as well. " ++ "This is needed to run the " ++ "ensembling step so unless " ++ "you are developing nnUNet " ++ "you should enable this") ++ parser.add_argument("--find_lr", required=False, default=False, action="store_true", ++ help="not used here, just for fun") ++ parser.add_argument("--valbest", required=False, default=False, action="store_true", ++ help="hands off. This is not intended to be used") ++ parser.add_argument("--fp32", required=False, default=False, action="store_true", ++ help="disable mixed precision training and run old school fp32") ++ parser.add_argument("--val_folder", required=False, default="validation_raw", ++ help="name of the validation folder. No need to use this for most people") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations. Testing purpose only. Hands off") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z if z is resampled separately. Testing purpose only. 
" ++ # "Hands off") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") ++ ++ args = parser.parse_args() ++ print('------------\n', args) ++ ++ task = args.task ++ fold = args.fold ++ network = args.network ++ network_trainer = args.network_trainer ++ weights = args.w ++ validation_only = args.validation_only ++ plans_identifier = args.p ++ find_lr = args.find_lr ++ ++ use_compressed_data = args.use_compressed_data ++ decompress_data = not use_compressed_data ++ ++ deterministic = args.deterministic ++ valbest = args.valbest ++ ++ fp32 = args.fp32 ++ run_mixed_precision = not fp32 ++ ++ val_folder = args.val_folder ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ ++ if not task.startswith("Task"): ++ task_id = int(task) ++ task = convert_id_to_task_name(task_id) ++ ++ if fold == 'all': ++ pass ++ else: ++ fold = int(fold) ++ ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ ++ trainer_class, domain = get_default_configuration(network, task, network_trainer, plans_identifier) ++ ++ if trainer_class is None: ++ raise RuntimeError("Could not find trainer class in nnunet.training.network_training") ++ ++ if network == "3d_cascade_fullres": ++ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ ++ "If running 3d_cascade_fullres then your " \ ++ "trainer class must be derived from " \ ++ "nnUNetTrainerCascadeFullRes" ++ else: ++ assert issubclass(trainer_class, ++ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" ++ ++ trainer = trainer_class(plans_file, fold, output_folder=output_folder_name, dataset_directory=dataset_directory, ++ batch_dice=batch_dice, stage=stage, unpack_data=decompress_data, ++ deterministic=deterministic, ++ fp16=run_mixed_precision) ++ ++ trainer.initialize(not validation_only) ++ ++ if weights != None: ++ trainer.load_pretrained_encoder_weights(weights) ++ sys.stdout.flush() ++ ++ if find_lr: ++ trainer.find_lr() ++ else: ++ if not validation_only: ++ if args.continue_training: ++ trainer.load_latest_checkpoint() ++ trainer.run_training() ++ else: ++ if valbest: ++ trainer.load_best_checkpoint(train=False) ++ else: ++ trainer.load_latest_checkpoint(train=False) ++ ++ trainer.network.eval() ++ ++ # predict validation ++ trainer.validate(save_softmax=args.npz, validation_folder_name=val_folder) ++ ++ if network == '3d_lowres': ++ trainer.load_best_checkpoint(False) ++ print("predicting segmentations for the next stage of the cascade") ++ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/run/run_training_DDP.py b/pytorch/nnunet/run/run_training_DDP.py +index 5ffcdcf..6ad3d5a 100644 +--- a/pytorch/nnunet/run/run_training_DDP.py ++++ b/pytorch/nnunet/run/run_training_DDP.py +@@ -27,13 +27,13 @@ from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name -- if verbose: print("prediction done") -+ if verbose: 
print("prediction done") # True - return predicted_segmentation, class_probabilities + def main(): + parser = argparse.ArgumentParser() +- parser.add_argument("network") +- parser.add_argument("network_trainer") +- parser.add_argument("task", help="can be task name or task id") +- parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') ++ parser.add_argument("network", default='3d_fullres') ++ parser.add_argument("network_trainer", default='nnUNetTrainerV2_DDP') ++ parser.add_argument("task", help="can be task name or task id", default='003') ++ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'', default='0') + parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", +- action="store_true") +- parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", ++ action="store_true", default=False) ++ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", + action="store_true") + parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", + default=default_plans_identifier, required=False) +@@ -78,7 +78,7 @@ def main(): + # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") - def _internal_predict_2D_2Dconv(self, x: np.ndarray, min_size: Tuple[int, int], do_mirroring: bool, -@@ -504,54 +798,69 @@ class SegmentationNetwork(NeuralNetwork): - assert len(x.shape) == 5, 'x must be (b, c, x, y, z)' - # everything in here takes place on the GPU. If x and mult are not yet on GPU this will be taken care of here - # we now return a cuda tensor! Not numpy array! + args = parser.parse_args() - -- x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device()) -+ def print_mytensor(data): -+ shape = data.shape[0] -+ for s in range(shape): -+ for i in range(3): -+ print(data[s, 0, 0, i * 3:(i + 1) * 3]) -+ for i in range(3): -+ print(data[s, 0, 0, i * 3 + 50:(i + 1) * 3 + 50]) -+ print('-----') -+ x = to_cuda(maybe_to_torch(x), gpu_id=self.get_device()) # ndarray, 1 1 128 128 128,之后变成tensor - result_torch = torch.zeros([1, self.num_classes] + list(x.shape[2:]), -- dtype=torch.float).cuda(self.get_device(), non_blocking=True) -+ dtype=torch.float).cuda(self.get_device(), non_blocking=True) # 1 3 128 128 128,全是0 - - if mult is not None: -- mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device()) -+ mult = to_cuda(maybe_to_torch(mult), gpu_id=self.get_device()) # tensor, 128 128 128 ++ print('\n\n args=', args, '\n\n') + task = args.task + fold = args.fold + network = args.network +@@ -115,7 +115,7 @@ def main(): + # raise ValueError("force_separate_z must be None, True or False. 
Given: %s" % force_separate_z) -- if do_mirroring: -+ if do_mirroring: # True - mirror_idx = 8 -- num_results = 2 ** len(mirror_axes) -+ num_results = 2 ** len(mirror_axes) # 8 - else: - mirror_idx = 1 - num_results = 1 - for m in range(mirror_idx): - if m == 0: -- pred = self.inference_apply_nonlin(self(x)) -- result_torch += 1 / num_results * pred -+ y = self(x) # tensor, 1 3 128 128 128 -+ pred = self.inference_apply_nonlin(y) # 1 3 128 128 128 -+ result_torch += 1 / num_results * pred # 1 3 128 128 128 + plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ +- trainer_class = get_default_configuration(network, task, network_trainer, plans_identifier) ++ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) - if m == 1 and (2 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, )))) -+ y = self(torch.flip(x, (4, ))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (4,)) + if trainer_class is None: + raise RuntimeError("Could not find trainer class in meddec.model_training") +diff --git a/pytorch/nnunet/run/run_training_hypDDP.py b/pytorch/nnunet/run/run_training_hypDDP.py +new file mode 100644 +index 0000000..f50744c +--- /dev/null ++++ b/pytorch/nnunet/run/run_training_hypDDP.py +@@ -0,0 +1,164 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++ ++ ++import argparse ++ ++from batchgenerators.utilities.file_and_folder_operations import * ++from nnunet.run.default_configuration import get_default_configuration ++from nnunet.paths import default_plans_identifier ++from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes ++from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes ++from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++ ++ ++def main(): ++ parser = argparse.ArgumentParser() ++ parser.add_argument("network") ++ parser.add_argument("network_trainer") ++ parser.add_argument("task", help="can be task name or task id") ++ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') ++ parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", ++ action="store_true") ++ parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", ++ action="store_true") ++ parser.add_argument("-p", help="plans identifier. 
Only change this if you created a custom experiment planner", ++ default=default_plans_identifier, required=False) ++ parser.add_argument("--use_compressed_data", default=False, action="store_true", ++ help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data " ++ "is much more CPU and RAM intensive and should only be used if you know what you are " ++ "doing", required=False) ++ parser.add_argument("--deterministic", ++ help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think " ++ "this is not necessary. Deterministic training will make you overfit to some random seed. " ++ "Don't use that.", ++ required=False, default=False, action="store_true") ++ parser.add_argument("--local_rank", default=0, type=int) ++ parser.add_argument("--fp32", required=False, default=False, action="store_true", ++ help="disable mixed precision training and run old school fp32") ++ parser.add_argument("--dbs", required=False, default=False, action="store_true", help="distribute batch size. If " ++ "True then whatever " ++ "batch_size is in plans will " ++ "be distributed over DDP " ++ "models, if False then each " ++ "model will have batch_size " ++ "for a total of " ++ "GPUs*batch_size") ++ parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will " ++ "export npz files of " ++ "predicted segmentations " ++ "in the vlaidation as well. " ++ "This is needed to run the " ++ "ensembling step so unless " ++ "you are developing nnUNet " ++ "you should enable this") ++ parser.add_argument("--valbest", required=False, default=False, action="store_true", help="") ++ parser.add_argument("--find_lr", required=False, default=False, action="store_true", help="") ++ parser.add_argument("--val_folder", required=False, default="validation_raw", ++ help="name of the validation folder. No need to use this for most people") ++ # parser.add_argument("--interp_order", required=False, default=3, type=int, ++ # help="order of interpolation for segmentations. Testing purpose only. Hands off") ++ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, ++ # help="order of interpolation along z if z is resampled separately. Testing purpose only. " ++ # "Hands off") ++ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, ++ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. 
Hands off") ++ ++ args = parser.parse_args() ++ print('\n\n args=', args, '\n\n') ++ task = args.task ++ fold = args.fold ++ network = args.network ++ network_trainer = args.network_trainer ++ validation_only = args.validation_only ++ plans_identifier = args.p ++ use_compressed_data = args.use_compressed_data ++ decompress_data = not use_compressed_data ++ deterministic = args.deterministic ++ valbest = args.valbest ++ find_lr = args.find_lr ++ val_folder = args.val_folder ++ # interp_order = args.interp_order ++ # interp_order_z = args.interp_order_z ++ # force_separate_z = args.force_separate_z ++ fp32 = args.fp32 ++ ++ if not task.startswith("Task"): ++ task_id = int(task) ++ task = convert_id_to_task_name(task_id) ++ ++ if fold == 'all': ++ pass ++ else: ++ fold = int(fold) ++ # ++ # if force_separate_z == "None": ++ # force_separate_z = None ++ # elif force_separate_z == "False": ++ # force_separate_z = False ++ # elif force_separate_z == "True": ++ # force_separate_z = True ++ # else: ++ # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) ++ ++ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ ++ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) ++ ++ if trainer_class is None: ++ raise RuntimeError("Could not find trainer class in meddec.model_training") ++ ++ if network == "3d_cascade_fullres": ++ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ ++ "If running 3d_cascade_fullres then your " \ ++ "trainer class must be derived from " \ ++ "nnUNetTrainerCascadeFullRes" ++ else: ++ assert issubclass(trainer_class, ++ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" ++ ++ trainer = trainer_class(plans_file, fold, local_rank=args.local_rank, output_folder=output_folder_name, ++ dataset_directory=dataset_directory, batch_dice=batch_dice, stage=stage, ++ unpack_data=decompress_data, deterministic=deterministic, fp16=not fp32, ++ distribute_batch_size=args.dbs) ++ ++ trainer.initialize(not validation_only) ++ ++ if find_lr: ++ trainer.find_lr() ++ else: ++ if not validation_only: ++ if args.continue_training: ++ trainer.load_latest_checkpoint() ++ trainer.run_training() ++ else: ++ if valbest: ++ trainer.load_best_checkpoint(train=False) ++ else: ++ trainer.load_latest_checkpoint(train=False) ++ ++ trainer.network.eval() ++ ++ # predict validation ++ trainer.validate(save_softmax=args.npz, validation_folder_name=val_folder) ++ ++ if network == '3d_lowres': ++ trainer.load_best_checkpoint(False) ++ print("predicting segmentations for the next stage of the cascade") ++ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) ++ ++ ++if __name__ == "__main__": ++ main() +diff --git a/pytorch/nnunet/training/loss_functions/crossentropy.py b/pytorch/nnunet/training/loss_functions/crossentropy.py +index 6195437..0c782d9 100644 +--- a/pytorch/nnunet/training/loss_functions/crossentropy.py ++++ b/pytorch/nnunet/training/loss_functions/crossentropy.py +@@ -6,6 +6,15 @@ class RobustCrossEntropyLoss(nn.CrossEntropyLoss): + this is just a compatibility layer because my target tensor is float and has an extra dimension + """ + def forward(self, input: Tensor, target: Tensor) -> Tensor: ++ # i = 0 ++ # print('----------') ++ # print('input:', input.shape) ++ # for i in range(len(input)): ++ # print(i, input[i].shape) ++ # print('target') ++ # for i in 
range(len(target)): ++ # print(i, target[i].shape) ++ # print('\n----------') + if len(target.shape) == len(input.shape): + assert target.shape[1] == 1 + target = target[:, 0] +diff --git a/pytorch/nnunet/training/network_training/network_trainer.py b/pytorch/nnunet/training/network_training/network_trainer.py +index e920158..f0031d3 100644 +--- a/pytorch/nnunet/training/network_training/network_trainer.py ++++ b/pytorch/nnunet/training/network_training/network_trainer.py +@@ -37,6 +37,7 @@ from abc import abstractmethod + from datetime import datetime + from tqdm import trange + from nnunet.utilities.to_torch import maybe_to_torch, to_cuda ++import pdb - if m == 2 and (1 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (3, )))) -+ y = self(torch.flip(x, (3, ))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (3,)) - if m == 3 and (2 in mirror_axes) and (1 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3)))) -+ y = self(torch.flip(x, (4, 3))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (4, 3)) + class NetworkTrainer(object): +@@ -438,7 +439,8 @@ class NetworkTrainer(object): + self._maybe_init_amp() - if m == 4 and (0 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (2, )))) -+ y = self(torch.flip(x, (2, ))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (2,)) + def _maybe_init_amp(self): +- if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available(): ++ # if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available(): ++ if self.fp16 and self.amp_grad_scaler is None: + self.amp_grad_scaler = GradScaler() - if m == 5 and (0 in mirror_axes) and (2 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 2)))) -+ y = self(torch.flip(x, (4, 2))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (4, 2)) + def plot_network_architecture(self): +diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py +index e9aa611..9b97e8c 100644 +--- a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py ++++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py +@@ -13,6 +13,7 @@ + # limitations under the License. 
- if m == 6 and (0 in mirror_axes) and (1 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (3, 2)))) -+ y = self(torch.flip(x, (3, 2))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (3, 2)) - if m == 7 and (0 in mirror_axes) and (1 in mirror_axes) and (2 in mirror_axes): -- pred = self.inference_apply_nonlin(self(torch.flip(x, (4, 3, 2)))) -+ y = self(torch.flip(x, (4, 3, 2))) -+ pred = self.inference_apply_nonlin(y) - result_torch += 1 / num_results * torch.flip(pred, (4, 3, 2)) ++import SimpleITK as sitk + from collections import OrderedDict + from typing import Tuple + import sys +@@ -35,12 +36,10 @@ from torch.cuda.amp import autocast + from nnunet.training.learning_rate.poly_lr import poly_lr + from batchgenerators.utilities.file_and_folder_operations import * -- if mult is not None: -+ if mult is not None: # True - result_torch[:, :] *= mult +- + class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + """ + Info for Fabian: same as internal nnUNetTrainerV2_2 + """ +- + def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True, stage=None, + unpack_data=True, deterministic=True, fp16=False): + super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data, +@@ -66,7 +65,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + maybe_mkdir_p(self.output_folder) - return result_torch -diff --git a/pytorch/nnunet/postprocessing/connected_components.py b/pytorch/nnunet/postprocessing/connected_components.py -index c69471e..45ff991 100644 ---- a/pytorch/nnunet/postprocessing/connected_components.py -+++ b/pytorch/nnunet/postprocessing/connected_components.py -@@ -175,7 +175,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat - pp_results['num_samples'] = len(validation_result_raw['all']) - validation_result_raw = validation_result_raw['mean'] + if force_load_plans or (self.plans is None): +- self.load_plans_file() ++ self.load_plans_file() # '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl' -- if advanced_postprocessing: -+ if advanced_postprocessing: # False - # first treat all foreground classes as one and remove all but the largest foreground connected component - results = [] - for f in fnames: -@@ -270,12 +270,12 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat - if len(classes) > 1: - # now depending on whether we do remove all but the largest foreground connected component we define the source dir - # for the next one to be the raw or the temp dir -- if do_fg_cc: -+ if do_fg_cc: # True - source = folder_all_classes_as_fg - else: - source = join(base, raw_subfolder_name) + self.process_plans(self.plans) -- if advanced_postprocessing: -+ if advanced_postprocessing: # False - # now run this for each class separately - results = [] - for f in fnames: -@@ -325,7 +325,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat - json_output_file=join(folder_per_class, "summary.json"), - json_author="Fabian", num_threads=processes) +@@ -189,6 +188,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + """ + ds = self.network.do_ds + self.network.do_ds = False ++ overwrite = False # 不希望重新跑推理,不然太久了 + ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian, + overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs) -- if do_fg_cc: -+ if do_fg_cc: # True - 
old_res = deepcopy(validation_result_PP_test) - else: - old_res = validation_result_raw -@@ -350,7 +350,7 @@ def determine_postprocessing(base, gt_labels_folder, raw_subfolder_name="validat - else: - print("Only one class present, no need to do each class separately as this is covered in fg vs bg") +@@ -200,16 +200,18 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + use_sliding_window: bool = True, step_size: float = 0.5, + use_gaussian: bool = True, pad_border_mode: str = 'constant', + pad_kwargs: dict = None, all_in_gpu: bool = True, +- verbose: bool = True, mixed_precision=True) -> Tuple[np.ndarray, np.ndarray]: ++ verbose: bool = True, mixed_precision=True, img_name=None, ++ pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]: + """ + We need to wrap this because we need to enforce self.network.do_ds = False for prediction + """ +- ds = self.network.do_ds ++ ds = self.network.do_ds # ds = True + self.network.do_ds = False + ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, + use_sliding_window, step_size, use_gaussian, + pad_border_mode, pad_kwargs, all_in_gpu, verbose, +- mixed_precision=mixed_precision) ++ mixed_precision=mixed_precision, img_name=img_name, ++ pre_mode=pre_mode, fp=fp) + self.network.do_ds = ds + return ret -- if not advanced_postprocessing: -+ if not advanced_postprocessing: # True - pp_results['min_valid_object_sizes'] = None +@@ -225,7 +227,20 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + data_dict = next(data_generator) + data = data_dict['data'] + target = data_dict['target'] +- ++ # i = 0 ++ # while True: ++ # i += 1 ++ # data_dict = next(data_generator) ++ # data = data_dict['data'] ++ # target = data_dict['target'] ++ # data_numpy_output = '/home/yupeng/save_data.nii.gz' ++ # data_numpy = data[0][0].numpy() ++ # target_numpy = target[0][0][0].numpy() ++ # data_1 = data_numpy.flatten() ++ # minm = np.argmin(data_1) ++ # maxm = np.argmax(data_1) ++ # out = sitk.GetImageFromArray(data_numpy) ++ # sitk.WriteImage(out, data_numpy_output) + data = maybe_to_torch(data) + target = maybe_to_torch(target) - print("done") -diff --git a/pytorch/nnunet/preprocessing/cropping.py b/pytorch/nnunet/preprocessing/cropping.py -index bb0a92a..95d07bc 100644 ---- a/pytorch/nnunet/preprocessing/cropping.py -+++ b/pytorch/nnunet/preprocessing/cropping.py -@@ -39,6 +39,7 @@ def get_bbox_from_mask(mask, outside_value=0): - maxxidx = int(np.max(mask_voxel_coords[1])) + 1 - minyidx = int(np.min(mask_voxel_coords[2])) - maxyidx = int(np.max(mask_voxel_coords[2])) + 1 -+ print(mask.shape, minzidx, maxzidx, minxidx, maxxidx, minyidx, maxyidx) - return [[minzidx, maxzidx], [minxidx, maxxidx], [minyidx, maxyidx]] +@@ -234,7 +249,6 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + target = to_cuda(target) + self.optimizer.zero_grad() +- + if self.fp16: + with autocast(): + output = self.network(data) +@@ -261,7 +275,6 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): + self.run_online_evaluation(output, target) -@@ -202,6 +203,7 @@ class ImageCropper(object): - list_of_args.append((case, case_identifier, overwrite_existing)) + del target +- + return l.detach().cpu().numpy() - p = Pool(self.num_threads) -+ print('Pool', self.num_threads) - p.starmap(self.load_crop_save, list_of_args) - p.close() - p.join() -diff --git a/pytorch/nnunet/run/look_pkl.py b/pytorch/nnunet/run/look_pkl.py -new file mode 100644 -index 0000000..1a9d78a ---- /dev/null -+++ b/pytorch/nnunet/run/look_pkl.py -@@ -0,0 +1,18 @@ -+import numpy as np -+import pickle 
-+ -+inputfile = u'/data/yupeng/environment_variables/RESULTS_FOLDER/nnUNet/3d_fullres/Task003_Liver' \ -+ u'/nnUNetPlusPlusTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model.pkl' -+# test = np.load('labels.npy', encoding = "latin1") -+# doc = open('1.txt', 'a') -+# print(test, file=doc) -+ -+ -+ -+fr = open(inputfile, 'rb') -+inf = pickle.load(fr) -+print('done') -+ -+ -+ -+print('end') -\ No newline at end of file -diff --git a/pytorch/nnunet/run/model_prof.py b/pytorch/nnunet/run/model_prof.py + def do_split(self): +diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py new file mode 100644 -index 0000000..013df26 +index 0000000..e2ab2fa --- /dev/null -+++ b/pytorch/nnunet/run/model_prof.py -@@ -0,0 +1,124 @@ -+# Copyright (c) Soumith Chintala 2016, -+# All rights reserved -+# -+# Copyright 2020 Huawei Technologies Co., Ltd -+# -+# Licensed under the BSD 3-Clause License (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# https://spdx.org/licenses/BSD-3-Clause.html -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+ -+# -*- coding: utf-8 -*- -+"""pytorch_prof.py -+""" -+ -+import torch -+import torch.optim as optim -+import torch.nn as nn -+import time -+import argparse -+ -+ -+def build_model(): -+ # 请自定义模型并加载预训练模型 -+ import torchvision -+ model = torchvision.models.resnet50(pretrained=True) -+ return model -+ -+ -+def get_raw_data(): -+ input_tensor = torch.randn(2, 3, 224, 224) -+ return input_tensor -+ -+ -+def criterion(x): -+ base_func = nn.CrossEntropyLoss() -+ shape_list = x.shape -+ N = shape_list[0] -+ R = 1 -+ if len(shape_list) > 1: -+ for r in shape_list[1:]: -+ R *= r -+ T = torch.randint(0,R, size=(N,)).to(x.device) -+ if str(T.device).startswith('npu'): -+ T = T.int() -+ return base_func(x.reshape(N, -1), T) -+ -+ -+if __name__ == '__main__': -+ parser = argparse.ArgumentParser(description='PyTorch Prof') -+ parser.add_argument('--device', type=str, default='cpu', -+ help='set which type of device used. 
Support cuda:0(device_id), npu:0(device_id).') -+ parser.add_argument('--amp', default=False, action='store_true', -+ help='use amp during prof') -+ parser.add_argument('--loss-scale', default=64.0, type=float, -+ help='loss scale using in amp, default 64.0, -1 means dynamic') -+ parser.add_argument('--opt-level', default='O2', type=str, -+ help='opt-level using in amp, default O2') -+ parser.add_argument('--FusedSGD', default=False, action='store_true', -+ help='use FusedSGD during prof') -+ -+ args = parser.parse_args() -+ -+ # 1.准备工作 -+ if args.device.startswith('cuda'): -+ torch.cuda.set_device(args.device) -+ prof_kwargs = {'use_cuda': True} -+ elif args.device.startswith('npu'): -+ torch.npu.set_device(args.device) -+ prof_kwargs = {'use_npu': True} -+ else: -+ prof_kwargs = {} -+ -+ # 2.构建模型 -+ model = build_model() -+ if args.FusedSGD: -+ from apex.optimizers import NpuFusedSGD -+ optimizer = NpuFusedSGD(model.parameters(), lr=0.01) -+ model = model.to(args.device) -+ if args.amp: -+ from apex import amp -+ model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, -+ loss_scale=None if args.loss_scale == -1 else args.loss_scale, -+ combine_grad=True) -+ else: -+ optimizer = optim.SGD(model.parameters(), lr=0.01) -+ model = model.to(args.device) -+ if args.amp: -+ from apex import amp -+ model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, -+ loss_scale=None if args.loss_scale == -1 else args.loss_scale) -+ -+ # 3.生成input -+ input_tensor = get_raw_data() -+ input_tensor = input_tensor.to(args.device) -+ -+ # 先运行一次,保证prof得到的性能是正确的 -+ def run(): -+ output_tensor = model(input_tensor) -+ optimizer.zero_grad() -+ loss = criterion(output_tensor) -+ if args.amp: -+ with amp.scale_loss(loss, optimizer) as scaled_loss: -+ scaled_loss.backward() -+ else: -+ loss.backward() -+ optimizer.step() -+ return loss -+ for i in range(5): -+ start_time = time.time() -+ loss = run() -+ print('iter: %d, loss: %.2f, time: %.2f' % (i, loss, (time.time() - start_time)*1000)) -+ -+ # 4. 执行forward+profiling -+ with torch.autograd.profiler.profile(**prof_kwargs) as prof: -+ run() -+ print(prof.key_averages().table()) -+ prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device) -\ No newline at end of file -diff --git a/pytorch/nnunet/run/run_training.py b/pytorch/nnunet/run/run_training.py -index eb7ca2f..08214d6 100644 ---- a/pytorch/nnunet/run/run_training.py -+++ b/pytorch/nnunet/run/run_training.py -@@ -31,7 +31,7 @@ def main(): - parser.add_argument("task", help="can be task name or task id") - parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') - parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", -- action="store_true") -+ action="store_true", default=True) - parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis") - parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", - action="store_true") -@@ -134,8 +134,8 @@ def main(): - fp16=run_mixed_precision) - - trainer.initialize(not validation_only) -- -- if weights != None: ++++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py +@@ -0,0 +1,483 @@ ++# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. 
++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++ ++ ++from collections import OrderedDict ++from typing import Tuple ++import sys ++import time ++import numpy as np ++import torch ++import torch.distributed as dist ++from torch.cuda.amp import autocast ++from torch.nn.parallel import DistributedDataParallel as DDP ++from nnunet.training.loss_functions.deep_supervision import MultipleOutputLoss2 ++from nnunet.utilities.to_torch import maybe_to_torch, to_cuda ++from nnunet.training.data_augmentation.default_data_augmentation import get_moreDA_augmentation ++from nnunet.network_architecture.generic_UNetPlusPlus import Generic_UNetPlusPlus ++from nnunet.network_architecture.initialization import InitWeights_He ++from nnunet.network_architecture.neural_network import SegmentationNetwork ++from nnunet.training.data_augmentation.default_data_augmentation import default_2D_augmentation_params, \ ++ get_patch_size, default_3D_augmentation_params ++from nnunet.training.dataloading.dataset_loading import unpack_dataset ++from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer ++from nnunet.utilities.nd_softmax import softmax_helper ++from sklearn.model_selection import KFold ++from torch import nn ++from torch.cuda.amp import autocast ++from nnunet.training.learning_rate.poly_lr import poly_lr ++from batchgenerators.utilities.file_and_folder_operations import * ++ ++ ++class nnUNetPlusPlusTrainerV2_DDP(nnUNetTrainer): ++ """ ++ Info for Fabian: same as internal nnUNetTrainerV2_2 ++ """ ++ ++ def __init__(self, plans_file, fold, local_rank, output_folder=None, dataset_directory=None, batch_dice=True, ++ stage=None, ++ unpack_data=True, deterministic=True, fp16=False, distribute_batch_size=1): ++ super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data, ++ deterministic, fp16) ++ self.init_args = ( ++ plans_file, fold, local_rank, output_folder, dataset_directory, batch_dice, stage, unpack_data, ++ deterministic, distribute_batch_size, fp16) ++ self.max_num_epochs = 1000 ++ self.initial_lr = 1e-2 ++ self.deep_supervision_scales = None ++ self.ds_loss_weights = None ++ self.distribute_batch_size = distribute_batch_size ++ np.random.seed(local_rank) ++ torch.manual_seed(local_rank) ++ self.local_rank = local_rank ++ if torch.cuda.is_available(): ++ torch.cuda.set_device(local_rank) ++ dist.init_process_group(backend='nccl', init_method='env://') ++ ++ self.pin_memory = True ++ ++ def initialize(self, training=True, force_load_plans=False): ++ """ ++ - replaced get_default_augmentation with get_moreDA_augmentation ++ - enforce to only run this code once ++ - loss function wrapper for deep supervision ++ ++ :param training: ++ :param force_load_plans: ++ :return: ++ """ ++ if not self.was_initialized: ++ maybe_mkdir_p(self.output_folder) ++ ++ if force_load_plans or (self.plans is None): ++ self.load_plans_file() ++ ++ self.process_plans(self.plans) ++ ++ self.setup_DA_params() ++ ++ ################# Here we wrap the loss for deep supervision ############ ++ # we need to know the number of outputs of the network ++ net_numpool = len(self.net_num_pool_op_kernel_sizes) ++ ++ # 
we give each output a weight which decreases exponentially (division by 2) as the resolution decreases ++ # this gives higher resolution outputs more weight in the loss ++ weights = np.array([1 / (2 ** i) for i in range(net_numpool)]) ++ ++ # we don't use the lowest 2 outputs. Normalize weights so that they sum to 1 ++ mask = np.array([True] + [True if i < net_numpool - 1 else False for i in range(1, net_numpool)]) ++ weights[~mask] = 0 ++ weights = weights / weights.sum() ++ # self.ds_loss_weights = weights ++ self.ds_loss_weights = None ++ # now wrap the loss ++ self.loss = MultipleOutputLoss2(self.loss, self.ds_loss_weights) ++ ################# END ################### ++ ++ self.folder_with_preprocessed_data = join(self.dataset_directory, self.plans['data_identifier'] + ++ "_stage%d" % self.stage) ++ if training: ++ self.dl_tr, self.dl_val = self.get_basic_generators() ++ if self.unpack_data: ++ if self.local_rank == 0: ++ print("unpacking dataset") ++ unpack_dataset(self.folder_with_preprocessed_data) ++ print("done") ++ else: ++ # we need to wait until worker 0 has finished unpacking ++ npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False) ++ case_ids = [i[:-4] for i in npz_files] ++ all_present = all( ++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids]) ++ while not all_present: ++ print("worker", self.local_rank, "is waiting for unpacking") ++ time.sleep(3) ++ all_present = all( ++ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids]) ++ # there is some slight chance that there may arise some error because dataloader are loading a file ++ # that is still being written by worker 0. We ignore this for now an address it only if it becomes ++ # relevant ++ # (this can occur because while worker 0 writes the file is technically present so the other workers ++ # will proceed and eventually try to read it) ++ else: ++ print( ++ "INFO: Not unpacking data! Training may be slow due to that. Pray you are not using 2d or you " ++ "will wait all winter for your model to finish!") + -+ if weights != None: - trainer.load_pretrained_encoder_weights(weights) - sys.stdout.flush() - -diff --git a/pytorch/nnunet/run/run_training2.py b/pytorch/nnunet/run/run_training2.py -new file mode 100644 -index 0000000..372a4d4 ---- /dev/null -+++ b/pytorch/nnunet/run/run_training2.py -@@ -0,0 +1,172 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. 
-+import os -+import sys -+import argparse -+from batchgenerators.utilities.file_and_folder_operations import * -+from nnunet.run.default_configuration import get_default_configuration -+from nnunet.paths import default_plans_identifier -+from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage -+from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer -+from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes -+from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes -+from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name ++ self.tr_gen, self.val_gen = get_moreDA_augmentation( ++ self.dl_tr, self.dl_val, ++ self.data_aug_params[ ++ 'patch_size_for_spatialtransform'], ++ self.data_aug_params, ++ deep_supervision_scales=self.deep_supervision_scales, ++ pin_memory=self.pin_memory ++ ) ++ self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())), ++ also_print_to_console=False) ++ self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())), ++ also_print_to_console=False) ++ else: ++ pass + ++ self.initialize_network() ++ self.initialize_optimizer_and_scheduler() + -+# import pdb -+# pdb.set_trace() ++ assert isinstance(self.network, (SegmentationNetwork, DDP)) ++ else: ++ self.print_to_log_file('self.was_initialized is True, not running self.initialize again') ++ self.was_initialized = True + -+def main(): -+ parser = argparse.ArgumentParser() -+ parser.add_argument("-network", default="3d_fullres") -+ parser.add_argument("-network_trainer", default="nnUNetPlusPlusTrainerV2") -+ parser.add_argument("-task", default="003", help="can be task name or task id") -+ parser.add_argument("-fold", default="0", help='0, 1, ..., 5 or \'all\'') -+ parser.add_argument("-val", "--validation_only", default=False, -+ help="use this if you want to only run the validation", -+ action="store_true") -+ parser.add_argument("-w", required=False, default=None, help="Load pre-trained Models Genesis") -+ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", -+ action="store_true") -+ parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", -+ default=default_plans_identifier, required=False) -+ parser.add_argument("--use_compressed_data", default=False, action="store_true", -+ help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data " -+ "is much more CPU and RAM intensive and should only be used if you know what you are " -+ "doing", required=False) -+ parser.add_argument("--deterministic", -+ help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think " -+ "this is not necessary. Deterministic training will make you overfit to some random seed. " -+ "Don't use that.", -+ required=False, default=False, action="store_true") -+ parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will " -+ "export npz files of " -+ "predicted segmentations " -+ "in the validation as well. 
" -+ "This is needed to run the " -+ "ensembling step so unless " -+ "you are developing nnUNet " -+ "you should enable this") -+ parser.add_argument("--find_lr", required=False, default=False, action="store_true", -+ help="not used here, just for fun") -+ parser.add_argument("--valbest", required=False, default=False, action="store_true", -+ help="hands off. This is not intended to be used") -+ parser.add_argument("--fp32", required=False, default=False, action="store_true", -+ help="disable mixed precision training and run old school fp32") -+ parser.add_argument("--val_folder", required=False, default="validation_raw", -+ help="name of the validation folder. No need to use this for most people") -+ # parser.add_argument("--interp_order", required=False, default=3, type=int, -+ # help="order of interpolation for segmentations. Testing purpose only. Hands off") -+ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, -+ # help="order of interpolation along z if z is resampled separately. Testing purpose only. " -+ # "Hands off") -+ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, -+ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") ++ def initialize_network(self): ++ """ ++ - momentum 0.99 ++ - SGD instead of Adam ++ - self.lr_scheduler = None because we do poly_lr ++ - deep supervision = True ++ - i am sure I forgot something here + -+ args = parser.parse_args() -+ print('------------\n', args) ++ Known issue: forgot to set neg_slope=0 in InitWeights_He; should not make a difference though ++ :return: ++ """ ++ if self.threeD: ++ conv_op = nn.Conv3d ++ dropout_op = nn.Dropout3d ++ norm_op = nn.InstanceNorm3d + -+ task = args.task -+ fold = args.fold -+ network = args.network -+ network_trainer = args.network_trainer -+ weights = args.w -+ validation_only = args.validation_only -+ plans_identifier = args.p -+ find_lr = args.find_lr ++ else: ++ conv_op = nn.Conv2d ++ dropout_op = nn.Dropout2d ++ norm_op = nn.InstanceNorm2d ++ norm_op_kwargs = {'eps': 1e-5, 'affine': True} ++ dropout_op_kwargs = {'p': 0, 'inplace': True} ++ net_nonlin = nn.LeakyReLU ++ net_nonlin_kwargs = {'negative_slope': 1e-2, 'inplace': True} ++ self.network = Generic_UNetPlusPlus(self.num_input_channels, self.base_num_features, self.num_classes, ++ len(self.net_num_pool_op_kernel_sizes), ++ self.conv_per_stage, 2, conv_op, norm_op, norm_op_kwargs, dropout_op, ++ dropout_op_kwargs, ++ net_nonlin, net_nonlin_kwargs, True, False, lambda x: x, ++ InitWeights_He(1e-2), ++ self.net_num_pool_op_kernel_sizes, self.net_conv_kernel_sizes, False, True, ++ True) ++ if torch.cuda.is_available(): ++ self.network.cuda() ++ self.network.inference_apply_nonlin = softmax_helper ++ self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) + -+ use_compressed_data = args.use_compressed_data -+ decompress_data = not use_compressed_data ++ # self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) + -+ deterministic = args.deterministic -+ valbest = args.valbest ++ def initialize_optimizer_and_scheduler(self): ++ assert self.network is not None, "self.initialize_network must be called first" ++ print('weight_decay: ', self.weight_decay) ++ sys.stdout.flush() ++ self.optimizer = torch.optim.SGD(self.network.parameters(), self.initial_lr, weight_decay=self.weight_decay, ++ momentum=0.99, nesterov=True) ++ self.lr_scheduler = None + -+ fp32 = args.fp32 -+ 
run_mixed_precision = not fp32 ++ def run_online_evaluation(self, output, target): ++ """ ++ due to deep supervision the return value and the reference are now lists of tensors. We only need the full ++ resolution output because this is what we are interested in in the end. The others are ignored ++ :param output: ++ :param target: ++ :return: ++ """ ++ target = target[0] ++ output = output[0] ++ return super().run_online_evaluation(output, target) + -+ val_folder = args.val_folder -+ # interp_order = args.interp_order -+ # interp_order_z = args.interp_order_z -+ # force_separate_z = args.force_separate_z ++ def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, ++ step_size: float = 0.5, save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True, ++ validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False, ++ segmentation_export_kwargs: dict = None): ++ """ ++ We need to wrap this because we need to enforce self.network.do_ds = False for prediction ++ """ ++ if self.local_rank == 0: ++ if isinstance(self.network, DDP): ++ net = self.network.module ++ else: ++ net = self.network ++ ds = self.network.do_ds ++ net.do_ds = False ++ ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian, ++ overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs) + -+ if not task.startswith("Task"): -+ task_id = int(task) -+ task = convert_id_to_task_name(task_id) ++ net.do_ds = ds ++ return ret + -+ if fold == 'all': -+ pass -+ else: -+ fold = int(fold) ++ def predict_preprocessed_data_return_seg_and_softmax(self, data: np.ndarray, do_mirroring: bool = True, ++ mirror_axes: Tuple[int] = None, ++ use_sliding_window: bool = True, step_size: float = 0.5, ++ use_gaussian: bool = True, pad_border_mode: str = 'constant', ++ pad_kwargs: dict = None, all_in_gpu: bool = True, ++ verbose: bool = True, mixed_precision=True) -> Tuple[ ++ np.ndarray, np.ndarray]: ++ """ ++ We need to wrap this because we need to enforce self.network.do_ds = False for prediction ++ """ ++ ds = self.network.do_ds ++ self.network.do_ds = False ++ ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, ++ use_sliding_window, step_size, use_gaussian, ++ pad_border_mode, pad_kwargs, all_in_gpu, verbose, ++ mixed_precision=mixed_precision) ++ self.network.do_ds = ds ++ return ret + -+ # if force_separate_z == "None": -+ # force_separate_z = None -+ # elif force_separate_z == "False": -+ # force_separate_z = False -+ # elif force_separate_z == "True": -+ # force_separate_z = True -+ # else: -+ # raise ValueError("force_separate_z must be None, True or False. 
Given: %s" % force_separate_z) ++ def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False): ++ """ ++ gradient clipping improves training stability + -+ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ -+ trainer_class, domain = get_default_configuration(network, task, network_trainer, plans_identifier) ++ :param data_generator: ++ :param do_backprop: ++ :param run_online_evaluation: ++ :return: ++ """ ++ data_dict = next(data_generator) ++ data = data_dict['data'] ++ target = data_dict['target'] + -+ if trainer_class is None: -+ raise RuntimeError("Could not find trainer class in nnunet.training.network_training") ++ data = maybe_to_torch(data) ++ target = maybe_to_torch(target) + -+ if network == "3d_cascade_fullres": -+ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ -+ "If running 3d_cascade_fullres then your " \ -+ "trainer class must be derived from " \ -+ "nnUNetTrainerCascadeFullRes" -+ else: -+ assert issubclass(trainer_class, -+ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" ++ if torch.cuda.is_available(): ++ data = to_cuda(data, gpu_id=None) ++ target = to_cuda(target, gpu_id=None) + -+ trainer = trainer_class(plans_file, fold, output_folder=output_folder_name, dataset_directory=dataset_directory, -+ batch_dice=batch_dice, stage=stage, unpack_data=decompress_data, -+ deterministic=deterministic, -+ fp16=run_mixed_precision) ++ self.optimizer.zero_grad() + -+ trainer.initialize(not validation_only) ++ if self.fp16: ++ with autocast(): ++ output = self.network(data) ++ del data ++ l = self.loss(output, target) + -+ if weights != None: -+ trainer.load_pretrained_encoder_weights(weights) -+ sys.stdout.flush() ++ if do_backprop: ++ self.amp_grad_scaler.scale(l).backward() ++ self.amp_grad_scaler.unscale_(self.optimizer) ++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) ++ self.amp_grad_scaler.step(self.optimizer) ++ self.amp_grad_scaler.update() ++ else: ++ output = self.network(data) ++ del data ++ l = self.loss(output, target) ++ ++ if do_backprop: ++ l.backward() ++ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) ++ self.optimizer.step() ++ ++ if run_online_evaluation: ++ self.run_online_evaluation(output, target) ++ ++ del target ++ ++ return l.detach().cpu().numpy() ++ ++ def do_split(self): ++ """ ++ we now allow more than 5 splits. IMPORTANT: and fold > 4 will not be a real split but just another random ++ 80:20 split of the data. You cannot run X-fold cross-validation with this code. It will always be a 5-fold CV. 
++        Folds > 4 will be independent from each other
++        :return:
++        """
++        if self.fold == "all":
++            # if fold==all then we use all images for training and validation
++            tr_keys = val_keys = list(self.dataset.keys())
++        else:
++            splits_file = join(self.dataset_directory, "splits_final.pkl")
++
++            # if the split file does not exist we need to create it
++            if not isfile(splits_file):
++                self.print_to_log_file("Creating new split...")
++                splits = []
++                all_keys_sorted = np.sort(list(self.dataset.keys()))
++                kfold = KFold(n_splits=5, shuffle=True, random_state=12345)
++                for i, (train_idx, test_idx) in enumerate(kfold.split(all_keys_sorted)):
++                    train_keys = np.array(all_keys_sorted)[train_idx]
++                    test_keys = np.array(all_keys_sorted)[test_idx]
++                    splits.append(OrderedDict())
++                    splits[-1]['train'] = train_keys
++                    splits[-1]['val'] = test_keys
++                save_pickle(splits, splits_file)
++
++            splits = load_pickle(splits_file)
++
++            if self.fold < len(splits):
++                tr_keys = splits[self.fold]['train']
++                val_keys = splits[self.fold]['val']
++            else:
++                self.print_to_log_file("INFO: Requested fold %d but split file only has %d folds. I am now creating a "
++                                       "random 80:20 split!" % (self.fold, len(splits)))
++                # if we request a fold that is not in the split file, create a random 80:20 split
++                rnd = np.random.RandomState(seed=12345 + self.fold)
++                keys = np.sort(list(self.dataset.keys()))
++                idx_tr = rnd.choice(len(keys), int(len(keys) * 0.8), replace=False)
++                idx_val = [i for i in range(len(keys)) if i not in idx_tr]
++                tr_keys = [keys[i] for i in idx_tr]
++                val_keys = [keys[i] for i in idx_val]
++
++        tr_keys.sort()
++        val_keys.sort()
++        self.dataset_tr = OrderedDict()
++        for i in tr_keys:
++            self.dataset_tr[i] = self.dataset[i]
++        self.dataset_val = OrderedDict()
++        for i in val_keys:
++            self.dataset_val[i] = self.dataset[i]
++
++    def setup_DA_params(self):
++        """
++        - we increase rotation angle from [-15, 15] to [-30, 30]
++        - scale range is now (0.7, 1.4), was (0.85, 1.25)
++        - we don't do elastic deformation anymore
++
++        :return:
++        """
++
++        self.deep_supervision_scales = [[1, 1, 1]] + list(list(i) for i in 1 / np.cumprod(
++            np.vstack(self.net_num_pool_op_kernel_sizes), axis=0))[:-1]
++
++        if self.threeD:
++            self.data_aug_params = default_3D_augmentation_params
++            self.data_aug_params['rotation_x'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi)
++            self.data_aug_params['rotation_y'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi)
++            self.data_aug_params['rotation_z'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi)
++            if self.do_dummy_2D_aug:
++                self.data_aug_params["dummy_2D"] = True
++                self.print_to_log_file("Using dummy2d data augmentation")
++                self.data_aug_params["elastic_deform_alpha"] = \
++                    default_2D_augmentation_params["elastic_deform_alpha"]
++                self.data_aug_params["elastic_deform_sigma"] = \
++                    default_2D_augmentation_params["elastic_deform_sigma"]
++                self.data_aug_params["rotation_x"] = default_2D_augmentation_params["rotation_x"]
++        else:
++            self.do_dummy_2D_aug = False
++            if max(self.patch_size) / min(self.patch_size) > 1.5:
++                default_2D_augmentation_params['rotation_x'] = (-15. / 360 * 2. * np.pi, 15. / 360 * 2. * np.pi)
++            self.data_aug_params = default_2D_augmentation_params
++        self.data_aug_params["mask_was_used_for_normalization"] = self.use_mask_for_norm
++
++        if self.do_dummy_2D_aug:
++            self.basic_generator_patch_size = get_patch_size(self.patch_size[1:],
++                                                             self.data_aug_params['rotation_x'],
++                                                             self.data_aug_params['rotation_y'],
++                                                             self.data_aug_params['rotation_z'],
++                                                             self.data_aug_params['scale_range'])
++            self.basic_generator_patch_size = np.array([self.patch_size[0]] + list(self.basic_generator_patch_size))
++            patch_size_for_spatialtransform = self.patch_size[1:]
++        else:
++            self.basic_generator_patch_size = get_patch_size(self.patch_size, self.data_aug_params['rotation_x'],
++                                                             self.data_aug_params['rotation_y'],
++                                                             self.data_aug_params['rotation_z'],
++                                                             self.data_aug_params['scale_range'])
++            patch_size_for_spatialtransform = self.patch_size
++
++        self.data_aug_params["scale_range"] = (0.7, 1.4)
++        self.data_aug_params["do_elastic"] = False
++        self.data_aug_params['selected_seg_channels'] = [0]
++        self.data_aug_params['patch_size_for_spatialtransform'] = patch_size_for_spatialtransform
++
++        self.data_aug_params["num_cached_per_thread"] = 2
++
++    def maybe_update_lr(self, epoch=None):
++        """
++        if epoch is not None we overwrite epoch. Else we use epoch = self.epoch + 1
++
++        (maybe_update_lr is called in on_epoch_end which is called before epoch is incremented.
++        Therefore we need to do +1 here)
++
++        :param epoch:
++        :return:
++        """
++        if epoch is None:
++            ep = self.epoch + 1
++        else:
++            ep = epoch
++        self.optimizer.param_groups[0]['lr'] = poly_lr(ep, self.max_num_epochs, self.initial_lr, 0.9)
++        self.print_to_log_file("lr:", np.round(self.optimizer.param_groups[0]['lr'], decimals=6))
++
++    def on_epoch_end(self):
++        """
++        overwrite patient-based early stopping. Always run to 1000 epochs
++        :return:
++        """
++        super().on_epoch_end()
++        continue_training = self.epoch < self.max_num_epochs
++
++        # it can rarely happen that the momentum of nnUNetTrainerV2_plus is too high for some dataset. If at epoch 100 the
++        # estimated validation Dice is still 0 then we reduce the momentum from 0.99 to 0.95
++        if self.epoch == 100:
++            if self.all_val_eval_metrics[-1] == 0:
++                self.optimizer.param_groups[0]["momentum"] = 0.95
++                self.network.apply(InitWeights_He(1e-2))
++                self.print_to_log_file("At epoch 100, the mean foreground Dice was 0. This can be caused by a too "
++                                       "high momentum. High momentum (0.99) is good for datasets where it works, but "
++                                       "sometimes causes issues such as this one. 
Momentum has now been reduced to " ++ "0.95 and network weights have been reinitialized") ++ return continue_training + -+ if network == '3d_lowres': -+ trainer.load_best_checkpoint(False) -+ print("predicting segmentations for the next stage of the cascade") -+ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) ++ def save_checkpoint(self, fname, save_optimizer=True): ++ if self.local_rank == 0: ++ super().save_checkpoint(fname, save_optimizer) + ++ def plot_progress(self): ++ if self.local_rank == 0: ++ super().plot_progress() + -+if __name__ == "__main__": -+ main() -diff --git a/pytorch/nnunet/run/run_training_DDP.py b/pytorch/nnunet/run/run_training_DDP.py -index 5ffcdcf..6ad3d5a 100644 ---- a/pytorch/nnunet/run/run_training_DDP.py -+++ b/pytorch/nnunet/run/run_training_DDP.py -@@ -27,13 +27,13 @@ from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name - - def main(): - parser = argparse.ArgumentParser() -- parser.add_argument("network") -- parser.add_argument("network_trainer") -- parser.add_argument("task", help="can be task name or task id") -- parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') -+ parser.add_argument("network", default='3d_fullres') -+ parser.add_argument("network_trainer", default='nnUNetTrainerV2_DDP') -+ parser.add_argument("task", help="can be task name or task id", default='003') -+ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'', default='0') - parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", -- action="store_true") -- parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", -+ action="store_true", default=False) -+ parser.add_argument("-c", "--continue_training", default=False, help="use this if you want to continue a training", - action="store_true") - parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", - default=default_plans_identifier, required=False) -@@ -78,7 +78,7 @@ def main(): - # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") - - args = parser.parse_args() -- -+ print('\n\n args=', args, '\n\n') - task = args.task - fold = args.fold - network = args.network -@@ -115,7 +115,7 @@ def main(): - # raise ValueError("force_separate_z must be None, True or False. Given: %s" % force_separate_z) - - plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ -- trainer_class = get_default_configuration(network, task, network_trainer, plans_identifier) -+ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) - - if trainer_class is None: - raise RuntimeError("Could not find trainer class in meddec.model_training") -diff --git a/pytorch/nnunet/run/run_training_hypDDP.py b/pytorch/nnunet/run/run_training_hypDDP.py -new file mode 100644 -index 0000000..f50744c ---- /dev/null -+++ b/pytorch/nnunet/run/run_training_hypDDP.py -@@ -0,0 +1,164 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. 
-+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+ -+ -+import argparse -+ -+from batchgenerators.utilities.file_and_folder_operations import * -+from nnunet.run.default_configuration import get_default_configuration -+from nnunet.paths import default_plans_identifier -+from nnunet.training.cascade_stuff.predict_next_stage import predict_next_stage -+from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer -+from nnunet.training.network_training.nnUNetTrainerCascadeFullRes import nnUNetTrainerCascadeFullRes -+from nnunet.training.network_training.nnUNetTrainerV2_CascadeFullRes import nnUNetTrainerV2CascadeFullRes -+from nnunet.utilities.task_name_id_conversion import convert_id_to_task_name -+ -+ -+def main(): -+ parser = argparse.ArgumentParser() -+ parser.add_argument("network") -+ parser.add_argument("network_trainer") -+ parser.add_argument("task", help="can be task name or task id") -+ parser.add_argument("fold", help='0, 1, ..., 5 or \'all\'') -+ parser.add_argument("-val", "--validation_only", help="use this if you want to only run the validation", -+ action="store_true") -+ parser.add_argument("-c", "--continue_training", help="use this if you want to continue a training", -+ action="store_true") -+ parser.add_argument("-p", help="plans identifier. Only change this if you created a custom experiment planner", -+ default=default_plans_identifier, required=False) -+ parser.add_argument("--use_compressed_data", default=False, action="store_true", -+ help="If you set use_compressed_data, the training cases will not be decompressed. Reading compressed data " -+ "is much more CPU and RAM intensive and should only be used if you know what you are " -+ "doing", required=False) -+ parser.add_argument("--deterministic", -+ help="Makes training deterministic, but reduces training speed substantially. I (Fabian) think " -+ "this is not necessary. Deterministic training will make you overfit to some random seed. " -+ "Don't use that.", -+ required=False, default=False, action="store_true") -+ parser.add_argument("--local_rank", default=0, type=int) -+ parser.add_argument("--fp32", required=False, default=False, action="store_true", -+ help="disable mixed precision training and run old school fp32") -+ parser.add_argument("--dbs", required=False, default=False, action="store_true", help="distribute batch size. If " -+ "True then whatever " -+ "batch_size is in plans will " -+ "be distributed over DDP " -+ "models, if False then each " -+ "model will have batch_size " -+ "for a total of " -+ "GPUs*batch_size") -+ parser.add_argument("--npz", required=False, default=False, action="store_true", help="if set then nnUNet will " -+ "export npz files of " -+ "predicted segmentations " -+ "in the vlaidation as well. 
" -+ "This is needed to run the " -+ "ensembling step so unless " -+ "you are developing nnUNet " -+ "you should enable this") -+ parser.add_argument("--valbest", required=False, default=False, action="store_true", help="") -+ parser.add_argument("--find_lr", required=False, default=False, action="store_true", help="") -+ parser.add_argument("--val_folder", required=False, default="validation_raw", -+ help="name of the validation folder. No need to use this for most people") -+ # parser.add_argument("--interp_order", required=False, default=3, type=int, -+ # help="order of interpolation for segmentations. Testing purpose only. Hands off") -+ # parser.add_argument("--interp_order_z", required=False, default=0, type=int, -+ # help="order of interpolation along z if z is resampled separately. Testing purpose only. " -+ # "Hands off") -+ # parser.add_argument("--force_separate_z", required=False, default="None", type=str, -+ # help="force_separate_z resampling. Can be None, True or False. Testing purpose only. Hands off") -+ -+ args = parser.parse_args() -+ print('\n\n args=', args, '\n\n') -+ task = args.task -+ fold = args.fold -+ network = args.network -+ network_trainer = args.network_trainer -+ validation_only = args.validation_only -+ plans_identifier = args.p -+ use_compressed_data = args.use_compressed_data -+ decompress_data = not use_compressed_data -+ deterministic = args.deterministic -+ valbest = args.valbest -+ find_lr = args.find_lr -+ val_folder = args.val_folder -+ # interp_order = args.interp_order -+ # interp_order_z = args.interp_order_z -+ # force_separate_z = args.force_separate_z -+ fp32 = args.fp32 -+ -+ if not task.startswith("Task"): -+ task_id = int(task) -+ task = convert_id_to_task_name(task_id) -+ -+ if fold == 'all': -+ pass -+ else: -+ fold = int(fold) -+ # -+ # if force_separate_z == "None": -+ # force_separate_z = None -+ # elif force_separate_z == "False": -+ # force_separate_z = False -+ # elif force_separate_z == "True": -+ # force_separate_z = True -+ # else: -+ # raise ValueError("force_separate_z must be None, True or False. 
Given: %s" % force_separate_z) -+ -+ plans_file, output_folder_name, dataset_directory, batch_dice, stage, \ -+ trainer_class, _ = get_default_configuration(network, task, network_trainer, plans_identifier) -+ -+ if trainer_class is None: -+ raise RuntimeError("Could not find trainer class in meddec.model_training") -+ -+ if network == "3d_cascade_fullres": -+ assert issubclass(trainer_class, (nnUNetTrainerCascadeFullRes, nnUNetTrainerV2CascadeFullRes)), \ -+ "If running 3d_cascade_fullres then your " \ -+ "trainer class must be derived from " \ -+ "nnUNetTrainerCascadeFullRes" -+ else: -+ assert issubclass(trainer_class, -+ nnUNetTrainer), "network_trainer was found but is not derived from nnUNetTrainer" -+ -+ trainer = trainer_class(plans_file, fold, local_rank=args.local_rank, output_folder=output_folder_name, -+ dataset_directory=dataset_directory, batch_dice=batch_dice, stage=stage, -+ unpack_data=decompress_data, deterministic=deterministic, fp16=not fp32, -+ distribute_batch_size=args.dbs) -+ -+ trainer.initialize(not validation_only) -+ -+ if find_lr: -+ trainer.find_lr() -+ else: -+ if not validation_only: -+ if args.continue_training: -+ trainer.load_latest_checkpoint() -+ trainer.run_training() -+ else: -+ if valbest: -+ trainer.load_best_checkpoint(train=False) -+ else: -+ trainer.load_latest_checkpoint(train=False) -+ -+ trainer.network.eval() -+ -+ # predict validation -+ trainer.validate(save_softmax=args.npz, validation_folder_name=val_folder) -+ -+ if network == '3d_lowres': -+ trainer.load_best_checkpoint(False) -+ print("predicting segmentations for the next stage of the cascade") -+ predict_next_stage(trainer, join(dataset_directory, trainer.plans['data_identifier'] + "_stage%d" % 1)) -+ -+ -+if __name__ == "__main__": -+ main() -diff --git a/pytorch/nnunet/training/loss_functions/crossentropy.py b/pytorch/nnunet/training/loss_functions/crossentropy.py -index 6195437..0c782d9 100644 ---- a/pytorch/nnunet/training/loss_functions/crossentropy.py -+++ b/pytorch/nnunet/training/loss_functions/crossentropy.py -@@ -6,6 +6,15 @@ class RobustCrossEntropyLoss(nn.CrossEntropyLoss): - this is just a compatibility layer because my target tensor is float and has an extra dimension - """ - def forward(self, input: Tensor, target: Tensor) -> Tensor: -+ # i = 0 -+ # print('----------') -+ # print('input:', input.shape) -+ # for i in range(len(input)): -+ # print(i, input[i].shape) -+ # print('target') -+ # for i in range(len(target)): -+ # print(i, target[i].shape) -+ # print('\n----------') - if len(target.shape) == len(input.shape): - assert target.shape[1] == 1 - target = target[:, 0] -diff --git a/pytorch/nnunet/training/network_training/network_trainer.py b/pytorch/nnunet/training/network_training/network_trainer.py -index e920158..f0031d3 100644 ---- a/pytorch/nnunet/training/network_training/network_trainer.py -+++ b/pytorch/nnunet/training/network_training/network_trainer.py -@@ -37,6 +37,7 @@ from abc import abstractmethod - from datetime import datetime - from tqdm import trange - from nnunet.utilities.to_torch import maybe_to_torch, to_cuda -+import pdb - - - class NetworkTrainer(object): -@@ -438,7 +439,8 @@ class NetworkTrainer(object): - self._maybe_init_amp() - - def _maybe_init_amp(self): -- if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available(): -+ # if self.fp16 and self.amp_grad_scaler is None and torch.cuda.is_available(): -+ if self.fp16 and self.amp_grad_scaler is None: - self.amp_grad_scaler = GradScaler() - - def 
plot_network_architecture(self): -diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py -index e9aa611..9b97e8c 100644 ---- a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py -+++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2.py -@@ -13,6 +13,7 @@ - # limitations under the License. - - -+import SimpleITK as sitk - from collections import OrderedDict - from typing import Tuple - import sys -@@ -35,12 +36,10 @@ from torch.cuda.amp import autocast - from nnunet.training.learning_rate.poly_lr import poly_lr - from batchgenerators.utilities.file_and_folder_operations import * - -- - class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - """ - Info for Fabian: same as internal nnUNetTrainerV2_2 - """ -- - def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True, stage=None, - unpack_data=True, deterministic=True, fp16=False): - super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data, -@@ -66,7 +65,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - maybe_mkdir_p(self.output_folder) - - if force_load_plans or (self.plans is None): -- self.load_plans_file() -+ self.load_plans_file() # '/data/yupeng/environment_variables/nnUNet_preprocessed/Task003_Liver/nnUNetPlansv2.1_plans_3D.pkl' - - self.process_plans(self.plans) - -@@ -189,6 +188,7 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - """ - ds = self.network.do_ds - self.network.do_ds = False -+ overwrite = False # 不希望重新跑推理,不然太久了 - ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian, - overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs) - -@@ -200,16 +200,18 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - use_sliding_window: bool = True, step_size: float = 0.5, - use_gaussian: bool = True, pad_border_mode: str = 'constant', - pad_kwargs: dict = None, all_in_gpu: bool = True, -- verbose: bool = True, mixed_precision=True) -> Tuple[np.ndarray, np.ndarray]: -+ verbose: bool = True, mixed_precision=True, img_name=None, -+ pre_mode=None, fp=None) -> Tuple[np.ndarray, np.ndarray]: - """ - We need to wrap this because we need to enforce self.network.do_ds = False for prediction - """ -- ds = self.network.do_ds -+ ds = self.network.do_ds # ds = True - self.network.do_ds = False - ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, - use_sliding_window, step_size, use_gaussian, - pad_border_mode, pad_kwargs, all_in_gpu, verbose, -- mixed_precision=mixed_precision) -+ mixed_precision=mixed_precision, img_name=img_name, -+ pre_mode=pre_mode, fp=fp) - self.network.do_ds = ds - return ret - -@@ -225,7 +227,20 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - data_dict = next(data_generator) - data = data_dict['data'] - target = data_dict['target'] -- -+ # i = 0 -+ # while True: -+ # i += 1 -+ # data_dict = next(data_generator) -+ # data = data_dict['data'] -+ # target = data_dict['target'] -+ # data_numpy_output = '/home/yupeng/save_data.nii.gz' -+ # data_numpy = data[0][0].numpy() -+ # target_numpy = target[0][0][0].numpy() -+ # data_1 = data_numpy.flatten() -+ # minm = np.argmin(data_1) -+ # maxm = np.argmax(data_1) -+ # out = sitk.GetImageFromArray(data_numpy) -+ # sitk.WriteImage(out, data_numpy_output) - data = maybe_to_torch(data) - target = maybe_to_torch(target) - -@@ -234,7 +249,6 @@ class 
nnUNetPlusPlusTrainerV2(nnUNetTrainer): - target = to_cuda(target) - - self.optimizer.zero_grad() -- - if self.fp16: - with autocast(): - output = self.network(data) -@@ -261,7 +275,6 @@ class nnUNetPlusPlusTrainerV2(nnUNetTrainer): - self.run_online_evaluation(output, target) - - del target -- - return l.detach().cpu().numpy() - - def do_split(self): -diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py -new file mode 100644 -index 0000000..e2ab2fa ---- /dev/null -+++ b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_DDP.py -@@ -0,0 +1,483 @@ -+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. -+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+ -+ -+from collections import OrderedDict -+from typing import Tuple -+import sys -+import time -+import numpy as np -+import torch -+import torch.distributed as dist -+from torch.cuda.amp import autocast -+from torch.nn.parallel import DistributedDataParallel as DDP -+from nnunet.training.loss_functions.deep_supervision import MultipleOutputLoss2 -+from nnunet.utilities.to_torch import maybe_to_torch, to_cuda -+from nnunet.training.data_augmentation.default_data_augmentation import get_moreDA_augmentation -+from nnunet.network_architecture.generic_UNetPlusPlus import Generic_UNetPlusPlus -+from nnunet.network_architecture.initialization import InitWeights_He -+from nnunet.network_architecture.neural_network import SegmentationNetwork -+from nnunet.training.data_augmentation.default_data_augmentation import default_2D_augmentation_params, \ -+ get_patch_size, default_3D_augmentation_params -+from nnunet.training.dataloading.dataset_loading import unpack_dataset -+from nnunet.training.network_training.nnUNetTrainer import nnUNetTrainer -+from nnunet.utilities.nd_softmax import softmax_helper -+from sklearn.model_selection import KFold -+from torch import nn -+from torch.cuda.amp import autocast -+from nnunet.training.learning_rate.poly_lr import poly_lr -+from batchgenerators.utilities.file_and_folder_operations import * -+ -+ -+class nnUNetPlusPlusTrainerV2_DDP(nnUNetTrainer): -+ """ -+ Info for Fabian: same as internal nnUNetTrainerV2_2 -+ """ -+ -+ def __init__(self, plans_file, fold, local_rank, output_folder=None, dataset_directory=None, batch_dice=True, -+ stage=None, -+ unpack_data=True, deterministic=True, fp16=False, distribute_batch_size=1): -+ super().__init__(plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data, -+ deterministic, fp16) -+ self.init_args = ( -+ plans_file, fold, local_rank, output_folder, dataset_directory, batch_dice, stage, unpack_data, -+ deterministic, distribute_batch_size, fp16) -+ self.max_num_epochs = 1000 -+ self.initial_lr = 1e-2 -+ self.deep_supervision_scales = None -+ self.ds_loss_weights = None -+ self.distribute_batch_size = distribute_batch_size -+ np.random.seed(local_rank) -+ 
torch.manual_seed(local_rank) -+ self.local_rank = local_rank -+ if torch.cuda.is_available(): -+ torch.cuda.set_device(local_rank) -+ dist.init_process_group(backend='nccl', init_method='env://') -+ -+ self.pin_memory = True -+ -+ def initialize(self, training=True, force_load_plans=False): -+ """ -+ - replaced get_default_augmentation with get_moreDA_augmentation -+ - enforce to only run this code once -+ - loss function wrapper for deep supervision -+ -+ :param training: -+ :param force_load_plans: -+ :return: -+ """ -+ if not self.was_initialized: -+ maybe_mkdir_p(self.output_folder) -+ -+ if force_load_plans or (self.plans is None): -+ self.load_plans_file() -+ -+ self.process_plans(self.plans) -+ -+ self.setup_DA_params() -+ -+ ################# Here we wrap the loss for deep supervision ############ -+ # we need to know the number of outputs of the network -+ net_numpool = len(self.net_num_pool_op_kernel_sizes) -+ -+ # we give each output a weight which decreases exponentially (division by 2) as the resolution decreases -+ # this gives higher resolution outputs more weight in the loss -+ weights = np.array([1 / (2 ** i) for i in range(net_numpool)]) -+ -+ # we don't use the lowest 2 outputs. Normalize weights so that they sum to 1 -+ mask = np.array([True] + [True if i < net_numpool - 1 else False for i in range(1, net_numpool)]) -+ weights[~mask] = 0 -+ weights = weights / weights.sum() -+ # self.ds_loss_weights = weights -+ self.ds_loss_weights = None -+ # now wrap the loss -+ self.loss = MultipleOutputLoss2(self.loss, self.ds_loss_weights) -+ ################# END ################### -+ -+ self.folder_with_preprocessed_data = join(self.dataset_directory, self.plans['data_identifier'] + -+ "_stage%d" % self.stage) -+ if training: -+ self.dl_tr, self.dl_val = self.get_basic_generators() -+ if self.unpack_data: -+ if self.local_rank == 0: -+ print("unpacking dataset") -+ unpack_dataset(self.folder_with_preprocessed_data) -+ print("done") -+ else: -+ # we need to wait until worker 0 has finished unpacking -+ npz_files = subfiles(self.folder_with_preprocessed_data, suffix=".npz", join=False) -+ case_ids = [i[:-4] for i in npz_files] -+ all_present = all( -+ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids]) -+ while not all_present: -+ print("worker", self.local_rank, "is waiting for unpacking") -+ time.sleep(3) -+ all_present = all( -+ [isfile(join(self.folder_with_preprocessed_data, i + ".npy")) for i in case_ids]) -+ # there is some slight chance that there may arise some error because dataloader are loading a file -+ # that is still being written by worker 0. We ignore this for now an address it only if it becomes -+ # relevant -+ # (this can occur because while worker 0 writes the file is technically present so the other workers -+ # will proceed and eventually try to read it) -+ else: -+ print( -+ "INFO: Not unpacking data! Training may be slow due to that. 
Pray you are not using 2d or you " -+ "will wait all winter for your model to finish!") -+ -+ self.tr_gen, self.val_gen = get_moreDA_augmentation( -+ self.dl_tr, self.dl_val, -+ self.data_aug_params[ -+ 'patch_size_for_spatialtransform'], -+ self.data_aug_params, -+ deep_supervision_scales=self.deep_supervision_scales, -+ pin_memory=self.pin_memory -+ ) -+ self.print_to_log_file("TRAINING KEYS:\n %s" % (str(self.dataset_tr.keys())), -+ also_print_to_console=False) -+ self.print_to_log_file("VALIDATION KEYS:\n %s" % (str(self.dataset_val.keys())), -+ also_print_to_console=False) -+ else: -+ pass -+ -+ self.initialize_network() -+ self.initialize_optimizer_and_scheduler() -+ -+ assert isinstance(self.network, (SegmentationNetwork, DDP)) -+ else: -+ self.print_to_log_file('self.was_initialized is True, not running self.initialize again') -+ self.was_initialized = True -+ -+ def initialize_network(self): -+ """ -+ - momentum 0.99 -+ - SGD instead of Adam -+ - self.lr_scheduler = None because we do poly_lr -+ - deep supervision = True -+ - i am sure I forgot something here -+ -+ Known issue: forgot to set neg_slope=0 in InitWeights_He; should not make a difference though -+ :return: -+ """ -+ if self.threeD: -+ conv_op = nn.Conv3d -+ dropout_op = nn.Dropout3d -+ norm_op = nn.InstanceNorm3d -+ -+ else: -+ conv_op = nn.Conv2d -+ dropout_op = nn.Dropout2d -+ norm_op = nn.InstanceNorm2d -+ norm_op_kwargs = {'eps': 1e-5, 'affine': True} -+ dropout_op_kwargs = {'p': 0, 'inplace': True} -+ net_nonlin = nn.LeakyReLU -+ net_nonlin_kwargs = {'negative_slope': 1e-2, 'inplace': True} -+ self.network = Generic_UNetPlusPlus(self.num_input_channels, self.base_num_features, self.num_classes, -+ len(self.net_num_pool_op_kernel_sizes), -+ self.conv_per_stage, 2, conv_op, norm_op, norm_op_kwargs, dropout_op, -+ dropout_op_kwargs, -+ net_nonlin, net_nonlin_kwargs, True, False, lambda x: x, -+ InitWeights_He(1e-2), -+ self.net_num_pool_op_kernel_sizes, self.net_conv_kernel_sizes, False, True, -+ True) -+ if torch.cuda.is_available(): -+ self.network.cuda() -+ self.network.inference_apply_nonlin = softmax_helper -+ self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) -+ -+ # self.network = DDP(self.network, device_ids=[self.local_rank], find_unused_parameters=True) -+ -+ def initialize_optimizer_and_scheduler(self): -+ assert self.network is not None, "self.initialize_network must be called first" -+ print('weight_decay: ', self.weight_decay) -+ sys.stdout.flush() -+ self.optimizer = torch.optim.SGD(self.network.parameters(), self.initial_lr, weight_decay=self.weight_decay, -+ momentum=0.99, nesterov=True) -+ self.lr_scheduler = None -+ -+ def run_online_evaluation(self, output, target): -+ """ -+ due to deep supervision the return value and the reference are now lists of tensors. We only need the full -+ resolution output because this is what we are interested in in the end. 
The others are ignored -+ :param output: -+ :param target: -+ :return: -+ """ -+ target = target[0] -+ output = output[0] -+ return super().run_online_evaluation(output, target) -+ -+ def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, -+ step_size: float = 0.5, save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True, -+ validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False, -+ segmentation_export_kwargs: dict = None): -+ """ -+ We need to wrap this because we need to enforce self.network.do_ds = False for prediction -+ """ -+ if self.local_rank == 0: -+ if isinstance(self.network, DDP): -+ net = self.network.module -+ else: -+ net = self.network -+ ds = self.network.do_ds -+ net.do_ds = False -+ ret = super().validate(do_mirroring, use_sliding_window, step_size, save_softmax, use_gaussian, -+ overwrite, validation_folder_name, debug, all_in_gpu, segmentation_export_kwargs) -+ -+ net.do_ds = ds -+ return ret -+ -+ def predict_preprocessed_data_return_seg_and_softmax(self, data: np.ndarray, do_mirroring: bool = True, -+ mirror_axes: Tuple[int] = None, -+ use_sliding_window: bool = True, step_size: float = 0.5, -+ use_gaussian: bool = True, pad_border_mode: str = 'constant', -+ pad_kwargs: dict = None, all_in_gpu: bool = True, -+ verbose: bool = True, mixed_precision=True) -> Tuple[ -+ np.ndarray, np.ndarray]: -+ """ -+ We need to wrap this because we need to enforce self.network.do_ds = False for prediction -+ """ -+ ds = self.network.do_ds -+ self.network.do_ds = False -+ ret = super().predict_preprocessed_data_return_seg_and_softmax(data, do_mirroring, mirror_axes, -+ use_sliding_window, step_size, use_gaussian, -+ pad_border_mode, pad_kwargs, all_in_gpu, verbose, -+ mixed_precision=mixed_precision) -+ self.network.do_ds = ds -+ return ret -+ -+ def run_iteration(self, data_generator, do_backprop=True, run_online_evaluation=False): -+ """ -+ gradient clipping improves training stability -+ -+ :param data_generator: -+ :param do_backprop: -+ :param run_online_evaluation: -+ :return: -+ """ -+ data_dict = next(data_generator) -+ data = data_dict['data'] -+ target = data_dict['target'] -+ -+ data = maybe_to_torch(data) -+ target = maybe_to_torch(target) -+ -+ if torch.cuda.is_available(): -+ data = to_cuda(data, gpu_id=None) -+ target = to_cuda(target, gpu_id=None) -+ -+ self.optimizer.zero_grad() -+ -+ if self.fp16: -+ with autocast(): -+ output = self.network(data) -+ del data -+ l = self.loss(output, target) -+ -+ if do_backprop: -+ self.amp_grad_scaler.scale(l).backward() -+ self.amp_grad_scaler.unscale_(self.optimizer) -+ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) -+ self.amp_grad_scaler.step(self.optimizer) -+ self.amp_grad_scaler.update() -+ else: -+ output = self.network(data) -+ del data -+ l = self.loss(output, target) -+ -+ if do_backprop: -+ l.backward() -+ torch.nn.utils.clip_grad_norm_(self.network.parameters(), 12) -+ self.optimizer.step() -+ -+ if run_online_evaluation: -+ self.run_online_evaluation(output, target) -+ -+ del target -+ -+ return l.detach().cpu().numpy() -+ -+ def do_split(self): -+ """ -+ we now allow more than 5 splits. IMPORTANT: and fold > 4 will not be a real split but just another random -+ 80:20 split of the data. You cannot run X-fold cross-validation with this code. It will always be a 5-fold CV. 
-+ Folds > 4 will be independent from each other -+ :return: -+ """ -+ if self.fold == "all": -+ # if fold==all then we use all images for training and validation -+ tr_keys = val_keys = list(self.dataset.keys()) -+ else: -+ splits_file = join(self.dataset_directory, "splits_final.pkl") -+ -+ # if the split file does not exist we need to create it -+ if not isfile(splits_file): -+ self.print_to_log_file("Creating new split...") -+ splits = [] -+ all_keys_sorted = np.sort(list(self.dataset.keys())) -+ kfold = KFold(n_splits=5, shuffle=True, random_state=12345) -+ for i, (train_idx, test_idx) in enumerate(kfold.split(all_keys_sorted)): -+ train_keys = np.array(all_keys_sorted)[train_idx] -+ test_keys = np.array(all_keys_sorted)[test_idx] -+ splits.append(OrderedDict()) -+ splits[-1]['train'] = train_keys -+ splits[-1]['val'] = test_keys -+ save_pickle(splits, splits_file) -+ -+ splits = load_pickle(splits_file) -+ -+ if self.fold < len(splits): -+ tr_keys = splits[self.fold]['train'] -+ val_keys = splits[self.fold]['val'] -+ else: -+ self.print_to_log_file("INFO: Requested fold %d but split file only has %d folds. I am now creating a " -+ "random 80:20 split!" % (self.fold, len(splits))) -+ # if we request a fold that is not in the split file, create a random 80:20 split -+ rnd = np.random.RandomState(seed=12345 + self.fold) -+ keys = np.sort(list(self.dataset.keys())) -+ idx_tr = rnd.choice(len(keys), int(len(keys) * 0.8), replace=False) -+ idx_val = [i for i in range(len(keys)) if i not in idx_tr] -+ tr_keys = [keys[i] for i in idx_tr] -+ val_keys = [keys[i] for i in idx_val] -+ -+ tr_keys.sort() -+ val_keys.sort() -+ self.dataset_tr = OrderedDict() -+ for i in tr_keys: -+ self.dataset_tr[i] = self.dataset[i] -+ self.dataset_val = OrderedDict() -+ for i in val_keys: -+ self.dataset_val[i] = self.dataset[i] -+ -+ def setup_DA_params(self): -+ """ -+ - we increase roation angle from [-15, 15] to [-30, 30] -+ - scale range is now (0.7, 1.4), was (0.85, 1.25) -+ - we don't do elastic deformation anymore -+ -+ :return: -+ """ -+ -+ self.deep_supervision_scales = [[1, 1, 1]] + list(list(i) for i in 1 / np.cumprod( -+ np.vstack(self.net_num_pool_op_kernel_sizes), axis=0))[:-1] -+ -+ if self.threeD: -+ self.data_aug_params = default_3D_augmentation_params -+ self.data_aug_params['rotation_x'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi) -+ self.data_aug_params['rotation_y'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi) -+ self.data_aug_params['rotation_z'] = (-30. / 360 * 2. * np.pi, 30. / 360 * 2. * np.pi) -+ if self.do_dummy_2D_aug: -+ self.data_aug_params["dummy_2D"] = True -+ self.print_to_log_file("Using dummy2d data augmentation") -+ self.data_aug_params["elastic_deform_alpha"] = \ -+ default_2D_augmentation_params["elastic_deform_alpha"] -+ self.data_aug_params["elastic_deform_sigma"] = \ -+ default_2D_augmentation_params["elastic_deform_sigma"] -+ self.data_aug_params["rotation_x"] = default_2D_augmentation_params["rotation_x"] -+ else: -+ self.do_dummy_2D_aug = False -+ if max(self.patch_size) / min(self.patch_size) > 1.5: -+ default_2D_augmentation_params['rotation_x'] = (-15. / 360 * 2. * np.pi, 15. / 360 * 2. 
* np.pi) -+ self.data_aug_params = default_2D_augmentation_params -+ self.data_aug_params["mask_was_used_for_normalization"] = self.use_mask_for_norm -+ -+ if self.do_dummy_2D_aug: -+ self.basic_generator_patch_size = get_patch_size(self.patch_size[1:], -+ self.data_aug_params['rotation_x'], -+ self.data_aug_params['rotation_y'], -+ self.data_aug_params['rotation_z'], -+ self.data_aug_params['scale_range']) -+ self.basic_generator_patch_size = np.array([self.patch_size[0]] + list(self.basic_generator_patch_size)) -+ patch_size_for_spatialtransform = self.patch_size[1:] -+ else: -+ self.basic_generator_patch_size = get_patch_size(self.patch_size, self.data_aug_params['rotation_x'], -+ self.data_aug_params['rotation_y'], -+ self.data_aug_params['rotation_z'], -+ self.data_aug_params['scale_range']) -+ patch_size_for_spatialtransform = self.patch_size -+ -+ self.data_aug_params["scale_range"] = (0.7, 1.4) -+ self.data_aug_params["do_elastic"] = False -+ self.data_aug_params['selected_seg_channels'] = [0] -+ self.data_aug_params['patch_size_for_spatialtransform'] = patch_size_for_spatialtransform -+ -+ self.data_aug_params["num_cached_per_thread"] = 2 -+ -+ def maybe_update_lr(self, epoch=None): -+ """ -+ if epoch is not None we overwrite epoch. Else we use epoch = self.epoch + 1 -+ -+ (maybe_update_lr is called in on_epoch_end which is called before epoch is incremented. -+ herefore we need to do +1 here) -+ -+ :param epoch: -+ :return: -+ """ -+ if epoch is None: -+ ep = self.epoch + 1 -+ else: -+ ep = epoch -+ self.optimizer.param_groups[0]['lr'] = poly_lr(ep, self.max_num_epochs, self.initial_lr, 0.9) -+ self.print_to_log_file("lr:", np.round(self.optimizer.param_groups[0]['lr'], decimals=6)) -+ -+ def on_epoch_end(self): -+ """ -+ overwrite patient-based early stopping. Always run to 1000 epochs -+ :return: -+ """ -+ super().on_epoch_end() -+ continue_training = self.epoch < self.max_num_epochs -+ -+ # it can rarely happen that the momentum of nnUNetTrainerV2_plus is too high for some dataset. If at epoch 100 the -+ # estimated validation Dice is still 0 then we reduce the momentum from 0.99 to 0.95 -+ if self.epoch == 100: -+ if self.all_val_eval_metrics[-1] == 0: -+ self.optimizer.param_groups[0]["momentum"] = 0.95 -+ self.network.apply(InitWeights_He(1e-2)) -+ self.print_to_log_file("At epoch 100, the mean foreground Dice was 0. This can be caused by a too " -+ "high momentum. High momentum (0.99) is good for datasets where it works, but " -+ "sometimes causes issues such as this one. 
Momentum has now been reduced to " -+ "0.95 and network weights have been reinitialized") -+ return continue_training -+ -+ def save_checkpoint(self, fname, save_optimizer=True): -+ if self.local_rank == 0: -+ super().save_checkpoint(fname, save_optimizer) -+ -+ def plot_progress(self): -+ if self.local_rank == 0: -+ super().plot_progress() -+ -+ def print_to_log_file(self, *args, also_print_to_console=True): -+ if self.local_rank == 0: -+ super().print_to_log_file(*args, also_print_to_console=also_print_to_console) -+ -+ def run_training(self): -+ """ -+ if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first -+ continued epoch with self.initial_lr -+ -+ we also need to make sure deep supervision in the network is enabled for training, thus the wrapper -+ :return: -+ """ -+ self.maybe_update_lr(self.epoch) # if we dont overwrite epoch then self.epoch+1 is used which is not what we -+ # want at the start of the training -+ if isinstance(self.network, DDP): -+ net = self.network.module -+ else: -+ net = self.network -+ ds = net.do_ds -+ net.do_ds = True -+ ret = super().run_training() -+ net.do_ds = ds -+ return ret ++ def print_to_log_file(self, *args, also_print_to_console=True): ++ if self.local_rank == 0: ++ super().print_to_log_file(*args, also_print_to_console=also_print_to_console) ++ ++ def run_training(self): ++ """ ++ if we run with -c then we need to set the correct lr for the first epoch, otherwise it will run the first ++ continued epoch with self.initial_lr ++ ++ we also need to make sure deep supervision in the network is enabled for training, thus the wrapper ++ :return: ++ """ ++ self.maybe_update_lr(self.epoch) # if we dont overwrite epoch then self.epoch+1 is used which is not what we ++ # want at the start of the training ++ if isinstance(self.network, DDP): ++ net = self.network.module ++ else: ++ net = self.network ++ ds = net.do_ds ++ net.do_ds = True ++ ret = super().run_training() ++ net.do_ds = ds ++ return ret diff --git a/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_hypDDP.py b/pytorch/nnunet/training/network_training/nnUNetPlusPlusTrainerV2_hypDDP.py new file mode 100644 index 0000000..aab27fe @@ -4219,11 +4219,11 @@ index 0000000..0abb8d5 --- /dev/null +++ b/pytorch/run.sh @@ -0,0 +1,5 @@ -+python nnunet/run/run_training.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 -+ -+ -+python -m torch.distributed.launch --nproc_per_node 2 nnunet/run/run_training_DDP.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 -+ ++python nnunet/run/run_training.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 ++ ++ ++python -m torch.distributed.launch --nproc_per_node 2 nnunet/run/run_training_DDP.py 3d_fullres nnUNetPlusPlusTrainerV2_DDP Task003_Liver 0 ++ diff --git a/pytorch/setup.py b/pytorch/setup.py index 590a453..554f4e2 100644 --- a/pytorch/setup.py diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
    limitations under the License.
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/README.md b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/README.md
index 79d756e61d7fe97728e8b47a238aae3b15ca4528..6991aba00b4d13e6009e5f7f4ec17dd14117ff86 100644
--- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/README.md
+++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/README.md
@@ -1,83 +1,83 @@
-# DeeplabV3 PyTorch Offline Inference Guide
-## Inference
-### Environment Setup
-```shell
-pip install -r requirements.txt
-pip install mmcv-full==1.3.15
-git clone https://github.com/open-mmlab/mmsegmentation.git
-cd mmsegmentation
-git checkout fa1554f1aaea9a2c58249b06e1ea48420091464d
-pip install -e .
-cd ..
-```
-
-
-### Convert to ONNX
-[Download the weights](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3)
-
-* Use the model from the last column (model) of the first config row in that README.md
-
-#### Convert to onnx
-
-```shell
-python mmsegmentation/tools/pytorch2onnx.py \
-mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py \
---checkpoint ./deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth \
---output-file deeplabv3.onnx --shape 1024 2048
-```
-
-#### Simplify the onnx model with onnx-simplifier
-
-```shell
-python -m onnxsim deeplabv3.onnx deeplabv3_sim_bs1.onnx --input-shape="1,3,1024,2048" --dynamic-input-shape
-```
-
-### Convert to OM
-
-```shell
-source env_npu.sh
-atc --framework=5 --model=deeplabv3_sim_bs1.onnx --output=deeplabv3_bs1 --input_format=NCHW \
---input_shape="input:1,3,1024,2048" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA"
-```
-
-
-### Data Preprocessing
-#### Preprocessing script ./deeplabv3_torch_preprocess.py
-
-```shell
-python ./deeplabv3_torch_preprocess.py /opt/npu/cityscapes/leftImg8bit/val ./prep_dataset
-```
-Reads the 500 validation images under ./data/citiscapes/gtFine/val, processes them and saves the results in bin format
-
-
-#### Generate the dataset info file
-
-```shell
-python ./gen_dataset_info.py bin ./prep_dataset ./deeplabv3_prep_bin.info 1024 2048
-```
-
-### Offline Inference
-
-Place benchmark.x86_64 in this directory
-
-```shell
-./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=deeplabv3_bs1.om \
--input_text_path=./deeplabv3_prep_bin.info \
--input_width=1024 \
--input_height=2048 \
--output_binary=True \
--useDvpp=False
-```
-
-### Data Postprocessing
-
-```shell
-python ./deeplabv3_torch_postprocess.py --output_path=./result/dumpOutput_device0 --gt_path=/opt/npu/cityscapes/gtFine/val
-```
-
-
-### Evaluation Results
-
-| Model | Official accuracy | 310 accuracy | T4 performance | 310 performance |
-| ---------- | --------------------- | -------------| --------- | --------- |
+# DeeplabV3 PyTorch Offline Inference Guide
+## Inference
+### Environment Setup
+```shell
+pip install -r requirements.txt
+pip install mmcv-full==1.3.15
+git clone https://github.com/open-mmlab/mmsegmentation.git
+cd mmsegmentation
+git checkout fa1554f1aaea9a2c58249b06e1ea48420091464d
+pip install -e .
+cd ..
+```
+
+
+### Convert to ONNX
+[Download the weights](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3)
+
+* Use the model from the last column (model) of the first config row in that README.md
+
+#### Convert to onnx
+
+```shell
+python mmsegmentation/tools/pytorch2onnx.py \
+mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py \
+--checkpoint ./deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth \
+--output-file deeplabv3.onnx --shape 1024 2048
+```
+
+#### Simplify the onnx model with onnx-simplifier
+
+```shell
+python -m onnxsim deeplabv3.onnx deeplabv3_sim_bs1.onnx --input-shape="1,3,1024,2048" --dynamic-input-shape
+```
+
+### Convert to OM
+
+```shell
+source env_npu.sh
+atc --framework=5 --model=deeplabv3_sim_bs1.onnx --output=deeplabv3_bs1 --input_format=NCHW \
+--input_shape="input:1,3,1024,2048" --log=debug --soc_version=Ascend310 --auto_tune_mode="RL,GA"
+```
+
+
+### Data Preprocessing
+#### Preprocessing script ./deeplabv3_torch_preprocess.py
+
+```shell
+python ./deeplabv3_torch_preprocess.py /opt/npu/cityscapes/leftImg8bit/val ./prep_dataset
+```
+Reads the 500 validation images under ./data/citiscapes/gtFine/val, processes them and saves the results in bin format
+
+
+#### Generate the dataset info file
+
+```shell
+python ./gen_dataset_info.py bin ./prep_dataset ./deeplabv3_prep_bin.info 1024 2048
+```
+
+### Offline Inference
+
+Place benchmark.x86_64 in this directory
+
+```shell
+./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=deeplabv3_bs1.om \
+-input_text_path=./deeplabv3_prep_bin.info \
+-input_width=1024 \
+-input_height=2048 \
+-output_binary=True \
+-useDvpp=False
+```
+
+### Data Postprocessing
+
+```shell
+python ./deeplabv3_torch_postprocess.py --output_path=./result/dumpOutput_device0 --gt_path=/opt/npu/cityscapes/gtFine/val
+```
+
+
+### Evaluation Results
+
+| Model | Official accuracy | 310 accuracy | T4 performance | 310 performance |
+| ---------- | --------------------- | -------------| --------- | --------- |
 | deeplabv3_bs1 | mIoU 79.09 | mIoU 79.06 | 5.7787FPS | 3.1675FPS |
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_postprocess.py
index f5f2c83522cd2762e4c0a75037e29dbf063161b7..1b818b8ccaadcc238e21a67d0f8ac8003f39ef80 100644
--- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_postprocess.py
+++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_postprocess.py
@@ -1,230 +1,230 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import numpy as np -import torch -import argparse -import os -from PIL import Image - - -class GTFineFile(object): - """ - directory: path to gtFine - suffix: suffix of the gtFine - :return path List of gtFine files - """ - def __init__(self, directory, suffix='_gtFine_labelTrainIds.png'): - gtFine_list = [] - for root, sub_dirs, files in os.walk(directory): - for special_file in files: - if special_file.endswith(suffix): - gtFine_list.append(os.path.join(root, special_file)) - self.gtFine_list = gtFine_list - - def get_file(self, filename): - """ return file path list """ - for file in self.gtFine_list: - if file.endswith(filename): - return file - - -def intersect_and_union(pred_label, - label, - num_classes, - ignore_index, - label_map=dict(), - reduce_zero_label=False): - """Calculate intersection and Union. - - Args: - pred_label (ndarray | str): Prediction segmentation map - or predict result filename. - label (ndarray): Ground truth segmentation map. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - label_map (dict): Mapping old labels to new labels. The parameter will - work only when label is str. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. The parameter will - work only when label is str. Default: False. - - Returns: - torch.Tensor: The intersection of prediction and ground truth - histogram on all classes. - torch.Tensor: The union of prediction and ground truth histogram on - all classes. - torch.Tensor: The prediction histogram on all classes. - torch.Tensor: The ground truth histogram on all classes. - """ - - if isinstance(pred_label, str): - pred_label = torch.from_numpy(np.load(pred_label)) - else: - pred_label = torch.from_numpy((pred_label)) - - label = torch.from_numpy(label) - - if label_map is not None: - for old_id, new_id in label_map.items(): - label[label == old_id] = new_id - if reduce_zero_label: - label[label == 0] = 255 - label = label - 1 - label[label == 254] = 255 - - mask = (label != ignore_index) - pred_label = pred_label[mask] - label = label[mask] - - intersect = pred_label[pred_label == label] - area_intersect = torch.histc( - intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_pred_label = torch.histc( - pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_label = torch.histc( - label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_union = area_pred_label + area_label - area_intersect - return [area_intersect, area_union, area_pred_label, area_label] - - -class IntersectAndUnion(object): - """Calculate Total Intersection and Union. - - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. 
- - Returns: - iou - acc - """ - - def __init__(self, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False): - self.num_classes = num_classes - self.ignore_index = ignore_index - self.label_map = label_map - self.reduce_zero_label = reduce_zero_label - self.total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) - self.total_area_union = torch.zeros((num_classes,), dtype=torch.float64) - self.total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) - self.total_area_label = torch.zeros((num_classes,), dtype=torch.float64) - - def update(self, output, gt_seg_map): - """ update """ - [area_intersect, area_union, area_pred_label, area_label] = \ - intersect_and_union( - output, gt_seg_map, self.num_classes, self.ignore_index, - self.label_map, self.reduce_zero_label) - self.total_area_intersect += area_intersect.to(torch.float64) - self.total_area_union += area_union.to(torch.float64) - self.total_area_pred_label += area_pred_label.to(torch.float64) - self.total_area_label += area_label.to(torch.float64) - - def get(self): - """ get result """ - iou = self.total_area_intersect / self.total_area_union - acc = self.total_area_intersect / self.total_area_label - all_acc = self.total_area_intersect.sum() / self.total_area_label.sum() - mIoU = np.round(np.nanmean(iou) * 100, 2) - aAcc = np.round(np.nanmean(all_acc) * 100, 2) - return {'aAcc': aAcc, 'mIoU': mIoU} - - -def eval_metrics(output_path, - gt_path, - out_suffix='_leftImg8bit_1.bin', - gt_suffix='_gtFine_labelTrainIds.png', - result_path='./postprocess_result', - num_classes=19, - ignore_index=255, - label_map=None, - reduce_zero_label=False): - """Calculate evaluation metrics - Args: - results (list[ndarray] | list[str]): List of prediction segmentation - maps or list of prediction result filenames. - gt_seg_maps (list[ndarray] | list[str]): list of ground truth - segmentation maps or list of label filenames. - num_classes (int): Number of categories. - ignore_index (int): Index that will be ignored in evaluation. - metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. - nan_to_num (int, optional): If specified, NaN values will be replaced - by the numbers defined by the user. Default: None. - label_map (dict): Mapping old labels to new labels. Default: dict(). - reduce_zero_label (bool): Wether ignore zero label. Default: False. - Returns: - float: Overall accuracy on all images. - ndarray: Per category accuracy, shape (num_classes, ). - ndarray: Per category evaluation metrics, shape (num_classes, ). 
- """ - - # init metric - metric = IntersectAndUnion(num_classes, ignore_index, label_map, reduce_zero_label) - # init gtFine files list - fileFinder = GTFineFile(gt_path) - - for root, sub_dirs, files in os.walk(output_path): - files = [file for file in files if file.endswith('bin')] - len = str(files.__len__()) - for i, output_name in enumerate(files): - if not output_name.endswith('bin'): - continue - print('DeeplabV3 metric [' + str(i + 1) + '/' + len + '] on process: ' + output_name) - seg_map_name = output_name.replace(out_suffix, gt_suffix) - seg_map_path = fileFinder.get_file(seg_map_name) - if seg_map_name is not None: - seg_map = Image.open(seg_map_path) - seg_map = np.array(seg_map, dtype=np.uint8) - - output_path = os.path.realpath(os.path.join(root, output_name)) - output = np.fromfile(output_path, dtype=np.uint64).reshape(1024, 2048) - output = output.astype(np.uint8) - metric.update(output, seg_map) - else: - print("[ERROR] " + seg_map_name + " not find, check the file or make sure --out_suffix") - - # get result - result = metric.get() - print(result) - with open(result_path + '.txt', 'w') as f: - f.write('aAcc: {}\n'.format(result['aAcc'])) - f.write('mIoU: {}\n'.format(result['mIoU'])) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('mIoU calculate') - parser.add_argument('--output_path', default="./result", - help='path to om/onnx output file, default ./result') - parser.add_argument('--gt_path', default="/opt/npu/cityscapes/gtFine/val", - help='path to gtFine/val, default /opt/npu/cityscapes/gtFine/val') - parser.add_argument('--out_suffix', default="_leftImg8bit_1.bin", - help='suffix of the om/onnx output, default "_leftImg8bit_1.bin"') - parser.add_argument('--result_path', default="./postprocess_result", - help='path to save the script result, default ./postprocess_result.txt') - - args = parser.parse_args() - - output_path = os.path.realpath(args.output_path) - gt_path = os.path.realpath(args.gt_path) - out_suffix = args.out_suffix - result_path = os.path.realpath(args.result_path) - print(output_path) - print(gt_path) - eval_metrics(output_path, gt_path, out_suffix=out_suffix, result_path=result_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import torch +import argparse +import os +from PIL import Image + + +class GTFineFile(object): + """ + directory: path to gtFine + suffix: suffix of the gtFine + :return path List of gtFine files + """ + def __init__(self, directory, suffix='_gtFine_labelTrainIds.png'): + gtFine_list = [] + for root, sub_dirs, files in os.walk(directory): + for special_file in files: + if special_file.endswith(suffix): + gtFine_list.append(os.path.join(root, special_file)) + self.gtFine_list = gtFine_list + + def get_file(self, filename): + """ return file path list """ + for file in self.gtFine_list: + if file.endswith(filename): + return file + + +def intersect_and_union(pred_label, + label, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate intersection and Union. + + Args: + pred_label (ndarray | str): Prediction segmentation map + or predict result filename. + label (ndarray): Ground truth segmentation map. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. The parameter will + work only when label is str. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. The parameter will + work only when label is str. Default: False. + + Returns: + torch.Tensor: The intersection of prediction and ground truth + histogram on all classes. + torch.Tensor: The union of prediction and ground truth histogram on + all classes. + torch.Tensor: The prediction histogram on all classes. + torch.Tensor: The ground truth histogram on all classes. + """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return [area_intersect, area_union, area_pred_label, area_label] + + +class IntersectAndUnion(object): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. 
+ + Returns: + iou + acc + """ + + def __init__(self, num_classes, ignore_index, label_map=dict(), reduce_zero_label=False): + self.num_classes = num_classes + self.ignore_index = ignore_index + self.label_map = label_map + self.reduce_zero_label = reduce_zero_label + self.total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) + self.total_area_union = torch.zeros((num_classes,), dtype=torch.float64) + self.total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) + self.total_area_label = torch.zeros((num_classes,), dtype=torch.float64) + + def update(self, output, gt_seg_map): + """ update """ + [area_intersect, area_union, area_pred_label, area_label] = \ + intersect_and_union( + output, gt_seg_map, self.num_classes, self.ignore_index, + self.label_map, self.reduce_zero_label) + self.total_area_intersect += area_intersect.to(torch.float64) + self.total_area_union += area_union.to(torch.float64) + self.total_area_pred_label += area_pred_label.to(torch.float64) + self.total_area_label += area_label.to(torch.float64) + + def get(self): + """ get result """ + iou = self.total_area_intersect / self.total_area_union + acc = self.total_area_intersect / self.total_area_label + all_acc = self.total_area_intersect.sum() / self.total_area_label.sum() + mIoU = np.round(np.nanmean(iou) * 100, 2) + aAcc = np.round(np.nanmean(all_acc) * 100, 2) + return {'aAcc': aAcc, 'mIoU': mIoU} + + +def eval_metrics(output_path, + gt_path, + out_suffix='_leftImg8bit_1.bin', + gt_suffix='_gtFine_labelTrainIds.png', + result_path='./postprocess_result', + num_classes=19, + ignore_index=255, + label_map=None, + reduce_zero_label=False): + """Calculate evaluation metrics + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Wether ignore zero label. Default: False. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). 
+ """ + + # init metric + metric = IntersectAndUnion(num_classes, ignore_index, label_map, reduce_zero_label) + # init gtFine files list + fileFinder = GTFineFile(gt_path) + + for root, sub_dirs, files in os.walk(output_path): + files = [file for file in files if file.endswith('bin')] + len = str(files.__len__()) + for i, output_name in enumerate(files): + if not output_name.endswith('bin'): + continue + print('DeeplabV3 metric [' + str(i + 1) + '/' + len + '] on process: ' + output_name) + seg_map_name = output_name.replace(out_suffix, gt_suffix) + seg_map_path = fileFinder.get_file(seg_map_name) + if seg_map_name is not None: + seg_map = Image.open(seg_map_path) + seg_map = np.array(seg_map, dtype=np.uint8) + + output_path = os.path.realpath(os.path.join(root, output_name)) + output = np.fromfile(output_path, dtype=np.uint64).reshape(1024, 2048) + output = output.astype(np.uint8) + metric.update(output, seg_map) + else: + print("[ERROR] " + seg_map_name + " not find, check the file or make sure --out_suffix") + + # get result + result = metric.get() + print(result) + with open(result_path + '.txt', 'w') as f: + f.write('aAcc: {}\n'.format(result['aAcc'])) + f.write('mIoU: {}\n'.format(result['mIoU'])) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('mIoU calculate') + parser.add_argument('--output_path', default="./result", + help='path to om/onnx output file, default ./result') + parser.add_argument('--gt_path', default="/opt/npu/cityscapes/gtFine/val", + help='path to gtFine/val, default /opt/npu/cityscapes/gtFine/val') + parser.add_argument('--out_suffix', default="_leftImg8bit_1.bin", + help='suffix of the om/onnx output, default "_leftImg8bit_1.bin"') + parser.add_argument('--result_path', default="./postprocess_result", + help='path to save the script result, default ./postprocess_result.txt') + + args = parser.parse_args() + + output_path = os.path.realpath(args.output_path) + gt_path = os.path.realpath(args.gt_path) + out_suffix = args.out_suffix + result_path = os.path.realpath(args.result_path) + print(output_path) + print(gt_path) + eval_metrics(output_path, gt_path, out_suffix=out_suffix, result_path=result_path) diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_preprocess.py index d722f4d3333ec67988d36667c355056747d895c2..d3a41c0d44f3c736f559fe6962d9eefa3e18c44e 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/deeplabv3_torch_preprocess.py @@ -1,88 +1,88 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os -import cv2 -import numpy as np -from torchvision import transforms - - -class Normalize(object): - def __init__(self, mean, std, to_rgb=True): - self.mean = np.array(mean, dtype=np.float32) - self.std = np.array(std, dtype=np.float32) - self.to_rgb = to_rgb - - def __call__(self, img): - img = img.copy().astype(np.float32) - # cv2 inplace normalization does not accept uint8 - assert img.dtype != np.uint8 - mean = np.float64(self.mean.reshape(1, -1)) - stdinv = 1 / np.float64(self.std.reshape(1, -1)) - if self.to_rgb: - cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace - cv2.subtract(img, mean, img) # inplace - cv2.multiply(img, stdinv, img) # inplace - return img - - -def preprocess(src_path, save_path): - """ - resnet50 pytorch preprocess - """ - preprocess = transforms.Compose([ - Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), - transforms.ToTensor(), - ]) - - root = src_path - - # 扫描文件夹下所有文件 - def _scandir(dir_path, suffix, recursive): - for entry in os.scandir(dir_path): - if not entry.name.startswith('.') and entry.is_file(): - rel_path = os.path.relpath(entry.path, root) - if suffix is None or rel_path.endswith(suffix): - yield rel_path - elif recursive and os.path.isdir(entry.path): - # scan recursively if entry.path is a directory - yield from _scandir( - entry.path, suffix=suffix, recursive=recursive) - - in_files = _scandir(src_path, '_leftImg8bit.png', True) - if not os.path.exists(save_path): - os.makedirs(save_path) - - i = 0 - for file in in_files: - i = i + 1 - print(file, "====", i) - input_image = cv2.imread(src_path + '/' + file) - input_tensor = preprocess(input_image) - # print(file.split('/')[-1].split('.')[0]) - # print(input_tensor) - img = np.array(input_tensor).astype(np.float32) - # print(img.shape) - img.tofile(os.path.join(save_path, file.split('/')[-1].split('.')[0] + ".bin")) - - -if __name__ == '__main__': - if len(sys.argv) < 3: - raise Exception("usage: python3 xxx.py [src_path] [save_path]") - src_path = sys.argv[1] - save_path = sys.argv[2] - src_path = os.path.realpath(src_path) - save_path = os.path.realpath(save_path) - preprocess(src_path, save_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os +import cv2 +import numpy as np +from torchvision import transforms + + +class Normalize(object): + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, img): + img = img.copy().astype(np.float32) + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(self.mean.reshape(1, -1)) + stdinv = 1 / np.float64(self.std.reshape(1, -1)) + if self.to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def preprocess(src_path, save_path): + """ + resnet50 pytorch preprocess + """ + preprocess = transforms.Compose([ + Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), + transforms.ToTensor(), + ]) + + root = src_path + + # 扫描文件夹下所有文件 + def _scandir(dir_path, suffix, recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = os.path.relpath(entry.path, root) + if suffix is None or rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir( + entry.path, suffix=suffix, recursive=recursive) + + in_files = _scandir(src_path, '_leftImg8bit.png', True) + if not os.path.exists(save_path): + os.makedirs(save_path) + + i = 0 + for file in in_files: + i = i + 1 + print(file, "====", i) + input_image = cv2.imread(src_path + '/' + file) + input_tensor = preprocess(input_image) + # print(file.split('/')[-1].split('.')[0]) + # print(input_tensor) + img = np.array(input_tensor).astype(np.float32) + # print(img.shape) + img.tofile(os.path.join(save_path, file.split('/')[-1].split('.')[0] + ".bin")) + + +if __name__ == '__main__': + if len(sys.argv) < 3: + raise Exception("usage: python3 xxx.py [src_path] [save_path]") + src_path = sys.argv[1] + save_path = sys.argv[2] + src_path = os.path.realpath(src_path) + save_path = os.path.realpath(save_path) + preprocess(src_path, save_path) diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/gen_dataset_info.py index 11c25ce915cd63e436eabb74fe4ba1252e843391..4f9793db1ae248233fd9abff5f4f016698718e14 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/gen_dataset_info.py @@ -1,74 +1,74 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - """ - @description: get given bin information - @param file_path bin file path - @param info_name given information name - @param width image width - @param height image height - @return - """ - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - """ - @description: get given jpg information - @param file_path jpg file path - @param info_name given jpg information name - @return - """ - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + """ + @description: get given bin information + @param file_path bin file path + @param info_name given information name + @param width image width + @param height image height + @return + """ + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + """ + @description: get given jpg information + @param file_path jpg file path + @param info_name given jpg information name + @return + """ + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/requirements.txt index 4ebce316a29a4ea7a85237e397d801114144db98..099a8067cdd4a3769df8e15780cc36575871edea 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/requirements.txt @@ -1,7 +1,7 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.7.0 -onnxruntime == 1.9.0 -numpy == 1.20.3 -opencv-python == 4.5.2.54 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.7.0 +onnxruntime == 1.9.0 +numpy == 1.20.3 +opencv-python == 4.5.2.54 onnx-simplifier == 0.3.6 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/test/parse.py index a119109c5d93366ac01a075d24831dcee540674f..6d5a1293288dce8bfc70f79ecf6551acafed6b81 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3/test/parse.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - print(content) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + print(content) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_postprocess.py index dbc4cc77634605337d929ee904b76f61d174e729..25f698087940769b67a17cb8feefa7829e6db5a4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_postprocess.py @@ -1,93 +1,93 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# -*- coding: utf-8 -*- -import sys -import os - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(cur_path)[0] -sys.path.append(root_path) - -import numpy as np -import argparse - -import torch -import torch.utils.data as data - -from torchvision import transforms -from cityscapes import CitySegmentation -from score import SegmentationMetric -from distributed import * - -def get_res(res_dir): - - output = [] - with open(res_dir) as res_f: - for line in res_f: - num_list = line.split() - for num in num_list: - output.append(float(num)) - output = torch.from_numpy(np.array(output).reshape((1, 19, 480, 480))) - ''' - with open(res_dir, 'rb') as res_f: - output = np.frombuffer(res_f.read(), np.float16) - output = torch.from_numpy(output.reshape((1, 19, 480, 480))) - ''' - return output - - -def postprocess(args): - input_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize([.485, .456, .406], [.229, .224, .225]), - ]) - # dataset and dataloader - data_kwargs = {'transform': input_transform, 'base_size': 520, 'crop_size': 480} - val_dataset = CitySegmentation(root = args.src_path, split='val', mode='val', **data_kwargs) - - val_sampler = make_data_sampler(val_dataset, False, False) - val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size) - - val_loader = data.DataLoader(dataset=val_dataset, - batch_sampler=val_batch_sampler, - num_workers=args.workers, - pin_memory=True) - - metric = SegmentationMetric(19) - for i, (image, target, filename) in enumerate(val_loader): - res_name = os.path.splitext(os.path.basename(filename[0]))[0] - res_dir = os.path.join(args.result_dir, res_name + '_1.txt') - #res_dir = os.path.join(args.result_dir, res_name + '_1.bin') - res = get_res(res_dir) - metric.update(res, target) - pixAcc, mIoU = metric.get() - print("Sample: {:d}, validation pixAcc: {:.3f}, mIoU: {:.3f}".format( - i + 1, pixAcc * 100, mIoU * 100)) - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--src-path', type=str, required=True) - parser.add_argument('--result-dir', type=str, default='result/dumpOutput_device0') - parser.add_argument('--batch-size', type=int, default=1, metavar='N', - help='input batch size for training (default: 8)') - parser.add_argument('--workers', '-j', type=int, default=4, - metavar='N', help='dataloader threads') - args = parser.parse_args() - - postprocess(args) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -*- coding: utf-8 -*- +import sys +import os + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +import numpy as np +import argparse + +import torch +import torch.utils.data as data + +from torchvision import transforms +from cityscapes import CitySegmentation +from score import SegmentationMetric +from distributed import * + +def get_res(res_dir): + + output = [] + with open(res_dir) as res_f: + for line in res_f: + num_list = line.split() + for num in num_list: + output.append(float(num)) + output = torch.from_numpy(np.array(output).reshape((1, 19, 480, 480))) + ''' + with open(res_dir, 'rb') as res_f: + output = np.frombuffer(res_f.read(), np.float16) + output = torch.from_numpy(output.reshape((1, 19, 480, 480))) + ''' + return output + + +def postprocess(args): + input_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([.485, .456, .406], [.229, .224, .225]), + ]) + # dataset and dataloader + data_kwargs = {'transform': input_transform, 'base_size': 520, 'crop_size': 480} + val_dataset = CitySegmentation(root = args.src_path, split='val', mode='val', **data_kwargs) + + val_sampler = make_data_sampler(val_dataset, False, False) + val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size) + + val_loader = data.DataLoader(dataset=val_dataset, + batch_sampler=val_batch_sampler, + num_workers=args.workers, + pin_memory=True) + + metric = SegmentationMetric(19) + for i, (image, target, filename) in enumerate(val_loader): + res_name = os.path.splitext(os.path.basename(filename[0]))[0] + res_dir = os.path.join(args.result_dir, res_name + '_1.txt') + #res_dir = os.path.join(args.result_dir, res_name + '_1.bin') + res = get_res(res_dir) + metric.update(res, target) + pixAcc, mIoU = metric.get() + print("Sample: {:d}, validation pixAcc: {:.3f}, mIoU: {:.3f}".format( + i + 1, pixAcc * 100, mIoU * 100)) + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--src-path', type=str, required=True) + parser.add_argument('--result-dir', type=str, default='result/dumpOutput_device0') + parser.add_argument('--batch-size', type=int, default=1, metavar='N', + help='input batch size for training (default: 8)') + parser.add_argument('--workers', '-j', type=int, default=4, + metavar='N', help='dataloader threads') + args = parser.parse_args() + + postprocess(args) + # python ENet_postprocess.py --src-path=/root/.torch/datasets/citys --result-dir result/dumpOutput_device0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_preprocess.py index 413c310f7759da604725adb47eb3f2103568b387..02ce9a894059d7bc62d923eaae405fc574d4ac82 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_preprocess.py @@ -1,108 +1,108 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -*- coding: utf-8 -*- - -import argparse -import os -import sys -import numpy as np - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(cur_path)[0] -sys.path.append(root_path) - -from PIL import Image -from torchvision import transforms - - -def _val_sync_transform(img, mask): - outsize = 480 - short_size = outsize - w, h = img.size - if w > h: - oh = short_size - ow = int(1.0 * w * oh / h) - else: - ow = short_size - oh = int(1.0 * h * ow / w) - img = img.resize((ow, oh), Image.BILINEAR) - mask = mask.resize((ow, oh), Image.NEAREST) - # center crop - w, h = img.size - x1 = int(round((w - outsize) / 2.)) - y1 = int(round((h - outsize) / 2.)) - img = img.crop((x1, y1, x1 + outsize, y1 + outsize)) - mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) - # final transform - img, mask = np.array(img), np.array(mask).astype('int32') - return img, mask - -def _get_city_pairs(folder, split='val'): - def get_path_pairs(img_folder, mask_folder): - img_paths = [] - mask_paths = [] - for root, _, files in os.walk(img_folder): - for filename in files: - if filename.endswith('.png'): - imgpath = os.path.join(root, filename) - foldername = os.path.basename(os.path.dirname(imgpath)) - maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') - maskpath = os.path.join(mask_folder, foldername, maskname) - if os.path.isfile(imgpath) and os.path.isfile(maskpath): - img_paths.append(imgpath) - mask_paths.append(maskpath) - else: - print('cannot find the mask or image:', imgpath, maskpath) - print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) - return img_paths, mask_paths - - if split in ('train', 'val'): - img_folder = os.path.join(folder, 'leftImg8bit/' + split) - mask_folder = os.path.join(folder, 'gtFine/' + split) - img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) - return img_paths, mask_paths - -def preprocess(args): - input_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize([.485, .456, .406], [.229, .224, .225]), - ]) - - images, mask_paths = _get_city_pairs(args.src_path, 'val') - - for i, image in enumerate(images): - img = Image.open(image).convert('RGB') - mask = Image.open(mask_paths[i]) - img, mask = _val_sync_transform(img, mask) - img = input_transform(img) - #img = np.asarray(img).astype(np.float16) - - img = np.asarray(img) - - filename = os.path.basename(image) - - img.tofile(os.path.join(args.save_path, os.path.splitext(filename)[0] + ".bin")) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--src-path', type=str, required=True) - parser.add_argument('--save_path', type=str, default='prep_dataset') - - args = parser.parse_args() - - if not os.path.isdir(args.save_path): - os.makedirs(os.path.realpath(args.save_path)) - preprocess(args) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# -*- coding: utf-8 -*- + +import argparse +import os +import sys +import numpy as np + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +from PIL import Image +from torchvision import transforms + + +def _val_sync_transform(img, mask): + outsize = 480 + short_size = outsize + w, h = img.size + if w > h: + oh = short_size + ow = int(1.0 * w * oh / h) + else: + ow = short_size + oh = int(1.0 * h * ow / w) + img = img.resize((ow, oh), Image.BILINEAR) + mask = mask.resize((ow, oh), Image.NEAREST) + # center crop + w, h = img.size + x1 = int(round((w - outsize) / 2.)) + y1 = int(round((h - outsize) / 2.)) + img = img.crop((x1, y1, x1 + outsize, y1 + outsize)) + mask = mask.crop((x1, y1, x1 + outsize, y1 + outsize)) + # final transform + img, mask = np.array(img), np.array(mask).astype('int32') + return img, mask + +def _get_city_pairs(folder, split='val'): + def get_path_pairs(img_folder, mask_folder): + img_paths = [] + mask_paths = [] + for root, _, files in os.walk(img_folder): + for filename in files: + if filename.endswith('.png'): + imgpath = os.path.join(root, filename) + foldername = os.path.basename(os.path.dirname(imgpath)) + maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') + maskpath = os.path.join(mask_folder, foldername, maskname) + if os.path.isfile(imgpath) and os.path.isfile(maskpath): + img_paths.append(imgpath) + mask_paths.append(maskpath) + else: + print('cannot find the mask or image:', imgpath, maskpath) + print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) + return img_paths, mask_paths + + if split in ('train', 'val'): + img_folder = os.path.join(folder, 'leftImg8bit/' + split) + mask_folder = os.path.join(folder, 'gtFine/' + split) + img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) + return img_paths, mask_paths + +def preprocess(args): + input_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([.485, .456, .406], [.229, .224, .225]), + ]) + + images, mask_paths = _get_city_pairs(args.src_path, 'val') + + for i, image in enumerate(images): + img = Image.open(image).convert('RGB') + mask = Image.open(mask_paths[i]) + img, mask = _val_sync_transform(img, mask) + img = input_transform(img) + #img = np.asarray(img).astype(np.float16) + + img = np.asarray(img) + + filename = os.path.basename(image) + + img.tofile(os.path.join(args.save_path, os.path.splitext(filename)[0] + ".bin")) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--src-path', type=str, required=True) + parser.add_argument('--save_path', type=str, default='prep_dataset') + + args = parser.parse_args() + + if not os.path.isdir(args.save_path): + os.makedirs(os.path.realpath(args.save_path)) + preprocess(args) + # python ENet_preprocess.py --src-path=/root/.torch/datasets/citys --save_path prep_dataset \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_pth2onnx.py index 9892c9c6920ff737d0ccc13b6c61acf09143091d..275ef0d62fd0e213e862bb89ddd540fa2fb693d4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/ENet/ENet_pth2onnx.py @@ -1,83 +1,83 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -*- coding: utf-8 -*- - -import os -import sys -import argparse - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(cur_path)[0] -sys.path.append(root_path) - -import torch -import torch.nn as nn -import torch.onnx - -from collections import OrderedDict -from enet import get_enet - - -def proc_nodes_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if (k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -''' -def convert(pth_file_path, onnx_file_path, class_num): - checkpoint = torch.load(pth_file_path, map_location='cpu') - checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') - model = densenet121(pretrained=False, num_classes=class_num) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, - opset_version=11) -''' - -def pth2onnx(input_file, output_file, batch_size=1): - model = get_enet(model='enet', dataset='citys', aux=False, norm_layer=nn.BatchNorm2d) - checkpoint = {} - checkpoint['state_dict'] = torch.load(input_file, map_location='cpu') - checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') - model.load_state_dict(checkpoint['state_dict']) - - model.eval() - print(model) - - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(batch_size, 3, 480, 480) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--input-file', type=str, default='~/.torch/models/enet_citys.pth') - parser.add_argument('--output-file', type=str, default='model/enet_citys_910_bs1.onnx') - parser.add_argument('--batch-size', type=int, default=1) - args = parser.parse_args() - pth2onnx(args.input_file, args.output_file, batch_size=args.batch_size) - -# python ENet_pth2onnx.py --input-file models/enet_citys.pth --output-file models/enet_citys_910_bs1.onnx --batch-size 1 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# -*- coding: utf-8 -*- + +import os +import sys +import argparse + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +import torch +import torch.nn as nn +import torch.onnx + +from collections import OrderedDict +from enet import get_enet + + +def proc_nodes_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +''' +def convert(pth_file_path, onnx_file_path, class_num): + checkpoint = torch.load(pth_file_path, map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + model = densenet121(pretrained=False, num_classes=class_num) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(16, 3, 224, 224) + torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, + opset_version=11) +''' + +def pth2onnx(input_file, output_file, batch_size=1): + model = get_enet(model='enet', dataset='citys', aux=False, norm_layer=nn.BatchNorm2d) + checkpoint = {} + checkpoint['state_dict'] = torch.load(input_file, map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + model.load_state_dict(checkpoint['state_dict']) + + model.eval() + print(model) + + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(batch_size, 3, 480, 480) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--input-file', type=str, default='~/.torch/models/enet_citys.pth') + parser.add_argument('--output-file', type=str, default='model/enet_citys_910_bs1.onnx') + parser.add_argument('--batch-size', type=int, default=1) + args = parser.parse_args() + pth2onnx(args.input_file, args.output_file, batch_size=args.batch_size) + +# python ENet_pth2onnx.py --input-file models/enet_citys.pth --output-file models/enet_citys_910_bs1.onnx --batch-size 1 # python ENet_pth2onnx.py --input-file models/enet_citys.pth --output-file models/enet_citys_910_bs16.onnx --batch-size 16 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/ENet/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/ENet/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/ENet/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/ENet/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/LICENSE index 7615b4e85fa24d81f25cec1495e783a3936f1a86..72f817fb44de8b9fd23fe71230b9dc5ccbe4ca35 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/README.md b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/README.md index 9aa8344a9367b140b072977917070ba54e918176..b4fa271e04aec3917c4c5dc533d72c290274e47a 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/README.md @@ -1,356 +1,356 @@ -# 基于开源mmsegmentation预训练的fcn-8s Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 开源精度](#62-开源精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[fcn-8s论文](https://arxiv.org/abs/1411.4038) -论文提出 Fully Convolutional Networks(FCN)方法用于图像语义分割,将图像级别的分类扩展到像素级别的分类,获得 CVPR2015 的 best paper。 - - -### 1.2 代码地址 -[mmsegmentation框架fcn-8s代码](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) -branch:master commit_id:e6a8791ab0a03c60c0a9abb8456cd4d804342e92 - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -pytorch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -``` -**注意:** -> 转onnx的环境上pytorch需要安装1.8.0版本 - -### 2.2 python第三方库 -``` -numpy == 1.20.1 -opencv-python == 4.5.2.52 -``` - -**说明:** -> X86架构:opencv,pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:opencv,pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - - -### 3.1 pth转onnx模型 - -1.获取pth权重文件 -[fcn-8s基于mmsegmentation预训练的npu权重文件](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) -文件md5sum: 0b42f76eb2e3779a5f802acb5ded5eed - -2.mmsegmentation源码安装 -```shell -git clone https://github.com/open-mmlab/mmcv.git -cd mmcv -pip3.7 install -e . -cd .. -git clone https://github.com/open-mmlab/mmsegmentation.git -cd mmsegmentation -如果修改了模型代码,交付了{model_name}.diff -patch -p1 < ../{model_name}.diff -如果模型代码需要安装,则安装模型代码(如果没有安装脚本,pth2onnx等脚本需要引用模型代码的类或函数,可通过sys.path.append(r"./pytorch-nested-unet")添加搜索路径的方式) -pip3.7 install -e . # or "python3.7 setup.py develop" -cd .. 
-``` - - **说明:** -> 安装所需的依赖说明请参考mmsegmentation/docs/get_started.md - - -3.使用tools里的pytorch2onnx.py文件,运行如下命令,生成对应的onnx模型: -```shell -python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py --checkpoint fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --output-file fcn_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --show -``` - **模型转换要点:** -> 虽然导出的onnx可以转换为多batch的om离线推理,但是在线推理与onnx目前还不支持多batch推理 - -### 3.2 onnx转om模型 - -1.设置环境变量 -```shell -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.1 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373),如果存在多余输出节点,需要指定输出节点以去除无用输出,节点序号可能会因网络结构不同而不同,使用netron开源可视化工具查看具体的输出节点名: -```shell -atc --framework=5 --model=fcn_r50-d8_512x512_20k_voc12aug.onnx --output=fcn_r50-d8_512x512_20k_voc12aug_bs1 --input_format=NCHW --input_shape="input:1,3,500,500" --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[VOC2012官网](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html)的VOC2012的1449张验证集进行测试,图片与对应ground truth分别存放在/opt/npu/VOCdevkit/VOC2012/JPEGImages/与/opt/npu/VOCdevkit/VOC2012/SegmentationClass/。 - -### 4.2 数据集预处理 -1.预处理脚本mmsegmentation_voc2012_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -```shell -python3.7 mmsegmentation_voc2012_preprocess.py --image_folder_path=/opt/npu/VOCdevkit/VOC2012/JPEGImages/ --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --bin_folder_path=./voc12_bin/ -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -```shell -python3.7 get_info.py bin ./voc12_bin voc12.info 500 500 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.1 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) - -### 5.2 离线推理 - -1.设置环境变量 -```shell -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ -``` -2.执行离线推理 -```shell -./benchmark.${arch} -model_type=vision -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true -``` - **注意:** -> onnx的输出是int64,但是om的输出是int32 - -输出结果默认保存在当前目录result/dumpOutput_device0,模型有一个输出,每个输入对应的输出对应_1.bin文件 -``` -输出 shape 数据类型 数据含义 -output1 1 * 1 * 500 * 500 int32 8位图像 -``` - -## 6 精度对比 - 
-- **[离线推理精度](#61-离线推理精度)** -- **[开源精度](#62-开源精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理精度统计 - -1.调用mmsegmentation_voc2012_postprocess.py评测bs1的mIoU精度: -```shell -python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info - -python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device0 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 -``` -第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 -执行完后会打印出精度: -``` -per class results: - -+-------------+-------+-------+ -| Class | IoU | Acc | -+-------------+-------+-------+ -| background | 92.84 | 97.27 | -| aeroplane | 81.0 | 90.2 | -| bicycle | 37.6 | 84.07 | -| bird | 80.3 | 87.49 | -| boat | 64.63 | 77.42 | -| bottle | 61.32 | 69.76 | -| bus | 87.31 | 91.7 | -| car | 79.48 | 89.74 | -| cat | 85.69 | 92.6 | -| chair | 30.69 | 44.66 | -| cow | 73.21 | 82.52 | -| diningtable | 43.5 | 48.95 | -| dog | 78.83 | 87.76 | -| horse | 74.5 | 82.18 | -| motorbike | 75.7 | 82.97 | -| person | 83.24 | 89.45 | -| pottedplant | 53.23 | 64.87 | -| sheep | 74.29 | 80.85 | -| sofa | 45.59 | 55.79 | -| train | 77.98 | 82.49 | -| tvmonitor | 68.21 | 74.91 | -+-------------+-------+-------+ -Summary: - -+--------+-------+-------+-------+ -| Scope | mIoU | mAcc | aAcc | -+--------+-------+-------+-------+ -| global | 69.01 | 78.94 | 93.04 | -+--------+-------+-------+-------+ - -``` - -2.调用mmsegmentation_voc2012_postprocess.py评测bs16的mIoU精度: -```shell -python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info - -python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device1 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 -``` -第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 -执行完后会打印出精度: -``` -per class results: - -+-------------+-------+-------+ -| Class | IoU | Acc | -+-------------+-------+-------+ -| background | 92.84 | 97.27 | -| aeroplane | 81.0 | 90.2 | -| bicycle | 37.6 | 84.07 | -| bird | 80.3 | 87.49 | -| boat | 64.63 | 77.42 | -| bottle | 61.32 | 69.76 | -| bus | 87.31 | 91.7 | -| car | 79.48 | 89.74 | -| cat | 85.69 | 92.6 | -| chair | 30.69 | 44.66 | -| cow | 73.21 | 82.52 | -| diningtable | 43.5 | 48.95 | -| dog | 78.83 | 87.76 | -| horse | 74.5 | 82.18 | -| motorbike | 75.7 | 82.97 | -| person | 83.24 | 89.45 | -| pottedplant | 53.23 | 64.87 | -| sheep | 74.29 | 80.85 | -| sofa | 45.59 | 55.79 | -| train | 77.98 | 82.49 | -| tvmonitor | 68.21 | 74.91 | -+-------------+-------+-------+ -Summary: - -+--------+-------+-------+-------+ -| Scope | mIoU | mAcc | aAcc | -+--------+-------+-------+-------+ -| global | 69.01 | 78.94 | 93.04 | -+--------+-------+-------+-------+ - -``` - **精度调试:** -> 1.在线推理前处理图片是一定格式的动态分辨率,onnx将分辨率固定为512x512会导致精度下降些。 -> 2.分辨率在512x512时onnx离线推理的精度与om精度相同,分辨率改为500x500可以提升精度,使得mask的精度与开源相比更高 -> 3.单图调试 -> ``` -> python3.7 mmsegmentation/tools/test.py mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --show -> python3.7 mmsegmentation/tools/pytorch2onnx.py 
mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py --checkpoint fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --output-file fcn_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --input-img 2011_003103.jpg --show --verify -> ``` - - -### 6.2 开源精度 -[官网精度](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) - -``` -{"mode": "val", "epoch": 31, "iter": 20000, "lr": 0.0001, "mIoU": 0.67085, "mAcc": 0.76958, "aAcc": 0.92709} -``` -### 6.3 精度对比 -om推理mIoU精度均为0.6901,开源mIoU精度为0.67085,om精度大于开源精度,精度达标 - - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** - -### 7.1 npu性能数据 -1.benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -由于在线推理与onnx推理还不支持多batch,所以仅测om bs1,bs16的性能。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 14.2564, latency: 101639 -[data read] throughputRate: 24.7255, moduleLatency: 40.444 -[preprocess] throughputRate: 22.102, moduleLatency: 45.2448 -[infer] throughputRate: 14.3682, Interface throughputRate: 16.2017, moduleLatency: 69.2286 -[post] throughputRate: 14.368, moduleLatency: 69.5993 -``` -Interface throughputRate: 16.2017,16.2017x4=64.8068即是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[[e2e] throughputRate: 13.459, latency: 107660 -[data read] throughputRate: 23.5047, moduleLatency: 42.5446 -[preprocess] throughputRate: 21.4117, moduleLatency: 46.7034 -[infer] throughputRate: 13.5517, Interface throughputRate: 15.4271, moduleLatency: 73.3405 -[post] throughputRate: 0.850975, moduleLatency: 1175.12 -``` -Interface throughputRate: 15.4271,15.4271x4=61.7084即是batch16 310单卡吞吐率 - -2.npu纯推理性能 -batch1的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -./benchmark.x86_64 -round=20 -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 -``` -PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt: -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 16.2574samples/s, ave_latency: 61.5162ms ----------------------------------------------------------------- -``` -batch16的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -./benchmark.x86_64 -round=20 -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs16.om -device_id=0 -batch_size=16 -``` -PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt: -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 15.5282samples/s, ave_latency: 64.4083ms ----------------------------------------------------------------- -``` - -**性能优化:** -> 没有遇到性能不达标的问题,故不需要进行性能优化 - +# 基于开源mmsegmentation预训练的fcn-8s Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 
pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 开源精度](#62-开源精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[fcn-8s论文](https://arxiv.org/abs/1411.4038) +论文提出 Fully Convolutional Networks(FCN)方法用于图像语义分割,将图像级别的分类扩展到像素级别的分类,获得 CVPR2015 的 best paper。 + + +### 1.2 代码地址 +[mmsegmentation框架fcn-8s代码](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) +branch:master commit_id:e6a8791ab0a03c60c0a9abb8456cd4d804342e92 + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +pytorch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +``` +**注意:** +> 转onnx的环境上pytorch需要安装1.8.0版本 + +### 2.2 python第三方库 +``` +numpy == 1.20.1 +opencv-python == 4.5.2.52 +``` + +**说明:** +> X86架构:opencv,pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:opencv,pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + + +### 3.1 pth转onnx模型 + +1.获取pth权重文件 +[fcn-8s基于mmsegmentation预训练的npu权重文件](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) +文件md5sum: 0b42f76eb2e3779a5f802acb5ded5eed + +2.mmsegmentation源码安装 +```shell +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +pip3.7 install -e . +cd .. +git clone https://github.com/open-mmlab/mmsegmentation.git +cd mmsegmentation +如果修改了模型代码,交付了{model_name}.diff +patch -p1 < ../{model_name}.diff +如果模型代码需要安装,则安装模型代码(如果没有安装脚本,pth2onnx等脚本需要引用模型代码的类或函数,可通过sys.path.append(r"./pytorch-nested-unet")添加搜索路径的方式) +pip3.7 install -e . # or "python3.7 setup.py develop" +cd .. 
+``` + + **说明:** +> 安装所需的依赖说明请参考mmsegmentation/docs/get_started.md + + +3.使用tools里的pytorch2onnx.py文件,运行如下命令,生成对应的onnx模型: +```shell +python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py --checkpoint fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --output-file fcn_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --show +``` + **模型转换要点:** +> 虽然导出的onnx可以转换为多batch的om离线推理,但是在线推理与onnx目前还不支持多batch推理 + +### 3.2 onnx转om模型 + +1.设置环境变量 +```shell +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.1 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373),如果存在多余输出节点,需要指定输出节点以去除无用输出,节点序号可能会因网络结构不同而不同,使用netron开源可视化工具查看具体的输出节点名: +```shell +atc --framework=5 --model=fcn_r50-d8_512x512_20k_voc12aug.onnx --output=fcn_r50-d8_512x512_20k_voc12aug_bs1 --input_format=NCHW --input_shape="input:1,3,500,500" --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[VOC2012官网](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html)的VOC2012的1449张验证集进行测试,图片与对应ground truth分别存放在/opt/npu/VOCdevkit/VOC2012/JPEGImages/与/opt/npu/VOCdevkit/VOC2012/SegmentationClass/。 + +### 4.2 数据集预处理 +1.预处理脚本mmsegmentation_voc2012_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +```shell +python3.7 mmsegmentation_voc2012_preprocess.py --image_folder_path=/opt/npu/VOCdevkit/VOC2012/JPEGImages/ --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --bin_folder_path=./voc12_bin/ +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +```shell +python3.7 get_info.py bin ./voc12_bin voc12.info 500 500 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.1 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +### 5.2 离线推理 + +1.设置环境变量 +```shell +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ +``` +2.执行离线推理 +```shell +./benchmark.${arch} -model_type=vision -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true +``` + **注意:** +> onnx的输出是int64,但是om的输出是int32 + +输出结果默认保存在当前目录result/dumpOutput_device0,模型有一个输出,每个输入对应的输出对应_1.bin文件 +``` +输出 shape 数据类型 数据含义 +output1 1 * 1 * 500 * 500 int32 8位图像 +``` + +## 6 精度对比 + 
+- **[离线推理精度](#61-离线推理精度)** +- **[开源精度](#62-开源精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理精度统计 + +1.调用mmsegmentation_voc2012_postprocess.py评测bs1的mIoU精度: +```shell +python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info + +python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device0 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 +``` +第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 +执行完后会打印出精度: +``` +per class results: + ++-------------+-------+-------+ +| Class | IoU | Acc | ++-------------+-------+-------+ +| background | 92.84 | 97.27 | +| aeroplane | 81.0 | 90.2 | +| bicycle | 37.6 | 84.07 | +| bird | 80.3 | 87.49 | +| boat | 64.63 | 77.42 | +| bottle | 61.32 | 69.76 | +| bus | 87.31 | 91.7 | +| car | 79.48 | 89.74 | +| cat | 85.69 | 92.6 | +| chair | 30.69 | 44.66 | +| cow | 73.21 | 82.52 | +| diningtable | 43.5 | 48.95 | +| dog | 78.83 | 87.76 | +| horse | 74.5 | 82.18 | +| motorbike | 75.7 | 82.97 | +| person | 83.24 | 89.45 | +| pottedplant | 53.23 | 64.87 | +| sheep | 74.29 | 80.85 | +| sofa | 45.59 | 55.79 | +| train | 77.98 | 82.49 | +| tvmonitor | 68.21 | 74.91 | ++-------------+-------+-------+ +Summary: + ++--------+-------+-------+-------+ +| Scope | mIoU | mAcc | aAcc | ++--------+-------+-------+-------+ +| global | 69.01 | 78.94 | 93.04 | ++--------+-------+-------+-------+ + +``` + +2.调用mmsegmentation_voc2012_postprocess.py评测bs16的mIoU精度: +```shell +python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info + +python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device1 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 +``` +第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 +执行完后会打印出精度: +``` +per class results: + ++-------------+-------+-------+ +| Class | IoU | Acc | ++-------------+-------+-------+ +| background | 92.84 | 97.27 | +| aeroplane | 81.0 | 90.2 | +| bicycle | 37.6 | 84.07 | +| bird | 80.3 | 87.49 | +| boat | 64.63 | 77.42 | +| bottle | 61.32 | 69.76 | +| bus | 87.31 | 91.7 | +| car | 79.48 | 89.74 | +| cat | 85.69 | 92.6 | +| chair | 30.69 | 44.66 | +| cow | 73.21 | 82.52 | +| diningtable | 43.5 | 48.95 | +| dog | 78.83 | 87.76 | +| horse | 74.5 | 82.18 | +| motorbike | 75.7 | 82.97 | +| person | 83.24 | 89.45 | +| pottedplant | 53.23 | 64.87 | +| sheep | 74.29 | 80.85 | +| sofa | 45.59 | 55.79 | +| train | 77.98 | 82.49 | +| tvmonitor | 68.21 | 74.91 | ++-------------+-------+-------+ +Summary: + ++--------+-------+-------+-------+ +| Scope | mIoU | mAcc | aAcc | ++--------+-------+-------+-------+ +| global | 69.01 | 78.94 | 93.04 | ++--------+-------+-------+-------+ + +``` + **精度调试:** +> 1.在线推理前处理图片是一定格式的动态分辨率,onnx将分辨率固定为512x512会导致精度下降些。 +> 2.分辨率在512x512时onnx离线推理的精度与om精度相同,分辨率改为500x500可以提升精度,使得mask的精度与开源相比更高 +> 3.单图调试 +> ``` +> python3.7 mmsegmentation/tools/test.py mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --show +> python3.7 mmsegmentation/tools/pytorch2onnx.py 
mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py --checkpoint fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth --output-file fcn_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --input-img 2011_003103.jpg --show --verify +> ``` + + +### 6.2 开源精度 +[官网精度](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) + +``` +{"mode": "val", "epoch": 31, "iter": 20000, "lr": 0.0001, "mIoU": 0.67085, "mAcc": 0.76958, "aAcc": 0.92709} +``` +### 6.3 精度对比 +om推理mIoU精度均为0.6901,开源mIoU精度为0.67085,om精度大于开源精度,精度达标 + + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** + +### 7.1 npu性能数据 +1.benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +由于在线推理与onnx推理还不支持多batch,所以仅测om bs1,bs16的性能。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 14.2564, latency: 101639 +[data read] throughputRate: 24.7255, moduleLatency: 40.444 +[preprocess] throughputRate: 22.102, moduleLatency: 45.2448 +[infer] throughputRate: 14.3682, Interface throughputRate: 16.2017, moduleLatency: 69.2286 +[post] throughputRate: 14.368, moduleLatency: 69.5993 +``` +Interface throughputRate: 16.2017,16.2017x4=64.8068即是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[[e2e] throughputRate: 13.459, latency: 107660 +[data read] throughputRate: 23.5047, moduleLatency: 42.5446 +[preprocess] throughputRate: 21.4117, moduleLatency: 46.7034 +[infer] throughputRate: 13.5517, Interface throughputRate: 15.4271, moduleLatency: 73.3405 +[post] throughputRate: 0.850975, moduleLatency: 1175.12 +``` +Interface throughputRate: 15.4271,15.4271x4=61.7084即是batch16 310单卡吞吐率 + +2.npu纯推理性能 +batch1的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +./benchmark.x86_64 -round=20 -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 +``` +PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt: +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 16.2574samples/s, ave_latency: 61.5162ms +---------------------------------------------------------------- +``` +batch16的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +./benchmark.x86_64 -round=20 -om_path=fcn_r50-d8_512x512_20k_voc12aug_bs16.om -device_id=0 -batch_size=16 +``` +PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt: +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_fcn_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 15.5282samples/s, ave_latency: 64.4083ms +---------------------------------------------------------------- +``` + +**性能优化:** +> 没有遇到性能不达标的问题,故不需要进行性能优化 + diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/get_info.py b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- 
a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/get_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_postprocess.py index 298799e0073d82e9fbb022510a2bec908975429e..350c5088838ba3726200713721378cb6726f0941 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_postprocess.py @@ -1,243 +1,243 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import os.path as osp -import torch -import mmcv -import numpy as np -from terminaltables import AsciiTable - -CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', - 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', - 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', - 'train', 'tvmonitor') - -PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], - [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], - [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], - [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], - [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] - - -def load_annotations(img_dir, ann_dir, split): - img_suffix = '.jpg' - seg_map_suffix = '.png' - img_infos = [] - if split is not None: - with open(split) as f: - for line in f: - img_name = line.strip() - img_info = dict(filename=img_name + img_suffix) - if ann_dir is not None: - seg_map = img_name + seg_map_suffix - img_info['ann'] = dict(seg_map=seg_map) - img_infos.append(img_info) - else: - for img in mmcv.scandir(img_dir, img_suffix, recursive=True): - img_info = dict(filename=img) - if ann_dir is not None: - seg_map = img.replace(img_suffix, seg_map_suffix) - img_info['ann'] = dict(seg_map=seg_map) - img_infos.append(img_info) - - return img_infos - - -def get_gt_seg_maps(img_infos, ann_dir): - """Get ground truth segmentation maps for evaluation.""" - gt_seg_maps = [] - for img_info in img_infos: - seg_map = osp.join(ann_dir, img_info['ann']['seg_map']) - gt_seg_map = mmcv.imread( - seg_map, flag='unchanged', backend='pillow') - gt_seg_maps.append(gt_seg_map) - return gt_seg_maps - - -def voc2012_evaluation(results, gt_seg_maps): - metric = ['mIoU'] - eval_results = {} - - num_classes = len(CLASSES) - ignore_index = 255 - label_map = dict() - reduce_zero_label = False - - num_imgs = len(results) - assert len(gt_seg_maps) == num_imgs - total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) - total_area_union = torch.zeros((num_classes,), dtype=torch.float64) - total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) - total_area_label = torch.zeros((num_classes,), dtype=torch.float64) - for i in range(num_imgs): - if isinstance(results[i], str): - pred_label = torch.from_numpy(np.load(results[i])) - else: - pred_label = torch.from_numpy((results[i])) - - if isinstance(gt_seg_maps[i], str): - label = torch.from_numpy( - mmcv.imread(gt_seg_maps[i], flag='unchanged', backend='pillow')) - else: - label = torch.from_numpy(gt_seg_maps[i]) - - if label_map is not None: - for old_id, new_id in label_map.items(): - label[label == old_id] = new_id - if reduce_zero_label: - label[label == 0] = 255 - label = label - 1 - label[label == 254] = 255 - - mask = (label != ignore_index) - pred_label = pred_label[mask] - label = label[mask] - - intersect = pred_label[pred_label == label] - area_intersect = torch.histc( - intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_pred_label = torch.histc( - pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_label = torch.histc( - label.float(), bins=(num_classes), min=0, max=num_classes - 1) - area_union = area_pred_label + area_label - area_intersect - - total_area_intersect += area_intersect - total_area_union += area_union - total_area_pred_label += area_pred_label - total_area_label += area_label - all_acc = total_area_intersect.sum() / total_area_label.sum() - acc = total_area_intersect / 
total_area_label - ret_metrics = [all_acc, acc] - iou = total_area_intersect / total_area_union - ret_metrics.append(iou) - ret_metrics = [metric.numpy() for metric in ret_metrics] - - class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']] - class_names = CLASSES - - ret_metrics_round = [ - np.round(ret_metric * 100, 2) for ret_metric in ret_metrics - ] - for i in range(num_classes): - class_table_data.append([class_names[i]] + - [m[i] for m in ret_metrics_round[2:]] + - [ret_metrics_round[1][i]]) - summary_table_data = [['Scope'] + - ['m' + head - for head in class_table_data[0][1:]] + ['aAcc']] - ret_metrics_mean = [ - np.round(np.nanmean(ret_metric) * 100, 2) - for ret_metric in ret_metrics - ] - summary_table_data.append(['global'] + ret_metrics_mean[2:] + - [ret_metrics_mean[1]] + - [ret_metrics_mean[0]]) - - print('per class results:') - table = AsciiTable(class_table_data) - print('\n' + table.table) - print('Summary:') - table = AsciiTable(summary_table_data) - print('\n' + table.table) - - for i in range(1, len(summary_table_data[0])): - eval_results[summary_table_data[0] - [i]] = summary_table_data[1][i] / 100.0 - for idx, sub_metric in enumerate(class_table_data[0][1:], 1): - for item in class_table_data[1:]: - eval_results[str(sub_metric) + '.' + - str(item[0])] = item[idx] / 100.0 - return eval_results - - -def postprocess_mask(mask, image_size, net_input_width, net_input_height): - w = image_size[0] - h = image_size[1] - scale = min(net_input_width / w, net_input_height / h) - - pad_w = net_input_width - w * scale - pad_h = net_input_height - h * scale - pad_left = (pad_w // 2) - pad_top = (pad_h // 2) - if pad_top < 0: - pad_top = 0 - if pad_left < 0: - pad_left = 0 - pad_left = int(pad_left) - pad_top = int(pad_top) - a = int(500 - pad_top) - b = int(500 - pad_left) - mask = mask[pad_top:a, pad_left:b] - import torch.nn.functional as F - mask = torch.from_numpy(mask).to(dtype=torch.float32) - mask = mask.expand((1, 1, mask.size(0), mask.size(1))) - mask = F.interpolate(mask, size=(int(h), int(w)), mode='bilinear', align_corners=False) - - mask = mask.squeeze().to(dtype=torch.int32).numpy() - return mask - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") - parser.add_argument("--test_annotation", default="./voc12_jpg.info") - parser.add_argument("--img_dir", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages") - parser.add_argument("--ann_dir", default="/opt/npu/VOCdevkit/VOC2012/SegmentationClass") - parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") - parser.add_argument("--net_input_width", default=500) - parser.add_argument("--net_input_height", default=500) - args = parser.parse_args() - - # generate dict according to annotation file for query resolution - # load width and height of input images - img_size_dict = dict() - - with open(args.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - # read bin file for generate predict result - bin_path = args.bin_data_path - total_img = set([name[:name.rfind('_')]for name in os.listdir(bin_path) if "bin" in name]) - - res_buff = [] - for bin_file in sorted(total_img): - path_base = os.path.join(bin_path, bin_file) - # load all segected output tensor - - 
output = np.fromfile(path_base + "_" + str(1) + ".bin", dtype="int32") - output = np.reshape(output, [500, 500]) - current_img_size = img_size_dict[bin_file] - output = postprocess_mask(output, img_size_dict[bin_file], 500, 500) - res_buff.append(output) - - seg_result = res_buff - # ground truth - img_infos = load_annotations(args.img_dir, args.ann_dir, split=args.split) - gt_seg_maps = get_gt_seg_maps(img_infos, args.ann_dir) - seg_result = voc2012_evaluation(seg_result, gt_seg_maps) - - - with open('./voc_seg_result.txt', 'w') as f: - for key, value in seg_result.items(): - f.write(key + ': ' + str(value) + '\n') - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import os.path as osp +import torch +import mmcv +import numpy as np +from terminaltables import AsciiTable + +CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', + 'train', 'tvmonitor') + +PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +def load_annotations(img_dir, ann_dir, split): + img_suffix = '.jpg' + seg_map_suffix = '.png' + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + return img_infos + + +def get_gt_seg_maps(img_infos, ann_dir): + """Get ground truth segmentation maps for evaluation.""" + gt_seg_maps = [] + for img_info in img_infos: + seg_map = osp.join(ann_dir, img_info['ann']['seg_map']) + gt_seg_map = mmcv.imread( + seg_map, flag='unchanged', backend='pillow') + gt_seg_maps.append(gt_seg_map) + return gt_seg_maps + + +def voc2012_evaluation(results, gt_seg_maps): + metric = ['mIoU'] + eval_results = {} + + num_classes = len(CLASSES) + ignore_index = 255 + label_map = dict() + reduce_zero_label = False + + num_imgs = len(results) + assert len(gt_seg_maps) == num_imgs + total_area_intersect = torch.zeros((num_classes,), dtype=torch.float64) + total_area_union = torch.zeros((num_classes,), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes,), dtype=torch.float64) + total_area_label = torch.zeros((num_classes,), dtype=torch.float64) + for i in range(num_imgs): + if isinstance(results[i], str): + pred_label = 
torch.from_numpy(np.load(results[i])) + else: + pred_label = torch.from_numpy((results[i])) + + if isinstance(gt_seg_maps[i], str): + label = torch.from_numpy( + mmcv.imread(gt_seg_maps[i], flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(gt_seg_maps[i]) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + all_acc = total_area_intersect.sum() / total_area_label.sum() + acc = total_area_intersect / total_area_label + ret_metrics = [all_acc, acc] + iou = total_area_intersect / total_area_union + ret_metrics.append(iou) + ret_metrics = [metric.numpy() for metric in ret_metrics] + + class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']] + class_names = CLASSES + + ret_metrics_round = [ + np.round(ret_metric * 100, 2) for ret_metric in ret_metrics + ] + for i in range(num_classes): + class_table_data.append([class_names[i]] + + [m[i] for m in ret_metrics_round[2:]] + + [ret_metrics_round[1][i]]) + summary_table_data = [['Scope'] + + ['m' + head + for head in class_table_data[0][1:]] + ['aAcc']] + ret_metrics_mean = [ + np.round(np.nanmean(ret_metric) * 100, 2) + for ret_metric in ret_metrics + ] + summary_table_data.append(['global'] + ret_metrics_mean[2:] + + [ret_metrics_mean[1]] + + [ret_metrics_mean[0]]) + + print('per class results:') + table = AsciiTable(class_table_data) + print('\n' + table.table) + print('Summary:') + table = AsciiTable(summary_table_data) + print('\n' + table.table) + + for i in range(1, len(summary_table_data[0])): + eval_results[summary_table_data[0] + [i]] = summary_table_data[1][i] / 100.0 + for idx, sub_metric in enumerate(class_table_data[0][1:], 1): + for item in class_table_data[1:]: + eval_results[str(sub_metric) + '.' 
+ + str(item[0])] = item[idx] / 100.0 + return eval_results + + +def postprocess_mask(mask, image_size, net_input_width, net_input_height): + w = image_size[0] + h = image_size[1] + scale = min(net_input_width / w, net_input_height / h) + + pad_w = net_input_width - w * scale + pad_h = net_input_height - h * scale + pad_left = (pad_w // 2) + pad_top = (pad_h // 2) + if pad_top < 0: + pad_top = 0 + if pad_left < 0: + pad_left = 0 + pad_left = int(pad_left) + pad_top = int(pad_top) + a = int(500 - pad_top) + b = int(500 - pad_left) + mask = mask[pad_top:a, pad_left:b] + import torch.nn.functional as F + mask = torch.from_numpy(mask).to(dtype=torch.float32) + mask = mask.expand((1, 1, mask.size(0), mask.size(1))) + mask = F.interpolate(mask, size=(int(h), int(w)), mode='bilinear', align_corners=False) + + mask = mask.squeeze().to(dtype=torch.int32).numpy() + return mask + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--bin_data_path", default="./result/dumpOutput_device0") + parser.add_argument("--test_annotation", default="./voc12_jpg.info") + parser.add_argument("--img_dir", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages") + parser.add_argument("--ann_dir", default="/opt/npu/VOCdevkit/VOC2012/SegmentationClass") + parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") + parser.add_argument("--net_input_width", default=500) + parser.add_argument("--net_input_height", default=500) + args = parser.parse_args() + + # generate dict according to annotation file for query resolution + # load width and height of input images + img_size_dict = dict() + + with open(args.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + # read bin file for generate predict result + bin_path = args.bin_data_path + total_img = set([name[:name.rfind('_')]for name in os.listdir(bin_path) if "bin" in name]) + + res_buff = [] + for bin_file in sorted(total_img): + path_base = os.path.join(bin_path, bin_file) + # load all segected output tensor + + output = np.fromfile(path_base + "_" + str(1) + ".bin", dtype="int32") + output = np.reshape(output, [500, 500]) + current_img_size = img_size_dict[bin_file] + output = postprocess_mask(output, img_size_dict[bin_file], 500, 500) + res_buff.append(output) + + seg_result = res_buff + # ground truth + img_infos = load_annotations(args.img_dir, args.ann_dir, split=args.split) + gt_seg_maps = get_gt_seg_maps(img_infos, args.ann_dir) + seg_result = voc2012_evaluation(seg_result, gt_seg_maps) + + + with open('./voc_seg_result.txt', 'w') as f: + for key, value in seg_result.items(): + f.write(key + ': ' + str(value) + '\n') + diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_preprocess.py index e8e5074b82c6e91b71922e3a521cdca1d0119527..22cd2b7b53971fb4695c6789a728ffc811bd8679 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/mmsegmentation_voc2012_preprocess.py @@ -1,95 +1,95 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import os -import cv2 -import argparse -import mmcv -import torch - - -dataset_config = { - 'mean': (123.675, 116.28, 103.53), - 'std': (58.395, 57.12, 57.375) -} - - -tensor_height = 500 -tensor_width = 500 - - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') - return resized_img - - -def voc2012_preprocess(input_image, output_bin_path): - img_name = input_image.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - bin_fl = os.path.join(output_bin_path, bin_name) - - one_img = mmcv.imread(os.path.join(input_image), backend='cv2') - one_img = resize(one_img, (tensor_width, tensor_height)) - - mean = np.array(dataset_config['mean'], dtype=np.float32) - std = np.array(dataset_config['std'], dtype=np.float32) - one_img = mmcv.imnormalize(one_img, mean, std) - - h = one_img.shape[0] - w = one_img.shape[1] - pad_left = (tensor_width - w) // 2 - pad_top = (tensor_height - h) // 2 - pad_right = tensor_width - pad_left - w - pad_bottom = tensor_height - pad_top - h - one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) - - one_img = one_img.transpose(2, 0, 1) - one_img.tofile(bin_fl) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of FCN-8s pytorch model') - parser.add_argument("--image_folder_path", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages/", - help='image of dataset') - parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") - parser.add_argument("--bin_folder_path", default="./voc12_bin/", help='Preprocessed image buffer') - flags = parser.parse_args() - - if not os.path.exists(flags.bin_folder_path): - os.makedirs(flags.bin_folder_path) - - split = flags.split - img_suffix = '.jpg' - img_infos = [] - if split is not None: - with open(split) as f: - for line in f: - img_name = line.strip() - img_info = img_name + img_suffix - img_infos.append(img_info) - - images = os.listdir(flags.image_folder_path) - for image_name in images: - - if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg") and image_name in img_infos): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(flags.image_folder_path, image_name) - voc2012_preprocess(path_image, flags.bin_folder_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import os +import cv2 +import argparse +import mmcv +import torch + + +dataset_config = { + 'mean': (123.675, 116.28, 103.53), + 'std': (58.395, 57.12, 57.375) +} + + +tensor_height = 500 +tensor_width = 500 + + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') + return resized_img + + +def voc2012_preprocess(input_image, output_bin_path): + img_name = input_image.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + bin_fl = os.path.join(output_bin_path, bin_name) + + one_img = mmcv.imread(os.path.join(input_image), backend='cv2') + one_img = resize(one_img, (tensor_width, tensor_height)) + + mean = np.array(dataset_config['mean'], dtype=np.float32) + std = np.array(dataset_config['std'], dtype=np.float32) + one_img = mmcv.imnormalize(one_img, mean, std) + + h = one_img.shape[0] + w = one_img.shape[1] + pad_left = (tensor_width - w) // 2 + pad_top = (tensor_height - h) // 2 + pad_right = tensor_width - pad_left - w + pad_bottom = tensor_height - pad_top - h + one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) + + one_img = one_img.transpose(2, 0, 1) + one_img.tofile(bin_fl) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of FCN-8s pytorch model') + parser.add_argument("--image_folder_path", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages/", + help='image of dataset') + parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") + parser.add_argument("--bin_folder_path", default="./voc12_bin/", help='Preprocessed image buffer') + flags = parser.parse_args() + + if not os.path.exists(flags.bin_folder_path): + os.makedirs(flags.bin_folder_path) + + split = flags.split + img_suffix = '.jpg' + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = img_name + img_suffix + img_infos.append(img_info) + + images = os.listdir(flags.image_folder_path) + for image_name in images: + + if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith(".jpg") and image_name in img_infos): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(flags.image_folder_path, image_name) + voc2012_preprocess(path_image, flags.bin_folder_path) diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/requirements.txt index b61b7bcdad2d8b509e3de4f97bb4d74ffcfe6429..f86ad403f35f1e158cb4397d24e2f69c92d9aa92 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.20.1 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.20.1 opencv-python == 4.5.2.52 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/README.md b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/README.md index 127a2e645ffd5beab55197bbc48bca6a6eac26f4..fa794daeac35f8ae48411862a030b21b435d9261 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/README.md +++ 
b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/README.md @@ -1,31 +1,31 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd fcn-8s - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取、修改和安装模型代码 -git clone https://github.com/open-mmlab/mmcv.git -cd mmcv -pip3.7 install -e . -cd .. -git clone https://github.com/open-mmlab/mmsegmentation.git -cd mmsegmentation -pip3.7 install -e . # or "python3.7 setup.py develop" -cd .. - -5.获取权重文件 -wget https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth - -6.获取benchmark工具 -将将benchmark.x86_64,benchmark.aarch64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/opt/npu/ +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd fcn-8s + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取、修改和安装模型代码 +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +pip3.7 install -e . +cd .. +git clone https://github.com/open-mmlab/mmsegmentation.git +cd mmsegmentation +pip3.7 install -e . # or "python3.7 setup.py develop" +cd .. + +5.获取权重文件 +wget https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth + +6.获取benchmark工具 +将将benchmark.x86_64,benchmark.aarch64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/opt/npu/ diff --git a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FCN-8s/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/Fast_SCNN_pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/Fast_SCNN_pth2onnx.py index b8ec935d0bf060ca35689a979e729094afa81cc6..f30505c1ae8bca8d0620db5323d69db19712ffa2 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/Fast_SCNN_pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/Fast_SCNN_pth2onnx.py @@ -1,46 +1,46 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
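The `fps = ... * 4` step in parse.py above reflects the convention used throughout these READMEs: the benchmark tool reports per-device throughput, and an Ascend 310 card holds four devices, so the single-device `Interface throughputRate` is multiplied by 4 before comparing against T4, whose throughput is derived as 1000 / (mean latency in ms / batch size). A minimal sketch of that arithmetic, using the FastSCNN bs1 figures quoted later in this patch as illustrative inputs:

```
# Sketch of the card-level throughput comparison used in these READMEs.
# Numbers are the FastSCNN bs1 figures quoted later in this patch.
def ascend310_card_fps(interface_throughput_rate, devices=4):
    # benchmark reports per-device fps; a 310 card has 4 devices
    return interface_throughput_rate * devices

def t4_fps(mean_latency_ms, batch_size):
    # trtexec reports mean GPU compute latency per batch
    return 1000.0 / (mean_latency_ms / batch_size)

npu = ascend310_card_fps(5.5718)       # ~22.29 fps
gpu = t4_fps(6.17022, batch_size=1)    # ~162.07 fps
print("310 card: {:.2f} fps, T4: {:.2f} fps".format(npu, gpu))
```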
-# ============================================================================ - -import torch -import torch.onnx -from collections import OrderedDict -import sys -sys.path.append('./SegmenTron') -from segmentron.models.model_zoo import get_segmentation_model -from segmentron.utils.options import parse_args -from segmentron.config import cfg -import ssl - - -def pth2onnx(): - model = get_segmentation_model() - checkpoint = torch.load(args.pth_path, map_location='cpu') - model.load_state_dict(checkpoint) - model.eval() - input_names = ["image"] - output_names = ["output1"] - dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} - dummy_input1 = torch.randn(args.batch_size, 3, 1024, 2048) - output_file1 = args.onnx_name + '.onnx' - torch.onnx.export(model, dummy_input1, output_file1, input_names = input_names, output_names = output_names, opset_version=11, verbose=True) - print(args.onnx_name,"batchsize",args.batch_size," onnx has transformed successfully") - print('onnx export done.') - -if __name__ == "__main__": - args = parse_args() - args.config_file = 'SegmenTron/configs/cityscapes_fast_scnn.yaml' - cfg.update_from_file(args.config_file) - cfg.update_from_list(args.opts) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
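The export script in this file builds a `dynamic_axes` dict but, as the FastSCNN README notes, the model does not support dynamic batch, so the dict is never passed to `torch.onnx.export` and each batch size gets its own ONNX file. A hedged sketch of checking such a fixed-batch export before handing it to ATC (file and input names follow this patch; onnxruntime is an extra dependency not listed in requirements.txt):

```
# Sketch: sanity-check a fixed-batch FastSCNN ONNX export before ATC conversion.
# Assumes fast_scnn_bs1.onnx with a single input "image" of shape (1, 3, 1024, 2048).
import numpy as np
import onnx
import onnxruntime as ort

model = onnx.load("fast_scnn_bs1.onnx")
onnx.checker.check_model(model)                  # structural validity

sess = ort.InferenceSession("fast_scnn_bs1.onnx")
dummy = np.random.randn(1, 3, 1024, 2048).astype(np.float32)
outputs = sess.run(None, {"image": dummy})
print([o.shape for o in outputs])                # expect a fixed batch dim of 1
```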
+# ============================================================================ + +import torch +import torch.onnx +from collections import OrderedDict +import sys +sys.path.append('./SegmenTron') +from segmentron.models.model_zoo import get_segmentation_model +from segmentron.utils.options import parse_args +from segmentron.config import cfg +import ssl + + +def pth2onnx(): + model = get_segmentation_model() + checkpoint = torch.load(args.pth_path, map_location='cpu') + model.load_state_dict(checkpoint) + model.eval() + input_names = ["image"] + output_names = ["output1"] + dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} + dummy_input1 = torch.randn(args.batch_size, 3, 1024, 2048) + output_file1 = args.onnx_name + '.onnx' + torch.onnx.export(model, dummy_input1, output_file1, input_names = input_names, output_names = output_names, opset_version=11, verbose=True) + print(args.onnx_name,"batchsize",args.batch_size," onnx has transformed successfully") + print('onnx export done.') + +if __name__ == "__main__": + args = parse_args() + args.config_file = 'SegmenTron/configs/cityscapes_fast_scnn.yaml' + cfg.update_from_file(args.config_file) + cfg.update_from_list(args.opts) pth2onnx() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/README.md b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/README.md index ddff64a5bc6f2355a460e676f974080946f4b514..ce5890527fd2a50d9fb3cd3c84eb407ed9f3ebd3 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/README.md @@ -1,369 +1,369 @@ -# FastSCNN推理说明 - -## 1 模型概述 - -- **[论文地址](https://arxiv.org/abs/1902.04502)** -- **[代码地址](https://gitee.com/wang-chaojiemayj/modelzoo/tree/master/contrib/PyTorch/Research/cv/image_segmentation/FastSCNN)** - -### 1.1 论文地址 - -[FastSCNN论文](https://arxiv.org/abs/1902.04502) - -### 1.2 代码地址 - -[FascSCNN代码](https://gitee.com/wang-chaojiemayj/modelzoo/tree/master/contrib/PyTorch/Research/cv/image_segmentation/FastSCNN) - -branch:master - -commitid:e86409484cf89467a569be43acee1b3f06b92305 - - -## 2 环境说明 - -- 深度学习框架 -- python第三方库 - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.2 - -pytorch >= 1.5.0 -torchvision >= 0.6.0 -onnx == 1.7.0 -onnx-simplifier == 0.3.6 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.21.1 -Pillow == 8.3.0 -opencv-python == 4.5.2.54 -``` - -## 3 模型转换 - -- pth转om模型 - -### 3.1 pth转om模型 - -1.获取pth权重文件 - -获取权重文件方法,可从Ascend modelzoo FastSCNN_ACL_Pytorch 模型压缩包获取 - -md5sum:efc7247270298f3f57e88375011b52ee - -2.FastSCNN模型已经上传至代码仓,使用git工具获取FastSCNN模型代码 -使用gitclone获取模型训练的代码,切换到tuili分支。 - -``` -git clone https://gitee.com/wang-chaojiemayj/modelzoo.git -cd modelzoo -git checkout tuili -``` - -进入FastSCNN目录 - -``` -cd ./contrib/ACL_PyTorch/Research/cv/segmentation/FastSCNN -``` - -使用gitclone下载模型代码 - -``` -git clone https://github.com/LikeLy-Journey/SegmenTron -``` - -由于onnx不支持AdaptiveAvgPool算子,需要使用module.patch修改module.py。 -将FastSCNN目录下的module.patch放到FastSCNN/SegmenTron目录下 -执行 - -``` -cd ./SegmenTron -git apply module.patch -cd .. 
-``` - -3.执行pth2om脚本,生成om模型文件 - -ascend-toolkit版本:5.0.2 -``` -bs1: -python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs1 --batch_size 1 -bs16: -python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs16 --batch_size 16 - **bs4:bs16无法导出时使用 -python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs4 --batch_size 4** -``` -参数说明: ---pth_path:pth权重文件的路径,可自行设置,默认值为best_model.pth; ---onnx_name:需要转出的onnx模型的名称,可自行设置,默认值为fast_scnn_bs1(由于本模型不支持动态batch,推荐在模型名后加后缀,如‘_bs1’,用以区分不同batch_size的onnx模型); ---batch_size:导出的onnx模型的batch_size,可自行设置,默认值为1。 - -onnx转出om - -bs1: -``` -source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) -atc --framework=5 --model=fast_scnn_bs1.onnx --output=fast_scnn_bs1 --output_type=FP16 --input_format=NCHW --input_shape="image:1,3,1024,2048" --log=debug --soc_version=Ascend310 -``` -bs16: -``` -source env.sh -atc --framework=5 --model=fast_scnn_bs16.onnx --output=fast_scnn_bs16 --output_type=FP16 --input_format=NCHW --input_shape="image:16,3,1024,2048" --log=debug --soc_version=Ascend310 -``` -bs4:(bs16无法离线推理时使用) -``` -source env.sh -atc --framework=5 --model=fast_scnn_bs4.onnx --output=fast_scnn_bs4 --output_type=FP16 --input_format=NCHW --input_shape="image:4,3,1024,2048" --log=debug --soc_version=Ascend310 -``` - - -## 4 数据集预处理 - -- 数据集获取 -- 数据预处理 -- 生成数据集信息文件 - -### 4.1 数据集获取 - -本模型支持cityscapes leftImg8bit的500张验证集。用户需要下载[leftImg8bit_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)和[gtFine_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)数据集,解压,将两个数据集放在/opt/npu/datasets/cityscapes/目录下。推荐使用软连接,可以节省时间,数据集目录如下。 - -``` -|opt--npu--datasets -| |-- cityscapes -| | |-- gtFine -| | | |-- test -| | | |-- train -| | | |-- val -| | |-- leftImg8bit -| | |-- test -| | |-- train -| | |-- val -``` - - -### 4.2 数据集预处理 - -在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录创建软连接 - -``` -ln -s /opt/npu/datasets datasets -``` - -运行Fast_SCNN_preprocess.py - -``` -python3.7 Fast_SCNN_preprocess.py -``` - -数据预处理的结果会保存在/opt/npu/prep_datset -预处理之后的二进制文件目录如下: -/opt/npu/prep_dataset/datasets/leftImg8bit/ -/opt/npu/prep_dataset/datasets/gtFine/ -在modelzoo/contrib/ACL_PyTorch/Research/cv/segmentation/FastSCNN目录下创建软连接 - -``` -ln -s /opt/npu/prep_dataset prep_dataset -``` - -### 4.3 生成数据集信息文件 - -1.生成数据集信息文件脚本gen_dataset_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 - -``` -python3.7 gen_dataset_info.py -``` - -## 5 离线推理 - -- benchmark工具概述 -- 离线推理 - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -1.设置环境变量 - -``` -source env.sh -``` - -2.执行离线推理 -bs1: -``` -./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=fast_scnn_bs1.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=4 -om_path=fast_scnn_bs4.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False -``` -bs16: -``` -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=16 -om_path=./fast_scnn_bs16.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False -``` -bs4:(bs16无法离线推理时使用) -``` -./benchmark.x86_64 -model_type=vision -device_id=2 -batch_size=4 -om_path=fast_scnn_bs4.om 
-input_text_path=fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False -``` -``` -参数说明: -需要更改的参数 --device_id:使用的Ascend310处理器的卡号,可选0、1、2、3,尽量选择不同的卡号进行推理,若-device_id=0,离线推理结果会保存在./result/dumpOut -put_device0中,device0中的0代表卡号是0; --batch_size:om模型的batch_size; --om_path: 需要进行离线推理的om模型的路径; -不需要更改的参数: --input_text_path:om模型的二进制输入图片的路径信息文件的路径; --input_width:输入图片的宽度,FastSCNN模型是2048; --input_heigh:输入图片的高度,FastSCNN模型是1024; --output_binary:benchmark的输出是二进制文件还是txt文件,True代表输出为二进制文件; --useDvpp:是否使用Dvpp工具,FastSCNN模型不使用Dvpp工具,设置为False; -``` -## 6 精度对比 - -- 离线推理精度 -- 开源精度 -- 开源精度对比 - -### 6.1 离线推理精度统计 - -后处理统计mIoU - -调用cityscapes_acc_eval.py脚本推理结果与label比对,获取pixAcc和mIoU数据,结果保存在fast_scnn_bs1.log和fast_scnn_bs4.log - -``` -python3.7 cityscapes_acc_eval.py result/dumpOutput_device0/ ./out >fast_scnn_bs1.log -python3.7 cityscapes_acc_eval.py result/dumpOutput_device1/ ./out >fast_scnn_bs4.log -``` - -第一个为benchmark输出目录,第二个为输出重定向文件名 - -``` -pixAcc:94.29% mIoU:64.43 -``` - -经过对bs1与bs4的om测试,本模型batch1的精度与batch4的精度一致,精度数据如上 - -### 6.2 开源精度 - -pth精度 - -``` -Model pixAcc mIoU -FastSCNN 93.877% 64.46 -``` - -### 6.3 精度对比 - -将得到的om模型离线推理精度与pth精度作比较,精度下降不超过0.5%,故精度达标 - -## 7 性能对比 - -- NPU性能数据 -- T4性能数据 -- 性能对比 - -### 7.1 npu性能数据 - -1.benchmark工具在整个数据集上推理获得性能数据。 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 0.99593, latency: 502043[data read] throughputRate: 1.62003, moduleLatency: 617.273[preprocess] throughputRate: 1.20942, moduleLatency: 826.844[inference] throughputRate: 1.02697, Interface throughputRate: 5.5718, moduleLatency: 973.739[postprocess] throughputRate: 0.999452, moduleLatency: 1000.55 -``` - -Interface throughputRate: 5.5718,5.5718x4=22.286既是batch1 310单卡吞吐率 - -batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_1.txt: - -``` -[e2e] throughputRate: 0.429745, latency: 1.16348e+06[data read] throughputRate: 0.673692, moduleLatency: 1484.36[preprocess] throughputRate: 0.525523, moduleLatency: 1902.86[inference] throughputRate: 0.477698, Interface throughputRate: 5.59273, moduleLatency: 2100.2[postprocess] throughputRate: 0.107216, moduleLatency: 9327 -``` - -Interface throughputRate: 5.59273,5.59273x4=22.37092既是batch16 310单卡吞吐率 - - -### 7.2 T4性能数据 - -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 -batch1性能: - -``` -trtexec --onnx=fast_scnn_bs1.onnx --fp16 --shapes=image:1x3x1024x2048 --threads -``` - -``` -[07/20/2021-01:49:12] [I] GPU Compute[07/20/2021-01:49:12] [I] min: 6.1626 ms[07/20/2021-01:49:12] [I] max: 6.18018 ms[07/20/2021-01:49:12] [I] mean: 6.17022 ms[07/20/2021-01:49:12] [I] median: 6.17062 ms[07/20/2021-01:49:12] [I] percentile: 6.18018 ms at 99%[07/20/2021-01:49:12] [I] total compute time: 0.265319 s -``` - -batch1 t4单卡吞吐率:1000/(6.17022/1)=162.068fps - -batch4性能: - -``` -trtexec --onnx=fast_scnn_bs4.onnx --fp16 --shapes=image:4x3x1024x2048 --threads -``` - -``` -[08/25/2021-05:18:21] [I] GPU Compute[08/25/2021-05:18:21] [I] min: 23.7666 ms[08/25/2021-05:18:21] [I] max: 24.3643 ms[08/25/2021-05:18:21] [I] mean: 24.0295 ms[08/25/2021-05:18:21] [I] median: 23.9731 ms[08/25/2021-05:18:21] [I] percentile: 24.3643 ms at 99%[08/25/2021-05:18:21] [I] total compute time: 0.288354 s -``` - -batch4 t4单卡吞吐率:1000/(24.0295/4)=166.46fps - -### 7.3 性能对比 - -batch1:5.5718x4 < 1000/(6.17022/1) -batch2:5.59273x4 <1000/(24.0295/4) -310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率小,故310性能低于T4性能,性能不达标。 -对于batch1与batch4,310性能均低于T4性能,该模型放在contrib/ACL_PyTorch/Research目录下。 -**性能优化:** - 
-测试版本:CANN 5.0.2 - -目前可行的解决方案有三个: - -(1)优化TransData,修改five_2_four.py和four_2_five.py - -(2)输出节点由float32改为float16 - -(3)模型中Resize节点的mode由双线性为最近邻 - -具体优化方法如下: - -(1)修改five_2_four.py和four_2_five.py从profiling数据的op_statistic_0_1.csv看出影响性能的是TransData,ResizeBilinearV2D,AvgPoolV2算子。从op_summary_0_1.csv可以看出单个TransData的aicore耗时,确定可以可以优化。 - -``` -five_2_four.py:9928 修改如下: elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25],[16, 240, 7, 7], [16, 120, 14, 14], [1, 128, 32, 64], [1, 19, 1024, 2048], [2, 128, 32, 64], [2, 19, 1024, 2048]]: -``` - -``` - four_2_five.py:1219 修改如下: if src_format.upper() == "NCHW" and shape_input in [[16, 240, 7, 7], [16, 120, 14, 14],[1, 1, 1024, 2048, 16], [1, 8, 32, 64, 16], [2, 1, 1024, 2048, 16], [2, 8, 32, 64, 16]] and dtype_input == "float16": -``` - -(2)指定输出为fp16: - -``` -atc --framework=5 --model=fast_scnn_bs1_sim.onnx --output=fast_scnn_bs1 --output_type=FP16 --input_format=NCHW --input_shape="image:1,3,1024,2048" --log=debug --soc_version=Ascend310python3.7.5 -m onnxsim --input-shape="2,3,1024,2048" fast_scnn_bs2.onnx fast_scnn_bs2_sim.onnx --skip-optimizationatc --framework=5 --model=fast_scnn_bs2_sim.onnx --output=fast_scnn_bs2 --output_type=FP16 --input_format=NCHW --input_shape="image:2,3,1024,2048" --log=debug --soc_version=Ascend310 -``` - -(3)模型中Resize节点的mode由双线性为最近邻 - -``` -newnode229 = onnx.helper.make_node( 'Resize', name='Resize_229', inputs=['549', '560', '561', '559'], outputs=['562'], coordinate_transformation_mode='align_corners', cubic_coeff_a=-0.75, mode='nearest', nearest_mode='floor')newnode245 = onnx.helper.make_node( 'Resize', name='Resize_245', inputs=['566', '577', '578', '576'], outputs=['579'], coordinate_transformation_mode='align_corners', cubic_coeff_a=-0.75, mode='nearest', nearest_mode='floor')graph.node.remove(model.graph.node[126])graph.node.insert(126,newnode126)graph.node.remove(model.graph.node[144])graph.node.insert(144,newnode144)graph.node.remove(model.graph.node[162])graph.node.insert(162,newnode162)graph.node.remove(model.graph.node[180])graph.node.insert(180,newnode180)graph.node.remove(model.graph.node[185])graph.node.insert(185,newnode185)graph.node.remove(model.graph.node[213])graph.node.insert(213,newnode213)graph.node.remove(model.graph.node[229])graph.node.insert(229,newnode229)graph.node.remove(model.graph.node[245])graph.node.insert(245,newnode245)onnx.checker.check_model(model)onnx.save(model, 'bs1_resized.onnx') -``` - -(4)性能、精度统计 - -| 方法 | 精度 | 性能 | -| ------------------------------------------------- | --------------------------- | --------------------------- | -| 未优化 | | | -| 优化TransData,修改five_2_four.py和four_2_five.py | bs1:mIoU64.46;bs2:mIoU64.46 | bs1:4.135fps;bs2:6.265fps | -| 输出节点由float32改为float16 | bs1:mIoU64.43;bs2:mIoU64.43 | bs1:22.518fps;bs2:22.694fps | -| 模型中Resize节点的mode由双线性为最近邻 | bs1:mIoU60.41;bs1:mIoU60.41 | bs1:7.747fps;bs2:14.046fps | - -8.本模型经过指定输出结点为fp16后,精度为64.43,pth精度为64.46,精度达标;性能提高到22fps左右,故本次pr提交的模型输出为结点fp16。 - +# FastSCNN推理说明 + +## 1 模型概述 + +- **[论文地址](https://arxiv.org/abs/1902.04502)** +- **[代码地址](https://gitee.com/wang-chaojiemayj/modelzoo/tree/master/contrib/PyTorch/Research/cv/image_segmentation/FastSCNN)** + +### 1.1 论文地址 + +[FastSCNN论文](https://arxiv.org/abs/1902.04502) + +### 1.2 代码地址 + +[FascSCNN代码](https://gitee.com/wang-chaojiemayj/modelzoo/tree/master/contrib/PyTorch/Research/cv/image_segmentation/FastSCNN) + +branch:master + +commitid:e86409484cf89467a569be43acee1b3f06b92305 + + +## 2 环境说明 + +- 深度学习框架 +- python第三方库 + 
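The Resize-node rewrite quoted in optimization (3) above is collapsed onto a single line in this README; reshaped into readable form it amounts to the pattern below. Node names such as 'Resize_229' and the numeric tensor ids are taken from the README snippet and are specific to this exported FastSCNN graph, so treat them as placeholders.

```
# Readable sketch of the Resize bilinear -> nearest rewrite from optimization (3).
# Node/tensor names come from the README snippet and are model-specific.
import onnx

model = onnx.load("fast_scnn_bs1.onnx")
graph = model.graph

new_node = onnx.helper.make_node(
    'Resize',
    name='Resize_229',
    inputs=['549', '560', '561', '559'],
    outputs=['562'],
    coordinate_transformation_mode='align_corners',
    cubic_coeff_a=-0.75,
    mode='nearest',
    nearest_mode='floor')

# Replace the original bilinear Resize at the same position in the node list.
graph.node.remove(model.graph.node[229])
graph.node.insert(229, new_node)

onnx.checker.check_model(model)
onnx.save(model, 'bs1_resized.onnx')
```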
+### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.2 + +pytorch >= 1.5.0 +torchvision >= 0.6.0 +onnx == 1.7.0 +onnx-simplifier == 0.3.6 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.21.1 +Pillow == 8.3.0 +opencv-python == 4.5.2.54 +``` + +## 3 模型转换 + +- pth转om模型 + +### 3.1 pth转om模型 + +1.获取pth权重文件 + +获取权重文件方法,可从Ascend modelzoo FastSCNN_ACL_Pytorch 模型压缩包获取 + +md5sum:efc7247270298f3f57e88375011b52ee + +2.FastSCNN模型已经上传至代码仓,使用git工具获取FastSCNN模型代码 +使用gitclone获取模型训练的代码,切换到tuili分支。 + +``` +git clone https://gitee.com/wang-chaojiemayj/modelzoo.git +cd modelzoo +git checkout tuili +``` + +进入FastSCNN目录 + +``` +cd ./contrib/ACL_PyTorch/Research/cv/segmentation/FastSCNN +``` + +使用gitclone下载模型代码 + +``` +git clone https://github.com/LikeLy-Journey/SegmenTron +``` + +由于onnx不支持AdaptiveAvgPool算子,需要使用module.patch修改module.py。 +将FastSCNN目录下的module.patch放到FastSCNN/SegmenTron目录下 +执行 + +``` +cd ./SegmenTron +git apply module.patch +cd .. +``` + +3.执行pth2om脚本,生成om模型文件 + +ascend-toolkit版本:5.0.2 +``` +bs1: +python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs1 --batch_size 1 +bs16: +python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs16 --batch_size 16 + **bs4:bs16无法导出时使用 +python3.7 Fast_SCNN_pth2onnx.py --pth_path best_model.pth --onnx_name fast_scnn_bs4 --batch_size 4** +``` +参数说明: +--pth_path:pth权重文件的路径,可自行设置,默认值为best_model.pth; +--onnx_name:需要转出的onnx模型的名称,可自行设置,默认值为fast_scnn_bs1(由于本模型不支持动态batch,推荐在模型名后加后缀,如‘_bs1’,用以区分不同batch_size的onnx模型); +--batch_size:导出的onnx模型的batch_size,可自行设置,默认值为1。 + +onnx转出om + +bs1: +``` +source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) +atc --framework=5 --model=fast_scnn_bs1.onnx --output=fast_scnn_bs1 --output_type=FP16 --input_format=NCHW --input_shape="image:1,3,1024,2048" --log=debug --soc_version=Ascend310 +``` +bs16: +``` +source env.sh +atc --framework=5 --model=fast_scnn_bs16.onnx --output=fast_scnn_bs16 --output_type=FP16 --input_format=NCHW --input_shape="image:16,3,1024,2048" --log=debug --soc_version=Ascend310 +``` +bs4:(bs16无法离线推理时使用) +``` +source env.sh +atc --framework=5 --model=fast_scnn_bs4.onnx --output=fast_scnn_bs4 --output_type=FP16 --input_format=NCHW --input_shape="image:4,3,1024,2048" --log=debug --soc_version=Ascend310 +``` + + +## 4 数据集预处理 + +- 数据集获取 +- 数据预处理 +- 生成数据集信息文件 + +### 4.1 数据集获取 + +本模型支持cityscapes leftImg8bit的500张验证集。用户需要下载[leftImg8bit_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)和[gtFine_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)数据集,解压,将两个数据集放在/opt/npu/datasets/cityscapes/目录下。推荐使用软连接,可以节省时间,数据集目录如下。 + +``` +|opt--npu--datasets +| |-- cityscapes +| | |-- gtFine +| | | |-- test +| | | |-- train +| | | |-- val +| | |-- leftImg8bit +| | |-- test +| | |-- train +| | |-- val +``` + + +### 4.2 数据集预处理 + +在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录创建软连接 + +``` +ln -s /opt/npu/datasets datasets +``` + +运行Fast_SCNN_preprocess.py + +``` +python3.7 Fast_SCNN_preprocess.py +``` + +数据预处理的结果会保存在/opt/npu/prep_datset +预处理之后的二进制文件目录如下: +/opt/npu/prep_dataset/datasets/leftImg8bit/ +/opt/npu/prep_dataset/datasets/gtFine/ +在modelzoo/contrib/ACL_PyTorch/Research/cv/segmentation/FastSCNN目录下创建软连接 + +``` +ln -s /opt/npu/prep_dataset prep_dataset +``` + +### 4.3 生成数据集信息文件 + +1.生成数据集信息文件脚本gen_dataset_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 + +``` +python3.7 gen_dataset_info.py +``` + +## 5 离线推理 + +- benchmark工具概述 +- 离线推理 + +### 5.1 benchmark工具概述 + 
+benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN 5.0.1 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +1.设置环境变量 + +``` +source env.sh +``` + +2.执行离线推理 +bs1: +``` +./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=fast_scnn_bs1.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=4 -om_path=fast_scnn_bs4.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False +``` +bs16: +``` +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=16 -om_path=./fast_scnn_bs16.om -input_text_path=./fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False +``` +bs4:(bs16无法离线推理时使用) +``` +./benchmark.x86_64 -model_type=vision -device_id=2 -batch_size=4 -om_path=fast_scnn_bs4.om -input_text_path=fast_scnn_prep_bin.info -input_width=2048 -input_height=1024 -output_binary=True -useDvpp=False +``` +``` +参数说明: +需要更改的参数 +-device_id:使用的Ascend310处理器的卡号,可选0、1、2、3,尽量选择不同的卡号进行推理,若-device_id=0,离线推理结果会保存在./result/dumpOut +put_device0中,device0中的0代表卡号是0; +-batch_size:om模型的batch_size; +-om_path: 需要进行离线推理的om模型的路径; +不需要更改的参数: +-input_text_path:om模型的二进制输入图片的路径信息文件的路径; +-input_width:输入图片的宽度,FastSCNN模型是2048; +-input_heigh:输入图片的高度,FastSCNN模型是1024; +-output_binary:benchmark的输出是二进制文件还是txt文件,True代表输出为二进制文件; +-useDvpp:是否使用Dvpp工具,FastSCNN模型不使用Dvpp工具,设置为False; +``` +## 6 精度对比 + +- 离线推理精度 +- 开源精度 +- 开源精度对比 + +### 6.1 离线推理精度统计 + +后处理统计mIoU + +调用cityscapes_acc_eval.py脚本推理结果与label比对,获取pixAcc和mIoU数据,结果保存在fast_scnn_bs1.log和fast_scnn_bs4.log + +``` +python3.7 cityscapes_acc_eval.py result/dumpOutput_device0/ ./out >fast_scnn_bs1.log +python3.7 cityscapes_acc_eval.py result/dumpOutput_device1/ ./out >fast_scnn_bs4.log +``` + +第一个为benchmark输出目录,第二个为输出重定向文件名 + +``` +pixAcc:94.29% mIoU:64.43 +``` + +经过对bs1与bs4的om测试,本模型batch1的精度与batch4的精度一致,精度数据如上 + +### 6.2 开源精度 + +pth精度 + +``` +Model pixAcc mIoU +FastSCNN 93.877% 64.46 +``` + +### 6.3 精度对比 + +将得到的om模型离线推理精度与pth精度作比较,精度下降不超过0.5%,故精度达标 + +## 7 性能对比 + +- NPU性能数据 +- T4性能数据 +- 性能对比 + +### 7.1 npu性能数据 + +1.benchmark工具在整个数据集上推理获得性能数据。 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 0.99593, latency: 502043[data read] throughputRate: 1.62003, moduleLatency: 617.273[preprocess] throughputRate: 1.20942, moduleLatency: 826.844[inference] throughputRate: 1.02697, Interface throughputRate: 5.5718, moduleLatency: 973.739[postprocess] throughputRate: 0.999452, moduleLatency: 1000.55 +``` + +Interface throughputRate: 5.5718,5.5718x4=22.286既是batch1 310单卡吞吐率 + +batch4的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_4_device_1.txt: + +``` +[e2e] throughputRate: 0.429745, latency: 1.16348e+06[data read] throughputRate: 0.673692, moduleLatency: 1484.36[preprocess] throughputRate: 0.525523, moduleLatency: 1902.86[inference] throughputRate: 0.477698, Interface throughputRate: 5.59273, moduleLatency: 2100.2[postprocess] throughputRate: 0.107216, moduleLatency: 9327 +``` + +Interface throughputRate: 5.59273,5.59273x4=22.37092既是batch16 310单卡吞吐率 + + +### 7.2 T4性能数据 + +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 +batch1性能: + +``` +trtexec --onnx=fast_scnn_bs1.onnx --fp16 --shapes=image:1x3x1024x2048 --threads +``` + +``` +[07/20/2021-01:49:12] [I] GPU Compute[07/20/2021-01:49:12] [I] min: 6.1626 
ms[07/20/2021-01:49:12] [I] max: 6.18018 ms[07/20/2021-01:49:12] [I] mean: 6.17022 ms[07/20/2021-01:49:12] [I] median: 6.17062 ms[07/20/2021-01:49:12] [I] percentile: 6.18018 ms at 99%[07/20/2021-01:49:12] [I] total compute time: 0.265319 s +``` + +batch1 t4单卡吞吐率:1000/(6.17022/1)=162.068fps + +batch4性能: + +``` +trtexec --onnx=fast_scnn_bs4.onnx --fp16 --shapes=image:4x3x1024x2048 --threads +``` + +``` +[08/25/2021-05:18:21] [I] GPU Compute[08/25/2021-05:18:21] [I] min: 23.7666 ms[08/25/2021-05:18:21] [I] max: 24.3643 ms[08/25/2021-05:18:21] [I] mean: 24.0295 ms[08/25/2021-05:18:21] [I] median: 23.9731 ms[08/25/2021-05:18:21] [I] percentile: 24.3643 ms at 99%[08/25/2021-05:18:21] [I] total compute time: 0.288354 s +``` + +batch4 t4单卡吞吐率:1000/(24.0295/4)=166.46fps + +### 7.3 性能对比 + +batch1:5.5718x4 < 1000/(6.17022/1) +batch2:5.59273x4 <1000/(24.0295/4) +310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率小,故310性能低于T4性能,性能不达标。 +对于batch1与batch4,310性能均低于T4性能,该模型放在contrib/ACL_PyTorch/Research目录下。 +**性能优化:** + +测试版本:CANN 5.0.2 + +目前可行的解决方案有三个: + +(1)优化TransData,修改five_2_four.py和four_2_five.py + +(2)输出节点由float32改为float16 + +(3)模型中Resize节点的mode由双线性为最近邻 + +具体优化方法如下: + +(1)修改five_2_four.py和four_2_five.py从profiling数据的op_statistic_0_1.csv看出影响性能的是TransData,ResizeBilinearV2D,AvgPoolV2算子。从op_summary_0_1.csv可以看出单个TransData的aicore耗时,确定可以可以优化。 + +``` +five_2_four.py:9928 修改如下: elif dst_format.lower() == "nchw" and dst_shape in [[2560, 512, 4, 26], [2560, 512, 1, 26], [2560, 256, 8, 25],[16, 240, 7, 7], [16, 120, 14, 14], [1, 128, 32, 64], [1, 19, 1024, 2048], [2, 128, 32, 64], [2, 19, 1024, 2048]]: +``` + +``` + four_2_five.py:1219 修改如下: if src_format.upper() == "NCHW" and shape_input in [[16, 240, 7, 7], [16, 120, 14, 14],[1, 1, 1024, 2048, 16], [1, 8, 32, 64, 16], [2, 1, 1024, 2048, 16], [2, 8, 32, 64, 16]] and dtype_input == "float16": +``` + +(2)指定输出为fp16: + +``` +atc --framework=5 --model=fast_scnn_bs1_sim.onnx --output=fast_scnn_bs1 --output_type=FP16 --input_format=NCHW --input_shape="image:1,3,1024,2048" --log=debug --soc_version=Ascend310python3.7.5 -m onnxsim --input-shape="2,3,1024,2048" fast_scnn_bs2.onnx fast_scnn_bs2_sim.onnx --skip-optimizationatc --framework=5 --model=fast_scnn_bs2_sim.onnx --output=fast_scnn_bs2 --output_type=FP16 --input_format=NCHW --input_shape="image:2,3,1024,2048" --log=debug --soc_version=Ascend310 +``` + +(3)模型中Resize节点的mode由双线性为最近邻 + +``` +newnode229 = onnx.helper.make_node( 'Resize', name='Resize_229', inputs=['549', '560', '561', '559'], outputs=['562'], coordinate_transformation_mode='align_corners', cubic_coeff_a=-0.75, mode='nearest', nearest_mode='floor')newnode245 = onnx.helper.make_node( 'Resize', name='Resize_245', inputs=['566', '577', '578', '576'], outputs=['579'], coordinate_transformation_mode='align_corners', cubic_coeff_a=-0.75, mode='nearest', nearest_mode='floor')graph.node.remove(model.graph.node[126])graph.node.insert(126,newnode126)graph.node.remove(model.graph.node[144])graph.node.insert(144,newnode144)graph.node.remove(model.graph.node[162])graph.node.insert(162,newnode162)graph.node.remove(model.graph.node[180])graph.node.insert(180,newnode180)graph.node.remove(model.graph.node[185])graph.node.insert(185,newnode185)graph.node.remove(model.graph.node[213])graph.node.insert(213,newnode213)graph.node.remove(model.graph.node[229])graph.node.insert(229,newnode229)graph.node.remove(model.graph.node[245])graph.node.insert(245,newnode245)onnx.checker.check_model(model)onnx.save(model, 'bs1_resized.onnx') +``` + +(4)性能、精度统计 + +| 方法 | 精度 | 性能 | +| 
------------------------------------------------- | --------------------------- | --------------------------- | +| 未优化 | | | +| 优化TransData,修改five_2_four.py和four_2_five.py | bs1:mIoU64.46;bs2:mIoU64.46 | bs1:4.135fps;bs2:6.265fps | +| 输出节点由float32改为float16 | bs1:mIoU64.43;bs2:mIoU64.43 | bs1:22.518fps;bs2:22.694fps | +| 模型中Resize节点的mode由双线性为最近邻 | bs1:mIoU60.41;bs1:mIoU60.41 | bs1:7.747fps;bs2:14.046fps | + +8.本模型经过指定输出结点为fp16后,精度为64.43,pth精度为64.46,精度达标;性能提高到22fps左右,故本次pr提交的模型输出为结点fp16。 + diff --git a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/gen_dataset_info.py index 2f15f923bab1cda87d483e2b60e38f41391d7cc6..cc42d73672f7e87d4ab4fa554d1cd567118b21f8 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/gen_dataset_info.py @@ -1,47 +1,47 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - - -def get_bin_info(img_root_path='prep_dataset/datasets/leftImg8bit', - mask_folder='opt/npu/prep_dataset/datasets/cityscapes/gtFine', - info_name='fast_scnn_prep_bin.info', width='2048', height='1024'): - img_path = [] - mask_path = [] - for root, _, files in os.walk(img_root_path): - for filename in files: - if filename.startswith('._'): - continue - if filename.endswith('.bin'): - imgpath = os.path.join(root, filename) - img_path.append(imgpath) - foldername = os.path.basename(os.path.dirname(imgpath)) - maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') - maskpath = os.path.join(mask_folder, foldername, maskname) - mask_path.append(maskpath) - - with open(info_name, 'w') as fp: - for index in range(len(img_path)): - content = ' '.join([str(index), img_path[index], width, height]) - fp.write(content) - fp.write('\n') - - -if __name__ == '__main__': - img_root_path = 'prep_dataset/datasets/leftImg8bit/' - mask_folder = '/opt/npu/prep_dataset/datasets/gtFine' - info_name = 'fast_scnn_prep_bin.info' - width = '2048' - height = '1024' - get_bin_info(img_root_path=img_root_path, mask_folder=mask_folder, info_name=info_name, width=width, height=height) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
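gen_dataset_info.py writes one record per preprocessed image in the form `index path width height`, which is exactly what the benchmark tool consumes through `-input_text_path`. A small sketch of reading such an info file back, purely for inspection (file name as generated above):

```
# Sketch: read back fast_scnn_prep_bin.info as written by get_bin_info().
# Each line is: "<index> <path/to/bin> <width> <height>".
def load_info(info_name="fast_scnn_prep_bin.info"):
    records = []
    with open(info_name) as fp:
        for line in fp:
            idx, path, width, height = line.split()
            records.append((int(idx), path, int(width), int(height)))
    return records

# Example:
# for idx, path, w, h in load_info()[:3]:
#     print(idx, path, w, h)
```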
+import os + + +def get_bin_info(img_root_path='prep_dataset/datasets/leftImg8bit', + mask_folder='opt/npu/prep_dataset/datasets/cityscapes/gtFine', + info_name='fast_scnn_prep_bin.info', width='2048', height='1024'): + img_path = [] + mask_path = [] + for root, _, files in os.walk(img_root_path): + for filename in files: + if filename.startswith('._'): + continue + if filename.endswith('.bin'): + imgpath = os.path.join(root, filename) + img_path.append(imgpath) + foldername = os.path.basename(os.path.dirname(imgpath)) + maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') + maskpath = os.path.join(mask_folder, foldername, maskname) + mask_path.append(maskpath) + + with open(info_name, 'w') as fp: + for index in range(len(img_path)): + content = ' '.join([str(index), img_path[index], width, height]) + fp.write(content) + fp.write('\n') + + +if __name__ == '__main__': + img_root_path = 'prep_dataset/datasets/leftImg8bit/' + mask_folder = '/opt/npu/prep_dataset/datasets/gtFine' + info_name = 'fast_scnn_prep_bin.info' + width = '2048' + height = '1024' + get_bin_info(img_root_path=img_root_path, mask_folder=mask_folder, info_name=info_name, width=width, height=height) diff --git a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/README.md b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/README.md index 94ddb7cc6c038ad7fb5c6311a72d728c4a5b87b5..bc1740a35c7206b0f21e461ef01ff655ab328be2 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/README.md @@ -1,120 +1,120 @@ -环境准备: - -1.数据集路径 -本模型数据集放在/opt/npu/,具体文件路径为:opt/npu/datasets -本模型支持cityscapes leftImg8bit的500张验证集。用户需要下载[leftImg8bit_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)和[gtFine_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)数据集,解压,将两个数据集放在/opt/npu/datasets/cityscapes/目录下。推荐使用软连接,可以节省时间,数据集目录如下。 - -``` -|opt--npu--datasets -| |-- cityscapes -| | |-- gtFine -| | | |-- test -| | | |-- train -| | | |-- val -| | |-- leftImg8bit -| | |-- test -| | |-- train -| | |-- val -``` - -2.进入工作目录 -cd Fast_SCNN - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7.5 install -r requirements.txt - -4.获取,修改与安装开源模型代码 -使用gitclone获取模型训练的代码,切换到tuili分支。 - -``` -git clone https://gitee.com/wang-chaojiemayj/modelzoo.git -cd modelzoo -git checkout tuili -``` - -进入FastSCNN目录 - -``` -cd ./contrib/PyTorch/Research/cv/image_segmentation/Fast_SCNN/ -``` - -使用gitclone下载模型代码 - -``` -git clone https://github.com/LikeLy-Journey/SegmenTron -``` - -由于onnx不支持AdaptiveavgPool算子,需要使用module.patch修改Fast_SCNN/SegmenTron/module.py。 -将FastSCNN目录下的module.patch放到FastSCNN/SegmenTron目录下. -执行 - -``` -cd ./SegmenTron -git apply module.patch -cd .. 
-``` - -5.获取权重文件 -获取权重文件方法,可从Ascend modelzoo FastSCNN_ACL_Pytorch 模型压缩包获取 - - md5sum:efc7247270298f3f57e88375011b52ee - -6.数据预处理 -在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录创建软连接 - -``` -ln -s /opt/npu/datasets datasets -``` - -运行Fast_SCNN_preprocess.py - -``` -python3.7.5 Fast_SCNN_preprocess.py -``` - -数据预处理的结果会保存在/opt/npu/prep_datset -预处理之后的二进制文件目录如下: -/opt/npu/prep_dataset/datasets/leftImg8bit/ -/opt/npu/prep_dataset/datasets/gtFine/ -在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录下创建软连接 - -``` -ln -s /opt/npu/prep_dataset prep_dataset -``` - -运行gen_dataset_info.py获取二进制输入文件的info信息 - -``` -python3.7.5 gen_dataset_info.py -``` - -顺利运行会在当前目录下生成fast_scnn_prep_bin.info文件 - -6.获取benchmark工具 -将benchmark.x86_64和benchmark.aarch64放在当前目录 - -7.310上执行,执行时确保device空闲 - -ascend-toolkit版本:5.0.2 - -onnx转出om - -``` -source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) -bash test/pth2om.sh -成功运行会生成fast_scnn_bs1.onnx,fast_scnn_bs4.onnx,fast_scnn_bs8.onnx,fast_scnn_bs16.onnx,fast_scnn_bs32.onnx; -fast_scnn_bs1.om,fast_scnn_bs4.om,fast_scnn_bs8.om,fast_scnn_bs16.om,fast_scnn_bs32.om文件。 -(注意fast_scnn_bs32.onnx如果因为内存原因无法生成,也就无法导出fast_scnn_bs32.om。) -``` - -进行离线推理并进行精度、性能统计 - -``` -bash test/eval_acc_perf.sh -``` -会自动对fast_scnn_bs1.om、fast_scnn_bs16.om、fast_scnn_bs4.om进行精度、性能的统计。(fast_scnn_bs16.om可能会因为内存原因无法进行离线推理,运行报错后会自动跳过) - -8.在t4环境上将fast_scnn_bs1.onnx,fast_scnn_bs4.onnx,fast_scnn_bs8.onnx,fast_scnn_bs16.onnx,fast_scnn_bs32.onnx文件文件与perf_t4.sh放在同一目录 - -然后执行bash perf_t4.sh,执行时确保gpu空闲 - +环境准备: + +1.数据集路径 +本模型数据集放在/opt/npu/,具体文件路径为:opt/npu/datasets +本模型支持cityscapes leftImg8bit的500张验证集。用户需要下载[leftImg8bit_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)和[gtFine_trainvaltest.zip](http://www.cityscapes-dataset.com/downloads)数据集,解压,将两个数据集放在/opt/npu/datasets/cityscapes/目录下。推荐使用软连接,可以节省时间,数据集目录如下。 + +``` +|opt--npu--datasets +| |-- cityscapes +| | |-- gtFine +| | | |-- test +| | | |-- train +| | | |-- val +| | |-- leftImg8bit +| | |-- test +| | |-- train +| | |-- val +``` + +2.进入工作目录 +cd Fast_SCNN + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7.5 install -r requirements.txt + +4.获取,修改与安装开源模型代码 +使用gitclone获取模型训练的代码,切换到tuili分支。 + +``` +git clone https://gitee.com/wang-chaojiemayj/modelzoo.git +cd modelzoo +git checkout tuili +``` + +进入FastSCNN目录 + +``` +cd ./contrib/PyTorch/Research/cv/image_segmentation/Fast_SCNN/ +``` + +使用gitclone下载模型代码 + +``` +git clone https://github.com/LikeLy-Journey/SegmenTron +``` + +由于onnx不支持AdaptiveavgPool算子,需要使用module.patch修改Fast_SCNN/SegmenTron/module.py。 +将FastSCNN目录下的module.patch放到FastSCNN/SegmenTron目录下. +执行 + +``` +cd ./SegmenTron +git apply module.patch +cd .. 
+``` + +5.获取权重文件 +获取权重文件方法,可从Ascend modelzoo FastSCNN_ACL_Pytorch 模型压缩包获取 + + md5sum:efc7247270298f3f57e88375011b52ee + +6.数据预处理 +在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录创建软连接 + +``` +ln -s /opt/npu/datasets datasets +``` + +运行Fast_SCNN_preprocess.py + +``` +python3.7.5 Fast_SCNN_preprocess.py +``` + +数据预处理的结果会保存在/opt/npu/prep_datset +预处理之后的二进制文件目录如下: +/opt/npu/prep_dataset/datasets/leftImg8bit/ +/opt/npu/prep_dataset/datasets/gtFine/ +在modelzoo/contrib/ACL_PyTorch/Research /cv/segmentation/FastSCNN目录下创建软连接 + +``` +ln -s /opt/npu/prep_dataset prep_dataset +``` + +运行gen_dataset_info.py获取二进制输入文件的info信息 + +``` +python3.7.5 gen_dataset_info.py +``` + +顺利运行会在当前目录下生成fast_scnn_prep_bin.info文件 + +6.获取benchmark工具 +将benchmark.x86_64和benchmark.aarch64放在当前目录 + +7.310上执行,执行时确保device空闲 + +ascend-toolkit版本:5.0.2 + +onnx转出om + +``` +source env.sh(注意,latest是一个软连接,请将服务器中的/usr/local/Ascend/ascend-toolkit/latest 指向5.0.2版本的CANN包) +bash test/pth2om.sh +成功运行会生成fast_scnn_bs1.onnx,fast_scnn_bs4.onnx,fast_scnn_bs8.onnx,fast_scnn_bs16.onnx,fast_scnn_bs32.onnx; +fast_scnn_bs1.om,fast_scnn_bs4.om,fast_scnn_bs8.om,fast_scnn_bs16.om,fast_scnn_bs32.om文件。 +(注意fast_scnn_bs32.onnx如果因为内存原因无法生成,也就无法导出fast_scnn_bs32.om。) +``` + +进行离线推理并进行精度、性能统计 + +``` +bash test/eval_acc_perf.sh +``` +会自动对fast_scnn_bs1.om、fast_scnn_bs16.om、fast_scnn_bs4.om进行精度、性能的统计。(fast_scnn_bs16.om可能会因为内存原因无法进行离线推理,运行报错后会自动跳过) + +8.在t4环境上将fast_scnn_bs1.onnx,fast_scnn_bs4.onnx,fast_scnn_bs8.onnx,fast_scnn_bs16.onnx,fast_scnn_bs32.onnx文件文件与perf_t4.sh放在同一目录 + +然后执行bash perf_t4.sh,执行时确保gpu空闲 + diff --git a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/parse.py index f7ae934e8d8cd05b323899cef81422d1d8c1881e..d432cbc56d44bbd33e28a1cd2db30fa662ea9bb8 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/FastSCNN/test/parse.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re - -def get_acc(filename): - with open(filename, 'r') as f: - lines = f.readlines() - last_line = lines[-1] - mIoU = last_line.split(" ")[1] - pixAcc = last_line.split(" ")[8].replace('\n','') - print("mIoU:", mIoU, " pixAcc: ", pixAcc) - - -def get_perf(filename): - with open(filename, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) - -if __name__ == "__main__": - - filename = sys.argv[1] - - if filename.endswith(".log"): - get_acc(filename) - elif filename.endswith(".txt"): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import re + +def get_acc(filename): + with open(filename, 'r') as f: + lines = f.readlines() + last_line = lines[-1] + mIoU = last_line.split(" ")[1] + pixAcc = last_line.split(" ")[8].replace('\n','') + print("mIoU:", mIoU, " pixAcc: ", pixAcc) + + +def get_perf(filename): + with open(filename, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(filename.split('_')[3], fps)) + +if __name__ == "__main__": + + filename = sys.argv[1] + + if filename.endswith(".log"): + get_acc(filename) + elif filename.endswith(".txt"): get_perf(filename) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/GCNet/README.md b/ACL_PyTorch/contrib/cv/segmentation/GCNet/README.md index 595ee6a8cb6050c650ca399e66377a4356101a32..3a1f420a261d571b878f03150b176505603cb46a 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/GCNet/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/GCNet/README.md @@ -1,159 +1,159 @@ -# GCNet模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - - - -2.获取,修改与安装开源模型代码 - -``` -git clone https://github.com/open-mmlab/mmcv -cd mmcv -git reset --hard 643009e4458109cb88ba5e669eec61a5e54c83be -pip install -e . -cd .. 
-git clone https://github.com/open-mmlab/mmdetection -cd mmdetection -git reset --hard 6c1347d7c0fa220a7be99cb19d1a9e8b6cbf7544 -pip install -r requirements/build.txt -python setup.py develop -patch -p1 < GCNet.diff -``` - - - -3.获取权重文件 - -从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet)中获取权重文件,将权重文件mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth放到当前工作目录 (执行pth2onnx时会自动下载) - -4.数据集 - -使用COCO官网的coco2017的5千张验证集进行测试,请参考原始开源代码仓mmdetection中对公共数据集的设置 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - - - -## 2 模型转换 - -1.pth转onnx模型 - -``` -python tools/deployment/pytorch2onnx.py configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth --output-file GCNet.onnx --input-img demo/demo.jpg --test-img tests/data/color.jpg --shape 800 1216 -``` - - - -2.onnx转om模型 - -``` -atc --framework=5 --model=GCNet.onnx --output=./GCNet_bs1 --input_shape="input:1,3,800,1216" --log=error --soc_version=Ascend310 -``` - - - -3.执行以下命令生成om模型文件 - -``` -bash test/pth2om.sh -``` - - - -## 3 离线推理 - -1.数据预处理 - -``` -python GCNet_preprocess.py --image_src_path=${datasets_path}/val2017 --bin_file_path=val2017_bin --model_input_height=800 --model_input_width=1216 -``` - - - -2.生成数据集信息文件 - -``` -python gen_dataset_info.py bin val2017_bin coco2017.info 1216 800 -python gen_dataset_info.py jpg val2017 coco2017_jpg.info -``` - - - -3.执行离线推理 - -``` -./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=./GCNet_bs1.om -input_text_path=./coco2017.info -input_width=1216 -input_height=800 -output_binary=True -useDvpp=False -``` - - - -4.使用后处理脚本展示推理结果 - -``` -python GCNet_postprocess.py --bin_data_path=./result/dumpOutput_device1/ --test_annotation=coco2017_jpg.info --det_results_path=detection-results --annotations_path=annotations/instances_val2017.json --net_out_num=3 --net_input_height=800 --net_input_width=1216 -``` - - - -5.NPU精度测试 - -``` -python txt_to_json.py -python coco_eval.py -``` - - - -6.NPU性能测试 - -``` -./benchmark.x86_64 -round=20 -om_path=GCNet_bs1.om -device_id=1 -batch_size=1 -``` - - - -7.GPU性能测试 - -onnx包含自定义算子,因此不能使用开源TensorRT测试性能数据,故在T4机器上使用pth在线推理测试性能数据 - -测评T4精度与性能 - -``` -python tools/test.py configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py ./mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth --eval bbox -python coco_eval.py -``` - - - -8.执行命令进行离线推理 - -``` -bash test/eval_acc_perf.sh -``` - - - - **评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :-------: | :---------: | :-------------: | :------: | :------: | -| GCNet bs1 | mAP:0.613 | mAP:0.611 | 3.931fps | 8.144fps | - -备注: -1.GCNet的mmdetection实现不支持多batch。 - -2.onnx包含自定义算子,因此不能使用开源TensorRT测试性能数据,故在T4机器上使用pth在线推理测试性能数据。 - -说明: - -1.om推理box map50精度为0.611,T4推理box map50精度为0.613,精度下降在1个点之内,因此可视为精度达标。 - -2.batch1:2.036 * 4 fps > 3.931fps 即310单个device的吞吐率乘4即单卡吞吐率约为T4单卡的吞吐率2倍,故310性能高于T4性能,性能达标。 - +# GCNet模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + + + +2.获取,修改与安装开源模型代码 + +``` +git clone https://github.com/open-mmlab/mmcv +cd mmcv +git reset --hard 643009e4458109cb88ba5e669eec61a5e54c83be +pip install -e . +cd .. 
+git clone https://github.com/open-mmlab/mmdetection +cd mmdetection +git reset --hard 6c1347d7c0fa220a7be99cb19d1a9e8b6cbf7544 +pip install -r requirements/build.txt +python setup.py develop +patch -p1 < GCNet.diff +``` + + + +3.获取权重文件 + +从[LINK](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet)中获取权重文件,将权重文件mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth放到当前工作目录 (执行pth2onnx时会自动下载) + +4.数据集 + +使用COCO官网的coco2017的5千张验证集进行测试,请参考原始开源代码仓mmdetection中对公共数据集的设置 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + + + +## 2 模型转换 + +1.pth转onnx模型 + +``` +python tools/deployment/pytorch2onnx.py configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth --output-file GCNet.onnx --input-img demo/demo.jpg --test-img tests/data/color.jpg --shape 800 1216 +``` + + + +2.onnx转om模型 + +``` +atc --framework=5 --model=GCNet.onnx --output=./GCNet_bs1 --input_shape="input:1,3,800,1216" --log=error --soc_version=Ascend310 +``` + + + +3.执行以下命令生成om模型文件 + +``` +bash test/pth2om.sh +``` + + + +## 3 离线推理 + +1.数据预处理 + +``` +python GCNet_preprocess.py --image_src_path=${datasets_path}/val2017 --bin_file_path=val2017_bin --model_input_height=800 --model_input_width=1216 +``` + + + +2.生成数据集信息文件 + +``` +python gen_dataset_info.py bin val2017_bin coco2017.info 1216 800 +python gen_dataset_info.py jpg val2017 coco2017_jpg.info +``` + + + +3.执行离线推理 + +``` +./benchmark.x86_64 -model_type=vision -device_id=1 -batch_size=1 -om_path=./GCNet_bs1.om -input_text_path=./coco2017.info -input_width=1216 -input_height=800 -output_binary=True -useDvpp=False +``` + + + +4.使用后处理脚本展示推理结果 + +``` +python GCNet_postprocess.py --bin_data_path=./result/dumpOutput_device1/ --test_annotation=coco2017_jpg.info --det_results_path=detection-results --annotations_path=annotations/instances_val2017.json --net_out_num=3 --net_input_height=800 --net_input_width=1216 +``` + + + +5.NPU精度测试 + +``` +python txt_to_json.py +python coco_eval.py +``` + + + +6.NPU性能测试 + +``` +./benchmark.x86_64 -round=20 -om_path=GCNet_bs1.om -device_id=1 -batch_size=1 +``` + + + +7.GPU性能测试 + +onnx包含自定义算子,因此不能使用开源TensorRT测试性能数据,故在T4机器上使用pth在线推理测试性能数据 + +测评T4精度与性能 + +``` +python tools/test.py configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py ./mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth --eval bbox +python coco_eval.py +``` + + + +8.执行命令进行离线推理 + +``` +bash test/eval_acc_perf.sh +``` + + + + **评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :-------: | :---------: | :-------------: | :------: | :------: | +| GCNet bs1 | mAP:0.613 | mAP:0.611 | 3.931fps | 8.144fps | + +备注: +1.GCNet的mmdetection实现不支持多batch。 + +2.onnx包含自定义算子,因此不能使用开源TensorRT测试性能数据,故在T4机器上使用pth在线推理测试性能数据。 + +说明: + +1.om推理box map50精度为0.611,T4推理box map50精度为0.613,精度下降在1个点之内,因此可视为精度达标。 + +2.batch1:2.036 * 4 fps > 3.931fps 即310单个device的吞吐率乘4即单卡吞吐率约为T4单卡的吞吐率2倍,故310性能高于T4性能,性能达标。 + diff --git a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/gen_dataset_info.py b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/gen_dataset_info.py index b3976a2b9eeee216128e36d2b3887c1cc4b04ac7..297cdca7e26d95dc314dfca176cc9308d10abd93 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/gen_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/gen_dataset_info.py @@ -1,99 +1,99 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance 
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - print(bin_images) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_png_info(image_names, info_name): - with open(info_name, 'w') as file: - for image_name in image_names: - print(image_name) - if len(image_names) == 0: - continue - else: - for index, png in enumerate(image_name): - img_cv = cv2.imread(png) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), png, str(width), str(height)]) - file.write(content) - file.write('\n') - -def get_city_pairs(folder, split='train'): - def get_path_pairs(img_folder, mask_folder): - img_paths = [] - mask_paths = [] - for root, _, files in os.walk(img_folder): - for filename in files: - if filename.endswith('.png'): - imgpath = os.path.join(root, filename) - foldername = os.path.basename(os.path.dirname(imgpath)) - maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') - maskpath = os.path.join(mask_folder, foldername, maskname) - if os.path.isfile(imgpath) and os.path.isfile(maskpath): - img_paths.append(imgpath) - mask_paths.append(maskpath) - else: - print('cannot find the mask or image:', imgpath, maskpath) - print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) - return img_paths, mask_paths - - if split in ('train', 'val'): - # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val" - img_folder = os.path.join(folder, 'leftImg8bit/' + split) - # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val" - mask_folder = os.path.join(folder, 'gtFine/' + split) - # img_paths与mask_paths的顺序是一一对应的 - img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) - return img_paths, mask_paths - else: - assert split == 'trainval' - print('trainval set') - train_img_folder = os.path.join(folder, 'leftImg8bit/train') - train_mask_folder = os.path.join(folder, 'gtFine/train') - val_img_folder = os.path.join(folder, 'leftImg8bit/val') - val_mask_folder = os.path.join(folder, 'gtFine/val') - train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) - val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) - img_paths = train_img_paths + val_img_paths - mask_paths = train_mask_paths + val_mask_paths - - return img_paths, mask_paths - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - res = get_city_pairs(file_path) - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'png': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + print(bin_images) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_png_info(image_names, info_name): + with open(info_name, 'w') as file: + for image_name in image_names: + print(image_name) + if len(image_names) == 0: + continue + else: + for index, png in enumerate(image_name): + img_cv = cv2.imread(png) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), png, str(width), str(height)]) + file.write(content) + file.write('\n') + +def get_city_pairs(folder, split='train'): + def get_path_pairs(img_folder, mask_folder): + img_paths = [] + mask_paths = [] + for root, _, files in os.walk(img_folder): + for filename in files: + if filename.endswith('.png'): + imgpath = os.path.join(root, filename) + foldername = os.path.basename(os.path.dirname(imgpath)) + maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') + maskpath = os.path.join(mask_folder, foldername, maskname) + if os.path.isfile(imgpath) and os.path.isfile(maskpath): + img_paths.append(imgpath) + mask_paths.append(maskpath) + else: + print('cannot find the mask or image:', imgpath, maskpath) + print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) + return img_paths, mask_paths + + if split in ('train', 'val'): + # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val" + img_folder = os.path.join(folder, 'leftImg8bit/' + split) + # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val" + mask_folder = os.path.join(folder, 'gtFine/' + split) + # img_paths与mask_paths的顺序是一一对应的 + img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) + return img_paths, mask_paths + else: + assert split == 'trainval' + print('trainval set') + train_img_folder = os.path.join(folder, 'leftImg8bit/train') + train_mask_folder = os.path.join(folder, 'gtFine/train') + val_img_folder = os.path.join(folder, 'leftImg8bit/val') + val_mask_folder = os.path.join(folder, 'gtFine/val') + train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) + val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) + img_paths = train_img_paths + val_img_paths + mask_paths = train_mask_paths + val_mask_paths + + return img_paths, mask_paths + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + res = get_city_pairs(file_path) + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'png': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_png_info(res, info_name) \ No newline at end of file diff --git 
a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_postprocess.py index 2ca1ae5a6f1777d5b1befe01a4b8bfcd3791a678..3f3b79d13d4af5d352d4626c3ad1932629c1497d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_postprocess.py @@ -1,178 +1,178 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import sys -import torch -import numpy as np -import torch.nn as nn -import struct -from PIL import Image - -def fast_hist(a, b, n): - k = (a >= 0) & (a < n) - return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) - - -def per_class_iu(hist): - return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) - - -class Evaluator(object): - def __init__(self, cityscapes_path, folder_davinci_target, outdir): - - loc = "cpu" - self.device = torch.device(loc) - print("===device===:",self.device) - # get valid dataset images and targets - self.image_paths, self.mask_paths = _get_city_pairs(cityscapes_path, "val") - - self.annotation_file_path = folder_davinci_target - - self.outdir = outdir - - def eval(self): - - print("Start validation, Total sample: {:d}".format(len(self.image_paths))) - list_time = [] - - hist = np.zeros((19, 19)) - for i in range(len(self.image_paths)): - filename = os.path.basename(self.image_paths[i]) - annotation_file = os.path.join(self.annotation_file_path, filename.split('.')[0]) - - mask = Image.open(self.mask_paths[i]) # mask shape: (W,H) - mask = mask.resize((2048,1024),Image.NEAREST) - mask = self._mask_transform(mask) # mask shape: (H,w) - mask = mask.to(self.device) - - with torch.no_grad(): - start_time = time.time() - outputs = self.file2tensor(annotation_file).to(self.device) - end_time = time.time() - - outputs_ = outputs.numpy().squeeze().transpose(1, 2, 0) - outputs_ = np.argmax(outputs_, axis=2) - hist += fast_hist(mask.cpu().numpy().flatten(), outputs_.flatten(), 19) - inters_over_union_classes = per_class_iu(hist) - mIoU = np.nanmean(inters_over_union_classes) - step_time = end_time - start_time - - list_time.append(step_time) - - print("Sample: {:d}, mIoU: {:.3f}, time: {:.3f}s".format( - i + 1, mIoU * 100, step_time)) - - average_time = sum(list_time) / len(list_time) - print("Evaluate: Average mIoU: {:.3f}, Average time: {:.3f}" - .format(mIoU * 100, average_time)) - - def _mask_transform(self, mask): - mask = self._class_to_index(np.array(mask).astype('int32')) - return torch.LongTensor(np.array(mask).astype('int32')) - - def _class_to_index(self, mask): - # assert the value - values = np.unique(mask) - self._key = np.array([-1, -1, -1, -1, -1, -1, - -1, -1, 0, 1, -1, -1, - 2, 3, 4, -1, -1, -1, - 5, -1, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, - -1, -1, 16, 17, 18]) - self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32') - for value in values: - assert (value in self._mapping) - - index = 
np.digitize(mask.ravel(), self._mapping, right=True) - - return self._key[index].reshape(mask.shape) - - def file2tensor(self, annotation_file): - - filepath = annotation_file + '_2.bin' - size = os.path.getsize(filepath) - res = [] - L = int(size/4) - binfile = open(filepath, 'rb') - for i in range(L): - data = binfile.read(4) - num = struct.unpack('f', data) - res.append(num[0]) - binfile.close() - - dim_res = np.array(res).reshape(1,19,65,129) - tensor_res = torch.tensor(dim_res, dtype=torch.float32) - interp = nn.Upsample(size=(1024, 2048), mode='bilinear', align_corners=True) - tensor_res = interp(tensor_res) - print(filepath, tensor_res.dtype, tensor_res.shape) - - return tensor_res - - -def _get_city_pairs(folder, split='train'): - def get_path_pairs(img_folder, mask_folder): - img_paths = [] - mask_paths = [] - for root, _, files in os.walk(img_folder): - for filename in files: - if filename.endswith('.png'): - imgpath = os.path.join(root, filename) - foldername = os.path.basename(os.path.dirname(imgpath)) - maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') - maskpath = os.path.join(mask_folder, foldername, maskname) - if os.path.isfile(imgpath) and os.path.isfile(maskpath): - img_paths.append(imgpath) - mask_paths.append(maskpath) - else: - print('cannot find the mask or image:', imgpath, maskpath) - print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) - return img_paths, mask_paths - - if split in ('train', 'val'): - # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val" - img_folder = os.path.join(folder, 'leftImg8bit/' + split) - # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val" - mask_folder = os.path.join(folder, 'gtFine/' + split) - # img_paths mask_paths - img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) - return img_paths, mask_paths - return img_paths, mask_paths - - -if __name__ == '__main__': - - try: - # dataset file path - cityscapes_path = sys.argv[1] - # txt file path - folder_davinci_target = sys.argv[2] - # the path to store the results json path - outdir = sys.argv[3] - - except IndexError: - print("Stopped!") - exit(1) - - if not (os.path.exists(cityscapes_path)): - print("config file folder does not exist.") - if not (os.path.exists(folder_davinci_target)): - print("target file folder does not exist.") - if not (os.path.exists(outdir)): - print("output file folder does not exist.") - os.makedirs(outdir) - - evaluator = Evaluator(cityscapes_path, folder_davinci_target, outdir) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
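For reference, the `file2tensor` helper in this script (both the removed and the re-added copy) reads the benchmark output one float at a time with `struct.unpack` inside a Python loop. A minimal vectorized sketch of the same load is shown below; it assumes the same little-endian float32 `.bin` layout and the `(1, 19, 65, 129)` logit shape that `file2tensor` reshapes to, and `load_bin_logits` is an illustrative name, not part of this patch.

```python
# Sketch only: vectorized equivalent of file2tensor's per-float struct loop,
# assuming little-endian float32 data shaped (1, 19, 65, 129).
import numpy as np
import torch
import torch.nn as nn


def load_bin_logits(bin_path, shape=(1, 19, 65, 129)):
    # read the whole buffer at once instead of 4 bytes per loop iteration
    data = np.fromfile(bin_path, dtype=np.float32).reshape(shape)
    logits = torch.from_numpy(data)
    # same bilinear upsampling to the full Cityscapes resolution as file2tensor
    upsample = nn.Upsample(size=(1024, 2048), mode='bilinear', align_corners=True)
    return upsample(logits)
```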
+ +import os +import time +import sys +import torch +import numpy as np +import torch.nn as nn +import struct +from PIL import Image + +def fast_hist(a, b, n): + k = (a >= 0) & (a < n) + return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) + + +def per_class_iu(hist): + return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) + + +class Evaluator(object): + def __init__(self, cityscapes_path, folder_davinci_target, outdir): + + loc = "cpu" + self.device = torch.device(loc) + print("===device===:",self.device) + # get valid dataset images and targets + self.image_paths, self.mask_paths = _get_city_pairs(cityscapes_path, "val") + + self.annotation_file_path = folder_davinci_target + + self.outdir = outdir + + def eval(self): + + print("Start validation, Total sample: {:d}".format(len(self.image_paths))) + list_time = [] + + hist = np.zeros((19, 19)) + for i in range(len(self.image_paths)): + filename = os.path.basename(self.image_paths[i]) + annotation_file = os.path.join(self.annotation_file_path, filename.split('.')[0]) + + mask = Image.open(self.mask_paths[i]) # mask shape: (W,H) + mask = mask.resize((2048,1024),Image.NEAREST) + mask = self._mask_transform(mask) # mask shape: (H,w) + mask = mask.to(self.device) + + with torch.no_grad(): + start_time = time.time() + outputs = self.file2tensor(annotation_file).to(self.device) + end_time = time.time() + + outputs_ = outputs.numpy().squeeze().transpose(1, 2, 0) + outputs_ = np.argmax(outputs_, axis=2) + hist += fast_hist(mask.cpu().numpy().flatten(), outputs_.flatten(), 19) + inters_over_union_classes = per_class_iu(hist) + mIoU = np.nanmean(inters_over_union_classes) + step_time = end_time - start_time + + list_time.append(step_time) + + print("Sample: {:d}, mIoU: {:.3f}, time: {:.3f}s".format( + i + 1, mIoU * 100, step_time)) + + average_time = sum(list_time) / len(list_time) + print("Evaluate: Average mIoU: {:.3f}, Average time: {:.3f}" + .format(mIoU * 100, average_time)) + + def _mask_transform(self, mask): + mask = self._class_to_index(np.array(mask).astype('int32')) + return torch.LongTensor(np.array(mask).astype('int32')) + + def _class_to_index(self, mask): + # assert the value + values = np.unique(mask) + self._key = np.array([-1, -1, -1, -1, -1, -1, + -1, -1, 0, 1, -1, -1, + 2, 3, 4, -1, -1, -1, + 5, -1, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, + -1, -1, 16, 17, 18]) + self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32') + for value in values: + assert (value in self._mapping) + + index = np.digitize(mask.ravel(), self._mapping, right=True) + + return self._key[index].reshape(mask.shape) + + def file2tensor(self, annotation_file): + + filepath = annotation_file + '_2.bin' + size = os.path.getsize(filepath) + res = [] + L = int(size/4) + binfile = open(filepath, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res.append(num[0]) + binfile.close() + + dim_res = np.array(res).reshape(1,19,65,129) + tensor_res = torch.tensor(dim_res, dtype=torch.float32) + interp = nn.Upsample(size=(1024, 2048), mode='bilinear', align_corners=True) + tensor_res = interp(tensor_res) + print(filepath, tensor_res.dtype, tensor_res.shape) + + return tensor_res + + +def _get_city_pairs(folder, split='train'): + def get_path_pairs(img_folder, mask_folder): + img_paths = [] + mask_paths = [] + for root, _, files in os.walk(img_folder): + for filename in files: + if filename.endswith('.png'): + imgpath = os.path.join(root, filename) + foldername = 
os.path.basename(os.path.dirname(imgpath)) + maskname = filename.replace('leftImg8bit', 'gtFine_labelIds') + maskpath = os.path.join(mask_folder, foldername, maskname) + if os.path.isfile(imgpath) and os.path.isfile(maskpath): + img_paths.append(imgpath) + mask_paths.append(maskpath) + else: + print('cannot find the mask or image:', imgpath, maskpath) + print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) + return img_paths, mask_paths + + if split in ('train', 'val'): + # "./Cityscapes/leftImg8bit/train" or "./Cityscapes/leftImg8bit/val" + img_folder = os.path.join(folder, 'leftImg8bit/' + split) + # "./Cityscapes/gtFine/train" or "./Cityscapes/gtFine/val" + mask_folder = os.path.join(folder, 'gtFine/' + split) + # img_paths mask_paths + img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) + return img_paths, mask_paths + return img_paths, mask_paths + + +if __name__ == '__main__': + + try: + # dataset file path + cityscapes_path = sys.argv[1] + # txt file path + folder_davinci_target = sys.argv[2] + # the path to store the results json path + outdir = sys.argv[3] + + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(cityscapes_path)): + print("config file folder does not exist.") + if not (os.path.exists(folder_davinci_target)): + print("target file folder does not exist.") + if not (os.path.exists(outdir)): + print("output file folder does not exist.") + os.makedirs(outdir) + + evaluator = Evaluator(cityscapes_path, folder_davinci_target, outdir) evaluator.eval() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_preprocess.py index f3f814586ad11a7ec4a57a03da021d64c6322746..3f6cc3819e655d11594c272ab2ba54416b71692f 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/intrada_preprocess.py @@ -1,67 +1,67 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import torch -import sys -import numpy as np -from PIL import Image -from tqdm import tqdm -from torchvision import transforms -from torchvision.transforms.functional import InterpolationMode - - -def get_img_path(img_folder): - img_paths = [] - for root, dirs, files in os.walk(img_folder): - for f in files: - if f.endswith('.png'): - print(os.path.join(root, f)) - img_paths.append(os.path.join(root, f)) - return img_paths - -def flip(x, dim): - xsize = x.size() - dim = x.dim() + dim if dim < 0 else dim - x = x.view(-1, *xsize[dim:]) - x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1)-1, - -1, -1), ('cpu','cuda')[x.is_cuda])().long(), :] - return x.view(xsize) - -def _img_transform(image): - image_transform = transforms.Compose([ - transforms.Resize((512,1024),InterpolationMode.BICUBIC), - transforms.ToTensor()]) - image = image_transform(image) - image *= 255. 
- image = flip(image, 0) - image -= np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32).reshape(-1,1,1) - return image - -if __name__ == '__main__': - cityscapes_path = sys.argv[1] - bin_path = sys.argv[2] - if os.path.exists(bin_path) is False: - os.mkdir(bin_path) - - split = "val" - img_folder = os.path.join(cityscapes_path, 'leftImg8bit/' + split) - img_paths = get_img_path(img_folder) - - for i in tqdm(range(len(img_paths))): - filename = os.path.basename(img_paths[i]) - image = Image.open(img_paths[i]).convert('RGB') # image shape: (W,H,3) - image = _img_transform(image) # image shape: (3,H,W) [0,1] - image = torch.unsqueeze(image, 0) # image shape: (1,3,H,W) [0,1] - image = np.array(image).astype(np.float32) - image.tofile(os.path.join(bin_path, filename.split('.')[0] + '.bin')) # save bin +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import torch +import sys +import numpy as np +from PIL import Image +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + + +def get_img_path(img_folder): + img_paths = [] + for root, dirs, files in os.walk(img_folder): + for f in files: + if f.endswith('.png'): + print(os.path.join(root, f)) + img_paths.append(os.path.join(root, f)) + return img_paths + +def flip(x, dim): + xsize = x.size() + dim = x.dim() + dim if dim < 0 else dim + x = x.view(-1, *xsize[dim:]) + x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1)-1, + -1, -1), ('cpu','cuda')[x.is_cuda])().long(), :] + return x.view(xsize) + +def _img_transform(image): + image_transform = transforms.Compose([ + transforms.Resize((512,1024),InterpolationMode.BICUBIC), + transforms.ToTensor()]) + image = image_transform(image) + image *= 255. 
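+    # Note: flip(image, 0) below reverses the channel axis of the CHW tensor
+    # (RGB -> BGR); the constants subtracted next are per-channel BGR means
+    # (104.00698793, 116.66876762, 122.67891434), commonly known as the
+    # Caffe-style mean offset.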
+ image = flip(image, 0) + image -= np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32).reshape(-1,1,1) + return image + +if __name__ == '__main__': + cityscapes_path = sys.argv[1] + bin_path = sys.argv[2] + if os.path.exists(bin_path) is False: + os.mkdir(bin_path) + + split = "val" + img_folder = os.path.join(cityscapes_path, 'leftImg8bit/' + split) + img_paths = get_img_path(img_folder) + + for i in tqdm(range(len(img_paths))): + filename = os.path.basename(img_paths[i]) + image = Image.open(img_paths[i]).convert('RGB') # image shape: (W,H,3) + image = _img_transform(image) # image shape: (3,H,W) [0,1] + image = torch.unsqueeze(image, 0) # image shape: (1,3,H,W) [0,1] + image = np.array(image).astype(np.float32) + image.tofile(os.path.join(bin_path, filename.split('.')[0] + '.bin')) # save bin diff --git a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/modelzoo_level.txt index d44ba5698b045b8a30e107962f295dbc24585d8c..70801afc42b6d9eb5cdd98b5430d9b2101f3146a 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/requirements.txt index 7b6bc2994b56f8cda417ae90bd4dc54ad5fab3de..f83a7c426770dbc7746d9fa4124ccc73e5759622 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/IntraDA/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/IntraDA/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.21.1 -opencv-python == 4.4.0.46 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.21.1 +opencv-python == 4.4.0.46 Pillow == 8.0.1 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/LICENSE index 7615b4e85fa24d81f25cec1495e783a3936f1a86..72f817fb44de8b9fd23fe71230b9dc5ccbe4ca35 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/README.md b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/README.md index 4f07558723f778c8175f8cfe47b36248e1be84ff..2e6abee6427a63fb1101b253631a0a09397ae7c7 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/README.md @@ -1,406 +1,406 @@ -# 基于开源mmsegmentation预训练的PSPnet Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理精度统计](#61-离线推理精度统计) - - [6.2 开源精度](#62-开源精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - [7.2 T4性能数据](#72-T4性能数据) - - [7.3 性能对比](#73-性能对比) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[PSPnet论文](https://arxiv.org/abs/1612.01105) -论文使用PPM(pyramid pooling module)和提出的PSPNet(pyramid scene parsing network),实现了通过融合different-region-based context获取全局context信息的能力。同时,PSPNet在多个数据集上实现了SOTA,取得ImageNet scene parsing challenge 2016、PASCAL VOC 2012 benchmark和Cityscapes benchmark的第1名。 - -### 1.2 代码地址 -[mmsegmentation框架PSPnet代码](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) -branch:master commit_id:52b4fa5b9a3d65d0745d8bccb08ac0b88c9407fe - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -pytorch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -``` -**注意:** -> 转onnx的环境上pytorch需要安装1.8.0版本 - -### 2.2 python第三方库 -``` -numpy == 1.20.1 -opencv-python == 4.5.2.52 -``` - -**说明:** -> X86架构:opencv,pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:opencv,pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - - -### 3.1 pth转onnx模型 - -1.获取pth权重文件 -[PSPnet基于mmsegmentation预训练的npu权重文件](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) -文件md5sum: c563f7683bab2a869fe095a9eb801f6c - -2.mmsegmentation源码安装 -```shell -pip3.7 install mmcv-full==1.3.10 -git clone https://github.com/open-mmlab/mmsegmentation.git -cd mmsegmentation -如果修改了模型代码,交付了{model_name}.diff -patch -p1 < ../{model_name}.diff -如果模型代码需要安装,则安装模型代码(如果没有安装脚本,pth2onnx等脚本需要引用模型代码的类或函数,可通过sys.path.append(r"./pytorch-nested-unet")添加搜索路径的方式) -pip3.7 install -e . # or "python3.7 setup.py develop" -cd .. 
-``` - - **说明:** -> 安装所需的依赖说明请参考mmsegmentation/docs/get_started.md - - -3.使用tools里的pytorch2onnx.py文件,运行如下命令,生成对应的onnx模型: -```shell -python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py --checkpoint pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --output-file pspnet_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 -``` -4.通过onnx simplifier简化onnx模型 -```shell -python3.7 -m onnxsim --input-shape="1,3,500,500" pspnet_r50-d8_512x512_20k_voc12aug.onnx pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx -``` - **模型转换要点:** -> 导出的onnx为固定batch1,不是动态batch。 - -### 3.2 onnx转om模型 - -1.设置环境变量 -```shell -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ -``` -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.2 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373),如果存在多余输出节点,需要指定输出节点以去除无用输出,节点序号可能会因网络结构不同而不同,使用netron开源可视化工具查看具体的输出节点名: -生成bs1的om模型: -```shell -atc --framework=5 --model=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --output=pspnet_r50-d8_512x512_20k_voc12aug_sim_bs1 --input_format=NCHW --input_shape=" input:1,3,500,500" --log=debug --soc_version=Ascend310 --input_fp16_nodes=input -``` -生成bs16的om模型: -```shell -atc --framework=5 --model=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --output=pspnet_r50-d8_512x512_20k_voc12aug_sim_bs16 --input_format=NCHW --input_shape=" input:16,3,500,500" --log=debug --soc_version=Ascend310 --input_fp16_nodes=input -``` - **模型转换要点:** -> 通过input_fp16_nodes将输入的数据精度改为fp16,提升性能。 - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 数据集获取 -该模型使用[VOC2012官网](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html)的VOC2012的1449张验证集进行测试,图片与对应ground truth分别存放在/opt/npu/VOCdevkit/VOC2012/JPEGImages/与/opt/npu/VOCdevkit/VOC2012/SegmentationClass/。 - -### 4.2 数据集预处理 -1.预处理脚本mmsegmentation_voc2012_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 -```shell -python3.7 mmsegmentation_voc2012_preprocess.py --image_folder_path=/opt/npu/VOCdevkit/VOC2012/JPEGImages/ --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --bin_folder_path=./voc12_bin/ -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -```shell -python3.7 get_info.py bin ./voc12_bin voc12.info 500 500 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.2 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) - -### 5.2 离线推理 - -1.设置环境变量 -```shell -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export 
ASCEND_OPP_PATH=${install_path}/opp -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ -``` -2.执行离线推理 -```shell -./benchmark.${arch} -model_type=vision -om_path=pspnet_r50-d8_512x512_20k_voc12aug_sim_fp16_bs1.om -device_id=0 -batch_size=1 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true - -./benchmark.${arch} -model_type=vision -om_path=pspnet_r50-d8_512x512_20k_voc12aug_sim_fp16_bs16.om -device_id=1 -batch_size=16 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true -``` - **注意:** -> onnx的输出是int64,但是om的输出是int32 - -输出结果默认保存在当前目录result/dumpOutput_device0,模型有一个输出,每个输入对应的输出对应_1.bin文件 -``` -输出 shape 数据类型 数据含义 -output1 1 * 1 * 500 * 500 int32 8位图像 -``` - -## 6 精度对比 - -- **[离线推理精度](#61-离线推理精度)** -- **[开源精度](#62-开源精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理精度统计 - -1.调用mmsegmentation_voc2012_postprocess.py评测bs1的mIoU精度: -```shell -python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info - -python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device0 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 -``` -第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 -执行完后会打印出精度: -``` -per class results: - -+-------------+-------+-------+ -| Class | IoU | Acc | -+-------------+-------+-------+ -| background | 93.78 | 97.28 | -| aeroplane | 87.46 | 94.06 | -| bicycle | 41.32 | 88.9 | -| bird | 86.48 | 91.68 | -| boat | 70.01 | 83.3 | -| bottle | 76.2 | 84.19 | -| bus | 92.78 | 96.14 | -| car | 85.56 | 92.34 | -| cat | 91.47 | 96.61 | -| chair | 35.65 | 46.37 | -| cow | 89.62 | 93.35 | -| diningtable | 55.73 | 59.82 | -| dog | 86.24 | 92.88 | -| horse | 88.84 | 93.02 | -| motorbike | 83.75 | 92.17 | -| person | 83.81 | 91.12 | -| pottedplant | 60.77 | 67.82 | -| sheep | 87.55 | 91.34 | -| sofa | 49.2 | 59.29 | -| train | 85.96 | 91.59 | -| tvmonitor | 67.55 | 79.11 | -+-------------+-------+-------+ -Summary: - -+--------+-------+-------+-------+ -| Scope | mIoU | mAcc | aAcc | -+--------+-------+-------+-------+ -| global | 76.18 | 84.87 | 94.49 | -+--------+-------+-------+-------+ -``` - -2.调用mmsegmentation_voc2012_postprocess.py评测bs16的mIoU精度: -```shell -python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info - -python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device1 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 -``` -第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 -执行完后会打印出精度: -``` -per class results: - -+-------------+-------+-------+ -| Class | IoU | Acc | -+-------------+-------+-------+ -| background | 93.78 | 97.28 | -| aeroplane | 87.46 | 94.06 | -| bicycle | 41.32 | 88.9 | -| bird | 86.48 | 91.68 | -| boat | 70.01 | 83.3 | -| bottle | 76.2 | 84.19 | -| bus | 92.78 | 96.14 | -| car | 85.56 | 92.34 | -| cat | 91.47 | 96.61 | -| chair | 35.65 | 46.37 | -| cow | 89.62 | 93.35 | -| diningtable | 55.73 | 59.82 | -| dog | 86.24 | 92.88 | -| horse | 88.84 | 93.02 | -| motorbike | 83.75 | 92.17 | -| person | 83.81 | 91.12 | -| pottedplant | 
60.77 | 67.82 | -| sheep | 87.55 | 91.34 | -| sofa | 49.2 | 59.29 | -| train | 85.96 | 91.59 | -| tvmonitor | 67.55 | 79.11 | -+-------------+-------+-------+ -Summary: - -+--------+-------+-------+-------+ -| Scope | mIoU | mAcc | aAcc | -+--------+-------+-------+-------+ -| global | 76.18 | 84.87 | 94.49 | -+--------+-------+-------+-------+ -``` - **精度调试:** -> 1.在线推理前处理图片是一定格式的动态分辨率,onnx将分辨率固定为512x512会导致精度下降些。 -> 2.分辨率在512x512时onnx离线推理的精度与om精度相同,分辨率改为500x500可以提升精度,使得mask的精度与开源相比更高 -> 3.单图调试 -> ``` -> python3.7 mmsegmentation/tools/test.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --show -> python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py --checkpoint pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --output-file pspnet_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --input-img 2011_003103.jpg --show --verify -> ``` - - -### 6.2 开源精度 -[官网精度](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) - -``` -{"mode": "val", "epoch": 31, "iter": 20000, "lr": 0.0001, "mIoU": 0.76778, "mAcc": 0.85529, "aAcc": 0.94787} -``` -### 6.3 精度对比 -om推理bs1和bs16的mIoU精度均为0.7618,开源mIoU精度为0.76778,om精度下降小于1%,精度达标 - - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** -- **[T4性能数据](#72-T4性能数据)** -- **[性能对比](#73-性能对比)** - -### 7.1 npu性能数据 -1.benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -由于在线推理与onnx推理还不支持多batch,所以仅测om bs1,bs16的性能。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: -``` -[e2e] throughputRate: 7.85666, latency: 184430 -[data read] throughputRate: 37.4296, moduleLatency: 26.7168 -[preprocess] throughputRate: 28.1654, moduleLatency: 35.5045 -[infer] throughputRate: 7.91227, Interface throughputRate: 8.19018, moduleLatency: 126.139 -[post] throughputRate: 7.91221, moduleLatency: 126.387 -``` -Interface throughputRate: 7.91221,7.91221x4=31.64884即是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 8.16118, latency: 177548 -[data read] throughputRate: 40.508, moduleLatency: 24.6865 -[preprocess] throughputRate: 29.1145, moduleLatency: 34.3472 -[infer] throughputRate: 8.21425, Interface throughputRate: 8.52684, moduleLatency: 121.508 -[post] throughputRate: 0.515815, moduleLatency: 1938.68 -``` -Interface throughputRate: 8.21425,8.21425x4=32.857即是batch16 310单卡吞吐率 - -2.npu纯推理性能 -batch1的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -./benchmark.x86_64 -round=20 -om_path=pspnet_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 -``` -PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt: -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 8.12674samples/s, ave_latency: 123.129ms ----------------------------------------------------------------- -``` - 
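The single-card throughput figures in this section reduce to two small pieces of arithmetic: an Ascend 310 card holds four devices, so card throughput is the reported per-device rate times 4, while the T4 figure in 7.2 is batch_size × 1000 divided by the mean latency in ms. A minimal sketch of that arithmetic, using the batch-1 numbers reported in this README (the helper names are illustrative only):

```python
# Sketch of the throughput arithmetic used in section 7 (helper names illustrative).

def ascend310_card_fps(device_rate_fps, devices_per_card=4):
    # benchmark reports a per-device rate; one Ascend 310 card has 4 devices
    return device_rate_fps * devices_per_card

def t4_fps(mean_latency_ms, batch_size=1):
    # trtexec reports the mean latency of one batch, so fps = batch * 1000 / latency
    return batch_size * 1000.0 / mean_latency_ms

print(ascend310_card_fps(7.91221))   # ~31.65 fps for batch 1 on 310
print(t4_fps(16.2649))               # ~61.48 fps for batch 1 on T4
```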
-batch6的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 -``` -./benchmark.x86_64 -round=20 -om_path=pspnet_r50-d8_512x512_20k_voc12aug_bs16.om -device_id=0 -batch_size=16 -``` -PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt: -``` -[INFO] PureInfer result saved in ./result/PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt ------------------PureInfer Performance Summary------------------ -[INFO] ave_throughputRate: 8.51957samples/s, ave_latency: 117.39ms ----------------------------------------------------------------- -``` -### 7.2 T4性能数据 -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 -1.batch1性能: -``` -trtexec --onnx=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --fp16 --shapes=input:1,3,500,500 -``` -gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch。其中--fp16是算子精度,目前算子精度只测--fp16的。注意--shapes是onnx的输入节点名与shape,当onnx输入节点的batch为-1时,可以用同一个onnx文件测不同batch的性能,否则用固定batch的onnx测不同batch的性能不准。 -``` -[09/24/2021-04:17:29] [I] GPU Compute -[09/24/2021-04:17:29] [I] min: 15.829 ms -[09/24/2021-04:17:29] [I] max: 20.5302 ms -[09/24/2021-04:17:29] [I] mean: 16.2649 ms -[09/24/2021-04:17:29] [I] median: 16.0951 ms -[09/24/2021-04:17:29] [I] percentile: 19.1857 ms at 99% -[09/24/2021-04:17:29] [I] total compute time: 3.04154 s - -``` -batch1 t4单卡吞吐率:1000/(16.2649/1)=61.482fps - -2.batch16性能: -``` -trtexec --onnx=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --fp16 --shapes=input:16,3,500,500 -``` -``` -[09/24/2021-04:25:43] [I] GPU Compute -[09/24/2021-04:25:43] [I] min: 15.7839 ms -[09/24/2021-04:25:43] [I] max: 20.8466 ms -[09/24/2021-04:25:43] [I] mean: 16.2072 ms -[09/24/2021-04:25:43] [I] median: 16.0396 ms -[09/24/2021-04:25:43] [I] percentile: 19.1329 ms at 99% -[09/24/2021-04:25:43] [I] total compute time: 3.03074 s -``` -batch16 t4单卡吞吐率:1000/(16.2072/1)=61.701fps - -### 7.3 性能对比 -batch1:7.91221x4 < 1000/(16.2649/1) -batch1:8.21425x4 < 1000/(16.2072/1) -310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率小,故310性能低于T4性能,性能不达标。 - -**性能优化:** -> 由于onnx转om的过程中,两个avgpool算子的kernel size过大,导致被替换为aicpu算子,致使性能不足。需等优化底层算子后再进行测试。 - +# 基于开源mmsegmentation预训练的PSPnet Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理精度统计](#61-离线推理精度统计) + - [6.2 开源精度](#62-开源精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + - [7.2 T4性能数据](#72-T4性能数据) + - [7.3 性能对比](#73-性能对比) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[PSPnet论文](https://arxiv.org/abs/1612.01105) +论文使用PPM(pyramid pooling module)和提出的PSPNet(pyramid scene parsing network),实现了通过融合different-region-based context获取全局context信息的能力。同时,PSPNet在多个数据集上实现了SOTA,取得ImageNet scene parsing challenge 2016、PASCAL VOC 2012 benchmark和Cityscapes benchmark的第1名。 + +### 1.2 代码地址 +[mmsegmentation框架PSPnet代码](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) +branch:master commit_id:52b4fa5b9a3d65d0745d8bccb08ac0b88c9407fe + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +pytorch == 1.8.0 +torchvision 
== 0.9.0 +onnx == 1.9.0 +``` +**注意:** +> 转onnx的环境上pytorch需要安装1.8.0版本 + +### 2.2 python第三方库 +``` +numpy == 1.20.1 +opencv-python == 4.5.2.52 +``` + +**说明:** +> X86架构:opencv,pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:opencv,pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + + +### 3.1 pth转onnx模型 + +1.获取pth权重文件 +[PSPnet基于mmsegmentation预训练的npu权重文件](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) +文件md5sum: c563f7683bab2a869fe095a9eb801f6c + +2.mmsegmentation源码安装 +```shell +pip3.7 install mmcv-full==1.3.10 +git clone https://github.com/open-mmlab/mmsegmentation.git +cd mmsegmentation +如果修改了模型代码,交付了{model_name}.diff +patch -p1 < ../{model_name}.diff +如果模型代码需要安装,则安装模型代码(如果没有安装脚本,pth2onnx等脚本需要引用模型代码的类或函数,可通过sys.path.append(r"./pytorch-nested-unet")添加搜索路径的方式) +pip3.7 install -e . # or "python3.7 setup.py develop" +cd .. +``` + + **说明:** +> 安装所需的依赖说明请参考mmsegmentation/docs/get_started.md + + +3.使用tools里的pytorch2onnx.py文件,运行如下命令,生成对应的onnx模型: +```shell +python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py --checkpoint pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --output-file pspnet_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 +``` +4.通过onnx simplifier简化onnx模型 +```shell +python3.7 -m onnxsim --input-shape="1,3,500,500" pspnet_r50-d8_512x512_20k_voc12aug.onnx pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx +``` + **模型转换要点:** +> 导出的onnx为固定batch1,不是动态batch。 + +### 3.2 onnx转om模型 + +1.设置环境变量 +```shell +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ +``` +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考[CANN 5.0.2 开发辅助工具指南 (推理) 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164868?idPath=23710424%7C251366513%7C22892968%7C251168373),如果存在多余输出节点,需要指定输出节点以去除无用输出,节点序号可能会因网络结构不同而不同,使用netron开源可视化工具查看具体的输出节点名: +生成bs1的om模型: +```shell +atc --framework=5 --model=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --output=pspnet_r50-d8_512x512_20k_voc12aug_sim_bs1 --input_format=NCHW --input_shape=" input:1,3,500,500" --log=debug --soc_version=Ascend310 --input_fp16_nodes=input +``` +生成bs16的om模型: +```shell +atc --framework=5 --model=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --output=pspnet_r50-d8_512x512_20k_voc12aug_sim_bs16 --input_format=NCHW --input_shape=" input:16,3,500,500" --log=debug --soc_version=Ascend310 --input_fp16_nodes=input +``` + **模型转换要点:** +> 通过input_fp16_nodes将输入的数据精度改为fp16,提升性能。 + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 数据集获取 +该模型使用[VOC2012官网](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html)的VOC2012的1449张验证集进行测试,图片与对应ground truth分别存放在/opt/npu/VOCdevkit/VOC2012/JPEGImages/与/opt/npu/VOCdevkit/VOC2012/SegmentationClass/。 + +### 4.2 数据集预处理 +1.预处理脚本mmsegmentation_voc2012_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 +```shell +python3.7 mmsegmentation_voc2012_preprocess.py 
--image_folder_path=/opt/npu/VOCdevkit/VOC2012/JPEGImages/ --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --bin_folder_path=./voc12_bin/ +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +```shell +python3.7 get_info.py bin ./voc12_bin voc12.info 500 500 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考[CANN 5.0.2 推理benchmark工具用户指南 01](https://support.huawei.com/enterprise/zh/doc/EDOC1100164874?idPath=23710424%7C251366513%7C22892968%7C251168373) + +### 5.2 离线推理 + +1.设置环境变量 +```shell +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest/ +``` +2.执行离线推理 +```shell +./benchmark.${arch} -model_type=vision -om_path=pspnet_r50-d8_512x512_20k_voc12aug_sim_fp16_bs1.om -device_id=0 -batch_size=1 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true + +./benchmark.${arch} -model_type=vision -om_path=pspnet_r50-d8_512x512_20k_voc12aug_sim_fp16_bs16.om -device_id=1 -batch_size=16 -input_text_path=voc12.info -input_width=500 -input_height=500 -useDvpp=false -output_binary=true +``` + **注意:** +> onnx的输出是int64,但是om的输出是int32 + +输出结果默认保存在当前目录result/dumpOutput_device0,模型有一个输出,每个输入对应的输出对应_1.bin文件 +``` +输出 shape 数据类型 数据含义 +output1 1 * 1 * 500 * 500 int32 8位图像 +``` + +## 6 精度对比 + +- **[离线推理精度](#61-离线推理精度)** +- **[开源精度](#62-开源精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理精度统计 + +1.调用mmsegmentation_voc2012_postprocess.py评测bs1的mIoU精度: +```shell +python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info + +python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device0 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 +``` +第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 +执行完后会打印出精度: +``` +per class results: + ++-------------+-------+-------+ +| Class | IoU | Acc | ++-------------+-------+-------+ +| background | 93.78 | 97.28 | +| aeroplane | 87.46 | 94.06 | +| bicycle | 41.32 | 88.9 | +| bird | 86.48 | 91.68 | +| boat | 70.01 | 83.3 | +| bottle | 76.2 | 84.19 | +| bus | 92.78 | 96.14 | +| car | 85.56 | 92.34 | +| cat | 91.47 | 96.61 | +| chair | 35.65 | 46.37 | +| cow | 89.62 | 93.35 | +| diningtable | 55.73 | 59.82 | +| dog | 86.24 | 92.88 | +| horse | 88.84 | 93.02 | +| motorbike | 83.75 | 92.17 | +| person | 83.81 | 91.12 | +| pottedplant | 60.77 | 67.82 | +| sheep | 87.55 | 91.34 | +| sofa | 49.2 | 59.29 | +| train | 85.96 | 91.59 | +| tvmonitor | 67.55 | 79.11 | ++-------------+-------+-------+ +Summary: + ++--------+-------+-------+-------+ +| Scope | mIoU | mAcc | aAcc | ++--------+-------+-------+-------+ +| global | 76.18 | 84.87 | 94.49 | ++--------+-------+-------+-------+ +``` + 
+2.调用mmsegmentation_voc2012_postprocess.py评测bs16的mIoU精度: +```shell +python3.7 get_info.py jpg /opt/npu/VOCdevkit/VOC2012/JPEGImages/ voc12_jpg.info + +python3.7 mmsegmentation_voc2012_postprocess.py --bin_data_path=./result/dumpOutput_device1 --test_annotation=./voc12_jpg.info --img_dir=/opt/npu/VOCdevkit/VOC2012/JPEGImages --ann_dir=/opt/npu/VOCdevkit/VOC2012/SegmentationClass --split=/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt --net_input_width=500 --net_input_height=500 +``` +第一个参数为benchmark推理结果,第二个为原始图片信息文件,第三个为原始图片位置,第四个为验证图片位置,第五个图片的split,第六七个为网宽高 +执行完后会打印出精度: +``` +per class results: + ++-------------+-------+-------+ +| Class | IoU | Acc | ++-------------+-------+-------+ +| background | 93.78 | 97.28 | +| aeroplane | 87.46 | 94.06 | +| bicycle | 41.32 | 88.9 | +| bird | 86.48 | 91.68 | +| boat | 70.01 | 83.3 | +| bottle | 76.2 | 84.19 | +| bus | 92.78 | 96.14 | +| car | 85.56 | 92.34 | +| cat | 91.47 | 96.61 | +| chair | 35.65 | 46.37 | +| cow | 89.62 | 93.35 | +| diningtable | 55.73 | 59.82 | +| dog | 86.24 | 92.88 | +| horse | 88.84 | 93.02 | +| motorbike | 83.75 | 92.17 | +| person | 83.81 | 91.12 | +| pottedplant | 60.77 | 67.82 | +| sheep | 87.55 | 91.34 | +| sofa | 49.2 | 59.29 | +| train | 85.96 | 91.59 | +| tvmonitor | 67.55 | 79.11 | ++-------------+-------+-------+ +Summary: + ++--------+-------+-------+-------+ +| Scope | mIoU | mAcc | aAcc | ++--------+-------+-------+-------+ +| global | 76.18 | 84.87 | 94.49 | ++--------+-------+-------+-------+ +``` + **精度调试:** +> 1.在线推理前处理图片是一定格式的动态分辨率,onnx将分辨率固定为512x512会导致精度下降些。 +> 2.分辨率在512x512时onnx离线推理的精度与om精度相同,分辨率改为500x500可以提升精度,使得mask的精度与开源相比更高 +> 3.单图调试 +> ``` +> python3.7 mmsegmentation/tools/test.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --show +> python3.7 mmsegmentation/tools/pytorch2onnx.py mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py --checkpoint pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth --output-file pspnet_r50-d8_512x512_20k_voc12aug.onnx --shape 500 500 --input-img 2011_003103.jpg --show --verify +> ``` + + +### 6.2 开源精度 +[官网精度](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) + +``` +{"mode": "val", "epoch": 31, "iter": 20000, "lr": 0.0001, "mIoU": 0.76778, "mAcc": 0.85529, "aAcc": 0.94787} +``` +### 6.3 精度对比 +om推理bs1和bs16的mIoU精度均为0.7618,开源mIoU精度为0.76778,om精度下降小于1%,精度达标 + + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** +- **[T4性能数据](#72-T4性能数据)** +- **[性能对比](#73-性能对比)** + +### 7.1 npu性能数据 +1.benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +由于在线推理与onnx推理还不支持多batch,所以仅测om bs1,bs16的性能。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: +``` +[e2e] throughputRate: 7.85666, latency: 184430 +[data read] throughputRate: 37.4296, moduleLatency: 26.7168 +[preprocess] throughputRate: 28.1654, moduleLatency: 35.5045 +[infer] throughputRate: 7.91227, Interface throughputRate: 8.19018, moduleLatency: 126.139 +[post] throughputRate: 7.91221, moduleLatency: 126.387 +``` +Interface throughputRate: 
7.91221,7.91221x4=31.64884即是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 8.16118, latency: 177548 +[data read] throughputRate: 40.508, moduleLatency: 24.6865 +[preprocess] throughputRate: 29.1145, moduleLatency: 34.3472 +[infer] throughputRate: 8.21425, Interface throughputRate: 8.52684, moduleLatency: 121.508 +[post] throughputRate: 0.515815, moduleLatency: 1938.68 +``` +Interface throughputRate: 8.21425,8.21425x4=32.857即是batch16 310单卡吞吐率 + +2.npu纯推理性能 +batch1的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +./benchmark.x86_64 -round=20 -om_path=pspnet_r50-d8_512x512_20k_voc12aug_bs1.om -device_id=0 -batch_size=1 +``` +PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt: +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs1_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 8.12674samples/s, ave_latency: 123.129ms +---------------------------------------------------------------- +``` + +batch6的性能,执行20次纯推理取均值,统计吞吐率与其倒数时延(benchmark的时延是单个数据的推理时间),npu性能是一个device执行的结果 +``` +./benchmark.x86_64 -round=20 -om_path=pspnet_r50-d8_512x512_20k_voc12aug_bs16.om -device_id=0 -batch_size=16 +``` +PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt: +``` +[INFO] PureInfer result saved in ./result/PureInfer_perf_of_pspnet_r50-d8_512x512_20k_voc12aug_bs16_in_device_0.txt +-----------------PureInfer Performance Summary------------------ +[INFO] ave_throughputRate: 8.51957samples/s, ave_latency: 117.39ms +---------------------------------------------------------------- +``` +### 7.2 T4性能数据 +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 +1.batch1性能: +``` +trtexec --onnx=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --fp16 --shapes=input:1,3,500,500 +``` +gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch。其中--fp16是算子精度,目前算子精度只测--fp16的。注意--shapes是onnx的输入节点名与shape,当onnx输入节点的batch为-1时,可以用同一个onnx文件测不同batch的性能,否则用固定batch的onnx测不同batch的性能不准。 +``` +[09/24/2021-04:17:29] [I] GPU Compute +[09/24/2021-04:17:29] [I] min: 15.829 ms +[09/24/2021-04:17:29] [I] max: 20.5302 ms +[09/24/2021-04:17:29] [I] mean: 16.2649 ms +[09/24/2021-04:17:29] [I] median: 16.0951 ms +[09/24/2021-04:17:29] [I] percentile: 19.1857 ms at 99% +[09/24/2021-04:17:29] [I] total compute time: 3.04154 s + +``` +batch1 t4单卡吞吐率:1000/(16.2649/1)=61.482fps + +2.batch16性能: +``` +trtexec --onnx=pspnet_r50-d8_512x512_20k_voc12aug_sim.onnx --fp16 --shapes=input:16,3,500,500 +``` +``` +[09/24/2021-04:25:43] [I] GPU Compute +[09/24/2021-04:25:43] [I] min: 15.7839 ms +[09/24/2021-04:25:43] [I] max: 20.8466 ms +[09/24/2021-04:25:43] [I] mean: 16.2072 ms +[09/24/2021-04:25:43] [I] median: 16.0396 ms +[09/24/2021-04:25:43] [I] percentile: 19.1329 ms at 99% +[09/24/2021-04:25:43] [I] total compute time: 3.03074 s +``` +batch16 t4单卡吞吐率:1000/(16.2072/1)=61.701fps + +### 7.3 性能对比 +batch1:7.91221x4 < 1000/(16.2649/1) +batch1:8.21425x4 < 1000/(16.2072/1) +310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率小,故310性能低于T4性能,性能不达标。 + +**性能优化:** +> 由于onnx转om的过程中,两个avgpool算子的kernel size过大,导致被替换为aicpu算子,致使性能不足。需等优化底层算子后再进行测试。 + diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/get_info.py b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/get_info.py index b76d6739bcea5c528a031970f0e583e5b5644bd8..d5cab0450c20d502d0d15be2f9c0fceffa6a6191 100644 --- 
a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/get_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/get_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' 
+ extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/mmsegmentation_voc2012_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/mmsegmentation_voc2012_preprocess.py index eda09af31ec73d2327627620bf1312d871ae0ce3..793ea70de1f92724adc3c95677a6ea2ce4712745 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/mmsegmentation_voc2012_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/mmsegmentation_voc2012_preprocess.py @@ -1,95 +1,95 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import argparse -import numpy as np -import cv2 -import mmcv -import torch - - -dataset_config = { - 'mean': (123.675, 116.28, 103.53), - 'std': (58.395, 57.12, 57.375) -} - - -tensor_height = 500 -tensor_width = 500 - - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') - return resized_img - - -def voc2012_preprocess(input_image, output_bin_path): - img_name = input_image.split('/')[-1] - bin_name = img_name.split('.')[0] + ".bin" - bin_fl = os.path.join(output_bin_path, bin_name) - - one_img = mmcv.imread(os.path.join(input_image), backend='cv2') - one_img = resize(one_img, (tensor_width, tensor_height)) - mean = np.array(dataset_config['mean'], dtype=np.float16) - std = np.array(dataset_config['std'], dtype=np.float16) - one_img = mmcv.imnormalize(one_img, mean, std) - - h = one_img.shape[0] - w = one_img.shape[1] - pad_left = (tensor_width - w) // 2 - pad_top = (tensor_height - h) // 2 - pad_right = tensor_width - pad_left - w - pad_bottom = tensor_height - pad_top - h - one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) - one_img=one_img.astype(np.float16) - one_img = one_img.transpose(2, 0, 1) - one_img.tofile(bin_fl) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of FCN-8s pytorch model') - parser.add_argument("--image_folder_path", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages/", - help='image of dataset') - parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") - parser.add_argument("--bin_folder_path", default="./voc12_bin/", help='Preprocessed image buffer') - flags = parser.parse_args() - - if not os.path.exists(flags.bin_folder_path): - os.makedirs(flags.bin_folder_path) - - split = flags.split - img_suffix = '.jpg' - img_infos = [] - if split is not None: - with open(split) as f: - for line in f: - img_name = line.strip() - img_info = img_name + img_suffix - img_infos.append(img_info) - print(img_infos) - images = os.listdir(flags.image_folder_path) - - for image_name in images: - if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith( - ".jpg") and image_name in img_infos): - continue - print("start to process image {}....".format(image_name)) - path_image = os.path.join(flags.image_folder_path, image_name) - voc2012_preprocess(path_image, flags.bin_folder_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import numpy as np +import cv2 +import mmcv +import torch + + +dataset_config = { + 'mean': (123.675, 116.28, 103.53), + 'std': (58.395, 57.12, 57.375) +} + + +tensor_height = 500 +tensor_width = 500 + + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h), backend='cv2') + return resized_img + + +def voc2012_preprocess(input_image, output_bin_path): + img_name = input_image.split('/')[-1] + bin_name = img_name.split('.')[0] + ".bin" + bin_fl = os.path.join(output_bin_path, bin_name) + + one_img = mmcv.imread(os.path.join(input_image), backend='cv2') + one_img = resize(one_img, (tensor_width, tensor_height)) + mean = np.array(dataset_config['mean'], dtype=np.float16) + std = np.array(dataset_config['std'], dtype=np.float16) + one_img = mmcv.imnormalize(one_img, mean, std) + + h = one_img.shape[0] + w = one_img.shape[1] + pad_left = (tensor_width - w) // 2 + pad_top = (tensor_height - h) // 2 + pad_right = tensor_width - pad_left - w + pad_bottom = tensor_height - pad_top - h + one_img = mmcv.impad(one_img, padding=(pad_left, pad_top, pad_right, pad_bottom), pad_val=0) + one_img=one_img.astype(np.float16) + one_img = one_img.transpose(2, 0, 1) + one_img.tofile(bin_fl) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of FCN-8s pytorch model') + parser.add_argument("--image_folder_path", default="/opt/npu/VOCdevkit/VOC2012/JPEGImages/", + help='image of dataset') + parser.add_argument("--split", default="/opt/npu/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt") + parser.add_argument("--bin_folder_path", default="./voc12_bin/", help='Preprocessed image buffer') + flags = parser.parse_args() + + if not os.path.exists(flags.bin_folder_path): + os.makedirs(flags.bin_folder_path) + + split = flags.split + img_suffix = '.jpg' + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + img_name = line.strip() + img_info = img_name + img_suffix + img_infos.append(img_info) + print(img_infos) + images = os.listdir(flags.image_folder_path) + + for image_name in images: + if not (image_name.endswith(".jpeg") or image_name.endswith(".JPEG") or image_name.endswith( + ".jpg") and image_name in img_infos): + continue + print("start to process image {}....".format(image_name)) + path_image = os.path.join(flags.image_folder_path, image_name) + voc2012_preprocess(path_image, flags.bin_folder_path) diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/requirements.txt index b61b7bcdad2d8b509e3de4f97bb4d74ffcfe6429..f86ad403f35f1e158cb4397d24e2f69c92d9aa92 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.20.1 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.20.1 opencv-python == 4.5.2.52 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/README.md b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/README.md index c6363a7119210bd5143f56556f151c9e79635793..02d44da6c881afd4672cae9c49f975d7eb3adca9 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/README.md +++ 
b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/README.md @@ -1,31 +1,31 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/opt/npu/ - -2.进入工作目录 -cd pspnet - -3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -pip3.7 install -r requirements.txt - -4.获取、修改和安装模型代码 -pip3.7 install mmcv-full==1.3.10 -git clone https://github.com/open-mmlab/mmsegmentation.git -cd mmsegmentation -pip3.7 install -e . # or "python3.7 setup.py develop" -cd .. - -5.获取权重文件 -wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth - -6.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -7.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh - -8.在t4环境上将onnx文件与perf_t4.sh放在同一目录 -然后执行bash perf_t4.sh,执行时确保gpu空闲 +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/opt/npu/ + +2.进入工作目录 +cd pspnet + +3.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +pip3.7 install -r requirements.txt + +4.获取、修改和安装模型代码 +pip3.7 install mmcv-full==1.3.10 +git clone https://github.com/open-mmlab/mmsegmentation.git +cd mmsegmentation +pip3.7 install -e . # or "python3.7 setup.py develop" +cd .. + +5.获取权重文件 +wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth + +6.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +7.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh + +8.在t4环境上将onnx文件与perf_t4.sh放在同一目录 +然后执行bash perf_t4.sh,执行时确保gpu空闲 diff --git a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/PSPnet/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/README.md b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/README.md index 5d027295ce8f68657e1c4cb877e3a165dbd68625..2213525e4055ae8b68e65b0901d804ad11fc2f36 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/README.md @@ -1,67 +1,67 @@ -# SOLOV1模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip install -r requirements.txt -``` -说明:PyTorch选用开源1.9.0版本 - - - -2.获取,修改与安装开源模型代码 -安装mmcv -``` -git clone https://github.com/open-mmlab/mmcv -b v0.2.16 -cd mmcv -python setup.py build_ext -python setup.py develop -cd .. -``` -获取SOLOv1代码 -``` -git clone https://github.com/WXinlong/SOLO.git -b master -cd SOLO -git reset --hard 95f3732d5fbb0d7c7044c7dd074f439d48a72ce5 -patch -p1 < ../MMDET.diff -patch -p1 < ../SOLOV1.diff -pip install -r requirements/build.txt -pip install -v -e . -cd .. -``` - - -3.获取权重文件 - -请从[原始开源代码仓](https://github.com/WXinlong/SOLO)下载SOLO_R50_1x模型的权重文件 - -4.数据集 - -数据集的获取请参考[原始开源代码仓](https://github.com/WXinlong/SOLO)的方式获取。请将val2017图片及其标注文件放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: -``` -root -├── dataset -│ ├── coco -│ │ ├── annotations -│ │ ├── val2017 -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -#启动脚本内1-2行为pth2onnx,3-4行为onnx2om,脚本执行完成后会生成SOLOV2.onnx、SOLOV2_sim.onnx、solov2.om三个文件。 -bash test/pth2om.sh -#启动脚本内9-21行为前处理,用以获取处理后的图片信息与bin文件;23-33为获取图片info文件,为推理做准备;35-42行为benchmark推理;44-51行为后处理,同时会输出模型测评的精度;57-63行为打印om推理性能。 -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| SOLOV1 bs1 | mAP:32.1% | mAP:32.1% | 5.1fps | 6.118fps | - +# SOLOV1模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip install -r requirements.txt +``` +说明:PyTorch选用开源1.9.0版本 + + + +2.获取,修改与安装开源模型代码 +安装mmcv +``` +git clone https://github.com/open-mmlab/mmcv -b v0.2.16 +cd mmcv +python setup.py build_ext +python setup.py develop +cd .. 
+``` +获取SOLOv1代码 +``` +git clone https://github.com/WXinlong/SOLO.git -b master +cd SOLO +git reset --hard 95f3732d5fbb0d7c7044c7dd074f439d48a72ce5 +patch -p1 < ../MMDET.diff +patch -p1 < ../SOLOV1.diff +pip install -r requirements/build.txt +pip install -v -e . +cd .. +``` + + +3.获取权重文件 + +请从[原始开源代码仓](https://github.com/WXinlong/SOLO)下载SOLO_R50_1x模型的权重文件 + +4.数据集 + +数据集的获取请参考[原始开源代码仓](https://github.com/WXinlong/SOLO)的方式获取。请将val2017图片及其标注文件放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: +``` +root +├── dataset +│ ├── coco +│ │ ├── annotations +│ │ ├── val2017 +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +#启动脚本内1-2行为pth2onnx,3-4行为onnx2om,脚本执行完成后会生成SOLOV2.onnx、SOLOV2_sim.onnx、solov2.om三个文件。 +bash test/pth2om.sh +#启动脚本内9-21行为前处理,用以获取处理后的图片信息与bin文件;23-33为获取图片info文件,为推理做准备;35-42行为benchmark推理;44-51行为后处理,同时会输出模型测评的精度;57-63行为打印om推理性能。 +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| SOLOV1 bs1 | mAP:32.1% | mAP:32.1% | 5.1fps | 6.118fps | + 备注:离线模型不支持多batch。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/get_info.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/get_info.py index e0979bced843ca1e88e1d264fefc428b4835871f..fc3f14c7a31fe277de71f0ecda353c4fe9f0429f 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/get_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/get_info.py @@ -1,59 +1,59 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import mmcv -from mmdet.datasets import build_dataset -import pickle as pk - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - -if __name__ == '__main__': - image_src_path = sys.argv[1] - config_path = sys.argv[2] - bin_path = sys.argv[3] - meta_path = sys.argv[4] - info_name = sys.argv[5] - info_meta_name = sys.argv[6] - width = int(sys.argv[7]) - height = int(sys.argv[8]) - - cfg = mmcv.Config.fromfile(config_path) - cfg.data.test.ann_file = image_src_path + ann_file - cfg.data.test.img_prefix = image_src_path + img_prefix - - dataset = build_dataset(cfg.data.test) - - fp1 = open(info_name, "w") - fp2 = open(info_meta_name, "w") - - for idx in range(5000): - img_id = dataset.img_ids[idx] - fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) - fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") - meta = pk.load(fp_meta) - fp_meta.close() - fp2.write("{} {}/{:0>12d}.bin {} {} {} {}\n".format( - idx, - meta_path, - img_id, - meta['img_shape'][1], - meta['img_shape'][0], - meta['ori_shape'][1], - meta['ori_shape'][0] - )) - fp1.close() - fp2.close() - print("Get info done!") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import mmcv +from mmdet.datasets import build_dataset +import pickle as pk + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + +if __name__ == '__main__': + image_src_path = sys.argv[1] + config_path = sys.argv[2] + bin_path = sys.argv[3] + meta_path = sys.argv[4] + info_name = sys.argv[5] + info_meta_name = sys.argv[6] + width = int(sys.argv[7]) + height = int(sys.argv[8]) + + cfg = mmcv.Config.fromfile(config_path) + cfg.data.test.ann_file = image_src_path + ann_file + cfg.data.test.img_prefix = image_src_path + img_prefix + + dataset = build_dataset(cfg.data.test) + + fp1 = open(info_name, "w") + fp2 = open(info_meta_name, "w") + + for idx in range(5000): + img_id = dataset.img_ids[idx] + fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) + fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") + meta = pk.load(fp_meta) + fp_meta.close() + fp2.write("{} {}/{:0>12d}.bin {} {} {} {}\n".format( + idx, + meta_path, + img_id, + meta['img_shape'][1], + meta['img_shape'][0], + meta['ori_shape'][1], + meta['ori_shape'][0] + )) + fp1.close() + fp2.close() + print("Get info done!") diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/modelzoo_level.txt index bdc5dd889d3e2c5450f8df13820f5d359f1a7830..5a90c7c76ee637d956ad5517b60434e8838a0ce6 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus: PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/pth2onnx.py 
b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/pth2onnx.py index 4fdf107ff02a2b6e9dbb89be83d5bdaac7e6eda9..2e2add16252a058d80fc9948ef070723da003f3d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/pth2onnx.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import argparse -import numpy as np -from mmdet.apis import init_detector - -input_names = ['input'] -output_names = ['seg_preds', 'cate_labels', 'cate_scores'] - - -def pth2onnx(args, fake_input): - model = init_detector(args.config, args.pth_path, device='cpu') - model.forward = model.simple_test - torch.onnx.export(model, fake_input, args.out, input_names=input_names, output_names=output_names, verbose=False, - opset_version=11) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--config', help='model config') - parser.add_argument('--out', help='onnx output name') - parser.add_argument('--pth_path', help='model pth path') - parser.add_argument('--shape', type=int, nargs='+', help='input image size hxw') - args = parser.parse_args() - assert len(args.shape) == 2 - fake_input = torch.randn(1, 3, args.shape[0], args.shape[1]) - pth2onnx(args, fake_input) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import argparse +import numpy as np +from mmdet.apis import init_detector + +input_names = ['input'] +output_names = ['seg_preds', 'cate_labels', 'cate_scores'] + + +def pth2onnx(args, fake_input): + model = init_detector(args.config, args.pth_path, device='cpu') + model.forward = model.simple_test + torch.onnx.export(model, fake_input, args.out, input_names=input_names, output_names=output_names, verbose=False, + opset_version=11) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--config', help='model config') + parser.add_argument('--out', help='onnx output name') + parser.add_argument('--pth_path', help='model pth path') + parser.add_argument('--shape', type=int, nargs='+', help='input image size hxw') + args = parser.parse_args() + assert len(args.shape) == 2 + fake_input = torch.randn(1, 3, args.shape[0], args.shape[1]) + pth2onnx(args, fake_input) diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/requirements.txt index 565fab966541c4a9b6eb10e9fef1ab8e58ded358..3802263668b53ce19260825a4ba3d89da72a5ac9 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.9.0 -torchvision == 0.10.0 -onnx == 1.9.0 -onnx-simplifier == 0.3.6 -onnxruntime == 1.8.0 +torch == 1.9.0 +torchvision == 0.10.0 +onnx == 1.9.0 +onnx-simplifier == 0.3.6 +onnxruntime == 1.8.0 numpy == 1.21.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_postprocess.py index 88bd4578dc1dfab85b6dc56cd3361bd80e078b6e..0caf6ee4690ac1cc548458d3365b6ca6320c901e 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_postprocess.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import mmcv -import numpy as np -import argparse -import torch -import torch.nn.functional as F -import pycocotools.mask as mask_util -from mmdet.core import coco_eval, results2json, results2json_segm -from mmdet.datasets import build_dataset - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - - -def get_masks(result, num_classes=80): - for cur_result in result: - masks = [[] for _ in range(num_classes)] - if cur_result is None: - return masks - seg_pred = cur_result[0].astype(np.uint8) - cate_label = cur_result[1].astype(np.int) - cate_score = cur_result[2].astype(np.float) - num_ins = seg_pred.shape[0] - for idx in range(num_ins): - cur_mask = seg_pred[idx, ...] 
- rle = mask_util.encode( - np.array(cur_mask[:, :, np.newaxis], order='F'))[0] - rst = (rle, cate_score[idx]) - masks[cate_label[idx]].append(rst) - return masks - - -def handle_seg(seg, img_shape, ori_shape, input_shape=(800, 1216), mask_thr=0.5): - seg = torch.tensor(seg) - h, w, = img_shape - pad_left = (input_shape[1] - w) // 2 - pad_top = (input_shape[0] - h) // 2 - seg = F.interpolate(seg.unsqueeze(0), - size=input_shape, - mode='bilinear')[:, :, pad_top:pad_top + h, pad_left:pad_left + w] - - seg = F.interpolate(seg, - size=ori_shape[:2], - mode='bilinear').squeeze(0) - seg = seg > mask_thr - return seg.numpy() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--dataset_path') - parser.add_argument('--model_config') - parser.add_argument('--bin_data_path') - parser.add_argument('--meta_info') - parser.add_argument('--net_out_num', type=int) - parser.add_argument("--model_input_height", type=int, help='input tensor height') - parser.add_argument("--model_input_width", type=int, help='input tensor width') - - args = parser.parse_args() - - cfg = mmcv.Config.fromfile(args.model_config) - cfg.data.test.test_mode = True - cfg.data.test.ann_file = args.dataset_path + ann_file - cfg.data.test.img_prefix = args.dataset_path + img_prefix - dataset = build_dataset(cfg.data.test) - num_classes = len(dataset.CLASSES) - - results = [] - - fp = open(args.meta_info, "r") - for line in fp.readlines(): - _, file_path, img_w, img_h, ori_w, ori_h = line.split() - img_w = int(img_w) - img_h = int(img_h) - ori_w = int(ori_w) - ori_h = int(ori_h) - file_name = file_path.split("/")[1].replace(".bin", "") - result = [] - for idx in range(args.net_out_num): - if idx == 1: - result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.int32)) - else: - result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.float32)) - result[0].shape = (100, args.model_input_height // 4, args.model_input_width // 4) - result[0] = handle_seg(result[0], (img_h, img_w), (ori_h, ori_w), - (args.model_input_height, args.model_input_width)) - result = get_masks([result], num_classes) - results.append(result) - fp.close() - result_files = results2json_segm(dataset, results, "results_solo.pkl") - coco_eval(result_files, ["segm"], dataset.coco) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mmcv +import numpy as np +import argparse +import torch +import torch.nn.functional as F +import pycocotools.mask as mask_util +from mmdet.core import coco_eval, results2json, results2json_segm +from mmdet.datasets import build_dataset + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + + +def get_masks(result, num_classes=80): + for cur_result in result: + masks = [[] for _ in range(num_classes)] + if cur_result is None: + return masks + seg_pred = cur_result[0].astype(np.uint8) + cate_label = cur_result[1].astype(np.int) + cate_score = cur_result[2].astype(np.float) + num_ins = seg_pred.shape[0] + for idx in range(num_ins): + cur_mask = seg_pred[idx, ...] + rle = mask_util.encode( + np.array(cur_mask[:, :, np.newaxis], order='F'))[0] + rst = (rle, cate_score[idx]) + masks[cate_label[idx]].append(rst) + return masks + + +def handle_seg(seg, img_shape, ori_shape, input_shape=(800, 1216), mask_thr=0.5): + seg = torch.tensor(seg) + h, w, = img_shape + pad_left = (input_shape[1] - w) // 2 + pad_top = (input_shape[0] - h) // 2 + seg = F.interpolate(seg.unsqueeze(0), + size=input_shape, + mode='bilinear')[:, :, pad_top:pad_top + h, pad_left:pad_left + w] + + seg = F.interpolate(seg, + size=ori_shape[:2], + mode='bilinear').squeeze(0) + seg = seg > mask_thr + return seg.numpy() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--dataset_path') + parser.add_argument('--model_config') + parser.add_argument('--bin_data_path') + parser.add_argument('--meta_info') + parser.add_argument('--net_out_num', type=int) + parser.add_argument("--model_input_height", type=int, help='input tensor height') + parser.add_argument("--model_input_width", type=int, help='input tensor width') + + args = parser.parse_args() + + cfg = mmcv.Config.fromfile(args.model_config) + cfg.data.test.test_mode = True + cfg.data.test.ann_file = args.dataset_path + ann_file + cfg.data.test.img_prefix = args.dataset_path + img_prefix + dataset = build_dataset(cfg.data.test) + num_classes = len(dataset.CLASSES) + + results = [] + + fp = open(args.meta_info, "r") + for line in fp.readlines(): + _, file_path, img_w, img_h, ori_w, ori_h = line.split() + img_w = int(img_w) + img_h = int(img_h) + ori_w = int(ori_w) + ori_h = int(ori_h) + file_name = file_path.split("/")[1].replace(".bin", "") + result = [] + for idx in range(args.net_out_num): + if idx == 1: + result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.int32)) + else: + result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.float32)) + result[0].shape = (100, args.model_input_height // 4, args.model_input_width // 4) + result[0] = handle_seg(result[0], (img_h, img_w), (ori_h, ori_w), + (args.model_input_height, args.model_input_width)) + result = get_masks([result], num_classes) + results.append(result) + fp.close() + result_files = results2json_segm(dataset, results, "results_solo.pkl") + coco_eval(result_files, ["segm"], dataset.coco) diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_preprocess.py index d82a93d367ef72200a1b0726b5c28a95843647aa..448b1cc203d63920dedd8ddcc28a1d264c45d95d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/solov1_preprocess.py @@ -1,86 +1,86 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse -import numpy as np -import cv2 -import mmcv -import torch -import pickle as pk -import multiprocessing - -flags = None - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h)) - return resized_img, scale_ratio - - -def gen_input_bin(file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - image = mmcv.imread(os.path.join(flags.image_src_path, file)) - ori_shape = image.shape - image, scale_factor = resize(image, (flags.model_input_width, flags.model_input_height)) - img_shape = image.shape - mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) - std = np.array([58.395, 57.12, 57.375], dtype=np.float32) - image = mmcv.imnormalize(image, mean, std) - h = image.shape[0] - w = image.shape[1] - pad_left = (flags.model_input_width - w) // 2 - pad_top = (flags.model_input_height - h) // 2 - pad_right = flags.model_input_width - pad_left - w - pad_bottom = flags.model_input_height - pad_top - h - image = cv2.copyMakeBorder(image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0) - image = image.transpose(2, 0, 1) - image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) - image_meta = {'img_shape': img_shape, 'scale_factor': scale_factor, 'ori_shape': ori_shape} - with open(os.path.join(flags.meta_file_path, file.split('.')[0] + ".pk"), "wb") as fp: - pk.dump(image_meta, fp) - - -def preprocess(): - files = os.listdir(flags.image_src_path) - file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! 
please ensure bin files generated.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') - parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') - parser.add_argument("--bin_file_path", default="val2017_bin", help='Preprocessed image buffer') - parser.add_argument("--meta_file_path", default="val2017_bin_meta", help='Get image meta') - parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') - parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) - if not os.path.exists(flags.meta_file_path): - os.makedirs(flags.meta_file_path) - preprocess() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import numpy as np +import cv2 +import mmcv +import torch +import pickle as pk +import multiprocessing + +flags = None + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h)) + return resized_img, scale_ratio + + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + image = mmcv.imread(os.path.join(flags.image_src_path, file)) + ori_shape = image.shape + image, scale_factor = resize(image, (flags.model_input_width, flags.model_input_height)) + img_shape = image.shape + mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) + std = np.array([58.395, 57.12, 57.375], dtype=np.float32) + image = mmcv.imnormalize(image, mean, std) + h = image.shape[0] + w = image.shape[1] + pad_left = (flags.model_input_width - w) // 2 + pad_top = (flags.model_input_height - h) // 2 + pad_right = flags.model_input_width - pad_left - w + pad_bottom = flags.model_input_height - pad_top - h + image = cv2.copyMakeBorder(image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0) + image = image.transpose(2, 0, 1) + image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + image_meta = {'img_shape': img_shape, 'scale_factor': scale_factor, 'ori_shape': ori_shape} + with open(os.path.join(flags.meta_file_path, file.split('.')[0] + ".pk"), "wb") as fp: + pk.dump(image_meta, fp) + + +def preprocess(): + files = os.listdir(flags.image_src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') + parser.add_argument("--bin_file_path", default="val2017_bin", help='Preprocessed image buffer') + parser.add_argument("--meta_file_path", default="val2017_bin_meta", help='Get image meta') + parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') + parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) + if not os.path.exists(flags.meta_file_path): + os.makedirs(flags.meta_file_path) + preprocess() diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV1/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/README.md b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/README.md index bc9a8f0bce10f33734d413b420683d63ac3791df..59d8bf9d7d23fbf255877dde4f75bfcd39c3cfa2 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/README.md @@ -1,67 +1,67 @@ -# SOLOV2模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip install -r requirements.txt -``` -说明:PyTorch选用开源1.9.0版本 - - - -2.获取,修改与安装开源模型代码 -安装mmcv -``` -git clone https://github.com/open-mmlab/mmcv -b v0.2.16 -cd mmcv -python setup.py build_ext -python setup.py develop -cd .. -``` -获取SOLOv2代码 -``` -git clone https://github.com/WXinlong/SOLO.git -b master -cd SOLO -git reset --hard 95f3732d5fbb0d7c7044c7dd074f439d48a72ce5 -patch -p1 < ../MMDET.diff -patch -p1 < ../SOLOV2.diff -pip install -r requirements/build.txt -pip install -v -e . -cd .. -``` - - -3.获取权重文件 - -请从[原始开源代码仓](https://github.com/WXinlong/SOLO)下载SOLOv2_R50_1x模型的权重文件 - -4.数据集 - -数据集的获取请参考[原始开源代码仓](https://github.com/WXinlong/SOLO)的方式获取。请将val2017图片及其标注文件放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: -``` -root -├── dataset -│ ├── coco -│ │ ├── annotations -│ │ ├── val2017 -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -#启动脚本内1-2行为pth2onnx,3-4行为onnx2om,脚本执行完成后会生成SOLOV2.onnx、SOLOV2_sim.onnx、solov2.om三个文件。 -bash test/pth2om.sh -#启动脚本内9-21行为前处理,用以获取处理后的图片信息与bin文件;23-33为获取图片info文件,为推理做准备;35-42行为benchmark推理;44-51行为后处理,同时会输出模型测评的精度;57-63行为打印om推理性能。 -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - **评测结果:** -| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| SOLOV2 bs1 | mAP:34.0% | mAP:34.0% | 7.58fps | 9.877fps | - -备注:离线模型不支持多batch。 +# SOLOV2模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip install -r requirements.txt +``` +说明:PyTorch选用开源1.9.0版本 + + + +2.获取,修改与安装开源模型代码 +安装mmcv +``` +git clone https://github.com/open-mmlab/mmcv -b v0.2.16 +cd mmcv +python setup.py build_ext +python setup.py develop +cd .. +``` +获取SOLOv2代码 +``` +git clone https://github.com/WXinlong/SOLO.git -b master +cd SOLO +git reset --hard 95f3732d5fbb0d7c7044c7dd074f439d48a72ce5 +patch -p1 < ../MMDET.diff +patch -p1 < ../SOLOV2.diff +pip install -r requirements/build.txt +pip install -v -e . +cd .. 
+``` + + +3.获取权重文件 + +请从[原始开源代码仓](https://github.com/WXinlong/SOLO)下载SOLOv2_R50_1x模型的权重文件 + +4.数据集 + +数据集的获取请参考[原始开源代码仓](https://github.com/WXinlong/SOLO)的方式获取。请将val2017图片及其标注文件放入服务器/root/dataset/coco/文件夹,val2017目录存放coco数据集的验证集图片,annotations目录存放coco数据集的instances_val2017.json,文件目录结构如下: +``` +root +├── dataset +│ ├── coco +│ │ ├── annotations +│ │ ├── val2017 +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +#启动脚本内1-2行为pth2onnx,3-4行为onnx2om,脚本执行完成后会生成SOLOV2.onnx、SOLOV2_sim.onnx、solov2.om三个文件。 +bash test/pth2om.sh +#启动脚本内9-21行为前处理,用以获取处理后的图片信息与bin文件;23-33为获取图片info文件,为推理做准备;35-42行为benchmark推理;44-51行为后处理,同时会输出模型测评的精度;57-63行为打印om推理性能。 +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + **评测结果:** +| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| SOLOV2 bs1 | mAP:34.0% | mAP:34.0% | 7.58fps | 9.877fps | + +备注:离线模型不支持多batch。 diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/get_info.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/get_info.py index e0979bced843ca1e88e1d264fefc428b4835871f..fc3f14c7a31fe277de71f0ecda353c4fe9f0429f 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/get_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/get_info.py @@ -1,59 +1,59 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import mmcv -from mmdet.datasets import build_dataset -import pickle as pk - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - -if __name__ == '__main__': - image_src_path = sys.argv[1] - config_path = sys.argv[2] - bin_path = sys.argv[3] - meta_path = sys.argv[4] - info_name = sys.argv[5] - info_meta_name = sys.argv[6] - width = int(sys.argv[7]) - height = int(sys.argv[8]) - - cfg = mmcv.Config.fromfile(config_path) - cfg.data.test.ann_file = image_src_path + ann_file - cfg.data.test.img_prefix = image_src_path + img_prefix - - dataset = build_dataset(cfg.data.test) - - fp1 = open(info_name, "w") - fp2 = open(info_meta_name, "w") - - for idx in range(5000): - img_id = dataset.img_ids[idx] - fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) - fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") - meta = pk.load(fp_meta) - fp_meta.close() - fp2.write("{} {}/{:0>12d}.bin {} {} {} {}\n".format( - idx, - meta_path, - img_id, - meta['img_shape'][1], - meta['img_shape'][0], - meta['ori_shape'][1], - meta['ori_shape'][0] - )) - fp1.close() - fp2.close() - print("Get info done!") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import mmcv +from mmdet.datasets import build_dataset +import pickle as pk + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + +if __name__ == '__main__': + image_src_path = sys.argv[1] + config_path = sys.argv[2] + bin_path = sys.argv[3] + meta_path = sys.argv[4] + info_name = sys.argv[5] + info_meta_name = sys.argv[6] + width = int(sys.argv[7]) + height = int(sys.argv[8]) + + cfg = mmcv.Config.fromfile(config_path) + cfg.data.test.ann_file = image_src_path + ann_file + cfg.data.test.img_prefix = image_src_path + img_prefix + + dataset = build_dataset(cfg.data.test) + + fp1 = open(info_name, "w") + fp2 = open(info_meta_name, "w") + + for idx in range(5000): + img_id = dataset.img_ids[idx] + fp1.write("{} {}/{:0>12d}.bin {} {}\n".format(idx, bin_path, img_id, width, height)) + fp_meta = open("%s/%012d.pk" % (meta_path, img_id), "rb") + meta = pk.load(fp_meta) + fp_meta.close() + fp2.write("{} {}/{:0>12d}.bin {} {} {} {}\n".format( + idx, + meta_path, + img_id, + meta['img_shape'][1], + meta['img_shape'][0], + meta['ori_shape'][1], + meta['ori_shape'][0] + )) + fp1.close() + fp2.close() + print("Get info done!") diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/modelzoo_level.txt index bdc5dd889d3e2c5450f8df13820f5d359f1a7830..5a90c7c76ee637d956ad5517b60434e8838a0ce6 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus: PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/pth2onnx.py index 4fdf107ff02a2b6e9dbb89be83d5bdaac7e6eda9..2e2add16252a058d80fc9948ef070723da003f3d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/pth2onnx.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import argparse -import numpy as np -from mmdet.apis import init_detector - -input_names = ['input'] -output_names = ['seg_preds', 'cate_labels', 'cate_scores'] - - -def pth2onnx(args, fake_input): - model = init_detector(args.config, args.pth_path, device='cpu') - model.forward = model.simple_test - torch.onnx.export(model, fake_input, args.out, input_names=input_names, output_names=output_names, verbose=False, - opset_version=11) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--config', help='model config') - parser.add_argument('--out', help='onnx output name') - parser.add_argument('--pth_path', help='model pth path') - parser.add_argument('--shape', type=int, nargs='+', help='input image size hxw') - args = parser.parse_args() - assert len(args.shape) == 2 - fake_input = torch.randn(1, 3, args.shape[0], args.shape[1]) - pth2onnx(args, fake_input) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import argparse +import numpy as np +from mmdet.apis import init_detector + +input_names = ['input'] +output_names = ['seg_preds', 'cate_labels', 'cate_scores'] + + +def pth2onnx(args, fake_input): + model = init_detector(args.config, args.pth_path, device='cpu') + model.forward = model.simple_test + torch.onnx.export(model, fake_input, args.out, input_names=input_names, output_names=output_names, verbose=False, + opset_version=11) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--config', help='model config') + parser.add_argument('--out', help='onnx output name') + parser.add_argument('--pth_path', help='model pth path') + parser.add_argument('--shape', type=int, nargs='+', help='input image size hxw') + args = parser.parse_args() + assert len(args.shape) == 2 + fake_input = torch.randn(1, 3, args.shape[0], args.shape[1]) + pth2onnx(args, fake_input) diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/requirements.txt b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/requirements.txt index 565fab966541c4a9b6eb10e9fef1ab8e58ded358..3802263668b53ce19260825a4ba3d89da72a5ac9 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/requirements.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.9.0 -torchvision == 0.10.0 -onnx == 1.9.0 -onnx-simplifier == 0.3.6 -onnxruntime == 1.8.0 +torch == 1.9.0 +torchvision == 0.10.0 +onnx == 1.9.0 +onnx-simplifier == 0.3.6 +onnxruntime == 1.8.0 numpy == 1.21.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_postprocess.py index 88bd4578dc1dfab85b6dc56cd3361bd80e078b6e..0caf6ee4690ac1cc548458d3365b6ca6320c901e 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_postprocess.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the 
Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import mmcv -import numpy as np -import argparse -import torch -import torch.nn.functional as F -import pycocotools.mask as mask_util -from mmdet.core import coco_eval, results2json, results2json_segm -from mmdet.datasets import build_dataset - -ann_file = '/annotations/instances_val2017.json' -img_prefix = '/val2017/' - - -def get_masks(result, num_classes=80): - for cur_result in result: - masks = [[] for _ in range(num_classes)] - if cur_result is None: - return masks - seg_pred = cur_result[0].astype(np.uint8) - cate_label = cur_result[1].astype(np.int) - cate_score = cur_result[2].astype(np.float) - num_ins = seg_pred.shape[0] - for idx in range(num_ins): - cur_mask = seg_pred[idx, ...] - rle = mask_util.encode( - np.array(cur_mask[:, :, np.newaxis], order='F'))[0] - rst = (rle, cate_score[idx]) - masks[cate_label[idx]].append(rst) - return masks - - -def handle_seg(seg, img_shape, ori_shape, input_shape=(800, 1216), mask_thr=0.5): - seg = torch.tensor(seg) - h, w, = img_shape - pad_left = (input_shape[1] - w) // 2 - pad_top = (input_shape[0] - h) // 2 - seg = F.interpolate(seg.unsqueeze(0), - size=input_shape, - mode='bilinear')[:, :, pad_top:pad_top + h, pad_left:pad_left + w] - - seg = F.interpolate(seg, - size=ori_shape[:2], - mode='bilinear').squeeze(0) - seg = seg > mask_thr - return seg.numpy() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--dataset_path') - parser.add_argument('--model_config') - parser.add_argument('--bin_data_path') - parser.add_argument('--meta_info') - parser.add_argument('--net_out_num', type=int) - parser.add_argument("--model_input_height", type=int, help='input tensor height') - parser.add_argument("--model_input_width", type=int, help='input tensor width') - - args = parser.parse_args() - - cfg = mmcv.Config.fromfile(args.model_config) - cfg.data.test.test_mode = True - cfg.data.test.ann_file = args.dataset_path + ann_file - cfg.data.test.img_prefix = args.dataset_path + img_prefix - dataset = build_dataset(cfg.data.test) - num_classes = len(dataset.CLASSES) - - results = [] - - fp = open(args.meta_info, "r") - for line in fp.readlines(): - _, file_path, img_w, img_h, ori_w, ori_h = line.split() - img_w = int(img_w) - img_h = int(img_h) - ori_w = int(ori_w) - ori_h = int(ori_h) - file_name = file_path.split("/")[1].replace(".bin", "") - result = [] - for idx in range(args.net_out_num): - if idx == 1: - result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.int32)) - else: - result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.float32)) - result[0].shape = (100, args.model_input_height // 4, args.model_input_width // 4) - result[0] = handle_seg(result[0], (img_h, img_w), (ori_h, ori_w), - (args.model_input_height, args.model_input_width)) - result = get_masks([result], num_classes) - results.append(result) - fp.close() - result_files = results2json_segm(dataset, results, "results_solo.pkl") - 
coco_eval(result_files, ["segm"], dataset.coco) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mmcv +import numpy as np +import argparse +import torch +import torch.nn.functional as F +import pycocotools.mask as mask_util +from mmdet.core import coco_eval, results2json, results2json_segm +from mmdet.datasets import build_dataset + +ann_file = '/annotations/instances_val2017.json' +img_prefix = '/val2017/' + + +def get_masks(result, num_classes=80): + for cur_result in result: + masks = [[] for _ in range(num_classes)] + if cur_result is None: + return masks + seg_pred = cur_result[0].astype(np.uint8) + cate_label = cur_result[1].astype(np.int) + cate_score = cur_result[2].astype(np.float) + num_ins = seg_pred.shape[0] + for idx in range(num_ins): + cur_mask = seg_pred[idx, ...] + rle = mask_util.encode( + np.array(cur_mask[:, :, np.newaxis], order='F'))[0] + rst = (rle, cate_score[idx]) + masks[cate_label[idx]].append(rst) + return masks + + +def handle_seg(seg, img_shape, ori_shape, input_shape=(800, 1216), mask_thr=0.5): + seg = torch.tensor(seg) + h, w, = img_shape + pad_left = (input_shape[1] - w) // 2 + pad_top = (input_shape[0] - h) // 2 + seg = F.interpolate(seg.unsqueeze(0), + size=input_shape, + mode='bilinear')[:, :, pad_top:pad_top + h, pad_left:pad_left + w] + + seg = F.interpolate(seg, + size=ori_shape[:2], + mode='bilinear').squeeze(0) + seg = seg > mask_thr + return seg.numpy() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--dataset_path') + parser.add_argument('--model_config') + parser.add_argument('--bin_data_path') + parser.add_argument('--meta_info') + parser.add_argument('--net_out_num', type=int) + parser.add_argument("--model_input_height", type=int, help='input tensor height') + parser.add_argument("--model_input_width", type=int, help='input tensor width') + + args = parser.parse_args() + + cfg = mmcv.Config.fromfile(args.model_config) + cfg.data.test.test_mode = True + cfg.data.test.ann_file = args.dataset_path + ann_file + cfg.data.test.img_prefix = args.dataset_path + img_prefix + dataset = build_dataset(cfg.data.test) + num_classes = len(dataset.CLASSES) + + results = [] + + fp = open(args.meta_info, "r") + for line in fp.readlines(): + _, file_path, img_w, img_h, ori_w, ori_h = line.split() + img_w = int(img_w) + img_h = int(img_h) + ori_w = int(ori_w) + ori_h = int(ori_h) + file_name = file_path.split("/")[1].replace(".bin", "") + result = [] + for idx in range(args.net_out_num): + if idx == 1: + result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.int32)) + else: + result.append(np.fromfile("%s%s_%d.bin" % (args.bin_data_path, file_name, idx + 1), dtype=np.float32)) + result[0].shape = (100, args.model_input_height // 4, args.model_input_width // 4) + result[0] = handle_seg(result[0], (img_h, img_w), (ori_h, ori_w), + (args.model_input_height, args.model_input_width)) + result = get_masks([result], num_classes) + 
results.append(result) + fp.close() + result_files = results2json_segm(dataset, results, "results_solo.pkl") + coco_eval(result_files, ["segm"], dataset.coco) diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_preprocess.py index d82a93d367ef72200a1b0726b5c28a95843647aa..448b1cc203d63920dedd8ddcc28a1d264c45d95d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/solov2_preprocess.py @@ -1,86 +1,86 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import argparse -import numpy as np -import cv2 -import mmcv -import torch -import pickle as pk -import multiprocessing - -flags = None - -def resize(img, size): - old_h = img.shape[0] - old_w = img.shape[1] - scale_ratio = min(size[0] / old_w, size[1] / old_h) - new_w = int(np.floor(old_w * scale_ratio)) - new_h = int(np.floor(old_h * scale_ratio)) - resized_img = mmcv.imresize(img, (new_w, new_h)) - return resized_img, scale_ratio - - -def gen_input_bin(file_batches, batch): - i = 0 - for file in file_batches[batch]: - i = i + 1 - print("batch", batch, file, "===", i) - - image = mmcv.imread(os.path.join(flags.image_src_path, file)) - ori_shape = image.shape - image, scale_factor = resize(image, (flags.model_input_width, flags.model_input_height)) - img_shape = image.shape - mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) - std = np.array([58.395, 57.12, 57.375], dtype=np.float32) - image = mmcv.imnormalize(image, mean, std) - h = image.shape[0] - w = image.shape[1] - pad_left = (flags.model_input_width - w) // 2 - pad_top = (flags.model_input_height - h) // 2 - pad_right = flags.model_input_width - pad_left - w - pad_bottom = flags.model_input_height - pad_top - h - image = cv2.copyMakeBorder(image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0) - image = image.transpose(2, 0, 1) - image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) - image_meta = {'img_shape': img_shape, 'scale_factor': scale_factor, 'ori_shape': ori_shape} - with open(os.path.join(flags.meta_file_path, file.split('.')[0] + ".pk"), "wb") as fp: - pk.dump(image_meta, fp) - - -def preprocess(): - files = os.listdir(flags.image_src_path) - file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] - thread_pool = multiprocessing.Pool(len(file_batches)) - for batch in range(len(file_batches)): - thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) - thread_pool.close() - thread_pool.join() - print("in thread, except will not report! 
please ensure bin files generated.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') - parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') - parser.add_argument("--bin_file_path", default="val2017_bin", help='Preprocessed image buffer') - parser.add_argument("--meta_file_path", default="val2017_bin_meta", help='Get image meta') - parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') - parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') - flags = parser.parse_args() - if not os.path.exists(flags.bin_file_path): - os.makedirs(flags.bin_file_path) - if not os.path.exists(flags.meta_file_path): - os.makedirs(flags.meta_file_path) - preprocess() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import numpy as np +import cv2 +import mmcv +import torch +import pickle as pk +import multiprocessing + +flags = None + +def resize(img, size): + old_h = img.shape[0] + old_w = img.shape[1] + scale_ratio = min(size[0] / old_w, size[1] / old_h) + new_w = int(np.floor(old_w * scale_ratio)) + new_h = int(np.floor(old_h * scale_ratio)) + resized_img = mmcv.imresize(img, (new_w, new_h)) + return resized_img, scale_ratio + + +def gen_input_bin(file_batches, batch): + i = 0 + for file in file_batches[batch]: + i = i + 1 + print("batch", batch, file, "===", i) + + image = mmcv.imread(os.path.join(flags.image_src_path, file)) + ori_shape = image.shape + image, scale_factor = resize(image, (flags.model_input_width, flags.model_input_height)) + img_shape = image.shape + mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) + std = np.array([58.395, 57.12, 57.375], dtype=np.float32) + image = mmcv.imnormalize(image, mean, std) + h = image.shape[0] + w = image.shape[1] + pad_left = (flags.model_input_width - w) // 2 + pad_top = (flags.model_input_height - h) // 2 + pad_right = flags.model_input_width - pad_left - w + pad_bottom = flags.model_input_height - pad_top - h + image = cv2.copyMakeBorder(image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0) + image = image.transpose(2, 0, 1) + image.tofile(os.path.join(flags.bin_file_path, file.split('.')[0] + ".bin")) + image_meta = {'img_shape': img_shape, 'scale_factor': scale_factor, 'ori_shape': ori_shape} + with open(os.path.join(flags.meta_file_path, file.split('.')[0] + ".pk"), "wb") as fp: + pk.dump(image_meta, fp) + + +def preprocess(): + files = os.listdir(flags.image_src_path) + file_batches = [files[i:i + 100] for i in range(0, 5000, 100) if files[i:i + 100] != []] + thread_pool = multiprocessing.Pool(len(file_batches)) + for batch in range(len(file_batches)): + thread_pool.apply_async(gen_input_bin, args=(file_batches, batch)) + thread_pool.close() + thread_pool.join() + print("in thread, except will not report! 
please ensure bin files generated.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='preprocess of MaskRCNN PyTorch model') + parser.add_argument("--image_src_path", default="/root/datasets/coco/val2017", help='image of dataset') + parser.add_argument("--bin_file_path", default="val2017_bin", help='Preprocessed image buffer') + parser.add_argument("--meta_file_path", default="val2017_bin_meta", help='Get image meta') + parser.add_argument("--model_input_height", default=800, type=int, help='input tensor height') + parser.add_argument("--model_input_width", default=1216, type=int, help='input tensor width') + flags = parser.parse_args() + if not os.path.exists(flags.bin_file_path): + os.makedirs(flags.bin_file_path) + if not os.path.exists(flags.meta_file_path): + os.makedirs(flags.meta_file_path) + preprocess() diff --git a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/SOLOV2/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/SiamMask/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/SiamMask/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/SiamMask/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/SiamMask/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/modelzoo_level.txt index 51b74557c15082ae794632436e724456a0fdcfde..5c956b09db3bcabe37a0665848500a6419125fee 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK -PerfStatus:OK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:OK +ModelConvert:OK QuantStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/VNet/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/VNet/modelzoo_level.txt index 403465b84e39e2cc8a387c33aaf5a1043f8d267a..ec6168981c278bbe672c13a4eb251b6ec184eda4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/VNet/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/VNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:Perfect \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/README.md b/ACL_PyTorch/contrib/cv/segmentation/Wseg/README.md index 7da45005704981967d9c71238d7c06f65aa10ed2..61fd261750b960035e0be788d46c0cafb4579b38 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/README.md @@ -1,66 +1,66 @@ -# Wseg模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,并且安装以下可能已经安装过的需求包 -``` -pip3.7 install -r requirements.txt -``` - -2.获取开源模型的代码,并修改文件夹名称为wseg -``` -git clone https://github.com/visinf/1-stage-wseg -b master -git reset cfe5784f9905d656e0f15fba0e6eb76a3731d80f --hard -mv 1-stage-wseg wseg -``` - -3.获取权重文件 -1. 获取经过预训练的基础网络权重文件并且放在代码仓的以下路径中:`/models/weights/`. - - | Backbone | Initial Weights | - |:---:|:---:| - | WideResNet38 | [ilsvrc-cls_rna-a1_cls1000_ep-0001.pth (402M)](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-araslanov-1-stage-wseg/models/ilsvrc-cls_rna-a1_cls1000_ep-0001.pth) | - -2. 
获取功能网络权重(作者提供的pth模型)并放置于代码仓的以下路径中:(初始代码仓无snapshots文件夹,需要自己新建路径)`/snapshots/` - - | Backbone | Val | Link | - |:---:|:---:|---:| - | WideResNet38 | 62.7 | [model_enc_e020Xs0.928.pth (527M)](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-araslanov-1-stage-wseg/models/model_enc_e020Xs0.928.pth) | - -3. 移动上述两个权重文件到代码仓指定位置,以待加载使用 -``` -mkdir ./models/weights -mv ilsvrc-cls_rna-a1_cls1000_ep-0001.pth ./models/weights -mkdir ./snapshots -mv model_enc_e020Xs0.928.pth ./snapshots -``` - -4.下载数据集,解压,将文件名改为voc,并将其放于代码仓中的以下路径: `/data/` -- VOC: [Training/Validation (2GB .tar file)](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) - -``` -tar -zxvf VOCtrainval_11-May-2012.tar -mv VOCtrainval_11-May-2012 voc -mkdir ./data -mv voc ./data -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=./data -``` - **评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| WideResNet38 bs1 | [IOU: 62.7](https://github.com/visinf/1-stage-wseg) | IOU:63.7 | 5.270fps | 3.496fps | -| WideResNet38 bs4 | [IOU: 62.7](https://github.com/visinf/1-stage-wseg) | IOU:63.7 | 5.460fps | 3.912fps | - - **备注:** +# Wseg模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,并且安装以下可能已经安装过的需求包 +``` +pip3.7 install -r requirements.txt +``` + +2.获取开源模型的代码,并修改文件夹名称为wseg +``` +git clone https://github.com/visinf/1-stage-wseg -b master +git reset cfe5784f9905d656e0f15fba0e6eb76a3731d80f --hard +mv 1-stage-wseg wseg +``` + +3.获取权重文件 +1. 获取经过预训练的基础网络权重文件并且放在代码仓的以下路径中:`/models/weights/`. + + | Backbone | Initial Weights | + |:---:|:---:| + | WideResNet38 | [ilsvrc-cls_rna-a1_cls1000_ep-0001.pth (402M)](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-araslanov-1-stage-wseg/models/ilsvrc-cls_rna-a1_cls1000_ep-0001.pth) | + +2. 获取功能网络权重(作者提供的pth模型)并放置于代码仓的以下路径中:(初始代码仓无snapshots文件夹,需要自己新建路径)`/snapshots/` + + | Backbone | Val | Link | + |:---:|:---:|---:| + | WideResNet38 | 62.7 | [model_enc_e020Xs0.928.pth (527M)](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-araslanov-1-stage-wseg/models/model_enc_e020Xs0.928.pth) | + +3. 
移动上述两个权重文件到代码仓指定位置,以待加载使用 +``` +mkdir ./models/weights +mv ilsvrc-cls_rna-a1_cls1000_ep-0001.pth ./models/weights +mkdir ./snapshots +mv model_enc_e020Xs0.928.pth ./snapshots +``` + +4.下载数据集,解压,将文件名改为voc,并将其放于代码仓中的以下路径: `/data/` +- VOC: [Training/Validation (2GB .tar file)](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) + +``` +tar -zxvf VOCtrainval_11-May-2012.tar +mv VOCtrainval_11-May-2012 voc +mkdir ./data +mv voc ./data +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=./data +``` + **评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| WideResNet38 bs1 | [IOU: 62.7](https://github.com/visinf/1-stage-wseg) | IOU:63.7 | 5.270fps | 3.496fps | +| WideResNet38 bs4 | [IOU: 62.7](https://github.com/visinf/1-stage-wseg) | IOU:63.7 | 5.460fps | 3.912fps | + + **备注:** - 由于分辨率大内存使用多,故仅用bs1与bs4进行评测 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_postprocess.py index 6a70b18b443c5a4b52e22389e6e3377e00fea85d..6a4864b8ef07928434bcb941d73138cc8f807fd5 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_postprocess.py @@ -1,375 +1,375 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import numpy as np -import torch -import torchvision.transforms.functional as F -import torchvision.transforms as tf -import pydensecrf.densecrf as dcrf -import torch.nn.functional as Func -from PIL import Image, ImagePalette -from pydensecrf.utils import unary_from_softmax - -class Normalize(): - def __init__(self, mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)): - - self.mean = mean - self.std = std - - def undo(self, imgarr): - proc_img = imgarr.copy() - - proc_img[..., 0] = (self.std[0] * imgarr[..., 0] + self.mean[0]) * 255. - proc_img[..., 1] = (self.std[1] * imgarr[..., 1] + self.mean[1]) * 255. - proc_img[..., 2] = (self.std[2] * imgarr[..., 2] + self.mean[2]) * 255. - - return proc_img - - def __call__(self, img): - imgarr = np.asarray(img) - proc_img = np.empty_like(imgarr, np.float32) - - proc_img[..., 0] = (imgarr[..., 0] / 255. - self.mean[0]) / self.std[0] - proc_img[..., 1] = (imgarr[..., 1] / 255. - self.mean[1]) / self.std[1] - proc_img[..., 2] = (imgarr[..., 2] / 255. 
- self.mean[2]) / self.std[2] - - return proc_img - -def colormap(N=256): - def bitget(byteval, idx): - return ((byteval & (1 << idx)) != 0) - - dtype = 'uint8' - cmap = [] - for i in range(N): - r = g = b = 0 - c = i - for j in range(8): - r = r | (bitget(c, 0) << 7-j) - g = g | (bitget(c, 1) << 7-j) - b = b | (bitget(c, 2) << 7-j) - c = c >> 3 - - cmap.append((r, g, b)) - return cmap - -def get_palette(): - cmap = colormap() - palette = ImagePalette.ImagePalette() - for rgb in cmap: - palette.getcolor(rgb) - return palette - -def crf_inference(img, probs, t=10, scale_factor=1, labels=21): - - h, w = img.shape[:2] - n_labels = labels - - d = dcrf.DenseCRF2D(w, h, n_labels) - - unary = unary_from_softmax(probs) - unary = np.ascontiguousarray(unary) - - d.setUnaryEnergy(unary) - d.addPairwiseGaussian(sxy=3/scale_factor, compat=3) - d.addPairwiseBilateral(sxy=80/scale_factor, srgb=13, rgbim=np.copy(img), compat=10) - Q = d.inference(t) - - return np.array(Q).reshape((n_labels, h, w)) - -def _cut(x_chw, pads): - pad_h, pad_w, h, w = [int(p) for p in pads] - return x_chw[:, pad_h:(pad_h + h), pad_w:(pad_w + w)] - -def _mask_overlay(mask, image, alpha=0.3): - - mask_rgb = __mask2rgb(mask) - return alpha * image + (1 - alpha) * mask_rgb - -def __mask2rgb(mask): - im = Image.fromarray(mask).convert("P") - im.putpalette(get_palette()) - mask_rgb = np.array(im.convert("RGB"), dtype=np.float) - return mask_rgb / 255. - -def _merge_masks(masks, labels, pads, imsize_hw): - - mask_list = [] - for i, mask in enumerate(masks.split(1, dim=0)): - - # removing the padding - mask_cut = _cut(mask[0], pads[i]).unsqueeze(0) - # normalising the scale - mask_cut = Func.interpolate(mask_cut, imsize_hw, mode='bilinear', align_corners=False)[0] - - # flipping if necessary - if i % 2 == 1: - mask_cut = torch.flip(mask_cut, (-1, )) - - # getting the max response - mask_cut[1:, ::] *= labels[:, None, None] - mask_list.append(mask_cut) - - mean_mask = sum(mask_list).numpy() / len(mask_list) - - # discounting BG - mean_mask[0, ::] = np.power(mean_mask[0, ::], 3) - - return mean_mask - -def save(out_path, img_path, img_orig, all_masks, labels, pads, gt_mask): - - img_name = os.path.basename(img_path).rstrip(".jpg") - - # converting original image to [0, 255] - img_orig255 = np.round(255. 
* img_orig).astype(np.uint8) - img_orig255 = np.transpose(img_orig255, [1, 2, 0]) - img_orig255 = np.ascontiguousarray(img_orig255) - - merged_mask = _merge_masks(all_masks, pads, labels, img_orig255.shape[:2]) - pred = np.argmax(merged_mask, 0) - - # CRF - pred_crf = crf_inference(img_orig255, merged_mask, t=10, scale_factor=1, labels=21) - pred_crf = np.argmax(pred_crf, 0) - - filepath = os.path.join(out_path, img_name + '.png') - img_pred = Image.fromarray(pred.astype(np.uint8)) - img_pred.save(filepath) - - filepath = os.path.join(out_path, "crf", img_name + '.png') - img_pred_crf = Image.fromarray(pred_crf.astype(np.uint8)) - img_pred_crf.save(filepath) - mask_gt = gt_mask - masks_all = np.concatenate([pred, pred_crf, mask_gt], 1).astype(np.uint8) - images = np.concatenate([img_orig] * 3, 2) - images = np.transpose(images, [1, 2, 0]) - - overlay = _mask_overlay(masks_all, images) - filepath = os.path.join(out_path, "vis", img_name + '.png') - overlay255 = np.round(overlay * 255.).astype(np.uint8) - overlay255_crf = Image.fromarray(overlay255) - overlay255_crf.save(filepath) - -def load_img_name_list(dataset_path, index=0): - img_gt_name_list = open(dataset_path).read().splitlines() - img_name_list = [img_gt_name.split(' ')[index].strip('/') for img_gt_name in img_gt_name_list] - - return img_name_list - -def load_label_name_list(dataset_path): - return load_img_name_list(dataset_path, index=1) - -def pad(image,pad_size): - w, h = image.size - - pad_height = pad_size[0] - h - pad_width = pad_size[1] - w - - assert pad_height >= 0 and pad_width >= 0 - - pad_l = max(0, pad_width // 2) - pad_t = max(0, pad_height // 2) - - return [pad_t, pad_l] - -def imgread(imgpath): - fullpath = os.path.join(imgpath) - img = Image.open(fullpath).convert("RGB") - return fullpath, img - -def getitem(img_path,labelpath): - - name, img = imgread(img_path) - - # label_fullpath = self.label_list[idx] - assert len(labelpath) < 256, "Expected label path less than 256 for padding" - - mask = Image.open(labelpath) - mask = np.array(mask) - NUM_CLASS = 21 - labels = torch.zeros(NUM_CLASS - 1) - - # it will also be sorted - unique_labels = np.unique(mask) - - # ambigious - if unique_labels[-1] == CLASS_IDX['ambiguous']: - unique_labels = unique_labels[:-1] - - # background - if unique_labels[0] == CLASS_IDX['background']: - unique_labels = unique_labels[1:] - - assert unique_labels.size > 0, 'No labels found ' - unique_labels -= 1 # shifting since no BG class - labels[unique_labels.tolist()] = 1 - - return name, img, labels, mask.astype(np.int) - -def get_one_image(img_path,label_path): - - transform = tf.Compose([np.asarray, - Normalize()]) - pad_size = [1024, 1024] - scales = [1, 0.5, 1.5, 2.0] - batch_size = 8 - use_flips = True - - pad_batch = [] - - for i in range(batch_size): - - sub_idx = i % batch_size - scale = scales[sub_idx // (2 if use_flips else 1)] - flip = use_flips and sub_idx % 2 - - name, img, label, mask = getitem(img_path, label_path) - - target_size = (int(round(img.size[0] * scale)), - int(round(img.size[1] * scale))) - - s_img = img.resize(target_size, resample=Image.CUBIC) - - if flip: - s_img = F.hflip(s_img) - - w, h = s_img.size - pads_tl = pad(s_img,pad_size) - pad_t, pad_l = pads_tl - img = F.to_tensor(transform(img)) - pads = torch.Tensor([pad_t, pad_l, h, w]) - pad_batch.append(pads) - - return name, img, pad_batch, label, mask - -def check_dir(base_path, name): - - # create the directory - fullpath = os.path.join(base_path, name) - if not os.path.exists(fullpath): - 
os.makedirs(fullpath) - - return fullpath - -def bintonp(name,bin_path): - mask = [] - cls = [] - for i in range(8): - msk_name = bin_path + '/' + str(name) + '_' + str(i) + "_1.bin" - cls_name = bin_path + '/' + str(name) + '_' + str(i) + "_2.bin" - mask_i = np.fromfile(msk_name, dtype=np.float32) - mask_i.shape = 21,1024,1024 - cls_i = np.fromfile(cls_name, dtype=np.float32) - cls_i.shape = 20 - cls.append(cls_i) - mask.append(mask_i) - msk = np.array(mask) - clss = np.array(cls) - - return clss, msk - -def denorm(image): - - MEAN = (0.485, 0.456, 0.406) - STD = (0.229, 0.224, 0.225) - - if image.dim() == 3: - assert image.dim() == 3, "Expected image [CxHxW]" - assert image.size(0) == 3, "Expected RGB image [3xHxW]" - - for t, m, s in zip(image, MEAN, STD): - t.mul_(s).add_(m) - elif image.dim() == 4: - # batch mode - assert image.size(1) == 3, "Expected RGB image [3xHxW]" - - for t, m, s in zip((0,1,2), MEAN, STD): - image[:, t, :, :].mul_(s).add_(m) - - return image - -def postprocess(file_path, voc12_root,out_path, bin_path): - - img_name_list = load_img_name_list(file_path) - label_name_list = load_label_name_list(file_path) - - print("Start postprocess!") - print("total image number: ",len(img_name_list)) - - for i in range(len(img_name_list)): - - imgnm = img_name_list[i][33:-4] - print("==========> ", i, " ", imgnm) - img_path = voc12_root + '/' + img_name_list[i] - label_path = voc12_root + '/' + label_name_list[i] - print(img_path) - name, img, pad_batch, labels, gt_mask = get_one_image(img_path,label_path) - - with torch.no_grad(): - cls_raw, masks_pred = bintonp(imgnm,bin_path) - masks_pred = torch.from_numpy(masks_pred) - cls_raw = torch.from_numpy(cls_raw) - - cls_sigmoid = torch.sigmoid(cls_raw) - cls_sigmoid, _ = cls_sigmoid.max(0) - labels = (cls_sigmoid > 0.1) - - # saving the raw npy - image = denorm(img).numpy() - masks_pred = masks_pred.cpu() - labels = labels.type_as(masks_pred) - - save(out_path, name, image, masks_pred, pad_batch, labels, gt_mask) - - -if __name__ == '__main__': - - CLASS_IDX = { - 'background': 0, - 'aeroplane': 1, - 'bicycle': 2, - 'bird': 3, - 'boat': 4, - 'bottle': 5, - 'bus': 6, - 'car': 7, - 'cat': 8, - 'chair': 9, - 'cow': 10, - 'diningtable': 11, - 'dog': 12, - 'horse': 13, - 'motorbike': 14, - 'person': 15, - 'potted-plant': 16, - 'sheep': 17, - 'sofa': 18, - 'train': 19, - 'tv/monitor': 20, - 'ambiguous': 255 - } - - voc12_root_path = os.path.abspath(sys.argv[1]) - file_path = os.path.abspath(sys.argv[2]) - bin_path = os.path.abspath(sys.argv[3]) - out_path = os.path.abspath(sys.argv[4]) - - - check_dir(out_path, "vis") - check_dir(out_path, "crf") - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import numpy as np +import torch +import torchvision.transforms.functional as F +import torchvision.transforms as tf +import pydensecrf.densecrf as dcrf +import torch.nn.functional as Func +from PIL import Image, ImagePalette +from pydensecrf.utils import unary_from_softmax + +class Normalize(): + def __init__(self, mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)): + + self.mean = mean + self.std = std + + def undo(self, imgarr): + proc_img = imgarr.copy() + + proc_img[..., 0] = (self.std[0] * imgarr[..., 0] + self.mean[0]) * 255. + proc_img[..., 1] = (self.std[1] * imgarr[..., 1] + self.mean[1]) * 255. + proc_img[..., 2] = (self.std[2] * imgarr[..., 2] + self.mean[2]) * 255. + + return proc_img + + def __call__(self, img): + imgarr = np.asarray(img) + proc_img = np.empty_like(imgarr, np.float32) + + proc_img[..., 0] = (imgarr[..., 0] / 255. - self.mean[0]) / self.std[0] + proc_img[..., 1] = (imgarr[..., 1] / 255. - self.mean[1]) / self.std[1] + proc_img[..., 2] = (imgarr[..., 2] / 255. - self.mean[2]) / self.std[2] + + return proc_img + +def colormap(N=256): + def bitget(byteval, idx): + return ((byteval & (1 << idx)) != 0) + + dtype = 'uint8' + cmap = [] + for i in range(N): + r = g = b = 0 + c = i + for j in range(8): + r = r | (bitget(c, 0) << 7-j) + g = g | (bitget(c, 1) << 7-j) + b = b | (bitget(c, 2) << 7-j) + c = c >> 3 + + cmap.append((r, g, b)) + return cmap + +def get_palette(): + cmap = colormap() + palette = ImagePalette.ImagePalette() + for rgb in cmap: + palette.getcolor(rgb) + return palette + +def crf_inference(img, probs, t=10, scale_factor=1, labels=21): + + h, w = img.shape[:2] + n_labels = labels + + d = dcrf.DenseCRF2D(w, h, n_labels) + + unary = unary_from_softmax(probs) + unary = np.ascontiguousarray(unary) + + d.setUnaryEnergy(unary) + d.addPairwiseGaussian(sxy=3/scale_factor, compat=3) + d.addPairwiseBilateral(sxy=80/scale_factor, srgb=13, rgbim=np.copy(img), compat=10) + Q = d.inference(t) + + return np.array(Q).reshape((n_labels, h, w)) + +def _cut(x_chw, pads): + pad_h, pad_w, h, w = [int(p) for p in pads] + return x_chw[:, pad_h:(pad_h + h), pad_w:(pad_w + w)] + +def _mask_overlay(mask, image, alpha=0.3): + + mask_rgb = __mask2rgb(mask) + return alpha * image + (1 - alpha) * mask_rgb + +def __mask2rgb(mask): + im = Image.fromarray(mask).convert("P") + im.putpalette(get_palette()) + mask_rgb = np.array(im.convert("RGB"), dtype=np.float) + return mask_rgb / 255. + +def _merge_masks(masks, labels, pads, imsize_hw): + + mask_list = [] + for i, mask in enumerate(masks.split(1, dim=0)): + + # removing the padding + mask_cut = _cut(mask[0], pads[i]).unsqueeze(0) + # normalising the scale + mask_cut = Func.interpolate(mask_cut, imsize_hw, mode='bilinear', align_corners=False)[0] + + # flipping if necessary + if i % 2 == 1: + mask_cut = torch.flip(mask_cut, (-1, )) + + # getting the max response + mask_cut[1:, ::] *= labels[:, None, None] + mask_list.append(mask_cut) + + mean_mask = sum(mask_list).numpy() / len(mask_list) + + # discounting BG + mean_mask[0, ::] = np.power(mean_mask[0, ::], 3) + + return mean_mask + +def save(out_path, img_path, img_orig, all_masks, labels, pads, gt_mask): + + img_name = os.path.basename(img_path).rstrip(".jpg") + + # converting original image to [0, 255] + img_orig255 = np.round(255. 
* img_orig).astype(np.uint8) + img_orig255 = np.transpose(img_orig255, [1, 2, 0]) + img_orig255 = np.ascontiguousarray(img_orig255) + + merged_mask = _merge_masks(all_masks, pads, labels, img_orig255.shape[:2]) + pred = np.argmax(merged_mask, 0) + + # CRF + pred_crf = crf_inference(img_orig255, merged_mask, t=10, scale_factor=1, labels=21) + pred_crf = np.argmax(pred_crf, 0) + + filepath = os.path.join(out_path, img_name + '.png') + img_pred = Image.fromarray(pred.astype(np.uint8)) + img_pred.save(filepath) + + filepath = os.path.join(out_path, "crf", img_name + '.png') + img_pred_crf = Image.fromarray(pred_crf.astype(np.uint8)) + img_pred_crf.save(filepath) + mask_gt = gt_mask + masks_all = np.concatenate([pred, pred_crf, mask_gt], 1).astype(np.uint8) + images = np.concatenate([img_orig] * 3, 2) + images = np.transpose(images, [1, 2, 0]) + + overlay = _mask_overlay(masks_all, images) + filepath = os.path.join(out_path, "vis", img_name + '.png') + overlay255 = np.round(overlay * 255.).astype(np.uint8) + overlay255_crf = Image.fromarray(overlay255) + overlay255_crf.save(filepath) + +def load_img_name_list(dataset_path, index=0): + img_gt_name_list = open(dataset_path).read().splitlines() + img_name_list = [img_gt_name.split(' ')[index].strip('/') for img_gt_name in img_gt_name_list] + + return img_name_list + +def load_label_name_list(dataset_path): + return load_img_name_list(dataset_path, index=1) + +def pad(image,pad_size): + w, h = image.size + + pad_height = pad_size[0] - h + pad_width = pad_size[1] - w + + assert pad_height >= 0 and pad_width >= 0 + + pad_l = max(0, pad_width // 2) + pad_t = max(0, pad_height // 2) + + return [pad_t, pad_l] + +def imgread(imgpath): + fullpath = os.path.join(imgpath) + img = Image.open(fullpath).convert("RGB") + return fullpath, img + +def getitem(img_path,labelpath): + + name, img = imgread(img_path) + + # label_fullpath = self.label_list[idx] + assert len(labelpath) < 256, "Expected label path less than 256 for padding" + + mask = Image.open(labelpath) + mask = np.array(mask) + NUM_CLASS = 21 + labels = torch.zeros(NUM_CLASS - 1) + + # it will also be sorted + unique_labels = np.unique(mask) + + # ambigious + if unique_labels[-1] == CLASS_IDX['ambiguous']: + unique_labels = unique_labels[:-1] + + # background + if unique_labels[0] == CLASS_IDX['background']: + unique_labels = unique_labels[1:] + + assert unique_labels.size > 0, 'No labels found ' + unique_labels -= 1 # shifting since no BG class + labels[unique_labels.tolist()] = 1 + + return name, img, labels, mask.astype(np.int) + +def get_one_image(img_path,label_path): + + transform = tf.Compose([np.asarray, + Normalize()]) + pad_size = [1024, 1024] + scales = [1, 0.5, 1.5, 2.0] + batch_size = 8 + use_flips = True + + pad_batch = [] + + for i in range(batch_size): + + sub_idx = i % batch_size + scale = scales[sub_idx // (2 if use_flips else 1)] + flip = use_flips and sub_idx % 2 + + name, img, label, mask = getitem(img_path, label_path) + + target_size = (int(round(img.size[0] * scale)), + int(round(img.size[1] * scale))) + + s_img = img.resize(target_size, resample=Image.CUBIC) + + if flip: + s_img = F.hflip(s_img) + + w, h = s_img.size + pads_tl = pad(s_img,pad_size) + pad_t, pad_l = pads_tl + img = F.to_tensor(transform(img)) + pads = torch.Tensor([pad_t, pad_l, h, w]) + pad_batch.append(pads) + + return name, img, pad_batch, label, mask + +def check_dir(base_path, name): + + # create the directory + fullpath = os.path.join(base_path, name) + if not os.path.exists(fullpath): + 
os.makedirs(fullpath) + + return fullpath + +def bintonp(name,bin_path): + mask = [] + cls = [] + for i in range(8): + msk_name = bin_path + '/' + str(name) + '_' + str(i) + "_1.bin" + cls_name = bin_path + '/' + str(name) + '_' + str(i) + "_2.bin" + mask_i = np.fromfile(msk_name, dtype=np.float32) + mask_i.shape = 21,1024,1024 + cls_i = np.fromfile(cls_name, dtype=np.float32) + cls_i.shape = 20 + cls.append(cls_i) + mask.append(mask_i) + msk = np.array(mask) + clss = np.array(cls) + + return clss, msk + +def denorm(image): + + MEAN = (0.485, 0.456, 0.406) + STD = (0.229, 0.224, 0.225) + + if image.dim() == 3: + assert image.dim() == 3, "Expected image [CxHxW]" + assert image.size(0) == 3, "Expected RGB image [3xHxW]" + + for t, m, s in zip(image, MEAN, STD): + t.mul_(s).add_(m) + elif image.dim() == 4: + # batch mode + assert image.size(1) == 3, "Expected RGB image [3xHxW]" + + for t, m, s in zip((0,1,2), MEAN, STD): + image[:, t, :, :].mul_(s).add_(m) + + return image + +def postprocess(file_path, voc12_root,out_path, bin_path): + + img_name_list = load_img_name_list(file_path) + label_name_list = load_label_name_list(file_path) + + print("Start postprocess!") + print("total image number: ",len(img_name_list)) + + for i in range(len(img_name_list)): + + imgnm = img_name_list[i][33:-4] + print("==========> ", i, " ", imgnm) + img_path = voc12_root + '/' + img_name_list[i] + label_path = voc12_root + '/' + label_name_list[i] + print(img_path) + name, img, pad_batch, labels, gt_mask = get_one_image(img_path,label_path) + + with torch.no_grad(): + cls_raw, masks_pred = bintonp(imgnm,bin_path) + masks_pred = torch.from_numpy(masks_pred) + cls_raw = torch.from_numpy(cls_raw) + + cls_sigmoid = torch.sigmoid(cls_raw) + cls_sigmoid, _ = cls_sigmoid.max(0) + labels = (cls_sigmoid > 0.1) + + # saving the raw npy + image = denorm(img).numpy() + masks_pred = masks_pred.cpu() + labels = labels.type_as(masks_pred) + + save(out_path, name, image, masks_pred, pad_batch, labels, gt_mask) + + +if __name__ == '__main__': + + CLASS_IDX = { + 'background': 0, + 'aeroplane': 1, + 'bicycle': 2, + 'bird': 3, + 'boat': 4, + 'bottle': 5, + 'bus': 6, + 'car': 7, + 'cat': 8, + 'chair': 9, + 'cow': 10, + 'diningtable': 11, + 'dog': 12, + 'horse': 13, + 'motorbike': 14, + 'person': 15, + 'potted-plant': 16, + 'sheep': 17, + 'sofa': 18, + 'train': 19, + 'tv/monitor': 20, + 'ambiguous': 255 + } + + voc12_root_path = os.path.abspath(sys.argv[1]) + file_path = os.path.abspath(sys.argv[2]) + bin_path = os.path.abspath(sys.argv[3]) + out_path = os.path.abspath(sys.argv[4]) + + + check_dir(out_path, "vis") + check_dir(out_path, "crf") + postprocess(file_path,voc12_root_path,out_path,bin_path) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_preprocess.py index d053c79142a020cf239e947803743d840f2ec615..015b19101d1ee5e6d96fe298911b18a8cc0e1ac7 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/Wseg_preprocess.py @@ -1,126 +1,126 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import numpy as np -from PIL import Image -import torchvision.transforms.functional as F -import torchvision.transforms as tf - - -class Normalize(): - def __init__(self, mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)): - - self.mean = mean - self.std = std - - def undo(self, imgarr): - proc_img = imgarr.copy() - - proc_img[..., 0] = (self.std[0] * imgarr[..., 0] + self.mean[0]) * 255. - proc_img[..., 1] = (self.std[1] * imgarr[..., 1] + self.mean[1]) * 255. - proc_img[..., 2] = (self.std[2] * imgarr[..., 2] + self.mean[2]) * 255. - - return proc_img - - def __call__(self, img): - imgarr = np.asarray(img) - proc_img = np.empty_like(imgarr, np.float32) - - proc_img[..., 0] = (imgarr[..., 0] / 255. - self.mean[0]) / self.std[0] - proc_img[..., 1] = (imgarr[..., 1] / 255. - self.mean[1]) / self.std[1] - proc_img[..., 2] = (imgarr[..., 2] / 255. - self.mean[2]) / self.std[2] - - return proc_img - -def load_img_name_list(file_path, index=0): - img_gt_name_list = open(file_path).read().splitlines() - img_name_list = [img_gt_name.split(' ')[index].strip('/') for img_gt_name in img_gt_name_list] - return img_name_list - -def pad(image,pad_size): - w, h = image.size - - pad_mask = Image.new("L", image.size) - pad_height = pad_size[0] - h - pad_width = pad_size[1] - w - - assert pad_height >= 0 and pad_width >= 0 - - pad_l = max(0, pad_width // 2) - pad_r = max(0, pad_width - pad_l) - pad_t = max(0, pad_height // 2) - pad_b = max(0, pad_height - pad_t) - - image = F.pad(image, (pad_l, pad_t, pad_r, pad_b), fill=0, padding_mode="constant") - pad_mask = F.pad(pad_mask, (pad_l, pad_t, pad_r, pad_b), fill=1, padding_mode="constant") - - return image, pad_mask - -def get_batch_bin(img,imgname): - - pad_size = [1024, 1024] - scales = [1, 0.5, 1.5, 2.0] - batch_size = 8 - use_flips = True - - transform = tf.Compose([np.asarray, - Normalize()]) - - for i in range(batch_size): - - sub_idx = i % batch_size - scale = scales[sub_idx // (2 if use_flips else 1)] - - flip = use_flips and sub_idx % 2 - - target_size = (int(round(img.size[0] * scale)), - int(round(img.size[1] * scale))) - - s_img = img.resize(target_size, resample=Image.CUBIC) - - if flip: - s_img = F.hflip(s_img) - im_msc, ignore = pad(s_img,pad_size) - im_msc = transform(im_msc) - ignore = np.array(ignore).astype(im_msc.dtype)[..., np.newaxis] - im_msc = F.to_tensor(im_msc * (1 - ignore)) - - imgnm = imgname + "_" + str(i) - im_msc = np.array(im_msc,dtype= np.float32) - - im_msc.tofile(os.path.join(bin_path, imgnm + '.bin')) - -def preprocess(file_path, voc12_root,bin_path): - - img_name_list = load_img_name_list(file_path) - print(img_name_list) - - if not os.path.exists(bin_path): - os.makedirs(bin_path) - - for i in range(len(img_name_list)): - print("===> ",i) - imgnm = img_name_list[i][33:-4] - img = Image.open(os.path.join(voc12_root, img_name_list[i])).convert('RGB') - - get_batch_bin(img,imgnm) - -if __name__ == "__main__": - - voc12_root_path = os.path.abspath(sys.argv[1]) - file_path = os.path.abspath(sys.argv[2]) - bin_path = os.path.abspath(sys.argv[3]) - - 
preprocess(file_path,voc12_root_path, bin_path) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import numpy as np +from PIL import Image +import torchvision.transforms.functional as F +import torchvision.transforms as tf + + +class Normalize(): + def __init__(self, mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)): + + self.mean = mean + self.std = std + + def undo(self, imgarr): + proc_img = imgarr.copy() + + proc_img[..., 0] = (self.std[0] * imgarr[..., 0] + self.mean[0]) * 255. + proc_img[..., 1] = (self.std[1] * imgarr[..., 1] + self.mean[1]) * 255. + proc_img[..., 2] = (self.std[2] * imgarr[..., 2] + self.mean[2]) * 255. + + return proc_img + + def __call__(self, img): + imgarr = np.asarray(img) + proc_img = np.empty_like(imgarr, np.float32) + + proc_img[..., 0] = (imgarr[..., 0] / 255. - self.mean[0]) / self.std[0] + proc_img[..., 1] = (imgarr[..., 1] / 255. - self.mean[1]) / self.std[1] + proc_img[..., 2] = (imgarr[..., 2] / 255. - self.mean[2]) / self.std[2] + + return proc_img + +def load_img_name_list(file_path, index=0): + img_gt_name_list = open(file_path).read().splitlines() + img_name_list = [img_gt_name.split(' ')[index].strip('/') for img_gt_name in img_gt_name_list] + return img_name_list + +def pad(image,pad_size): + w, h = image.size + + pad_mask = Image.new("L", image.size) + pad_height = pad_size[0] - h + pad_width = pad_size[1] - w + + assert pad_height >= 0 and pad_width >= 0 + + pad_l = max(0, pad_width // 2) + pad_r = max(0, pad_width - pad_l) + pad_t = max(0, pad_height // 2) + pad_b = max(0, pad_height - pad_t) + + image = F.pad(image, (pad_l, pad_t, pad_r, pad_b), fill=0, padding_mode="constant") + pad_mask = F.pad(pad_mask, (pad_l, pad_t, pad_r, pad_b), fill=1, padding_mode="constant") + + return image, pad_mask + +def get_batch_bin(img,imgname): + + pad_size = [1024, 1024] + scales = [1, 0.5, 1.5, 2.0] + batch_size = 8 + use_flips = True + + transform = tf.Compose([np.asarray, + Normalize()]) + + for i in range(batch_size): + + sub_idx = i % batch_size + scale = scales[sub_idx // (2 if use_flips else 1)] + + flip = use_flips and sub_idx % 2 + + target_size = (int(round(img.size[0] * scale)), + int(round(img.size[1] * scale))) + + s_img = img.resize(target_size, resample=Image.CUBIC) + + if flip: + s_img = F.hflip(s_img) + im_msc, ignore = pad(s_img,pad_size) + im_msc = transform(im_msc) + ignore = np.array(ignore).astype(im_msc.dtype)[..., np.newaxis] + im_msc = F.to_tensor(im_msc * (1 - ignore)) + + imgnm = imgname + "_" + str(i) + im_msc = np.array(im_msc,dtype= np.float32) + + im_msc.tofile(os.path.join(bin_path, imgnm + '.bin')) + +def preprocess(file_path, voc12_root,bin_path): + + img_name_list = load_img_name_list(file_path) + print(img_name_list) + + if not os.path.exists(bin_path): + os.makedirs(bin_path) + + for i in range(len(img_name_list)): + print("===> ",i) + imgnm = img_name_list[i][33:-4] + img = Image.open(os.path.join(voc12_root, img_name_list[i])).convert('RGB') + 
+ get_batch_bin(img,imgnm) + +if __name__ == "__main__": + + voc12_root_path = os.path.abspath(sys.argv[1]) + file_path = os.path.abspath(sys.argv[2]) + bin_path = os.path.abspath(sys.argv[3]) + + preprocess(file_path,voc12_root_path, bin_path) diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/fix_softmax_transpose.py b/ACL_PyTorch/contrib/cv/segmentation/Wseg/fix_softmax_transpose.py index 1ad1841a2056ec301b1e18a8137624ac5ba609da..9a28eca2663f2835552735faa171e1a0b0d1d270 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/fix_softmax_transpose.py +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/fix_softmax_transpose.py @@ -1,36 +1,36 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import onnx - -if __name__ == '__main__': - model = onnx.load(sys.argv[1]) - graph = model.graph - node = graph.node - softmax_node_index = [] - del_group = [] - for i in range(len(node)): - if node[i].op_type == 'Softmax': - del_group.append((node[i-1], node[i], node[i+1], i)) - for g in del_group: - new_input = g[0].input - new_output = g[2].output - new_name = g[1].name - new_index = g[3] - new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) - for n in g[:-1]: - graph.node.remove(n) - graph.node.insert(new_index, new_node) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import onnx + +if __name__ == '__main__': + model = onnx.load(sys.argv[1]) + graph = model.graph + node = graph.node + softmax_node_index = [] + del_group = [] + for i in range(len(node)): + if node[i].op_type == 'Softmax': + del_group.append((node[i-1], node[i], node[i+1], i)) + for g in del_group: + new_input = g[0].input + new_output = g[2].output + new_name = g[1].name + new_index = g[3] + new_node = onnx.helper.make_node("Softmax", new_input, new_output, new_name, axis=1) + for n in g[:-1]: + graph.node.remove(n) + graph.node.insert(new_index, new_node) onnx.save(model, sys.argv[2]) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/get_dateset_info.py b/ACL_PyTorch/contrib/cv/segmentation/Wseg/get_dateset_info.py index f2f482c10c496b391f4429c5fe8d7b6d986e7c84..f09088e3bcb242cf4375a7d39a9f53c219fdba36 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/get_dateset_info.py +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/get_dateset_info.py @@ -1,60 +1,60 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - if file_type == 'bin': - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' - get_jpg_info(file_path, info_name) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) + diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/Wseg/modelzoo_level.txt index 282c3ff3b30404101a02cc86c5bfeb6308d198e7..c5c4a9d8001fae97c66831abcfdbe02dd6261c37 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/Wseg/test/parse.py index 24e62e8231605e7adb8697e1ecc7b3d96006fd03..3b35884335889363e190c05dfb1d85bef92fed26 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/test/parse.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.eval'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.readlines() - #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - print("".join(content[-28:])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.eval'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.readlines() + #tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + #print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + print("".join(content[-28:])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/Wseg/val_voc.txt b/ACL_PyTorch/contrib/cv/segmentation/Wseg/val_voc.txt index 98c6a5540ffec8940bba898f478929b94dfbf00a..e910b33688654a75fc54adbd2fbe410827ab70f1 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/Wseg/val_voc.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/Wseg/val_voc.txt @@ -1,300 +1,300 @@ -voc/VOCdevkit/VOC2012/JPEGImages/2007_007881.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007881.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_006275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006275.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006373.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006373.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000573.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000573.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000572.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000572.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001300.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001300.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007804.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007804.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007378.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007378.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002728.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002728.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000121.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000121.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002470.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2007_002470.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007392.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007392.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006117.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006117.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_009841.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009841.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002504.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002504.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000012.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000012.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000013.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000013.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003003.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003003.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000479.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000479.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002094.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002094.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004538.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004538.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007025.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007025.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005828.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005828.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006028.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006028.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004324.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004324.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002152.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002152.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003275.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003276.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003276.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001014.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001014.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000309.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000309.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002387.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002387.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004281.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004281.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003477.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003477.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004494.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004494.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001988.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001988.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002450.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002450.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003187.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003187.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003183.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003183.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004255.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004255.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003676.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003676.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003473.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003473.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000658.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000658.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001708.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001708.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001529.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001529.png 
-voc/VOCdevkit/VOC2012/JPEGImages/2010_001830.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001830.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004789.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004789.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004190.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004190.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004193.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004193.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003564.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003564.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003569.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003569.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003876.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003876.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003874.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003874.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003506.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003506.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002900.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002900.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_008296.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008296.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002902.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002902.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_008746.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008746.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002568.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002568.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_001239.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_001239.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000874.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000874.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003304.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003304.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001908.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001908.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003499.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003499.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003492.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003492.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002366.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002366.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005118.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005118.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000837.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000837.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004795.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004795.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000830.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000830.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000529.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000529.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005664.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005664.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002415.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002415.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002730.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002730.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000602.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000602.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002094.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002094.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000636.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000636.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002097.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002097.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005206.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2010_005206.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001692.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001692.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001699.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001699.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_005038.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005038.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_005097.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005097.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001640.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001640.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_005525.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005525.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_009015.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009015.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000731.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000731.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005063.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005063.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_008127.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008127.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001768.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001768.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004337.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004337.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003270.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003270.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001767.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001767.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001563.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001563.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003137.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003137.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003134.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003134.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_007165.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007165.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003131.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003131.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_008708.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_008708.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000961.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000961.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_009655.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009655.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_009654.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009654.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007497.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007497.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006560.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006560.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000156.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000156.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003714.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003714.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007498.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007498.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_004612.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004612.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_006143.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006143.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003711.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003711.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006364.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006364.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001314.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001314.png 
-voc/VOCdevkit/VOC2012/JPEGImages/2010_003915.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003915.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007811.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007811.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007814.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007814.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002273.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002273.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001260.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001260.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004825.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004825.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002719.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002719.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003621.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003621.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003369.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003369.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003481.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003481.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000573.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000573.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004070.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004070.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004072.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004072.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003323.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003323.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005857.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005857.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007031.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007031.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002284.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002284.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000481.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000481.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000482.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000482.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002046.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002046.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006035.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006035.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003709.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003709.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002445.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002445.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007143.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007143.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001885.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001885.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005107.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005107.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001287.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001287.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001281.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001281.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000201.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000201.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000205.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000205.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002863.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002863.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004856.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004856.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004189.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004189.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003773.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2009_003773.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003771.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003771.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003576.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003576.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002391.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002391.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002025.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002025.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000929.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003514.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003514.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002864.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002864.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001601.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001601.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001350.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001350.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001607.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001607.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002221.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002221.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004241.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004241.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002939.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002939.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000120.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000120.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000123.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000123.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003311.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003311.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001913.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001913.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_005637.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005637.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002644.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004687.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004687.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002641.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002641.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004275.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002336.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002336.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002317.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002317.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002480.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002480.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005871.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005871.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001351.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001351.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005305.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005305.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004783.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004783.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004789.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004789.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003406.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003406.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000630.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000630.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005788.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005788.png 
-voc/VOCdevkit/VOC2012/JPEGImages/2010_002150.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002150.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_001733.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_001733.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001174.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001174.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000174.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000174.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_008629.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008629.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000175.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000175.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000999.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000999.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002043.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002043.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002042.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002042.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005644.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000700.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000700.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_003114.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003114.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_005399.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005399.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_005398.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005398.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004322.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004322.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004320.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004320.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001773.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001773.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000318.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000318.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000087.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000087.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001579.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001579.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000084.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000084.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000083.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000083.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_003271.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003271.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001577.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001577.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005899.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005899.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001818.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001818.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004867.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004867.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_003197.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003197.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_004432.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004432.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000952.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000952.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000559.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000559.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007527.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007527.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000149.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000149.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_006159.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2008_006159.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001363.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001363.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002467.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002467.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002464.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002464.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_007195.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007195.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_007196.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007196.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002269.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002269.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_006554.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006554.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007828.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007828.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001078.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001078.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_006036.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006036.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001070.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001070.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003872.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003872.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001074.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001074.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001076.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001076.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_007996.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007996.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000427.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000427.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000426.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000426.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001565.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001565.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000422.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000422.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001768.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001768.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004497.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004497.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_001765.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001765.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000964.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000964.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005844.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005844.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005845.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005845.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_007048.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007048.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_005087.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005087.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003503.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003503.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_003506.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003506.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000924.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000924.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_003733.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003733.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002147.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002147.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_002903.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002903.png 
-voc/VOCdevkit/VOC2012/JPEGImages/2011_000747.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000747.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003854.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003854.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000346.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000346.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002098.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002098.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001292.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001292.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_005114.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005114.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001895.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001895.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004468.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004468.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_006647.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006647.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_000219.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000219.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003453.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003453.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003325.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003325.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001722.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001722.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_004552.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004552.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001726.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001726.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004866.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004866.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004649.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004649.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_004644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004644.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003542.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003542.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_005158.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005158.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003549.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003549.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002030.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002030.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_003772.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003772.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_000254.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000254.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_000622.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000622.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001619.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001619.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001341.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001341.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001614.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001614.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001613.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001613.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_001346.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001346.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002238.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002238.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002239.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002239.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003895.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003895.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_000813.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2011_000813.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_001587.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_001587.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002549.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002549.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001966.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001966.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002675.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002675.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001962.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001962.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_004140.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004140.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001478.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001478.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002075.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002075.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_002929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002929.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_004455.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004455.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_001367.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001367.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_005302.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005302.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_005860.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005860.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_008434.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008434.png -voc/VOCdevkit/VOC2012/JPEGImages/2008_001715.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001715.png -voc/VOCdevkit/VOC2012/JPEGImages/2011_002713.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002713.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_002122.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002122.png -voc/VOCdevkit/VOC2012/JPEGImages/2009_003433.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003433.png -voc/VOCdevkit/VOC2012/JPEGImages/2007_000187.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000187.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002146.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002146.png -voc/VOCdevkit/VOC2012/JPEGImages/2010_002929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002929.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_007881.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007881.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_006275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006275.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006373.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006373.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000573.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000573.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000572.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000572.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001300.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001300.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007804.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007804.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007378.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007378.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002728.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002728.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000121.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000121.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002470.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002470.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007392.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007392.png 
+voc/VOCdevkit/VOC2012/JPEGImages/2007_006117.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006117.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_009841.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009841.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002504.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002504.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000012.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000012.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000013.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000013.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003003.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003003.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000479.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000479.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002094.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002094.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004538.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004538.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007025.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007025.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005828.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005828.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006028.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006028.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004324.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004324.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002152.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002152.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003275.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003276.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003276.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001014.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001014.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000309.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000309.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002387.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002387.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004281.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004281.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003477.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003477.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004494.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004494.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001988.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001988.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002450.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002450.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003187.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003187.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003183.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003183.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004255.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004255.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003676.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003676.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003473.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003473.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000658.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000658.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001708.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001708.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001529.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001529.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001830.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001830.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004789.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2009_004789.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004190.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004190.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004193.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004193.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003564.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003564.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003569.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003569.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003876.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003876.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003874.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003874.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003506.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003506.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002900.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002900.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_008296.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008296.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002902.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002902.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_008746.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008746.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002568.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002568.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_001239.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_001239.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000874.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000874.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003304.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003304.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001908.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001908.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003499.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003499.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003492.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003492.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002366.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002366.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005118.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005118.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000837.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000837.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004795.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004795.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000830.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000830.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000529.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000529.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005664.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005664.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002415.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002415.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002730.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002730.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000602.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000602.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002094.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002094.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000636.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000636.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002097.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002097.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005206.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005206.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001692.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001692.png 
+voc/VOCdevkit/VOC2012/JPEGImages/2010_001699.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001699.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_005038.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005038.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_005097.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005097.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001640.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001640.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_005525.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005525.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_009015.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009015.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000731.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000731.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005063.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005063.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_008127.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008127.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001768.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001768.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004337.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004337.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003270.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003270.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001767.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001767.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001563.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001563.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003137.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003137.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003134.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003134.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_007165.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007165.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003131.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003131.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_008708.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_008708.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000961.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000961.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_009655.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009655.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_009654.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_009654.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007497.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007497.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006560.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006560.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000156.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000156.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003714.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003714.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007498.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007498.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_004612.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004612.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_006143.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006143.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003711.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003711.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006364.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006364.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001314.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001314.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003915.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003915.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007811.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2008_007811.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007814.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007814.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002273.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002273.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001260.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001260.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004825.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004825.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002719.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002719.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003621.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003621.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003369.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003369.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003481.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003481.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000573.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000573.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004070.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004070.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004072.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004072.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003323.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003323.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005857.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005857.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007031.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007031.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002284.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002284.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000481.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000481.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000482.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000482.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002046.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002046.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006035.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006035.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003709.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003709.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002445.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002445.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007143.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007143.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001885.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001885.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005107.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005107.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001287.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001287.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001281.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001281.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000201.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000201.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000205.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000205.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002863.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002863.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004856.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004856.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004189.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004189.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003773.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003773.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003771.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003771.png 
+voc/VOCdevkit/VOC2012/JPEGImages/2009_003576.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003576.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002391.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002391.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002025.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002025.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000929.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003514.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003514.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002864.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002864.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001601.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001601.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001350.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001350.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001607.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001607.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002221.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002221.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004241.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004241.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002939.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002939.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000120.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000120.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000123.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000123.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003311.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003311.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001913.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001913.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_005637.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005637.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002644.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004687.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004687.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002641.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002641.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004275.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004275.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002336.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002336.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002317.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002317.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002480.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002480.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005871.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005871.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001351.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001351.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005305.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005305.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004783.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004783.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004789.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004789.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003406.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003406.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000630.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000630.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005788.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005788.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002150.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002150.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_001733.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2007_001733.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001174.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001174.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000174.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000174.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_008629.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008629.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000175.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000175.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000999.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000999.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002043.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002043.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002042.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002042.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005644.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000700.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000700.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_003114.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003114.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_005399.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005399.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_005398.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_005398.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004322.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004322.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004320.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004320.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001773.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001773.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000318.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000318.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000087.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000087.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001579.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001579.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000084.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000084.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000083.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000083.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_003271.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003271.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001577.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001577.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005899.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005899.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001818.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001818.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004867.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004867.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_003197.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_003197.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_004432.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_004432.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000952.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000952.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000559.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000559.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007527.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007527.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000149.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000149.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_006159.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006159.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001363.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001363.png 
+voc/VOCdevkit/VOC2012/JPEGImages/2008_002467.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002467.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002464.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002464.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_007195.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007195.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_007196.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007196.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002269.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002269.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_006554.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006554.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007828.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007828.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001078.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001078.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_006036.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_006036.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001070.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001070.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003872.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003872.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001074.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001074.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001076.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001076.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_007996.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_007996.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000427.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000427.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000426.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000426.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001565.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001565.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000422.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000422.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001768.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001768.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004497.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004497.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_001765.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_001765.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000964.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000964.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005844.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005844.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005845.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005845.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_007048.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_007048.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_005087.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005087.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003503.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003503.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_003506.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_003506.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000924.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000924.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_003733.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_003733.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002147.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002147.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_002903.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_002903.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000747.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000747.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003854.jpg 
voc/VOCdevkit/VOC2012/SegmentationClass/2010_003854.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000346.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000346.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002098.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002098.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001292.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001292.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_005114.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_005114.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001895.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001895.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004468.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004468.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_006647.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_006647.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_000219.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_000219.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003453.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003453.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003325.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003325.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001722.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001722.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_004552.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004552.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001726.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001726.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004866.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004866.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004649.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004649.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_004644.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_004644.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003542.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003542.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_005158.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005158.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003549.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003549.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002030.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002030.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_003772.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_003772.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_000254.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_000254.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_000622.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_000622.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001619.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001619.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001341.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001341.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001614.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001614.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001613.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001613.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_001346.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_001346.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002238.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002238.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002239.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002239.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003895.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003895.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_000813.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_000813.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_001587.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_001587.png 
+voc/VOCdevkit/VOC2012/JPEGImages/2009_002549.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002549.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001966.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001966.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002675.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002675.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001962.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001962.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_004140.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_004140.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001478.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001478.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002075.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002075.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_002929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_002929.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_004455.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_004455.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_001367.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_001367.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_005302.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_005302.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_005860.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_005860.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_008434.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_008434.png +voc/VOCdevkit/VOC2012/JPEGImages/2008_001715.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2008_001715.png +voc/VOCdevkit/VOC2012/JPEGImages/2011_002713.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2011_002713.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_002122.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_002122.png +voc/VOCdevkit/VOC2012/JPEGImages/2009_003433.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2009_003433.png +voc/VOCdevkit/VOC2012/JPEGImages/2007_000187.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2007_000187.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002146.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002146.png +voc/VOCdevkit/VOC2012/JPEGImages/2010_002929.jpg voc/VOCdevkit/VOC2012/SegmentationClass/2010_002929.png diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/README.md b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/README.md index 7ef9506b23de412c311003095a0c0563510d994f..c57fba11d19c7dcd423ea217bb49df0c94a7291c 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/README.md @@ -1,155 +1,155 @@ -# YOLACT模型PyTorch离线推理指导书 - -### 一、环境准备 - -#### 1、获取依赖库 - -```shell -pip3 install -r requirements.txt -``` - -其中,PyTorch建议使用1.8.0版本。 - -使用1.5.0版本PyTorch可以正常进行om导出,且om模型精度与性能正常;但导出的onnx模型文件无法使用trtexec在T4上进行测试。 - -使用1.8.0版本PyTorch则无以上问题。 - -#### 2、获取YOLACT源代码并更新 - -- 首先获取官方代码仓代码 - - ```bash - git clone https://github.com/dbolya/yolact.git - ``` - -- 将无关文件删除,保留以下文件 - - ``` - . - ├── backbone.py - ├── data - │   ├── coco.py - │   ├── config.py - │   └── __init__.py - ├── layers - │   ├── box_utils.py - │   ├── functions - │   │   ├── detection.py - │   │   └── __init__.py - │   ├── __init__.py - │   ├── interpolate.py - │   └── output_utils.py - ├── utils - │   ├── augmentations.py - │   ├── cython_nms.pyx - │   ├── functions.py - │   ├── __init__.py - │   └── timer.py - └── yolact.py - ``` - -- 将本仓库代码拷贝至yolact目录下,并使用补丁YOLACT.patch复原 - - ``` - patch -p1 < ./YOLACT.patch - ``` - - 复原后,文件目录如下 - - ``` - . 
- ├── backbone.py - ├── data - │   ├── coco.py - │   ├── config.py - │   └── __init__.py - ├── env.sh - ├── layers - │   ├── box_utils.py - │   ├── functions - │   │   ├── detection.py - │   │   └── __init__.py - │   ├── __init__.py - │   ├── interpolate.py - │   └── output_utils.py - ├── LICENSE - ├── modelzoo_level.txt - ├── README.md - ├── requirements.txt - ├── test - │   ├── eval_acc_perf.sh - │   ├── parse.py - │   ├── perf_g.sh - │   └── pth2om.sh - ├── utils - │   ├── augmentations.py - │   ├── cython_nms.pyx - │   ├── functions.py - │   ├── __init__.py - │   └── timer.py - ├── weights - │   └── pth2onnx.py - ├── YOLACT.patch - ├── YOLACT_postprocess.py - ├── YOLACT_preprocess.py - └── yolact.py - ``` - -#### 3、获取权重文件 - -获取训练完毕的权重文件,建议放于./weights目录下 - -#### 4、获取数据集 - -YOLACT模型使用Microsoft COCO 2017数据集进行训练及测试。 - -在离线推理中仅使用测试数据集,测试图像为val 2017, 对应的标注文件为instances_val2017.json - -#### 5、获取benchmark工具 - -获取benchmark.x86_64离线推理工具 - - - -### 二、离线推理 - -#### 1、执行离线推理前使用以下命令查看设备状态,确保device空闲 - -```bash -npu-smi info -``` - -#### 2、执行以下命令,生成om模型文件 - -```bash -cd test -./pth2om.sh pth权重文件的路径 生成onnx的文件名 生成om的文件名 输入batch_size -``` - -注意:此处pth权重文件的路径应设为相对路径 - -#### 3、执行以下命令,开始离线推理 - -```bash -./eval_acc_perf.sh 数据集图像路径 数据集标注路径 输入batch_size om文件路径 benchmark工具路径 -``` - -同时,benchmark工具会自动统计性能数据 - -#### 4、在T4环境中执行以下命令,获取T4推理性能 - -```bash -./perf_g.sh 输入batch_size onnx文件路径 -``` - -注意,如果使用1.5.0版本PyTorch,则导出的onnx可能无法使用trtexec工具进行性能测试。因此,这里建议使用1.8.0版本PyTroch。 - - - -### 三、评测结果 - -| 模型 | 官网精度 | 310精度 | T4性能 | 310性能 | -| ----------- | ----------------------- | ----------------------- | ---------- | --------- | -| YOLACT_bs1 | box: 32.07,mask: 29.73 | box: 32.07, mask: 29.72 | 75.797FPS | 84.014FPS | -| YOLACT_bs16 | box: 32.07,mask: 29.73 | box: 32.07, mask: 29.72 | 116.596FPS | 96.161FPS | - +# YOLACT模型PyTorch离线推理指导书 + +### 一、环境准备 + +#### 1、获取依赖库 + +```shell +pip3 install -r requirements.txt +``` + +其中,PyTorch建议使用1.8.0版本。 + +使用1.5.0版本PyTorch可以正常进行om导出,且om模型精度与性能正常;但导出的onnx模型文件无法使用trtexec在T4上进行测试。 + +使用1.8.0版本PyTorch则无以上问题。 + +#### 2、获取YOLACT源代码并更新 + +- 首先获取官方代码仓代码 + + ```bash + git clone https://github.com/dbolya/yolact.git + ``` + +- 将无关文件删除,保留以下文件 + + ``` + . + ├── backbone.py + ├── data + │   ├── coco.py + │   ├── config.py + │   └── __init__.py + ├── layers + │   ├── box_utils.py + │   ├── functions + │   │   ├── detection.py + │   │   └── __init__.py + │   ├── __init__.py + │   ├── interpolate.py + │   └── output_utils.py + ├── utils + │   ├── augmentations.py + │   ├── cython_nms.pyx + │   ├── functions.py + │   ├── __init__.py + │   └── timer.py + └── yolact.py + ``` + +- 将本仓库代码拷贝至yolact目录下,并使用补丁YOLACT.patch复原 + + ``` + patch -p1 < ./YOLACT.patch + ``` + + 复原后,文件目录如下 + + ``` + . 
+ ├── backbone.py + ├── data + │   ├── coco.py + │   ├── config.py + │   └── __init__.py + ├── env.sh + ├── layers + │   ├── box_utils.py + │   ├── functions + │   │   ├── detection.py + │   │   └── __init__.py + │   ├── __init__.py + │   ├── interpolate.py + │   └── output_utils.py + ├── LICENSE + ├── modelzoo_level.txt + ├── README.md + ├── requirements.txt + ├── test + │   ├── eval_acc_perf.sh + │   ├── parse.py + │   ├── perf_g.sh + │   └── pth2om.sh + ├── utils + │   ├── augmentations.py + │   ├── cython_nms.pyx + │   ├── functions.py + │   ├── __init__.py + │   └── timer.py + ├── weights + │   └── pth2onnx.py + ├── YOLACT.patch + ├── YOLACT_postprocess.py + ├── YOLACT_preprocess.py + └── yolact.py + ``` + +#### 3、获取权重文件 + +获取训练完毕的权重文件,建议放于./weights目录下 + +#### 4、获取数据集 + +YOLACT模型使用Microsoft COCO 2017数据集进行训练及测试。 + +在离线推理中仅使用测试数据集,测试图像为val 2017, 对应的标注文件为instances_val2017.json + +#### 5、获取benchmark工具 + +获取benchmark.x86_64离线推理工具 + + + +### 二、离线推理 + +#### 1、执行离线推理前使用以下命令查看设备状态,确保device空闲 + +```bash +npu-smi info +``` + +#### 2、执行以下命令,生成om模型文件 + +```bash +cd test +./pth2om.sh pth权重文件的路径 生成onnx的文件名 生成om的文件名 输入batch_size +``` + +注意:此处pth权重文件的路径应设为相对路径 + +#### 3、执行以下命令,开始离线推理 + +```bash +./eval_acc_perf.sh 数据集图像路径 数据集标注路径 输入batch_size om文件路径 benchmark工具路径 +``` + +同时,benchmark工具会自动统计性能数据 + +#### 4、在T4环境中执行以下命令,获取T4推理性能 + +```bash +./perf_g.sh 输入batch_size onnx文件路径 +``` + +注意,如果使用1.5.0版本PyTorch,则导出的onnx可能无法使用trtexec工具进行性能测试。因此,这里建议使用1.8.0版本PyTroch。 + + + +### 三、评测结果 + +| 模型 | 官网精度 | 310精度 | T4性能 | 310性能 | +| ----------- | ----------------------- | ----------------------- | ---------- | --------- | +| YOLACT_bs1 | box: 32.07,mask: 29.73 | box: 32.07, mask: 29.72 | 75.797FPS | 84.014FPS | +| YOLACT_bs16 | box: 32.07,mask: 29.73 | box: 32.07, mask: 29.72 | 116.596FPS | 96.161FPS | + diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_postprocess.py index 353af75e997220bc6ff2d4ed7782dce5dd78ebf2..95e32b1ebd694ecc8dd71ae89c47c6849951ba94 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_postprocess.py @@ -1,564 +1,564 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from layers import Detect -from data import COCODetection, get_label_map -from utils.augmentations import BaseTransform -from utils.functions import MovingAverage, ProgressBar -from layers.box_utils import jaccard, mask_iou -from utils import timer -from layers.output_utils import postprocess -import pycocotools -from data import cfg, set_cfg -import numpy as np -import torch -import argparse -import random -import pickle -import os -from collections import defaultdict -from collections import OrderedDict - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(argv=None): - parser = argparse.ArgumentParser( - description='YOLACT COCO Evaluation') - parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') - parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') - parser.add_argument('--top_k', default=5, type=int, - help='Further restrict the number of predictions to parse') - parser.add_argument('--cuda', default=True, type=str2bool, - help='Use cuda to evaulate model') - parser.add_argument('--fast_nms', default=True, type=str2bool, - help='Whether to use a faster, but not entirely correct version of NMS.') - parser.add_argument('--shuffle', dest='shuffle', action='store_true', - help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') - parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, - help='In quantitative mode, the file to save detections before calculating mAP.') - parser.add_argument('--max_images', default=-1, type=int, - help='The maximum number of images from the dataset to consider. Use -1 for all.') - parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', - help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') - parser.add_argument('--config', default=None, - help='The config object to use.') - parser.add_argument('--no_bar', dest='no_bar', action='store_true', - help='Do not output the status bar. This is useful for when piping to a file.') - parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', - help='Do not sort images by hashed image ID.') - parser.add_argument('--seed', default=None, type=int, - help='The seed to pass into random.seed. 
Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') - parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', - help='Outputs stuff for scripts/compute_mask.py.') - parser.add_argument('--score_threshold', default=0, type=float, - help='Detections with a score under this threshold will not be considered. This currently only works in display mode.') - parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') - parser.add_argument('--detect', default=False, dest='detect', action='store_true', - help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.') - - parser.set_defaults(no_bar=False, output_coco_json=False, shuffle=False, - no_sort=False, mask_proto_debug=False, detect=False, crop=True) - - global args - args = parser.parse_args(argv) - - if args.seed is not None: - random.seed(args.seed) - -iou_thresholds = [x / 100 for x in range(50, 100, 5)] -coco_cats = {} # Call prep_coco_cats to fill this -coco_cats_inv = {} -color_cache = defaultdict(lambda: {}) - -def prep_coco_cats(): - """ Prepare inverted table for category id lookup given a coco cats object. """ - for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): - transformed_cat_id = transformed_cat_id_p1 - 1 - coco_cats[transformed_cat_id] = coco_cat_id - coco_cats_inv[coco_cat_id] = transformed_cat_id - -def get_coco_cat(transformed_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats[transformed_cat_id] - -def get_transformed_cat(coco_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats_inv[coco_cat_id] - -class Detections: - - def __init__(self): - self.bbox_data = [] - self.mask_data = [] - - def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): - """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ - bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] - - # Round to the nearest 10th to avoid huge file sizes, as COCO suggests - bbox = [round(float(x)*10)/10 for x in bbox] - - self.bbox_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'bbox': bbox, - 'score': float(score) - }) - - def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): - """ The segmentation should be the full mask, the size of the image and with size [h, w]. 
""" - rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) - rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings - - self.mask_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'segmentation': rle, - 'score': float(score) - }) - -def _mask_iou(mask1, mask2, iscrowd=False): - with timer.env('Mask IoU'): - ret = mask_iou(mask1, mask2, iscrowd) - return ret.cpu() - -def _bbox_iou(bbox1, bbox2, iscrowd=False): - with timer.env('BBox IoU'): - ret = jaccard(bbox1, bbox2, iscrowd) - return ret.cpu() - -def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): - """ Returns a list of APs for this image, with each element being for a class """ - if not args.output_coco_json: - with timer.env('Prepare gt'): - gt_boxes = torch.Tensor(gt[:, :4]) - gt_boxes[:, [0, 2]] *= w - gt_boxes[:, [1, 3]] *= h - gt_classes = list(gt[:, 4].astype(int)) - gt_masks = torch.Tensor(gt_masks).view(-1, h*w) - - if num_crowd > 0: - split = lambda x: (x[-num_crowd:], x[:-num_crowd]) - crowd_boxes , gt_boxes = split(gt_boxes) - crowd_masks , gt_masks = split(gt_masks) - crowd_classes, gt_classes = split(gt_classes) - - with timer.env('Postprocess'): - classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) - - if classes.size(0) == 0: - return - - classes = list(classes.cpu().numpy().astype(int)) - if isinstance(scores, list): - box_scores = list(scores[0].cpu().numpy().astype(float)) - mask_scores = list(scores[1].cpu().numpy().astype(float)) - else: - scores = list(scores.cpu().numpy().astype(float)) - box_scores = scores - mask_scores = scores - masks = masks.view(-1, h*w) - - - if args.output_coco_json: - with timer.env('JSON Output'): - boxes = boxes.cpu().numpy() - masks = masks.view(-1, h, w).cpu().numpy() - for i in range(masks.shape[0]): - # Make sure that the bounding box actually makes sense and a mask was produced - if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: - detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) - detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) - return - - with timer.env('Eval Setup'): - num_pred = len(classes) - num_gt = len(gt_classes) - - mask_iou_cache = _mask_iou(masks, gt_masks) - bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) - - if num_crowd > 0: - crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) - crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) - else: - crowd_mask_iou_cache = None - crowd_bbox_iou_cache = None - - box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) - mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) - - iou_types = [ - ('box', lambda i,j: bbox_iou_cache[i, j].item(), - lambda i,j: crowd_bbox_iou_cache[i,j].item(), - lambda i: box_scores[i], box_indices), - ('mask', lambda i,j: mask_iou_cache[i, j].item(), - lambda i,j: crowd_mask_iou_cache[i,j].item(), - lambda i: mask_scores[i], mask_indices) - ] - - timer.start('Main loop') - for _class in set(classes + gt_classes): - ap_per_iou = [] - num_gt_for_class = sum([1 for x in gt_classes if x == _class]) - - for iouIdx in range(len(iou_thresholds)): - iou_threshold = iou_thresholds[iouIdx] - - for iou_type, iou_func, crowd_func, score_func, indices in iou_types: - gt_used = [False] * len(gt_classes) - - ap_obj = ap_data[iou_type][iouIdx][_class] - 
ap_obj.add_gt_positives(num_gt_for_class) - - for i in indices: - if classes[i] != _class: - continue - - max_iou_found = iou_threshold - max_match_idx = -1 - for j in range(num_gt): - if gt_used[j] or gt_classes[j] != _class: - continue - - iou = iou_func(i, j) - - if iou > max_iou_found: - max_iou_found = iou - max_match_idx = j - - if max_match_idx >= 0: - gt_used[max_match_idx] = True - ap_obj.push(score_func(i), True) - else: - # If the detection matches a crowd, we can just ignore it - matched_crowd = False - - if num_crowd > 0: - for j in range(len(crowd_classes)): - if crowd_classes[j] != _class: - continue - - iou = crowd_func(i, j) - - if iou > iou_threshold: - matched_crowd = True - break - - # All this crowd code so that we can make sure that our eval code gives the - # same result as COCOEval. There aren't even that many crowd annotations to - # begin with, but accuracy is of the utmost importance. - if not matched_crowd: - ap_obj.push(score_func(i), False) - timer.stop('Main loop') - -class APDataObject: - """ - Stores all the information necessary to calculate the AP for one IoU and one class. - Note: I type annotated this because why not. - """ - - def __init__(self): - self.data_points = [] - self.num_gt_positives = 0 - - def push(self, score:float, is_true:bool): - self.data_points.append((score, is_true)) - - def add_gt_positives(self, num_positives:int): - """ Call this once per image. """ - self.num_gt_positives += num_positives - - def is_empty(self) -> bool: - return len(self.data_points) == 0 and self.num_gt_positives == 0 - - def get_ap(self) -> float: - """ Warning: result not cached. """ - - if self.num_gt_positives == 0: - return 0 - - # Sort descending by score - self.data_points.sort(key=lambda x: -x[0]) - - precisions = [] - recalls = [] - num_true = 0 - num_false = 0 - - # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. - for datum in self.data_points: - # datum[1] is whether the detection a true or false positive - if datum[1]: num_true += 1 - else: num_false += 1 - - precision = num_true / (num_true + num_false) - recall = num_true / self.num_gt_positives - - precisions.append(precision) - recalls.append(recall) - - # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] - # Basically, remove any temporary dips from the curve. - # At least that's what I think, idk. COCOEval did it so I do too. - for i in range(len(precisions)-1, 0, -1): - if precisions[i] > precisions[i-1]: - precisions[i-1] = precisions[i] - - # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. - y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 - x_range = np.array([x / 100 for x in range(101)]) - recalls = np.array(recalls) - - # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. - # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). - # I approximate the integral this way, because that's how COCOEval does it. - indices = np.searchsorted(recalls, x_range, side='left') - for bar_idx, precision_idx in enumerate(indices): - if precision_idx < len(precisions): - y_range[bar_idx] = precisions[precision_idx] - - # Finally compute the riemann sum to get our integral. 
- # avg([precision(x) for x in 0:0.01:1]) - return sum(y_range) / len(y_range) - -def badhash(x): - """ - Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. - - Source: - https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - """ - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = ((x >> 16) ^ x) & 0xFFFFFFFF - return x - -class InferResultFile(): - def __init__(self, path, fileName): - parts = fileName.split('_') - self.imgId = int(parts[2]) - self.outputId = int(parts[3][0]) - self.arrayValue = np.fromfile(path + fileName, dtype=np.float32) - - self.arrayDim = self.arrayValue.shape[0] - #print('finish read file :', fileName) - -def getAllFiles(path): - allFiles = os.listdir(path) - infoFiles = {} - for file in allFiles: - if '.bin' in file and 'coco_val' in file: - infoFile = InferResultFile(path, file) - if infoFile.imgId in infoFiles.keys(): - infoFiles[infoFile.imgId].append(infoFile) - else: - infoFiles[infoFile.imgId] = [infoFile] - return infoFiles - -class InferResultFileFetcher(): - def __init__(self, path): - self.path = path - - def getInferResult(self, image_idx): - resultDict = {} - for i in range(1, 5): - fileName = 'coco_val2017_' + str(image_idx) + '_' + str(i) + '.bin' - infoFile = InferResultFile(self.path, fileName) - if infoFile.arrayDim == 615936: - resultDict[0] = infoFile - elif infoFile.arrayDim == 1559088: - resultDict[1] = infoFile - elif infoFile.arrayDim == 76992: - resultDict[2] = infoFile - else: - resultDict[3] = infoFile - - return resultDict - -pred_priors = None - -def getPriorTensor(): - global pred_priors - if pred_priors is None: - from yolact import PredictionModule - cfg._tmp_img_h = 550 - cfg._tmp_img_w = 550 - pred_priors = PredictionModule.get_YOLACT_priors().numpy() - return pred_priors - else: - return pred_priors - -def evaluate(path, dataset): - cfg.mask_proto_debug = args.mask_proto_debug - inferResultFetcher = InferResultFileFetcher(path) - - frame_times = MovingAverage() - dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) - progress_bar = ProgressBar(30, dataset_size) - - print() - - # For each class and iou, stores tuples (score, isPositive) - # Index ap_data[type][iouIdx][classIdx] - ap_data = { - 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], - 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] - } - detections = Detections() - - dataset_indices = list(range(len(dataset))) - - if args.shuffle: - random.shuffle(dataset_indices) - elif not args.no_sort: - # Do a deterministic shuffle based on the image ids - # - # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's - # the order of insertion. That means on python 3.6, the images come in the order they are in - # in the annotations file. For some reason, the first images in the annotations file are - # the hardest. To combat this, I use a hard-coded hash function based on the image ids - # to shuffle the indices we use. That way, no matter what python version or how pycocotools - # handles the data, we get the same result every time. 
- hashed = [badhash(x) for x in dataset.ids] - dataset_indices.sort(key=lambda x: hashed[x]) - - dataset_indices = dataset_indices[:dataset_size] - - # Main eval loop - for it, image_idx in enumerate(dataset_indices): - timer.reset() - with timer.env('Load Data'): - img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) - # Test flag, do not upvote - with timer.env('Network Extra'): - imgId_Outputs = inferResultFetcher.getInferResult(image_idx) - - pred_mask = imgId_Outputs[0].arrayValue.reshape(1, 19248, 32) #output1 : pred_onnx[2] - pred_conf = imgId_Outputs[1].arrayValue.reshape(1, 19248, 81) #output2 : pred_onnx[1] - pred_loc = imgId_Outputs[2].arrayValue.reshape(1, 19248, 4) #output3 : pred_onnx[0] - pred_proto = imgId_Outputs[3].arrayValue.reshape(1, 138, 138, 32) #output4 : pred_onnx[4] - - detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5) - detect.use_fast_nms = args.fast_nms - preds = detect({'loc': torch.from_numpy(pred_loc), - 'conf': torch.from_numpy(pred_conf), - 'mask': torch.from_numpy(pred_mask), - 'priors': torch.from_numpy(getPriorTensor()), #????? - 'proto': torch.from_numpy(pred_proto)}) - - # Perform the meat of the operation here depending on our mode. - - prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) - # First couple of images take longer because we're constructing the graph. - # Since that's technically initialization, don't include those in the FPS calculations. - if it > 1: - frame_times.add(timer.total_time()) - if not args.no_bar: - if it > 1: fps = 1 / frame_times.get_avg() - else: fps = 0 - progress = (it+1) / dataset_size * 100 - progress_bar.set_val(it+1) - print('\rProcessing Output Results %s %6d / %6d (%5.2f%%) %5.2f fps ' - % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') - - print() - print('Saving data...') - with open(args.ap_data_file, 'wb') as f: - pickle.dump(ap_data, f) - return calc_map(ap_data) - -def calc_map(ap_data): - print('Calculating mAP...') - aps = [{'box': [], 'mask': []} for _ in iou_thresholds] - - for _class in range(len(cfg.dataset.class_names)): - for iou_idx in range(len(iou_thresholds)): - for iou_type in ('box', 'mask'): - ap_obj = ap_data[iou_type][iou_idx][_class] - - if not ap_obj.is_empty(): - aps[iou_idx][iou_type].append(ap_obj.get_ap()) - - all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} - - # Looking back at it, this code is really hard to read :/ - for iou_type in ('box', 'mask'): - all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict - for i, threshold in enumerate(iou_thresholds): - mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 - all_maps[iou_type][int(threshold*100)] = mAP - all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) - - print_maps(all_maps) - - # Put in a prettier format so we can serialize it to json during training - all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} - return all_maps - -def print_maps(all_maps): - # Warning: hacky - make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) - make_sep = lambda n: ('-------+' * n) - - print() - print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) - print(make_sep(len(all_maps['box']) + 1)) - for iou_type in ('box', 'mask'): - print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in 
all_maps[iou_type].values()])) - print(make_sep(len(all_maps['box']) + 1)) - print() - -if __name__ == '__main__': - path = './result/dumpOutput_device0/' - parse_args() - - if args.config is not None: - set_cfg(args.config) - - else: - args.config = 'yolact_base_config' - print('Config not specified. Parsed %s from the file name.\n' % args.config) - set_cfg(args.config) - - #if args.image is None and args.video is None and args.images is None: - dataset = COCODetection(args.valid_images, args.valid_annotations, - transform=BaseTransform(), has_gt=cfg.dataset.has_gt) - prep_coco_cats() - - evaluate(path, dataset) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +from layers import Detect +from data import COCODetection, get_label_map +from utils.augmentations import BaseTransform +from utils.functions import MovingAverage, ProgressBar +from layers.box_utils import jaccard, mask_iou +from utils import timer +from layers.output_utils import postprocess +import pycocotools +from data import cfg, set_cfg +import numpy as np +import torch +import argparse +import random +import pickle +import os +from collections import defaultdict +from collections import OrderedDict + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description='YOLACT COCO Evaluation') + parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') + parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') + parser.add_argument('--top_k', default=5, type=int, + help='Further restrict the number of predictions to parse') + parser.add_argument('--cuda', default=True, type=str2bool, + help='Use cuda to evaulate model') + parser.add_argument('--fast_nms', default=True, type=str2bool, + help='Whether to use a faster, but not entirely correct version of NMS.') + parser.add_argument('--shuffle', dest='shuffle', action='store_true', + help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') + parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, + help='In quantitative mode, the file to save detections before calculating mAP.') + parser.add_argument('--max_images', default=-1, type=int, + help='The maximum number of images from the dataset to consider. Use -1 for all.') + parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', + help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') + parser.add_argument('--config', default=None, + help='The config object to use.') + parser.add_argument('--no_bar', dest='no_bar', action='store_true', + help='Do not output the status bar. This is useful for when piping to a file.') + parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', + help='Do not sort images by hashed image ID.') + parser.add_argument('--seed', default=None, type=int, + help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') + parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', + help='Outputs stuff for scripts/compute_mask.py.') + parser.add_argument('--score_threshold', default=0, type=float, + help='Detections with a score under this threshold will not be considered. This currently only works in display mode.') + parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') + parser.add_argument('--detect', default=False, dest='detect', action='store_true', + help='Don\'t evauluate the mask branch at all and only do object detection. 
This only works for --display and --benchmark.') + + parser.set_defaults(no_bar=False, output_coco_json=False, shuffle=False, + no_sort=False, mask_proto_debug=False, detect=False, crop=True) + + global args + args = parser.parse_args(argv) + + if args.seed is not None: + random.seed(args.seed) + +iou_thresholds = [x / 100 for x in range(50, 100, 5)] +coco_cats = {} # Call prep_coco_cats to fill this +coco_cats_inv = {} +color_cache = defaultdict(lambda: {}) + +def prep_coco_cats(): + """ Prepare inverted table for category id lookup given a coco cats object. """ + for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): + transformed_cat_id = transformed_cat_id_p1 - 1 + coco_cats[transformed_cat_id] = coco_cat_id + coco_cats_inv[coco_cat_id] = transformed_cat_id + +def get_coco_cat(transformed_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats[transformed_cat_id] + +def get_transformed_cat(coco_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats_inv[coco_cat_id] + +class Detections: + + def __init__(self): + self.bbox_data = [] + self.mask_data = [] + + def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): + """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ + bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] + + # Round to the nearest 10th to avoid huge file sizes, as COCO suggests + bbox = [round(float(x)*10)/10 for x in bbox] + + self.bbox_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'bbox': bbox, + 'score': float(score) + }) + + def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): + """ The segmentation should be the full mask, the size of the image and with size [h, w]. 
""" + rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) + rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings + + self.mask_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'segmentation': rle, + 'score': float(score) + }) + +def _mask_iou(mask1, mask2, iscrowd=False): + with timer.env('Mask IoU'): + ret = mask_iou(mask1, mask2, iscrowd) + return ret.cpu() + +def _bbox_iou(bbox1, bbox2, iscrowd=False): + with timer.env('BBox IoU'): + ret = jaccard(bbox1, bbox2, iscrowd) + return ret.cpu() + +def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): + """ Returns a list of APs for this image, with each element being for a class """ + if not args.output_coco_json: + with timer.env('Prepare gt'): + gt_boxes = torch.Tensor(gt[:, :4]) + gt_boxes[:, [0, 2]] *= w + gt_boxes[:, [1, 3]] *= h + gt_classes = list(gt[:, 4].astype(int)) + gt_masks = torch.Tensor(gt_masks).view(-1, h*w) + + if num_crowd > 0: + split = lambda x: (x[-num_crowd:], x[:-num_crowd]) + crowd_boxes , gt_boxes = split(gt_boxes) + crowd_masks , gt_masks = split(gt_masks) + crowd_classes, gt_classes = split(gt_classes) + + with timer.env('Postprocess'): + classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) + + if classes.size(0) == 0: + return + + classes = list(classes.cpu().numpy().astype(int)) + if isinstance(scores, list): + box_scores = list(scores[0].cpu().numpy().astype(float)) + mask_scores = list(scores[1].cpu().numpy().astype(float)) + else: + scores = list(scores.cpu().numpy().astype(float)) + box_scores = scores + mask_scores = scores + masks = masks.view(-1, h*w) + + + if args.output_coco_json: + with timer.env('JSON Output'): + boxes = boxes.cpu().numpy() + masks = masks.view(-1, h, w).cpu().numpy() + for i in range(masks.shape[0]): + # Make sure that the bounding box actually makes sense and a mask was produced + if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: + detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) + detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) + return + + with timer.env('Eval Setup'): + num_pred = len(classes) + num_gt = len(gt_classes) + + mask_iou_cache = _mask_iou(masks, gt_masks) + bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) + + if num_crowd > 0: + crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) + crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) + else: + crowd_mask_iou_cache = None + crowd_bbox_iou_cache = None + + box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) + mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) + + iou_types = [ + ('box', lambda i,j: bbox_iou_cache[i, j].item(), + lambda i,j: crowd_bbox_iou_cache[i,j].item(), + lambda i: box_scores[i], box_indices), + ('mask', lambda i,j: mask_iou_cache[i, j].item(), + lambda i,j: crowd_mask_iou_cache[i,j].item(), + lambda i: mask_scores[i], mask_indices) + ] + + timer.start('Main loop') + for _class in set(classes + gt_classes): + ap_per_iou = [] + num_gt_for_class = sum([1 for x in gt_classes if x == _class]) + + for iouIdx in range(len(iou_thresholds)): + iou_threshold = iou_thresholds[iouIdx] + + for iou_type, iou_func, crowd_func, score_func, indices in iou_types: + gt_used = [False] * len(gt_classes) + + ap_obj = ap_data[iou_type][iouIdx][_class] + 
ap_obj.add_gt_positives(num_gt_for_class) + + for i in indices: + if classes[i] != _class: + continue + + max_iou_found = iou_threshold + max_match_idx = -1 + for j in range(num_gt): + if gt_used[j] or gt_classes[j] != _class: + continue + + iou = iou_func(i, j) + + if iou > max_iou_found: + max_iou_found = iou + max_match_idx = j + + if max_match_idx >= 0: + gt_used[max_match_idx] = True + ap_obj.push(score_func(i), True) + else: + # If the detection matches a crowd, we can just ignore it + matched_crowd = False + + if num_crowd > 0: + for j in range(len(crowd_classes)): + if crowd_classes[j] != _class: + continue + + iou = crowd_func(i, j) + + if iou > iou_threshold: + matched_crowd = True + break + + # All this crowd code so that we can make sure that our eval code gives the + # same result as COCOEval. There aren't even that many crowd annotations to + # begin with, but accuracy is of the utmost importance. + if not matched_crowd: + ap_obj.push(score_func(i), False) + timer.stop('Main loop') + +class APDataObject: + """ + Stores all the information necessary to calculate the AP for one IoU and one class. + Note: I type annotated this because why not. + """ + + def __init__(self): + self.data_points = [] + self.num_gt_positives = 0 + + def push(self, score:float, is_true:bool): + self.data_points.append((score, is_true)) + + def add_gt_positives(self, num_positives:int): + """ Call this once per image. """ + self.num_gt_positives += num_positives + + def is_empty(self) -> bool: + return len(self.data_points) == 0 and self.num_gt_positives == 0 + + def get_ap(self) -> float: + """ Warning: result not cached. """ + + if self.num_gt_positives == 0: + return 0 + + # Sort descending by score + self.data_points.sort(key=lambda x: -x[0]) + + precisions = [] + recalls = [] + num_true = 0 + num_false = 0 + + # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. + for datum in self.data_points: + # datum[1] is whether the detection a true or false positive + if datum[1]: num_true += 1 + else: num_false += 1 + + precision = num_true / (num_true + num_false) + recall = num_true / self.num_gt_positives + + precisions.append(precision) + recalls.append(recall) + + # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] + # Basically, remove any temporary dips from the curve. + # At least that's what I think, idk. COCOEval did it so I do too. + for i in range(len(precisions)-1, 0, -1): + if precisions[i] > precisions[i-1]: + precisions[i-1] = precisions[i] + + # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. + y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 + x_range = np.array([x / 100 for x in range(101)]) + recalls = np.array(recalls) + + # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. + # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). + # I approximate the integral this way, because that's how COCOEval does it. + indices = np.searchsorted(recalls, x_range, side='left') + for bar_idx, precision_idx in enumerate(indices): + if precision_idx < len(precisions): + y_range[bar_idx] = precisions[precision_idx] + + # Finally compute the riemann sum to get our integral. 
+ # avg([precision(x) for x in 0:0.01:1]) + return sum(y_range) / len(y_range) + +def badhash(x): + """ + Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. + + Source: + https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + """ + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = ((x >> 16) ^ x) & 0xFFFFFFFF + return x + +class InferResultFile(): + def __init__(self, path, fileName): + parts = fileName.split('_') + self.imgId = int(parts[2]) + self.outputId = int(parts[3][0]) + self.arrayValue = np.fromfile(path + fileName, dtype=np.float32) + + self.arrayDim = self.arrayValue.shape[0] + #print('finish read file :', fileName) + +def getAllFiles(path): + allFiles = os.listdir(path) + infoFiles = {} + for file in allFiles: + if '.bin' in file and 'coco_val' in file: + infoFile = InferResultFile(path, file) + if infoFile.imgId in infoFiles.keys(): + infoFiles[infoFile.imgId].append(infoFile) + else: + infoFiles[infoFile.imgId] = [infoFile] + return infoFiles + +class InferResultFileFetcher(): + def __init__(self, path): + self.path = path + + def getInferResult(self, image_idx): + resultDict = {} + for i in range(1, 5): + fileName = 'coco_val2017_' + str(image_idx) + '_' + str(i) + '.bin' + infoFile = InferResultFile(self.path, fileName) + if infoFile.arrayDim == 615936: + resultDict[0] = infoFile + elif infoFile.arrayDim == 1559088: + resultDict[1] = infoFile + elif infoFile.arrayDim == 76992: + resultDict[2] = infoFile + else: + resultDict[3] = infoFile + + return resultDict + +pred_priors = None + +def getPriorTensor(): + global pred_priors + if pred_priors is None: + from yolact import PredictionModule + cfg._tmp_img_h = 550 + cfg._tmp_img_w = 550 + pred_priors = PredictionModule.get_YOLACT_priors().numpy() + return pred_priors + else: + return pred_priors + +def evaluate(path, dataset): + cfg.mask_proto_debug = args.mask_proto_debug + inferResultFetcher = InferResultFileFetcher(path) + + frame_times = MovingAverage() + dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) + progress_bar = ProgressBar(30, dataset_size) + + print() + + # For each class and iou, stores tuples (score, isPositive) + # Index ap_data[type][iouIdx][classIdx] + ap_data = { + 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], + 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] + } + detections = Detections() + + dataset_indices = list(range(len(dataset))) + + if args.shuffle: + random.shuffle(dataset_indices) + elif not args.no_sort: + # Do a deterministic shuffle based on the image ids + # + # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's + # the order of insertion. That means on python 3.6, the images come in the order they are in + # in the annotations file. For some reason, the first images in the annotations file are + # the hardest. To combat this, I use a hard-coded hash function based on the image ids + # to shuffle the indices we use. That way, no matter what python version or how pycocotools + # handles the data, we get the same result every time. 
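# A minimal, self-contained sketch of the 101-point interpolated AP that get_ap()
# above computes; example_scores / example_hits / example_num_gt are made-up
# illustrative inputs, not values taken from this repository or patch.
import numpy as np

def interpolated_ap_sketch(scores, hits, num_gt):
    order = np.argsort(-np.asarray(scores, dtype=float))   # sort detections by descending score
    hits = np.asarray(hits, dtype=bool)[order]
    tp = np.cumsum(hits)                                    # running true-positive count
    fp = np.cumsum(~hits)                                   # running false-positive count
    precisions = tp / (tp + fp)
    recalls = tp / num_gt
    # Remove temporary dips so precision is non-increasing along the recall axis.
    precisions = np.maximum.accumulate(precisions[::-1])[::-1]
    # Sample precision at 101 evenly spaced recall points and average (the Riemann sum).
    sample_recalls = np.linspace(0.0, 1.0, 101)
    idx = np.searchsorted(recalls, sample_recalls, side='left')
    safe_idx = np.minimum(idx, len(precisions) - 1)
    sampled = np.where(idx < len(precisions), precisions[safe_idx], 0.0)
    return float(sampled.mean())

example_scores = [0.9, 0.8, 0.7, 0.6]       # detection confidences
example_hits = [True, False, True, True]    # True = detection matched a ground-truth box
example_num_gt = 3                          # ground-truth instances of this class
print(interpolated_ap_sketch(example_scores, example_hits, example_num_gt))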
+ hashed = [badhash(x) for x in dataset.ids] + dataset_indices.sort(key=lambda x: hashed[x]) + + dataset_indices = dataset_indices[:dataset_size] + + # Main eval loop + for it, image_idx in enumerate(dataset_indices): + timer.reset() + with timer.env('Load Data'): + img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) + # Test flag, do not upvote + with timer.env('Network Extra'): + imgId_Outputs = inferResultFetcher.getInferResult(image_idx) + + pred_mask = imgId_Outputs[0].arrayValue.reshape(1, 19248, 32) #output1 : pred_onnx[2] + pred_conf = imgId_Outputs[1].arrayValue.reshape(1, 19248, 81) #output2 : pred_onnx[1] + pred_loc = imgId_Outputs[2].arrayValue.reshape(1, 19248, 4) #output3 : pred_onnx[0] + pred_proto = imgId_Outputs[3].arrayValue.reshape(1, 138, 138, 32) #output4 : pred_onnx[4] + + detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5) + detect.use_fast_nms = args.fast_nms + preds = detect({'loc': torch.from_numpy(pred_loc), + 'conf': torch.from_numpy(pred_conf), + 'mask': torch.from_numpy(pred_mask), + 'priors': torch.from_numpy(getPriorTensor()), #????? + 'proto': torch.from_numpy(pred_proto)}) + + # Perform the meat of the operation here depending on our mode. + + prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) + # First couple of images take longer because we're constructing the graph. + # Since that's technically initialization, don't include those in the FPS calculations. + if it > 1: + frame_times.add(timer.total_time()) + if not args.no_bar: + if it > 1: fps = 1 / frame_times.get_avg() + else: fps = 0 + progress = (it+1) / dataset_size * 100 + progress_bar.set_val(it+1) + print('\rProcessing Output Results %s %6d / %6d (%5.2f%%) %5.2f fps ' + % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') + + print() + print('Saving data...') + with open(args.ap_data_file, 'wb') as f: + pickle.dump(ap_data, f) + return calc_map(ap_data) + +def calc_map(ap_data): + print('Calculating mAP...') + aps = [{'box': [], 'mask': []} for _ in iou_thresholds] + + for _class in range(len(cfg.dataset.class_names)): + for iou_idx in range(len(iou_thresholds)): + for iou_type in ('box', 'mask'): + ap_obj = ap_data[iou_type][iou_idx][_class] + + if not ap_obj.is_empty(): + aps[iou_idx][iou_type].append(ap_obj.get_ap()) + + all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} + + # Looking back at it, this code is really hard to read :/ + for iou_type in ('box', 'mask'): + all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict + for i, threshold in enumerate(iou_thresholds): + mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 + all_maps[iou_type][int(threshold*100)] = mAP + all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) + + print_maps(all_maps) + + # Put in a prettier format so we can serialize it to json during training + all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} + return all_maps + +def print_maps(all_maps): + # Warning: hacky + make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) + make_sep = lambda n: ('-------+' * n) + + print() + print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) + print(make_sep(len(all_maps['box']) + 1)) + for iou_type in ('box', 'mask'): + print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in 
all_maps[iou_type].values()])) + print(make_sep(len(all_maps['box']) + 1)) + print() + +if __name__ == '__main__': + path = './result/dumpOutput_device0/' + parse_args() + + if args.config is not None: + set_cfg(args.config) + + else: + args.config = 'yolact_base_config' + print('Config not specified. Parsed %s from the file name.\n' % args.config) + set_cfg(args.config) + + #if args.image is None and args.video is None and args.images is None: + dataset = COCODetection(args.valid_images, args.valid_annotations, + transform=BaseTransform(), has_gt=cfg.dataset.has_gt) + prep_coco_cats() + + evaluate(path, dataset) diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_preprocess.py index 1ae885bb886f43c9686edee4c279d53963f92d7e..3d0d49a69170821f2b8cc4982dfd133f23df976b 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/YOLACT_preprocess.py @@ -1,174 +1,174 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -from data import COCODetection, get_label_map -from utils.augmentations import BaseTransform -from data import cfg, set_cfg -import numpy as np -from torch.autograd import Variable -import argparse -import random -import os -from collections import defaultdict - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(argv=None): - parser = argparse.ArgumentParser( - description='YOLACT COCO Evaluation') - parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') - parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') - parser.add_argument('--saved_path', default='./', help='the path of binary data and info file') - parser.add_argument('--cuda', default=True, type=str2bool, - help='Use cuda to evaulate model') - parser.add_argument('--display', dest='display', action='store_true', - help='Display qualitative results instead of quantitative ones.') - parser.add_argument('--shuffle', dest='shuffle', action='store_true', - help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') - parser.add_argument('--resume', dest='resume', action='store_true', - help='If display not set, this resumes mAP calculations from the ap_data_file.') - parser.add_argument('--max_images', default=-1, type=int, - help='The maximum number of images from the dataset to consider. Use -1 for all.') - parser.add_argument('--config', default=None, - help='The config object to use.') - parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', - help='Do not sort images by hashed image ID.') - parser.add_argument('--seed', default=None, type=int, - help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') - parser.add_argument('--image', default=None, type=str, - help='A path to an image to use for display.') - parser.add_argument('--images', default=None, type=str, - help='An input folder of images and output folder to save detected images. Should be in the format input->output.') - parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') - parser.set_defaults(display=False, resume=False, shuffle=False, - no_sort=False) - - global args - args = parser.parse_args(argv) - - if args.seed is not None: - random.seed(args.seed) - -iou_thresholds = [x / 100 for x in range(50, 100, 5)] -coco_cats = {} # Call prep_coco_cats to fill this -coco_cats_inv = {} -color_cache = defaultdict(lambda: {}) - -def prep_coco_cats(): - """ Prepare inverted table for category id lookup given a coco cats object. """ - for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): - transformed_cat_id = transformed_cat_id_p1 - 1 - coco_cats[transformed_cat_id] = coco_cat_id - coco_cats_inv[coco_cat_id] = transformed_cat_id - -def badhash(x): - """ - Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. 
- - Source: - https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - """ - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = ((x >> 16) ^ x) & 0xFFFFFFFF - return x - -def preprocess(dataset, save_path = None): - dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) - print("dataset size is : ", dataset_size) - print() - - # For each class and iou, stores tuples (score, isPositive) - # Index ap_data[type][iouIdx][classIdx] - dataset_indices = list(range(len(dataset))) - print("dataset indices size is :", len(dataset_indices)) - if args.shuffle: - random.shuffle(dataset_indices) - elif not args.no_sort: - hashed = [badhash(x) for x in dataset.ids] - dataset_indices.sort(key=lambda x: hashed[x]) - - dataset_indices = dataset_indices[:dataset_size] - - if save_path is None: - save_path = os.path.join(args.saved_path, 'prep_dataset') - - if os.path.exists(save_path) == False: - os.mkdir(save_path) - else: - print('dir exist!') - - # Main eval loop - with open(os.path.join(args.saved_path, 'yolact_prep_bin.info'), 'w+') as f: - for it, image_idx in enumerate(dataset_indices): - img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) - # Test flag, do not upvote - batch = Variable(img.unsqueeze(0)) - batch_numpy = np.array(batch).astype(np.float32) - - binFileName = os.path.join(save_path, 'coco_val2017_' + str(image_idx) + '.bin') - - batch_numpy.tofile(binFileName) - - line = str(it) + ' ' + binFileName + ' ' + '550 550\n' - f.write(line) - if it % 100 == 0: - print('[INFO][PreProcess]', 'CurSampleNum:', it) - -if __name__ == '__main__': - parse_args() - - if args.config is not None: - set_cfg(args.config) - - else: - args.config = 'yolact_base_config' - print('Config not specified. Parsed %s from the file name.\n' % args.config) - set_cfg(args.config) - - if not os.path.exists('results'): - os.makedirs('results') - - #if args.image is None and args.video is None and args.images is None: - dataset = COCODetection(args.valid_images, args.valid_annotations, - transform=BaseTransform(), has_gt=cfg.dataset.has_gt) - prep_coco_cats() - - preprocess(dataset) - - +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from data import COCODetection, get_label_map +from utils.augmentations import BaseTransform +from data import cfg, set_cfg +import numpy as np +from torch.autograd import Variable +import argparse +import random +import os +from collections import defaultdict + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description='YOLACT COCO Evaluation') + parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') + parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') + parser.add_argument('--saved_path', default='./', help='the path of binary data and info file') + parser.add_argument('--cuda', default=True, type=str2bool, + help='Use cuda to evaulate model') + parser.add_argument('--display', dest='display', action='store_true', + help='Display qualitative results instead of quantitative ones.') + parser.add_argument('--shuffle', dest='shuffle', action='store_true', + help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') + parser.add_argument('--resume', dest='resume', action='store_true', + help='If display not set, this resumes mAP calculations from the ap_data_file.') + parser.add_argument('--max_images', default=-1, type=int, + help='The maximum number of images from the dataset to consider. Use -1 for all.') + parser.add_argument('--config', default=None, + help='The config object to use.') + parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', + help='Do not sort images by hashed image ID.') + parser.add_argument('--seed', default=None, type=int, + help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') + parser.add_argument('--image', default=None, type=str, + help='A path to an image to use for display.') + parser.add_argument('--images', default=None, type=str, + help='An input folder of images and output folder to save detected images. 
Should be in the format input->output.') + parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') + parser.set_defaults(display=False, resume=False, shuffle=False, + no_sort=False) + + global args + args = parser.parse_args(argv) + + if args.seed is not None: + random.seed(args.seed) + +iou_thresholds = [x / 100 for x in range(50, 100, 5)] +coco_cats = {} # Call prep_coco_cats to fill this +coco_cats_inv = {} +color_cache = defaultdict(lambda: {}) + +def prep_coco_cats(): + """ Prepare inverted table for category id lookup given a coco cats object. """ + for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): + transformed_cat_id = transformed_cat_id_p1 - 1 + coco_cats[transformed_cat_id] = coco_cat_id + coco_cats_inv[coco_cat_id] = transformed_cat_id + +def badhash(x): + """ + Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. + + Source: + https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + """ + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = ((x >> 16) ^ x) & 0xFFFFFFFF + return x + +def preprocess(dataset, save_path = None): + dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) + print("dataset size is : ", dataset_size) + print() + + # For each class and iou, stores tuples (score, isPositive) + # Index ap_data[type][iouIdx][classIdx] + dataset_indices = list(range(len(dataset))) + print("dataset indices size is :", len(dataset_indices)) + if args.shuffle: + random.shuffle(dataset_indices) + elif not args.no_sort: + hashed = [badhash(x) for x in dataset.ids] + dataset_indices.sort(key=lambda x: hashed[x]) + + dataset_indices = dataset_indices[:dataset_size] + + if save_path is None: + save_path = os.path.join(args.saved_path, 'prep_dataset') + + if os.path.exists(save_path) == False: + os.mkdir(save_path) + else: + print('dir exist!') + + # Main eval loop + with open(os.path.join(args.saved_path, 'yolact_prep_bin.info'), 'w+') as f: + for it, image_idx in enumerate(dataset_indices): + img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) + # Test flag, do not upvote + batch = Variable(img.unsqueeze(0)) + batch_numpy = np.array(batch).astype(np.float32) + + binFileName = os.path.join(save_path, 'coco_val2017_' + str(image_idx) + '.bin') + + batch_numpy.tofile(binFileName) + + line = str(it) + ' ' + binFileName + ' ' + '550 550\n' + f.write(line) + if it % 100 == 0: + print('[INFO][PreProcess]', 'CurSampleNum:', it) + +if __name__ == '__main__': + parse_args() + + if args.config is not None: + set_cfg(args.config) + + else: + args.config = 'yolact_base_config' + print('Config not specified. 
Parsed %s from the file name.\n' % args.config) + set_cfg(args.config) + + if not os.path.exists('results'): + os.makedirs('results') + + #if args.image is None and args.video is None and args.images is None: + dataset = COCODetection(args.valid_images, args.valid_annotations, + transform=BaseTransform(), has_gt=cfg.dataset.has_gt) + prep_coco_cats() + + preprocess(dataset) + + diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/eval_acc_perf.sh b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/eval_acc_perf.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/parse.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/parse.py index a0f253b055047b199b33d4b65cdc79177b6b250b..27eae0d0acf98687edd95f1f024cf77c49cd4dc4 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/parse.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/perf_g.sh b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/perf_g.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/pth2om.sh b/ACL_PyTorch/contrib/cv/segmentation/YOLACT/test/pth2om.sh old mode 100755 new mode 100644 diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/LICENSE b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/LICENSE +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/README.md b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/README.md index ade13dfe135415893c04aa3774d8e5ff6330db45..4b9e18211ff8c03a584c6ec9919ba9b61eebe3c8 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/README.md +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/README.md @@ -1,159 +1,159 @@ -# YOLACT模型PyTorch离线推理说明 - -### 一、环境准备 - -#### 1、获取依赖库 - -```shell -pip3 install -r requirements.txt -git clone -b pytorch_1.5 https://github.com/ifzhang/DCNv2.git -cd DCNv2 -python3.7 setup.py build develop -patch -p1 < ../dcnv2.diff -``` - -#### 2、获取YOLACT源代码并更新 - -- 首先获取官方代码仓代码 - - ```bash - git clone https://github.com/dbolya/yolact.git - ``` - -- 将无关文件删除,保留以下文件 - - ``` - . - ├── backbone.py - ├── data - │   ├── coco.py - │   ├── config.py - │   └── __init__.py - ├── layers - │   ├── box_utils.py - │   ├── functions - │   │   ├── detection.py - │   │   └── __init__.py - │   ├── __init__.py - │   ├── interpolate.py - │   └── output_utils.py - ├── utils - │   ├── augmentations.py - │   ├── cython_nms.pyx - │   ├── functions.py - │   ├── __init__.py - │   └── timer.py - └── yolact.py - ``` - -- 将本仓库代码拷贝至yolact目录下,并使用补丁YOLACT.patch复原 - - ``` - patch -p1 < ./YOLACT.patch - ``` - - - 复原后,文件目录如下 - - ``` - . - ├── backbone.py - ├── data - │   ├── coco.py - │   ├── config.py - │   └── __init__.py - ├── env.sh - ├── dcnv2.diff - ├── DCNv2 - ├── layers - │   ├── box_utils.py - │   ├── functions - │   │   ├── detection.py - │   │   └── __init__.py - │   ├── __init__.py - │   ├── interpolate.py - │   └── output_utils.py - ├── LICENSE - ├── modelzoo_level.txt - ├── README.md - ├── requirements.txt - ├── test - │   ├── eval_acc_perf.sh - │   ├── parse.py - │   ├── prior.bin - │   └── pth2om.sh - ├── utils - │   ├── augmentations.py - │   ├── cython_nms.pyx - │   ├── functions.py - │   ├── __init__.py - │   └── timer.py - ├── weights - │   └── pth2onnx.py - ├── YOLACT.patch - ├── YOLACT_postprocess.py - ├── YOLACT_preprocess.py - └── yolact.py - ``` - -#### 3、获取权重文件 - -官方训练完毕的权重文件:yolact_plus_resnet50_54_800000.pth - -训练完毕的权重文件放于./weights目录下 - -#### 4、获取数据集 - -YOLACT模型使用Microsoft COCO 2017数据集进行训练及测试,下载数据集命令如下: - -```bash -cd data/scripts -bash ./COCO.sh #获取测试数据集 -``` - -在离线推理中仅使用测试数据集,测试图像为val 2017, 对应的标注文件为instances_val2017.json - -#### 5、获取benchmark工具 - -获取benchmark.x86_64离线推理工具 - - - -### 二、离线推理 - -#### 1、执行离线推理前使用以下命令查看设备状态,确保device空闲 - -```bash -npu-smi info -``` - -#### 2、执行以下命令,生成om模型文件 - -```bash -bash test/pth2om.sh -``` - -注意:此处pth权重文件的路径应设为相对路径 - -#### 3、执行以下命令,开始离线推理 - -```bash -bash test/eval_acc_perf.sh -``` - -同时,benchmark工具会自动统计性能数据。 - -#### 4、在基准环境中执行以下命令,获取基准推理性能 - -onnx包含自定义算子,不能使用开源TensorRT测试性能数据,所以在基准服务器上在线推理测试性能数据。 - - - -### 三、评测结果 - -Yolact++不支持在bs16上离线推理,故在bs8上测试。 - -| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | -| ----------- | ----------------------- | ----------------------- | ---------- | --------- | -| YOLACT bs1 | mAP: box 34.94, mask 33.69 | mAP: box 34.90, mask 33.71 | 19.693fps | 26.452fps | -| YOLACT bs8 | mAP: box 34.94, mask 33.69 | mAP: box 34.90, mask 33.71 | 16.377fps | 31.130fps | +# YOLACT模型PyTorch离线推理说明 + +### 一、环境准备 + +#### 1、获取依赖库 + +```shell +pip3 install -r requirements.txt +git clone -b pytorch_1.5 https://github.com/ifzhang/DCNv2.git +cd DCNv2 +python3.7 setup.py build develop +patch -p1 < ../dcnv2.diff +``` + +#### 2、获取YOLACT源代码并更新 + +- 首先获取官方代码仓代码 + + ```bash + git clone https://github.com/dbolya/yolact.git + ``` + +- 将无关文件删除,保留以下文件 + + ``` + . 
+ ├── backbone.py + ├── data + │   ├── coco.py + │   ├── config.py + │   └── __init__.py + ├── layers + │   ├── box_utils.py + │   ├── functions + │   │   ├── detection.py + │   │   └── __init__.py + │   ├── __init__.py + │   ├── interpolate.py + │   └── output_utils.py + ├── utils + │   ├── augmentations.py + │   ├── cython_nms.pyx + │   ├── functions.py + │   ├── __init__.py + │   └── timer.py + └── yolact.py + ``` + +- 将本仓库代码拷贝至yolact目录下,并使用补丁YOLACT.patch复原 + + ``` + patch -p1 < ./YOLACT.patch + ``` + + + 复原后,文件目录如下 + + ``` + . + ├── backbone.py + ├── data + │   ├── coco.py + │   ├── config.py + │   └── __init__.py + ├── env.sh + ├── dcnv2.diff + ├── DCNv2 + ├── layers + │   ├── box_utils.py + │   ├── functions + │   │   ├── detection.py + │   │   └── __init__.py + │   ├── __init__.py + │   ├── interpolate.py + │   └── output_utils.py + ├── LICENSE + ├── modelzoo_level.txt + ├── README.md + ├── requirements.txt + ├── test + │   ├── eval_acc_perf.sh + │   ├── parse.py + │   ├── prior.bin + │   └── pth2om.sh + ├── utils + │   ├── augmentations.py + │   ├── cython_nms.pyx + │   ├── functions.py + │   ├── __init__.py + │   └── timer.py + ├── weights + │   └── pth2onnx.py + ├── YOLACT.patch + ├── YOLACT_postprocess.py + ├── YOLACT_preprocess.py + └── yolact.py + ``` + +#### 3、获取权重文件 + +官方训练完毕的权重文件:yolact_plus_resnet50_54_800000.pth + +训练完毕的权重文件放于./weights目录下 + +#### 4、获取数据集 + +YOLACT模型使用Microsoft COCO 2017数据集进行训练及测试,下载数据集命令如下: + +```bash +cd data/scripts +bash ./COCO.sh #获取测试数据集 +``` + +在离线推理中仅使用测试数据集,测试图像为val 2017, 对应的标注文件为instances_val2017.json + +#### 5、获取benchmark工具 + +获取benchmark.x86_64离线推理工具 + + + +### 二、离线推理 + +#### 1、执行离线推理前使用以下命令查看设备状态,确保device空闲 + +```bash +npu-smi info +``` + +#### 2、执行以下命令,生成om模型文件 + +```bash +bash test/pth2om.sh +``` + +注意:此处pth权重文件的路径应设为相对路径 + +#### 3、执行以下命令,开始离线推理 + +```bash +bash test/eval_acc_perf.sh +``` + +同时,benchmark工具会自动统计性能数据。 + +#### 4、在基准环境中执行以下命令,获取基准推理性能 + +onnx包含自定义算子,不能使用开源TensorRT测试性能数据,所以在基准服务器上在线推理测试性能数据。 + + + +### 三、评测结果 + +Yolact++不支持在bs16上离线推理,故在bs8上测试。 + +| 模型 | 在线推理精度 | 310离线推理精度 | 基准性能 | 310性能 | +| ----------- | ----------------------- | ----------------------- | ---------- | --------- | +| YOLACT bs1 | mAP: box 34.94, mask 33.69 | mAP: box 34.90, mask 33.71 | 19.693fps | 26.452fps | +| YOLACT bs8 | mAP: box 34.94, mask 33.69 | mAP: box 34.90, mask 33.71 | 16.377fps | 31.130fps | diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_postprocess.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_postprocess.py index 58d3a90d974e053fbc6ea88ceaa3c20efcf91dc5..83de5b39dcd8fa6c2fc3ee9707c9a4ed049462a5 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_postprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_postprocess.py @@ -1,566 +1,566 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys - -from layers import Detect -from data import COCODetection, get_label_map, MEANS, COLORS -from yolact import Yolact -from utils.augmentations import BaseTransform, FastBaseTransform, Resize -from utils.functions import MovingAverage, ProgressBar -from layers.box_utils import jaccard, center_size, mask_iou -from utils import timer -from utils.functions import SavePath -from layers.output_utils import postprocess, undo_image_transformation -import pycocotools - -from data import cfg, set_cfg, set_dataset - -import numpy as np -import torch -import torch.backends.cudnn as cudnn -from torch.autograd import Variable -import argparse -import time -import random -import cProfile -import pickle -import json -import os -from collections import defaultdict -from pathlib import Path -from collections import OrderedDict -from PIL import Image - -import matplotlib.pyplot as plt -import cv2 - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(argv=None): - parser = argparse.ArgumentParser( - description='YOLACT COCO Evaluation') - parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') - parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') - parser.add_argument('--top_k', default=5, type=int, - help='Further restrict the number of predictions to parse') - parser.add_argument('--cuda', default=True, type=str2bool, - help='Use cuda to evaulate model') - parser.add_argument('--fast_nms', default=True, type=str2bool, - help='Whether to use a faster, but not entirely correct version of NMS.') - parser.add_argument('--shuffle', dest='shuffle', action='store_true', - help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') - parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, - help='In quantitative mode, the file to save detections before calculating mAP.') - parser.add_argument('--max_images', default=-1, type=int, - help='The maximum number of images from the dataset to consider. Use -1 for all.') - parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', - help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') - parser.add_argument('--config', default=None, - help='The config object to use.') - parser.add_argument('--no_bar', dest='no_bar', action='store_true', - help='Do not output the status bar. This is useful for when piping to a file.') - parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', - help='Do not sort images by hashed image ID.') - parser.add_argument('--seed', default=None, type=int, - help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') - parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', - help='Outputs stuff for scripts/compute_mask.py.') - parser.add_argument('--score_threshold', default=0, type=float, - help='Detections with a score under this threshold will not be considered. 
This currently only works in display mode.') - parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') - parser.add_argument('--detect', default=False, dest='detect', action='store_true', - help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.') - parser.add_argument('--pthpath', type=str, - default='./weights/yolact_plus_resnet50_54_800000.pth', help='choose .pth module') - parser.add_argument('--device_id', type=int, - default=0, help='choose .pth module') - parser.set_defaults(no_bar=False, output_coco_json=False, shuffle=False, - no_sort=False, mask_proto_debug=False, detect=False, crop=True) - - global args - args = parser.parse_args(argv) - - if args.seed is not None: - random.seed(args.seed) - -iou_thresholds = [x / 100 for x in range(50, 100, 5)] -coco_cats = {} # Call prep_coco_cats to fill this -coco_cats_inv = {} -color_cache = defaultdict(lambda: {}) - -def prep_coco_cats(): - """ Prepare inverted table for category id lookup given a coco cats object. """ - for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): - transformed_cat_id = transformed_cat_id_p1 - 1 - coco_cats[transformed_cat_id] = coco_cat_id - coco_cats_inv[coco_cat_id] = transformed_cat_id - -def get_coco_cat(transformed_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats[transformed_cat_id] - -def get_transformed_cat(coco_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats_inv[coco_cat_id] - -class Detections: - - def __init__(self): - self.bbox_data = [] - self.mask_data = [] - - def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): - """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ - bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] - - # Round to the nearest 10th to avoid huge file sizes, as COCO suggests - bbox = [round(float(x)*10)/10 for x in bbox] - - self.bbox_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'bbox': bbox, - 'score': float(score) - }) - - def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): - """ The segmentation should be the full mask, the size of the image and with size [h, w]. 
""" - rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) - rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings - - self.mask_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'segmentation': rle, - 'score': float(score) - }) - -def _mask_iou(mask1, mask2, iscrowd=False): - with timer.env('Mask IoU'): - ret = mask_iou(mask1, mask2, iscrowd) - return ret.cpu() - -def _bbox_iou(bbox1, bbox2, iscrowd=False): - with timer.env('BBox IoU'): - ret = jaccard(bbox1, bbox2, iscrowd) - return ret.cpu() - -def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): - """ Returns a list of APs for this image, with each element being for a class """ - if not args.output_coco_json: - with timer.env('Prepare gt'): - gt_boxes = torch.Tensor(gt[:, :4]) - gt_boxes[:, [0, 2]] *= w - gt_boxes[:, [1, 3]] *= h - gt_classes = list(gt[:, 4].astype(int)) - gt_masks = torch.Tensor(gt_masks).view(-1, h*w) - - if num_crowd > 0: - split = lambda x: (x[-num_crowd:], x[:-num_crowd]) - crowd_boxes , gt_boxes = split(gt_boxes) - crowd_masks , gt_masks = split(gt_masks) - crowd_classes, gt_classes = split(gt_classes) - - with timer.env('Postprocess'): - classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) - - if classes.size(0) == 0: - return - - classes = list(classes.cpu().numpy().astype(int)) - if isinstance(scores, list): - box_scores = list(scores[0].cpu().numpy().astype(float)) - mask_scores = list(scores[1].cpu().numpy().astype(float)) - else: - scores = list(scores.cpu().numpy().astype(float)) - box_scores = scores - mask_scores = scores - masks = masks.view(-1, h*w) - - - if args.output_coco_json: - with timer.env('JSON Output'): - boxes = boxes.cpu().numpy() - masks = masks.view(-1, h, w).cpu().numpy() - for i in range(masks.shape[0]): - # Make sure that the bounding box actually makes sense and a mask was produced - if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: - detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) - detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) - return - - with timer.env('Eval Setup'): - num_pred = len(classes) - num_gt = len(gt_classes) - - mask_iou_cache = _mask_iou(masks, gt_masks) - bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) - - if num_crowd > 0: - crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) - crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) - else: - crowd_mask_iou_cache = None - crowd_bbox_iou_cache = None - - box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) - mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) - iou_types = [ - ('box', lambda i,j: bbox_iou_cache[i, j].item(), - lambda i,j: crowd_bbox_iou_cache[i,j].item(), - lambda i: box_scores[i], box_indices), - ('mask', lambda i,j: mask_iou_cache[i, j].item(), - lambda i,j: crowd_mask_iou_cache[i,j].item(), - lambda i: mask_scores[i], mask_indices) - ] - - - timer.start('Main loop') - for _class in set(classes + gt_classes): - ap_per_iou = [] - num_gt_for_class = sum([1 for x in gt_classes if x == _class]) - - for iouIdx in range(len(iou_thresholds)): - iou_threshold = iou_thresholds[iouIdx] - - for iou_type, iou_func, crowd_func, score_func, indices in iou_types: - gt_used = [False] * len(gt_classes) - - ap_obj = ap_data[iou_type][iouIdx][_class] - 
ap_obj.add_gt_positives(num_gt_for_class) - - for i in indices: - if classes[i] != _class: - continue - - max_iou_found = iou_threshold - max_match_idx = -1 - for j in range(num_gt): - if gt_used[j] or gt_classes[j] != _class: - continue - - iou = iou_func(i, j) - - if iou > max_iou_found: - max_iou_found = iou - max_match_idx = j - - if max_match_idx >= 0: - gt_used[max_match_idx] = True - ap_obj.push(score_func(i), True) - else: - # If the detection matches a crowd, we can just ignore it - matched_crowd = False - - if num_crowd > 0: - for j in range(len(crowd_classes)): - if crowd_classes[j] != _class: - continue - - iou = crowd_func(i, j) - - if iou > iou_threshold: - matched_crowd = True - break - - # All this crowd code so that we can make sure that our eval code gives the - # same result as COCOEval. There aren't even that many crowd annotations to - # begin with, but accuracy is of the utmost importance. - if not matched_crowd: - ap_obj.push(score_func(i), False) - timer.stop('Main loop') - -class APDataObject: - """ - Stores all the information necessary to calculate the AP for one IoU and one class. - Note: I type annotated this because why not. - """ - - def __init__(self): - self.data_points = [] - self.num_gt_positives = 0 - - def push(self, score:float, is_true:bool): - self.data_points.append((score, is_true)) - - def add_gt_positives(self, num_positives:int): - """ Call this once per image. """ - self.num_gt_positives += num_positives - - def is_empty(self) -> bool: - return len(self.data_points) == 0 and self.num_gt_positives == 0 - - def get_ap(self) -> float: - """ Warning: result not cached. """ - - if self.num_gt_positives == 0: - return 0 - - # Sort descending by score - self.data_points.sort(key=lambda x: -x[0]) - - precisions = [] - recalls = [] - num_true = 0 - num_false = 0 - - # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. - for datum in self.data_points: - # datum[1] is whether the detection a true or false positive - if datum[1]: num_true += 1 - else: num_false += 1 - - precision = num_true / (num_true + num_false) - recall = num_true / self.num_gt_positives - - precisions.append(precision) - recalls.append(recall) - - # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] - # Basically, remove any temporary dips from the curve. - # At least that's what I think, idk. COCOEval did it so I do too. - for i in range(len(precisions)-1, 0, -1): - if precisions[i] > precisions[i-1]: - precisions[i-1] = precisions[i] - - # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. - y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 - x_range = np.array([x / 100 for x in range(101)]) - recalls = np.array(recalls) - - # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. - # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). - # I approximate the integral this way, because that's how COCOEval does it. - indices = np.searchsorted(recalls, x_range, side='left') - for bar_idx, precision_idx in enumerate(indices): - if precision_idx < len(precisions): - y_range[bar_idx] = precisions[precision_idx] - - # Finally compute the riemann sum to get our integral. 
- # avg([precision(x) for x in 0:0.01:1]) - return sum(y_range) / len(y_range) - -def badhash(x): - """ - Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. - - Source: - https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - """ - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = ((x >> 16) ^ x) & 0xFFFFFFFF - return x - -class InferResultFile(): - def __init__(self, path, fileName): - parts = fileName.split('_') - self.imgId = int(parts[2]) - self.outputId = int(parts[3][0]) - self.arrayValue = np.fromfile(path + fileName, dtype=np.float32) - - self.arrayDim = self.arrayValue.shape[0] - #print('finish read file :', fileName) - -def getAllFiles(path): - allFiles = os.listdir(path) - infoFiles = {} - for file in allFiles: - if '.bin' in file and 'coco_val' in file: - infoFile = InferResultFile(path, file) - if infoFile.imgId in infoFiles.keys(): - infoFiles[infoFile.imgId].append(infoFile) - else: - infoFiles[infoFile.imgId] = [infoFile] - return infoFiles - -class InferResultFileFetcher(): - def __init__(self, path): - self.path = path - - def getInferResult(self, image_idx): - resultDict = {} - for i in range(1, 5): - fileName = 'coco_val2017_' + str(image_idx) + '_' + str(i) + '.bin' - infoFile = InferResultFile(self.path, fileName) - if infoFile.arrayDim == 1847808: - resultDict[0] = infoFile - elif infoFile.arrayDim == 4677264: - resultDict[1] = infoFile - elif infoFile.arrayDim == 230976: - resultDict[2] = infoFile - else: - resultDict[3] = infoFile - - return resultDict - -pred_priors = None - -def getPriorTensor(): - global pred_priors - if pred_priors is None: - pred_priors = np.fromfile('prior.bin', dtype=np.float32).reshape(57744, 4) - return pred_priors - else: - return pred_priors - -def evaluate(path, dataset): - cfg.mask_proto_debug = args.mask_proto_debug - inferResultFetcher = InferResultFileFetcher(path) - - frame_times = MovingAverage() - dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) - progress_bar = ProgressBar(30, dataset_size) - - print() - - # For each class and iou, stores tuples (score, isPositive) - # Index ap_data[type][iouIdx][classIdx] - ap_data = { - 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], - 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] - } - detections = Detections() - - dataset_indices = list(range(len(dataset))) - - if args.shuffle: - random.shuffle(dataset_indices) - elif not args.no_sort: - # Do a deterministic shuffle based on the image ids - # - # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's - # the order of insertion. That means on python 3.6, the images come in the order they are in - # in the annotations file. For some reason, the first images in the annotations file are - # the hardest. To combat this, I use a hard-coded hash function based on the image ids - # to shuffle the indices we use. That way, no matter what python version or how pycocotools - # handles the data, we get the same result every time. 
- hashed = [badhash(x) for x in dataset.ids] - dataset_indices.sort(key=lambda x: hashed[x]) - - dataset_indices = dataset_indices[:dataset_size] - - # Main eval loop - for it, image_idx in enumerate(dataset_indices): - timer.reset() - with timer.env('Load Data'): - img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) - # Test flag, do not upvote - with timer.env('Network Extra'): - imgId_Outputs = inferResultFetcher.getInferResult(image_idx) - pred_mask = imgId_Outputs[0].arrayValue.reshape(1, 57744, 32) #output1 : pred_onnx[2] - pred_conf = imgId_Outputs[1].arrayValue.reshape(1, 57744, 81) #output2 : pred_onnx[1] - pred_loc = imgId_Outputs[2].arrayValue.reshape(1, 57744, 4) #output3 : pred_onnx[0] - pred_proto = imgId_Outputs[3].arrayValue.reshape(1, 138, 138, 32) #output4 : pred_onnx[4] - - detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5) - detect.use_fast_nms = args.fast_nms - preds = detect({'loc': torch.from_numpy(pred_loc), - 'conf': torch.from_numpy(pred_conf), - 'mask': torch.from_numpy(pred_mask), - 'priors': torch.from_numpy(getPriorTensor()), #????? - 'proto': torch.from_numpy(pred_proto)}) - from yolact import Yolact - yolact_net = Yolact() - yolact_net.load_weights(args.pthpath, useCuda=False) - preds = {'net':yolact_net, 'detection':preds} - - # Perform the meat of the operation here depending on our mode. - - prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) - # First couple of images take longer because we're constructing the graph. - # Since that's technically initialization, don't include those in the FPS calculations. - if it > 1: - frame_times.add(timer.total_time()) - if not args.no_bar: - if it > 1: fps = 1 / frame_times.get_avg() - else: fps = 0 - progress = (it+1) / dataset_size * 100 - progress_bar.set_val(it+1) - print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' - % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') - - print() - print('Saving data...') - with open(args.ap_data_file, 'wb') as f: - pickle.dump(ap_data, f) - return calc_map(ap_data) - -def calc_map(ap_data): - print('Calculating mAP...') - aps = [{'box': [], 'mask': []} for _ in iou_thresholds] - - for _class in range(len(cfg.dataset.class_names)): - for iou_idx in range(len(iou_thresholds)): - for iou_type in ('box', 'mask'): - ap_obj = ap_data[iou_type][iou_idx][_class] - - if not ap_obj.is_empty(): - aps[iou_idx][iou_type].append(ap_obj.get_ap()) - - all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} - - # Looking back at it, this code is really hard to read :/ - for iou_type in ('box', 'mask'): - all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict - for i, threshold in enumerate(iou_thresholds): - mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 - all_maps[iou_type][int(threshold*100)] = mAP - all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) - - print_maps(all_maps) - - # Put in a prettier format so we can serialize it to json during training - all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} - return all_maps - -def print_maps(all_maps): - # Warning: hacky - make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) - make_sep = lambda n: ('-------+' * n) - - print() - print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) - print(make_sep(len(all_maps['box']) 
+ 1)) - for iou_type in ('box', 'mask'): - print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values()])) - print(make_sep(len(all_maps['box']) + 1)) - print() - -if __name__ == '__main__': - parse_args() - path = './result/dumpOutput_device{}/'.format(args.device_id) - - if args.config is not None: - set_cfg(args.config) - - else: - args.config = 'yolact_base_config' - print('Config not specified. Parsed %s from the file name.\n' % args.config) - set_cfg(args.config) - - #if args.image is None and args.video is None and args.images is None: - dataset = COCODetection(args.valid_images, args.valid_annotations, - transform=BaseTransform(), has_gt=cfg.dataset.has_gt) - prep_coco_cats() - - evaluate(path, dataset) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +from layers import Detect +from data import COCODetection, get_label_map, MEANS, COLORS +from yolact import Yolact +from utils.augmentations import BaseTransform, FastBaseTransform, Resize +from utils.functions import MovingAverage, ProgressBar +from layers.box_utils import jaccard, center_size, mask_iou +from utils import timer +from utils.functions import SavePath +from layers.output_utils import postprocess, undo_image_transformation +import pycocotools + +from data import cfg, set_cfg, set_dataset + +import numpy as np +import torch +import torch.backends.cudnn as cudnn +from torch.autograd import Variable +import argparse +import time +import random +import cProfile +import pickle +import json +import os +from collections import defaultdict +from pathlib import Path +from collections import OrderedDict +from PIL import Image + +import matplotlib.pyplot as plt +import cv2 + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description='YOLACT COCO Evaluation') + parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') + parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') + parser.add_argument('--top_k', default=5, type=int, + help='Further restrict the number of predictions to parse') + parser.add_argument('--cuda', default=True, type=str2bool, + help='Use cuda to evaulate model') + parser.add_argument('--fast_nms', default=True, type=str2bool, + help='Whether to use a faster, but not entirely correct version of NMS.') + parser.add_argument('--shuffle', dest='shuffle', action='store_true', + help='Shuffles the images when displaying them. 
Doesn\'t have much of an effect when display is off though.') + parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, + help='In quantitative mode, the file to save detections before calculating mAP.') + parser.add_argument('--max_images', default=-1, type=int, + help='The maximum number of images from the dataset to consider. Use -1 for all.') + parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', + help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') + parser.add_argument('--config', default=None, + help='The config object to use.') + parser.add_argument('--no_bar', dest='no_bar', action='store_true', + help='Do not output the status bar. This is useful for when piping to a file.') + parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', + help='Do not sort images by hashed image ID.') + parser.add_argument('--seed', default=None, type=int, + help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') + parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', + help='Outputs stuff for scripts/compute_mask.py.') + parser.add_argument('--score_threshold', default=0, type=float, + help='Detections with a score under this threshold will not be considered. This currently only works in display mode.') + parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') + parser.add_argument('--detect', default=False, dest='detect', action='store_true', + help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.') + parser.add_argument('--pthpath', type=str, + default='./weights/yolact_plus_resnet50_54_800000.pth', help='choose .pth module') + parser.add_argument('--device_id', type=int, + default=0, help='choose .pth module') + parser.set_defaults(no_bar=False, output_coco_json=False, shuffle=False, + no_sort=False, mask_proto_debug=False, detect=False, crop=True) + + global args + args = parser.parse_args(argv) + + if args.seed is not None: + random.seed(args.seed) + +iou_thresholds = [x / 100 for x in range(50, 100, 5)] +coco_cats = {} # Call prep_coco_cats to fill this +coco_cats_inv = {} +color_cache = defaultdict(lambda: {}) + +def prep_coco_cats(): + """ Prepare inverted table for category id lookup given a coco cats object. 
""" + for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): + transformed_cat_id = transformed_cat_id_p1 - 1 + coco_cats[transformed_cat_id] = coco_cat_id + coco_cats_inv[coco_cat_id] = transformed_cat_id + +def get_coco_cat(transformed_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats[transformed_cat_id] + +def get_transformed_cat(coco_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats_inv[coco_cat_id] + +class Detections: + + def __init__(self): + self.bbox_data = [] + self.mask_data = [] + + def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): + """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ + bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] + + # Round to the nearest 10th to avoid huge file sizes, as COCO suggests + bbox = [round(float(x)*10)/10 for x in bbox] + + self.bbox_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'bbox': bbox, + 'score': float(score) + }) + + def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): + """ The segmentation should be the full mask, the size of the image and with size [h, w]. """ + rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) + rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings + + self.mask_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'segmentation': rle, + 'score': float(score) + }) + +def _mask_iou(mask1, mask2, iscrowd=False): + with timer.env('Mask IoU'): + ret = mask_iou(mask1, mask2, iscrowd) + return ret.cpu() + +def _bbox_iou(bbox1, bbox2, iscrowd=False): + with timer.env('BBox IoU'): + ret = jaccard(bbox1, bbox2, iscrowd) + return ret.cpu() + +def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): + """ Returns a list of APs for this image, with each element being for a class """ + if not args.output_coco_json: + with timer.env('Prepare gt'): + gt_boxes = torch.Tensor(gt[:, :4]) + gt_boxes[:, [0, 2]] *= w + gt_boxes[:, [1, 3]] *= h + gt_classes = list(gt[:, 4].astype(int)) + gt_masks = torch.Tensor(gt_masks).view(-1, h*w) + + if num_crowd > 0: + split = lambda x: (x[-num_crowd:], x[:-num_crowd]) + crowd_boxes , gt_boxes = split(gt_boxes) + crowd_masks , gt_masks = split(gt_masks) + crowd_classes, gt_classes = split(gt_classes) + + with timer.env('Postprocess'): + classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) + + if classes.size(0) == 0: + return + + classes = list(classes.cpu().numpy().astype(int)) + if isinstance(scores, list): + box_scores = list(scores[0].cpu().numpy().astype(float)) + mask_scores = list(scores[1].cpu().numpy().astype(float)) + else: + scores = list(scores.cpu().numpy().astype(float)) + box_scores = scores + mask_scores = scores + masks = masks.view(-1, h*w) + + + if args.output_coco_json: + with timer.env('JSON Output'): + boxes = boxes.cpu().numpy() + masks = masks.view(-1, h, w).cpu().numpy() + for i in range(masks.shape[0]): + # Make sure that the bounding box actually makes sense and a mask was produced + if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: + detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) + detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) + return + + 
with timer.env('Eval Setup'): + num_pred = len(classes) + num_gt = len(gt_classes) + + mask_iou_cache = _mask_iou(masks, gt_masks) + bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) + + if num_crowd > 0: + crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) + crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) + else: + crowd_mask_iou_cache = None + crowd_bbox_iou_cache = None + + box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) + mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) + iou_types = [ + ('box', lambda i,j: bbox_iou_cache[i, j].item(), + lambda i,j: crowd_bbox_iou_cache[i,j].item(), + lambda i: box_scores[i], box_indices), + ('mask', lambda i,j: mask_iou_cache[i, j].item(), + lambda i,j: crowd_mask_iou_cache[i,j].item(), + lambda i: mask_scores[i], mask_indices) + ] + + + timer.start('Main loop') + for _class in set(classes + gt_classes): + ap_per_iou = [] + num_gt_for_class = sum([1 for x in gt_classes if x == _class]) + + for iouIdx in range(len(iou_thresholds)): + iou_threshold = iou_thresholds[iouIdx] + + for iou_type, iou_func, crowd_func, score_func, indices in iou_types: + gt_used = [False] * len(gt_classes) + + ap_obj = ap_data[iou_type][iouIdx][_class] + ap_obj.add_gt_positives(num_gt_for_class) + + for i in indices: + if classes[i] != _class: + continue + + max_iou_found = iou_threshold + max_match_idx = -1 + for j in range(num_gt): + if gt_used[j] or gt_classes[j] != _class: + continue + + iou = iou_func(i, j) + + if iou > max_iou_found: + max_iou_found = iou + max_match_idx = j + + if max_match_idx >= 0: + gt_used[max_match_idx] = True + ap_obj.push(score_func(i), True) + else: + # If the detection matches a crowd, we can just ignore it + matched_crowd = False + + if num_crowd > 0: + for j in range(len(crowd_classes)): + if crowd_classes[j] != _class: + continue + + iou = crowd_func(i, j) + + if iou > iou_threshold: + matched_crowd = True + break + + # All this crowd code so that we can make sure that our eval code gives the + # same result as COCOEval. There aren't even that many crowd annotations to + # begin with, but accuracy is of the utmost importance. + if not matched_crowd: + ap_obj.push(score_func(i), False) + timer.stop('Main loop') + +class APDataObject: + """ + Stores all the information necessary to calculate the AP for one IoU and one class. + Note: I type annotated this because why not. + """ + + def __init__(self): + self.data_points = [] + self.num_gt_positives = 0 + + def push(self, score:float, is_true:bool): + self.data_points.append((score, is_true)) + + def add_gt_positives(self, num_positives:int): + """ Call this once per image. """ + self.num_gt_positives += num_positives + + def is_empty(self) -> bool: + return len(self.data_points) == 0 and self.num_gt_positives == 0 + + def get_ap(self) -> float: + """ Warning: result not cached. """ + + if self.num_gt_positives == 0: + return 0 + + # Sort descending by score + self.data_points.sort(key=lambda x: -x[0]) + + precisions = [] + recalls = [] + num_true = 0 + num_false = 0 + + # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. 
+ for datum in self.data_points: + # datum[1] is whether the detection a true or false positive + if datum[1]: num_true += 1 + else: num_false += 1 + + precision = num_true / (num_true + num_false) + recall = num_true / self.num_gt_positives + + precisions.append(precision) + recalls.append(recall) + + # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] + # Basically, remove any temporary dips from the curve. + # At least that's what I think, idk. COCOEval did it so I do too. + for i in range(len(precisions)-1, 0, -1): + if precisions[i] > precisions[i-1]: + precisions[i-1] = precisions[i] + + # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. + y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 + x_range = np.array([x / 100 for x in range(101)]) + recalls = np.array(recalls) + + # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. + # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). + # I approximate the integral this way, because that's how COCOEval does it. + indices = np.searchsorted(recalls, x_range, side='left') + for bar_idx, precision_idx in enumerate(indices): + if precision_idx < len(precisions): + y_range[bar_idx] = precisions[precision_idx] + + # Finally compute the riemann sum to get our integral. + # avg([precision(x) for x in 0:0.01:1]) + return sum(y_range) / len(y_range) + +def badhash(x): + """ + Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. + + Source: + https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + """ + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = ((x >> 16) ^ x) & 0xFFFFFFFF + return x + +class InferResultFile(): + def __init__(self, path, fileName): + parts = fileName.split('_') + self.imgId = int(parts[2]) + self.outputId = int(parts[3][0]) + self.arrayValue = np.fromfile(path + fileName, dtype=np.float32) + + self.arrayDim = self.arrayValue.shape[0] + #print('finish read file :', fileName) + +def getAllFiles(path): + allFiles = os.listdir(path) + infoFiles = {} + for file in allFiles: + if '.bin' in file and 'coco_val' in file: + infoFile = InferResultFile(path, file) + if infoFile.imgId in infoFiles.keys(): + infoFiles[infoFile.imgId].append(infoFile) + else: + infoFiles[infoFile.imgId] = [infoFile] + return infoFiles + +class InferResultFileFetcher(): + def __init__(self, path): + self.path = path + + def getInferResult(self, image_idx): + resultDict = {} + for i in range(1, 5): + fileName = 'coco_val2017_' + str(image_idx) + '_' + str(i) + '.bin' + infoFile = InferResultFile(self.path, fileName) + if infoFile.arrayDim == 1847808: + resultDict[0] = infoFile + elif infoFile.arrayDim == 4677264: + resultDict[1] = infoFile + elif infoFile.arrayDim == 230976: + resultDict[2] = infoFile + else: + resultDict[3] = infoFile + + return resultDict + +pred_priors = None + +def getPriorTensor(): + global pred_priors + if pred_priors is None: + pred_priors = np.fromfile('prior.bin', dtype=np.float32).reshape(57744, 4) + return pred_priors + else: + return pred_priors + +def evaluate(path, dataset): + cfg.mask_proto_debug = args.mask_proto_debug + inferResultFetcher = InferResultFileFetcher(path) + + frame_times = MovingAverage() + dataset_size = len(dataset) if args.max_images < 
0 else min(args.max_images, len(dataset)) + progress_bar = ProgressBar(30, dataset_size) + + print() + + # For each class and iou, stores tuples (score, isPositive) + # Index ap_data[type][iouIdx][classIdx] + ap_data = { + 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], + 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] + } + detections = Detections() + + dataset_indices = list(range(len(dataset))) + + if args.shuffle: + random.shuffle(dataset_indices) + elif not args.no_sort: + # Do a deterministic shuffle based on the image ids + # + # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's + # the order of insertion. That means on python 3.6, the images come in the order they are in + # in the annotations file. For some reason, the first images in the annotations file are + # the hardest. To combat this, I use a hard-coded hash function based on the image ids + # to shuffle the indices we use. That way, no matter what python version or how pycocotools + # handles the data, we get the same result every time. + hashed = [badhash(x) for x in dataset.ids] + dataset_indices.sort(key=lambda x: hashed[x]) + + dataset_indices = dataset_indices[:dataset_size] + + # Main eval loop + for it, image_idx in enumerate(dataset_indices): + timer.reset() + with timer.env('Load Data'): + img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) + # Test flag, do not upvote + with timer.env('Network Extra'): + imgId_Outputs = inferResultFetcher.getInferResult(image_idx) + pred_mask = imgId_Outputs[0].arrayValue.reshape(1, 57744, 32) #output1 : pred_onnx[2] + pred_conf = imgId_Outputs[1].arrayValue.reshape(1, 57744, 81) #output2 : pred_onnx[1] + pred_loc = imgId_Outputs[2].arrayValue.reshape(1, 57744, 4) #output3 : pred_onnx[0] + pred_proto = imgId_Outputs[3].arrayValue.reshape(1, 138, 138, 32) #output4 : pred_onnx[4] + + detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5) + detect.use_fast_nms = args.fast_nms + preds = detect({'loc': torch.from_numpy(pred_loc), + 'conf': torch.from_numpy(pred_conf), + 'mask': torch.from_numpy(pred_mask), + 'priors': torch.from_numpy(getPriorTensor()), #????? + 'proto': torch.from_numpy(pred_proto)}) + from yolact import Yolact + yolact_net = Yolact() + yolact_net.load_weights(args.pthpath, useCuda=False) + preds = {'net':yolact_net, 'detection':preds} + + # Perform the meat of the operation here depending on our mode. + + prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) + # First couple of images take longer because we're constructing the graph. + # Since that's technically initialization, don't include those in the FPS calculations. 
+ if it > 1: + frame_times.add(timer.total_time()) + if not args.no_bar: + if it > 1: fps = 1 / frame_times.get_avg() + else: fps = 0 + progress = (it+1) / dataset_size * 100 + progress_bar.set_val(it+1) + print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' + % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') + + print() + print('Saving data...') + with open(args.ap_data_file, 'wb') as f: + pickle.dump(ap_data, f) + return calc_map(ap_data) + +def calc_map(ap_data): + print('Calculating mAP...') + aps = [{'box': [], 'mask': []} for _ in iou_thresholds] + + for _class in range(len(cfg.dataset.class_names)): + for iou_idx in range(len(iou_thresholds)): + for iou_type in ('box', 'mask'): + ap_obj = ap_data[iou_type][iou_idx][_class] + + if not ap_obj.is_empty(): + aps[iou_idx][iou_type].append(ap_obj.get_ap()) + + all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} + + # Looking back at it, this code is really hard to read :/ + for iou_type in ('box', 'mask'): + all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict + for i, threshold in enumerate(iou_thresholds): + mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 + all_maps[iou_type][int(threshold*100)] = mAP + all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) + + print_maps(all_maps) + + # Put in a prettier format so we can serialize it to json during training + all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} + return all_maps + +def print_maps(all_maps): + # Warning: hacky + make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) + make_sep = lambda n: ('-------+' * n) + + print() + print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) + print(make_sep(len(all_maps['box']) + 1)) + for iou_type in ('box', 'mask'): + print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values()])) + print(make_sep(len(all_maps['box']) + 1)) + print() + +if __name__ == '__main__': + parse_args() + path = './result/dumpOutput_device{}/'.format(args.device_id) + + if args.config is not None: + set_cfg(args.config) + + else: + args.config = 'yolact_base_config' + print('Config not specified. Parsed %s from the file name.\n' % args.config) + set_cfg(args.config) + + #if args.image is None and args.video is None and args.images is None: + dataset = COCODetection(args.valid_images, args.valid_annotations, + transform=BaseTransform(), has_gt=cfg.dataset.has_gt) + prep_coco_cats() + + evaluate(path, dataset) diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_preprocess.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_preprocess.py index 5b7eb4e094765c527246d60bb782ac18e6ffbc7f..4b4f69590b4e93ce3f80501b5c6454478f65090d 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_preprocess.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/YOLACT_preprocess.py @@ -1,156 +1,156 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from data import COCODetection, get_label_map -from utils.augmentations import BaseTransform -from data import cfg, set_cfg -import numpy as np -from torch.autograd import Variable -import argparse -import random -import os -from collections import defaultdict - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(argv=None): - parser = argparse.ArgumentParser( - description='YOLACT COCO Evaluation') - parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') - parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') - parser.add_argument('--cuda', default=True, type=str2bool, - help='Use cuda to evaulate model') - parser.add_argument('--display', dest='display', action='store_true', - help='Display qualitative results instead of quantitative ones.') - parser.add_argument('--shuffle', dest='shuffle', action='store_true', - help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') - parser.add_argument('--resume', dest='resume', action='store_true', - help='If display not set, this resumes mAP calculations from the ap_data_file.') - parser.add_argument('--max_images', default=-1, type=int, - help='The maximum number of images from the dataset to consider. Use -1 for all.') - parser.add_argument('--config', default=None, - help='The config object to use.') - parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', - help='Do not sort images by hashed image ID.') - parser.add_argument('--seed', default=None, type=int, - help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') - parser.add_argument('--image', default=None, type=str, - help='A path to an image to use for display.') - parser.add_argument('--images', default=None, type=str, - help='An input folder of images and output folder to save detected images. Should be in the format input->output.') - parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') - parser.set_defaults(display=False, resume=False, shuffle=False, - no_sort=False) - - global args - args = parser.parse_args(argv) - - if args.seed is not None: - random.seed(args.seed) - -iou_thresholds = [x / 100 for x in range(50, 100, 5)] -coco_cats = {} # Call prep_coco_cats to fill this -coco_cats_inv = {} -color_cache = defaultdict(lambda: {}) - -def prep_coco_cats(): - """ Prepare inverted table for category id lookup given a coco cats object. 
""" - for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): - transformed_cat_id = transformed_cat_id_p1 - 1 - coco_cats[transformed_cat_id] = coco_cat_id - coco_cats_inv[coco_cat_id] = transformed_cat_id - -def badhash(x): - """ - Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. - - Source: - https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - """ - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = ((x >> 16) ^ x) & 0xFFFFFFFF - return x - -def preprocess(dataset, save_path = None): - dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) - print("dataset size is : ", dataset_size) - print() - - # For each class and iou, stores tuples (score, isPositive) - # Index ap_data[type][iouIdx][classIdx] - dataset_indices = list(range(len(dataset))) - print("dataset indices size is :", len(dataset_indices)) - if args.shuffle: - random.shuffle(dataset_indices) - elif not args.no_sort: - hashed = [badhash(x) for x in dataset.ids] - dataset_indices.sort(key=lambda x: hashed[x]) - - dataset_indices = dataset_indices[:dataset_size] - - if save_path is None: - save_path = './prep_dataset/' - - if os.path.exists(save_path) == False: - os.mkdir(save_path) - else: - print('dir exist!') - - # Main eval loop - with open('yolact_prep_bin.info', 'w+') as f: - for it, image_idx in enumerate(dataset_indices): - img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) - # Test flag, do not upvote - batch = Variable(img.unsqueeze(0)) - batch_numpy = np.array(batch).astype(np.float32) - - binFileName = os.path.join(save_path, 'coco_val2017_' + str(image_idx) + '.bin') - - batch_numpy.tofile(binFileName) - - line = str(it) + ' ' + binFileName + ' ' + '550 550\n' - f.write(line) - if it % 100 == 0: - print('[INFO][PreProcess]', 'CurSampleNum:', it) - -if __name__ == '__main__': - parse_args() - - if args.config is not None: - set_cfg(args.config) - - else: - args.config = 'yolact_base_config' - print('Config not specified. Parsed %s from the file name.\n' % args.config) - set_cfg(args.config) - - if not os.path.exists('results'): - os.makedirs('results') - - #if args.image is None and args.video is None and args.images is None: - dataset = COCODetection(args.valid_images, args.valid_annotations, - transform=BaseTransform(), has_gt=cfg.dataset.has_gt) - prep_coco_cats() - - preprocess(dataset) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from data import COCODetection, get_label_map +from utils.augmentations import BaseTransform +from data import cfg, set_cfg +import numpy as np +from torch.autograd import Variable +import argparse +import random +import os +from collections import defaultdict + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description='YOLACT COCO Evaluation') + parser.add_argument('--valid_images', default='/home/data/coco/images/', help='the path of validation images') + parser.add_argument('--valid_annotations', default='/home/data/coco/annotations/instances_val2017.json', help='the path of validation annotations') + parser.add_argument('--cuda', default=True, type=str2bool, + help='Use cuda to evaulate model') + parser.add_argument('--display', dest='display', action='store_true', + help='Display qualitative results instead of quantitative ones.') + parser.add_argument('--shuffle', dest='shuffle', action='store_true', + help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') + parser.add_argument('--resume', dest='resume', action='store_true', + help='If display not set, this resumes mAP calculations from the ap_data_file.') + parser.add_argument('--max_images', default=-1, type=int, + help='The maximum number of images from the dataset to consider. Use -1 for all.') + parser.add_argument('--config', default=None, + help='The config object to use.') + parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', + help='Do not sort images by hashed image ID.') + parser.add_argument('--seed', default=None, type=int, + help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') + parser.add_argument('--image', default=None, type=str, + help='A path to an image to use for display.') + parser.add_argument('--images', default=None, type=str, + help='An input folder of images and output folder to save detected images. Should be in the format input->output.') + parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') + parser.set_defaults(display=False, resume=False, shuffle=False, + no_sort=False) + + global args + args = parser.parse_args(argv) + + if args.seed is not None: + random.seed(args.seed) + +iou_thresholds = [x / 100 for x in range(50, 100, 5)] +coco_cats = {} # Call prep_coco_cats to fill this +coco_cats_inv = {} +color_cache = defaultdict(lambda: {}) + +def prep_coco_cats(): + """ Prepare inverted table for category id lookup given a coco cats object. """ + for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): + transformed_cat_id = transformed_cat_id_p1 - 1 + coco_cats[transformed_cat_id] = coco_cat_id + coco_cats_inv[coco_cat_id] = transformed_cat_id + +def badhash(x): + """ + Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. 
+ + Source: + https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + """ + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = ((x >> 16) ^ x) & 0xFFFFFFFF + return x + +def preprocess(dataset, save_path = None): + dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) + print("dataset size is : ", dataset_size) + print() + + # For each class and iou, stores tuples (score, isPositive) + # Index ap_data[type][iouIdx][classIdx] + dataset_indices = list(range(len(dataset))) + print("dataset indices size is :", len(dataset_indices)) + if args.shuffle: + random.shuffle(dataset_indices) + elif not args.no_sort: + hashed = [badhash(x) for x in dataset.ids] + dataset_indices.sort(key=lambda x: hashed[x]) + + dataset_indices = dataset_indices[:dataset_size] + + if save_path is None: + save_path = './prep_dataset/' + + if os.path.exists(save_path) == False: + os.mkdir(save_path) + else: + print('dir exist!') + + # Main eval loop + with open('yolact_prep_bin.info', 'w+') as f: + for it, image_idx in enumerate(dataset_indices): + img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) + # Test flag, do not upvote + batch = Variable(img.unsqueeze(0)) + batch_numpy = np.array(batch).astype(np.float32) + + binFileName = os.path.join(save_path, 'coco_val2017_' + str(image_idx) + '.bin') + + batch_numpy.tofile(binFileName) + + line = str(it) + ' ' + binFileName + ' ' + '550 550\n' + f.write(line) + if it % 100 == 0: + print('[INFO][PreProcess]', 'CurSampleNum:', it) + +if __name__ == '__main__': + parse_args() + + if args.config is not None: + set_cfg(args.config) + + else: + args.config = 'yolact_base_config' + print('Config not specified. 
Parsed %s from the file name.\n' % args.config) + set_cfg(args.config) + + if not os.path.exists('results'): + os.makedirs('results') + + #if args.image is None and args.video is None and args.images is None: + dataset = COCODetection(args.valid_images, args.valid_annotations, + transform=BaseTransform(), has_gt=cfg.dataset.has_gt) + prep_coco_cats() + + preprocess(dataset) + + diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/dcnv2.diff b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/dcnv2.diff index c1af4f32cecbac40d982a9727821a90a72d5fca5..f1d3a4edb9ee432fdc606cbe9c0ba2e79666a6d6 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/dcnv2.diff +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/dcnv2.diff @@ -1,107 +1,107 @@ -diff --git a/dcn_v2.py b/dcn_v2.py -index 982bef5..db33229 100644 ---- a/dcn_v2.py -+++ b/dcn_v2.py - -@@ -14,16 +15,38 @@ import _ext as _backend - - - class _DCNv2(Function): -+ -+ @staticmethod -+ def symbolic(g, input, weight, offset, bias, stride, padding, -+ dilation, groups, defomable_groups): -+ if isinstance(stride, int): -+ stride = (stride, stride) -+ if isinstance(padding, int): -+ padding = (padding, padding) -+ if isinstance(dilation, int): -+ dilation = (dilation, dilation) -+ return g.op( -+ 'DeformableConv2D', -+ input, -+ weight, -+ offset, -+ bias, -+ strides_i=stride, -+ pads_i=padding, -+ dilations_i=dilation, -+ groups_i=groups, -+ defomable_groups_i=defomable_groups) - @staticmethod -- def forward(ctx, input, offset, mask, weight, bias, -- stride, padding, dilation, deformable_groups): -+ def forward(ctx, input, weight, offest, bias, -+ stride, padding, dilation, groups=1, deformable_groups=1): - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.kernel_size = _pair(weight.shape[2:4]) - ctx.deformable_groups = deformable_groups -- output = _backend.dcn_v2_forward(input, weight, bias, -- offset, mask, -+ return torch.rand(_DCNv2._infer_shape(ctx, input, weight)).to(input.device) -+ output = _backend.dcn_v2_forward(input.float(), weight.float(), bias.float(), -+ offset.float(), mask.float(), - ctx.kernel_size[0], ctx.kernel_size[1], - ctx.stride[0], ctx.stride[1], - ctx.padding[0], ctx.padding[1], -@@ -31,15 +54,26 @@ class _DCNv2(Function): - ctx.deformable_groups) - ctx.save_for_backward(input, offset, mask, weight, bias) - return output -+ @staticmethod -+ def _infer_shape(ctx, input, weight): -+ n = input.size(0) -+ channels_out = weight.size(0) -+ height, width = input.shape[2:4] -+ kernel_h, kernel_w = weight.shape[2:4] -+ height_out = (height + 2 * ctx.padding[0] - -+ (ctx.dilation[0] * (kernel_h - 1) + 1)) // ctx.stride[0] + 1 -+ width_out = (width + 2 * ctx.padding[0] - -+ (ctx.dilation[0] * (kernel_w - 1) + 1)) // ctx.stride[0] + 1 -+ return n, channels_out, height_out, width_out - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \ -- _backend.dcn_v2_backward(input, weight, -- bias, -- offset, mask, -+ _backend.dcn_v2_backward(input.float(), weight.float(), -+ bias.float(), -+ offset.float(), mask.float(), - grad_output, - ctx.kernel_size[0], ctx.kernel_size[1], - ctx.stride[0], ctx.stride[1], -@@ -120,11 +154,19 @@ class DCN(DCNv2): - o1, o2, mask = torch.chunk(out, 3, dim=1) - offset = torch.cat((o1, o2), dim=1) - mask = torch.sigmoid(mask) -- return dcn_v2_conv(input, offset, mask, -- self.weight, self.bias, -+ offset_y = 
offset.reshape(offset.shape[0], -1, 2, offset.shape[2], -+ offset.shape[3])[:, :, 0, ...].reshape(offset.shape[0], offset.shape[1] // 2, offset.shape[2], -+ offset.shape[3]) -+ offset_x = offset.reshape(offset.shape[0], -1, 2, offset.shape[2], -+ offset.shape[3])[:, :, 1, ...].reshape(offset.shape[0], offset.shape[1] // 2, offset.shape[2], -+ offset.shape[3]) -+ offset = torch.cat((offset_x, offset_y, mask), 1) -+ return dcn_v2_conv(input, -+ self.weight, offset, self.bias, - self.stride, - self.padding, - self.dilation, -+ 1, - self.deformable_groups) - - -@@ -300,4 +342,4 @@ class DCNPooling(DCNv2Pooling): - self.group_size, - self.part_size, - self.sample_per_part, -- self.trans_std) -+ self.trans_std) +diff --git a/dcn_v2.py b/dcn_v2.py +index 982bef5..db33229 100644 +--- a/dcn_v2.py ++++ b/dcn_v2.py + +@@ -14,16 +15,38 @@ import _ext as _backend + + + class _DCNv2(Function): ++ ++ @staticmethod ++ def symbolic(g, input, weight, offset, bias, stride, padding, ++ dilation, groups, defomable_groups): ++ if isinstance(stride, int): ++ stride = (stride, stride) ++ if isinstance(padding, int): ++ padding = (padding, padding) ++ if isinstance(dilation, int): ++ dilation = (dilation, dilation) ++ return g.op( ++ 'DeformableConv2D', ++ input, ++ weight, ++ offset, ++ bias, ++ strides_i=stride, ++ pads_i=padding, ++ dilations_i=dilation, ++ groups_i=groups, ++ defomable_groups_i=defomable_groups) + @staticmethod +- def forward(ctx, input, offset, mask, weight, bias, +- stride, padding, dilation, deformable_groups): ++ def forward(ctx, input, weight, offest, bias, ++ stride, padding, dilation, groups=1, deformable_groups=1): + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.kernel_size = _pair(weight.shape[2:4]) + ctx.deformable_groups = deformable_groups +- output = _backend.dcn_v2_forward(input, weight, bias, +- offset, mask, ++ return torch.rand(_DCNv2._infer_shape(ctx, input, weight)).to(input.device) ++ output = _backend.dcn_v2_forward(input.float(), weight.float(), bias.float(), ++ offset.float(), mask.float(), + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.stride[0], ctx.stride[1], + ctx.padding[0], ctx.padding[1], +@@ -31,15 +54,26 @@ class _DCNv2(Function): + ctx.deformable_groups) + ctx.save_for_backward(input, offset, mask, weight, bias) + return output ++ @staticmethod ++ def _infer_shape(ctx, input, weight): ++ n = input.size(0) ++ channels_out = weight.size(0) ++ height, width = input.shape[2:4] ++ kernel_h, kernel_w = weight.shape[2:4] ++ height_out = (height + 2 * ctx.padding[0] - ++ (ctx.dilation[0] * (kernel_h - 1) + 1)) // ctx.stride[0] + 1 ++ width_out = (width + 2 * ctx.padding[0] - ++ (ctx.dilation[0] * (kernel_w - 1) + 1)) // ctx.stride[0] + 1 ++ return n, channels_out, height_out, width_out + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \ +- _backend.dcn_v2_backward(input, weight, +- bias, +- offset, mask, ++ _backend.dcn_v2_backward(input.float(), weight.float(), ++ bias.float(), ++ offset.float(), mask.float(), + grad_output, + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.stride[0], ctx.stride[1], +@@ -120,11 +154,19 @@ class DCN(DCNv2): + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) +- return dcn_v2_conv(input, offset, mask, +- self.weight, self.bias, ++ offset_y = offset.reshape(offset.shape[0], -1, 2, 
offset.shape[2], ++ offset.shape[3])[:, :, 0, ...].reshape(offset.shape[0], offset.shape[1] // 2, offset.shape[2], ++ offset.shape[3]) ++ offset_x = offset.reshape(offset.shape[0], -1, 2, offset.shape[2], ++ offset.shape[3])[:, :, 1, ...].reshape(offset.shape[0], offset.shape[1] // 2, offset.shape[2], ++ offset.shape[3]) ++ offset = torch.cat((offset_x, offset_y, mask), 1) ++ return dcn_v2_conv(input, ++ self.weight, offset, self.bias, + self.stride, + self.padding, + self.dilation, ++ 1, + self.deformable_groups) + + +@@ -300,4 +342,4 @@ class DCNPooling(DCNv2Pooling): + self.group_size, + self.part_size, + self.sample_per_part, +- self.trans_std) ++ self.trans_std) \ No newline at end of file \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/deform_conv.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/deform_conv.py index 936bef9074cfa7ed71acc9e28fbd75f02ae79dd4..f6d738214136868e4b7f95a306d93e2f332d8f50 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/deform_conv.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/deform_conv.py @@ -1,234 +1,234 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair, _single -import math - - -class ModulatedDeformConv2dFunction(Function): - - @staticmethod - def forward(ctx, - input_tensor, - offset_ori, - mask, - weight, - bias=None, - with_bias=False, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - sort_index_for_npu_fp=None, - sort_index_for_npu_bp=None, - ): - - input_tensor = input_tensor.float() - offset_ori = offset_ori.float() - mask = mask.float() - - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.sort_index_for_npu_bp = sort_index_for_npu_bp - ctx.with_bias = with_bias - - offset = offset_ori.index_select(1, sort_index_for_npu_fp) - offset_all = torch.cat([offset, mask], dim=1) - output, offset_out = torch.npu_deformable_conv2d( - input_tensor, weight, offset_all, bias, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, - modulated=True) - if weight.requires_grad or mask.requires_grad or offset.requires_grad \ - or input_tensor.requires_grad: - ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) - return output - - @staticmethod - def backward(ctx, grad_output): - input_tensor, weight, offset_out, offset_all = ctx.saved_tensors - grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( - input_tensor, grad_output, offset_out, weight, offset_all, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - 
padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) - grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) - grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] - if not ctx.with_bias: - grad_bias = None - - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, - None, None, None, None, None, None, None, None) - - -class ModulatedDeformConv(nn.Module): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - pack=True, - ): - - r"""Applies an NPU based Modulated Deformable 2D convolution operation. - - Paper link: - [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) - - Reference implementation link: - https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py - - The implementation of this ModulatedDeformConv is mainly based - on the implementation of mmcv for design and reconstruction. - - In ModulatedDeformConvFunction, the forward and backward are customized, - and the input tensor is reconstructed ito match the NPU based function. - - It is worth mentioning that DeformConv(DCNv1) is also implemented - by setting modulated==False. Due to the difference between input - and initialization, there is no additional implementation here. - - .. note:: - ModulatedDeformConv only implements operations under fp32 data types. - Notice, weight and bias in conv_offset must be initialized to 0. - - Args: - in_channels (int): Number of channels in the input image. - out_channels (int): Number of channels produced by the convolution. - kernel_size(int, tuple): Size of the convolving kernel. - stride(int, tuple): Stride of the convolution. Default: 1. - padding (int or tuple): Zero-padding added to both sides of the input. - Default: 0. - dilation (int or tuple): Spacing between kernel elements. Default: 1. - groups (int): Number of blocked connections from input. - channels to output channels. Default: 1. - deform_groups (int): Number of deformable group partitions. - bias (bool): If True, adds a learnable bias to the output. Default: False. - pack (bool): If True, conv_offset and mask will be included in this module. Default: True. 
- - Examples:: - >>> m = ModulatedDeformConv(32, 32, 1) - >>> input_tensor = torch.randn(2, 32, 5, 5) - >>> output = m(input_tensor) - """ - - super(ModulatedDeformConv, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.pack = pack - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = torch.zeros(self.weight.shape[0]) - - if self.pack: - self.conv_offset_mask = nn.Conv2d( - self.in_channels, - self.deformable_groups * 3 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - bias=True) - - self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] - sort_index_for_npu = list(range(self.split_num)) - sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] - sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} - sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] - self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) - self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) - self.sort_index_for_npu_todevice = False - - self.init_param() - - def init_param(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. / math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.zero_() - - if self.pack: - self.conv_offset_mask.weight.data.zero_() - self.conv_offset_mask.bias.data.zero_() - - def forward(self, x): - if self.pack: - out = self.conv_offset_mask(x) - offset = out[:, :self.split_num, ...] - mask = torch.sigmoid(out[:, self.split_num:, ...]) - else: - x, offset, mask = x - - if not self.sort_index_for_npu_todevice: - self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) - self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) - self.bias = self.bias.to(x.device) - self.sort_index_for_npu_todevice = True - - return ModulatedDeformConv2dFunction.apply( - x, offset, mask, self.weight, self.bias, self.with_bias, - self.stride, self.padding, self.dilation, - self.groups, self.deformable_groups, - self.sort_index_for_npu_fp, - self.sort_index_for_npu_bp, - ) - - -DCNv2 = ModulatedDeformConv - -if __name__ == "__main__": - x = torch.randn(2, 32, 7, 7) - model = DCNv2(32, 32, 3, 2, 1) - - torch.npu.set_device(0) - x = x.npu() - model = model.npu() - - o = model(x) - l = o.sum() - l.backward() - print(l) +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair, _single +import math + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def forward(ctx, + input_tensor, + offset_ori, + mask, + weight, + bias=None, + with_bias=False, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + sort_index_for_npu_fp=None, + sort_index_for_npu_bp=None, + ): + + input_tensor = input_tensor.float() + offset_ori = offset_ori.float() + mask = mask.float() + + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.sort_index_for_npu_bp = sort_index_for_npu_bp + ctx.with_bias = with_bias + + offset = offset_ori.index_select(1, sort_index_for_npu_fp) + offset_all = torch.cat([offset, mask], dim=1) + output, offset_out = torch.npu_deformable_conv2d( + input_tensor, weight, offset_all, bias, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, + modulated=True) + if weight.requires_grad or mask.requires_grad or offset.requires_grad \ + or input_tensor.requires_grad: + ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) + return output + + @staticmethod + def backward(ctx, grad_output): + input_tensor, weight, offset_out, offset_all = ctx.saved_tensors + grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( + input_tensor, grad_output, offset_out, weight, offset_all, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) + grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) + grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None, None, None, None) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + pack=True, + ): + + r"""Applies an NPU based Modulated Deformable 2D convolution operation. + + Paper link: + [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) + + Reference implementation link: + https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py + + The implementation of this ModulatedDeformConv is mainly based + on the implementation of mmcv for design and reconstruction. + + In ModulatedDeformConvFunction, the forward and backward are customized, + and the input tensor is reconstructed ito match the NPU based function. + + It is worth mentioning that DeformConv(DCNv1) is also implemented + by setting modulated==False. Due to the difference between input + and initialization, there is no additional implementation here. + + .. note:: + ModulatedDeformConv only implements operations under fp32 data types. + Notice, weight and bias in conv_offset must be initialized to 0. + + Args: + in_channels (int): Number of channels in the input image. 
+ out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. Default: False. + pack (bool): If True, conv_offset and mask will be included in this module. Default: True. + + Examples:: + >>> m = ModulatedDeformConv(32, 32, 1) + >>> input_tensor = torch.randn(2, 32, 5, 5) + >>> output = m(input_tensor) + """ + + super(ModulatedDeformConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.pack = pack + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = torch.zeros(self.weight.shape[0]) + + if self.pack: + self.conv_offset_mask = nn.Conv2d( + self.in_channels, + self.deformable_groups * 3 * self.kernel_size[0] * + self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + bias=True) + + self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] + sort_index_for_npu = list(range(self.split_num)) + sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] + sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} + sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] + self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) + self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) + self.sort_index_for_npu_todevice = False + + self.init_param() + + def init_param(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + if self.pack: + self.conv_offset_mask.weight.data.zero_() + self.conv_offset_mask.bias.data.zero_() + + def forward(self, x): + if self.pack: + out = self.conv_offset_mask(x) + offset = out[:, :self.split_num, ...] 
+ mask = torch.sigmoid(out[:, self.split_num:, ...]) + else: + x, offset, mask = x + + if not self.sort_index_for_npu_todevice: + self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) + self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) + self.bias = self.bias.to(x.device) + self.sort_index_for_npu_todevice = True + + return ModulatedDeformConv2dFunction.apply( + x, offset, mask, self.weight, self.bias, self.with_bias, + self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups, + self.sort_index_for_npu_fp, + self.sort_index_for_npu_bp, + ) + + +DCNv2 = ModulatedDeformConv + +if __name__ == "__main__": + x = torch.randn(2, 32, 7, 7) + model = DCNv2(32, 32, 3, 2, 1) + + torch.npu.set_device(0) + x = x.npu() + model = model.npu() + + o = model(x) + l = o.sum() + l.backward() + print(l) diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/modelzoo_level.txt index 282c3ff3b30404101a02cc86c5bfeb6308d198e7..c5c4a9d8001fae97c66831abcfdbe02dd6261c37 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/pth2onnx.py b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/pth2onnx.py index 947bca196f623129d36a8361a00e7beac1db6238..b35740067483d6dfeb5da4018062bb6bdb1d9341 100644 --- a/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus/pth2onnx.py @@ -1,180 +1,180 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -''' -YOLACT pth权重文件转为onnx权重文件 -''' -import sys -import os -sys.path.append('../') -import torch -import torch.onnx -import argparse -from data import * -from yolact import Yolact -# -set_cfg('yolact_plus_resnet50_config') -from torch.autograd import Variable - - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - - -parser_pth2onnx = argparse.ArgumentParser(description='Turn YOLACT .pth module to .onnx module') - -parser_pth2onnx.add_argument('--trained_model', type=str, - default='yolact_plus_resnet50_54_800000.pth', help='choose .pth module') - -parser_pth2onnx.add_argument('--outputName', type=str, - default='yolact_plus', help='the name of the output onnx module') - -parser_pth2onnx.add_argument('--dynamic', default=False, type=str2bool, - help='choose whether the output onnx module is dynamic or not') - -args_pth2onnx = parser_pth2onnx.parse_args() - -def removeAdd240Node(model): - addNodeNum = 1227 #1227, 344 - addNode = model.graph.node[addNodeNum] - model.graph.node.remove(addNode) - for node in model.graph.node: - if '1763' in node.input: - assert node.input[0] == '1763' #'1763','1005' - node.input[0] = '1761' #'1761','1003' - - -def optimSoftmax(model): - from onnx import helper - - findOldSoftmaxNode = False - for node in model.graph.node: - if 'Softmax' in node.name: - oldSoftmaxName = node.name - oldSoftmaxInput = node.input[0] - findOldSoftmaxNode = True - break - - assert node.output[0] == 'output1' - assert findOldSoftmaxNode - - model.graph.node.remove(node) - - TransposeNode_Pre = helper.make_node('Transpose', [oldSoftmaxInput], ['66666'], - perm=[0, 2, 1], name='Transpose_Pre') - - newSoftmax = helper.make_node("Softmax", axis=1, inputs=["66666"], - outputs=["88888"], name=oldSoftmaxName) - - TransposeNode_After = helper.make_node('Transpose', ['88888'], ['output1'], - perm=[0, 2, 1], name="Transpose_After") - - model.graph.node.append(TransposeNode_Pre) - model.graph.node.append(TransposeNode_After) - model.graph.node.append(newSoftmax) - - a = model.graph.output[1].type.tensor_type.shape.dim[1] - a.dim_param = '57744' # 57744, 19248 - b = model.graph.output[1].type.tensor_type.shape.dim[2] - b.dim_param = '81' - - -def ReplaceScales(ori_list, scales_name): - n_list = [] - for i, x in enumerate(ori_list): - if i < 2: - n_list.append(x) - if i == 3: - n_list.append(scales_name) - return n_list - -def optimresize(model): - # 替换Resize节点 - i = 1311 #429 - n = model.graph.node[i] - if n.op_type == "Resize": - print("Resize", i, n.input, n.output) - model.graph.initializer.append( - onnx.helper.make_tensor('scales{}'.format(i), onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) - ) - newnode = onnx.helper.make_node( - 'Resize', - name=n.name, - inputs=ReplaceScales(n.input, 'scales{}'.format(i)), - outputs=n.output, - coordinate_transformation_mode='pytorch_half_pixel', - cubic_coeff_a=-0.75, - mode='linear', - nearest_mode='floor' - ) - model.graph.node.remove(n) - model.graph.node.insert(i, newnode) - print("replace {} index {}".format(n.name, i)) - # for i in range(401, 428): - # print('remove:', model.graph.node[401].name) - # model.graph.node.remove(model.graph.node[401]) - - -def convert(path, pthPath): - ''' - 转换pth模型为onnx模型 - :param path: onnx模型存储路径 - :param pthPath: pth模型路径 - :return: - ''' - yolact_net = Yolact() - yolact_net.load_weights(pthPath, useCuda=False) - yolact_net.exportOnnx = True - yolact_net.eval() - - input_names = ["input.1"] - - dummy_input = Variable( - torch.randn(1, 3, 550, 550)) - - output_names = ["output0", "output1", "output2", 
"output3", "output4"] - - if args_pth2onnx.dynamic: - dynamic_axes = { - 'input.1': {0: '-1'}, - 'output0': {0: '-1'}, - 'output1': {0: '-1'}, - 'output2': {0: '-1'}, - 'output3': {0: '-1'}, - 'output4': {0: '-1'} - } - torch.onnx.export(yolact_net, dummy_input, args_pth2onnx.outputName + ".onnx", - verbose=True, dynamic_axes=dynamic_axes, - input_names=input_names, - output_names=output_names, opset_version=11, enable_onnx_checker=False) - - else: - torch.onnx.export(yolact_net, dummy_input, - args_pth2onnx.outputName + '.onnx', - input_names=input_names, - output_names=output_names, - opset_version=11, verbose=True, enable_onnx_checker=False) - - -if __name__ == '__main__': - path = os.getcwd() - pthPath = os.getcwd() + '/' + args_pth2onnx.trained_model - convert(path, pthPath) - import onnx - - model = onnx.load('./' + args_pth2onnx.outputName + '.onnx') - removeAdd240Node(model) - optimSoftmax(model) - optimresize(model) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +YOLACT pth权重文件转为onnx权重文件 +''' +import sys +import os +sys.path.append('../') +import torch +import torch.onnx +import argparse +from data import * +from yolact import Yolact +# +set_cfg('yolact_plus_resnet50_config') +from torch.autograd import Variable + + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + + +parser_pth2onnx = argparse.ArgumentParser(description='Turn YOLACT .pth module to .onnx module') + +parser_pth2onnx.add_argument('--trained_model', type=str, + default='yolact_plus_resnet50_54_800000.pth', help='choose .pth module') + +parser_pth2onnx.add_argument('--outputName', type=str, + default='yolact_plus', help='the name of the output onnx module') + +parser_pth2onnx.add_argument('--dynamic', default=False, type=str2bool, + help='choose whether the output onnx module is dynamic or not') + +args_pth2onnx = parser_pth2onnx.parse_args() + +def removeAdd240Node(model): + addNodeNum = 1227 #1227, 344 + addNode = model.graph.node[addNodeNum] + model.graph.node.remove(addNode) + for node in model.graph.node: + if '1763' in node.input: + assert node.input[0] == '1763' #'1763','1005' + node.input[0] = '1761' #'1761','1003' + + +def optimSoftmax(model): + from onnx import helper + + findOldSoftmaxNode = False + for node in model.graph.node: + if 'Softmax' in node.name: + oldSoftmaxName = node.name + oldSoftmaxInput = node.input[0] + findOldSoftmaxNode = True + break + + assert node.output[0] == 'output1' + assert findOldSoftmaxNode + + model.graph.node.remove(node) + + TransposeNode_Pre = helper.make_node('Transpose', [oldSoftmaxInput], ['66666'], + perm=[0, 2, 1], name='Transpose_Pre') + + newSoftmax = helper.make_node("Softmax", axis=1, inputs=["66666"], + outputs=["88888"], name=oldSoftmaxName) + + TransposeNode_After = helper.make_node('Transpose', ['88888'], ['output1'], + perm=[0, 2, 1], name="Transpose_After") + + model.graph.node.append(TransposeNode_Pre) + model.graph.node.append(TransposeNode_After) + model.graph.node.append(newSoftmax) + + 
a = model.graph.output[1].type.tensor_type.shape.dim[1] + a.dim_param = '57744' # 57744, 19248 + b = model.graph.output[1].type.tensor_type.shape.dim[2] + b.dim_param = '81' + + +def ReplaceScales(ori_list, scales_name): + n_list = [] + for i, x in enumerate(ori_list): + if i < 2: + n_list.append(x) + if i == 3: + n_list.append(scales_name) + return n_list + +def optimresize(model): + # 替换Resize节点 + i = 1311 #429 + n = model.graph.node[i] + if n.op_type == "Resize": + print("Resize", i, n.input, n.output) + model.graph.initializer.append( + onnx.helper.make_tensor('scales{}'.format(i), onnx.TensorProto.FLOAT, [4], [1, 1, 2, 2]) + ) + newnode = onnx.helper.make_node( + 'Resize', + name=n.name, + inputs=ReplaceScales(n.input, 'scales{}'.format(i)), + outputs=n.output, + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=-0.75, + mode='linear', + nearest_mode='floor' + ) + model.graph.node.remove(n) + model.graph.node.insert(i, newnode) + print("replace {} index {}".format(n.name, i)) + # for i in range(401, 428): + # print('remove:', model.graph.node[401].name) + # model.graph.node.remove(model.graph.node[401]) + + +def convert(path, pthPath): + ''' + 转换pth模型为onnx模型 + :param path: onnx模型存储路径 + :param pthPath: pth模型路径 + :return: + ''' + yolact_net = Yolact() + yolact_net.load_weights(pthPath, useCuda=False) + yolact_net.exportOnnx = True + yolact_net.eval() + + input_names = ["input.1"] + + dummy_input = Variable( + torch.randn(1, 3, 550, 550)) + + output_names = ["output0", "output1", "output2", "output3", "output4"] + + if args_pth2onnx.dynamic: + dynamic_axes = { + 'input.1': {0: '-1'}, + 'output0': {0: '-1'}, + 'output1': {0: '-1'}, + 'output2': {0: '-1'}, + 'output3': {0: '-1'}, + 'output4': {0: '-1'} + } + torch.onnx.export(yolact_net, dummy_input, args_pth2onnx.outputName + ".onnx", + verbose=True, dynamic_axes=dynamic_axes, + input_names=input_names, + output_names=output_names, opset_version=11, enable_onnx_checker=False) + + else: + torch.onnx.export(yolact_net, dummy_input, + args_pth2onnx.outputName + '.onnx', + input_names=input_names, + output_names=output_names, + opset_version=11, verbose=True, enable_onnx_checker=False) + + +if __name__ == '__main__': + path = os.getcwd() + pthPath = os.getcwd() + '/' + args_pth2onnx.trained_model + convert(path, pthPath) + import onnx + + model = onnx.load('./' + args_pth2onnx.outputName + '.onnx') + removeAdd240Node(model) + optimSoftmax(model) + optimresize(model) onnx.save_model(model, args_pth2onnx.outputName + '.onnx') \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/EDSR/LICENSE b/ACL_PyTorch/contrib/cv/super_resolution/EDSR/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/EDSR/LICENSE +++ b/ACL_PyTorch/contrib/cv/super_resolution/EDSR/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/EDSR/requirements.txt b/ACL_PyTorch/contrib/cv/super_resolution/EDSR/requirements.txt index 399dbfed087e9d139235f30e8d5991e803b92edd..fbd453de267138a05ccf06d3be32a3d4eb8f68fc 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/EDSR/requirements.txt +++ b/ACL_PyTorch/contrib/cv/super_resolution/EDSR/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/LICENSE b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/LICENSE +++ b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git "a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/RCAN_Onnx\347\253\257\345\210\260\347\253\257\346\216\250\347\220\206\346\214\207\345\257\274.md" "b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/RCAN_Onnx\347\253\257\345\210\260\347\253\257\346\216\250\347\220\206\346\214\207\345\257\274.md" index f979b9dd8b5c951a6b16603909e81376e58b7639..2bc1e9d7c210ef758cbf493a13e4674b760f9a5b 100644 --- "a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/RCAN_Onnx\347\253\257\345\210\260\347\253\257\346\216\250\347\220\206\346\214\207\345\257\274.md" +++ "b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/RCAN_Onnx\347\253\257\345\210\260\347\253\257\346\216\250\347\220\206\346\214\207\345\257\274.md" @@ -1,241 +1,241 @@ -# RCAN Onnx 模型端到端推理指导 - -- [1. 模型概述](#1) - - [论文地址](#11) - - [代码地址](#12) -- [2. 环境说明](#2) - - [深度学习框架](#21) - - [python第三方库](#22) -- [3. 模型转换](#3) - - [pth转onnx模型](#31) -- [4. 数据预处理](#4) - - [数据集获取](#41) - - [数据集预处理](#42) - - [生成数据集信息文件](#43) -- [5. 离线推理](#5) - - [benchmark工具概述](#51) - - [离线推理](#52) -- [6. 精度对比](#6) -- [7. 性能对比](#7) - - [npu性能数据](#71) - - [T4性能数据](#72) - - [性能对比](#73) - -## 1. 模型概述 - -### 1.1 论文地址 - -[RCAB 论文](https://arxiv.org/abs/1807.02758) - -### 1.2 代码地址 - -[RCAN 代码](https://github.com/yulunzhang/RCAN) - -branck: master - -commit_id: 3339ebc59519c3bb2b5719b87dd36515ec7f3ba7 - -## 2. 环境说明 - -对于batch1与batch16,310性能均高于T4性能1.2倍,该模型放s在Benchmark/cv/classification目录下。 - -### 2.1 深度学习框架 - -``` -pytorch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy == 1.19.2 -Pillow == 8.2.0 -opencv-python == 4.5.2 -``` - -> **说明:** -> -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3. 模型转换 - -### 3.1 pth转onnx模型 - -1. 下载 pth 权重文,放入models目录下 - - [RCAN 预训练pth权重文件](https://pan.baidu.com/s/1bkoJKmdOcvLhOFXHVkFlKA) - - 文件名:RCAN_BIX2.pt - - md5sum:f567f8560fde71ba0973a7fe472a42f2 - -2. 克隆代码仓库代码 - - ```bash - git clone https://github.com/yulunzhang/RCAN.git - ``` - -3. 使用rcan_pth2onnx.py 脚本将pth转化为onnx - - ```bash - python3.7 rcan_pth2onnx.py --pth RCAN_BIX2.pt --onnx rcan.onnx - ``` - - RCAN_BIX2.pt 文件为步骤1中下载的预训练权重文件,该条指令将在运行处生成一个rcan.onnx文件,此文件即为目标onnx文件 - - -### 3.2 onnx转om模型 - -下列需要在具备华为Ascend系列芯片的机器上执行: - -1. 设置 atc 工作所需要的环境变量 - - ```bash - export install_path=/usr/local/Ascend/ascend-toolkit/latest - export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH - export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH - export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH - export ASCEND_OPP_PATH=${install_path}/opp - ``` - -2. 由于transpose算子对于某些shape不友好,需要进行优化,将如下内容写入switch.cfg中 - - ``` - TransposeReshapeFusionPass:off - ``` - - 经过Profiling分析,ConfusionTransposeD算子性能过低,故将其输入加入白名单。即在/usr/local/Ascend/ascend-toolkit/5.0.2.alpha003/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py里添加 Tranpose shape 白名单: - - ``` - 1,64,2,2,256,256 - ``` - - 以下是优化前后性能对比 - - | | 未任何优化前310(单卡吞吐率) | 优化后310(单卡吞吐率) | - | :--: | :---------------------------: | :---------------------: | - | bs1 | 0.7245 | 9.3220 | - -3. 使用atc工具将onnx模型转换为om模型,命令参考 - - ```bash - atc --framework=5 --model=rcan.onnx --output=rcan_1bs --input_format=NCHW --input_shape="image:1,3,256,256" --fusion_switch_file=switch.cfg --log=debug --soc_version=Ascend310 - ``` - - 此命令将在运行路径下生成一个rcan_1bs.om文件,此文件即为目标om模型文件 - -## 4. 
数据预处理 - -### 4.1 数据集获取 - -该模型使用[Set5](https://github.com/yulunzhang/RCAN/tree/master/RCAN_TestCode/OriginalTestData/Set5)的5张验证集进行测试,图片数据放在/root/datasets/Set5。 - -### 4.2 数据集预处理 - -使用 rcan_preprocess.py 脚本进行数据预处理,脚本执行命令: - -``` -python3.7 rcan_preprocess.py -s /root/datasets/Set5/LR -d ./prep_data --size 256 -``` - -由于rcan模型支持动态输入,而atc工具需要指定输入大小,所以要在此对图像添加pad和进行缩放到同一大小,最终对推理产生的结果进行后处理恢复。以上命令将自动生成一个pad_info.json文件,此文件记录在数据预处理中对图像的pad和缩放信息,用于数据后处理时进行图像裁剪。 - -### 4.3 生成数据集信息文件 - -1. 生成数据集信息文件脚本 gen_dataset_info.py - -2. 执行生成数据集信息脚本,生成数据集信息文件 - - ```bash - python3.7 gen_dataset_info.py bin ./prep_data ./rcan_prep_bin.info 256 256 - ``` - - 第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 - -## 5. 离线推理 - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 - -### 5.2 离线推理 - -1. 执行离线推理 - - ```bash - ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=rcan_1bs.om -input_text_path=./rcan_prep_bin.info -input_width=256 -input_height=256 -output_binary=True -useDvpp=False - ``` - - 输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为HR_image的输出,shape为bs * 512 * 512,数据类型为FP32,对应1个超分辨后的图像数据,每个输入对应的输出对应一个_x.bin文件。 - -2. 数据后处理 - - ```bash - python3.7 rcan_postprocess.py -s result/dumpOutput_device0/ -d post_data - ``` - - 由于在预处理中对图像进行了添加pad和缩放操作,故要对推理结果进行相应的裁剪和缩放 - -## 6. 精度对比 - -### 6.1 离线推理TopN精度 - -## 6. 精度对比 - -| | PSNR | SSIM | -| :----------------: | :---: | :----: | -| 原github仓库精度 | 38.27 | 0.9614 | -| om模型离线推理精度 | 38.25 | 0.9606 | - -## 7. 性能对比 - -### 7.1 npu性能数据 - -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。由于模型不接受多 batch 输入,所以只统计在整个数据集上推理得到bs1的性能数据 - -使用benchmark工具在整个数据集上推理时获得的性能数据: - -``` -[e2e] throughputRate: 1.11943, latency: 4466.54 -[data read] throughputRate: 1253.13, moduleLatency: 0.798 -[preprocess] throughputRate: 5.22007, moduleLatency: 191.568 -[infer] throughputRate: 2.30086, Interface throughputRate: 2.32019, moduleLatency: 433.982 -[post] throughputRate: 2.29506, moduleLatency: 435.719 -``` - -Interface throughputRate: 2.32019,2.32019x4=9.28076fps 即是batch1 310单卡吞吐率 - -### 7.2 T4性能数据 - -在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 - -``` -trtexec --onnx=rcan.onnx --fp16 --shapes=image:1x3x256x256 -``` - -gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch。其中--fp16是算子精度,目前算子精度只测--fp16的。 - -``` -[07/14/2021-10:48:26] [I] GPU Compute -[07/14/2021-10:48:26] [I] min: 150.203 ms -[07/14/2021-10:48:26] [I] max: 157.738 ms -[07/14/2021-10:48:26] [I] mean: 152.347 ms -[07/14/2021-10:48:26] [I] median: 151.781 ms -[07/14/2021-10:48:26] [I] percentile: 157.738 ms at 99% -[07/14/2021-10:48:26] [I] total compute time: 3.19929 s -``` - -batch1 t4单卡吞吐率:1000/(152.347/1)=6.5212fps - -### 7.3 性能对比 - -batch1: 9.28076fps > 6.5212fps - -310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率大,故310性能高于T4性能,性能达标。 - -对于batch1,310性能高于T4性能1.2倍,该模型放在Benchmark/cv/classification目录下。 - +# RCAN Onnx 模型端到端推理指导 + +- [1. 模型概述](#1) + - [论文地址](#11) + - [代码地址](#12) +- [2. 环境说明](#2) + - [深度学习框架](#21) + - [python第三方库](#22) +- [3. 模型转换](#3) + - [pth转onnx模型](#31) +- [4. 数据预处理](#4) + - [数据集获取](#41) + - [数据集预处理](#42) + - [生成数据集信息文件](#43) +- [5. 离线推理](#5) + - [benchmark工具概述](#51) + - [离线推理](#52) +- [6. 精度对比](#6) +- [7. 
性能对比](#7) + - [npu性能数据](#71) + - [T4性能数据](#72) + - [性能对比](#73) + +## 1. 模型概述 + +### 1.1 论文地址 + +[RCAN 论文](https://arxiv.org/abs/1807.02758) + +### 1.2 代码地址 + +[RCAN 代码](https://github.com/yulunzhang/RCAN) + +branch: master + +commit_id: 3339ebc59519c3bb2b5719b87dd36515ec7f3ba7 + +## 2. 环境说明 + +对于batch1与batch16,310性能均高于T4性能1.2倍,该模型放在Benchmark/cv/classification目录下。 + +### 2.1 深度学习框架 + +``` +pytorch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy == 1.19.2 +Pillow == 8.2.0 +opencv-python == 4.5.2 +``` + +> **说明:** +> +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3. 模型转换 + +### 3.1 pth转onnx模型 + +1. 下载 pth 权重文件,放入models目录下 + + [RCAN 预训练pth权重文件](https://pan.baidu.com/s/1bkoJKmdOcvLhOFXHVkFlKA) + + 文件名:RCAN_BIX2.pt + + md5sum:f567f8560fde71ba0973a7fe472a42f2 + +2. 克隆代码仓库代码 + + ```bash + git clone https://github.com/yulunzhang/RCAN.git + ``` + +3. 使用rcan_pth2onnx.py 脚本将pth转化为onnx + + ```bash + python3.7 rcan_pth2onnx.py --pth RCAN_BIX2.pt --onnx rcan.onnx + ``` + + RCAN_BIX2.pt 文件为步骤1中下载的预训练权重文件,该条指令将在运行处生成一个rcan.onnx文件,此文件即为目标onnx文件 + + +### 3.2 onnx转om模型 + +下列需要在具备华为Ascend系列芯片的机器上执行: + +1. 设置 atc 工作所需要的环境变量 + + ```bash + export install_path=/usr/local/Ascend/ascend-toolkit/latest + export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH + export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH + export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH + export ASCEND_OPP_PATH=${install_path}/opp + ``` + +2. 由于transpose算子对于某些shape不友好,需要进行优化,将如下内容写入switch.cfg中 + + ``` + TransposeReshapeFusionPass:off + ``` + + 经过Profiling分析,ConfusionTransposeD算子性能过低,故将其输入加入白名单。即在/usr/local/Ascend/ascend-toolkit/5.0.2.alpha003/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py里添加 Transpose shape 白名单: + + ``` + 1,64,2,2,256,256 + ``` + + 以下是优化前后性能对比 + + | | 未做任何优化前310(单卡吞吐率) | 优化后310(单卡吞吐率) | + | :--: | :---------------------------: | :---------------------: | + | bs1 | 0.7245 | 9.3220 | + +3. 使用atc工具将onnx模型转换为om模型,命令参考 + + ```bash + atc --framework=5 --model=rcan.onnx --output=rcan_1bs --input_format=NCHW --input_shape="image:1,3,256,256" --fusion_switch_file=switch.cfg --log=debug --soc_version=Ascend310 + ``` + + 此命令将在运行路径下生成一个rcan_1bs.om文件,此文件即为目标om模型文件 + +## 4. 数据预处理 + +### 4.1 数据集获取 + +该模型使用[Set5](https://github.com/yulunzhang/RCAN/tree/master/RCAN_TestCode/OriginalTestData/Set5)的5张验证集进行测试,图片数据放在/root/datasets/Set5。 + +### 4.2 数据集预处理 + +使用 rcan_preprocess.py 脚本进行数据预处理,脚本执行命令: + +``` +python3.7 rcan_preprocess.py -s /root/datasets/Set5/LR -d ./prep_data --size 256 +``` + +由于rcan模型支持动态输入,而atc工具需要指定输入大小,所以要在此对图像添加pad和进行缩放到同一大小,最终对推理产生的结果进行后处理恢复。以上命令将自动生成一个pad_info.json文件,此文件记录在数据预处理中对图像的pad和缩放信息,用于数据后处理时进行图像裁剪。 + +### 4.3 生成数据集信息文件 + +1. 生成数据集信息文件脚本 gen_dataset_info.py + +2. 执行生成数据集信息脚本,生成数据集信息文件 + + ```bash + python3.7 gen_dataset_info.py bin ./prep_data ./rcan_prep_bin.info 256 256 + ``` + + 第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 + +## 5. 离线推理 + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程,获取工具及使用方法可以参考CANN V100R020C10 推理benchmark工具用户指南 01 + +### 5.2 离线推理 + +1. 
执行离线推理 + + ```bash + ./benchmark.x86_64 -model_type=vision -device_id=0 -batch_size=1 -om_path=rcan_1bs.om -input_text_path=./rcan_prep_bin.info -input_width=256 -input_height=256 -output_binary=True -useDvpp=False + ``` + + 输出结果默认保存在当前目录result/dumpOutput_device{0},模型只有一个名为HR_image的输出,shape为bs * 512 * 512,数据类型为FP32,对应1个超分辨后的图像数据,每个输入对应的输出对应一个_x.bin文件。 + +2. 数据后处理 + + ```bash + python3.7 rcan_postprocess.py -s result/dumpOutput_device0/ -d post_data + ``` + + 由于在预处理中对图像进行了添加pad和缩放操作,故要对推理结果进行相应的裁剪和缩放 + +## 6. 精度对比 + +### 6.1 离线推理精度 + +| | PSNR | SSIM | +| :----------------: | :---: | :----: | +| 原github仓库精度 | 38.27 | 0.9614 | +| om模型离线推理精度 | 38.25 | 0.9606 | + +## 7. 性能对比 + +### 7.1 npu性能数据 + +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device,使用npu-smi info可以查看device是否空闲。也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,可初步确认benchmark工具在整个数据集上推理时由于device也被其它推理任务使用了导致的性能不准的问题。由于模型不接受多 batch 输入,所以只统计在整个数据集上推理得到bs1的性能数据 + +使用benchmark工具在整个数据集上推理时获得的性能数据: + +``` +[e2e] throughputRate: 1.11943, latency: 4466.54 +[data read] throughputRate: 1253.13, moduleLatency: 0.798 +[preprocess] throughputRate: 5.22007, moduleLatency: 191.568 +[infer] throughputRate: 2.30086, Interface throughputRate: 2.32019, moduleLatency: 433.982 +[post] throughputRate: 2.29506, moduleLatency: 435.719 +``` + +Interface throughputRate: 2.32019,2.32019x4=9.28076fps 即是batch1 310单卡吞吐率 + +### 7.2 T4性能数据 + +在装有T4卡的服务器上测试gpu性能,测试过程请确保卡没有运行其他任务,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 + +``` +trtexec --onnx=rcan.onnx --fp16 --shapes=image:1x3x256x256 +``` + +gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch。其中--fp16是算子精度,目前算子精度只测--fp16的。 + +``` +[07/14/2021-10:48:26] [I] GPU Compute +[07/14/2021-10:48:26] [I] min: 150.203 ms +[07/14/2021-10:48:26] [I] max: 157.738 ms +[07/14/2021-10:48:26] [I] mean: 152.347 ms +[07/14/2021-10:48:26] [I] median: 151.781 ms +[07/14/2021-10:48:26] [I] percentile: 157.738 ms at 99% +[07/14/2021-10:48:26] [I] total compute time: 3.19929 s +``` + +batch1 t4单卡吞吐率:1000/(152.347/1)=6.5212fps + +### 7.3 性能对比 + +batch1: 9.28076fps > 6.5212fps + +310单个device的吞吐率乘4即单卡吞吐率比T4单卡的吞吐率大,故310性能高于T4性能,性能达标。 + +对于batch1,310性能高于T4性能1.2倍,该模型放在Benchmark/cv/classification目录下。 + diff --git a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/README.md b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/README.md index 9ac8ecec1d746519d6bf72c3ad15619bc330b960..895b5390a526203e8349177b2f1a7d782750a5e7 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/README.md +++ b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/README.md @@ -1,26 +1,26 @@ -环境准备: - -1.数据集路径 -数据集统一放在/root/datasets/或/opt/npu/ -本模型数据集放在/root/datasets/ - -2.进入工作目录 -cd RCAN - -3.安装必要的依赖 -pip3.7 install -r requirements.txt - -4.获取模型代码 -git clone https://github.com/yulunzhang/RCAN - -5.如果使用补丁文件修改了模型代码则将补丁打入模型代码,如果需要引用模型代码仓的类或函数通过sys.path.append()添加搜索路径。 - -5.获取权重文件 -[RCAN 预训练pth权重文件](https://pan.baidu.com/s/1bkoJKmdOcvLhOFXHVkFlKA) - -7.获取benchmark工具 -将benchmark.x86_64放在当前目录 - -8.310上执行,执行时确保device空闲 -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets +环境准备: + +1.数据集路径 +数据集统一放在/root/datasets/或/opt/npu/ +本模型数据集放在/root/datasets/ + +2.进入工作目录 +cd RCAN + +3.安装必要的依赖 +pip3.7 install -r requirements.txt + +4.获取模型代码 +git clone https://github.com/yulunzhang/RCAN + +5.如果使用补丁文件修改了模型代码则将补丁打入模型代码,如果需要引用模型代码仓的类或函数通过sys.path.append()添加搜索路径。 + +6.获取权重文件 +[RCAN 预训练pth权重文件](https://pan.baidu.com/s/1bkoJKmdOcvLhOFXHVkFlKA) + 
+7.获取benchmark工具 +将benchmark.x86_64放在当前目录 + +8.310上执行,执行时确保device空闲 +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets diff --git a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/parse.py b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/parse.py index 21ac0dab63a0d58564d4bc04255a7e468ec8e191..86e265cb0697765c5ff926c2801d0be752452f1d 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/RCAN/test/parse.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as fp: - content = json.load(fp) - PSNR = content['PSNR'] - SSIM = content['SSIM'] - print('om {} PSNR:{} SSIM:{}'.format(result_json.split('_')[1].split('.')[0], PSNR, SSIM)) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as fp: + content = json.load(fp) + PSNR = content['PSNR'] + SSIM = content['SSIM'] + print('om {} PSNR:{} SSIM:{}'.format(result_json.split('_')[1].split('.')[0], PSNR, SSIM)) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/RDN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/super_resolution/RDN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/RDN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/super_resolution/RDN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/RDN/test/README.md b/ACL_PyTorch/contrib/cv/super_resolution/RDN/test/README.md index f3fc08cb2b06551c60258305ba490240cb1d3ad8..bcf77d3efe1349a6a38fa4c136d4fa775d73d995 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/RDN/test/README.md +++ b/ACL_PyTorch/contrib/cv/super_resolution/RDN/test/README.md @@ -1,52 +1,52 @@ -## RDN模型PyTorch离线推理指导 - -### 1 环境准备 - -1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - - ```python - pip3.7 install -r requirements.txt - ``` - -2. 获取开源模型代码 - - ``` - git clone https://github.com/yjn870/RDN-pytorch -b master - ``` - - 开源模型代码仓没有安装脚本,可以通过sys.path.append(r"./RDN-pytorch")添加搜索路径,然后在pth2onnx脚本中就可以引用模型代码的函数或类 - -3. 获取权重文件 - - [RDN_x2预训练pth权重文件](https://www.dropbox.com/s/pd52pkmaik1ri0h/rdn_x2.pth?dl=0) - -4. 数据集 - - 开源代码仓只提供了h5格式的Set5数据集,由于代码仓评测精度的脚本采用png格式的图片作为输入,可通过[Set5](https://github.com/hengchuan/RDN-TensorFlow/tree/master/Test/Set5)下载png格式的Set5数据集,并将文件夹重命名为set5,数据集放在/root/datasets目录 - -5. 获取benchmark工具 - - 将benchmark.x86_64放到当前目录 - -6. TransposeD算子性能优化 - - 由于om模型中存在低性能的TransposeD算子,通过添加白名单使用高性能的Transpose算子。/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py里添加shape白名单:[1, 64, 2, 2, 114, 114] - -### 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/root/datasets -``` - - - -**评测结果:** - -| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | -| :-----: | :---------: | :-------------: | :--------: | :-------: | -| RDN bs1 | PSNR:38.18 | PSNR:38.27 | fps:25.393 | fps:29.577 | - +## RDN模型PyTorch离线推理指导 + +### 1 环境准备 + +1. 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + + ```python + pip3.7 install -r requirements.txt + ``` + +2. 获取开源模型代码 + + ``` + git clone https://github.com/yjn870/RDN-pytorch -b master + ``` + + 开源模型代码仓没有安装脚本,可以通过sys.path.append(r"./RDN-pytorch")添加搜索路径,然后在pth2onnx脚本中就可以引用模型代码的函数或类 + +3. 获取权重文件 + + [RDN_x2预训练pth权重文件](https://www.dropbox.com/s/pd52pkmaik1ri0h/rdn_x2.pth?dl=0) + +4. 数据集 + + 开源代码仓只提供了h5格式的Set5数据集,由于代码仓评测精度的脚本采用png格式的图片作为输入,可通过[Set5](https://github.com/hengchuan/RDN-TensorFlow/tree/master/Test/Set5)下载png格式的Set5数据集,并将文件夹重命名为set5,数据集放在/root/datasets目录 + +5. 获取benchmark工具 + + 将benchmark.x86_64放到当前目录 + +6. 
TransposeD算子性能优化 + + 由于om模型中存在低性能的TransposeD算子,通过添加白名单使用高性能的Transpose算子。/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py里添加shape白名单:[1, 64, 2, 2, 114, 114] + +### 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/root/datasets +``` + + + +**评测结果:** + +| 模型 | 官网pth精度 | 310离线推理精度 | 基准性能 | 310性能 | +| :-----: | :---------: | :-------------: | :--------: | :-------: | +| RDN bs1 | PSNR:38.18 | PSNR:38.27 | fps:25.393 | fps:29.577 | + - 因Set5数据集只有5张图片,因此仅使用了bs1进行评测。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/README.md b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/README.md index e37cba67a59eafa18ed5eae1e0f6b46f8e2f6a8e..c3f42ecb9b0e0c0dd58782d3113ce77a32d501d6 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/README.md +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/README.md @@ -1,49 +1,49 @@ -# Real-ESRGAN-baseline模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 -``` -pip3.7 install -r requirements.txt -``` - -2.获取与安装开源模型代码 -``` -git clone https://github.com/xinntao/Real-ESRGAN.git -cd Real-ESRGAN -git reset c9023b3d7a5b711b0505a3e39671e3faab9de1fe --hard -``` - -3.获取权重文件 - -将权重文件[RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth)放到experiments/pretrained_models/目录 -``` - mkdir -p experiments/pretrained_models - wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models -``` - -4.数据集 -获取推理数据集:推理数据集代码仓已提供,并且放置在代码仓./Real-ESRGAN/inputs目录 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前工作目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=./Real-ESRGAN -``` - **评测结果:** -| 模型 | 基准性能 | 310性能 | -| :------: | :------: | :------: | -| Real-ESRGAN bs1 | 55.132fps | 139.502fps | -| Real-ESRGAN bs16 | 72.923fps | 117.636fps | - -备注: -加上TEST.NECK_FEAT "('before')" TEST.FEAT_NORM "('no')"导出的onnx可以进行离线推理 -不加TEST.NECK_FEAT "('before')" TEST.FEAT_NORM "('no')"导出的onnx转换的om精度与官网精度一致 - - - +# Real-ESRGAN-baseline模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 +``` +pip3.7 install -r requirements.txt +``` + +2.获取与安装开源模型代码 +``` +git clone https://github.com/xinntao/Real-ESRGAN.git +cd Real-ESRGAN +git reset c9023b3d7a5b711b0505a3e39671e3faab9de1fe --hard +``` + +3.获取权重文件 + +将权重文件[RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth)放到experiments/pretrained_models/目录 +``` + mkdir -p experiments/pretrained_models + wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P experiments/pretrained_models +``` + +4.数据集 +获取推理数据集:推理数据集代码仓已提供,并且放置在代码仓./Real-ESRGAN/inputs目录 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前工作目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=./Real-ESRGAN +``` + **评测结果:** +| 模型 | 基准性能 | 310性能 | +| :------: | :------: | :------: | +| Real-ESRGAN bs1 | 55.132fps | 139.502fps | +| Real-ESRGAN bs16 | 72.923fps | 117.636fps | + +备注: +加上TEST.NECK_FEAT "('before')" TEST.FEAT_NORM "('no')"导出的onnx可以进行离线推理 
+不加TEST.NECK_FEAT "('before')" TEST.FEAT_NORM "('no')"导出的onnx转换的om精度与官网精度一致 + + + diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_postprocess.py b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_postprocess.py index a420e1ef318a764f79b3ae73ec89f4a1d6bfa063..c27bc47051e94af0583da09c6d9b3f5bf0bf55a4 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_postprocess.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_postprocess.py @@ -1,16 +1,16 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright 2021 Huawei Technologies Co., Ltd # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import matplotlib diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_preprocess.py b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_preprocess.py index 7bd8d21b149a3b3a856bf1634f1937b0037f3548..a6b0c71b97bd7d4b28ec643959a71c2bce0704e9 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_preprocess.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/Real-ESRGAN_preprocess.py @@ -1,17 +1,17 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os from PIL import Image from torchvision.transforms import transforms diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/get_dataset_info.py b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/get_dataset_info.py index 7ff00e26a93ec0b134ada8c5d6734ecf4230df5b..3024744fe86fad12edb8420870b9266c0fab8855 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/get_dataset_info.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/get_dataset_info.py @@ -1,63 +1,63 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cv2 -from glob import glob - - -def get_bin_info(file_path, info_name, width, height): - bin_images = glob(os.path.join(file_path, '*.bin')) - print(bin_images) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_images): - content = ' '.join([str(index), img, width, height]) - file.write(content) - file.write('\n') - - -def get_jpg_info(file_path, info_name): - extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] - image_names = [] - for extension in extensions: - image_names.append(glob(os.path.join(file_path, '*.' + extension))) - with open(info_name, 'w') as file: - for image_name in image_names: - if len(image_name) == 0: - continue - else: - for index, img in enumerate(image_name): - img_cv = cv2.imread(img) - shape = img_cv.shape - width, height = shape[1], shape[0] - content = ' '.join([str(index), img, str(width), str(height)]) - file.write(content) - file.write('\n') - - -if __name__ == '__main__': - file_type = sys.argv[1] - file_path = sys.argv[2] - info_name = sys.argv[3] - - if file_type == 'bin': - - width = sys.argv[4] - height = sys.argv[5] - assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' - get_bin_info(file_path, info_name, width, height) - elif file_type == 'jpg': - assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + print(bin_images) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + + if file_type == 'bin': + + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' get_jpg_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/test/parse.py b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/test/parse.py index c3cc8bf85b0b8994cf08821af9d83276412a22bf..83170a37074cc8524e7201d9bfea401253feb15d 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN/test/parse.py @@ -1,26 +1,26 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list=re.split('=|,',content) - fps = float(txt_data_list[1].replace('samples/s', '')) * 4 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list=re.split('=|,',content) + fps = float(txt_data_list[1].replace('samples/s', '')) * 4 print('310 {} fps:{}'.format(re.split('-|_',result_txt)[4], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/LICENSE b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/LICENSE +++ b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/requirements.txt b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/requirements.txt index 399dbfed087e9d139235f30e8d5991e803b92edd..fbd453de267138a05ccf06d3be32a3d4eb8f68fc 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/requirements.txt @@ -1,6 +1,6 @@ -torch == 1.5.0 -torchvision == 0.6.0 -onnx == 1.9.0 -numpy == 1.19.2 -Pillow == 8.2.0 +torch == 1.5.0 +torchvision == 0.6.0 +onnx == 1.9.0 +numpy == 1.19.2 +Pillow == 8.2.0 opencv-python == 4.5.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/test/parse.py b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/test/parse.py index b9c74f41d7848e1250356f14472b237a18bb3489..82af69cd183218c3263723c20b652b3f7ec2bc27 100644 --- a/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/super_resolution/SRCNN/test/parse.py @@ -1,32 +1,32 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] - print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + tops = [i.get('value') for i in json.loads(content).get('value') if 'Top' in i.get('key')] + print('om {} top1:{} top5:{}'.format(result_json.split('_')[1].split('.')[0], tops[0], tops[4])) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/LICENSE b/ACL_PyTorch/contrib/cv/tracking/SiamFC/LICENSE index b1fac45f02e2f98395fd96a7e4f4a39e257ac0bc..989e2c59e973a05cfbfe9de678b7f2af777b0713 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/LICENSE +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/README.md b/ACL_PyTorch/contrib/cv/tracking/SiamFC/README.md index fcdb28c3678aa82748bd54f0a3460f6b806e5eac..0e15dcde5c6e6ab4fee61e5334d179967e67715e 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/README.md +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/README.md @@ -1,50 +1,50 @@ -# SiamFC模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip3.7 install -r requirements.txt -``` - -2.获取,修改与安装开源模型代码 -``` -代码地址:https://github.com/HonglinChu/SiamTrackers/tree/master/2-SiamFC/SiamFC-VID -论文地址:https://arxiv.org/pdf/1606.09549.pdf -``` -3.获取权重文件 - -采用Ascend910上训练得到的权重文件[siamfc.pth](https://pan.baidu.com/s/1N3Igj4ZgntjRevsGA5xOTQ),提取码:4i4l,放置于本代码仓./pth目录下 - -4.数据集 -[获取OTB2015数据集]([Visual Tracker Benchmark (hanyang.ac.kr)](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html)),并重命名为OTB,默认存放在/opt/npu目录下 - -5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) -将benchmark.x86_64或benchmark.aarch64放到当前目录 - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh --datasets_path=/opt/npu -``` -> datasets_path参数根据数据集实际的存放位置而定,例如:OTB数据集存放位置为/opt/npu/OTB,则应设置参数--datasets_path=/opt/npu - - **评测结果:** - -| 模型 | pth在线推理精度 | 310离线推理精度 | -| :--------: | :------------------------------------------: | :------------------------------------------: | -| siamfc_bs1 | success_score: 0.576 precision_score: 0.767 | success_score: 0.571 precision_score: 0.760 | - -| 模型 | Benchmark性能 | 310性能 | -| :------: | :------: | :------: | -| exemplar_bs1 | 4240fps | 5677fps | -| search_bs1 | 738fps | 862fps | - -> 由于该模型无法进行常规的离线测试,因而改为对测试集的每一帧进行测试,exemplar_bs1和search_bs1分别对应模型中的两个分支,它们所进行的操作不同。 -> -> siamfc_bs1由exemplar_bs1和search_bs1这两部分组成,在评测精度时给出siamfc_bs1的精度,在评测性能时分别给出exemplar_bs1和search_bs1的性能。 - - +# SiamFC模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip3.7 install -r requirements.txt +``` + +2.获取,修改与安装开源模型代码 +``` +代码地址:https://github.com/HonglinChu/SiamTrackers/tree/master/2-SiamFC/SiamFC-VID +论文地址:https://arxiv.org/pdf/1606.09549.pdf +``` +3.获取权重文件 + +采用Ascend910上训练得到的权重文件[siamfc.pth](https://pan.baidu.com/s/1N3Igj4ZgntjRevsGA5xOTQ),提取码:4i4l,放置于本代码仓./pth目录下 + +4.数据集 +[获取OTB2015数据集]([Visual Tracker Benchmark (hanyang.ac.kr)](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html)),并重命名为OTB,默认存放在/opt/npu目录下 + +5.[获取benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer) +将benchmark.x86_64或benchmark.aarch64放到当前目录 + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh --datasets_path=/opt/npu +``` +> datasets_path参数根据数据集实际的存放位置而定,例如:OTB数据集存放位置为/opt/npu/OTB,则应设置参数--datasets_path=/opt/npu + + **评测结果:** + +| 模型 | pth在线推理精度 | 310离线推理精度 | +| :--------: | :------------------------------------------: | :------------------------------------------: | +| siamfc_bs1 | success_score: 0.576 precision_score: 0.767 | success_score: 0.571 precision_score: 0.760 | + +| 模型 | Benchmark性能 | 310性能 | +| :------: | :------: | :------: | +| exemplar_bs1 | 4240fps | 5677fps | +| search_bs1 | 738fps | 862fps | + +> 由于该模型无法进行常规的离线测试,因而改为对测试集的每一帧进行测试,exemplar_bs1和search_bs1分别对应模型中的两个分支,它们所进行的操作不同。 +> +> siamfc_bs1由exemplar_bs1和search_bs1这两部分组成,在评测精度时给出siamfc_bs1的精度,在评测性能时分别给出exemplar_bs1和search_bs1的性能。 + + diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/get_perf_data.py b/ACL_PyTorch/contrib/cv/tracking/SiamFC/get_perf_data.py index 
c8c69d6b8ba9bfa0f15e1cbb28a002436612d870..c0d104efd2130a607cddb5e5bbaf9f3ea1747fe3 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/get_perf_data.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/get_perf_data.py @@ -1,64 +1,64 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import torch -import sys - - -total = 5000 -exemplar_size = (1, 3, 127, 127) -search_size = (1, 9, 255, 255) - - -class ExperimentPerformance(object): - def __init__(self): - super(ExperimentPerformance, self).__init__() - - def run(self, savepath_e, savepath_s, infopath_e, infopath_s): - for i in range(total): - exemplar_input = torch.randn(exemplar_size) - exemplar_input = np.array(exemplar_input).astype(np.float32) - exemplar_name = "exemplar{}".format(i) - exemplar_path = os.path.join(savepath_e, exemplar_name + ".bin") - exemplar_input.tofile(exemplar_path) - with open(infopath_e, 'a') as file: - content = ' '.join([str(i), exemplar_path, str(127), str(127)]) - file.write(content) - file.write('\n') - - search_input = torch.randn(search_size) - search_input = np.array(search_input).astype(np.float32) - search_name = "search{}".format(i) - search_path = os.path.join(savepath_s, search_name + ".bin") - search_input.tofile(search_path) - with open(infopath_s, 'a') as file: - content = ' '.join([str(i), search_path, str(255), str(255)]) - file.write(content) - file.write('\n') - - -if __name__ == "__main__": - save_path_e = sys.argv[1] - save_path_s = sys.argv[2] - info_path_e = sys.argv[3] - info_path_s = sys.argv[4] - if not os.path.exists(save_path_e): - os.makedirs(save_path_e) - if not os.path.exists(save_path_s): - os.makedirs(save_path_s) - e = ExperimentPerformance() - e.run(save_path_e, save_path_s, info_path_e, info_path_s) - print("Data For Performance Ready.") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
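get_perf_data.py takes its four output locations straight from sys.argv, writes total = 5000 random exemplar (1x3x127x127) and search (1x9x255x255) inputs as float32 .bin files, and lists them in two .info files for the benchmark tool. A sketch of one possible invocation follows; the paths are placeholders, the real ones come from test/eval_acc_perf.sh, which is not shown here.

```
# Placeholder paths for illustration only; eval_acc_perf.sh supplies the real ones.
import subprocess

subprocess.run([
    "python3.7", "get_perf_data.py",
    "perf/exemplar_bin", "perf/search_bin",    # save_path_e, save_path_s
    "perf/exemplar.info", "perf/search.info",  # info_path_e, info_path_s
], check=True)

# Each .info line has the form "<index> <bin path> <width> <height>",
# e.g. "0 perf/exemplar_bin/exemplar0.bin 127 127".
```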
+ +import os +import numpy as np +import torch +import sys + + +total = 5000 +exemplar_size = (1, 3, 127, 127) +search_size = (1, 9, 255, 255) + + +class ExperimentPerformance(object): + def __init__(self): + super(ExperimentPerformance, self).__init__() + + def run(self, savepath_e, savepath_s, infopath_e, infopath_s): + for i in range(total): + exemplar_input = torch.randn(exemplar_size) + exemplar_input = np.array(exemplar_input).astype(np.float32) + exemplar_name = "exemplar{}".format(i) + exemplar_path = os.path.join(savepath_e, exemplar_name + ".bin") + exemplar_input.tofile(exemplar_path) + with open(infopath_e, 'a') as file: + content = ' '.join([str(i), exemplar_path, str(127), str(127)]) + file.write(content) + file.write('\n') + + search_input = torch.randn(search_size) + search_input = np.array(search_input).astype(np.float32) + search_name = "search{}".format(i) + search_path = os.path.join(savepath_s, search_name + ".bin") + search_input.tofile(search_path) + with open(infopath_s, 'a') as file: + content = ' '.join([str(i), search_path, str(255), str(255)]) + file.write(content) + file.write('\n') + + +if __name__ == "__main__": + save_path_e = sys.argv[1] + save_path_s = sys.argv[2] + info_path_e = sys.argv[3] + info_path_s = sys.argv[4] + if not os.path.exists(save_path_e): + os.makedirs(save_path_e) + if not os.path.exists(save_path_s): + os.makedirs(save_path_s) + e = ExperimentPerformance() + e.run(save_path_e, save_path_s, info_path_e, info_path_s) + print("Data For Performance Ready.") diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/tracking/SiamFC/modelzoo_level.txt index 51b74557c15082ae794632436e724456a0fdcfde..5c956b09db3bcabe37a0665848500a6419125fee 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK -PerfStatus:OK -ModelConvert:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:OK +ModelConvert:OK QuantStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/prepostprocess.py b/ACL_PyTorch/contrib/cv/tracking/SiamFC/prepostprocess.py index e12937034e2d28d06d3097dec42f078e15e429b1..930ab71b0432f54ce5bdbb361c9dac9f1b8ff5a8 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/prepostprocess.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/prepostprocess.py @@ -1,158 +1,158 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import cv2 -import torch -import torch.nn.functional as F -import numpy as np -from utils import ToTensor, get_exemplar_image, get_pyramid_instance_image -import struct - - -exemplar_size = 127 # exemplar size z -instance_size = 255 # instance size x -context_amount = 0.5 # context amount -num_scale = 3 # number of scales -scale_step = 1.0375 # scale step of instance image -scale_penalty = 0.9745 # scale penalty -scale_lr = 0.59 # scale learning rate -response_up_stride = 16 # response upsample stride -response_sz = 17 # response size -window_influence = 0.176 # window influence -total_stride = 8 # total stride of backbone - - -class PrePostProcess(object): - def __init__(self): - self.penalty = np.ones((num_scale)) * scale_penalty - self.penalty[num_scale // 2] = 1 # [0.9745, 1, 0.9745] - - # create cosine window upsample stride=2^4=16, heatmap 17x17 - self.interp_response_sz = response_up_stride * response_sz # 272=16x17 - self.cosine_window = self._cosine_window((self.interp_response_sz, self.interp_response_sz)) - - def _cosine_window(self, size): - """ - get the cosine window - """ - cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(np.hanning(int(size[1]))[np.newaxis, :]) - cos_window = cos_window.astype(np.float32) - cos_window /= np.sum(cos_window) - return cos_window - - def cropexemplar(self, frame, box, save_path, file_name): - """ - Args: - frame: an RGB image - box: one-based bounding box [x, y, width, height] - """ - self.bbox = (box[0] - 1, box[1] - 1, box[0] - 1 + box[2], box[1] - 1 + box[3]) # zero based x1,y1,x2,y2 - self.pos = np.array([box[0] - 1 + (box[2]) / 2, box[1] - 1 + (box[3]) / 2]) # zero based cx, cy, - self.target_sz = np.array([box[2], box[3]]) # zero based w, h - - # get exemplar img - self.img_mean = tuple(map(int, frame.mean(axis=(0, 1)))) - exemplar_img, scale_z, s_z = get_exemplar_image(frame, self.bbox, - exemplar_size, context_amount, self.img_mean) - - # create scales: 0.96, 1, 1.037 - self.scales = scale_step ** np.arange(np.ceil(num_scale / 2) - num_scale, - np.floor(num_scale / 2) + 1) - - # create s_x : instance is twice as large as exemplar - self.s_x = s_z + (instance_size - exemplar_size) / scale_z # s-x search_sz, s-z exemplar_sz - - # arbitrary scale saturation - self.min_s_x = 0.2 * self.s_x - self.max_s_x = 5 * self.s_x - - # get exemplar feature - # m1: use torchvision.transforms - # exemplar_img = self.transforms(exemplar_img)[None, :, :, :] # 1,3,127,127 - # m2: don't use torchvision.transforms - exemplar_img = ToTensor(exemplar_img) - img = np.array(exemplar_img).astype(np.float32) - path = os.path.join(save_path, file_name.split('.')[0].replace('/', '-') + ".bin") - img.tofile(path) - return path - - def cropsearch(self, frame, save_path, file_name): - size_x_scales = self.s_x * self.scales # multi-scale search - pyramid = get_pyramid_instance_image(frame, self.pos, instance_size, size_x_scales, self.img_mean) - # m1: use torchvision.transforms - # instance_imgs = torch.cat([self.transforms(x)[None, :, :, :] for x in pyramid], dim=0) # 3, 3, 255, 255 - # m2: don't use torchvision.transforms - instance_imgs = torch.cat([ToTensor(x) for x in pyramid], dim=1) # 3, 3, 255, 255 - img = np.array(instance_imgs).astype(np.float32) - path = os.path.join(save_path, file_name.split('.')[0].replace('/', '-') + ".bin") - img.tofile(path) - return path - - def postprocess(self, x_f, z_f): - # x_f:search z_f:exemplar - response_maps = F.conv2d(x_f, z_f, groups=3) - response_maps = response_maps.transpose(0, 1) - response_maps = 
response_maps.numpy().squeeze() # 3, 17, 17 - - response_maps_up = [cv2.resize(x, (self.interp_response_sz, self.interp_response_sz), cv2.INTER_CUBIC) - for x in response_maps] # upsample - - # get max score of each scale - max_score = np.array([x.max() for x in response_maps_up]) * self.penalty # penalty=[0.9745, 1, 0.9745] - - # penalty scale change - scale_idx = max_score.argmax() - response_map = response_maps_up[scale_idx] - response_map -= response_map.min() - response_map /= response_map.sum() - response_map = (1 - window_influence) * response_map + \ - window_influence * self.cosine_window - max_r, max_c = np.unravel_index(response_map.argmax(), response_map.shape) - # displacement in interpolation response - disp_response_interp = np.array([max_c, max_r]) - (self.interp_response_sz - 1) / 2. - # displacement in input, response_up_stride=16, total_stride=8 - disp_response_input = disp_response_interp * total_stride / response_up_stride - # displacement in frame - scale = self.scales[scale_idx] # - disp_response_frame = disp_response_input * (self.s_x * scale) / instance_size - # position in frame coordinates - self.pos += disp_response_frame - # scale damping and saturation - self.s_x *= ((1 - scale_lr) + scale_lr * scale) # update - self.s_x = max(self.min_s_x, min(self.max_s_x, self.s_x)) - self.target_sz = ((1 - scale_lr) + scale_lr * scale) * self.target_sz # update - - box = np.array([ - self.pos[0] + 1 - (self.target_sz[0]) / 2, - self.pos[1] + 1 - (self.target_sz[1]) / 2, - self.target_sz[0], self.target_sz[1]]) - - return box - - def file2tensor(self, filepath, shape): - size = os.path.getsize(filepath) - res = [] - L = int(size / 4) # float32, so 4bytes - binfile = open(filepath, 'rb') - for i in range(L): - data = binfile.read(4) - num = struct.unpack('f', data) - res.append(num[0]) - binfile.close() - - dim_res = np.array(res).reshape(shape) - tensor_res = torch.tensor(dim_res, dtype=torch.float32) - - return tensor_res +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
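The postprocess step above recombines the two OM outputs on the host: file2tensor restores the exemplar branch as a 3x256x6x6 tensor and the search branch as a 1x768x22x22 tensor, and a grouped convolution collapses them into one 17x17 response map per scale. A minimal sketch of just that correlation step, with random tensors standing in for the .bin outputs:

```
# Shapes follow the comments in this file; random data stands in for real features.
import torch
import torch.nn.functional as F

z_f = torch.randn(3, 256, 6, 6)     # exemplar feature, replicated for the 3 scales
x_f = torch.randn(1, 768, 22, 22)   # search feature, 3 scales packed into channels
response_maps = F.conv2d(x_f, z_f, groups=3)    # 1, 3, 17, 17
response_maps = response_maps.transpose(0, 1)   # 3, 1, 17, 17
print(response_maps.numpy().squeeze().shape)    # (3, 17, 17), one map per scale
```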
+ +import os +import cv2 +import torch +import torch.nn.functional as F +import numpy as np +from utils import ToTensor, get_exemplar_image, get_pyramid_instance_image +import struct + + +exemplar_size = 127 # exemplar size z +instance_size = 255 # instance size x +context_amount = 0.5 # context amount +num_scale = 3 # number of scales +scale_step = 1.0375 # scale step of instance image +scale_penalty = 0.9745 # scale penalty +scale_lr = 0.59 # scale learning rate +response_up_stride = 16 # response upsample stride +response_sz = 17 # response size +window_influence = 0.176 # window influence +total_stride = 8 # total stride of backbone + + +class PrePostProcess(object): + def __init__(self): + self.penalty = np.ones((num_scale)) * scale_penalty + self.penalty[num_scale // 2] = 1 # [0.9745, 1, 0.9745] + + # create cosine window upsample stride=2^4=16, heatmap 17x17 + self.interp_response_sz = response_up_stride * response_sz # 272=16x17 + self.cosine_window = self._cosine_window((self.interp_response_sz, self.interp_response_sz)) + + def _cosine_window(self, size): + """ + get the cosine window + """ + cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(np.hanning(int(size[1]))[np.newaxis, :]) + cos_window = cos_window.astype(np.float32) + cos_window /= np.sum(cos_window) + return cos_window + + def cropexemplar(self, frame, box, save_path, file_name): + """ + Args: + frame: an RGB image + box: one-based bounding box [x, y, width, height] + """ + self.bbox = (box[0] - 1, box[1] - 1, box[0] - 1 + box[2], box[1] - 1 + box[3]) # zero based x1,y1,x2,y2 + self.pos = np.array([box[0] - 1 + (box[2]) / 2, box[1] - 1 + (box[3]) / 2]) # zero based cx, cy, + self.target_sz = np.array([box[2], box[3]]) # zero based w, h + + # get exemplar img + self.img_mean = tuple(map(int, frame.mean(axis=(0, 1)))) + exemplar_img, scale_z, s_z = get_exemplar_image(frame, self.bbox, + exemplar_size, context_amount, self.img_mean) + + # create scales: 0.96, 1, 1.037 + self.scales = scale_step ** np.arange(np.ceil(num_scale / 2) - num_scale, + np.floor(num_scale / 2) + 1) + + # create s_x : instance is twice as large as exemplar + self.s_x = s_z + (instance_size - exemplar_size) / scale_z # s-x search_sz, s-z exemplar_sz + + # arbitrary scale saturation + self.min_s_x = 0.2 * self.s_x + self.max_s_x = 5 * self.s_x + + # get exemplar feature + # m1: use torchvision.transforms + # exemplar_img = self.transforms(exemplar_img)[None, :, :, :] # 1,3,127,127 + # m2: don't use torchvision.transforms + exemplar_img = ToTensor(exemplar_img) + img = np.array(exemplar_img).astype(np.float32) + path = os.path.join(save_path, file_name.split('.')[0].replace('/', '-') + ".bin") + img.tofile(path) + return path + + def cropsearch(self, frame, save_path, file_name): + size_x_scales = self.s_x * self.scales # multi-scale search + pyramid = get_pyramid_instance_image(frame, self.pos, instance_size, size_x_scales, self.img_mean) + # m1: use torchvision.transforms + # instance_imgs = torch.cat([self.transforms(x)[None, :, :, :] for x in pyramid], dim=0) # 3, 3, 255, 255 + # m2: don't use torchvision.transforms + instance_imgs = torch.cat([ToTensor(x) for x in pyramid], dim=1) # 3, 3, 255, 255 + img = np.array(instance_imgs).astype(np.float32) + path = os.path.join(save_path, file_name.split('.')[0].replace('/', '-') + ".bin") + img.tofile(path) + return path + + def postprocess(self, x_f, z_f): + # x_f:search z_f:exemplar + response_maps = F.conv2d(x_f, z_f, groups=3) + response_maps = response_maps.transpose(0, 1) + response_maps = 
response_maps.numpy().squeeze() # 3, 17, 17 + + response_maps_up = [cv2.resize(x, (self.interp_response_sz, self.interp_response_sz), cv2.INTER_CUBIC) + for x in response_maps] # upsample + + # get max score of each scale + max_score = np.array([x.max() for x in response_maps_up]) * self.penalty # penalty=[0.9745, 1, 0.9745] + + # penalty scale change + scale_idx = max_score.argmax() + response_map = response_maps_up[scale_idx] + response_map -= response_map.min() + response_map /= response_map.sum() + response_map = (1 - window_influence) * response_map + \ + window_influence * self.cosine_window + max_r, max_c = np.unravel_index(response_map.argmax(), response_map.shape) + # displacement in interpolation response + disp_response_interp = np.array([max_c, max_r]) - (self.interp_response_sz - 1) / 2. + # displacement in input, response_up_stride=16, total_stride=8 + disp_response_input = disp_response_interp * total_stride / response_up_stride + # displacement in frame + scale = self.scales[scale_idx] # + disp_response_frame = disp_response_input * (self.s_x * scale) / instance_size + # position in frame coordinates + self.pos += disp_response_frame + # scale damping and saturation + self.s_x *= ((1 - scale_lr) + scale_lr * scale) # update + self.s_x = max(self.min_s_x, min(self.max_s_x, self.s_x)) + self.target_sz = ((1 - scale_lr) + scale_lr * scale) * self.target_sz # update + + box = np.array([ + self.pos[0] + 1 - (self.target_sz[0]) / 2, + self.pos[1] + 1 - (self.target_sz[1]) / 2, + self.target_sz[0], self.target_sz[1]]) + + return box + + def file2tensor(self, filepath, shape): + size = os.path.getsize(filepath) + res = [] + L = int(size / 4) # float32, so 4bytes + binfile = open(filepath, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res.append(num[0]) + binfile.close() + + dim_res = np.array(res).reshape(shape) + tensor_res = torch.tensor(dim_res, dtype=torch.float32) + + return tensor_res diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/pth2onnx.py b/ACL_PyTorch/contrib/cv/tracking/SiamFC/pth2onnx.py index d864e0ba0680b635dda54c958ee842061fed9c77..e68b3bce6cdc51ce8471158dab1c1e4f7effc2d5 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/pth2onnx.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/pth2onnx.py @@ -1,118 +1,118 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
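file2tensor above unpacks the float32 .bin outputs four bytes at a time with struct.unpack; an equivalent read can be done in a single numpy call. This is only an illustrative alternative under the same assumptions (flat float32 buffer of the given shape), not what the patch ships:

```
# Equivalent to PrePostProcess.file2tensor: read the whole float32 buffer at once.
import numpy as np
import torch

def file2tensor_fast(filepath, shape):
    data = np.fromfile(filepath, dtype=np.float32).reshape(shape)
    return torch.tensor(data, dtype=torch.float32)
```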
- -import sys -import torch -import torch.nn as nn -import torch.onnx -import torch.nn.functional as F - - -response_scale = 1e-3 - - -class SiameseAlexNet(nn.Module): - def __init__(self): - super(SiameseAlexNet, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(3, 96, 11, 2), - nn.BatchNorm2d(96), - nn.ReLU(inplace=True), - nn.MaxPool2d(3, 2), - nn.Conv2d(96, 256, 5, 1, groups=2), - nn.BatchNorm2d(256), - nn.ReLU(inplace=True), - nn.MaxPool2d(3, 2), - nn.Conv2d(256, 384, 3, 1), - nn.BatchNorm2d(384), - nn.ReLU(inplace=True), - nn.Conv2d(384, 384, 3, 1, groups=2), - nn.BatchNorm2d(384), - nn.ReLU(inplace=True), - nn.Conv2d(384, 256, 3, 1, groups=2) - ) - self.corr_bias = nn.Parameter(torch.zeros(1)) - self.exemplar = None - - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight.data, mode='fan_out', nonlinearity='relu') - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def forward(self, x): - exemplar, instance = x # x = ( exemplar, instance ) - # train - if exemplar is not None and instance is not None: # - batch_size = exemplar.shape[0] # - exemplar = self.features(exemplar) # batch, 256, 6, 6 - instance = self.features(instance) # batch, 256, 20, 20 - N, C, H, W = instance.shape - instance = instance.view(1, -1, H, W) - score = F.conv2d(instance, exemplar, groups=N) * response_scale + self.corr_bias - return score.transpose(0, 1) - # test(first frame) - elif exemplar is not None and instance is None: - self.exemplar = self.features(exemplar) # 1, 256, 6, 6 - self.exemplar = torch.cat([self.exemplar for _ in range(3)], dim=0) # 3, 256, 6, 6 - return self.exemplar - # test(not first frame) - else: - # inference used we don't need to scale the response or add bias - _, _, H, W = instance.shape - instance = instance.reshape(3, 3, H, W) - instance = self.features(instance) # 3 scale - N, C, H, W = instance.shape - instance = instance.view(1, N*C, H, W) # 1, NxC, H, W - # score = F.conv2d(instance, self.exemplar, groups=N) - # return score.transpose(0, 1) - return instance - - -def exemplar_convert(input_file, output_file): - model = SiameseAlexNet() - model.load_state_dict(torch.load(input_file, map_location='cpu')) - model.eval() - - input_names = ["actual_input_1"] - output_names = ["output1"] - input1 = torch.randn(1, 3, 127, 127) - input2 = None - dummy_input = [input1, input2] - torch.onnx.export(model, dummy_input, output_file, input_names=input_names, output_names=output_names, - opset_version=11) - - -def search_convert(input_file, output_file): - model = SiameseAlexNet() - model.load_state_dict(torch.load(input_file, map_location='cpu')) - model.eval() - - input_names = ["actual_input_1"] - output_names = ["output1"] - input1 = None - input2 = torch.randn(1, 9, 255, 255) - dummy_input = [input1, input2] - torch.onnx.export(model, dummy_input, output_file, input_names=input_names, output_names=output_names, - opset_version=11) - - -if __name__ == "__main__": - input_file = sys.argv[1] - output_file_exemplar = sys.argv[2] - output_file_search = sys.argv[3] - exemplar_convert(input_file, output_file_exemplar) - search_convert(input_file, output_file_search) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import torch +import torch.nn as nn +import torch.onnx +import torch.nn.functional as F + + +response_scale = 1e-3 + + +class SiameseAlexNet(nn.Module): + def __init__(self): + super(SiameseAlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 96, 11, 2), + nn.BatchNorm2d(96), + nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2), + nn.Conv2d(96, 256, 5, 1, groups=2), + nn.BatchNorm2d(256), + nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2), + nn.Conv2d(256, 384, 3, 1), + nn.BatchNorm2d(384), + nn.ReLU(inplace=True), + nn.Conv2d(384, 384, 3, 1, groups=2), + nn.BatchNorm2d(384), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, 3, 1, groups=2) + ) + self.corr_bias = nn.Parameter(torch.zeros(1)) + self.exemplar = None + + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight.data, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def forward(self, x): + exemplar, instance = x # x = ( exemplar, instance ) + # train + if exemplar is not None and instance is not None: # + batch_size = exemplar.shape[0] # + exemplar = self.features(exemplar) # batch, 256, 6, 6 + instance = self.features(instance) # batch, 256, 20, 20 + N, C, H, W = instance.shape + instance = instance.view(1, -1, H, W) + score = F.conv2d(instance, exemplar, groups=N) * response_scale + self.corr_bias + return score.transpose(0, 1) + # test(first frame) + elif exemplar is not None and instance is None: + self.exemplar = self.features(exemplar) # 1, 256, 6, 6 + self.exemplar = torch.cat([self.exemplar for _ in range(3)], dim=0) # 3, 256, 6, 6 + return self.exemplar + # test(not first frame) + else: + # inference used we don't need to scale the response or add bias + _, _, H, W = instance.shape + instance = instance.reshape(3, 3, H, W) + instance = self.features(instance) # 3 scale + N, C, H, W = instance.shape + instance = instance.view(1, N*C, H, W) # 1, NxC, H, W + # score = F.conv2d(instance, self.exemplar, groups=N) + # return score.transpose(0, 1) + return instance + + +def exemplar_convert(input_file, output_file): + model = SiameseAlexNet() + model.load_state_dict(torch.load(input_file, map_location='cpu')) + model.eval() + + input_names = ["actual_input_1"] + output_names = ["output1"] + input1 = torch.randn(1, 3, 127, 127) + input2 = None + dummy_input = [input1, input2] + torch.onnx.export(model, dummy_input, output_file, input_names=input_names, output_names=output_names, + opset_version=11) + + +def search_convert(input_file, output_file): + model = SiameseAlexNet() + model.load_state_dict(torch.load(input_file, map_location='cpu')) + model.eval() + + input_names = ["actual_input_1"] + output_names = ["output1"] + input1 = None + input2 = torch.randn(1, 9, 255, 255) + dummy_input = [input1, input2] + torch.onnx.export(model, dummy_input, output_file, input_names=input_names, output_names=output_names, + opset_version=11) + + +if __name__ == "__main__": + input_file = sys.argv[1] + output_file_exemplar = sys.argv[2] + output_file_search = sys.argv[3] + 
exemplar_convert(input_file, output_file_exemplar) + search_convert(input_file, output_file_search) diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/requirements.txt b/ACL_PyTorch/contrib/cv/tracking/SiamFC/requirements.txt index fb20cd32505c79aaf5e83bf9ebcb40cdd4eb9b0d..196f844119c1dd316412f7f453b1c5b11771e9b4 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/requirements.txt +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/requirements.txt @@ -1,8 +1,8 @@ -torch==1.9.0 -torchvision==0.10.0 -onnx==1.9.0 -numpy==1.19.2 -opencv-python==4.5.2.54 -pillow==8.2.0 -six==1.15.0 +torch==1.9.0 +torchvision==0.10.0 +onnx==1.9.0 +numpy==1.19.2 +opencv-python==4.5.2.54 +pillow==8.2.0 +six==1.15.0 tqdm==4.61.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/test/parse.py b/ACL_PyTorch/contrib/cv/tracking/SiamFC/test/parse.py index 7cf6d0faa4019f95d30b1762da8bea66a2aa70b6..e64ee7ea699d23cd64a6924ccb82489200424dd5 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/test/parse.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/test/parse.py @@ -1,25 +1,25 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[7].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
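Once pth2onnx.py above has written the exemplar and search models, the onnx package already pinned in requirements.txt can verify the exported graphs. The sketch below is illustrative only; exemplar.onnx and search.onnx are placeholder names for the output paths passed to pth2onnx.py as sys.argv[2] and sys.argv[3].

```
# Illustrative check only; exemplar.onnx / search.onnx are placeholder names.
import onnx

for path in ("exemplar.onnx", "search.onnx"):
    model = onnx.load(path)
    onnx.checker.check_model(model)  # raises if the exported graph is malformed
    dims = [d.dim_value for d in model.graph.input[0].type.tensor_type.shape.dim]
    print(path, "input shape:", dims)  # expect [1, 3, 127, 127] and [1, 9, 255, 255]
```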
+ +import sys +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[7].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamFC/wholeprocess.py b/ACL_PyTorch/contrib/cv/tracking/SiamFC/wholeprocess.py index 2bd05d59587ae0f691c9b9dbf4c57435df79cd7c..9fd127d256444befae0747fb530ca13094d64d93 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamFC/wholeprocess.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamFC/wholeprocess.py @@ -1,426 +1,426 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import glob -import numpy as np -import io -import six -from itertools import chain -import cv2 -import json -import sys -import multiprocessing -sys.path.append(os.getcwd()) -from prepostprocess import PrePostProcess -from utils import rect_iou, center_error - - -deviceid = 0 - - -class OTB(object): - r"""`OTB `_ Datasets. - - Publication: - ``Object Tracking Benchmark``, Y. Wu, J. Lim and M.-H. Yang, IEEE TPAMI 2015. - - Args: - root_dir (string): Root directory of dataset where sequence - folders exist. - version (integer or string): Specify the benchmark version, specify as one of - ``2013``, ``2015``, ``tb50`` and ``tb100``. 
- """ - __otb13_seqs = ['Basketball', 'Bolt', 'Boy', 'Car4', 'CarDark', - 'CarScale', 'Coke', 'Couple', 'Crossing', 'David', - 'David2', 'David3', 'Deer', 'Dog1', 'Doll', 'Dudek', - 'FaceOcc1', 'FaceOcc2', 'Fish', 'FleetFace', - 'Football', 'Football1', 'Freeman1', 'Freeman3', - 'Freeman4', 'Girl', 'Ironman', 'Jogging', 'Jumping', - 'Lemming', 'Liquor', 'Matrix', 'Mhyang', 'MotorRolling', - 'MountainBike', 'Shaking', 'Singer1', 'Singer2', - 'Skating1', 'Skiing', 'Soccer', 'Subway', 'Suv', - 'Sylvester', 'Tiger1', 'Tiger2', 'Trellis', 'Walking', - 'Walking2', 'Woman'] - - __tb50_seqs = ['Basketball', 'Biker', 'Bird1', 'BlurBody', 'BlurCar2', - 'BlurFace', 'BlurOwl', 'Bolt', 'Box', 'Car1', 'Car4', - 'CarDark', 'CarScale', 'ClifBar', 'Couple', 'Crowds', - 'David', 'Deer', 'Diving', 'DragonBaby', 'Dudek', - 'Football', 'Freeman4', 'Girl', 'Human3', 'Human4', - 'Human6', 'Human9', 'Ironman', 'Jump', 'Jumping', - 'Liquor', 'Matrix', 'MotorRolling', 'Panda', 'RedTeam', - 'Shaking', 'Singer2', 'Skating1', 'Skating2', 'Skiing', - 'Soccer', 'Surfer', 'Sylvester', 'Tiger2', 'Trellis', - 'Walking', 'Walking2', 'Woman'] - - __tb100_seqs = ['Bird2', 'BlurCar1', 'BlurCar3', 'BlurCar4', 'Board', - 'Bolt2', 'Boy', 'Car2', 'Car24', 'Coke', 'Coupon', - 'Crossing', 'Dancer', 'Dancer2', 'David2', 'David3', - 'Dog', 'Dog1', 'Doll', 'FaceOcc1', 'FaceOcc2', 'Fish', - 'FleetFace', 'Football1', 'Freeman1', 'Freeman3', - 'Girl2', 'Gym', 'Human2', 'Human5', 'Human7', 'Human8', - 'Jogging', 'KiteSurf', 'Lemming', 'Man', 'Mhyang', - 'MountainBike', 'Rubik', 'Singer1', 'Skater', - 'Skater2', 'Subway', 'Suv', 'Tiger1', 'Toy', 'Trans', - 'Twinnings', 'Vase'] + __tb50_seqs - - __otb15_seqs = __tb100_seqs - - __version_dict = { - 2013: __otb13_seqs, - 2015: __otb15_seqs, - 'otb2013': __otb13_seqs, - 'otb2015': __otb15_seqs, - 'tb50': __tb50_seqs, - 'tb100': __tb100_seqs} - - def __init__(self, root_dir, version=2015): - super(OTB, self).__init__() - assert version in self.__version_dict - - self.root_dir = root_dir - self.version = version - self._check_integrity(root_dir, version) - valid_seqs = self.__version_dict[version] - self.anno_files = sorted(list(chain.from_iterable(glob.glob( - os.path.join(root_dir, s, 'groundtruth*.txt')) for s in valid_seqs))) - # remove empty annotation files - # (e.g. groundtruth_rect.1.txt of Human4) - self.anno_files = self._filter_files(self.anno_files) - self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] - self.seq_names = [os.path.basename(d) for d in self.seq_dirs] - # rename repeated sequence names - # (e.g. Jogging and Skating2) - self.seq_names = self._rename_seqs(self.seq_names) - - def __getitem__(self, index): - r""" - Args: - index (integer or string): Index or name of a sequence. - - Returns: - tuple: (img_files, anno), where ``img_files`` is a list of - file names and ``anno`` is a N x 4 (rectangles) numpy array. 
- """ - if isinstance(index, six.string_types): - if not index in self.seq_names: - raise Exception('Sequence {} not found.'.format(index)) - index = self.seq_names.index(index) - - img_files = sorted(glob.glob( - os.path.join(self.seq_dirs[index], 'img/*.jpg'))) - - # special sequences - seq_name = self.seq_names[index] - if seq_name.lower() == 'david': - img_files = img_files[300 - 1:770] - elif seq_name.lower() == 'football1': - img_files = img_files[:74] - elif seq_name.lower() == 'freeman3': - img_files = img_files[:460] - elif seq_name.lower() == 'freeman4': - img_files = img_files[:283] - elif seq_name.lower() == 'diving': - img_files = img_files[:215] - - # to deal with different delimeters - with open(self.anno_files[index], 'r') as f: - anno = np.loadtxt(io.StringIO(f.read().replace(',', ' '))) - assert len(img_files) == len(anno) - assert anno.shape[1] == 4 - - return img_files, anno - - def __len__(self): - return len(self.seq_names) - - def _filter_files(self, filenames): - filtered_files = [] - for filename in filenames: - with open(filename, 'r') as f: - if f.read().strip() == '': - print('Warning: %s is empty.' % filename) - else: - filtered_files.append(filename) - - return filtered_files - - def _rename_seqs(self, seq_names): - # in case some sequences may have multiple targets - renamed_seqs = [] - for i, seq_name in enumerate(seq_names): - if seq_names.count(seq_name) == 1: - renamed_seqs.append(seq_name) - else: - ind = seq_names[:i + 1].count(seq_name) - renamed_seqs.append('%s.%d' % (seq_name, ind)) - - return renamed_seqs - - def _check_integrity(self, root_dir, version): - assert version in self.__version_dict - seq_names = self.__version_dict[version] - - if os.path.isdir(root_dir) and len(os.listdir(root_dir)) > 0: - # check each sequence folder - for seq_name in seq_names: - seq_dir = os.path.join(root_dir, seq_name) - if not os.path.isdir(seq_dir): - print('Warning: sequence %s not exists.' % seq_name) - else: - # dataset not exists - raise Exception('Dataset not found or corrupted. ' + - 'You can use download=True to download it.') - - -class ExperimentOTB(object): - """Experiment pipeline and evaluation toolkit for OTB dataset. - - Args: - root_dir (string): Root directory of OTB dataset. - version (integer or string): Specify the benchmark version, specify as one of - ``2013``, ``2015``, ``tb50`` and ``tb100``. Default is ``2015``. - result_dir (string, optional): Directory for storing tracking - results. Default is ``./results``. - report_dir (string, optional): Directory for storing performance - evaluation results. Default is ``./reports``. 
- """ - def __init__(self, root_dir, version=2015, - result_dir='results', report_dir='reports'): - super(ExperimentOTB, self).__init__() - self.dataset = OTB(root_dir, version) - self.result_dir = os.path.join(result_dir, 'OTB' + str(version)) - self.report_dir = os.path.join(report_dir, 'OTB' + str(version)) - # as nbins_iou increases, the success score - # converges to the average overlap (AO) - self.nbins_iou = 21 - self.nbins_ce = 51 - - def getlendataset(self): - return len(self.dataset) - - def run(self, savepath, infopath, arch, idx): - # get the seq_name and information of files - img_files, anno = self.dataset[idx] - seq_name = self.dataset.seq_names[idx] - # generate directory for current seq - savepath = savepath + "/" + str(idx) - if not os.path.exists(savepath): - os.makedirs(savepath) - infopath = infopath + "/" + str(idx) + ".info" - # skip if result exist - record_file = os.path.join(self.result_dir, 'siamfc', '%s.txt' % seq_name) - if os.path.exists(record_file): - print('Found results of %s, skipping' % seq_name) - return - frame_num = len(img_files) - boxes = np.zeros((frame_num, 4)) - boxes[0] = anno[0, :] # x,y, w, h - times = np.zeros(frame_num) - - prepostpro = PrePostProcess() - for f, img_file in enumerate(img_files): - img = cv2.imread(img_file, cv2.IMREAD_COLOR) - if f == 0: - # Pre-process and generate bin - exemplar_path = prepostpro.cropexemplar(img, anno[0, :], savepath, img_file) - # get_info - with open(infopath, 'w') as file1: - content = ' '.join([str(0), '.'+exemplar_path, str(127), str(127)]) - file1.write(content) - file1.write('\n') - # infer - os.system('%s -model_type=vision -device_id=%d -batch_size=1 ' - '-om_path=s%/exemplar_bs1.om -input_text_path=%s ' - '-input_width=127 -input_height=127 -output_binary=True -useDvpp=False >/dev/null 2>&1' - % (benchmark_path, deviceid, om_path, infopath)) - # the exemplar has a result of 3*256*6*6 tensor - # read tensor from bin - filename = img_file.replace('/', '-').split('.')[0] + '_1.bin' - filename = 'result/dumpOutput_device' + str(deviceid) + '/' + filename - exemplar_feature = prepostpro.file2tensor(filename, (3, 256, 6, 6)) - else: - # Pre-process and generate bin - search_path = prepostpro.cropsearch(img, savepath, img_file) - # get_info - with open(infopath, 'w') as file2: - content = ' '.join([str(0), '.'+search_path, str(255), str(255)]) - file2.write(content) - file2.write('\n') - # infer - os.system('%s -model_type=vision -device_id=%d -batch_size=1 ' - '-om_path=%s/search_bs1.om -input_text_path=%s ' - '-input_width=255 -input_height=255 -output_binary=True -useDvpp=False >/dev/null 2>&1' - % (benchmark_path, deviceid, om_path, infopath)) - # the exemplar has a result of 1*768*22*22 tensor - # read tensor from bin - filename = img_file.replace('/', '-').split('.')[0] + '_1.bin' - filename = 'result/dumpOutput_device' + str(deviceid) + '/' + filename - search_feature = prepostpro.file2tensor(filename, (1, 768, 22, 22)) - # Post-process - boxes[f, :] = prepostpro.postprocess(search_feature, exemplar_feature) - times[f] = 1 - assert len(boxes) == len(anno) - # record results - self._record(record_file, boxes, times) - # delete useless data to save space - os.system('rm -rf %s/*' % savepath) - print("Results of %s finished!" 
% seq_name) - - def report(self, tracker_names): - - assert isinstance(tracker_names, (list, tuple)) # ‘SiamFC’ - - # assume tracker_names[0] is your tracker - report_dir = os.path.join(self.report_dir, tracker_names[0]) - - if not os.path.isdir(report_dir): - os.makedirs(report_dir) - - report_file = os.path.join(report_dir, 'performance.json') - - performance = {} - for name in tracker_names: - print('Evaluating', name) - seq_num = len(self.dataset) - succ_curve = np.zeros((seq_num, self.nbins_iou)) - prec_curve = np.zeros((seq_num, self.nbins_ce)) - speeds = np.zeros(seq_num) - # - performance.update({name: {'overall': {}, 'seq_wise': {}}}) - - for s, (_, anno) in enumerate(self.dataset): - - seq_name = self.dataset.seq_names[s] - - record_file = os.path.join(self.result_dir, name, '%s.txt' % seq_name) - - boxes = np.loadtxt(record_file, delimiter=',') - - boxes[0] = anno[0] - - assert len(boxes) == len(anno) - - ious, center_errors = self._calc_metrics(boxes, anno) - - succ_curve[s], prec_curve[s] = self._calc_curves(ious, center_errors) - - # calculate average tracking speed - time_file = os.path.join(self.result_dir, name, 'times/%s_time.txt' % seq_name) - - if os.path.isfile(time_file): - times = np.loadtxt(time_file) - times = times[times > 0] - if len(times) > 0: - speeds[s] = np.mean(1. / times) - # store sequence-wise performance - performance[name]['seq_wise'].update({seq_name: { - 'success_curve': succ_curve[s].tolist(), - 'precision_curve': prec_curve[s].tolist(), - 'success_score': np.mean(succ_curve[s]), - 'precision_score': prec_curve[s][20], - 'success_rate': succ_curve[s][self.nbins_iou // 2], - 'speed_fps': speeds[s] if speeds[s] > 0 else -1}}) - - succ_curve = np.mean(succ_curve, axis=0) - prec_curve = np.mean(prec_curve, axis=0) - succ_score = np.mean(succ_curve) - prec_score = prec_curve[20] - succ_rate = succ_curve[self.nbins_iou // 2] - if np.count_nonzero(speeds) > 0: - avg_speed = np.sum(speeds) / np.count_nonzero(speeds) - else: - avg_speed = -1 - - # store overall performance - performance[name]['overall'].update({ - 'success_curve': succ_curve.tolist(), - 'precision_curve': prec_curve.tolist(), - 'success_score': succ_score, - 'precision_score': prec_score, - 'success_rate': succ_rate, - 'speed_fps': avg_speed}) - # print('prec_score:%s --succ_score:%s --succ_rate:%s' % (prec_score,succ_score,succ_rate)) - # report the performance - with open(report_file, 'w') as f: - json.dump(performance, f, indent=4) - - return prec_score, succ_score, succ_rate - - def _record(self, record_file, boxes, times): - # record bounding boxes - record_dir = os.path.dirname(record_file) - if not os.path.isdir(record_dir): - os.makedirs(record_dir) - np.savetxt(record_file, boxes, fmt='%.3f', delimiter=',') - - # print(' Results recorded at', record_file) - - # record running times - time_dir = os.path.join(record_dir, 'times') - if not os.path.isdir(time_dir): - os.makedirs(time_dir) - time_file = os.path.join(time_dir, os.path.basename( - record_file).replace('.txt', '_time.txt')) - np.savetxt(time_file, times, fmt='%.8f') - - def _calc_metrics(self, boxes, anno): - # can be modified by children classes - ious = rect_iou(boxes, anno) - center_errors = center_error(boxes, anno) - return ious, center_errors - - def _calc_curves(self, ious, center_errors): - ious = np.asarray(ious, float)[:, np.newaxis] - center_errors = np.asarray(center_errors, float)[:, np.newaxis] - - thr_iou = np.linspace(0, 1, self.nbins_iou)[np.newaxis, :] - thr_ce = np.arange(0, self.nbins_ce)[np.newaxis, :] 
- - bin_iou = np.greater(ious, thr_iou) - bin_ce = np.less_equal(center_errors, thr_ce) - - succ_curve = np.mean(bin_iou, axis=0) - prec_curve = np.mean(bin_ce, axis=0) - - return succ_curve, prec_curve - - -if __name__ == "__main__": - data_path = sys.argv[1] - save_path = sys.argv[2] - info_path = sys.argv[3] - arch = sys.argv[4] - deviceid = int(sys.argv[5]) - benchmark_path = sys.argv[6] - om_path = sys.argv[7] - os.system('rm -rf %s' % save_path) - os.system('rm -rf %s' % info_path) - os.system('rm -rf ./result/dumpOutput_device%d' % deviceid) - if not os.path.exists(save_path): - os.makedirs(save_path) - if not os.path.exists(info_path): - os.makedirs(info_path) - e = ExperimentOTB(data_path, version=2015) - totallen = e.getlendataset() - pool = multiprocessing.Pool(processes=12) - for i in range(totallen): - pool.apply_async(e.run, (save_path, info_path, arch, i, )) - pool.close() - pool.join() - prec_score, succ_score, succ_rate = e.report(['siamfc']) - ss = '-prec_score:%.3f -succ_score:%.3f -succ_rate:%.3f' % (float(prec_score), float(succ_score), float(succ_rate)) - print("====accuracy data====") - print(ss) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +import numpy as np +import io +import six +from itertools import chain +import cv2 +import json +import sys +import multiprocessing +sys.path.append(os.getcwd()) +from prepostprocess import PrePostProcess +from utils import rect_iou, center_error + + +deviceid = 0 + + +class OTB(object): + r"""`OTB `_ Datasets. + + Publication: + ``Object Tracking Benchmark``, Y. Wu, J. Lim and M.-H. Yang, IEEE TPAMI 2015. + + Args: + root_dir (string): Root directory of dataset where sequence + folders exist. + version (integer or string): Specify the benchmark version, specify as one of + ``2013``, ``2015``, ``tb50`` and ``tb100``. 
+ """ + __otb13_seqs = ['Basketball', 'Bolt', 'Boy', 'Car4', 'CarDark', + 'CarScale', 'Coke', 'Couple', 'Crossing', 'David', + 'David2', 'David3', 'Deer', 'Dog1', 'Doll', 'Dudek', + 'FaceOcc1', 'FaceOcc2', 'Fish', 'FleetFace', + 'Football', 'Football1', 'Freeman1', 'Freeman3', + 'Freeman4', 'Girl', 'Ironman', 'Jogging', 'Jumping', + 'Lemming', 'Liquor', 'Matrix', 'Mhyang', 'MotorRolling', + 'MountainBike', 'Shaking', 'Singer1', 'Singer2', + 'Skating1', 'Skiing', 'Soccer', 'Subway', 'Suv', + 'Sylvester', 'Tiger1', 'Tiger2', 'Trellis', 'Walking', + 'Walking2', 'Woman'] + + __tb50_seqs = ['Basketball', 'Biker', 'Bird1', 'BlurBody', 'BlurCar2', + 'BlurFace', 'BlurOwl', 'Bolt', 'Box', 'Car1', 'Car4', + 'CarDark', 'CarScale', 'ClifBar', 'Couple', 'Crowds', + 'David', 'Deer', 'Diving', 'DragonBaby', 'Dudek', + 'Football', 'Freeman4', 'Girl', 'Human3', 'Human4', + 'Human6', 'Human9', 'Ironman', 'Jump', 'Jumping', + 'Liquor', 'Matrix', 'MotorRolling', 'Panda', 'RedTeam', + 'Shaking', 'Singer2', 'Skating1', 'Skating2', 'Skiing', + 'Soccer', 'Surfer', 'Sylvester', 'Tiger2', 'Trellis', + 'Walking', 'Walking2', 'Woman'] + + __tb100_seqs = ['Bird2', 'BlurCar1', 'BlurCar3', 'BlurCar4', 'Board', + 'Bolt2', 'Boy', 'Car2', 'Car24', 'Coke', 'Coupon', + 'Crossing', 'Dancer', 'Dancer2', 'David2', 'David3', + 'Dog', 'Dog1', 'Doll', 'FaceOcc1', 'FaceOcc2', 'Fish', + 'FleetFace', 'Football1', 'Freeman1', 'Freeman3', + 'Girl2', 'Gym', 'Human2', 'Human5', 'Human7', 'Human8', + 'Jogging', 'KiteSurf', 'Lemming', 'Man', 'Mhyang', + 'MountainBike', 'Rubik', 'Singer1', 'Skater', + 'Skater2', 'Subway', 'Suv', 'Tiger1', 'Toy', 'Trans', + 'Twinnings', 'Vase'] + __tb50_seqs + + __otb15_seqs = __tb100_seqs + + __version_dict = { + 2013: __otb13_seqs, + 2015: __otb15_seqs, + 'otb2013': __otb13_seqs, + 'otb2015': __otb15_seqs, + 'tb50': __tb50_seqs, + 'tb100': __tb100_seqs} + + def __init__(self, root_dir, version=2015): + super(OTB, self).__init__() + assert version in self.__version_dict + + self.root_dir = root_dir + self.version = version + self._check_integrity(root_dir, version) + valid_seqs = self.__version_dict[version] + self.anno_files = sorted(list(chain.from_iterable(glob.glob( + os.path.join(root_dir, s, 'groundtruth*.txt')) for s in valid_seqs))) + # remove empty annotation files + # (e.g. groundtruth_rect.1.txt of Human4) + self.anno_files = self._filter_files(self.anno_files) + self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] + self.seq_names = [os.path.basename(d) for d in self.seq_dirs] + # rename repeated sequence names + # (e.g. Jogging and Skating2) + self.seq_names = self._rename_seqs(self.seq_names) + + def __getitem__(self, index): + r""" + Args: + index (integer or string): Index or name of a sequence. + + Returns: + tuple: (img_files, anno), where ``img_files`` is a list of + file names and ``anno`` is a N x 4 (rectangles) numpy array. 
+ """ + if isinstance(index, six.string_types): + if not index in self.seq_names: + raise Exception('Sequence {} not found.'.format(index)) + index = self.seq_names.index(index) + + img_files = sorted(glob.glob( + os.path.join(self.seq_dirs[index], 'img/*.jpg'))) + + # special sequences + seq_name = self.seq_names[index] + if seq_name.lower() == 'david': + img_files = img_files[300 - 1:770] + elif seq_name.lower() == 'football1': + img_files = img_files[:74] + elif seq_name.lower() == 'freeman3': + img_files = img_files[:460] + elif seq_name.lower() == 'freeman4': + img_files = img_files[:283] + elif seq_name.lower() == 'diving': + img_files = img_files[:215] + + # to deal with different delimeters + with open(self.anno_files[index], 'r') as f: + anno = np.loadtxt(io.StringIO(f.read().replace(',', ' '))) + assert len(img_files) == len(anno) + assert anno.shape[1] == 4 + + return img_files, anno + + def __len__(self): + return len(self.seq_names) + + def _filter_files(self, filenames): + filtered_files = [] + for filename in filenames: + with open(filename, 'r') as f: + if f.read().strip() == '': + print('Warning: %s is empty.' % filename) + else: + filtered_files.append(filename) + + return filtered_files + + def _rename_seqs(self, seq_names): + # in case some sequences may have multiple targets + renamed_seqs = [] + for i, seq_name in enumerate(seq_names): + if seq_names.count(seq_name) == 1: + renamed_seqs.append(seq_name) + else: + ind = seq_names[:i + 1].count(seq_name) + renamed_seqs.append('%s.%d' % (seq_name, ind)) + + return renamed_seqs + + def _check_integrity(self, root_dir, version): + assert version in self.__version_dict + seq_names = self.__version_dict[version] + + if os.path.isdir(root_dir) and len(os.listdir(root_dir)) > 0: + # check each sequence folder + for seq_name in seq_names: + seq_dir = os.path.join(root_dir, seq_name) + if not os.path.isdir(seq_dir): + print('Warning: sequence %s not exists.' % seq_name) + else: + # dataset not exists + raise Exception('Dataset not found or corrupted. ' + + 'You can use download=True to download it.') + + +class ExperimentOTB(object): + """Experiment pipeline and evaluation toolkit for OTB dataset. + + Args: + root_dir (string): Root directory of OTB dataset. + version (integer or string): Specify the benchmark version, specify as one of + ``2013``, ``2015``, ``tb50`` and ``tb100``. Default is ``2015``. + result_dir (string, optional): Directory for storing tracking + results. Default is ``./results``. + report_dir (string, optional): Directory for storing performance + evaluation results. Default is ``./reports``. 
+ """ + def __init__(self, root_dir, version=2015, + result_dir='results', report_dir='reports'): + super(ExperimentOTB, self).__init__() + self.dataset = OTB(root_dir, version) + self.result_dir = os.path.join(result_dir, 'OTB' + str(version)) + self.report_dir = os.path.join(report_dir, 'OTB' + str(version)) + # as nbins_iou increases, the success score + # converges to the average overlap (AO) + self.nbins_iou = 21 + self.nbins_ce = 51 + + def getlendataset(self): + return len(self.dataset) + + def run(self, savepath, infopath, arch, idx): + # get the seq_name and information of files + img_files, anno = self.dataset[idx] + seq_name = self.dataset.seq_names[idx] + # generate directory for current seq + savepath = savepath + "/" + str(idx) + if not os.path.exists(savepath): + os.makedirs(savepath) + infopath = infopath + "/" + str(idx) + ".info" + # skip if result exist + record_file = os.path.join(self.result_dir, 'siamfc', '%s.txt' % seq_name) + if os.path.exists(record_file): + print('Found results of %s, skipping' % seq_name) + return + frame_num = len(img_files) + boxes = np.zeros((frame_num, 4)) + boxes[0] = anno[0, :] # x,y, w, h + times = np.zeros(frame_num) + + prepostpro = PrePostProcess() + for f, img_file in enumerate(img_files): + img = cv2.imread(img_file, cv2.IMREAD_COLOR) + if f == 0: + # Pre-process and generate bin + exemplar_path = prepostpro.cropexemplar(img, anno[0, :], savepath, img_file) + # get_info + with open(infopath, 'w') as file1: + content = ' '.join([str(0), '.'+exemplar_path, str(127), str(127)]) + file1.write(content) + file1.write('\n') + # infer + os.system('%s -model_type=vision -device_id=%d -batch_size=1 ' + '-om_path=s%/exemplar_bs1.om -input_text_path=%s ' + '-input_width=127 -input_height=127 -output_binary=True -useDvpp=False >/dev/null 2>&1' + % (benchmark_path, deviceid, om_path, infopath)) + # the exemplar has a result of 3*256*6*6 tensor + # read tensor from bin + filename = img_file.replace('/', '-').split('.')[0] + '_1.bin' + filename = 'result/dumpOutput_device' + str(deviceid) + '/' + filename + exemplar_feature = prepostpro.file2tensor(filename, (3, 256, 6, 6)) + else: + # Pre-process and generate bin + search_path = prepostpro.cropsearch(img, savepath, img_file) + # get_info + with open(infopath, 'w') as file2: + content = ' '.join([str(0), '.'+search_path, str(255), str(255)]) + file2.write(content) + file2.write('\n') + # infer + os.system('%s -model_type=vision -device_id=%d -batch_size=1 ' + '-om_path=%s/search_bs1.om -input_text_path=%s ' + '-input_width=255 -input_height=255 -output_binary=True -useDvpp=False >/dev/null 2>&1' + % (benchmark_path, deviceid, om_path, infopath)) + # the exemplar has a result of 1*768*22*22 tensor + # read tensor from bin + filename = img_file.replace('/', '-').split('.')[0] + '_1.bin' + filename = 'result/dumpOutput_device' + str(deviceid) + '/' + filename + search_feature = prepostpro.file2tensor(filename, (1, 768, 22, 22)) + # Post-process + boxes[f, :] = prepostpro.postprocess(search_feature, exemplar_feature) + times[f] = 1 + assert len(boxes) == len(anno) + # record results + self._record(record_file, boxes, times) + # delete useless data to save space + os.system('rm -rf %s/*' % savepath) + print("Results of %s finished!" 
% seq_name) + + def report(self, tracker_names): + + assert isinstance(tracker_names, (list, tuple)) # ‘SiamFC’ + + # assume tracker_names[0] is your tracker + report_dir = os.path.join(self.report_dir, tracker_names[0]) + + if not os.path.isdir(report_dir): + os.makedirs(report_dir) + + report_file = os.path.join(report_dir, 'performance.json') + + performance = {} + for name in tracker_names: + print('Evaluating', name) + seq_num = len(self.dataset) + succ_curve = np.zeros((seq_num, self.nbins_iou)) + prec_curve = np.zeros((seq_num, self.nbins_ce)) + speeds = np.zeros(seq_num) + # + performance.update({name: {'overall': {}, 'seq_wise': {}}}) + + for s, (_, anno) in enumerate(self.dataset): + + seq_name = self.dataset.seq_names[s] + + record_file = os.path.join(self.result_dir, name, '%s.txt' % seq_name) + + boxes = np.loadtxt(record_file, delimiter=',') + + boxes[0] = anno[0] + + assert len(boxes) == len(anno) + + ious, center_errors = self._calc_metrics(boxes, anno) + + succ_curve[s], prec_curve[s] = self._calc_curves(ious, center_errors) + + # calculate average tracking speed + time_file = os.path.join(self.result_dir, name, 'times/%s_time.txt' % seq_name) + + if os.path.isfile(time_file): + times = np.loadtxt(time_file) + times = times[times > 0] + if len(times) > 0: + speeds[s] = np.mean(1. / times) + # store sequence-wise performance + performance[name]['seq_wise'].update({seq_name: { + 'success_curve': succ_curve[s].tolist(), + 'precision_curve': prec_curve[s].tolist(), + 'success_score': np.mean(succ_curve[s]), + 'precision_score': prec_curve[s][20], + 'success_rate': succ_curve[s][self.nbins_iou // 2], + 'speed_fps': speeds[s] if speeds[s] > 0 else -1}}) + + succ_curve = np.mean(succ_curve, axis=0) + prec_curve = np.mean(prec_curve, axis=0) + succ_score = np.mean(succ_curve) + prec_score = prec_curve[20] + succ_rate = succ_curve[self.nbins_iou // 2] + if np.count_nonzero(speeds) > 0: + avg_speed = np.sum(speeds) / np.count_nonzero(speeds) + else: + avg_speed = -1 + + # store overall performance + performance[name]['overall'].update({ + 'success_curve': succ_curve.tolist(), + 'precision_curve': prec_curve.tolist(), + 'success_score': succ_score, + 'precision_score': prec_score, + 'success_rate': succ_rate, + 'speed_fps': avg_speed}) + # print('prec_score:%s --succ_score:%s --succ_rate:%s' % (prec_score,succ_score,succ_rate)) + # report the performance + with open(report_file, 'w') as f: + json.dump(performance, f, indent=4) + + return prec_score, succ_score, succ_rate + + def _record(self, record_file, boxes, times): + # record bounding boxes + record_dir = os.path.dirname(record_file) + if not os.path.isdir(record_dir): + os.makedirs(record_dir) + np.savetxt(record_file, boxes, fmt='%.3f', delimiter=',') + + # print(' Results recorded at', record_file) + + # record running times + time_dir = os.path.join(record_dir, 'times') + if not os.path.isdir(time_dir): + os.makedirs(time_dir) + time_file = os.path.join(time_dir, os.path.basename( + record_file).replace('.txt', '_time.txt')) + np.savetxt(time_file, times, fmt='%.8f') + + def _calc_metrics(self, boxes, anno): + # can be modified by children classes + ious = rect_iou(boxes, anno) + center_errors = center_error(boxes, anno) + return ious, center_errors + + def _calc_curves(self, ious, center_errors): + ious = np.asarray(ious, float)[:, np.newaxis] + center_errors = np.asarray(center_errors, float)[:, np.newaxis] + + thr_iou = np.linspace(0, 1, self.nbins_iou)[np.newaxis, :] + thr_ce = np.arange(0, self.nbins_ce)[np.newaxis, :] 
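        # thr_iou is a (1, nbins_iou) row of IoU thresholds in [0, 1] and thr_ce a
        # (1, nbins_ce) row of center-error thresholds in pixels; broadcasting them
        # against the (N, 1) ious / center_errors below gives per-frame boolean
        # matrices, and averaging over frames yields the success and precision curves.
        # report() then uses np.mean(succ_curve) as the success score, prec_curve[20]
        # as the precision score (20 px threshold), and succ_curve[nbins_iou // 2]
        # as the success rate at IoU 0.5.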
+ + bin_iou = np.greater(ious, thr_iou) + bin_ce = np.less_equal(center_errors, thr_ce) + + succ_curve = np.mean(bin_iou, axis=0) + prec_curve = np.mean(bin_ce, axis=0) + + return succ_curve, prec_curve + + +if __name__ == "__main__": + data_path = sys.argv[1] + save_path = sys.argv[2] + info_path = sys.argv[3] + arch = sys.argv[4] + deviceid = int(sys.argv[5]) + benchmark_path = sys.argv[6] + om_path = sys.argv[7] + os.system('rm -rf %s' % save_path) + os.system('rm -rf %s' % info_path) + os.system('rm -rf ./result/dumpOutput_device%d' % deviceid) + if not os.path.exists(save_path): + os.makedirs(save_path) + if not os.path.exists(info_path): + os.makedirs(info_path) + e = ExperimentOTB(data_path, version=2015) + totallen = e.getlendataset() + pool = multiprocessing.Pool(processes=12) + for i in range(totallen): + pool.apply_async(e.run, (save_path, info_path, arch, i, )) + pool.close() + pool.join() + prec_score, succ_score, succ_rate = e.report(['siamfc']) + ss = '-prec_score:%.3f -succ_score:%.3f -succ_rate:%.3f' % (float(prec_score), float(succ_score), float(succ_rate)) + print("====accuracy data====") + print(ss) diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/LICENSE b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/LICENSE +++ b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/README.md b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/README.md index 2b0cca569975219f45f738c6f7587ee46b29702d..ec84757b746a1e8cc7109dd9c3f43345196f9e40 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/README.md +++ b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/README.md @@ -1,97 +1,97 @@ -# SiamRPN模型PyTorch推理指导 - -## 1 环境准备 -1. 
获取开源代码仓 -- 得到本项目代码后,将 SiamRPN 项目放置在/home目录下,进入/home/SiamRPN目录下,下载开源代码仓 -``` -git clone https://github.com/STVIR/pysot.git -``` - -- 确认获取的开源 pysot 项目文件存放在 /home/SiamRPN 目录下,进入 /home/SiamRPN/pysot 目录下执行 -``` -patch -N -p1 < ../SiamRPN.patch -``` - -2. 获取数据集 -- 将数据集VOT2016下载并放在 /root/datasets 目录下 -``` -wget -P /root/datasets https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/train/zip/VOT2016.zip -cd // -cd /root/datasets -unzip VOT2016.zip -rm -rf VOT2016.zip -``` -- (备注:将获取的 VOT2016 数据集文件放在 /root/datasets 目录下) - - -3. 安装依赖 -- 进入 /home/SiamRPN 目录下 -```shell -cd // -cd /home/SiamRPN -pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -``` -- (备注:若有安装失败的请单独重新安装,若有需要,也可使用conda指令安装) - -4. 获取pth权重文件 - -``` -wget -P /home/SiamRPN/pysot/experiments/siamrpn_r50_l234_dwxcorr https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/tracking/SiamRPN/model.pth -``` - -- (备注:将 model.pth 权重文件要放在 ../siamrpn_r50_l234_dwxcorr 目录下) - -5. 运行setup.py -- 进入 /home/SiamRPN/pysot 目录下执行 -``` -export PYTHONPATH=/home/SiamRPN/pysot:$PYTHONPATH -python setup.py build_ext --inplace install -``` - - - -## 2 离线推理 - -310上执行,执行时使用 npu-smi info 查看设备状态,确保device空闲 - -```shell -# (j进入 /home/SiamRPN 下执行) -# 转成onnx -bash test/pth2onnx.sh -# 转成om -bash test/onnx2om.sh -# 进行评估 -bash test/eval_acc_perf.sh -``` - - -- 评测结果: - -- 310精度 -``` ------------------------------------------------------------- -|Tracker Name| Accuracy | Robustness | Average | EAO | ------------------------------------------------------------- -| VOT2016 | 0.639 | 0.177 | 42fps | 0.483 | ------------------------------------------------------------- -``` - -- 参考pth精度 -``` ------------------------------------------------------------- -|Tracker Name| Accuracy | Robustness | Average | EAO | ------------------------------------------------------------- -| VOT2016 | 0.642 | 0.196 | 35fps | 0.464 | ------------------------------------------------------------- -``` - - -- 性能计算方式: - fps计算方式为单位时间内处理的图片数量,即 图片数量 / 时间 。 - 根据310单device需乘以4之后再和T4对比,故310单卡性能理论计算为42×4=168fps。 - -- 备注: -- (1) 310精度相较于T4下降0.3%,但鲁棒性和EAO均有提升。310单device的实际平均性能为42fps。T4单卡平均性能为35fps,由于运行场景等干扰因素不同,会导致结果有所浮动,35fps为多次测量后平均近似值,供参考。 -- (2) 性能数据(speed)在推理过程中会展示,在推理结束后会展示平均性能(average speed)。 -- (3) 本推理为视频追踪,输入对象为视频,故不设置多batch。 - +# SiamRPN模型PyTorch推理指导 + +## 1 环境准备 +1. 获取开源代码仓 +- 得到本项目代码后,将 SiamRPN 项目放置在/home目录下,进入/home/SiamRPN目录下,下载开源代码仓 +``` +git clone https://github.com/STVIR/pysot.git +``` + +- 确认获取的开源 pysot 项目文件存放在 /home/SiamRPN 目录下,进入 /home/SiamRPN/pysot 目录下执行 +``` +patch -N -p1 < ../SiamRPN.patch +``` + +2. 获取数据集 +- 将数据集VOT2016下载并放在 /root/datasets 目录下 +``` +wget -P /root/datasets https://ascend-pytorch-one-datasets.obs.cn-north-4.myhuaweicloud.com/train/zip/VOT2016.zip +cd // +cd /root/datasets +unzip VOT2016.zip +rm -rf VOT2016.zip +``` +- (备注:将获取的 VOT2016 数据集文件放在 /root/datasets 目录下) + + +3. 安装依赖 +- 进入 /home/SiamRPN 目录下 +```shell +cd // +cd /home/SiamRPN +pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com +``` +- (备注:若有安装失败的请单独重新安装,若有需要,也可使用conda指令安装) + +4. 获取pth权重文件 + +``` +wget -P /home/SiamRPN/pysot/experiments/siamrpn_r50_l234_dwxcorr https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/tracking/SiamRPN/model.pth +``` + +- (备注:将 model.pth 权重文件要放在 ../siamrpn_r50_l234_dwxcorr 目录下) + +5. 
运行setup.py +- 进入 /home/SiamRPN/pysot 目录下执行 +``` +export PYTHONPATH=/home/SiamRPN/pysot:$PYTHONPATH +python setup.py build_ext --inplace install +``` + + + +## 2 离线推理 + +310上执行,执行时使用 npu-smi info 查看设备状态,确保device空闲 + +```shell +# (j进入 /home/SiamRPN 下执行) +# 转成onnx +bash test/pth2onnx.sh +# 转成om +bash test/onnx2om.sh +# 进行评估 +bash test/eval_acc_perf.sh +``` + + +- 评测结果: + +- 310精度 +``` +------------------------------------------------------------ +|Tracker Name| Accuracy | Robustness | Average | EAO | +------------------------------------------------------------ +| VOT2016 | 0.639 | 0.177 | 42fps | 0.483 | +------------------------------------------------------------ +``` + +- 参考pth精度 +``` +------------------------------------------------------------ +|Tracker Name| Accuracy | Robustness | Average | EAO | +------------------------------------------------------------ +| VOT2016 | 0.642 | 0.196 | 35fps | 0.464 | +------------------------------------------------------------ +``` + + +- 性能计算方式: + fps计算方式为单位时间内处理的图片数量,即 图片数量 / 时间 。 + 根据310单device需乘以4之后再和T4对比,故310单卡性能理论计算为42×4=168fps。 + +- 备注: +- (1) 310精度相较于T4下降0.3%,但鲁棒性和EAO均有提升。310单device的实际平均性能为42fps。T4单卡平均性能为35fps,由于运行场景等干扰因素不同,会导致结果有所浮动,35fps为多次测量后平均近似值,供参考。 +- (2) 性能数据(speed)在推理过程中会展示,在推理结束后会展示平均性能(average speed)。 +- (3) 本推理为视频追踪,输入对象为视频,故不设置多batch。 + diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/acl_net.py b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/acl_net.py index add3602f90246d93ba3941c342956a081138dd23..f3624527dc6dc1fbda8edda740d22b9c84c1ed93 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/acl_net.py +++ b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/acl_net.py @@ -1,239 +1,239 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
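The acl_net.py module diffed below defines a pyACL `Net` wrapper that hides model loading, buffer allocation, and host/device memory copies behind a simple callable. The following is a minimal usage sketch, not part of the patch: it assumes the standard pyACL init/teardown sequence, that the module is importable as `acl_net`, and that the `.om` path and the 1x3x127x127 input shape are placeholders.

```python
# Hypothetical usage of the Net wrapper from acl_net.py (paths/shapes are placeholders).
import acl
import numpy as np
from acl_net import Net

DEVICE_ID = 0

ret = acl.init()                                  # initialize pyACL once per process
ret = acl.rt.set_device(DEVICE_ID)                # bind the process to an NPU device
context, ret = acl.rt.create_context(DEVICE_ID)   # context handed to the Net wrapper

net = Net(context, model_path='./exemplar_bs1.om', device_id=DEVICE_ID)

# The input must match the om model's input shape/dtype; a 1x3x127x127 float32
# exemplar crop is assumed here purely for illustration.
exemplar = np.ascontiguousarray(np.random.randn(1, 3, 127, 127).astype(np.float32))
outputs = net([exemplar])                         # forward() returns a list of numpy arrays
print([o.shape for o in outputs])

del net                                           # __del__ releases the device buffers
ret = acl.rt.destroy_context(context)
ret = acl.rt.reset_device(DEVICE_ID)
acl.finalize()
```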
- -import numpy as np -import acl -import functools - -# error code -ACL_ERROR_NONE = 0 - -# memory malloc code -ACL_MEM_MALLOC_HUGE_FIRST = 0 -ACL_MEM_MALLOC_HUGE_ONLY = 1 -ACL_MEM_MALLOC_NORMAL_ONLY = 2 - -# memory copy code -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - -buffer_method = { - "in": acl.mdl.get_input_size_by_index, - "out": acl.mdl.get_output_size_by_index, - "outhost": acl.mdl.get_output_size_by_index -} - - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception("{} failed ret = {}".format(message, ret)) - - -class Net(object): - def __init__(self, context, model_path, device_id=0, first=True, config_path=None): - self.device_id = device_id - self.model_path = model_path - self.model_id = None - self.context = context - - self.input_data = [] - self.output_data = [] - self.output_data_host = [] - self.model_desc = None - self.load_input_dataset = None - self.load_output_dataset = None - - self._init_resource(first, config_path) - - def __call__(self, ori_data): - return self.forward(ori_data) - - def __del__(self): - ret = acl.mdl.unload(self.model_id) - check_ret("acl.mdl.unload", ret) - if self.model_desc: - acl.mdl.destroy_desc(self.model_desc) - self.model_desc = None - - while self.input_data: - item = self.input_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data: - item = self.output_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - def _init_resource(self, first=False, config_path=None): - # load_model - self.model_id, ret = acl.mdl.load_from_file(self.model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - self._get_model_info() - - def _get_model_info(self, ): - ret = acl.mdl.get_desc(self.model_desc, self.model_id) - check_ret("acl.mdl.get_desc", ret) - input_size = acl.mdl.get_num_inputs(self.model_desc) - output_size = acl.mdl.get_num_outputs(self.model_desc) - self._gen_data_buffer(input_size, des="in") - self._gen_data_buffer(output_size, des="out") - self._gen_dataset_output_host(output_size, des="outhost") - - def _gen_data_buffer(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc( - temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) - check_ret("acl.rt.malloc", ret) - - if des == "in": - self.input_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - elif des == "out": - self.output_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _gen_dataset_output_host(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) - check_ret("acl.rt.malloc_host", ret) - - self.output_data_host.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): - temp_data_buffer = self.input_data \ - if policy == ACL_MEMCPY_HOST_TO_DEVICE \ - else self.output_data - output_malloc_cost = 0 - idx = 0 - - if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: - dataset = self.output_data_host - - for i, item in 
enumerate(temp_data_buffer): - if policy == ACL_MEMCPY_HOST_TO_DEVICE: - ptr = acl.util.numpy_to_ptr(dataset[i]) - ret = acl.rt.memcpy( - item["buffer"], item["size"], ptr, item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - else: - ptr = dataset[i]["buffer"] - ret = acl.rt.memcpy( - ptr, item["size"], item["buffer"], item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - def _gen_dataset(self, type_str="input"): - dataset = acl.mdl.create_dataset() - - temp_dataset = None - if type_str == "in": - self.load_input_dataset = dataset - temp_dataset = self.input_data - else: - self.load_output_dataset = dataset - temp_dataset = self.output_data - - for item in temp_dataset: - data = acl.create_data_buffer(item["buffer"], item["size"]) - if data is None: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - _, ret = acl.mdl.add_dataset_buffer(dataset, data) - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - def _data_from_host_to_device(self, images): - self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) - self._gen_dataset("in") - self._gen_dataset("out") - - def _data_from_device_to_host(self): - res = [] - self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) - output = self.get_result(self.output_data_host) - return output - - def _destroy_databuffer(self): - for dataset in [self.load_input_dataset, self.load_output_dataset]: - if not dataset: - continue - - num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(num): - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf: - ret = acl.destroy_data_buffer(data_buf) - check_ret("acl.destroy_data_buffer", ret) - ret = acl.mdl.destroy_dataset(dataset) - check_ret("acl.mdl.destroy_dataset", ret) - - def forward(self, input_data): - if not isinstance(input_data, (list, tuple)): - input_data = [input_data] - - self._data_from_host_to_device(input_data) - ret = acl.mdl.execute( - self.model_id, self.load_input_dataset, self.load_output_dataset) - check_ret("acl.mdl.execute", ret) - - self._destroy_databuffer() - result = self._data_from_device_to_host() - return result - - def get_result(self, output_data): - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - check_ret("acl.mdl.get_cur_output_dims", ret) - - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) - np_array = np.frombuffer( - bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_array = np_array.reshape(data_shape) - dataset.append(np_array) - return dataset +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import acl +import functools + +# error code +ACL_ERROR_NONE = 0 + +# memory malloc code +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_HUGE_ONLY = 1 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 + +# memory copy code +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + +buffer_method = { + "in": acl.mdl.get_input_size_by_index, + "out": acl.mdl.get_output_size_by_index, + "outhost": acl.mdl.get_output_size_by_index +} + + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception("{} failed ret = {}".format(message, ret)) + + +class Net(object): + def __init__(self, context, model_path, device_id=0, first=True, config_path=None): + self.device_id = device_id + self.model_path = model_path + self.model_id = None + self.context = context + + self.input_data = [] + self.output_data = [] + self.output_data_host = [] + self.model_desc = None + self.load_input_dataset = None + self.load_output_dataset = None + + self._init_resource(first, config_path) + + def __call__(self, ori_data): + return self.forward(ori_data) + + def __del__(self): + ret = acl.mdl.unload(self.model_id) + check_ret("acl.mdl.unload", ret) + if self.model_desc: + acl.mdl.destroy_desc(self.model_desc) + self.model_desc = None + + while self.input_data: + item = self.input_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data: + item = self.output_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + def _init_resource(self, first=False, config_path=None): + # load_model + self.model_id, ret = acl.mdl.load_from_file(self.model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + self._get_model_info() + + def _get_model_info(self, ): + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + check_ret("acl.mdl.get_desc", ret) + input_size = acl.mdl.get_num_inputs(self.model_desc) + output_size = acl.mdl.get_num_outputs(self.model_desc) + self._gen_data_buffer(input_size, des="in") + self._gen_data_buffer(output_size, des="out") + self._gen_dataset_output_host(output_size, des="outhost") + + def _gen_data_buffer(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc( + temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) + check_ret("acl.rt.malloc", ret) + + if des == "in": + self.input_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + elif des == "out": + self.output_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _gen_dataset_output_host(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) + check_ret("acl.rt.malloc_host", ret) + + self.output_data_host.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): + temp_data_buffer = self.input_data \ + if policy == ACL_MEMCPY_HOST_TO_DEVICE \ + else self.output_data + output_malloc_cost = 0 + idx = 0 + + if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: + dataset = self.output_data_host + + for i, item in 
enumerate(temp_data_buffer): + if policy == ACL_MEMCPY_HOST_TO_DEVICE: + ptr = acl.util.numpy_to_ptr(dataset[i]) + ret = acl.rt.memcpy( + item["buffer"], item["size"], ptr, item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + else: + ptr = dataset[i]["buffer"] + ret = acl.rt.memcpy( + ptr, item["size"], item["buffer"], item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + def _gen_dataset(self, type_str="input"): + dataset = acl.mdl.create_dataset() + + temp_dataset = None + if type_str == "in": + self.load_input_dataset = dataset + temp_dataset = self.input_data + else: + self.load_output_dataset = dataset + temp_dataset = self.output_data + + for item in temp_dataset: + data = acl.create_data_buffer(item["buffer"], item["size"]) + if data is None: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + _, ret = acl.mdl.add_dataset_buffer(dataset, data) + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + def _data_from_host_to_device(self, images): + self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) + self._gen_dataset("in") + self._gen_dataset("out") + + def _data_from_device_to_host(self): + res = [] + self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) + output = self.get_result(self.output_data_host) + return output + + def _destroy_databuffer(self): + for dataset in [self.load_input_dataset, self.load_output_dataset]: + if not dataset: + continue + + num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(num): + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf: + ret = acl.destroy_data_buffer(data_buf) + check_ret("acl.destroy_data_buffer", ret) + ret = acl.mdl.destroy_dataset(dataset) + check_ret("acl.mdl.destroy_dataset", ret) + + def forward(self, input_data): + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + + self._data_from_host_to_device(input_data) + ret = acl.mdl.execute( + self.model_id, self.load_input_dataset, self.load_output_dataset) + check_ret("acl.mdl.execute", ret) + + self._destroy_databuffer() + result = self._data_from_device_to_host() + return result + + def get_result(self, output_data): + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + check_ret("acl.mdl.get_cur_output_dims", ret) + + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_array = np.frombuffer( + bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) + np_array = np_array.reshape(data_shape) + dataset.append(np_array) + return dataset diff --git a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/modelzoo_level.txt index d44ba5698b045b8a30e107962f295dbc24585d8c..70801afc42b6d9eb5cdd98b5430d9b2101f3146a 100644 --- a/ACL_PyTorch/contrib/cv/tracking/SiamRPN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/tracking/SiamRPN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/I3D/acl_net.py b/ACL_PyTorch/contrib/cv/video_understanding/I3D/acl_net.py index 
67ab2ec0ea90652f75b12245c569c65ba55a57b7..88c12ba316f9778742960868e81bf1fe41824929 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/I3D/acl_net.py +++ b/ACL_PyTorch/contrib/cv/video_understanding/I3D/acl_net.py @@ -1,245 +1,245 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import acl -import functools - -# error code -ACL_ERROR_NONE = 0 - -# memory malloc code -ACL_MEM_MALLOC_HUGE_FIRST = 0 -ACL_MEM_MALLOC_HUGE_ONLY = 1 -ACL_MEM_MALLOC_NORMAL_ONLY = 2 - -# memory copy code -ACL_MEMCPY_HOST_TO_HOST = 0 -ACL_MEMCPY_HOST_TO_DEVICE = 1 -ACL_MEMCPY_DEVICE_TO_HOST = 2 -ACL_MEMCPY_DEVICE_TO_DEVICE = 3 - -ACL_DTYPE = { - 0: 'float32', - 1: 'float16', - 2: 'int8', - 3: 'int32', - 4: 'uint8', - 6: 'int16', - 7: 'uint16', - 8: 'uint32', - 9: 'int64', - 10: 'uint64', - 11: 'float64', - 12: 'bool', -} - -buffer_method = { - "in": acl.mdl.get_input_size_by_index, - "out": acl.mdl.get_output_size_by_index, - "outhost": acl.mdl.get_output_size_by_index -} - -def check_ret(message, ret): - if ret != ACL_ERROR_NONE: - raise Exception("{} failed ret = {}".format(message, ret)) - - -class Net(object): - def __init__(self, context, model_path, device_id=0, first=True, config_path=None): - self.device_id = device_id - self.model_path = model_path - self.model_id = None - self.context = context - - self.input_data = [] - self.output_data = [] - self.output_data_host = [] - self.model_desc = None - self.load_input_dataset = None - self.load_output_dataset = None - - self._init_resource(first, config_path) - - - def __call__(self, ori_data): - return self.forward(ori_data) - - - def __del__(self): - ret = acl.mdl.unload(self.model_id) - check_ret("acl.mdl.unload", ret) - if self.model_desc: - acl.mdl.destroy_desc(self.model_desc) - self.model_desc = None - - while self.input_data: - item = self.input_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - while self.output_data: - item = self.output_data.pop() - ret = acl.rt.free(item["buffer"]) - check_ret("acl.rt.free", ret) - - - def _init_resource(self, first=False, config_path=None): - # load_model - self.model_id, ret = acl.mdl.load_from_file(self.model_path) - check_ret("acl.mdl.load_from_file", ret) - - self.model_desc = acl.mdl.create_desc() - self._get_model_info() - - - def _get_model_info(self,): - ret = acl.mdl.get_desc(self.model_desc, self.model_id) - check_ret("acl.mdl.get_desc", ret) - input_size = acl.mdl.get_num_inputs(self.model_desc) - output_size = acl.mdl.get_num_outputs(self.model_desc) - self._gen_data_buffer(input_size, des="in") - self._gen_data_buffer(output_size, des="out") - self._gen_dataset_output_host(output_size, des="outhost") - - - def _gen_data_buffer(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc(temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) - check_ret("acl.rt.malloc", ret) - - if des == "in": - 
self.input_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - elif des == "out": - self.output_data.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - - def _gen_dataset_output_host(self, size, des): - func = buffer_method[des] - for i in range(size): - temp_buffer_size = func(self.model_desc, i) - temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) - check_ret("acl.rt.malloc_host", ret) - - self.output_data_host.append({"buffer": temp_buffer, - "size": temp_buffer_size}) - - - def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): - temp_data_buffer = self.input_data \ - if policy == ACL_MEMCPY_HOST_TO_DEVICE \ - else self.output_data - output_malloc_cost = 0 - idx = 0 - - if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: - dataset = self.output_data_host - - for i, item in enumerate(temp_data_buffer): - if policy == ACL_MEMCPY_HOST_TO_DEVICE: - ptr = acl.util.numpy_to_ptr(dataset[i]) - ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - else: - ptr = dataset[i]["buffer"] - ret = acl.rt.memcpy(ptr, item["size"], item["buffer"], item["size"], policy) - check_ret("acl.rt.memcpy", ret) - - - def _gen_dataset(self, type_str="input"): - dataset = acl.mdl.create_dataset() - - temp_dataset = None - if type_str == "in": - self.load_input_dataset = dataset - temp_dataset = self.input_data - else: - self.load_output_dataset = dataset - temp_dataset = self.output_data - - for item in temp_dataset: - data = acl.create_data_buffer(item["buffer"], item["size"]) - if data is None: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - _, ret = acl.mdl.add_dataset_buffer(dataset, data) - if ret != ACL_ERROR_NONE: - ret = acl.destroy_data_buffer(dataset) - check_ret("acl.destroy_data_buffer", ret) - - - def _data_from_host_to_device(self, images): - self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) - self._gen_dataset("in") - self._gen_dataset("out") - - - def _data_from_device_to_host(self): - res = [] - self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) - output = self.get_result(self.output_data_host) - return output - - - def _destroy_databuffer(self): - for dataset in [self.load_input_dataset, self.load_output_dataset]: - if not dataset: - continue - - num = acl.mdl.get_dataset_num_buffers(dataset) - for i in range(num): - data_buf = acl.mdl.get_dataset_buffer(dataset, i) - if data_buf: - ret = acl.destroy_data_buffer(data_buf) - check_ret("acl.destroy_data_buffer", ret) - ret = acl.mdl.destroy_dataset(dataset) - check_ret("acl.mdl.destroy_dataset", ret) - - def forward(self, input_data): - if not isinstance(input_data, (list, tuple)): - input_data = [input_data] - - self._data_from_host_to_device(input_data) - ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset) - check_ret("acl.mdl.execute", ret) - - self._destroy_databuffer() - result = self._data_from_device_to_host() - return result - - - def get_result(self, output_data): - dataset = [] - for i in range(len(output_data)): - dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) - check_ret("acl.mdl.get_cur_output_dims", ret) - - data_shape = dims.get("dims") - data_type = acl.mdl.get_output_data_type(self.model_desc, i) - data_len = functools.reduce(lambda x, y: x * y, data_shape) - ftype = np.dtype(ACL_DTYPE.get(data_type)) - - size = output_data[i]["size"] - ptr = output_data[i]["buffer"] - data = acl.util.ptr_to_numpy(ptr, (size,), 1) 
- np_array = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), dtype=ftype, count=data_len) - np_array = np_array.reshape(data_shape) - dataset.append(np_array) - return dataset +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import acl +import functools + +# error code +ACL_ERROR_NONE = 0 + +# memory malloc code +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_HUGE_ONLY = 1 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 + +# memory copy code +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 + +ACL_DTYPE = { + 0: 'float32', + 1: 'float16', + 2: 'int8', + 3: 'int32', + 4: 'uint8', + 6: 'int16', + 7: 'uint16', + 8: 'uint32', + 9: 'int64', + 10: 'uint64', + 11: 'float64', + 12: 'bool', +} + +buffer_method = { + "in": acl.mdl.get_input_size_by_index, + "out": acl.mdl.get_output_size_by_index, + "outhost": acl.mdl.get_output_size_by_index +} + +def check_ret(message, ret): + if ret != ACL_ERROR_NONE: + raise Exception("{} failed ret = {}".format(message, ret)) + + +class Net(object): + def __init__(self, context, model_path, device_id=0, first=True, config_path=None): + self.device_id = device_id + self.model_path = model_path + self.model_id = None + self.context = context + + self.input_data = [] + self.output_data = [] + self.output_data_host = [] + self.model_desc = None + self.load_input_dataset = None + self.load_output_dataset = None + + self._init_resource(first, config_path) + + + def __call__(self, ori_data): + return self.forward(ori_data) + + + def __del__(self): + ret = acl.mdl.unload(self.model_id) + check_ret("acl.mdl.unload", ret) + if self.model_desc: + acl.mdl.destroy_desc(self.model_desc) + self.model_desc = None + + while self.input_data: + item = self.input_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + while self.output_data: + item = self.output_data.pop() + ret = acl.rt.free(item["buffer"]) + check_ret("acl.rt.free", ret) + + + def _init_resource(self, first=False, config_path=None): + # load_model + self.model_id, ret = acl.mdl.load_from_file(self.model_path) + check_ret("acl.mdl.load_from_file", ret) + + self.model_desc = acl.mdl.create_desc() + self._get_model_info() + + + def _get_model_info(self,): + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + check_ret("acl.mdl.get_desc", ret) + input_size = acl.mdl.get_num_inputs(self.model_desc) + output_size = acl.mdl.get_num_outputs(self.model_desc) + self._gen_data_buffer(input_size, des="in") + self._gen_data_buffer(output_size, des="out") + self._gen_dataset_output_host(output_size, des="outhost") + + + def _gen_data_buffer(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc(temp_buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) + check_ret("acl.rt.malloc", ret) + + if des == "in": + self.input_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + 
elif des == "out": + self.output_data.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + + def _gen_dataset_output_host(self, size, des): + func = buffer_method[des] + for i in range(size): + temp_buffer_size = func(self.model_desc, i) + temp_buffer, ret = acl.rt.malloc_host(temp_buffer_size) + check_ret("acl.rt.malloc_host", ret) + + self.output_data_host.append({"buffer": temp_buffer, + "size": temp_buffer_size}) + + + def _data_interaction(self, dataset, policy=ACL_MEMCPY_HOST_TO_DEVICE): + temp_data_buffer = self.input_data \ + if policy == ACL_MEMCPY_HOST_TO_DEVICE \ + else self.output_data + output_malloc_cost = 0 + idx = 0 + + if len(dataset) == 0 and policy == ACL_MEMCPY_DEVICE_TO_HOST: + dataset = self.output_data_host + + for i, item in enumerate(temp_data_buffer): + if policy == ACL_MEMCPY_HOST_TO_DEVICE: + ptr = acl.util.numpy_to_ptr(dataset[i]) + ret = acl.rt.memcpy(item["buffer"], item["size"], ptr, item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + else: + ptr = dataset[i]["buffer"] + ret = acl.rt.memcpy(ptr, item["size"], item["buffer"], item["size"], policy) + check_ret("acl.rt.memcpy", ret) + + + def _gen_dataset(self, type_str="input"): + dataset = acl.mdl.create_dataset() + + temp_dataset = None + if type_str == "in": + self.load_input_dataset = dataset + temp_dataset = self.input_data + else: + self.load_output_dataset = dataset + temp_dataset = self.output_data + + for item in temp_dataset: + data = acl.create_data_buffer(item["buffer"], item["size"]) + if data is None: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + _, ret = acl.mdl.add_dataset_buffer(dataset, data) + if ret != ACL_ERROR_NONE: + ret = acl.destroy_data_buffer(dataset) + check_ret("acl.destroy_data_buffer", ret) + + + def _data_from_host_to_device(self, images): + self._data_interaction(images, ACL_MEMCPY_HOST_TO_DEVICE) + self._gen_dataset("in") + self._gen_dataset("out") + + + def _data_from_device_to_host(self): + res = [] + self._data_interaction(res, ACL_MEMCPY_DEVICE_TO_HOST) + output = self.get_result(self.output_data_host) + return output + + + def _destroy_databuffer(self): + for dataset in [self.load_input_dataset, self.load_output_dataset]: + if not dataset: + continue + + num = acl.mdl.get_dataset_num_buffers(dataset) + for i in range(num): + data_buf = acl.mdl.get_dataset_buffer(dataset, i) + if data_buf: + ret = acl.destroy_data_buffer(data_buf) + check_ret("acl.destroy_data_buffer", ret) + ret = acl.mdl.destroy_dataset(dataset) + check_ret("acl.mdl.destroy_dataset", ret) + + def forward(self, input_data): + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + + self._data_from_host_to_device(input_data) + ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset) + check_ret("acl.mdl.execute", ret) + + self._destroy_databuffer() + result = self._data_from_device_to_host() + return result + + + def get_result(self, output_data): + dataset = [] + for i in range(len(output_data)): + dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i) + check_ret("acl.mdl.get_cur_output_dims", ret) + + data_shape = dims.get("dims") + data_type = acl.mdl.get_output_data_type(self.model_desc, i) + data_len = functools.reduce(lambda x, y: x * y, data_shape) + ftype = np.dtype(ACL_DTYPE.get(data_type)) + + size = output_data[i]["size"] + ptr = output_data[i]["buffer"] + data = acl.util.ptr_to_numpy(ptr, (size,), 1) + np_array = np.frombuffer(bytearray(data[:data_len * ftype.itemsize]), 
dtype=ftype, count=data_len) + np_array = np_array.reshape(data_shape) + dataset.append(np_array) + return dataset diff --git a/ACL_PyTorch/contrib/cv/video_understanding/I3D/i3d_inference.py b/ACL_PyTorch/contrib/cv/video_understanding/I3D/i3d_inference.py index 6fa45cf9b4bb3e57f6532f55a313d7a8cbbc0ef9..7b9e49a7b9fe2531d9c809ad23f143369a993d01 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/I3D/i3d_inference.py +++ b/ACL_PyTorch/contrib/cv/video_understanding/I3D/i3d_inference.py @@ -1,203 +1,203 @@ -# ============================================================================ -# Copyright 2018-2019 Open-MMLab. All rights reserved. -# Apache License -# Version 2.0, January 2004 -# http://www.apache.org/licenses/ -# -# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import os.path as osp -import warnings -import numpy as np -from sys import path -path.append('/home/mmaction2-20') - -import mmcv -import torch -import torch.nn.functional as F -from mmcv import Config, DictAction -from mmcv.cnn import fuse_conv_bn -from mmcv.fileio.io import file_handlers -from mmcv.parallel import MMDataParallel, MMDistributedDataParallel -from mmcv.runner import get_dist_info, init_dist, load_checkpoint -from mmcv.runner.fp16_utils import wrap_fp16_model - -from mmaction.datasets import build_dataloader, build_dataset -from mmaction.models import build_model -from mmaction.utils import register_module_hooks - -from acl_net import Net -import acl - - - -def parse_args(): - parser = argparse.ArgumentParser( - description='i3d inference') - parser.add_argument('config', help='test config file path') - parser.add_argument( - '--out', - default=None, - help='output result file in pkl/yaml/json format') - parser.add_argument( - '--eval', - type=str, - nargs='+', - help='evaluation metrics, which depends on the dataset, e.g.,' - ' "top_k_accuracy", "mean_class_accuracy" for video dataset') - parser.add_argument( - '-bs', '--batch_size', type=int, default=1, - help='batch size') - parser.add_argument( - '--device_id', type=int, default=1, - help='device id') - parser.add_argument( - '--model', required=True, type=str, - help='i3d.om') - args = parser.parse_args() - - return args - -def check_ret(message, ret): - if ret != 0: - raise Exception("{} failed ret = {}".format(message, ret)) - - -class I3d(): - def __init__(self, device_id, model) -> None: - ret = acl.init() - check_ret("acl.init failed", ret) - ret = acl.rt.set_device(device_id) - check_ret("acl.rt.set_device failed", ret) - context, ret = acl.rt.create_context(device_id) - check_ret("acl.rt.create_context failed", ret) - self.device_id = device_id - - self.i3d_context = Net(context, model_path=model, device_id=device_id, first=True) - - def __del__(self): - del self.i3d_context - - ret = acl.rt.reset_device(self.device_id) - check_ret("acl.rt.reset_device failed", ret) - context, ret = acl.rt.get_context() - check_ret("acl.rt.get_context failed", ret) - ret = acl.rt.destroy_context(context) - check_ret("acl.rt.destroy_context failed", ret) - ret = acl.finalize() - check_ret("acl.finalize failed", ret) - - def inference(self, data_loader): - results = [] - dataset = data_loader.dataset - prog_bar = mmcv.ProgressBar(len(dataset)) - for data in data_loader: - input_data = np.array(data['imgs']) - result = self.i3d_context([input_data]) - result = torch.from_numpy(np.array(result)) - batch_size = result.shape[1] - result = result.view(result.shape[0], batch_size, -1) - result = F.softmax(result, dim=2).mean(dim=1) - result = result.numpy() - results.extend(result) - - batch_size = len(result) - for _ in range(batch_size): - prog_bar.update() - - return results - - - -def main(): - args = parse_args() - - cfg = Config.fromfile(args.config) - - # Load output_config from cfg - output_config = cfg.get('output_config', {}) - if args.out: - # Overwrite output_config from args.out - output_config = Config._merge_a_into_b( - dict(out=args.out), output_config) - - # Load eval_config from cfg - eval_config = cfg.get('eval_config', {}) - if args.eval: - # Overwrite eval_config from args.eval - eval_config = Config._merge_a_into_b( - dict(metrics=args.eval), eval_config) - - dataset_type = cfg.data.test.type - if output_config.get('out', None): - if 'output_format' in output_config: - # ugly 
workround to make recognition and localization the same - warnings.warn( - 'Skip checking `output_format` in localization task.') - else: - out = output_config['out'] - # make sure the dirname of the output path exists - mmcv.mkdir_or_exist(osp.dirname(out)) - _, suffix = osp.splitext(out) - if dataset_type == 'AVADataset': - assert suffix[1:] == 'csv', ('For AVADataset, the format of ' - 'the output file should be csv') - else: - assert suffix[1:] in file_handlers, ( - 'The format of the output ' - 'file should be json, pickle or yaml') - - cfg.data.test.test_mode = True - - # The flag is used to register module's hooks - cfg.setdefault('module_hooks', []) - - # build the dataloader - dataset = build_dataset(cfg.data.test, dict(test_mode=True)) - dataloader_setting = dict( - videos_per_gpu=args.batch_size, - workers_per_gpu=1, - dist=False, - shuffle=False) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('test_dataloader', {})) - data_loader = build_dataloader(dataset, **dataloader_setting) - - i3d = I3d(args.device_id, args.model) - outputs = i3d.inference(data_loader) - - rank, _ = get_dist_info() - if rank == 0: - if output_config.get('out', None): - out = output_config['out'] - print(f'\nwriting results to {out}') - dataset.dump_results(outputs, **output_config) - if eval_config: - eval_res = dataset.evaluate(outputs, **eval_config) - for name, val in eval_res.items(): - print(f'{name}: {val:.04f}') - - -if __name__ == '__main__': - main() +# ============================================================================ +# Copyright 2018-2019 Open-MMLab. All rights reserved. +# Apache License +# Version 2.0, January 2004 +# http://www.apache.org/licenses/ +# +# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +import os.path as osp +import warnings +import numpy as np +from sys import path +path.append('/home/mmaction2-20') + +import mmcv +import torch +import torch.nn.functional as F +from mmcv import Config, DictAction +from mmcv.cnn import fuse_conv_bn +from mmcv.fileio.io import file_handlers +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from mmcv.runner import get_dist_info, init_dist, load_checkpoint +from mmcv.runner.fp16_utils import wrap_fp16_model + +from mmaction.datasets import build_dataloader, build_dataset +from mmaction.models import build_model +from mmaction.utils import register_module_hooks + +from acl_net import Net +import acl + + + +def parse_args(): + parser = argparse.ArgumentParser( + description='i3d inference') + parser.add_argument('config', help='test config file path') + parser.add_argument( + '--out', + default=None, + help='output result file in pkl/yaml/json format') + parser.add_argument( + '--eval', + type=str, + nargs='+', + help='evaluation metrics, which depends on the dataset, e.g.,' + ' "top_k_accuracy", "mean_class_accuracy" for video dataset') + parser.add_argument( + '-bs', '--batch_size', type=int, default=1, + help='batch size') + parser.add_argument( + '--device_id', type=int, default=1, + help='device id') + parser.add_argument( + '--model', required=True, type=str, + help='i3d.om') + args = parser.parse_args() + + return args + +def check_ret(message, ret): + if ret != 0: + raise Exception("{} failed ret = {}".format(message, ret)) + + +class I3d(): + def __init__(self, device_id, model) -> None: + ret = acl.init() + check_ret("acl.init failed", ret) + ret = acl.rt.set_device(device_id) + check_ret("acl.rt.set_device failed", ret) + context, ret = acl.rt.create_context(device_id) + check_ret("acl.rt.create_context failed", ret) + self.device_id = device_id + + self.i3d_context = Net(context, model_path=model, device_id=device_id, first=True) + + def __del__(self): + del self.i3d_context + + ret = acl.rt.reset_device(self.device_id) + check_ret("acl.rt.reset_device failed", ret) + context, ret = acl.rt.get_context() + check_ret("acl.rt.get_context failed", ret) + ret = acl.rt.destroy_context(context) + check_ret("acl.rt.destroy_context failed", ret) + ret = acl.finalize() + check_ret("acl.finalize failed", ret) + + def inference(self, data_loader): + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + input_data = np.array(data['imgs']) + result = self.i3d_context([input_data]) + result = torch.from_numpy(np.array(result)) + batch_size = result.shape[1] + result = result.view(result.shape[0], batch_size, -1) + result = F.softmax(result, dim=2).mean(dim=1) + result = result.numpy() + results.extend(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + + return results + + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + # Load output_config from cfg + output_config = cfg.get('output_config', {}) + if args.out: + # Overwrite output_config from args.out + output_config = Config._merge_a_into_b( + dict(out=args.out), output_config) + + # Load eval_config from cfg + eval_config = cfg.get('eval_config', {}) + if args.eval: + # Overwrite eval_config from args.eval + eval_config = Config._merge_a_into_b( + dict(metrics=args.eval), eval_config) + + dataset_type = cfg.data.test.type + if output_config.get('out', None): + if 'output_format' in output_config: + # ugly 
workround to make recognition and localization the same + warnings.warn( + 'Skip checking `output_format` in localization task.') + else: + out = output_config['out'] + # make sure the dirname of the output path exists + mmcv.mkdir_or_exist(osp.dirname(out)) + _, suffix = osp.splitext(out) + if dataset_type == 'AVADataset': + assert suffix[1:] == 'csv', ('For AVADataset, the format of ' + 'the output file should be csv') + else: + assert suffix[1:] in file_handlers, ( + 'The format of the output ' + 'file should be json, pickle or yaml') + + cfg.data.test.test_mode = True + + # The flag is used to register module's hooks + cfg.setdefault('module_hooks', []) + + # build the dataloader + dataset = build_dataset(cfg.data.test, dict(test_mode=True)) + dataloader_setting = dict( + videos_per_gpu=args.batch_size, + workers_per_gpu=1, + dist=False, + shuffle=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('test_dataloader', {})) + data_loader = build_dataloader(dataset, **dataloader_setting) + + i3d = I3d(args.device_id, args.model) + outputs = i3d.inference(data_loader) + + rank, _ = get_dist_info() + if rank == 0: + if output_config.get('out', None): + out = output_config['out'] + print(f'\nwriting results to {out}') + dataset.dump_results(outputs, **output_config) + if eval_config: + eval_res = dataset.evaluate(outputs, **eval_config) + for name, val in eval_res.items(): + print(f'{name}: {val:.04f}') + + +if __name__ == '__main__': + main() diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/LICENSE b/ACL_PyTorch/contrib/cv/video_understanding/TSN/LICENSE index b57dcc300309d53d367942b29e8155d46780ea9d..108309f32416efe8780cc0d8309a45c8a2ebbbd3 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/LICENSE +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/README.md b/ACL_PyTorch/contrib/cv/video_understanding/TSN/README.md index 3b2bb4f94c69117344b0e3bd7d55efcc5e9caad6..b8c6ccf3ddcccd57b433b3a4955d6e12a9709f2e 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/README.md +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/README.md @@ -1,96 +1,96 @@ -# 基于开源mmaction2预训练的TSN模型端到端推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -```shell -pip3.7 install -r requirements.txt -``` - -2.获取,修改与安装开源模型代码 - -```shell -git clone https://github.com/open-mmlab/mmaction2.git -cd mmaction2 -git checkout 9ab8c2af52c561e5c789ccaf7b62f4b7679c103c -pip install -r requirements/build.txt -pip install -v -e . -cd .. -``` - - -3.获取权重文件 - -需要获取tsn_r50_1x1x3_75e_ucf101_rgb/tsn_r50_1x1x3_75e_ucf101_rgb_20201023-d85ab600.pth文件,请参考文档。 - -4.数据集 - -该模型使用UCF101的验证集进行测试,数据集下载步骤如下 - -```shell -cd ./mmaction2/tools/data/ucf101 -bash download_annotations.sh -bash download_videos.sh -bash extract_rgb_frames_opencv.sh -bash generate_videos_filelist.sh -bash generate_rawframes_filelist.sh -``` - -(可选)本项目默认将数据集存放于/opt/npu/ - -```shell -cd .. -mv /ucf101 /opt/npu/ -``` - -5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) - 将benchmark.x86_64或benchmark.aarch64放到当前目录 - -6.使用msame工具推理 -1.首先需要获取msame工具 - -```shell -git clone https://gitee.com/ascend/tools.git -``` - -2.而后安装msame工具 - -```shell -export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest -export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub -cd ./tools/msame -./build.sh g++ ./ -cd ../.. -``` - -3.增加执行权限 - -```shell -chmod u+x ./tools/msame/out/msame -``` - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -#OM model generation -bash test/pth2om.sh - -#OM model inference -bash test/eval_acc_perf.sh --datasets_path=/root/datasets - -#gpu inference -bash test/perf_gpu.sh -``` - -**评测结果:** - -| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | -| :------: | :------: | :------: | :------: | :------: | -| TSN bs1 | top1:83.03%| top1:82.84%| 22.63fps | 23.43fps | -| TSN bs4 | top1:83.03%| top1:82.84%| 21.96fps | 24.41fps | -| TSN bs8 | top1:83.03%| top1:82.84%| 22.18fps | 24.68fps | - +# 基于开源mmaction2预训练的TSN模型端到端推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +```shell +pip3.7 install -r requirements.txt +``` + +2.获取,修改与安装开源模型代码 + +```shell +git clone https://github.com/open-mmlab/mmaction2.git +cd mmaction2 +git checkout 9ab8c2af52c561e5c789ccaf7b62f4b7679c103c +pip install -r requirements/build.txt +pip install -v -e . +cd .. +``` + + +3.获取权重文件 + +需要获取tsn_r50_1x1x3_75e_ucf101_rgb/tsn_r50_1x1x3_75e_ucf101_rgb_20201023-d85ab600.pth文件,请参考文档。 + +4.数据集 + +该模型使用UCF101的验证集进行测试,数据集下载步骤如下 + +```shell +cd ./mmaction2/tools/data/ucf101 +bash download_annotations.sh +bash download_videos.sh +bash extract_rgb_frames_opencv.sh +bash generate_videos_filelist.sh +bash generate_rawframes_filelist.sh +``` + +(可选)本项目默认将数据集存放于/opt/npu/ + +```shell +cd .. 
+mv /ucf101 /opt/npu/ +``` + +5.[获取benchmark工具](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/) + 将benchmark.x86_64或benchmark.aarch64放到当前目录 + +6.使用msame工具推理 +1.首先需要获取msame工具 + +```shell +git clone https://gitee.com/ascend/tools.git +``` + +2.而后安装msame工具 + +```shell +export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest +export NPU_HOST_LIB=/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub +cd ./tools/msame +./build.sh g++ ./ +cd ../.. +``` + +3.增加执行权限 + +```shell +chmod u+x ./tools/msame/out/msame +``` + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +#OM model generation +bash test/pth2om.sh + +#OM model inference +bash test/eval_acc_perf.sh --datasets_path=/root/datasets + +#gpu inference +bash test/perf_gpu.sh +``` + +**评测结果:** + +| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | +| :------: | :------: | :------: | :------: | :------: | +| TSN bs1 | top1:83.03%| top1:82.84%| 22.63fps | 23.43fps | +| TSN bs4 | top1:83.03%| top1:82.84%| 21.96fps | 24.41fps | +| TSN bs8 | top1:83.03%| top1:82.84%| 22.18fps | 24.68fps | + diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/modelzoo_level.txt b/ACL_PyTorch/contrib/cv/video_understanding/TSN/modelzoo_level.txt index 403465b84e39e2cc8a387c33aaf5a1043f8d267a..ec6168981c278bbe672c13a4eb251b6ec184eda4 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:Perfect \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/pytorch2onnx.py b/ACL_PyTorch/contrib/cv/video_understanding/TSN/pytorch2onnx.py index 0bf774f4a66daf6f6e3e7f27325cbf5a1f0df854..911dc557347873079ddac88b92967233ee6f970a 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/pytorch2onnx.py +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/pytorch2onnx.py @@ -1,193 +1,193 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -============================================================================ -""" -# !/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse - -import mmcv -import numpy as np -import torch -from mmcv.runner import load_checkpoint - -from mmaction.models import build_model - -try: - import onnx - import onnxruntime as rt -except ImportError as e: - raise ImportError(f'Please install onnx and onnxruntime first. 
{e}') - -try: - from mmcv.onnx.symbolic import register_extra_symbolics -except ModuleNotFoundError: - raise NotImplementedError('please update mmcv to version>=1.0.4') - - -def _convert_batchnorm(module): - """Convert the syncBNs into normal BN3ds.""" - module_output = module - if isinstance(module, torch.nn.SyncBatchNorm): - module_output = torch.nn.BatchNorm3d(module.num_features, module.eps, - module.momentum, module.affine, - module.track_running_stats) - if module.affine: - module_output.weight.data = module.weight.data.clone().detach() - module_output.bias.data = module.bias.data.clone().detach() - # keep requires_grad unchanged - module_output.weight.requires_grad = module.weight.requires_grad - module_output.bias.requires_grad = module.bias.requires_grad - module_output.running_mean = module.running_mean - module_output.running_var = module.running_var - module_output.num_batches_tracked = module.num_batches_tracked - for name, child in module.named_children(): - module_output.add_module(name, _convert_batchnorm(child)) - del module - return module_output - - -def pytorch2onnx(model, - input_shape, - opset_version=11, - show=False, - output_file='tmp.onnx', - verify=False): - """Convert pytorch model to onnx model. - - Args: - model (:obj:`nn.Module`): The pytorch model to be exported. - input_shape (tuple[int]): The input tensor shape of the model. - opset_version (int): Opset version of onnx used. Default: 11. - show (bool): Determines whether to print the onnx model architecture. - Default: False. - output_file (str): Output onnx model name. Default: 'tmp.onnx'. - verify (bool): Determines whether to verify the onnx model. - Default: False. - """ - model.cpu().eval() - - input_tensor = torch.randn(input_shape) - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - register_extra_symbolics(opset_version) - torch.onnx.export( - model, - input_tensor, - output_file, - input_names=input_names, - dynamic_axes=dynamic_axes, - output_names=output_names, - export_params=True, - keep_initializers_as_inputs=True, - verbose=show, - opset_version=11) - - print(f'Successfully exported ONNX model: {output_file}') - if verify: - # check by onnx - onnx_model = onnx.load(output_file) - onnx.checker.check_model(onnx_model) - - # check the numerical value - # get pytorch output - pytorch_result = model(input_tensor)[0].detach().numpy() - - # get onnx output - input_all = [node.name for node in onnx_model.graph.input] - input_initializer = [ - node.name for node in onnx_model.graph.initializer - ] - net_feed_input = list(set(input_all) - set(input_initializer)) - assert len(net_feed_input) == 1 - sess = rt.InferenceSession(output_file) - onnx_result = sess.run( - None, {net_feed_input[0]: input_tensor.detach().numpy()})[0] - # only compare part of results - random_class = np.random.randint(pytorch_result.shape[1]) - assert np.allclose( - pytorch_result[:, random_class], onnx_result[:, random_class] - ), 'The outputs are different between Pytorch and ONNX' - print('The numerical values are same between Pytorch and ONNX') - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Convert MMAction2 models to ONNX') - parser.add_argument('config', help='test config file path', default='./mmaction2/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py') - parser.add_argument('checkpoint', help='checkpoint file', default='./result_1p/tsn_r50_1x1x3_75e_ucf101_rgb.py') - parser.add_argument('--show', action='store_true', help='show 
onnx graph') - parser.add_argument('--output-file', type=str, default='tsn.onnx') - parser.add_argument('--opset-version', type=int, default=11) - parser.add_argument( - '--verify', - action='store_true', - help='verify the onnx model output against pytorch output') - parser.add_argument( - '--is-localizer', - action='store_true', - help='whether it is a localizer') - parser.add_argument( - '--shape', - type=int, - nargs='+', - default=[1, 75, 3, 256, 256], - help='input video size') - parser.add_argument( - '--softmax', - action='store_true', - help='wheter to add softmax layer at the end of recognizers') - args = parser.parse_args() - return args - - -if __name__ == '__main__': - args = parse_args() - - assert args.opset_version == 11, 'MMAction2 only supports opset 11 now' - - cfg = mmcv.Config.fromfile(args.config) - # import modules from string list. - - if not args.is_localizer: - cfg.model.backbone.pretrained = None - - # build the model - model = build_model( - cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) - model = _convert_batchnorm(model) - - # onnx.export does not support kwargs - if hasattr(model, 'forward_dummy'): - from functools import partial - model.forward = partial(model.forward_dummy, softmax=args.softmax) - elif hasattr(model, '_forward') and args.is_localizer: - model.forward = model._forward - else: - raise NotImplementedError( - 'Please implement the forward method for exporting.') - - checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') - - # conver model to onnx file - pytorch2onnx( - model, - args.shape, - opset_version=args.opset_version, - show=args.show, - output_file=args.output_file, +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +============================================================================ +""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse + +import mmcv +import numpy as np +import torch +from mmcv.runner import load_checkpoint + +from mmaction.models import build_model + +try: + import onnx + import onnxruntime as rt +except ImportError as e: + raise ImportError(f'Please install onnx and onnxruntime first. 
{e}') + +try: + from mmcv.onnx.symbolic import register_extra_symbolics +except ModuleNotFoundError: + raise NotImplementedError('please update mmcv to version>=1.0.4') + + +def _convert_batchnorm(module): + """Convert the syncBNs into normal BN3ds.""" + module_output = module + if isinstance(module, torch.nn.SyncBatchNorm): + module_output = torch.nn.BatchNorm3d(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + module_output.weight.data = module.weight.data.clone().detach() + module_output.bias.data = module.bias.data.clone().detach() + # keep requires_grad unchanged + module_output.weight.requires_grad = module.weight.requires_grad + module_output.bias.requires_grad = module.bias.requires_grad + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + for name, child in module.named_children(): + module_output.add_module(name, _convert_batchnorm(child)) + del module + return module_output + + +def pytorch2onnx(model, + input_shape, + opset_version=11, + show=False, + output_file='tmp.onnx', + verify=False): + """Convert pytorch model to onnx model. + + Args: + model (:obj:`nn.Module`): The pytorch model to be exported. + input_shape (tuple[int]): The input tensor shape of the model. + opset_version (int): Opset version of onnx used. Default: 11. + show (bool): Determines whether to print the onnx model architecture. + Default: False. + output_file (str): Output onnx model name. Default: 'tmp.onnx'. + verify (bool): Determines whether to verify the onnx model. + Default: False. + """ + model.cpu().eval() + + input_tensor = torch.randn(input_shape) + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + register_extra_symbolics(opset_version) + torch.onnx.export( + model, + input_tensor, + output_file, + input_names=input_names, + dynamic_axes=dynamic_axes, + output_names=output_names, + export_params=True, + keep_initializers_as_inputs=True, + verbose=show, + opset_version=11) + + print(f'Successfully exported ONNX model: {output_file}') + if verify: + # check by onnx + onnx_model = onnx.load(output_file) + onnx.checker.check_model(onnx_model) + + # check the numerical value + # get pytorch output + pytorch_result = model(input_tensor)[0].detach().numpy() + + # get onnx output + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [ + node.name for node in onnx_model.graph.initializer + ] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert len(net_feed_input) == 1 + sess = rt.InferenceSession(output_file) + onnx_result = sess.run( + None, {net_feed_input[0]: input_tensor.detach().numpy()})[0] + # only compare part of results + random_class = np.random.randint(pytorch_result.shape[1]) + assert np.allclose( + pytorch_result[:, random_class], onnx_result[:, random_class] + ), 'The outputs are different between Pytorch and ONNX' + print('The numerical values are same between Pytorch and ONNX') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert MMAction2 models to ONNX') + parser.add_argument('config', help='test config file path', default='./mmaction2/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py') + parser.add_argument('checkpoint', help='checkpoint file', default='./result_1p/tsn_r50_1x1x3_75e_ucf101_rgb.py') + parser.add_argument('--show', action='store_true', help='show 
onnx graph') + parser.add_argument('--output-file', type=str, default='tsn.onnx') + parser.add_argument('--opset-version', type=int, default=11) + parser.add_argument( + '--verify', + action='store_true', + help='verify the onnx model output against pytorch output') + parser.add_argument( + '--is-localizer', + action='store_true', + help='whether it is a localizer') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[1, 75, 3, 256, 256], + help='input video size') + parser.add_argument( + '--softmax', + action='store_true', + help='wheter to add softmax layer at the end of recognizers') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + + assert args.opset_version == 11, 'MMAction2 only supports opset 11 now' + + cfg = mmcv.Config.fromfile(args.config) + # import modules from string list. + + if not args.is_localizer: + cfg.model.backbone.pretrained = None + + # build the model + model = build_model( + cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) + model = _convert_batchnorm(model) + + # onnx.export does not support kwargs + if hasattr(model, 'forward_dummy'): + from functools import partial + model.forward = partial(model.forward_dummy, softmax=args.softmax) + elif hasattr(model, '_forward') and args.is_localizer: + model.forward = model._forward + else: + raise NotImplementedError( + 'Please implement the forward method for exporting.') + + checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') + + # conver model to onnx file + pytorch2onnx( + model, + args.shape, + opset_version=args.opset_version, + show=args.show, + output_file=args.output_file, verify=args.verify) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/requirements.txt b/ACL_PyTorch/contrib/cv/video_understanding/TSN/requirements.txt index bbd1fe63604baf02c3cd5ca0d8163060bbb0d463..209cb57ec6fa0f9c302a3023d36beb6fc47d8838 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/requirements.txt +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/requirements.txt @@ -1,12 +1,12 @@ -einops==0.3.0 -mmcv==1.3.9 -numpy==1.21.0 -onnx==1.9.0 -onnx-simplifier==0.3.6 -onnxoptimizer==0.2.6 -onnxruntime==1.8.1 -opencv-contrib-python==4.5.3.56 -opencv-python==4.5.3.56 -scipy==1.7.0 -torch==1.5.0 +einops==0.3.0 +mmcv==1.3.9 +numpy==1.21.0 +onnx==1.9.0 +onnx-simplifier==0.3.6 +onnxoptimizer==0.2.6 +onnxruntime==1.8.1 +opencv-contrib-python==4.5.3.56 +opencv-python==4.5.3.56 +scipy==1.7.0 +torch==1.5.0 torchvision==0.10.0 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/eval_acc_perf.sh b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/eval_acc_perf.sh index 895c63c8137f3cdda4d7d65b50c2c770d257cbde..b1e35487f31ca4469cc01895d0b188355344d168 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/eval_acc_perf.sh +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/eval_acc_perf.sh @@ -1,74 +1,74 @@ -#!/bin/bash - -datasets_path="/opt/npu/" - -for para in $* -do - if [[ $para == --datasets_path* ]]; then - datasets_path=`echo ${para#*=}` - fi -done - -python3.7 tsn_ucf101_preprocess.py --batch_size 1 --data_root ${datasets_path}/ucf101 --name out_bin_1 -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -python3.7 tsn_ucf101_preprocess.py --batch_size 8 --data_root ${datasets_path}/ucf101 --name out_bin_8 -if [ $? != 0 ]; then - echo "fail!" 
- exit -1 -fi - -source env.sh -mkdir -p output/out_bs1 -rm -rf output/out_bs1/* -./tools/msame/out/msame --model ./om/tsn_1.om --input /opt/npu/ucf101/out_bin_1 --output ./output/out_bs1/ --outfmt TXT -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -mkdir -p output/out_bs8 -rm -rf output/out_bs8/* -./tools/msame/out/msame --model ./om/tsn_8.om --input /opt/npu/ucf101/out_bin_8 --output ./output/out_bs8/ --outfmt TXT -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi - -mkdir -p result -./benchmark.x86_64 -round=20 -om_path=./om/tsn_1.om -device_id=0 -batch_size=1 -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -./benchmark.x86_64 -round=20 -om_path=./om/tsn_8.om -device_id=0 -batch_size=8 -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi - -echo "====accuracy data====" -python3.7 tsn_ucf101_postprocess.py --result_path ./output/out_bs1 --info_path ${datasets_path}/ucf101/ucf101_1.info --batch_size 1 -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -python3.7 tsn_ucf101_postprocess.py --result_path ./output/out_bs8 --info_path ${datasets_path}/ucf101/ucf101_8.info --batch_size 8 -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi - -echo "====performance data====" -python3.7 test/parse.py result/PureInfer_perf_of_tsn_1_in_device_0.txt -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi -python3.7 test/parse.py result/PureInfer_perf_of_tsn_8_in_device_0.txt -if [ $? != 0 ]; then - echo "fail!" - exit -1 -fi +#!/bin/bash + +datasets_path="/opt/npu/" + +for para in $* +do + if [[ $para == --datasets_path* ]]; then + datasets_path=`echo ${para#*=}` + fi +done + +python3.7 tsn_ucf101_preprocess.py --batch_size 1 --data_root ${datasets_path}/ucf101 --name out_bin_1 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +python3.7 tsn_ucf101_preprocess.py --batch_size 8 --data_root ${datasets_path}/ucf101 --name out_bin_8 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi + +source env.sh +mkdir -p output/out_bs1 +rm -rf output/out_bs1/* +./tools/msame/out/msame --model ./om/tsn_1.om --input /opt/npu/ucf101/out_bin_1 --output ./output/out_bs1/ --outfmt TXT +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +mkdir -p output/out_bs8 +rm -rf output/out_bs8/* +./tools/msame/out/msame --model ./om/tsn_8.om --input /opt/npu/ucf101/out_bin_8 --output ./output/out_bs8/ --outfmt TXT +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi + +mkdir -p result +./benchmark.x86_64 -round=20 -om_path=./om/tsn_1.om -device_id=0 -batch_size=1 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +./benchmark.x86_64 -round=20 -om_path=./om/tsn_8.om -device_id=0 -batch_size=8 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi + +echo "====accuracy data====" +python3.7 tsn_ucf101_postprocess.py --result_path ./output/out_bs1 --info_path ${datasets_path}/ucf101/ucf101_1.info --batch_size 1 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +python3.7 tsn_ucf101_postprocess.py --result_path ./output/out_bs8 --info_path ${datasets_path}/ucf101/ucf101_8.info --batch_size 8 +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi + +echo "====performance data====" +python3.7 test/parse.py result/PureInfer_perf_of_tsn_1_in_device_0.txt +if [ $? != 0 ]; then + echo "fail!" + exit -1 +fi +python3.7 test/parse.py result/PureInfer_perf_of_tsn_8_in_device_0.txt +if [ $? != 0 ]; then + echo "fail!" 
+ exit -1 +fi echo "success" \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/parse.py b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/parse.py index bc45f3df1869dd99cf5606f66ab62302abef9c99..9a9507ebece324428ca74a8e5991b894277c2d86 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/parse.py +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/parse.py @@ -1,37 +1,37 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -============================================================================ -""" - -import sys -import re - -if __name__ == '__main__': - result_txt = sys.argv[1] - if 'PureInfer' in result_txt: # Pure Infer - with open(result_txt, 'r') as f: - content = f.read() - txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] - fps = float(txt_data_list[0].replace('samples/s', '')) * 4 - print('310 {} fps:{}'.format(result_txt.split('_')[3], fps)) - else: # Infer based on dataset - with open(result_txt, 'r') as f: - lines = f.readlines() - for line in lines: - if 'infer' in line: - txt_data_list = [i.strip() for i in re.findall(r':(.*?),', line.replace('\n', ',') + ',')] - fps = float(txt_data_list[1]) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+============================================================================ +""" + +import sys +import re + +if __name__ == '__main__': + result_txt = sys.argv[1] + if 'PureInfer' in result_txt: # Pure Infer + with open(result_txt, 'r') as f: + content = f.read() + txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] + fps = float(txt_data_list[0].replace('samples/s', '')) * 4 + print('310 {} fps:{}'.format(result_txt.split('_')[3], fps)) + else: # Infer based on dataset + with open(result_txt, 'r') as f: + lines = f.readlines() + for line in lines: + if 'infer' in line: + txt_data_list = [i.strip() for i in re.findall(r':(.*?),', line.replace('\n', ',') + ',')] + fps = float(txt_data_list[1]) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) break \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/perf_gpu.sh b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/perf_gpu.sh index 41c7a98786ffbd91dd6a1c4a8321f5bb8cd5071c..b182f1c43f2f2ffe2b968ad24116e5ae86319979 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/perf_gpu.sh +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/perf_gpu.sh @@ -1,31 +1,31 @@ -#!/bin/bash -# GPU上执行: -trtexec --onnx=onnx_sim/tsn_1.onnx --fp16 --shapes=video:1x75x3x224x224 > tsn_1.log -perf_str=`grep "GPU.* mean.*ms$" tsn_bs1.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" tsn_bs1.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "t4 bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' - -trtexec --onnx=onnx_sim/tsn_4.onnx --fp16 --shapes=video:4x75x3x224x224 > tsn_4.log -perf_str=`grep "GPU.* mean.*ms$" tsn_bs16.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" tsn_bs16.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi -awk 'BEGIN{printf "t4 bs4 fps:%.3f\n", 1000*1/('$perf_num'/16)}' - -trtexec --onnx=onnx_sim/tsn_8.onnx --fp16 --shapes=video:8x75x3x224x224 > tsn_8.log -perf_str=`grep "GPU.* mean.*ms$" tsn_bs16.log` -if [ -n "$perf_str" ]; then - perf_num=`echo $perf_str | awk -F' ' '{print $16}'` -else - perf_str=`grep "mean.*ms$" tsn_bs16.log` - perf_num=`echo $perf_str | awk -F' ' '{print $4}'` -fi +#!/bin/bash +# GPU上执行: +trtexec --onnx=onnx_sim/tsn_1.onnx --fp16 --shapes=video:1x75x3x224x224 > tsn_1.log +perf_str=`grep "GPU.* mean.*ms$" tsn_bs1.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" tsn_bs1.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "t4 bs1 fps:%.3f\n", 1000*1/('$perf_num'/1)}' + +trtexec --onnx=onnx_sim/tsn_4.onnx --fp16 --shapes=video:4x75x3x224x224 > tsn_4.log +perf_str=`grep "GPU.* mean.*ms$" tsn_bs16.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" tsn_bs16.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi +awk 'BEGIN{printf "t4 bs4 fps:%.3f\n", 1000*1/('$perf_num'/16)}' + +trtexec --onnx=onnx_sim/tsn_8.onnx --fp16 --shapes=video:8x75x3x224x224 > tsn_8.log +perf_str=`grep "GPU.* mean.*ms$" tsn_bs16.log` +if [ -n "$perf_str" ]; then + perf_num=`echo $perf_str | awk -F' ' '{print $16}'` +else + perf_str=`grep "mean.*ms$" tsn_bs16.log` + perf_num=`echo $perf_str | awk -F' ' '{print $4}'` +fi awk 'BEGIN{printf "t4 bs8 fps:%.3f\n", 
1000*1/('$perf_num'/16)}' \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/pth2om.sh b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/pth2om.sh index 28d656c4d83eaaa5cc438d47157ffbb125220e94..487fffdef77aaf288c31d44c7b4bc4b302afdb26 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/pth2om.sh +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/test/pth2om.sh @@ -1,13 +1,13 @@ -#!/bin/bash -source env.sh -python3.7 pytorch2onnx.py ./mmaction2/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py ./tsn_r50_1x1x3_75e_ucf101_rgb_20201023-d85ab600.pth --verify - -mkdir -p om -atc --framework=5 --model=tsn.onnx --output=tsn_1 --input_format=NCDHW --input_shape="image:1,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" -atc --framework=5 --model=tsn.onnx --output=tsn_4 --input_format=NCDHW --input_shape="image:4,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" -atc --framework=5 --model=tsn.onnx --output=tsn_8 --input_format=NCDHW --input_shape="image:8,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" -if [ -f "om/tsm_bs1.om" ] && [ -f "om/tsm_bs16.om" ]; then - echo "success" -else - echo "fail!" +#!/bin/bash +source env.sh +python3.7 pytorch2onnx.py ./mmaction2/configs/recognition/tsn/tsn_r50_1x1x3_75e_ucf101_rgb.py ./tsn_r50_1x1x3_75e_ucf101_rgb_20201023-d85ab600.pth --verify + +mkdir -p om +atc --framework=5 --model=tsn.onnx --output=tsn_1 --input_format=NCDHW --input_shape="image:1,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" +atc --framework=5 --model=tsn.onnx --output=tsn_4 --input_format=NCDHW --input_shape="image:4,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" +atc --framework=5 --model=tsn.onnx --output=tsn_8 --input_format=NCDHW --input_shape="image:8,75,3,256,256" --log=debug --soc_version=Ascend310 --auto_tune_mode "RL,GA" +if [ -f "om/tsm_bs1.om" ] && [ -f "om/tsm_bs16.om" ]; then + echo "success" +else + echo "fail!" fi \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/video_understanding/TSN/tsn_ucf101_postprocess.py b/ACL_PyTorch/contrib/cv/video_understanding/TSN/tsn_ucf101_postprocess.py index bc538bd927e1b3fae90c3302448469d799a1e330..ee3c2861103f42b7de445b97d87e4675d8d1cfe0 100644 --- a/ACL_PyTorch/contrib/cv/video_understanding/TSN/tsn_ucf101_postprocess.py +++ b/ACL_PyTorch/contrib/cv/video_understanding/TSN/tsn_ucf101_postprocess.py @@ -1,59 +1,59 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-============================================================================ -""" -# !/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import argparse -import numpy as np -from collections import OrderedDict -from mmaction.core import top_k_accuracy - - -def parse_args(): - parser = argparse.ArgumentParser(description='Dataset UCF101 Postprocessing') - parser.add_argument('--result_path', type=str) - parser.add_argument('--info_path', type=str) - parser.add_argument('--batch_size', type=int, default=1) - - args = parser.parse_args() - - return args - - -def main(): - args = parse_args() - with open(args.info_path,"r") as f: - l = list(map(lambda x:int(x.strip()), f.readlines())) - - num_samples = len(l) // args.batch_size - i = 0 - acc = 0 - while i < num_samples: - with open(args.result_path+str(i)+'_output_0.txt', 'r') as f: - lines = f.readlines() - lines = list(map(lambda x:x.strip().split(), lines)) - lines = np.array([[float(lines[m][n]) for n in range(101)]for m in range(args.batch_size)]).argmax(1) - for k in range(args.batch_size): - acc += int(lines[k] == l[i*args.batch_size + k]) - i += 1 - - print(acc / len(l)) - - -if __name__ == '__main__': +""" +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +============================================================================ +""" +# !/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import argparse +import numpy as np +from collections import OrderedDict +from mmaction.core import top_k_accuracy + + +def parse_args(): + parser = argparse.ArgumentParser(description='Dataset UCF101 Postprocessing') + parser.add_argument('--result_path', type=str) + parser.add_argument('--info_path', type=str) + parser.add_argument('--batch_size', type=int, default=1) + + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + with open(args.info_path,"r") as f: + l = list(map(lambda x:int(x.strip()), f.readlines())) + + num_samples = len(l) // args.batch_size + i = 0 + acc = 0 + while i < num_samples: + with open(args.result_path+str(i)+'_output_0.txt', 'r') as f: + lines = f.readlines() + lines = list(map(lambda x:x.strip().split(), lines)) + lines = np.array([[float(lines[m][n]) for n in range(101)]for m in range(args.batch_size)]).argmax(1) + for k in range(args.batch_size): + acc += int(lines[k] == l[i*args.batch_size + k]) + i += 1 + + print(acc / len(l)) + + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/LICENSE b/ACL_PyTorch/contrib/knowledge/RotatE/LICENSE index 797bf40e85c5d2986ebcec9cb51aed979ca88b82..04adf5cbc620ad190547b092fa449e36df5f7bf4 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/LICENSE +++ b/ACL_PyTorch/contrib/knowledge/RotatE/LICENSE @@ -1,203 +1,203 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. 
+ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/ReadME.md b/ACL_PyTorch/contrib/knowledge/RotatE/ReadME.md index 0d027928ada725aa4ad1188c56997c95ddad59aa..de6807b88d3beba1ab2a07e05bbb6e048c786d0a 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/ReadME.md +++ b/ACL_PyTorch/contrib/knowledge/RotatE/ReadME.md @@ -1,41 +1,41 @@ -# RotatE模型PyTorch离线推理指导 - -## 1 环境准备 - -- **1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装** - -``` -pip3.7 install -r requirements.txt -``` - -- **1.2 获取,修改与安装开源模型代码** - -``` -git clone https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding -b master -cd KnowledgeGraphEmbedding -git reset --hard 2e440e0f9c687314d5ff67ead68ce985dc446e3a -cd .. -``` -- **1.3 [获取权重文件](https://www.aliyundrive.com/drive/folder/616a7eb758db2df6ae8448e4b34fe570510ad216)** - -- **1.4 开源模型代码里包含有数据集** - -- **1.5 获取[msame工具](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer)** - -将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 - -## 2 离线推理 - -- **310上执行,执行时使npu-smi info查看设备状态,确保device空闲** - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh -``` - -- **评测结果:** - -| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | -| ----------- | ------------------------------------------------------------ | ------- | -------------- | --------------- | -| RotatE-head bs1
 RotatE-tail bs1| [**MRR:0.337**](https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding) | MRR:0.336 | 21.9065fps<br>21.9091fps | 99.3504fps<br>104.9432fps |
-| RotatE-head bs16<br>RotatE-tail bs16 | [**MRR:0.337**](https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding) | MRR:0.336 | 22.2017fps<br>22.1964fps | 119.9172fps<br>
129.7252fps | +# RotatE模型PyTorch离线推理指导 + +## 1 环境准备 + +- **1.1 安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装** + +``` +pip3.7 install -r requirements.txt +``` + +- **1.2 获取,修改与安装开源模型代码** + +``` +git clone https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding -b master +cd KnowledgeGraphEmbedding +git reset --hard 2e440e0f9c687314d5ff67ead68ce985dc446e3a +cd .. +``` +- **1.3 [获取权重文件](https://www.aliyundrive.com/drive/folder/616a7eb758db2df6ae8448e4b34fe570510ad216)** + +- **1.4 开源模型代码里包含有数据集** + +- **1.5 获取[msame工具](https://gitee.com/ascend/tools/tree/master/msame)和[benchmark工具](https://gitee.com/ascend/cann-benchmark/tree/master/infer)** + +将msame和benchmark.x86_64(或benchmark.aarch64)放到当前目录 + +## 2 离线推理 + +- **310上执行,执行时使npu-smi info查看设备状态,确保device空闲** + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh +``` + +- **评测结果:** + +| 模型 | pth精度 | 310精度 | 性能基准 | 310性能 | +| ----------- | ------------------------------------------------------------ | ------- | -------------- | --------------- | +| RotatE-head bs1
 RotatE-tail bs1| [**MRR:0.337**](https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding) | MRR:0.336 | 21.9065fps<br>21.9091fps | 99.3504fps<br>104.9432fps |
+| RotatE-head bs16<br>RotatE-tail bs16 | [**MRR:0.337**](https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding) | MRR:0.336 | 22.2017fps<br>22.1964fps | 119.9172fps<br>
129.7252fps | diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/get_info.py b/ACL_PyTorch/contrib/knowledge/RotatE/get_info.py index 7900a12b52190677dad622a0419652d7dea3ab18..07bf03fd842145b73cc76227b5aad2f45954e9e5 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/get_info.py +++ b/ACL_PyTorch/contrib/knowledge/RotatE/get_info.py @@ -1,33 +1,33 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import pdb -import sys -from glob import glob - - -def get_bin_info(file_path, info_name): - bin_files = glob(os.path.join(file_path, '*.npz')) - with open(info_name, 'w') as file: - for index, img in enumerate(bin_files): - # pdb.set_trace() - content = ' '.join([str(index), img]) - file.write(content) - file.write('\n') - -if __name__ == '__main__': - file_path = sys.argv[1] - info_name = sys.argv[2] - get_bin_info(file_path, info_name) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import pdb +import sys +from glob import glob + + +def get_bin_info(file_path, info_name): + bin_files = glob(os.path.join(file_path, '*.npz')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_files): + # pdb.set_trace() + content = ' '.join([str(index), img]) + file.write(content) + file.write('\n') + +if __name__ == '__main__': + file_path = sys.argv[1] + info_name = sys.argv[2] + get_bin_info(file_path, info_name) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/modelzoo_level.txt b/ACL_PyTorch/contrib/knowledge/RotatE/modelzoo_level.txt index 83689985f26624b65a4c5ebb5f00a152618799ba..8c469d858afccf3026a8640799938f8de7b46fac 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/knowledge/RotatE/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:PERFECT \ No newline at end of file diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/requirements.txt b/ACL_PyTorch/contrib/knowledge/RotatE/requirements.txt index 41c2d7836f0d565c237e43c4319b4daf21b8dc4a..a220c00791e780651967dce844afcb4de38eb291 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/requirements.txt +++ b/ACL_PyTorch/contrib/knowledge/RotatE/requirements.txt @@ -1,5 +1,5 @@ -torch == 1.8.0 -torchvision == 0.9.0 -onnx == 1.9.0 -numpy == 1.20.3 +torch == 1.8.0 +torchvision == 0.9.0 +onnx == 1.9.0 +numpy == 1.20.3 scikit-learn >= 0.20.2 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_postprocess.py b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_postprocess.py index eb605845588b3a10d2e3bb6a2ad51d613eed7a98..2923c802bd24de92b98bbf8875b7669b03ef0204 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_postprocess.py +++ b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_postprocess.py @@ -1,121 +1,121 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
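For context, get_info.py above is a small indexing helper: it takes an input directory and an output file name on the command line, globs the *.npz files in that directory, and writes one "index path" line per file. A minimal usage sketch follows; the directory and file names in it are hypothetical examples, not paths guaranteed to exist in this repository.

```
# Hypothetical invocation (equivalent to: python3.7 get_info.py ./bin/head/neg rotate_head.info)
from get_info import get_bin_info

get_bin_info('./bin/head/neg', 'rotate_head.info')
# rotate_head.info then contains lines such as:
#   0 ./bin/head/neg/sample0.npz
#   1 ./bin/head/neg/sample1.npz
```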
-import argparse -import pdb - -import torch -import os -import numpy as np - -def postProcesss(result_path, data_head, data_tail): - - data_mulu = os.listdir(result_path) - data_mulu.sort(key=lambda x: int(x.split('_')[1][0:])) - head_list_date = data_mulu[0] - head_mulu_path = os.path.join(result_path, head_list_date) - bin_head_list = os.listdir(head_mulu_path) - bin_head_list.sort(key=lambda x: int(x.split('-')[0][3:])) - - tail_list_date = data_mulu[1] - tail_mulu_path = os.path.join(result_path, tail_list_date) - bin_tail_list = os.listdir(tail_mulu_path) - bin_tail_list.sort(key=lambda x: int(x.split('-')[0][3:])) - head_ite_list = os.listdir(data_head+'/post') - tail_ite_list = os.listdir(data_tail+'/post') - - head_pos_list = os.listdir(data_head+'/possamp') - tail_pos_list = os.listdir(data_head + '/possamp') - head_ite_list.sort(key=lambda x: int(x.split('-')[0][3:])) - tail_ite_list.sort(key=lambda x: int(x.split('-')[0][3:])) - head_pos_list.sort(key=lambda x: int(x.split('-')[0][3:])) - tail_pos_list.sort(key=lambda x: int(x.split('-')[0][3:])) - - logs = [] - for i in range(len(bin_head_list)): - bin_path = os.path.join(head_mulu_path, bin_head_list[i]) - score = np.loadtxt(bin_path) - score = torch.from_numpy(score) - ite_path = os.path.join(data_head+'/post', head_ite_list[i]) - filter_bias = np.loadtxt(ite_path) - filter_bias = torch.from_numpy(filter_bias) - pos_path = os.path.join(data_head + '/possamp', head_pos_list[i]) - positive_sample = np.loadtxt(pos_path) - positive_sample = positive_sample.reshape(-1, 3) - score += filter_bias - score = torch.reshape(score, (-1, 14541)) - # Explicitly sort all the entities to ensure that there is no test exposure bias - argsort = torch.argsort(score, dim=1, descending=True) - positive_arg = positive_sample[:, 0] - - for i in range(len(score)): - # Notice that argsort is not ranking - ranking = (argsort[i, :] == positive_arg[i]).nonzero() - assert ranking.size(0) == 1 - # ranking + 1 is the true ranking used in evaluation metrics - ranking = 1 + ranking.item() - logs.append({ - 'MRR': 1.0 / ranking, - 'MR': float(ranking), - 'HITS@1': 1.0 if ranking <= 1 else 0.0, - 'HITS@3': 1.0 if ranking <= 3 else 0.0, - 'HITS@10': 1.0 if ranking <= 10 else 0.0, - }) - for i in range(len(bin_tail_list)): - bin_path = os.path.join(tail_mulu_path, bin_tail_list[i]) - score = np.loadtxt(bin_path) - score = torch.from_numpy(score) - ite_path = os.path.join(data_tail + '/post', tail_ite_list[i]) - filter_bias = np.loadtxt(ite_path) - filter_bias = torch.from_numpy(filter_bias) - pos_path = os.path.join(data_tail + '/possamp', tail_pos_list[i]) - positive_sample = np.loadtxt(pos_path) - positive_sample = positive_sample.reshape(-1,3) - score += filter_bias - score = torch.reshape(score,(-1,14541)) - - # Explicitly sort all the entities to ensure that there is no test exposure bias - argsort = torch.argsort(score, dim=1, descending=True) - positive_arg = positive_sample[:, 2] - - for i in range(len(score)): - # Notice that argsort is not ranking - ranking = (argsort[i, :] == positive_arg[i]).nonzero() - assert ranking.size(0) == 1 - # ranking + 1 is the true ranking used in evaluation metrics - ranking = 1 + ranking.item() - logs.append({ - 'MRR': 1.0 / ranking, - 'MR': float(ranking), - 'HITS@1': 1.0 if ranking <= 1 else 0.0, - 'HITS@3': 1.0 if ranking <= 3 else 0.0, - 'HITS@10': 1.0 if ranking <= 10 else 0.0, - }) - - metrics = {} - for metric in logs[0].keys(): - metrics[metric] = sum([log[metric] for log in logs]) / len(logs) - - return metrics - 
- - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='postprocess of r2plus1d') - - parser.add_argument('--result_path', default=r'E:/huawei/KGE_inference/out') - parser.add_argument('--data_head', default=r'E:/huawei/KGE_inference/bin/head') - parser.add_argument('--data_tail', default=r'E:/huawei/KGE_inference/bin/tail') - opt = parser.parse_args() - metrics = postProcesss(opt.result_path, opt.data_head, opt.data_tail) - print(metrics) - +# Copyright 2021 Huawei Technologies Co., Ltd +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import pdb + +import torch +import os +import numpy as np + +def postProcesss(result_path, data_head, data_tail): + + data_mulu = os.listdir(result_path) + data_mulu.sort(key=lambda x: int(x.split('_')[1][0:])) + head_list_date = data_mulu[0] + head_mulu_path = os.path.join(result_path, head_list_date) + bin_head_list = os.listdir(head_mulu_path) + bin_head_list.sort(key=lambda x: int(x.split('-')[0][3:])) + + tail_list_date = data_mulu[1] + tail_mulu_path = os.path.join(result_path, tail_list_date) + bin_tail_list = os.listdir(tail_mulu_path) + bin_tail_list.sort(key=lambda x: int(x.split('-')[0][3:])) + head_ite_list = os.listdir(data_head+'/post') + tail_ite_list = os.listdir(data_tail+'/post') + + head_pos_list = os.listdir(data_head+'/possamp') + tail_pos_list = os.listdir(data_head + '/possamp') + head_ite_list.sort(key=lambda x: int(x.split('-')[0][3:])) + tail_ite_list.sort(key=lambda x: int(x.split('-')[0][3:])) + head_pos_list.sort(key=lambda x: int(x.split('-')[0][3:])) + tail_pos_list.sort(key=lambda x: int(x.split('-')[0][3:])) + + logs = [] + for i in range(len(bin_head_list)): + bin_path = os.path.join(head_mulu_path, bin_head_list[i]) + score = np.loadtxt(bin_path) + score = torch.from_numpy(score) + ite_path = os.path.join(data_head+'/post', head_ite_list[i]) + filter_bias = np.loadtxt(ite_path) + filter_bias = torch.from_numpy(filter_bias) + pos_path = os.path.join(data_head + '/possamp', head_pos_list[i]) + positive_sample = np.loadtxt(pos_path) + positive_sample = positive_sample.reshape(-1, 3) + score += filter_bias + score = torch.reshape(score, (-1, 14541)) + # Explicitly sort all the entities to ensure that there is no test exposure bias + argsort = torch.argsort(score, dim=1, descending=True) + positive_arg = positive_sample[:, 0] + + for i in range(len(score)): + # Notice that argsort is not ranking + ranking = (argsort[i, :] == positive_arg[i]).nonzero() + assert ranking.size(0) == 1 + # ranking + 1 is the true ranking used in evaluation metrics + ranking = 1 + ranking.item() + logs.append({ + 'MRR': 1.0 / ranking, + 'MR': float(ranking), + 'HITS@1': 1.0 if ranking <= 1 else 0.0, + 'HITS@3': 1.0 if ranking <= 3 else 0.0, + 'HITS@10': 1.0 if ranking <= 10 else 0.0, + }) + for i in range(len(bin_tail_list)): + bin_path = os.path.join(tail_mulu_path, bin_tail_list[i]) + score = np.loadtxt(bin_path) + score = torch.from_numpy(score) + ite_path = os.path.join(data_tail + '/post', tail_ite_list[i]) + 
filter_bias = np.loadtxt(ite_path) + filter_bias = torch.from_numpy(filter_bias) + pos_path = os.path.join(data_tail + '/possamp', tail_pos_list[i]) + positive_sample = np.loadtxt(pos_path) + positive_sample = positive_sample.reshape(-1,3) + score += filter_bias + score = torch.reshape(score,(-1,14541)) + + # Explicitly sort all the entities to ensure that there is no test exposure bias + argsort = torch.argsort(score, dim=1, descending=True) + positive_arg = positive_sample[:, 2] + + for i in range(len(score)): + # Notice that argsort is not ranking + ranking = (argsort[i, :] == positive_arg[i]).nonzero() + assert ranking.size(0) == 1 + # ranking + 1 is the true ranking used in evaluation metrics + ranking = 1 + ranking.item() + logs.append({ + 'MRR': 1.0 / ranking, + 'MR': float(ranking), + 'HITS@1': 1.0 if ranking <= 1 else 0.0, + 'HITS@3': 1.0 if ranking <= 3 else 0.0, + 'HITS@10': 1.0 if ranking <= 10 else 0.0, + }) + + metrics = {} + for metric in logs[0].keys(): + metrics[metric] = sum([log[metric] for log in logs]) / len(logs) + + return metrics + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='postprocess of r2plus1d') + + parser.add_argument('--result_path', default=r'E:/huawei/KGE_inference/out') + parser.add_argument('--data_head', default=r'E:/huawei/KGE_inference/bin/head') + parser.add_argument('--data_tail', default=r'E:/huawei/KGE_inference/bin/tail') + opt = parser.parse_args() + metrics = postProcesss(opt.result_path, opt.data_head, opt.data_tail) + print(metrics) + diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_preprocess.py b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_preprocess.py index a42480c8b3237ecff973f04c7b0c7fe6cac3417d..92585dc2898a20100811cabb26b5eff75e535a13 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_preprocess.py +++ b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_preprocess.py @@ -1,227 +1,227 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
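As a sanity check on the metric computation in rotate_postprocess.py above, the toy sketch below reproduces its ranking logic on a single made-up score row; the numbers are illustrative only and are not model outputs.

```
import torch

score = torch.tensor([[0.1, 2.3, 0.7, 1.5]])   # filtered scores for 4 candidate entities
positive_arg = torch.tensor([2])               # index of the ground-truth entity

argsort = torch.argsort(score, dim=1, descending=True)              # entity ids sorted best-first
ranking = (argsort[0, :] == positive_arg[0]).nonzero().item() + 1   # 1-based rank, here 3
metrics = {
    'MRR': 1.0 / ranking,                      # 1/3
    'MR': float(ranking),
    'HITS@1': 1.0 if ranking <= 1 else 0.0,
    'HITS@3': 1.0 if ranking <= 3 else 0.0,    # 1.0, the true entity is in the top 3
    'HITS@10': 1.0 if ranking <= 10 else 0.0,
}
print(metrics)
```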
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -sys.path.append(r'KnowledgeGraphEmbedding/codes/') -import logging -import os -import io -import pdb -import torch -import numpy as np - -import time -from torch.utils.data import DataLoader -import dataloader - -nowTime = time.strftime('%Y%m%d', time.localtime(time.time())) - -def parse_args(args=None): - parser = argparse.ArgumentParser( - description='Training and Testing Knowledge Graph Embedding Models', - usage='train.py [] [-h | --help]' - ) - - parser.add_argument('--data_path', type=str, default='./KnowledgeGraphEmbedding/data/FB15k-237') - parser.add_argument('--test_batch_size', default=6, type=int, help='valid/test batch size') - parser.add_argument('-cpu', '--cpu_num', default=10, type=int) - parser.add_argument('--output_path', default='bin/', type=str) - parser.add_argument('--output_head_post', default='head/post', type=str) - parser.add_argument('--output_tail_post', default='tail/post', type=str) - parser.add_argument('--output_head_pos', default='head/pos', type=str) - parser.add_argument('--output_head_neg', default='head/neg', type=str) - parser.add_argument('--output_head_mode', default='head/mode', type=str) - parser.add_argument('--output_head_pp', default='head/possamp', type=str) - parser.add_argument('--output_head_np', default='head/negsamp', type=str) - parser.add_argument('--output_tail_pos', default='tail/pos', type=str) - parser.add_argument('--output_tail_neg', default='tail/neg', type=str) - parser.add_argument('--output_tail_mode', default='tail/mode', type=str) - parser.add_argument('--output_tail_pp', default='tail/possamp', type=str) - parser.add_argument('--output_tail_np', default='tail/negsamp', type=str) - parser.add_argument('--nentity', type=int, default=0, help='DO NOT MANUALLY SET') - parser.add_argument('--nrelation', type=int, default=0, help='DO NOT MANUALLY SET') - arg = parser.parse_args(args) - arg.output_head_post = arg.output_path + arg.output_head_post - arg.output_tail_post = arg.output_path + arg.output_tail_post - arg.output_head_pos = arg.output_path + arg.output_head_pos - arg.output_head_neg = arg.output_path + arg.output_head_neg - arg.output_head_mode = arg.output_path + arg.output_head_mode - arg.output_head_pp = arg.output_path + arg.output_head_pp - arg.output_head_np = arg.output_path + arg.output_head_np - arg.output_tail_pos = arg.output_path + arg.output_tail_pos - arg.output_tail_neg = arg.output_path + arg.output_tail_neg - arg.output_tail_mode = arg.output_path + arg.output_tail_mode - arg.output_tail_pp = arg.output_path + arg.output_tail_pp - arg.output_tail_np = arg.output_path + arg.output_tail_np - return arg - -def read_triple(file_path, entity2id, relation2id): - ''' - Read triples and map them into ids. 
- ''' - triples = [] - with open(file_path) as fin: - for line in fin: - h, r, t = line.strip().split('\t') - triples.append((entity2id[h], relation2id[r], entity2id[t])) - return triples - -def to_numpy32(tensor): - return tensor.detach().cpu().numpy().astype(np.int32) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int32) - -def to_numpy64(tensor): - return tensor.detach().cpu().numpy().astype(np.int64) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int64) - -def main(args): - - with open(os.path.join(args.data_path, 'entities.dict')) as fin: - entity2id = dict() - for line in fin: - eid, entity = line.strip().split('\t') - entity2id[entity] = int(eid) - - with open(os.path.join(args.data_path, 'relations.dict')) as fin: - relation2id = dict() - for line in fin: - rid, relation = line.strip().split('\t') - relation2id[relation] = int(rid) - - - nentity = len(entity2id) - nrelation = len(relation2id) - - args.nentity = nentity - args.nrelation = nrelation - - - train_triples = read_triple(os.path.join(args.data_path, 'train.txt'), entity2id, relation2id) - logging.info('#train: %d' % len(train_triples)) - valid_triples = read_triple(os.path.join(args.data_path, 'valid.txt'), entity2id, relation2id) - logging.info('#valid: %d' % len(valid_triples)) - test_triples = read_triple(os.path.join(args.data_path, 'test.txt'), entity2id, relation2id) - logging.info('#test: %d' % len(test_triples)) - - # All true triples - all_true_triples = train_triples + valid_triples + test_triples - - test_dataloader_head = DataLoader( - dataloader.TestDataset( - test_triples, - all_true_triples, - args.nentity, - args.nrelation, - 'head-batch' - ), - batch_size=args.test_batch_size, - num_workers=max(1, args.cpu_num // 2), - collate_fn=dataloader.TestDataset.collate_fn - ) - - test_dataloader_tail = DataLoader( - dataloader.TestDataset( - test_triples, - all_true_triples, - args.nentity, - args.nrelation, - 'tail-batch' - ), - batch_size=args.test_batch_size, - num_workers=max(1, args.cpu_num // 2), - collate_fn=dataloader.TestDataset.collate_fn - ) - - test_dataset_list = [test_dataloader_head, test_dataloader_tail] - # test_dataset_list = [test_dataloader_tail] - for test_dataset in test_dataset_list: - for index, value in enumerate(test_dataset): - if(value[0].shape[0] == args.test_batch_size): - batch_pos = value[0] - batch_pos = to_numpy64(batch_pos) - - batch_neg = value[1] - batch_neg = to_numpy32(batch_neg) - batch_ite = value[2].numpy() - batch_mode = value[3] - - print('preprocessing ' + str(index)) - - if not os.path.exists(str(args.output_head_pos)): - os.makedirs(str(args.output_head_pos)) - if not os.path.exists(str(args.output_head_neg)): - os.makedirs(str(args.output_head_neg)) - if not os.path.exists(str(args.output_head_mode)): - os.makedirs(str(args.output_head_mode)) - if not os.path.exists(str(args.output_head_pp)): - os.makedirs(str(args.output_head_pp)) - if not os.path.exists(str(args.output_tail_pos)): - os.makedirs(str(args.output_tail_pos)) - if not os.path.exists(str(args.output_tail_neg)): - os.makedirs(str(args.output_tail_neg)) - if not os.path.exists(str(args.output_tail_mode)): - os.makedirs(str(args.output_tail_mode)) - if not os.path.exists(str(args.output_tail_pp)): - os.makedirs(str(args.output_tail_pp)) - - - if batch_mode == 'head-batch': - save_path_pos = str(args.output_head_pos) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.bin' - save_path_pos_txt = str(args.output_head_pp) + 
'/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.txt' - batch_pos.tofile(str(save_path_pos)) - np.savetxt(save_path_pos_txt, batch_pos) - - save_path_neg = str(args.output_head_neg) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.bin' - batch_neg.tofile(str(save_path_neg)) - - save_post_dir = str(args.output_head_post) - if not os.path.exists(save_post_dir): - os.makedirs(save_post_dir) - save_path_post = save_post_dir + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.txt' - np.savetxt(save_path_post, batch_ite) - print(index, str(save_path_post), "save done!") - print("----------------head---next-----------------------------") - - if batch_mode == 'tail-batch': - - save_path_pos = str(args.output_tail_pos) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.bin' - save_path_pos_txt = str(args.output_tail_pp) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.txt' - batch_pos.tofile(str(save_path_pos)) - np.savetxt(save_path_pos_txt, batch_pos) - - save_path_neg = str(args.output_tail_neg) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.bin' - batch_neg.tofile(str(save_path_neg)) - - print(index, str(save_path_neg), "save done!") - - save_post_dir = str(args.output_tail_post) - if not os.path.exists(save_post_dir): - os.makedirs(save_post_dir) - save_path_post = save_post_dir + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( - int(args.test_batch_size) * (index + 1) - 1) + '.txt' - np.savetxt(save_path_post, batch_ite) - print(index, str(save_path_post), "save done!") - print("---------------tail----next-----------------------------") - - -if __name__ == '__main__': - main(parse_args()) +# Copyright 2021 Huawei Technologies Co., Ltd +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +sys.path.append(r'KnowledgeGraphEmbedding/codes/') +import logging +import os +import io +import pdb +import torch +import numpy as np + +import time +from torch.utils.data import DataLoader +import dataloader + +nowTime = time.strftime('%Y%m%d', time.localtime(time.time())) + +def parse_args(args=None): + parser = argparse.ArgumentParser( + description='Training and Testing Knowledge Graph Embedding Models', + usage='train.py [] [-h | --help]' + ) + + parser.add_argument('--data_path', type=str, default='./KnowledgeGraphEmbedding/data/FB15k-237') + parser.add_argument('--test_batch_size', default=6, type=int, help='valid/test batch size') + parser.add_argument('-cpu', '--cpu_num', default=10, type=int) + parser.add_argument('--output_path', default='bin/', type=str) + parser.add_argument('--output_head_post', default='head/post', type=str) + parser.add_argument('--output_tail_post', default='tail/post', type=str) + parser.add_argument('--output_head_pos', default='head/pos', type=str) + parser.add_argument('--output_head_neg', default='head/neg', type=str) + parser.add_argument('--output_head_mode', default='head/mode', type=str) + parser.add_argument('--output_head_pp', default='head/possamp', type=str) + parser.add_argument('--output_head_np', default='head/negsamp', type=str) + parser.add_argument('--output_tail_pos', default='tail/pos', type=str) + parser.add_argument('--output_tail_neg', default='tail/neg', type=str) + parser.add_argument('--output_tail_mode', default='tail/mode', type=str) + parser.add_argument('--output_tail_pp', default='tail/possamp', type=str) + parser.add_argument('--output_tail_np', default='tail/negsamp', type=str) + parser.add_argument('--nentity', type=int, default=0, help='DO NOT MANUALLY SET') + parser.add_argument('--nrelation', type=int, default=0, help='DO NOT MANUALLY SET') + arg = parser.parse_args(args) + arg.output_head_post = arg.output_path + arg.output_head_post + arg.output_tail_post = arg.output_path + arg.output_tail_post + arg.output_head_pos = arg.output_path + arg.output_head_pos + arg.output_head_neg = arg.output_path + arg.output_head_neg + arg.output_head_mode = arg.output_path + arg.output_head_mode + arg.output_head_pp = arg.output_path + arg.output_head_pp + arg.output_head_np = arg.output_path + arg.output_head_np + arg.output_tail_pos = arg.output_path + arg.output_tail_pos + arg.output_tail_neg = arg.output_path + arg.output_tail_neg + arg.output_tail_mode = arg.output_path + arg.output_tail_mode + arg.output_tail_pp = arg.output_path + arg.output_tail_pp + arg.output_tail_np = arg.output_path + arg.output_tail_np + return arg + +def read_triple(file_path, entity2id, relation2id): + ''' + Read triples and map them into ids. 
+ ''' + triples = [] + with open(file_path) as fin: + for line in fin: + h, r, t = line.strip().split('\t') + triples.append((entity2id[h], relation2id[r], entity2id[t])) + return triples + +def to_numpy32(tensor): + return tensor.detach().cpu().numpy().astype(np.int32) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int32) + +def to_numpy64(tensor): + return tensor.detach().cpu().numpy().astype(np.int64) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int64) + +def main(args): + + with open(os.path.join(args.data_path, 'entities.dict')) as fin: + entity2id = dict() + for line in fin: + eid, entity = line.strip().split('\t') + entity2id[entity] = int(eid) + + with open(os.path.join(args.data_path, 'relations.dict')) as fin: + relation2id = dict() + for line in fin: + rid, relation = line.strip().split('\t') + relation2id[relation] = int(rid) + + + nentity = len(entity2id) + nrelation = len(relation2id) + + args.nentity = nentity + args.nrelation = nrelation + + + train_triples = read_triple(os.path.join(args.data_path, 'train.txt'), entity2id, relation2id) + logging.info('#train: %d' % len(train_triples)) + valid_triples = read_triple(os.path.join(args.data_path, 'valid.txt'), entity2id, relation2id) + logging.info('#valid: %d' % len(valid_triples)) + test_triples = read_triple(os.path.join(args.data_path, 'test.txt'), entity2id, relation2id) + logging.info('#test: %d' % len(test_triples)) + + # All true triples + all_true_triples = train_triples + valid_triples + test_triples + + test_dataloader_head = DataLoader( + dataloader.TestDataset( + test_triples, + all_true_triples, + args.nentity, + args.nrelation, + 'head-batch' + ), + batch_size=args.test_batch_size, + num_workers=max(1, args.cpu_num // 2), + collate_fn=dataloader.TestDataset.collate_fn + ) + + test_dataloader_tail = DataLoader( + dataloader.TestDataset( + test_triples, + all_true_triples, + args.nentity, + args.nrelation, + 'tail-batch' + ), + batch_size=args.test_batch_size, + num_workers=max(1, args.cpu_num // 2), + collate_fn=dataloader.TestDataset.collate_fn + ) + + test_dataset_list = [test_dataloader_head, test_dataloader_tail] + # test_dataset_list = [test_dataloader_tail] + for test_dataset in test_dataset_list: + for index, value in enumerate(test_dataset): + if(value[0].shape[0] == args.test_batch_size): + batch_pos = value[0] + batch_pos = to_numpy64(batch_pos) + + batch_neg = value[1] + batch_neg = to_numpy32(batch_neg) + batch_ite = value[2].numpy() + batch_mode = value[3] + + print('preprocessing ' + str(index)) + + if not os.path.exists(str(args.output_head_pos)): + os.makedirs(str(args.output_head_pos)) + if not os.path.exists(str(args.output_head_neg)): + os.makedirs(str(args.output_head_neg)) + if not os.path.exists(str(args.output_head_mode)): + os.makedirs(str(args.output_head_mode)) + if not os.path.exists(str(args.output_head_pp)): + os.makedirs(str(args.output_head_pp)) + if not os.path.exists(str(args.output_tail_pos)): + os.makedirs(str(args.output_tail_pos)) + if not os.path.exists(str(args.output_tail_neg)): + os.makedirs(str(args.output_tail_neg)) + if not os.path.exists(str(args.output_tail_mode)): + os.makedirs(str(args.output_tail_mode)) + if not os.path.exists(str(args.output_tail_pp)): + os.makedirs(str(args.output_tail_pp)) + + + if batch_mode == 'head-batch': + save_path_pos = str(args.output_head_pos) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.bin' + save_path_pos_txt = str(args.output_head_pp) + 
'/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.txt' + batch_pos.tofile(str(save_path_pos)) + np.savetxt(save_path_pos_txt, batch_pos) + + save_path_neg = str(args.output_head_neg) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.bin' + batch_neg.tofile(str(save_path_neg)) + + save_post_dir = str(args.output_head_post) + if not os.path.exists(save_post_dir): + os.makedirs(save_post_dir) + save_path_post = save_post_dir + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.txt' + np.savetxt(save_path_post, batch_ite) + print(index, str(save_path_post), "save done!") + print("----------------head---next-----------------------------") + + if batch_mode == 'tail-batch': + + save_path_pos = str(args.output_tail_pos) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.bin' + save_path_pos_txt = str(args.output_tail_pp) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.txt' + batch_pos.tofile(str(save_path_pos)) + np.savetxt(save_path_pos_txt, batch_pos) + + save_path_neg = str(args.output_tail_neg) + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.bin' + batch_neg.tofile(str(save_path_neg)) + + print(index, str(save_path_neg), "save done!") + + save_post_dir = str(args.output_tail_post) + if not os.path.exists(save_post_dir): + os.makedirs(save_post_dir) + save_path_post = save_post_dir + '/bin' + str(int(args.test_batch_size) * index) + '-' + str( + int(args.test_batch_size) * (index + 1) - 1) + '.txt' + np.savetxt(save_path_post, batch_ite) + print(index, str(save_path_post), "save done!") + print("---------------tail----next-----------------------------") + + +if __name__ == '__main__': + main(parse_args()) diff --git a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_pth2onnx.py b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_pth2onnx.py index 8c36287bab502f23aff6252def28ce8775cb0e05..0f62a827ebcc2b2b747534132b151fe24f82a85d 100644 --- a/ACL_PyTorch/contrib/knowledge/RotatE/rotate_pth2onnx.py +++ b/ACL_PyTorch/contrib/knowledge/RotatE/rotate_pth2onnx.py @@ -1,73 +1,73 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
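The per-batch file naming used by rotate_preprocess.py above is worth spelling out, because the postprocessing step later sorts files by the number embedded in these names. A short sketch with an arbitrary batch index, assuming the default output directories:

```
test_batch_size = 16   # matches --test_batch_size
index = 3              # arbitrary batch index, for illustration only
start = test_batch_size * index             # 48
end = test_batch_size * (index + 1) - 1     # 63
save_path_pos = 'bin/head/pos' + '/bin' + str(start) + '-' + str(end) + '.bin'
print(save_path_pos)   # bin/head/pos/bin48-63.bin
# rotate_postprocess.py sorts names with this 'bin{start}-{end}' stem via int(x.split('-')[0][3:]), i.e. by 48 here.
```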
-import pdb -import sys -sys.path.append(r'KnowledgeGraphEmbedding/codes/') -import numpy as np -import torch - -import argparse - -from model import KGEModel - -def to_numpy32(tensor): - return tensor.detach().cpu().numpy().astype(np.int32) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int32) - -def to_numpy64(tensor): - return tensor.detach().cpu().numpy().astype(np.int64) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int64) - -def pth2onnx(input_file, output_file, bs, mode): - kge_model = KGEModel( - model_name='RotatE', - nentity=14541, - nrelation=237, - hidden_dim=1000, - gamma=9.0, - double_entity_embedding=True, - double_relation_embedding=False - ) - - checkpoint = torch.load(input_file, map_location='cpu') - kge_model.load_state_dict(checkpoint['model_state_dict']) - for param_tensor in kge_model.state_dict(): - print(param_tensor, "\t", kge_model.state_dict()[param_tensor].size()) - input_names = ["pos", "neg"] - output_names = ["score"] - dynamic_axes = {'pos': {0: '-1'}, 'neg': {0: '-1'}} - # pdb.set_trace() - head = torch.randint(0, 14541, (bs, 1)) - relation = torch.randint(0, 233, (bs, 1)) - tail = torch.randint(0, 14541, (bs, 1)) - input1 = [] - for j in range(bs): - inp = [] - for i in range(14541): - inp.append(i) - input1.append(inp) - negative_sample = torch.from_numpy(np.array(input1)) - - positive_sample = torch.cat([head, relation, tail], dim=1) - positive_sample = torch.from_numpy(to_numpy64(positive_sample)) - negative_sample = torch.from_numpy(to_numpy32(negative_sample)) - - torch.onnx.export(kge_model, ((positive_sample, negative_sample), mode), output_file, input_names=input_names, dynamic_axes=dynamic_axes, - output_names=output_names, opset_version=11, verbose=True) - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='postprocess of r2plus1d') - parser.add_argument('--pth_path', default=r'./checkpoint') - parser.add_argument('--onnx_path', default=r'./kge_onnx_16_tail.onnx') - parser.add_argument('--batch_size', default=16, type=int) - parser.add_argument('--mode', default=r'tail-batch', help='select head-batch or tail-batch') - - args = parser.parse_args() +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pdb +import sys +sys.path.append(r'KnowledgeGraphEmbedding/codes/') +import numpy as np +import torch + +import argparse + +from model import KGEModel + +def to_numpy32(tensor): + return tensor.detach().cpu().numpy().astype(np.int32) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int32) + +def to_numpy64(tensor): + return tensor.detach().cpu().numpy().astype(np.int64) if tensor.requires_grad else tensor.cpu().numpy().astype(np.int64) + +def pth2onnx(input_file, output_file, bs, mode): + kge_model = KGEModel( + model_name='RotatE', + nentity=14541, + nrelation=237, + hidden_dim=1000, + gamma=9.0, + double_entity_embedding=True, + double_relation_embedding=False + ) + + checkpoint = torch.load(input_file, map_location='cpu') + kge_model.load_state_dict(checkpoint['model_state_dict']) + for param_tensor in kge_model.state_dict(): + print(param_tensor, "\t", kge_model.state_dict()[param_tensor].size()) + input_names = ["pos", "neg"] + output_names = ["score"] + dynamic_axes = {'pos': {0: '-1'}, 'neg': {0: '-1'}} + # pdb.set_trace() + head = torch.randint(0, 14541, (bs, 1)) + relation = torch.randint(0, 233, (bs, 1)) + tail = torch.randint(0, 14541, (bs, 1)) + input1 = [] + for j in range(bs): + inp = [] + for i in range(14541): + inp.append(i) + input1.append(inp) + negative_sample = torch.from_numpy(np.array(input1)) + + positive_sample = torch.cat([head, relation, tail], dim=1) + positive_sample = torch.from_numpy(to_numpy64(positive_sample)) + negative_sample = torch.from_numpy(to_numpy32(negative_sample)) + + torch.onnx.export(kge_model, ((positive_sample, negative_sample), mode), output_file, input_names=input_names, dynamic_axes=dynamic_axes, + output_names=output_names, opset_version=11, verbose=True) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='postprocess of r2plus1d') + parser.add_argument('--pth_path', default=r'./checkpoint') + parser.add_argument('--onnx_path', default=r'./kge_onnx_16_tail.onnx') + parser.add_argument('--batch_size', default=16, type=int) + parser.add_argument('--mode', default=r'tail-batch', help='select head-batch or tail-batch') + + args = parser.parse_args() pth2onnx(args.pth_path, args.onnx_path, args.batch_size, args.mode) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/BertSum-pth2onnx.py b/ACL_PyTorch/contrib/nlp/BertSum/BertSum-pth2onnx.py index bc730838baacc27b2beaf06867670486dcac0377..6fbbab39b10f53598a69db2982f2c83c1d70c45c 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/BertSum-pth2onnx.py +++ b/ACL_PyTorch/contrib/nlp/BertSum/BertSum-pth2onnx.py @@ -1,119 +1,119 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
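The export call in rotate_pth2onnx.py above follows the standard dynamic-batch ONNX pattern. The sketch below reproduces that pattern on a stand-in module so it runs without the RotatE checkpoint; TinyScorer is a hypothetical toy, not the KGEModel, and the real script additionally passes the head/tail mode string as a second forward argument. Input names, dynamic axes and opset mirror the script.

```
import torch
import torch.nn as nn

class TinyScorer(nn.Module):
    """Hypothetical stand-in for KGEModel: maps (pos, neg) id tensors to one score per sample."""
    def forward(self, pos, neg):
        return pos.float().sum(dim=1, keepdim=True) - neg.float().mean(dim=1, keepdim=True)

bs = 16
pos = torch.randint(0, 14541, (bs, 3), dtype=torch.int64)    # (h, r, t) ids, as in the script
neg = torch.arange(14541, dtype=torch.int32).repeat(bs, 1)   # all candidate entity ids

torch.onnx.export(
    TinyScorer(), (pos, neg), 'tiny_scorer.onnx',
    input_names=['pos', 'neg'], output_names=['score'],
    dynamic_axes={'pos': {0: '-1'}, 'neg': {0: '-1'}},        # batch axis kept dynamic
    opset_version=11,
)
```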
-import torch -import os -import sys -import argparse -import numpy as np -from pytorch_pretrained_bert import BertConfig -from models.model_builder import Summarizer - -model_flags = ['hidden_size', 'ff_size', 'heads', 'inter_layers','encoder','ff_actv', 'use_interval','rnn_size'] - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def main(args): - input_names=['src','segs','clss','mask','mask_cls'] - output_names = ["output"] - onnx_path = args.onnx_path - device = "cpu" if args.visible_gpus == '-1' else "cuda" - checkpoint = torch.load(args.path, map_location='cpu') - opt = vars(checkpoint['opt']) - for k in opt.keys(): - if (k in model_flags): - setattr(args, k, opt[k]) - config = BertConfig.from_json_file(args.bert_config_path) - model = Summarizer(args, device, load_pretrained_bert=False, bert_config = config) - model.load_cp(checkpoint) - model.eval() - cur_path = os.getcwd() - src = np.fromfile(f'{cur_path}/pre_data/src/data_1.bin', dtype=np.int64) - segs = np.fromfile(f'{cur_path}/pre_data/segs/data_1.bin', dtype=np.int64) - clss = np.fromfile(f'{cur_path}/pre_data/clss/data_1.bin', dtype=np.int64) - mask = np.fromfile(f'{cur_path}/pre_data/mask/data_1.bin', dtype=np.bool_) - mask_cls = np.fromfile(f'{cur_path}/pre_data/mask_cls/data_1.bin', dtype=np.bool_) - print(src.shape) - print(segs.shape) - print(clss.shape) - print(mask.shape) - print(mask_cls.shape) - #-----------------------------13000----------------------------- - dummy_input0 = torch.from_numpy(src).reshape(1,512) - dummy_input1 = torch.from_numpy(segs).reshape(1,512) - dummy_input2 = torch.from_numpy(clss).reshape(1,37) - dummy_input3 = torch.from_numpy(mask).reshape(1,512) - dummy_input4 = torch.from_numpy(mask_cls).reshape(1,37) - #--------------------------------------------------------------------''' - torch.onnx.export(model,(dummy_input0,dummy_input1,dummy_input2,dummy_input3,dummy_input4),onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=9) - -if __name__ =="__main__": - parser = argparse.ArgumentParser() - parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) - parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) - parser.add_argument("-bert_data_path", default='../bert_data') - parser.add_argument("-model_path", default='../models/') - parser.add_argument("-result_path", default='../results/cnndm') - parser.add_argument("-temp_dir", default='../temp') - parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') - - parser.add_argument("-batch_size", default=1000, type=int) - - parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-hidden_size", default=128, type=int) - parser.add_argument("-ff_size", default=512, type=int) - parser.add_argument("-heads", default=4, type=int) - parser.add_argument("-inter_layers", default=2, type=int) - parser.add_argument("-rnn_size", default=512, type=int) - - parser.add_argument("-param_init", default=0, type=float) - parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-dropout", default=0.1, type=float) - parser.add_argument("-optim", default='adam', type=str) - parser.add_argument("-lr", default=1, type=float) - 
parser.add_argument("-beta1", default= 0.9, type=float) - parser.add_argument("-beta2", default=0.999, type=float) - parser.add_argument("-decay_method", default='', type=str) - parser.add_argument("-warmup_steps", default=8000, type=int) - parser.add_argument("-max_grad_norm", default=0, type=float) - - parser.add_argument("-save_checkpoint_steps", default=5, type=int) - parser.add_argument("-accum_count", default=1, type=int) - parser.add_argument("-world_size", default=1, type=int) - parser.add_argument("-report_every", default=1, type=int) - parser.add_argument("-train_steps", default=1000, type=int) - parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) - - - parser.add_argument('-visible_gpus', default='-1', type=str) - parser.add_argument('-gpu_ranks', default='0', type=str) - parser.add_argument('-log_file', default='../logs/cnndm.log') - parser.add_argument('-dataset', default='') - parser.add_argument('-seed', default=666, type=int) - - parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) - parser.add_argument("-test_from", default='') - parser.add_argument("-train_from", default='') - parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) - parser.add_argument("-onnx_path", default="") - parser.add_argument("-path", default="") - - args = parser.parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import torch +import os +import sys +import argparse +import numpy as np +from pytorch_pretrained_bert import BertConfig +from models.model_builder import Summarizer + +model_flags = ['hidden_size', 'ff_size', 'heads', 'inter_layers','encoder','ff_actv', 'use_interval','rnn_size'] + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def main(args): + input_names=['src','segs','clss','mask','mask_cls'] + output_names = ["output"] + onnx_path = args.onnx_path + device = "cpu" if args.visible_gpus == '-1' else "cuda" + checkpoint = torch.load(args.path, map_location='cpu') + opt = vars(checkpoint['opt']) + for k in opt.keys(): + if (k in model_flags): + setattr(args, k, opt[k]) + config = BertConfig.from_json_file(args.bert_config_path) + model = Summarizer(args, device, load_pretrained_bert=False, bert_config = config) + model.load_cp(checkpoint) + model.eval() + cur_path = os.getcwd() + src = np.fromfile(f'{cur_path}/pre_data/src/data_1.bin', dtype=np.int64) + segs = np.fromfile(f'{cur_path}/pre_data/segs/data_1.bin', dtype=np.int64) + clss = np.fromfile(f'{cur_path}/pre_data/clss/data_1.bin', dtype=np.int64) + mask = np.fromfile(f'{cur_path}/pre_data/mask/data_1.bin', dtype=np.bool_) + mask_cls = np.fromfile(f'{cur_path}/pre_data/mask_cls/data_1.bin', dtype=np.bool_) + print(src.shape) + print(segs.shape) + print(clss.shape) + print(mask.shape) + print(mask_cls.shape) + #-----------------------------13000----------------------------- + dummy_input0 = torch.from_numpy(src).reshape(1,512) + dummy_input1 = torch.from_numpy(segs).reshape(1,512) + dummy_input2 = torch.from_numpy(clss).reshape(1,37) + dummy_input3 = torch.from_numpy(mask).reshape(1,512) + dummy_input4 = torch.from_numpy(mask_cls).reshape(1,37) + #--------------------------------------------------------------------''' + torch.onnx.export(model,(dummy_input0,dummy_input1,dummy_input2,dummy_input3,dummy_input4),onnx_path,input_names = input_names,output_names=output_names,verbose=True,opset_version=9) + +if __name__ =="__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) + parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) + parser.add_argument("-bert_data_path", default='../bert_data') + parser.add_argument("-model_path", default='../models/') + parser.add_argument("-result_path", default='../results/cnndm') + parser.add_argument("-temp_dir", default='../temp') + parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') + + parser.add_argument("-batch_size", default=1000, type=int) + + parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-hidden_size", default=128, type=int) + parser.add_argument("-ff_size", default=512, type=int) + parser.add_argument("-heads", default=4, type=int) + parser.add_argument("-inter_layers", default=2, type=int) + parser.add_argument("-rnn_size", default=512, type=int) + + parser.add_argument("-param_init", default=0, type=float) + parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-dropout", default=0.1, type=float) + parser.add_argument("-optim", default='adam', type=str) + parser.add_argument("-lr", default=1, type=float) + 
parser.add_argument("-beta1", default= 0.9, type=float) + parser.add_argument("-beta2", default=0.999, type=float) + parser.add_argument("-decay_method", default='', type=str) + parser.add_argument("-warmup_steps", default=8000, type=int) + parser.add_argument("-max_grad_norm", default=0, type=float) + + parser.add_argument("-save_checkpoint_steps", default=5, type=int) + parser.add_argument("-accum_count", default=1, type=int) + parser.add_argument("-world_size", default=1, type=int) + parser.add_argument("-report_every", default=1, type=int) + parser.add_argument("-train_steps", default=1000, type=int) + parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) + + + parser.add_argument('-visible_gpus', default='-1', type=str) + parser.add_argument('-gpu_ranks', default='0', type=str) + parser.add_argument('-log_file', default='../logs/cnndm.log') + parser.add_argument('-dataset', default='') + parser.add_argument('-seed', default=666, type=int) + + parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) + parser.add_argument("-test_from", default='') + parser.add_argument("-train_from", default='') + parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) + parser.add_argument("-onnx_path", default="") + parser.add_argument("-path", default="") + + args = parser.parse_args() main(args) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_postprocess.py b/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_postprocess.py index 695e8abca86a49f6be865d6e040041e3e9de0b9c..67edaa126955e390539fb9742ec62abc2b71debc 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_postprocess.py +++ b/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_postprocess.py @@ -1,275 +1,275 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import os -import sys -import argparse -import glob -import numpy as np -from pytorch_pretrained_bert import BertConfig -from models.model_builder import Summarizer -from models import data_loader -from models.data_loader import load_dataset -from models.stats import Statistics -from others.utils import test_rouge -from others.logging import logger - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def rouge_results_to_str(results_dict): - return ">> ROUGE-F(1/2/3/l): {:.2f}/{:.2f}/{:.2f}\nROUGE-R(1/2/3/l): {:.2f}/{:.2f}/{:.2f}\n".format( - results_dict["rouge_1_f_score"] * 100, - results_dict["rouge_2_f_score"] * 100, - results_dict["rouge_l_f_score"] * 100, - results_dict["rouge_1_recall"] * 100, - results_dict["rouge_2_recall"] * 100, - results_dict["rouge_l_recall"] * 100 - ) - -def pre_postprocess(args): - File = os.listdir(args.path_1) - File1 = os.listdir(args.path_2) - list2 = [] - for file in File1: - list2.append(file) - Doc = [] - SENT_SCORES = [] - OUTPUT = [] - for file in sorted(File): - Doc.append(file[0:-6]) - Doc = list(set(Doc)) #grip repeated element - for i in range(len(Doc)): #deal after sorting - ff = 'data_'+str(i)+'_output' - sent_scores = np.fromfile(f'{args.path_1}/{ff}_0.bin', dtype=np.float32) - sent_scores = torch.tensor(sent_scores.reshape(1,sent_scores.shape[0])) - output = np.fromfile(f'{args.path_1}/{ff}_1.bin', dtype=np.bool_) - print(output.shape) - output = torch.tensor(output.reshape(1,37)) - document = ff+'_0.txt' - if document in list2: - doc = document[0:-6] - sent_scores1 = np.fromfile(f'{args.path_2}/{doc}_0.bin', dtype=np.float32) - sent_scores1 = torch.tensor(sent_scores1.reshape(1,sent_scores1.shape[0])) - #############add zero to keep same dimension############## - if sent_scores1.shape[1] > sent_scores.shape[1]: - add_zero = (torch.zeros([1,sent_scores1.shape[1]-sent_scores.shape[1]])) - sent_scores = torch.cat([sent_scores,add_zero],dim=1) - if sent_scores1.shape[1] < sent_scores.shape[1]: - add_zero = (torch.zeros([1,sent_scores.shape[1]-sent_scores1.shape[1]])) - sent_scores1 = torch.cat([sent_scores1,add_zero],dim=1) - ########################################################## - output1 = np.fromfile(f'{args.path_2}/{doc}_1.bin', dtype=np.bool_) - output1 = torch.tensor(output1.reshape(1,37)) - sent_scores = torch.cat([sent_scores,sent_scores1],dim=0) - output = torch.cat([output,output1],dim=0) - SENT_SCORES.append(sent_scores) - OUTPUT.append(output) - test_iter = data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), - args.batch_size, device, - shuffle=False, is_test=True) - i=0 - for batch in test_iter: - labels = batch.labels - if SENT_SCORES[i].shape[0] == 1: - if SENT_SCORES[i].shape[1] > labels.shape[1]: - SENT_SCORES[i] = SENT_SCORES[i][:,0:labels.shape[1]] - OUTPUT[i] = OUTPUT[i][:,0:labels.shape[1]] - - if SENT_SCORES[i].shape[1] < labels.shape[1]: - add_zero = (torch.zeros([1,labels.shape[1]-SENT_SCORES[i].shape[1]])) - SENT_SCORES[i] = torch.cat([SENT_SCORES[i],add_zero]) - add_bool = torch.zeros([1,labels.shape[1]-SENT_SCORES[i].shape[1]],dtype=torch.bool) - OUTPUT[i] = torch.cat([OUTPUT[i],add_bool],dim=1) - - if SENT_SCORES[i].shape[0] == 2: - if SENT_SCORES[i].shape[1] > labels.shape[1]: - SENT_SCORES[i] = SENT_SCORES[i][:,0:labels.shape[1]] - OUTPUT[i] = OUTPUT[i][:,0:labels.shape[1]] - if SENT_SCORES[i].shape[1] < 
labels.shape[1]: - add_zero = (torch.zeros([2,labels.shape[1]-SENT_SCORES[i].shape[1]])) - SENT_SCORES[i] = torch.cat([SENT_SCORES[i],add_zero],dim=1) - add_bool = torch.zeros([2,labels.shape[1]-SENT_SCORES[i].shape[1]],dtype=torch.bool) - OUTPUT[i] = torch.cat([OUTPUT[i],add_bool],dim=1) - i=i+1 - return SENT_SCORES,OUTPUT - -def test(args, step, device, cal_lead=False, cal_oracle=False): - test_iter = data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), - args.batch_size, device, - shuffle=False, is_test=True) - def _get_ngrams(n, text): - ngram_set = set() - text_length = len(text) - max_index_ngram_start = text_length - n - for i in range(max_index_ngram_start + 1): - ngram_set.add(tuple(text[i:i + n])) - return ngram_set - - def _block_tri(c, p): - tri_c = _get_ngrams(3, c.split()) - for s in p: - tri_s = _get_ngrams(3, s.split()) - if len(tri_c.intersection(tri_s))>0: - return True - return False - - stats = Statistics() - can_path = '%s_step%d.candidate'%(args.result_path,step) - gold_path = '%s_step%d.gold' % (args.result_path, step) - - sent,output = pre_postprocess(args) - Loss = torch.nn.BCELoss(reduction='none') - sum = 0 - k=0 - with open(can_path, 'w') as save_pred: - with open(gold_path, 'w') as save_gold: - with torch.no_grad(): - for batch in test_iter: - labels = batch.labels - - gold = [] - pred = [] - if (cal_lead): - selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size - elif (cal_oracle): - selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1] for i in - range(batch.batch_size)] - else: - print(k) - print('sent_scores:',sent[k]) - - if labels.shape[0] != sent[k].shape[0]: - #labels = labels[sent[k].shape[0],:] - k = k + 1 - sum = sum + 1 - continue - - loss = Loss(sent[k], labels.float()) - - if loss.shape[1] != output[k].shape[1]: - k = k + 1 - continue - - loss = (loss * output[k].float()).sum() - batch_stats = Statistics(float(loss.cpu().data.numpy()), len(labels)) - stats.update(batch_stats) - - sent_scores = sent[k] + output[k].float() - sent_scores = sent_scores.cpu().data.numpy() - selected_ids = np.argsort(-sent_scores, 1) - print(selected_ids) - # selected_ids = np.sort(selected_ids,1) - for i, idx in enumerate(selected_ids): - _pred = [] - if(len(batch.src_str[i])==0): - continue - for j in selected_ids[i][:len(batch.src_str[i])]: - if(j>=len( batch.src_str[i])): - continue - candidate = batch.src_str[i][j].strip() - if(args.block_trigram): - if(not _block_tri(candidate,_pred)): - _pred.append(candidate) - else: - _pred.append(candidate) - - if ((not cal_oracle) and (not args.recall_eval) and len(_pred) == 3): - break - - _pred = ''.join(_pred) - if(args.recall_eval): - _pred = ' '.join(_pred.split()[:len(batch.tgt_str[i].split())]) - - pred.append(_pred) - gold.append(batch.tgt_str[i]) - - for i in range(len(gold)): - save_gold.write(gold[i].strip()+'\n') - for i in range(len(pred)): - save_pred.write(pred[i].strip()+'\n') - k = k + 1 - print(sum) - if(step!=-1 and args.report_rouge): - print(can_path) - print(gold_path) - rouges = test_rouge(args.temp_dir, can_path, gold_path) - logger.info('Rouges at step %d \n%s' % (step, rouge_results_to_str(rouges))) - #self._report_step(0, step, valid_stats=stats) - - return stats - -if __name__ =="__main__": - parser = argparse.ArgumentParser() - parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) - parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) - 
parser.add_argument("-bert_data_path", default='../bert_data/cnndm') - parser.add_argument("-model_path", default='../models/') - parser.add_argument("-result_path", default='../results/cnndm') - parser.add_argument("-temp_dir", default='../temp') - parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') - - parser.add_argument("-batch_size", default=1000, type=int) - - parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-hidden_size", default=128, type=int) - parser.add_argument("-ff_size", default=512, type=int) - parser.add_argument("-heads", default=4, type=int) - parser.add_argument("-inter_layers", default=2, type=int) - parser.add_argument("-rnn_size", default=512, type=int) - - parser.add_argument("-param_init", default=0, type=float) - parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-dropout", default=0.1, type=float) - parser.add_argument("-optim", default='adam', type=str) - parser.add_argument("-lr", default=1, type=float) - parser.add_argument("-beta1", default= 0.9, type=float) - parser.add_argument("-beta2", default=0.999, type=float) - parser.add_argument("-decay_method", default='', type=str) - parser.add_argument("-warmup_steps", default=8000, type=int) - parser.add_argument("-max_grad_norm", default=0, type=float) - - parser.add_argument("-save_checkpoint_steps", default=5, type=int) - parser.add_argument("-accum_count", default=1, type=int) - parser.add_argument("-world_size", default=1, type=int) - parser.add_argument("-report_every", default=1, type=int) - parser.add_argument("-train_steps", default=1000, type=int) - parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) - - - parser.add_argument('-visible_gpus', default='-1', type=str) - parser.add_argument('-gpu_ranks', default='0', type=str) - parser.add_argument('-log_file', default='../logs/cnndm.log') - parser.add_argument('-dataset', default='') - parser.add_argument('-seed', default=666, type=int) - - parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) - parser.add_argument("-test_from", default='') - parser.add_argument("-train_from", default='') - parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) - parser.add_argument("-path_1", default="") - parser.add_argument("-path_2", default="") - - args = parser.parse_args() - device = "cpu" if args.visible_gpus == '-1' else "cuda" - device_id = -1 if device == "cpu" else 0 - test(args,0,device) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import os +import sys +import argparse +import glob +import numpy as np +from pytorch_pretrained_bert import BertConfig +from models.model_builder import Summarizer +from models import data_loader +from models.data_loader import load_dataset +from models.stats import Statistics +from others.utils import test_rouge +from others.logging import logger + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def rouge_results_to_str(results_dict): + return ">> ROUGE-F(1/2/3/l): {:.2f}/{:.2f}/{:.2f}\nROUGE-R(1/2/3/l): {:.2f}/{:.2f}/{:.2f}\n".format( + results_dict["rouge_1_f_score"] * 100, + results_dict["rouge_2_f_score"] * 100, + results_dict["rouge_l_f_score"] * 100, + results_dict["rouge_1_recall"] * 100, + results_dict["rouge_2_recall"] * 100, + results_dict["rouge_l_recall"] * 100 + ) + +def pre_postprocess(args): + File = os.listdir(args.path_1) + File1 = os.listdir(args.path_2) + list2 = [] + for file in File1: + list2.append(file) + Doc = [] + SENT_SCORES = [] + OUTPUT = [] + for file in sorted(File): + Doc.append(file[0:-6]) + Doc = list(set(Doc)) #grip repeated element + for i in range(len(Doc)): #deal after sorting + ff = 'data_'+str(i)+'_output' + sent_scores = np.fromfile(f'{args.path_1}/{ff}_0.bin', dtype=np.float32) + sent_scores = torch.tensor(sent_scores.reshape(1,sent_scores.shape[0])) + output = np.fromfile(f'{args.path_1}/{ff}_1.bin', dtype=np.bool_) + print(output.shape) + output = torch.tensor(output.reshape(1,37)) + document = ff+'_0.txt' + if document in list2: + doc = document[0:-6] + sent_scores1 = np.fromfile(f'{args.path_2}/{doc}_0.bin', dtype=np.float32) + sent_scores1 = torch.tensor(sent_scores1.reshape(1,sent_scores1.shape[0])) + #############add zero to keep same dimension############## + if sent_scores1.shape[1] > sent_scores.shape[1]: + add_zero = (torch.zeros([1,sent_scores1.shape[1]-sent_scores.shape[1]])) + sent_scores = torch.cat([sent_scores,add_zero],dim=1) + if sent_scores1.shape[1] < sent_scores.shape[1]: + add_zero = (torch.zeros([1,sent_scores.shape[1]-sent_scores1.shape[1]])) + sent_scores1 = torch.cat([sent_scores1,add_zero],dim=1) + ########################################################## + output1 = np.fromfile(f'{args.path_2}/{doc}_1.bin', dtype=np.bool_) + output1 = torch.tensor(output1.reshape(1,37)) + sent_scores = torch.cat([sent_scores,sent_scores1],dim=0) + output = torch.cat([output,output1],dim=0) + SENT_SCORES.append(sent_scores) + OUTPUT.append(output) + test_iter = data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), + args.batch_size, device, + shuffle=False, is_test=True) + i=0 + for batch in test_iter: + labels = batch.labels + if SENT_SCORES[i].shape[0] == 1: + if SENT_SCORES[i].shape[1] > labels.shape[1]: + SENT_SCORES[i] = SENT_SCORES[i][:,0:labels.shape[1]] + OUTPUT[i] = OUTPUT[i][:,0:labels.shape[1]] + + if SENT_SCORES[i].shape[1] < labels.shape[1]: + add_zero = (torch.zeros([1,labels.shape[1]-SENT_SCORES[i].shape[1]])) + SENT_SCORES[i] = torch.cat([SENT_SCORES[i],add_zero]) + add_bool = torch.zeros([1,labels.shape[1]-SENT_SCORES[i].shape[1]],dtype=torch.bool) + OUTPUT[i] = torch.cat([OUTPUT[i],add_bool],dim=1) + + if SENT_SCORES[i].shape[0] == 2: + if SENT_SCORES[i].shape[1] > labels.shape[1]: + SENT_SCORES[i] = SENT_SCORES[i][:,0:labels.shape[1]] + OUTPUT[i] = OUTPUT[i][:,0:labels.shape[1]] + if SENT_SCORES[i].shape[1] < 
labels.shape[1]: + add_zero = (torch.zeros([2,labels.shape[1]-SENT_SCORES[i].shape[1]])) + SENT_SCORES[i] = torch.cat([SENT_SCORES[i],add_zero],dim=1) + add_bool = torch.zeros([2,labels.shape[1]-SENT_SCORES[i].shape[1]],dtype=torch.bool) + OUTPUT[i] = torch.cat([OUTPUT[i],add_bool],dim=1) + i=i+1 + return SENT_SCORES,OUTPUT + +def test(args, step, device, cal_lead=False, cal_oracle=False): + test_iter = data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), + args.batch_size, device, + shuffle=False, is_test=True) + def _get_ngrams(n, text): + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + def _block_tri(c, p): + tri_c = _get_ngrams(3, c.split()) + for s in p: + tri_s = _get_ngrams(3, s.split()) + if len(tri_c.intersection(tri_s))>0: + return True + return False + + stats = Statistics() + can_path = '%s_step%d.candidate'%(args.result_path,step) + gold_path = '%s_step%d.gold' % (args.result_path, step) + + sent,output = pre_postprocess(args) + Loss = torch.nn.BCELoss(reduction='none') + sum = 0 + k=0 + with open(can_path, 'w') as save_pred: + with open(gold_path, 'w') as save_gold: + with torch.no_grad(): + for batch in test_iter: + labels = batch.labels + + gold = [] + pred = [] + if (cal_lead): + selected_ids = [list(range(batch.clss.size(1)))] * batch.batch_size + elif (cal_oracle): + selected_ids = [[j for j in range(batch.clss.size(1)) if labels[i][j] == 1] for i in + range(batch.batch_size)] + else: + print(k) + print('sent_scores:',sent[k]) + + if labels.shape[0] != sent[k].shape[0]: + #labels = labels[sent[k].shape[0],:] + k = k + 1 + sum = sum + 1 + continue + + loss = Loss(sent[k], labels.float()) + + if loss.shape[1] != output[k].shape[1]: + k = k + 1 + continue + + loss = (loss * output[k].float()).sum() + batch_stats = Statistics(float(loss.cpu().data.numpy()), len(labels)) + stats.update(batch_stats) + + sent_scores = sent[k] + output[k].float() + sent_scores = sent_scores.cpu().data.numpy() + selected_ids = np.argsort(-sent_scores, 1) + print(selected_ids) + # selected_ids = np.sort(selected_ids,1) + for i, idx in enumerate(selected_ids): + _pred = [] + if(len(batch.src_str[i])==0): + continue + for j in selected_ids[i][:len(batch.src_str[i])]: + if(j>=len( batch.src_str[i])): + continue + candidate = batch.src_str[i][j].strip() + if(args.block_trigram): + if(not _block_tri(candidate,_pred)): + _pred.append(candidate) + else: + _pred.append(candidate) + + if ((not cal_oracle) and (not args.recall_eval) and len(_pred) == 3): + break + + _pred = ''.join(_pred) + if(args.recall_eval): + _pred = ' '.join(_pred.split()[:len(batch.tgt_str[i].split())]) + + pred.append(_pred) + gold.append(batch.tgt_str[i]) + + for i in range(len(gold)): + save_gold.write(gold[i].strip()+'\n') + for i in range(len(pred)): + save_pred.write(pred[i].strip()+'\n') + k = k + 1 + print(sum) + if(step!=-1 and args.report_rouge): + print(can_path) + print(gold_path) + rouges = test_rouge(args.temp_dir, can_path, gold_path) + logger.info('Rouges at step %d \n%s' % (step, rouge_results_to_str(rouges))) + #self._report_step(0, step, valid_stats=stats) + + return stats + +if __name__ =="__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) + parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) + 
parser.add_argument("-bert_data_path", default='../bert_data/cnndm') + parser.add_argument("-model_path", default='../models/') + parser.add_argument("-result_path", default='../results/cnndm') + parser.add_argument("-temp_dir", default='../temp') + parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') + + parser.add_argument("-batch_size", default=1000, type=int) + + parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-hidden_size", default=128, type=int) + parser.add_argument("-ff_size", default=512, type=int) + parser.add_argument("-heads", default=4, type=int) + parser.add_argument("-inter_layers", default=2, type=int) + parser.add_argument("-rnn_size", default=512, type=int) + + parser.add_argument("-param_init", default=0, type=float) + parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-dropout", default=0.1, type=float) + parser.add_argument("-optim", default='adam', type=str) + parser.add_argument("-lr", default=1, type=float) + parser.add_argument("-beta1", default= 0.9, type=float) + parser.add_argument("-beta2", default=0.999, type=float) + parser.add_argument("-decay_method", default='', type=str) + parser.add_argument("-warmup_steps", default=8000, type=int) + parser.add_argument("-max_grad_norm", default=0, type=float) + + parser.add_argument("-save_checkpoint_steps", default=5, type=int) + parser.add_argument("-accum_count", default=1, type=int) + parser.add_argument("-world_size", default=1, type=int) + parser.add_argument("-report_every", default=1, type=int) + parser.add_argument("-train_steps", default=1000, type=int) + parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) + + + parser.add_argument('-visible_gpus', default='-1', type=str) + parser.add_argument('-gpu_ranks', default='0', type=str) + parser.add_argument('-log_file', default='../logs/cnndm.log') + parser.add_argument('-dataset', default='') + parser.add_argument('-seed', default=666, type=int) + + parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) + parser.add_argument("-test_from", default='') + parser.add_argument("-train_from", default='') + parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) + parser.add_argument("-path_1", default="") + parser.add_argument("-path_2", default="") + + args = parser.parse_args() + device = "cpu" if args.visible_gpus == '-1' else "cuda" + device_id = -1 if device == "cpu" else 0 + test(args,0,device) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_preprocess.py b/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_preprocess.py index f3ddcfef2f30e464c68323e890802038885e25f8..19a09793ce675629b13254a43724ca8bccd8f55f 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_preprocess.py +++ b/ACL_PyTorch/contrib/nlp/BertSum/BertSum_pth_preprocess.py @@ -1,179 +1,179 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import os -import torch - -from models import data_loader -from models.data_loader import load_dataset -from models.trainer import build_trainer - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') -def get_max_shape(test_iter): - max_shape_1=0 - max_shape_2=0 - for batch in test_iter: - if batch.src.shape[1] > max_shape_1: - max_shape_1 = batch.src.shape[1] - if batch.clss.shape[1] > max_shape_2: - max_shape_2 = batch.clss.shape[1] - #print(batch.src[0].shape) - return max_shape_1,max_shape_2 - -def preprocess(args,device): - test_iter =data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), - args.batch_size, device, - shuffle=False, is_test=True) - test_iter1 =data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), - args.batch_size, device, - shuffle=False, is_test=True) - cur_path = os.getcwd() - main_path = cur_path + '/pre_data' - main_path_1 = cur_path + '/pre_data_1' - i=0 - if not os.path.exists(os.path.join(cur_path,'pre_data')): ###########first inference - os.makedirs(os.path.join(cur_path,'pre_data')) - if not os.path.exists(os.path.join(main_path,'src')): - os.makedirs(os.path.join(main_path,'src')) - if not os.path.exists(os.path.join(main_path,'segs')): - os.makedirs(os.path.join(main_path,'segs')) - if not os.path.exists(os.path.join(main_path,'clss')): - os.makedirs(os.path.join(main_path,'clss')) - if not os.path.exists(os.path.join(main_path,'mask')): - os.makedirs(os.path.join(main_path,'mask')) - if not os.path.exists(os.path.join(main_path,'mask_cls')): - os.makedirs(os.path.join(main_path,'mask_cls')) - - if not os.path.exists(os.path.join(cur_path,'pre_data_1')): ###########second inference - os.makedirs(os.path.join(cur_path,'pre_data_1')) - if not os.path.exists(os.path.join(main_path_1,'src')): - os.makedirs(os.path.join(main_path_1,'src')) - if not os.path.exists(os.path.join(main_path_1,'segs')): - os.makedirs(os.path.join(main_path_1,'segs')) - if not os.path.exists(os.path.join(main_path_1,'clss')): - os.makedirs(os.path.join(main_path_1,'clss')) - if not os.path.exists(os.path.join(main_path_1,'mask')): - os.makedirs(os.path.join(main_path_1,'mask')) - if not os.path.exists(os.path.join(main_path_1,'mask_cls')): - os.makedirs(os.path.join(main_path_1,'mask_cls')) - max_shape_1,max_shape_2 = get_max_shape(test_iter) - print(max_shape_1,max_shape_2) - #############################above get max dimension ########################### - for batch in test_iter1: - if batch.src.shape[0]==2: - if batch.src[0].shape[0] < max_shape_1: - add_zero = (torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]])).long() #######change to int64 - add_bool = torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]],dtype=torch.bool) - batch.src = torch.cat([batch.src,add_zero],dim=1) - batch.segs = torch.cat([batch.segs,add_zero],dim=1) - batch.mask = torch.cat([batch.mask,add_bool],dim=1) - if batch.clss[0].shape[0] < max_shape_2: - 
add_zero = (torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]])).long() #######change to int64 - add_bool = torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]],dtype=torch.bool) - batch.clss = torch.cat([batch.clss,add_zero],dim=1) - batch.mask_cls = torch.cat([batch.mask_cls,add_bool],dim=1) - ##############first dimension - batch.src[0].numpy().tofile(os.path.join(main_path,'src','data_'+str(i)+'.bin')) - batch.segs[0].numpy().tofile(os.path.join(main_path,'segs','data_'+str(i)+'.bin')) - batch.clss[0].numpy().tofile(os.path.join(main_path,'clss','data_'+str(i)+'.bin')) - batch.mask[0].numpy().tofile(os.path.join(main_path,'mask','data_'+str(i)+'.bin')) - batch.mask_cls[0].numpy().tofile(os.path.join(main_path,'mask_cls','data_'+str(i)+'.bin')) - #############second dimension - batch.src[1].numpy().tofile(os.path.join(main_path_1,'src','data_'+str(i)+'.bin')) - batch.segs[1].numpy().tofile(os.path.join(main_path_1,'segs','data_'+str(i)+'.bin')) - batch.clss[1].numpy().tofile(os.path.join(main_path_1,'clss','data_'+str(i)+'.bin')) - batch.mask[1].numpy().tofile(os.path.join(main_path_1,'mask','data_'+str(i)+'.bin')) - batch.mask_cls[1].numpy().tofile(os.path.join(main_path_1,'mask_cls','data_'+str(i)+'.bin')) - else: - #print(batch.clss.dtype) - if batch.src[0].shape[0] < max_shape_1: - add_zero = (torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]])).long() #######change to int64 - add_bool = torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]],dtype=torch.bool) - batch.src = torch.cat([batch.src,add_zero],dim=1) - batch.segs = torch.cat([batch.segs,add_zero],dim=1) - batch.mask = torch.cat([batch.mask,add_bool],dim=1) - if batch.clss[0].shape[0] < max_shape_2: - add_zero = (torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]])).long() #######change to int64 - add_bool = torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]],dtype=torch.bool) - batch.clss = torch.cat([batch.clss,add_zero],dim=1) - batch.mask_cls = torch.cat([batch.mask_cls,add_bool],dim=1) - batch.src.numpy().tofile(os.path.join(main_path,'src','data_'+str(i)+'.bin')) - batch.segs.numpy().tofile(os.path.join(main_path,'segs','data_'+str(i)+'.bin')) - batch.clss.numpy().tofile(os.path.join(main_path,'clss','data_'+str(i)+'.bin')) - batch.mask.numpy().tofile(os.path.join(main_path,'mask','data_'+str(i)+'.bin')) - batch.mask_cls.numpy().tofile(os.path.join(main_path,'mask_cls','data_'+str(i)+'.bin')) - i = i+1 - -if __name__ =="__main__": - parser = argparse.ArgumentParser() - parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) - parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) - parser.add_argument("-bert_data_path", default='../bert_data') - parser.add_argument("-model_path", default='../models/') - parser.add_argument("-result_path", default='../results/cnndm') - parser.add_argument("-temp_dir", default='../temp') - parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') - - parser.add_argument("-batch_size", default=1000, type=int) - - parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-hidden_size", default=128, type=int) - parser.add_argument("-ff_size", default=512, type=int) - parser.add_argument("-heads", default=4, type=int) - parser.add_argument("-inter_layers", default=2, type=int) - parser.add_argument("-rnn_size", 
default=512, type=int) - - parser.add_argument("-param_init", default=0, type=float) - parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-dropout", default=0.1, type=float) - parser.add_argument("-optim", default='adam', type=str) - parser.add_argument("-lr", default=1, type=float) - parser.add_argument("-beta1", default= 0.9, type=float) - parser.add_argument("-beta2", default=0.999, type=float) - parser.add_argument("-decay_method", default='', type=str) - parser.add_argument("-warmup_steps", default=8000, type=int) - parser.add_argument("-max_grad_norm", default=0, type=float) - - parser.add_argument("-save_checkpoint_steps", default=5, type=int) - parser.add_argument("-accum_count", default=1, type=int) - parser.add_argument("-world_size", default=1, type=int) - parser.add_argument("-report_every", default=1, type=int) - parser.add_argument("-train_steps", default=1000, type=int) - parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) - - parser.add_argument('-visible_gpus', default='-1', type=str) - parser.add_argument('-gpu_ranks', default='0', type=str) - parser.add_argument('-log_file', default='../logs/cnndm.log') - parser.add_argument('-dataset', default='') - parser.add_argument('-seed', default=666, type=int) - - parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) - parser.add_argument("-test_from", default='') - parser.add_argument("-train_from", default='') - parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) - parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) - parser.add_argument("-onnx_path", default="") - parser.add_argument("-path", default="") - - args = parser.parse_args() - device = "cpu" if args.visible_gpus == '-1' else "cuda" - device_id = -1 if device == "cpu" else 0 - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
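The pre-processing script added below pads every test sample to a fixed shape (src/segs/mask to the longest token length in the test set, clss/mask_cls to the largest sentence count) and then dumps each model input as a flat `.bin` file into a per-input sub-directory, which is the layout msame expects for its `--input` list. A rough sketch of that pad-and-dump idea, using an illustrative helper (`dump_sample`) rather than the script's inline code:

```python
import os
import torch

def pad_right(t, target_len):
    # Right-pad a [batch, L] tensor with zeros / False up to target_len,
    # keeping the original dtype (int64 for token ids, bool for masks).
    gap = target_len - t.shape[1]
    if gap <= 0:
        return t
    filler = torch.zeros(t.shape[0], gap, dtype=t.dtype)
    return torch.cat([t, filler], dim=1)

def dump_sample(out_root, idx, tensors):
    # tensors: {"src": ..., "segs": ..., "clss": ..., "mask": ..., "mask_cls": ...}
    for name, t in tensors.items():
        sub_dir = os.path.join(out_root, name)
        os.makedirs(sub_dir, exist_ok=True)
        t.numpy().tofile(os.path.join(sub_dir, f"data_{idx}.bin"))
```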
+import argparse +import os +import torch + +from models import data_loader +from models.data_loader import load_dataset +from models.trainer import build_trainer + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') +def get_max_shape(test_iter): + max_shape_1=0 + max_shape_2=0 + for batch in test_iter: + if batch.src.shape[1] > max_shape_1: + max_shape_1 = batch.src.shape[1] + if batch.clss.shape[1] > max_shape_2: + max_shape_2 = batch.clss.shape[1] + #print(batch.src[0].shape) + return max_shape_1,max_shape_2 + +def preprocess(args,device): + test_iter =data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), + args.batch_size, device, + shuffle=False, is_test=True) + test_iter1 =data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False), + args.batch_size, device, + shuffle=False, is_test=True) + cur_path = os.getcwd() + main_path = cur_path + '/pre_data' + main_path_1 = cur_path + '/pre_data_1' + i=0 + if not os.path.exists(os.path.join(cur_path,'pre_data')): ###########first inference + os.makedirs(os.path.join(cur_path,'pre_data')) + if not os.path.exists(os.path.join(main_path,'src')): + os.makedirs(os.path.join(main_path,'src')) + if not os.path.exists(os.path.join(main_path,'segs')): + os.makedirs(os.path.join(main_path,'segs')) + if not os.path.exists(os.path.join(main_path,'clss')): + os.makedirs(os.path.join(main_path,'clss')) + if not os.path.exists(os.path.join(main_path,'mask')): + os.makedirs(os.path.join(main_path,'mask')) + if not os.path.exists(os.path.join(main_path,'mask_cls')): + os.makedirs(os.path.join(main_path,'mask_cls')) + + if not os.path.exists(os.path.join(cur_path,'pre_data_1')): ###########second inference + os.makedirs(os.path.join(cur_path,'pre_data_1')) + if not os.path.exists(os.path.join(main_path_1,'src')): + os.makedirs(os.path.join(main_path_1,'src')) + if not os.path.exists(os.path.join(main_path_1,'segs')): + os.makedirs(os.path.join(main_path_1,'segs')) + if not os.path.exists(os.path.join(main_path_1,'clss')): + os.makedirs(os.path.join(main_path_1,'clss')) + if not os.path.exists(os.path.join(main_path_1,'mask')): + os.makedirs(os.path.join(main_path_1,'mask')) + if not os.path.exists(os.path.join(main_path_1,'mask_cls')): + os.makedirs(os.path.join(main_path_1,'mask_cls')) + max_shape_1,max_shape_2 = get_max_shape(test_iter) + print(max_shape_1,max_shape_2) + #############################above get max dimension ########################### + for batch in test_iter1: + if batch.src.shape[0]==2: + if batch.src[0].shape[0] < max_shape_1: + add_zero = (torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]])).long() #######change to int64 + add_bool = torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]],dtype=torch.bool) + batch.src = torch.cat([batch.src,add_zero],dim=1) + batch.segs = torch.cat([batch.segs,add_zero],dim=1) + batch.mask = torch.cat([batch.mask,add_bool],dim=1) + if batch.clss[0].shape[0] < max_shape_2: + add_zero = (torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]])).long() #######change to int64 + add_bool = torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]],dtype=torch.bool) + batch.clss = torch.cat([batch.clss,add_zero],dim=1) + batch.mask_cls = torch.cat([batch.mask_cls,add_bool],dim=1) + ##############first dimension + 
batch.src[0].numpy().tofile(os.path.join(main_path,'src','data_'+str(i)+'.bin')) + batch.segs[0].numpy().tofile(os.path.join(main_path,'segs','data_'+str(i)+'.bin')) + batch.clss[0].numpy().tofile(os.path.join(main_path,'clss','data_'+str(i)+'.bin')) + batch.mask[0].numpy().tofile(os.path.join(main_path,'mask','data_'+str(i)+'.bin')) + batch.mask_cls[0].numpy().tofile(os.path.join(main_path,'mask_cls','data_'+str(i)+'.bin')) + #############second dimension + batch.src[1].numpy().tofile(os.path.join(main_path_1,'src','data_'+str(i)+'.bin')) + batch.segs[1].numpy().tofile(os.path.join(main_path_1,'segs','data_'+str(i)+'.bin')) + batch.clss[1].numpy().tofile(os.path.join(main_path_1,'clss','data_'+str(i)+'.bin')) + batch.mask[1].numpy().tofile(os.path.join(main_path_1,'mask','data_'+str(i)+'.bin')) + batch.mask_cls[1].numpy().tofile(os.path.join(main_path_1,'mask_cls','data_'+str(i)+'.bin')) + else: + #print(batch.clss.dtype) + if batch.src[0].shape[0] < max_shape_1: + add_zero = (torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]])).long() #######change to int64 + add_bool = torch.zeros([batch.src.shape[0],max_shape_1-batch.src[0].shape[0]],dtype=torch.bool) + batch.src = torch.cat([batch.src,add_zero],dim=1) + batch.segs = torch.cat([batch.segs,add_zero],dim=1) + batch.mask = torch.cat([batch.mask,add_bool],dim=1) + if batch.clss[0].shape[0] < max_shape_2: + add_zero = (torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]])).long() #######change to int64 + add_bool = torch.zeros([batch.clss.shape[0],max_shape_2-batch.clss[0].shape[0]],dtype=torch.bool) + batch.clss = torch.cat([batch.clss,add_zero],dim=1) + batch.mask_cls = torch.cat([batch.mask_cls,add_bool],dim=1) + batch.src.numpy().tofile(os.path.join(main_path,'src','data_'+str(i)+'.bin')) + batch.segs.numpy().tofile(os.path.join(main_path,'segs','data_'+str(i)+'.bin')) + batch.clss.numpy().tofile(os.path.join(main_path,'clss','data_'+str(i)+'.bin')) + batch.mask.numpy().tofile(os.path.join(main_path,'mask','data_'+str(i)+'.bin')) + batch.mask_cls.numpy().tofile(os.path.join(main_path,'mask_cls','data_'+str(i)+'.bin')) + i = i+1 + +if __name__ =="__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-encoder", default='classifier', type=str, choices=['classifier','transformer','rnn','baseline']) + parser.add_argument("-mode", default='train', type=str, choices=['train','validate','test']) + parser.add_argument("-bert_data_path", default='../bert_data') + parser.add_argument("-model_path", default='../models/') + parser.add_argument("-result_path", default='../results/cnndm') + parser.add_argument("-temp_dir", default='../temp') + parser.add_argument("-bert_config_path", default='../bert_config_uncased_base.json') + + parser.add_argument("-batch_size", default=1000, type=int) + + parser.add_argument("-use_interval", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-hidden_size", default=128, type=int) + parser.add_argument("-ff_size", default=512, type=int) + parser.add_argument("-heads", default=4, type=int) + parser.add_argument("-inter_layers", default=2, type=int) + parser.add_argument("-rnn_size", default=512, type=int) + + parser.add_argument("-param_init", default=0, type=float) + parser.add_argument("-param_init_glorot", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-dropout", default=0.1, type=float) + parser.add_argument("-optim", default='adam', type=str) + parser.add_argument("-lr", default=1, type=float) + 
parser.add_argument("-beta1", default= 0.9, type=float) + parser.add_argument("-beta2", default=0.999, type=float) + parser.add_argument("-decay_method", default='', type=str) + parser.add_argument("-warmup_steps", default=8000, type=int) + parser.add_argument("-max_grad_norm", default=0, type=float) + + parser.add_argument("-save_checkpoint_steps", default=5, type=int) + parser.add_argument("-accum_count", default=1, type=int) + parser.add_argument("-world_size", default=1, type=int) + parser.add_argument("-report_every", default=1, type=int) + parser.add_argument("-train_steps", default=1000, type=int) + parser.add_argument("-recall_eval", type=str2bool, nargs='?',const=True,default=False) + + parser.add_argument('-visible_gpus', default='-1', type=str) + parser.add_argument('-gpu_ranks', default='0', type=str) + parser.add_argument('-log_file', default='../logs/cnndm.log') + parser.add_argument('-dataset', default='') + parser.add_argument('-seed', default=666, type=int) + + parser.add_argument("-test_all", type=str2bool, nargs='?',const=True,default=False) + parser.add_argument("-test_from", default='') + parser.add_argument("-train_from", default='') + parser.add_argument("-report_rouge", type=str2bool, nargs='?',const=True,default=True) + parser.add_argument("-block_trigram", type=str2bool, nargs='?', const=True, default=True) + parser.add_argument("-onnx_path", default="") + parser.add_argument("-path", default="") + + args = parser.parse_args() + device = "cpu" if args.visible_gpus == '-1' else "cuda" + device_id = -1 if device == "cpu" else 0 + preprocess(args,device) \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/README.md b/ACL_PyTorch/contrib/nlp/BertSum/README.md index 03d573f8d3e178d33f4ab62830754448619c1057..90d73c936182d9244757ca02fc429ffef40b3c06 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/README.md +++ b/ACL_PyTorch/contrib/nlp/BertSum/README.md @@ -1,285 +1,285 @@ - - -# BertSum Onnx模型端到端推理指导 - -- 1 模型概述 - - [1.1 代码地址](https://gitee.com/kghhkhkljl/pyramidbox.git) -- 2 环境说明 - - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#21-深度学习框架) - - [2.2 python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#22-python第三方库) -- 3 模型转换 - - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#31-pth转onnx模型) - - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#32-onnx转om模型) -- 4 数据集预处理 - - [4.1 数据集获取](https://www.graviti.cn/open-datasets/WIDER_FACE) - - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#42-数据集预处理) - - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#43-生成数据集信息文件) -- 5 离线推理 - - [5.1 benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) - - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#52-离线推理) -- 6 精度对比 - - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#61-离线推理精度统计) - - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#62-开源精度) - - [6.3 
精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#63-精度对比) -- 7 性能对比 - - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#71-npu性能数据) - - [7.2 T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#72-T4性能数据) - - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#73-性能对比) - -## 1 模型概述 - -- **论文地址** -- **代码地址** - -### 1.1 论文地址 - -[Bertsum论文](https://arxiv.org/abs/1803.07737) - -### 1.2 代码地址 - -https://github.com/nlpyang/BertSum.git - -## 2 环境说明 - -- **深度学习框架** -- **python第三方库** - -### 2.1 深度学习框架 - -``` -python3.7.5 -CANN 5.0.3 - -pytorch >= 1.5.0 -torchvision >= 0.10.0 -onnx >= 1.7.0 - -说明:若是在conda环境下,直接采用python,不用python3.7 -``` - -### 2.2 python第三方库 - -``` -torch==1.7.1 -tensorboardX==2.4.1 -pyrouge==0.1.3 -pytorch-pretrained-bert==0.6.2 -onnx-simplifier==0.3.6 -``` - -### **2.3 环境配置** - -ROUGE配置参考博客: - -[(10条消息) Ubuntu安装配置ROUGE_BigSea-CSDN博客](https://blog.csdn.net/Hay54/article/details/78744912) - -pyrouge配置参考博客: - -[(10条消息) 在Ubuntu下配置pyrouge_MerryCao的博客-CSDN博客](https://blog.csdn.net/MerryCao/article/details/49174283) - -## 3 模型转换 - -- **pth转onnx模型** -- **onnx转om模型** - -### 3.1 pth转onnx模型 - -1.拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) - -```shell -git clone https://github.com/nlpyang/BertSum.git -``` - -克隆下来源代码并解压,将pr中的代码放到解压之后的BertSum/src目录下面并对BertSum/src/models/data_loder.py进行一个更改: - -将31行的mask=1-(src==0)修改为mask=~(src==0) 将35行的mask=1-(clss==-1)修改为mask=~(clss==-1) - -2.下载pth权重文件 - -权重文件默认存放在**/home/BertSum/src**目录下 - -3.使用pth2onnx.py进行onnx的转换 - -``` -方法一:cd /home/BertSum/src/test -bash pth2onnx.sh -方法二:cd /home/BertSum/src -python BertSum-pth2onnx.py -mode test -bert_data_path ../bert_data/cnndm -model_path MODEL_PATH -visible_gpus -1 -gpu_ranks 0 -batch_size 1 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true -onnx_path bertsum_13000_9_bs1.onnx -path model_step_13000.pt -``` - -获得bertsum_13000_9_bs1.onnx文件 - -方法二种的-bert_data_path是数据集所在目录,-batch_size需设置为1,-onnx_path是onnx输出文件 - -### 3.2 onnx模型简化 - -由于存在expand算子导致转om不成功,所以需要使用onnx简化工具对onnx进行简化 - -使用pth2onnx.py进行onnx的转换 - -``` -方法一:cd /home/BertSum/src/test -bash simplify.sh -方法二:cd /home/BertSum/src -python -m onnxsim ./bertsum_13000_9_bs1.onnx ./bertsum_13000_9_sim_bs1.onnx -``` - -获得bertsum_13000_9_sim_bs1.onnx文件 - -### 3.3 onnx简化模型转om模型 - -1.设置环境变量 - -``` -source atc.sh -``` - -2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.3 开发辅助工具指南 (推理) 01 - -``` -方法一:cd /home/BertSum/src/test -bash onnxToom.sh -方法二:cd /home/BertSum/src -atc --input_format=ND --framework=5 --model=./bertsum_13000_9_sim_bs1.onnx --input_shape="src:1,512;segs:1,512;clss:1,37;mask:1,512;mask_cls:1,37" --output=bertsum_13000_9_sim_bs1 \ ---log=info --soc_version=Ascend310 --precision_mode=allow_mix_precision \ ---modify_mixlist=ops_info.json -``` - -方法二中的model是onnx模型的名字,input_shape是paper的shape,output为输出om的名字,--precision_mode表示采用混合精度 - -## 4 数据集预处理 - -- **数据集获取** -- **数据集预处理** -- **生成数据集信息文件** - -### 4.1 数据集获取 - -参考原代码仓 - -### 4.2 数据集预处理 - -1.预处理脚本BertSum_pth_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -方法一:cd /home/BertSum/src/test -bash pre_deal.sh -方法二:cd /home/BertSum/src -python BertSum_pth_preprocess.py -mode test -bert_data_path ../bert_data/cnndm -model_path MODEL_PATH -visible_gpus -1 -gpu_ranks 0 -batch_size 600 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true 
-``` - --bert_data_path是数据集所在目录,后面的参数是固定的。 - -### 5 离线推理 - -- **msame工具** -- **离线推理** - -### 5.1 msame工具 - -获取msame工具(https://gitee.com/ascend/tools/tree/master/msame),并将得到的msame工具放在/home/BertSum-master/src下 - -### 5.2 离线推理 - -1.执行离线推理 - -benchmark工具暂不支持多输入,因此改用msame,首先要source环境变量 - -``` -source env.sh -``` - -2.使用msame将onnx模型转换为om模型文件,工具使用方法可以参考CANN - -然后运行如下命令: - -``` -方法一:cd /home/BertSum/src/test -bash infer.sh -方法二:cd /home/BertSum/src -./msame --model "./bertsum_13000_9_sim_bs1_1.om" --input "./pre_data/src,./pre_data/segs,./pre_data/clss,./pre_data/mask,./pre_data/mask_cls" --output "./result" --outfmt bin -./msame --model "./bertsum_13000_9_sim_bs1_1.om" --input "./pre_data_1/src,./pre_data_1/segs,./pre_data_1/clss,./pre_data_1/mask,./pre_data_1/mask_cls" --output "./result" --outfmt bin -``` - -要采用msema工具推理两次,因为有些paper的shape第一维为2,所以分两次进行推理。pre_data下存放的是shape为第一维为1的所有预处理之后的数据以及shape为2的部分预处理得到的数据。shape为2的另一部分数据存放在pre_data_1下面。--model是om文件,--input是预处理之后文件所在目录,--output为输出bin文件所在目录,--outfmt代表输出bin文件*。* - -输出的bin文件在/home/BertSum-master/src/result目录下,此目录下会存在两个文件,将其中一个时间小的命名为result_1,将另一个时间大的命名为result_2。 - -## 6 精度对比 - -- **离线推理精度** -- **开源精度** -- **精度对比** - -### 6.1 离线推理精度统计 - -1.后处理 - -``` -cd /home/BertSum/src -python BertSum_pth_postprocess.py -visible_gpus -1 -gpu_ranks 0 -batch_size 600 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true -path_1 ./result/result_1 -path_2 ./result/result_2 -``` - -``` - -path_1是推理得到的文件result_1,-path_2是推理得到的result_2 - 自验报告 - # 第X次验收测试 - # 验收结果 OK - # 验收环境: A + K / CANN 5.0.3 - # 关联issue: - - # pth是否能正确转换为om - bash test/onnx2om.sh - # 验收结果: OK - # 备注: 成功生成om,无运行报错,报错日志xx 等 - - # 精度数据是否达标(需要显示官网pth精度与om模型的精度) - # npu性能数据(由于msame工具不支持多batch,所以只测试了bs1的性能) - # 验收结果: 是 / 否 - # 备注: 目标pth精度42.96;bs1验收om精度42.92;精度下降不超过1%;无运行报错,报错日志xx 等 - # 备注: 验收310测试性能bs1:61.538FPS;无运行报错,报错日志xx 等 - - # 在t4上测试bs1性能 - bash perf.sh - # 验收结果: OK / Failed - # 备注: 验收基准测试性能bs1:94.281FPS;无运行报错,报错日志xx 等 - - # 310性能是否超过基准: 否 - t4:310=(94.281/61.538)1.53倍基准 -``` - -### 6.2 开源精度 - -BertSum在线训练精度: - -42.96% - -### 6.3 离线推理精度 - -42.95% - -### 6.3 精度对比 - -由于源码采用的是动态shape,而离线推理是通过加padding固定住shape进行推理的,所以精度会有损失,因此和同一分辨率下的在线推理进行对比。对比方式:三个尺度求和取平均。 - -## 7 性能对比 - -- **310性能数据** -- **T4性能数据** -- **性能对比** - -### 7.1 310性能数据 - -每张图片平均耗时:65.06ms,所以310吞吐率为:1000/65×4=61.538 - -说明:由于msame不支持多batch,所以此处只测了bs1的性能。 - -### 7.2 T4性能数据 - -T4性能为:94.281 - -### 7.3 性能对比 - + + +# BertSum Onnx模型端到端推理指导 + +- 1 模型概述 + - [1.1 代码地址](https://gitee.com/kghhkhkljl/pyramidbox.git) +- 2 环境说明 + - [2.1 深度学习框架](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#21-深度学习框架) + - [2.2 python第三方库](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#22-python第三方库) +- 3 模型转换 + - [3.1 pth转onnx模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#31-pth转onnx模型) + - [3.2 onnx转om模型](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#32-onnx转om模型) +- 4 数据集预处理 + - [4.1 数据集获取](https://www.graviti.cn/open-datasets/WIDER_FACE) + - [4.2 数据集预处理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#42-数据集预处理) + - [4.3 生成数据集信息文件](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#43-生成数据集信息文件) +- 5 离线推理 + - [5.1 
benchmark工具概述](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/ResNext50#51-benchmark工具概述) + - [5.2 离线推理](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#52-离线推理) +- 6 精度对比 + - [6.1 离线推理精度统计](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#61-离线推理精度统计) + - [6.2 开源精度](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#62-开源精度) + - [6.3 精度对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#63-精度对比) +- 7 性能对比 + - [7.1 npu性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#71-npu性能数据) + - [7.2 T4性能数据](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#72-T4性能数据) + - [7.3 性能对比](https://gitee.com/ascend/modelzoo/tree/master/built-in/ACL_PyTorch/Benchmark/cv/classification/Pyramidbox#73-性能对比) + +## 1 模型概述 + +- **论文地址** +- **代码地址** + +### 1.1 论文地址 + +[Bertsum论文](https://arxiv.org/abs/1803.07737) + +### 1.2 代码地址 + +https://github.com/nlpyang/BertSum.git + +## 2 环境说明 + +- **深度学习框架** +- **python第三方库** + +### 2.1 深度学习框架 + +``` +python3.7.5 +CANN 5.0.3 + +pytorch >= 1.5.0 +torchvision >= 0.10.0 +onnx >= 1.7.0 + +说明:若是在conda环境下,直接采用python,不用python3.7 +``` + +### 2.2 python第三方库 + +``` +torch==1.7.1 +tensorboardX==2.4.1 +pyrouge==0.1.3 +pytorch-pretrained-bert==0.6.2 +onnx-simplifier==0.3.6 +``` + +### **2.3 环境配置** + +ROUGE配置参考博客: + +[(10条消息) Ubuntu安装配置ROUGE_BigSea-CSDN博客](https://blog.csdn.net/Hay54/article/details/78744912) + +pyrouge配置参考博客: + +[(10条消息) 在Ubuntu下配置pyrouge_MerryCao的博客-CSDN博客](https://blog.csdn.net/MerryCao/article/details/49174283) + +## 3 模型转换 + +- **pth转onnx模型** +- **onnx转om模型** + +### 3.1 pth转onnx模型 + +1.拉取代码仓库 (因为使用了开源代码模块,所以需要git clone一下) + +```shell +git clone https://github.com/nlpyang/BertSum.git +``` + +克隆下来源代码并解压,将pr中的代码放到解压之后的BertSum/src目录下面并对BertSum/src/models/data_loder.py进行一个更改: + +将31行的mask=1-(src==0)修改为mask=~(src==0) 将35行的mask=1-(clss==-1)修改为mask=~(clss==-1) + +2.下载pth权重文件 + +权重文件默认存放在**/home/BertSum/src**目录下 + +3.使用pth2onnx.py进行onnx的转换 + +``` +方法一:cd /home/BertSum/src/test +bash pth2onnx.sh +方法二:cd /home/BertSum/src +python BertSum-pth2onnx.py -mode test -bert_data_path ../bert_data/cnndm -model_path MODEL_PATH -visible_gpus -1 -gpu_ranks 0 -batch_size 1 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true -onnx_path bertsum_13000_9_bs1.onnx -path model_step_13000.pt +``` + +获得bertsum_13000_9_bs1.onnx文件 + +方法二种的-bert_data_path是数据集所在目录,-batch_size需设置为1,-onnx_path是onnx输出文件 + +### 3.2 onnx模型简化 + +由于存在expand算子导致转om不成功,所以需要使用onnx简化工具对onnx进行简化 + +使用pth2onnx.py进行onnx的转换 + +``` +方法一:cd /home/BertSum/src/test +bash simplify.sh +方法二:cd /home/BertSum/src +python -m onnxsim ./bertsum_13000_9_bs1.onnx ./bertsum_13000_9_sim_bs1.onnx +``` + +获得bertsum_13000_9_sim_bs1.onnx文件 + +### 3.3 onnx简化模型转om模型 + +1.设置环境变量 + +``` +source atc.sh +``` + +2.使用atc将onnx模型转换为om模型文件,工具使用方法可以参考CANN 5.0.3 开发辅助工具指南 (推理) 01 + +``` +方法一:cd /home/BertSum/src/test +bash onnxToom.sh +方法二:cd /home/BertSum/src +atc --input_format=ND --framework=5 --model=./bertsum_13000_9_sim_bs1.onnx --input_shape="src:1,512;segs:1,512;clss:1,37;mask:1,512;mask_cls:1,37" --output=bertsum_13000_9_sim_bs1 \ +--log=info --soc_version=Ascend310 --precision_mode=allow_mix_precision \ +--modify_mixlist=ops_info.json +``` + 
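(Optional) Before writing the `--input_shape` string for atc, it can help to confirm the input names and shapes of the simplified ONNX model; a small sketch assuming the `onnx` package is installed:

```python
import onnx

# Print each graph input of the simplified model together with its shape,
# so the atc --input_shape argument can be written to match.
model = onnx.load("./bertsum_13000_9_sim_bs1.onnx")
for inp in model.graph.input:
    dims = [d.dim_value if d.dim_value > 0 else d.dim_param
            for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)
```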
+方法二中的model是onnx模型的名字,input_shape是paper的shape,output为输出om的名字,--precision_mode表示采用混合精度 + +## 4 数据集预处理 + +- **数据集获取** +- **数据集预处理** +- **生成数据集信息文件** + +### 4.1 数据集获取 + +参考原代码仓 + +### 4.2 数据集预处理 + +1.预处理脚本BertSum_pth_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +方法一:cd /home/BertSum/src/test +bash pre_deal.sh +方法二:cd /home/BertSum/src +python BertSum_pth_preprocess.py -mode test -bert_data_path ../bert_data/cnndm -model_path MODEL_PATH -visible_gpus -1 -gpu_ranks 0 -batch_size 600 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true +``` + +-bert_data_path是数据集所在目录,后面的参数是固定的。 + +### 5 离线推理 + +- **msame工具** +- **离线推理** + +### 5.1 msame工具 + +获取msame工具(https://gitee.com/ascend/tools/tree/master/msame),并将得到的msame工具放在/home/BertSum-master/src下 + +### 5.2 离线推理 + +1.执行离线推理 + +benchmark工具暂不支持多输入,因此改用msame,首先要source环境变量 + +``` +source env.sh +``` + +2.使用msame将onnx模型转换为om模型文件,工具使用方法可以参考CANN + +然后运行如下命令: + +``` +方法一:cd /home/BertSum/src/test +bash infer.sh +方法二:cd /home/BertSum/src +./msame --model "./bertsum_13000_9_sim_bs1_1.om" --input "./pre_data/src,./pre_data/segs,./pre_data/clss,./pre_data/mask,./pre_data/mask_cls" --output "./result" --outfmt bin +./msame --model "./bertsum_13000_9_sim_bs1_1.om" --input "./pre_data_1/src,./pre_data_1/segs,./pre_data_1/clss,./pre_data_1/mask,./pre_data_1/mask_cls" --output "./result" --outfmt bin +``` + +要采用msema工具推理两次,因为有些paper的shape第一维为2,所以分两次进行推理。pre_data下存放的是shape为第一维为1的所有预处理之后的数据以及shape为2的部分预处理得到的数据。shape为2的另一部分数据存放在pre_data_1下面。--model是om文件,--input是预处理之后文件所在目录,--output为输出bin文件所在目录,--outfmt代表输出bin文件*。* + +输出的bin文件在/home/BertSum-master/src/result目录下,此目录下会存在两个文件,将其中一个时间小的命名为result_1,将另一个时间大的命名为result_2。 + +## 6 精度对比 + +- **离线推理精度** +- **开源精度** +- **精度对比** + +### 6.1 离线推理精度统计 + +1.后处理 + +``` +cd /home/BertSum/src +python BertSum_pth_postprocess.py -visible_gpus -1 -gpu_ranks 0 -batch_size 600 -log_file LOG_FILE -result_path RESULT_PATH -test_all -block_trigram true -path_1 ./result/result_1 -path_2 ./result/result_2 +``` + +``` + -path_1是推理得到的文件result_1,-path_2是推理得到的result_2 + 自验报告 + # 第X次验收测试 + # 验收结果 OK + # 验收环境: A + K / CANN 5.0.3 + # 关联issue: + + # pth是否能正确转换为om + bash test/onnx2om.sh + # 验收结果: OK + # 备注: 成功生成om,无运行报错,报错日志xx 等 + + # 精度数据是否达标(需要显示官网pth精度与om模型的精度) + # npu性能数据(由于msame工具不支持多batch,所以只测试了bs1的性能) + # 验收结果: 是 / 否 + # 备注: 目标pth精度42.96;bs1验收om精度42.92;精度下降不超过1%;无运行报错,报错日志xx 等 + # 备注: 验收310测试性能bs1:61.538FPS;无运行报错,报错日志xx 等 + + # 在t4上测试bs1性能 + bash perf.sh + # 验收结果: OK / Failed + # 备注: 验收基准测试性能bs1:94.281FPS;无运行报错,报错日志xx 等 + + # 310性能是否超过基准: 否 + t4:310=(94.281/61.538)1.53倍基准 +``` + +### 6.2 开源精度 + +BertSum在线训练精度: + +42.96% + +### 6.3 离线推理精度 + +42.95% + +### 6.3 精度对比 + +由于源码采用的是动态shape,而离线推理是通过加padding固定住shape进行推理的,所以精度会有损失,因此和同一分辨率下的在线推理进行对比。对比方式:三个尺度求和取平均。 + +## 7 性能对比 + +- **310性能数据** +- **T4性能数据** +- **性能对比** + +### 7.1 310性能数据 + +每张图片平均耗时:65.06ms,所以310吞吐率为:1000/65×4=61.538 + +说明:由于msame不支持多batch,所以此处只测了bs1的性能。 + +### 7.2 T4性能数据 + +T4性能为:94.281 + +### 7.3 性能对比 + batch1:94.281>61.538 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/modelzoo_level.txt b/ACL_PyTorch/contrib/nlp/BertSum/modelzoo_level.txt index 14ac5bd404872e1264c86036fa3d2b1946828c16..aeac4e12641447ddfe648679ab454b20861732a6 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/nlp/BertSum/modelzoo_level.txt @@ -1,6 +1,6 @@ -ModelConvert:OK -QuantStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:NOK +ModelConvert:OK +QuantStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:NOK PerfStatus:POK \ No 
newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/BertSum/requirements.txt b/ACL_PyTorch/contrib/nlp/BertSum/requirements.txt index e7350f2bdef53d4ff20c6537a5f70baa748b87cb..47c01fde523a048f8362acf0c4c2c4b080f16b4c 100644 --- a/ACL_PyTorch/contrib/nlp/BertSum/requirements.txt +++ b/ACL_PyTorch/contrib/nlp/BertSum/requirements.txt @@ -1,5 +1,5 @@ -torch==1.10.0 -tensorboardX==2.4.1 -pyrouge==0.1.3 -pytorch_pretrained_bert==0.6.2 -onnx-simplifier==0.3.6 +torch==1.10.0 +tensorboardX==2.4.1 +pyrouge==0.1.3 +pytorch_pretrained_bert==0.6.2 +onnx-simplifier==0.3.6 diff --git a/ACL_PyTorch/contrib/nlp/TextCNN/LICENSE b/ACL_PyTorch/contrib/nlp/TextCNN/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/ACL_PyTorch/contrib/nlp/TextCNN/LICENSE +++ b/ACL_PyTorch/contrib/nlp/TextCNN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/ACL_PyTorch/contrib/nlp/TextCNN/TextCNN.patch b/ACL_PyTorch/contrib/nlp/TextCNN/TextCNN.patch index 60d727642bc8b66b1c50c8a79a29480dfab5b7a2..dd56597dfcd2f6a6459fa05ae322de262e4642a9 100644 --- a/ACL_PyTorch/contrib/nlp/TextCNN/TextCNN.patch +++ b/ACL_PyTorch/contrib/nlp/TextCNN/TextCNN.patch @@ -1,646 +1,646 @@ -二进制文件 Chinese-Text-Classification-Pytorch_back/.git/index 和 Chinese-Text-Classification-Pytorch/.git/index 不同 -diff -uprN Chinese-Text-Classification-Pytorch_back/models/DPCNN.py Chinese-Text-Classification-Pytorch/models/DPCNN.py ---- Chinese-Text-Classification-Pytorch_back/models/DPCNN.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/DPCNN.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,89 +0,0 @@ --# coding: UTF-8 --import torch --import torch.nn as nn --import torch.nn.functional as F --import numpy as np -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'DPCNN' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 0.5 # 随机失活 -- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 20 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 1e-3 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度 -- self.num_filters = 250 # 卷积核数量(channels数) -- -- --'''Deep Pyramid Convolutional Neural Networks for Text Categorization''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- self.conv_region = nn.Conv2d(1, config.num_filters, (3, config.embed), stride=1) -- self.conv = nn.Conv2d(config.num_filters, config.num_filters, (3, 1), stride=1) -- self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2) -- self.padding1 = nn.ZeroPad2d((0, 0, 1, 1)) # top bottom -- self.padding2 = nn.ZeroPad2d((0, 0, 0, 1)) # bottom -- self.relu = nn.ReLU() -- self.fc = nn.Linear(config.num_filters, config.num_classes) -- -- def forward(self, x): -- x = x[0] -- x = self.embedding(x) -- x = x.unsqueeze(1) # [batch_size, 250, seq_len, 1] -- x = self.conv_region(x) # [batch_size, 250, seq_len-3+1, 1] -- -- x = self.padding1(x) # [batch_size, 250, seq_len, 1] -- x = self.relu(x) -- x = self.conv(x) # [batch_size, 250, seq_len-3+1, 1] -- x = self.padding1(x) # [batch_size, 250, seq_len, 1] -- x = self.relu(x) -- x = self.conv(x) # [batch_size, 250, seq_len-3+1, 1] -- while x.size()[2] > 2: -- x = self._block(x) -- x 
= x.squeeze() # [batch_size, num_filters(250)] -- x = self.fc(x) -- return x -- -- def _block(self, x): -- x = self.padding2(x) -- px = self.max_pool(x) -- -- x = self.padding1(px) -- x = F.relu(x) -- x = self.conv(x) -- -- x = self.padding1(x) -- x = F.relu(x) -- x = self.conv(x) -- -- # Short Cut -- x = x + px -- return x -diff -uprN Chinese-Text-Classification-Pytorch_back/models/FastText.py Chinese-Text-Classification-Pytorch/models/FastText.py ---- Chinese-Text-Classification-Pytorch_back/models/FastText.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/FastText.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,69 +0,0 @@ --# coding: UTF-8 --import torch --import torch.nn as nn --import torch.nn.functional as F --import numpy as np -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'FastText' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 0.5 # 随机失活 -- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 20 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 1e-3 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度 -- self.hidden_size = 256 # 隐藏层大小 -- self.n_gram_vocab = 250499 # ngram 词表大小 -- -- --'''Bag of Tricks for Efficient Text Classification''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- self.embedding_ngram2 = nn.Embedding(config.n_gram_vocab, config.embed) -- self.embedding_ngram3 = nn.Embedding(config.n_gram_vocab, config.embed) -- self.dropout = nn.Dropout(config.dropout) -- self.fc1 = nn.Linear(config.embed * 3, config.hidden_size) -- # self.dropout2 = nn.Dropout(config.dropout) -- self.fc2 = nn.Linear(config.hidden_size, config.num_classes) -- -- def forward(self, x): -- -- out_word = self.embedding(x[0]) -- out_bigram = self.embedding_ngram2(x[2]) -- out_trigram = self.embedding_ngram3(x[3]) -- out = torch.cat((out_word, out_bigram, out_trigram), -1) -- -- out = out.mean(dim=1) -- out = self.dropout(out) -- out = self.fc1(out) -- out = F.relu(out) -- out = self.fc2(out) -- return out -二进制文件 Chinese-Text-Classification-Pytorch_back/models/__pycache__/TextCNN.cpython-37.pyc 和 Chinese-Text-Classification-Pytorch/models/__pycache__/TextCNN.cpython-37.pyc 不同 -diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextCNN.py 
Chinese-Text-Classification-Pytorch/models/TextCNN.py ---- Chinese-Text-Classification-Pytorch_back/models/TextCNN.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/TextCNN.py 2021-09-07 21:23:07.218366753 +0800 -@@ -1,4 +1,6 @@ - # coding: UTF-8 -+import os.path -+ - import torch - import torch.nn as nn - import torch.nn.functional as F -@@ -13,10 +15,11 @@ class Config(object): - self.train_path = dataset + '/data/train.txt' # 训练集 - self.dev_path = dataset + '/data/dev.txt' # 验证集 - self.test_path = dataset + '/data/test.txt' # 测试集 -+ print('path', os.path.abspath(dataset+'/data/class.txt')) - self.class_list = [x.strip() for x in open( - dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 - self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -+ self.save_path = dataset + '/saved_dict/' + self.model_name + '.pth' # 模型训练结果 - self.log_path = dataset + '/log/' + self.model_name - self.embedding_pretrained = torch.tensor( - np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -@@ -49,18 +52,21 @@ class Model(nn.Module): - self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) - self.convs = nn.ModuleList( - [nn.Conv2d(1, config.num_filters, (k, config.embed)) for k in config.filter_sizes]) -+ self.pools = nn.ModuleList( -+ [nn.MaxPool1d(config.pad_size - k + 1) for k in config.filter_sizes]) - self.dropout = nn.Dropout(config.dropout) - self.fc = nn.Linear(config.num_filters * len(config.filter_sizes), config.num_classes) - -- def conv_and_pool(self, x, conv): -+ def conv_and_pool(self, x, conv, pool): - x = F.relu(conv(x)).squeeze(3) -- x = F.max_pool1d(x, x.size(2)).squeeze(2) -+ x = pool(x).squeeze(2) -+ # x = F.max_pool1d(x, x.size(2)).squeeze(2) - return x - - def forward(self, x): -- out = self.embedding(x[0]) -+ out = self.embedding(x) - out = out.unsqueeze(1) -- out = torch.cat([self.conv_and_pool(out, conv) for conv in self.convs], 1) -+ out = torch.cat([self.conv_and_pool(out, conv, pool) for conv, pool in zip(self.convs, self.pools)], 1) - out = self.dropout(out) - out = self.fc(out) - return out -diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRCNN.py Chinese-Text-Classification-Pytorch/models/TextRCNN.py ---- Chinese-Text-Classification-Pytorch_back/models/TextRCNN.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/TextRCNN.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,64 +0,0 @@ --# coding: UTF-8 --import torch --import torch.nn as nn --import torch.nn.functional as F --import numpy as np -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'TextRCNN' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 1.0 # 
随机失活 -- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 10 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 1e-3 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 -- self.hidden_size = 256 # lstm隐藏层 -- self.num_layers = 1 # lstm层数 -- -- --'''Recurrent Convolutional Neural Networks for Text Classification''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, -- bidirectional=True, batch_first=True, dropout=config.dropout) -- self.maxpool = nn.MaxPool1d(config.pad_size) -- self.fc = nn.Linear(config.hidden_size * 2 + config.embed, config.num_classes) -- -- def forward(self, x): -- x, _ = x -- embed = self.embedding(x) # [batch_size, seq_len, embeding]=[64, 32, 64] -- out, _ = self.lstm(embed) -- out = torch.cat((embed, out), 2) -- out = F.relu(out) -- out = out.permute(0, 2, 1) -- out = self.maxpool(out).squeeze() -- out = self.fc(out) -- return out -diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRNN_Att.py Chinese-Text-Classification-Pytorch/models/TextRNN_Att.py ---- Chinese-Text-Classification-Pytorch_back/models/TextRNN_Att.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/TextRNN_Att.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,73 +0,0 @@ --# coding: UTF-8 --import torch --import torch.nn as nn --import torch.nn.functional as F --import numpy as np -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'TextRNN_Att' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 0.5 # 随机失活 -- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 10 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 1e-3 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 -- self.hidden_size = 128 # lstm隐藏层 -- self.num_layers = 2 # lstm层数 -- self.hidden_size2 = 64 -- -- --'''Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- 
if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, -- bidirectional=True, batch_first=True, dropout=config.dropout) -- self.tanh1 = nn.Tanh() -- # self.u = nn.Parameter(torch.Tensor(config.hidden_size * 2, config.hidden_size * 2)) -- self.w = nn.Parameter(torch.zeros(config.hidden_size * 2)) -- self.tanh2 = nn.Tanh() -- self.fc1 = nn.Linear(config.hidden_size * 2, config.hidden_size2) -- self.fc = nn.Linear(config.hidden_size2, config.num_classes) -- -- def forward(self, x): -- x, _ = x -- emb = self.embedding(x) # [batch_size, seq_len, embeding]=[128, 32, 300] -- H, _ = self.lstm(emb) # [batch_size, seq_len, hidden_size * num_direction]=[128, 32, 256] -- -- M = self.tanh1(H) # [128, 32, 256] -- # M = torch.tanh(torch.matmul(H, self.u)) -- alpha = F.softmax(torch.matmul(M, self.w), dim=1).unsqueeze(-1) # [128, 32, 1] -- out = H * alpha # [128, 32, 256] -- out = torch.sum(out, 1) # [128, 256] -- out = F.relu(out) -- out = self.fc1(out) -- out = self.fc(out) # [128, 64] -- return out -diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRNN.py Chinese-Text-Classification-Pytorch/models/TextRNN.py ---- Chinese-Text-Classification-Pytorch_back/models/TextRNN.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/TextRNN.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,75 +0,0 @@ --# coding: UTF-8 --import torch --import torch.nn as nn --import numpy as np -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'TextRNN' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 0.5 # 随机失活 -- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 10 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 1e-3 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 -- self.hidden_size = 128 # lstm隐藏层 -- self.num_layers = 2 # lstm层数 -- -- --'''Recurrent Neural Network for Text Classification with Multi-Task Learning''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, -- 
bidirectional=True, batch_first=True, dropout=config.dropout) -- self.fc = nn.Linear(config.hidden_size * 2, config.num_classes) -- -- def forward(self, x): -- x, _ = x -- out = self.embedding(x) # [batch_size, seq_len, embeding]=[128, 32, 300] -- out, _ = self.lstm(out) -- out = self.fc(out[:, -1, :]) # 句子最后时刻的 hidden state -- return out -- -- '''变长RNN,效果差不多,甚至还低了点...''' -- # def forward(self, x): -- # x, seq_len = x -- # out = self.embedding(x) -- # _, idx_sort = torch.sort(seq_len, dim=0, descending=True) # 长度从长到短排序(index) -- # _, idx_unsort = torch.sort(idx_sort) # 排序后,原序列的 index -- # out = torch.index_select(out, 0, idx_sort) -- # seq_len = list(seq_len[idx_sort]) -- # out = nn.utils.rnn.pack_padded_sequence(out, seq_len, batch_first=True) -- # # [batche_size, seq_len, num_directions * hidden_size] -- # out, (hn, _) = self.lstm(out) -- # out = torch.cat((hn[2], hn[3]), -1) -- # # out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True) -- # out = out.index_select(0, idx_unsort) -- # out = self.fc(out) -- # return out -diff -uprN Chinese-Text-Classification-Pytorch_back/models/Transformer.py Chinese-Text-Classification-Pytorch/models/Transformer.py ---- Chinese-Text-Classification-Pytorch_back/models/Transformer.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/models/Transformer.py 1970-01-01 08:00:00.000000000 +0800 -@@ -1,178 +0,0 @@ --import torch --import torch.nn as nn --import torch.nn.functional as F --import numpy as np --import copy -- -- --class Config(object): -- -- """配置参数""" -- def __init__(self, dataset, embedding): -- self.model_name = 'Transformer' -- self.train_path = dataset + '/data/train.txt' # 训练集 -- self.dev_path = dataset + '/data/dev.txt' # 验证集 -- self.test_path = dataset + '/data/test.txt' # 测试集 -- self.class_list = [x.strip() for x in open( -- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 -- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 -- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 -- self.log_path = dataset + '/log/' + self.model_name -- self.embedding_pretrained = torch.tensor( -- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ -- if embedding != 'random' else None # 预训练词向量 -- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 -- -- self.dropout = 0.5 # 随机失活 -- self.require_improvement = 2000 # 若超过1000batch效果还没提升,则提前结束训练 -- self.num_classes = len(self.class_list) # 类别数 -- self.n_vocab = 0 # 词表大小,在运行时赋值 -- self.num_epochs = 20 # epoch数 -- self.batch_size = 128 # mini-batch大小 -- self.pad_size = 32 # 每句话处理成的长度(短填长切) -- self.learning_rate = 5e-4 # 学习率 -- self.embed = self.embedding_pretrained.size(1)\ -- if self.embedding_pretrained is not None else 300 # 字向量维度 -- self.dim_model = 300 -- self.hidden = 1024 -- self.last_hidden = 512 -- self.num_head = 5 -- self.num_encoder = 2 -- -- --'''Attention Is All You Need''' -- -- --class Model(nn.Module): -- def __init__(self, config): -- super(Model, self).__init__() -- if config.embedding_pretrained is not None: -- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) -- else: -- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) -- -- self.postion_embedding = Positional_Encoding(config.embed, config.pad_size, config.dropout, config.device) -- self.encoder = Encoder(config.dim_model, config.num_head, config.hidden, config.dropout) -- self.encoders = nn.ModuleList([ -- 
copy.deepcopy(self.encoder) -- # Encoder(config.dim_model, config.num_head, config.hidden, config.dropout) -- for _ in range(config.num_encoder)]) -- -- self.fc1 = nn.Linear(config.pad_size * config.dim_model, config.num_classes) -- # self.fc2 = nn.Linear(config.last_hidden, config.num_classes) -- # self.fc1 = nn.Linear(config.dim_model, config.num_classes) -- -- def forward(self, x): -- out = self.embedding(x[0]) -- out = self.postion_embedding(out) -- for encoder in self.encoders: -- out = encoder(out) -- out = out.view(out.size(0), -1) -- # out = torch.mean(out, 1) -- out = self.fc1(out) -- return out -- -- --class Encoder(nn.Module): -- def __init__(self, dim_model, num_head, hidden, dropout): -- super(Encoder, self).__init__() -- self.attention = Multi_Head_Attention(dim_model, num_head, dropout) -- self.feed_forward = Position_wise_Feed_Forward(dim_model, hidden, dropout) -- -- def forward(self, x): -- out = self.attention(x) -- out = self.feed_forward(out) -- return out -- -- --class Positional_Encoding(nn.Module): -- def __init__(self, embed, pad_size, dropout, device): -- super(Positional_Encoding, self).__init__() -- self.device = device -- self.pe = torch.tensor([[pos / (10000.0 ** (i // 2 * 2.0 / embed)) for i in range(embed)] for pos in range(pad_size)]) -- self.pe[:, 0::2] = np.sin(self.pe[:, 0::2]) -- self.pe[:, 1::2] = np.cos(self.pe[:, 1::2]) -- self.dropout = nn.Dropout(dropout) -- -- def forward(self, x): -- out = x + nn.Parameter(self.pe, requires_grad=False).to(self.device) -- out = self.dropout(out) -- return out -- -- --class Scaled_Dot_Product_Attention(nn.Module): -- '''Scaled Dot-Product Attention ''' -- def __init__(self): -- super(Scaled_Dot_Product_Attention, self).__init__() -- -- def forward(self, Q, K, V, scale=None): -- ''' -- Args: -- Q: [batch_size, len_Q, dim_Q] -- K: [batch_size, len_K, dim_K] -- V: [batch_size, len_V, dim_V] -- scale: 缩放因子 论文为根号dim_K -- Return: -- self-attention后的张量,以及attention张量 -- ''' -- attention = torch.matmul(Q, K.permute(0, 2, 1)) -- if scale: -- attention = attention * scale -- # if mask: # TODO change this -- # attention = attention.masked_fill_(mask == 0, -1e9) -- attention = F.softmax(attention, dim=-1) -- context = torch.matmul(attention, V) -- return context -- -- --class Multi_Head_Attention(nn.Module): -- def __init__(self, dim_model, num_head, dropout=0.0): -- super(Multi_Head_Attention, self).__init__() -- self.num_head = num_head -- assert dim_model % num_head == 0 -- self.dim_head = dim_model // self.num_head -- self.fc_Q = nn.Linear(dim_model, num_head * self.dim_head) -- self.fc_K = nn.Linear(dim_model, num_head * self.dim_head) -- self.fc_V = nn.Linear(dim_model, num_head * self.dim_head) -- self.attention = Scaled_Dot_Product_Attention() -- self.fc = nn.Linear(num_head * self.dim_head, dim_model) -- self.dropout = nn.Dropout(dropout) -- self.layer_norm = nn.LayerNorm(dim_model) -- -- def forward(self, x): -- batch_size = x.size(0) -- Q = self.fc_Q(x) -- K = self.fc_K(x) -- V = self.fc_V(x) -- Q = Q.view(batch_size * self.num_head, -1, self.dim_head) -- K = K.view(batch_size * self.num_head, -1, self.dim_head) -- V = V.view(batch_size * self.num_head, -1, self.dim_head) -- # if mask: # TODO -- # mask = mask.repeat(self.num_head, 1, 1) # TODO change this -- scale = K.size(-1) ** -0.5 # 缩放因子 -- context = self.attention(Q, K, V, scale) -- -- context = context.view(batch_size, -1, self.dim_head * self.num_head) -- out = self.fc(context) -- out = self.dropout(out) -- out = out + x # 残差连接 -- out = self.layer_norm(out) -- 
return out -- -- --class Position_wise_Feed_Forward(nn.Module): -- def __init__(self, dim_model, hidden, dropout=0.0): -- super(Position_wise_Feed_Forward, self).__init__() -- self.fc1 = nn.Linear(dim_model, hidden) -- self.fc2 = nn.Linear(hidden, dim_model) -- self.dropout = nn.Dropout(dropout) -- self.layer_norm = nn.LayerNorm(dim_model) -- -- def forward(self, x): -- out = self.fc1(x) -- out = F.relu(out) -- out = self.fc2(out) -- out = self.dropout(out) -- out = out + x # 残差连接 -- out = self.layer_norm(out) -- return out -diff -uprN Chinese-Text-Classification-Pytorch_back/utils.py Chinese-Text-Classification-Pytorch/utils.py ---- Chinese-Text-Classification-Pytorch_back/utils.py 2021-08-13 20:49:45.263263000 +0800 -+++ Chinese-Text-Classification-Pytorch/utils.py 2021-09-07 21:23:50.874085521 +0800 -@@ -60,7 +60,7 @@ def build_dataset(config, ues_word): - # word to id - for word in token: - words_line.append(vocab.get(word, vocab.get(UNK))) -- contents.append((words_line, int(label), seq_len)) -+ contents.append((words_line, int(label))) - return contents # [([...], 0), ([...], 1), ...] - train = load_dataset(config.train_path, config.pad_size) - dev = load_dataset(config.dev_path, config.pad_size) -@@ -83,9 +83,7 @@ class DatasetIterater(object): - x = torch.LongTensor([_[0] for _ in datas]).to(self.device) - y = torch.LongTensor([_[1] for _ in datas]).to(self.device) - -- # pad前的长度(超过pad_size的设为pad_size) -- seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device) -- return (x, seq_len), y -+ return x, y - - def __next__(self): - if self.residue and self.index == self.n_batches: +二进制文件 Chinese-Text-Classification-Pytorch_back/.git/index 和 Chinese-Text-Classification-Pytorch/.git/index 不同 +diff -uprN Chinese-Text-Classification-Pytorch_back/models/DPCNN.py Chinese-Text-Classification-Pytorch/models/DPCNN.py +--- Chinese-Text-Classification-Pytorch_back/models/DPCNN.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/DPCNN.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,89 +0,0 @@ +-# coding: UTF-8 +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import numpy as np +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'DPCNN' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 0.5 # 随机失活 +- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 20 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 1e-3 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度 +- self.num_filters = 250 # 卷积核数量(channels数) +- +- +-'''Deep Pyramid Convolutional Neural Networks for 
Text Categorization''' +- +- +-class Model(nn.Module): +- def __init__(self, config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- self.conv_region = nn.Conv2d(1, config.num_filters, (3, config.embed), stride=1) +- self.conv = nn.Conv2d(config.num_filters, config.num_filters, (3, 1), stride=1) +- self.max_pool = nn.MaxPool2d(kernel_size=(3, 1), stride=2) +- self.padding1 = nn.ZeroPad2d((0, 0, 1, 1)) # top bottom +- self.padding2 = nn.ZeroPad2d((0, 0, 0, 1)) # bottom +- self.relu = nn.ReLU() +- self.fc = nn.Linear(config.num_filters, config.num_classes) +- +- def forward(self, x): +- x = x[0] +- x = self.embedding(x) +- x = x.unsqueeze(1) # [batch_size, 250, seq_len, 1] +- x = self.conv_region(x) # [batch_size, 250, seq_len-3+1, 1] +- +- x = self.padding1(x) # [batch_size, 250, seq_len, 1] +- x = self.relu(x) +- x = self.conv(x) # [batch_size, 250, seq_len-3+1, 1] +- x = self.padding1(x) # [batch_size, 250, seq_len, 1] +- x = self.relu(x) +- x = self.conv(x) # [batch_size, 250, seq_len-3+1, 1] +- while x.size()[2] > 2: +- x = self._block(x) +- x = x.squeeze() # [batch_size, num_filters(250)] +- x = self.fc(x) +- return x +- +- def _block(self, x): +- x = self.padding2(x) +- px = self.max_pool(x) +- +- x = self.padding1(px) +- x = F.relu(x) +- x = self.conv(x) +- +- x = self.padding1(x) +- x = F.relu(x) +- x = self.conv(x) +- +- # Short Cut +- x = x + px +- return x +diff -uprN Chinese-Text-Classification-Pytorch_back/models/FastText.py Chinese-Text-Classification-Pytorch/models/FastText.py +--- Chinese-Text-Classification-Pytorch_back/models/FastText.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/FastText.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,69 +0,0 @@ +-# coding: UTF-8 +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import numpy as np +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'FastText' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 0.5 # 随机失活 +- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 20 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 1e-3 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度 +- self.hidden_size = 256 # 隐藏层大小 +- self.n_gram_vocab = 250499 # ngram 词表大小 +- +- +-'''Bag of Tricks for Efficient Text Classification''' +- +- +-class Model(nn.Module): +- def __init__(self, 
config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- self.embedding_ngram2 = nn.Embedding(config.n_gram_vocab, config.embed) +- self.embedding_ngram3 = nn.Embedding(config.n_gram_vocab, config.embed) +- self.dropout = nn.Dropout(config.dropout) +- self.fc1 = nn.Linear(config.embed * 3, config.hidden_size) +- # self.dropout2 = nn.Dropout(config.dropout) +- self.fc2 = nn.Linear(config.hidden_size, config.num_classes) +- +- def forward(self, x): +- +- out_word = self.embedding(x[0]) +- out_bigram = self.embedding_ngram2(x[2]) +- out_trigram = self.embedding_ngram3(x[3]) +- out = torch.cat((out_word, out_bigram, out_trigram), -1) +- +- out = out.mean(dim=1) +- out = self.dropout(out) +- out = self.fc1(out) +- out = F.relu(out) +- out = self.fc2(out) +- return out +二进制文件 Chinese-Text-Classification-Pytorch_back/models/__pycache__/TextCNN.cpython-37.pyc 和 Chinese-Text-Classification-Pytorch/models/__pycache__/TextCNN.cpython-37.pyc 不同 +diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextCNN.py Chinese-Text-Classification-Pytorch/models/TextCNN.py +--- Chinese-Text-Classification-Pytorch_back/models/TextCNN.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/TextCNN.py 2021-09-07 21:23:07.218366753 +0800 +@@ -1,4 +1,6 @@ + # coding: UTF-8 ++import os.path ++ + import torch + import torch.nn as nn + import torch.nn.functional as F +@@ -13,10 +15,11 @@ class Config(object): + self.train_path = dataset + '/data/train.txt' # 训练集 + self.dev_path = dataset + '/data/dev.txt' # 验证集 + self.test_path = dataset + '/data/test.txt' # 测试集 ++ print('path', os.path.abspath(dataset+'/data/class.txt')) + self.class_list = [x.strip() for x in open( + dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 + self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 ++ self.save_path = dataset + '/saved_dict/' + self.model_name + '.pth' # 模型训练结果 + self.log_path = dataset + '/log/' + self.model_name + self.embedding_pretrained = torch.tensor( + np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +@@ -49,18 +52,21 @@ class Model(nn.Module): + self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) + self.convs = nn.ModuleList( + [nn.Conv2d(1, config.num_filters, (k, config.embed)) for k in config.filter_sizes]) ++ self.pools = nn.ModuleList( ++ [nn.MaxPool1d(config.pad_size - k + 1) for k in config.filter_sizes]) + self.dropout = nn.Dropout(config.dropout) + self.fc = nn.Linear(config.num_filters * len(config.filter_sizes), config.num_classes) + +- def conv_and_pool(self, x, conv): ++ def conv_and_pool(self, x, conv, pool): + x = F.relu(conv(x)).squeeze(3) +- x = F.max_pool1d(x, x.size(2)).squeeze(2) ++ x = pool(x).squeeze(2) ++ # x = F.max_pool1d(x, x.size(2)).squeeze(2) + return x + + def forward(self, x): +- out = self.embedding(x[0]) ++ out = self.embedding(x) + out = out.unsqueeze(1) +- out = torch.cat([self.conv_and_pool(out, conv) for conv in self.convs], 1) ++ out = torch.cat([self.conv_and_pool(out, conv, pool) for conv, pool in zip(self.convs, self.pools)], 1) + out = self.dropout(out) + out = self.fc(out) + return out +diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRCNN.py 
Chinese-Text-Classification-Pytorch/models/TextRCNN.py +--- Chinese-Text-Classification-Pytorch_back/models/TextRCNN.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/TextRCNN.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,64 +0,0 @@ +-# coding: UTF-8 +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import numpy as np +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'TextRCNN' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 1.0 # 随机失活 +- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 10 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 1e-3 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 +- self.hidden_size = 256 # lstm隐藏层 +- self.num_layers = 1 # lstm层数 +- +- +-'''Recurrent Convolutional Neural Networks for Text Classification''' +- +- +-class Model(nn.Module): +- def __init__(self, config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, +- bidirectional=True, batch_first=True, dropout=config.dropout) +- self.maxpool = nn.MaxPool1d(config.pad_size) +- self.fc = nn.Linear(config.hidden_size * 2 + config.embed, config.num_classes) +- +- def forward(self, x): +- x, _ = x +- embed = self.embedding(x) # [batch_size, seq_len, embeding]=[64, 32, 64] +- out, _ = self.lstm(embed) +- out = torch.cat((embed, out), 2) +- out = F.relu(out) +- out = out.permute(0, 2, 1) +- out = self.maxpool(out).squeeze() +- out = self.fc(out) +- return out +diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRNN_Att.py Chinese-Text-Classification-Pytorch/models/TextRNN_Att.py +--- Chinese-Text-Classification-Pytorch_back/models/TextRNN_Att.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/TextRNN_Att.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,73 +0,0 @@ +-# coding: UTF-8 +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import numpy as np +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'TextRNN_Att' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + 
'/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 0.5 # 随机失活 +- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 10 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 1e-3 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 +- self.hidden_size = 128 # lstm隐藏层 +- self.num_layers = 2 # lstm层数 +- self.hidden_size2 = 64 +- +- +-'''Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification''' +- +- +-class Model(nn.Module): +- def __init__(self, config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, +- bidirectional=True, batch_first=True, dropout=config.dropout) +- self.tanh1 = nn.Tanh() +- # self.u = nn.Parameter(torch.Tensor(config.hidden_size * 2, config.hidden_size * 2)) +- self.w = nn.Parameter(torch.zeros(config.hidden_size * 2)) +- self.tanh2 = nn.Tanh() +- self.fc1 = nn.Linear(config.hidden_size * 2, config.hidden_size2) +- self.fc = nn.Linear(config.hidden_size2, config.num_classes) +- +- def forward(self, x): +- x, _ = x +- emb = self.embedding(x) # [batch_size, seq_len, embeding]=[128, 32, 300] +- H, _ = self.lstm(emb) # [batch_size, seq_len, hidden_size * num_direction]=[128, 32, 256] +- +- M = self.tanh1(H) # [128, 32, 256] +- # M = torch.tanh(torch.matmul(H, self.u)) +- alpha = F.softmax(torch.matmul(M, self.w), dim=1).unsqueeze(-1) # [128, 32, 1] +- out = H * alpha # [128, 32, 256] +- out = torch.sum(out, 1) # [128, 256] +- out = F.relu(out) +- out = self.fc1(out) +- out = self.fc(out) # [128, 64] +- return out +diff -uprN Chinese-Text-Classification-Pytorch_back/models/TextRNN.py Chinese-Text-Classification-Pytorch/models/TextRNN.py +--- Chinese-Text-Classification-Pytorch_back/models/TextRNN.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/TextRNN.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,75 +0,0 @@ +-# coding: UTF-8 +-import torch +-import torch.nn as nn +-import numpy as np +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'TextRNN' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = 
torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 0.5 # 随机失活 +- self.require_improvement = 1000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 10 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 1e-3 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度, 若使用了预训练词向量,则维度统一 +- self.hidden_size = 128 # lstm隐藏层 +- self.num_layers = 2 # lstm层数 +- +- +-'''Recurrent Neural Network for Text Classification with Multi-Task Learning''' +- +- +-class Model(nn.Module): +- def __init__(self, config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- self.lstm = nn.LSTM(config.embed, config.hidden_size, config.num_layers, +- bidirectional=True, batch_first=True, dropout=config.dropout) +- self.fc = nn.Linear(config.hidden_size * 2, config.num_classes) +- +- def forward(self, x): +- x, _ = x +- out = self.embedding(x) # [batch_size, seq_len, embeding]=[128, 32, 300] +- out, _ = self.lstm(out) +- out = self.fc(out[:, -1, :]) # 句子最后时刻的 hidden state +- return out +- +- '''变长RNN,效果差不多,甚至还低了点...''' +- # def forward(self, x): +- # x, seq_len = x +- # out = self.embedding(x) +- # _, idx_sort = torch.sort(seq_len, dim=0, descending=True) # 长度从长到短排序(index) +- # _, idx_unsort = torch.sort(idx_sort) # 排序后,原序列的 index +- # out = torch.index_select(out, 0, idx_sort) +- # seq_len = list(seq_len[idx_sort]) +- # out = nn.utils.rnn.pack_padded_sequence(out, seq_len, batch_first=True) +- # # [batche_size, seq_len, num_directions * hidden_size] +- # out, (hn, _) = self.lstm(out) +- # out = torch.cat((hn[2], hn[3]), -1) +- # # out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True) +- # out = out.index_select(0, idx_unsort) +- # out = self.fc(out) +- # return out +diff -uprN Chinese-Text-Classification-Pytorch_back/models/Transformer.py Chinese-Text-Classification-Pytorch/models/Transformer.py +--- Chinese-Text-Classification-Pytorch_back/models/Transformer.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/models/Transformer.py 1970-01-01 08:00:00.000000000 +0800 +@@ -1,178 +0,0 @@ +-import torch +-import torch.nn as nn +-import torch.nn.functional as F +-import numpy as np +-import copy +- +- +-class Config(object): +- +- """配置参数""" +- def __init__(self, dataset, embedding): +- self.model_name = 'Transformer' +- self.train_path = dataset + '/data/train.txt' # 训练集 +- self.dev_path = dataset + '/data/dev.txt' # 验证集 +- self.test_path = dataset + '/data/test.txt' # 测试集 +- self.class_list = [x.strip() for x in open( +- dataset + '/data/class.txt', encoding='utf-8').readlines()] # 类别名单 +- self.vocab_path = dataset + '/data/vocab.pkl' # 词表 +- self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt' # 模型训练结果 +- self.log_path = dataset + '/log/' + self.model_name +- self.embedding_pretrained = torch.tensor( +- np.load(dataset + '/data/' + embedding)["embeddings"].astype('float32'))\ +- if embedding != 'random' else None # 预训练词向量 +- self.device = 
torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 设备 +- +- self.dropout = 0.5 # 随机失活 +- self.require_improvement = 2000 # 若超过1000batch效果还没提升,则提前结束训练 +- self.num_classes = len(self.class_list) # 类别数 +- self.n_vocab = 0 # 词表大小,在运行时赋值 +- self.num_epochs = 20 # epoch数 +- self.batch_size = 128 # mini-batch大小 +- self.pad_size = 32 # 每句话处理成的长度(短填长切) +- self.learning_rate = 5e-4 # 学习率 +- self.embed = self.embedding_pretrained.size(1)\ +- if self.embedding_pretrained is not None else 300 # 字向量维度 +- self.dim_model = 300 +- self.hidden = 1024 +- self.last_hidden = 512 +- self.num_head = 5 +- self.num_encoder = 2 +- +- +-'''Attention Is All You Need''' +- +- +-class Model(nn.Module): +- def __init__(self, config): +- super(Model, self).__init__() +- if config.embedding_pretrained is not None: +- self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False) +- else: +- self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1) +- +- self.postion_embedding = Positional_Encoding(config.embed, config.pad_size, config.dropout, config.device) +- self.encoder = Encoder(config.dim_model, config.num_head, config.hidden, config.dropout) +- self.encoders = nn.ModuleList([ +- copy.deepcopy(self.encoder) +- # Encoder(config.dim_model, config.num_head, config.hidden, config.dropout) +- for _ in range(config.num_encoder)]) +- +- self.fc1 = nn.Linear(config.pad_size * config.dim_model, config.num_classes) +- # self.fc2 = nn.Linear(config.last_hidden, config.num_classes) +- # self.fc1 = nn.Linear(config.dim_model, config.num_classes) +- +- def forward(self, x): +- out = self.embedding(x[0]) +- out = self.postion_embedding(out) +- for encoder in self.encoders: +- out = encoder(out) +- out = out.view(out.size(0), -1) +- # out = torch.mean(out, 1) +- out = self.fc1(out) +- return out +- +- +-class Encoder(nn.Module): +- def __init__(self, dim_model, num_head, hidden, dropout): +- super(Encoder, self).__init__() +- self.attention = Multi_Head_Attention(dim_model, num_head, dropout) +- self.feed_forward = Position_wise_Feed_Forward(dim_model, hidden, dropout) +- +- def forward(self, x): +- out = self.attention(x) +- out = self.feed_forward(out) +- return out +- +- +-class Positional_Encoding(nn.Module): +- def __init__(self, embed, pad_size, dropout, device): +- super(Positional_Encoding, self).__init__() +- self.device = device +- self.pe = torch.tensor([[pos / (10000.0 ** (i // 2 * 2.0 / embed)) for i in range(embed)] for pos in range(pad_size)]) +- self.pe[:, 0::2] = np.sin(self.pe[:, 0::2]) +- self.pe[:, 1::2] = np.cos(self.pe[:, 1::2]) +- self.dropout = nn.Dropout(dropout) +- +- def forward(self, x): +- out = x + nn.Parameter(self.pe, requires_grad=False).to(self.device) +- out = self.dropout(out) +- return out +- +- +-class Scaled_Dot_Product_Attention(nn.Module): +- '''Scaled Dot-Product Attention ''' +- def __init__(self): +- super(Scaled_Dot_Product_Attention, self).__init__() +- +- def forward(self, Q, K, V, scale=None): +- ''' +- Args: +- Q: [batch_size, len_Q, dim_Q] +- K: [batch_size, len_K, dim_K] +- V: [batch_size, len_V, dim_V] +- scale: 缩放因子 论文为根号dim_K +- Return: +- self-attention后的张量,以及attention张量 +- ''' +- attention = torch.matmul(Q, K.permute(0, 2, 1)) +- if scale: +- attention = attention * scale +- # if mask: # TODO change this +- # attention = attention.masked_fill_(mask == 0, -1e9) +- attention = F.softmax(attention, dim=-1) +- context = torch.matmul(attention, V) +- return context +- +- +-class Multi_Head_Attention(nn.Module): 
+- def __init__(self, dim_model, num_head, dropout=0.0): +- super(Multi_Head_Attention, self).__init__() +- self.num_head = num_head +- assert dim_model % num_head == 0 +- self.dim_head = dim_model // self.num_head +- self.fc_Q = nn.Linear(dim_model, num_head * self.dim_head) +- self.fc_K = nn.Linear(dim_model, num_head * self.dim_head) +- self.fc_V = nn.Linear(dim_model, num_head * self.dim_head) +- self.attention = Scaled_Dot_Product_Attention() +- self.fc = nn.Linear(num_head * self.dim_head, dim_model) +- self.dropout = nn.Dropout(dropout) +- self.layer_norm = nn.LayerNorm(dim_model) +- +- def forward(self, x): +- batch_size = x.size(0) +- Q = self.fc_Q(x) +- K = self.fc_K(x) +- V = self.fc_V(x) +- Q = Q.view(batch_size * self.num_head, -1, self.dim_head) +- K = K.view(batch_size * self.num_head, -1, self.dim_head) +- V = V.view(batch_size * self.num_head, -1, self.dim_head) +- # if mask: # TODO +- # mask = mask.repeat(self.num_head, 1, 1) # TODO change this +- scale = K.size(-1) ** -0.5 # 缩放因子 +- context = self.attention(Q, K, V, scale) +- +- context = context.view(batch_size, -1, self.dim_head * self.num_head) +- out = self.fc(context) +- out = self.dropout(out) +- out = out + x # 残差连接 +- out = self.layer_norm(out) +- return out +- +- +-class Position_wise_Feed_Forward(nn.Module): +- def __init__(self, dim_model, hidden, dropout=0.0): +- super(Position_wise_Feed_Forward, self).__init__() +- self.fc1 = nn.Linear(dim_model, hidden) +- self.fc2 = nn.Linear(hidden, dim_model) +- self.dropout = nn.Dropout(dropout) +- self.layer_norm = nn.LayerNorm(dim_model) +- +- def forward(self, x): +- out = self.fc1(x) +- out = F.relu(out) +- out = self.fc2(out) +- out = self.dropout(out) +- out = out + x # 残差连接 +- out = self.layer_norm(out) +- return out +diff -uprN Chinese-Text-Classification-Pytorch_back/utils.py Chinese-Text-Classification-Pytorch/utils.py +--- Chinese-Text-Classification-Pytorch_back/utils.py 2021-08-13 20:49:45.263263000 +0800 ++++ Chinese-Text-Classification-Pytorch/utils.py 2021-09-07 21:23:50.874085521 +0800 +@@ -60,7 +60,7 @@ def build_dataset(config, ues_word): + # word to id + for word in token: + words_line.append(vocab.get(word, vocab.get(UNK))) +- contents.append((words_line, int(label), seq_len)) ++ contents.append((words_line, int(label))) + return contents # [([...], 0), ([...], 1), ...] + train = load_dataset(config.train_path, config.pad_size) + dev = load_dataset(config.dev_path, config.pad_size) +@@ -83,9 +83,7 @@ class DatasetIterater(object): + x = torch.LongTensor([_[0] for _ in datas]).to(self.device) + y = torch.LongTensor([_[1] for _ in datas]).to(self.device) + +- # pad前的长度(超过pad_size的设为pad_size) +- seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device) +- return (x, seq_len), y ++ return x, y + + def __next__(self): + if self.residue and self.index == self.n_batches: diff --git a/ACL_PyTorch/contrib/nlp/albert/0001-init.patch b/ACL_PyTorch/contrib/nlp/albert/0001-init.patch index 0fcc472b691036547014bddade1234b9d19c8fa5..238f4906db41e3f8c81a2f3c44db1497dcbe63e4 100644 --- a/ACL_PyTorch/contrib/nlp/albert/0001-init.patch +++ b/ACL_PyTorch/contrib/nlp/albert/0001-init.patch @@ -34,307 +34,307 @@ index 57543c3..555ffd1 100644 --- a/callback/lr_scheduler.py +++ b/callback/lr_scheduler.py @@ -12,6 +12,7 @@ __all__ = ['CustomDecayLR', - 'CosineLRWithRestarts', - ] - -+ - def get_constant_schedule(optimizer, last_epoch=-1): - """ Create a schedule with a constant learning rate. 
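For reference, a minimal standalone sketch of the scaled dot-product attention pattern used by the Transformer blocks patched above; batch-first [batch, len, dim] tensors are an assumption of this sketch, and the scale defaults to 1/sqrt(dim_K) just as in the patched Scaled_Dot_Product_Attention.

import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v, scale=None):
    # q, k, v: [batch, len, dim]; scale defaults to dim_k ** -0.5
    if scale is None:
        scale = k.size(-1) ** -0.5
    scores = torch.matmul(q, k.transpose(-2, -1)) * scale   # [batch, len_q, len_k]
    attn = F.softmax(scores, dim=-1)
    return torch.matmul(attn, v)                            # [batch, len_q, dim]

q = k = v = torch.randn(2, 32, 300)
out = scaled_dot_product_attention(q, k, v)                 # torch.Size([2, 32, 300])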
- """ + 'CosineLRWithRestarts', + ] + ++ + def get_constant_schedule(optimizer, last_epoch=-1): + """ Create a schedule with a constant learning rate. + """ @@ -22,6 +23,7 @@ def get_constant_schedule_with_warmup(optimizer, num_warmup_steps, last_epoch=-1 - """ Create a schedule with a constant learning rate preceded by a warmup - period during which the learning rate increases linearly between 0 and 1. - """ -+ - def lr_lambda(current_step): - if current_step < num_warmup_steps: - return float(current_step) / float(max(1.0, num_warmup_steps)) + """ Create a schedule with a constant learning rate preceded by a warmup + period during which the learning rate increases linearly between 0 and 1. + """ ++ + def lr_lambda(current_step): + if current_step < num_warmup_steps: + return float(current_step) / float(max(1.0, num_warmup_steps)) @@ -34,6 +36,7 @@ def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_st - """ Create a schedule with a learning rate that decreases linearly after - linearly increasing during a warmup period. - """ -+ - def lr_lambda(current_step): - if current_step < num_warmup_steps: - return float(current_step) / float(max(1, num_warmup_steps)) + """ Create a schedule with a learning rate that decreases linearly after + linearly increasing during a warmup period. + """ ++ + def lr_lambda(current_step): + if current_step < num_warmup_steps: + return float(current_step) / float(max(1, num_warmup_steps)) @@ -47,6 +50,7 @@ def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_st - values of the cosine function between 0 and `pi * cycles` after a warmup - period during which it increases linearly between 0 and 1. - """ -+ - def lr_lambda(current_step): - if current_step < num_warmup_steps: - return float(current_step) / float(max(1, num_warmup_steps)) + values of the cosine function between 0 and `pi * cycles` after a warmup + period during which it increases linearly between 0 and 1. + """ ++ + def lr_lambda(current_step): + if current_step < num_warmup_steps: + return float(current_step) / float(max(1, num_warmup_steps)) @@ -56,11 +60,13 @@ def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_st - return LambdaLR(optimizer, lr_lambda, last_epoch) - - --def get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=1., last_epoch=-1): -+def get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=1., -+ last_epoch=-1): - """ Create a schedule with a learning rate that decreases following the - values of the cosine function with several hard restarts, after a warmup - period during which it increases linearly between 0 and 1. - """ -+ - def lr_lambda(current_step): - if current_step < num_warmup_steps: - return float(current_step) / float(max(1, num_warmup_steps)) + return LambdaLR(optimizer, lr_lambda, last_epoch) + + +-def get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=1., last_epoch=-1): ++def get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=1., ++ last_epoch=-1): + """ Create a schedule with a learning rate that decreases following the + values of the cosine function with several hard restarts, after a warmup + period during which it increases linearly between 0 and 1. 
+ """ ++ + def lr_lambda(current_step): + if current_step < num_warmup_steps: + return float(current_step) / float(max(1, num_warmup_steps)) @@ -86,11 +92,12 @@ class CustomDecayLR(object): - >>> optimizer.step() - >>> validate(...) - ''' -- def __init__(self,optimizer,lr): -+ -+ def __init__(self, optimizer, lr): - self.optimizer = optimizer - self.lr = lr - -- def epoch_step(self,epoch): -+ def epoch_step(self, epoch): - lr = self.lr - if epoch > 12: - lr = lr / 1000 + >>> optimizer.step() + >>> validate(...) + ''' +- def __init__(self,optimizer,lr): ++ ++ def __init__(self, optimizer, lr): + self.optimizer = optimizer + self.lr = lr + +- def epoch_step(self,epoch): ++ def epoch_step(self, epoch): + lr = self.lr + if epoch > 12: + lr = lr / 1000 @@ -101,6 +108,7 @@ class CustomDecayLR(object): - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr - -+ - class BertLR(object): - ''' - Bert模型内定的学习率变化机制 + for param_group in self.optimizer.param_groups: + param_group['lr'] = lr + ++ + class BertLR(object): + ''' + Bert模型内定的学习率变化机制 @@ -116,23 +124,25 @@ class BertLR(object): - >>> scheduler.batch_step() - >>> validate(...) - ''' -- def __init__(self,optimizer,learning_rate,t_total,warmup): -+ -+ def __init__(self, optimizer, learning_rate, t_total, warmup): - self.learning_rate = learning_rate - self.optimizer = optimizer - self.t_total = t_total - self.warmup = warmup - - # 线性预热方式 -- def warmup_linear(self,x, warmup=0.002): -+ def warmup_linear(self, x, warmup=0.002): - if x < warmup: - return x / warmup - return 1.0 - x - -- def batch_step(self,training_step): -- lr_this_step = self.learning_rate * self.warmup_linear(training_step / self.t_total,self.warmup) -+ def batch_step(self, training_step): -+ lr_this_step = self.learning_rate * self.warmup_linear(training_step / self.t_total, self.warmup) - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr_this_step - -+ - class CyclicLR(object): - ''' - Cyclical learning rates for training neural networks + >>> scheduler.batch_step() + >>> validate(...) + ''' +- def __init__(self,optimizer,learning_rate,t_total,warmup): ++ ++ def __init__(self, optimizer, learning_rate, t_total, warmup): + self.learning_rate = learning_rate + self.optimizer = optimizer + self.t_total = t_total + self.warmup = warmup + + # 线性预热方式 +- def warmup_linear(self,x, warmup=0.002): ++ def warmup_linear(self, x, warmup=0.002): + if x < warmup: + return x / warmup + return 1.0 - x + +- def batch_step(self,training_step): +- lr_this_step = self.learning_rate * self.warmup_linear(training_step / self.t_total,self.warmup) ++ def batch_step(self, training_step): ++ lr_this_step = self.learning_rate * self.warmup_linear(training_step / self.t_total, self.warmup) + for param_group in self.optimizer.param_groups: + param_group['lr'] = lr_this_step + ++ + class CyclicLR(object): + ''' + Cyclical learning rates for training neural networks @@ -148,6 +158,7 @@ class CyclicLR(object): - >>> scheduler.batch_step() - >>> validate(...) - ''' -+ - def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3, - step_size=2000, mode='triangular', gamma=1., - scale_fn=None, scale_mode='cycle', last_batch_iteration=-1): + >>> scheduler.batch_step() + >>> validate(...) + ''' ++ + def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3, + step_size=2000, mode='triangular', gamma=1., + scale_fn=None, scale_mode='cycle', last_batch_iteration=-1): @@ -207,7 +218,7 @@ class CyclicLR(object): - return 1 / (2. 
** (x - 1)) - - def _exp_range_scale_fn(self, x): -- return self.gamma**(x) -+ return self.gamma ** (x) - - def get_lr(self): - step_size = float(self.step_size) + return 1 / (2. ** (x - 1)) + + def _exp_range_scale_fn(self, x): +- return self.gamma**(x) ++ return self.gamma ** (x) + + def get_lr(self): + step_size = float(self.step_size) @@ -232,6 +243,7 @@ class CyclicLR(object): - for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): - param_group['lr'] = lr - -+ - class ReduceLROnPlateau(object): - """Reduce learning rate when a metric has stopped improving. - Models often benefit from reducing the learning rate by a factor + for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): + param_group['lr'] = lr + ++ + class ReduceLROnPlateau(object): + """Reduce learning rate when a metric has stopped improving. + Models often benefit from reducing the learning rate by a factor @@ -267,7 +279,7 @@ class ReduceLROnPlateau(object): - """ - - def __init__(self, optimizer, mode='min', factor=0.1, patience=10, -- verbose=0, epsilon=1e-4, cooldown=0, min_lr=0,eps=1e-8): -+ verbose=0, epsilon=1e-4, cooldown=0, min_lr=0, eps=1e-8): - - super(ReduceLROnPlateau, self).__init__() - assert isinstance(optimizer, Optimizer) + """ + + def __init__(self, optimizer, mode='min', factor=0.1, patience=10, +- verbose=0, epsilon=1e-4, cooldown=0, min_lr=0,eps=1e-8): ++ verbose=0, epsilon=1e-4, cooldown=0, min_lr=0, eps=1e-8): + + super(ReduceLROnPlateau, self).__init__() + assert isinstance(optimizer, Optimizer) @@ -335,6 +347,7 @@ class ReduceLROnPlateau(object): - def in_cooldown(self): - return self.cooldown_counter > 0 - -+ - class ReduceLRWDOnPlateau(ReduceLROnPlateau): - """Reduce learning rate and weight decay when a metric has stopped - improving. Models often benefit from reducing the learning rate by + def in_cooldown(self): + return self.cooldown_counter > 0 + ++ + class ReduceLRWDOnPlateau(ReduceLROnPlateau): + """Reduce learning rate and weight decay when a metric has stopped + improving. Models often benefit from reducing the learning rate by @@ -356,6 +369,7 @@ class ReduceLRWDOnPlateau(ReduceLROnPlateau): - >>> # Note that step should be called after validate() - >>> scheduler.epoch_step(val_loss) - """ -+ - def epoch_step(self, metrics, epoch): - current = metrics - if current is None: + >>> # Note that step should be called after validate() + >>> scheduler.epoch_step(val_loss) + """ ++ + def epoch_step(self, metrics, epoch): + current = metrics + if current is None: @@ -384,11 +398,13 @@ class ReduceLRWDOnPlateau(ReduceLROnPlateau): - if old_weight_decay > new_weight_decay + self.eps: - param_group['weight_decay'] = new_weight_decay - if self.verbose: -- print('\nEpoch {epoch}: reducing weight decay factor of group {i} to {new_weight_decay:.4e}.') -+ print( -+ '\nEpoch {epoch}: reducing weight decay factor of group {i} to {new_weight_decay:.4e}.') - self.cooldown_counter = self.cooldown - self.wait = 0 - self.wait += 1 - -+ - class CosineLRWithRestarts(object): - """Decays learning rate with cosine annealing, normalizes weight decay - hyperparameter value, implements restarts. 
+ if old_weight_decay > new_weight_decay + self.eps: + param_group['weight_decay'] = new_weight_decay + if self.verbose: +- print('\nEpoch {epoch}: reducing weight decay factor of group {i} to {new_weight_decay:.4e}.') ++ print( ++ '\nEpoch {epoch}: reducing weight decay factor of group {i} to {new_weight_decay:.4e}.') + self.cooldown_counter = self.cooldown + self.wait = 0 + self.wait += 1 + ++ + class CosineLRWithRestarts(object): + """Decays learning rate with cosine annealing, normalizes weight decay + hyperparameter value, implements restarts. @@ -501,7 +517,7 @@ class CosineLRWithRestarts(object): - "training loop and while initializing " - "scheduler should be the same.") - -- for param_group, (lr, weight_decay) in zip(self.optimizer.param_groups,self.get_lr(t_cur)): -+ for param_group, (lr, weight_decay) in zip(self.optimizer.param_groups, self.get_lr(t_cur)): - param_group['lr'] = lr - param_group['weight_decay'] = weight_decay - + "training loop and while initializing " + "scheduler should be the same.") + +- for param_group, (lr, weight_decay) in zip(self.optimizer.param_groups,self.get_lr(t_cur)): ++ for param_group, (lr, weight_decay) in zip(self.optimizer.param_groups, self.get_lr(t_cur)): + param_group['lr'] = lr + param_group['weight_decay'] = weight_decay + @@ -522,18 +538,19 @@ class NoamLR(object): - >>> scheduler.batch_step(global_step) - >>> validate(...) - ''' -- def __init__(self,d_model,factor,warm_up,optimizer): -+ -+ def __init__(self, d_model, factor, warm_up, optimizer): - self.optimizer = optimizer - self.warm_up = warm_up - self.factor = factor - self.d_model = d_model - self._lr = 0 - -- def get_lr(self,step): -- lr = self.factor * (self.d_model ** (-0.5) * min(step ** (-0.5),step * self.warm_up ** (-1.5))) -+ def get_lr(self, step): -+ lr = self.factor * (self.d_model ** (-0.5) * min(step ** (-0.5), step * self.warm_up ** (-1.5))) - return lr - -- def batch_step(self,step): -+ def batch_step(self, step): - ''' - update parameters and rate - :return: + >>> scheduler.batch_step(global_step) + >>> validate(...) + ''' +- def __init__(self,d_model,factor,warm_up,optimizer): ++ ++ def __init__(self, d_model, factor, warm_up, optimizer): + self.optimizer = optimizer + self.warm_up = warm_up + self.factor = factor + self.d_model = d_model + self._lr = 0 + +- def get_lr(self,step): +- lr = self.factor * (self.d_model ** (-0.5) * min(step ** (-0.5),step * self.warm_up ** (-1.5))) ++ def get_lr(self, step): ++ lr = self.factor * (self.d_model ** (-0.5) * min(step ** (-0.5), step * self.warm_up ** (-1.5))) + return lr + +- def batch_step(self,step): ++ def batch_step(self, step): + ''' + update parameters and rate + :return: diff --git a/callback/modelcheckpoint.py b/callback/modelcheckpoint.py index b7f4ffa..20b0663 100644 --- a/callback/modelcheckpoint.py +++ b/callback/modelcheckpoint.py @@ -3,19 +3,21 @@ import numpy as np - import torch - from ..tools.common import logger - -+ - class ModelCheckpoint(object): - ''' - 模型保存,两种模式: - 1. 直接保存最好模型 - 2. 按照epoch频率保存模型 - ''' -+ - def __init__(self, checkpoint_dir, - monitor, -- arch,mode='min', -+ arch, mode='min', - epoch_freq=1, -- best = None, -- save_best_only = True): -- if isinstance(checkpoint_dir,Path): -+ best=None, -+ save_best_only=True): -+ if isinstance(checkpoint_dir, Path): - checkpoint_dir = checkpoint_dir - else: - checkpoint_dir = Path(checkpoint_dir) + import torch + from ..tools.common import logger + ++ + class ModelCheckpoint(object): + ''' + 模型保存,两种模式: + 1. 直接保存最好模型 + 2. 
按照epoch频率保存模型 + ''' ++ + def __init__(self, checkpoint_dir, + monitor, +- arch,mode='min', ++ arch, mode='min', + epoch_freq=1, +- best = None, +- save_best_only = True): +- if isinstance(checkpoint_dir,Path): ++ best=None, ++ save_best_only=True): ++ if isinstance(checkpoint_dir, Path): + checkpoint_dir = checkpoint_dir + else: + checkpoint_dir = Path(checkpoint_dir) @@ -36,14 +38,14 @@ class ModelCheckpoint(object): - self.monitor_op = np.greater - self.best = -np.Inf - # 这里主要重新加载模型时候 -- #对best重新赋值 -+ # 对best重新赋值 - if best: - self.best = best - - if save_best_only: - self.model_name = f"BEST_{arch}_MODEL.pth" - -- def epoch_step(self, state,current): -+ def epoch_step(self, state, current): - ''' - 正常模型 - :param state: 需要保存的信息 + self.monitor_op = np.greater + self.best = -np.Inf + # 这里主要重新加载模型时候 +- #对best重新赋值 ++ # 对best重新赋值 + if best: + self.best = best + + if save_best_only: + self.model_name = f"BEST_{arch}_MODEL.pth" + +- def epoch_step(self, state,current): ++ def epoch_step(self, state, current): + ''' + 正常模型 + :param state: 需要保存的信息 @@ -56,7 +58,7 @@ class ModelCheckpoint(object): - logger.info(f"\nEpoch {state['epoch']}: {self.monitor} improved from {self.best:.5f} to {current:.5f}") - self.best = current - state['best'] = self.best -- best_path = self.base_path/ self.model_name -+ best_path = self.base_path / self.model_name - torch.save(state, str(best_path)) - # 每隔几个epoch保存下模型 - else: + logger.info(f"\nEpoch {state['epoch']}: {self.monitor} improved from {self.best:.5f} to {current:.5f}") + self.best = current + state['best'] = self.best +- best_path = self.base_path/ self.model_name ++ best_path = self.base_path / self.model_name + torch.save(state, str(best_path)) + # 每隔几个epoch保存下模型 + else: @@ -65,7 +67,7 @@ class ModelCheckpoint(object): - logger.info(f"\nEpoch {state['epoch']}: save model to disk.") - torch.save(state, str(filename)) - -- def bert_epoch_step(self, state,current): -+ def bert_epoch_step(self, state, current): - ''' - 适合bert类型模型,适合pytorch_transformer模块 - :param state: + logger.info(f"\nEpoch {state['epoch']}: save model to disk.") + torch.save(state, str(filename)) + +- def bert_epoch_step(self, state,current): ++ def bert_epoch_step(self, state, current): + ''' + 适合bert类型模型,适合pytorch_transformer模块 + :param state: @@ -83,7 +85,7 @@ class ModelCheckpoint(object): - with open(str(output_config_file), 'w') as f: - f.write(model_to_save.config.to_json_string()) - state.pop("model") -- torch.save(state,self.base_path / 'checkpoint_info.bin') -+ torch.save(state, self.base_path / 'checkpoint_info.bin') - else: - if state['epoch'] % self.epoch_freq == 0: - save_path = self.base_path / f"checkpoint-epoch-{state['epoch']}" + with open(str(output_config_file), 'w') as f: + f.write(model_to_save.config.to_json_string()) + state.pop("model") +- torch.save(state,self.base_path / 'checkpoint_info.bin') ++ torch.save(state, self.base_path / 'checkpoint_info.bin') + else: + if state['epoch'] % self.epoch_freq == 0: + save_path = self.base_path / f"checkpoint-epoch-{state['epoch']}" diff --git a/callback/progressbar.py b/callback/progressbar.py index 5e43b88..c9d9613 100644 --- a/callback/progressbar.py +++ b/callback/progressbar.py @@ -1,4 +1,6 @@ - import time -+ -+ - class ProgressBar(object): - ''' - custom progress bar + import time ++ ++ + class ProgressBar(object): + ''' + custom progress bar @@ -7,7 +9,8 @@ class ProgressBar(object): - >>> step = 2 - >>> pbar(step=step) - ''' -- def __init__(self, n_total,width=30,desc = 'Training'): -+ -+ def __init__(self, n_total, 
width=30, desc='Training'): - self.width = width - self.n_total = n_total - self.start_time = time.time() + >>> step = 2 + >>> pbar(step=step) + ''' +- def __init__(self, n_total,width=30,desc = 'Training'): ++ ++ def __init__(self, n_total, width=30, desc='Training'): + self.width = width + self.n_total = n_total + self.start_time = time.time() @@ -23,7 +26,7 @@ class ProgressBar(object): - prog_width = int(self.width * recv_per) - if prog_width > 0: - bar += '=' * (prog_width - 1) -- if current< self.n_total: -+ if current < self.n_total: - bar += ">" - else: - bar += '=' + prog_width = int(self.width * recv_per) + if prog_width > 0: + bar += '=' * (prog_width - 1) +- if current< self.n_total: ++ if current < self.n_total: + bar += ">" + else: + bar += '=' diff --git a/callback/trainingmonitor.py b/callback/trainingmonitor.py index 6aea128..cb78168 100644 --- a/callback/trainingmonitor.py @@ -373,258 +373,258 @@ index 679602d..c7afb77 100644 --- a/metrics/custom_metrics.py +++ b/metrics/custom_metrics.py @@ -1,4 +1,4 @@ --#encoding:utf-8 -+# encoding:utf-8 - import torch - from tqdm import tqdm - import numpy as np +-#encoding:utf-8 ++# encoding:utf-8 + import torch + from tqdm import tqdm + import numpy as np @@ -6,7 +6,8 @@ from collections import Counter - from sklearn.metrics import roc_auc_score - from sklearn.metrics import f1_score, classification_report - --__call__ = ['Accuracy','AUC','F1Score','EntityScore','ClassReport','MultiLabelReport','AccuracyThresh'] -+__call__ = ['Accuracy', 'AUC', 'F1Score', 'EntityScore', 'ClassReport', 'MultiLabelReport', 'AccuracyThresh'] -+ - - class Metric: - def __init__(self): + from sklearn.metrics import roc_auc_score + from sklearn.metrics import f1_score, classification_report + +-__call__ = ['Accuracy','AUC','F1Score','EntityScore','ClassReport','MultiLabelReport','AccuracyThresh'] ++__call__ = ['Accuracy', 'AUC', 'F1Score', 'EntityScore', 'ClassReport', 'MultiLabelReport', 'AccuracyThresh'] ++ + + class Metric: + def __init__(self): @@ -24,6 +25,7 @@ class Metric: - def name(self): - raise NotImplementedError - -+ - class Accuracy(Metric): - ''' - 计算准确度 + def name(self): + raise NotImplementedError + ++ + class Accuracy(Metric): + ''' + 计算准确度 @@ -37,8 +39,9 @@ class Accuracy(Metric): - >>> metrics(logits,target) - >>> print(metrics.name(),metrics.value()) - ''' -- def __init__(self,topK): -- super(Accuracy,self).__init__() -+ -+ def __init__(self, topK): -+ super(Accuracy, self).__init__() - self.topK = topK - self.reset() - + >>> metrics(logits,target) + >>> print(metrics.name(),metrics.value()) + ''' +- def __init__(self,topK): +- super(Accuracy,self).__init__() ++ ++ def __init__(self, topK): ++ super(Accuracy, self).__init__() + self.topK = topK + self.reset() + @@ -54,7 +57,7 @@ class Accuracy(Metric): - self.total = 0 - - def value(self): -- return float(self.correct_k) / self.total -+ return float(self.correct_k) / self.total - - def name(self): - return 'accuracy' + self.total = 0 + + def value(self): +- return float(self.correct_k) / self.total ++ return float(self.correct_k) / self.total + + def name(self): + return 'accuracy' @@ -73,8 +76,9 @@ class AccuracyThresh(Metric): - >>> metrics(logits,target) - >>> print(metrics.name(),metrics.value()) - ''' -- def __init__(self,thresh = 0.5): -- super(AccuracyThresh,self).__init__() -+ -+ def __init__(self, thresh=0.5): -+ super(AccuracyThresh, self).__init__() - self.thresh = thresh - self.reset() - + >>> metrics(logits,target) + >>> print(metrics.name(),metrics.value()) + ''' +- def 
__init__(self,thresh = 0.5): +- super(AccuracyThresh,self).__init__() ++ ++ def __init__(self, thresh=0.5): ++ super(AccuracyThresh, self).__init__() + self.thresh = thresh + self.reset() + @@ -88,7 +92,7 @@ class AccuracyThresh(Metric): - - def value(self): - data_size = self.y_pred.size(0) -- acc = np.mean(((self.y_pred>self.thresh)==self.y_true.byte()).float().cpu().numpy(), axis=1).sum() -+ acc = np.mean(((self.y_pred > self.thresh) == self.y_true.byte()).float().cpu().numpy(), axis=1).sum() - return acc / data_size - - def name(self): + + def value(self): + data_size = self.y_pred.size(0) +- acc = np.mean(((self.y_pred>self.thresh)==self.y_true.byte()).float().cpu().numpy(), axis=1).sum() ++ acc = np.mean(((self.y_pred > self.thresh) == self.y_true.byte()).float().cpu().numpy(), axis=1).sum() + return acc / data_size + + def name(self): @@ -119,16 +123,16 @@ class AUC(Metric): - >>> print(metrics.name(),metrics.value()) - ''' - -- def __init__(self,task_type = 'binary',average = 'binary'): -+ def __init__(self, task_type='binary', average='binary'): - super(AUC, self).__init__() - -- assert task_type in ['binary','multiclass'] -- assert average in ['binary','micro', 'macro', 'samples', 'weighted'] -+ assert task_type in ['binary', 'multiclass'] -+ assert average in ['binary', 'micro', 'macro', 'samples', 'weighted'] - - self.task_type = task_type - self.average = average - -- def __call__(self,logits,target): -+ def __call__(self, logits, target): - ''' - 计算整个结果 - ''' + >>> print(metrics.name(),metrics.value()) + ''' + +- def __init__(self,task_type = 'binary',average = 'binary'): ++ def __init__(self, task_type='binary', average='binary'): + super(AUC, self).__init__() + +- assert task_type in ['binary','multiclass'] +- assert average in ['binary','micro', 'macro', 'samples', 'weighted'] ++ assert task_type in ['binary', 'multiclass'] ++ assert average in ['binary', 'micro', 'macro', 'samples', 'weighted'] + + self.task_type = task_type + self.average = average + +- def __call__(self,logits,target): ++ def __call__(self, logits, target): + ''' + 计算整个结果 + ''' @@ -152,6 +156,7 @@ class AUC(Metric): - def name(self): - return 'auc' - -+ - class F1Score(Metric): - ''' - F1 Score + def name(self): + return 'auc' + ++ + class F1Score(Metric): + ''' + F1 Score @@ -178,18 +183,19 @@ class F1Score(Metric): - >>> metrics(logits,target) - >>> print(metrics.name(),metrics.value()) - ''' -- def __init__(self,thresh = 0.5, normalizate = True,task_type = 'binary',average = 'binary',search_thresh = False): -+ -+ def __init__(self, thresh=0.5, normalizate=True, task_type='binary', average='binary', search_thresh=False): - super(F1Score).__init__() -- assert task_type in ['binary','multiclass'] -- assert average in ['binary','micro', 'macro', 'samples', 'weighted'] -+ assert task_type in ['binary', 'multiclass'] -+ assert average in ['binary', 'micro', 'macro', 'samples', 'weighted'] - - self.thresh = thresh - self.task_type = task_type -- self.normalizate = normalizate -+ self.normalizate = normalizate - self.search_thresh = search_thresh - self.average = average - -- def thresh_search(self,y_prob): -+ def thresh_search(self, y_prob): - ''' - 对于f1评分的指标,一般我们需要对阈值进行调整,一般不会使用默认的0.5值,因此 - 这里我们队Thresh进行优化 + >>> metrics(logits,target) + >>> print(metrics.name(),metrics.value()) + ''' +- def __init__(self,thresh = 0.5, normalizate = True,task_type = 'binary',average = 'binary',search_thresh = False): ++ ++ def __init__(self, thresh=0.5, normalizate=True, task_type='binary', average='binary', 
search_thresh=False): + super(F1Score).__init__() +- assert task_type in ['binary','multiclass'] +- assert average in ['binary','micro', 'macro', 'samples', 'weighted'] ++ assert task_type in ['binary', 'multiclass'] ++ assert average in ['binary', 'micro', 'macro', 'samples', 'weighted'] + + self.thresh = thresh + self.task_type = task_type +- self.normalizate = normalizate ++ self.normalizate = normalizate + self.search_thresh = search_thresh + self.average = average + +- def thresh_search(self,y_prob): ++ def thresh_search(self, y_prob): + ''' + 对于f1评分的指标,一般我们需要对阈值进行调整,一般不会使用默认的0.5值,因此 + 这里我们队Thresh进行优化 @@ -203,9 +209,9 @@ class F1Score(Metric): - if score > best_score: - best_threshold = threshold - best_score = score -- return best_threshold,best_score -+ return best_threshold, best_score - -- def __call__(self,logits,target): -+ def __call__(self, logits, target): - ''' - 计算整个结果 - :return: + if score > best_score: + best_threshold = threshold + best_score = score +- return best_threshold,best_score ++ return best_threshold, best_score + +- def __call__(self,logits,target): ++ def __call__(self, logits, target): + ''' + 计算整个结果 + :return: @@ -220,10 +226,10 @@ class F1Score(Metric): - - if self.task_type == 'binary': - if self.thresh and self.search_thresh == False: -- self.y_pred = (y_prob > self.thresh ).astype(int) -+ self.y_pred = (y_prob > self.thresh).astype(int) - self.value() - else: -- thresh,f1 = self.thresh_search(y_prob = y_prob) -+ thresh, f1 = self.thresh_search(y_prob=y_prob) - print(f"Best thresh: {thresh:.4f} - F1 Score: {f1:.4f}") - - if self.task_type == 'multiclass': + + if self.task_type == 'binary': + if self.thresh and self.search_thresh == False: +- self.y_pred = (y_prob > self.thresh ).astype(int) ++ self.y_pred = (y_prob > self.thresh).astype(int) + self.value() + else: +- thresh,f1 = self.thresh_search(y_prob = y_prob) ++ thresh, f1 = self.thresh_search(y_prob=y_prob) + print(f"Best thresh: {thresh:.4f} - F1 Score: {f1:.4f}") + + if self.task_type == 'multiclass': @@ -247,11 +253,13 @@ class F1Score(Metric): - def name(self): - return 'f1' - -+ - class ClassReport(Metric): - ''' - class report - ''' -- def __init__(self,target_names = None): -+ -+ def __init__(self, target_names=None): - super(ClassReport).__init__() - self.target_names = target_names - + def name(self): + return 'f1' + ++ + class ClassReport(Metric): + ''' + class report + ''' +- def __init__(self,target_names = None): ++ ++ def __init__(self, target_names=None): + super(ClassReport).__init__() + self.target_names = target_names + @@ -263,10 +271,10 @@ class ClassReport(Metric): - ''' - 计算指标得分 - ''' -- score = classification_report(y_true = self.y_true, y_pred = self.y_pred, target_names=self.target_names) -+ score = classification_report(y_true=self.y_true, y_pred=self.y_pred, target_names=self.target_names) - print(f"\n\n classification report: {score}") - -- def __call__(self,logits,target): -+ def __call__(self, logits, target): - _, y_pred = torch.max(logits.data, 1) - self.y_pred = y_pred.cpu().numpy() - self.y_true = target.cpu().numpy() + ''' + 计算指标得分 + ''' +- score = classification_report(y_true = self.y_true, y_pred = self.y_pred, target_names=self.target_names) ++ score = classification_report(y_true=self.y_true, y_pred=self.y_pred, target_names=self.target_names) + print(f"\n\n classification report: {score}") + +- def __call__(self,logits,target): ++ def __call__(self, logits, target): + _, y_pred = torch.max(logits.data, 1) + self.y_pred = y_pred.cpu().numpy() + self.y_true = 
target.cpu().numpy() @@ -274,11 +282,13 @@ class ClassReport(Metric): - def name(self): - return "class_report" - -+ - class MultiLabelReport(Metric): - ''' - multi label report - ''' -- def __init__(self,id2label = None): -+ -+ def __init__(self, id2label=None): - super(MultiLabelReport).__init__() - self.id2label = id2label - + def name(self): + return "class_report" + ++ + class MultiLabelReport(Metric): + ''' + multi label report + ''' +- def __init__(self,id2label = None): ++ ++ def __init__(self, id2label=None): + super(MultiLabelReport).__init__() + self.id2label = id2label + @@ -286,8 +296,7 @@ class MultiLabelReport(Metric): - self.y_prob = 0 - self.y_true = 0 - -- def __call__(self,logits,target): -- -+ def __call__(self, logits, target): - self.y_prob = logits.sigmoid().data.cpu().detach().numpy() - self.y_true = target.cpu().numpy() - + self.y_prob = 0 + self.y_true = 0 + +- def __call__(self,logits,target): +- ++ def __call__(self, logits, target): + self.y_prob = logits.sigmoid().data.cpu().detach().numpy() + self.y_true = target.cpu().numpy() + @@ -304,12 +313,12 @@ class MultiLabelReport(Metric): - - - class LMAccuracy(Metric): -- def __init__(self,topK =1): -+ def __init__(self, topK=1): - super(LMAccuracy).__init__() - self.topK = topK - self.reset() - -- def __call__(self,logits,target): -+ def __call__(self, logits, target): - pred = torch.argmax(logits, 1) - active_acc = target.view(-1) != -1 - active_pred = pred[active_acc] + + + class LMAccuracy(Metric): +- def __init__(self,topK =1): ++ def __init__(self, topK=1): + super(LMAccuracy).__init__() + self.topK = topK + self.reset() + +- def __call__(self,logits,target): ++ def __call__(self, logits, target): + pred = torch.argmax(logits, 1) + active_acc = target.view(-1) != -1 + active_pred = pred[active_acc] @@ -328,5 +337,3 @@ class LMAccuracy(Metric): - - def name(self): - return 'accuracy' -- -- + + def name(self): + return 'accuracy' +- +- diff --git a/metrics/glue_compute_metrics.py b/metrics/glue_compute_metrics.py index dd9a7b2..7afb658 100644 --- a/metrics/glue_compute_metrics.py +++ b/metrics/glue_compute_metrics.py @@ -22,11 +22,13 @@ logger = logging.getLogger(__name__) - try: - from scipy.stats import pearsonr, spearmanr - from sklearn.metrics import matthews_corrcoef, f1_score -+ - _has_sklearn = True - except (AttributeError, ImportError) as e: - logger.warning("To use data.metrics please install scikit-learn. See https://scikit-learn.org/stable/index.html") - _has_sklearn = False - -+ - def simple_accuracy(preds, labels): - return (preds == labels).mean() - + try: + from scipy.stats import pearsonr, spearmanr + from sklearn.metrics import matthews_corrcoef, f1_score ++ + _has_sklearn = True + except (AttributeError, ImportError) as e: + logger.warning("To use data.metrics please install scikit-learn. 
See https://scikit-learn.org/stable/index.html") + _has_sklearn = False + ++ + def simple_accuracy(preds, labels): + return (preds == labels).mean() + @@ -40,6 +42,7 @@ def acc_and_f1(preds, labels): - "acc_and_f1": (acc + f1) / 2, - } - -+ - def pearson_and_spearman(preds, labels): - pearson_corr = pearsonr(preds, labels)[0] - spearman_corr = spearmanr(preds, labels)[0] + "acc_and_f1": (acc + f1) / 2, + } + ++ + def pearson_and_spearman(preds, labels): + pearson_corr = pearsonr(preds, labels)[0] + spearman_corr = spearmanr(preds, labels)[0] @@ -49,6 +52,7 @@ def pearson_and_spearman(preds, labels): - "corr": (pearson_corr + spearman_corr) / 2, - } - -+ - def compute_metrics(task_name, preds, labels): - assert len(preds) == len(labels) - if task_name == "cola": + "corr": (pearson_corr + spearman_corr) / 2, + } + ++ + def compute_metrics(task_name, preds, labels): + assert len(preds) == len(labels) + if task_name == "cola": diff --git a/model/configuration_albert.py b/model/configuration_albert.py index d8c8310..4968acb 100644 --- a/model/configuration_albert.py @@ -1182,127 +1182,127 @@ index 433db06..7c8dc1f 100644 --- a/model/modeling_albert_bright.py +++ b/model/modeling_albert_bright.py @@ -25,6 +25,8 @@ ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - 'albert-xlarge': "", - 'albert-xxlarge': "", - } -+ -+ - def load_tf_weights_in_albert(model, config, tf_checkpoint_path): - """ Load tf checkpoints in a pytorch model. - """ + 'albert-xlarge': "", + 'albert-xxlarge': "", + } ++ ++ + def load_tf_weights_in_albert(model, config, tf_checkpoint_path): + """ Load tf checkpoints in a pytorch model. + """ @@ -93,10 +95,14 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path): - pointer.data = torch.from_numpy(array) - return model - -+ - AlbertLayerNorm = torch.nn.LayerNorm -+ -+ - class AlbertEmbeddings(nn.Module): - """Construct the embeddings from word, position and token_type embeddings. - """ -+ - def __init__(self, config): - super(AlbertEmbeddings, self).__init__() - self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0) + pointer.data = torch.from_numpy(array) + return model + ++ + AlbertLayerNorm = torch.nn.LayerNorm ++ ++ + class AlbertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ ++ + def __init__(self, config): + super(AlbertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0) @@ -108,7 +114,7 @@ class AlbertEmbeddings(nn.Module): - - # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load - # any TensorFlow checkpoint file -- self.LayerNorm =AlbertLayerNorm(config.hidden_size, eps=config.layer_norm_eps) -+ self.LayerNorm = AlbertLayerNorm(config.hidden_size, eps=config.layer_norm_eps) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def forward(self, input_ids, token_type_ids=None, position_ids=None): + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file +- self.LayerNorm =AlbertLayerNorm(config.hidden_size, eps=config.layer_norm_eps) ++ self.LayerNorm = AlbertLayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None, position_ids=None): @@ -130,6 +136,7 @@ class AlbertEmbeddings(nn.Module): - embeddings = self.dropout(embeddings) - return embeddings - -+ - class AlbertSelfOutput(nn.Module): - def __init__(self, config): - super(AlbertSelfOutput, self).__init__() + embeddings = self.dropout(embeddings) + return embeddings + ++ + class AlbertSelfOutput(nn.Module): + def __init__(self, config): + super(AlbertSelfOutput, self).__init__() @@ -182,6 +189,7 @@ class AlbertAttention(nn.Module): - outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them - return outputs - -+ - class AlbertOutput(nn.Module): - def __init__(self, config): - super(AlbertOutput, self).__init__() + outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them + return outputs + ++ + class AlbertOutput(nn.Module): + def __init__(self, config): + super(AlbertOutput, self).__init__() @@ -196,6 +204,7 @@ class AlbertOutput(nn.Module): - hidden_states = self.LayerNorm(hidden_states + input_tensor) - return hidden_states - -+ - class BertLayer(nn.Module): - def __init__(self, config): - super(BertLayer, self).__init__() + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + ++ + class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() @@ -213,6 +222,7 @@ class BertLayer(nn.Module): - outputs = (layer_output,) + attention_outputs[1:] # add attentions if we output them - return outputs - -+ - class AlbertEncoder(nn.Module): - def __init__(self, config): - super(AlbertEncoder, self).__init__() + outputs = (layer_output,) + attention_outputs[1:] # add attentions if we output them + return outputs + ++ + class AlbertEncoder(nn.Module): + def __init__(self, config): + super(AlbertEncoder, self).__init__() @@ -243,6 +253,7 @@ class AlbertEncoder(nn.Module): - outputs = outputs + (all_attentions,) - return outputs # last-layer hidden state, (all hidden states), (all attentions) - -+ - class AlbertLMPredictionHead(nn.Module): - def __init__(self, config): - super(AlbertLMPredictionHead, self).__init__() + outputs = outputs + (all_attentions,) + return outputs # last-layer hidden state, (all hidden states), (all attentions) + ++ + class AlbertLMPredictionHead(nn.Module): + def __init__(self, config): + super(AlbertLMPredictionHead, self).__init__() @@ -261,6 +272,7 @@ class AlbertLMPredictionHead(nn.Module): - hidden_states = self.decoder(hidden_states) + self.bias - 
return hidden_states - -+ - class AlbertOnlyMLMHead(nn.Module): - def __init__(self, config): - super(AlbertOnlyMLMHead, self).__init__() + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + ++ + class AlbertOnlyMLMHead(nn.Module): + def __init__(self, config): + super(AlbertOnlyMLMHead, self).__init__() @@ -270,6 +282,7 @@ class AlbertOnlyMLMHead(nn.Module): - prediction_scores = self.predictions(sequence_output) - return prediction_scores - -+ - class AlbertOnlyNSPHead(nn.Module): - def __init__(self, config): - super(AlbertOnlyNSPHead, self).__init__() + prediction_scores = self.predictions(sequence_output) + return prediction_scores + ++ + class AlbertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(AlbertOnlyNSPHead, self).__init__() @@ -279,6 +292,7 @@ class AlbertOnlyNSPHead(nn.Module): - seq_relationship_score = self.seq_relationship(pooled_output) - return seq_relationship_score - -+ - class AlbertPreTrainingHeads(nn.Module): - def __init__(self, config): - super(AlbertPreTrainingHeads, self).__init__() + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + ++ + class AlbertPreTrainingHeads(nn.Module): + def __init__(self, config): + super(AlbertPreTrainingHeads, self).__init__() @@ -290,6 +304,7 @@ class AlbertPreTrainingHeads(nn.Module): - seq_relationship_score = self.seq_relationship(pooled_output) - return prediction_scores, seq_relationship_score - -+ - class AlbertPreTrainedModel(PreTrainedModel): - """ An abstract class to handle weights initialization and - a simple interface for dowloading and loading pretrained models. + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + ++ + class AlbertPreTrainedModel(PreTrainedModel): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. @@ -311,6 +326,7 @@ class AlbertPreTrainedModel(PreTrainedModel): - if isinstance(module, nn.Linear) and module.bias is not None: - module.bias.data.zero_() - -+ - BERT_START_DOCSTRING = r""" The BERT model was proposed in - `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_ - by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova. It's a bidirectional transformer + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + ++ + BERT_START_DOCSTRING = r""" The BERT model was proposed in + `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_ + by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova. It's a bidirectional transformer @@ -535,7 +551,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel): - Export to TorchScript can't handle parameter sharing so we are cloning them instead. - """ - self._tie_or_clone_weights(self.cls.predictions.decoder, -- self.bert.embeddings.word_embeddings) -+ self.bert.embeddings.word_embeddings) - - def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, - masked_lm_labels=None, next_sentence_label=None): + Export to TorchScript can't handle parameter sharing so we are cloning them instead. 
+ """ + self._tie_or_clone_weights(self.cls.predictions.decoder, +- self.bert.embeddings.word_embeddings) ++ self.bert.embeddings.word_embeddings) + + def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, + masked_lm_labels=None, next_sentence_label=None): diff --git a/model/modeling_bert.py b/model/modeling_bert.py index fecf1e4..1b593c6 100644 --- a/model/modeling_bert.py @@ -2886,164 +2886,164 @@ index bd4cd9f..ec8e30f 100644 --- a/tools/common.py +++ b/tools/common.py @@ -8,16 +8,19 @@ import torch.nn as nn - from collections import OrderedDict - from pathlib import Path - import logging -+from threading import Lock - -+lock = Lock() - logger = logging.getLogger() - -+ - def init_logger(log_file=None, log_file_level=logging.NOTSET): - ''' - Example: - >>> init_logger(log_file) - >>> logger.info("abc'") - ''' -- if isinstance(log_file,Path): -+ if isinstance(log_file, Path): - log_file = str(log_file) - - log_format = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s', + from collections import OrderedDict + from pathlib import Path + import logging ++from threading import Lock + ++lock = Lock() + logger = logging.getLogger() + ++ + def init_logger(log_file=None, log_file_level=logging.NOTSET): + ''' + Example: + >>> init_logger(log_file) + >>> logger.info("abc'") + ''' +- if isinstance(log_file,Path): ++ if isinstance(log_file, Path): + log_file = str(log_file) + + log_format = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s', @@ -34,6 +37,7 @@ def init_logger(log_file=None, log_file_level=logging.NOTSET): - logger.addHandler(file_handler) - return logger - -+ - def seed_everything(seed=1029): - ''' - 设置整个开发环境的seed + logger.addHandler(file_handler) + return logger + ++ + def seed_everything(seed=1029): + ''' + 设置整个开发环境的seed @@ -114,7 +118,7 @@ def restore_checkpoint(resume_path, model=None): - model.module.load_state_dict(states) - else: - model.load_state_dict(states) -- return [model,best,start_epoch] -+ return [model, best, start_epoch] - - - def save_pickle(data, file_path): + model.module.load_state_dict(states) + else: + model.load_state_dict(states) +- return [model,best,start_epoch] ++ return [model, best, start_epoch] + + + def save_pickle(data, file_path): @@ -172,6 +176,7 @@ def load_json(file_path): - data = json.load(f) - return data - -+ - def save_model(model, model_path): - """ 存储不含有显卡信息的state_dict或model - :param model: + data = json.load(f) + return data + ++ + def save_model(model, model_path): + """ 存储不含有显卡信息的state_dict或model + :param model: @@ -188,6 +193,7 @@ def save_model(model, model_path): - state_dict[key] = state_dict[key].cpu() - torch.save(state_dict, model_path) - -+ - def load_model(model, model_path): - ''' - 加载模型 + state_dict[key] = state_dict[key].cpu() + torch.save(state_dict, model_path) + ++ + def load_model(model, model_path): + ''' + 加载模型 diff --git a/tools/fps_counter.py b/tools/fps_counter.py new file mode 100644 index 0000000..4f637f3 --- /dev/null +++ b/tools/fps_counter.py @@ -0,0 +1,99 @@ -+# Copyright 2021 Huawei Technologies Co., Ltd -+# -+# Licensed under the Apache License, Version 2.0 (the "License"); -+# you may not use this file except in compliance with the License. 
-+# You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, software -+# distributed under the License is distributed on an "AS IS" BASIS, -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+# See the License for the specific language governing permissions and -+# limitations under the License. -+ -+import time -+from threading import Lock -+ -+ -+class FpsCounter: -+ """ -+how to use -+ -+fps=FpsCounter() -+fps.begin() -+code -+fps.end() -+print(fps.fps()) -+ -+ """ -+ def __init__(self): -+ self.step_sum = 0 -+ self.time_sum = 0 -+ self.t1 = 0 -+ self.on = False -+ -+ def begin(self): -+ assert self.on == False, "didnot end last time" -+ self.on = True -+ self.t1 = time.time_ns() -+ -+ def end(self): -+ t2 = time.time_ns() -+ assert self.on == True, "didnot begin" -+ self.time_sum += t2 - self.t1 -+ self.step_sum += 1 -+ self.on = False -+ -+ def reset(self): -+ self.step_sum = 0 -+ self.time_sum = 0 -+ self.t1 = 0 -+ self.on = False -+ -+ def fps(self, batch=1, n_device=1): -+ if self.step_sum == 0: return 0 -+ time_avg = self.time_sum / 1e9 / self.step_sum -+ return batch * n_device / time_avg -+ -+ -+class FpsCounter2: -+ def __init__(self, node_num=0): -+ self.node_num = node_num -+ self.lock = Lock() -+ self.step_sum = [0 for i in range(node_num)] -+ self.time_sum = [0 for i in range(node_num)] -+ self.t1 = [0 for i in range(node_num)] -+ self.on = [False for i in range(node_num)] -+ -+ def begin(self, node_idx=0): -+ assert self.on[node_idx] == False, "didnot end last time" -+ self.lock.acquire() -+ self.on[node_idx] = True -+ self.t1[node_idx] = time.time_ns() -+ self.lock.release() -+ -+ def end(self, node_idx=0): -+ t2 = time.time_ns() -+ assert self.on[node_idx] == True, "didnot begin" -+ self.lock.acquire() -+ self.time_sum[node_idx] += t2 - self.t1[node_idx] -+ self.step_sum[node_idx] += 1 -+ self.on[node_idx] = False -+ self.lock.release() -+ -+ def reset(self, node_idx=0): -+ self.lock.acquire() -+ self.step_sum[node_idx] = 0 -+ self.time_sum[node_idx] = 0 -+ self.t1[node_idx] = 0 -+ self.on[node_idx] = False -+ self.lock.release() -+ -+ def fps(self, batch=1, n_device=1, world_size=0): -+ fps = 0 -+ for i in range(world_size): -+ if self.step_sum[i] == 0: continue -+ time_avg = self.time_sum[i] / 1e9 / self.step_sum[i] -+ fps += batch * n_device / time_avg -+ return fps ++# Copyright 2021 Huawei Technologies Co., Ltd ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
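For orientation, a usage sketch of the FpsCounter defined in tools/fps_counter.py (its class body appears around this point); the import path assumes the repository root is on PYTHONPATH, and time.sleep stands in for a real batched inference call.

import time
from tools.fps_counter import FpsCounter   # module added by this patch

fps = FpsCounter()
for _ in range(100):
    fps.begin()
    time.sleep(0.005)          # placeholder for one batched inference step
    fps.end()
# fps() returns batch * n_device divided by the average step time in seconds,
# so with batch=64 on one device this prints roughly 64 / 0.005 = 12800 samples/s.
print(fps.fps(batch=64, n_device=1))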
++ ++import time ++from threading import Lock ++ ++ ++class FpsCounter: ++ """ ++how to use ++ ++fps=FpsCounter() ++fps.begin() ++code ++fps.end() ++print(fps.fps()) ++ ++ """ ++ def __init__(self): ++ self.step_sum = 0 ++ self.time_sum = 0 ++ self.t1 = 0 ++ self.on = False ++ ++ def begin(self): ++ assert self.on == False, "didnot end last time" ++ self.on = True ++ self.t1 = time.time_ns() ++ ++ def end(self): ++ t2 = time.time_ns() ++ assert self.on == True, "didnot begin" ++ self.time_sum += t2 - self.t1 ++ self.step_sum += 1 ++ self.on = False ++ ++ def reset(self): ++ self.step_sum = 0 ++ self.time_sum = 0 ++ self.t1 = 0 ++ self.on = False ++ ++ def fps(self, batch=1, n_device=1): ++ if self.step_sum == 0: return 0 ++ time_avg = self.time_sum / 1e9 / self.step_sum ++ return batch * n_device / time_avg ++ ++ ++class FpsCounter2: ++ def __init__(self, node_num=0): ++ self.node_num = node_num ++ self.lock = Lock() ++ self.step_sum = [0 for i in range(node_num)] ++ self.time_sum = [0 for i in range(node_num)] ++ self.t1 = [0 for i in range(node_num)] ++ self.on = [False for i in range(node_num)] ++ ++ def begin(self, node_idx=0): ++ assert self.on[node_idx] == False, "didnot end last time" ++ self.lock.acquire() ++ self.on[node_idx] = True ++ self.t1[node_idx] = time.time_ns() ++ self.lock.release() ++ ++ def end(self, node_idx=0): ++ t2 = time.time_ns() ++ assert self.on[node_idx] == True, "didnot begin" ++ self.lock.acquire() ++ self.time_sum[node_idx] += t2 - self.t1[node_idx] ++ self.step_sum[node_idx] += 1 ++ self.on[node_idx] = False ++ self.lock.release() ++ ++ def reset(self, node_idx=0): ++ self.lock.acquire() ++ self.step_sum[node_idx] = 0 ++ self.time_sum[node_idx] = 0 ++ self.t1[node_idx] = 0 ++ self.on[node_idx] = False ++ self.lock.release() ++ ++ def fps(self, batch=1, n_device=1, world_size=0): ++ fps = 0 ++ for i in range(world_size): ++ if self.step_sum[i] == 0: continue ++ time_avg = self.time_sum[i] / 1e9 / self.step_sum[i] ++ fps += batch * n_device / time_avg ++ return fps -- 2.17.1 diff --git a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_get_info.py b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_get_info.py index c685f28e3bfa56a01dabf53ad38c4d70d8c03d66..0846eb47f46672291c43d5217878b48cfbe42058 100644 --- a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_get_info.py +++ b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_get_info.py @@ -1,47 +1,47 @@ -# coding=utf-8 -# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. -# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Run TinyBERT on SST-2.""" -import argparse - -def main(): - """output:info file""" - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", - default=1, - type=int, - required=True) - parser.add_argument("--output_path", - default='./bert_bin/', - type=str, - required=True, - help='The output dir of info file.') - args = parser.parse_args() - test_num = 872 - base_path = args.output_path - with open('./TinyBERT.info', 'w') as f: - for i in range(test_num): - ids_name = base_path + 'input_ids_{}.bin'.format(i) - segment_name = base_path + 'segment_ids_{}.bin'.format(i) - mask_name = base_path + 'input_mask_{}.bin'.format(i) - f.write(str(i) + ' ' + ids_name) - f.write('\n') - f.write(str(i) + ' ' + segment_name) - f.write('\n') - f.write(str(i) + ' ' + mask_name) - f.write('\n') - -if __name__ == "__main__": +# coding=utf-8 +# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run TinyBERT on SST-2.""" +import argparse + +def main(): + """output:info file""" + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", + default=1, + type=int, + required=True) + parser.add_argument("--output_path", + default='./bert_bin/', + type=str, + required=True, + help='The output dir of info file.') + args = parser.parse_args() + test_num = 872 + base_path = args.output_path + with open('./TinyBERT.info', 'w') as f: + for i in range(test_num): + ids_name = base_path + 'input_ids_{}.bin'.format(i) + segment_name = base_path + 'segment_ids_{}.bin'.format(i) + mask_name = base_path + 'input_mask_{}.bin'.format(i) + f.write(str(i) + ' ' + ids_name) + f.write('\n') + f.write(str(i) + ' ' + segment_name) + f.write('\n') + f.write(str(i) + ' ' + mask_name) + f.write('\n') + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_postprocess_data.py b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_postprocess_data.py index 117bb7ea24ede7f1353e849cc5ab7b2397cf5ad5..bff0fbfbc86b4d9fc675928f5e439877152344f3 100644 --- a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_postprocess_data.py +++ b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_postprocess_data.py @@ -1,279 +1,279 @@ -# coding=utf-8 -# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. -# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Run TinyBERT on SST-2.""" - -from __future__ import absolute_import, division, print_function -import argparse -import os -import sys -import csv -import numpy as np -import io -from transformer.tokenization import BertTokenizer -import torch - - -class InputExample(object): - """A single training/test example for simple sequence classification.""" - - def __init__(self, guid, text_a, text_b=None, label=None): - """Constructs a InputExample. - - Args: - guid: Unique id for the example. - text_a: string. The untokenized text of the first sequence. For single - sequence tasks, only this sequence must be specified. - text_b: (Optional) string. The untokenized text of the second sequence. - Only must be specified for sequence pair tasks. - label: (Optional) string. The label of the example. This should be - specified for train and dev examples, but not for test examples. - """ - self.guid = guid - self.text_a = text_a - self.text_b = text_b - self.label = label - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, input_ids, input_mask, segment_ids, label_id, seq_length=None): - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.seq_length = seq_length - self.label_id = label_id - - -class DataProcessor(object): - """Base class for data converters for sequence classification data sets.""" - - def get_train_examples(self, data_dir): - """Gets a collection of `InputExample`s for the train set.""" - raise NotImplementedError() - - def get_dev_examples(self, data_dir): - """Gets a collection of `InputExample`s for the dev set.""" - raise NotImplementedError() - - def get_labels(self): - """Gets the list of labels for this data set.""" - raise NotImplementedError() - - @classmethod - def _read_tsv(cls, input_file, quotechar=None): - """Reads a tab separated value file.""" - with io.open(input_file, "r", encoding="utf-8") as f: - reader = csv.reader(f, delimiter="\t", quotechar=quotechar) - lines = [] - for line in reader: - if sys.version_info[0] == 2: - line = list(unicode(cell, 'utf-8') for cell in line) - lines.append(line) - return lines - - -class Sst2Processor(DataProcessor): - """Processor for the SST-2 data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_aug_examples(self, data_dir): - """get the augmented examples""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = line[0] - label = line[1] - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -def _truncate_seq_pair(tokens_a, tokens_b, max_length): - """Truncates a sequence pair in place to the maximum length.""" - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_length: - break - if len(tokens_a) > len(tokens_b): - tokens_a.pop() - else: - tokens_b.pop() - - -def convert_examples_to_features(examples, 
label_list, max_seq_length, - tokenizer, output_mode): - """Loads a data file into a list of `InputBatch`s.""" - - label_map = {label: i for i, label in enumerate(label_list)} - - features = [] - for (ex_index, example) in enumerate(examples): - - tokens_a = tokenizer.tokenize(example.text_a) - - tokens_b = None - if example.text_b: - tokens_b = tokenizer.tokenize(example.text_b) - _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) - else: - if len(tokens_a) > max_seq_length - 2: - tokens_a = tokens_a[:(max_seq_length - 2)] - - tokens = ["[CLS]"] + tokens_a + ["[SEP]"] - segment_ids = [0] * len(tokens) - - if tokens_b: - tokens += tokens_b + ["[SEP]"] - segment_ids += [1] * (len(tokens_b) + 1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - input_mask = [1] * len(input_ids) - seq_length = len(input_ids) - - padding = [0] * (max_seq_length - len(input_ids)) - input_ids += padding - input_mask += padding - segment_ids += padding - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - if output_mode == "classification": - label_id = label_map[example.label] - elif output_mode == "regression": - label_id = float(example.label) - else: - raise KeyError(output_mode) - - features.append( - InputFeatures(input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - label_id=label_id, - seq_length=seq_length)) - return features - - -def get_label_ids(features): - """get the label id""" - return torch.tensor([f.label_id for f in features], dtype=torch.long) - - -def simple_accuracy(preds, labels): - """calculate the accuracy""" - return (preds == labels).mean() - - -def bin2predlabel(test_num, args): - """(adapt to benchmark inference)change the bin files into logits""" - logit1 = [] - logit2 = [] - for i in range(test_num): - n1, n2 = np.fromfile('{}/Bert_{}_1.bin'.format(args.result_dir, i), dtype='float32') - logit1.append(n1) - logit2.append(n2) - logit = np.concatenate((np.array(logit1).reshape(1, -1), np.array(logit2).reshape(1, -1)), axis = 0) - pred_label = np.argmax(logit, axis = 0) - return pred_label - - -def txt2predlabel(test_num, args): - """(adapt to msame inference):change the txt files into logits""" - logit1 = [] - logit2 = [] - for i in range(test_num): - txtname = "input" + str(i) + "_output_0.txt" - dir = os.path.join(args.result_dir, txtname) - with open(dir, "r") as f: - line = f.readline() - n1, n2 = [float(i) for i in line.split()] - logit1.append(n1) - logit2.append(n2) - logit = np.concatenate((np.array(logit1).reshape(1, -1), np.array(logit2).reshape(1, -1)), axis = 0) - pred_label = np.argmax(logit, axis = 0) - return pred_label - - -def main(): - """postprocess the data and calculate the accuracy""" - parser = argparse.ArgumentParser() - parser.add_argument("--max_seq_length", - default=64, - type=int, - help="The maximum total input sequence length after WordPiece tokenization. \n" - "Sequences longer than this will be truncated, and sequences shorter \n" - "than this will be padded.") - parser.add_argument("--data_dir", - default=None, - type=str, - required=True, - help="The input data dir. 
Should contain the .tsv files (or other data files) for the task.") - parser.add_argument("--result_dir", - default=None, - type=str, - required=True, - help="NPU benchmark infer result path") - parser.add_argument("--model", - default=None, - type=str, - required=True, - help="The student model dir.") - parser.add_argument("--do_lower_case", - action='store_true', - help="Set this flag if you are using an uncased model.") - parser.add_argument("--inference_tool", type = str, - help = "inference tool:benchmark or msame") - args = parser.parse_args() - test_num = 872 - processor = Sst2Processor() - tokenizer = BertTokenizer.from_pretrained(args.model, do_lower_case=args.do_lower_case) - eval_examples = processor.get_dev_examples(args.data_dir) - label_list = ["0", "1"] - eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, - output_mode="classification") - #data processing - eval_labels = get_label_ids(eval_features).numpy() - if args.inference_tool == "benchmark": - pred_labels = bin2predlabel(test_num, args) - elif args.inference_tool == "msame": - pred_labels = txt2predlabel(test_num, args) - result = simple_accuracy(pred_labels, eval_labels) - print("acc:{}".format(result)) - -if __name__ == '__main__': +# coding=utf-8 +# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run TinyBERT on SST-2.""" + +from __future__ import absolute_import, division, print_function +import argparse +import os +import sys +import csv +import numpy as np +import io +from transformer.tokenization import BertTokenizer +import torch + + +class InputExample(object): + """A single training/test example for simple sequence classification.""" + + def __init__(self, guid, text_a, text_b=None, label=None): + """Constructs a InputExample. + + Args: + guid: Unique id for the example. + text_a: string. The untokenized text of the first sequence. For single + sequence tasks, only this sequence must be specified. + text_b: (Optional) string. The untokenized text of the second sequence. + Only must be specified for sequence pair tasks. + label: (Optional) string. The label of the example. This should be + specified for train and dev examples, but not for test examples. 
+ """ + self.guid = guid + self.text_a = text_a + self.text_b = text_b + self.label = label + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, input_ids, input_mask, segment_ids, label_id, seq_length=None): + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.seq_length = seq_length + self.label_id = label_id + + +class DataProcessor(object): + """Base class for data converters for sequence classification data sets.""" + + def get_train_examples(self, data_dir): + """Gets a collection of `InputExample`s for the train set.""" + raise NotImplementedError() + + def get_dev_examples(self, data_dir): + """Gets a collection of `InputExample`s for the dev set.""" + raise NotImplementedError() + + def get_labels(self): + """Gets the list of labels for this data set.""" + raise NotImplementedError() + + @classmethod + def _read_tsv(cls, input_file, quotechar=None): + """Reads a tab separated value file.""" + with io.open(input_file, "r", encoding="utf-8") as f: + reader = csv.reader(f, delimiter="\t", quotechar=quotechar) + lines = [] + for line in reader: + if sys.version_info[0] == 2: + line = list(unicode(cell, 'utf-8') for cell in line) + lines.append(line) + return lines + + +class Sst2Processor(DataProcessor): + """Processor for the SST-2 data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_aug_examples(self, data_dir): + """get the augmented examples""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, i) + text_a = line[0] + label = line[1] + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) + return examples + + +def _truncate_seq_pair(tokens_a, tokens_b, max_length): + """Truncates a sequence pair in place to the maximum length.""" + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + + +def convert_examples_to_features(examples, label_list, max_seq_length, + tokenizer, output_mode): + """Loads a data file into a list of `InputBatch`s.""" + + label_map = {label: i for i, label in enumerate(label_list)} + + features = [] + for (ex_index, example) in enumerate(examples): + + tokens_a = tokenizer.tokenize(example.text_a) + + tokens_b = None + if example.text_b: + tokens_b = tokenizer.tokenize(example.text_b) + _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) + else: + if len(tokens_a) > max_seq_length - 2: + tokens_a = tokens_a[:(max_seq_length - 2)] + + tokens = ["[CLS]"] + tokens_a + ["[SEP]"] + segment_ids = [0] * len(tokens) + + if tokens_b: + tokens += tokens_b + ["[SEP]"] + segment_ids += [1] * (len(tokens_b) + 1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + input_mask = [1] * len(input_ids) + seq_length = len(input_ids) + + padding = [0] * (max_seq_length - len(input_ids)) + 
input_ids += padding + input_mask += padding + segment_ids += padding + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + if output_mode == "classification": + label_id = label_map[example.label] + elif output_mode == "regression": + label_id = float(example.label) + else: + raise KeyError(output_mode) + + features.append( + InputFeatures(input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + label_id=label_id, + seq_length=seq_length)) + return features + + +def get_label_ids(features): + """get the label id""" + return torch.tensor([f.label_id for f in features], dtype=torch.long) + + +def simple_accuracy(preds, labels): + """calculate the accuracy""" + return (preds == labels).mean() + + +def bin2predlabel(test_num, args): + """(adapt to benchmark inference)change the bin files into logits""" + logit1 = [] + logit2 = [] + for i in range(test_num): + n1, n2 = np.fromfile('{}/Bert_{}_1.bin'.format(args.result_dir, i), dtype='float32') + logit1.append(n1) + logit2.append(n2) + logit = np.concatenate((np.array(logit1).reshape(1, -1), np.array(logit2).reshape(1, -1)), axis = 0) + pred_label = np.argmax(logit, axis = 0) + return pred_label + + +def txt2predlabel(test_num, args): + """(adapt to msame inference):change the txt files into logits""" + logit1 = [] + logit2 = [] + for i in range(test_num): + txtname = "input" + str(i) + "_output_0.txt" + dir = os.path.join(args.result_dir, txtname) + with open(dir, "r") as f: + line = f.readline() + n1, n2 = [float(i) for i in line.split()] + logit1.append(n1) + logit2.append(n2) + logit = np.concatenate((np.array(logit1).reshape(1, -1), np.array(logit2).reshape(1, -1)), axis = 0) + pred_label = np.argmax(logit, axis = 0) + return pred_label + + +def main(): + """postprocess the data and calculate the accuracy""" + parser = argparse.ArgumentParser() + parser.add_argument("--max_seq_length", + default=64, + type=int, + help="The maximum total input sequence length after WordPiece tokenization. \n" + "Sequences longer than this will be truncated, and sequences shorter \n" + "than this will be padded.") + parser.add_argument("--data_dir", + default=None, + type=str, + required=True, + help="The input data dir. 
Should contain the .tsv files (or other data files) for the task.") + parser.add_argument("--result_dir", + default=None, + type=str, + required=True, + help="NPU benchmark infer result path") + parser.add_argument("--model", + default=None, + type=str, + required=True, + help="The student model dir.") + parser.add_argument("--do_lower_case", + action='store_true', + help="Set this flag if you are using an uncased model.") + parser.add_argument("--inference_tool", type = str, + help = "inference tool:benchmark or msame") + args = parser.parse_args() + test_num = 872 + processor = Sst2Processor() + tokenizer = BertTokenizer.from_pretrained(args.model, do_lower_case=args.do_lower_case) + eval_examples = processor.get_dev_examples(args.data_dir) + label_list = ["0", "1"] + eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, + output_mode="classification") + #data processing + eval_labels = get_label_ids(eval_features).numpy() + if args.inference_tool == "benchmark": + pred_labels = bin2predlabel(test_num, args) + elif args.inference_tool == "msame": + pred_labels = txt2predlabel(test_num, args) + result = simple_accuracy(pred_labels, eval_labels) + print("acc:{}".format(result)) + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_preprocess_data.py b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_preprocess_data.py index 5b90fd8710bc3405ae5e3640d3e4550f991cb744..2116c4ab3c971adfc18788fde1f48c76d1633f15 100644 --- a/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_preprocess_data.py +++ b/ACL_PyTorch/contrib/nlp/tinybert/TinyBERT_preprocess_data.py @@ -1,276 +1,276 @@ -# coding=utf-8 -# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. -# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Run TinyBERT on SST-2.""" - -from __future__ import absolute_import, division, print_function -import argparse -import os -import sys -import csv -import io -from transformer.tokenization import BertTokenizer -import torch -from torch.utils.data import (DataLoader, SequentialSampler, TensorDataset) - - -class InputExample(object): - """A single training/test example for simple sequence classification.""" - - def __init__(self, guid, text_a, text_b=None, label=None): - """Constructs a InputExample. - - Args: - guid: Unique id for the example. - text_a: string. The untokenized text of the first sequence. For single - sequence tasks, only this sequence must be specified. - text_b: (Optional) string. The untokenized text of the second sequence. - Only must be specified for sequence pair tasks. - label: (Optional) string. The label of the example. This should be - specified for train and dev examples, but not for test examples. 
- """ - self.guid = guid - self.text_a = text_a - self.text_b = text_b - self.label = label - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, input_ids, input_mask, segment_ids, label_id, seq_length=None): - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.seq_length = seq_length - self.label_id = label_id - - -class DataProcessor(object): - """Base class for data converters for sequence classification data sets.""" - - def get_train_examples(self, data_dir): - """Gets a collection of `InputExample`s for the train set.""" - raise NotImplementedError() - - def get_dev_examples(self, data_dir): - """Gets a collection of `InputExample`s for the dev set.""" - raise NotImplementedError() - - def get_labels(self): - """Gets the list of labels for this data set.""" - raise NotImplementedError() - - @classmethod - def _read_tsv(cls, input_file, quotechar=None): - """Reads a tab separated value file.""" - with io.open(input_file, "r", encoding="utf-8") as f: - reader = csv.reader(f, delimiter="\t", quotechar=quotechar) - lines = [] - for line in reader: - if sys.version_info[0] == 2: - line = list(unicode(cell, 'utf-8') for cell in line) - lines.append(line) - return lines - - -class Sst2Processor(DataProcessor): - """Processor for the SST-2 data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_aug_examples(self, data_dir): - """get the augmented data""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = line[0] - label = line[1] - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -def _truncate_seq_pair(tokens_a, tokens_b, max_length): - """Truncates a sequence pair in place to the maximum length.""" - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_length: - break - if len(tokens_a) > len(tokens_b): - tokens_a.pop() - else: - tokens_b.pop() - - -def convert_examples_to_features(examples, label_list, max_seq_length, - tokenizer, output_mode): - """Loads a data file into a list of `InputBatch`s.""" - - label_map = {label: i for i, label in enumerate(label_list)} - - features = [] - for (ex_index, example) in enumerate(examples): - - tokens_a = tokenizer.tokenize(example.text_a) - - tokens_b = None - if example.text_b: - tokens_b = tokenizer.tokenize(example.text_b) - _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) - else: - if len(tokens_a) > max_seq_length - 2: - tokens_a = tokens_a[:(max_seq_length - 2)] - - tokens = ["[CLS]"] + tokens_a + ["[SEP]"] - segment_ids = [0] * len(tokens) - - if tokens_b: - tokens += tokens_b + ["[SEP]"] - segment_ids += [1] * (len(tokens_b) + 1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - input_mask = [1] * len(input_ids) - seq_length = len(input_ids) - - padding = [0] * (max_seq_length - len(input_ids)) - input_ids 
+= padding - input_mask += padding - segment_ids += padding - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - if output_mode == "classification": - label_id = label_map[example.label] - elif output_mode == "regression": - label_id = float(example.label) - else: - raise KeyError(output_mode) - - features.append( - InputFeatures(input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - label_id=label_id, - seq_length=seq_length)) - return features - - -def get_tensor_data(output_mode, features): - """get the data""" - if output_mode == "classification": - all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long) - elif output_mode == "regression": - all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float) - - all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) - all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) - all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) - all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) - tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, - all_label_ids, all_seq_lengths) - return tensor_data, all_label_ids - - -def data_main(): - """preprocess data""" - parser = argparse.ArgumentParser() - parser.add_argument("--model", - default=None, - type=str, - required=True, - help="The student model dir.") - parser.add_argument("--data_dir", - default=None, - type=str, - required=True, - help="The input data dir. Should contain the .tsv files (or other data files) for the task.") - parser.add_argument("--max_seq_length", - default=64, - type=int, - help="The maximum total input sequence length after WordPiece tokenization. 
\n" - "Sequences longer than this will be truncated, and sequences shorter \n" - "than this will be padded.") - parser.add_argument("--eval_batch_size", - default=1, - type=int, - help="Total batch size for eval.") - parser.add_argument("--do_lower_case", - action='store_true', - help="Set this flag if you are using an uncased model.") - parser.add_argument("--inference_tool", type = str, - help = "inference tool:benchmark or msame") - args = parser.parse_args() - processor = Sst2Processor() - tokenizer = BertTokenizer.from_pretrained(args.model, do_lower_case=args.do_lower_case) # for TinyBERT - - eval_examples = processor.get_dev_examples(args.data_dir) - label_list = ["0", "1"] - eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, - tokenizer, output_mode = "classification") - - bin_path = "./bert_bin" - output_mode = 'classification' - eval_data, eval_labels = get_tensor_data(output_mode, eval_features) - print("eval_labels") - # Run prediction for full data - eval_sampler = SequentialSampler(eval_data) - eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, drop_last = True, - batch_size = args.eval_batch_size, shuffle = False) - - if not os.path.exists(bin_path): - os.makedirs(bin_path) - i = -1 - for input_ids, input_mask, segment_ids, label_ids, seq_lengths in eval_dataloader: - i = i + 1 - print("[info] file", "===", i) - input_ids_np = input_ids.numpy() - input_mask_np = input_mask.numpy() - segment_ids_np = segment_ids.numpy() - if args.inference_tool == "msame": - path1 = bin_path + "/input_ids" - path2 = bin_path + "/segment_ids" - path3 = bin_path + "/input_mask" - input_ids_np.tofile(os.path.join(path1, "input" + str(i) + '.bin')) - segment_ids_np.tofile(os.path.join(path2, "input" + str(i) + '.bin')) - input_mask_np.tofile(os.path.join(path3, "input" + str(i) + '.bin')) - elif args.inference_tool == "benchmark": - input_ids_np.tofile(os.path.join(bin_path, "input_ids_" + str(i) + '.bin')) - segment_ids_np.tofile(os.path.join(bin_path, "segment_ids_" + str(i) + '.bin')) - input_mask_np.tofile(os.path.join(bin_path, "input_mask_" + str(i) + '.bin')) - -if __name__ == "__main__": +# coding=utf-8 +# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run TinyBERT on SST-2.""" + +from __future__ import absolute_import, division, print_function +import argparse +import os +import sys +import csv +import io +from transformer.tokenization import BertTokenizer +import torch +from torch.utils.data import (DataLoader, SequentialSampler, TensorDataset) + + +class InputExample(object): + """A single training/test example for simple sequence classification.""" + + def __init__(self, guid, text_a, text_b=None, label=None): + """Constructs a InputExample. + + Args: + guid: Unique id for the example. + text_a: string. The untokenized text of the first sequence. 
For single + sequence tasks, only this sequence must be specified. + text_b: (Optional) string. The untokenized text of the second sequence. + Only must be specified for sequence pair tasks. + label: (Optional) string. The label of the example. This should be + specified for train and dev examples, but not for test examples. + """ + self.guid = guid + self.text_a = text_a + self.text_b = text_b + self.label = label + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, input_ids, input_mask, segment_ids, label_id, seq_length=None): + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.seq_length = seq_length + self.label_id = label_id + + +class DataProcessor(object): + """Base class for data converters for sequence classification data sets.""" + + def get_train_examples(self, data_dir): + """Gets a collection of `InputExample`s for the train set.""" + raise NotImplementedError() + + def get_dev_examples(self, data_dir): + """Gets a collection of `InputExample`s for the dev set.""" + raise NotImplementedError() + + def get_labels(self): + """Gets the list of labels for this data set.""" + raise NotImplementedError() + + @classmethod + def _read_tsv(cls, input_file, quotechar=None): + """Reads a tab separated value file.""" + with io.open(input_file, "r", encoding="utf-8") as f: + reader = csv.reader(f, delimiter="\t", quotechar=quotechar) + lines = [] + for line in reader: + if sys.version_info[0] == 2: + line = list(unicode(cell, 'utf-8') for cell in line) + lines.append(line) + return lines + + +class Sst2Processor(DataProcessor): + """Processor for the SST-2 data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_aug_examples(self, data_dir): + """get the augmented data""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, i) + text_a = line[0] + label = line[1] + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) + return examples + + +def _truncate_seq_pair(tokens_a, tokens_b, max_length): + """Truncates a sequence pair in place to the maximum length.""" + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + + +def convert_examples_to_features(examples, label_list, max_seq_length, + tokenizer, output_mode): + """Loads a data file into a list of `InputBatch`s.""" + + label_map = {label: i for i, label in enumerate(label_list)} + + features = [] + for (ex_index, example) in enumerate(examples): + + tokens_a = tokenizer.tokenize(example.text_a) + + tokens_b = None + if example.text_b: + tokens_b = tokenizer.tokenize(example.text_b) + _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) + else: + if len(tokens_a) > max_seq_length - 2: + tokens_a = tokens_a[:(max_seq_length - 2)] + + tokens = ["[CLS]"] + tokens_a + 
["[SEP]"] + segment_ids = [0] * len(tokens) + + if tokens_b: + tokens += tokens_b + ["[SEP]"] + segment_ids += [1] * (len(tokens_b) + 1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + input_mask = [1] * len(input_ids) + seq_length = len(input_ids) + + padding = [0] * (max_seq_length - len(input_ids)) + input_ids += padding + input_mask += padding + segment_ids += padding + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + if output_mode == "classification": + label_id = label_map[example.label] + elif output_mode == "regression": + label_id = float(example.label) + else: + raise KeyError(output_mode) + + features.append( + InputFeatures(input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + label_id=label_id, + seq_length=seq_length)) + return features + + +def get_tensor_data(output_mode, features): + """get the data""" + if output_mode == "classification": + all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long) + elif output_mode == "regression": + all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float) + + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) + tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + all_label_ids, all_seq_lengths) + return tensor_data, all_label_ids + + +def data_main(): + """preprocess data""" + parser = argparse.ArgumentParser() + parser.add_argument("--model", + default=None, + type=str, + required=True, + help="The student model dir.") + parser.add_argument("--data_dir", + default=None, + type=str, + required=True, + help="The input data dir. Should contain the .tsv files (or other data files) for the task.") + parser.add_argument("--max_seq_length", + default=64, + type=int, + help="The maximum total input sequence length after WordPiece tokenization. 
\n" + "Sequences longer than this will be truncated, and sequences shorter \n" + "than this will be padded.") + parser.add_argument("--eval_batch_size", + default=1, + type=int, + help="Total batch size for eval.") + parser.add_argument("--do_lower_case", + action='store_true', + help="Set this flag if you are using an uncased model.") + parser.add_argument("--inference_tool", type = str, + help = "inference tool:benchmark or msame") + args = parser.parse_args() + processor = Sst2Processor() + tokenizer = BertTokenizer.from_pretrained(args.model, do_lower_case=args.do_lower_case) # for TinyBERT + + eval_examples = processor.get_dev_examples(args.data_dir) + label_list = ["0", "1"] + eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, + tokenizer, output_mode = "classification") + + bin_path = "./bert_bin" + output_mode = 'classification' + eval_data, eval_labels = get_tensor_data(output_mode, eval_features) + print("eval_labels") + # Run prediction for full data + eval_sampler = SequentialSampler(eval_data) + eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, drop_last = True, + batch_size = args.eval_batch_size, shuffle = False) + + if not os.path.exists(bin_path): + os.makedirs(bin_path) + i = -1 + for input_ids, input_mask, segment_ids, label_ids, seq_lengths in eval_dataloader: + i = i + 1 + print("[info] file", "===", i) + input_ids_np = input_ids.numpy() + input_mask_np = input_mask.numpy() + segment_ids_np = segment_ids.numpy() + if args.inference_tool == "msame": + path1 = bin_path + "/input_ids" + path2 = bin_path + "/segment_ids" + path3 = bin_path + "/input_mask" + input_ids_np.tofile(os.path.join(path1, "input" + str(i) + '.bin')) + segment_ids_np.tofile(os.path.join(path2, "input" + str(i) + '.bin')) + input_mask_np.tofile(os.path.join(path3, "input" + str(i) + '.bin')) + elif args.inference_tool == "benchmark": + input_ids_np.tofile(os.path.join(bin_path, "input_ids_" + str(i) + '.bin')) + segment_ids_np.tofile(os.path.join(bin_path, "segment_ids_" + str(i) + '.bin')) + input_mask_np.tofile(os.path.join(bin_path, "input_mask_" + str(i) + '.bin')) + +if __name__ == "__main__": data_main() \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/tinybert/modelzoo_level.txt b/ACL_PyTorch/contrib/nlp/tinybert/modelzoo_level.txt index 62ed12347c866db16fef5622af355734787e4ec9..def2f63f1b066cc94e8dd94ec2f0c60d1baa3608 100644 --- a/ACL_PyTorch/contrib/nlp/tinybert/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/nlp/tinybert/modelzoo_level.txt @@ -1,6 +1,6 @@ -ModelConvert:OK -QuantStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +ModelConvert:OK +QuantStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/nlp/tinybert/pth2onnx.py b/ACL_PyTorch/contrib/nlp/tinybert/pth2onnx.py index e30bda0a5dfecfd4664e471849135d0d6f90b8f7..696ebfaa5e8fc7d8e0c5df769d1dfafcf0c6c20d 100644 --- a/ACL_PyTorch/contrib/nlp/tinybert/pth2onnx.py +++ b/ACL_PyTorch/contrib/nlp/tinybert/pth2onnx.py @@ -1,84 +1,84 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import os -import torch -import torch.onnx -import argparse -from transformer.modeling import TinyBertForSequenceClassification - - -def make_input(args): - """make the input data to create a model""" - eval_batch_size = args.eval_batch_size - max_seq_length = args.max_seq_length - org_input_ids = torch.ones(eval_batch_size, max_seq_length).long() - org_token_type_ids = torch.ones(eval_batch_size, max_seq_length).long() - org_input_mask = torch.ones(eval_batch_size, max_seq_length).long() - return (org_input_ids, org_token_type_ids, org_input_mask) - - -def convert(args): - """convert the files into data""" - model = TinyBertForSequenceClassification.from_pretrained(args.input_model, num_labels = 2) - model.eval() - org_input = make_input(args) - input_names = ['input_ids', 'segment_ids', 'input_mask'] - output_names = ['output'] - OPERATOR_EXPORT_TYPE = torch._C._onnx.OperatorExportTypes.ONNX - torch.onnx.export(model, org_input, args.output_file, export_params = True, - input_names=input_names, output_names=output_names, - operator_export_type=OPERATOR_EXPORT_TYPE, - opset_version=11, verbose=True) - - -def main(): - """change the pth files into onnx""" - #set the args list - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", - default=None, - type=str, - required=True, - help="The model(e.g. SST-2 distilled model)dir.") - parser.add_argument("--output_file", - default=None, - type=str, - required=True, - help="The output file of onnx. File name or dir is available.") - parser.add_argument("--data_dir", - default=None, - type=str, - required=True, - help="The input data dir. Should contain the .tsv files (or other data files) for the task.") - parser.add_argument("--max_seq_length", - default=64, - type=int, - help="The maximum total input sequence length after WordPiece tokenization. \n" - "Sequences longer than this will be truncated, and sequences shorter \n" - "than this will be padded.") - parser.add_argument("--eval_batch_size", - default=1, - type=int, - help="Total batch size for eval.") - parser.add_argument("--do_lower_case", - action='store_true', - help="Set this flag if you are using an uncased model.") - args = parser.parse_args() - os.makedirs(os.path.dirname(args.output_file), exist_ok=True) - convert(args) - #add_cast(args) - -if __name__ == "__main__": - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import os +import torch +import torch.onnx +import argparse +from transformer.modeling import TinyBertForSequenceClassification + + +def make_input(args): + """make the input data to create a model""" + eval_batch_size = args.eval_batch_size + max_seq_length = args.max_seq_length + org_input_ids = torch.ones(eval_batch_size, max_seq_length).long() + org_token_type_ids = torch.ones(eval_batch_size, max_seq_length).long() + org_input_mask = torch.ones(eval_batch_size, max_seq_length).long() + return (org_input_ids, org_token_type_ids, org_input_mask) + + +def convert(args): + """convert the files into data""" + model = TinyBertForSequenceClassification.from_pretrained(args.input_model, num_labels = 2) + model.eval() + org_input = make_input(args) + input_names = ['input_ids', 'segment_ids', 'input_mask'] + output_names = ['output'] + OPERATOR_EXPORT_TYPE = torch._C._onnx.OperatorExportTypes.ONNX + torch.onnx.export(model, org_input, args.output_file, export_params = True, + input_names=input_names, output_names=output_names, + operator_export_type=OPERATOR_EXPORT_TYPE, + opset_version=11, verbose=True) + + +def main(): + """change the pth files into onnx""" + #set the args list + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", + default=None, + type=str, + required=True, + help="The model(e.g. SST-2 distilled model)dir.") + parser.add_argument("--output_file", + default=None, + type=str, + required=True, + help="The output file of onnx. File name or dir is available.") + parser.add_argument("--data_dir", + default=None, + type=str, + required=True, + help="The input data dir. Should contain the .tsv files (or other data files) for the task.") + parser.add_argument("--max_seq_length", + default=64, + type=int, + help="The maximum total input sequence length after WordPiece tokenization. 
\n" + "Sequences longer than this will be truncated, and sequences shorter \n" + "than this will be padded.") + parser.add_argument("--eval_batch_size", + default=1, + type=int, + help="Total batch size for eval.") + parser.add_argument("--do_lower_case", + action='store_true', + help="Set this flag if you are using an uncased model.") + args = parser.parse_args() + os.makedirs(os.path.dirname(args.output_file), exist_ok=True) + convert(args) + #add_cast(args) + +if __name__ == "__main__": + main() diff --git a/ACL_PyTorch/contrib/rl/DQN/env.sh b/ACL_PyTorch/contrib/rl/DQN/env.sh index 7e0a3deaa3ec3524176a38d13cb805463c8ace4d..d9466cab92c1990095d21f6f7bb203013df4014a 100644 --- a/ACL_PyTorch/contrib/rl/DQN/env.sh +++ b/ACL_PyTorch/contrib/rl/DQN/env.sh @@ -1,8 +1,8 @@ -export install_path=/usr/local/Ascend/ascend-toolkit/latest -export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH -export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH -export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH -export ASCEND_OPP_PATH=${install_path}/opp -source /usr/local/Ascend/ascend-toolkit/set_env.sh -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH +export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +source /usr/local/Ascend/ascend-toolkit/set_env.sh +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest export REPEAT_TUNE=True \ No newline at end of file diff --git a/ACL_PyTorch/contrib/rl/DQN/modelzoo_level.txt b/ACL_PyTorch/contrib/rl/DQN/modelzoo_level.txt index 38700fca05402f52c3ae1c4be0889eb60e1f80f1..2e42553460a4f3687654b6ad3f91ab0bcc3aadac 100644 --- a/ACL_PyTorch/contrib/rl/DQN/modelzoo_level.txt +++ b/ACL_PyTorch/contrib/rl/DQN/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/ACL_PyTorch/contrib/rl/DQN/test/parse.py b/ACL_PyTorch/contrib/rl/DQN/test/parse.py index 26ec1439e9bb19db3bbe50cb3e17499b350a7ec4..f45e4ba7e1f7c3c72eabea1376080e3dd62a56a0 100644 --- a/ACL_PyTorch/contrib/rl/DQN/test/parse.py +++ b/ACL_PyTorch/contrib/rl/DQN/test/parse.py @@ -1,34 +1,34 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import json -import re - -if __name__ == '__main__': - if sys.argv[1].endswith('.json'): - result_json = sys.argv[1] - with open(result_json, 'r') as f: - content = f.read() - print(content) - elif sys.argv[1].endswith('.txt'): - result_txt = sys.argv[1] - with open(result_txt, 'r') as f: - content = f.read() - - txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] - print(txt_data_list) - fps = float(txt_data_list[0].replace('samples/s', '')) * 4 - print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import json +import re + +if __name__ == '__main__': + if sys.argv[1].endswith('.json'): + result_json = sys.argv[1] + with open(result_json, 'r') as f: + content = f.read() + print(content) + elif sys.argv[1].endswith('.txt'): + result_txt = sys.argv[1] + with open(result_txt, 'r') as f: + content = f.read() + + txt_data_list = [i.strip() for i in re.findall(r'=(.*?),', content.replace('\n', ',') + ',')] + print(txt_data_list) + fps = float(txt_data_list[0].replace('samples/s', '')) * 4 + print('310 bs{} fps:{}'.format(result_txt.split('_')[3], fps)) + diff --git a/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_full_8p.sh index ad8e9d5e2ad6d5d58f9e941889f66d37a22f0513..a176b4a4df0d8accd1cd516937f37e4262f7e4ad 100644 --- a/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_full_8p.sh @@ -1,201 +1,201 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd`/../ - -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="3D_ResNet_ID0421_for_PyTorch" -#训练epoch -train_epochs=200 -#训练batch_size -batch_size=1024 -#训练step -train_steps= -#学习率 -learning_rate=0.08 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - 
over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path -##################创建日志输出目录,根据模型审视################## -# 模型采用非循环方式启动多卡训练,创建日志输出目录如下;采用循环方式启动多卡训练的模型,在循环中创建日志输出目录,可参考CRNN模型 -# 非循环方式下8卡训练日志输出路径中的ASCEND_DEVICE_ID默认为0,只是人为指定文件夹名称, 不涉及训练业务 -ASCEND_DEVICE_ID=0 -#创建DeviceID输出目录,不需要修改 -if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt -else - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt -fi -# 绑核,不需要的绑核的模型删除,需要的模型审视修改 -#let a=RANK_ID*12 -#let b=RANK_ID+1 -#let c=b*12-1 - -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - nohup python3 main.py \ - --video_path ${data_path}/hmdb51_jpg \ - --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ - --result_path outputs \ - --dataset hmdb51 \ - --n_classes 51 \ - --n_pretrain_classes 700 \ - --pretrain_path ${data_path}/r3d18_K_200ep.pth \ - --ft_begin_module fc \ - --model resnet \ - --model_depth 18 \ - --batch_size 1024 \ - --n_threads 128 \ - --checkpoint 5 \ - --amp_cfg \ - --n_epochs ${train_epochs} \ - --opt_level O2 \ - --loss_scale_value 1024 \ - --distributed \ - --ngpus_per_node 8 \ - --device_list '0,1,2,3,4,5,6,7' \ - --manual_seed 1234 \ - --learning_rate ${learning_rate} \ - --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "\[1/" | grep -v "\[2/" | awk -F "Fps" '{print$2}' | awk '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - - -python3 main.py \ - --video_path ${data_path}/hmdb51_jpg \ - --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ - --result_path outputs \ - --dataset hmdb51 \ - --resume_path outputs/save_200.pth \ - --model_depth 18 \ - --n_classes 51 \ - --n_threads 4 \ - --no_train \ - --no_val \ - --inference \ - --output_topk 5 \ - --inference_batch_size 1 \ - --device_list '0,1,2,3,4,5,6,7' - -python3 -m util_scripts.eval_accuracy ${data_path}/hmdb51_json/hmdb51_1.json outputs/val.json -k 1 --save --ignore >> ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait -#输出训练精度,需要模型审视修改 -train_accuracy=`grep "top-1 accuracy:" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print$3}' ` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 
-#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd`/../ + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="3D_ResNet_ID0421_for_PyTorch" +#训练epoch +train_epochs=200 +#训练batch_size +batch_size=1024 +#训练step +train_steps= +#学习率 +learning_rate=0.08 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/test/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/test/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 
+start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path +##################创建日志输出目录,根据模型审视################## +# 模型采用非循环方式启动多卡训练,创建日志输出目录如下;采用循环方式启动多卡训练的模型,在循环中创建日志输出目录,可参考CRNN模型 +# 非循环方式下8卡训练日志输出路径中的ASCEND_DEVICE_ID默认为0,只是人为指定文件夹名称, 不涉及训练业务 +ASCEND_DEVICE_ID=0 +#创建DeviceID输出目录,不需要修改 +if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +fi +# 绑核,不需要的绑核的模型删除,需要的模型审视修改 +#let a=RANK_ID*12 +#let b=RANK_ID+1 +#let c=b*12-1 + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 main.py \ + --video_path ${data_path}/hmdb51_jpg \ + --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ + --result_path outputs \ + --dataset hmdb51 \ + --n_classes 51 \ + --n_pretrain_classes 700 \ + --pretrain_path ${data_path}/r3d18_K_200ep.pth \ + --ft_begin_module fc \ + --model resnet \ + --model_depth 18 \ + --batch_size 1024 \ + --n_threads 128 \ + --checkpoint 5 \ + --amp_cfg \ + --n_epochs ${train_epochs} \ + --opt_level O2 \ + --loss_scale_value 1024 \ + --distributed \ + --ngpus_per_node 8 \ + --device_list '0,1,2,3,4,5,6,7' \ + --manual_seed 1234 \ + --learning_rate ${learning_rate} \ + --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "\[1/" | grep -v "\[2/" | awk -F "Fps" '{print$2}' | awk '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + + +python3 main.py \ + --video_path ${data_path}/hmdb51_jpg \ + --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ + --result_path outputs \ + --dataset hmdb51 \ + --resume_path outputs/save_200.pth \ + --model_depth 18 \ + --n_classes 51 \ + --n_threads 4 \ + --no_train \ + --no_val \ + --inference \ + --output_topk 5 \ + --inference_batch_size 1 \ + --device_list '0,1,2,3,4,5,6,7' + +python3 -m util_scripts.eval_accuracy ${data_path}/hmdb51_json/hmdb51_1.json outputs/val.json -k 1 --save --ignore >> ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "top-1 accuracy:" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print$3}' ` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" 
> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_performance_8p.sh index d9adadf868b2f04d2258417dc9e4bddafe2ff795..9c53ccd79a36b6acf2205e967caa311cdadf5d97 100644 --- a/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch/test/train_performance_8p.sh @@ -1,180 +1,180 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd`/../ - -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="3D_ResNet_ID0421_for_PyTorch" -#训练epoch -train_epochs=2 -#训练batch_size -batch_size=1024 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.08 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 
-start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path - -echo "Device ID: $ASCEND_DEVICE_ID" -#export RANK_ID=$RANK_ID - -#创建DeviceID输出目录,不需要修改 -if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt -else - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt -fi -# 绑核,不需要的绑核的模型删除,需要的模型审视修改 -#let a=RANK_ID*12 -#let b=RANK_ID+1 -#let c=b*12-1 - -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path -nohup python3 main.py \ - --video_path ${data_path}/hmdb51_jpg \ - --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ - --result_path outputs \ - --dataset hmdb51 \ - --n_classes 51 \ - --n_pretrain_classes 700 \ - --pretrain_path ${data_path}/r3d18_K_200ep.pth \ - --ft_begin_module fc \ - --model resnet \ - --model_depth 18 \ - --batch_size $batch_size \ - --n_threads 128 \ - --checkpoint 5 \ - --amp_cfg \ - --n_epochs ${train_epochs} \ - --opt_level O2 \ - --loss_scale_value 1024 \ - --distributed \ - --ngpus_per_node 8 \ - --device_list '0,1,2,3,4,5,6,7' \ - --manual_seed 1234 \ - --learning_rate ${learning_rate} \ - --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "Epoch: \[1\]"|awk -F "Fps" '{print$2}'|awk '{print $1}'|awk '{sum+=$1} END {print sum/NR}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep "Fps" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tail -2|head -1|awk '{print $17}'|sed 's/[()]//g' ` -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = 
${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd`/../ + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="3D_ResNet_ID0421_for_PyTorch" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=1024 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.08 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/test/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/test/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +echo "Device ID: $ASCEND_DEVICE_ID" +#export RANK_ID=$RANK_ID + +#创建DeviceID输出目录,不需要修改 +if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +else + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt +fi +# 绑核,不需要的绑核的模型删除,需要的模型审视修改 +#let a=RANK_ID*12 +#let b=RANK_ID+1 +#let c=b*12-1 + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path +nohup python3 main.py \ + --video_path ${data_path}/hmdb51_jpg \ + --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ + --result_path outputs \ + --dataset hmdb51 \ + --n_classes 51 \ + --n_pretrain_classes 700 \ + --pretrain_path ${data_path}/r3d18_K_200ep.pth \ + --ft_begin_module fc \ + --model resnet \ + --model_depth 18 \ + --batch_size $batch_size \ + --n_threads 128 \ + --checkpoint 5 \ + --amp_cfg \ + --n_epochs ${train_epochs} \ + --opt_level O2 \ + --loss_scale_value 1024 \ + --distributed \ + --ngpus_per_node 8 \ + --device_list '0,1,2,3,4,5,6,7' \ + --manual_seed 1234 \ + --learning_rate ${learning_rate} \ + --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 
2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "Epoch: \[1\]"|awk -F "Fps" '{print$2}'|awk '{print $1}'|awk '{sum+=$1} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep "Fps" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tail -2|head -1|awk '{print $17}'|sed 's/[()]//g' ` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_full_1p.sh index d8778fcfb7bc744fdf66242c938cd2424b4be0bd..cd9fd128b8cea45dec37d45778fd244787c5e71a 100644 --- a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_full_1p.sh @@ -1,165 +1,165 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 
-Network="DeepMar_ID0096_for_PyTorch" -#训练epoch -train_epochs=4 -#训练batch_size -batch_size=256 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_full_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --ci_cp* ]];then - ci_cp=`echo ${para#*=}` - fi -done - -if [[ $ci_cp == "1" ]];then - cp -r $data_path ${data_path}_bak -fi - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../transform_peta.py \ - --save_dir=$data_path \ - --traintest_split_file=$data_path/peta_partition.pkl - -nohup python3.7 ${cur_path}/../train_deepmar_resnet50.py \ - --save_dir=$data_path \ - --workers=32 \ - --npu=$ASCEND_DEVICE_ID \ - --batch_size=$batch_size \ - --new_params_lr=0.01 \ - --finetuned_params_lr=0.01 \ - --total_epochs=100 \ - --steps_per_log=1 \ - --loss_scale 512 \ - --amp \ - --opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v loss|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'Acc' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc" '{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -if [[ $ci_cp == "1" ]];then - rm -rf $data_path - mv ${data_path}_bak $data_path +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepMar_ID0096_for_PyTorch" +#训练epoch +train_epochs=4 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_full_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --ci_cp* ]];then + ci_cp=`echo ${para#*=}` + fi +done + +if [[ $ci_cp == "1" ]];then + cp -r $data_path ${data_path}_bak +fi + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../transform_peta.py \ + --save_dir=$data_path \ + --traintest_split_file=$data_path/peta_partition.pkl + +nohup python3.7 ${cur_path}/../train_deepmar_resnet50.py \ + --save_dir=$data_path \ + --workers=32 \ + --npu=$ASCEND_DEVICE_ID \ + --batch_size=$batch_size \ + --new_params_lr=0.01 \ + --finetuned_params_lr=0.01 \ + --total_epochs=100 \ + --steps_per_log=1 \ + --loss_scale 512 \ + --amp \ + --opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v loss|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc" 
'{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +if [[ $ci_cp == "1" ]];then + rm -rf $data_path + mv ${data_path}_bak $data_path fi \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_1p.sh index 59d2e6627bf745afb8d0de414a10393562bc5b42..830ba688c934dd992fac6a1b36e99547df9a14d0 100644 --- a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_1p.sh @@ -1,162 +1,162 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="DeepMar_ID0096_for_PyTorch" -#训练epoch -train_epochs=4 -#训练batch_size -batch_size=256 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --ci_cp* ]];then - ci_cp=`echo ${para#*=}` - fi -done - -if [[ 
$ci_cp == "1" ]];then - cp -r $data_path ${data_path}_bak -fi - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../transform_peta.py \ - --save_dir=$data_path \ - --traintest_split_file=$data_path/peta_partition.pkl - -nohup python3.7 ${cur_path}/../train_deepmar_resnet50.py \ - --save_dir=$data_path \ - --workers=32 \ - --npu=$ASCEND_DEVICE_ID \ - --batch_size=$batch_size \ - --new_params_lr=0.01 \ - --finetuned_params_lr=0.01 \ - --total_epochs=10 \ - --steps_per_log=1 \ - --loss_scale 512 \ - --amp \ - --opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v loss|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log - -if [[ $ci_cp == "1" ]];then - rm -rf $data_path - mv ${data_path}_bak $data_path +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepMar_ID0096_for_PyTorch" +#训练epoch +train_epochs=4 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / 
${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --ci_cp* ]];then + ci_cp=`echo ${para#*=}` + fi +done + +if [[ $ci_cp == "1" ]];then + cp -r $data_path ${data_path}_bak +fi + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../transform_peta.py \ + --save_dir=$data_path \ + --traintest_split_file=$data_path/peta_partition.pkl + +nohup python3.7 ${cur_path}/../train_deepmar_resnet50.py \ + --save_dir=$data_path \ + --workers=32 \ + --npu=$ASCEND_DEVICE_ID \ + --batch_size=$batch_size \ + --new_params_lr=0.01 \ + --finetuned_params_lr=0.01 \ + --total_epochs=10 \ + --steps_per_log=1 \ + --loss_scale 512 \ + --amp \ + --opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v loss|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +if [[ $ci_cp == "1" ]];then + rm -rf $data_path + mv ${data_path}_bak $data_path fi \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_8p.sh index 9b89fbe3ebd5baa0e0176f385b492d63f3292d89..ecfa71f84b1175c2910435a2b79b43d7b9f31e57 100644 --- a/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch/test/train_performance_8p.sh @@ -1,175 +1,175 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="DeepMar_ID0096_for_PyTorch" -#训练epoch -train_epochs=4 -#训练batch_size -batch_size=2048 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --ci_cp* ]];then - ci_cp=`echo ${para#*=}` - fi -done - -if [[ $ci_cp == "1" ]];then - cp -r $data_path ${data_path}_bak -fi - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../transform_peta.py \ - --save_dir=$data_path \ - --traintest_split_file=$data_path/peta_partition.pkl - -corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` -let a=RANK_ID*${corenum}/${RANK_SIZE} -let b=RANK_ID+1 -let c=b*${corenum}/${RANK_SIZE}-1 -nohup taskset -c $a-$c python3.7 ${cur_path}/../train_deepmar_resnet50_8p.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --save_dir=$data_path \ - --exp_dir=$cur_path/output/$ASCEND_DEVICE_ID/ \ - --workers=64 \ - --batch_size=2048 \ - --new_params_lr=0.016 \ - --finetuned_params_lr=0.016 \ - --total_epochs=$train_epochs \ - --steps_per_log=1 \ - --loss_scale 512 \ - --amp \ - --opt_level O2 \ - --dist_url 'tcp://127.0.0.1:50000' \ - --dist_backend 'hccl' \ - --multiprocessing_distributed \ - --world_size 1 \ - --rank 0 \ - --epochs_per_val 40 > 
$cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log - -if [[ $ci_cp == "1" ]];then - rm -rf $data_path - mv ${data_path}_bak $data_path +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepMar_ID0096_for_PyTorch" +#训练epoch +train_epochs=4 +#训练batch_size +batch_size=2048 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --ci_cp* ]];then + ci_cp=`echo ${para#*=}` + fi +done + +if [[ $ci_cp == "1" ]];then + cp -r $data_path ${data_path}_bak +fi + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + 
+#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../transform_peta.py \ + --save_dir=$data_path \ + --traintest_split_file=$data_path/peta_partition.pkl + +corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` +let a=RANK_ID*${corenum}/${RANK_SIZE} +let b=RANK_ID+1 +let c=b*${corenum}/${RANK_SIZE}-1 +nohup taskset -c $a-$c python3.7 ${cur_path}/../train_deepmar_resnet50_8p.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --save_dir=$data_path \ + --exp_dir=$cur_path/output/$ASCEND_DEVICE_ID/ \ + --workers=64 \ + --batch_size=2048 \ + --new_params_lr=0.016 \ + --finetuned_params_lr=0.016 \ + --total_epochs=$train_epochs \ + --steps_per_log=1 \ + --loss_scale 512 \ + --amp \ + --opt_level O2 \ + --dist_url 'tcp://127.0.0.1:50000' \ + --dist_backend 'hccl' \ + --multiprocessing_distributed \ + --world_size 1 \ + --rank 0 \ + --epochs_per_val 40 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Step ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss:' '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log + +if [[ $ci_cp == "1" ]];then + rm -rf $data_path + mv ${data_path}_bak $data_path fi \ No newline at end of file diff --git 
a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/convert/densenet121_pt_aipp.config b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/convert/densenet121_pt_aipp.config index daf3d557a6a8febc3bf44c862740110d711222e8..17cc1daafa93bf51c0b6fe2fecf87814ad5488c4 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/convert/densenet121_pt_aipp.config +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/convert/densenet121_pt_aipp.config @@ -1,16 +1,16 @@ -aipp_op{ - aipp_mode:static - input_format : RGB888_U8 - csc_switch : false - rbuv_swap_switch : true - - mean_chn_0: 121 - mean_chn_1: 115 - mean_chn_2: 100 - min_chn_0 : 0.0 - min_chn_1 : 0.0 - min_chn_2 : 0.0 - var_reci_chn_0 : 0.0142857142857143 - var_reci_chn_1 : 0.0147058823529412 - var_reci_chn_2 : 0.0140845070422535 -} +aipp_op{ + aipp_mode:static + input_format : RGB888_U8 + csc_switch : false + rbuv_swap_switch : true + + mean_chn_0: 121 + mean_chn_1: 115 + mean_chn_2: 100 + min_chn_0 : 0.0 + min_chn_1 : 0.0 + min_chn_2 : 0.0 + var_reci_chn_0 : 0.0142857142857143 + var_reci_chn_1 : 0.0147058823529412 + var_reci_chn_2 : 0.0140845070422535 +} diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/CMakeLists.txt b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/CMakeLists.txt index 95cb9125d47075a86686cfd8b4731d81fa48dc0f..1c3a4f0ea63b574c157b2858dd8239c4e2c1e681 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/CMakeLists.txt +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/CMakeLists.txt @@ -1,49 +1,49 @@ -cmake_minimum_required(VERSION 3.14.0) -project(densenet121) - -set(TARGET densenet121) - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors) -set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include) -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist) -endif() - -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) - -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} main.cpp Densenet121Classify.cpp) -target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess stdc++fs) - 
-install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) +cmake_minimum_required(VERSION 3.14.0) +project(densenet121) + +set(TARGET densenet121) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include) +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist) +endif() + +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} main.cpp Densenet121Classify.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.cpp b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.cpp index dcd612db9c1b19f943f8cea68134c249f96e0e05..760265dbc1edf52a4520adc586afe7df2f196be9 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.cpp +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.cpp @@ -1,256 +1,256 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include "Densenet121Classify.h" -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; -namespace { -const uint32_t YUV_BYTE_NU = 3; -const uint32_t YUV_BYTE_DE = 2; -const uint32_t VPC_H_ALIGN = 2; -} - -APP_ERROR Densenet121Classify::Init(const InitParam &initParam) -{ - deviceId_ = initParam.deviceId; - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - dvppWrapper_ = std::make_shared(); - ret = dvppWrapper_->Init(); - if (ret != APP_ERR_OK) { - LogError << "DvppWrapper init failed, ret=" << ret << "."; - return ret; - } - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - MxBase::ConfigData configData; - const std::string softmax = initParam.softmax ? "true" : "false"; - const std::string checkTensor = initParam.checkTensor ? "true" : "false"; - - configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); - configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); - configData.SetJsonValue("SOFTMAX", softmax); - configData.SetJsonValue("CHECK_MODEL", checkTensor); - - auto jsonStr = configData.GetCfgJson().serialize(); - std::map> config; - config["postProcessConfigContent"] = std::make_shared(jsonStr); - config["labelPath"] = std::make_shared(initParam.labelPath); - - post_ = std::make_shared(); - ret = post_->Init(config); - if (ret != APP_ERR_OK) { - LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::DeInit() -{ - dvppWrapper_->DeInit(); - model_->DeInit(); - post_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::ReadImage(const std::string &imgPath, MxBase::TensorBase &tensor) -{ - MxBase::DvppDataInfo output = {}; - APP_ERROR ret = dvppWrapper_->DvppJpegDecode(imgPath, output); - if (ret != APP_ERR_OK) { - LogError << "DvppWrapper DvppJpegDecode failed, ret=" << ret << "."; - return ret; - } - MxBase::MemoryData memoryData((void*)output.data, output.dataSize, MemoryData::MemoryType::MEMORY_DVPP, deviceId_); - if (output.heightStride % VPC_H_ALIGN != 0) { - LogError << "Output data height(" << output.heightStride << ") can't be divided by " << VPC_H_ALIGN << "."; - MemoryHelper::MxbsFree(memoryData); - return APP_ERR_COMM_INVALID_PARAM; - } - std::vector shape = {output.heightStride * YUV_BYTE_NU / YUV_BYTE_DE, output.widthStride}; - tensor = TensorBase(memoryData, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::Resize(const MxBase::TensorBase &inputTensor, MxBase::TensorBase &outputTensor) -{ - auto shape = inputTensor.GetShape(); - MxBase::DvppDataInfo input = {}; - input.height = (uint32_t)shape[0] * YUV_BYTE_DE / YUV_BYTE_NU; - input.width = shape[1]; - input.heightStride = (uint32_t)shape[0] * YUV_BYTE_DE / YUV_BYTE_NU; - input.widthStride = shape[1]; - input.dataSize = inputTensor.GetByteSize(); - input.data = (uint8_t*)inputTensor.GetBuffer(); - const uint32_t resizeHeight = 304; - const uint32_t resizeWidth = 304; - 
MxBase::ResizeConfig resize = {}; - resize.height = resizeHeight; - resize.width = resizeWidth; - MxBase::DvppDataInfo output = {}; - APP_ERROR ret = dvppWrapper_->VpcResize(input, output, resize); - if (ret != APP_ERR_OK) { - LogError << "VpcResize failed, ret=" << ret << "."; - return ret; - } - MxBase::MemoryData memoryData((void*)output.data, output.dataSize, MemoryData::MemoryType::MEMORY_DVPP, deviceId_); - if (output.heightStride % VPC_H_ALIGN != 0) { - LogError << "Output data height(" << output.heightStride << ") can't be divided by " << VPC_H_ALIGN << "."; - MemoryHelper::MxbsFree(memoryData); - MemoryHelper::MxbsFree(memoryData); - return APP_ERR_COMM_INVALID_PARAM; - } - shape = {output.heightStride * YUV_BYTE_NU / YUV_BYTE_DE, output.widthStride}; - outputTensor = TensorBase(memoryData, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::Inference(const std::vector &inputs, - std::vector &outputs) -{ - auto dtypes = model_->GetOutputDataType(); - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - auto startTime = std::chrono::high_resolution_clock::now(); // search for learning - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - auto endTime = std::chrono::high_resolution_clock::now(); - double costMs = std::chrono::duration(endTime - startTime).count(); - g_inferCost.push_back(costMs); - if (ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::PostProcess(const std::vector &inputs, - std::vector> &clsInfos) -{ - APP_ERROR ret = post_->Process(inputs, clsInfos); - if (ret != APP_ERR_OK) { - LogError << "Process failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Densenet121Classify::GenerateInferResult(const std::string &imgPath, - std::vector> &BatchClsInfos) -{ - uint32_t batchIndex = 0; - LogInfo << "images path: " << imgPath; - std::string fileName = imgPath.substr(imgPath.find_last_of("/") + 1); - size_t dot = fileName.find_last_of("."); - - std::string resultPathName = "result"; - if (access(resultPathName.c_str(), 0) != 0) { - int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); - if (ret != 0) { - LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; - return ret; - } - } - std::string resFileName = "result/" + fileName.substr(0, dot) + "_1.txt"; - LogInfo << "file path for saving result: " < inputs = {}; - std::vector outputs = {}; - inputs.push_back(resizeImage); - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - std::vector> BatchClsInfos = {}; - ret = PostProcess(outputs, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - ret = GenerateInferResult(imgPath, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << 
"Generate infer result failed, ret=" << ret << "."; - return ret; - } - - return APP_ERR_OK; -} +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "Densenet121Classify.h" +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +using namespace MxBase; +namespace { +const uint32_t YUV_BYTE_NU = 3; +const uint32_t YUV_BYTE_DE = 2; +const uint32_t VPC_H_ALIGN = 2; +} + +APP_ERROR Densenet121Classify::Init(const InitParam &initParam) +{ + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + dvppWrapper_ = std::make_shared(); + ret = dvppWrapper_->Init(); + if (ret != APP_ERR_OK) { + LogError << "DvppWrapper init failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + MxBase::ConfigData configData; + const std::string softmax = initParam.softmax ? "true" : "false"; + const std::string checkTensor = initParam.checkTensor ? 
"true" : "false"; + + configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); + configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); + configData.SetJsonValue("SOFTMAX", softmax); + configData.SetJsonValue("CHECK_MODEL", checkTensor); + + auto jsonStr = configData.GetCfgJson().serialize(); + std::map> config; + config["postProcessConfigContent"] = std::make_shared(jsonStr); + config["labelPath"] = std::make_shared(initParam.labelPath); + + post_ = std::make_shared(); + ret = post_->Init(config); + if (ret != APP_ERR_OK) { + LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::DeInit() +{ + dvppWrapper_->DeInit(); + model_->DeInit(); + post_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::ReadImage(const std::string &imgPath, MxBase::TensorBase &tensor) +{ + MxBase::DvppDataInfo output = {}; + APP_ERROR ret = dvppWrapper_->DvppJpegDecode(imgPath, output); + if (ret != APP_ERR_OK) { + LogError << "DvppWrapper DvppJpegDecode failed, ret=" << ret << "."; + return ret; + } + MxBase::MemoryData memoryData((void*)output.data, output.dataSize, MemoryData::MemoryType::MEMORY_DVPP, deviceId_); + if (output.heightStride % VPC_H_ALIGN != 0) { + LogError << "Output data height(" << output.heightStride << ") can't be divided by " << VPC_H_ALIGN << "."; + MemoryHelper::MxbsFree(memoryData); + return APP_ERR_COMM_INVALID_PARAM; + } + std::vector shape = {output.heightStride * YUV_BYTE_NU / YUV_BYTE_DE, output.widthStride}; + tensor = TensorBase(memoryData, false, shape, TENSOR_DTYPE_UINT8); + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::Resize(const MxBase::TensorBase &inputTensor, MxBase::TensorBase &outputTensor) +{ + auto shape = inputTensor.GetShape(); + MxBase::DvppDataInfo input = {}; + input.height = (uint32_t)shape[0] * YUV_BYTE_DE / YUV_BYTE_NU; + input.width = shape[1]; + input.heightStride = (uint32_t)shape[0] * YUV_BYTE_DE / YUV_BYTE_NU; + input.widthStride = shape[1]; + input.dataSize = inputTensor.GetByteSize(); + input.data = (uint8_t*)inputTensor.GetBuffer(); + const uint32_t resizeHeight = 304; + const uint32_t resizeWidth = 304; + MxBase::ResizeConfig resize = {}; + resize.height = resizeHeight; + resize.width = resizeWidth; + MxBase::DvppDataInfo output = {}; + APP_ERROR ret = dvppWrapper_->VpcResize(input, output, resize); + if (ret != APP_ERR_OK) { + LogError << "VpcResize failed, ret=" << ret << "."; + return ret; + } + MxBase::MemoryData memoryData((void*)output.data, output.dataSize, MemoryData::MemoryType::MEMORY_DVPP, deviceId_); + if (output.heightStride % VPC_H_ALIGN != 0) { + LogError << "Output data height(" << output.heightStride << ") can't be divided by " << VPC_H_ALIGN << "."; + MemoryHelper::MxbsFree(memoryData); + MemoryHelper::MxbsFree(memoryData); + return APP_ERR_COMM_INVALID_PARAM; + } + shape = {output.heightStride * YUV_BYTE_NU / YUV_BYTE_DE, output.widthStride}; + outputTensor = TensorBase(memoryData, false, shape, TENSOR_DTYPE_UINT8); + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::Inference(const std::vector &inputs, + std::vector &outputs) +{ + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + TensorBase 
tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs.push_back(tensor); + } + DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); // search for learning + APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration(endTime - startTime).count(); + g_inferCost.push_back(costMs); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::PostProcess(const std::vector &inputs, + std::vector> &clsInfos) +{ + APP_ERROR ret = post_->Process(inputs, clsInfos); + if (ret != APP_ERR_OK) { + LogError << "Process failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Densenet121Classify::GenerateInferResult(const std::string &imgPath, + std::vector> &BatchClsInfos) +{ + uint32_t batchIndex = 0; + LogInfo << "images path: " << imgPath; + std::string fileName = imgPath.substr(imgPath.find_last_of("/") + 1); + size_t dot = fileName.find_last_of("."); + + std::string resultPathName = "result"; + if (access(resultPathName.c_str(), 0) != 0) { + int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); + if (ret != 0) { + LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; + return ret; + } + } + std::string resFileName = "result/" + fileName.substr(0, dot) + "_1.txt"; + LogInfo << "file path for saving result: " < inputs = {}; + std::vector outputs = {}; + inputs.push_back(resizeImage); + ret = Inference(inputs, outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + std::vector> BatchClsInfos = {}; + ret = PostProcess(outputs, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + ret = GenerateInferResult(imgPath, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "Generate infer result failed, ret=" << ret << "."; + return ret; + } + + return APP_ERR_OK; +} diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.h b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.h index df1c7360e59ea1449d622ace35b9312befaa3fb6..9f36834890ffc87b18b3c5f3fac5dc72cfa424e9 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.h +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/Densenet121Classify.h @@ -1,56 +1,56 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MXBASE_DENSENET121CLASSIFY_H -#define MXBASE_DENSENET121CLASSIFY_H - -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "ClassPostProcessors/Resnet50PostProcess.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -extern std::vector g_inferCost; - -struct InitParam { - uint32_t deviceId; - std::string labelPath; - uint32_t classNum; - uint32_t topk; - bool softmax; - bool checkTensor; - std::string modelPath; -}; - -class Densenet121Classify { -public: - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - APP_ERROR ReadImage(const std::string &imgPath, MxBase::TensorBase &tensor); - APP_ERROR Resize(const MxBase::TensorBase &input, MxBase::TensorBase &output); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::vector &inputs, - std::vector> &clsInfos); - APP_ERROR GenerateInferResult(const std::string &imgPath, - std::vector> &clsInfos); - APP_ERROR Process(const std::string &imgPath); -private: - std::shared_ptr dvppWrapper_; - std::shared_ptr model_; - std::shared_ptr post_; - MxBase::ModelDesc modelDesc_; - uint32_t deviceId_ = 0; -}; -#endif +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MXBASE_DENSENET121CLASSIFY_H +#define MXBASE_DENSENET121CLASSIFY_H + +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "ClassPostProcessors/Resnet50PostProcess.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector g_inferCost; + +struct InitParam { + uint32_t deviceId; + std::string labelPath; + uint32_t classNum; + uint32_t topk; + bool softmax; + bool checkTensor; + std::string modelPath; +}; + +class Densenet121Classify { +public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR ReadImage(const std::string &imgPath, MxBase::TensorBase &tensor); + APP_ERROR Resize(const MxBase::TensorBase &input, MxBase::TensorBase &output); + APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); + APP_ERROR PostProcess(const std::vector &inputs, + std::vector> &clsInfos); + APP_ERROR GenerateInferResult(const std::string &imgPath, + std::vector> &clsInfos); + APP_ERROR Process(const std::string &imgPath); +private: + std::shared_ptr dvppWrapper_; + std::shared_ptr model_; + std::shared_ptr post_; + MxBase::ModelDesc modelDesc_; + uint32_t deviceId_ = 0; +}; +#endif diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/main.cpp b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/main.cpp index ebf02aa12ea7e8a9f093b93df3e8b509c9f0dcc7..b4c3a9df05518dc3b3e54a91a9e88fc6985ffff4 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/main.cpp +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/mxbase_infer/main.cpp @@ -1,69 +1,69 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include "Densenet121Classify.h" -#include "MxBase/Log/Log.h" - -namespace fs = std::experimental::filesystem; -namespace { -const uint32_t CLASS_NUM = 1000; -} -std::vector g_inferCost; - -int main(int argc, char* argv[]) -{ - if (argc <= 3) { - LogWarn << "Please enter model path | image path | label path, such as './densenet121 " - "./models/densenet121_304.om ./imagenet_val/ ./models/imagenet1000_clsidx_to_labels.names"; - return APP_ERR_OK; - } - - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.classNum = CLASS_NUM; - initParam.labelPath = argv[3]; - initParam.topk = 5; - initParam.softmax = false; - initParam.checkTensor = true; - initParam.modelPath = argv[1]; - auto densenet121 = std::make_shared(); - APP_ERROR ret = densenet121->Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "Densenet121Classify init failed, ret=" << ret << "."; - return ret; - } - - std::string imgDir = argv[2]; - for (auto & entry : fs::directory_iterator(imgDir)) { - LogInfo << "read image path " << entry.path(); - ret = densenet121->Process(entry.path()); - if (ret != APP_ERR_OK) { - LogError << "Densenet121Classify process failed, ret=" << ret << "."; - densenet121->DeInit(); - return ret; - } - } - densenet121->DeInit(); - double costSum = 0; - for (unsigned int i = 0; i < g_inferCost.size(); i++) { - costSum += g_inferCost[i]; - } - LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; - LogInfo << "The throughout: " << g_inferCost.size() * 1000 / costSum << " images/sec."; - return APP_ERR_OK; -} +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include "Densenet121Classify.h" +#include "MxBase/Log/Log.h" + +namespace fs = std::experimental::filesystem; +namespace { +const uint32_t CLASS_NUM = 1000; +} +std::vector g_inferCost; + +int main(int argc, char* argv[]) +{ + if (argc <= 3) { + LogWarn << "Please enter model path | image path | label path, such as './densenet121 " + "./models/densenet121_304.om ./imagenet_val/ ./models/imagenet1000_clsidx_to_labels.names"; + return APP_ERR_OK; + } + + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.classNum = CLASS_NUM; + initParam.labelPath = argv[3]; + initParam.topk = 5; + initParam.softmax = false; + initParam.checkTensor = true; + initParam.modelPath = argv[1]; + auto densenet121 = std::make_shared(); + APP_ERROR ret = densenet121->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "Densenet121Classify init failed, ret=" << ret << "."; + return ret; + } + + std::string imgDir = argv[2]; + for (auto & entry : fs::directory_iterator(imgDir)) { + LogInfo << "read image path " << entry.path(); + ret = densenet121->Process(entry.path()); + if (ret != APP_ERR_OK) { + LogError << "Densenet121Classify process failed, ret=" << ret << "."; + densenet121->DeInit(); + return ret; + } + } + densenet121->DeInit(); + double costSum = 0; + for (unsigned int i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughout: " << g_inferCost.size() * 1000 / costSum << " images/sec."; + return APP_ERR_OK; +} diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/utils/classification_task_metric.py b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/utils/classification_task_metric.py index 9cbd93f2435e4741e1ce2e17cf1041ecf6e0d60b..afc34f0468de4ccdf7764a64d59da63e08846835 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/utils/classification_task_metric.py +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/sdk_infer/utils/classification_task_metric.py @@ -1,187 +1,187 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
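The hunks that follow re-add classification_task_metric.py with what appears to be only whitespace/line-ending changes. Its accuracy logic records, for each image, the first rank at which a predicted class index matches the ground-truth label, then turns those per-rank hits into Top-1..Top-N accuracy with a cumulative sum. A minimal Python sketch of that accumulation, using illustrative names that are not part of the repository:

import numpy as np

def topn_accuracy(predictions, labels, topn=5):
    # predictions: per-image sequences of class indices, best guess first
    count_hit = np.zeros(topn)
    for pred, real_label in zip(predictions, labels):
        for i in range(min(len(pred), topn)):
            if int(pred[i]) == int(real_label):
                count_hit[i] += 1  # first hit lands at rank i, then stop
                break
    # cumulative sum converts rank-i hits into Top-1..Top-N accuracy
    return np.cumsum(count_hit) / max(len(labels), 1)

print(topn_accuracy([[3, 7, 1, 0, 9]], [7]))  # -> [0. 1. 1. 1. 1.]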
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - - prediction = ret[0] - n_labels = ret[1] - - gt = img_gt_dict[img_name] - if n_labels == 1000: - real_label = int(gt) - elif n_labels == 1001: - real_label = int(gt) + 1 - else: - real_label = int(gt) - - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt 
file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter target file folder | groud truth file | result folder | result json file name, such as" - "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json") - exit(1) - - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - exit() - - if not os.path.exists(annotation_file_path): - - print("Ground truth file does not exist.") - exit() - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - exit() - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) +#coding = utf-8 +#Copyright 2020 Huawei Technologies Co., Ltd +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + img_name = temp[0].split(".")[0] + img_lab = temp[1] + img_gt_dict[img_name] = img_lab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(temp): + data_vec[ind] = np.int(cls_ind) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + 
result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + + ret = load_statistical_predict_result(filepath) + + prediction = ret[0] + n_labels = ret[1] + + gt = img_gt_dict[img_name] + if n_labels == 1000: + real_label = int(gt) + elif n_labels == 1001: + real_label = int(gt) + 1 + else: + real_label = int(gt) + + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if str(real_label) == str(int(prediction[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Please enter target file folder | groud truth file | result folder | result json file name, such as" + "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . 
./result.json") + exit(1) + + if not os.path.exists(folder_davinci_target): + print("target file folder does not exist.") + exit() + + if not os.path.exists(annotation_file_path): + + print("Ground truth file does not exist.") + exit() + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + exit() + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_1p.sh index aee8cd7b4f3999c9500499b0f4a6faea568919bf..d8e17831e9f331eba35f7947813b91c21e258901 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_1p.sh @@ -1,153 +1,153 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Densenet121_ID0092_for_PyTorch" -#训练epoch -train_epochs=90 -#训练batch_size -batch_size=256 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -#sed -i "s|pass|break|g" ${cur_path}/../densenet121_1p_main.py -wait -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#训练 -nohup python3.7 ${cur_path}/../densenet121_1p_main.py \ - --workers 40 \ - --arch densenet121 \ - --npu $ASCEND_DEVICE_ID \ - --lr 0.1 \ - --momentum 0.9 \ - --amp \ - --print-freq 1 \ - --eval-freq 5 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -#参数改回 -#sed -i "s|break|pass|g" ${cur_path}/../densenet121_1p_main.py -wait -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`grep FPS 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'FPS@all' '{print $2}'|awk '{sum+=$1} END{print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc@1" '{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Densenet121_ID0092_for_PyTorch" +#训练epoch +train_epochs=90 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p 
$cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +#sed -i "s|pass|break|g" ${cur_path}/../densenet121_1p_main.py +wait +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#训练 +nohup python3.7 ${cur_path}/../densenet121_1p_main.py \ + --workers 40 \ + --arch densenet121 \ + --npu $ASCEND_DEVICE_ID \ + --lr 0.1 \ + --momentum 0.9 \ + --amp \ + --print-freq 1 \ + --eval-freq 5 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#参数改回 +#sed -i "s|break|pass|g" ${cur_path}/../densenet121_1p_main.py +wait +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'FPS@all' '{print $2}'|awk '{sum+=$1} END{print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc@1" '{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_8p.sh index 38d4c3f58fe7412db19b67f3c77cb417d1933231..bd734bf36dad285dc8ced656654791411a8f0d9f 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_full_8p.sh @@ -1,168 +1,168 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - - -#集合通信参数,不需要修改 -export 
RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -#export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Densenet121_ID0092_for_PyTorch" -#训练epoch -train_epochs=90 -#训练batch_size -batch_size=2048 -#训练step -train_steps= -#学习率 -learning_rate= - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -##################创建日志输出目录,根据模型审视################## -# 模型采用非循环方式启动多卡训练,创建日志输出目录如下;采用循环方式启动多卡训练的模型,在循环中创建日志输出目录,可参考CRNN模型 -# 非循环方式下8卡训练日志输出路径中的ASCEND_DEVICE_ID默认为0,只是人为指定文件夹名称, 不涉及训练业务 -ASCEND_DEVICE_ID=0 -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -#let a=RANK_ID*${corenum}/${RANK_SIZE} -#let b=RANK_ID+1 -#let c=b*${corenum}/${RANK_SIZE}-1 - -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path -for i in $(seq 0 7) -do - nohup python3.7 ${cur_path}/../densenet121_8p_main.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 160 \ - --arch densenet121 \ - --lr 0.8 \ - --print-freq 1 \ - --eval-freq 5 \ - --batch-size 2048 \ - --epoch 90 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --rank 0 \ - --gpu $i \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp \ - --benchmark 0 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -#FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - 
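The TrainingTime line above derives per-iteration latency from throughput: TrainingTime in milliseconds per step equals batch_size * 1000 / FPS, where FPS is the images-per-second figure pulled from the training log. A small Python equivalent of that awk expression, with purely illustrative numbers:

def training_time_ms(batch_size: int, fps: float) -> float:
    # mirrors: awk 'BEGIN{printf "%.2f\n", batch_size*1000/FPS}'
    return batch_size * 1000.0 / fps

print(round(training_time_ms(2048, 4000.0), 2))  # 512.0 ms per step if FPS were 4000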
-#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +#export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Densenet121_ID0092_for_PyTorch" +#训练epoch +train_epochs=90 +#训练batch_size +batch_size=2048 +#训练step +train_steps= +#学习率 +learning_rate= + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +##################创建日志输出目录,根据模型审视################## +# 模型采用非循环方式启动多卡训练,创建日志输出目录如下;采用循环方式启动多卡训练的模型,在循环中创建日志输出目录,可参考CRNN模型 +# 非循环方式下8卡训练日志输出路径中的ASCEND_DEVICE_ID默认为0,只是人为指定文件夹名称, 不涉及训练业务 +ASCEND_DEVICE_ID=0 +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +#let a=RANK_ID*${corenum}/${RANK_SIZE} +#let b=RANK_ID+1 +#let c=b*${corenum}/${RANK_SIZE}-1 + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path +for i in $(seq 0 7) +do + nohup python3.7 ${cur_path}/../densenet121_8p_main.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 160 \ + --arch densenet121 \ + --lr 0.8 \ + --print-freq 1 \ + --eval-freq 5 \ + --batch-size 2048 \ + --epoch 90 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed 
\ + --world-size 1 \ + --rank 0 \ + --gpu $i \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp \ + --benchmark 0 \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +#FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_1p.sh index 12c3708f5914e39cc09138e3081136577b9b262f..ed6cd11f6c8e86640db8eee38ac7a5f36e1cdf06 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_1p.sh @@ -1,152 +1,152 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Densenet121_ID0092_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=256 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 
-learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -sed -i "s|pass|break|g" ${cur_path}/../densenet121_1p_main.py -wait -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#训练 -nohup python3.7 ${cur_path}/../densenet121_1p_main.py \ - --workers 40 \ - --arch densenet121 \ - --npu $ASCEND_DEVICE_ID \ - --lr 0.1 \ - --momentum 0.9 \ - --amp \ - --print-freq 1 \ - --eval-freq 5 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --stop-step-num 50 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -#参数改回 -sed -i "s|break|pass|g" ${cur_path}/../densenet121_1p_main.py -wait -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'FPS@all' '{print $2}'|awk '{sum+=$1} END{print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = 
${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Densenet121_ID0092_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=256 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +sed -i "s|pass|break|g" ${cur_path}/../densenet121_1p_main.py +wait +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#训练 +nohup python3.7 ${cur_path}/../densenet121_1p_main.py \ + --workers 40 \ + --arch densenet121 \ + --npu $ASCEND_DEVICE_ID \ + --lr 0.1 \ + --momentum 0.9 \ + --amp \ + --print-freq 1 \ + --eval-freq 5 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --stop-step-num 50 \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#参数改回 +sed -i "s|break|pass|g" ${cur_path}/../densenet121_1p_main.py +wait +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'FPS@all' '{print $2}'|awk '{sum+=$1} END{print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 
'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_8p.sh index 5684bec213731b46d63d682e1eb5e58d33c5300c..e2002aa81ed647eead236d5d2668b9d8768213b0 100644 --- a/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch/test/train_performance_8p.sh @@ -1,172 +1,172 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -export SOC_VERSION=Ascend910 -export HCCL_CONNECT_TIMEOUT=600 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Densenet121_ID0092_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=2048 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -sed -i "s|pass|break|g" 
${cur_path}/../densenet121_8p_main.py -wait -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` -for i in $(seq 0 7) -do -let p_start=0+24*i -let p_end=23+24*i -nohup taskset -c $p_start-$p_end python3.7 ${cur_path}/../densenet121_8p_main.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 160 \ - --arch densenet121 \ - --lr 0.8 \ - --print-freq 1 \ - --eval-freq 5 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --dist-url 'tcp://127.0.0.1:50000' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --rank 0 \ - --gpu $i \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp \ - --benchmark 0 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -#参数改回 -sed -i "s|break|pass|g" ${cur_path}/../densenet121_8p_main.py -wait -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +export SOC_VERSION=Ascend910 +export HCCL_CONNECT_TIMEOUT=600 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Densenet121_ID0092_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=2048 
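The train_steps line just below sizes one epoch from the ImageNet-1k training set: train_steps = 1281167 / batch_size, truncated to an integer by expr. A quick Python check of that arithmetic (values shown only to illustrate):

print(1281167 // 256)   # 5004 steps per epoch at batch size 256
print(1281167 // 2048)  # 625 steps per epoch at batch size 2048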
+#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +sed -i "s|pass|break|g" ${cur_path}/../densenet121_8p_main.py +wait +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` +for i in $(seq 0 7) +do +let p_start=0+24*i +let p_end=23+24*i +nohup taskset -c $p_start-$p_end python3.7 ${cur_path}/../densenet121_8p_main.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 160 \ + --arch densenet121 \ + --lr 0.8 \ + --print-freq 1 \ + --eval-freq 5 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --dist-url 'tcp://127.0.0.1:50000' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --rank 0 \ + --gpu $i \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp \ + --benchmark 0 \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#参数改回 +sed -i "s|break|pass|g" ${cur_path}/../densenet121_8p_main.py +wait +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + 
+#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/LICENSE b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/LICENSE index 75b52484ea471f882c29e02693b4f02dba175b5e..d645695673349e3947e8e5ae42332d0ac3164cd7 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/LICENSE +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/LICENSE @@ -1,202 +1,202 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/__init__.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/__init__.py index 1046fb2b297682ca0c57fa16b23e9070050709a0..d475531251fe2eaa98d1af9430c603f42609e8a9 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/__init__.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/__init__.py @@ -1,228 +1,228 @@ -# Apache License -# Version 2.0, January 2004 -# http://www.apache.org/licenses/ -# -# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -# -# 1. Definitions. -# -# "License" shall mean the terms and conditions for use, reproduction, -# and distribution as defined by Sections 1 through 9 of this document. -# -# "Licensor" shall mean the copyright owner or entity authorized by -# the copyright owner that is granting the License. -# -# "Legal Entity" shall mean the union of the acting entity and all -# other entities that control, are controlled by, or are under common -# control with that entity. For the purposes of this definition, -# "control" means (i) the power, direct or indirect, to cause the -# direction or management of such entity, whether by contract or -# otherwise, or (ii) ownership of fifty percent (50%) or more of the -# outstanding shares, or (iii) beneficial ownership of such entity. -# -# "You" (or "Your") shall mean an individual or Legal Entity -# exercising permissions granted by this License. -# -# "Source" form shall mean the preferred form for making modifications, -# including but not limited to software source code, documentation -# source, and configuration files. -# -# "Object" form shall mean any form resulting from mechanical -# transformation or translation of a Source form, including but -# not limited to compiled object code, generated documentation, -# and conversions to other media types. -# -# "Work" shall mean the work of authorship, whether in Source or -# Object form, made available under the License, as indicated by a -# copyright notice that is included in or attached to the work -# (an example is provided in the Appendix below). -# -# "Derivative Works" shall mean any work, whether in Source or Object -# form, that is based on (or derived from) the Work and for which the -# editorial revisions, annotations, elaborations, or other modifications -# represent, as a whole, an original work of authorship. For the purposes -# of this License, Derivative Works shall not include works that remain -# separable from, or merely link (or bind by name) to the interfaces of, -# the Work and Derivative Works thereof. -# -# "Contribution" shall mean any work of authorship, including -# the original version of the Work and any modifications or additions -# to that Work or Derivative Works thereof, that is intentionally -# submitted to Licensor for inclusion in the Work by the copyright owner -# or by an individual or Legal Entity authorized to submit on behalf of -# the copyright owner. 
For the purposes of this definition, "submitted" -# means any form of electronic, verbal, or written communication sent -# to the Licensor or its representatives, including but not limited to -# communication on electronic mailing lists, source code control systems, -# and issue tracking systems that are managed by, or on behalf of, the -# Licensor for the purpose of discussing and improving the Work, but -# excluding communication that is conspicuously marked or otherwise -# designated in writing by the copyright owner as "Not a Contribution." -# -# "Contributor" shall mean Licensor and any individual or Legal Entity -# on behalf of whom a Contribution has been received by Licensor and -# subsequently incorporated within the Work. -# -# 2. Grant of Copyright License. Subject to the terms and conditions of -# this License, each Contributor hereby grants to You a perpetual, -# worldwide, non-exclusive, no-charge, royalty-free, irrevocable -# copyright license to reproduce, prepare Derivative Works of, -# publicly display, publicly perform, sublicense, and distribute the -# Work and such Derivative Works in Source or Object form. -# -# 3. Grant of Patent License. Subject to the terms and conditions of -# this License, each Contributor hereby grants to You a perpetual, -# worldwide, non-exclusive, no-charge, royalty-free, irrevocable -# (except as stated in this section) patent license to make, have made, -# use, offer to sell, sell, import, and otherwise transfer the Work, -# where such license applies only to those patent claims licensable -# by such Contributor that are necessarily infringed by their -# Contribution(s) alone or by combination of their Contribution(s) -# with the Work to which such Contribution(s) was submitted. If You -# institute patent litigation against any entity (including a -# cross-claim or counterclaim in a lawsuit) alleging that the Work -# or a Contribution incorporated within the Work constitutes direct -# or contributory patent infringement, then any patent licenses -# granted to You under this License for that Work shall terminate -# as of the date such litigation is filed. -# -# 4. Redistribution. You may reproduce and distribute copies of the -# Work or Derivative Works thereof in any medium, with or without -# modifications, and in Source or Object form, provided that You -# meet the following conditions: -# -# (a) You must give any other recipients of the Work or -# Derivative Works a copy of this License; and -# -# (b) You must cause any modified files to carry prominent notices -# stating that You changed the files; and -# -# (c) You must retain, in the Source form of any Derivative Works -# that You distribute, all copyright, patent, trademark, and -# attribution notices from the Source form of the Work, -# excluding those notices that do not pertain to any part of -# the Derivative Works; and -# -# (d) If the Work includes a "NOTICE" text file as part of its -# distribution, then any Derivative Works that You distribute must -# include a readable copy of the attribution notices contained -# within such NOTICE file, excluding those notices that do not -# pertain to any part of the Derivative Works, in at least one -# of the following places: within a NOTICE text file distributed -# as part of the Derivative Works; within the Source form or -# documentation, if provided along with the Derivative Works; or, -# within a display generated by the Derivative Works, if and -# wherever such third-party notices normally appear. 
The contents -# of the NOTICE file are for informational purposes only and -# do not modify the License. You may add Your own attribution -# notices within Derivative Works that You distribute, alongside -# or as an addendum to the NOTICE text from the Work, provided -# that such additional attribution notices cannot be construed -# as modifying the License. -# -# You may add Your own copyright statement to Your modifications and -# may provide additional or different license terms and conditions -# for use, reproduction, or distribution of Your modifications, or -# for any such Derivative Works as a whole, provided Your use, -# reproduction, and distribution of the Work otherwise complies with -# the conditions stated in this License. -# -# 5. Submission of Contributions. Unless You explicitly state otherwise, -# any Contribution intentionally submitted for inclusion in the Work -# by You to the Licensor shall be under the terms and conditions of -# this License, without any additional terms or conditions. -# Notwithstanding the above, nothing herein shall supersede or modify -# the terms of any separate license agreement you may have executed -# with Licensor regarding such Contributions. -# -# 6. Trademarks. This License does not grant permission to use the trade -# names, trademarks, service marks, or product names of the Licensor, -# except as required for reasonable and customary use in describing the -# origin of the Work and reproducing the content of the NOTICE file. -# -# 7. Disclaimer of Warranty. Unless required by applicable law or -# agreed to in writing, Licensor provides the Work (and each -# Contributor provides its Contributions) on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied, including, without limitation, any warranties or conditions -# of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -# PARTICULAR PURPOSE. You are solely responsible for determining the -# appropriateness of using or redistributing the Work and assume any -# risks associated with Your exercise of permissions under this License. -# -# 8. Limitation of Liability. In no event and under no legal theory, -# whether in tort (including negligence), contract, or otherwise, -# unless required by applicable law (such as deliberate and grossly -# negligent acts) or agreed to in writing, shall any Contributor be -# liable to You for damages, including any direct, indirect, special, -# incidental, or consequential damages of any character arising as a -# result of this License or out of the use or inability to use the -# Work (including but not limited to damages for loss of goodwill, -# work stoppage, computer failure or malfunction, or any and all -# other commercial damages or losses), even if such Contributor -# has been advised of the possibility of such damages. -# -# 9. Accepting Warranty or Additional Liability. While redistributing -# the Work or Derivative Works thereof, You may choose to offer, -# and charge a fee for, acceptance of support, warranty, indemnity, -# or other liability obligations and/or rights consistent with this -# License. However, in accepting such obligations, You may act only -# on Your own behalf and on Your sole responsibility, not on behalf -# of any other Contributor, and only if You agree to indemnify, -# defend, and hold each Contributor harmless for any liability -# incurred by, or claims asserted against, such Contributor by reason -# of your accepting any such warranty or additional liability. 
-# -# END OF TERMS AND CONDITIONS -# -# APPENDIX: How to apply the Apache License to your work. -# -# To apply the Apache License to your work, attach the following -# boilerplate notice, with the fields enclosed by brackets "[]" -# replaced with your own identifying information. (Don't include -# the brackets!) The text should be enclosed in the appropriate -# comment syntax for the file format. We also recommend that a -# file or class name and description of purpose be included on the -# same "printed page" as the copyright notice for easier -# identification within third-party archives. -# -# Copyright [yyyy] [name of copyright owner] -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__version__ = "0.7.0" -from .model import EfficientNet -from .utils import ( - GlobalParams, - BlockArgs, - BlockDecoder, - efficientnet, - get_model_params, -) -from .auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform -from .rmsprop_tf import RMSpropTF - +# Apache License +# Version 2.0, January 2004 +# http://www.apache.org/licenses/ +# +# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +# +# 1. Definitions. +# +# "License" shall mean the terms and conditions for use, reproduction, +# and distribution as defined by Sections 1 through 9 of this document. +# +# "Licensor" shall mean the copyright owner or entity authorized by +# the copyright owner that is granting the License. +# +# "Legal Entity" shall mean the union of the acting entity and all +# other entities that control, are controlled by, or are under common +# control with that entity. For the purposes of this definition, +# "control" means (i) the power, direct or indirect, to cause the +# direction or management of such entity, whether by contract or +# otherwise, or (ii) ownership of fifty percent (50%) or more of the +# outstanding shares, or (iii) beneficial ownership of such entity. +# +# "You" (or "Your") shall mean an individual or Legal Entity +# exercising permissions granted by this License. +# +# "Source" form shall mean the preferred form for making modifications, +# including but not limited to software source code, documentation +# source, and configuration files. 
+# +# "Object" form shall mean any form resulting from mechanical +# transformation or translation of a Source form, including but +# not limited to compiled object code, generated documentation, +# and conversions to other media types. +# +# "Work" shall mean the work of authorship, whether in Source or +# Object form, made available under the License, as indicated by a +# copyright notice that is included in or attached to the work +# (an example is provided in the Appendix below). +# +# "Derivative Works" shall mean any work, whether in Source or Object +# form, that is based on (or derived from) the Work and for which the +# editorial revisions, annotations, elaborations, or other modifications +# represent, as a whole, an original work of authorship. For the purposes +# of this License, Derivative Works shall not include works that remain +# separable from, or merely link (or bind by name) to the interfaces of, +# the Work and Derivative Works thereof. +# +# "Contribution" shall mean any work of authorship, including +# the original version of the Work and any modifications or additions +# to that Work or Derivative Works thereof, that is intentionally +# submitted to Licensor for inclusion in the Work by the copyright owner +# or by an individual or Legal Entity authorized to submit on behalf of +# the copyright owner. For the purposes of this definition, "submitted" +# means any form of electronic, verbal, or written communication sent +# to the Licensor or its representatives, including but not limited to +# communication on electronic mailing lists, source code control systems, +# and issue tracking systems that are managed by, or on behalf of, the +# Licensor for the purpose of discussing and improving the Work, but +# excluding communication that is conspicuously marked or otherwise +# designated in writing by the copyright owner as "Not a Contribution." +# +# "Contributor" shall mean Licensor and any individual or Legal Entity +# on behalf of whom a Contribution has been received by Licensor and +# subsequently incorporated within the Work. +# +# 2. Grant of Copyright License. Subject to the terms and conditions of +# this License, each Contributor hereby grants to You a perpetual, +# worldwide, non-exclusive, no-charge, royalty-free, irrevocable +# copyright license to reproduce, prepare Derivative Works of, +# publicly display, publicly perform, sublicense, and distribute the +# Work and such Derivative Works in Source or Object form. +# +# 3. Grant of Patent License. Subject to the terms and conditions of +# this License, each Contributor hereby grants to You a perpetual, +# worldwide, non-exclusive, no-charge, royalty-free, irrevocable +# (except as stated in this section) patent license to make, have made, +# use, offer to sell, sell, import, and otherwise transfer the Work, +# where such license applies only to those patent claims licensable +# by such Contributor that are necessarily infringed by their +# Contribution(s) alone or by combination of their Contribution(s) +# with the Work to which such Contribution(s) was submitted. If You +# institute patent litigation against any entity (including a +# cross-claim or counterclaim in a lawsuit) alleging that the Work +# or a Contribution incorporated within the Work constitutes direct +# or contributory patent infringement, then any patent licenses +# granted to You under this License for that Work shall terminate +# as of the date such litigation is filed. +# +# 4. Redistribution. 
You may reproduce and distribute copies of the +# Work or Derivative Works thereof in any medium, with or without +# modifications, and in Source or Object form, provided that You +# meet the following conditions: +# +# (a) You must give any other recipients of the Work or +# Derivative Works a copy of this License; and +# +# (b) You must cause any modified files to carry prominent notices +# stating that You changed the files; and +# +# (c) You must retain, in the Source form of any Derivative Works +# that You distribute, all copyright, patent, trademark, and +# attribution notices from the Source form of the Work, +# excluding those notices that do not pertain to any part of +# the Derivative Works; and +# +# (d) If the Work includes a "NOTICE" text file as part of its +# distribution, then any Derivative Works that You distribute must +# include a readable copy of the attribution notices contained +# within such NOTICE file, excluding those notices that do not +# pertain to any part of the Derivative Works, in at least one +# of the following places: within a NOTICE text file distributed +# as part of the Derivative Works; within the Source form or +# documentation, if provided along with the Derivative Works; or, +# within a display generated by the Derivative Works, if and +# wherever such third-party notices normally appear. The contents +# of the NOTICE file are for informational purposes only and +# do not modify the License. You may add Your own attribution +# notices within Derivative Works that You distribute, alongside +# or as an addendum to the NOTICE text from the Work, provided +# that such additional attribution notices cannot be construed +# as modifying the License. +# +# You may add Your own copyright statement to Your modifications and +# may provide additional or different license terms and conditions +# for use, reproduction, or distribution of Your modifications, or +# for any such Derivative Works as a whole, provided Your use, +# reproduction, and distribution of the Work otherwise complies with +# the conditions stated in this License. +# +# 5. Submission of Contributions. Unless You explicitly state otherwise, +# any Contribution intentionally submitted for inclusion in the Work +# by You to the Licensor shall be under the terms and conditions of +# this License, without any additional terms or conditions. +# Notwithstanding the above, nothing herein shall supersede or modify +# the terms of any separate license agreement you may have executed +# with Licensor regarding such Contributions. +# +# 6. Trademarks. This License does not grant permission to use the trade +# names, trademarks, service marks, or product names of the Licensor, +# except as required for reasonable and customary use in describing the +# origin of the Work and reproducing the content of the NOTICE file. +# +# 7. Disclaimer of Warranty. Unless required by applicable law or +# agreed to in writing, Licensor provides the Work (and each +# Contributor provides its Contributions) on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied, including, without limitation, any warranties or conditions +# of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +# PARTICULAR PURPOSE. You are solely responsible for determining the +# appropriateness of using or redistributing the Work and assume any +# risks associated with Your exercise of permissions under this License. +# +# 8. Limitation of Liability. 
In no event and under no legal theory, +# whether in tort (including negligence), contract, or otherwise, +# unless required by applicable law (such as deliberate and grossly +# negligent acts) or agreed to in writing, shall any Contributor be +# liable to You for damages, including any direct, indirect, special, +# incidental, or consequential damages of any character arising as a +# result of this License or out of the use or inability to use the +# Work (including but not limited to damages for loss of goodwill, +# work stoppage, computer failure or malfunction, or any and all +# other commercial damages or losses), even if such Contributor +# has been advised of the possibility of such damages. +# +# 9. Accepting Warranty or Additional Liability. While redistributing +# the Work or Derivative Works thereof, You may choose to offer, +# and charge a fee for, acceptance of support, warranty, indemnity, +# or other liability obligations and/or rights consistent with this +# License. However, in accepting such obligations, You may act only +# on Your own behalf and on Your sole responsibility, not on behalf +# of any other Contributor, and only if You agree to indemnify, +# defend, and hold each Contributor harmless for any liability +# incurred by, or claims asserted against, such Contributor by reason +# of your accepting any such warranty or additional liability. +# +# END OF TERMS AND CONDITIONS +# +# APPENDIX: How to apply the Apache License to your work. +# +# To apply the Apache License to your work, attach the following +# boilerplate notice, with the fields enclosed by brackets "[]" +# replaced with your own identifying information. (Don't include +# the brackets!) The text should be enclosed in the appropriate +# comment syntax for the file format. We also recommend that a +# file or class name and description of purpose be included on the +# same "printed page" as the copyright notice for easier +# identification within third-party archives. +# +# Copyright [yyyy] [name of copyright owner] +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__version__ = "0.7.0" +from .model import EfficientNet +from .utils import ( + GlobalParams, + BlockArgs, + BlockDecoder, + efficientnet, + get_model_params, +) +from .auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform +from .rmsprop_tf import RMSpropTF + diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/auto_augment.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/auto_augment.py index e063d63e1aeae9df0d05ea5700dfcfd6f0a6b2ef..071c49771c542fba51f958b38dabd0283857df13 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/auto_augment.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/auto_augment.py @@ -1,813 +1,813 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import math -import re -from PIL import Image, ImageOps, ImageEnhance, ImageChops -import PIL -import numpy as np - - -_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) - -_FILL = (128, 128, 128) - -# This signifies the max integer that the controller RNN could predict for the -# augmentation scheme. -_MAX_LEVEL = 10. 
- -_HPARAMS_DEFAULT = dict( - translate_const=250, - img_mean=_FILL, -) - -_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) - - -def _interpolation(kwargs): - interpolation = kwargs.pop('resample', Image.BILINEAR) - if isinstance(interpolation, (list, tuple)): - return random.choice(interpolation) - else: - return interpolation - - -def _check_args_tf(kwargs): - if 'fillcolor' in kwargs and _PIL_VER < (5, 0): - kwargs.pop('fillcolor') - kwargs['resample'] = _interpolation(kwargs) - - -def shear_x(img, factor, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), **kwargs) - - -def shear_y(img, factor, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), **kwargs) - - -def translate_x_rel(img, pct, **kwargs): - pixels = pct * img.size[0] - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) - - -def translate_y_rel(img, pct, **kwargs): - pixels = pct * img.size[1] - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) - - -def translate_x_abs(img, pixels, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) - - -def translate_y_abs(img, pixels, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) - - -def rotate(img, degrees, **kwargs): - _check_args_tf(kwargs) - if _PIL_VER >= (5, 2): - return img.rotate(degrees, **kwargs) - elif _PIL_VER >= (5, 0): - w, h = img.size - post_trans = (0, 0) - rotn_center = (w / 2.0, h / 2.0) - angle = -math.radians(degrees) - matrix = [ - round(math.cos(angle), 15), - round(math.sin(angle), 15), - 0.0, - round(-math.sin(angle), 15), - round(math.cos(angle), 15), - 0.0, - ] - - def transform(x, y, matrix): - (a, b, c, d, e, f) = matrix - return a * x + b * y + c, d * x + e * y + f - - matrix[2], matrix[5] = transform( - -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix - ) - matrix[2] += rotn_center[0] - matrix[5] += rotn_center[1] - return img.transform(img.size, Image.AFFINE, matrix, **kwargs) - else: - return img.rotate(degrees, resample=kwargs['resample']) - - -def auto_contrast(img, **__): - return ImageOps.autocontrast(img) - - -def invert(img, **__): - return ImageOps.invert(img) - - -def equalize(img, **__): - return ImageOps.equalize(img) - - -def solarize(img, thresh, **__): - return ImageOps.solarize(img, thresh) - - -def solarize_add(img, add, thresh=128, **__): - lut = [] - for i in range(256): - if i < thresh: - lut.append(min(255, i + add)) - else: - lut.append(i) - if img.mode in ("L", "RGB"): - if img.mode == "RGB" and len(lut) == 256: - lut = lut + lut + lut - return img.point(lut) - else: - return img - - -def posterize(img, bits_to_keep, **__): - if bits_to_keep >= 8: - return img - return ImageOps.posterize(img, bits_to_keep) - - -def contrast(img, factor, **__): - return ImageEnhance.Contrast(img).enhance(factor) - - -def color(img, factor, **__): - return ImageEnhance.Color(img).enhance(factor) - - -def brightness(img, factor, **__): - return ImageEnhance.Brightness(img).enhance(factor) - - -def sharpness(img, factor, **__): - return ImageEnhance.Sharpness(img).enhance(factor) - - -def _randomly_negate(v): - """With 50% prob, negate the value""" - return -v if random.random() > 0.5 else v - - -def _rotate_level_to_arg(level, _hparams): - # range [-30, 30] - level = 
(level / _MAX_LEVEL) * 30. - level = _randomly_negate(level) - return level, - - -def _enhance_level_to_arg(level, _hparams): - # range [0.1, 1.9] - return (level / _MAX_LEVEL) * 1.8 + 0.1, - - -def _enhance_increasing_level_to_arg(level, _hparams): - # the 'no change' level is 1.0, moving away from that towards 0. or 2.0 increases the enhancement blend - # range [0.1, 1.9] - level = (level / _MAX_LEVEL) * .9 - level = 1.0 + _randomly_negate(level) - return level, - - -def _shear_level_to_arg(level, _hparams): - # range [-0.3, 0.3] - level = (level / _MAX_LEVEL) * 0.3 - level = _randomly_negate(level) - return level, - - -def _translate_abs_level_to_arg(level, hparams): - translate_const = hparams['translate_const'] - level = (level / _MAX_LEVEL) * float(translate_const) - level = _randomly_negate(level) - return level, - - -def _translate_rel_level_to_arg(level, hparams): - # default range [-0.45, 0.45] - translate_pct = hparams.get('translate_pct', 0.45) - level = (level / _MAX_LEVEL) * translate_pct - level = _randomly_negate(level) - return level, - - -def _posterize_level_to_arg(level, _hparams): - # As per Tensorflow TPU EfficientNet impl - # range [0, 4], 'keep 0 up to 4 MSB of original image' - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 4), - - -def _posterize_increasing_level_to_arg(level, hparams): - # As per Tensorflow models research and UDA impl - # range [4, 0], 'keep 4 down to 0 MSB of original image', - # intensity/severity of augmentation increases with level - return 4 - _posterize_level_to_arg(level, hparams)[0], - - -def _posterize_original_level_to_arg(level, _hparams): - # As per original AutoAugment paper description - # range [4, 8], 'keep 4 up to 8 MSB of image' - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 4) + 4, - - -def _solarize_level_to_arg(level, _hparams): - # range [0, 256] - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 256), - - -def _solarize_increasing_level_to_arg(level, _hparams): - # range [0, 256] - # intensity/severity of augmentation increases with level - return 256 - _solarize_level_to_arg(level, _hparams)[0], - - -def _solarize_add_level_to_arg(level, _hparams): - # range [0, 110] - return int((level / _MAX_LEVEL) * 110), - - -LEVEL_TO_ARG = { - 'AutoContrast': None, - 'Equalize': None, - 'Invert': None, - 'Rotate': _rotate_level_to_arg, - # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers - 'Posterize': _posterize_level_to_arg, - 'PosterizeIncreasing': _posterize_increasing_level_to_arg, - 'PosterizeOriginal': _posterize_original_level_to_arg, - 'Solarize': _solarize_level_to_arg, - 'SolarizeIncreasing': _solarize_increasing_level_to_arg, - 'SolarizeAdd': _solarize_add_level_to_arg, - 'Color': _enhance_level_to_arg, - 'ColorIncreasing': _enhance_increasing_level_to_arg, - 'Contrast': _enhance_level_to_arg, - 'ContrastIncreasing': _enhance_increasing_level_to_arg, - 'Brightness': _enhance_level_to_arg, - 'BrightnessIncreasing': _enhance_increasing_level_to_arg, - 'Sharpness': _enhance_level_to_arg, - 'SharpnessIncreasing': _enhance_increasing_level_to_arg, - 'ShearX': _shear_level_to_arg, - 'ShearY': _shear_level_to_arg, - 'TranslateX': _translate_abs_level_to_arg, - 'TranslateY': _translate_abs_level_to_arg, - 'TranslateXRel': _translate_rel_level_to_arg, - 'TranslateYRel': _translate_rel_level_to_arg, -} - - -NAME_TO_OP = { - 
'AutoContrast': auto_contrast, - 'Equalize': equalize, - 'Invert': invert, - 'Rotate': rotate, - 'Posterize': posterize, - 'PosterizeIncreasing': posterize, - 'PosterizeOriginal': posterize, - 'Solarize': solarize, - 'SolarizeIncreasing': solarize, - 'SolarizeAdd': solarize_add, - 'Color': color, - 'ColorIncreasing': color, - 'Contrast': contrast, - 'ContrastIncreasing': contrast, - 'Brightness': brightness, - 'BrightnessIncreasing': brightness, - 'Sharpness': sharpness, - 'SharpnessIncreasing': sharpness, - 'ShearX': shear_x, - 'ShearY': shear_y, - 'TranslateX': translate_x_abs, - 'TranslateY': translate_y_abs, - 'TranslateXRel': translate_x_rel, - 'TranslateYRel': translate_y_rel, -} - - -class AugmentOp: - - def __init__(self, name, prob=0.5, magnitude=10, hparams=None): - hparams = hparams or _HPARAMS_DEFAULT - self.aug_fn = NAME_TO_OP[name] - self.level_fn = LEVEL_TO_ARG[name] - self.prob = prob - self.magnitude = magnitude - self.hparams = hparams.copy() - self.kwargs = dict( - fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, - resample=hparams['interpolation'] if 'interpolation' in hparams else _RANDOM_INTERPOLATION, - ) - - # If magnitude_std is > 0, we introduce some randomness - # in the usually fixed policy and sample magnitude from a normal distribution - # with mean `magnitude` and std-dev of `magnitude_std`. - # NOTE This is my own hack, being tested, not in papers or reference impls. - self.magnitude_std = self.hparams.get('magnitude_std', 0) - - def __call__(self, img): - if self.prob < 1.0 and random.random() > self.prob: - return img - magnitude = self.magnitude - if self.magnitude_std and self.magnitude_std > 0: - magnitude = random.gauss(magnitude, self.magnitude_std) - magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range - level_args = self.level_fn(magnitude, self.hparams) if self.level_fn is not None else tuple() - return self.aug_fn(img, *level_args, **self.kwargs) - - -def auto_augment_policy_v0(hparams): - # ImageNet v0 policy from TPU EfficientNet impl, cannot find a paper reference. 
- policy = [ - [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], - [('Color', 0.4, 9), ('Equalize', 0.6, 3)], - [('Color', 0.4, 1), ('Rotate', 0.6, 8)], - [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], - [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], - [('Color', 0.2, 0), ('Equalize', 0.8, 8)], - [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], - [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], - [('Color', 0.6, 1), ('Equalize', 1.0, 2)], - [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], - [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], - [('Color', 0.4, 7), ('Equalize', 0.6, 0)], - [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)], - [('Solarize', 0.6, 8), ('Color', 0.6, 9)], - [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], - [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], - [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], - [('ShearY', 0.8, 0), ('Color', 0.6, 4)], - [('Color', 1.0, 0), ('Rotate', 0.6, 2)], - [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], - [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], - [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], - [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)], # This results in black image with Tpu posterize - [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], - [('Color', 0.8, 6), ('Rotate', 0.4, 5)], - ] - pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] - return pc - - -def auto_augment_policy_v0r(hparams): - # ImageNet v0 policy from TPU EfficientNet impl, with variation of Posterize used - # in Google research implementation (number of bits discarded increases with magnitude) - policy = [ - [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], - [('Color', 0.4, 9), ('Equalize', 0.6, 3)], - [('Color', 0.4, 1), ('Rotate', 0.6, 8)], - [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], - [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], - [('Color', 0.2, 0), ('Equalize', 0.8, 8)], - [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], - [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], - [('Color', 0.6, 1), ('Equalize', 1.0, 2)], - [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], - [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], - [('Color', 0.4, 7), ('Equalize', 0.6, 0)], - [('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)], - [('Solarize', 0.6, 8), ('Color', 0.6, 9)], - [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], - [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], - [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], - [('ShearY', 0.8, 0), ('Color', 0.6, 4)], - [('Color', 1.0, 0), ('Rotate', 0.6, 2)], - [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], - [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], - [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], - [('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)], - [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], - [('Color', 0.8, 6), ('Rotate', 0.4, 5)], - ] - pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] - return pc - - -def auto_augment_policy_original(hparams): - # ImageNet policy from https://arxiv.org/abs/1805.09501 - policy = [ - [('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - [('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], - [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], - [('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)], - [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], - [('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)], - [('Rotate', 0.8, 8), ('Color', 0.4, 0)], - [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], - 
[('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Rotate', 0.8, 8), ('Color', 1.0, 2)], - [('Color', 0.8, 8), ('Solarize', 0.8, 7)], - [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], - [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], - [('Color', 0.4, 0), ('Equalize', 0.6, 3)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - ] - pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] - return pc - - -def auto_augment_policy_originalr(hparams): - # ImageNet policy from https://arxiv.org/abs/1805.09501 with research posterize variation - policy = [ - [('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - [('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], - [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], - [('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)], - [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], - [('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)], - [('Rotate', 0.8, 8), ('Color', 0.4, 0)], - [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], - [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Rotate', 0.8, 8), ('Color', 1.0, 2)], - [('Color', 0.8, 8), ('Solarize', 0.8, 7)], - [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], - [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], - [('Color', 0.4, 0), ('Equalize', 0.6, 3)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - ] - pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] - return pc - - -def auto_augment_policy(name='v0', hparams=None): - hparams = hparams or _HPARAMS_DEFAULT - if name == 'original': - return auto_augment_policy_original(hparams) - elif name == 'originalr': - return auto_augment_policy_originalr(hparams) - elif name == 'v0': - return auto_augment_policy_v0(hparams) - elif name == 'v0r': - return auto_augment_policy_v0r(hparams) - else: - assert False, 'Unknown AA policy (%s)' % name - - -class AutoAugment: - - def __init__(self, policy): - self.policy = policy - - def __call__(self, img): - sub_policy = random.choice(self.policy) - for op in sub_policy: - img = op(img) - return img - - -def auto_augment_transform(config_str, hparams): - """ - Create a AutoAugment transform - - :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr'). 
- The remaining sections, not order sepecific determine - 'mstd' - float std deviation of magnitude noise applied - Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5 - - :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme - - :return: A PyTorch compatible Transform - """ - config = config_str.split('-') - policy_name = config[0] - config = config[1:] - for c in config: - cs = re.split(r'(\d.*)', c) - if len(cs) < 2: - continue - key, val = cs[:2] - if key == 'mstd': - # noise param injected via hparams for now - hparams.setdefault('magnitude_std', float(val)) - else: - assert False, 'Unknown AutoAugment config section' - aa_policy = auto_augment_policy(policy_name, hparams=hparams) - return AutoAugment(aa_policy) - - -_RAND_TRANSFORMS = [ - 'AutoContrast', - 'Equalize', - 'Invert', - 'Rotate', - 'Posterize', - 'Solarize', - 'SolarizeAdd', - 'Color', - 'Contrast', - 'Brightness', - 'Sharpness', - 'ShearX', - 'ShearY', - 'TranslateXRel', - 'TranslateYRel', - #'Cutout' # NOTE I've implement this as random erasing separately -] - - -_RAND_INCREASING_TRANSFORMS = [ - 'AutoContrast', - 'Equalize', - 'Invert', - 'Rotate', - 'PosterizeIncreasing', - 'SolarizeIncreasing', - 'SolarizeAdd', - 'ColorIncreasing', - 'ContrastIncreasing', - 'BrightnessIncreasing', - 'SharpnessIncreasing', - 'ShearX', - 'ShearY', - 'TranslateXRel', - 'TranslateYRel', - #'Cutout' # NOTE I've implement this as random erasing separately -] - - - -# These experimental weights are based loosely on the relative improvements mentioned in paper. -# They may not result in increased performance, but could likely be tuned to so. -_RAND_CHOICE_WEIGHTS_0 = { - 'Rotate': 0.3, - 'ShearX': 0.2, - 'ShearY': 0.2, - 'TranslateXRel': 0.1, - 'TranslateYRel': 0.1, - 'Color': .025, - 'Sharpness': 0.025, - 'AutoContrast': 0.025, - 'Solarize': .005, - 'SolarizeAdd': .005, - 'Contrast': .005, - 'Brightness': .005, - 'Equalize': .005, - 'Posterize': 0, - 'Invert': 0, -} - - -def _select_rand_weights(weight_idx=0, transforms=None): - transforms = transforms or _RAND_TRANSFORMS - assert weight_idx == 0 # only one set of weights currently - rand_weights = _RAND_CHOICE_WEIGHTS_0 - probs = [rand_weights[k] for k in transforms] - probs /= np.sum(probs) - return probs - - -def rand_augment_ops(magnitude=10, hparams=None, transforms=None): - hparams = hparams or _HPARAMS_DEFAULT - transforms = transforms or _RAND_TRANSFORMS - return [AugmentOp( - name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms] - - -class RandAugment: - def __init__(self, ops, num_layers=2, choice_weights=None): - self.ops = ops - self.num_layers = num_layers - self.choice_weights = choice_weights - - def __call__(self, img): - # no replacement when using weighted choice - ops = np.random.choice( - self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights) - for op in ops: - img = op(img) - return img - - -def rand_augment_transform(config_str, hparams): - """ - Create a RandAugment transform - - :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). 
The remaining - sections, not order sepecific determine - 'm' - integer magnitude of rand augment - 'n' - integer num layers (number of transform ops selected per image) - 'w' - integer probabiliy weight index (index of a set of weights to influence choice of op) - 'mstd' - float std deviation of magnitude noise applied - 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) - Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 - 'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2 - - :param hparams: Other hparams (kwargs) for the RandAugmentation scheme - - :return: A PyTorch compatible Transform - """ - magnitude = _MAX_LEVEL # default to _MAX_LEVEL for magnitude (currently 10) - num_layers = 2 # default to 2 ops per image - weight_idx = None # default to no probability weights for op choice - transforms = _RAND_TRANSFORMS - config = config_str.split('-') - assert config[0] == 'rand' - config = config[1:] - for c in config: - cs = re.split(r'(\d.*)', c) - if len(cs) < 2: - continue - key, val = cs[:2] - if key == 'mstd': - # noise param injected via hparams for now - hparams.setdefault('magnitude_std', float(val)) - elif key == 'inc': - if bool(val): - transforms = _RAND_INCREASING_TRANSFORMS - elif key == 'm': - magnitude = int(val) - elif key == 'n': - num_layers = int(val) - elif key == 'w': - weight_idx = int(val) - else: - assert False, 'Unknown RandAugment config section' - ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms) - choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx) - return RandAugment(ra_ops, num_layers, choice_weights=choice_weights) - - -_AUGMIX_TRANSFORMS = [ - 'AutoContrast', - 'ColorIncreasing', # not in paper - 'ContrastIncreasing', # not in paper - 'BrightnessIncreasing', # not in paper - 'SharpnessIncreasing', # not in paper - 'Equalize', - 'Rotate', - 'PosterizeIncreasing', - 'SolarizeIncreasing', - 'ShearX', - 'ShearY', - 'TranslateXRel', - 'TranslateYRel', -] - - -def augmix_ops(magnitude=10, hparams=None, transforms=None): - hparams = hparams or _HPARAMS_DEFAULT - transforms = transforms or _AUGMIX_TRANSFORMS - return [AugmentOp( - name, prob=1.0, magnitude=magnitude, hparams=hparams) for name in transforms] - - -class AugMixAugment: - """ AugMix Transform - Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py - From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - - https://arxiv.org/abs/1912.02781 - """ - def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False): - self.ops = ops - self.alpha = alpha - self.width = width - self.depth = depth - self.blended = blended # blended mode is faster but not well tested - - def _calc_blended_weights(self, ws, m): - ws = ws * m - cump = 1. - rws = [] - for w in ws[::-1]: - alpha = w / cump - cump *= (1 - alpha) - rws.append(alpha) - return np.array(rws[::-1], dtype=np.float32) - - def _apply_blended(self, img, mixing_weights, m): - # This is my first crack and implementing a slightly faster mixed augmentation. Instead - # of accumulating the mix for each chain in a Numpy array and then blending with original, - # it recomputes the blending coefficients and applies one PIL image blend per chain. - # TODO the results appear in the right ballpark but they differ by more than rounding. 
- img_orig = img.copy() - ws = self._calc_blended_weights(mixing_weights, m) - for w in ws: - depth = self.depth if self.depth > 0 else np.random.randint(1, 4) - ops = np.random.choice(self.ops, depth, replace=True) - img_aug = img_orig # no ops are in-place, deep copy not necessary - for op in ops: - img_aug = op(img_aug) - img = Image.blend(img, img_aug, w) - return img - - def _apply_basic(self, img, mixing_weights, m): - # This is a literal adaptation of the paper/official implementation without normalizations and - # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the - # typical augmentation transforms, could use a GPU / Kornia implementation. - img_shape = img.size[0], img.size[1], len(img.getbands()) - mixed = np.zeros(img_shape, dtype=np.float32) - for mw in mixing_weights: - depth = self.depth if self.depth > 0 else np.random.randint(1, 4) - ops = np.random.choice(self.ops, depth, replace=True) - img_aug = img # no ops are in-place, deep copy not necessary - for op in ops: - img_aug = op(img_aug) - mixed += mw * np.asarray(img_aug, dtype=np.float32) - np.clip(mixed, 0, 255., out=mixed) - mixed = Image.fromarray(mixed.astype(np.uint8)) - return Image.blend(img, mixed, m) - - def __call__(self, img): - mixing_weights = np.float32(np.random.dirichlet([self.alpha] * self.width)) - m = np.float32(np.random.beta(self.alpha, self.alpha)) - if self.blended: - mixed = self._apply_blended(img, mixing_weights, m) - else: - mixed = self._apply_basic(img, mixing_weights, m) - return mixed - - -def augment_and_mix_transform(config_str, hparams): - """ Create AugMix PyTorch transform - - :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining - sections, not order sepecific determine - 'm' - integer magnitude (severity) of augmentation mix (default: 3) - 'w' - integer width of augmentation chain (default: 3) - 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) - 'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0) - 'mstd' - float std deviation of magnitude noise applied (default: 0) - Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2 - - :param hparams: Other hparams (kwargs) for the Augmentation transforms - - :return: A PyTorch compatible Transform - """ - magnitude = 3 - width = 3 - depth = -1 - alpha = 1. - blended = False - config = config_str.split('-') - assert config[0] == 'augmix' - config = config[1:] - for c in config: - cs = re.split(r'(\d.*)', c) - if len(cs) < 2: - continue - key, val = cs[:2] - if key == 'mstd': - # noise param injected via hparams for now - hparams.setdefault('magnitude_std', float(val)) - elif key == 'm': - magnitude = int(val) - elif key == 'w': - width = int(val) - elif key == 'd': - depth = int(val) - elif key == 'a': - alpha = float(val) - elif key == 'b': - blended = bool(val) - else: - assert False, 'Unknown AugMix config section' - ops = augmix_ops(magnitude=magnitude, hparams=hparams) +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import math +import re +from PIL import Image, ImageOps, ImageEnhance, ImageChops +import PIL +import numpy as np + + +_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) + +_FILL = (128, 128, 128) + +# This signifies the max integer that the controller RNN could predict for the +# augmentation scheme. +_MAX_LEVEL = 10. + +_HPARAMS_DEFAULT = dict( + translate_const=250, + img_mean=_FILL, +) + +_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) + + +def _interpolation(kwargs): + interpolation = kwargs.pop('resample', Image.BILINEAR) + if isinstance(interpolation, (list, tuple)): + return random.choice(interpolation) + else: + return interpolation + + +def _check_args_tf(kwargs): + if 'fillcolor' in kwargs and _PIL_VER < (5, 0): + kwargs.pop('fillcolor') + kwargs['resample'] = _interpolation(kwargs) + + +def shear_x(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), **kwargs) + + +def shear_y(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), **kwargs) + + +def translate_x_rel(img, pct, **kwargs): + pixels = pct * img.size[0] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + + +def translate_y_rel(img, pct, **kwargs): + pixels = pct * img.size[1] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + + +def translate_x_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + + +def translate_y_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + + +def rotate(img, degrees, **kwargs): + _check_args_tf(kwargs) + if _PIL_VER >= (5, 2): + return img.rotate(degrees, **kwargs) + elif _PIL_VER >= (5, 0): + w, h = img.size + post_trans = (0, 0) + rotn_center = (w / 2.0, h / 2.0) + angle = -math.radians(degrees) + matrix = [ + round(math.cos(angle), 15), + round(math.sin(angle), 15), + 0.0, + round(-math.sin(angle), 15), + round(math.cos(angle), 15), + 0.0, + ] + + def transform(x, y, matrix): + (a, b, c, d, e, f) = matrix + return a * x + b * y + c, d * x + e * y + f + + matrix[2], matrix[5] = transform( + -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix + ) + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + return img.transform(img.size, Image.AFFINE, matrix, **kwargs) + else: + return img.rotate(degrees, resample=kwargs['resample']) + + +def auto_contrast(img, **__): + return ImageOps.autocontrast(img) + + +def invert(img, **__): + return ImageOps.invert(img) + + +def equalize(img, **__): + return ImageOps.equalize(img) + + +def solarize(img, thresh, **__): + return ImageOps.solarize(img, thresh) + + +def solarize_add(img, add, thresh=128, **__): + lut = [] + for i in range(256): + if i < thresh: + lut.append(min(255, i + add)) + else: + lut.append(i) + if img.mode in ("L", "RGB"): + if img.mode == "RGB" and 
len(lut) == 256: + lut = lut + lut + lut + return img.point(lut) + else: + return img + + +def posterize(img, bits_to_keep, **__): + if bits_to_keep >= 8: + return img + return ImageOps.posterize(img, bits_to_keep) + + +def contrast(img, factor, **__): + return ImageEnhance.Contrast(img).enhance(factor) + + +def color(img, factor, **__): + return ImageEnhance.Color(img).enhance(factor) + + +def brightness(img, factor, **__): + return ImageEnhance.Brightness(img).enhance(factor) + + +def sharpness(img, factor, **__): + return ImageEnhance.Sharpness(img).enhance(factor) + + +def _randomly_negate(v): + """With 50% prob, negate the value""" + return -v if random.random() > 0.5 else v + + +def _rotate_level_to_arg(level, _hparams): + # range [-30, 30] + level = (level / _MAX_LEVEL) * 30. + level = _randomly_negate(level) + return level, + + +def _enhance_level_to_arg(level, _hparams): + # range [0.1, 1.9] + return (level / _MAX_LEVEL) * 1.8 + 0.1, + + +def _enhance_increasing_level_to_arg(level, _hparams): + # the 'no change' level is 1.0, moving away from that towards 0. or 2.0 increases the enhancement blend + # range [0.1, 1.9] + level = (level / _MAX_LEVEL) * .9 + level = 1.0 + _randomly_negate(level) + return level, + + +def _shear_level_to_arg(level, _hparams): + # range [-0.3, 0.3] + level = (level / _MAX_LEVEL) * 0.3 + level = _randomly_negate(level) + return level, + + +def _translate_abs_level_to_arg(level, hparams): + translate_const = hparams['translate_const'] + level = (level / _MAX_LEVEL) * float(translate_const) + level = _randomly_negate(level) + return level, + + +def _translate_rel_level_to_arg(level, hparams): + # default range [-0.45, 0.45] + translate_pct = hparams.get('translate_pct', 0.45) + level = (level / _MAX_LEVEL) * translate_pct + level = _randomly_negate(level) + return level, + + +def _posterize_level_to_arg(level, _hparams): + # As per Tensorflow TPU EfficientNet impl + # range [0, 4], 'keep 0 up to 4 MSB of original image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4), + + +def _posterize_increasing_level_to_arg(level, hparams): + # As per Tensorflow models research and UDA impl + # range [4, 0], 'keep 4 down to 0 MSB of original image', + # intensity/severity of augmentation increases with level + return 4 - _posterize_level_to_arg(level, hparams)[0], + + +def _posterize_original_level_to_arg(level, _hparams): + # As per original AutoAugment paper description + # range [4, 8], 'keep 4 up to 8 MSB of image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4) + 4, + + +def _solarize_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 256), + + +def _solarize_increasing_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation increases with level + return 256 - _solarize_level_to_arg(level, _hparams)[0], + + +def _solarize_add_level_to_arg(level, _hparams): + # range [0, 110] + return int((level / _MAX_LEVEL) * 110), + + +LEVEL_TO_ARG = { + 'AutoContrast': None, + 'Equalize': None, + 'Invert': None, + 'Rotate': _rotate_level_to_arg, + # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers + 'Posterize': _posterize_level_to_arg, + 'PosterizeIncreasing': _posterize_increasing_level_to_arg, + 'PosterizeOriginal': _posterize_original_level_to_arg, + 'Solarize': _solarize_level_to_arg, + 
'SolarizeIncreasing': _solarize_increasing_level_to_arg, + 'SolarizeAdd': _solarize_add_level_to_arg, + 'Color': _enhance_level_to_arg, + 'ColorIncreasing': _enhance_increasing_level_to_arg, + 'Contrast': _enhance_level_to_arg, + 'ContrastIncreasing': _enhance_increasing_level_to_arg, + 'Brightness': _enhance_level_to_arg, + 'BrightnessIncreasing': _enhance_increasing_level_to_arg, + 'Sharpness': _enhance_level_to_arg, + 'SharpnessIncreasing': _enhance_increasing_level_to_arg, + 'ShearX': _shear_level_to_arg, + 'ShearY': _shear_level_to_arg, + 'TranslateX': _translate_abs_level_to_arg, + 'TranslateY': _translate_abs_level_to_arg, + 'TranslateXRel': _translate_rel_level_to_arg, + 'TranslateYRel': _translate_rel_level_to_arg, +} + + +NAME_TO_OP = { + 'AutoContrast': auto_contrast, + 'Equalize': equalize, + 'Invert': invert, + 'Rotate': rotate, + 'Posterize': posterize, + 'PosterizeIncreasing': posterize, + 'PosterizeOriginal': posterize, + 'Solarize': solarize, + 'SolarizeIncreasing': solarize, + 'SolarizeAdd': solarize_add, + 'Color': color, + 'ColorIncreasing': color, + 'Contrast': contrast, + 'ContrastIncreasing': contrast, + 'Brightness': brightness, + 'BrightnessIncreasing': brightness, + 'Sharpness': sharpness, + 'SharpnessIncreasing': sharpness, + 'ShearX': shear_x, + 'ShearY': shear_y, + 'TranslateX': translate_x_abs, + 'TranslateY': translate_y_abs, + 'TranslateXRel': translate_x_rel, + 'TranslateYRel': translate_y_rel, +} + + +class AugmentOp: + + def __init__(self, name, prob=0.5, magnitude=10, hparams=None): + hparams = hparams or _HPARAMS_DEFAULT + self.aug_fn = NAME_TO_OP[name] + self.level_fn = LEVEL_TO_ARG[name] + self.prob = prob + self.magnitude = magnitude + self.hparams = hparams.copy() + self.kwargs = dict( + fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, + resample=hparams['interpolation'] if 'interpolation' in hparams else _RANDOM_INTERPOLATION, + ) + + # If magnitude_std is > 0, we introduce some randomness + # in the usually fixed policy and sample magnitude from a normal distribution + # with mean `magnitude` and std-dev of `magnitude_std`. + # NOTE This is my own hack, being tested, not in papers or reference impls. + self.magnitude_std = self.hparams.get('magnitude_std', 0) + + def __call__(self, img): + if self.prob < 1.0 and random.random() > self.prob: + return img + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn(magnitude, self.hparams) if self.level_fn is not None else tuple() + return self.aug_fn(img, *level_args, **self.kwargs) + + +def auto_augment_policy_v0(hparams): + # ImageNet v0 policy from TPU EfficientNet impl, cannot find a paper reference. 
+ policy = [ + [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Color', 0.4, 1), ('Rotate', 0.6, 8)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], + [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], + [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], + [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], + [('ShearY', 0.8, 0), ('Color', 0.6, 4)], + [('Color', 1.0, 0), ('Rotate', 0.6, 2)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], + [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)], # This results in black image with Tpu posterize + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + [('Color', 0.8, 6), ('Rotate', 0.4, 5)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_v0r(hparams): + # ImageNet v0 policy from TPU EfficientNet impl, with variation of Posterize used + # in Google research implementation (number of bits discarded increases with magnitude) + policy = [ + [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Color', 0.4, 1), ('Rotate', 0.6, 8)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], + [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], + [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], + [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], + [('ShearY', 0.8, 0), ('Color', 0.6, 4)], + [('Color', 1.0, 0), ('Rotate', 0.6, 2)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], + [('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)], + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + [('Color', 0.8, 6), ('Rotate', 0.4, 5)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_original(hparams): + # ImageNet policy from https://arxiv.org/abs/1805.09501 + policy = [ + [('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + [('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], + [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], + [('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)], + [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], + [('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)], + [('Rotate', 0.8, 8), ('Color', 0.4, 0)], + [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], + 
[('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Rotate', 0.8, 8), ('Color', 1.0, 2)], + [('Color', 0.8, 8), ('Solarize', 0.8, 7)], + [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], + [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], + [('Color', 0.4, 0), ('Equalize', 0.6, 3)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_originalr(hparams): + # ImageNet policy from https://arxiv.org/abs/1805.09501 with research posterize variation + policy = [ + [('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + [('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], + [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], + [('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)], + [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], + [('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)], + [('Rotate', 0.8, 8), ('Color', 0.4, 0)], + [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], + [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Rotate', 0.8, 8), ('Color', 1.0, 2)], + [('Color', 0.8, 8), ('Solarize', 0.8, 7)], + [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], + [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], + [('Color', 0.4, 0), ('Equalize', 0.6, 3)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy(name='v0', hparams=None): + hparams = hparams or _HPARAMS_DEFAULT + if name == 'original': + return auto_augment_policy_original(hparams) + elif name == 'originalr': + return auto_augment_policy_originalr(hparams) + elif name == 'v0': + return auto_augment_policy_v0(hparams) + elif name == 'v0r': + return auto_augment_policy_v0r(hparams) + else: + assert False, 'Unknown AA policy (%s)' % name + + +class AutoAugment: + + def __init__(self, policy): + self.policy = policy + + def __call__(self, img): + sub_policy = random.choice(self.policy) + for op in sub_policy: + img = op(img) + return img + + +def auto_augment_transform(config_str, hparams): + """ + Create a AutoAugment transform + + :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr'). 
+ The remaining sections, not order sepecific determine + 'mstd' - float std deviation of magnitude noise applied + Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5 + + :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme + + :return: A PyTorch compatible Transform + """ + config = config_str.split('-') + policy_name = config[0] + config = config[1:] + for c in config: + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param injected via hparams for now + hparams.setdefault('magnitude_std', float(val)) + else: + assert False, 'Unknown AutoAugment config section' + aa_policy = auto_augment_policy(policy_name, hparams=hparams) + return AutoAugment(aa_policy) + + +_RAND_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + 'Rotate', + 'Posterize', + 'Solarize', + 'SolarizeAdd', + 'Color', + 'Contrast', + 'Brightness', + 'Sharpness', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', + #'Cutout' # NOTE I've implement this as random erasing separately +] + + +_RAND_INCREASING_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + 'Rotate', + 'PosterizeIncreasing', + 'SolarizeIncreasing', + 'SolarizeAdd', + 'ColorIncreasing', + 'ContrastIncreasing', + 'BrightnessIncreasing', + 'SharpnessIncreasing', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', + #'Cutout' # NOTE I've implement this as random erasing separately +] + + + +# These experimental weights are based loosely on the relative improvements mentioned in paper. +# They may not result in increased performance, but could likely be tuned to so. +_RAND_CHOICE_WEIGHTS_0 = { + 'Rotate': 0.3, + 'ShearX': 0.2, + 'ShearY': 0.2, + 'TranslateXRel': 0.1, + 'TranslateYRel': 0.1, + 'Color': .025, + 'Sharpness': 0.025, + 'AutoContrast': 0.025, + 'Solarize': .005, + 'SolarizeAdd': .005, + 'Contrast': .005, + 'Brightness': .005, + 'Equalize': .005, + 'Posterize': 0, + 'Invert': 0, +} + + +def _select_rand_weights(weight_idx=0, transforms=None): + transforms = transforms or _RAND_TRANSFORMS + assert weight_idx == 0 # only one set of weights currently + rand_weights = _RAND_CHOICE_WEIGHTS_0 + probs = [rand_weights[k] for k in transforms] + probs /= np.sum(probs) + return probs + + +def rand_augment_ops(magnitude=10, hparams=None, transforms=None): + hparams = hparams or _HPARAMS_DEFAULT + transforms = transforms or _RAND_TRANSFORMS + return [AugmentOp( + name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms] + + +class RandAugment: + def __init__(self, ops, num_layers=2, choice_weights=None): + self.ops = ops + self.num_layers = num_layers + self.choice_weights = choice_weights + + def __call__(self, img): + # no replacement when using weighted choice + ops = np.random.choice( + self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights) + for op in ops: + img = op(img) + return img + + +def rand_augment_transform(config_str, hparams): + """ + Create a RandAugment transform + + :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). 
The remaining + sections, not order sepecific determine + 'm' - integer magnitude of rand augment + 'n' - integer num layers (number of transform ops selected per image) + 'w' - integer probabiliy weight index (index of a set of weights to influence choice of op) + 'mstd' - float std deviation of magnitude noise applied + 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) + Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 + 'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2 + + :param hparams: Other hparams (kwargs) for the RandAugmentation scheme + + :return: A PyTorch compatible Transform + """ + magnitude = _MAX_LEVEL # default to _MAX_LEVEL for magnitude (currently 10) + num_layers = 2 # default to 2 ops per image + weight_idx = None # default to no probability weights for op choice + transforms = _RAND_TRANSFORMS + config = config_str.split('-') + assert config[0] == 'rand' + config = config[1:] + for c in config: + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param injected via hparams for now + hparams.setdefault('magnitude_std', float(val)) + elif key == 'inc': + if bool(val): + transforms = _RAND_INCREASING_TRANSFORMS + elif key == 'm': + magnitude = int(val) + elif key == 'n': + num_layers = int(val) + elif key == 'w': + weight_idx = int(val) + else: + assert False, 'Unknown RandAugment config section' + ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms) + choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx) + return RandAugment(ra_ops, num_layers, choice_weights=choice_weights) + + +_AUGMIX_TRANSFORMS = [ + 'AutoContrast', + 'ColorIncreasing', # not in paper + 'ContrastIncreasing', # not in paper + 'BrightnessIncreasing', # not in paper + 'SharpnessIncreasing', # not in paper + 'Equalize', + 'Rotate', + 'PosterizeIncreasing', + 'SolarizeIncreasing', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', +] + + +def augmix_ops(magnitude=10, hparams=None, transforms=None): + hparams = hparams or _HPARAMS_DEFAULT + transforms = transforms or _AUGMIX_TRANSFORMS + return [AugmentOp( + name, prob=1.0, magnitude=magnitude, hparams=hparams) for name in transforms] + + +class AugMixAugment: + """ AugMix Transform + Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py + From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - + https://arxiv.org/abs/1912.02781 + """ + def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False): + self.ops = ops + self.alpha = alpha + self.width = width + self.depth = depth + self.blended = blended # blended mode is faster but not well tested + + def _calc_blended_weights(self, ws, m): + ws = ws * m + cump = 1. + rws = [] + for w in ws[::-1]: + alpha = w / cump + cump *= (1 - alpha) + rws.append(alpha) + return np.array(rws[::-1], dtype=np.float32) + + def _apply_blended(self, img, mixing_weights, m): + # This is my first crack and implementing a slightly faster mixed augmentation. Instead + # of accumulating the mix for each chain in a Numpy array and then blending with original, + # it recomputes the blending coefficients and applies one PIL image blend per chain. + # TODO the results appear in the right ballpark but they differ by more than rounding. 
+ img_orig = img.copy() + ws = self._calc_blended_weights(mixing_weights, m) + for w in ws: + depth = self.depth if self.depth > 0 else np.random.randint(1, 4) + ops = np.random.choice(self.ops, depth, replace=True) + img_aug = img_orig # no ops are in-place, deep copy not necessary + for op in ops: + img_aug = op(img_aug) + img = Image.blend(img, img_aug, w) + return img + + def _apply_basic(self, img, mixing_weights, m): + # This is a literal adaptation of the paper/official implementation without normalizations and + # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the + # typical augmentation transforms, could use a GPU / Kornia implementation. + img_shape = img.size[0], img.size[1], len(img.getbands()) + mixed = np.zeros(img_shape, dtype=np.float32) + for mw in mixing_weights: + depth = self.depth if self.depth > 0 else np.random.randint(1, 4) + ops = np.random.choice(self.ops, depth, replace=True) + img_aug = img # no ops are in-place, deep copy not necessary + for op in ops: + img_aug = op(img_aug) + mixed += mw * np.asarray(img_aug, dtype=np.float32) + np.clip(mixed, 0, 255., out=mixed) + mixed = Image.fromarray(mixed.astype(np.uint8)) + return Image.blend(img, mixed, m) + + def __call__(self, img): + mixing_weights = np.float32(np.random.dirichlet([self.alpha] * self.width)) + m = np.float32(np.random.beta(self.alpha, self.alpha)) + if self.blended: + mixed = self._apply_blended(img, mixing_weights, m) + else: + mixed = self._apply_basic(img, mixing_weights, m) + return mixed + + +def augment_and_mix_transform(config_str, hparams): + """ Create AugMix PyTorch transform + + :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining + sections, not order sepecific determine + 'm' - integer magnitude (severity) of augmentation mix (default: 3) + 'w' - integer width of augmentation chain (default: 3) + 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) + 'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0) + 'mstd' - float std deviation of magnitude noise applied (default: 0) + Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2 + + :param hparams: Other hparams (kwargs) for the Augmentation transforms + + :return: A PyTorch compatible Transform + """ + magnitude = 3 + width = 3 + depth = -1 + alpha = 1. 
+ blended = False + config = config_str.split('-') + assert config[0] == 'augmix' + config = config[1:] + for c in config: + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param injected via hparams for now + hparams.setdefault('magnitude_std', float(val)) + elif key == 'm': + magnitude = int(val) + elif key == 'w': + width = int(val) + elif key == 'd': + depth = int(val) + elif key == 'a': + alpha = float(val) + elif key == 'b': + blended = bool(val) + else: + assert False, 'Unknown AugMix config section' + ops = augmix_ops(magnitude=magnitude, hparams=hparams) return AugMixAugment(ops, alpha=alpha, width=width, depth=depth, blended=blended) \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/model.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/model.py index 4472a6a19fe512d303eff83a8ee521676061eb1c..6d180a0119a16c641ed08240dd62be12d1d3115b 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/model.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/model.py @@ -1,426 +1,426 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch import nn -from torch.nn import functional as F -from .utils import ( - round_filters, - round_repeats, - drop_connect, - get_same_padding_conv2d, - get_model_params, - efficientnet_params, - load_pretrained_weights, - Swish, - MemoryEfficientSwish, - calculate_output_image_size -) - -class MBConvBlock(nn.Module): - """Mobile Inverted Residual Bottleneck Block. - - Args: - block_args (namedtuple): BlockArgs, defined in utils.py. - global_params (namedtuple): GlobalParam, defined in utils.py. - image_size (tuple or list): [image_height, image_width]. 
- - References: - [1] https://arxiv.org/abs/1704.04861 (MobileNet v1) - [2] https://arxiv.org/abs/1801.04381 (MobileNet v2) - [3] https://arxiv.org/abs/1905.02244 (MobileNet v3) - """ - - def __init__(self, block_args, global_params, image_size=None): - super().__init__() - self._block_args = block_args - self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow - self._bn_eps = global_params.batch_norm_epsilon - self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip # whether to use skip connection and drop connect - - # Expansion phase (Inverted Bottleneck) - inp = self._block_args.input_filters # number of input channels - oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels - if self._block_args.expand_ratio != 1: - Conv2d = get_same_padding_conv2d(image_size=image_size) - self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) - self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) - # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size - - # Depthwise convolution phase - k = self._block_args.kernel_size - s = self._block_args.stride - Conv2d = get_same_padding_conv2d(image_size=image_size) - self._depthwise_conv = Conv2d( - in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise - kernel_size=k, stride=s, bias=False) - self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) - image_size = calculate_output_image_size(image_size, s) - - # Squeeze and Excitation layer, if desired - if self.has_se: - Conv2d = get_same_padding_conv2d(image_size=(1,1)) - num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) - self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) - self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) - # self._se_relu = torch.nn.ReLU() - # self._se_sigmoid = torch.nn.Sigmoid() - - # Pointwise convolution phase - final_oup = self._block_args.output_filters - Conv2d = get_same_padding_conv2d(image_size=image_size) - self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) - self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) - self._swish = MemoryEfficientSwish() - - def forward(self, inputs, drop_connect_rate=None): - """MBConvBlock's forward function. - - Args: - inputs (tensor): Input tensor. - drop_connect_rate (bool): Drop connect rate (float, between 0 and 1). - - Returns: - Output of this block after processing. 
- """ - - # Expansion and Depthwise Convolution - x = inputs - if self._block_args.expand_ratio != 1: - x = self._expand_conv(inputs) - x = self._bn0(x) - x = self._swish(x) - - x = self._depthwise_conv(x) - x = self._bn1(x) - x = self._swish(x) - - # Squeeze and Excitation - if self.has_se: - x_squeezed = F.adaptive_avg_pool2d(x, 1) - # x_squeezed = torch.mean(x, [2, 3], keepdim=True) - - x_squeezed = self._se_reduce(x_squeezed) - - x_squeezed = self._swish(x_squeezed) - - x_squeezed = self._se_expand(x_squeezed) - - x = torch.sigmoid(x_squeezed) * x - - # Pointwise Convolution - x = self._project_conv(x) - x = self._bn2(x) - - # Skip connection and drop connect - input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters - if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: - # The combination of skip connection and drop connect brings about stochastic depth. - if drop_connect_rate: - x = drop_connect(x, p=drop_connect_rate, training=self.training) - x = x + inputs # skip connection - return x - - def set_swish(self, memory_efficient=True): - """Sets swish function as memory efficient (for training) or standard (for export). - - Args: - memory_efficient (bool): Whether to use memory-efficient version of swish. - """ - self._swish = MemoryEfficientSwish() if memory_efficient else Swish() - - -class EfficientNet(nn.Module): - """EfficientNet model. - Most easily loaded with the .from_name or .from_pretrained methods. - - Args: - blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. - global_params (namedtuple): A set of GlobalParams shared between blocks. - - References: - [1] https://arxiv.org/abs/1905.11946 (EfficientNet) - - Example: - >>> import torch - >>> from efficientnet.model import EfficientNet - >>> inputs = torch.rand(1, 3, 224, 224) - >>> model = EfficientNet.from_pretrained('efficientnet-b0') - >>> model.eval() - >>> outputs = model(inputs) - """ - - def __init__(self, blocks_args=None, global_params=None): - super().__init__() - assert isinstance(blocks_args, list), 'blocks_args should be a list' - assert len(blocks_args) > 0, 'block args must be greater than 0' - self._global_params = global_params - self._blocks_args = blocks_args - - # Batch norm parameters - bn_mom = 1 - self._global_params.batch_norm_momentum - bn_eps = self._global_params.batch_norm_epsilon - - # Get stem static or dynamic convolution depending on image size - image_size = global_params.image_size - Conv2d = get_same_padding_conv2d(image_size=image_size) - - # Stem - in_channels = 3 # rgb - out_channels = round_filters(32, self._global_params) # number of output channels - self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) - self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) - image_size = calculate_output_image_size(image_size, 2) - - # Build blocks - self._blocks = nn.ModuleList([]) - for block_args in self._blocks_args: - - # Update block input and output filters based on depth multiplier. - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, self._global_params), - output_filters=round_filters(block_args.output_filters, self._global_params), - num_repeat=round_repeats(block_args.num_repeat, self._global_params) - ) - - # The first block needs to take care of stride and filter size increase. 
- self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) - image_size = calculate_output_image_size(image_size, block_args.stride) - if block_args.num_repeat > 1: # modify block_args to keep same output size - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) - # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1 - - # Head - in_channels = block_args.output_filters # output of final block - out_channels = round_filters(1280, self._global_params) - Conv2d = get_same_padding_conv2d(image_size=image_size) - self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) - self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) - - # Final linear layer - self._avg_pooling = nn.AdaptiveAvgPool2d(1) - self._dropout = nn.Dropout(self._global_params.dropout_rate) - self._fc = nn.Linear(out_channels, self._global_params.num_classes) - self._swish = MemoryEfficientSwish() - - def set_swish(self, memory_efficient=True): - """Sets swish function as memory efficient (for training) or standard (for export). - - Args: - memory_efficient (bool): Whether to use memory-efficient version of swish. - - """ - self._swish = MemoryEfficientSwish() if memory_efficient else Swish() - for block in self._blocks: - block.set_swish(memory_efficient) - - def extract_endpoints(self, inputs): - """Use convolution layer to extract features - from reduction levels i in [1, 2, 3, 4, 5]. - - Args: - inputs (tensor): Input tensor. - - Returns: - Dictionary of last intermediate features - with reduction levels i in [1, 2, 3, 4, 5]. - Example: - >>> import torch - >>> from efficientnet.model import EfficientNet - >>> inputs = torch.rand(1, 3, 224, 224) - >>> model = EfficientNet.from_pretrained('efficientnet-b0') - >>> endpoints = model.extract_features(inputs) - >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112]) - >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56]) - >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28]) - >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14]) - >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 1280, 7, 7]) - """ - endpoints = dict() - - # Stem - x = self._swish(self._bn0(self._conv_stem(inputs))) - # x = self._swish(self._conv_stem(inputs)) - prev_x = x - - # Blocks - for idx, block in enumerate(self._blocks): - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate - x = block(x, drop_connect_rate=drop_connect_rate) - if prev_x.size(2) > x.size(2): - endpoints[f'reduction_{len(endpoints)+1}'] = prev_x - prev_x = x - - # Head - x = self._swish(self._bn1(self._conv_head(x))) - - endpoints[f'reduction_{len(endpoints)+1}'] = x - - return endpoints - - def extract_features(self, inputs): - """use convolution layer to extract feature . - - Args: - inputs (tensor): Input tensor. - - Returns: - Output of the final convolution - layer in the efficientnet model. 
- """ - # Stem - x = self._swish(self._bn0(self._conv_stem(inputs))) - - - # Blocks - for idx, block in enumerate(self._blocks): - drop_connect_rate = self._global_params.drop_connect_rate - if drop_connect_rate: - drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate - x = block(x, drop_connect_rate=drop_connect_rate) - - # Head - x = self._swish(self._bn1(self._conv_head(x))) - - return x - - def forward(self, inputs): - """EfficientNet's forward function. - Calls extract_features to extract features, applies final linear layer, and returns logits. - - Args: - inputs (tensor): Input tensor. - - Returns: - Output of this model after processing. - """ - - # Convolution layers - x = self.extract_features(inputs) - - # Pooling and final linear layer - x = self._avg_pooling(x) - x = torch.flatten(x, start_dim=1) - x = self._dropout(x) - x = self._fc(x) - - return x - - @classmethod - def from_name(cls, model_name, in_channels=3, **override_params): - """create an efficientnet model according to name. - - Args: - model_name (str): Name for efficientnet. - in_channels (int): Input data's channel number. - override_params (other key word params): - Params to override model's global_params. - Optional key: - 'width_coefficient', 'depth_coefficient', - 'image_size', 'dropout_rate', - 'num_classes', 'batch_norm_momentum', - 'batch_norm_epsilon', 'drop_connect_rate', - 'depth_divisor', 'min_depth' - - Returns: - An efficientnet model. - """ - cls._check_model_name_is_valid(model_name) - blocks_args, global_params = get_model_params(model_name, override_params) - model = cls(blocks_args, global_params) - model._change_in_channels(in_channels) - return model - - @classmethod - def from_pretrained(cls, model_name, weights_path=None, advprop=False, - in_channels=3, num_classes=1000, **override_params): - """create an efficientnet model according to name. - - Args: - model_name (str): Name for efficientnet. - weights_path (None or str): - str: path to pretrained weights file on the local disk. - None: use pretrained weights downloaded from the Internet. - advprop (bool): - Whether to load pretrained weights - trained with advprop (valid when weights_path is None). - in_channels (int): Input data's channel number. - num_classes (int): - Number of categories for classification. - It controls the output size for final linear layer. - override_params (other key word params): - Params to override model's global_params. - Optional key: - 'width_coefficient', 'depth_coefficient', - 'image_size', 'dropout_rate', - 'num_classes', 'batch_norm_momentum', - 'batch_norm_epsilon', 'drop_connect_rate', - 'depth_divisor', 'min_depth' - - Returns: - A pretrained efficientnet model. - """ - model = cls.from_name(model_name, num_classes = num_classes, **override_params) - load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop) - model._change_in_channels(in_channels) - return model - - @classmethod - def get_image_size(cls, model_name): - """Get the input image size for a given efficientnet model. - - Args: - model_name (str): Name for efficientnet. - - Returns: - Input image size (resolution). - """ - cls._check_model_name_is_valid(model_name) - _, _, res, _ = efficientnet_params(model_name) - return res - - @classmethod - def _check_model_name_is_valid(cls, model_name): - """Validates model name. - - Args: - model_name (str): Name for efficientnet. - - Returns: - bool: Is a valid name or not. 
- """ - valid_models = ['efficientnet-b'+str(i) for i in range(9)] - - # Support the construction of 'efficientnet-l2' without pretrained weights - valid_models += ['efficientnet-l2'] - - if model_name not in valid_models: - raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) - - def _change_in_channels(self, in_channels): - """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. - - Args: - in_channels (int): Input data's channel number. - """ - if in_channels != 3: - Conv2d = get_same_padding_conv2d(image_size = self._global_params.image_size) - out_channels = round_filters(32, self._global_params) - self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn +from torch.nn import functional as F +from .utils import ( + round_filters, + round_repeats, + drop_connect, + get_same_padding_conv2d, + get_model_params, + efficientnet_params, + load_pretrained_weights, + Swish, + MemoryEfficientSwish, + calculate_output_image_size +) + +class MBConvBlock(nn.Module): + """Mobile Inverted Residual Bottleneck Block. + + Args: + block_args (namedtuple): BlockArgs, defined in utils.py. + global_params (namedtuple): GlobalParam, defined in utils.py. + image_size (tuple or list): [image_height, image_width]. 
+ + References: + [1] https://arxiv.org/abs/1704.04861 (MobileNet v1) + [2] https://arxiv.org/abs/1801.04381 (MobileNet v2) + [3] https://arxiv.org/abs/1905.02244 (MobileNet v3) + """ + + def __init__(self, block_args, global_params, image_size=None): + super().__init__() + self._block_args = block_args + self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip # whether to use skip connection and drop connect + + # Expansion phase (Inverted Bottleneck) + inp = self._block_args.input_filters # number of input channels + oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels + if self._block_args.expand_ratio != 1: + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._depthwise_conv = Conv2d( + in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise + kernel_size=k, stride=s, bias=False) + self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + image_size = calculate_output_image_size(image_size, s) + + # Squeeze and Excitation layer, if desired + if self.has_se: + Conv2d = get_same_padding_conv2d(image_size=(1,1)) + num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) + self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) + self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) + # self._se_relu = torch.nn.ReLU() + # self._se_sigmoid = torch.nn.Sigmoid() + + # Pointwise convolution phase + final_oup = self._block_args.output_filters + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) + self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) + self._swish = MemoryEfficientSwish() + + def forward(self, inputs, drop_connect_rate=None): + """MBConvBlock's forward function. + + Args: + inputs (tensor): Input tensor. + drop_connect_rate (bool): Drop connect rate (float, between 0 and 1). + + Returns: + Output of this block after processing. 
+ """ + + # Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = self._expand_conv(inputs) + x = self._bn0(x) + x = self._swish(x) + + x = self._depthwise_conv(x) + x = self._bn1(x) + x = self._swish(x) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + # x_squeezed = torch.mean(x, [2, 3], keepdim=True) + + x_squeezed = self._se_reduce(x_squeezed) + + x_squeezed = self._swish(x_squeezed) + + x_squeezed = self._se_expand(x_squeezed) + + x = torch.sigmoid(x_squeezed) * x + + # Pointwise Convolution + x = self._project_conv(x) + x = self._bn2(x) + + # Skip connection and drop connect + input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters + if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: + # The combination of skip connection and drop connect brings about stochastic depth. + if drop_connect_rate: + x = drop_connect(x, p=drop_connect_rate, training=self.training) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + + +class EfficientNet(nn.Module): + """EfficientNet model. + Most easily loaded with the .from_name or .from_pretrained methods. + + Args: + blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. + global_params (namedtuple): A set of GlobalParams shared between blocks. + + References: + [1] https://arxiv.org/abs/1905.11946 (EfficientNet) + + Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> model.eval() + >>> outputs = model(inputs) + """ + + def __init__(self, blocks_args=None, global_params=None): + super().__init__() + assert isinstance(blocks_args, list), 'blocks_args should be a list' + assert len(blocks_args) > 0, 'block args must be greater than 0' + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Get stem static or dynamic convolution depending on image size + image_size = global_params.image_size + Conv2d = get_same_padding_conv2d(image_size=image_size) + + # Stem + in_channels = 3 # rgb + out_channels = round_filters(32, self._global_params) # number of output channels + self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + image_size = calculate_output_image_size(image_size, 2) + + # Build blocks + self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, self._global_params), + output_filters=round_filters(block_args.output_filters, self._global_params), + num_repeat=round_repeats(block_args.num_repeat, self._global_params) + ) + + # The first block needs to take care of stride and filter size increase. 
+ self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) + image_size = calculate_output_image_size(image_size, block_args.stride) + if block_args.num_repeat > 1: # modify block_args to keep same output size + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) + # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1 + + # Head + in_channels = block_args.output_filters # output of final block + out_channels = round_filters(1280, self._global_params) + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) + self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + + # Final linear layer + self._avg_pooling = nn.AdaptiveAvgPool2d(1) + self._dropout = nn.Dropout(self._global_params.dropout_rate) + self._fc = nn.Linear(out_channels, self._global_params.num_classes) + self._swish = MemoryEfficientSwish() + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + for block in self._blocks: + block.set_swish(memory_efficient) + + def extract_endpoints(self, inputs): + """Use convolution layer to extract features + from reduction levels i in [1, 2, 3, 4, 5]. + + Args: + inputs (tensor): Input tensor. + + Returns: + Dictionary of last intermediate features + with reduction levels i in [1, 2, 3, 4, 5]. + Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> endpoints = model.extract_features(inputs) + >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112]) + >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56]) + >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28]) + >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14]) + >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 1280, 7, 7]) + """ + endpoints = dict() + + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + # x = self._swish(self._conv_stem(inputs)) + prev_x = x + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + if prev_x.size(2) > x.size(2): + endpoints[f'reduction_{len(endpoints)+1}'] = prev_x + prev_x = x + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + endpoints[f'reduction_{len(endpoints)+1}'] = x + + return endpoints + + def extract_features(self, inputs): + """use convolution layer to extract feature . + + Args: + inputs (tensor): Input tensor. + + Returns: + Output of the final convolution + layer in the efficientnet model. 
+ """ + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + return x + + def forward(self, inputs): + """EfficientNet's forward function. + Calls extract_features to extract features, applies final linear layer, and returns logits. + + Args: + inputs (tensor): Input tensor. + + Returns: + Output of this model after processing. + """ + + # Convolution layers + x = self.extract_features(inputs) + + # Pooling and final linear layer + x = self._avg_pooling(x) + x = torch.flatten(x, start_dim=1) + x = self._dropout(x) + x = self._fc(x) + + return x + + @classmethod + def from_name(cls, model_name, in_channels=3, **override_params): + """create an efficientnet model according to name. + + Args: + model_name (str): Name for efficientnet. + in_channels (int): Input data's channel number. + override_params (other key word params): + Params to override model's global_params. + Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + + Returns: + An efficientnet model. + """ + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params(model_name, override_params) + model = cls(blocks_args, global_params) + model._change_in_channels(in_channels) + return model + + @classmethod + def from_pretrained(cls, model_name, weights_path=None, advprop=False, + in_channels=3, num_classes=1000, **override_params): + """create an efficientnet model according to name. + + Args: + model_name (str): Name for efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + advprop (bool): + Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + in_channels (int): Input data's channel number. + num_classes (int): + Number of categories for classification. + It controls the output size for final linear layer. + override_params (other key word params): + Params to override model's global_params. + Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + + Returns: + A pretrained efficientnet model. + """ + model = cls.from_name(model_name, num_classes = num_classes, **override_params) + load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop) + model._change_in_channels(in_channels) + return model + + @classmethod + def get_image_size(cls, model_name): + """Get the input image size for a given efficientnet model. + + Args: + model_name (str): Name for efficientnet. + + Returns: + Input image size (resolution). + """ + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name): + """Validates model name. + + Args: + model_name (str): Name for efficientnet. + + Returns: + bool: Is a valid name or not. 
+ """ + valid_models = ['efficientnet-b'+str(i) for i in range(9)] + + # Support the construction of 'efficientnet-l2' without pretrained weights + valid_models += ['efficientnet-l2'] + + if model_name not in valid_models: + raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) + + def _change_in_channels(self, in_channels): + """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. + + Args: + in_channels (int): Input data's channel number. + """ + if in_channels != 3: + Conv2d = get_same_padding_conv2d(image_size = self._global_params.image_size) + out_channels = round_filters(32, self._global_params) + self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/rmsprop_tf.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/rmsprop_tf.py index 7873ed7393832045687de92aceb935a2839b98e6..3666cb6061aecc28b1195551ffeebd255bd1c77d 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/rmsprop_tf.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/rmsprop_tf.py @@ -1,136 +1,136 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch.optim import Optimizer - -class RMSpropTF(Optimizer): - """Implements RMSprop algorithm (TensorFlow style epsilon) - - NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt - to closer match Tensorflow for matching hyper-params. - - Proposed by G. Hinton in his - `course `_. - - The centered version first appears in `Generating Sequences - With Recurrent Neural Networks `_. 
- - Arguments: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float, optional): learning rate (default: 1e-2) - momentum (float, optional): momentum factor (default: 0) - alpha (float, optional): smoothing (decay) constant (default: 0.9) - eps (float, optional): term added to the denominator to improve - numerical stability (default: 1e-10) - centered (bool, optional) : if ``True``, compute the centered RMSProp, - the gradient is normalized by an estimation of its variance - weight_decay (float, optional): weight decay (L2 penalty) (default: 0) - decoupled_decay (bool, optional): decoupled weight decay as per https://arxiv.org/abs/1711.05101 - lr_in_momentum (bool, optional): learning rate scaling is included in the momentum buffer - update as per defaults in Tensorflow - - """ - - def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10, weight_decay=0, momentum=0., centered=False, - decoupled_decay=False, lr_in_momentum=True): - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= momentum: - raise ValueError("Invalid momentum value: {}".format(momentum)) - if not 0.0 <= weight_decay: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - if not 0.0 <= alpha: - raise ValueError("Invalid alpha value: {}".format(alpha)) - - defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay, - decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum) - super(RMSpropTF, self).__init__(params, defaults) - - def __setstate__(self, state): - super(RMSpropTF, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('momentum', 0) - group.setdefault('centered', False) - - def step(self, closure=None): - """Performs a single optimization step. - - Arguments: - closure (callable, optional): A closure that reevaluates the model - and returns the loss. - """ - loss = None - if closure is not None: - loss = closure() - - for group in self.param_groups: - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data - if grad.is_sparse: - raise RuntimeError('RMSprop does not support sparse gradients') - state = self.state[p] - - # State initialization - if len(state) == 0: - state['step'] = 0 - state['square_avg'] = torch.ones_like(p.data) # PyTorch inits to zero - if group['momentum'] > 0: - state['momentum_buffer'] = torch.zeros_like(p.data) - if group['centered']: - state['grad_avg'] = torch.zeros_like(p.data) - - square_avg = state['square_avg'] - one_minus_alpha = 1. 
- group['alpha'] - - state['step'] += 1 - - if group['weight_decay'] != 0: - if 'decoupled_decay' in group and group['decoupled_decay']: - p.data.add_(-group['weight_decay'], p.data) - else: - grad = grad.add(group['weight_decay'], p.data) - - # Tensorflow order of ops for updating squared avg - square_avg.add_(one_minus_alpha, grad.pow(2) - square_avg) - # square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad) # PyTorch original - - if group['centered']: - grad_avg = state['grad_avg'] - grad_avg.add_(one_minus_alpha, grad - grad_avg) - # grad_avg.mul_(alpha).add_(1 - alpha, grad) # PyTorch original - avg = square_avg.addcmul(-1, grad_avg, grad_avg).add(group['eps']).sqrt_() # eps moved in sqrt - else: - avg = square_avg.add(group['eps']).sqrt_() # eps moved in sqrt - - if group['momentum'] > 0: - buf = state['momentum_buffer'] - # Tensorflow accumulates the LR scaling in the momentum buffer - if 'lr_in_momentum' in group and group['lr_in_momentum']: - buf.mul_(group['momentum']).addcdiv_(group['lr'], grad, avg) - p.data.add_(-buf) - else: - # PyTorch scales the param update by LR - buf.mul_(group['momentum']).addcdiv_(grad, avg) - p.data.add_(-group['lr'], buf) - else: - p.data.addcdiv_(-group['lr'], grad, avg) - +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.optim import Optimizer + +class RMSpropTF(Optimizer): + """Implements RMSprop algorithm (TensorFlow style epsilon) + + NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt + to closer match Tensorflow for matching hyper-params. + + Proposed by G. Hinton in his + `course `_. + + The centered version first appears in `Generating Sequences + With Recurrent Neural Networks `_. 
+ + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-2) + momentum (float, optional): momentum factor (default: 0) + alpha (float, optional): smoothing (decay) constant (default: 0.9) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-10) + centered (bool, optional) : if ``True``, compute the centered RMSProp, + the gradient is normalized by an estimation of its variance + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + decoupled_decay (bool, optional): decoupled weight decay as per https://arxiv.org/abs/1711.05101 + lr_in_momentum (bool, optional): learning rate scaling is included in the momentum buffer + update as per defaults in Tensorflow + + """ + + def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10, weight_decay=0, momentum=0., centered=False, + decoupled_decay=False, lr_in_momentum=True): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= momentum: + raise ValueError("Invalid momentum value: {}".format(momentum)) + if not 0.0 <= weight_decay: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + if not 0.0 <= alpha: + raise ValueError("Invalid alpha value: {}".format(alpha)) + + defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay, + decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum) + super(RMSpropTF, self).__init__(params, defaults) + + def __setstate__(self, state): + super(RMSpropTF, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('momentum', 0) + group.setdefault('centered', False) + + def step(self, closure=None): + """Performs a single optimization step. + + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('RMSprop does not support sparse gradients') + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + state['square_avg'] = torch.ones_like(p.data) # PyTorch inits to zero + if group['momentum'] > 0: + state['momentum_buffer'] = torch.zeros_like(p.data) + if group['centered']: + state['grad_avg'] = torch.zeros_like(p.data) + + square_avg = state['square_avg'] + one_minus_alpha = 1. 
- group['alpha'] + + state['step'] += 1 + + if group['weight_decay'] != 0: + if 'decoupled_decay' in group and group['decoupled_decay']: + p.data.add_(-group['weight_decay'], p.data) + else: + grad = grad.add(group['weight_decay'], p.data) + + # Tensorflow order of ops for updating squared avg + square_avg.add_(one_minus_alpha, grad.pow(2) - square_avg) + # square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad) # PyTorch original + + if group['centered']: + grad_avg = state['grad_avg'] + grad_avg.add_(one_minus_alpha, grad - grad_avg) + # grad_avg.mul_(alpha).add_(1 - alpha, grad) # PyTorch original + avg = square_avg.addcmul(-1, grad_avg, grad_avg).add(group['eps']).sqrt_() # eps moved in sqrt + else: + avg = square_avg.add(group['eps']).sqrt_() # eps moved in sqrt + + if group['momentum'] > 0: + buf = state['momentum_buffer'] + # Tensorflow accumulates the LR scaling in the momentum buffer + if 'lr_in_momentum' in group and group['lr_in_momentum']: + buf.mul_(group['momentum']).addcdiv_(group['lr'], grad, avg) + p.data.add_(-buf) + else: + # PyTorch scales the param update by LR + buf.mul_(group['momentum']).addcdiv_(grad, avg) + p.data.add_(-group['lr'], buf) + else: + p.data.addcdiv_(-group['lr'], grad, avg) + return loss \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/utils.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/utils.py index 2c562e625c61fb9bfb0734e662df151f179bcbde..1dbf13b4a82b523e7a40e1c797162a10f90b5a83 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/utils.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/efficientnet_pytorch/utils.py @@ -1,636 +1,636 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import math -import collections -from functools import partial -import torch -from torch import nn -from torch.nn import functional as F -from torch.utils import model_zoo - -################################################################################ -### Help functions for model architecture -################################################################################ - -# GlobalParams and BlockArgs: Two namedtuples -# Swish and MemoryEfficientSwish: Two implementations of the method -# round_filters and round_repeats: -# Functions to calculate params for scaling model width and depth ! ! ! -# get_width_and_height_from_size and calculate_output_image_size -# drop_connect: A structural design -# get_same_padding_conv2d: -# Conv2dDynamicSamePadding -# Conv2dStaticSamePadding -# get_same_padding_maxPool2d: -# MaxPool2dDynamicSamePadding -# MaxPool2dStaticSamePadding -# It's an additional function, not used in EfficientNet, -# but can be used in other model (such as EfficientDet). 
-# Identity: An implementation of identical mapping - -# Parameters for the entire model (stem, all blocks, and head) -GlobalParams = collections.namedtuple('GlobalParams', [ - 'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate', - 'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon', - 'drop_connect_rate', 'depth_divisor', 'min_depth']) - -# Parameters for an individual model block -BlockArgs = collections.namedtuple('BlockArgs', [ - 'num_repeat', 'kernel_size', 'stride', 'expand_ratio', - 'input_filters', 'output_filters', 'se_ratio', 'id_skip']) - -# Set GlobalParams and BlockArgs's defaults -GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) - - -# An ordinary implementation of Swish function -class Swish(nn.Module): - def forward(self, x): - return x * torch.sigmoid(x) - -# A memory-efficient implementation of Swish function -class SwishImplementation(torch.autograd.Function): - @staticmethod - def forward(ctx, i): - result = i * torch.sigmoid(i) - ctx.save_for_backward(i) - return result - - @staticmethod - def backward(ctx, grad_output): - i = ctx.saved_tensors[0] - sigmoid_i = torch.sigmoid(i) - return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) - -class MemoryEfficientSwish(nn.Module): - def forward(self, x): - return SwishImplementation.apply(x) - - -def round_filters(filters, global_params): - """Calculate and round number of filters based on width multiplier. - Use width_coefficient, depth_divisor and min_depth of global_params. - - Args: - filters (int): Filters number to be calculated. - global_params (namedtuple): Global params of the model. - - Returns: - new_filters: New filters number after calculating. - """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - # TODO: modify the params names. - # maybe the names (width_divisor,min_width) - # are more suitable than (depth_divisor,min_depth). - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor # pay attention to this line when using min_depth - # follow the formula transferred from official TensorFlow implementation - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats, global_params): - """Calculate module's repeat number of a block based on depth multiplier. - Use depth_coefficient of global_params. - - Args: - repeats (int): num_repeat to be calculated. - global_params (namedtuple): Global params of the model. - - Returns: - new repeat: New repeat number after calculating. - """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - # follow the formula transferred from official TensorFlow implementation - return int(math.ceil(multiplier * repeats)) - - -def drop_connect(inputs, p, training): - """Drop connect. - - Args: - inputs (tensor: BCWH): Input of this structure. - p (float: 0.0~1.0): Probability of drop connection. - training (bool): The running mode. - - Returns: - output: Output after drop connection. 
- """ - assert p >= 0 and p <= 1, 'p must be in range of [0,1]' - - if not training: - return inputs - - batch_size = inputs.shape[0] - keep_prob = 1 - p - - # generate binary_tensor mask according to probability (p for 0, 1-p for 1) - random_tensor = keep_prob - random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) - binary_tensor = torch.floor(random_tensor) / keep_prob - - output = inputs * binary_tensor - return output - - -def get_width_and_height_from_size(x): - """Obtain height and width from x. - - Args: - x (int, tuple or list): Data size. - - Returns: - size: A tuple or list (H,W). - """ - if isinstance(x, int): - return x, x - if isinstance(x, list) or isinstance(x, tuple): - return x - else: - raise TypeError() - - -def calculate_output_image_size(input_image_size, stride): - """Calculates the output image size when using Conv2dSamePadding with a stride. - Necessary for static padding. Thanks to mannatsingh for pointing this out. - - Args: - input_image_size (int, tuple or list): Size of input image. - stride (int, tuple or list): Conv2d operation's stride. - - Returns: - output_image_size: A list [H,W]. - """ - if input_image_size is None: - return None - image_height, image_width = get_width_and_height_from_size(input_image_size) - stride = stride if isinstance(stride, int) else stride[0] - image_height = int(math.ceil(image_height / stride)) - image_width = int(math.ceil(image_width / stride)) - return [image_height, image_width] - - -# Note: -# The following 'SamePadding' functions make output size equal ceil(input size/stride). -# Only when stride equals 1, can the output size be the same as input size. -# Don't be confused by their function names ! ! ! - -def get_same_padding_conv2d(image_size=None): - """Chooses static padding if you have specified an image size, and dynamic padding otherwise. - Static padding is necessary for ONNX exporting of models. - - Args: - image_size (int or tuple): Size of the image. - - Returns: - Conv2dDynamicSamePadding or Conv2dStaticSamePadding. - """ - if image_size is None: - return Conv2dDynamicSamePadding - else: - return partial(Conv2dStaticSamePadding, image_size=image_size) - - -class Conv2dDynamicSamePadding(nn.Conv2d): - """2D Convolutions like TensorFlow, for a dynamic image size. - The padding is operated in forward function by calculating dynamically. - """ - - # Tips for 'SAME' mode padding. - # Given the following: - # i: width or height - # s: stride - # k: kernel size - # d: dilation - # p: padding - # Output after Conv2d: - # o = floor((i+p-((k-1)*d+1))/s+1) - # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), - # => p = (i-1)*s+((k-1)*d+1)-i - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): - super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) - self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 - - def forward(self, x): - ih, iw = x.size()[-2:] - kh, kw = self.weight.size()[-2:] - sh, sw = self.stride - oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! ! 
- pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) - pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) - if pad_h > 0 or pad_w > 0: - x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) - return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) - - -class Conv2dStaticSamePadding(nn.Conv2d): - """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. - The padding mudule is calculated in construction function, then used in forward. - """ - - # With the same calculation as Conv2dDynamicSamePadding - - def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs): - super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) - self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 - - # Calculate padding based on image size and save it - assert image_size is not None - ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size - kh, kw = self.weight.size()[-2:] - sh, sw = self.stride - oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) - pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) - pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) - if pad_h > 0 or pad_w > 0: - self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) - if kh % 2 != 0: - self.padding = (kh - 1) // 2 - else: - self.padding = kh // 2 - else: - self.static_padding = Identity() - - def forward(self, x): - x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) - return x - - -def get_same_padding_maxPool2d(image_size=None): - """Chooses static padding if you have specified an image size, and dynamic padding otherwise. - Static padding is necessary for ONNX exporting of models. - - Args: - image_size (int or tuple): Size of the image. - - Returns: - MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding. - """ - if image_size is None: - return MaxPool2dDynamicSamePadding - else: - return partial(MaxPool2dStaticSamePadding, image_size=image_size) - - -class MaxPool2dDynamicSamePadding(nn.MaxPool2d): - """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. - The padding is operated in forward function by calculating dynamically. 
- """ - - def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False): - super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode) - self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride - self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size - self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation - - def forward(self, x): - ih, iw = x.size()[-2:] - kh, kw = self.kernel_size - sh, sw = self.stride - oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) - pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) - pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) - if pad_h > 0 or pad_w > 0: - x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) - return F.max_pool2d(x, self.kernel_size, self.stride, self.padding, - self.dilation, self.ceil_mode, self.return_indices) - -class MaxPool2dStaticSamePadding(nn.MaxPool2d): - """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. - The padding mudule is calculated in construction function, then used in forward. - """ - - def __init__(self, kernel_size, stride, image_size=None, **kwargs): - super().__init__(kernel_size, stride, **kwargs) - self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride - self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size - self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation - - # Calculate padding based on image size and save it - assert image_size is not None - ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size - kh, kw = self.kernel_size - sh, sw = self.stride - oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) - pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) - pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) - if pad_h > 0 or pad_w > 0: - self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) - else: - self.static_padding = Identity() - - def forward(self, x): - x = self.static_padding(x) - x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding, - self.dilation, self.ceil_mode, self.return_indices) - return x - -class Identity(nn.Module): - """Identity mapping. - Send input to output directly. - """ - - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -################################################################################ -### Helper functions for loading model params -################################################################################ - -# BlockDecoder: A Class for encoding and decoding BlockArgs -# efficientnet_params: A function to query compound coefficient -# get_model_params and efficientnet: -# Functions to get BlockArgs and GlobalParams for efficientnet -# url_map and url_map_advprop: Dicts of url_map for pretrained weights -# load_pretrained_weights: A function to load pretrained weights - -class BlockDecoder(object): - """Block Decoder for readability, - straight from the official TensorFlow repository. - """ - - @staticmethod - def _decode_block_string(block_string): - """Get a block through a string notation of arguments. - - Args: - block_string (str): A string notation of arguments. 
- Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'. - - Returns: - BlockArgs: The namedtuple defined at the top of this file. - """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - assert (('s' in options and len(options['s']) == 1) or - (len(options['s']) == 2 and options['s'][0] == options['s'][1])) - - return BlockArgs( - num_repeat=int(options['r']), - kernel_size=int(options['k']), - stride=[int(options['s'][0])], - expand_ratio=int(options['e']), - input_filters=int(options['i']), - output_filters=int(options['o']), - se_ratio=float(options['se']) if 'se' in options else None, - id_skip=('noskip' not in block_string)) - - @staticmethod - def _encode_block_string(block): - """Encode a block to a string. - - Args: - block (namedtuple): A BlockArgs type argument. - - Returns: - block_string: A String form of BlockArgs. - """ - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list): - """Decode a list of string notations to specify blocks inside the network. - - Args: - string_list (list[str]): A list of strings, each string is a notation of block. - - Returns: - blocks_args: A list of BlockArgs namedtuples of block args. - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args): - """Encode a list of BlockArgs to a list of strings. - - Args: - blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. - - Returns: - block_strings: A list of strings, each string is a notation of block. - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def efficientnet_params(model_name): - """Map EfficientNet model name to parameter coefficients. - - Args: - model_name (str): Model name to be queried. - - Returns: - params_dict[model_name]: A (width,depth,res,dropout) tuple. - """ - params_dict = { - # Coefficients: width,depth,res,dropout - 'efficientnet-b0': (1.0, 1.0, 224, 0.2), - 'efficientnet-b1': (1.0, 1.1, 240, 0.2), - 'efficientnet-b2': (1.1, 1.2, 260, 0.3), - 'efficientnet-b3': (1.2, 1.4, 300, 0.3), - 'efficientnet-b4': (1.4, 1.8, 380, 0.4), - 'efficientnet-b5': (1.6, 2.2, 456, 0.4), - 'efficientnet-b6': (1.8, 2.6, 528, 0.5), - 'efficientnet-b7': (2.0, 3.1, 600, 0.5), - 'efficientnet-b8': (2.2, 3.6, 672, 0.5), - 'efficientnet-l2': (4.3, 5.3, 800, 0.5), - } - return params_dict[model_name] - - -def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None, - dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000): - """Create BlockArgs and GlobalParams for efficientnet model. - - Args: - width_coefficient (float) - depth_coefficient (float) - image_size (int) - dropout_rate (float) - drop_connect_rate (float) - num_classes (int) - - Meaning as the name suggests. - - Returns: - blocks_args, global_params. 
- """ - - # Blocks args for the whole model(efficientnet-b0 by default) - # It will be modified in the construction of EfficientNet Class according to model - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - image_size=image_size, - dropout_rate=dropout_rate, - - num_classes=num_classes, - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - drop_connect_rate=drop_connect_rate, - depth_divisor=8, - min_depth=None, - ) - - return blocks_args, global_params - - -def get_model_params(model_name, override_params): - """Get the block args and global params for a given model name. - - Args: - model_name (str): Model's name. - override_params (dict): A dict to modify global_params. - - Returns: - blocks_args, global_params - """ - if model_name.startswith('efficientnet'): - w, d, s, p = efficientnet_params(model_name) - # note: all models have drop connect rate = 0.2 - blocks_args, global_params = efficientnet( - width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - # ValueError will be raised here if override_params has fields not included in global_params. - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -# train with Standard methods -# check more details in paper(EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks) -url_map = { - 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', - 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', - 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', - 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', - 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', - 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', - 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', - 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', -} - -# train with Adversarial Examples(AdvProp) -# check more details in paper(Adversarial Examples Improve Image Recognition) -url_map_advprop = { - 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth', - 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth', - 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth', - 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth', - 'efficientnet-b4': 
'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth', - 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth', - 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth', - 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth', - 'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth', -} - -# TODO: add the petrained weights url map of 'efficientnet-l2' - - -def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False): - """Loads pretrained weights from weights path or download using url. - - Args: - model (Module): The whole model of efficientnet. - model_name (str): Model name of efficientnet. - weights_path (None or str): - str: path to pretrained weights file on the local disk. - None: use pretrained weights downloaded from the Internet. - load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. - advprop (bool): Whether to load pretrained weights - trained with advprop (valid when weights_path is None). - """ - if isinstance(weights_path,str): - state_dict = torch.load(weights_path, map_location='cpu') - else: - # AutoAugment or Advprop (different preprocessing) - url_map_ = url_map_advprop if advprop else url_map - state_dict = model_zoo.load_url(url_map_[model_name]) - if 'state_dict' in state_dict: - state_dict = state_dict['state_dict'] - if 'module.' in list(state_dict.keys())[0]: - state_dict_tmp = dict() - for k, v in state_dict.items(): - state_dict_tmp[k[7:]] = v - state_dict = state_dict_tmp - - if load_fc: - ret = model.load_state_dict(state_dict, strict=False) - assert not ret.missing_keys, f'Missing keys when loading pretrained weights: {ret.missing_keys}' - else: - state_dict.pop('_fc.weight') - state_dict.pop('_fc.bias') - ret = model.load_state_dict(state_dict, strict=False) - assert set(ret.missing_keys) == set( - ['_fc.weight', '_fc.bias']), f'Missing keys when loading pretrained weights: {ret.missing_keys}' - assert not ret.unexpected_keys, f'Missing keys when loading pretrained weights: {ret.unexpected_keys}' - - print('Loaded pretrained weights for {}'.format(model_name)) +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
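The 'SAME'-padding helpers restored in this utils.py hunk (Conv2dDynamicSamePadding / Conv2dStaticSamePadding) derive the TensorFlow-style padding from o = floor((i + p - ((k-1)*d + 1)) / s + 1): to obtain o = ceil(i / s) they pad by p = (o - 1)*s + (k - 1)*d + 1 - i, split as evenly as possible between the two sides. A minimal standalone sketch of that arithmetic, assuming the 224x224, stride-2, 3x3 stem convolution of efficientnet-b0 (illustrative only, not part of the patch):

    import math
    import torch
    import torch.nn.functional as F

    def same_pad(i, k, s, d=1):
        # total padding so that the output length equals ceil(i / s)
        return max((math.ceil(i / s) - 1) * s + (k - 1) * d + 1 - i, 0)

    ih = iw = 224          # assumed input resolution (efficientnet-b0)
    k, s = 3, 2            # stem conv kernel size / stride
    pad_h, pad_w = same_pad(ih, k, s), same_pad(iw, k, s)   # 1, 1
    x = torch.randn(1, 3, ih, iw)
    x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
    y = F.conv2d(x, torch.randn(32, 3, k, k), stride=s)
    assert y.shape[-2:] == (math.ceil(ih / s), math.ceil(iw / s))   # 112 x 112

Conv2dStaticSamePadding precomputes these amounts from image_size at construction time (which is why model.py tracks calculate_output_image_size block by block), while the dynamic variant recomputes them on every forward call.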
+ +import re +import math +import collections +from functools import partial +import torch +from torch import nn +from torch.nn import functional as F +from torch.utils import model_zoo + +################################################################################ +### Help functions for model architecture +################################################################################ + +# GlobalParams and BlockArgs: Two namedtuples +# Swish and MemoryEfficientSwish: Two implementations of the method +# round_filters and round_repeats: +# Functions to calculate params for scaling model width and depth ! ! ! +# get_width_and_height_from_size and calculate_output_image_size +# drop_connect: A structural design +# get_same_padding_conv2d: +# Conv2dDynamicSamePadding +# Conv2dStaticSamePadding +# get_same_padding_maxPool2d: +# MaxPool2dDynamicSamePadding +# MaxPool2dStaticSamePadding +# It's an additional function, not used in EfficientNet, +# but can be used in other model (such as EfficientDet). +# Identity: An implementation of identical mapping + +# Parameters for the entire model (stem, all blocks, and head) +GlobalParams = collections.namedtuple('GlobalParams', [ + 'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon', + 'drop_connect_rate', 'depth_divisor', 'min_depth']) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple('BlockArgs', [ + 'num_repeat', 'kernel_size', 'stride', 'expand_ratio', + 'input_filters', 'output_filters', 'se_ratio', 'id_skip']) + +# Set GlobalParams and BlockArgs's defaults +GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + + +# An ordinary implementation of Swish function +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + +# A memory-efficient implementation of Swish function +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_tensors[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +def round_filters(filters, global_params): + """Calculate and round number of filters based on width multiplier. + Use width_coefficient, depth_divisor and min_depth of global_params. + + Args: + filters (int): Filters number to be calculated. + global_params (namedtuple): Global params of the model. + + Returns: + new_filters: New filters number after calculating. + """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + # TODO: modify the params names. + # maybe the names (width_divisor,min_width) + # are more suitable than (depth_divisor,min_depth). 
+ divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor # pay attention to this line when using min_depth + # follow the formula transferred from official TensorFlow implementation + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """Calculate module's repeat number of a block based on depth multiplier. + Use depth_coefficient of global_params. + + Args: + repeats (int): num_repeat to be calculated. + global_params (namedtuple): Global params of the model. + + Returns: + new repeat: New repeat number after calculating. + """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + # follow the formula transferred from official TensorFlow implementation + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, p, training): + """Drop connect. + + Args: + inputs (tensor: BCWH): Input of this structure. + p (float: 0.0~1.0): Probability of drop connection. + training (bool): The running mode. + + Returns: + output: Output after drop connection. + """ + assert p >= 0 and p <= 1, 'p must be in range of [0,1]' + + if not training: + return inputs + + batch_size = inputs.shape[0] + keep_prob = 1 - p + + # generate binary_tensor mask according to probability (p for 0, 1-p for 1) + random_tensor = keep_prob + random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) + binary_tensor = torch.floor(random_tensor) / keep_prob + + output = inputs * binary_tensor + return output + + +def get_width_and_height_from_size(x): + """Obtain height and width from x. + + Args: + x (int, tuple or list): Data size. + + Returns: + size: A tuple or list (H,W). + """ + if isinstance(x, int): + return x, x + if isinstance(x, list) or isinstance(x, tuple): + return x + else: + raise TypeError() + + +def calculate_output_image_size(input_image_size, stride): + """Calculates the output image size when using Conv2dSamePadding with a stride. + Necessary for static padding. Thanks to mannatsingh for pointing this out. + + Args: + input_image_size (int, tuple or list): Size of input image. + stride (int, tuple or list): Conv2d operation's stride. + + Returns: + output_image_size: A list [H,W]. + """ + if input_image_size is None: + return None + image_height, image_width = get_width_and_height_from_size(input_image_size) + stride = stride if isinstance(stride, int) else stride[0] + image_height = int(math.ceil(image_height / stride)) + image_width = int(math.ceil(image_width / stride)) + return [image_height, image_width] + + +# Note: +# The following 'SamePadding' functions make output size equal ceil(input size/stride). +# Only when stride equals 1, can the output size be the same as input size. +# Don't be confused by their function names ! ! ! + +def get_same_padding_conv2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models. + + Args: + image_size (int or tuple): Size of the image. + + Returns: + Conv2dDynamicSamePadding or Conv2dStaticSamePadding. 
+ """ + if image_size is None: + return Conv2dDynamicSamePadding + else: + return partial(Conv2dStaticSamePadding, image_size=image_size) + + +class Conv2dDynamicSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a dynamic image size. + The padding is operated in forward function by calculating dynamically. + """ + + # Tips for 'SAME' mode padding. + # Given the following: + # i: width or height + # s: stride + # k: kernel size + # d: dilation + # p: padding + # Output after Conv2d: + # o = floor((i+p-((k-1)*d+1))/s+1) + # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), + # => p = (i-1)*s+((k-1)*d+1)-i + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! ! + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + +class Conv2dStaticSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. + The padding mudule is calculated in construction function, then used in forward. + """ + + # With the same calculation as Conv2dDynamicSamePadding + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs): + super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) + if kh % 2 != 0: + self.padding = (kh - 1) // 2 + else: + self.padding = kh // 2 + else: + self.static_padding = Identity() + + def forward(self, x): + x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + return x + + +def get_same_padding_maxPool2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models. + + Args: + image_size (int or tuple): Size of the image. + + Returns: + MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding. + """ + if image_size is None: + return MaxPool2dDynamicSamePadding + else: + return partial(MaxPool2dStaticSamePadding, image_size=image_size) + + +class MaxPool2dDynamicSamePadding(nn.MaxPool2d): + """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. 
+ The padding is operated in forward function by calculating dynamically. + """ + + def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False): + super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode) + self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride + self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size + self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.kernel_size + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.max_pool2d(x, self.kernel_size, self.stride, self.padding, + self.dilation, self.ceil_mode, self.return_indices) + +class MaxPool2dStaticSamePadding(nn.MaxPool2d): + """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. + The padding mudule is calculated in construction function, then used in forward. + """ + + def __init__(self, kernel_size, stride, image_size=None, **kwargs): + super().__init__(kernel_size, stride, **kwargs) + self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride + self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size + self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size + kh, kw = self.kernel_size + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) + else: + self.static_padding = Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding, + self.dilation, self.ceil_mode, self.return_indices) + return x + +class Identity(nn.Module): + """Identity mapping. + Send input to output directly. + """ + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +################################################################################ +### Helper functions for loading model params +################################################################################ + +# BlockDecoder: A Class for encoding and decoding BlockArgs +# efficientnet_params: A function to query compound coefficient +# get_model_params and efficientnet: +# Functions to get BlockArgs and GlobalParams for efficientnet +# url_map and url_map_advprop: Dicts of url_map for pretrained weights +# load_pretrained_weights: A function to load pretrained weights + +class BlockDecoder(object): + """Block Decoder for readability, + straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string): + """Get a block through a string notation of arguments. 
+ + Args: + block_string (str): A string notation of arguments. + Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'. + + Returns: + BlockArgs: The namedtuple defined at the top of this file. + """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert (('s' in options and len(options['s']) == 1) or + (len(options['s']) == 2 and options['s'][0] == options['s'][1])) + + return BlockArgs( + num_repeat=int(options['r']), + kernel_size=int(options['k']), + stride=[int(options['s'][0])], + expand_ratio=int(options['e']), + input_filters=int(options['i']), + output_filters=int(options['o']), + se_ratio=float(options['se']) if 'se' in options else None, + id_skip=('noskip' not in block_string)) + + @staticmethod + def _encode_block_string(block): + """Encode a block to a string. + + Args: + block (namedtuple): A BlockArgs type argument. + + Returns: + block_string: A String form of BlockArgs. + """ + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """Decode a list of string notations to specify blocks inside the network. + + Args: + string_list (list[str]): A list of strings, each string is a notation of block. + + Returns: + blocks_args: A list of BlockArgs namedtuples of block args. + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """Encode a list of BlockArgs to a list of strings. + + Args: + blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. + + Returns: + block_strings: A list of strings, each string is a notation of block. + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def efficientnet_params(model_name): + """Map EfficientNet model name to parameter coefficients. + + Args: + model_name (str): Model name to be queried. + + Returns: + params_dict[model_name]: A (width,depth,res,dropout) tuple. + """ + params_dict = { + # Coefficients: width,depth,res,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + } + return params_dict[model_name] + + +def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None, + dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000): + """Create BlockArgs and GlobalParams for efficientnet model. + + Args: + width_coefficient (float) + depth_coefficient (float) + image_size (int) + dropout_rate (float) + drop_connect_rate (float) + num_classes (int) + + Meaning as the name suggests. + + Returns: + blocks_args, global_params. 
+ """ + + # Blocks args for the whole model(efficientnet-b0 by default) + # It will be modified in the construction of EfficientNet Class according to model + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + image_size=image_size, + dropout_rate=dropout_rate, + + num_classes=num_classes, + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + drop_connect_rate=drop_connect_rate, + depth_divisor=8, + min_depth=None, + ) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """Get the block args and global params for a given model name. + + Args: + model_name (str): Model's name. + override_params (dict): A dict to modify global_params. + + Returns: + blocks_args, global_params + """ + if model_name.startswith('efficientnet'): + w, d, s, p = efficientnet_params(model_name) + # note: all models have drop connect rate = 0.2 + blocks_args, global_params = efficientnet( + width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + # ValueError will be raised here if override_params has fields not included in global_params. + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +# train with Standard methods +# check more details in paper(EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks) +url_map = { + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', + 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', + 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', +} + +# train with Adversarial Examples(AdvProp) +# check more details in paper(Adversarial Examples Improve Image Recognition) +url_map_advprop = { + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth', + 'efficientnet-b4': 
'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth', + 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth', + 'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth', +} + +# TODO: add the petrained weights url map of 'efficientnet-l2' + + +def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False): + """Loads pretrained weights from weights path or download using url. + + Args: + model (Module): The whole model of efficientnet. + model_name (str): Model name of efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. + advprop (bool): Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + """ + if isinstance(weights_path,str): + state_dict = torch.load(weights_path, map_location='cpu') + else: + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[model_name]) + if 'state_dict' in state_dict: + state_dict = state_dict['state_dict'] + if 'module.' in list(state_dict.keys())[0]: + state_dict_tmp = dict() + for k, v in state_dict.items(): + state_dict_tmp[k[7:]] = v + state_dict = state_dict_tmp + + if load_fc: + ret = model.load_state_dict(state_dict, strict=False) + assert not ret.missing_keys, f'Missing keys when loading pretrained weights: {ret.missing_keys}' + else: + state_dict.pop('_fc.weight') + state_dict.pop('_fc.bias') + ret = model.load_state_dict(state_dict, strict=False) + assert set(ret.missing_keys) == set( + ['_fc.weight', '_fc.bias']), f'Missing keys when loading pretrained weights: {ret.missing_keys}' + assert not ret.unexpected_keys, f'Missing keys when loading pretrained weights: {ret.unexpected_keys}' + + print('Loaded pretrained weights for {}'.format(model_name)) diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/README.md b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/README.md index 8f7f89b212a6165af308d60ccbc8d445facc5a33..fcafce33a6d3915a353eae374b55e72a3c1cc143 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/README.md +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/README.md @@ -1,23 +1,23 @@ -### Imagenet - -This is a preliminary directory for evaluating the model on ImageNet. It is adapted from the standard PyTorch Imagenet script. - -For now, only evaluation is supported, but I am currently building scripts to assist with training new models on Imagenet. - -The evaluation results are slightly different from the original TensorFlow repository, due to differences in data preprocessing. For example, with the current preprocessing, `efficientnet-b3` gives a top-1 accuracy of `80.8`, rather than `81.1` in the paper. I am working on porting the TensorFlow preprocessing into PyTorch to address this issue. 
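For reference, a hedged usage sketch of the `load_pretrained_weights` helper shown above could look like the following; it assumes the helper is importable from `efficientnet_pytorch.utils` as in the upstream EfficientNet-PyTorch repository, and that a checkpoint file has already been downloaded locally (the path is an assumption, not a file shipped with this patch).

```python
# Usage sketch only: the weights path is an assumed local checkpoint, and the
# import path assumes the helper lives in efficientnet_pytorch/utils.py as in
# the upstream EfficientNet-PyTorch repository.
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import load_pretrained_weights

# Build a b0 backbone with a 10-class head (override_params as used in hubconf.py).
model = EfficientNet.from_name('efficientnet-b0',
                               override_params={'num_classes': 10})

# load_fc=False pops '_fc.weight'/'_fc.bias' from the checkpoint, so the new
# 10-class head stays randomly initialized while the backbone weights load.
load_pretrained_weights(model, 'efficientnet-b0',
                        weights_path='efficientnet-b0-355c32eb.pth',  # assumed local file
                        load_fc=False)
```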
- -To run on Imagenet, place your `train` and `val` directories in `data`. - -Example commands: -```bash -# Evaluate small EfficientNet on CPU -python main.py data -e -a 'efficientnet-b0' --pretrained -``` -```bash -# Evaluate medium EfficientNet on GPU -python main.py data -e -a 'efficientnet-b3' --pretrained --gpu 0 --batch-size 128 -``` -```bash -# Evaluate ResNet-50 for comparison -python main.py data -e -a 'resnet50' --pretrained --gpu 0 -``` +### Imagenet + +This is a preliminary directory for evaluating the model on ImageNet. It is adapted from the standard PyTorch Imagenet script. + +For now, only evaluation is supported, but I am currently building scripts to assist with training new models on Imagenet. + +The evaluation results are slightly different from the original TensorFlow repository, due to differences in data preprocessing. For example, with the current preprocessing, `efficientnet-b3` gives a top-1 accuracy of `80.8`, rather than `81.1` in the paper. I am working on porting the TensorFlow preprocessing into PyTorch to address this issue. + +To run on Imagenet, place your `train` and `val` directories in `data`. + +Example commands: +```bash +# Evaluate small EfficientNet on CPU +python main.py data -e -a 'efficientnet-b0' --pretrained +``` +```bash +# Evaluate medium EfficientNet on GPU +python main.py data -e -a 'efficientnet-b3' --pretrained --gpu 0 --batch-size 128 +``` +```bash +# Evaluate ResNet-50 for comparison +python main.py data -e -a 'resnet50' --pretrained --gpu 0 +``` diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/data/README.md b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/data/README.md index 21a3317e9c7dc0e0e48f1e21eaee091a57eaabd5..310c6e0df88a16c4fc922b00adb50841f711080d 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/data/README.md +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/data/README.md @@ -1,5 +1,5 @@ -### ImageNet - -Download ImageNet and place it into `train` and `val` folders here. - -More details may be found with the official PyTorch ImageNet example [here](https://github.com/pytorch/examples/blob/master/imagenet). +### ImageNet + +Download ImageNet and place it into `train` and `val` folders here. + +More details may be found with the official PyTorch ImageNet example [here](https://github.com/pytorch/examples/blob/master/imagenet). diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/main.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/main.py index d8343ad748952ac5faefb4dfde02761b7f5a66cc..79ebf12fe7aa7b6dcc5a77d82c0c7da475383d17 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/main.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/examples/imagenet/main.py @@ -1,545 +1,545 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import re -import sys -import time -import PIL -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -import apex -from apex import amp - -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../')) -from efficientnet_pytorch import EfficientNet -from efficientnet_pytorch import rand_augment_transform, augment_and_mix_transform, auto_augment_transform - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data', metavar='DIR', - help='path to dataset') -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', - help='model architecture (default: resnet18)') -parser.add_argument('-j', '--workers', default=64, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-5, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', - help='path to pretrained weight') -parser.add_argument('--num_classes', default=1000, type=int, - help='number of class') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='hccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') -parser.add_argument('--npu', default=None, type=str, - help='npu id to use.') -parser.add_argument('--image_size', default=224, type=int, - help='image size') -parser.add_argument('--advprop', default=False, action='store_true', - help='use advprop or not') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -parser.add_argument('--autoaug', action='store_true', help='use auto augment') -parser.add_argument('--amp', action='store_true', help='use apex') -parser.add_argument('--pm', '--precision-mode', default='O1', type=str, - help='precision mode to use for mix precision, only support O1, O2') -parser.add_argument('--loss_scale', default=1024, type=int, help='loss_scale for amp') -parser.add_argument('--addr', default='127.0.0.1', type=str, - help='npu id to use.') -parser.add_argument('--nnpus_per_node', default=None, type=int, - help='number of npus to use for distributed train on each node') -parser.add_argument('--val_feq', default=10, type=int, - help='validation frequency') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') -parser.add_argument('--stop-step-num', default=None, type=int, - help='after the stop-step, killing the training task') -cur_step = 0 - -# for servers to immediately record the logs -#def flush_print(func): - #def new_print(*args, **kwargs): - #func(*args, **kwargs) - #sys.stdout.flush() - #return new_print -#print = flush_print(print) - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def main(): - args = parser.parse_args() - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - args.process_device_map = device_id_to_process_device_map(args.device_list) - nnpus_per_node = len(args.process_device_map) - - - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = nnpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - mp.spawn(main_worker, nprocs=nnpus_per_node, args=(nnpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.npu, nnpus_per_node, args) - -def main_worker(npu, nnpus_per_node, args): - args.npu = npu - global cur_step - if args.distributed: - args.npu = args.process_device_map[npu] - - if args.npu is not None: - print("Use npu: {} for training".format(args.npu)) - torch.npu.set_device('npu:' + str(args.npu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * nnpus_per_node + int(npu) - - dist.init_process_group(backend=args.dist_backend, - 
world_size=args.world_size, rank=args.rank) - # create model - if 'efficientnet' in args.arch: # NEW - if args.pretrained: - model = EfficientNet.from_pretrained(args.arch, advprop=args.advprop, weights_path=args.pretrained_weight, num_classes=args.num_classes) - print("=> using pre-trained model '{}'".format(args.arch)) - else: - print("=> creating model '{}'".format(args.arch)) - model = EfficientNet.from_name(args.arch) - - else: - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = models.__dict__[args.arch](pretrained=True) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch]() - - criterion = nn.CrossEntropyLoss().to('npu:' + str(args.npu)) - - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - model = model.to('npu:' + str(args.npu)) - if args.amp: - print("=> use amp...") - if args.pm not in ['O1', 'O2']: - print('=>unsupported precision mode!') - exit() - opt_level = args.pm - model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=args.loss_scale,combine_grad=True) - - global total_batch_size - total_batch_size = args.batch_size - if args.distributed: - args.batch_size = int(args.batch_size / nnpus_per_node) - args.workers = int(args.workers / nnpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) - - - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - checkpoint = torch.load(args.resume, map_location='npu:' + str(args.npu)) - args.start_epoch = checkpoint['epoch'] - if args.amp: - amp.load_state_dict(checkpoint['amp']) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - if args.advprop: - normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0) - else: - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - if 'efficientnet' in args.arch: - image_size = EfficientNet.get_image_size(args.arch) - else: - image_size = args.image_size - - if args.autoaug: - print("=> use auto augment...") - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(image_size), - auto_augment_wrapper(image_size), - transforms.ToTensor(), - normalize, - ])) - else: - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(image_size), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) - - val_transforms = transforms.Compose([ - transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC), - transforms.CenterCrop(image_size), - transforms.ToTensor(), - normalize, - ]) - print('npu:' + str(args.npu), ' optimizer params:', 
optimizer) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, val_transforms), - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, pin_memory=True) - - if args.evaluate: - res = validate(val_loader, model, criterion, args, nnpus_per_node) - with open('res.txt', 'w') as f: - print(res, file=f) - return - - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node) - - # evaluate on validation set - if epoch % args.val_feq == 0 or epoch == args.epochs - 1: - validate(val_loader, model, criterion, args, nnpus_per_node) - - if epoch == args.epochs - 1: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % nnpus_per_node == 0): - if not args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'optimizer': optimizer.state_dict(), - }) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }) - if args.stop_step_num is not None and cur_step >= args.stop_step_num: - break - -def train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node): - global cur_step - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':6.4f') - lr = AverageMeter('LR', ':6.4f') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - fps_time = AverageMeter('FPS', ':6.1f') - progress = ProgressMeter(len(train_loader), fps_time, batch_time, data_time, losses, lr, top1, - top5, prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - step_per_epoch = len(train_loader) - for i, (images, target) in enumerate(train_loader): - if i > 100: - pass - cur_step = epoch * step_per_epoch + i - adjust_learning_rate_fraction_epoch(optimizer, epoch, args) - - # measure data loading time - data_time.update(time.time() - end) - - optimizer.zero_grad() - - target = target.int() - images, target = images.to('npu:' + str(args.npu), non_blocking=True), target.to('npu:' + str(args.npu), non_blocking=True) - - # compute output - output = model(images) - - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - - losses.update(loss.item(), images.size(0)) - lr.update(optimizer.param_groups[0]['lr'], images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - # compute gradient and do SGD step - - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - # measure elapsed time - fps_time.update(total_batch_size / (time.time() - end)) - batch_time.update(time.time() - end) - end = time.time() - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % nnpus_per_node == 0): - progress.print(i) - if args.stop_step_num is not None and cur_step >= args.stop_step_num: - break - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % nnpus_per_node == 0): - fps = str(fps_time) - p1 = re.compile(r'[(](.*?)[)]', re.S) - FPS = re.findall(p1, fps)[0] - print(' * FPS@all {}'.format(FPS)) - -def validate(val_loader, model, criterion, 
args, nnpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5, - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if i > 10: - pass - target = target.int() - images, target = images.to('npu:' + str(args.npu), non_blocking=True), target.to('npu:' + str(args.npu), non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % nnpus_per_node == 0): - progress.print(i) - - # TODO: this should also be done with the ProgressMeter - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % nnpus_per_node == 0): - - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - - - return top1.avg - - -def save_checkpoint(state, filename='checkpoint.pth'): - torch.save(state, filename) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - self.skip = 0 - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - self.skip = 0 - - def update(self, val, n=1): - self.val = val - # the first 5 value are not accumulated in the average stats - self.skip += 1 - if self.skip < 5: - return - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches, *meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def print(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -def auto_augment_wrapper(img_size, auto_augment='original-mstd0.5'): - IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] - 
assert isinstance(auto_augment, str) - aa_params = dict( - translate_const=int(img_size * 0.45), - img_mean=tuple([min(255, round(255 * x)) for x in IMAGENET_DEFAULT_MEAN]), - ) - if auto_augment.startswith('rand'): - return rand_augment_transform(auto_augment, aa_params) - elif auto_augment.startswith('augmix'): - aa_params['translate_pct'] = 0.3 - return augment_and_mix_transform(auto_augment, aa_params) - else: - return auto_augment_transform(auto_augment, aa_params) - -def adjust_learning_rate_fraction_epoch(optimizer, epoch, args): - """Use the epoch cosine schedule""" - - alpha = 0 - cosine_decay = 0.5 * (1 + np.cos(np.pi * epoch / args.epochs)) - decayed = (1 - alpha) * cosine_decay + alpha - lr = args.lr * decayed - for param_group in optimizer.param_groups: - param_group['lr'] = lr - -if __name__ == '__main__': - main() +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import re +import sys +import time +import PIL +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import apex +from apex import amp + +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../')) +from efficientnet_pytorch import EfficientNet +from efficientnet_pytorch import rand_augment_transform, augment_and_mix_transform, auto_augment_transform + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data', metavar='DIR', + help='path to dataset') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + help='model architecture (default: resnet18)') +parser.add_argument('-j', '--workers', default=64, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-5, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency 
(default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', + help='path to pretrained weight') +parser.add_argument('--num_classes', default=1000, type=int, + help='number of class') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='hccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--npu', default=None, type=str, + help='npu id to use.') +parser.add_argument('--image_size', default=224, type=int, + help='image size') +parser.add_argument('--advprop', default=False, action='store_true', + help='use advprop or not') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--autoaug', action='store_true', help='use auto augment') +parser.add_argument('--amp', action='store_true', help='use apex') +parser.add_argument('--pm', '--precision-mode', default='O1', type=str, + help='precision mode to use for mix precision, only support O1, O2') +parser.add_argument('--loss_scale', default=1024, type=int, help='loss_scale for amp') +parser.add_argument('--addr', default='127.0.0.1', type=str, + help='npu id to use.') +parser.add_argument('--nnpus_per_node', default=None, type=int, + help='number of npus to use for distributed train on each node') +parser.add_argument('--val_feq', default=10, type=int, + help='validation frequency') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') +parser.add_argument('--stop-step-num', default=None, type=int, + help='after the stop-step, killing the training task') +cur_step = 0 + +# for servers to immediately record the logs +#def flush_print(func): + #def new_print(*args, **kwargs): + #func(*args, **kwargs) + #sys.stdout.flush() + #return new_print +#print = flush_print(print) + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + args = parser.parse_args() + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + nnpus_per_node = len(args.process_device_map) + + + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total 
world_size + # needs to be adjusted accordingly + args.world_size = nnpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + mp.spawn(main_worker, nprocs=nnpus_per_node, args=(nnpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.npu, nnpus_per_node, args) + +def main_worker(npu, nnpus_per_node, args): + args.npu = npu + global cur_step + if args.distributed: + args.npu = args.process_device_map[npu] + + if args.npu is not None: + print("Use npu: {} for training".format(args.npu)) + torch.npu.set_device('npu:' + str(args.npu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * nnpus_per_node + int(npu) + + dist.init_process_group(backend=args.dist_backend, + world_size=args.world_size, rank=args.rank) + # create model + if 'efficientnet' in args.arch: # NEW + if args.pretrained: + model = EfficientNet.from_pretrained(args.arch, advprop=args.advprop, weights_path=args.pretrained_weight, num_classes=args.num_classes) + print("=> using pre-trained model '{}'".format(args.arch)) + else: + print("=> creating model '{}'".format(args.arch)) + model = EfficientNet.from_name(args.arch) + + else: + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + + criterion = nn.CrossEntropyLoss().to('npu:' + str(args.npu)) + + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + model = model.to('npu:' + str(args.npu)) + if args.amp: + print("=> use amp...") + if args.pm not in ['O1', 'O2']: + print('=>unsupported precision mode!') + exit() + opt_level = args.pm + model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=args.loss_scale,combine_grad=True) + + global total_batch_size + total_batch_size = args.batch_size + if args.distributed: + args.batch_size = int(args.batch_size / nnpus_per_node) + args.workers = int(args.workers / nnpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) + + + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume, map_location='npu:' + str(args.npu)) + args.start_epoch = checkpoint['epoch'] + if args.amp: + amp.load_state_dict(checkpoint['amp']) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + if args.advprop: + normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0) + else: + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + if 'efficientnet' in args.arch: + image_size = 
EfficientNet.get_image_size(args.arch) + else: + image_size = args.image_size + + if args.autoaug: + print("=> use auto augment...") + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(image_size), + auto_augment_wrapper(image_size), + transforms.ToTensor(), + normalize, + ])) + else: + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(image_size), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) + + val_transforms = transforms.Compose([ + transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC), + transforms.CenterCrop(image_size), + transforms.ToTensor(), + normalize, + ]) + print('npu:' + str(args.npu), ' optimizer params:', optimizer) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, val_transforms), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + if args.evaluate: + res = validate(val_loader, model, criterion, args, nnpus_per_node) + with open('res.txt', 'w') as f: + print(res, file=f) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node) + + # evaluate on validation set + if epoch % args.val_feq == 0 or epoch == args.epochs - 1: + validate(val_loader, model, criterion, args, nnpus_per_node) + + if epoch == args.epochs - 1: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % nnpus_per_node == 0): + if not args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + }) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }) + if args.stop_step_num is not None and cur_step >= args.stop_step_num: + break + +def train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node): + global cur_step + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':6.4f') + lr = AverageMeter('LR', ':6.4f') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + fps_time = AverageMeter('FPS', ':6.1f') + progress = ProgressMeter(len(train_loader), fps_time, batch_time, data_time, losses, lr, top1, + top5, prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + step_per_epoch = len(train_loader) + for i, (images, target) in enumerate(train_loader): + if i > 100: + pass + cur_step = epoch * step_per_epoch + i + adjust_learning_rate_fraction_epoch(optimizer, epoch, args) + + # measure data loading time + data_time.update(time.time() - end) + + optimizer.zero_grad() + + target = target.int() + images, target = images.to('npu:' + str(args.npu), non_blocking=True), target.to('npu:' + str(args.npu), non_blocking=True) + + # compute output + output = model(images) + + loss = 
criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + losses.update(loss.item(), images.size(0)) + lr.update(optimizer.param_groups[0]['lr'], images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + # compute gradient and do SGD step + + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + # measure elapsed time + fps_time.update(total_batch_size / (time.time() - end)) + batch_time.update(time.time() - end) + end = time.time() + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % nnpus_per_node == 0): + progress.print(i) + if args.stop_step_num is not None and cur_step >= args.stop_step_num: + break + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % nnpus_per_node == 0): + fps = str(fps_time) + p1 = re.compile(r'[(](.*?)[)]', re.S) + FPS = re.findall(p1, fps)[0] + print(' * FPS@all {}'.format(FPS)) + +def validate(val_loader, model, criterion, args, nnpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5, + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if i > 10: + pass + target = target.int() + images, target = images.to('npu:' + str(args.npu), non_blocking=True), target.to('npu:' + str(args.npu), non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % nnpus_per_node == 0): + progress.print(i) + + # TODO: this should also be done with the ProgressMeter + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % nnpus_per_node == 0): + + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + + return top1.avg + + +def save_checkpoint(state, filename='checkpoint.pth'): + torch.save(state, filename) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f'): + self.name = name + self.fmt = fmt + self.reset() + self.skip = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + self.skip = 0 + + def update(self, val, n=1): + self.val = val + # the first 5 value are not accumulated in the average stats + self.skip += 1 + if self.skip < 5: + return + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, *meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def print(self, batch): + entries = [self.prefix + 
self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + lr = args.lr * (0.1 ** (epoch // 30)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + +def auto_augment_wrapper(img_size, auto_augment='original-mstd0.5'): + IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] + assert isinstance(auto_augment, str) + aa_params = dict( + translate_const=int(img_size * 0.45), + img_mean=tuple([min(255, round(255 * x)) for x in IMAGENET_DEFAULT_MEAN]), + ) + if auto_augment.startswith('rand'): + return rand_augment_transform(auto_augment, aa_params) + elif auto_augment.startswith('augmix'): + aa_params['translate_pct'] = 0.3 + return augment_and_mix_transform(auto_augment, aa_params) + else: + return auto_augment_transform(auto_augment, aa_params) + +def adjust_learning_rate_fraction_epoch(optimizer, epoch, args): + """Use the epoch cosine schedule""" + + alpha = 0 + cosine_decay = 0.5 * (1 + np.cos(np.pi * epoch / args.epochs)) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + for param_group in optimizer.param_groups: + param_group['lr'] = lr + +if __name__ == '__main__': + main() diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/hubconf.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/hubconf.py index 614a7b61a8d327c3a0a32203b15fe1e6ead4a8eb..bb4d80153d62f9c01d76101d713a921a285c3834 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/hubconf.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/hubconf.py @@ -1,58 +1,58 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from efficientnet_pytorch import EfficientNet as _EfficientNet - -dependencies = ['torch'] - - -def _create_model_fn(model_name): - def _model_fn(num_classes=1000, in_channels=3, pretrained='imagenet'): - """Create Efficient Net. - - Described in detail here: https://arxiv.org/abs/1905.11946 - - Args: - num_classes (int, optional): Number of classes, default is 1000. - in_channels (int, optional): Number of input channels, default - is 3. 
- pretrained (str, optional): One of [None, 'imagenet', 'advprop'] - If None, no pretrained model is loaded. - If 'imagenet', models trained on imagenet dataset are loaded. - If 'advprop', models trained using adversarial training called - advprop are loaded. It is important to note that the - preprocessing required for the advprop pretrained models is - slightly different from normal ImageNet preprocessing - """ - model_name_ = model_name.replace('_', '-') - if pretrained is not None: - model = _EfficientNet.from_pretrained( - model_name=model_name_, - advprop=(pretrained == 'advprop'), - num_classes=num_classes, - in_channels=in_channels) - else: - model = _EfficientNet.from_name( - model_name=model_name_, - override_params={'num_classes': num_classes}, - ) - model._change_in_channels(in_channels) - - return model - - return _model_fn - -for model_name in ['efficientnet_b' + str(i) for i in range(9)]: - locals()[model_name] = _create_model_fn(model_name) +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from efficientnet_pytorch import EfficientNet as _EfficientNet + +dependencies = ['torch'] + + +def _create_model_fn(model_name): + def _model_fn(num_classes=1000, in_channels=3, pretrained='imagenet'): + """Create Efficient Net. + + Described in detail here: https://arxiv.org/abs/1905.11946 + + Args: + num_classes (int, optional): Number of classes, default is 1000. + in_channels (int, optional): Number of input channels, default + is 3. + pretrained (str, optional): One of [None, 'imagenet', 'advprop'] + If None, no pretrained model is loaded. + If 'imagenet', models trained on imagenet dataset are loaded. + If 'advprop', models trained using adversarial training called + advprop are loaded. 
It is important to note that the + preprocessing required for the advprop pretrained models is + slightly different from normal ImageNet preprocessing + """ + model_name_ = model_name.replace('_', '-') + if pretrained is not None: + model = _EfficientNet.from_pretrained( + model_name=model_name_, + advprop=(pretrained == 'advprop'), + num_classes=num_classes, + in_channels=in_channels) + else: + model = _EfficientNet.from_name( + model_name=model_name_, + override_params={'num_classes': num_classes}, + ) + model._change_in_channels(in_channels) + + return model + + return _model_fn + +for model_name in ['efficientnet_b' + str(i) for i in range(9)]: + locals()[model_name] = _create_model_fn(model_name) diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/pthtar2onnx.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/pthtar2onnx.py index ae3cee8264d71585f627da40dd53244a4396989a..ae878c2d1069529592c449b16f72716bb36b3a56 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/pthtar2onnx.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/pthtar2onnx.py @@ -1,63 +1,63 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch -import torch.onnx - -from collections import OrderedDict -from efficientnet_pytorch.model import EfficientNet - - -def proc_node_module(checkpoint, attr_name): - """ - modify state_dict - :param checkpoint: loaded model file - :param attr_name: key state_dict - :return: new state_dict - """ - new_state_dict = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(pth_file_path, onnx_file_path, class_nums): - """ - convert pth file to onnx file and output onnx file - """ - checkpoint = torch.load(pth_file_path, map_location='cpu') - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = EfficientNet.from_name("efficientnet-b0", num_classes=class_nums) - model.set_swish(memory_efficient=False) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file_path, - input_names=input_names, output_names=output_names, - opset_version=11) - - -if __name__ == "__main__": - src_file_path = "./checkpoint.pth" - dst_file_path = "efficientnet_npu_16.onnx" - class_num = 1000 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch +import torch.onnx + +from collections import OrderedDict +from efficientnet_pytorch.model import EfficientNet + + +def proc_node_module(checkpoint, attr_name): + """ + modify state_dict + :param checkpoint: loaded model file + :param attr_name: key state_dict + :return: new state_dict + """ + new_state_dict = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert(pth_file_path, onnx_file_path, class_nums): + """ + convert pth file to onnx file and output onnx file + """ + checkpoint = torch.load(pth_file_path, map_location='cpu') + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + model = EfficientNet.from_name("efficientnet-b0", num_classes=class_nums) + model.set_swish(memory_efficient=False) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(16, 3, 224, 224) + torch.onnx.export(model, dummy_input, onnx_file_path, + input_names=input_names, output_names=output_names, + opset_version=11) + + +if __name__ == "__main__": + src_file_path = "./checkpoint.pth" + dst_file_path = "efficientnet_npu_16.onnx" + class_num = 1000 convert(src_file_path, dst_file_path, class_num) \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/run_to_onnx.sh b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/run_to_onnx.sh index 5e4f0b29b9e9aa71f5fd010bc18c7a921328dda9..1d44866cc0271a9a44dc7f05be2c15ba9d958e12 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/run_to_onnx.sh +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/run_to_onnx.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash -source env_npu.sh - +#!/usr/bin/env bash +source env_npu.sh + python3.7 pthtar2onnx.py \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/setup.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/setup.py index fdad49e12a281d8e113fbc7986690da1d6a3c1ee..1ee19f2fbde84adc333455dc5bf7b6eeb867c7df 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/setup.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/setup.py @@ -1,138 +1,138 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Note: To use the 'upload' functionality of this file, you must: -# $ pipenv install twine --dev - -import io -import os -import sys -from shutil import rmtree - -from setuptools import find_packages, setup, Command - -# Package meta-data. -NAME = 'efficientnet_pytorch' -DESCRIPTION = 'EfficientNet implemented in PyTorch.' -URL = 'https://github.com/lukemelas/EfficientNet-PyTorch' -EMAIL = 'lmelaskyriazi@college.harvard.edu' -AUTHOR = 'Luke' -REQUIRES_PYTHON = '>=3.5.0' -VERSION = '0.7.0' - -# What packages are required for this module to be executed? -REQUIRED = [ - 'torch' -] - -# What packages are optional? -EXTRAS = { - # 'fancy feature': ['django'], -} - -# The rest you shouldn't have to touch too much :) -# ------------------------------------------------ -# Except, perhaps the License and Trove Classifiers! -# If you do change the License, remember to change the Trove Classifier for that! - -here = os.path.abspath(os.path.dirname(__file__)) - -# Import the README and use it as the long-description. -# Note: this will only work if 'README.md' is present in your MANIFEST.in file! -try: - with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() -except FileNotFoundError: - long_description = DESCRIPTION - -# Load the package's __version__.py module as a dictionary. -about = {} -if not VERSION: - project_slug = NAME.lower().replace("-", "_").replace(" ", "_") - with open(os.path.join(here, project_slug, '__version__.py')) as f: - exec(f.read(), about) -else: - about['__version__'] = VERSION - - -class UploadCommand(Command): - """Support setup.py upload.""" - - description = 'Build and publish the package.' - user_options = [] - - @staticmethod - def status(s): - """Prints things in bold.""" - print('\033[1m{0}\033[0m'.format(s)) - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - try: - self.status('Removing previous builds…') - rmtree(os.path.join(here, 'dist')) - except OSError: - pass - - self.status('Building Source and Wheel (universal) distribution…') - os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) - - self.status('Uploading the package to PyPI via Twine…') - os.system('twine upload dist/*') - - self.status('Pushing git tags…') - os.system('git tag v{0}'.format(about['__version__'])) - os.system('git push --tags') - - sys.exit() - - -# Where the magic happens: -setup( - name=NAME, - version=about['__version__'], - description=DESCRIPTION, - long_description=long_description, - long_description_content_type='text/markdown', - author=AUTHOR, - author_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), - # py_modules=['model'], # If your package is a single module, use this instead of 'packages' - install_requires=REQUIRED, - extras_require=EXTRAS, - include_package_data=True, - license='Apache', - classifiers=[ - # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - ], - # $ setup.py publish support. 
- cmdclass={ - 'upload': UploadCommand, - }, -) +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: To use the 'upload' functionality of this file, you must: +# $ pipenv install twine --dev + +import io +import os +import sys +from shutil import rmtree + +from setuptools import find_packages, setup, Command + +# Package meta-data. +NAME = 'efficientnet_pytorch' +DESCRIPTION = 'EfficientNet implemented in PyTorch.' +URL = 'https://github.com/lukemelas/EfficientNet-PyTorch' +EMAIL = 'lmelaskyriazi@college.harvard.edu' +AUTHOR = 'Luke' +REQUIRES_PYTHON = '>=3.5.0' +VERSION = '0.7.0' + +# What packages are required for this module to be executed? +REQUIRED = [ + 'torch' +] + +# What packages are optional? +EXTRAS = { + # 'fancy feature': ['django'], +} + +# The rest you shouldn't have to touch too much :) +# ------------------------------------------------ +# Except, perhaps the License and Trove Classifiers! +# If you do change the License, remember to change the Trove Classifier for that! + +here = os.path.abspath(os.path.dirname(__file__)) + +# Import the README and use it as the long-description. +# Note: this will only work if 'README.md' is present in your MANIFEST.in file! +try: + with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = '\n' + f.read() +except FileNotFoundError: + long_description = DESCRIPTION + +# Load the package's __version__.py module as a dictionary. +about = {} +if not VERSION: + project_slug = NAME.lower().replace("-", "_").replace(" ", "_") + with open(os.path.join(here, project_slug, '__version__.py')) as f: + exec(f.read(), about) +else: + about['__version__'] = VERSION + + +class UploadCommand(Command): + """Support setup.py upload.""" + + description = 'Build and publish the package.' 
+ user_options = [] + + @staticmethod + def status(s): + """Prints things in bold.""" + print('\033[1m{0}\033[0m'.format(s)) + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + try: + self.status('Removing previous builds…') + rmtree(os.path.join(here, 'dist')) + except OSError: + pass + + self.status('Building Source and Wheel (universal) distribution…') + os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) + + self.status('Uploading the package to PyPI via Twine…') + os.system('twine upload dist/*') + + self.status('Pushing git tags…') + os.system('git tag v{0}'.format(about['__version__'])) + os.system('git push --tags') + + sys.exit() + + +# Where the magic happens: +setup( + name=NAME, + version=about['__version__'], + description=DESCRIPTION, + long_description=long_description, + long_description_content_type='text/markdown', + author=AUTHOR, + author_email=EMAIL, + python_requires=REQUIRES_PYTHON, + url=URL, + packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), + # py_modules=['model'], # If your package is a single module, use this instead of 'packages' + install_requires=REQUIRED, + extras_require=EXTRAS, + include_package_data=True, + license='Apache', + classifiers=[ + # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + ], + # $ setup.py publish support. + cmdclass={ + 'upload': UploadCommand, + }, +) diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_full_8p.sh index 5a841a3b8809fc2e902831a11a0ea54099d4659a..e5def7ceabc261935e7e2b5c3d6550aca639762d 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_full_8p.sh @@ -1,174 +1,174 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="EfficientNet_ID0097_for_PyTorch" -#训练epoch -train_epochs=100 -#训练batch_size -batch_size=4096 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#add ASCEND_DEVICE_ID -ASCEND_DEVICE_ID=0 - 
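# Editor's note: illustrative sketch only, not part of the patch. The test
# scripts in this section parse "--key=value" arguments with bash pattern
# matches and "${para#*=}" expansion; the Python below reproduces that logic
# so the intent of the loop is easier to follow. The function name and the
# default values are hypothetical and are not used by the scripts themselves.
import sys

def parse_kv_args(argv):
    """Collect --key=value pairs the way the shell 'for para in $*' loop does."""
    known = {"data_path": "", "batch_size": "512",
             "learning_rate": "0.045", "precision_mode": "allow_mix_precision"}
    for para in argv:
        if not para.startswith("--") or "=" not in para:
            continue
        key, value = para[2:].split("=", 1)   # same effect as ${para#*=}
        if key in known:
            known[key] = value
    if not known["data_path"]:
        # mirrors the script's data_path check ("must be confing" in the
        # original is a typo for "config")
        sys.exit('[Error] para "data_path" must be config')
    return known

if __name__ == "__main__":
    print(parse_kv_args(sys.argv[1:]))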
-#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` -let a=RANK_ID*${corenum}/${RANK_SIZE} -let b=RANK_ID+1 -let c=b*${corenum}/${RANK_SIZE}-1 - - -nohup taskset -c $a-$c python3.7 ${cur_path}/../examples/imagenet/main.py \ - --data=$data_path \ - --arch=efficientnet-b0 --batch-size=$batch_size \ - --lr=1.6 \ - --momentum=0.9 \ - --epochs=100 \ - --autoaug \ - --amp \ - --pm=O1 \ - --loss_scale=32 \ - --val_feq=10 \ - --addr=$(hostname -I |awk '{print $1}') \ - --dist-backend=hccl \ - --multiprocessing-distributed \ - --world-size 1 \ - --rank 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -#修改参数 -sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -train_accuracy=`grep -a '* Acc@1' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh 
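# Editor's note: illustrative sketch, not part of the patch. The 8p script
# above pins each rank's process to a slice of host cores via
# "taskset -c $a-$c"; this reproduces the same integer arithmetic in Python
# so the intended core partitioning is easier to see. The function name is
# hypothetical.
import os

def core_range(rank_id, rank_size, corenum=None):
    """Inclusive (first, last) core indices for one rank, matching
    a = RANK_ID*corenum/RANK_SIZE and c = (RANK_ID+1)*corenum/RANK_SIZE - 1."""
    if corenum is None:
        # the script counts cores with: cat /proc/cpuinfo | grep processor | wc -l
        corenum = os.cpu_count() or 1
    first = rank_id * corenum // rank_size
    last = (rank_id + 1) * corenum // rank_size - 1
    return first, last

# e.g. 96 cores split over 8 ranks: rank 0 gets cores 0-11, rank 7 gets 84-95
assert core_range(0, 8, 96) == (0, 11)
assert core_range(7, 8, 96) == (84, 95)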
+RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="EfficientNet_ID0097_for_PyTorch" +#训练epoch +train_epochs=100 +#训练batch_size +batch_size=4096 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#add ASCEND_DEVICE_ID +ASCEND_DEVICE_ID=0 + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` +let a=RANK_ID*${corenum}/${RANK_SIZE} +let b=RANK_ID+1 +let c=b*${corenum}/${RANK_SIZE}-1 + + +nohup taskset -c $a-$c python3.7 ${cur_path}/../examples/imagenet/main.py \ + --data=$data_path \ + --arch=efficientnet-b0 --batch-size=$batch_size \ + --lr=1.6 \ + --momentum=0.9 \ + --epochs=100 \ + --autoaug \ + --amp \ + --pm=O1 \ + --loss_scale=32 \ + --val_feq=10 \ + --addr=$(hostname -I |awk '{print $1}') \ + --dist-backend=hccl \ + --multiprocessing-distributed \ + --world-size 1 \ + --rank 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +#修改参数 +sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=`grep -a '* Acc@1' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print 
$2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_1p.sh index 4a5a6d5b4cf8463f4a88bf73a49b085b8cf9b039..2ca641a5ea5c3432e2265921e29dde2b433fa38b 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_1p.sh @@ -1,160 +1,160 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="EfficientNet_ID0097_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=512 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#训练 -nohup python3.7 ${cur_path}/../examples/imagenet/main.py \ - --data=$data_path \ - --arch=efficientnet-b0 \ - --batch-size=$batch_size \ - 
--lr=0.2 \ - --momentum=0.9 \ - --epochs=$train_epochs \ - --autoaug \ - --amp \ - --pm=O1 \ - --loss_scale=32 \ - --val_feq=10 \ - --stop-step-num=1000 \ - --npu=$ASCEND_DEVICE_ID > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -#修改参数 -sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="EfficientNet_ID0097_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=512 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + 
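# Editor's note: illustrative sketch, not part of the patch. The performance
# scripts derive their metrics by grepping the training log: the last field of
# the last line containing "FPS" becomes ActualFPS, and
# TrainingTime = batch_size * 1000 / FPS (milliseconds per step). This is the
# same post-processing in Python; the function name is hypothetical and the
# log format assumed is the "* FPS@all <value>" summary printed by train().

def parse_metrics(log_path, batch_size):
    """Return (fps, training_time_ms), mirroring the grep/tail/awk pipeline."""
    fps = None
    with open(log_path) as f:
        for line in f:
            if "FPS" in line:
                # like awk '{print $NF}' on the last matching line; progress
                # lines wrap the running average in parentheses, so strip them
                fps = float(line.split()[-1].strip("()"))
    if fps is None:
        raise ValueError("no FPS line found in {}".format(log_path))
    training_time_ms = batch_size * 1000.0 / fps
    return fps, round(training_time_ms, 2)

# Example: if the last FPS line ends with 1536.0 and batch size is 512,
# TrainingTime = 512 * 1000 / 1536.0, roughly 333.33 ms per step.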
+#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#训练 +nohup python3.7 ${cur_path}/../examples/imagenet/main.py \ + --data=$data_path \ + --arch=efficientnet-b0 \ + --batch-size=$batch_size \ + --lr=0.2 \ + --momentum=0.9 \ + --epochs=$train_epochs \ + --autoaug \ + --amp \ + --pm=O1 \ + --loss_scale=32 \ + --val_feq=10 \ + --stop-step-num=1000 \ + --npu=$ASCEND_DEVICE_ID > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +#修改参数 +sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_8p.sh index 5128dfcbb28a5c33daf7484bc1d856e0868d5bf2..b19bd51007f8efb7ce2a51fe50cef08c4152c4a7 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_8p.sh +++ 
b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/test/train_performance_8p.sh @@ -1,173 +1,173 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export SOC_VERSION=Ascend910 -export HCCL_CONNECT_TIMEOUT=600 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP_ETP_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="EfficientNet_ID0097_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=4096 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#修改参数 -sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -#corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` -#let a=RANK_ID*${corenum}/${RANK_SIZE} -#let b=RANK_ID+1 -#let c=b*${corenum}/${RANK_SIZE}-1 -export RANK_INDEX=0 -DEVICE_INDEX=$(( ASCEND_DEVICE_ID + RANK_INDEX * 8 )) -export DEVICE_INDEX=${DEVICE_INDEX} - -#nohup taskset -c $a-$c python3.7 ${cur_path}/../examples/imagenet/main.py -nohup python3.7 ${cur_path}/../examples/imagenet/main.py \ - --data=$data_path \ - --arch=efficientnet-b0 \ - --batch-size=$batch_size \ - --lr=1.6 \ - --momentum=0.9 \ - --epochs=1 \ - --autoaug \ - --amp \ - --pm=O1 \ - --loss_scale=32 \ - --val_feq=10 \ - --addr=$(hostname -I |awk '{print $1}') \ - --dist-backend=hccl \ - --multiprocessing-distributed \ - --world-size 1 \ - --rank 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -#修改参数 -sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` 
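# Editor's note: illustrative sketch, not part of the patch. The 1p
# performance script launches with batch 512 / lr 0.2, while the 8p scripts
# use batch 4096 / lr 1.6, which is consistent with linearly scaling the
# learning rate with the global batch size (0.2 * 4096/512 = 1.6). The helper
# below just makes that relation explicit; it is an editorial aid and is not
# called anywhere in the scripts.

def scaled_lr(global_batch_size, base_lr=0.2, base_batch_size=512):
    """Linear LR scaling rule implied by the lr/batch pairs in these scripts."""
    return base_lr * global_batch_size / base_batch_size

assert abs(scaled_lr(512) - 0.2) < 1e-12    # single-device performance run
assert abs(scaled_lr(4096) - 1.6) < 1e-12   # 8-device full/performance runs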
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export SOC_VERSION=Ascend910 +export HCCL_CONNECT_TIMEOUT=600 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP_ETP_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="EfficientNet_ID0097_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=4096 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#修改参数 +sed -i "s|pass|break|g" ${cur_path}/../examples/imagenet/main.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +#corenum=`cat /proc/cpuinfo |grep "processor"|wc -l` +#let a=RANK_ID*${corenum}/${RANK_SIZE} +#let b=RANK_ID+1 +#let c=b*${corenum}/${RANK_SIZE}-1 +export RANK_INDEX=0 +DEVICE_INDEX=$(( ASCEND_DEVICE_ID + RANK_INDEX * 8 )) +export DEVICE_INDEX=${DEVICE_INDEX} + +#nohup taskset -c $a-$c python3.7 ${cur_path}/../examples/imagenet/main.py +nohup python3.7 ${cur_path}/../examples/imagenet/main.py 
\ + --data=$data_path \ + --arch=efficientnet-b0 \ + --batch-size=$batch_size \ + --lr=1.6 \ + --momentum=0.9 \ + --epochs=1 \ + --autoaug \ + --amp \ + --pm=O1 \ + --loss_scale=32 \ + --val_feq=10 \ + --addr=$(hostname -I |awk '{print $1}') \ + --dist-backend=hccl \ + --multiprocessing-distributed \ + --world-size 1 \ + --rank 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +#修改参数 +sed -i "s|break|pass|g" ${cur_path}/../examples/imagenet/main.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/tests/test_model.py b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/tests/test_model.py index 936d91fdbc6c34c8a86483a5256e3645cabe95af..7e0a8554cdcb7469d5a902ae3c21243e432adc1b 100644 --- a/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/tests/test_model.py +++ b/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch/tests/test_model.py @@ -1,139 +1,139 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import OrderedDict - -import pytest -import torch -import torch.nn as nn - -from efficientnet_pytorch import EfficientNet - - -# -- fixtures ------------------------------------------------------------------------------------- - -@pytest.fixture(scope='module', params=[x for x in range(4)]) -def model(request): - return 'efficientnet-b{}'.format(request.param) - - -@pytest.fixture(scope='module', params=[True, False]) -def pretrained(request): - return request.param - - -@pytest.fixture(scope='function') -def net(model, pretrained): - return EfficientNet.from_pretrained(model) if pretrained else EfficientNet.from_name(model) - - -# -- tests ---------------------------------------------------------------------------------------- - -@pytest.mark.parametrize('img_size', [224, 256, 512]) -def test_forward(net, img_size): - """Test `.forward()` doesn't throw an error""" - data = torch.zeros((1, 3, img_size, img_size)) - output = net(data) - assert not torch.isnan(output).any() - - -def test_dropout_training(net): - """Test dropout `.training` is set by `.train()` on parent `nn.module`""" - net.train() - assert net._dropout.training == True - - -def test_dropout_eval(net): - """Test dropout `.training` is set by `.eval()` on parent `nn.module`""" - net.eval() - assert net._dropout.training == False - - -def test_dropout_update(net): - """Test dropout `.training` is updated by `.train()` and `.eval()` on parent `nn.module`""" - net.train() - assert net._dropout.training == True - net.eval() - assert net._dropout.training == False - net.train() - assert net._dropout.training == True - net.eval() - assert net._dropout.training == False - - -@pytest.mark.parametrize('img_size', [224, 256, 512]) -def test_modify_dropout(net, img_size): - """Test ability to modify dropout and fc modules of network""" - dropout = nn.Sequential(OrderedDict([ - ('_bn2', nn.BatchNorm1d(net._bn1.num_features)), - ('_drop1', nn.Dropout(p=net._global_params.dropout_rate)), - ('_linear1', nn.Linear(net._bn1.num_features, 512)), - ('_relu', nn.ReLU()), - ('_bn3', nn.BatchNorm1d(512)), - ('_drop2', nn.Dropout(p=net._global_params.dropout_rate / 2)) - ])) - fc = nn.Linear(512, net._global_params.num_classes) - - net._dropout = dropout - net._fc = fc - - data = torch.zeros((2, 3, img_size, img_size)) - output = net(data) - assert not torch.isnan(output).any() - - -@pytest.mark.parametrize('img_size', [224, 256, 512]) -def test_modify_pool(net, img_size): - """Test ability to modify pooling module of network""" - - class AdaptiveMaxAvgPool(nn.Module): - - def __init__(self): - super().__init__() - self.ada_avgpool = nn.AdaptiveAvgPool2d(1) - self.ada_maxpool = nn.AdaptiveMaxPool2d(1) - - def forward(self, x): - avg_x = self.ada_avgpool(x) - max_x = self.ada_maxpool(x) - x = torch.cat((avg_x, max_x), dim=1) - return x - - avg_pooling = AdaptiveMaxAvgPool() - fc = nn.Linear(net._fc.in_features * 2, net._global_params.num_classes) - - net._avg_pooling = avg_pooling - net._fc = fc - - data = torch.zeros((2, 3, img_size, img_size)) - output = net(data) - assert not torch.isnan(output).any() - - 
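# Editor's note: illustrative sketch, not part of the patch. test_modify_pool
# above swaps in a pooling head that concatenates adaptive average- and
# max-pooled features, which is why the replacement fc layer takes
# net._fc.in_features * 2 inputs. The standalone check below demonstrates the
# channel doubling on a dummy feature map and does not depend on EfficientNet;
# the class name and the 1280-channel example shape are assumptions.
import torch
import torch.nn as nn

class AvgMaxConcatPool(nn.Module):
    """Concatenate global average and max pooling along the channel axis."""
    def __init__(self):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.max = nn.AdaptiveMaxPool2d(1)

    def forward(self, x):
        return torch.cat((self.avg(x), self.max(x)), dim=1)

features = torch.randn(2, 1280, 7, 7)    # e.g. an efficientnet-b0-sized final feature map
pooled = AvgMaxConcatPool()(features)
assert pooled.shape == (2, 2560, 1, 1)   # channels doubled: 2 * 1280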
-@pytest.mark.parametrize('img_size', [224, 256, 512]) -def test_extract_endpoints(net, img_size): - """Test `.extract_endpoints()` doesn't throw an error""" - data = torch.zeros((1, 3, img_size, img_size)) - endpoints = net.extract_endpoints(data) - assert not torch.isnan(endpoints['reduction_1']).any() - assert not torch.isnan(endpoints['reduction_2']).any() - assert not torch.isnan(endpoints['reduction_3']).any() - assert not torch.isnan(endpoints['reduction_4']).any() - assert not torch.isnan(endpoints['reduction_5']).any() - assert endpoints['reduction_1'].size(2) == img_size // 2 - assert endpoints['reduction_2'].size(2) == img_size // 4 - assert endpoints['reduction_3'].size(2) == img_size // 8 - assert endpoints['reduction_4'].size(2) == img_size // 16 - assert endpoints['reduction_5'].size(2) == img_size // 32 +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +import pytest +import torch +import torch.nn as nn + +from efficientnet_pytorch import EfficientNet + + +# -- fixtures ------------------------------------------------------------------------------------- + +@pytest.fixture(scope='module', params=[x for x in range(4)]) +def model(request): + return 'efficientnet-b{}'.format(request.param) + + +@pytest.fixture(scope='module', params=[True, False]) +def pretrained(request): + return request.param + + +@pytest.fixture(scope='function') +def net(model, pretrained): + return EfficientNet.from_pretrained(model) if pretrained else EfficientNet.from_name(model) + + +# -- tests ---------------------------------------------------------------------------------------- + +@pytest.mark.parametrize('img_size', [224, 256, 512]) +def test_forward(net, img_size): + """Test `.forward()` doesn't throw an error""" + data = torch.zeros((1, 3, img_size, img_size)) + output = net(data) + assert not torch.isnan(output).any() + + +def test_dropout_training(net): + """Test dropout `.training` is set by `.train()` on parent `nn.module`""" + net.train() + assert net._dropout.training == True + + +def test_dropout_eval(net): + """Test dropout `.training` is set by `.eval()` on parent `nn.module`""" + net.eval() + assert net._dropout.training == False + + +def test_dropout_update(net): + """Test dropout `.training` is updated by `.train()` and `.eval()` on parent `nn.module`""" + net.train() + assert net._dropout.training == True + net.eval() + assert net._dropout.training == False + net.train() + assert net._dropout.training == True + net.eval() + assert net._dropout.training == False + + +@pytest.mark.parametrize('img_size', [224, 256, 512]) +def test_modify_dropout(net, img_size): + """Test ability to modify dropout and fc modules of network""" + dropout = nn.Sequential(OrderedDict([ + ('_bn2', nn.BatchNorm1d(net._bn1.num_features)), + ('_drop1', nn.Dropout(p=net._global_params.dropout_rate)), + ('_linear1', nn.Linear(net._bn1.num_features, 512)), + ('_relu', nn.ReLU()), + 
('_bn3', nn.BatchNorm1d(512)), + ('_drop2', nn.Dropout(p=net._global_params.dropout_rate / 2)) + ])) + fc = nn.Linear(512, net._global_params.num_classes) + + net._dropout = dropout + net._fc = fc + + data = torch.zeros((2, 3, img_size, img_size)) + output = net(data) + assert not torch.isnan(output).any() + + +@pytest.mark.parametrize('img_size', [224, 256, 512]) +def test_modify_pool(net, img_size): + """Test ability to modify pooling module of network""" + + class AdaptiveMaxAvgPool(nn.Module): + + def __init__(self): + super().__init__() + self.ada_avgpool = nn.AdaptiveAvgPool2d(1) + self.ada_maxpool = nn.AdaptiveMaxPool2d(1) + + def forward(self, x): + avg_x = self.ada_avgpool(x) + max_x = self.ada_maxpool(x) + x = torch.cat((avg_x, max_x), dim=1) + return x + + avg_pooling = AdaptiveMaxAvgPool() + fc = nn.Linear(net._fc.in_features * 2, net._global_params.num_classes) + + net._avg_pooling = avg_pooling + net._fc = fc + + data = torch.zeros((2, 3, img_size, img_size)) + output = net(data) + assert not torch.isnan(output).any() + + +@pytest.mark.parametrize('img_size', [224, 256, 512]) +def test_extract_endpoints(net, img_size): + """Test `.extract_endpoints()` doesn't throw an error""" + data = torch.zeros((1, 3, img_size, img_size)) + endpoints = net.extract_endpoints(data) + assert not torch.isnan(endpoints['reduction_1']).any() + assert not torch.isnan(endpoints['reduction_2']).any() + assert not torch.isnan(endpoints['reduction_3']).any() + assert not torch.isnan(endpoints['reduction_4']).any() + assert not torch.isnan(endpoints['reduction_5']).any() + assert endpoints['reduction_1'].size(2) == img_size // 2 + assert endpoints['reduction_2'].size(2) == img_size // 4 + assert endpoints['reduction_3'].size(2) == img_size // 8 + assert endpoints['reduction_4'].size(2) == img_size // 16 + assert endpoints['reduction_5'].size(2) == img_size // 32 diff --git a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_1p.sh b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_1p.sh index 86b6ec9d5eededfc19c507c495dfe4974a280954..3ca029d3513516d6a352821156e03d22da5bd12e 100644 --- a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_1p.sh @@ -1,164 +1,164 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MobileNetV2_ID3072_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=512 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - 
precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#参数修改 -sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 128 \ - --lr 0.05 \ - --print-freq 1 \ - --eval-freq 1 \ - --dist-url 'tcp://127.0.0.1:50002' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --class-nums 1000 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --rank 0 \ - --device-list $ASCEND_DEVICE_ID \ - --amp \ - --benchmark 0 \ - --graph_mode \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + + +#集合通信参数,不需要修改 +export 
HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MobileNetV2_ID3072_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=512 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#参数修改 +sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 128 \ + --lr 0.05 \ + --print-freq 1 \ + --eval-freq 1 \ + --dist-url 'tcp://127.0.0.1:50002' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --class-nums 1000 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --rank 0 \ + --device-list $ASCEND_DEVICE_ID \ + --amp \ + --benchmark 0 \ + --graph_mode \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + 
+#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_8p.sh b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_8p.sh index 45d3585e41dc1130179fa48a37d16e5202ce5dfb..75fce3020beb6ad4247c3c40175258ea3b577ad5 100644 --- a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_ID3072_MobileNetV2_performance_8p.sh @@ -1,164 +1,164 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MobileNetV2_ID3072_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=4096 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#参数修改 -sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 128 \ - --lr 0.4 \ - --print-freq 1 \ - --eval-freq 1 \ - --dist-url 'tcp://127.0.0.1:50002' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ 
- --class-nums 1000 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --rank 0 \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp \ - --benchmark 0 \ - --graph_mode \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MobileNetV2_ID3072_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=4096 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ 
$para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#参数修改 +sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 128 \ + --lr 0.4 \ + --print-freq 1 \ + --eval-freq 1 \ + --dist-url 'tcp://127.0.0.1:50002' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --class-nums 1000 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --rank 0 \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp \ + --benchmark 0 \ + --graph_mode \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git 
a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_1p.sh index abbfc7ae808cfe9b0aeb1326310b1177f12eb222..55e29d1e09f7c40cfc994f2d850638f528d0e6a3 100644 --- a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_1p.sh @@ -1,158 +1,158 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="DeepMar_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=512 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 128 \ - --lr 0.05 \ - --print-freq 1 \ - --eval-freq 1 \ - --dist-url 'tcp://127.0.0.1:50002' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --class-nums 1000 \ - --batch-size 512 \ - --epochs $train_epochs \ - --rank 0 \ - --device-list '$ASCEND_DEVICE_ID' \ - --amp \ - --benchmark 0 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`grep Epoch: ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "FPS" '{print $2}'|awk -F " " '{print $1}' | tail -n +2|awk '{sum+=$1} END {print sum/NR}' | sed s/[[]:space:]//g ` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : 
$e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepMar_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=512 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 128 \ + --lr 0.05 \ + --print-freq 1 \ + --eval-freq 1 \ + --dist-url 'tcp://127.0.0.1:50002' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --class-nums 1000 \ + --batch-size 512 \ + --epochs $train_epochs \ + --rank 0 \ + --device-list '$ASCEND_DEVICE_ID' \ + --amp \ + --benchmark 0 \ + --data $data_path > 
$cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`grep Epoch: ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "FPS" '{print $2}'|awk -F " " '{print $1}' | tail -n +2|awk '{sum+=$1} END {print sum/NR}' | sed s/[[]:space:]//g ` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_8p.sh index 0073c1ce29e4bd77fa1df826f8c79653c35bc88d..eeaad0123980d0cd32d6bce7dda4bd95aaf1f075 100644 --- a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_full_8p.sh @@ -1,157 +1,157 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MobileNetV2_ID0098_for_PyTorch" -#训练epoch -train_epochs=500 -#train_epochs=10 -#训练batch_size -batch_size=4096 -#训练step -train_steps= -#学习率 -learning_rate= - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" 
-profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 128 \ - --lr 0.4 \ - --print-freq 1 \ - --eval-freq 1 \ - --dist-url 'tcp://127.0.0.1:50002' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --rank 0 \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp \ - --benchmark 0 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MobileNetV2_ID0098_for_PyTorch" +#训练epoch +train_epochs=500 +#train_epochs=10 +#训练batch_size +batch_size=4096 +#训练step +train_steps= +#学习率 +learning_rate= + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 128 \ + --lr 0.4 \ + --print-freq 1 \ + --eval-freq 1 \ + --dist-url 'tcp://127.0.0.1:50002' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --rank 0 \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp \ + --benchmark 0 \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' 
$cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_performance_1p.sh index d7d9c63c87d11ac3a34beb6dad4fce8de4b9b30a..b807919c4009bdf8eca867365c46746a53d03c98 100644 --- a/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch/test/train_performance_1p.sh @@ -1,163 +1,163 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MobileNetV2_ID0098_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=512 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#参数修改 -sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -# 绑核,不需要的绑核的模型删除,需要模型审视修改 -python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 49 \ - --workers 128 \ - --lr 0.05 \ - --print-freq 1 \ - --eval-freq 1 \ - --dist-url 'tcp://127.0.0.1:50002' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed 
\ - --world-size 1 \ - --class-nums 1000 \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --rank 0 \ - --device-list $ASCEND_DEVICE_ID \ - --amp \ - --benchmark 0 \ - --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#参数改回 -sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py -wait - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MobileNetV2_ID0098_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=512 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif 
[[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#参数修改 +sed -i "s|pass|break|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +# 绑核,不需要的绑核的模型删除,需要模型审视修改 +python3.7 ${cur_path}/../train/mobilenetv2_8p_main_anycard.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 49 \ + --workers 128 \ + --lr 0.05 \ + --print-freq 1 \ + --eval-freq 1 \ + --dist-url 'tcp://127.0.0.1:50002' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --class-nums 1000 \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --rank 0 \ + --device-list $ASCEND_DEVICE_ID \ + --amp \ + --benchmark 0 \ + --data $data_path > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#参数改回 +sed -i "s|break|pass|g" ${cur_path}/../train/mobilenetv2_8p_main_anycard.py +wait + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git 
a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt index dccbd552fe36028a316284c1f59ee3c7119c81d0..903f82d57a2161d278c4c46698b8430ecebd7e4b 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt @@ -1,49 +1,49 @@ -cmake_minimum_required(VERSION 3.14.0) -project(resnet50) - -set(TARGET resnet50) - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors) -set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include) -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist) -endif() - -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) - -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} main.cpp Resnet50Classify.cpp) -target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs) - +cmake_minimum_required(VERSION 3.14.0) +project(resnet50) + +set(TARGET resnet50) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include) +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist) 
+endif() + +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} main.cpp Resnet50Classify.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs) + install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp index 024a9c3ae1ba1017d377af5dfe19988c937b1584..6322732d106d6e5b3df5a69ff156ca5c86dad7f2 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp @@ -1,261 +1,261 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "Resnet50Classify.h" -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; -namespace { -const uint32_t YUV_BYTE_NU = 3; -const uint32_t YUV_BYTE_DE = 2; -const uint32_t VPC_H_ALIGN = 2; -} - -APP_ERROR Resnet50Classify::Init(const InitParam &initParam) -{ - deviceId_ = initParam.deviceId; - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - dvppWrapper_ = std::make_shared(); - ret = dvppWrapper_->Init(); - if (ret != APP_ERR_OK) { - LogError << "DvppWrapper init failed, ret=" << ret << "."; - return ret; - } - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - MxBase::ConfigData configData; - const std::string softmax = initParam.softmax ? "true" : "false"; - const std::string checkTensor = initParam.checkTensor ? 
"true" : "false"; - - configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); - configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); - configData.SetJsonValue("SOFTMAX", softmax); - configData.SetJsonValue("CHECK_MODEL", checkTensor); - - auto jsonStr = configData.GetCfgJson().serialize(); - std::map> config; - config["postProcessConfigContent"] = std::make_shared(jsonStr); - config["labelPath"] = std::make_shared(initParam.labelPath); - - post_ = std::make_shared(); - ret = post_->Init(config); - if (ret != APP_ERR_OK) { - LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::DeInit() -{ - dvppWrapper_->DeInit(); - model_->DeInit(); - post_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::ReadImage(const std::string &imgPath, cv::Mat &imageMat) -{ - imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg) -{ - float central_fraction = 0.75; - int crop_x = img.cols * central_fraction; - int crop_y = img.rows * central_fraction; - int crop_x1 = (img.cols - crop_x) / 2; - int crop_y1 = (img.rows - crop_y) / 2; - - cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y); - LogInfo << "images crop_x1: " << crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y; - cropImg = img(myROI); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) -{ - static constexpr uint32_t resizeHeight = 256; - static constexpr uint32_t resizeWidth = 256; - - cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight)); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) -{ - const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; - MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); - MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); - - APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Memory malloc failed."; - return ret; - } - std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; - tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Inference(const std::vector &inputs, - std::vector &outputs) -{ - auto dtypes = model_->GetOutputDataType(); - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - auto startTime = std::chrono::high_resolution_clock::now(); - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - auto endTime = std::chrono::high_resolution_clock::now(); - double costMs = std::chrono::duration(endTime - 
startTime).count(); - g_inferCost.push_back(costMs); - if (ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::PostProcess(const std::vector &inputs, - std::vector> &clsInfos) -{ - APP_ERROR ret = post_->Process(inputs, clsInfos); - if (ret != APP_ERR_OK) { - LogError << "Process failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::SaveInferResult(const std::string &imagePath, std::vector> &batchClsInfos) -{ - uint32_t batchIndex = 0; - LogInfo << "image path: " << imagePath; - std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1); - size_t dot = fileName.find_last_of("."); - - std::string resultPathName = "result"; - if (access(resultPathName.c_str(), 0) != 0) { - APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); - if (ret != 0) { - LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; - return APP_ERR_COMM_FAILURE; - } - } - std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt"; - LogInfo << "file path for saving result: " << resFileName; - std::ofstream tfile(resFileName); - if (tfile.fail()) { - LogError << "Failed to open result file"; - return APP_ERR_COMM_FAILURE; - } - - for (auto clsInfos : batchClsInfos) { - std::string resultStr = ""; - for (auto clsInfo : clsInfos) { - LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className - << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId; - resultStr += std::to_string(clsInfo.classId) + " "; - } - tfile << resultStr << std::endl; - batchIndex += 1; - } - tfile.close(); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Process(const std::string &imgPath) -{ - cv::Mat imageMat; - APP_ERROR ret = ReadImage(imgPath, imageMat); - if (ret != APP_ERR_OK) { - LogError << "ReadImage failed, ret=" << ret << "."; - return ret; - } - - ret = CenterCropImage(imageMat, imageMat); - if (ret != APP_ERR_OK) { - LogError << "crop failed, ret=" << ret << "."; - return ret; - } - ret = Resize(imageMat, imageMat); - if (ret != APP_ERR_OK) { - LogError << "Resize failed, ret=" << ret << "."; - return ret; - } - - std::vector inputs = {}; - std::vector outputs = {}; - TensorBase tensorBase; - ret = CVMatToTensorBase(imageMat, tensorBase); - if (ret != APP_ERR_OK) { - LogError << "CVMatToTensorBase failed, ret=" << ret << "."; - return ret; - } - inputs.push_back(tensorBase); - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - - std::vector> BatchClsInfos = {}; - ret = PostProcess(outputs, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - - ret = SaveInferResult(imgPath, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "Save results failed, ret: " << ret << "."; - return ret; - } - - imageMat.release(); - return APP_ERR_OK; +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
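For reference, the CenterCropImage and Resize steps in Resnet50Classify.cpp (keep the central 75% of the image, then resize to 256x256 without preserving the aspect ratio) can be reproduced in a few lines of Python. The sketch below is illustrative only and is not part of this patch; the image path is a placeholder.

import cv2  # opencv-python; illustrative companion to the C++ preprocessing in this hunk

def center_crop_then_resize(img, central_fraction=0.75, size=(256, 256)):
    # Keep the central fraction of the image, then resize, mirroring
    # Resnet50Classify::CenterCropImage followed by Resnet50Classify::Resize.
    h, w = img.shape[:2]
    crop_w, crop_h = int(w * central_fraction), int(h * central_fraction)
    x1, y1 = (w - crop_w) // 2, (h - crop_h) // 2
    cropped = img[y1:y1 + crop_h, x1:x1 + crop_w]
    return cv2.resize(cropped, size)

if __name__ == "__main__":
    image = cv2.imread("sample.jpg", cv2.IMREAD_COLOR)  # placeholder path
    if image is not None:
        print(center_crop_then_resize(image).shape)  # expected: (256, 256, 3)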
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "Resnet50Classify.h" +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +using namespace MxBase; +namespace { +const uint32_t YUV_BYTE_NU = 3; +const uint32_t YUV_BYTE_DE = 2; +const uint32_t VPC_H_ALIGN = 2; +} + +APP_ERROR Resnet50Classify::Init(const InitParam &initParam) +{ + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + dvppWrapper_ = std::make_shared(); + ret = dvppWrapper_->Init(); + if (ret != APP_ERR_OK) { + LogError << "DvppWrapper init failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + MxBase::ConfigData configData; + const std::string softmax = initParam.softmax ? "true" : "false"; + const std::string checkTensor = initParam.checkTensor ? "true" : "false"; + + configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); + configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); + configData.SetJsonValue("SOFTMAX", softmax); + configData.SetJsonValue("CHECK_MODEL", checkTensor); + + auto jsonStr = configData.GetCfgJson().serialize(); + std::map> config; + config["postProcessConfigContent"] = std::make_shared(jsonStr); + config["labelPath"] = std::make_shared(initParam.labelPath); + + post_ = std::make_shared(); + ret = post_->Init(config); + if (ret != APP_ERR_OK) { + LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::DeInit() +{ + dvppWrapper_->DeInit(); + model_->DeInit(); + post_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::ReadImage(const std::string &imgPath, cv::Mat &imageMat) +{ + imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg) +{ + float central_fraction = 0.75; + int crop_x = img.cols * central_fraction; + int crop_y = img.rows * central_fraction; + int crop_x1 = (img.cols - crop_x) / 2; + int crop_y1 = (img.rows - crop_y) / 2; + + cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y); + LogInfo << "images crop_x1: " << crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y; + cropImg = img(myROI); + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) +{ + static constexpr uint32_t resizeHeight = 256; + static constexpr uint32_t resizeWidth = 256; + + cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight)); + return APP_ERR_OK; +} + +APP_ERROR 
Resnet50Classify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) +{ + const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; + MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); + MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); + + APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc failed."; + return ret; + } + std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; + tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::Inference(const std::vector &inputs, + std::vector &outputs) +{ + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs.push_back(tensor); + } + DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration(endTime - startTime).count(); + g_inferCost.push_back(costMs); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::PostProcess(const std::vector &inputs, + std::vector> &clsInfos) +{ + APP_ERROR ret = post_->Process(inputs, clsInfos); + if (ret != APP_ERR_OK) { + LogError << "Process failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::SaveInferResult(const std::string &imagePath, std::vector> &batchClsInfos) +{ + uint32_t batchIndex = 0; + LogInfo << "image path: " << imagePath; + std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1); + size_t dot = fileName.find_last_of("."); + + std::string resultPathName = "result"; + if (access(resultPathName.c_str(), 0) != 0) { + APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); + if (ret != 0) { + LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; + return APP_ERR_COMM_FAILURE; + } + } + std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt"; + LogInfo << "file path for saving result: " << resFileName; + std::ofstream tfile(resFileName); + if (tfile.fail()) { + LogError << "Failed to open result file"; + return APP_ERR_COMM_FAILURE; + } + + for (auto clsInfos : batchClsInfos) { + std::string resultStr = ""; + for (auto clsInfo : clsInfos) { + LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className + << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId; + resultStr += std::to_string(clsInfo.classId) + " "; + } + tfile << resultStr << std::endl; + batchIndex += 1; + } + tfile.close(); + return APP_ERR_OK; +} + +APP_ERROR Resnet50Classify::Process(const std::string &imgPath) +{ + 
cv::Mat imageMat; + APP_ERROR ret = ReadImage(imgPath, imageMat); + if (ret != APP_ERR_OK) { + LogError << "ReadImage failed, ret=" << ret << "."; + return ret; + } + + ret = CenterCropImage(imageMat, imageMat); + if (ret != APP_ERR_OK) { + LogError << "crop failed, ret=" << ret << "."; + return ret; + } + ret = Resize(imageMat, imageMat); + if (ret != APP_ERR_OK) { + LogError << "Resize failed, ret=" << ret << "."; + return ret; + } + + std::vector inputs = {}; + std::vector outputs = {}; + TensorBase tensorBase; + ret = CVMatToTensorBase(imageMat, tensorBase); + if (ret != APP_ERR_OK) { + LogError << "CVMatToTensorBase failed, ret=" << ret << "."; + return ret; + } + inputs.push_back(tensorBase); + ret = Inference(inputs, outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + + std::vector> BatchClsInfos = {}; + ret = PostProcess(outputs, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + + ret = SaveInferResult(imgPath, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "Save results failed, ret: " << ret << "."; + return ret; + } + + imageMat.release(); + return APP_ERR_OK; } \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h index 02f3b59774eb485bfbf28f371f16d737b059a140..7d2ec48a2a98819115217523b603653ab1d5ae32 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h @@ -1,59 +1,59 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
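As a plain-Python reference (not part of the patch), SaveInferResult in the hunk above creates a result/ directory and writes one line of space-separated top-k class IDs to result/<image-name>_1.txt; the classification_task_metric.py scripts later in this patch parse exactly that layout. A minimal sketch with a hypothetical file name and example class IDs:

import os

def save_topk_ids(image_path, class_ids, result_dir="result"):
    # Mirror Resnet50Classify::SaveInferResult: "<stem>_1.txt" with space-separated IDs.
    os.makedirs(result_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(image_path))[0]
    out_path = os.path.join(result_dir, stem + "_1.txt")
    with open(out_path, "w") as f:
        f.write(" ".join(str(int(c)) for c in class_ids) + "\n")
    return out_path

if __name__ == "__main__":
    out = save_topk_ids("ILSVRC2012_val_00000001.JPEG", [65, 970, 384, 249, 113])  # example values
    with open(out) as f:
        print(f.read().strip())  # -> 65 970 384 249 113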
- */ - -#ifndef RESNET50_CLASSIFY_H -#define RESNET50_CLASSIFY_H - -#include -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "ClassPostProcessors/Resnet50PostProcess.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -extern std::vector g_inferCost; - -struct InitParam { - uint32_t deviceId; - std::string labelPath; - uint32_t classNum; - uint32_t topk; - bool softmax; - bool checkTensor; - std::string modelPath; -}; - -class Resnet50Classify { -public: - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat); - APP_ERROR Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); - APP_ERROR CenterCropImage(cv::Mat &img, cv::Mat &cropImg); - APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::vector &inputs, - std::vector> &clsInfos); - APP_ERROR SaveInferResult(const std::string &imgPath, - std::vector> &batchClsInfos); - APP_ERROR Process(const std::string &imgPath); -private: - std::shared_ptr dvppWrapper_; - std::shared_ptr model_; - std::shared_ptr post_; - MxBase::ModelDesc modelDesc_; - uint32_t deviceId_ = 0; -}; +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RESNET50_CLASSIFY_H +#define RESNET50_CLASSIFY_H + +#include +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "ClassPostProcessors/Resnet50PostProcess.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector g_inferCost; + +struct InitParam { + uint32_t deviceId; + std::string labelPath; + uint32_t classNum; + uint32_t topk; + bool softmax; + bool checkTensor; + std::string modelPath; +}; + +class Resnet50Classify { +public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat); + APP_ERROR Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); + APP_ERROR CenterCropImage(cv::Mat &img, cv::Mat &cropImg); + APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); + APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); + APP_ERROR PostProcess(const std::vector &inputs, + std::vector> &clsInfos); + APP_ERROR SaveInferResult(const std::string &imgPath, + std::vector> &batchClsInfos); + APP_ERROR Process(const std::string &imgPath); +private: + std::shared_ptr dvppWrapper_; + std::shared_ptr model_; + std::shared_ptr post_; + MxBase::ModelDesc modelDesc_; + uint32_t deviceId_ = 0; +}; #endif \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py index da35817a4d560028d5d591d5bdac434b39b87ee7..faa5f9d31cf3ecdcc3c6e106d39b57585b7d3b76 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py @@ -1,174 +1,174 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - """ - :param img_name:image file name contains file path - :return:image file name without file path - """ - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param gtfile_path: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param gtfile_path: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - :param filepath: the result of model predict - :return probabilities, number of label, in_type, color: - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - if cls_ind: - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, n_labels, topn=5): - """ - :param prediction_file_path: the result of model predict - :param result_store_path: the root path to store result - :param json_file: json file to save result - :param img_gt_dict: the ground truth of imagenet - :param topn: classify model acc topk - :return:NA - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - gt = img_gt_dict[img_name] - real_label = int(gt) - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - print(table_dict) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - 
try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter right number of argmuments, expected 4!") - exit(1) - # class number - n_labels = 1000 - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - - if not os.path.exists(annotation_file_path): - print("Ground truth file does not exist.") - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, n_labels, topn=5) - - elapsed = time.time() - start +#coding = utf-8 +#Copyright 2020 Huawei Technologies Co., Ltd +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + """ + :param img_name:image file name contains file path + :return:image file name without file path + """ + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param gtfile_path: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param gtfile_path: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + img_name = temp[0].split(".")[0] + img_lab = temp[1] + img_gt_dict[img_name] = img_lab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + :param filepath: the result of model predict + :return probabilities, number of label, in_type, color: + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(temp): + if cls_ind: + data_vec[ind] = np.int(cls_ind) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, n_labels, topn=5): + """ + :param prediction_file_path: the result 
of model predict + :param result_store_path: the root path to store result + :param json_file: json file to save result + :param img_gt_dict: the ground truth of imagenet + :param topn: classify model acc topk + :return:NA + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + gt = img_gt_dict[img_name] + real_label = int(gt) + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if str(real_label) == str(int(prediction[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + print(table_dict) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Please enter right number of argmuments, expected 4!") + exit(1) + # class number + n_labels = 1000 + if not os.path.exists(folder_davinci_target): + print("target file folder does not exist.") + + if not os.path.exists(annotation_file_path): + print("Ground truth file does not exist.") + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, n_labels, topn=5) + + elapsed = time.time() - start diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp index 137e934b965f6a3647451b7d5afbc19200370a04..1235200904ab113ba1f79a0cbc3bd0c78c373cc0 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp @@ -1,69 +1,69 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
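The accuracy bookkeeping in classification_task_metric.py (the file appears twice in this patch, under infer/mxbase and infer/sdk, with the same logic) records, for every image, the first rank at which a predicted class ID matches the ground-truth label, then converts the per-rank hit counts into cumulative Top-1..Top-N accuracy with np.cumsum. A condensed standalone sketch of that computation, using made-up predictions and labels:

import numpy as np

def topk_accuracy(ranked_predictions, labels, topn=5):
    # ranked_predictions: per-image lists of class IDs ordered by confidence.
    count_hit = np.zeros(topn)
    for ranked, truth in zip(ranked_predictions, labels):
        for i, cls_id in enumerate(ranked[:topn]):
            if int(cls_id) == int(truth):
                count_hit[i] += 1  # a hit is credited only at its first matching rank
                break
    # cumulative sum turns "hit exactly at rank i" into "hit within the top i+1"
    return np.cumsum(count_hit) / max(len(labels), 1)

if __name__ == "__main__":
    preds = [[3, 7, 1, 0, 9], [5, 3, 2, 8, 6], [4, 1, 3, 7, 2]]  # example values
    truth = [7, 5, 2]
    print(topk_accuracy(preds, truth))  # [0.333..., 0.666..., 0.666..., 0.666..., 1.0]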
- */ - -#include -#include -#include -#include "Resnet50Classify.h" -#include "MxBase/Log/Log.h" - -namespace fs = std::experimental::filesystem; -namespace { -const uint32_t CLASS_NUM = 1000; -} -std::vector g_inferCost; - -int main(int argc, char* argv[]) -{ - if (argc <= 1) { - LogWarn << "Please input image path, such as './val_union/'."; - return APP_ERR_OK; - } - - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.classNum = CLASS_NUM; - initParam.labelPath = "../models/imagenet1000_clsidx_to_labels.names"; - initParam.topk = 5; - initParam.softmax = true; - initParam.checkTensor = true; - initParam.modelPath = "../models/resnet50_pytorch.om"; - auto resnet50 = std::make_shared(); - APP_ERROR ret = resnet50->Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "Resnet50Classify init failed, ret=" << ret << "."; - return ret; - } - - std::string imgDir = argv[1]; - for (auto & entry : fs::directory_iterator(imgDir)) { - LogInfo << "read image path " << entry.path(); - ret = resnet50->Process(entry.path()); - if (ret != APP_ERR_OK) { - LogError << "Resnet50Classify process failed, ret=" << ret << "."; - resnet50->DeInit(); - return ret; - } - } - resnet50->DeInit(); - double costSum = 0; - for (unsigned int i = 0; i < g_inferCost.size(); i++) { - costSum += g_inferCost[i]; - } - LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; - LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec."; - return APP_ERR_OK; +/* + * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
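main.cpp in this hunk collects one latency (in milliseconds) per inference call in g_inferCost and reports the total cost plus a throughput of count * 1000 / total_ms. The same arithmetic in Python, with placeholder latencies:

def summarize_latencies(latencies_ms):
    # Reproduce the summary printed at the end of main(): total time and images/sec.
    total_ms = sum(latencies_ms)
    throughput = len(latencies_ms) * 1000.0 / total_ms if total_ms > 0 else 0.0
    return total_ms, throughput

if __name__ == "__main__":
    total, ips = summarize_latencies([2.1, 1.9, 2.0, 2.2])  # placeholder values
    print("Infer images sum 4, cost total time: %.1f ms." % total)
    print("The throughput: %.1f images/sec." % ips)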
+ */ + +#include +#include +#include +#include "Resnet50Classify.h" +#include "MxBase/Log/Log.h" + +namespace fs = std::experimental::filesystem; +namespace { +const uint32_t CLASS_NUM = 1000; +} +std::vector g_inferCost; + +int main(int argc, char* argv[]) +{ + if (argc <= 1) { + LogWarn << "Please input image path, such as './val_union/'."; + return APP_ERR_OK; + } + + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.classNum = CLASS_NUM; + initParam.labelPath = "../models/imagenet1000_clsidx_to_labels.names"; + initParam.topk = 5; + initParam.softmax = true; + initParam.checkTensor = true; + initParam.modelPath = "../models/resnet50_pytorch.om"; + auto resnet50 = std::make_shared(); + APP_ERROR ret = resnet50->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "Resnet50Classify init failed, ret=" << ret << "."; + return ret; + } + + std::string imgDir = argv[1]; + for (auto & entry : fs::directory_iterator(imgDir)) { + LogInfo << "read image path " << entry.path(); + ret = resnet50->Process(entry.path()); + if (ret != APP_ERR_OK) { + LogError << "Resnet50Classify process failed, ret=" << ret << "."; + resnet50->DeInit(); + return ret; + } + } + resnet50->DeInit(); + double costSum = 0; + for (unsigned int i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec."; + return APP_ERR_OK; } \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py index 2d383ae6cf0fbdc06b9f7baec87255f436dc9e42..dc6d0328e0bd18a04e2cf30616199c94747cd72f 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py @@ -1,175 +1,175 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - """ - :param: file path - :return: filename - """ - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - :param filepath: the result of model predict - :return probabilities, number of label, in_type, color: - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - if cls_ind: - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, n_labels, topn=5): - """ - :param prediction_file_path: the result of model predict - :param result_store_path: the root path to store result - :param json_file: json file to save result - :param img_gt_dict: the ground truth of imagenet - :param topn: classify model acc topk - :param n_labels: class numbers - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - gt = img_gt_dict[img_name] - real_label = int(gt) - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - print(table_dict) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - 
folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter right number of argmuments, expected 4!") - exit(1) - # class number - n_labels = 1000 - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - - if not os.path.exists(annotation_file_path): - print("Ground truth file does not exist.") - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, n_labels, topn=5) - - elapsed = time.time() - start +#coding = utf-8 +#Copyright 2020 Huawei Technologies Co., Ltd +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + """ + :param: file path + :return: filename + """ + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + img_name = temp[0].split(".")[0] + img_lab = temp[1] + img_gt_dict[img_name] = img_lab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + :param filepath: the result of model predict + :return probabilities, number of label, in_type, color: + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(temp): + if cls_ind: + data_vec[ind] = np.int(cls_ind) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, n_labels, topn=5): + """ + :param prediction_file_path: the result of model predict + :param result_store_path: the root path to store result + :param 
json_file: json file to save result + :param img_gt_dict: the ground truth of imagenet + :param topn: classify model acc topk + :param n_labels: class numbers + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + + ret = load_statistical_predict_result(filepath) + prediction = ret[0] + gt = img_gt_dict[img_name] + real_label = int(gt) + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if str(real_label) == str(int(prediction[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + print(table_dict) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Please enter right number of argmuments, expected 4!") + exit(1) + # class number + n_labels = 1000 + if not os.path.exists(folder_davinci_target): + print("target file folder does not exist.") + + if not os.path.exists(annotation_file_path): + print("Ground truth file does not exist.") + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, n_labels, topn=5) + + elapsed = time.time() - start diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py index 89e41a2e40fea9ea68fef94fb6f8a38b7f86cf67..af51db50ad89d57c3d6fe840c0b10f0c3616d89d 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py @@ -1,688 +1,688 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import glob -import random -import shutil -import sys -import time -import warnings -import math -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -import torch.npu - -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) -from pthtar2onx import convert -import DistributedResnet50.image_classification.resnet as nvmodels -from apex import amp -import moxing as mox - -BATCH_SIZE = 512 -EPOCHS_SIZE = 100 -TRAIN_STEP = 8000 -LOG_STEP = 1 - -CALCULATE_DEVICE = "npu:7" -PRINT_DEVICE = "cpu" -SOURCE_DIR = "/data/imagenet" - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data_url', - metavar='DIR', - default='/cache/data_url', - help='path to dataset') -parser.add_argument('-a', '--arch', - metavar='ARCH', - default='resnet50', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet18)') -parser.add_argument('-j', '--workers', - default=32, - type=int, - metavar='N', - help='number of data loading workers (default: 8)') -parser.add_argument('--epochs', - default=1, - type=int, - metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', - default=0, - type=int, - metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', - default=BATCH_SIZE, - type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', - default=0.2, - type=float, - metavar='LR', - help='initial learning rate', - dest='lr') -parser.add_argument('--momentum', - default=0.9, - type=float, - metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', - default=1e-4, - type=float, - metavar='W', - help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', - default=10, - type=int, - metavar='N', - help='print frequency (default: 10)') -parser.add_argument('--resume', - default='', - type=str, - metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', - dest='evaluate', - action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', - dest='pretrained', - action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', - default=-1, - type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', - default=-1, - type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', - default=None, - type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', - default='nccl', - type=str, - help='distributed backend') -parser.add_argument('--seed', - default=None, - type=int, - help='seed for initializing training. 
') -parser.add_argument('--gpu', - default=None, - type=int, - help='GPU id to use.') -parser.add_argument('--npu', - default=None, - type=int, - help='NPU id to use.') -parser.add_argument('--multiprocessing-distributed', - action='store_true') -parser.add_argument('--warmup', - default=5, - type=int, - metavar='E', - help='number of warmup epochs') -parser.add_argument('--label-smoothing', - default=0.1, - type=float, - metavar='S', - help='label smoothing') -parser.add_argument('--optimizer-batch-size', - default=-1, - type=int, - metavar='N', - help= - 'size of a total batch size, for simulating bigger batches using gradient accumulation') -parser.add_argument('--static-loss-scale', - type=float, - default=1, - help= - 'Static loss scale, positive power of 2 values can improve fp16 convergence.') - -parser.add_argument('-t', '--fine-tuning', default=False, action='store_true', - help='transfer learning + fine tuning - train only the last FC layer') -parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") -parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', - help='path to pretrained weight') -parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - -CACHE_TRAINING_URL = "/cache/training" -best_acc1 = 0 - -def main(): - args = parser.parse_args() - if args.npu is None: - args.npu = 0 - global CALCULATE_DEVICE - CALCULATE_DEVICE = "npu:{}".format(args.npu) - torch.npu.set_device(CALCULATE_DEVICE) - print("use ", CALCULATE_DEVICE) - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. 
This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - ngpus_per_node = torch.cuda.device_count() - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = gpu - - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = nvmodels.build_resnet('resnet50', 'classic', True) - CACHE_MODEL_URL = "/cache/model" - os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) - pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar") - pretrained_dict = torch.load(pretrained_weight)["state_dict"] - pretrained_dict.pop('module.fc.weight') - pretrained_dict.pop('module.fc.bias') - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch](zero_init_residual=True) - - if args.fine_tuning: - print("=> transfer-learning mode + fine-tuning (train only the last FC layer)") - # Freeze Previous Layers(now we are using them as features extractor) - # Fine Tuning the last layer for the new task - if args.arch == "resnet50": - model.classifier = nn.Linear(1024, 10) - model.classifier.parameters() - else: - print("Error: Fine-tuning is not supported on this architecture") - exit(-1) - else: - model.parameters() - - for layer in model.modules(): - if isinstance(layer, nn.Linear): - torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), ) - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
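In the multiprocessing-distributed branch of main_worker() a few lines below, the per-node batch size and data-loader worker count are split evenly across one process per device before the model is wrapped in DistributedDataParallel. The arithmetic, isolated here from the NPU/Apex specifics of the script, is only a sketch and not part of the patch:

def per_process_config(batch_size, workers, ngpus_per_node):
    # Matches main_worker(): divide the batch evenly, ceil-divide the workers.
    local_batch = int(batch_size / ngpus_per_node)
    local_workers = int((workers + ngpus_per_node - 1) / ngpus_per_node)
    return local_batch, local_workers

if __name__ == "__main__":
    print(per_process_config(512, 32, 8))  # -> (64, 4)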
- if args.gpu is not None: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / ngpus_per_node) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - else: - # DataParallel will divide and allocate batch_size to all available GPUs - if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): - model.features = torch.nn.DataParallel(model.features) - model.cuda() - else: - model = model.to(CALCULATE_DEVICE) - - lr_policy = lr_cosine_policy(args.lr, - args.warmup, - args.epochs) - - - # define loss function (criterion) and optimizer - loss = nn.CrossEntropyLoss - if args.label_smoothing > 0.0: - loss = lambda: LabelSmoothing(args.label_smoothing) - criterion = loss().to(CALCULATE_DEVICE) - optimizer = torch.optim.SGD([ - {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0}, - {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}], - args.lr, - momentum=args.momentum) - - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.npu is not None: - checkpoint = torch.load(args.resume) - elif args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.npu is not None: - best_acc1 = best_acc1.to("npu:{}".format(args.npu)) - elif args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." 
% real_path) - - traindir = os.path.join(real_path, 'train') - valdir = os.path.join(real_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=True) - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - lr_policy(optimizer, 0, epoch) - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - file_name = "checkpoint_npu{}".format(args.npu) - modeltmp = model.cpu() - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': modeltmp.state_dict(), - 'best_acc1': best_acc1, - }, is_best, file_name) - modeltmp.to(CALCULATE_DEVICE) - - if args.onnx: - convert_pth_to_onnx(args) - - # --------------modelarts modification---------- - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - -def convert_pth_to_onnx(args): - pth_pattern = os.path.join(CACHE_TRAINING_URL, f"checkpoint_npu{args.npu}.pth.tar") - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print(f"can't find pth {pth_pattern}") - return - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path) - - -def train(train_loader, model, criterion, optimizer, epoch, args): - if args.optimizer_batch_size < 0: - batch_size_multiplier = 1 - else: - tbs = 1 * args.batch_size - if args.optimizer_batch_size % tbs != 0: - print( - "Warning: simulated batch size {} is not divisible by actual batch size {}" - .format(args.optimizer_batch_size, tbs)) - batch_size_multiplier = int(args.optimizer_batch_size / tbs) - print("BSM: {}".format(batch_size_multiplier)) - - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - optimizer.zero_grad() - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - - images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: - target = 
target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer_step = ((i + 1) % batch_size_multiplier) == 0 - if optimizer_step: - if batch_size_multiplier != 1: - for param_group in optimizer.param_groups: - for param in param_group['params']: - param.grad /= batch_size_multiplier - optimizer.step() - optimizer.zero_grad() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - if i == TRAIN_STEP: - break - - print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size/(batch_time.avg+0.0001))) - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - return top1.avg - -def save_checkpoint(state, is_best, filename='checkpoint'): - if not os.path.exists(CACHE_TRAINING_URL): - os.makedirs(CACHE_TRAINING_URL) - - filename2 = os.path.join(CACHE_TRAINING_URL, filename + ".pth.tar") - torch.save(state, filename2) - if is_best: - shutil.copyfile(filename2, os.path.join(CACHE_TRAINING_URL, filename + 'model_best.pth.tar')) - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = 10 - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.batchsize = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.batchsize): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.batchsize) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return 
fmtstr.format(**self.__dict__) - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -class LabelSmoothing(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - :param smoothing: label smoothing factor - """ - super(LabelSmoothing, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu") - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean().to(CALCULATE_DEVICE) - -def lr_policy(lr_fn, logger=None): - if logger is not None: - logger.register_metric('lr', - log.LR_METER(), - verbosity=dllogger.Verbosity.VERBOSE) - - def _alr(optimizer, iteration, epoch): - lr = lr_fn(iteration, epoch) - - if logger is not None: - logger.log_metric('lr', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - return _alr - -def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = epochs - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr - return lr - - return lr_policy(_lr_fn, logger=logger) - -if __name__ == '__main__': - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
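+
+# Script overview (descriptive comment only): this ModelArts entry point trains
+# ResNet50 (by default) on an Ascend NPU. It copies the ImageNet dataset from
+# OBS (--data_url) into /cache/data_url, builds the model (optionally loading a
+# pretrained checkpoint), trains with apex AMP (opt_level O2, static loss scale
+# 1024) under a cosine learning-rate schedule with warmup, validates after every
+# epoch, keeps the best top-1 checkpoint under /cache/training, optionally
+# exports it to ONNX, and finally copies the outputs back to --train_url.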
+ +import argparse +import os +import glob +import random +import shutil +import sys +import time +import warnings +import math +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import torch.npu + +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) +from pthtar2onx import convert +import DistributedResnet50.image_classification.resnet as nvmodels +from apex import amp +import moxing as mox + +BATCH_SIZE = 512 +EPOCHS_SIZE = 100 +TRAIN_STEP = 8000 +LOG_STEP = 1 + +CALCULATE_DEVICE = "npu:7" +PRINT_DEVICE = "cpu" +SOURCE_DIR = "/data/imagenet" + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data_url', + metavar='DIR', + default='/cache/data_url', + help='path to dataset') +parser.add_argument('-a', '--arch', + metavar='ARCH', + default='resnet50', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', + default=32, + type=int, + metavar='N', + help='number of data loading workers (default: 8)') +parser.add_argument('--epochs', + default=1, + type=int, + metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', + default=0, + type=int, + metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', + default=BATCH_SIZE, + type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', + default=0.2, + type=float, + metavar='LR', + help='initial learning rate', + dest='lr') +parser.add_argument('--momentum', + default=0.9, + type=float, + metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', + default=1e-4, + type=float, + metavar='W', + help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', + default=10, + type=int, + metavar='N', + help='print frequency (default: 10)') +parser.add_argument('--resume', + default='', + type=str, + metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', + dest='evaluate', + action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', + dest='pretrained', + action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', + default=-1, + type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', + default=-1, + type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', + default=None, + type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', + default='nccl', + type=str, + help='distributed backend') +parser.add_argument('--seed', + default=None, + type=int, + help='seed for initializing training. 
') +parser.add_argument('--gpu', + default=None, + type=int, + help='GPU id to use.') +parser.add_argument('--npu', + default=None, + type=int, + help='NPU id to use.') +parser.add_argument('--multiprocessing-distributed', + action='store_true') +parser.add_argument('--warmup', + default=5, + type=int, + metavar='E', + help='number of warmup epochs') +parser.add_argument('--label-smoothing', + default=0.1, + type=float, + metavar='S', + help='label smoothing') +parser.add_argument('--optimizer-batch-size', + default=-1, + type=int, + metavar='N', + help= + 'size of a total batch size, for simulating bigger batches using gradient accumulation') +parser.add_argument('--static-loss-scale', + type=float, + default=1, + help= + 'Static loss scale, positive power of 2 values can improve fp16 convergence.') + +parser.add_argument('-t', '--fine-tuning', default=False, action='store_true', + help='transfer learning + fine tuning - train only the last FC layer') +parser.add_argument('--train_url', + default="/cache/training", + type=str, + help="setting dir of training output") +parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', + help='path to pretrained weight') +parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") + +CACHE_TRAINING_URL = "/cache/training" +best_acc1 = 0 + +def main(): + args = parser.parse_args() + if args.npu is None: + args.npu = 0 + global CALCULATE_DEVICE + CALCULATE_DEVICE = "npu:{}".format(args.npu) + torch.npu.set_device(CALCULATE_DEVICE) + print("use ", CALCULATE_DEVICE) + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + ngpus_per_node = torch.cuda.device_count() + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = nvmodels.build_resnet('resnet50', 'classic', True) + CACHE_MODEL_URL = "/cache/model" + os.makedirs(CACHE_MODEL_URL, exist_ok=True) + mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) + pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar") + pretrained_dict = torch.load(pretrained_weight)["state_dict"] + pretrained_dict.pop('module.fc.weight') + pretrained_dict.pop('module.fc.bias') + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch](zero_init_residual=True) + + if args.fine_tuning: + print("=> transfer-learning mode + fine-tuning (train only the last FC layer)") + # Freeze Previous Layers(now we are using them as features extractor) + # Fine Tuning the last layer for the new task + if args.arch == "resnet50": + model.classifier = nn.Linear(1024, 10) + model.classifier.parameters() + else: + print("Error: Fine-tuning is not supported on this architecture") + exit(-1) + else: + model.parameters() + + for layer in model.modules(): + if isinstance(layer, nn.Linear): + torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), ) + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
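+        # Worked example for the per-process split below: with the default
+        # batch_size=512 and ngpus_per_node=8, each DDP process would train
+        # with a local batch of 512/8 = 64 and ceil(32/8) = 4 data-loading
+        # workers (workers defaults to 32).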
+ if args.gpu is not None: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + args.batch_size = int(args.batch_size / ngpus_per_node) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = model.to(CALCULATE_DEVICE) + + lr_policy = lr_cosine_policy(args.lr, + args.warmup, + args.epochs) + + + # define loss function (criterion) and optimizer + loss = nn.CrossEntropyLoss + if args.label_smoothing > 0.0: + loss = lambda: LabelSmoothing(args.label_smoothing) + criterion = loss().to(CALCULATE_DEVICE) + optimizer = torch.optim.SGD([ + {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0}, + {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}], + args.lr, + momentum=args.momentum) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.npu is not None: + checkpoint = torch.load(args.resume) + elif args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.npu is not None: + best_acc1 = best_acc1.to("npu:{}".format(args.npu)) + elif args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + mox.file.copy_parallel(args.data_url, real_path) + print("training data finish copy to %s." 
% real_path) + + traindir = os.path.join(real_path, 'train') + valdir = os.path.join(real_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=True) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + lr_policy(optimizer, 0, epoch) + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + file_name = "checkpoint_npu{}".format(args.npu) + modeltmp = model.cpu() + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': modeltmp.state_dict(), + 'best_acc1': best_acc1, + }, is_best, file_name) + modeltmp.to(CALCULATE_DEVICE) + + if args.onnx: + convert_pth_to_onnx(args) + + # --------------modelarts modification---------- + mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) + # --------------modelarts modification end---------- + +def convert_pth_to_onnx(args): + pth_pattern = os.path.join(CACHE_TRAINING_URL, f"checkpoint_npu{args.npu}.pth.tar") + pth_file_list = glob.glob(pth_pattern) + if not pth_file_list: + print(f"can't find pth {pth_pattern}") + return + pth_file = pth_file_list[0] + onnx_path = pth_file.split(".")[0] + '.onnx' + convert(pth_file, onnx_path) + + +def train(train_loader, model, criterion, optimizer, epoch, args): + if args.optimizer_batch_size < 0: + batch_size_multiplier = 1 + else: + tbs = 1 * args.batch_size + if args.optimizer_batch_size % tbs != 0: + print( + "Warning: simulated batch size {} is not divisible by actual batch size {}" + .format(args.optimizer_batch_size, tbs)) + batch_size_multiplier = int(args.optimizer_batch_size / tbs) + print("BSM: {}".format(batch_size_multiplier)) + + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + optimizer.zero_grad() + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + images = images.cuda(args.gpu, non_blocking=True) + + images = images.to(CALCULATE_DEVICE, non_blocking=True) + if args.label_smoothing == 0.0: + target = 
target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + if args.label_smoothing > 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer_step = ((i + 1) % batch_size_multiplier) == 0 + if optimizer_step: + if batch_size_multiplier != 1: + for param_group in optimizer.param_groups: + for param in param_group['params']: + param.grad /= batch_size_multiplier + optimizer.step() + optimizer.zero_grad() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % LOG_STEP == 0: + progress.display(i) + + if i == TRAIN_STEP: + break + + print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size/(batch_time.avg+0.0001))) + +def validate(val_loader, model, criterion, args): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if args.gpu is not None: + images = images.cuda(args.gpu, non_blocking=True) + images = images.to(CALCULATE_DEVICE, non_blocking=True) + if args.label_smoothing == 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + if args.label_smoothing > 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % LOG_STEP == 0: + progress.display(i) + + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + return top1.avg + +def save_checkpoint(state, is_best, filename='checkpoint'): + if not os.path.exists(CACHE_TRAINING_URL): + os.makedirs(CACHE_TRAINING_URL) + + filename2 = os.path.join(CACHE_TRAINING_URL, filename + ".pth.tar") + torch.save(state, filename2) + if is_best: + shutil.copyfile(filename2, os.path.join(CACHE_TRAINING_URL, filename + 'model_best.pth.tar')) + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f'): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = 10 + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.batchsize = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.batchsize): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.batchsize) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return 
fmtstr.format(**self.__dict__) + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + lr = args.lr * (0.1 ** (epoch // 30)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + +class LabelSmoothing(nn.Module): + """ + NLL loss with label smoothing. + """ + def __init__(self, smoothing=0.0): + """ + Constructor for the LabelSmoothing module. + :param smoothing: label smoothing factor + """ + super(LabelSmoothing, self).__init__() + self.confidence = 1.0 - smoothing + self.smoothing = smoothing + + def forward(self, x, target): + logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu") + nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) + nll_loss = nll_loss.squeeze(1) + smooth_loss = -logprobs.mean(dim=-1) + loss = self.confidence * nll_loss + self.smoothing * smooth_loss + return loss.mean().to(CALCULATE_DEVICE) + +def lr_policy(lr_fn, logger=None): + if logger is not None: + logger.register_metric('lr', + log.LR_METER(), + verbosity=dllogger.Verbosity.VERBOSE) + + def _alr(optimizer, iteration, epoch): + lr = lr_fn(iteration, epoch) + + if logger is not None: + logger.log_metric('lr', lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + return _alr + +def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): + def _lr_fn(iteration, epoch): + if epoch < warmup_length: + lr = base_lr * (epoch + 1) / warmup_length + else: + e = epoch - warmup_length + es = epochs - warmup_length + lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr + return lr + + return lr_policy(_lr_fn, logger=logger) + +if __name__ == '__main__': + main() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh index dccd7239c221c6db39b53b7bf477014255ac8cb7..a181904b2c639d01b9241d880c5d56b4153ab93c 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_16p.sh @@ -1,171 +1,171 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_for_PyTorch" -# 训练batch_size -batch_size=4096 -# 训练使用的npu卡数 -export RANK_SIZE=16 -export RANK_ID_START=0 -# 数据集路径,保持为空,不需要修改 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" - -# 训练epoch 90 -train_epochs=90 
-# 加载数据进程数 -workers=128 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - export ASCEND_DEVICE_ID=$RANK_ID - echo "Device ID: $ASCEND_DEVICE_ID" - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt - fi - - echo run process ${RANK_ID} - - #SOLVER.MAX_ITER 82000 \ - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -done - -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi - -export NODE_RANK=${server_index} -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` - -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ - --data ${data_path} \ - --addr=$one_node_ip \ - --seed=49 \ - --workers=${workers} \ - --learning-rate=1.6 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=2 \ - --rank=0 \ - --benchmark=0 \ - --device='npu' \ - --epochs=${train_epochs} \ - --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a*2/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk 
-F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ResNet50_for_PyTorch" +# 训练batch_size +batch_size=4096 +# 训练使用的npu卡数 +export RANK_SIZE=16 +export RANK_ID_START=0 +# 数据集路径,保持为空,不需要修改 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" + +# 训练epoch 90 +train_epochs=90 +# 加载数据进程数 +workers=128 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + export ASCEND_DEVICE_ID=$RANK_ID + echo "Device ID: $ASCEND_DEVICE_ID" + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ckpt + fi + + echo run process ${RANK_ID} + + #SOLVER.MAX_ITER 82000 \ + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +done + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +export NODE_RANK=${server_index} +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` + +python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ + --data ${data_path} \ + --addr=$one_node_ip \ + --seed=49 \ + --workers=${workers} \ + --learning-rate=1.6 \ + --warmup=8 \ + --label-smoothing=0.1 \ + --mom=0.9 \ + --weight-decay=1.0e-04 \ + --static-loss-scale=128 \ + --print-freq=1 \ + --dist-url='tcp://127.0.0.1:50000' \ + --dist-backend='hccl' \ + --multiprocessing-distributed \ + --world-size=2 \ + --rank=0 \ + --benchmark=0 \ + --device='npu' \ + --epochs=${train_epochs} \ + --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log +FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a*2/NR)}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log 
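+# Note: FPS above is the mean of the per-epoch "FPS@all" values scaled by 2
+# (presumably to account for the second node in this 2-node, 16-card run), and
+# TrainingTime is the average time per training step in milliseconds, computed
+# as batch_size*1000/FPS.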
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh index cea303357750e5a27b69159a76004a1e38830883..43260ea8d161e0f595701b5b07d5ed291cdc912d 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_16p.sh @@ -1,156 +1,156 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_ID0095_for_PyTorch" -# 训练batch_size -batch_size=4096 -# 训练使用的npu卡数 -export RANK_SIZE=16 -# 数据集路径,保持为空,不需要修改 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" - -# 训练epoch 90 -train_epochs=3 -# 加载数据进程数 -workers=128 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -ASCEND_DEVICE_ID=0 -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - - -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi - -export NODE_RANK=${server_index} -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` - -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ - --data ${data_path} \ - --addr=$one_node_ip \ - --seed=49 \ - --workers=${workers} \ - --learning-rate=1.6 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=2 \ - --rank=0 \ - --benchmark=0 \ - --device='npu' \ - --epochs=${train_epochs} \ - --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a*2/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo 
"TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="ResNet50_ID0095_for_PyTorch" +# 训练batch_size +batch_size=4096 +# 训练使用的npu卡数 +export RANK_SIZE=16 +# 数据集路径,保持为空,不需要修改 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" + +# 训练epoch 90 +train_epochs=3 +# 加载数据进程数 +workers=128 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +export NODE_RANK=${server_index} +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` + +python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ + --data ${data_path} \ + --addr=$one_node_ip \ + --seed=49 \ + --workers=${workers} \ + --learning-rate=1.6 \ + --warmup=8 \ + --label-smoothing=0.1 \ + --mom=0.9 \ + --weight-decay=1.0e-04 \ + --static-loss-scale=128 \ + --print-freq=1 \ + --dist-url='tcp://127.0.0.1:50000' \ + --dist-backend='hccl' \ + --multiprocessing-distributed \ + --world-size=2 \ + --rank=0 \ + --benchmark=0 \ + --device='npu' \ + --epochs=${train_epochs} \ + --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log +FPS=`cat 
${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a*2/NR)}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_1p.sh index 295d88c2de904d13e90ec5c5e27a572888877356..971514cdeb4b6a365752b3d9efac92e763bc94c6 100644 --- a/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_1p.sh @@ -1,178 +1,178 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 -export RANK_SIZE=1 - -#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json -RANK_ID_START=0 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数 需要模型审视修改 -#网络名称,同目录名称 -Network="Shufflenetv2_ID0099_for_PyTorch" -#训练epoch -train_epochs=2 -#训练batch_size -batch_size=1536 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_8p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - 
--data_dump_flag data dump flag, default is 0 - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --autotune whether to enable autotune, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --autotune* ]];then - autotune=`echo ${para#*=}` - export autotune=$autotune - mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak - mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak - autotune_dump_path=${cur_path}/output/autotune_dump - mkdir -p ${autotune_dump_path}/GA - mkdir -p ${autotune_dump_path}/rl - cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ - cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - export DEVICE_ID=$RANK_ID - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - mkdir -p ${cur_path}/output/overflow_dump - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - mkdir -p ${cur_path}/output/overflow_dump - fi - over_dump_path=${cur_path}/output/overflow_dump - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - - python3 8p_main_med.py \ - --data=$data_path \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.75 \ - --print-freq=10 \ - --eval-freq=5 \ - --arch=shufflenet_v2_x1_0 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --batch-size=1536 \ - --epochs=2 \ - --warm_up_epochs=1 \ - --rank=0 \ - --amp \ - --momentum=0 \ - --wd=3.0517578125e-05 \ - --device-list=0 \ - --benchmark 0 \ - > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -echo "E2E Training Duration sec : $e2e_time" - -#cp -r ${cur_path}/train.log ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log - -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -FPS=`grep "FPS@all" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $7}'|tr -d ,| 
sed s/[[:space:]]//g` -ActualFPS=${FPS} - -temp1=`echo "1 * ${batch_size}"|bc` -TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc` - -ActualLoss=`grep "Loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $12}'` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 +export RANK_SIZE=1 + +#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数 需要模型审视修改 +#网络名称,同目录名称 +Network="Shufflenetv2_ID0099_for_PyTorch" +#训练epoch +train_epochs=2 +#训练batch_size +batch_size=1536 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_8p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is 0 + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + export autotune=$autotune + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp 
-rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + export DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + mkdir -p ${cur_path}/output/overflow_dump + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + mkdir -p ${cur_path}/output/overflow_dump + fi + over_dump_path=${cur_path}/output/overflow_dump + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + + python3 8p_main_med.py \ + --data=$data_path \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --workers=128 \ + --learning-rate=0.75 \ + --print-freq=10 \ + --eval-freq=5 \ + --arch=shufflenet_v2_x1_0 \ + --dist-url='tcp://127.0.0.1:50000' \ + --dist-backend='hccl' \ + --multiprocessing-distributed \ + --world-size=1 \ + --batch-size=1536 \ + --epochs=2 \ + --warm_up_epochs=1 \ + --rank=0 \ + --amp \ + --momentum=0 \ + --wd=3.0517578125e-05 \ + --device-list=0 \ + --benchmark 0 \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +echo "E2E Training Duration sec : $e2e_time" + +#cp -r ${cur_path}/train.log ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +FPS=`grep "FPS@all" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $7}'|tr -d ,| sed s/[[:space:]]//g` +ActualFPS=${FPS} + +temp1=`echo "1 * ${batch_size}"|bc` +TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc` + +ActualLoss=`grep "Loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $12}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_8p.sh index de67f1ede4b3c5ff2bcc9b2c4b8460b06976070d..39e8b6cd752b34aa6a67211bce45537704747884 100644 --- a/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_8p.sh +++ 
b/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch/test/train_performance_8p.sh @@ -1,175 +1,175 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 -export RANK_SIZE=8 -batch_size=1024 -#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json -RANK_ID_START=0 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数 需要模型审视修改 -#网络名称,同目录名称 -Network="Shufflenetv2_ID0099_for_PyTorch" -#训练epoch -train_epochs=2 -#device_id_list=0,1,2,3,4,5,6,7 -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_8p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is 0 - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --autotune whether to enable autotune, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --autotune* ]];then - autotune=`echo ${para#*=}` - export autotune=$autotune - mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak - mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak - autotune_dump_path=${cur_path}/output/autotune_dump - mkdir -p ${autotune_dump_path}/GA - mkdir -p ${autotune_dump_path}/rl - cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ - cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录 -export SIll=1 -cd $cur_path/../ -for((RANK_ID=$RANK_ID_START;RANK_ID<$((SIll+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - export RANK_ID=$RANK_ID - export DEVICE_ID=$RANK_ID - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - mkdir -p ${cur_path}/output/overflow_dump - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - mkdir -p ${cur_path}/output/overflow_dump - fi - over_dump_path=${cur_path}/output/overflow_dump - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - - python3 
8p_main_med.py \ - --data=$data_path \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.75 \ - --print-freq=10 \ - --eval-freq=5 \ - --arch=shufflenet_v2_x1_0 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --batch-size=1024 \ - --epochs=2 \ - --warm_up_epochs=1 \ - --rank=0 \ - --amp \ - --momentum=0 \ - --wd=3.0517578125e-05 \ - --device-list=0 \ - --benchmark 0 \ - > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 - -FPS=`grep "FPS@all" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $7}'|tr -d ,` -ActualFPS=`echo "8 * ${FPS}"|bc` -temp1=`echo "8 * ${batch_size}"|bc` -TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc` - -ActualLoss=`grep "Loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $12}'` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 +export RANK_SIZE=8 +batch_size=1024 +#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数 需要模型审视修改 +#网络名称,同目录名称 +Network="Shufflenetv2_ID0099_for_PyTorch" +#训练epoch +train_epochs=2 +#device_id_list=0,1,2,3,4,5,6,7 +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_8p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is 0 + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + 
over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + export autotune=$autotune + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录 +export SIll=1 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((SIll+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + export RANK_ID=$RANK_ID + export DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + mkdir -p ${cur_path}/output/overflow_dump + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + mkdir -p ${cur_path}/output/overflow_dump + fi + over_dump_path=${cur_path}/output/overflow_dump + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + + python3 8p_main_med.py \ + --data=$data_path \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed=49 \ + --workers=128 \ + --learning-rate=0.75 \ + --print-freq=10 \ + --eval-freq=5 \ + --arch=shufflenet_v2_x1_0 \ + --dist-url='tcp://127.0.0.1:50000' \ + --dist-backend='hccl' \ + --multiprocessing-distributed \ + --world-size=1 \ + --batch-size=1024 \ + --epochs=2 \ + --warm_up_epochs=1 \ + --rank=0 \ + --amp \ + --momentum=0 \ + --wd=3.0517578125e-05 \ + --device-list=0 \ + --benchmark 0 \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 + +FPS=`grep "FPS@all" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $7}'|tr -d ,` +ActualFPS=`echo "8 * ${FPS}"|bc` +temp1=`echo "8 * ${batch_size}"|bc` +TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc` + +ActualLoss=`grep "Loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $12}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/LICENSE b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/LICENSE old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-C4.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-C4.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-DilatedC5.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-DilatedC5.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-FPN.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RCNN-FPN.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RetinaNet.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Base-RetinaNet.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/rpn_R_50_C4_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml old mode 
100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml old mode 100755 
new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/semantic_R_50_FPN_1x.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/Misc/semantic_R_50_FPN_1x.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml old mode 100755 new mode 100644 diff 
--git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_cocofied_lvis.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_cocofied_lvis.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_for_tests.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_for_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_panoptic_fpn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/datasets/prepare_panoptic_fpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/demo.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/predictor.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/demo/predictor.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/c2_model_loading.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/c2_model_loading.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/catalog.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/catalog.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/detection_checkpoint.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/checkpoint/detection_checkpoint.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/compat.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/compat.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/config.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/config.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/defaults.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/config/defaults.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/build.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/build.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/catalog.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/catalog.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/common.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/common.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/dataset_mapper.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/dataset_mapper.py old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/builtin.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/builtin.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/builtin_meta.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/builtin_meta.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/cityscapes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/cityscapes.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/coco.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/coco.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis_v0_5_categories.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis_v0_5_categories.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis_v1_categories.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/lvis_v1_categories.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/pascal_voc.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/pascal_voc.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/register_coco.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/datasets/register_coco.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/detection_utils.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/detection_utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/distributed_sampler.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/distributed_sampler.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/grouped_batch_sampler.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/samplers/grouped_batch_sampler.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/augmentation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/augmentation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/augmentation_impl.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/augmentation_impl.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/transform.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/data/transforms/transform.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/defaults.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/defaults.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/hooks.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/hooks.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/launch.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/launch.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/train_loop.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/engine/train_loop.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/cityscapes_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/cityscapes_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/coco_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/coco_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/evaluator.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/evaluator.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/fast_eval_api.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/fast_eval_api.py old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/lvis_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/lvis_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/panoptic_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/panoptic_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/pascal_voc_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/pascal_voc_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/rotated_coco_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/rotated_coco_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/sem_seg_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/sem_seg_evaluation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/testing.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/evaluation/testing.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/api.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/api.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/c10.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/c10.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_export.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_export.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_inference.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_inference.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_modeling.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/caffe2_modeling.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/patcher.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/patcher.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/shared.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/shared.py old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/torchscript.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/export/torchscript.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/aspp.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/aspp.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/batch_norm.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/batch_norm.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/blocks.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/blocks.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp old mode 100755 new mode 100644 
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cocoeval/cocoeval.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cocoeval/cocoeval.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cocoeval/cocoeval.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cocoeval/cocoeval.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cuda_version.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/cuda_version.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv_cuda.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated.h old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/vision.cpp b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/csrc/vision.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/deform_conv.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/deform_conv.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/mask_ops.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/nms.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/nms.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/roi_align.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/roi_align.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/roi_align_rotated.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/roi_align_rotated.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/rotated_boxes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/rotated_boxes.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/shape_spec.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/shape_spec.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/wrappers.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/layers/wrappers.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/model_zoo/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/model_zoo/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/model_zoo/model_zoo.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/model_zoo/model_zoo.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/anchor_generator.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/anchor_generator.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/backbone.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/backbone.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/build.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/build.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/fpn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/fpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/resnet.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/backbone/resnet.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/box_regression.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/box_regression.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/matcher.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/matcher.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/build.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/build.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/panoptic_fpn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/panoptic_fpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/rcnn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/rcnn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/retinanet.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/retinanet.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/semantic_seg.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/meta_arch/semantic_seg.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/poolers.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/poolers.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/postprocessing.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/postprocessing.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/build.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/build.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/proposal_utils.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/proposal_utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/rpn.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/rpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/rrpn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/proposal_generator/rrpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/box_head.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/box_head.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/cascade_rcnn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/cascade_rcnn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/fast_rcnn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/fast_rcnn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/keypoint_head.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/keypoint_head.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/mask_head.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/mask_head.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/roi_heads.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/roi_heads.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/roi_heads/rotated_fast_rcnn.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/sampling.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/sampling.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/test_time_augmentation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/modeling/test_time_augmentation.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/build.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/build.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/lr_scheduler.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/solver/lr_scheduler.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/boxes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/boxes.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/image_list.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/image_list.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/instances.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/instances.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/keypoints.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/keypoints.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/masks.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/masks.py old mode 100755 new mode 100644 index a4d97d942eafbe3ea1bd71fad42b942e9aa5168d..3933471d084b42fc2e0e055d7452d1660e7dc0f1 --- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/masks.py +++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/masks.py @@ -1,442 +1,442 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import copy -import itertools -import numpy as np -from typing import Any, Iterator, List, Union -import pycocotools.mask as mask_util -import torch - -from detectron2.layers.roi_align import ROIAlign - -from .boxes import Boxes - - -def polygon_area(x, y): - # Using the shoelace formula - # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - -def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: - """ - Args: - polygons (list[ndarray]): each array has shape (Nx2,) - height, width (int) - - Returns: - ndarray: a bool mask of shape (height, width) - """ - assert len(polygons) > 0, "COCOAPI does not support empty polygons" - rles = mask_util.frPyObjects(polygons, height, width) - rle = mask_util.merge(rles) - return mask_util.decode(rle).astype(np.bool) - - -def rasterize_polygons_within_box( - polygons: List[np.ndarray], box: np.ndarray, mask_size: int -) -> torch.Tensor: - """ - Rasterize the polygons into a mask image and - crop the mask content in the given box. - The cropped mask is resized to (mask_size, mask_size). - - This function is used when generating training targets for mask head in Mask R-CNN. - Given original ground-truth masks for an image, new ground-truth mask - training targets in the size of `mask_size x mask_size` - must be provided for each predicted box. This function will be called to - produce such targets. - - Args: - polygons (list[ndarray[float]]): a list of polygons, which represents an instance. - box: 4-element numpy array - mask_size (int): - - Returns: - Tensor: BoolTensor of shape (mask_size, mask_size) - """ - # 1. Shift the polygons w.r.t the boxes - w, h = box[2] - box[0], box[3] - box[1] - - polygons = copy.deepcopy(polygons) - for p in polygons: - p[0::2] = p[0::2] - box[0] - p[1::2] = p[1::2] - box[1] - - # 2. Rescale the polygons to the new box size - # max() to avoid division by small number - ratio_h = mask_size / max(h, 0.1) - ratio_w = mask_size / max(w, 0.1) - - if ratio_h == ratio_w: - for p in polygons: - p *= ratio_h - else: - for p in polygons: - p[0::2] *= ratio_w - p[1::2] *= ratio_h - - # 3. Rasterize the polygons with coco api - mask = polygons_to_bitmask(polygons, mask_size, mask_size) - mask = torch.from_numpy(mask) - return mask - - -class BitMasks: - """ - This class stores the segmentation masks for all objects in one image, in - the form of bitmaps. - - Attributes: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - - def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Args: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) - assert tensor.dim() == 3, tensor.size() - self.image_size = tensor.shape[1:] - self.tensor = tensor - - def to(self, *args: Any, **kwargs: Any) -> "BitMasks": - return BitMasks(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": - """ - Returns: - BitMasks: Create a new :class:`BitMasks` by indexing. - - The following usage are allowed: - - 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. - 2. `new_masks = masks[2:10]`: return a slice of masks. - 3. 
`new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return BitMasks(self.tensor[item].view(1, -1)) - - if item.dtype == torch.int32: - m = self.tensor[item.long()] - else: - m = self.tensor[item] - - assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( - item, m.shape - ) - return BitMasks(m) - - def __iter__(self) -> torch.Tensor: - yield from self.tensor - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - def __len__(self) -> int: - return self.tensor.shape[0] - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: a BoolTensor which represents - whether each mask is empty (False) or non-empty (True). - """ - return self.tensor.flatten(1).any(dim=1) - - @staticmethod - def from_polygon_masks( - polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int - ) -> "BitMasks": - """ - Args: - polygon_masks (list[list[ndarray]] or PolygonMasks) - height, width (int) - """ - if isinstance(polygon_masks, PolygonMasks): - polygon_masks = polygon_masks.polygons - masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] - return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each bitmask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - It has less reconstruction error compared to rasterization with polygons. - However we observe no difference in accuracy, - but BitMasks requires more memory to store all the masks. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: - A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. - """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - device = self.tensor.device - - batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] - rois = torch.cat([batch_inds, boxes], dim=1) - bit_masks = self.tensor.to(dtype=torch.float32) - rois = rois.to(device=device) - - output = ( - ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) - .forward(bit_masks[:, None, :, :], rois) - .squeeze(1) - ) - output = output >= 0.5 - return output - - def get_bounding_boxes(self) -> None: - # not needed now - raise NotImplementedError - - @staticmethod - def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": - """ - Concatenates a list of BitMasks into a single BitMasks - - Arguments: - bitmasks_list (list[BitMasks]) - - Returns: - BitMasks: the concatenated BitMasks - """ - assert isinstance(bitmasks_list, (list, tuple)) - assert len(bitmasks_list) > 0 - assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) - - cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) - return cat_bitmasks - - -class PolygonMasks: - """ - This class stores the segmentation masks for all objects in one image, in the form of polygons. - - Attributes: - polygons: list[list[ndarray]]. 
Each ndarray is a float64 vector representing a polygon. - """ - - def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): - """ - Arguments: - polygons (list[list[np.ndarray]]): The first - level of the list correspond to individual instances, - the second level to all the polygons that compose the - instance, and the third level to the polygon coordinates. - The third level array should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - """ - assert isinstance(polygons, list), ( - "Cannot create PolygonMasks: Expect a list of list of polygons per image. " - "Got '{}' instead.".format(type(polygons)) - ) - - def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - # Use float64 for higher precision, because why not? - # Always put polygons on CPU (self.to is a no-op) since they - # are supposed to be small tensors. - # May need to change this assumption if GPU placement becomes useful - if isinstance(t, torch.Tensor): - t = t.cpu().numpy() - return np.asarray(t).astype("float64") - - def process_polygons( - polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] - ) -> List[np.ndarray]: - assert isinstance(polygons_per_instance, list), ( - "Cannot create polygons: Expect a list of polygons per instance. " - "Got '{}' instead.".format(type(polygons_per_instance)) - ) - # transform the polygon to a tensor - polygons_per_instance = [_make_array(p) for p in polygons_per_instance] - for polygon in polygons_per_instance: - assert len(polygon) % 2 == 0 and len(polygon) >= 6 - return polygons_per_instance - - self.polygons: List[List[np.ndarray]] = [ - process_polygons(polygons_per_instance) for polygons_per_instance in polygons - ] - - def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": - return self - - @property - def device(self) -> torch.device: - return torch.device("cpu") - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around polygon masks. - """ - boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) - for idx, polygons_per_instance in enumerate(self.polygons): - minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) - maxxy = torch.zeros(2, dtype=torch.float32) - for polygon in polygons_per_instance: - coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) - minxy = torch.min(minxy, torch.min(coords, dim=0).values) - maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) - boxes[idx, :2] = minxy - boxes[idx, 2:] = maxxy - return Boxes(boxes) - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: - a BoolTensor which represents whether each mask is empty (False) or not (True). - """ - keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] - return torch.from_numpy(np.asarray(keep, dtype=np.bool)) - - def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": - """ - Support indexing over the instances and return a `PolygonMasks` object. - `item` can be: - - 1. An integer. It will return an object with only one instance. - 2. A slice. It will return an object with the selected instances. - 3. A list[int]. It will return an object with the selected instances, - correpsonding to the indices in the list. - 4. A vector mask of type BoolTensor, whose length is num_instances. - It will return an object with the instances whose mask is nonzero. 
- """ - if isinstance(item, int): - selected_polygons = [self.polygons[item]] - elif isinstance(item, slice): - selected_polygons = self.polygons[item] - elif isinstance(item, list): - selected_polygons = [self.polygons[i] for i in item] - elif isinstance(item, torch.Tensor): - # Polygons is a list, so we have to move the indices back to CPU. - if item.dtype == torch.bool: - assert item.dim() == 1, item.shape - item = item.nonzero().squeeze(1).cpu().numpy().tolist() - elif item.dtype in [torch.int32, torch.int64]: - item = item.cpu().numpy().tolist() - else: - raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) - selected_polygons = [self.polygons[i] for i in item] - return PolygonMasks(selected_polygons) - - def __iter__(self) -> Iterator[List[np.ndarray]]: - """ - Yields: - list[ndarray]: the polygons for one instance. - Each Tensor is a float64 vector representing a polygon. - """ - return iter(self.polygons) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.polygons)) - return s - - def __len__(self) -> int: - return len(self.polygons) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each mask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. - """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - - device = boxes.device - # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise - # (several small tensors for representing a single instance mask) - boxes = boxes.to(torch.device("cpu")) - - results = [ - rasterize_polygons_within_box(poly, box.numpy(), mask_size) - for poly, box in zip(self.polygons, boxes) - ] - """ - poly: list[list[float]], the polygons for one instance - box: a tensor of shape (4,) - """ - if len(results) == 0: - return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) - return torch.stack(results, dim=0).to(device=device) - - def area(self): - """ - Computes area of the mask. - Only works with Polygons, using the shoelace formula: - https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - - Returns: - Tensor: a vector, area for each instance - """ - - area = [] - for polygons_per_instance in self.polygons: - area_per_instance = 0 - for p in polygons_per_instance: - area_per_instance += polygon_area(p[0::2], p[1::2]) - area.append(area_per_instance) - - return torch.tensor(area) - - @staticmethod - def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": - """ - Concatenates a list of PolygonMasks into a single PolygonMasks - - Arguments: - polymasks_list (list[PolygonMasks]) - - Returns: - PolygonMasks: the concatenated PolygonMasks - """ - assert isinstance(polymasks_list, (list, tuple)) - assert len(polymasks_list) > 0 - assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) - - cat_polymasks = type(polymasks_list[0])( - list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) - ) - return cat_polymasks +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import pycocotools.mask as mask_util +import torch + +from detectron2.layers.roi_align import ROIAlign + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. 
+ """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].view(1, -1)) + + if item.dtype == torch.int32: + m = self.tensor[item.long()] + else: + m = self.tensor[item] + + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> None: + # not needed now + raise NotImplementedError + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + assert isinstance(polygons, list), ( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + assert isinstance(polygons_per_instance, list), ( + "Cannot create polygons: Expect a list of polygons per instance. " + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform the polygon to a tensor + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + assert len(polygon) % 2 == 0 and len(polygon) >= 6 + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. 
+ """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=np.bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. + """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = item.nonzero().squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. + """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. + Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/rotated_boxes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/structures/rotated_boxes.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/analysis.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/analysis.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/collect_env.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/collect_env.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/colormap.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/colormap.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/comm.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/comm.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/env.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/env.py old mode 100755 new mode 
100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/events.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/events.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/logger.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/logger.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/memory.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/registry.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/registry.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/serialize.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/serialize.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/video_visualizer.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/video_visualizer.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/visualizer.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/detectron2/utils/visualizer.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/linter.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/linter.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/build_all_wheels.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/build_all_wheels.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/build_wheel.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/build_wheel.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/gen_install_table.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/gen_install_table.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/gen_wheel_index.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/gen_wheel_index.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/pkg_helpers.bash b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/packaging/pkg_helpers.bash old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/parse_results.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/parse_results.sh old mode 100755 new mode 100644 diff --git 
a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/run_inference_tests.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/run_inference_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/run_instant_tests.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/dev/run_instant_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/Dockerfile b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/Dockerfile old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/Dockerfile-circleci b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/Dockerfile-circleci old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/docker-compose.yml b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docker/docker-compose.yml old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/.gitignore b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/.gitignore old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/Makefile b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/Makefile old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/_static/css/custom.css b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/_static/css/custom.css old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/conf.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/conf.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/benchmarks.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/benchmarks.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/changelog.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/changelog.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/compatibility.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/compatibility.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/contributing.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/notes/contributing.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/builtin_datasets.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/builtin_datasets.md old mode 
100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/configs.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/configs.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/data_loading.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/data_loading.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/datasets.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/datasets.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/deployment.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/deployment.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/evaluation.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/evaluation.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/extend.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/extend.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/getting_started.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/getting_started.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/install.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/install.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/models.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/models.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/training.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/training.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/write-models.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/docs/tutorials/write-models.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/eval.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run8p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run8p_for_faster_rcnn.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run8p_for_faster_rcnn.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run_for_faster_rcnn.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/run_for_faster_rcnn.sh old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/setup.cfg 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/setup.cfg old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/setup.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/setup.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_1p.sh index b5f6264a8e01104c3b8154af145e00acab7fc7c8..2d19be502ae68a82480272ff2b60e9410c740d23 100644 --- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_full_1p.sh @@ -1,181 +1,181 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd`/../ - -#集合通信参数,不需要修改 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Faster_Mask_RCNN_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=128 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.01 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt - fi - # 绑核,不需要的绑核的模型删除,需要的模型审视修改 - #let a=RANK_ID*12 - #let b=RANK_ID+1 - #let c=b*12-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, 
--over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - nohup python3 main.py \ - --video_path ${data_path}/hmdb51_jpg \ - --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ - --result_path outputs \ - --dataset hmdb51 \ - --n_classes 51 \ - --n_pretrain_classes 700 \ - --pretrain_path ${data_path}/r3d18_K_200ep.pth \ - --ft_begin_module fc \ - --model resnet \ - --model_depth 18 \ - --batch_size 128 \ - --n_threads 16 \ - --checkpoint 5 \ - --amp_cfg \ - --opt_level O2 \ - --loss_scale_value 1024 \ - --device_list ${ASCEND_DEVICE_ID} \ - --n_epochs 1 \ - --manual_seed 1234 \ - --learning_rate 0.01 \ - --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "\[1/" | grep -v "\[2/" | awk -F "Fps" '{print$2}' | awk '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep "Fps" $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tail -2|head -1|awk '{print $17}'|sed 's/[()]//g' ` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd`/../ + +#集合通信参数,不需要修改 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Faster_Mask_RCNN_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=128 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.01 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 
+#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/test/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/test/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt + fi + # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + #let a=RANK_ID*12 + #let b=RANK_ID+1 + #let c=b*12-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 main.py \ + --video_path ${data_path}/hmdb51_jpg \ + --annotation_path ${data_path}/hmdb51_json/hmdb51_1.json \ + --result_path outputs \ + --dataset hmdb51 \ + --n_classes 51 \ + --n_pretrain_classes 700 \ + --pretrain_path ${data_path}/r3d18_K_200ep.pth \ + --ft_begin_module fc \ + --model resnet \ + --model_depth 18 \ + --batch_size 128 \ + --n_threads 16 \ + --checkpoint 5 \ + --amp_cfg \ + --opt_level O2 \ + --loss_scale_value 1024 \ + --device_list ${ASCEND_DEVICE_ID} \ + --n_epochs 1 \ + --manual_seed 1234 \ + --learning_rate 0.01 \ + --tensorboard > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep Fps $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v "\[1/" | grep -v "\[2/" | awk -F "Fps" '{print$2}' | awk '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep "Fps" 
$cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|tail -2|head -1|awk '{print $17}'|sed 's/[()]//g' ` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`echo "${batch_size} ${FPS}" | awk '{printf("%.2f",$1*1000/$2)}' ` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "Fps" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "Loss" '{print$2}' | awk '{print$1}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_1p.sh index 807a85a8e11d8b84959ad0eb945dfef648f06e09..aee654686f5e4ab57a00f9eed1808da11ee93285 100644 --- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_1p.sh @@ -1,182 +1,182 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" -test_path_dir="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Faster_Mask_RCNN_ID0101_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=8 -#训练step -train_steps=100 -#学习率 -learning_rate=0.01 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo 
${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --test_path_dir* ]];then - test_path_dir=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt -fi - - -#修改参数 -sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml -wait - -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" == x"true" ];then - sed -i "s|ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))|#ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))|g" $cur_path/../detectron2/engine/defaults.py -else - source ${test_path_dir}/env_npu.sh -fi - -cd $cur_path/../ - -python3 setup.py build develop > $cur_path/../log.txt - -#训练开始时间,不需要修改 -start_time=$(date +%s) -nohup python3 tools/train_net.py \ - --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ - AMP 1 \ - OPT_LEVEL O2 \ - LOSS_SCALE_VALUE 64 \ - MODEL.DEVICE npu:$ASCEND_DEVICE_ID \ - SOLVER.IMS_PER_BATCH $batch_size \ - SOLVER.MAX_ITER $train_steps \ - SEED 1234 \ - MODEL.RPN.NMS_THRESH 0.8 \ - MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ - MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ - DATALOADER.NUM_WORKERS 4 \ - SOLVER.BASE_LR 0.0025 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|awk 'NR>1'|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` -#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` -FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=None -#打印,不需要修改 -#echo "Final Train Accuracy : 
${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" +test_path_dir="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Faster_Mask_RCNN_ID0101_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=8 +#训练step +train_steps=100 +#学习率 +learning_rate=0.01 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/test/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/test/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --test_path_dir* ]];then + test_path_dir=`echo 
${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt +fi + + +#修改参数 +sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +wait + +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" == x"true" ];then + sed -i "s|ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))|#ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))|g" $cur_path/../detectron2/engine/defaults.py +else + source ${test_path_dir}/env_npu.sh +fi + +cd $cur_path/../ + +python3 setup.py build develop > $cur_path/../log.txt + +#训练开始时间,不需要修改 +start_time=$(date +%s) +nohup python3 tools/train_net.py \ + --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ + AMP 1 \ + OPT_LEVEL O2 \ + LOSS_SCALE_VALUE 64 \ + MODEL.DEVICE npu:$ASCEND_DEVICE_ID \ + SOLVER.IMS_PER_BATCH $batch_size \ + SOLVER.MAX_ITER $train_steps \ + SEED 1234 \ + MODEL.RPN.NMS_THRESH 0.8 \ + MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ + MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ + DATALOADER.NUM_WORKERS 4 \ + SOLVER.BASE_LR 0.0025 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|awk 'NR>1'|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=None +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' 
$cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_8p.sh index 448e69a2f228fc11bc442879c1dab29c3ef348b9..162088573c6b5f03f12560bd2c74ddb6a7b5fba8 100644 --- a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/test/train_performance_8p.sh @@ -1,170 +1,170 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Faster_Mask_RCNN_ID0101_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=64 -#训练step -train_steps=100 -#学习率 -learning_rate=0.01 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/test/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/test/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/test/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 
-fi - -if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt -fi - - -#修改参数 -sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml -wait - -cd $cur_path/../ - -python3 setup.py build develop > $cur_path/../log.txt - -#训练开始时间,不需要修改 -start_time=$(date +%s) -nohup python3 tools/train_net.py \ - --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ - --device-ids 0 1 2 3 4 5 6 7 \ - --num-gpus 8 \ - AMP 1 \ - OPT_LEVEL O2 \ - LOSS_SCALE_VALUE 64 \ - SOLVER.IMS_PER_BATCH $batch_size \ - SOLVER.MAX_ITER $train_steps \ - SEED 1234 \ - MODEL.RPN.NMS_THRESH 0.8 \ - MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ - MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ - DATALOADER.NUM_WORKERS 8 \ - SOLVER.BASE_LR 0.02 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -##Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|awk 'NR>1'|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` -#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` -FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=None -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" 
>> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Faster_Mask_RCNN_ID0101_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=64 +#训练step +train_steps=100 +#学习率 +learning_rate=0.01 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/test/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/test/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/test/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt +fi + + +#修改参数 +sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +wait + +cd $cur_path/../ + +python3 setup.py build develop > $cur_path/../log.txt + +#训练开始时间,不需要修改 +start_time=$(date +%s) +nohup python3 tools/train_net.py \ + --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ 
+ --device-ids 0 1 2 3 4 5 6 7 \ + --num-gpus 8 \ + AMP 1 \ + OPT_LEVEL O2 \ + LOSS_SCALE_VALUE 64 \ + SOLVER.IMS_PER_BATCH $batch_size \ + SOLVER.MAX_ITER $train_steps \ + SEED 1234 \ + MODEL.RPN.NMS_THRESH 0.8 \ + MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ + MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ + DATALOADER.NUM_WORKERS 8 \ + SOLVER.BASE_LR 0.02 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +##Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|awk 'NR>1'|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=None +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_coco.py 
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_coco.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_coco_evaluation.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_coco_evaluation.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_detection_utils.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_detection_utils.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_rotation_transform.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_rotation_transform.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_sampler.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_sampler.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_transforms.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/data/test_transforms.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/__init__.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_mask_ops.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_mask_ops.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_nms.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_nms.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_nms_rotated.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_nms_rotated.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_roi_align.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_roi_align.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_roi_align_rotated.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/layers/test_roi_align_rotated.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/__init__.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_anchor_generator.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_anchor_generator.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_box2box_transform.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_box2box_transform.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_fast_rcnn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_fast_rcnn.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_matcher.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_matcher.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_model_e2e.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_model_e2e.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_roi_heads.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_roi_heads.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_roi_pooler.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_roi_pooler.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_rpn.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/modeling/test_rpn.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/__init__.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/__init__.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_boxes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_boxes.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_imagelist.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_imagelist.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_instances.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_instances.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_rotated_boxes.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/structures/test_rotated_boxes.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_checkpoint.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_checkpoint.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_config.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_config.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_engine.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_engine.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_export_caffe2.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_export_caffe2.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_model_analysis.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_model_analysis.py
old mode 100755
new mode 100644
diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_model_zoo.py
b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_model_zoo.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_visualizer.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tests/test_visualizer.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/README.md b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/analyze_model.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/analyze_model.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/benchmark.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/benchmark.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/convert-torchvision-to-d2.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/convert-torchvision-to-d2.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/plain_train_net.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/plain_train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/train_net.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/visualize_data.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/visualize_data.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/visualize_json_results.py b/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch/tools/visualize_json_results.py old mode 100755 new mode 100644 diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/LICENSE b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/LICENSE +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/README.md b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/README.md index 39f21344a24d52f49a230714ce7eb3957e709312..9325120e108d5c505c53cf83d4b52c00c4db918f 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/README.md +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/README.md @@ -1,61 +1,61 @@ -# Shape Robust Text Detection with Progressive Scale Expansion Network - -## Requirements -* NPU配套的run包安装(C20B030) -* Python 3.7.5 -* PyTorch(NPU版本) -* apex(NPU版本) -* pyclipper -* Polygon3 -* opencv-python 3.4 - -## 1P -1. 编辑 train_1p.sh device-list(NPU设备号) addr(本机ip地址) data-dir(数据集目录) remark(备注信息) -2. 运行 sh train_1p.sh -``` -python3 -W ignore train_8p_anycard.py \ - --lr 0.001\ - --dist-backend 'hccl' \ - --rank 0 \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size 16 \ - --device 'npu' \ - --opt-level 'O2' \ - --loss-scale 64 \ - --addr='XX.XXX.XXX.XXX' \ #修改本机ip地址 - --seed 16 \ - --n_epoch 600 \ - --data-dir '/home/w50015720/npu/PSENet_data' \ #修改数据集目录 - --port 8272 \ - --schedule 200 400 \ - --device-list '0' \ # 修改NPU设备号 - --remark 'test' # 修改备注信息 -``` -## 8P -1. 编辑 train_8p.sh device-list(NPU设备号) addr(本机ip地址) data-dir(数据集目录) remark(备注信息) -2. 
运行 sh train_8p.sh - -``` -python3 -W ignore train_8p_anycard.py \ - --lr 0.008\ - --dist-backend 'hccl' \ - --rank 0 \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size 32 \ - --device 'npu' \ - --opt-level 'O2' \ - --loss-scale 64 \ - --addr='XX.XXX.XXX.XXX' \ #修改本机ip地址 - --seed 16 \ - --n_epoch 600 \ - --data-dir '/home/data/' \ #修改数据集目录 - --port 8271 \ - --schedule 200 400 \ - --device-list '0,1,2,3,4,5,6,7' \ # 修改NPU设备号 8卡 - --remark 'npu8pbatch32lr8' # 修改备注信息 -``` - +# Shape Robust Text Detection with Progressive Scale Expansion Network + +## Requirements +* NPU配套的run包安装(C20B030) +* Python 3.7.5 +* PyTorch(NPU版本) +* apex(NPU版本) +* pyclipper +* Polygon3 +* opencv-python 3.4 + +## 1P +1. 编辑 train_1p.sh device-list(NPU设备号) addr(本机ip地址) data-dir(数据集目录) remark(备注信息) +2. 运行 sh train_1p.sh +``` +python3 -W ignore train_8p_anycard.py \ + --lr 0.001\ + --dist-backend 'hccl' \ + --rank 0 \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size 16 \ + --device 'npu' \ + --opt-level 'O2' \ + --loss-scale 64 \ + --addr='XX.XXX.XXX.XXX' \ #修改本机ip地址 + --seed 16 \ + --n_epoch 600 \ + --data-dir '/home/w50015720/npu/PSENet_data' \ #修改数据集目录 + --port 8272 \ + --schedule 200 400 \ + --device-list '0' \ # 修改NPU设备号 + --remark 'test' # 修改备注信息 +``` +## 8P +1. 编辑 train_8p.sh device-list(NPU设备号) addr(本机ip地址) data-dir(数据集目录) remark(备注信息) +2. 运行 sh train_8p.sh + +``` +python3 -W ignore train_8p_anycard.py \ + --lr 0.008\ + --dist-backend 'hccl' \ + --rank 0 \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size 32 \ + --device 'npu' \ + --opt-level 'O2' \ + --loss-scale 64 \ + --addr='XX.XXX.XXX.XXX' \ #修改本机ip地址 + --seed 16 \ + --n_epoch 600 \ + --data-dir '/home/data/' \ #修改数据集目录 + --port 8271 \ + --schedule 200 400 \ + --device-list '0,1,2,3,4,5,6,7' \ # 修改NPU设备号 8卡 + --remark 'npu8pbatch32lr8' # 修改备注信息 +``` + diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/requirements.txt b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/requirements.txt index 3d66988238ab648957592fa0d284db227f4aa259..fef5b967029953c1fd2e1358900e017d295394c4 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/requirements.txt +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/requirements.txt @@ -1,3 +1,3 @@ -pyclipper -Polygon3 +pyclipper +Polygon3 opencv-python \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15.py b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15.py index 05dfb7a1ac3bcf628f80a8d66ab85442c28d47ce..1255f04c8ed5a27a4a6494383aae199336dd15f8 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15.py +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15.py @@ -1,449 +1,449 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -import os -import random -import sys -import time -import warnings - -import numpy as np -import torch -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.multiprocessing as mp -import torch.nn.parallel -import torch.npu -import torch.utils.data.distributed -from apex import amp - -import apex -import models -from data_loader import IC15Loader -from metrics import runningScore -from multi_epochs_dataloader import MultiEpochsDataLoader -from util import AverageMeter - - - -def ohem_single(score, gt_text, training_mask): - pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5))) - - if pos_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_num = (int)(np.sum(gt_text <= 0.5)) - neg_num = (int)(min(pos_num * 3, neg_num)) - - if neg_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_score = score[gt_text <= 0.5] - neg_score_sorted = np.sort(-neg_score) - threshold = -neg_score_sorted[neg_num - 1] - - selected_mask = ((score >= threshold) | (gt_text > 0.5)) & (training_mask > 0.5) - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - -def ohem_batch(scores, gt_texts, training_masks): - scores = scores.data.cpu().numpy() - gt_texts = gt_texts.data.cpu().numpy() - training_masks = training_masks.data.cpu().numpy() - selected_masks = [] - for i in range(scores.shape[0]): - selected_masks.append(ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[i, :, :])) - selected_masks = np.concatenate(selected_masks, 0) - selected_masks = torch.from_numpy(selected_masks).float() - return selected_masks - - -def dice_loss(input, target, mask): - input = torch.sigmoid(input) - - input = input.reshape(input.size()[0], -1) - target = target.reshape(target.size()[0], -1) - mask = mask.reshape(mask.size()[0], -1) - - input = input * mask - target = target * mask - - a = torch.sum(input * target, 1) - b = torch.sum(input * input, 1) + 0.001 - c = torch.sum(target * target, 1) + 0.001 - d = (2 * a) / (b + c) - dice_loss = torch.mean(d) - return 1 - dice_loss - - -def cal_text_score(texts, gt_texts, training_masks, running_metric_text): - training_masks = training_masks.data.cpu().numpy() - pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks - - pred_text[pred_text <= 0.5] = 0 - pred_text[pred_text > 0.5] = 1 - pred_text = pred_text.astype(np.int32) - gt_text = gt_texts.data.cpu().numpy() * training_masks - gt_text = gt_text.astype(np.int32) - running_metric_text.update(gt_text, pred_text) - score_text, _ = running_metric_text.get_scores() - return score_text - - -def cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel): - mask = (gt_texts * training_masks).data.cpu().numpy() - kernel = kernels[:, -1, :, :] - gt_kernel = gt_kernels[:, -1, :, :] - pred_kernel = torch.sigmoid(kernel).data.cpu().numpy() - pred_kernel[pred_kernel <= 0.5] = 0 - pred_kernel[pred_kernel > 0.5] = 1 - pred_kernel = (pred_kernel * mask).astype(np.int32) - gt_kernel = gt_kernel.data.cpu().numpy() - gt_kernel = (gt_kernel * mask).astype(np.int32) - running_metric_kernel.update(gt_kernel, pred_kernel) - score_kernel, _ = running_metric_kernel.get_scores() 
- return score_kernel - - -def train(train_loader, model, criterion, optimizer, epoch, args, npu_per_node): - model.train() - - losses = AverageMeter() - running_metric_text = runningScore(2) - running_metric_kernel = runningScore(2) - - epoch_time = time.time() - batch_time = time.time() - for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader): - loc = 'npu:{}'.format(args.npu) - imgs = imgs.to(loc, non_blocking=True) - gt_texts = gt_texts.to(loc, non_blocking=True) - gt_kernels = gt_kernels.to(loc, non_blocking=True) - training_masks = training_masks.to(loc, non_blocking=True) - - outputs = model(imgs) - texts = torch.index_select(outputs, 1, torch.tensor([0]).to(loc)).squeeze() - kernels = torch.index_select(outputs, 1, torch.tensor([1, 2, 3, 4, 5, 6]).to(loc)) - - selected_masks = ohem_batch(texts, gt_texts, training_masks) - selected_masks = selected_masks.to(loc, non_blocking=True) - - loss_text = criterion(texts, gt_texts, selected_masks) - - loss_kernels = [] - mask0 = torch.sigmoid(texts).data.cpu().numpy() - mask1 = training_masks.data.cpu().numpy() - selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32') - selected_masks = torch.from_numpy(selected_masks).float() - selected_masks = selected_masks.to(loc, non_blocking=True) - for i in range(6): - kernel_i = torch.index_select(kernels, 1, torch.tensor([i]).to(loc)).squeeze() - gt_kernel_i = torch.index_select(gt_kernels, 1, torch.tensor([i]).to(loc)).squeeze() - loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks) - loss_kernels.append(loss_kernel_i) - loss_kernel = sum(loss_kernels) / len(loss_kernels) - - loss = 0.7 * loss_text + 0.3 * loss_kernel - losses.update(loss.item(), imgs.size(0)) - - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text) - score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel) - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - batch_time = time.time() - batch_time - output_log = '(epoch: {epoch:0>3d} {batch:0>2d}/{size}) | FPS: {fps:5.3f} | Loss : {lossv:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( - epoch=epoch + 1, - batch=batch_idx + 1, - size=len(train_loader), - fps=npu_per_node * args.batch_size / batch_time, - lossv=losses.val, - acc=score_text['Mean Acc'], - iou_t=score_text['Mean IoU'], - iou_k=score_kernel['Mean IoU']) - batch_time = time.time() - print(output_log) - epoch_time = time.time() - epoch_time - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - output_log = '{epoch:0>3d}/{n_epoch} | LR: {lr:.5f} | FPS: {fps:5.3f} | batch: {batch:.5f}s | Loss: {lossa:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( - epoch=epoch + 1, - n_epoch=args.n_epoch, - lr=optimizer.param_groups[0]['lr'], - fps=npu_per_node * len(train_loader) * args.batch_size / epoch_time, - batch=epoch_time / len(train_loader), - lossa=losses.avg, - acc=score_text['Mean Acc'], - iou_t=score_text['Mean IoU'], - iou_k=score_kernel['Mean IoU']) - print(output_log) - sys.stdout.flush() - - return ( - losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'], score_text['Mean IoU'], score_kernel['Mean IoU']) - - -def adjust_learning_rate(args, optimizer, epoch): - global state - if 
epoch in args.schedule: - args.lr = args.lr * 0.1 - for param_group in optimizer.param_groups: - param_group['lr'] = args.lr - - -def save_checkpoint(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'): - if not os.path.isdir(checkpoint): - os.makedirs(checkpoint) - filepath = os.path.join(checkpoint, filename) - torch.save(state, filepath) - - -def main(npu, npu_per_node, args): - args.npu = args.process_device_map[npu] - print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:", - os.environ['KERNEL_NAME_ID']) - os.environ['KERNEL_NAME_ID'] = str(npu) - print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) - - if args.npu is not None: - print("[npu id:", args.npu, "]", "Use NPU: {} for training".format(args.npu)) - - if args.checkpoint == '': - args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (args.arch, args.batch_size, args.n_epoch) - if args.pretrain: - if 'synth' in args.pretrain: - args.checkpoint += "_pretrain_synth" - else: - args.checkpoint += "_pretrain_ic17" - if args.distributed: - if args.multiprocessing_distributed: - args.rank = args.rank * npu_per_node + npu - if args.device == 'npu': - dist.init_process_group(backend=args.dist_backend, - world_size=args.world_size, rank=args.rank) - loc = 'npu:{}'.format(args.npu) - torch.npu.set_device(loc) - args.batch_size = int(args.batch_size / npu_per_node) - args.workers = int((args.workers + npu_per_node - 1) / npu_per_node) - - print("[npu id:", args.npu, "]", "===============main_worker()=================") - print("[npu id:", args.npu, "]", args) - print("[npu id:", args.npu, "]", "===============main_worker()=================") - - print('checkpoint path: %s' % args.checkpoint) - print('init lr: %.8f' % args.lr) - print('schedule: ', args.schedule) - sys.stdout.flush() - - if not os.path.isdir(args.checkpoint): - os.makedirs(args.checkpoint) - - kernel_num = 7 - min_scale = 0.4 - start_epoch = 0 - - my_data = IC15Loader(args=args, - is_transform=True, - img_size=args.img_size, - kernel_num=kernel_num, - min_scale=min_scale) - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(my_data) - else: - train_sampler = None - - train_loader = MultiEpochsDataLoader( - my_data, - batch_size=args.batch_size, - shuffle=(train_sampler is None), - num_workers=args.workers, - drop_last=True, - pin_memory=True, - sampler=train_sampler) - - print("[npu id:", args.npu, "]", "=> creating model '{}'".format(args.arch)) - if args.arch == "resnet50": - model = models.resnet50(pretrained=True, num_classes=kernel_num) - elif args.arch == "resnet101": - model = models.resnet101(pretrained=True, num_classes=kernel_num) - elif args.arch == "resnet152": - model = models.resnet152(pretrained=True, num_classes=kernel_num) - - model = model.to(loc) - - if args.combine_sgd: - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) - else: - optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) - - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.opt_level, - keep_batchnorm_fp32=args.keep_batchnorm_fp32, - loss_scale=args.loss_scale, - combine_grad=args.combine_grad) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) - - if args.pretrain: - print('Using pretrained model.') - assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!' 
- checkpoint = torch.load(args.pretrain) - model.load_state_dict(checkpoint['state_dict']) - elif args.resume: - print('Resuming from checkpoint.') - assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' - checkpoint = torch.load(args.resume) - start_epoch = checkpoint['epoch'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - amp.load_state_dict(checkpoint['amp']) - else: - print('Training from scratch.') - cudnn.benchmark = True - - best_model = {'loss': 0, 'acc': 0, 'iou': 0} - - for epoch in range(start_epoch, args.n_epoch): - if args.distributed: - train_sampler.set_epoch(epoch) - adjust_learning_rate(args, optimizer, epoch) - - train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(train_loader, model, dice_loss, - optimizer, epoch, - args, npu_per_node) - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - if epoch > args.n_epoch - 6: - best_path = f'{args.remark}_{train_loss:.4f}_{train_te_acc:.4f}_{train_ke_iou:.4f}_{train_te_iou:.4f}_{epoch}.pth.tar' - save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': model.state_dict(), - 'lr': args.lr, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, checkpoint='best', filename=best_path) - best_model['acc'] = train_te_acc - best_model['iou'] = train_te_iou - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Hyperparams') - parser.add_argument('--data-dir', nargs='?', type=str, default='PSENet_data', - help='point to the root data path of ICDAR') - parser.add_argument('--train_data', nargs='?', type=str, default='ICDAR2015', - help='indicate which dataset was used, ICDAR2015 or ICDAR2017') - parser.add_argument('--arch', nargs='?', type=str, default='resnet50') - parser.add_argument('--img_size', nargs='?', type=int, default=640, - help='Height of the input image') - parser.add_argument('--n_epoch', nargs='?', type=int, default=600, - help='# of the epochs') - parser.add_argument('--schedule', type=int, nargs='+', default=[200, 400], - help='Decrease learning rate at these epochs.') - parser.add_argument('--batch_size', nargs='?', type=int, default=16, - help='Batch Size') - parser.add_argument('--lr', nargs='?', type=float, default=1e-3, - help='Learning Rate') - parser.add_argument('--resume', nargs='?', type=str, default=None, - help='Path to previous saved model to restart from') - parser.add_argument('--pretrain', nargs='?', type=str, default=None, - help='Path to previous saved model to restart from') - parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', - help='path to save checkpoint (default: checkpoint)') - parser.add_argument('--opt-level', type=str) - parser.add_argument('--keep-batchnorm-fp32', type=str, default=None) - parser.add_argument('--loss-scale', type=str, default=64) - parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') - parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') - parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') - parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N NPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') - parser.add_argument('--device', default='npu', type=str, - help='npu or gpu') - parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') - parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') - parser.add_argument('--addr', default='10.136.181.127', type=str, - help='master addr') - parser.add_argument('--dist-url', default='env://', type=str, - help='url used to set up distributed training') - parser.add_argument('--port', default='8888', type=str) - parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', - help='number of data loading workers (default: 4)') - parser.add_argument('--remark', default='', type=str, - help='remark. ') - parser.add_argument('--combine_grad', action='store_true', - help='whether to combine grad in apex') - parser.add_argument('--combine_sgd', action='store_true', - help='whether to use combined sgd instead of sgd') - - args = parser.parse_args() - - - if args.seed is not None: - random.seed(args.seed) - os.environ['PYTHONHASHSEED'] = str(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.cuda.manual_seed(args.seed) - torch.cuda.manual_seed_all(args.seed) - cudnn.deterministic = True - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - npu_per_node = len(args.process_device_map) - else: - npu_per_node = torch.cuda.device_count() - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = args.port - os.environ['KERNEL_NAME_ID'] = str(0) - print("+++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) - - args.world_size = npu_per_node * args.world_size - mp.spawn(main, nprocs=npu_per_node, args=(npu_per_node, args)) - +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import os +import random +import sys +import time +import warnings + +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.nn.parallel +import torch.npu +import torch.utils.data.distributed +from apex import amp + +import apex +import models +from data_loader import IC15Loader +from metrics import runningScore +from multi_epochs_dataloader import MultiEpochsDataLoader +from util import AverageMeter + + + +def ohem_single(score, gt_text, training_mask): + pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5))) + + if pos_num == 0: + selected_mask = training_mask + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + neg_num = (int)(np.sum(gt_text <= 0.5)) + neg_num = (int)(min(pos_num * 3, neg_num)) + + if neg_num == 0: + selected_mask = training_mask + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + neg_score = score[gt_text <= 0.5] + neg_score_sorted = np.sort(-neg_score) + threshold = -neg_score_sorted[neg_num - 1] + + selected_mask = ((score >= threshold) | (gt_text > 0.5)) & (training_mask > 0.5) + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + +def ohem_batch(scores, gt_texts, training_masks): + scores = scores.data.cpu().numpy() + gt_texts = gt_texts.data.cpu().numpy() + training_masks = training_masks.data.cpu().numpy() + selected_masks = [] + for i in range(scores.shape[0]): + selected_masks.append(ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[i, :, :])) + selected_masks = np.concatenate(selected_masks, 0) + selected_masks = torch.from_numpy(selected_masks).float() + return selected_masks + + +def dice_loss(input, target, mask): + input = torch.sigmoid(input) + + input = input.reshape(input.size()[0], -1) + target = target.reshape(target.size()[0], -1) + mask = mask.reshape(mask.size()[0], -1) + + input = input * mask + target = target * mask + + a = torch.sum(input * target, 1) + b = torch.sum(input * input, 1) + 0.001 + c = torch.sum(target * target, 1) + 0.001 + d = (2 * a) / (b + c) + dice_loss = torch.mean(d) + return 1 - dice_loss + + +def cal_text_score(texts, gt_texts, training_masks, running_metric_text): + training_masks = training_masks.data.cpu().numpy() + pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks + + pred_text[pred_text <= 0.5] = 0 + pred_text[pred_text > 0.5] = 1 + pred_text = pred_text.astype(np.int32) + gt_text = gt_texts.data.cpu().numpy() * training_masks + gt_text = gt_text.astype(np.int32) + running_metric_text.update(gt_text, pred_text) + score_text, _ = running_metric_text.get_scores() + return score_text + + +def cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel): + mask = (gt_texts * training_masks).data.cpu().numpy() + kernel = kernels[:, -1, :, :] + gt_kernel = gt_kernels[:, -1, :, :] + pred_kernel = torch.sigmoid(kernel).data.cpu().numpy() + pred_kernel[pred_kernel <= 0.5] = 0 + pred_kernel[pred_kernel > 0.5] = 1 + pred_kernel = (pred_kernel * mask).astype(np.int32) + gt_kernel = gt_kernel.data.cpu().numpy() + gt_kernel = (gt_kernel * mask).astype(np.int32) + running_metric_kernel.update(gt_kernel, pred_kernel) + score_kernel, _ = running_metric_kernel.get_scores() 
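+    # the returned dict carries the running 'Mean Acc' / 'Mean IoU' statistics that train() logs as IOU_k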
+ return score_kernel + + +def train(train_loader, model, criterion, optimizer, epoch, args, npu_per_node): + model.train() + + losses = AverageMeter() + running_metric_text = runningScore(2) + running_metric_kernel = runningScore(2) + + epoch_time = time.time() + batch_time = time.time() + for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader): + loc = 'npu:{}'.format(args.npu) + imgs = imgs.to(loc, non_blocking=True) + gt_texts = gt_texts.to(loc, non_blocking=True) + gt_kernels = gt_kernels.to(loc, non_blocking=True) + training_masks = training_masks.to(loc, non_blocking=True) + + outputs = model(imgs) + texts = torch.index_select(outputs, 1, torch.tensor([0]).to(loc)).squeeze() + kernels = torch.index_select(outputs, 1, torch.tensor([1, 2, 3, 4, 5, 6]).to(loc)) + + selected_masks = ohem_batch(texts, gt_texts, training_masks) + selected_masks = selected_masks.to(loc, non_blocking=True) + + loss_text = criterion(texts, gt_texts, selected_masks) + + loss_kernels = [] + mask0 = torch.sigmoid(texts).data.cpu().numpy() + mask1 = training_masks.data.cpu().numpy() + selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32') + selected_masks = torch.from_numpy(selected_masks).float() + selected_masks = selected_masks.to(loc, non_blocking=True) + for i in range(6): + kernel_i = torch.index_select(kernels, 1, torch.tensor([i]).to(loc)).squeeze() + gt_kernel_i = torch.index_select(gt_kernels, 1, torch.tensor([i]).to(loc)).squeeze() + loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks) + loss_kernels.append(loss_kernel_i) + loss_kernel = sum(loss_kernels) / len(loss_kernels) + + loss = 0.7 * loss_text + 0.3 * loss_kernel + losses.update(loss.item(), imgs.size(0)) + + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text) + score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + batch_time = time.time() - batch_time + output_log = '(epoch: {epoch:0>3d} {batch:0>2d}/{size}) | FPS: {fps:5.3f} | Loss : {lossv:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( + epoch=epoch + 1, + batch=batch_idx + 1, + size=len(train_loader), + fps=npu_per_node * args.batch_size / batch_time, + lossv=losses.val, + acc=score_text['Mean Acc'], + iou_t=score_text['Mean IoU'], + iou_k=score_kernel['Mean IoU']) + batch_time = time.time() + print(output_log) + epoch_time = time.time() - epoch_time + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + output_log = '{epoch:0>3d}/{n_epoch} | LR: {lr:.5f} | FPS: {fps:5.3f} | batch: {batch:.5f}s | Loss: {lossa:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( + epoch=epoch + 1, + n_epoch=args.n_epoch, + lr=optimizer.param_groups[0]['lr'], + fps=npu_per_node * len(train_loader) * args.batch_size / epoch_time, + batch=epoch_time / len(train_loader), + lossa=losses.avg, + acc=score_text['Mean Acc'], + iou_t=score_text['Mean IoU'], + iou_k=score_kernel['Mean IoU']) + print(output_log) + sys.stdout.flush() + + return ( + losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'], score_text['Mean IoU'], score_kernel['Mean IoU']) + + +def adjust_learning_rate(args, optimizer, epoch): + global state + if 
epoch in args.schedule: + args.lr = args.lr * 0.1 + for param_group in optimizer.param_groups: + param_group['lr'] = args.lr + + +def save_checkpoint(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'): + if not os.path.isdir(checkpoint): + os.makedirs(checkpoint) + filepath = os.path.join(checkpoint, filename) + torch.save(state, filepath) + + +def main(npu, npu_per_node, args): + args.npu = args.process_device_map[npu] + print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:", + os.environ['KERNEL_NAME_ID']) + os.environ['KERNEL_NAME_ID'] = str(npu) + print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) + + if args.npu is not None: + print("[npu id:", args.npu, "]", "Use NPU: {} for training".format(args.npu)) + + if args.checkpoint == '': + args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (args.arch, args.batch_size, args.n_epoch) + if args.pretrain: + if 'synth' in args.pretrain: + args.checkpoint += "_pretrain_synth" + else: + args.checkpoint += "_pretrain_ic17" + if args.distributed: + if args.multiprocessing_distributed: + args.rank = args.rank * npu_per_node + npu + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, + world_size=args.world_size, rank=args.rank) + loc = 'npu:{}'.format(args.npu) + torch.npu.set_device(loc) + args.batch_size = int(args.batch_size / npu_per_node) + args.workers = int((args.workers + npu_per_node - 1) / npu_per_node) + + print("[npu id:", args.npu, "]", "===============main_worker()=================") + print("[npu id:", args.npu, "]", args) + print("[npu id:", args.npu, "]", "===============main_worker()=================") + + print('checkpoint path: %s' % args.checkpoint) + print('init lr: %.8f' % args.lr) + print('schedule: ', args.schedule) + sys.stdout.flush() + + if not os.path.isdir(args.checkpoint): + os.makedirs(args.checkpoint) + + kernel_num = 7 + min_scale = 0.4 + start_epoch = 0 + + my_data = IC15Loader(args=args, + is_transform=True, + img_size=args.img_size, + kernel_num=kernel_num, + min_scale=min_scale) + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(my_data) + else: + train_sampler = None + + train_loader = MultiEpochsDataLoader( + my_data, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.workers, + drop_last=True, + pin_memory=True, + sampler=train_sampler) + + print("[npu id:", args.npu, "]", "=> creating model '{}'".format(args.arch)) + if args.arch == "resnet50": + model = models.resnet50(pretrained=True, num_classes=kernel_num) + elif args.arch == "resnet101": + model = models.resnet101(pretrained=True, num_classes=kernel_num) + elif args.arch == "resnet152": + model = models.resnet152(pretrained=True, num_classes=kernel_num) + + model = model.to(loc) + + if args.combine_sgd: + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) + else: + optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) + + model, optimizer = amp.initialize(model, optimizer, + opt_level=args.opt_level, + keep_batchnorm_fp32=args.keep_batchnorm_fp32, + loss_scale=args.loss_scale, + combine_grad=args.combine_grad) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) + + if args.pretrain: + print('Using pretrained model.') + assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!' 
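+        # the pretrain branch restores model weights only; optimizer and amp state stay freshly initialized (cf. the resume branch below)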
+ checkpoint = torch.load(args.pretrain) + model.load_state_dict(checkpoint['state_dict']) + elif args.resume: + print('Resuming from checkpoint.') + assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' + checkpoint = torch.load(args.resume) + start_epoch = checkpoint['epoch'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + amp.load_state_dict(checkpoint['amp']) + else: + print('Training from scratch.') + cudnn.benchmark = True + + best_model = {'loss': 0, 'acc': 0, 'iou': 0} + + for epoch in range(start_epoch, args.n_epoch): + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(args, optimizer, epoch) + + train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(train_loader, model, dice_loss, + optimizer, epoch, + args, npu_per_node) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + if epoch > args.n_epoch - 6: + best_path = f'{args.remark}_{train_loss:.4f}_{train_te_acc:.4f}_{train_ke_iou:.4f}_{train_te_iou:.4f}_{epoch}.pth.tar' + save_checkpoint({ + 'epoch': epoch + 1, + 'state_dict': model.state_dict(), + 'lr': args.lr, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, checkpoint='best', filename=best_path) + best_model['acc'] = train_te_acc + best_model['iou'] = train_te_iou + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Hyperparams') + parser.add_argument('--data-dir', nargs='?', type=str, default='PSENet_data', + help='point to the root data path of ICDAR') + parser.add_argument('--train_data', nargs='?', type=str, default='ICDAR2015', + help='indicate which dataset was used, ICDAR2015 or ICDAR2017') + parser.add_argument('--arch', nargs='?', type=str, default='resnet50') + parser.add_argument('--img_size', nargs='?', type=int, default=640, + help='Height of the input image') + parser.add_argument('--n_epoch', nargs='?', type=int, default=600, + help='# of the epochs') + parser.add_argument('--schedule', type=int, nargs='+', default=[200, 400], + help='Decrease learning rate at these epochs.') + parser.add_argument('--batch_size', nargs='?', type=int, default=16, + help='Batch Size') + parser.add_argument('--lr', nargs='?', type=float, default=1e-3, + help='Learning Rate') + parser.add_argument('--resume', nargs='?', type=str, default=None, + help='Path to previous saved model to restart from') + parser.add_argument('--pretrain', nargs='?', type=str, default=None, + help='Path to previous saved model to restart from') + parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', + help='path to save checkpoint (default: checkpoint)') + parser.add_argument('--opt-level', type=str) + parser.add_argument('--keep-batchnorm-fp32', type=str, default=None) + parser.add_argument('--loss-scale', type=str, default=64) + parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') + parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. 
') + parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') + parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N NPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + parser.add_argument('--device', default='npu', type=str, + help='npu or gpu') + parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') + parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') + parser.add_argument('--addr', default='10.136.181.127', type=str, + help='master addr') + parser.add_argument('--dist-url', default='env://', type=str, + help='url used to set up distributed training') + parser.add_argument('--port', default='8888', type=str) + parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', + help='number of data loading workers (default: 4)') + parser.add_argument('--remark', default='', type=str, + help='remark. ') + parser.add_argument('--combine_grad', action='store_true', + help='whether to combine grad in apex') + parser.add_argument('--combine_sgd', action='store_true', + help='whether to use combined sgd instead of sgd') + + args = parser.parse_args() + + + if args.seed is not None: + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + cudnn.deterministic = True + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + npu_per_node = len(args.process_device_map) + else: + npu_per_node = torch.cuda.device_count() + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = args.port + os.environ['KERNEL_NAME_ID'] = str(0) + print("+++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) + + args.world_size = npu_per_node * args.world_size + mp.spawn(main, nprocs=npu_per_node, args=(npu_per_node, args)) + diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15_8p.py b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15_8p.py index 6b06a15d3da577854a5b998e1fb8ac61a4a0a914..1904aae875570055ae0f7c945e9aeb40bf512401 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15_8p.py +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/NPU/src/train_ic15_8p.py @@ -1,453 +1,453 @@ -# Copyright [yyyy] [name of copyright owner] -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -import os -import random -import sys -import time -import warnings - -import numpy as np -import torch -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.multiprocessing as mp -import torch.nn.parallel -import torch.npu -import torch.utils.data.distributed -from apex import amp - -import apex -import models -from data_loader import IC15Loader -from metrics import runningScore -from multi_epochs_dataloader import MultiEpochsDataLoader -from util import AverageMeter - - - -def ohem_single(score, gt_text, training_mask): - pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5))) - - if pos_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_num = (int)(np.sum(gt_text <= 0.5)) - neg_num = (int)(min(pos_num * 3, neg_num)) - - if neg_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_score = score[gt_text <= 0.5] - neg_score_sorted = np.sort(-neg_score) - threshold = -neg_score_sorted[neg_num - 1] - - selected_mask = ((score >= threshold) | (gt_text > 0.5)) & (training_mask > 0.5) - selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - -def ohem_batch(scores, gt_texts, training_masks): - scores = scores.data.cpu().numpy() - gt_texts = gt_texts.data.cpu().numpy() - training_masks = training_masks.data.cpu().numpy() - selected_masks = [] - for i in range(scores.shape[0]): - selected_masks.append(ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[i, :, :])) - selected_masks = np.concatenate(selected_masks, 0) - selected_masks = torch.from_numpy(selected_masks).float() - return selected_masks - - -def dice_loss(input, target, mask): - input = torch.sigmoid(input) - - input = input.reshape(input.size()[0], -1) - target = target.reshape(target.size()[0], -1) - mask = mask.reshape(mask.size()[0], -1) - - input = input * mask - target = target * mask - - a = torch.sum(input * target, 1) - b = torch.sum(input * input, 1) + 0.001 - c = torch.sum(target * target, 1) + 0.001 - d = (2 * a) / (b + c) - dice_loss = torch.mean(d) - return 1 - dice_loss - - -def cal_text_score(texts, gt_texts, training_masks, running_metric_text): - training_masks = training_masks.data.cpu().numpy() - pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks - - pred_text[pred_text <= 0.5] = 0 - pred_text[pred_text > 0.5] = 1 - pred_text = pred_text.astype(np.int32) - gt_text = gt_texts.data.cpu().numpy() * training_masks - gt_text = gt_text.astype(np.int32) - running_metric_text.update(gt_text, pred_text) - score_text, _ = running_metric_text.get_scores() - return score_text - - -def cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel): - mask = (gt_texts * training_masks).data.cpu().numpy() - kernel = kernels[:, -1, :, :] - gt_kernel = gt_kernels[:, -1, :, :] - pred_kernel = torch.sigmoid(kernel).data.cpu().numpy() - pred_kernel[pred_kernel <= 0.5] = 0 - pred_kernel[pred_kernel > 0.5] = 1 - pred_kernel = (pred_kernel * mask).astype(np.int32) - gt_kernel = gt_kernel.data.cpu().numpy() - gt_kernel = (gt_kernel * mask).astype(np.int32) - running_metric_kernel.update(gt_kernel, pred_kernel) - score_kernel, _ = running_metric_kernel.get_scores() 
- return score_kernel - - -def train(train_loader, model, criterion, optimizer, epoch, args, npu_per_node): - model.train() - - losses = AverageMeter() - running_metric_text = runningScore(2) - running_metric_kernel = runningScore(2) - - epoch_time = time.time() - batch_time = time.time() - for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader): - loc = 'npu:{}'.format(args.npu) - imgs = imgs.to(loc, non_blocking=True) - gt_texts = gt_texts.to(loc, non_blocking=True) - gt_kernels = gt_kernels.to(loc, non_blocking=True) - training_masks = training_masks.to(loc, non_blocking=True) - - outputs = model(imgs) - texts = torch.index_select(outputs, 1, torch.tensor([0]).to(loc)).squeeze() - kernels = torch.index_select(outputs, 1, torch.tensor([1, 2, 3, 4, 5, 6]).to(loc)) - - selected_masks = ohem_batch(texts, gt_texts, training_masks) - selected_masks = selected_masks.to(loc, non_blocking=True) - - loss_text = criterion(texts, gt_texts, selected_masks) - - loss_kernels = [] - mask0 = torch.sigmoid(texts).data.cpu().numpy() - mask1 = training_masks.data.cpu().numpy() - selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32') - selected_masks = torch.from_numpy(selected_masks).float() - selected_masks = selected_masks.to(loc, non_blocking=True) - for i in range(6): - kernel_i = torch.index_select(kernels, 1, torch.tensor([i]).to(loc)).squeeze() - gt_kernel_i = torch.index_select(gt_kernels, 1, torch.tensor([i]).to(loc)).squeeze() - loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks) - loss_kernels.append(loss_kernel_i) - loss_kernel = sum(loss_kernels) / len(loss_kernels) - - loss = 0.7 * loss_text + 0.3 * loss_kernel - losses.update(loss.item(), imgs.size(0)) - - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text) - score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel) - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - batch_time = time.time() - batch_time - output_log = '(epoch: {epoch:0>3d} {batch:0>2d}/{size}) | FPS: {fps:5.3f} | Loss : {lossv:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( - epoch=epoch + 1, - batch=batch_idx + 1, - size=len(train_loader), - fps=npu_per_node * args.batch_size / batch_time, - lossv=losses.val, - acc=score_text['Mean Acc'], - iou_t=score_text['Mean IoU'], - iou_k=score_kernel['Mean IoU']) - batch_time = time.time() - print(output_log) - epoch_time = time.time() - epoch_time - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - output_log = '{epoch:0>3d}/{n_epoch} | LR: {lr:.5f} | FPS: {fps:.3f} | batch: {batch:.5f}s | Loss: {lossa:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( - epoch=epoch + 1, - n_epoch=args.n_epoch, - lr=optimizer.param_groups[0]['lr'], - fps=npu_per_node * len(train_loader) * args.batch_size / epoch_time, - batch=epoch_time / len(train_loader), - lossa=losses.avg, - acc=score_text['Mean Acc'], - iou_t=score_text['Mean IoU'], - iou_k=score_kernel['Mean IoU']) - print(output_log) - sys.stdout.flush() - - return ( - losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'], score_text['Mean IoU'], score_kernel['Mean IoU']) - - -def adjust_learning_rate(args, optimizer, epoch): - warmup_length = 5 - 
if epoch < warmup_length: - lr = args.lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = args.n_epoch - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * args.lr - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def save_checkpoint(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'): - if not os.path.isdir(checkpoint): - os.makedirs(checkpoint) - filepath = os.path.join(checkpoint, filename) - torch.save(state, filepath) - - -def main(npu, npu_per_node, args): - args.npu = args.process_device_map[npu] - print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:", - os.environ['KERNEL_NAME_ID']) - os.environ['KERNEL_NAME_ID'] = str(npu) - print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) - - if args.npu is not None: - print("[npu id:", args.npu, "]", "Use NPU: {} for training".format(args.npu)) - - if args.checkpoint == '': - args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (args.arch, args.batch_size, args.n_epoch) - if args.pretrain: - if 'synth' in args.pretrain: - args.checkpoint += "_pretrain_synth" - else: - args.checkpoint += "_pretrain_ic17" - if args.distributed: - if args.multiprocessing_distributed: - args.rank = args.rank * npu_per_node + npu - if args.device == 'npu': - dist.init_process_group(backend=args.dist_backend, - world_size=args.world_size, rank=args.rank) - loc = 'npu:{}'.format(args.npu) - torch.npu.set_device(loc) - args.batch_size = int(args.batch_size / npu_per_node) - args.workers = int((args.workers + npu_per_node - 1) / npu_per_node) - - print("[npu id:", args.npu, "]", "===============main_worker()=================") - print("[npu id:", args.npu, "]", args) - print("[npu id:", args.npu, "]", "===============main_worker()=================") - - print('checkpoint path: %s' % args.checkpoint) - print('init lr: %.8f' % args.lr) - print('schedule: ', args.schedule) - sys.stdout.flush() - - if not os.path.isdir(args.checkpoint): - os.makedirs(args.checkpoint) - - kernel_num = 7 - min_scale = 0.4 - start_epoch = 0 - - my_data = IC15Loader(args=args, - is_transform=True, - img_size=args.img_size, - kernel_num=kernel_num, - min_scale=min_scale) - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(my_data) - else: - train_sampler = None - - train_loader = MultiEpochsDataLoader( - my_data, - batch_size=args.batch_size, - shuffle=(train_sampler is None), - num_workers=args.workers, - drop_last=True, - pin_memory=True, - sampler=train_sampler) - - print("[npu id:", args.npu, "]", "=> creating model '{}'".format(args.arch)) - if args.arch == "resnet50": - model = models.resnet50(pretrained=True, num_classes=kernel_num) - elif args.arch == "resnet101": - model = models.resnet101(pretrained=True, num_classes=kernel_num) - elif args.arch == "resnet152": - model = models.resnet152(pretrained=True, num_classes=kernel_num) - - model = model.to(loc) - - if args.combine_sgd: - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) - else: - optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) - - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.opt_level, - keep_batchnorm_fp32=args.keep_batchnorm_fp32, - loss_scale=args.loss_scale, - combine_grad=args.combine_grad) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) - - if 
args.pretrain: - print('Using pretrained model.') - assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!' - checkpoint = torch.load(args.pretrain) - model.load_state_dict(checkpoint['state_dict']) - elif args.resume: - print('Resuming from checkpoint.') - assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' - checkpoint = torch.load(args.resume) - start_epoch = checkpoint['epoch'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - amp.load_state_dict(checkpoint['amp']) - else: - print('Training from scratch.') - cudnn.benchmark = True - - best_model = {'loss': 0, 'acc': 0, 'iou': 0} - - for epoch in range(start_epoch, args.n_epoch): - if args.distributed: - train_sampler.set_epoch(epoch) - adjust_learning_rate(args, optimizer, epoch) - - train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(train_loader, model, dice_loss, - optimizer, epoch, - args, npu_per_node) - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % npu_per_node == 0): - if epoch > args.n_epoch - 6: - best_path = f'{args.remark}_{train_loss:.4f}_{train_te_acc:.4f}_{train_ke_iou:.4f}_{train_te_iou:.4f}_{epoch}.pth.tar' - save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': model.state_dict(), - 'lr': args.lr, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, checkpoint='best', filename=best_path) - best_model['acc'] = train_te_acc - best_model['iou'] = train_te_iou - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Hyperparams') - parser.add_argument('--data-dir', nargs='?', type=str, default='PSENet_data', - help='point to the root data path of ICDAR') - parser.add_argument('--train_data', nargs='?', type=str, default='ICDAR2015', - help='indicate which dataset was used, ICDAR2015 or ICDAR2017') - parser.add_argument('--arch', nargs='?', type=str, default='resnet50') - parser.add_argument('--img_size', nargs='?', type=int, default=640, - help='Height of the input image') - parser.add_argument('--n_epoch', nargs='?', type=int, default=600, - help='# of the epochs') - parser.add_argument('--schedule', type=int, nargs='+', default=[200, 400], - help='Decrease learning rate at these epochs.') - parser.add_argument('--batch_size', nargs='?', type=int, default=16, - help='Batch Size') - parser.add_argument('--lr', nargs='?', type=float, default=1e-3, - help='Learning Rate') - parser.add_argument('--resume', nargs='?', type=str, default=None, - help='Path to previous saved model to restart from') - parser.add_argument('--pretrain', nargs='?', type=str, default=None, - help='Path to previous saved model to restart from') - parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', - help='path to save checkpoint (default: checkpoint)') - parser.add_argument('--opt-level', type=str) - parser.add_argument('--keep-batchnorm-fp32', type=str, default=None) - parser.add_argument('--loss-scale', type=str, default=64) - parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') - parser.add_argument('--seed', default=None, type=int, - help='seed for initializing 
training. ') - parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') - parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N NPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') - parser.add_argument('--device', default='npu', type=str, - help='npu or gpu') - parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') - parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') - parser.add_argument('--addr', default='10.136.181.127', type=str, - help='master addr') - parser.add_argument('--dist-url', default='env://', type=str, - help='url used to set up distributed training') - parser.add_argument('--port', default='8888', type=str) - parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', - help='number of data loading workers (default: 4)') - parser.add_argument('--remark', default='', type=str, - help='remark. ') - parser.add_argument('--combine_grad', action='store_true', - help='whether to combine grad in apex') - parser.add_argument('--combine_sgd', action='store_true', - help='whether to use combined sgd instead of sgd') - - args = parser.parse_args() - - - if args.seed is not None: - random.seed(args.seed) - os.environ['PYTHONHASHSEED'] = str(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - torch.cuda.manual_seed(args.seed) - torch.cuda.manual_seed_all(args.seed) - cudnn.deterministic = True - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - npu_per_node = len(args.process_device_map) - else: - npu_per_node = torch.cuda.device_count() - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = args.port - os.environ['KERNEL_NAME_ID'] = str(0) - print("+++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) - - args.world_size = npu_per_node * args.world_size - mp.spawn(main, nprocs=npu_per_node, args=(npu_per_node, args)) - +# Copyright [yyyy] [name of copyright owner] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import os +import random +import sys +import time +import warnings + +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.nn.parallel +import torch.npu +import torch.utils.data.distributed +from apex import amp + +import apex +import models +from data_loader import IC15Loader +from metrics import runningScore +from multi_epochs_dataloader import MultiEpochsDataLoader +from util import AverageMeter + + + +def ohem_single(score, gt_text, training_mask): + pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5))) + + if pos_num == 0: + selected_mask = training_mask + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + neg_num = (int)(np.sum(gt_text <= 0.5)) + neg_num = (int)(min(pos_num * 3, neg_num)) + + if neg_num == 0: + selected_mask = training_mask + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + neg_score = score[gt_text <= 0.5] + neg_score_sorted = np.sort(-neg_score) + threshold = -neg_score_sorted[neg_num - 1] + + selected_mask = ((score >= threshold) | (gt_text > 0.5)) & (training_mask > 0.5) + selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') + return selected_mask + + +def ohem_batch(scores, gt_texts, training_masks): + scores = scores.data.cpu().numpy() + gt_texts = gt_texts.data.cpu().numpy() + training_masks = training_masks.data.cpu().numpy() + selected_masks = [] + for i in range(scores.shape[0]): + selected_masks.append(ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[i, :, :])) + selected_masks = np.concatenate(selected_masks, 0) + selected_masks = torch.from_numpy(selected_masks).float() + return selected_masks + + +def dice_loss(input, target, mask): + input = torch.sigmoid(input) + + input = input.reshape(input.size()[0], -1) + target = target.reshape(target.size()[0], -1) + mask = mask.reshape(mask.size()[0], -1) + + input = input * mask + target = target * mask + + a = torch.sum(input * target, 1) + b = torch.sum(input * input, 1) + 0.001 + c = torch.sum(target * target, 1) + 0.001 + d = (2 * a) / (b + c) + dice_loss = torch.mean(d) + return 1 - dice_loss + + +def cal_text_score(texts, gt_texts, training_masks, running_metric_text): + training_masks = training_masks.data.cpu().numpy() + pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks + + pred_text[pred_text <= 0.5] = 0 + pred_text[pred_text > 0.5] = 1 + pred_text = pred_text.astype(np.int32) + gt_text = gt_texts.data.cpu().numpy() * training_masks + gt_text = gt_text.astype(np.int32) + running_metric_text.update(gt_text, pred_text) + score_text, _ = running_metric_text.get_scores() + return score_text + + +def cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel): + mask = (gt_texts * training_masks).data.cpu().numpy() + kernel = kernels[:, -1, :, :] + gt_kernel = gt_kernels[:, -1, :, :] + pred_kernel = torch.sigmoid(kernel).data.cpu().numpy() + pred_kernel[pred_kernel <= 0.5] = 0 + pred_kernel[pred_kernel > 0.5] = 1 + pred_kernel = (pred_kernel * mask).astype(np.int32) + gt_kernel = gt_kernel.data.cpu().numpy() + gt_kernel = (gt_kernel * mask).astype(np.int32) + running_metric_kernel.update(gt_kernel, pred_kernel) + score_kernel, _ = running_metric_kernel.get_scores() 
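+    # binary (2-class) segmentation metrics accumulated over the epoch; 'Mean IoU' is reported as IOU_k in the log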
+ return score_kernel + + +def train(train_loader, model, criterion, optimizer, epoch, args, npu_per_node): + model.train() + + losses = AverageMeter() + running_metric_text = runningScore(2) + running_metric_kernel = runningScore(2) + + epoch_time = time.time() + batch_time = time.time() + for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader): + loc = 'npu:{}'.format(args.npu) + imgs = imgs.to(loc, non_blocking=True) + gt_texts = gt_texts.to(loc, non_blocking=True) + gt_kernels = gt_kernels.to(loc, non_blocking=True) + training_masks = training_masks.to(loc, non_blocking=True) + + outputs = model(imgs) + texts = torch.index_select(outputs, 1, torch.tensor([0]).to(loc)).squeeze() + kernels = torch.index_select(outputs, 1, torch.tensor([1, 2, 3, 4, 5, 6]).to(loc)) + + selected_masks = ohem_batch(texts, gt_texts, training_masks) + selected_masks = selected_masks.to(loc, non_blocking=True) + + loss_text = criterion(texts, gt_texts, selected_masks) + + loss_kernels = [] + mask0 = torch.sigmoid(texts).data.cpu().numpy() + mask1 = training_masks.data.cpu().numpy() + selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32') + selected_masks = torch.from_numpy(selected_masks).float() + selected_masks = selected_masks.to(loc, non_blocking=True) + for i in range(6): + kernel_i = torch.index_select(kernels, 1, torch.tensor([i]).to(loc)).squeeze() + gt_kernel_i = torch.index_select(gt_kernels, 1, torch.tensor([i]).to(loc)).squeeze() + loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks) + loss_kernels.append(loss_kernel_i) + loss_kernel = sum(loss_kernels) / len(loss_kernels) + + loss = 0.7 * loss_text + 0.3 * loss_kernel + losses.update(loss.item(), imgs.size(0)) + + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text) + score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + batch_time = time.time() - batch_time + output_log = '(epoch: {epoch:0>3d} {batch:0>2d}/{size}) | FPS: {fps:5.3f} | Loss : {lossv:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( + epoch=epoch + 1, + batch=batch_idx + 1, + size=len(train_loader), + fps=npu_per_node * args.batch_size / batch_time, + lossv=losses.val, + acc=score_text['Mean Acc'], + iou_t=score_text['Mean IoU'], + iou_k=score_kernel['Mean IoU']) + batch_time = time.time() + print(output_log) + epoch_time = time.time() - epoch_time + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + output_log = '{epoch:0>3d}/{n_epoch} | LR: {lr:.5f} | FPS: {fps:.3f} | batch: {batch:.5f}s | Loss: {lossa:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format( + epoch=epoch + 1, + n_epoch=args.n_epoch, + lr=optimizer.param_groups[0]['lr'], + fps=npu_per_node * len(train_loader) * args.batch_size / epoch_time, + batch=epoch_time / len(train_loader), + lossa=losses.avg, + acc=score_text['Mean Acc'], + iou_t=score_text['Mean IoU'], + iou_k=score_kernel['Mean IoU']) + print(output_log) + sys.stdout.flush() + + return ( + losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'], score_text['Mean IoU'], score_kernel['Mean IoU']) + + +def adjust_learning_rate(args, optimizer, epoch): + warmup_length = 5 + 
if epoch < warmup_length: + lr = args.lr * (epoch + 1) / warmup_length + else: + e = epoch - warmup_length + es = args.n_epoch - warmup_length + lr = 0.5 * (1 + np.cos(np.pi * e / es)) * args.lr + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def save_checkpoint(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'): + if not os.path.isdir(checkpoint): + os.makedirs(checkpoint) + filepath = os.path.join(checkpoint, filename) + torch.save(state, filepath) + + +def main(npu, npu_per_node, args): + args.npu = args.process_device_map[npu] + print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:", + os.environ['KERNEL_NAME_ID']) + os.environ['KERNEL_NAME_ID'] = str(npu) + print("[npu id:", args.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) + + if args.npu is not None: + print("[npu id:", args.npu, "]", "Use NPU: {} for training".format(args.npu)) + + if args.checkpoint == '': + args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (args.arch, args.batch_size, args.n_epoch) + if args.pretrain: + if 'synth' in args.pretrain: + args.checkpoint += "_pretrain_synth" + else: + args.checkpoint += "_pretrain_ic17" + if args.distributed: + if args.multiprocessing_distributed: + args.rank = args.rank * npu_per_node + npu + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, + world_size=args.world_size, rank=args.rank) + loc = 'npu:{}'.format(args.npu) + torch.npu.set_device(loc) + args.batch_size = int(args.batch_size / npu_per_node) + args.workers = int((args.workers + npu_per_node - 1) / npu_per_node) + + print("[npu id:", args.npu, "]", "===============main_worker()=================") + print("[npu id:", args.npu, "]", args) + print("[npu id:", args.npu, "]", "===============main_worker()=================") + + print('checkpoint path: %s' % args.checkpoint) + print('init lr: %.8f' % args.lr) + print('schedule: ', args.schedule) + sys.stdout.flush() + + if not os.path.isdir(args.checkpoint): + os.makedirs(args.checkpoint) + + kernel_num = 7 + min_scale = 0.4 + start_epoch = 0 + + my_data = IC15Loader(args=args, + is_transform=True, + img_size=args.img_size, + kernel_num=kernel_num, + min_scale=min_scale) + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(my_data) + else: + train_sampler = None + + train_loader = MultiEpochsDataLoader( + my_data, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.workers, + drop_last=True, + pin_memory=True, + sampler=train_sampler) + + print("[npu id:", args.npu, "]", "=> creating model '{}'".format(args.arch)) + if args.arch == "resnet50": + model = models.resnet50(pretrained=True, num_classes=kernel_num) + elif args.arch == "resnet101": + model = models.resnet101(pretrained=True, num_classes=kernel_num) + elif args.arch == "resnet152": + model = models.resnet152(pretrained=True, num_classes=kernel_num) + + model = model.to(loc) + + if args.combine_sgd: + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) + else: + optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4) + + model, optimizer = amp.initialize(model, optimizer, + opt_level=args.opt_level, + keep_batchnorm_fp32=args.keep_batchnorm_fp32, + loss_scale=args.loss_scale, + combine_grad=args.combine_grad) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) + + if 
args.pretrain: + print('Using pretrained model.') + assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!' + checkpoint = torch.load(args.pretrain) + model.load_state_dict(checkpoint['state_dict']) + elif args.resume: + print('Resuming from checkpoint.') + assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' + checkpoint = torch.load(args.resume) + start_epoch = checkpoint['epoch'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + amp.load_state_dict(checkpoint['amp']) + else: + print('Training from scratch.') + cudnn.benchmark = True + + best_model = {'loss': 0, 'acc': 0, 'iou': 0} + + for epoch in range(start_epoch, args.n_epoch): + if args.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(args, optimizer, epoch) + + train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(train_loader, model, dice_loss, + optimizer, epoch, + args, npu_per_node) + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % npu_per_node == 0): + if epoch > args.n_epoch - 6: + best_path = f'{args.remark}_{train_loss:.4f}_{train_te_acc:.4f}_{train_ke_iou:.4f}_{train_te_iou:.4f}_{epoch}.pth.tar' + save_checkpoint({ + 'epoch': epoch + 1, + 'state_dict': model.state_dict(), + 'lr': args.lr, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, checkpoint='best', filename=best_path) + best_model['acc'] = train_te_acc + best_model['iou'] = train_te_iou + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Hyperparams') + parser.add_argument('--data-dir', nargs='?', type=str, default='PSENet_data', + help='point to the root data path of ICDAR') + parser.add_argument('--train_data', nargs='?', type=str, default='ICDAR2015', + help='indicate which dataset was used, ICDAR2015 or ICDAR2017') + parser.add_argument('--arch', nargs='?', type=str, default='resnet50') + parser.add_argument('--img_size', nargs='?', type=int, default=640, + help='Height of the input image') + parser.add_argument('--n_epoch', nargs='?', type=int, default=600, + help='# of the epochs') + parser.add_argument('--schedule', type=int, nargs='+', default=[200, 400], + help='Decrease learning rate at these epochs.') + parser.add_argument('--batch_size', nargs='?', type=int, default=16, + help='Batch Size') + parser.add_argument('--lr', nargs='?', type=float, default=1e-3, + help='Learning Rate') + parser.add_argument('--resume', nargs='?', type=str, default=None, + help='Path to previous saved model to restart from') + parser.add_argument('--pretrain', nargs='?', type=str, default=None, + help='Path to previous saved model to restart from') + parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', + help='path to save checkpoint (default: checkpoint)') + parser.add_argument('--opt-level', type=str) + parser.add_argument('--keep-batchnorm-fp32', type=str, default=None) + parser.add_argument('--loss-scale', type=str, default=64) + parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') + parser.add_argument('--seed', default=None, type=int, + help='seed for initializing 
training. ') + parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') + parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N NPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + parser.add_argument('--device', default='npu', type=str, + help='npu or gpu') + parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') + parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') + parser.add_argument('--addr', default='10.136.181.127', type=str, + help='master addr') + parser.add_argument('--dist-url', default='env://', type=str, + help='url used to set up distributed training') + parser.add_argument('--port', default='8888', type=str) + parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', + help='number of data loading workers (default: 4)') + parser.add_argument('--remark', default='', type=str, + help='remark. ') + parser.add_argument('--combine_grad', action='store_true', + help='whether to combine grad in apex') + parser.add_argument('--combine_sgd', action='store_true', + help='whether to use combined sgd instead of sgd') + + args = parser.parse_args() + + + if args.seed is not None: + random.seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + cudnn.deterministic = True + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + npu_per_node = len(args.process_device_map) + else: + npu_per_node = torch.cuda.device_count() + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = args.port + os.environ['KERNEL_NAME_ID'] = str(0) + print("+++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) + + args.world_size = npu_per_node * args.world_size + mp.spawn(main, nprocs=npu_per_node, args=(npu_per_node, args)) + diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/README.md b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/README.md index 4d75995acfa0e5fab36f3b3af67fbc6bd76b4eab..05b6553c125f67e5524f28d0428e47585388d5d3 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/README.md @@ -1,22 +1,22 @@ -# Shape Robust Text Detection with Progressive Scale Expansion Network - -## 准备数据集 -1. 下载并解压训练集和测试集图片数据 -2. 按顺序对文件夹进行组织 -``` -./data_dir -./data_dir/ICDAR -./data_dir/ICDAR/Challenge -./data_dir/ICDAR/Challenge/ch4_training_images -./data_dir/ICDAR/Challenge/ch4_training_localization_transcription_gt -./data_dir/ICDAR/Challenge/ch4_test_images -./data_dir/ICDAR/Challenge/ch4_test_localization_transcription_gt -``` - - -## NPU -进入NPU/src目录,查看相关README文件 - -## 模型测试 -进入NPU/test目录,查看相关README文件 - +# Shape Robust Text Detection with Progressive Scale Expansion Network + +## 准备数据集 +1. 下载并解压训练集和测试集图片数据 +2. 
按顺序对文件夹进行组织 +``` +./data_dir +./data_dir/ICDAR +./data_dir/ICDAR/Challenge +./data_dir/ICDAR/Challenge/ch4_training_images +./data_dir/ICDAR/Challenge/ch4_training_localization_transcription_gt +./data_dir/ICDAR/Challenge/ch4_test_images +./data_dir/ICDAR/Challenge/ch4_test_localization_transcription_gt +``` + + +## NPU +进入NPU/src目录,查看相关README文件 + +## 模型测试 +进入NPU/test目录,查看相关README文件 + diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_1p.sh index d6807a08111f1af72d9ac4656efb3cd957e2a852..19d0be5c36c19e9078f6901933fd019551fc6cab 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_1p.sh @@ -1,161 +1,161 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="PSENet_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=16 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3 ${cur_path}/../NPU/src/train_ic15.py \ - --lr 0.001 \ - --dist-backend 'hccl' \ - --rank 0 \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size $batch_size \ - --device npu \ - --opt-level O2 \ - --loss-scale 64 \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 16 \ - --n_epoch $train_epochs \ - --data-dir $data_path \ - --port 8272 \ - --schedule 200 400 \ - --device-list $ASCEND_DEVICE_ID \ - --remark 1p \ - --combine_grad \ - --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep 'epoch' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END 
{print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'epoch' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="PSENet_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=16 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3 ${cur_path}/../NPU/src/train_ic15.py \ + --lr 0.001 \ + --dist-backend 'hccl' \ + --rank 0 \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size $batch_size \ + --device npu \ + --opt-level O2 \ + --loss-scale 64 \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 16 \ + --n_epoch 
$train_epochs \ + --data-dir $data_path \ + --port 8272 \ + --schedule 200 400 \ + --device-list $ASCEND_DEVICE_ID \ + --remark 1p \ + --combine_grad \ + --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'epoch' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'epoch' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_8p.sh index 28acc0d3be204ee6857fdde5d1c73223efbc4c9a..a0155a31259d012d36211b82ffd687c8b6ccd431 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_full_8p.sh @@ -1,159 +1,159 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="PSENet_ID0102_for_PyTorch" -#训练epoch -train_epochs=600 -#训练batch_size -batch_size=32 -#训练step -train_steps= -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh 
--data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3 ${cur_path}/../NPU/src/train_ic15_8p.py \ - --lr 0.004 \ - --rank 0 \ - --dist-backend 'hccl' \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size $batch_size \ - --device npu \ - --opt-level O2 \ - --loss-scale 64 \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 16 \ - --n_epoch $train_epochs \ - --data-dir $data_path \ - --port 8272 \ - --remark 8p \ - --device-list '0,1,2,3,4,5,6,7' \ - --combine_grad \ - --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep 'epoch:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'Acc_t:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print}'|awk -F "Acc_t:" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'epoch:' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo 
"TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="PSENet_ID0102_for_PyTorch" +#训练epoch +train_epochs=600 +#训练batch_size +batch_size=32 +#训练step +train_steps= +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3 ${cur_path}/../NPU/src/train_ic15_8p.py \ + --lr 0.004 \ + --rank 0 \ + --dist-backend 'hccl' \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size $batch_size \ + --device npu \ + --opt-level O2 \ + --loss-scale 64 \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 16 \ + --n_epoch $train_epochs \ + --data-dir $data_path \ + --port 8272 \ + --remark 8p \ + --device-list '0,1,2,3,4,5,6,7' \ + --combine_grad \ + --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'epoch:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc_t:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print}'|awk -F "Acc_t:" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'epoch:' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END 
{print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_1p.sh index 0bc1242e53dd4ea5571b7e5d7be9eafbbb9839ff..301da8cd34c14951641911560e7c13ca8a556a35 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_1p.sh @@ -1,160 +1,160 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="PSENet_ID0102_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=16 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3 ${cur_path}/../NPU/src/train_ic15.py \ - --lr 0.001 \ - --dist-backend 'hccl' \ - --rank 0 \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size $batch_size \ - --device npu \ - --opt-level O2 \ - --loss-scale 64 \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 16 \ - --n_epoch $train_epochs \ - --data-dir $data_path \ - --port 8272 \ - --schedule 200 400 \ - 
--device-list $ASCEND_DEVICE_ID \ - --remark 1p \ - --combine_grad \ - --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep 'epoch' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'epoch' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="PSENet_ID0102_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=16 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ 
$data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3 ${cur_path}/../NPU/src/train_ic15.py \ + --lr 0.001 \ + --dist-backend 'hccl' \ + --rank 0 \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size $batch_size \ + --device npu \ + --opt-level O2 \ + --loss-scale 64 \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 16 \ + --n_epoch $train_epochs \ + --data-dir $data_path \ + --port 8272 \ + --schedule 200 400 \ + --device-list $ASCEND_DEVICE_ID \ + --remark 1p \ + --combine_grad \ + --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'epoch' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'epoch' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_8p.sh index fe8659d2dfbe783f4a8c8740fff870b138917409..e0e8b1d3b0dda358ed79764378ee49749a14b727 100644 --- a/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/detection/PSENet_for_PyTorch/test/train_performance_8p.sh @@ -1,158 +1,158 @@ 
-#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="PSENet_ID0102_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=32 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3 ${cur_path}/../NPU/src/train_ic15_8p.py \ - --lr 0.004 \ - --dist-backend 'hccl' \ - --rank 0 \ - --workers 32 \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch_size $batch_size \ - --device npu \ - --opt-level O2 \ - --loss-scale 64 \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed 16 \ - --n_epoch $train_epochs \ - --data-dir $data_path \ - --port 8272 \ - --remark 8p \ - --combine_grad \ - --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep 'epoch:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep 'epoch:' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = 
${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="PSENet_ID0102_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=32 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3 ${cur_path}/../NPU/src/train_ic15_8p.py \ + --lr 0.004 \ + --dist-backend 'hccl' \ + --rank 0 \ + --workers 32 \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch_size $batch_size \ + --device npu \ + --opt-level O2 \ + --loss-scale 64 \ + --addr=$(hostname -I |awk '{print $1}') \ + --seed 16 \ + --n_epoch $train_epochs \ + --data-dir $data_path \ + --port 8272 \ + --remark 8p \ + --combine_grad \ + --combine_sgd > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'epoch:' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'FPS:' '{print $2}'|awk '{print $1}'|awk 'NR>3'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train 
Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 'epoch:' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}' |awk '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/_functions.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/_functions.py index ebbcee0394ef6f8ec483427eab315707bd87005b..219b7ecd4ae35bc4b2c2478b0ecb56ff81b61aaf 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/_functions.py +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/_functions.py @@ -1,93 +1,93 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -from torch.nn.parallel._functions import _get_stream - - -def scatter(input, devices, streams=None): - """Scatters tensor across multiple GPUs.""" - if streams is None: - streams = [None] * len(devices) - - if isinstance(input, list): - chunk_size = (len(input) - 1) // len(devices) + 1 - outputs = [ - scatter(input[i], [devices[i // chunk_size]], - [streams[i // chunk_size]]) for i in range(len(input)) - ] - return outputs - elif isinstance(input, torch.Tensor): - output = input.contiguous() - # TODO: copy to a pinned buffer first (if copying from CPU) - stream = streams[0] if output.numel() > 0 else None - if devices != [-1]: - with torch.cuda.device(devices[0]), torch.cuda.stream(stream): - output = output.cuda(devices[0], non_blocking=True) - else: - pass - - return output - else: - raise Exception(f'Unknown type {type(input)}.') - - -def synchronize_stream(output, devices, streams): - if isinstance(output, list): - chunk_size = len(output) // len(devices) - for i in range(len(devices)): - for j in range(chunk_size): - synchronize_stream(output[i * chunk_size + j], [devices[i]], - [streams[i]]) - elif isinstance(output, torch.Tensor): - if output.numel() != 0: - with torch.cuda.device(devices[0]): - main_stream = torch.cuda.current_stream() - main_stream.wait_stream(streams[0]) - output.record_stream(main_stream) - else: - raise Exception(f'Unknown type {type(output)}.') - - -def get_input_device(input): - if isinstance(input, list): - for item in input: - input_device = get_input_device(item) - if input_device != -1: - return input_device - return -1 - elif isinstance(input, torch.Tensor): - return input.get_device() if input.is_cuda else -1 - else: - raise Exception(f'Unknown type {type(input)}.') - - -class Scatter: - - @staticmethod - def forward(target_gpus, input): - input_device = get_input_device(input) - streams = None - if input_device == -1 and target_gpus != [-1]: - # Perform CPU to GPU copies in a background stream - streams = [_get_stream(device) for device in target_gpus] - - outputs = scatter(input, target_gpus, streams) - # Synchronize with the copy stream - if streams is not None: - synchronize_stream(outputs, target_gpus, streams) - +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + pass + + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + return tuple(outputs) \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/builder.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/builder.py index f61945bc98a5d71aedf2e375edb393dfdbaf634d..0ae11773f3eb4d451af6e091e9f4ae158d7c15ca 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/builder.py +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/builder.py @@ -1,69 +1,69 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
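The `scatter` helper patched in above distributes a list of inputs across devices in contiguous chunks: item `i` is sent to `devices[i // chunk_size]`, where `chunk_size` is the ceiling of `len(input) / len(devices)`. A self-contained sketch of just that index arithmetic (the function name and the example device strings are illustrative, not mmcv API):

```python
def assign_to_devices(items, devices):
    # Same ceiling division as scatter(): (len(input) - 1) // len(devices) + 1
    chunk_size = (len(items) - 1) // len(devices) + 1
    return {i: devices[i // chunk_size] for i in range(len(items))}

# Five inputs over two devices -> chunk_size 3:
# items 0-2 land on the first device, items 3-4 on the second.
print(assign_to_devices(list(range(5)), ['npu:0', 'npu:1']))
```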
- -import copy -import inspect - -import torch -import apex - -from ...utils import Registry, build_from_cfg - -OPTIMIZERS = Registry('optimizer') -OPTIMIZER_BUILDERS = Registry('optimizer builder') - - -def register_torch_optimizers(): - torch_optimizers = [] - for module_name in dir(torch.optim): - if module_name.startswith('__'): - continue - _optim = getattr(torch.optim, module_name) - if inspect.isclass(_optim) and issubclass(_optim, - torch.optim.Optimizer): - OPTIMIZERS.register_module()(_optim) - torch_optimizers.append(module_name) - - # add npu optimizer from apex - for module_name in dir(apex.optimizers): - if module_name.startswith('__'): - continue - _optim = getattr(apex.optimizers, module_name) - if inspect.isclass(_optim) and issubclass(_optim, - torch.optim.Optimizer): - OPTIMIZERS.register_module()(_optim) - torch_optimizers.append(module_name) - - return torch_optimizers - - -TORCH_OPTIMIZERS = register_torch_optimizers() - - -def build_optimizer_constructor(cfg): - return build_from_cfg(cfg, OPTIMIZER_BUILDERS) - - -def build_optimizer(model, cfg): - optimizer_cfg = copy.deepcopy(cfg) - constructor_type = optimizer_cfg.pop('constructor', - 'DefaultOptimizerConstructor') - paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) - optim_constructor = build_optimizer_constructor( - dict( - type=constructor_type, - optimizer_cfg=optimizer_cfg, - paramwise_cfg=paramwise_cfg)) - optimizer = optim_constructor(model) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
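`register_torch_optimizers()` above works by introspection: it walks a module, keeps every class that subclasses `torch.optim.Optimizer`, and registers it under its class name; the patch then runs the same loop over `apex.optimizers` so the NPU fused optimizers become selectable by name. A stripped-down sketch of that pattern, with a plain dict standing in for mmcv's `Registry` (the dict and helper name are assumptions for illustration):

```python
import inspect
import torch

OPTIMIZER_TABLE = {}  # stand-in for mmcv's Registry('optimizer')

def register_optimizers_from(module):
    """Register every torch.optim.Optimizer subclass found in `module`."""
    registered = []
    for name in dir(module):
        if name.startswith('__'):
            continue
        obj = getattr(module, name)
        if inspect.isclass(obj) and issubclass(obj, torch.optim.Optimizer):
            OPTIMIZER_TABLE[name] = obj
            registered.append(name)
    return registered

# torch.optim is always available; on NPU the patch repeats this for apex.optimizers.
print(register_optimizers_from(torch.optim))
```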
+ +import copy +import inspect + +import torch +import apex + +from ...utils import Registry, build_from_cfg + +OPTIMIZERS = Registry('optimizer') +OPTIMIZER_BUILDERS = Registry('optimizer builder') + + +def register_torch_optimizers(): + torch_optimizers = [] + for module_name in dir(torch.optim): + if module_name.startswith('__'): + continue + _optim = getattr(torch.optim, module_name) + if inspect.isclass(_optim) and issubclass(_optim, + torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + + # add npu optimizer from apex + for module_name in dir(apex.optimizers): + if module_name.startswith('__'): + continue + _optim = getattr(apex.optimizers, module_name) + if inspect.isclass(_optim) and issubclass(_optim, + torch.optim.Optimizer): + OPTIMIZERS.register_module()(_optim) + torch_optimizers.append(module_name) + + return torch_optimizers + + +TORCH_OPTIMIZERS = register_torch_optimizers() + + +def build_optimizer_constructor(cfg): + return build_from_cfg(cfg, OPTIMIZER_BUILDERS) + + +def build_optimizer(model, cfg): + optimizer_cfg = copy.deepcopy(cfg) + constructor_type = optimizer_cfg.pop('constructor', + 'DefaultOptimizerConstructor') + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + optim_constructor = build_optimizer_constructor( + dict( + type=constructor_type, + optimizer_cfg=optimizer_cfg, + paramwise_cfg=paramwise_cfg)) + optimizer = optim_constructor(model) return optimizer \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/data_parallel.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/data_parallel.py index c9d1e953e2a6d01c06479e570a78de95b8a511d2..b3b45fa9bcfc46be14056c2c2c088766bcb08951 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/data_parallel.py +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/data_parallel.py @@ -1,104 +1,104 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from itertools import chain - -from torch.nn.parallel import DataParallel - -from .scatter_gather import scatter_kwargs - - -class MMDataParallel(DataParallel): - """The DataParallel module that supports DataContainer. - - MMDataParallel has two main differences with PyTorch DataParallel: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data during both GPU and CPU inference. - - It implement two more APIs ``train_step()`` and ``val_step()``. - - Args: - module (:class:`nn.Module`): Module to be encapsulated. - device_ids (list[int]): Device IDS of modules to be scattered to. - Defaults to None when GPU is not available. - output_device (str | int): Device ID for output. Defaults to None. - dim (int): Dimension used to scatter the data. Defaults to 0. 
- """ - - def __init__(self, *args, dim=0, **kwargs): - super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) - self.dim = dim - - def forward(self, *inputs, **kwargs): - """Override the original forward function. - - The main difference lies in the CPU inference where the datas in - :class:`DataContainers` will still be gathered. - """ - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module(*inputs[0], **kwargs[0]) - else: - return super().forward(*inputs, **kwargs) - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module.train_step(*inputs[0], **kwargs[0]) - - assert len(self.device_ids) == 1, \ - ('MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - 'instead.') - - for t in chain(self.module.parameters(), self.module.buffers()): - if t.device != self.src_device_obj: - raise RuntimeError( - 'module must have its parameters and buffers ' - f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}') - - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - return self.module.train_step(*inputs[0], **kwargs[0]) - - def val_step(self, *inputs, **kwargs): - if not self.device_ids: - # We add the following line thus the module could gather and - # convert data containers as those in GPU inference - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - return self.module.val_step(*inputs, **kwargs) - - assert len(self.device_ids) == 1, \ - ('MMDataParallel only supports single GPU training, if you need to' - ' train with multiple GPUs, please use MMDistributedDataParallel' - ' instead.') - - for t in chain(self.module.parameters(), self.module.buffers()): - if t.device != self.src_device_obj: - raise RuntimeError( - 'module must have its parameters and buffers ' - f'on device {self.src_device_obj} (device_ids[0]) but ' - f'found one of them on device: {t.device}') - - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - return self.module.val_step(*inputs[0], **kwargs[0]) +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from itertools import chain + +from torch.nn.parallel import DataParallel + +from .scatter_gather import scatter_kwargs + + +class MMDataParallel(DataParallel): + """The DataParallel module that supports DataContainer. 
+ + MMDataParallel has two main differences with PyTorch DataParallel: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data during both GPU and CPU inference. + - It implement two more APIs ``train_step()`` and ``val_step()``. + + Args: + module (:class:`nn.Module`): Module to be encapsulated. + device_ids (list[int]): Device IDS of modules to be scattered to. + Defaults to None when GPU is not available. + output_device (str | int): Device ID for output. Defaults to None. + dim (int): Dimension used to scatter the data. Defaults to 0. + """ + + def __init__(self, *args, dim=0, **kwargs): + super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs) + self.dim = dim + + def forward(self, *inputs, **kwargs): + """Override the original forward function. + + The main difference lies in the CPU inference where the datas in + :class:`DataContainers` will still be gathered. + """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs, **kwargs) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/dist_utils.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/dist_utils.py index 091b646869a1ac678cd1f6886e8d61592e33fdfd..efe19f5018306dcb0ba419157d5b1b40cfa339b9 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/dist_utils.py +++ 
b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/dist_utils.py @@ -1,185 +1,185 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -import os -import subprocess -from collections import OrderedDict - -import torch -import torch.multiprocessing as mp -from torch import distributed as dist -from torch._utils import (_flatten_dense_tensors, _take_tensors, - _unflatten_dense_tensors) - -from mmcv.utils import TORCH_VERSION - - -def init_dist(launcher, backend='nccl', **kwargs): - if mp.get_start_method(allow_none=True) is None: - mp.set_start_method('spawn') - if launcher == 'pytorch': - _init_dist_pytorch(backend, **kwargs) - elif launcher == 'mpi': - _init_dist_mpi(backend, **kwargs) - elif launcher == 'slurm': - _init_dist_slurm(backend, **kwargs) - else: - raise ValueError(f'Invalid launcher type: {launcher}') - - -def _init_dist_pytorch(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['RANK']) - offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) - num_gpus = int(os.environ['RANK_SIZE']) - torch.npu.set_device((rank + offset) % num_gpus) - dist.init_process_group(backend=backend, world_size=num_gpus, rank=rank) - - -def _init_dist_mpi(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['OMPI_COMM_WORLD_RANK']) - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(rank % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_slurm(backend, port=None): - """Initialize slurm distributed training environment. - If argument ``port`` is not specified, then the master port will be system - environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system - environment variable, then a default port ``29500`` will be used. - Args: - backend (str): Backend of torch.distributed. - port (int, optional): Master port. Defaults to None. 
- """ - proc_id = int(os.environ['SLURM_PROCID']) - ntasks = int(os.environ['SLURM_NTASKS']) - node_list = os.environ['SLURM_NODELIST'] - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(proc_id % num_gpus) - addr = subprocess.getoutput( - f'scontrol show hostname {node_list} | head -n1') - # specify master port - if port is not None: - os.environ['MASTER_PORT'] = str(port) - elif 'MASTER_PORT' in os.environ: - pass # use MASTER_PORT in the environment variable - else: - # 29500 is torch.distributed default port - os.environ['MASTER_PORT'] = '29500' - # use MASTER_ADDR in the environment variable if it already exists - if 'MASTER_ADDR' not in os.environ: - os.environ['MASTER_ADDR'] = addr - os.environ['WORLD_SIZE'] = str(ntasks) - os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) - os.environ['RANK'] = str(proc_id) - dist.init_process_group(backend=backend) - - -def get_dist_info(): - if TORCH_VERSION < '1.0': - initialized = dist._initialized - else: - if dist.is_available(): - initialized = dist.is_initialized() - else: - initialized = False - if initialized: - rank = dist.get_rank() - world_size = dist.get_world_size() - else: - rank = 0 - world_size = 1 - return rank, world_size - - -def master_only(func): - - @functools.wraps(func) - def wrapper(*args, **kwargs): - rank, _ = get_dist_info() - if rank == 0: - return func(*args, **kwargs) - - return wrapper - - -def allreduce_params(params, coalesce=True, bucket_size_mb=-1): - """Allreduce parameters. - Args: - params (list[torch.Parameters]): List of parameters or buffers of a - model. - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - _, world_size = get_dist_info() - if world_size == 1: - return - params = [param.data for param in params] - if coalesce: - _allreduce_coalesced(params, world_size, bucket_size_mb) - else: - for tensor in params: - dist.all_reduce(tensor.div_(world_size)) - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - """Allreduce gradients. - Args: - params (list[torch.Parameters]): List of parameters of a model - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - grads = [ - param.grad.data for param in params - if param.requires_grad and param.grad is not None - ] - _, world_size = get_dist_info() - if world_size == 1: - return - if coalesce: - _allreduce_coalesced(grads, world_size, bucket_size_mb) - else: - for tensor in grads: - dist.all_reduce(tensor.div_(world_size)) - - -def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): - if bucket_size_mb > 0: - bucket_size_bytes = bucket_size_mb * 1024 * 1024 - buckets = _take_tensors(tensors, bucket_size_bytes) - else: - buckets = OrderedDict() - for tensor in tensors: - tp = tensor.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(tensor) - buckets = buckets.values() - - for bucket in buckets: - flat_tensors = _flatten_dense_tensors(bucket) - dist.all_reduce(flat_tensors) - flat_tensors.div_(world_size) - for tensor, synced in zip( - bucket, _unflatten_dense_tensors(flat_tensors, bucket)): +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from mmcv.utils import TORCH_VERSION + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) + num_gpus = int(os.environ['RANK_SIZE']) + torch.npu.set_device((rank + offset) % num_gpus) + dist.init_process_group(backend=backend, world_size=num_gpus, rank=rank) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if TORCH_VERSION < '1.0': + initialized = dist._initialized + else: + if dist.is_available(): + initialized = dist.is_initialized() + else: + initialized = False + if initialized: + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. 
+ """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): tensor.copy_(synced) \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/distributed.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/distributed.py index 86ca4b123eb718593037c0b2cd62cc19116c6f9b..1927a480731f6a0016d53a37ec2c64cb32da4a0a 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/distributed.py +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/distributed.py @@ -1,119 +1,119 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch.nn.parallel.distributed import (DistributedDataParallel, - _find_tensors) - -from mmcv import print_log -from mmcv.utils import TORCH_VERSION -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(DistributedDataParallel): - """The DDP module that supports DataContainer. - - MMDDP has two main differences with PyTorch DDP: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data. - - It implement two APIs ``train_step()`` and ``val_step()``. - """ - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - """train_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.train_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. 
- if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids and False: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.train_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - output = self.module.train_step(*inputs[0], **kwargs[0]) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output - - def val_step(self, *inputs, **kwargs): - """val_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.val_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. - if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.val_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - output = self.module.val_step(*inputs, **kwargs) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from mmcv import print_log +from mmcv.utils import TORCH_VERSION +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. 
+ + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids and False: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. 
+ if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/optimizer.py b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/optimizer.py index 9653c4e11b87b1c69866cd3b08984d43305a4469..5b574f66deb66760069519b3d87c6c4164d49cd9 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/optimizer.py +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/mmcv_need/optimizer.py @@ -1,162 +1,162 @@ -# Copyright (c) Open-MMLab. All rights reserved. -import copy -from collections import defaultdict -from itertools import chain - -from torch.nn.utils import clip_grad - -from ..dist_utils import allreduce_grads -from ..fp16_utils import LossScaler, wrap_fp16_model -from .hook import HOOKS, Hook -from apex import amp - -@HOOKS.register_module() -class OptimizerHook(Hook): - - def __init__(self, grad_clip=None): - self.grad_clip = grad_clip - - def clip_grads(self, params): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **self.grad_clip) - - def after_train_iter(self, runner): - runner.optimizer.zero_grad() - with amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: - scaled_loss.backward() - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) - runner.optimizer.step() - - -@HOOKS.register_module() -class Fp16OptimizerHook(OptimizerHook): - """FP16 optimizer hook. - - The steps of fp16 optimizer is as follows. - 1. Scale the loss value. - 2. BP in the fp16 model. - 2. Copy gradients from fp16 model to fp32 weights. - 3. Update fp32 weights. - 4. Copy updated parameters from fp32 weights to fp16 model. - - Refer to https://arxiv.org/abs/1710.03740 for more details. - - Args: - loss_scale (float | str | dict): Scale factor multiplied with loss. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of LossScaler. - Defaults to 512. 
- """ - - def __init__(self, - grad_clip=None, - coalesce=True, - bucket_size_mb=-1, - loss_scale=512., - distributed=True): - self.grad_clip = grad_clip - self.coalesce = coalesce - self.bucket_size_mb = bucket_size_mb - self.distributed = distributed - if loss_scale == 'dynamic': - self.loss_scaler = LossScaler(mode='dynamic') - elif isinstance(loss_scale, float): - self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') - elif isinstance(loss_scale, dict): - self.loss_scaler = LossScaler(**loss_scale) - else: - raise ValueError('loss_scale must be of type float, dict, or ' - f'"dynamic", got {loss_scale}') - - def before_run(self, runner): - """Preparing steps before Mixed Precision Training. - - 1. Make a master copy of fp32 weights for optimization. - 2. Convert the main model from fp32 to fp16. - """ - # keep a copy of fp32 weights - old_groups = runner.optimizer.param_groups - runner.optimizer.param_groups = copy.deepcopy( - runner.optimizer.param_groups) - state = defaultdict(dict) - p_map = { - old_p: p - for old_p, p in zip( - chain(*(g['params'] for g in old_groups)), - chain(*(g['params'] for g in runner.optimizer.param_groups))) - } - for k, v in runner.optimizer.state.items(): - state[p_map[k]] = v - runner.optimizer.state = state - # convert model to fp16 - wrap_fp16_model(runner.model) - - def copy_grads_to_fp32(self, fp16_net, fp32_weights): - """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): - if fp16_param.grad is not None: - if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new(fp32_param.size()) - fp32_param.grad.copy_(fp16_param.grad) - - def copy_params_to_fp16(self, fp16_net, fp32_weights): - """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): - fp16_param.data.copy_(fp32_param.data) - - def after_train_iter(self, runner): - """Backward optimization steps for Mixed Precision Training. For - dynamic loss scaling, please refer `loss_scalar.py` - - 1. Scale the loss by a scale factor. - 2. Backward the loss to obtain the gradients (fp16). - 3. Copy gradients from the model to the fp32 weight copy. - 4. Scale the gradients back and update the fp32 weight copy. - 5. Copy back the params from fp32 weight copy to the fp16 model. 
- """ - # clear grads of last iteration - runner.model.zero_grad() - runner.optimizer.zero_grad() - # scale the loss value - scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale - scaled_loss.backward() - # copy fp16 grads in the model to fp32 params in the optimizer - - fp32_weights = [] - for param_group in runner.optimizer.param_groups: - fp32_weights += param_group['params'] - self.copy_grads_to_fp32(runner.model, fp32_weights) - # allreduce grads - if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) - - has_overflow = self.loss_scaler.has_overflow(fp32_weights) - # if has overflow, skip this iteration - if not has_overflow: - # scale the gradients back - for param in fp32_weights: - if param.grad is not None: - param.grad.div_(self.loss_scaler.loss_scale) - if self.grad_clip is not None: - grad_norm = self.clip_grads(fp32_weights) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) - # update fp32 params - runner.optimizer.step() - # copy fp32 params to the fp16 model - self.copy_params_to_fp16(runner.model, fp32_weights) - self.loss_scaler.update_scale(has_overflow) - if has_overflow: - runner.logger.warning('Check overflow, downscale loss scale ' - f'to {self.loss_scaler.cur_scale}') +# Copyright (c) Open-MMLab. All rights reserved. +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook +from apex import amp + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + with amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: + scaled_loss.backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + runner.optimizer.step() + + +@HOOKS.register_module() +class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook. + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor multiplied with loss. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. 
+ """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. 
+ """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_performance_8p.sh index 78e27659b85b98e87c06f94c769729ad29254fbe..8c64721a0f576429e27b422ce1a7e780506ff5fb 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_performance_8p.sh @@ -1,186 +1,186 @@ -#!/bin/bash -#export DYNAMIC_OP='ADD#MUL' -#export PYTHONPATH=/autotest/CI_daily/PyTorch_retinanet_precision:$PYTHONPATH -export LANG=en_US.UTF-8 - -path=$(python3 -c "import sys;print(sys.path[-1])") -python_path=$(echo $path | awk -F 'lib' '{print $1}') -chmod -R 777 $python_path - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 -RANK_SIZE=8 -batch_size=64 -#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json -RANK_ID_START=0 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数 需要模型审视修改 -#网络名称,同目录名称 -Network="RetinaNet_ID0427_for_PyTorch" -#训练epoch -train_epochs=2 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_8p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, 
default is False - --data_dump_flag data dump flag, default is 0 - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --autotune whether to enable autotune, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --autotune* ]];then - autotune=`echo ${para#*=}` - export autotune=$autotune - mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak - mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak - autotune_dump_path=${cur_path}/output/autotune_dump - mkdir -p ${autotune_dump_path}/GA - mkdir -p ${autotune_dump_path}/rl - cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ - cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --bind_core* ]]; then - bind_core=`echo ${para#*=}` - name_bind="_bindcore" - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#autotune时,先开启autotune执行单P训练,不需要修改 -if [[ $autotune == True ]]; then - train_full_1p.sh --autotune=$autotune --data_path=$data_path - wait - autotune=False - export autotune=$autotune -fi -mkdir -p $cur_path/../data -ln -snf $data_path/coco $cur_path/../data/ -cp train_retinanet_8p.sh $cur_path/../ - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#设置性能执行1个epoch -sed -i '/optimizer_config/a\total_epochs = 1\' $cur_path/../configs/retinanet/retinanet_r50_fpn_1x_coco.py - -#进入训练脚本目录 -cd $cur_path/../ -SIll=1 -for((RANK_ID=$RANK_ID_START;RANK_ID<$((SIll+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $RANK_ID" - export RANK_ID=$RANK_ID - export ASCEND_DEVICE_ID=$RANK_ID - ASCEND_DEVICE_ID=$RANK_ID - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - bash train_retinanet_8p.sh > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 - - #python3 ./tools/train.py configs/retinanet/retinanet_r50_fpn_1x_coco.py \ - # --launcher pytorch \ - # --cfg-options optimizer.lr=0.038\ - # --seed 0 \ - # --gpu-ids 0 \ - # --no-validate \ - # --opt-level O1 \ - # > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#还原 -sed -i "s|total_epochs = 1| |g" $cur_path/../configs/retinanet/retinanet_r50_fpn_1x_coco.py - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) -echo "E2E Training Duration sec : $e2e_time" - 
-#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -grep "time:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log > traintime.log -sed -i '1,10d' traintime.log -TrainingTime=`cat traintime.log | grep "time:" |awk '{sum+=$15} END {print sum/NR}'` -temp1=`echo "8 * ${batch_size}"|bc` -ActualFPS=`echo "scale=2;${temp1} / ${TrainingTime}"|bc` - -ActualLoss=`grep "loss:" traintime.log | awk 'END {print $23}'` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash +#export DYNAMIC_OP='ADD#MUL' +#export PYTHONPATH=/autotest/CI_daily/PyTorch_retinanet_precision:$PYTHONPATH +export LANG=en_US.UTF-8 + +path=$(python3 -c "import sys;print(sys.path[-1])") +python_path=$(echo $path | awk -F 'lib' '{print $1}') +chmod -R 777 $python_path + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +#保证rank table file 文件rank_table_8p.json存放在和test同级的configs目录下 +RANK_SIZE=8 +batch_size=64 +#RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数 需要模型审视修改 +#网络名称,同目录名称 +Network="RetinaNet_ID0427_for_PyTorch" +#训练epoch +train_epochs=2 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_8p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is 0 + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para 
== --autotune* ]];then + autotune=`echo ${para#*=}` + export autotune=$autotune + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/ + cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/ + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --bind_core* ]]; then + bind_core=`echo ${para#*=}` + name_bind="_bindcore" + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#autotune时,先开启autotune执行单P训练,不需要修改 +if [[ $autotune == True ]]; then + train_full_1p.sh --autotune=$autotune --data_path=$data_path + wait + autotune=False + export autotune=$autotune +fi +mkdir -p $cur_path/../data +ln -snf $data_path/coco $cur_path/../data/ +cp train_retinanet_8p.sh $cur_path/../ + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#设置性能执行1个epoch +sed -i '/optimizer_config/a\total_epochs = 1\' $cur_path/../configs/retinanet/retinanet_r50_fpn_1x_coco.py + +#进入训练脚本目录 +cd $cur_path/../ +SIll=1 +for((RANK_ID=$RANK_ID_START;RANK_ID<$((SIll+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $RANK_ID" + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$RANK_ID + ASCEND_DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + bash train_retinanet_8p.sh > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 + + #python3 ./tools/train.py configs/retinanet/retinanet_r50_fpn_1x_coco.py \ + # --launcher pytorch \ + # --cfg-options optimizer.lr=0.038\ + # --seed 0 \ + # --gpu-ids 0 \ + # --no-validate \ + # --opt-level O1 \ + # > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#还原 +sed -i "s|total_epochs = 1| |g" $cur_path/../configs/retinanet/retinanet_r50_fpn_1x_coco.py + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +grep "time:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log > traintime.log +sed -i '1,10d' traintime.log +TrainingTime=`cat traintime.log | grep "time:" |awk '{sum+=$15} END {print sum/NR}'` +temp1=`echo "8 * ${batch_size}"|bc` +ActualFPS=`echo "scale=2;${temp1} / ${TrainingTime}"|bc` + +ActualLoss=`grep "loss:" traintime.log | awk 'END {print $23}'` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo 
"ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_1p.sh b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_1p.sh index e10b6ca629d50d75b00ac9139161b4d9abae7237..e78d4cc0ffdb69f0b5c542a11131ef333df3ad42 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_1p.sh +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_1p.sh @@ -1,29 +1,29 @@ -#source pt_set_env.sh -rm -rf kernel_meta/ -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error -/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=3 -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" -export COMBINED_ENABLE=1 -export DYNAMIC_COMPILE_ENABLE=0 -export EXPERIMENTAL_DYNAMIC_PARTITION=0 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#export HCCL_WHITELIST_DISABLE=1 -#export SCALAR_TO_HOST_MEM=1 -#export RANK_SIZE=8 -PORT=29500 ./tools/dist_train.sh configs/retinanet/retinanet_r50_fpn_1x_coco.py 1 --cfg-options optimizer.lr=0.005 --seed 0 --gpu-ids 0 --no-validate --opt-level O1 - - -#export RANK=0 -#python3.7 ./tools/train.py configs/retinanet/retinanet_r50_fpn_1x_coco.py \ -# --cfg-options \ -# optimizer.lr=0.005 \ -# --seed 0 \ -# --gpu-ids 0 \ +#source pt_set_env.sh +rm -rf kernel_meta/ +/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error +/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=3 +export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest +export PTCOPY_ENABLE=1 +export TASK_QUEUE_ENABLE=1 +export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1 +export DYNAMIC_COMPILE_ENABLE=0 +export EXPERIMENTAL_DYNAMIC_PARTITION=0 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#export HCCL_WHITELIST_DISABLE=1 +#export SCALAR_TO_HOST_MEM=1 +#export RANK_SIZE=8 +PORT=29500 ./tools/dist_train.sh configs/retinanet/retinanet_r50_fpn_1x_coco.py 1 --cfg-options optimizer.lr=0.005 --seed 0 --gpu-ids 0 --no-validate --opt-level O1 + + +#export RANK=0 +#python3.7 ./tools/train.py configs/retinanet/retinanet_r50_fpn_1x_coco.py \ +# --cfg-options \ +# optimizer.lr=0.005 \ +# --seed 0 \ +# --gpu-ids 0 \ # --opt-level O1 & \ No newline at end of file diff --git a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_8p.sh b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_8p.sh index 6bee47313b97dbe6a10541c7a93409942aedefe4..888349cb6dea18f1b6d365b4a2c0ad53b65ab2af 100644 --- a/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_8p.sh +++ b/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch/test/train_retinanet_8p.sh @@ -1,47 +1,47 @@ -#source pt_set_env.sh -rm -rf kernel_meta/ -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error -/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error -export 
ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=3 -export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" -export COMBINED_ENABLE=1 -export DYNAMIC_COMPILE_ENABLE=0 -export EXPERIMENTAL_DYNAMIC_PARTITION=0 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#export HCCL_WHITELIST_DISABLE=1 -#export SCALAR_TO_HOST_MEM=1 -#export RANK_SIZE=8 -PORT=29500 ./tools/dist_train.sh configs/retinanet/retinanet_r50_fpn_1x_coco.py 8 --cfg-options optimizer.lr=0.04 --seed 0 --gpu-ids 0 --no-validate --opt-level O1 - -#KERNEL_NUM=$(($(nproc)/8)) -#for((RANK_ID=0;RANK_ID 1: - mp.spawn(train_8p, nprocs=config.npus, args=(config.npus, config)) - else: - print("config.npus should be greater than 1.") - raise RuntimeError - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - - # model hyper-parameters - parser.add_argument('--image_size', type=int, default=224) - parser.add_argument('--t', type=int, default=3, help='t for Recurrent step of R2U_Net or R2AttU_Net') - - # training hyper-parameters - parser.add_argument('--img_ch', type=int, default=3) - parser.add_argument('--output_ch', type=int, default=1) - parser.add_argument('--num_epochs', type=int, default=100) - parser.add_argument('--num_epochs_decay', type=int, default=70) - parser.add_argument('--batch_size', type=int, default=16) - parser.add_argument('--num_workers', type=int, default=8*8) - parser.add_argument('--lr', type=float, default=0.0002*8) - parser.add_argument('--beta1', type=float, default=0.5) # momentum1 in Adam - parser.add_argument('--beta2', type=float, default=0.999) # momentum2 in Adam - parser.add_argument('--augmentation_prob', type=float, default=0.4) - - parser.add_argument('--log_step', type=int, default=2) - parser.add_argument('--val_step', type=int, default=2) - - # misc - parser.add_argument('--mode', type=str, default='train') - parser.add_argument('--model_type', type=str, default='U_Net', help='U_Net/R2U_Net/AttU_Net/R2AttU_Net') - parser.add_argument('--data_path', type=str, default='./dataset/') - parser.add_argument('--result_path', type=str, default='./result_8p') - - parser.add_argument('--npus', type=int, default=8) - parser.add_argument('--use_apex', type=int, default=1) - parser.add_argument('--apex_level', type=str, default="O2") - parser.add_argument('--loss_scale', type=float, default=128.) - parser.add_argument('--seed', type=int, default=12345) - parser.add_argument('--world_size', type=int, default=1) - parser.add_argument('--dist_backend', type=str, default="hccl") - parser.add_argument('--display_freq', type=int, default=-1) - - config = parser.parse_args() - main(config) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import argparse +import os +import random +import numpy as np +import torch.multiprocessing as mp +import torch +from solver_8p import train_8p + +def seed_everything(seed): + random.seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def main(config): + if config.model_type not in ['U_Net','R2U_Net','AttU_Net','R2AttU_Net']: + print('ERROR!! model_type should be selected in U_Net/R2U_Net/AttU_Net/R2AttU_Net') + print('Your input for model_type was %s'%config.model_type) + return + + # Create directories if not exist + config.result_path = os.path.join(config.result_path,config.model_type) + if not os.path.exists(config.result_path): + os.makedirs(config.result_path) + + config.train_path = os.path.join(config.data_path, "train") + config.valid_path = os.path.join(config.data_path, "valid") + print(config) + + seed_everything(config.seed) + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "29504" + if config.npus > 1: + mp.spawn(train_8p, nprocs=config.npus, args=(config.npus, config)) + else: + print("config.npus should be greater than 1.") + raise RuntimeError + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + + # model hyper-parameters + parser.add_argument('--image_size', type=int, default=224) + parser.add_argument('--t', type=int, default=3, help='t for Recurrent step of R2U_Net or R2AttU_Net') + + # training hyper-parameters + parser.add_argument('--img_ch', type=int, default=3) + parser.add_argument('--output_ch', type=int, default=1) + parser.add_argument('--num_epochs', type=int, default=100) + parser.add_argument('--num_epochs_decay', type=int, default=70) + parser.add_argument('--batch_size', type=int, default=16) + parser.add_argument('--num_workers', type=int, default=8*8) + parser.add_argument('--lr', type=float, default=0.0002*8) + parser.add_argument('--beta1', type=float, default=0.5) # momentum1 in Adam + parser.add_argument('--beta2', type=float, default=0.999) # momentum2 in Adam + parser.add_argument('--augmentation_prob', type=float, default=0.4) + + parser.add_argument('--log_step', type=int, default=2) + parser.add_argument('--val_step', type=int, default=2) + + # misc + parser.add_argument('--mode', type=str, default='train') + parser.add_argument('--model_type', type=str, default='U_Net', help='U_Net/R2U_Net/AttU_Net/R2AttU_Net') + parser.add_argument('--data_path', type=str, default='./dataset/') + 
parser.add_argument('--result_path', type=str, default='./result_8p') + + parser.add_argument('--npus', type=int, default=8) + parser.add_argument('--use_apex', type=int, default=1) + parser.add_argument('--apex_level', type=str, default="O2") + parser.add_argument('--loss_scale', type=float, default=128.) + parser.add_argument('--seed', type=int, default=12345) + parser.add_argument('--world_size', type=int, default=1) + parser.add_argument('--dist_backend', type=str, default="hccl") + parser.add_argument('--display_freq', type=int, default=-1) + + config = parser.parse_args() + main(config) diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/misc.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/misc.py index 8e4f3a65b1091a33d7042ce763ce8a295f322b02..73e81baaca39f450144a2c67ab1143678bdbed1c 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/misc.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/misc.py @@ -1,50 +1,50 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'): - """ - Call in a loop to create terminal progress bar - @params: - iteration - Required : current iteration (Int) - total - Required : total iterations (Int) - prefix - Optional : prefix string (Str) - suffix - Optional : suffix string (Str) - decimals - Optional : positive number of decimals in percent complete (Int) - length - Optional : character length of bar (Int) - fill - Optional : bar fill character (Str) - """ - percent = ("{0:." 
+ str(decimals) + "f}").format(100 * (iteration / float(total))) - filledLength = int(length * iteration // total) - bar = fill * filledLength + '-' * (length - filledLength) - print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r') - # Print New Line on Complete - if iteration == total: +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'): + """ + Call in a loop to create terminal progress bar + @params: + iteration - Required : current iteration (Int) + total - Required : total iterations (Int) + prefix - Optional : prefix string (Str) + suffix - Optional : suffix string (Str) + decimals - Optional : positive number of decimals in percent complete (Int) + length - Optional : character length of bar (Int) + fill - Optional : bar fill character (Str) + """ + percent = ("{0:." 
+ str(decimals) + "f}").format(100 * (iteration / float(total))) + filledLength = int(length * iteration // total) + bar = fill * filledLength + '-' * (length - filledLength) + print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r') + # Print New Line on Complete + if iteration == total: print() \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/network.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/network.py index eca56f55532f569eb45ed1b2add27078c8d669ba..674e9bd583c37ecabd4eb4e4dae0ac81c0a8f7d8 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/network.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/network.py @@ -1,456 +1,456 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
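
For context, the printProgressBar helper kept unchanged above is meant to be called once per loop iteration, as its docstring says. A minimal usage sketch, assuming misc.py is importable from the same directory; the iteration count and labels are illustrative and not taken from the patch:

import time
from misc import printProgressBar  # helper defined in misc.py above

total = 50  # illustrative number of steps
printProgressBar(0, total, prefix='Progress:', suffix='Complete', length=40)
for i in range(1, total + 1):
    time.sleep(0.01)  # stand-in for real work
    printProgressBar(i, total, prefix='Progress:', suffix='Complete', length=40)
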
-# ============================================================================ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn import init - -def init_weights(net, init_type='normal', gain=0.02): - def init_func(m): - classname = m.__class__.__name__ - if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): - if init_type == 'normal': - init.normal_(m.weight.data, 0.0, gain) - elif init_type == 'xavier': - init.xavier_normal_(m.weight.data, gain=gain) - elif init_type == 'kaiming': - init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') - elif init_type == 'orthogonal': - init.orthogonal_(m.weight.data, gain=gain) - else: - raise NotImplementedError('initialization method [%s] is not implemented' % init_type) - if hasattr(m, 'bias') and m.bias is not None: - init.constant_(m.bias.data, 0.0) - elif classname.find('BatchNorm2d') != -1: - init.normal_(m.weight.data, 1.0, gain) - init.constant_(m.bias.data, 0.0) - - print('initialize network with %s' % init_type) - net.apply(init_func) - -class conv_block(nn.Module): - def __init__(self,ch_in,ch_out): - super(conv_block,self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(ch_in, ch_out, kernel_size=3,stride=1,padding=1,bias=True), - nn.BatchNorm2d(ch_out), - nn.ReLU(inplace=True), - nn.Conv2d(ch_out, ch_out, kernel_size=3,stride=1,padding=1,bias=True), - nn.BatchNorm2d(ch_out), - nn.ReLU(inplace=True) - ) - - - def forward(self,x): - x = self.conv(x) - return x - -class up_conv(nn.Module): - def __init__(self,ch_in,ch_out): - super(up_conv,self).__init__() - self.up = nn.Sequential( - nn.Upsample(scale_factor=2), - nn.Conv2d(ch_in,ch_out,kernel_size=3,stride=1,padding=1,bias=True), - nn.BatchNorm2d(ch_out), - nn.ReLU(inplace=True) - ) - - def forward(self,x): - x = self.up(x) - return x - -class Recurrent_block(nn.Module): - def __init__(self,ch_out,t=2): - super(Recurrent_block,self).__init__() - self.t = t - self.ch_out = ch_out - self.conv = nn.Sequential( - nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding=1,bias=True), - nn.BatchNorm2d(ch_out), - nn.ReLU(inplace=True) - ) - - def forward(self,x): - for i in range(self.t): - - if i==0: - x1 = self.conv(x) - - x1 = self.conv(x+x1) - return x1 - -class RRCNN_block(nn.Module): - def __init__(self,ch_in,ch_out,t=2): - super(RRCNN_block,self).__init__() - self.RCNN = nn.Sequential( - Recurrent_block(ch_out,t=t), - Recurrent_block(ch_out,t=t) - ) - self.Conv_1x1 = nn.Conv2d(ch_in,ch_out,kernel_size=1,stride=1,padding=0) - - def forward(self,x): - x = self.Conv_1x1(x) - x1 = self.RCNN(x) - return x+x1 - - -class single_conv(nn.Module): - def __init__(self,ch_in,ch_out): - super(single_conv,self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(ch_in, ch_out, kernel_size=3,stride=1,padding=1,bias=True), - nn.BatchNorm2d(ch_out), - nn.ReLU(inplace=True) - ) - - def forward(self,x): - x = self.conv(x) - return x - -class Attention_block(nn.Module): - def __init__(self,F_g,F_l,F_int): - super(Attention_block,self).__init__() - self.W_g = nn.Sequential( - nn.Conv2d(F_g, F_int, kernel_size=1,stride=1,padding=0,bias=True), - nn.BatchNorm2d(F_int) - ) - - self.W_x = nn.Sequential( - nn.Conv2d(F_l, F_int, kernel_size=1,stride=1,padding=0,bias=True), - nn.BatchNorm2d(F_int) - ) - - self.psi = nn.Sequential( - nn.Conv2d(F_int, 1, kernel_size=1,stride=1,padding=0,bias=True), - nn.BatchNorm2d(1), - nn.Sigmoid() - ) - - self.relu = nn.ReLU(inplace=True) - - def forward(self,g,x): - g1 = self.W_g(g) - x1 = self.W_x(x) - 
psi = self.relu(g1+x1) - psi = self.psi(psi) - - return x*psi - - -class U_Net(nn.Module): - def __init__(self,img_ch=3,output_ch=1): - super(U_Net,self).__init__() - - self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) - - self.Conv1 = conv_block(ch_in=img_ch,ch_out=64) - self.Conv2 = conv_block(ch_in=64,ch_out=128) - self.Conv3 = conv_block(ch_in=128,ch_out=256) - self.Conv4 = conv_block(ch_in=256,ch_out=512) - self.Conv5 = conv_block(ch_in=512,ch_out=1024) - - self.Up5 = up_conv(ch_in=1024,ch_out=512) - self.Up_conv5 = conv_block(ch_in=1024, ch_out=512) - - self.Up4 = up_conv(ch_in=512,ch_out=256) - self.Up_conv4 = conv_block(ch_in=512, ch_out=256) - - self.Up3 = up_conv(ch_in=256,ch_out=128) - self.Up_conv3 = conv_block(ch_in=256, ch_out=128) - - self.Up2 = up_conv(ch_in=128,ch_out=64) - self.Up_conv2 = conv_block(ch_in=128, ch_out=64) - - self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) - - - def forward(self,x): - # encoding path - x1 = self.Conv1(x) - - x2 = self.Maxpool(x1) - x2 = self.Conv2(x2) - - x3 = self.Maxpool(x2) - x3 = self.Conv3(x3) - - x4 = self.Maxpool(x3) - x4 = self.Conv4(x4) - - x5 = self.Maxpool(x4) - x5 = self.Conv5(x5) - - # decoding + concat path - d5 = self.Up5(x5) - d5 = torch.cat((x4,d5),dim=1) - - d5 = self.Up_conv5(d5) - - d4 = self.Up4(d5) - d4 = torch.cat((x3,d4),dim=1) - d4 = self.Up_conv4(d4) - - d3 = self.Up3(d4) - d3 = torch.cat((x2,d3),dim=1) - d3 = self.Up_conv3(d3) - - d2 = self.Up2(d3) - d2 = torch.cat((x1,d2),dim=1) - d2 = self.Up_conv2(d2) - - d1 = self.Conv_1x1(d2) - - return d1 - - -class R2U_Net(nn.Module): - def __init__(self,img_ch=3,output_ch=1,t=2): - super(R2U_Net,self).__init__() - - self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) - self.Upsample = nn.Upsample(scale_factor=2) - - self.RRCNN1 = RRCNN_block(ch_in=img_ch,ch_out=64,t=t) - - self.RRCNN2 = RRCNN_block(ch_in=64,ch_out=128,t=t) - - self.RRCNN3 = RRCNN_block(ch_in=128,ch_out=256,t=t) - - self.RRCNN4 = RRCNN_block(ch_in=256,ch_out=512,t=t) - - self.RRCNN5 = RRCNN_block(ch_in=512,ch_out=1024,t=t) - - - self.Up5 = up_conv(ch_in=1024,ch_out=512) - self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512,t=t) - - self.Up4 = up_conv(ch_in=512,ch_out=256) - self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256,t=t) - - self.Up3 = up_conv(ch_in=256,ch_out=128) - self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128,t=t) - - self.Up2 = up_conv(ch_in=128,ch_out=64) - self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64,t=t) - - self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) - - - def forward(self,x): - # encoding path - x1 = self.RRCNN1(x) - - x2 = self.Maxpool(x1) - x2 = self.RRCNN2(x2) - - x3 = self.Maxpool(x2) - x3 = self.RRCNN3(x3) - - x4 = self.Maxpool(x3) - x4 = self.RRCNN4(x4) - - x5 = self.Maxpool(x4) - x5 = self.RRCNN5(x5) - - # decoding + concat path - d5 = self.Up5(x5) - d5 = torch.cat((x4,d5),dim=1) - d5 = self.Up_RRCNN5(d5) - - d4 = self.Up4(d5) - d4 = torch.cat((x3,d4),dim=1) - d4 = self.Up_RRCNN4(d4) - - d3 = self.Up3(d4) - d3 = torch.cat((x2,d3),dim=1) - d3 = self.Up_RRCNN3(d3) - - d2 = self.Up2(d3) - d2 = torch.cat((x1,d2),dim=1) - d2 = self.Up_RRCNN2(d2) - - d1 = self.Conv_1x1(d2) - - return d1 - - - -class AttU_Net(nn.Module): - def __init__(self,img_ch=3,output_ch=1): - super(AttU_Net,self).__init__() - - self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) - - self.Conv1 = conv_block(ch_in=img_ch,ch_out=64) - self.Conv2 = conv_block(ch_in=64,ch_out=128) - self.Conv3 = conv_block(ch_in=128,ch_out=256) - self.Conv4 = 
conv_block(ch_in=256,ch_out=512) - self.Conv5 = conv_block(ch_in=512,ch_out=1024) - - self.Up5 = up_conv(ch_in=1024,ch_out=512) - self.Att5 = Attention_block(F_g=512,F_l=512,F_int=256) - self.Up_conv5 = conv_block(ch_in=1024, ch_out=512) - - self.Up4 = up_conv(ch_in=512,ch_out=256) - self.Att4 = Attention_block(F_g=256,F_l=256,F_int=128) - self.Up_conv4 = conv_block(ch_in=512, ch_out=256) - - self.Up3 = up_conv(ch_in=256,ch_out=128) - self.Att3 = Attention_block(F_g=128,F_l=128,F_int=64) - self.Up_conv3 = conv_block(ch_in=256, ch_out=128) - - self.Up2 = up_conv(ch_in=128,ch_out=64) - self.Att2 = Attention_block(F_g=64,F_l=64,F_int=32) - self.Up_conv2 = conv_block(ch_in=128, ch_out=64) - - self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) - - - def forward(self,x): - # encoding path - x1 = self.Conv1(x) - - x2 = self.Maxpool(x1) - x2 = self.Conv2(x2) - - x3 = self.Maxpool(x2) - x3 = self.Conv3(x3) - - x4 = self.Maxpool(x3) - x4 = self.Conv4(x4) - - x5 = self.Maxpool(x4) - x5 = self.Conv5(x5) - - # decoding + concat path - d5 = self.Up5(x5) - x4 = self.Att5(g=d5,x=x4) - d5 = torch.cat((x4,d5),dim=1) - d5 = self.Up_conv5(d5) - - d4 = self.Up4(d5) - x3 = self.Att4(g=d4,x=x3) - d4 = torch.cat((x3,d4),dim=1) - d4 = self.Up_conv4(d4) - - d3 = self.Up3(d4) - x2 = self.Att3(g=d3,x=x2) - d3 = torch.cat((x2,d3),dim=1) - d3 = self.Up_conv3(d3) - - d2 = self.Up2(d3) - x1 = self.Att2(g=d2,x=x1) - d2 = torch.cat((x1,d2),dim=1) - d2 = self.Up_conv2(d2) - - d1 = self.Conv_1x1(d2) - - return d1 - - -class R2AttU_Net(nn.Module): - def __init__(self,img_ch=3,output_ch=1,t=2): - super(R2AttU_Net,self).__init__() - - self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) - self.Upsample = nn.Upsample(scale_factor=2) - - self.RRCNN1 = RRCNN_block(ch_in=img_ch,ch_out=64,t=t) - - self.RRCNN2 = RRCNN_block(ch_in=64,ch_out=128,t=t) - - self.RRCNN3 = RRCNN_block(ch_in=128,ch_out=256,t=t) - - self.RRCNN4 = RRCNN_block(ch_in=256,ch_out=512,t=t) - - self.RRCNN5 = RRCNN_block(ch_in=512,ch_out=1024,t=t) - - - self.Up5 = up_conv(ch_in=1024,ch_out=512) - self.Att5 = Attention_block(F_g=512,F_l=512,F_int=256) - self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512,t=t) - - self.Up4 = up_conv(ch_in=512,ch_out=256) - self.Att4 = Attention_block(F_g=256,F_l=256,F_int=128) - self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256,t=t) - - self.Up3 = up_conv(ch_in=256,ch_out=128) - self.Att3 = Attention_block(F_g=128,F_l=128,F_int=64) - self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128,t=t) - - self.Up2 = up_conv(ch_in=128,ch_out=64) - self.Att2 = Attention_block(F_g=64,F_l=64,F_int=32) - self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64,t=t) - - self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) - - - def forward(self,x): - # encoding path - x1 = self.RRCNN1(x) - - x2 = self.Maxpool(x1) - x2 = self.RRCNN2(x2) - - x3 = self.Maxpool(x2) - x3 = self.RRCNN3(x3) - - x4 = self.Maxpool(x3) - x4 = self.RRCNN4(x4) - - x5 = self.Maxpool(x4) - x5 = self.RRCNN5(x5) - - # decoding + concat path - d5 = self.Up5(x5) - x4 = self.Att5(g=d5,x=x4) - d5 = torch.cat((x4,d5),dim=1) - d5 = self.Up_RRCNN5(d5) - - d4 = self.Up4(d5) - x3 = self.Att4(g=d4,x=x3) - d4 = torch.cat((x3,d4),dim=1) - d4 = self.Up_RRCNN4(d4) - - d3 = self.Up3(d4) - x2 = self.Att3(g=d3,x=x2) - d3 = torch.cat((x2,d3),dim=1) - d3 = self.Up_RRCNN3(d3) - - d2 = self.Up2(d3) - x1 = self.Att2(g=d2,x=x1) - d2 = torch.cat((x1,d2),dim=1) - d2 = self.Up_RRCNN2(d2) - - d1 = self.Conv_1x1(d2) - - return d1 +# BSD 3-Clause License +# +# Copyright (c) 2017 
xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init + +def init_weights(net, init_type='normal', gain=0.02): + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) + +class conv_block(nn.Module): + def __init__(self,ch_in,ch_out): + super(conv_block,self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(ch_in, ch_out, kernel_size=3,stride=1,padding=1,bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True), + nn.Conv2d(ch_out, ch_out, kernel_size=3,stride=1,padding=1,bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + + def forward(self,x): + x = self.conv(x) + return x + +class up_conv(nn.Module): + def __init__(self,ch_in,ch_out): + super(up_conv,self).__init__() + self.up = nn.Sequential( + nn.Upsample(scale_factor=2), + nn.Conv2d(ch_in,ch_out,kernel_size=3,stride=1,padding=1,bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + def forward(self,x): + x = self.up(x) + return x + +class Recurrent_block(nn.Module): + def __init__(self,ch_out,t=2): + super(Recurrent_block,self).__init__() + self.t = t + 
self.ch_out = ch_out + self.conv = nn.Sequential( + nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding=1,bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + def forward(self,x): + for i in range(self.t): + + if i==0: + x1 = self.conv(x) + + x1 = self.conv(x+x1) + return x1 + +class RRCNN_block(nn.Module): + def __init__(self,ch_in,ch_out,t=2): + super(RRCNN_block,self).__init__() + self.RCNN = nn.Sequential( + Recurrent_block(ch_out,t=t), + Recurrent_block(ch_out,t=t) + ) + self.Conv_1x1 = nn.Conv2d(ch_in,ch_out,kernel_size=1,stride=1,padding=0) + + def forward(self,x): + x = self.Conv_1x1(x) + x1 = self.RCNN(x) + return x+x1 + + +class single_conv(nn.Module): + def __init__(self,ch_in,ch_out): + super(single_conv,self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(ch_in, ch_out, kernel_size=3,stride=1,padding=1,bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + def forward(self,x): + x = self.conv(x) + return x + +class Attention_block(nn.Module): + def __init__(self,F_g,F_l,F_int): + super(Attention_block,self).__init__() + self.W_g = nn.Sequential( + nn.Conv2d(F_g, F_int, kernel_size=1,stride=1,padding=0,bias=True), + nn.BatchNorm2d(F_int) + ) + + self.W_x = nn.Sequential( + nn.Conv2d(F_l, F_int, kernel_size=1,stride=1,padding=0,bias=True), + nn.BatchNorm2d(F_int) + ) + + self.psi = nn.Sequential( + nn.Conv2d(F_int, 1, kernel_size=1,stride=1,padding=0,bias=True), + nn.BatchNorm2d(1), + nn.Sigmoid() + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self,g,x): + g1 = self.W_g(g) + x1 = self.W_x(x) + psi = self.relu(g1+x1) + psi = self.psi(psi) + + return x*psi + + +class U_Net(nn.Module): + def __init__(self,img_ch=3,output_ch=1): + super(U_Net,self).__init__() + + self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) + + self.Conv1 = conv_block(ch_in=img_ch,ch_out=64) + self.Conv2 = conv_block(ch_in=64,ch_out=128) + self.Conv3 = conv_block(ch_in=128,ch_out=256) + self.Conv4 = conv_block(ch_in=256,ch_out=512) + self.Conv5 = conv_block(ch_in=512,ch_out=1024) + + self.Up5 = up_conv(ch_in=1024,ch_out=512) + self.Up_conv5 = conv_block(ch_in=1024, ch_out=512) + + self.Up4 = up_conv(ch_in=512,ch_out=256) + self.Up_conv4 = conv_block(ch_in=512, ch_out=256) + + self.Up3 = up_conv(ch_in=256,ch_out=128) + self.Up_conv3 = conv_block(ch_in=256, ch_out=128) + + self.Up2 = up_conv(ch_in=128,ch_out=64) + self.Up_conv2 = conv_block(ch_in=128, ch_out=64) + + self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) + + + def forward(self,x): + # encoding path + x1 = self.Conv1(x) + + x2 = self.Maxpool(x1) + x2 = self.Conv2(x2) + + x3 = self.Maxpool(x2) + x3 = self.Conv3(x3) + + x4 = self.Maxpool(x3) + x4 = self.Conv4(x4) + + x5 = self.Maxpool(x4) + x5 = self.Conv5(x5) + + # decoding + concat path + d5 = self.Up5(x5) + d5 = torch.cat((x4,d5),dim=1) + + d5 = self.Up_conv5(d5) + + d4 = self.Up4(d5) + d4 = torch.cat((x3,d4),dim=1) + d4 = self.Up_conv4(d4) + + d3 = self.Up3(d4) + d3 = torch.cat((x2,d3),dim=1) + d3 = self.Up_conv3(d3) + + d2 = self.Up2(d3) + d2 = torch.cat((x1,d2),dim=1) + d2 = self.Up_conv2(d2) + + d1 = self.Conv_1x1(d2) + + return d1 + + +class R2U_Net(nn.Module): + def __init__(self,img_ch=3,output_ch=1,t=2): + super(R2U_Net,self).__init__() + + self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) + self.Upsample = nn.Upsample(scale_factor=2) + + self.RRCNN1 = RRCNN_block(ch_in=img_ch,ch_out=64,t=t) + + self.RRCNN2 = RRCNN_block(ch_in=64,ch_out=128,t=t) + + self.RRCNN3 = RRCNN_block(ch_in=128,ch_out=256,t=t) + + self.RRCNN4 = 
RRCNN_block(ch_in=256,ch_out=512,t=t) + + self.RRCNN5 = RRCNN_block(ch_in=512,ch_out=1024,t=t) + + + self.Up5 = up_conv(ch_in=1024,ch_out=512) + self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512,t=t) + + self.Up4 = up_conv(ch_in=512,ch_out=256) + self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256,t=t) + + self.Up3 = up_conv(ch_in=256,ch_out=128) + self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128,t=t) + + self.Up2 = up_conv(ch_in=128,ch_out=64) + self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64,t=t) + + self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) + + + def forward(self,x): + # encoding path + x1 = self.RRCNN1(x) + + x2 = self.Maxpool(x1) + x2 = self.RRCNN2(x2) + + x3 = self.Maxpool(x2) + x3 = self.RRCNN3(x3) + + x4 = self.Maxpool(x3) + x4 = self.RRCNN4(x4) + + x5 = self.Maxpool(x4) + x5 = self.RRCNN5(x5) + + # decoding + concat path + d5 = self.Up5(x5) + d5 = torch.cat((x4,d5),dim=1) + d5 = self.Up_RRCNN5(d5) + + d4 = self.Up4(d5) + d4 = torch.cat((x3,d4),dim=1) + d4 = self.Up_RRCNN4(d4) + + d3 = self.Up3(d4) + d3 = torch.cat((x2,d3),dim=1) + d3 = self.Up_RRCNN3(d3) + + d2 = self.Up2(d3) + d2 = torch.cat((x1,d2),dim=1) + d2 = self.Up_RRCNN2(d2) + + d1 = self.Conv_1x1(d2) + + return d1 + + + +class AttU_Net(nn.Module): + def __init__(self,img_ch=3,output_ch=1): + super(AttU_Net,self).__init__() + + self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) + + self.Conv1 = conv_block(ch_in=img_ch,ch_out=64) + self.Conv2 = conv_block(ch_in=64,ch_out=128) + self.Conv3 = conv_block(ch_in=128,ch_out=256) + self.Conv4 = conv_block(ch_in=256,ch_out=512) + self.Conv5 = conv_block(ch_in=512,ch_out=1024) + + self.Up5 = up_conv(ch_in=1024,ch_out=512) + self.Att5 = Attention_block(F_g=512,F_l=512,F_int=256) + self.Up_conv5 = conv_block(ch_in=1024, ch_out=512) + + self.Up4 = up_conv(ch_in=512,ch_out=256) + self.Att4 = Attention_block(F_g=256,F_l=256,F_int=128) + self.Up_conv4 = conv_block(ch_in=512, ch_out=256) + + self.Up3 = up_conv(ch_in=256,ch_out=128) + self.Att3 = Attention_block(F_g=128,F_l=128,F_int=64) + self.Up_conv3 = conv_block(ch_in=256, ch_out=128) + + self.Up2 = up_conv(ch_in=128,ch_out=64) + self.Att2 = Attention_block(F_g=64,F_l=64,F_int=32) + self.Up_conv2 = conv_block(ch_in=128, ch_out=64) + + self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) + + + def forward(self,x): + # encoding path + x1 = self.Conv1(x) + + x2 = self.Maxpool(x1) + x2 = self.Conv2(x2) + + x3 = self.Maxpool(x2) + x3 = self.Conv3(x3) + + x4 = self.Maxpool(x3) + x4 = self.Conv4(x4) + + x5 = self.Maxpool(x4) + x5 = self.Conv5(x5) + + # decoding + concat path + d5 = self.Up5(x5) + x4 = self.Att5(g=d5,x=x4) + d5 = torch.cat((x4,d5),dim=1) + d5 = self.Up_conv5(d5) + + d4 = self.Up4(d5) + x3 = self.Att4(g=d4,x=x3) + d4 = torch.cat((x3,d4),dim=1) + d4 = self.Up_conv4(d4) + + d3 = self.Up3(d4) + x2 = self.Att3(g=d3,x=x2) + d3 = torch.cat((x2,d3),dim=1) + d3 = self.Up_conv3(d3) + + d2 = self.Up2(d3) + x1 = self.Att2(g=d2,x=x1) + d2 = torch.cat((x1,d2),dim=1) + d2 = self.Up_conv2(d2) + + d1 = self.Conv_1x1(d2) + + return d1 + + +class R2AttU_Net(nn.Module): + def __init__(self,img_ch=3,output_ch=1,t=2): + super(R2AttU_Net,self).__init__() + + self.Maxpool = nn.MaxPool2d(kernel_size=2,stride=2) + self.Upsample = nn.Upsample(scale_factor=2) + + self.RRCNN1 = RRCNN_block(ch_in=img_ch,ch_out=64,t=t) + + self.RRCNN2 = RRCNN_block(ch_in=64,ch_out=128,t=t) + + self.RRCNN3 = RRCNN_block(ch_in=128,ch_out=256,t=t) + + self.RRCNN4 = RRCNN_block(ch_in=256,ch_out=512,t=t) + + self.RRCNN5 = 
RRCNN_block(ch_in=512,ch_out=1024,t=t) + + + self.Up5 = up_conv(ch_in=1024,ch_out=512) + self.Att5 = Attention_block(F_g=512,F_l=512,F_int=256) + self.Up_RRCNN5 = RRCNN_block(ch_in=1024, ch_out=512,t=t) + + self.Up4 = up_conv(ch_in=512,ch_out=256) + self.Att4 = Attention_block(F_g=256,F_l=256,F_int=128) + self.Up_RRCNN4 = RRCNN_block(ch_in=512, ch_out=256,t=t) + + self.Up3 = up_conv(ch_in=256,ch_out=128) + self.Att3 = Attention_block(F_g=128,F_l=128,F_int=64) + self.Up_RRCNN3 = RRCNN_block(ch_in=256, ch_out=128,t=t) + + self.Up2 = up_conv(ch_in=128,ch_out=64) + self.Att2 = Attention_block(F_g=64,F_l=64,F_int=32) + self.Up_RRCNN2 = RRCNN_block(ch_in=128, ch_out=64,t=t) + + self.Conv_1x1 = nn.Conv2d(64,output_ch,kernel_size=1,stride=1,padding=0) + + + def forward(self,x): + # encoding path + x1 = self.RRCNN1(x) + + x2 = self.Maxpool(x1) + x2 = self.RRCNN2(x2) + + x3 = self.Maxpool(x2) + x3 = self.RRCNN3(x3) + + x4 = self.Maxpool(x3) + x4 = self.RRCNN4(x4) + + x5 = self.Maxpool(x4) + x5 = self.RRCNN5(x5) + + # decoding + concat path + d5 = self.Up5(x5) + x4 = self.Att5(g=d5,x=x4) + d5 = torch.cat((x4,d5),dim=1) + d5 = self.Up_RRCNN5(d5) + + d4 = self.Up4(d5) + x3 = self.Att4(g=d4,x=x3) + d4 = torch.cat((x3,d4),dim=1) + d4 = self.Up_RRCNN4(d4) + + d3 = self.Up3(d4) + x2 = self.Att3(g=d3,x=x2) + d3 = torch.cat((x2,d3),dim=1) + d3 = self.Up_RRCNN3(d3) + + d2 = self.Up2(d3) + x1 = self.Att2(g=d2,x=x1) + d2 = torch.cat((x1,d2),dim=1) + d2 = self.Up_RRCNN2(d2) + + d1 = self.Conv_1x1(d2) + + return d1 diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/pthtar2onx.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/pthtar2onx.py index ab023b10b422adaafd95bbb88cc2cd4f2cddbfb3..d77aeac03f457f9369df881872fa35a43fcd1fb2 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/pthtar2onx.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/pthtar2onx.py @@ -1,55 +1,55 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
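
As a side note on the network.py classes above: Attention_block is an additive attention gate that rescales the encoder skip features x by a sigmoid mask computed from the decoder signal g, and AttU_Net returns raw logits (the solvers apply the sigmoid later). A minimal shape check, assuming network.py is importable; tensor sizes are illustrative:

import torch
from network import Attention_block, AttU_Net

att = Attention_block(F_g=64, F_l=64, F_int=32)
g = torch.randn(2, 64, 56, 56)   # decoder (gating) features
x = torch.randn(2, 64, 56, 56)   # encoder skip features
print(att(g=g, x=x).shape)       # torch.Size([2, 64, 56, 56]); x scaled by the attention map

net = AttU_Net(img_ch=3, output_ch=1)
print(net(torch.randn(1, 3, 224, 224)).shape)   # torch.Size([1, 1, 224, 224]); logits before sigmoid
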
- - -import torch -import torch.onnx - -from collections import OrderedDict -from network import R2AttU_Net - - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if (k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(pth_file_path, onnx_file_path): - model_checkpoint = torch.load(pth_file_path, map_location='cpu') - - if list(model_checkpoint.keys())[0].startswith("module."): - model_checkpoint = proc_nodes_module(model_checkpoint) - - model = R2AttU_Net(img_ch=3,output_ch=1,t=2) - model.load_state_dict(model_checkpoint) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(4, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, - opset_version=11) - - -if __name__ == "__main__": - src_file_path = "checkpoint.pkl" - dst_file_path = "R2AttU_Net.onnx" +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import torch.onnx + +from collections import OrderedDict +from network import R2AttU_Net + + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert(pth_file_path, onnx_file_path): + model_checkpoint = torch.load(pth_file_path, map_location='cpu') + + if list(model_checkpoint.keys())[0].startswith("module."): + model_checkpoint = proc_nodes_module(model_checkpoint) + + model = R2AttU_Net(img_ch=3,output_ch=1,t=2) + model.load_state_dict(model_checkpoint) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(4, 3, 224, 224) + torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, + opset_version=11) + + +if __name__ == "__main__": + src_file_path = "checkpoint.pkl" + dst_file_path = "R2AttU_Net.onnx" convert(src_file_path, dst_file_path) \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_1p.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_1p.py index 77b6f1d583d6b8daf45c85b8c97ee6a183832882..196e4e44c3018df07882c4de1ba7ff7da1b5e6f4 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_1p.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_1p.py @@ -1,355 +1,355 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. 
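
After running pthtar2onx.py above, the exported graph can be sanity-checked with the onnx package (not part of this patch); the file name and input name below match the defaults used in that script:

import onnx

model = onnx.load("R2AttU_Net.onnx")            # default output path in pthtar2onx.py
onnx.checker.check_model(model)                 # raises if the graph is malformed
print([inp.name for inp in model.graph.input])  # expected to contain "actual_input_1"
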
-# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import os -import numpy as np -import time -import datetime -import torch -# import torchvision -from torch import optim -from torch.autograd import Variable -import torch.nn.functional as F -from evaluation import * -from network import U_Net,R2U_Net,AttU_Net,R2AttU_Net -from apex import amp -import apex - -class Solver(object): - def __init__(self, config, train_loader, valid_loader, test_loader): - - # Data loader - self.train_loader = train_loader - self.valid_loader = valid_loader - self.test_loader = test_loader - - # Models - self.unet = None - self.optimizer = None - self.img_ch = config.img_ch - self.output_ch = config.output_ch - self.criterion = torch.nn.BCELoss() - self.augmentation_prob = config.augmentation_prob - - # Hyper-parameters - self.lr = config.lr - self.beta1 = config.beta1 - self.beta2 = config.beta2 - - # Training settings - self.num_epochs = config.num_epochs - self.num_epochs_decay = config.num_epochs_decay - self.batch_size = config.batch_size - self.use_apex = config.use_apex - self.apex_level = config.apex_level - self.loss_scale = config.loss_scale - - # Step size - self.log_step = config.log_step - self.val_step = config.val_step - - # Path - self.result_path = config.result_path - self.mode = config.mode - - self.device = torch.device('npu' if torch.npu.is_available() else 'cpu') - self.model_type = config.model_type - self.test_model_path = config.test_model_path - self.t = config.t - self.pretrain = config.pretrain - self.pretrain_path = config.pretrain_path - self.build_model() - - def build_model(self): - """Build generator and discriminator.""" - if self.model_type =='U_Net': - self.unet = U_Net(img_ch=3,output_ch=1) - elif self.model_type =='R2U_Net': - self.unet = R2U_Net(img_ch=3,output_ch=1,t=self.t) - elif self.model_type =='AttU_Net': - self.unet = AttU_Net(img_ch=3,output_ch=1) - elif self.model_type == 'R2AttU_Net': - self.unet = 
R2AttU_Net(img_ch=3,output_ch=1,t=self.t) - - if self.pretrain: - self.unet.load_state_dict(torch.load(self.pretrain_path, map_location="cpu"), strict=False) - - self.unet.to(self.device) - if self.mode == "test": - return - # self.optimizer = optim.Adam(list(self.unet.parameters()), - # self.lr, [self.beta1, self.beta2]) - self.optimizer = apex.optimizers.NpuFusedAdam(list(self.unet.parameters()), - self.lr, [self.beta1, self.beta2]) - if self.use_apex: - self.unet, self.optimizer = amp.initialize(self.unet, self.optimizer, - opt_level=self.apex_level,loss_scale=self.loss_scale, combine_grad=True) - - # self.print_network(self.unet, self.model_type) - - def print_network(self, model, name): - """Print out the network information.""" - num_params = 0 - for p in model.parameters(): - num_params += p.numel() - print(model) - print(name) - print("The number of parameters: {}".format(num_params)) - - def to_data(self, x): - """Convert variable to tensor.""" - if torch.npu.is_available(): - x = x.cpu() - return x.data - - def update_lr(self, g_lr, d_lr): - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr - - def reset_grad(self): - """Zero the gradient buffers.""" - self.unet.zero_grad() - - def compute_accuracy(self,SR,GT): - SR_flat = SR.view(-1) - GT_flat = GT.view(-1) - - acc = GT_flat.data.cpu()==(SR_flat.data.cpu()>0.5) - - def tensor2img(self,x): - img = (x[:,0,:,:]>x[:,1,:,:]).float() - img = img*255 - return img - - - def train(self): - """Train encoder, generator and discriminator.""" - - #====================================== Training ===========================================# - #===========================================================================================# - - unet_path = os.path.join(self.result_path, '%s-%d-%.4f-%d-%.4f.pkl' %(self.model_type,self.num_epochs,self.lr,self.num_epochs_decay,self.augmentation_prob)) - - # U-Net Train - # Train for Encoder - lr = self.lr - best_unet_score = 0. - - for epoch in range(self.num_epochs): - - self.unet.train(True) - epoch_loss = 0 - - acc = 0. # Accuracy - SE = 0. # Sensitivity (Recall) - SP = 0. # Specificity - PC = 0. # Precision - F1 = 0. # F1 Score - JS = 0. # Jaccard Similarity - DC = 0. 
# Dice Coefficient - length = 0 - threshold = 0.5 - steps = len(self.train_loader) - for i, (images, GT) in enumerate(self.train_loader): - # GT : Ground Truth - if i > 10: - start_time = time.time() - images = images.to(self.device) - GT = GT.to(self.device) - - # SR : Segmentation Result - SR = self.unet(images) - SR_probs = F.sigmoid(SR) - SR_flat = SR_probs.view(SR_probs.size(0),-1) - - GT_flat = GT.view(GT.size(0),-1) - loss = self.criterion(SR_flat,GT_flat) - epoch_loss += loss.item() - - # Backprop + optimize - self.reset_grad() - if self.use_apex: - with amp.scale_loss(loss, self.optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - self.optimizer.step() - - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - SE += get_sensitivity(SR_ac, GT_ac) - SP += get_specificity(SR_ac, GT_ac) - PC += get_precision(SR_ac, GT_ac) - F1 += get_F1(SR_ac, GT_ac) - JS += get_JS(SR_ac, GT_ac) - DC += get_DC(SR_ac, GT_ac) - length += 1 - - acc = acc/length - SE = SE/length - SP = SP/length - PC = PC/length - F1 = F1/length - JS = JS/length - DC = DC/length - - # Print the log info - print('Epoch [%d/%d], Loss: %.4f, [Training] Acc: %.4f, SE: %.4f, SP: %.4f, PC: %.4f, F1: %.4f, JS: %.4f, DC: %.4f, FPS: %.3f' % ( - epoch+1, self.num_epochs, \ - epoch_loss,acc,SE,SP,PC,F1,JS,DC, self.batch_size*(steps-10)/(time.time() - start_time))) - - - - # Decay learning rate - if (epoch+1) > (self.num_epochs - self.num_epochs_decay): - lr -= (self.lr / float(self.num_epochs_decay)) - for param_group in self.optimizer.param_groups: - param_group['lr'] = lr - print ('Decay learning rate to lr: {}.'.format(lr)) - - - #===================================== Validation ====================================# - self.unet.eval() - - acc = 0. # Accuracy - SE = 0. # Sensitivity (Recall) - SP = 0. # Specificity - PC = 0. # Precision - F1 = 0. # F1 Score - JS = 0. # Jaccard Similarity - DC = 0. 
# Dice Coefficient - length=0 - for i, (images, GT) in enumerate(self.valid_loader): - - images = images.to(self.device) - GT = GT.to(self.device) - SR = F.sigmoid(self.unet(images)) - - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - SE += get_sensitivity(SR_ac, GT_ac) - SP += get_specificity(SR_ac, GT_ac) - PC += get_precision(SR_ac, GT_ac) - F1 += get_F1(SR_ac, GT_ac) - JS += get_JS(SR_ac, GT_ac) - DC += get_DC(SR_ac, GT_ac) - - length += 1 - - acc = acc/length - SE = SE/length - SP = SP/length - PC = PC/length - F1 = F1/length - JS = JS/length - DC = DC/length - unet_score = JS + DC - - print('[Validation] Acc: %.4f, SE: %.4f, SP: %.4f, PC: %.4f, F1: %.4f, JS: %.4f, DC: %.4f'%(acc,SE,SP,PC,F1,JS,DC)) - - ''' - torchvision.utils.save_image(images.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_image.png'%(self.model_type,epoch+1))) - torchvision.utils.save_image(SR.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_SR.png'%(self.model_type,epoch+1))) - torchvision.utils.save_image(GT.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_GT.png'%(self.model_type,epoch+1))) - ''' - - - # Save Best U-Net model - if unet_score > best_unet_score: - best_unet_score = unet_score - best_epoch = epoch - best_unet = self.unet.state_dict() - print('Best %s model score : %.4f'%(self.model_type,best_unet_score)) - torch.save(best_unet,unet_path) - - print("Validation Best ", [self.model_type,acc,SE,SP,PC,F1,JS,DC,self.lr,best_epoch,self.num_epochs,self.num_epochs_decay,self.augmentation_prob]) - - #===================================== Test ====================================# - def test(self): - threshold = 0.5 - pre_dict = torch.load(self.test_model_path) - new_dict = {} - if list(pre_dict.keys())[0].startswith("module"): - for key, value in pre_dict.items(): - name = key[7:] - new_dict[name] = value - else: - new_dict = pre_dict - self.unet.load_state_dict(new_dict) - self.unet.eval() - - acc = 0. # Accuracy - SE = 0. # Sensitivity (Recall) - SP = 0. # Specificity - PC = 0. # Precision - F1 = 0. # F1 Score - JS = 0. # Jaccard Similarity - DC = 0. # Dice Coefficient - length=0 - for i, (images, GT) in enumerate(self.test_loader): - - images = images.to(self.device) - GT = GT.to(self.device) - SR = F.sigmoid(self.unet(images)) - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - SE += get_sensitivity(SR_ac, GT_ac) - SP += get_specificity(SR_ac, GT_ac) - PC += get_precision(SR_ac, GT_ac) - F1 += get_F1(SR_ac, GT_ac) - JS += get_JS(SR_ac, GT_ac) - DC += get_DC(SR_ac, GT_ac) - - length += 1 - - acc = acc/length - SE = SE/length - SP = SP/length - PC = PC/length - F1 = F1/length - JS = JS/length - DC = DC/length - unet_score = JS + DC - print("Test finished, model checkpoint name:",self.test_model_path, " and acc: %.3f " % (acc)) - - - - - +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import os +import numpy as np +import time +import datetime +import torch +# import torchvision +from torch import optim +from torch.autograd import Variable +import torch.nn.functional as F +from evaluation import * +from network import U_Net,R2U_Net,AttU_Net,R2AttU_Net +from apex import amp +import apex + +class Solver(object): + def __init__(self, config, train_loader, valid_loader, test_loader): + + # Data loader + self.train_loader = train_loader + self.valid_loader = valid_loader + self.test_loader = test_loader + + # Models + self.unet = None + self.optimizer = None + self.img_ch = config.img_ch + self.output_ch = config.output_ch + self.criterion = torch.nn.BCELoss() + self.augmentation_prob = config.augmentation_prob + + # Hyper-parameters + self.lr = config.lr + self.beta1 = config.beta1 + self.beta2 = config.beta2 + + # Training settings + self.num_epochs = config.num_epochs + self.num_epochs_decay = config.num_epochs_decay + self.batch_size = config.batch_size + self.use_apex = config.use_apex + self.apex_level = config.apex_level + self.loss_scale = config.loss_scale + + # Step size + self.log_step = config.log_step + self.val_step = config.val_step + + # Path + self.result_path = config.result_path + self.mode = config.mode + + self.device = torch.device('npu' if torch.npu.is_available() else 'cpu') + self.model_type = config.model_type + self.test_model_path = config.test_model_path + self.t = config.t + self.pretrain = config.pretrain + self.pretrain_path = config.pretrain_path + self.build_model() + + def build_model(self): + """Build generator and discriminator.""" + if self.model_type =='U_Net': + self.unet = U_Net(img_ch=3,output_ch=1) + elif self.model_type =='R2U_Net': + self.unet = R2U_Net(img_ch=3,output_ch=1,t=self.t) + elif self.model_type =='AttU_Net': + self.unet = AttU_Net(img_ch=3,output_ch=1) + elif self.model_type == 'R2AttU_Net': + self.unet = R2AttU_Net(img_ch=3,output_ch=1,t=self.t) + + if self.pretrain: + self.unet.load_state_dict(torch.load(self.pretrain_path, map_location="cpu"), strict=False) + + self.unet.to(self.device) + if self.mode == "test": + return + # self.optimizer = optim.Adam(list(self.unet.parameters()), + # self.lr, [self.beta1, self.beta2]) + self.optimizer = apex.optimizers.NpuFusedAdam(list(self.unet.parameters()), + self.lr, [self.beta1, self.beta2]) + if self.use_apex: + self.unet, self.optimizer = amp.initialize(self.unet, self.optimizer, + 
opt_level=self.apex_level,loss_scale=self.loss_scale, combine_grad=True) + + # self.print_network(self.unet, self.model_type) + + def print_network(self, model, name): + """Print out the network information.""" + num_params = 0 + for p in model.parameters(): + num_params += p.numel() + print(model) + print(name) + print("The number of parameters: {}".format(num_params)) + + def to_data(self, x): + """Convert variable to tensor.""" + if torch.npu.is_available(): + x = x.cpu() + return x.data + + def update_lr(self, g_lr, d_lr): + for param_group in self.optimizer.param_groups: + param_group['lr'] = lr + + def reset_grad(self): + """Zero the gradient buffers.""" + self.unet.zero_grad() + + def compute_accuracy(self,SR,GT): + SR_flat = SR.view(-1) + GT_flat = GT.view(-1) + + acc = GT_flat.data.cpu()==(SR_flat.data.cpu()>0.5) + + def tensor2img(self,x): + img = (x[:,0,:,:]>x[:,1,:,:]).float() + img = img*255 + return img + + + def train(self): + """Train encoder, generator and discriminator.""" + + #====================================== Training ===========================================# + #===========================================================================================# + + unet_path = os.path.join(self.result_path, '%s-%d-%.4f-%d-%.4f.pkl' %(self.model_type,self.num_epochs,self.lr,self.num_epochs_decay,self.augmentation_prob)) + + # U-Net Train + # Train for Encoder + lr = self.lr + best_unet_score = 0. + + for epoch in range(self.num_epochs): + + self.unet.train(True) + epoch_loss = 0 + + acc = 0. # Accuracy + SE = 0. # Sensitivity (Recall) + SP = 0. # Specificity + PC = 0. # Precision + F1 = 0. # F1 Score + JS = 0. # Jaccard Similarity + DC = 0. # Dice Coefficient + length = 0 + threshold = 0.5 + steps = len(self.train_loader) + for i, (images, GT) in enumerate(self.train_loader): + # GT : Ground Truth + if i > 10: + start_time = time.time() + images = images.to(self.device) + GT = GT.to(self.device) + + # SR : Segmentation Result + SR = self.unet(images) + SR_probs = F.sigmoid(SR) + SR_flat = SR_probs.view(SR_probs.size(0),-1) + + GT_flat = GT.view(GT.size(0),-1) + loss = self.criterion(SR_flat,GT_flat) + epoch_loss += loss.item() + + # Backprop + optimize + self.reset_grad() + if self.use_apex: + with amp.scale_loss(loss, self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + self.optimizer.step() + + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + SE += get_sensitivity(SR_ac, GT_ac) + SP += get_specificity(SR_ac, GT_ac) + PC += get_precision(SR_ac, GT_ac) + F1 += get_F1(SR_ac, GT_ac) + JS += get_JS(SR_ac, GT_ac) + DC += get_DC(SR_ac, GT_ac) + length += 1 + + acc = acc/length + SE = SE/length + SP = SP/length + PC = PC/length + F1 = F1/length + JS = JS/length + DC = DC/length + + # Print the log info + print('Epoch [%d/%d], Loss: %.4f, [Training] Acc: %.4f, SE: %.4f, SP: %.4f, PC: %.4f, F1: %.4f, JS: %.4f, DC: %.4f, FPS: %.3f' % ( + epoch+1, self.num_epochs, \ + epoch_loss,acc,SE,SP,PC,F1,JS,DC, self.batch_size*(steps-10)/(time.time() - start_time))) + + + + # Decay learning rate + if (epoch+1) > (self.num_epochs - self.num_epochs_decay): + lr -= (self.lr / float(self.num_epochs_decay)) + for param_group in self.optimizer.param_groups: + param_group['lr'] = lr + print ('Decay learning rate to lr: {}.'.format(lr)) + + + #===================================== Validation ====================================# + self.unet.eval() + + acc = 0. # Accuracy + SE = 0. # Sensitivity (Recall) + SP = 0. 
# Specificity + PC = 0. # Precision + F1 = 0. # F1 Score + JS = 0. # Jaccard Similarity + DC = 0. # Dice Coefficient + length=0 + for i, (images, GT) in enumerate(self.valid_loader): + + images = images.to(self.device) + GT = GT.to(self.device) + SR = F.sigmoid(self.unet(images)) + + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + SE += get_sensitivity(SR_ac, GT_ac) + SP += get_specificity(SR_ac, GT_ac) + PC += get_precision(SR_ac, GT_ac) + F1 += get_F1(SR_ac, GT_ac) + JS += get_JS(SR_ac, GT_ac) + DC += get_DC(SR_ac, GT_ac) + + length += 1 + + acc = acc/length + SE = SE/length + SP = SP/length + PC = PC/length + F1 = F1/length + JS = JS/length + DC = DC/length + unet_score = JS + DC + + print('[Validation] Acc: %.4f, SE: %.4f, SP: %.4f, PC: %.4f, F1: %.4f, JS: %.4f, DC: %.4f'%(acc,SE,SP,PC,F1,JS,DC)) + + ''' + torchvision.utils.save_image(images.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_image.png'%(self.model_type,epoch+1))) + torchvision.utils.save_image(SR.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_SR.png'%(self.model_type,epoch+1))) + torchvision.utils.save_image(GT.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_GT.png'%(self.model_type,epoch+1))) + ''' + + + # Save Best U-Net model + if unet_score > best_unet_score: + best_unet_score = unet_score + best_epoch = epoch + best_unet = self.unet.state_dict() + print('Best %s model score : %.4f'%(self.model_type,best_unet_score)) + torch.save(best_unet,unet_path) + + print("Validation Best ", [self.model_type,acc,SE,SP,PC,F1,JS,DC,self.lr,best_epoch,self.num_epochs,self.num_epochs_decay,self.augmentation_prob]) + + #===================================== Test ====================================# + def test(self): + threshold = 0.5 + pre_dict = torch.load(self.test_model_path) + new_dict = {} + if list(pre_dict.keys())[0].startswith("module"): + for key, value in pre_dict.items(): + name = key[7:] + new_dict[name] = value + else: + new_dict = pre_dict + self.unet.load_state_dict(new_dict) + self.unet.eval() + + acc = 0. # Accuracy + SE = 0. # Sensitivity (Recall) + SP = 0. # Specificity + PC = 0. # Precision + F1 = 0. # F1 Score + JS = 0. # Jaccard Similarity + DC = 0. # Dice Coefficient + length=0 + for i, (images, GT) in enumerate(self.test_loader): + + images = images.to(self.device) + GT = GT.to(self.device) + SR = F.sigmoid(self.unet(images)) + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + SE += get_sensitivity(SR_ac, GT_ac) + SP += get_specificity(SR_ac, GT_ac) + PC += get_precision(SR_ac, GT_ac) + F1 += get_F1(SR_ac, GT_ac) + JS += get_JS(SR_ac, GT_ac) + DC += get_DC(SR_ac, GT_ac) + + length += 1 + + acc = acc/length + SE = SE/length + SP = SP/length + PC = PC/length + F1 = F1/length + JS = JS/length + DC = DC/length + unet_score = JS + DC + print("Test finished, model checkpoint name:",self.test_model_path, " and acc: %.3f " % (acc)) + + + + + diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_8p.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_8p.py index 32366173763812c1ebb95964e4f915c8efabf6e5..7c8c86da350348dc15cfc1b5783558836f79cece 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_8p.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/solver_8p.py @@ -1,181 +1,181 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. 
-# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import os -import numpy as np -import time -import datetime -import torch -# import torchvision -from torch import optim -from torch.autograd import Variable -import torch.nn.functional as F -from evaluation import * -from network import U_Net,R2U_Net,AttU_Net,R2AttU_Net -import torch.distributed as dist -from data_loader import get_dist_loader, get_loader,ImageFolder -from apex import amp -import apex - -def train_8p(rank, npus, config): - rank = rank - dist.init_process_group(backend=config.dist_backend, world_size=config.npus, rank=rank) - torch.npu.set_device(rank) - model_unet = R2AttU_Net(img_ch=3, output_ch=1,t=config.t) - model_unet = model_unet.to("npu") - # optimizer = optim.Adam(list(model_unet.parameters()), config.lr, [config.beta1, config.beta2]) - optimizer = apex.optimizers.NpuFusedAdam(list(model_unet.parameters()), config.lr, [config.beta1, config.beta2]) - - if config.use_apex: - model_unet, optimizer = amp.initialize(model_unet, optimizer, - opt_level=config.apex_level,loss_scale=config.loss_scale, combine_grad=True) - model_unet = torch.nn.parallel.DistributedDataParallel(model_unet, device_ids=[rank], - broadcast_buffers=False) - - train_loader = get_dist_loader(image_path=config.train_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='train', - augmentation_prob=config.augmentation_prob) - valid_loader = get_loader(image_path=config.valid_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='valid', - augmentation_prob=0.) - criterion = torch.nn.BCELoss() - - lr = config.lr - best_unet_score = 0. 
- unet_path = os.path.join(config.result_path, '%d-%s-%d-%.4f-%d-%.4f.pkl' %(rank, - config.model_type,config.num_epochs,config.lr,config.num_epochs_decay,config.augmentation_prob)) - - for epoch in range(config.num_epochs): - model_unet.train(True) - epoch_loss = 0. - acc = 0. # Accuracy - length = 0 - threshold = 0.5 - steps = len(train_loader) - for i, (images, GT) in enumerate(train_loader): - # GT : Ground Truth - images = images.to("npu") - GT = GT.to("npu") - if i == 10: - start_time = time.time() - step_start_time = time.time() - # SR : Segmentation Result - SR = model_unet(images) - SR_probs = F.sigmoid(SR) - SR_flat = SR_probs.view(SR_probs.size(0),-1) - - GT_flat = GT.view(GT.size(0),-1) - loss = criterion(SR_flat,GT_flat) - epoch_loss += loss.item() - - # Backprop + optimize - model_unet.zero_grad() - if config.use_apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - if rank == 0: - print('Epoch [%d/%d], Step: %d, FPS: %.2f' % ( - epoch+1, config.num_epochs, i, config.npus*config.batch_size/(time.time() - step_start_time))) - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - length += 1 - - acc = acc/length - - # Print the log info - if rank == 0: - print('Rank %d , Epoch [%d/%d], Loss: %.4f, [Training] Acc: %.4f, FPS: %.2f' % ( - rank, epoch+1, config.num_epochs, epoch_loss, acc , config.batch_size*(steps-10)/(time.time() - start_time))) - # Decay learning rate - if (epoch+1) % 10 == 0: - lr = lr/2. - # lr -= (config.lr / float(config.num_epochs_decay)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - print ('Decay learning rate to lr: {}.'.format(lr)) - - - #===================================== Validation ====================================# - if rank == 0: - model_unet.eval() - - acc = 0. # Accuracy - length=0 - for i, (images, GT) in enumerate(valid_loader): - - images = images.to("npu") - GT = GT.to("npu") - SR = F.sigmoid(model_unet(images)) - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - - length += 1 - - acc = acc/length - - unet_score = acc#JS + DC - - print('[Validation] Rank: %d, Epoch %d,Acc: %.4f'%(rank, epoch, acc)) - - ''' - torchvision.utils.save_image(images.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_image.png'%(self.model_type,epoch+1))) - torchvision.utils.save_image(SR.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_SR.png'%(self.model_type,epoch+1))) - torchvision.utils.save_image(GT.data.cpu(), - os.path.join(self.result_path, - '%s_valid_%d_GT.png'%(self.model_type,epoch+1))) - ''' - - - # Save Best U-Net model - if unet_score > best_unet_score: - best_unet_score = unet_score - best_epoch = epoch - best_unet = model_unet.state_dict() - print('Best %s model score : %.4f'%(config.model_type,best_unet_score)) - torch.save(best_unet,unet_path) - print("Validation Best", [config.model_type,acc,config.lr,best_epoch,\ - config.num_epochs,config.num_epochs_decay,config.augmentation_prob]) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import os +import numpy as np +import time +import datetime +import torch +# import torchvision +from torch import optim +from torch.autograd import Variable +import torch.nn.functional as F +from evaluation import * +from network import U_Net,R2U_Net,AttU_Net,R2AttU_Net +import torch.distributed as dist +from data_loader import get_dist_loader, get_loader,ImageFolder +from apex import amp +import apex + +def train_8p(rank, npus, config): + rank = rank + dist.init_process_group(backend=config.dist_backend, world_size=config.npus, rank=rank) + torch.npu.set_device(rank) + model_unet = R2AttU_Net(img_ch=3, output_ch=1,t=config.t) + model_unet = model_unet.to("npu") + # optimizer = optim.Adam(list(model_unet.parameters()), config.lr, [config.beta1, config.beta2]) + optimizer = apex.optimizers.NpuFusedAdam(list(model_unet.parameters()), config.lr, [config.beta1, config.beta2]) + + if config.use_apex: + model_unet, optimizer = amp.initialize(model_unet, optimizer, + opt_level=config.apex_level,loss_scale=config.loss_scale, combine_grad=True) + model_unet = torch.nn.parallel.DistributedDataParallel(model_unet, device_ids=[rank], + broadcast_buffers=False) + + train_loader = get_dist_loader(image_path=config.train_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='train', + augmentation_prob=config.augmentation_prob) + valid_loader = get_loader(image_path=config.valid_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='valid', + augmentation_prob=0.) + criterion = torch.nn.BCELoss() + + lr = config.lr + best_unet_score = 0. + unet_path = os.path.join(config.result_path, '%d-%s-%d-%.4f-%d-%.4f.pkl' %(rank, + config.model_type,config.num_epochs,config.lr,config.num_epochs_decay,config.augmentation_prob)) + + for epoch in range(config.num_epochs): + model_unet.train(True) + epoch_loss = 0. + acc = 0. 
# Accuracy + length = 0 + threshold = 0.5 + steps = len(train_loader) + for i, (images, GT) in enumerate(train_loader): + # GT : Ground Truth + images = images.to("npu") + GT = GT.to("npu") + if i == 10: + start_time = time.time() + step_start_time = time.time() + # SR : Segmentation Result + SR = model_unet(images) + SR_probs = F.sigmoid(SR) + SR_flat = SR_probs.view(SR_probs.size(0),-1) + + GT_flat = GT.view(GT.size(0),-1) + loss = criterion(SR_flat,GT_flat) + epoch_loss += loss.item() + + # Backprop + optimize + model_unet.zero_grad() + if config.use_apex: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + if rank == 0: + print('Epoch [%d/%d], Step: %d, FPS: %.2f' % ( + epoch+1, config.num_epochs, i, config.npus*config.batch_size/(time.time() - step_start_time))) + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + length += 1 + + acc = acc/length + + # Print the log info + if rank == 0: + print('Rank %d , Epoch [%d/%d], Loss: %.4f, [Training] Acc: %.4f, FPS: %.2f' % ( + rank, epoch+1, config.num_epochs, epoch_loss, acc , config.batch_size*(steps-10)/(time.time() - start_time))) + # Decay learning rate + if (epoch+1) % 10 == 0: + lr = lr/2. + # lr -= (config.lr / float(config.num_epochs_decay)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print ('Decay learning rate to lr: {}.'.format(lr)) + + + #===================================== Validation ====================================# + if rank == 0: + model_unet.eval() + + acc = 0. # Accuracy + length=0 + for i, (images, GT) in enumerate(valid_loader): + + images = images.to("npu") + GT = GT.to("npu") + SR = F.sigmoid(model_unet(images)) + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + + length += 1 + + acc = acc/length + + unet_score = acc#JS + DC + + print('[Validation] Rank: %d, Epoch %d,Acc: %.4f'%(rank, epoch, acc)) + + ''' + torchvision.utils.save_image(images.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_image.png'%(self.model_type,epoch+1))) + torchvision.utils.save_image(SR.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_SR.png'%(self.model_type,epoch+1))) + torchvision.utils.save_image(GT.data.cpu(), + os.path.join(self.result_path, + '%s_valid_%d_GT.png'%(self.model_type,epoch+1))) + ''' + + + # Save Best U-Net model + if unet_score > best_unet_score: + best_unet_score = unet_score + best_epoch = epoch + best_unet = model_unet.state_dict() + print('Best %s model score : %.4f'%(config.model_type,best_unet_score)) + torch.save(best_unet,unet_path) + print("Validation Best", [config.model_type,acc,config.lr,best_epoch,\ + config.num_epochs,config.num_epochs_decay,config.augmentation_prob]) \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train.py index e605f787b4c349069ec3ed48b2a9630f2ba34146..0a1ebf8c6ed8ebee921f5e4422d2e26e84cc8e7b 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train.py @@ -1,334 +1,334 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. 
-# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -#from __future__ import print_function -import datetime -import os -import time -import sys - -import torch -import torch.utils.data -from torch import nn -import random -import numpy as np -import utils -import argparse -try: - import apex - from apex import amp -except ImportError: - amp = None -from evaluation import * -from data_loader import get_dist_loader, get_loader -from network import R2AttU_Net - - -def train_one_epoch(model_unet, criterion, optimizer, data_loader, device, epoch, config): - model_unet.train() - metric_logger = utils.MetricLogger(delimiter=" ") - - epoch_loss = 0. - acc = 0. # Accuracy - SE = 0. # Sensitivity (Recall) - SP = 0. # Specificity - PC = 0. # Precision - F1 = 0. # F1 Score - JS = 0. # Jaccard Similarity - DC = 0. 
# Dice Coefficient - length = 0 - threshold = 0.5 - steps = len(data_loader) - for i, (images, GT) in enumerate(data_loader): - # GT : Ground Truth - images = images.to(device) - GT = GT.to(device) - if i == 5: - start_time = time.time() - - # SR : Segmentation Result - SR = model_unet(images) - SR_probs = torch.nn.functional.sigmoid(SR) - SR_flat = SR_probs.view(SR_probs.size(0),-1) - - GT_flat = GT.view(GT.size(0),-1) - loss = criterion(SR_flat,GT_flat) - epoch_loss += loss.item() - - # Backprop + optimize - model_unet.zero_grad() - if config.use_apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - SE += get_sensitivity(SR_ac, GT_ac) - SP += get_specificity(SR_ac, GT_ac) - PC += get_precision(SR_ac, GT_ac) - F1 += get_F1(SR_ac, GT_ac) - JS += get_JS(SR_ac, GT_ac) - DC += get_DC(SR_ac, GT_ac) - length += 1 - acc = acc/length - - batch_size = config.batch_size - fps = batch_size *(steps-5) / (time.time() - start_time) - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc'].update(acc, n=batch_size) - metric_logger.meters['img/s'].update(fps) - print("Traing, Epoch: %d, Loss: %.4f"%(epoch, loss.item())) - - return acc, fps - - -def evaluate(model_unet, criterion, valid_loader, device): - metric_logger = utils.MetricLogger(delimiter=" ") - model_unet.eval() - threshold = 0.5 - acc = 0. # Accuracy - SE = 0. # Sensitivity (Recall) - SP = 0. # Specificity - PC = 0. # Precision - F1 = 0. # F1 Score - JS = 0. # Jaccard Similarity - DC = 0. # Dice Coefficient - length=0 - for i, (images, GT) in enumerate(valid_loader): - - images = images.to(device) - GT = GT.to(device) - SR = torch.nn.functional.sigmoid(model_unet(images)) - SR_ac = SR > threshold - GT_ac = GT == torch.max(GT) - acc += get_accuracy(SR_ac, GT_ac) - SE += get_sensitivity(SR_ac, GT_ac) - SP += get_specificity(SR_ac, GT_ac) - PC += get_precision(SR_ac, GT_ac) - F1 += get_F1(SR_ac, GT_ac) - JS += get_JS(SR_ac, GT_ac) - DC += get_DC(SR_ac, GT_ac) - metric_logger.synchronize_between_processes(device) - - length += 1 - - acc = acc/length - SE = SE/length - SP = SP/length - PC = PC/length - F1 = F1/length - JS = JS/length - DC = DC/length - unet_score = acc#JS + DC - batch_size = images.shape[0] - metric_logger.meters['acc'].update(acc, n=batch_size) - return acc - -def init_distributed_mode(args): - if 'RANK_SIZE' in os.environ and 'RANK_ID' in os.environ: - args.rank_size = int(os.environ['RANK_SIZE']) - args.rank_id = int(os.environ['RANK_ID']) - args.device_id = args.rank_id - args.batch_size = int(args.batch_size / args.rank_size) - args.num_workers = int((args.num_workers) / args.rank_size) - else: - raise RuntimeError("init_distributed_mode failed.") - - torch.distributed.init_process_group(backend='hccl', - world_size=args.rank_size, rank=args.rank_id) -def main(config): - #设置环境变量 - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - - #设置seed - random.seed(1234) - np.random.seed(1234) - torch.manual_seed(1234) - os.environ['PYTHONHASHSEED'] = str(1234) - - if config.use_apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. 
Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - - # Create directories if not exist - if not os.path.exists(config.result_path): - os.makedirs(config.result_path) - - if config.distributed: - init_distributed_mode(config) - - config.is_master_node = not config.distributed or config.device_id == 0 - if config.is_master_node: - print(config) - - device = torch.device(f'npu:'+str(config.device_id)) - torch.npu.set_device(device) - - # Data loading code - print("Loading data") - config.train_path = os.path.join(config.data_path, "train") - config.valid_path = os.path.join(config.data_path, "valid") - print("Creating data loaders") - if config.distributed: - train_loader = get_dist_loader(image_path=config.train_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='train', - augmentation_prob=config.augmentation_prob) - valid_loader = get_loader(image_path=config.valid_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='valid', - augmentation_prob=0.) - else: - train_loader = get_loader(image_path=config.train_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='train', - augmentation_prob=config.augmentation_prob) - valid_loader = get_loader(image_path=config.valid_path, - image_size=config.image_size, - batch_size=config.batch_size, - num_workers=config.num_workers, - mode='valid', - augmentation_prob=0.) - model_unet = R2AttU_Net(img_ch=3, output_ch=1,t=config.t) - model_unet = model_unet.to(device) - - criterion = torch.nn.BCELoss() - optimizer = apex.optimizers.NpuFusedAdam(list(model_unet.parameters()), - config.lr, [config.beta1, config.beta2]) - if config.use_apex: - model_unet, optimizer = amp.initialize(model_unet, optimizer, - opt_level=config.apex_level,loss_scale=config.loss_scale, combine_grad=True) - - model_without_ddp = model_unet - if config.distributed: - model_unet = torch.nn.parallel.DistributedDataParallel(model_unet, device_ids=[config.device_id]) - model_without_ddp = model_unet.module - - if config.is_master_node: - print("Start training") - start_time = time.time() - best_unet_score = 0. - lr = config.lr - for epoch in range(config.num_epochs): - acc, fps = train_one_epoch(model_unet, criterion, optimizer, train_loader, device, epoch, config) - - unet_score = evaluate(model_unet, criterion, valid_loader, device=device) - if config.is_master_node: - print("Traing, Epoch: %d, Avgacc: %.3f, FPS: %.2f"%(epoch, acc, fps)) - print('Test, Acc: %.3f'%(unet_score)) - if config.is_master_node and config.result_path: - checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'epoch': epoch, - 'args': config} - utils.save_on_master( - checkpoint, - os.path.join(config.result_path, 'model_{}.pth'.format(epoch))) - if unet_score > best_unet_score: - best_unet_score = unet_score - utils.save_on_master( - checkpoint, - os.path.join(config.result_path, 'checkpoint.pth')) - if (epoch+1) % 10 == 0: - lr = lr/2. 
- # lr -= (config.lr / float(config.num_epochs_decay)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - print ('Decay learning rate to lr: {}.'.format(lr)) - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - if config.is_master_node: - print('Training time {}'.format(total_time_str)) - exit() - - -def parse_args(): - parser = argparse.ArgumentParser() - - - # model hyper-parameters - parser.add_argument('--image_size', type=int, default=224) - parser.add_argument('--t', type=int, default=3, help='t for Recurrent step of R2U_Net or R2AttU_Net') - - # training hyper-parameters - parser.add_argument('--img_ch', type=int, default=3) - parser.add_argument('--output_ch', type=int, default=1) - parser.add_argument('--num_epochs', type=int, default=100) - parser.add_argument('--num_epochs_decay', type=int, default=70) - parser.add_argument('--batch_size', type=int, default=16) - parser.add_argument('--num_workers', type=int, default=8) - parser.add_argument('--lr', type=float, default=0.0002) - parser.add_argument('--beta1', type=float, default=0.5) # momentum1 in Adam - parser.add_argument('--beta2', type=float, default=0.999) # momentum2 in Adam - parser.add_argument('--augmentation_prob', type=float, default=0.4) - - parser.add_argument('--log_step', type=int, default=2) - parser.add_argument('--val_step', type=int, default=2) - - # misc - parser.add_argument('--mode', type=str, default='train') - parser.add_argument('--model_type', type=str, default='U_Net', help='U_Net/R2U_Net/AttU_Net/R2AttU_Net') - parser.add_argument('--test_model_path', type=str, default='./models') - parser.add_argument('--data_path', type=str, default='./dataset/train/') - parser.add_argument('--result_path', type=str, default='./result_1p') - - parser.add_argument('--device_id', type=int, default=0) - parser.add_argument('--use_apex', type=int, default=1) - parser.add_argument('--apex_level', type=str, default="O2") - parser.add_argument('--loss_scale', type=float, default=128.) - - parser.add_argument('--world_size', type=int, default=8) - parser.add_argument('--distributed', type=int, default=0, - help='Use multi-processing distributed training to launch.') - - config = parser.parse_args() - main(config) - -if __name__ == "__main__": - args = parse_args() - main(args) +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +#from __future__ import print_function +import datetime +import os +import time +import sys + +import torch +import torch.utils.data +from torch import nn +import random +import numpy as np +import utils +import argparse +try: + import apex + from apex import amp +except ImportError: + amp = None +from evaluation import * +from data_loader import get_dist_loader, get_loader +from network import R2AttU_Net + + +def train_one_epoch(model_unet, criterion, optimizer, data_loader, device, epoch, config): + model_unet.train() + metric_logger = utils.MetricLogger(delimiter=" ") + + epoch_loss = 0. + acc = 0. # Accuracy + SE = 0. # Sensitivity (Recall) + SP = 0. # Specificity + PC = 0. # Precision + F1 = 0. # F1 Score + JS = 0. # Jaccard Similarity + DC = 0. # Dice Coefficient + length = 0 + threshold = 0.5 + steps = len(data_loader) + for i, (images, GT) in enumerate(data_loader): + # GT : Ground Truth + images = images.to(device) + GT = GT.to(device) + if i == 5: + start_time = time.time() + + # SR : Segmentation Result + SR = model_unet(images) + SR_probs = torch.nn.functional.sigmoid(SR) + SR_flat = SR_probs.view(SR_probs.size(0),-1) + + GT_flat = GT.view(GT.size(0),-1) + loss = criterion(SR_flat,GT_flat) + epoch_loss += loss.item() + + # Backprop + optimize + model_unet.zero_grad() + if config.use_apex: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + SE += get_sensitivity(SR_ac, GT_ac) + SP += get_specificity(SR_ac, GT_ac) + PC += get_precision(SR_ac, GT_ac) + F1 += get_F1(SR_ac, GT_ac) + JS += get_JS(SR_ac, GT_ac) + DC += get_DC(SR_ac, GT_ac) + length += 1 + acc = acc/length + + batch_size = config.batch_size + fps = batch_size *(steps-5) / (time.time() - start_time) + metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['acc'].update(acc, n=batch_size) + metric_logger.meters['img/s'].update(fps) + print("Traing, Epoch: %d, Loss: %.4f"%(epoch, loss.item())) + + return acc, fps + + +def evaluate(model_unet, criterion, valid_loader, device): + metric_logger = utils.MetricLogger(delimiter=" ") + model_unet.eval() + threshold = 0.5 + acc = 0. # Accuracy + SE = 0. # Sensitivity (Recall) + SP = 0. # Specificity + PC = 0. # Precision + F1 = 0. # F1 Score + JS = 0. # Jaccard Similarity + DC = 0. 
# Dice Coefficient + length=0 + for i, (images, GT) in enumerate(valid_loader): + + images = images.to(device) + GT = GT.to(device) + SR = torch.nn.functional.sigmoid(model_unet(images)) + SR_ac = SR > threshold + GT_ac = GT == torch.max(GT) + acc += get_accuracy(SR_ac, GT_ac) + SE += get_sensitivity(SR_ac, GT_ac) + SP += get_specificity(SR_ac, GT_ac) + PC += get_precision(SR_ac, GT_ac) + F1 += get_F1(SR_ac, GT_ac) + JS += get_JS(SR_ac, GT_ac) + DC += get_DC(SR_ac, GT_ac) + metric_logger.synchronize_between_processes(device) + + length += 1 + + acc = acc/length + SE = SE/length + SP = SP/length + PC = PC/length + F1 = F1/length + JS = JS/length + DC = DC/length + unet_score = acc#JS + DC + batch_size = images.shape[0] + metric_logger.meters['acc'].update(acc, n=batch_size) + return acc + +def init_distributed_mode(args): + if 'RANK_SIZE' in os.environ and 'RANK_ID' in os.environ: + args.rank_size = int(os.environ['RANK_SIZE']) + args.rank_id = int(os.environ['RANK_ID']) + args.device_id = args.rank_id + args.batch_size = int(args.batch_size / args.rank_size) + args.num_workers = int((args.num_workers) / args.rank_size) + else: + raise RuntimeError("init_distributed_mode failed.") + + torch.distributed.init_process_group(backend='hccl', + world_size=args.rank_size, rank=args.rank_id) +def main(config): + #设置环境变量 + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + + #设置seed + random.seed(1234) + np.random.seed(1234) + torch.manual_seed(1234) + os.environ['PYTHONHASHSEED'] = str(1234) + + if config.use_apex: + if sys.version_info < (3, 0): + raise RuntimeError("Apex currently only supports Python 3. Aborting.") + if amp is None: + raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") + + # Create directories if not exist + if not os.path.exists(config.result_path): + os.makedirs(config.result_path) + + if config.distributed: + init_distributed_mode(config) + + config.is_master_node = not config.distributed or config.device_id == 0 + if config.is_master_node: + print(config) + + device = torch.device(f'npu:'+str(config.device_id)) + torch.npu.set_device(device) + + # Data loading code + print("Loading data") + config.train_path = os.path.join(config.data_path, "train") + config.valid_path = os.path.join(config.data_path, "valid") + print("Creating data loaders") + if config.distributed: + train_loader = get_dist_loader(image_path=config.train_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='train', + augmentation_prob=config.augmentation_prob) + valid_loader = get_loader(image_path=config.valid_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='valid', + augmentation_prob=0.) + else: + train_loader = get_loader(image_path=config.train_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='train', + augmentation_prob=config.augmentation_prob) + valid_loader = get_loader(image_path=config.valid_path, + image_size=config.image_size, + batch_size=config.batch_size, + num_workers=config.num_workers, + mode='valid', + augmentation_prob=0.) 
+ model_unet = R2AttU_Net(img_ch=3, output_ch=1,t=config.t) + model_unet = model_unet.to(device) + + criterion = torch.nn.BCELoss() + optimizer = apex.optimizers.NpuFusedAdam(list(model_unet.parameters()), + config.lr, [config.beta1, config.beta2]) + if config.use_apex: + model_unet, optimizer = amp.initialize(model_unet, optimizer, + opt_level=config.apex_level,loss_scale=config.loss_scale, combine_grad=True) + + model_without_ddp = model_unet + if config.distributed: + model_unet = torch.nn.parallel.DistributedDataParallel(model_unet, device_ids=[config.device_id]) + model_without_ddp = model_unet.module + + if config.is_master_node: + print("Start training") + start_time = time.time() + best_unet_score = 0. + lr = config.lr + for epoch in range(config.num_epochs): + acc, fps = train_one_epoch(model_unet, criterion, optimizer, train_loader, device, epoch, config) + + unet_score = evaluate(model_unet, criterion, valid_loader, device=device) + if config.is_master_node: + print("Traing, Epoch: %d, Avgacc: %.3f, FPS: %.2f"%(epoch, acc, fps)) + print('Test, Acc: %.3f'%(unet_score)) + if config.is_master_node and config.result_path: + checkpoint = { + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'epoch': epoch, + 'args': config} + utils.save_on_master( + checkpoint, + os.path.join(config.result_path, 'model_{}.pth'.format(epoch))) + if unet_score > best_unet_score: + best_unet_score = unet_score + utils.save_on_master( + checkpoint, + os.path.join(config.result_path, 'checkpoint.pth')) + if (epoch+1) % 10 == 0: + lr = lr/2. + # lr -= (config.lr / float(config.num_epochs_decay)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print ('Decay learning rate to lr: {}.'.format(lr)) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + if config.is_master_node: + print('Training time {}'.format(total_time_str)) + exit() + + +def parse_args(): + parser = argparse.ArgumentParser() + + + # model hyper-parameters + parser.add_argument('--image_size', type=int, default=224) + parser.add_argument('--t', type=int, default=3, help='t for Recurrent step of R2U_Net or R2AttU_Net') + + # training hyper-parameters + parser.add_argument('--img_ch', type=int, default=3) + parser.add_argument('--output_ch', type=int, default=1) + parser.add_argument('--num_epochs', type=int, default=100) + parser.add_argument('--num_epochs_decay', type=int, default=70) + parser.add_argument('--batch_size', type=int, default=16) + parser.add_argument('--num_workers', type=int, default=8) + parser.add_argument('--lr', type=float, default=0.0002) + parser.add_argument('--beta1', type=float, default=0.5) # momentum1 in Adam + parser.add_argument('--beta2', type=float, default=0.999) # momentum2 in Adam + parser.add_argument('--augmentation_prob', type=float, default=0.4) + + parser.add_argument('--log_step', type=int, default=2) + parser.add_argument('--val_step', type=int, default=2) + + # misc + parser.add_argument('--mode', type=str, default='train') + parser.add_argument('--model_type', type=str, default='U_Net', help='U_Net/R2U_Net/AttU_Net/R2AttU_Net') + parser.add_argument('--test_model_path', type=str, default='./models') + parser.add_argument('--data_path', type=str, default='./dataset/train/') + parser.add_argument('--result_path', type=str, default='./result_1p') + + parser.add_argument('--device_id', type=int, default=0) + parser.add_argument('--use_apex', type=int, default=1) + 
parser.add_argument('--apex_level', type=str, default="O2") + parser.add_argument('--loss_scale', type=float, default=128.) + + parser.add_argument('--world_size', type=int, default=8) + parser.add_argument('--distributed', type=int, default=0, + help='Use multi-processing distributed training to launch.') + + config = parser.parse_args() + main(config) + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_1p.sh b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_1p.sh index cfba86757043558564dfad59a50d800784ec92d2..d2f3546ec3083d1d41f9b53c0e52dcc57ffa89c3 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_1p.sh +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_1p.sh @@ -1,3 +1,3 @@ -source ./npu_env.sh -a='R2AttU_Net' +source ./npu_env.sh +a='R2AttU_Net' nohup python3 main_1p.py --model_type=$a --data_path="./dataset" > train_1p.log 2>&1 & \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_8p.sh b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_8p.sh index cb10157b504bedda8054e1eb96c513d885030878..e8361c70a4b66a673719ae3d9698f4e99ea5d9c4 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_8p.sh +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/train_8p.sh @@ -1,3 +1,3 @@ -source ./npu_env.sh -a='R2AttU_Net' +source ./npu_env.sh +a='R2AttU_Net' nohup python3 main_8p.py --model_type=$a --data_path="./dataset" > train_8p.log 2>&1 & \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/utils.py b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/utils.py index 1f4811a91a7bf6b75ef4123523ad4085feecb5a1..e854750e9c6ff7f2a6c65790cb1695f903a16bf5 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/utils.py +++ b/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch/utils.py @@ -1,262 +1,262 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -#from __future__ import print_function -from collections import defaultdict, deque -import datetime -import time -import torch -import torch.distributed as dist - -import errno -import os - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size=20, fmt=None): - if fmt is None: - fmt = "{median:.4f} ({global_avg:.4f})" - self.deque = deque(maxlen=window_size) - self.total = 0.0 - self.count = 0 - self.fmt = fmt - - def update(self, value, n=1): - self.deque.append(value) - self.count += n - self.total += value * n - - def synchronize_between_processes(self, npu_device): - """ - Warning: does not synchronize the deque! - """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float32, device=npu_device) - dist.barrier() - dist.all_reduce(t) - t = t.tolist() - self.count = int(t[0]) - self.total = t[1] - - @property - def median(self): - d = torch.tensor(list(self.deque)) - return d.median().item() - - @property - def avg(self): - d = torch.tensor(list(self.deque), dtype=torch.float32) - return d.mean().item() - - @property - def global_avg(self): - return self.total / self.count - - @property - def max(self): - return max(self.deque) - - @property - def value(self): - return self.deque[-1] - - def __str__(self): - return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) - - -class MetricLogger(object): - def __init__(self, delimiter="\t"): - self.meters = defaultdict(SmoothedValue) - self.delimiter = delimiter - - def update(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - assert isinstance(v, (float, int)) - self.meters[k].update(v) - - def __getattr__(self, attr): - if attr in self.meters: - return self.meters[attr] - if attr in self.__dict__: - return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) - - def __str__(self): - loss_str = [] - for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) - return self.delimiter.join(loss_str) - - def synchronize_between_processes(self, device): - for meter in self.meters.values(): - meter.synchronize_between_processes(device) - - def add_meter(self, name, meter): - self.meters[name] = meter - - def log_every(self, iterable, args, header=None): - i = 0 - if not header: - header = '' - start_time = time.time() - end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' - if torch.npu.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: 
{data}', - 'max mem: {memory:.0f}' - ]) - else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) - MB = 1024.0 * 1024.0 - for obj in iterable: - data_time.update(time.time() - end) - yield obj - iter_time.update(time.time() - end) - if args.is_master_node and i % args.print_freq == 0: - eta_seconds = iter_time.global_avg * (len(iterable) - i) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - if torch.npu.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=0)) - else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) - i += 1 - end = time.time() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target[None]) - - res = [] - for k in topk: - correct_k = correct[:k].flatten().sum(dtype=torch.float32) - res.append(correct_k * (100.0 / batch_size)) - return res - - -def mkdir(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def setup_for_distributed(is_master): - """ - This function disables printing when not in master process - """ - import builtins as __builtin__ - builtin_print = __builtin__.print - - def print(*args, **kwargs): - force = kwargs.pop('force', False) - if is_master or force: - builtin_print(*args, **kwargs) - - __builtin__.print = print - - -def is_dist_avail_and_initialized(): - if not dist.is_available(): - return False - if not dist.is_initialized(): - return False - return True - - -def get_world_size(): - if not is_dist_avail_and_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank(): - if not is_dist_avail_and_initialized(): - return 0 - return dist.get_rank() - - -def is_main_process(): - return get_rank() == 0 - - -def save_on_master(*args, **kwargs): - torch.save(*args, **kwargs) - - +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +#from __future__ import print_function +from collections import defaultdict, deque +import datetime +import time +import torch +import torch.distributed as dist + +import errno +import os + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self, npu_device): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float32, device=npu_device) + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self, device): + for meter in self.meters.values(): + meter.synchronize_between_processes(device) + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, args, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.npu.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: 
{data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if args.is_master_node and i % args.print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.npu.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=0)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {}'.format(header, total_time_str)) + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target[None]) + + res = [] + for k in topk: + correct_k = correct[:k].flatten().sum(dtype=torch.float32) + res.append(correct_k * (100.0 / batch_size)) + return res + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + torch.save(*args, **kwargs) + + diff --git a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/LICENSE b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/LICENSE +++ b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/evaluation.py b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/evaluation.py index 51b3948ca54bf3f9750ef9fca0c171b9f6666d6b..e368a1864747b2fc86ce010161abad8c8abffab7 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/evaluation.py +++ b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/evaluation.py @@ -1,115 +1,115 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import torch - -# SR : Segmentation Result -# GT : Ground Truth - -def get_accuracy(SR,GT,threshold=0.5): - SR = SR > threshold - GT = GT == torch.max(GT) - corr = torch.sum(SR==GT) - tensor_size = SR.size(0)*SR.size(1)*SR.size(2)*SR.size(3) - acc = float(corr)/float(tensor_size) - - return acc - -def get_sensitivity(SR,GT,threshold=0.5): - # Sensitivity == Recall - # TP : True Positive - # FN : False Negative - SR = SR > threshold - GT = GT == torch.max(GT) - TP = SR & GT - FN = (~SR) & GT - - SE = float(torch.sum(TP))/(float(torch.sum(TP)+torch.sum(FN)) + 1e-6) - - return SE - -def get_specificity(SR,GT,threshold=0.5): - - # TN : True Negative - # FP : False Positive - SR = SR > threshold - GT = GT == torch.max(GT) - TN = (~SR) & (~GT) - FP = SR & (~GT) - - SP = float(torch.sum(TN))/(float(torch.sum(TN)+torch.sum(FP)) + 1e-6) - - return SP - -def get_precision(SR,GT,threshold=0.5): - - # TP : True Positive - # FP : False Positive - SR = SR > threshold - GT = GT == torch.max(GT) - TP = SR & GT - FP = SR & (~GT) - - PC = float(torch.sum(TP))/(float(torch.sum(TP)+torch.sum(FP)) + 1e-6) - - return PC - -def get_F1(SR,GT,threshold=0.5): - # Sensitivity == Recall - SE = get_sensitivity(SR,GT,threshold=threshold) - PC = get_precision(SR,GT,threshold=threshold) - - F1 = 2*SE*PC/(SE+PC + 1e-6) - - return F1 - -def get_JS(SR,GT,threshold=0.5): - # JS : Jaccard similarity - SR = SR > threshold - GT = GT == torch.max(GT) - Inter = torch.sum((SR & GT)) - Union = torch.sum((SR | GT)) - - JS = float(Inter)/(float(Union) + 1e-6) - - return JS - -def get_DC(SR,GT,threshold=0.5): - # DC : Dice Coefficient - SR = SR > threshold - GT = GT == torch.max(GT) - Inter = torch.sum((SR & GT)) - DC = float(2*Inter)/(float(torch.sum(SR)+torch.sum(GT)) + 1e-6) - - return DC - - - +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import torch + +# SR : Segmentation Result +# GT : Ground Truth + +def get_accuracy(SR,GT,threshold=0.5): + SR = SR > threshold + GT = GT == torch.max(GT) + corr = torch.sum(SR==GT) + tensor_size = SR.size(0)*SR.size(1)*SR.size(2)*SR.size(3) + acc = float(corr)/float(tensor_size) + + return acc + +def get_sensitivity(SR,GT,threshold=0.5): + # Sensitivity == Recall + # TP : True Positive + # FN : False Negative + SR = SR > threshold + GT = GT == torch.max(GT) + TP = SR & GT + FN = (~SR) & GT + + SE = float(torch.sum(TP))/(float(torch.sum(TP)+torch.sum(FN)) + 1e-6) + + return SE + +def get_specificity(SR,GT,threshold=0.5): + + # TN : True Negative + # FP : False Positive + SR = SR > threshold + GT = GT == torch.max(GT) + TN = (~SR) & (~GT) + FP = SR & (~GT) + + SP = float(torch.sum(TN))/(float(torch.sum(TN)+torch.sum(FP)) + 1e-6) + + return SP + +def get_precision(SR,GT,threshold=0.5): + + # TP : True Positive + # FP : False Positive + SR = SR > threshold + GT = GT == torch.max(GT) + TP = SR & GT + FP = SR & (~GT) + + PC = float(torch.sum(TP))/(float(torch.sum(TP)+torch.sum(FP)) + 1e-6) + + return PC + +def get_F1(SR,GT,threshold=0.5): + # Sensitivity == Recall + SE = get_sensitivity(SR,GT,threshold=threshold) + PC = get_precision(SR,GT,threshold=threshold) + + F1 = 2*SE*PC/(SE+PC + 1e-6) + + return F1 + +def get_JS(SR,GT,threshold=0.5): + # JS : Jaccard similarity + SR = SR > threshold + GT = GT == torch.max(GT) + Inter = torch.sum((SR & GT)) + Union = torch.sum((SR | GT)) + + JS = float(Inter)/(float(Union) + 1e-6) + + return JS + +def get_DC(SR,GT,threshold=0.5): + # DC : Dice Coefficient + SR = SR > threshold + GT = GT == torch.max(GT) + Inter = torch.sum((SR & GT)) + DC = float(2*Inter)/(float(torch.sum(SR)+torch.sum(GT)) + 1e-6) + + return DC + + + diff --git a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/run_to_onnx.sh 
b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/run_to_onnx.sh index 13cead0d88c21771601d33a435ef26d296c68a48..89099c047a904ff0b97638ed13a3480da3dbb873 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/run_to_onnx.sh +++ b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/run_to_onnx.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash -source env.sh - +#!/usr/bin/env bash +source env.sh + python3.7 pthtar2onx.py \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/utils.py b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/utils.py index 1f4811a91a7bf6b75ef4123523ad4085feecb5a1..e854750e9c6ff7f2a6c65790cb1695f903a16bf5 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/utils.py +++ b/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch/utils.py @@ -1,262 +1,262 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -#from __future__ import print_function -from collections import defaultdict, deque -import datetime -import time -import torch -import torch.distributed as dist - -import errno -import os - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size=20, fmt=None): - if fmt is None: - fmt = "{median:.4f} ({global_avg:.4f})" - self.deque = deque(maxlen=window_size) - self.total = 0.0 - self.count = 0 - self.fmt = fmt - - def update(self, value, n=1): - self.deque.append(value) - self.count += n - self.total += value * n - - def synchronize_between_processes(self, npu_device): - """ - Warning: does not synchronize the deque! 
- """ - if not is_dist_avail_and_initialized(): - return - t = torch.tensor([self.count, self.total], dtype=torch.float32, device=npu_device) - dist.barrier() - dist.all_reduce(t) - t = t.tolist() - self.count = int(t[0]) - self.total = t[1] - - @property - def median(self): - d = torch.tensor(list(self.deque)) - return d.median().item() - - @property - def avg(self): - d = torch.tensor(list(self.deque), dtype=torch.float32) - return d.mean().item() - - @property - def global_avg(self): - return self.total / self.count - - @property - def max(self): - return max(self.deque) - - @property - def value(self): - return self.deque[-1] - - def __str__(self): - return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) - - -class MetricLogger(object): - def __init__(self, delimiter="\t"): - self.meters = defaultdict(SmoothedValue) - self.delimiter = delimiter - - def update(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - assert isinstance(v, (float, int)) - self.meters[k].update(v) - - def __getattr__(self, attr): - if attr in self.meters: - return self.meters[attr] - if attr in self.__dict__: - return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) - - def __str__(self): - loss_str = [] - for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) - return self.delimiter.join(loss_str) - - def synchronize_between_processes(self, device): - for meter in self.meters.values(): - meter.synchronize_between_processes(device) - - def add_meter(self, name, meter): - self.meters[name] = meter - - def log_every(self, iterable, args, header=None): - i = 0 - if not header: - header = '' - start_time = time.time() - end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' - if torch.npu.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) - else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) - MB = 1024.0 * 1024.0 - for obj in iterable: - data_time.update(time.time() - end) - yield obj - iter_time.update(time.time() - end) - if args.is_master_node and i % args.print_freq == 0: - eta_seconds = iter_time.global_avg * (len(iterable) - i) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - if torch.npu.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=0)) - else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) - i += 1 - end = time.time() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target[None]) - - res = [] - for k in topk: - correct_k = 
correct[:k].flatten().sum(dtype=torch.float32) - res.append(correct_k * (100.0 / batch_size)) - return res - - -def mkdir(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def setup_for_distributed(is_master): - """ - This function disables printing when not in master process - """ - import builtins as __builtin__ - builtin_print = __builtin__.print - - def print(*args, **kwargs): - force = kwargs.pop('force', False) - if is_master or force: - builtin_print(*args, **kwargs) - - __builtin__.print = print - - -def is_dist_avail_and_initialized(): - if not dist.is_available(): - return False - if not dist.is_initialized(): - return False - return True - - -def get_world_size(): - if not is_dist_avail_and_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank(): - if not is_dist_avail_and_initialized(): - return 0 - return dist.get_rank() - - -def is_main_process(): - return get_rank() == 0 - - -def save_on_master(*args, **kwargs): - torch.save(*args, **kwargs) - - +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +#from __future__ import print_function +from collections import defaultdict, deque +import datetime +import time +import torch +import torch.distributed as dist + +import errno +import os + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self, npu_device): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float32, device=npu_device) + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self, device): + for meter in self.meters.values(): + meter.synchronize_between_processes(device) + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, args, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.npu.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if args.is_master_node and i % args.print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.npu.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=0)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {}'.format(header, total_time_str)) + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target[None]) + + res = [] + for k in topk: + correct_k = 
correct[:k].flatten().sum(dtype=torch.float32) + res.append(correct_k * (100.0 / batch_size)) + return res + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + torch.save(*args, **kwargs) + + diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/bert_base_config.json b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/bert_base_config.json index 3a28c2e64827d9ca3937da7ebb0ab5461aa69023..d794ee04ccfc69433c2ecb8979fb536607865245 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/bert_base_config.json +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/bert_base_config.json @@ -1,13 +1,13 @@ -{ - "attention_probs_dropout_prob": 0.1, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 768, - "initializer_range": 0.02, - "intermediate_size": 3072, - "max_position_embeddings": 512, - "num_attention_heads": 12, - "num_hidden_layers": 12, - "type_vocab_size": 2, - "vocab_size": 30522 +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 512, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "type_vocab_size": 2, + "vocab_size": 30522 } \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/run_squad.py index bd1a578b0cebeedfb1be8d89db5c5bf846c8e303..c26a387a79e403206502396a179193b1060ae8af 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/run_squad.py @@ -1,1282 +1,1282 @@ -# coding=utf-8 -# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. -# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Run BERT on SQuAD.""" - -from __future__ import absolute_import, division, print_function - -import argparse -import collections -import json -import logging -import math -import os -import random -import sys -from io import open - -import numpy as np -import torch -from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, - TensorDataset) -from torch.utils.data.distributed import DistributedSampler -# from tqdm import tqdm, trange - -from apex import amp -from schedulers import LinearWarmUpScheduler -from file_utils import PYTORCH_PRETRAINED_BERT_CACHE -import modeling -from optimization import BertAdam, warmup_linear -from tokenization import (BasicTokenizer, BertTokenizer, whitespace_tokenize) -from utils import is_main_process, format_step -import dllogger, time -from apex.optimizers import npu_fused_bert_adam, NpuFusedBertAdam - -# torch._C._jit_set_profiling_mode(False) -# torch._C._jit_set_profiling_executor(False) - -if sys.version_info[0] == 2: - import cPickle as pickle -else: - import pickle - -logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt='%m/%d/%Y %H:%M:%S', - level=logging.INFO) -logger = logging.getLogger(__name__) - - -class NpuFusedBertAdamV2(NpuFusedBertAdam): - def _group_step(self, group_index): - group = self.param_groups[group_index] - - beta1, beta2 = group['b1'], group['b2'] - - stash = self._amp_stash - combined_group_params = stash.combined_params_indexed_by_group[group_index] - combined_group_grads = stash.combined_grads_indexed_by_group[group_index] - combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] - - for combined_param, combined_grad, combined_param_state in zip(combined_group_params, combined_group_grads, - combined_group_param_states): - if combined_param is None or combined_grad is None: - continue - exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] - if group['t_total'] != -1: - scheduler_fct = npu_fused_bert_adam.SCHEDULES[group['schedule']] - lr_scheduled = group['lr'] * scheduler_fct(combined_param_state['step'] / group['t_total'], - group['warmup']) - else: - lr_scheduled = group['lr'] - combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(combined_param.data, exp_avg, - exp_avg_sq, lr_scheduled, beta1, beta2, - group['e'], combined_grad.data, - group['max_grad_norm'], 0, - group['weight_decay']) - combined_param_state['step'] += 1 - - -class SquadExample(object): - """ - A single training/test example for the Squad dataset. - For examples without an answer, the start and end position are -1. 
- """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=None): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (self.qas_id) - s += ", question_text: %s" % ( - self.question_text) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.end_position: - s += ", end_position: %d" % (self.end_position) - if self.is_impossible: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, is_training, version_2_with_negative): - """Read a SQuAD json file into a list of SquadExample.""" - with open(input_file, "r", encoding='utf-8') as reader: - input_data = json.load(reader)["data"] - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - if version_2_with_negative: - is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. 
- actual_text = " ".join(doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = " ".join( - whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - logger.warning("Could not find answer: '%s' vs. '%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - return examples - - -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - features = [] - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - start_position = None - end_position = None - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. - doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - - features.append( - InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible)) - unique_id += 1 - - return features - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. 
- # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. - tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -RawResult = collections.namedtuple("RawResult", - ["unique_id", "start_logits", "end_logits"]) - - -def get_answers(examples, features, results, args): - predictions = collections.defaultdict(list) #it is possible that one example corresponds to multiple features - Prediction = collections.namedtuple('Prediction', ['text', 'start_logit', 'end_logit']) - - if args.version_2_with_negative: - null_vals = collections.defaultdict(lambda: (float("inf"),0,0)) - for ex, feat, result in match_results(examples, features, results): - start_indices = _get_best_indices(result.start_logits, args.n_best_size) - end_indices = _get_best_indices(result.end_logits, args.n_best_size) - prelim_predictions = get_valid_prelim_predictions(start_indices, end_indices, feat, result, args) - prelim_predictions = sorted( - prelim_predictions, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True) - if args.version_2_with_negative: - score = result.start_logits[0] + result.end_logits[0] - if score < null_vals[ex.qas_id][0]: - null_vals[ex.qas_id] = (score, result.start_logits[0], result.end_logits[0]) - - curr_predictions = [] - seen_predictions = [] - for pred in prelim_predictions: - if len(curr_predictions) == args.n_best_size: - break - if pred.start_index > 0: # this is a non-null prediction 
TODO: this probably is irrelevant - final_text = get_answer_text(ex, feat, pred, args) - if final_text in seen_predictions: - continue - else: - final_text = "" - - seen_predictions.append(final_text) - curr_predictions.append(Prediction(final_text, pred.start_logit, pred.end_logit)) - predictions[ex.qas_id] += curr_predictions - - #Add empty prediction - if args.version_2_with_negative: - for qas_id in predictions.keys(): - predictions[qas_id].append(Prediction('', - null_vals[ex.qas_id][1], - null_vals[ex.qas_id][2])) - - - nbest_answers = collections.defaultdict(list) - answers = {} - for qas_id, preds in predictions.items(): - nbest = sorted( - preds, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True)[:args.n_best_size] - - # In very rare edge cases we could only have single null prediction. - # So we just create a nonce prediction in this case to avoid failure. - if not nbest: - nbest.append(Prediction(text="empty", start_logit=0.0, end_logit=0.0)) - - total_scores = [] - best_non_null_entry = None - for entry in nbest: - total_scores.append(entry.start_logit + entry.end_logit) - if not best_non_null_entry and entry.text: - best_non_null_entry = entry - probs = _compute_softmax(total_scores) - for (i, entry) in enumerate(nbest): - output = collections.OrderedDict() - output["text"] = entry.text - output["probability"] = probs[i] - output["start_logit"] = entry.start_logit - output["end_logit"] = entry.end_logit - nbest_answers[qas_id].append(output) - if args.version_2_with_negative: - score_diff = null_vals[qas_id][0] - best_non_null_entry.start_logit - best_non_null_entry.end_logit - if score_diff > args.null_score_diff_threshold: - answers[qas_id] = "" - else: - answers[qas_id] = best_non_null_entry.text - else: - answers[qas_id] = nbest_answers[qas_id][0]['text'] - - return answers, nbest_answers - -def get_answer_text(example, feature, pred, args): - tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] - orig_doc_start = feature.token_to_orig_map[pred.start_index] - orig_doc_end = feature.token_to_orig_map[pred.end_index] - orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] - tok_text = " ".join(tok_tokens) - - # De-tokenize WordPieces that have been split off. 
- tok_text = tok_text.replace(" ##", "") - tok_text = tok_text.replace("##", "") - - # Clean whitespace - tok_text = tok_text.strip() - tok_text = " ".join(tok_text.split()) - orig_text = " ".join(orig_tokens) - - final_text = get_final_text(tok_text, orig_text, args.do_lower_case, args.verbose_logging) - return final_text - -def get_valid_prelim_predictions(start_indices, end_indices, feature, result, args): - - _PrelimPrediction = collections.namedtuple( - "PrelimPrediction", - ["start_index", "end_index", "start_logit", "end_logit"]) - prelim_predictions = [] - for start_index in start_indices: - for end_index in end_indices: - if start_index >= len(feature.tokens): - continue - if end_index >= len(feature.tokens): - continue - if start_index not in feature.token_to_orig_map: - continue - if end_index not in feature.token_to_orig_map: - continue - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > args.max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction( - start_index=start_index, - end_index=end_index, - start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) - return prelim_predictions - -def match_results(examples, features, results): - unique_f_ids = set([f.unique_id for f in features]) - unique_r_ids = set([r.unique_id for r in results]) - matching_ids = unique_f_ids & unique_r_ids - features = [f for f in features if f.unique_id in matching_ids] - results = [r for r in results if r.unique_id in matching_ids] - features.sort(key=lambda x: x.unique_id) - results.sort(key=lambda x: x.unique_id) - - for f, r in zip(features, results): #original code assumes strict ordering of examples. TODO: rewrite this - yield examples[f.example_index], f, r - -def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): - """Project the tokenized prediction back to the original text.""" - - # When we created the data, we kept track of the alignment between original - # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So - # now `orig_text` contains the span of our original text corresponding to the - # span that we predicted. - # - # However, `orig_text` may contain extra characters that we don't want in - # our prediction. - # - # For example, let's say: - # pred_text = steve smith - # orig_text = Steve Smith's - # - # We don't want to return `orig_text` because it contains the extra "'s". - # - # We don't want to return `pred_text` because it's already been normalized - # (the SQuAD eval script also does punctuation stripping/lower casing but - # our tokenizer does additional normalization like stripping accent - # characters). - # - # What we really want to return is "Steve Smith". - # - # Therefore, we have to apply a semi-complicated alignment heruistic between - # `pred_text` and `orig_text` to get a character-to-charcter alignment. This - # can fail in certain cases in which case we just return `orig_text`. - - def _strip_spaces(text): - ns_chars = [] - ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): - if c == " ": - continue - ns_to_s_map[len(ns_chars)] = i - ns_chars.append(c) - ns_text = "".join(ns_chars) - return (ns_text, ns_to_s_map) - - # We first tokenize `orig_text`, strip whitespace from the result - # and `pred_text`, and check if they are the same length. If they are - # NOT the same length, the heuristic has failed. 
If they are the same - # length, we assume the characters are one-to-one aligned. - - tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - - tok_text = " ".join(tokenizer.tokenize(orig_text)) - - start_position = tok_text.find(pred_text) - if start_position == -1: - if verbose_logging: - logger.info( - "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) - return orig_text - end_position = start_position + len(pred_text) - 1 - - (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) - (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) - - if len(orig_ns_text) != len(tok_ns_text): - if verbose_logging: - logger.info("Length not equal after stripping spaces: '%s' vs '%s'", - orig_ns_text, tok_ns_text) - return orig_text - - # We then project the characters in `pred_text` back to `orig_text` using - # the character-to-character alignment. - tok_s_to_ns_map = {} - for (i, tok_index) in tok_ns_to_s_map.items(): - tok_s_to_ns_map[tok_index] = i - - orig_start_position = None - if start_position in tok_s_to_ns_map: - ns_start_position = tok_s_to_ns_map[start_position] - if ns_start_position in orig_ns_to_s_map: - orig_start_position = orig_ns_to_s_map[ns_start_position] - - if orig_start_position is None: - if verbose_logging: - logger.info("Couldn't map start position") - return orig_text - - orig_end_position = None - if end_position in tok_s_to_ns_map: - ns_end_position = tok_s_to_ns_map[end_position] - if ns_end_position in orig_ns_to_s_map: - orig_end_position = orig_ns_to_s_map[ns_end_position] - - if orig_end_position is None: - if verbose_logging: - logger.info("Couldn't map end position") - return orig_text - - output_text = orig_text[orig_start_position:(orig_end_position + 1)] - return output_text - - -def _get_best_indices(logits, n_best_size): - """Get the n-best logits from a list.""" - index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) - - best_indices = [] - for i in range(len(index_and_score)): - if i >= n_best_size: - break - best_indices.append(index_and_score[i][0]) - return best_indices - - -def _compute_softmax(scores): - """Compute softmax probability over raw logits.""" - if not scores: - return [] - - max_score = None - for score in scores: - if max_score is None or score > max_score: - max_score = score - - exp_scores = [] - total_sum = 0.0 - for score in scores: - x = math.exp(score - max_score) - exp_scores.append(x) - total_sum += x - - probs = [] - for score in exp_scores: - probs.append(score / total_sum) - return probs - - - -# from apex.multi_tensor_apply import multi_tensor_applier -# class GradientClipper: -# """ -# Clips gradient norm of an iterable of parameters. 
-# """ -# def __init__(self, max_grad_norm): -# self.max_norm = max_grad_norm -# if multi_tensor_applier.available: -# import amp_C -# self._overflow_buf = torch.cuda.IntTensor([0]) -# self.multi_tensor_l2norm = amp_C.multi_tensor_l2norm -# self.multi_tensor_scale = amp_C.multi_tensor_scale -# else: -# raise RuntimeError('Gradient clipping requires cuda extensions') -# -# def step(self, parameters): -# l = [p.grad for p in parameters if p.grad is not None] -# total_norm, _ = multi_tensor_applier(self.multi_tensor_l2norm, self._overflow_buf, [l], False) -# total_norm = total_norm.item() -# if (total_norm == float('inf')): return -# clip_coef = self.max_norm / (total_norm + 1e-6) -# if clip_coef < 1: -# multi_tensor_applier(self.multi_tensor_scale, self._overflow_buf, [l, l], clip_coef) - - -def main(): - parser = argparse.ArgumentParser() - - ## Required parameters - parser.add_argument("--bert_model", default=None, type=str, required=True, - help="Bert pre-trained model selected in the list: bert-base-uncased, " - "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " - "bert-base-multilingual-cased, bert-base-chinese.") - parser.add_argument("--output_dir", default=None, type=str, required=True, - help="The output directory where the model checkpoints and predictions will be written.") - parser.add_argument("--init_checkpoint", - default=None, - type=str, - required=True, - help="The checkpoint file from pretraining") - - ## Other parameters - parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json") - parser.add_argument("--predict_file", default=None, type=str, - help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") - parser.add_argument("--max_seq_length", default=384, type=int, - help="The maximum total input sequence length after WordPiece tokenization. Sequences " - "longer than this will be truncated, and sequences shorter than this will be padded.") - parser.add_argument("--doc_stride", default=128, type=int, - help="When splitting up a long document into chunks, how much stride to take between chunks.") - parser.add_argument("--max_query_length", default=64, type=int, - help="The maximum number of tokens for the question. Questions longer than this will " - "be truncated to this length.") - parser.add_argument("--do_train", action='store_true', help="Whether to run training.") - parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.") - parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") - parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.") - parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") - parser.add_argument("--num_train_epochs", default=3.0, type=float, - help="Total number of training epochs to perform.") - parser.add_argument("--max_steps", default=-1.0, type=float, - help="Total number of training steps to perform.") - parser.add_argument("--warmup_proportion", default=0.1, type=float, - help="Proportion of training to perform linear learning rate warmup for. 
E.g., 0.1 = 10%% " - "of training.") - parser.add_argument("--n_best_size", default=20, type=int, - help="The total number of n-best predictions to generate in the nbest_predictions.json " - "output file.") - parser.add_argument("--max_answer_length", default=30, type=int, - help="The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another.") - parser.add_argument("--verbose_logging", action='store_true', - help="If true, all of the warnings related to data processing will be printed. " - "A number of warnings are expected for a normal SQuAD evaluation.") - parser.add_argument("--no_cuda", - action='store_true', - help="Whether not to use CUDA when available") - parser.add_argument('--seed', - type=int, - default=42, - help="random seed for initialization") - parser.add_argument('--gradient_accumulation_steps', - type=int, - default=1, - help="Number of updates steps to accumulate before performing a backward/update pass.") - parser.add_argument("--do_lower_case", - action='store_true', - help="Whether to lower case the input text. True for uncased models, False for cased models.") - parser.add_argument("--local_rank", - type=int, - default=-1, - help="local_rank for distributed training on gpus") - parser.add_argument('--fp16', - default=False, - action='store_true', - help="Mixed precision training") - parser.add_argument('--amp', - default=False, - action='store_true', - help="Mixed precision training") - parser.add_argument('--loss_scale', - type=float, default=0, - help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" - "0 (default value): dynamic loss scaling.\n" - "Positive power of 2: static loss scaling value.\n") - parser.add_argument('--version_2_with_negative', - action='store_true', - help='If true, the SQuAD examples contain some that do not have an answer.') - parser.add_argument('--null_score_diff_threshold', - type=float, default=0.0, - help="If null_score - best_non_null is greater than the threshold predict null.") - parser.add_argument('--vocab_file', - type=str, default=None, required=True, - help="Vocabulary mapping/file BERT was pretrainined on") - parser.add_argument("--config_file", - default=None, - type=str, - required=True, - help="The BERT model config") - parser.add_argument('--log_freq', - type=int, default=1, - help='frequency of logging loss.') - parser.add_argument('--json-summary', type=str, default="results/dllogger.json", - help='If provided, the json summary will be written to' - 'the specified file.') - parser.add_argument("--eval_script", - help="Script to evaluate squad predictions", - default="evaluate.py", - type=str) - parser.add_argument("--do_eval", - action='store_true', - help="Whether to use evaluate accuracy of predictions") - parser.add_argument("--use_env", - action='store_true', - help="Whether to read local rank from ENVVAR") - parser.add_argument('--skip_checkpoint', - default=False, - action='store_true', - help="Whether to save checkpoints") - parser.add_argument('--disable-progress-bar', - default=True, - action='store_true', - help='Disable tqdm progress bar') - parser.add_argument("--skip_cache", - default=False, - action='store_true', - help="Whether to cache train features") - parser.add_argument("--cache_dir", - default=None, - type=str, - help="Location to cache train feaures. 
Will default to the dataset directory") - parser.add_argument('--use_npu', - default=True, - action='store_true', - help='whether to use npu') - parser.add_argument('--npu_id', - type=int, default=0, - help='npu device id.') - parser.add_argument('--num_npu', - type=int, default=1, - help='number of npu devices to use.') - parser.add_argument("--addr", - default=None, - type=str, - help="addr used for distributed training") - - args = parser.parse_args() - args.fp16 = args.fp16 or args.amp - - if args.local_rank == -1 or args.no_cuda: - if args.use_npu: - torch.npu.set_device("npu:%d" % args.npu_id) - device = torch.device("npu:%d" % args.npu_id) - n_npu = 1 # this is the device number of one training process, usually one - else: - device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - n_npu = torch.cuda.device_count() - else: - if args.use_npu: - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29668' - torch.npu.set_device("npu:%d" % args.local_rank) - device = torch.device("npu:%d" % args.local_rank) - torch.distributed.init_process_group(backend='hccl', world_size=8, rank=args.local_rank) - n_npu = 1 - else: - torch.cuda.set_device(args.local_rank) - device = torch.device("cuda", args.local_rank) - # Initializes the distributed backend which will take care of sychronizing nodes/GPUs - torch.distributed.init_process_group(backend='nccl', init_method='env://') - n_npu = 1 - - if is_main_process(): - dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, - filename=args.json_summary), - dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE, step_format=format_step)]) - else: - dllogger.init(backends=[]) - - # print("device: {} n_npu: {}, distributed training: {}, 16-bits training: {}".format( - # device, n_npu, bool(args.local_rank != -1), args.fp16)) - print("train on device {}, rank {}".format(device, args.local_rank)) - - dllogger.log(step="PARAMETER", data={"Config": [str(args)]}) - - if args.gradient_accumulation_steps < 1: - raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( - args.gradient_accumulation_steps)) - - args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps - - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - dllogger.log(step="PARAMETER", data={"SEED": args.seed}) - - #if n_npu > 0: - # torch.cuda.manual_seed_all(args.seed) - - if not args.do_train and not args.do_predict: - raise ValueError("At least one of `do_train` or `do_predict` must be True.") - - if args.do_train: - if not args.train_file: - raise ValueError( - "If `do_train` is True, then `train_file` must be specified.") - if args.do_predict: - if not args.predict_file: - raise ValueError( - "If `do_predict` is True, then `predict_file` must be specified.") - - if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and os.listdir(args.output_dir)!=['logfile.txt']: - print("WARNING: Output directory {} already exists and is not empty.".format(args.output_dir), os.listdir(args.output_dir)) - if not os.path.exists(args.output_dir) and is_main_process(): - os.makedirs(args.output_dir) - - tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case, max_len=512) # for bert large - # tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) - - train_examples = None - num_train_optimization_steps = None - if args.do_train: - 
train_examples = read_squad_examples( - input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative) - num_train_optimization_steps = int( - len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs - if args.local_rank != -1: - num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size() - - # Prepare model - config = modeling.BertConfig.from_json_file(args.config_file) - # Padding for divisibility by 8 - if config.vocab_size % 8 != 0: - config.vocab_size += 8 - (config.vocab_size % 8) - - - modeling.ACT2FN["bias_gelu"] = modeling.bias_gelu_training - model = modeling.BertForQuestionAnswering(config) - # model = modeling.BertForQuestionAnswering.from_pretrained(args.bert_model, - # cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))) - dllogger.log(step="PARAMETER", data={"loading_checkpoint": True}) - model.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu')["model"], strict=False) - dllogger.log(step="PARAMETER", data={"loaded_checkpoint": True}) - model.to(device) - num_weights = sum([p.numel() for p in model.parameters() if p.requires_grad]) - dllogger.log(step="PARAMETER", data={"model_weights_num":num_weights}) - - # Prepare optimizer - param_optimizer = list(model.named_parameters()) - - # hack to remove pooler, which is not used - # thus it produce None grad that break apex - param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]] - - no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] - optimizer_grouped_parameters = [ - {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, - {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} - ] - if args.do_train: - if args.fp16: - # try: - # from apex.optimizers import NpuFusedAdam - # except ImportError: - # raise ImportError( - # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - # optimizer = NpuFusedAdam(optimizer_grouped_parameters, - # lr=args.learning_rate) - - optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) - - if args.loss_scale == 0: - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, - loss_scale="dynamic") - else: - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, loss_scale=args.loss_scale, combine_grad=True) - if args.do_train: - scheduler = LinearWarmUpScheduler(optimizer, warmup=args.warmup_proportion, total_steps=num_train_optimization_steps) - - else: - optimizer = BertAdam(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) - - #model.qa_outputs.bias.data = model.qa_outputs.bias.data.float() # for ascend910 special - if args.local_rank != -1: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], broadcast_buffers=False, find_unused_parameters=True) - elif n_npu > 1: - model = torch.nn.DataParallel(model) - - global_step = 0 - if args.do_train: - print("Doing train...") - if args.cache_dir is None: - cached_train_features_file = args.train_file + '_{0}_{1}_{2}_{3}'.format( - list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), 
str(args.doc_stride), - str(args.max_query_length)) - else: - cached_train_features_file = args.cache_dir.strip('/') + '/' + args.train_file.split('/')[-1] + '_{0}_{1}_{2}_{3}'.format( - list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), str(args.doc_stride), - str(args.max_query_length)) - - train_features = None - try: - with open(cached_train_features_file, "rb") as reader: - train_features = pickle.load(reader) - except: - train_features = convert_examples_to_features( - examples=train_examples, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - max_query_length=args.max_query_length, - is_training=True) - - if not args.skip_cache and is_main_process(): - dllogger.log(step="PARAMETER", data={"Cached_train features_file": cached_train_features_file}) - with open(cached_train_features_file, "wb") as writer: - pickle.dump(train_features, writer) - - dllogger.log(step="PARAMETER", data={"train_start": True}) - dllogger.log(step="PARAMETER", data={"training_samples": len(train_examples)}) - dllogger.log(step="PARAMETER", data={"training_features": len(train_features)}) - dllogger.log(step="PARAMETER", data={"train_batch_size":args.train_batch_size}) - dllogger.log(step="PARAMETER", data={"steps":num_train_optimization_steps}) - # all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) - # all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) - # all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) - # all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long) - # all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long) - - all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.int32) - all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.int32) - all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.int32) - all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.int32) - all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.int32) - - train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, - all_start_positions, all_end_positions) - if args.local_rank == -1: - train_sampler = RandomSampler(train_data) - else: - train_sampler = DistributedSampler(train_data) - train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size * n_npu, num_workers=2, pin_memory=True, drop_last=True) - - model.train() - #gradClipper = GradientClipper(max_grad_norm=1.0) - final_loss = None - train_start = time.time() - for epoch in range(int(args.num_train_epochs)): - #train_iter = tqdm(train_dataloader, desc="Iteration", disable=args.disable_progress_bar) if is_main_process() else train_dataloader - train_iter = train_dataloader - step_start_time = time.time() - for step, batch in enumerate(train_iter): - torch.npu.enable_graph_mode() - # Terminate early for benchmarking - data_time = time.time() - step_start_time - if args.max_steps > 0 and global_step > args.max_steps: - break - - if n_npu == 1: - batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self - input_ids, input_mask, segment_ids, start_positions, end_positions = batch - start_logits, end_logits = model(input_ids, segment_ids, input_mask) - # If we are on multi-GPU, 
split add a dimension - if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) - if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) - # sometimes the start/end positions are outside our model inputs, we ignore these terms - ignored_index = start_logits.size(1) - start_positions.clamp_(0, ignored_index) - end_positions.clamp_(0, ignored_index) - - loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) - start_loss = loss_fct(start_logits, start_positions) - end_loss = loss_fct(end_logits, end_positions) - loss = (start_loss + end_loss) / 2 - if n_npu > 1: - loss = loss.mean() # mean() to average on multi-gpu. - if args.gradient_accumulation_steps > 1: - loss = loss / args.gradient_accumulation_steps - if args.fp16: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - - if (step + 1) % args.gradient_accumulation_steps == 0: - if args.fp16 : - # modify learning rate with special warm up for BERT which FusedAdam doesn't do - scheduler.step() - optimizer.step() - optimizer.zero_grad() - global_step += 1 - torch.npu.launch_graph() - - final_loss = 0.0 - step_time = time.time() - step_start_time - if step % args.log_freq == 0: - # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, - # "learning_rate": optimizer.param_groups[0]['lr']}) - dllogger.log(step=(str(epoch) + '/' + str(int(args.num_train_epochs)), - str(global_step) + '/' + str(int(num_train_optimization_steps)),), - data={"step_time": round(step_time, 4), "data_time": round(data_time, 4), - "step_loss": round(final_loss, 4), - "learning_rate": round(optimizer.param_groups[0]['lr'], 10)}) - step_start_time = time.time() - torch.npu.disable_graph_mode() - - time_to_train = time.time() - train_start - - if args.do_train and is_main_process() and not args.skip_checkpoint: - # Save a trained model and the associated configuration - model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self - output_model_file = os.path.join(args.output_dir, modeling.WEIGHTS_NAME) - torch.save({"model":model_to_save.state_dict()}, output_model_file) - output_config_file = os.path.join(args.output_dir, modeling.CONFIG_NAME) - with open(output_config_file, 'w') as f: - f.write(model_to_save.config.to_json_string()) - - if args.do_predict and (args.local_rank == -1 or is_main_process()): - print("Doing predict...") - if not args.do_train and args.fp16: - model.half() - - eval_examples = read_squad_examples( - input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative) - eval_features = convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - max_query_length=args.max_query_length, - is_training=False) - - dllogger.log(step="PARAMETER", data={"infer_start": True}) - dllogger.log(step="PARAMETER", data={"eval_samples": len(eval_examples)}) - dllogger.log(step="PARAMETER", data={"eval_features": len(eval_features)}) - dllogger.log(step="PARAMETER", data={"predict_batch_size": args.predict_batch_size}) - - all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) - all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) - all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) - # all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) - 
all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.int32) - eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) - # Run prediction for full data - eval_sampler = SequentialSampler(eval_data) - eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size) - - infer_start = time.time() - model.eval() - all_results = [] - dllogger.log(step="PARAMETER", data={"eval_start": True}) - # for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=args.disable_progress_bar): - for input_ids, input_mask, segment_ids, example_indices in eval_dataloader: - if len(all_results) % 1000 == 0: - dllogger.log(step="PARAMETER", data={"sample_number": len(all_results)}) - input_ids = input_ids.to(device) - input_mask = input_mask.to(device) - segment_ids = segment_ids.to(device) - with torch.no_grad(): - batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask) - for i, example_index in enumerate(example_indices): - start_logits = batch_start_logits[i].detach().cpu().tolist() - end_logits = batch_end_logits[i].detach().cpu().tolist() - eval_feature = eval_features[example_index.item()] - unique_id = int(eval_feature.unique_id) - all_results.append(RawResult(unique_id=unique_id, - start_logits=start_logits, - end_logits=end_logits)) - - time_to_infer = time.time() - infer_start - output_prediction_file = os.path.join(args.output_dir, "predictions.json") - output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") - - answers, nbest_answers = get_answers(eval_examples, eval_features, all_results, args) - with open(output_prediction_file, "w") as f: - f.write(json.dumps(answers, indent=4) + "\n") - with open(output_nbest_file, "w") as f: - f.write(json.dumps(nbest_answers, indent=4) + "\n") - - # output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json") - # write_predictions(eval_examples, eval_features, all_results, - # args.n_best_size, args.max_answer_length, - # args.do_lower_case, output_prediction_file, - # output_nbest_file, output_null_log_odds_file, args.verbose_logging, - # args.version_2_with_negative, args.null_score_diff_threshold) - - if args.do_eval and is_main_process(): - import sys - import subprocess - eval_out = subprocess.check_output([sys.executable, args.eval_script, - args.predict_file, args.output_dir + "/predictions.json"]) - scores = str(eval_out).strip() - exact_match = float(scores.split(":")[1].split(",")[0]) - f1 = float(scores.split(":")[2].split("}")[0]) - - if args.do_train: - gpu_count = n_npu - if torch.distributed.is_initialized(): - gpu_count = torch.distributed.get_world_size() - - if args.max_steps == -1: - dllogger.log(step=tuple(), data={"e2e_train_time": time_to_train, - "training_sequences_per_second": len(train_features) * args.num_train_epochs / time_to_train, - "final_loss": final_loss}) - else: - dllogger.log(step=tuple(), data={"e2e_train_time": time_to_train, - "training_sequences_per_second": args.train_batch_size * args.gradient_accumulation_steps \ - * args.max_steps * gpu_count / time_to_train, - "final_loss": final_loss}) - if args.do_predict and is_main_process(): - dllogger.log(step=tuple(), data={"e2e_inference_time": time_to_infer, - "inference_sequences_per_second": len(eval_features) / time_to_infer}) - if args.do_eval and is_main_process(): - dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1}) - -if __name__ == "__main__": - option = 
{} - option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" - option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" - torch.npu.set_option(option) - main() - dllogger.flush() +# coding=utf-8 +# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Run BERT on SQuAD.""" + +from __future__ import absolute_import, division, print_function + +import argparse +import collections +import json +import logging +import math +import os +import random +import sys +from io import open + +import numpy as np +import torch +from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, + TensorDataset) +from torch.utils.data.distributed import DistributedSampler +# from tqdm import tqdm, trange + +from apex import amp +from schedulers import LinearWarmUpScheduler +from file_utils import PYTORCH_PRETRAINED_BERT_CACHE +import modeling +from optimization import BertAdam, warmup_linear +from tokenization import (BasicTokenizer, BertTokenizer, whitespace_tokenize) +from utils import is_main_process, format_step +import dllogger, time +from apex.optimizers import npu_fused_bert_adam, NpuFusedBertAdam + +# torch._C._jit_set_profiling_mode(False) +# torch._C._jit_set_profiling_executor(False) + +if sys.version_info[0] == 2: + import cPickle as pickle +else: + import pickle + +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt='%m/%d/%Y %H:%M:%S', + level=logging.INFO) +logger = logging.getLogger(__name__) + + +class NpuFusedBertAdamV2(NpuFusedBertAdam): + def _group_step(self, group_index): + group = self.param_groups[group_index] + + beta1, beta2 = group['b1'], group['b2'] + + stash = self._amp_stash + combined_group_params = stash.combined_params_indexed_by_group[group_index] + combined_group_grads = stash.combined_grads_indexed_by_group[group_index] + combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] + + for combined_param, combined_grad, combined_param_state in zip(combined_group_params, combined_group_grads, + combined_group_param_states): + if combined_param is None or combined_grad is None: + continue + exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] + if group['t_total'] != -1: + scheduler_fct = npu_fused_bert_adam.SCHEDULES[group['schedule']] + lr_scheduled = group['lr'] * scheduler_fct(combined_param_state['step'] / group['t_total'], + group['warmup']) + else: + lr_scheduled = group['lr'] + combined_param.data, exp_avg, exp_avg_sq = torch.npu_bert_apply_adam(combined_param.data, exp_avg, + exp_avg_sq, lr_scheduled, beta1, beta2, + group['e'], combined_grad.data, + group['max_grad_norm'], 0, + group['weight_decay']) + combined_param_state['step'] += 1 + + +class SquadExample(object): + """ + A single training/test example for the Squad dataset. + For examples without an answer, the start and end position are -1. 
+ """ + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=None): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (self.qas_id) + s += ", question_text: %s" % ( + self.question_text) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.end_position: + s += ", end_position: %d" % (self.end_position) + if self.is_impossible: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, is_training, version_2_with_negative): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file, "r", encoding='utf-8') as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + if version_2_with_negative: + is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer.") + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + answer_length - 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. 
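The whitespace-splitting loop in read_squad_examples above builds char_to_word_offset so that a character-level answer_start from the SQuAD json can be mapped to word-level start/end positions. A minimal standalone sketch of that mapping, separate from the patched file and using a made-up paragraph and answer offset (the real code also treats tabs, newlines and U+202F as whitespace):

# Illustrative sketch only: maps a character offset to whitespace-token
# indices the same way read_squad_examples does. Inputs are made up.
paragraph_text = "Norway is cold"
doc_tokens = []
char_to_word_offset = []
prev_is_whitespace = True
for c in paragraph_text:
    if c == " ":  # the real code also checks \t, \r, \n and U+202F
        prev_is_whitespace = True
    else:
        if prev_is_whitespace:
            doc_tokens.append(c)        # start a new token
        else:
            doc_tokens[-1] += c         # extend the current token
        prev_is_whitespace = False
    char_to_word_offset.append(len(doc_tokens) - 1)

answer_offset, answer_length = 10, 4    # the answer "cold" starts at char 10
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[answer_offset + answer_length - 1]
print(doc_tokens, start_position, end_position)
# ['Norway', 'is', 'cold'] 2 2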
+ actual_text = " ".join(doc_tokens[start_position:(end_position + 1)]) + cleaned_answer_text = " ".join( + whitespace_tokenize(orig_answer_text)) + if actual_text.find(cleaned_answer_text) == -1: + logger.warning("Could not find answer: '%s' vs. '%s'", + actual_text, cleaned_answer_text) + continue + else: + start_position = -1 + end_position = -1 + orig_answer_text = "" + + example = SquadExample( + qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + return examples + + +def convert_examples_to_features(examples, tokenizer, max_seq_length, + doc_stride, max_query_length, is_training): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + features = [] + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + if is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if is_training and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, tokenizer, + example.orig_answer_text) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
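The comment above describes the sliding-window chunking that convert_examples_to_features applies to documents longer than max_tokens_for_doc; the loop that follows implements it. A minimal standalone sketch of the same chunking, separate from the patched file and run on made-up sizes, showing how doc_stride yields overlapping spans:

# Illustrative sketch only: the sliding-window span generation used in
# convert_examples_to_features, exercised with made-up sizes.
import collections

def make_doc_spans(num_doc_tokens, max_tokens_for_doc, doc_stride):
    DocSpan = collections.namedtuple("DocSpan", ["start", "length"])
    doc_spans = []
    start_offset = 0
    while start_offset < num_doc_tokens:
        length = min(num_doc_tokens - start_offset, max_tokens_for_doc)
        doc_spans.append(DocSpan(start=start_offset, length=length))
        if start_offset + length == num_doc_tokens:
            break
        start_offset += min(length, doc_stride)
    return doc_spans

print(make_doc_spans(10, 6, 4))
# [DocSpan(start=0, length=6), DocSpan(start=4, length=6)]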
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. + doc_start = doc_span.start + doc_end = doc_span.start + doc_span.length - 1 + out_of_span = False + if not (tok_start_position >= doc_start and + tok_end_position <= doc_end): + out_of_span = True + if out_of_span: + start_position = 0 + end_position = 0 + else: + doc_offset = len(query_tokens) + 2 + start_position = tok_start_position - doc_start + doc_offset + end_position = tok_end_position - doc_start + doc_offset + if is_training and example.is_impossible: + start_position = 0 + end_position = 0 + + features.append( + InputFeatures( + unique_id=unique_id, + example_index=example_index, + doc_span_index=doc_span_index, + tokens=tokens, + token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + start_position=start_position, + end_position=end_position, + is_impossible=example.is_impossible)) + unique_id += 1 + + return features + + +def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, + orig_answer_text): + """Returns tokenized answer spans that better match the annotated answer.""" + + # The SQuAD annotations are character based. We first project them to + # whitespace-tokenized words. But then after WordPiece tokenization, we can + # often find a "better match". For example: + # + # Question: What year was John Smith born? + # Context: The leader was John Smith (1895-1943). + # Answer: 1895 + # + # The original whitespace-tokenized answer will be "(1895-1943).". However + # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match + # the exact answer, 1895. 
+ # + # However, this is not always possible. Consider the following: + # + # Question: What country is the top exporter of electornics? + # Context: The Japanese electronics industry is the lagest in the world. + # Answer: Japan + # + # In this case, the annotator chose "Japan" as a character sub-span of + # the word "Japanese". Since our WordPiece tokenizer does not split + # "Japanese", we just use "Japanese" as the annotation. This is fairly rare + # in SQuAD, but does happen. + tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + + +def get_answers(examples, features, results, args): + predictions = collections.defaultdict(list) #it is possible that one example corresponds to multiple features + Prediction = collections.namedtuple('Prediction', ['text', 'start_logit', 'end_logit']) + + if args.version_2_with_negative: + null_vals = collections.defaultdict(lambda: (float("inf"),0,0)) + for ex, feat, result in match_results(examples, features, results): + start_indices = _get_best_indices(result.start_logits, args.n_best_size) + end_indices = _get_best_indices(result.end_logits, args.n_best_size) + prelim_predictions = get_valid_prelim_predictions(start_indices, end_indices, feat, result, args) + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + if args.version_2_with_negative: + score = result.start_logits[0] + result.end_logits[0] + if score < null_vals[ex.qas_id][0]: + null_vals[ex.qas_id] = (score, result.start_logits[0], result.end_logits[0]) + + curr_predictions = [] + seen_predictions = [] + for pred in prelim_predictions: + if len(curr_predictions) == args.n_best_size: + break + if pred.start_index > 0: # this is a non-null prediction 
TODO: this probably is irrelevant + final_text = get_answer_text(ex, feat, pred, args) + if final_text in seen_predictions: + continue + else: + final_text = "" + + seen_predictions.append(final_text) + curr_predictions.append(Prediction(final_text, pred.start_logit, pred.end_logit)) + predictions[ex.qas_id] += curr_predictions + + #Add empty prediction + if args.version_2_with_negative: + for qas_id in predictions.keys(): + predictions[qas_id].append(Prediction('', + null_vals[ex.qas_id][1], + null_vals[ex.qas_id][2])) + + + nbest_answers = collections.defaultdict(list) + answers = {} + for qas_id, preds in predictions.items(): + nbest = sorted( + preds, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True)[:args.n_best_size] + + # In very rare edge cases we could only have single null prediction. + # So we just create a nonce prediction in this case to avoid failure. + if not nbest: + nbest.append(Prediction(text="empty", start_logit=0.0, end_logit=0.0)) + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry and entry.text: + best_non_null_entry = entry + probs = _compute_softmax(total_scores) + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_answers[qas_id].append(output) + if args.version_2_with_negative: + score_diff = null_vals[qas_id][0] - best_non_null_entry.start_logit - best_non_null_entry.end_logit + if score_diff > args.null_score_diff_threshold: + answers[qas_id] = "" + else: + answers[qas_id] = best_non_null_entry.text + else: + answers[qas_id] = nbest_answers[qas_id][0]['text'] + + return answers, nbest_answers + +def get_answer_text(example, feature, pred, args): + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. 
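The de-tokenization step below reverses WordPiece splitting by dropping the "##" continuation markers before the predicted answer text is projected back onto the original document. A tiny standalone illustration, separate from the patched file, with made-up tokens:

# Illustrative sketch only: how the " ##" replacement below joins
# WordPiece sub-tokens back into surface text. Tokens are made up.
tok_tokens = ["the", "pre", "##train", "##ed", "model"]
tok_text = " ".join(tok_tokens).replace(" ##", "").replace("##", "")
print(tok_text)
# the pretrained model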
+ tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, args.do_lower_case, args.verbose_logging) + return final_text + +def get_valid_prelim_predictions(start_indices, end_indices, feature, result, args): + + _PrelimPrediction = collections.namedtuple( + "PrelimPrediction", + ["start_index", "end_index", "start_logit", "end_logit"]) + prelim_predictions = [] + for start_index in start_indices: + for end_index in end_indices: + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > args.max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + return prelim_predictions + +def match_results(examples, features, results): + unique_f_ids = set([f.unique_id for f in features]) + unique_r_ids = set([r.unique_id for r in results]) + matching_ids = unique_f_ids & unique_r_ids + features = [f for f in features if f.unique_id in matching_ids] + results = [r for r in results if r.unique_id in matching_ids] + features.sort(key=lambda x: x.unique_id) + results.sort(key=lambda x: x.unique_id) + + for f, r in zip(features, results): #original code assumes strict ordering of examples. TODO: rewrite this + yield examples[f.example_index], f, r + +def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heruistic between + # `pred_text` and `orig_text` to get a character-to-charcter alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. 
If they are the same + # length, we assume the characters are one-to-one aligned. + + tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + + start_position = tok_text.find(pred_text) + if start_position == -1: + if verbose_logging: + logger.info( + "Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + if verbose_logging: + logger.info("Length not equal after stripping spaces: '%s' vs '%s'", + orig_ns_text, tok_ns_text) + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. + tok_s_to_ns_map = {} + for (i, tok_index) in tok_ns_to_s_map.items(): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + if verbose_logging: + logger.info("Couldn't map start position") + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + if verbose_logging: + logger.info("Couldn't map end position") + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indices(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + + best_indices = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indices.append(index_and_score[i][0]) + return best_indices + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + + + +# from apex.multi_tensor_apply import multi_tensor_applier +# class GradientClipper: +# """ +# Clips gradient norm of an iterable of parameters. 
+# """ +# def __init__(self, max_grad_norm): +# self.max_norm = max_grad_norm +# if multi_tensor_applier.available: +# import amp_C +# self._overflow_buf = torch.cuda.IntTensor([0]) +# self.multi_tensor_l2norm = amp_C.multi_tensor_l2norm +# self.multi_tensor_scale = amp_C.multi_tensor_scale +# else: +# raise RuntimeError('Gradient clipping requires cuda extensions') +# +# def step(self, parameters): +# l = [p.grad for p in parameters if p.grad is not None] +# total_norm, _ = multi_tensor_applier(self.multi_tensor_l2norm, self._overflow_buf, [l], False) +# total_norm = total_norm.item() +# if (total_norm == float('inf')): return +# clip_coef = self.max_norm / (total_norm + 1e-6) +# if clip_coef < 1: +# multi_tensor_applier(self.multi_tensor_scale, self._overflow_buf, [l, l], clip_coef) + + +def main(): + parser = argparse.ArgumentParser() + + ## Required parameters + parser.add_argument("--bert_model", default=None, type=str, required=True, + help="Bert pre-trained model selected in the list: bert-base-uncased, " + "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " + "bert-base-multilingual-cased, bert-base-chinese.") + parser.add_argument("--output_dir", default=None, type=str, required=True, + help="The output directory where the model checkpoints and predictions will be written.") + parser.add_argument("--init_checkpoint", + default=None, + type=str, + required=True, + help="The checkpoint file from pretraining") + + ## Other parameters + parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json") + parser.add_argument("--predict_file", default=None, type=str, + help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") + parser.add_argument("--max_seq_length", default=384, type=int, + help="The maximum total input sequence length after WordPiece tokenization. Sequences " + "longer than this will be truncated, and sequences shorter than this will be padded.") + parser.add_argument("--doc_stride", default=128, type=int, + help="When splitting up a long document into chunks, how much stride to take between chunks.") + parser.add_argument("--max_query_length", default=64, type=int, + help="The maximum number of tokens for the question. Questions longer than this will " + "be truncated to this length.") + parser.add_argument("--do_train", action='store_true', help="Whether to run training.") + parser.add_argument("--do_predict", action='store_true', help="Whether to run eval on the dev set.") + parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") + parser.add_argument("--predict_batch_size", default=8, type=int, help="Total batch size for predictions.") + parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") + parser.add_argument("--num_train_epochs", default=3.0, type=float, + help="Total number of training epochs to perform.") + parser.add_argument("--max_steps", default=-1.0, type=float, + help="Total number of training steps to perform.") + parser.add_argument("--warmup_proportion", default=0.1, type=float, + help="Proportion of training to perform linear learning rate warmup for. 
E.g., 0.1 = 10%% " + "of training.") + parser.add_argument("--n_best_size", default=20, type=int, + help="The total number of n-best predictions to generate in the nbest_predictions.json " + "output file.") + parser.add_argument("--max_answer_length", default=30, type=int, + help="The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another.") + parser.add_argument("--verbose_logging", action='store_true', + help="If true, all of the warnings related to data processing will be printed. " + "A number of warnings are expected for a normal SQuAD evaluation.") + parser.add_argument("--no_cuda", + action='store_true', + help="Whether not to use CUDA when available") + parser.add_argument('--seed', + type=int, + default=42, + help="random seed for initialization") + parser.add_argument('--gradient_accumulation_steps', + type=int, + default=1, + help="Number of updates steps to accumulate before performing a backward/update pass.") + parser.add_argument("--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models.") + parser.add_argument("--local_rank", + type=int, + default=-1, + help="local_rank for distributed training on gpus") + parser.add_argument('--fp16', + default=False, + action='store_true', + help="Mixed precision training") + parser.add_argument('--amp', + default=False, + action='store_true', + help="Mixed precision training") + parser.add_argument('--loss_scale', + type=float, default=0, + help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n" + "0 (default value): dynamic loss scaling.\n" + "Positive power of 2: static loss scaling value.\n") + parser.add_argument('--version_2_with_negative', + action='store_true', + help='If true, the SQuAD examples contain some that do not have an answer.') + parser.add_argument('--null_score_diff_threshold', + type=float, default=0.0, + help="If null_score - best_non_null is greater than the threshold predict null.") + parser.add_argument('--vocab_file', + type=str, default=None, required=True, + help="Vocabulary mapping/file BERT was pretrainined on") + parser.add_argument("--config_file", + default=None, + type=str, + required=True, + help="The BERT model config") + parser.add_argument('--log_freq', + type=int, default=1, + help='frequency of logging loss.') + parser.add_argument('--json-summary', type=str, default="results/dllogger.json", + help='If provided, the json summary will be written to' + 'the specified file.') + parser.add_argument("--eval_script", + help="Script to evaluate squad predictions", + default="evaluate.py", + type=str) + parser.add_argument("--do_eval", + action='store_true', + help="Whether to use evaluate accuracy of predictions") + parser.add_argument("--use_env", + action='store_true', + help="Whether to read local rank from ENVVAR") + parser.add_argument('--skip_checkpoint', + default=False, + action='store_true', + help="Whether to save checkpoints") + parser.add_argument('--disable-progress-bar', + default=True, + action='store_true', + help='Disable tqdm progress bar') + parser.add_argument("--skip_cache", + default=False, + action='store_true', + help="Whether to cache train features") + parser.add_argument("--cache_dir", + default=None, + type=str, + help="Location to cache train feaures. 
Will default to the dataset directory") + parser.add_argument('--use_npu', + default=True, + action='store_true', + help='whether to use npu') + parser.add_argument('--npu_id', + type=int, default=0, + help='npu device id.') + parser.add_argument('--num_npu', + type=int, default=1, + help='number of npu devices to use.') + parser.add_argument("--addr", + default=None, + type=str, + help="addr used for distributed training") + + args = parser.parse_args() + args.fp16 = args.fp16 or args.amp + + if args.local_rank == -1 or args.no_cuda: + if args.use_npu: + torch.npu.set_device("npu:%d" % args.npu_id) + device = torch.device("npu:%d" % args.npu_id) + n_npu = 1 # this is the device number of one training process, usually one + else: + device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") + n_npu = torch.cuda.device_count() + else: + if args.use_npu: + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29668' + torch.npu.set_device("npu:%d" % args.local_rank) + device = torch.device("npu:%d" % args.local_rank) + torch.distributed.init_process_group(backend='hccl', world_size=8, rank=args.local_rank) + n_npu = 1 + else: + torch.cuda.set_device(args.local_rank) + device = torch.device("cuda", args.local_rank) + # Initializes the distributed backend which will take care of sychronizing nodes/GPUs + torch.distributed.init_process_group(backend='nccl', init_method='env://') + n_npu = 1 + + if is_main_process(): + dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, + filename=args.json_summary), + dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE, step_format=format_step)]) + else: + dllogger.init(backends=[]) + + # print("device: {} n_npu: {}, distributed training: {}, 16-bits training: {}".format( + # device, n_npu, bool(args.local_rank != -1), args.fp16)) + print("train on device {}, rank {}".format(device, args.local_rank)) + + dllogger.log(step="PARAMETER", data={"Config": [str(args)]}) + + if args.gradient_accumulation_steps < 1: + raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( + args.gradient_accumulation_steps)) + + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps + + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + dllogger.log(step="PARAMETER", data={"SEED": args.seed}) + + #if n_npu > 0: + # torch.cuda.manual_seed_all(args.seed) + + if not args.do_train and not args.do_predict: + raise ValueError("At least one of `do_train` or `do_predict` must be True.") + + if args.do_train: + if not args.train_file: + raise ValueError( + "If `do_train` is True, then `train_file` must be specified.") + if args.do_predict: + if not args.predict_file: + raise ValueError( + "If `do_predict` is True, then `predict_file` must be specified.") + + if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and os.listdir(args.output_dir)!=['logfile.txt']: + print("WARNING: Output directory {} already exists and is not empty.".format(args.output_dir), os.listdir(args.output_dir)) + if not os.path.exists(args.output_dir) and is_main_process(): + os.makedirs(args.output_dir) + + tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case, max_len=512) # for bert large + # tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) + + train_examples = None + num_train_optimization_steps = None + if args.do_train: + 
train_examples = read_squad_examples( + input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative) + num_train_optimization_steps = int( + len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs + if args.local_rank != -1: + num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size() + + # Prepare model + config = modeling.BertConfig.from_json_file(args.config_file) + # Padding for divisibility by 8 + if config.vocab_size % 8 != 0: + config.vocab_size += 8 - (config.vocab_size % 8) + + + modeling.ACT2FN["bias_gelu"] = modeling.bias_gelu_training + model = modeling.BertForQuestionAnswering(config) + # model = modeling.BertForQuestionAnswering.from_pretrained(args.bert_model, + # cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))) + dllogger.log(step="PARAMETER", data={"loading_checkpoint": True}) + model.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu')["model"], strict=False) + dllogger.log(step="PARAMETER", data={"loaded_checkpoint": True}) + model.to(device) + num_weights = sum([p.numel() for p in model.parameters() if p.requires_grad]) + dllogger.log(step="PARAMETER", data={"model_weights_num":num_weights}) + + # Prepare optimizer + param_optimizer = list(model.named_parameters()) + + # hack to remove pooler, which is not used + # thus it produce None grad that break apex + param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]] + + no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] + optimizer_grouped_parameters = [ + {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, + {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} + ] + if args.do_train: + if args.fp16: + # try: + # from apex.optimizers import NpuFusedAdam + # except ImportError: + # raise ImportError( + # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") + # optimizer = NpuFusedAdam(optimizer_grouped_parameters, + # lr=args.learning_rate) + + optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, + lr=args.learning_rate, + warmup=args.warmup_proportion, + t_total=num_train_optimization_steps) + + if args.loss_scale == 0: + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, + loss_scale="dynamic") + else: + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, loss_scale=args.loss_scale, combine_grad=True) + if args.do_train: + scheduler = LinearWarmUpScheduler(optimizer, warmup=args.warmup_proportion, total_steps=num_train_optimization_steps) + + else: + optimizer = BertAdam(optimizer_grouped_parameters, + lr=args.learning_rate, + warmup=args.warmup_proportion, + t_total=num_train_optimization_steps) + + #model.qa_outputs.bias.data = model.qa_outputs.bias.data.float() # for ascend910 special + if args.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], broadcast_buffers=False, find_unused_parameters=True) + elif n_npu > 1: + model = torch.nn.DataParallel(model) + + global_step = 0 + if args.do_train: + print("Doing train...") + if args.cache_dir is None: + cached_train_features_file = args.train_file + '_{0}_{1}_{2}_{3}'.format( + list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), 
str(args.doc_stride), + str(args.max_query_length)) + else: + cached_train_features_file = args.cache_dir.strip('/') + '/' + args.train_file.split('/')[-1] + '_{0}_{1}_{2}_{3}'.format( + list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), str(args.doc_stride), + str(args.max_query_length)) + + train_features = None + try: + with open(cached_train_features_file, "rb") as reader: + train_features = pickle.load(reader) + except: + train_features = convert_examples_to_features( + examples=train_examples, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + is_training=True) + + if not args.skip_cache and is_main_process(): + dllogger.log(step="PARAMETER", data={"Cached_train features_file": cached_train_features_file}) + with open(cached_train_features_file, "wb") as writer: + pickle.dump(train_features, writer) + + dllogger.log(step="PARAMETER", data={"train_start": True}) + dllogger.log(step="PARAMETER", data={"training_samples": len(train_examples)}) + dllogger.log(step="PARAMETER", data={"training_features": len(train_features)}) + dllogger.log(step="PARAMETER", data={"train_batch_size":args.train_batch_size}) + dllogger.log(step="PARAMETER", data={"steps":num_train_optimization_steps}) + # all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) + # all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) + # all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) + # all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long) + # all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long) + + all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.int32) + all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.int32) + all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.int32) + all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.int32) + all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.int32) + + train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, + all_start_positions, all_end_positions) + if args.local_rank == -1: + train_sampler = RandomSampler(train_data) + else: + train_sampler = DistributedSampler(train_data) + train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size * n_npu, num_workers=2, pin_memory=True, drop_last=True) + + model.train() + #gradClipper = GradientClipper(max_grad_norm=1.0) + final_loss = None + train_start = time.time() + for epoch in range(int(args.num_train_epochs)): + #train_iter = tqdm(train_dataloader, desc="Iteration", disable=args.disable_progress_bar) if is_main_process() else train_dataloader + train_iter = train_dataloader + step_start_time = time.time() + for step, batch in enumerate(train_iter): + torch.npu.enable_graph_mode() + # Terminate early for benchmarking + data_time = time.time() - step_start_time + if args.max_steps > 0 and global_step > args.max_steps: + break + + if n_npu == 1: + batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self + input_ids, input_mask, segment_ids, start_positions, end_positions = batch + start_logits, end_logits = model(input_ids, segment_ids, input_mask) + # If we are on multi-GPU, 
split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + loss = (start_loss + end_loss) / 2 + if n_npu > 1: + loss = loss.mean() # mean() to average on multi-gpu. + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + if args.fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + + if (step + 1) % args.gradient_accumulation_steps == 0: + if args.fp16 : + # modify learning rate with special warm up for BERT which FusedAdam doesn't do + scheduler.step() + optimizer.step() + optimizer.zero_grad() + global_step += 1 + torch.npu.launch_graph() + + final_loss = 0.0 + step_time = time.time() - step_start_time + if step % args.log_freq == 0: + # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, + # "learning_rate": optimizer.param_groups[0]['lr']}) + dllogger.log(step=(str(epoch) + '/' + str(int(args.num_train_epochs)), + str(global_step) + '/' + str(int(num_train_optimization_steps)),), + data={"step_time": round(step_time, 4), "data_time": round(data_time, 4), + "step_loss": round(final_loss, 4), + "learning_rate": round(optimizer.param_groups[0]['lr'], 10)}) + step_start_time = time.time() + torch.npu.disable_graph_mode() + + time_to_train = time.time() - train_start + + if args.do_train and is_main_process() and not args.skip_checkpoint: + # Save a trained model and the associated configuration + model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self + output_model_file = os.path.join(args.output_dir, modeling.WEIGHTS_NAME) + torch.save({"model":model_to_save.state_dict()}, output_model_file) + output_config_file = os.path.join(args.output_dir, modeling.CONFIG_NAME) + with open(output_config_file, 'w') as f: + f.write(model_to_save.config.to_json_string()) + + if args.do_predict and (args.local_rank == -1 or is_main_process()): + print("Doing predict...") + if not args.do_train and args.fp16: + model.half() + + eval_examples = read_squad_examples( + input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative) + eval_features = convert_examples_to_features( + examples=eval_examples, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + max_query_length=args.max_query_length, + is_training=False) + + dllogger.log(step="PARAMETER", data={"infer_start": True}) + dllogger.log(step="PARAMETER", data={"eval_samples": len(eval_examples)}) + dllogger.log(step="PARAMETER", data={"eval_features": len(eval_features)}) + dllogger.log(step="PARAMETER", data={"predict_batch_size": args.predict_batch_size}) + + all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) + # all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) + 
all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.int32) + eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) + # Run prediction for full data + eval_sampler = SequentialSampler(eval_data) + eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size) + + infer_start = time.time() + model.eval() + all_results = [] + dllogger.log(step="PARAMETER", data={"eval_start": True}) + # for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=args.disable_progress_bar): + for input_ids, input_mask, segment_ids, example_indices in eval_dataloader: + if len(all_results) % 1000 == 0: + dllogger.log(step="PARAMETER", data={"sample_number": len(all_results)}) + input_ids = input_ids.to(device) + input_mask = input_mask.to(device) + segment_ids = segment_ids.to(device) + with torch.no_grad(): + batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask) + for i, example_index in enumerate(example_indices): + start_logits = batch_start_logits[i].detach().cpu().tolist() + end_logits = batch_end_logits[i].detach().cpu().tolist() + eval_feature = eval_features[example_index.item()] + unique_id = int(eval_feature.unique_id) + all_results.append(RawResult(unique_id=unique_id, + start_logits=start_logits, + end_logits=end_logits)) + + time_to_infer = time.time() - infer_start + output_prediction_file = os.path.join(args.output_dir, "predictions.json") + output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") + + answers, nbest_answers = get_answers(eval_examples, eval_features, all_results, args) + with open(output_prediction_file, "w") as f: + f.write(json.dumps(answers, indent=4) + "\n") + with open(output_nbest_file, "w") as f: + f.write(json.dumps(nbest_answers, indent=4) + "\n") + + # output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json") + # write_predictions(eval_examples, eval_features, all_results, + # args.n_best_size, args.max_answer_length, + # args.do_lower_case, output_prediction_file, + # output_nbest_file, output_null_log_odds_file, args.verbose_logging, + # args.version_2_with_negative, args.null_score_diff_threshold) + + if args.do_eval and is_main_process(): + import sys + import subprocess + eval_out = subprocess.check_output([sys.executable, args.eval_script, + args.predict_file, args.output_dir + "/predictions.json"]) + scores = str(eval_out).strip() + exact_match = float(scores.split(":")[1].split(",")[0]) + f1 = float(scores.split(":")[2].split("}")[0]) + + if args.do_train: + gpu_count = n_npu + if torch.distributed.is_initialized(): + gpu_count = torch.distributed.get_world_size() + + if args.max_steps == -1: + dllogger.log(step=tuple(), data={"e2e_train_time": time_to_train, + "training_sequences_per_second": len(train_features) * args.num_train_epochs / time_to_train, + "final_loss": final_loss}) + else: + dllogger.log(step=tuple(), data={"e2e_train_time": time_to_train, + "training_sequences_per_second": args.train_batch_size * args.gradient_accumulation_steps \ + * args.max_steps * gpu_count / time_to_train, + "final_loss": final_loss}) + if args.do_predict and is_main_process(): + dllogger.log(step=tuple(), data={"e2e_inference_time": time_to_infer, + "inference_sequences_per_second": len(eval_features) / time_to_infer}) + if args.do_eval and is_main_process(): + dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1}) + +if __name__ == "__main__": + option = 
{} + option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" + option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" + torch.npu.set_option(option) + main() + dllogger.flush() diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_bert_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_bert_8p.sh index f82c6f6fe267602380f6f65b11eacef29600be79..a29b7094281b8a9897bdfac7b25283b3783aa277 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_bert_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_bert_8p.sh @@ -1,189 +1,189 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export BMMV2_ENABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" -ckpt_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Bert-Squad_ID0470_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=32 -#训练step -train_steps= -#学习率 -learning_rate=2e-4 - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_fp32_to_fp16" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cp run_squad.py $cur_path/../ -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - export ASCEND_DEVICE_ID=$RANK_ID - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - nohup python3.7 run_squad.py \ - --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \ - --bert_model 
bert-large-uncased \ - --do_train \ - --train_file ${data_path}/train-v1.1.json \ - --train_batch_size ${batch_size} \ - --do_predict \ - --predict_batch_size ${batch_size} \ - --predict_file ${data_path}/dev-v1.1.json \ - --learning_rate ${learning_rate} \ - --num_train_epochs ${train_epochs} \ - --seed 1 \ - --fp16 \ - --max_steps 100 \ - --use_npu \ - --loss_scale 4096 \ - --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \ - --do_eval \ - --eval_script ${data_path}/evaluate-v1.1.py \ - --npu_id ${ASCEND_DEVICE_ID} \ - --do_lower_case \ - --output_dir ${cur_path}/../results \ - --config_file bert_config.json \ - --num_npu 8 \ - --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` - -FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_bert'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -rm -rf ${data_path}/train-v1.1.json_bert-large-uncased_384_128_64 +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export BMMV2_ENABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" +ckpt_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Bert-Squad_ID0470_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=32 +#训练step +train_steps= +#学习率 +learning_rate=2e-4 + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == 
--help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cp run_squad.py $cur_path/../ +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + export ASCEND_DEVICE_ID=$RANK_ID + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3.7 run_squad.py \ + --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \ + --bert_model bert-large-uncased \ + --do_train \ + --train_file ${data_path}/train-v1.1.json \ + --train_batch_size ${batch_size} \ + --do_predict \ + --predict_batch_size ${batch_size} \ + --predict_file ${data_path}/dev-v1.1.json \ + --learning_rate ${learning_rate} \ + --num_train_epochs ${train_epochs} \ + --seed 1 \ + --fp16 \ + --max_steps 100 \ + --use_npu \ + --loss_scale 4096 \ + --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \ + --do_eval \ + --eval_script ${data_path}/evaluate-v1.1.py \ + --npu_id ${ASCEND_DEVICE_ID} \ + --do_lower_case \ + --output_dir ${cur_path}/../results \ + --config_file bert_config.json \ + --num_npu 8 \ + --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` + +FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : 
$FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_bert'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf ${data_path}/train-v1.1.json_bert-large-uncased_384_128_64 export BMMV2_ENABLE=0 \ No newline at end of file diff --git a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/bpe_3w_new/readme.md b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/bpe_3w_new/readme.md index 37ff1ffc758431750720898894fec570c933a9f2..0b57ec0161b8866b8ddc1a2c8eeaafdad95cc089 100644 --- a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/bpe_3w_new/readme.md +++ b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/bpe_3w_new/readme.md @@ -1,2 +1,2 @@ -由于文件较大已删除,如需使用请到如下地址下载相应文件 +由于文件较大已删除,如需使用请到如下地址下载相应文件 - https://github.com/TsinghuaAI/CPM-1-Generate/tree/main/bpe_3w_new \ No newline at end of file diff --git a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/test/eval_1p.sh b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/test/eval_1p.sh index ef05fefbeffaa2dd68116cbc3cd5c721a07a8c73..64b5f1196a756ec6b5a14cc4775743baa2feaf9c 100644 --- a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/test/eval_1p.sh +++ b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/test/eval_1p.sh @@ -1,116 +1,116 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi -source ${test_path_dir}/env_npu.sh -# 数据集路径,保持为空,不需要修改 -DATA_DIR="" -CHECKPOINT_PATH="" - -#网络名称,同目录名称 -Network="CPM_large_1p" -#训练batch_size -batch_size=1 -#集合通信参数 -RANK_SIZE=1 -MPSIZE=1 -RESULTS_DIR="../results/" -MODEL_NAME="zeroshot-test" -TOKENIZER_PATH="bpe_3w_new/" -NLAYERS=32 -NHIDDEN=2560 -NATT=32 -MAXSEQLEN=1024 -ASCEND_DEVICE_ID=0 - -#创建DeviceID输出目录,不需要修改 -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -taskset -c 0-25 python3.7 -m torch.distributed.launch --master_port ${1-1122} --nproc_per_node ${RANK_SIZE} zero-shot_chid.py \ - --data_dir ${DATA_DIR} \ - --model-parallel-size ${MPSIZE} \ - --num-layers ${NLAYERS} \ - --hidden-size ${NHIDDEN} \ - --load ${CHECKPOINT_PATH} \ - --num-attention-heads ${NATT} \ - --seq-length ${MAXSEQLEN} \ - --max-position-embeddings 1024 \ - --tokenizer-type GPT2BPETokenizer \ - --fp16 \ - --out-seq-length 512 \ - --tokenizer-path ${TOKENIZER_PATH} \ - --vocab-size 30000 \ - --batch-size ${batch_size} \ - --seed 23333 \ - --results_dir ${RESULTS_DIR} \ - --model_name ${MODEL_NAME}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - - - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" - -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'Test:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print}' |awk -F '[ ]' '{print $17}'` -FPS=${FPS#* } - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'Acc:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print}'|awk -F "Acc:" '{print $NF}'|awk -F " " '{print $1}'` -train_accuracy=${train_accuracy%*${train_accuracy:(-6)}} -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep -r "Test:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'Loss' '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo 
"TrainAccuracy = ${train_accuracy}">> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +source ${test_path_dir}/env_npu.sh +# 数据集路径,保持为空,不需要修改 +DATA_DIR="" +CHECKPOINT_PATH="" + +#网络名称,同目录名称 +Network="CPM_large_1p" +#训练batch_size +batch_size=1 +#集合通信参数 +RANK_SIZE=1 +MPSIZE=1 +RESULTS_DIR="../results/" +MODEL_NAME="zeroshot-test" +TOKENIZER_PATH="bpe_3w_new/" +NLAYERS=32 +NHIDDEN=2560 +NATT=32 +MAXSEQLEN=1024 +ASCEND_DEVICE_ID=0 + +#创建DeviceID输出目录,不需要修改 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID/ +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +taskset -c 0-25 python3.7 -m torch.distributed.launch --master_port ${1-1122} --nproc_per_node ${RANK_SIZE} zero-shot_chid.py \ + --data_dir ${DATA_DIR} \ + --model-parallel-size ${MPSIZE} \ + --num-layers ${NLAYERS} \ + --hidden-size ${NHIDDEN} \ + --load ${CHECKPOINT_PATH} \ + --num-attention-heads ${NATT} \ + --seq-length ${MAXSEQLEN} \ + --max-position-embeddings 1024 \ + --tokenizer-type GPT2BPETokenizer \ + --fp16 \ + --out-seq-length 512 \ + --tokenizer-path ${TOKENIZER_PATH} \ + --vocab-size 30000 \ + --batch-size ${batch_size} \ + --seed 23333 \ + --results_dir ${RESULTS_DIR} \ + --model_name ${MODEL_NAME}> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'Test:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print}' |awk -F '[ ]' '{print $17}'` +FPS=${FPS#* } + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'Acc:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print}'|awk -F "Acc:" '{print $NF}'|awk -F " " '{print $1}'` +train_accuracy=${train_accuracy%*${train_accuracy:(-6)}} +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "Test:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'Loss' '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" 
>> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}">> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_full_8p.sh b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_full_8p.sh index 7339a821a71f2ff4ec35ae6884deb2c09a9583fe..7044c327eb0209854e52425aae803ac8f8969d99 100644 --- a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_full_8p.sh @@ -1,178 +1,178 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="GRU_ID0104_for_PyTorch" -#训练epoch -train_epochs=10 -#训练batch_size -batch_size=4096 -#训练step -train_steps= -#学习率 -learning_rate= - - - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path -for i in $(seq 0 7) -do - python3 ${cur_path}/../gru_8p.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --data-dir $data_path \ - --seed 123456 \ - --workers 160 \ - --print-freq 1 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch-size $batch_size \ - --epoch $train_epochs \ - --rank 0 \ - --npu $i \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp \ - --bleu-npu 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -done -wait - -###验证精度 -python3.7 ${cur_path}/../bleu_score.py \ - --workers 40 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --data-dir $data_path \ - --world-size 1 \ - --npu 0 \ - --batch-size 
512 \ - --epochs 10 \ - --rank 0 \ - --amp \ - --bleu-npu 0 \ - --ckptpath ./seq2seq-gru-model.pth.tar >> $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` -FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'BLEU' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $4}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}'| tr -s '\n' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="GRU_ID0104_for_PyTorch" +#训练epoch +train_epochs=10 +#训练batch_size +batch_size=4096 +#训练step +train_steps= +#学习率 +learning_rate= + + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + 
PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path +for i in $(seq 0 7) +do + python3 ${cur_path}/../gru_8p.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --data-dir $data_path \ + --seed 123456 \ + --workers 160 \ + --print-freq 1 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch-size $batch_size \ + --epoch $train_epochs \ + --rank 0 \ + --npu $i \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp \ + --bleu-npu 0 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +done +wait + +###验证精度 +python3.7 ${cur_path}/../bleu_score.py \ + --workers 40 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --data-dir $data_path \ + --world-size 1 \ + --npu 0 \ + --batch-size 512 \ + --epochs 10 \ + --rank 0 \ + --amp \ + --bleu-npu 0 \ + --ckptpath ./seq2seq-gru-model.pth.tar >> $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|tail -1|awk '{print $NF}'` +FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'BLEU' ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $4}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'Loss' '{print $2}'|awk '{print $1}'| tr -s '\n' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo 
"ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_1p.sh index 5ad15788e3c3d5ebfbc12164aeadc84023aaa711..84eddb91f6438e92fa3709512d255ab2a44cc6eb 100644 --- a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_1p.sh @@ -1,147 +1,147 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="GRU_ID0104_for_PyTorch" -#训练epoch -train_epochs=4 -#训练batch_size -batch_size=1536 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -python3 ${cur_path}/../gru_1p.py \ - --data-dir $data_path \ - --workers 32 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --world-size 1 \ - --npu $ASCEND_DEVICE_ID \ - --batch-size $batch_size \ - --epochs $train_epochs \ - --rank 0 \ - --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", 
'${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="GRU_ID0104_for_PyTorch" +#训练epoch +train_epochs=4 +#训练batch_size +batch_size=1536 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +python3 ${cur_path}/../gru_1p.py \ + --data-dir $data_path \ + --workers 32 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --world-size 1 \ + --npu $ASCEND_DEVICE_ID \ + --batch-size $batch_size \ + --epochs $train_epochs \ + --rank 0 \ + --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + 
+#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu|awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_8p.sh index ddc94fbb5f8c295d3109f9cae328b04aa4c2290b..d941c6ce7b29b4aaab80cc26a699df471372dcaa 100644 --- a/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/nlp/GRU_for_PyTorch/test/train_performance_8p.sh @@ -1,181 +1,181 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#集合通信参数,不需要修改 -export HCCL_WHITELIST_DISABLE=1 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 -# source env.sh -#RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="GRU_ID0104_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=12288 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.045 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--amp" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd 
$cur_path - -#设置环境变量,不需要修改 -echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -if [ $(uname -m) = "aarch64" ] -then - for i in $(seq 0 7) - do - let p_start=0+20*i - let p_end=19+20*i - taskset -c $p_start-$p_end python3 ${cur_path}/../gru_8p.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --data-dir $data_path \ - --seed 123456 \ - --workers 20 \ - --print-freq 1 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch-size $batch_size \ - --epoch $train_epochs \ - --rank 0 \ - --npu $i \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - done -else - for i in $(seq 0 7) - do - python3 ${cur_path}/../gru_8p.py \ - --addr=$(hostname -I |awk '{print $1}') \ - --data-dir $data_path \ - --seed 123456 \ - --workers 80 \ - --print-freq 1 \ - --dist-url 'tcp://127.0.0.1:50000' \ - --dist-backend 'hccl' \ - --multiprocessing-distributed \ - --world-size 1 \ - --batch-size 12288 \ - --epoch 10 \ - --rank 0 \ - --npu $i \ - --device-list '0,1,2,3,4,5,6,7' \ - --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - done -fi -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` - -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu |awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 
+cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#集合通信参数,不需要修改 +export HCCL_WHITELIST_DISABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 +# source env.sh +#RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +# export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="GRU_ID0104_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=12288 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.045 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--amp" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +if [ $(uname -m) = "aarch64" ] +then + for i in $(seq 0 7) + do + let p_start=0+20*i + let p_end=19+20*i + taskset -c $p_start-$p_end python3 ${cur_path}/../gru_8p.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --data-dir $data_path \ + --seed 123456 \ + --workers 20 \ + --print-freq 1 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch-size $batch_size \ + --epoch $train_epochs \ + --rank 0 \ + --npu $i \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + done +else + for i in $(seq 0 7) + do + python3 ${cur_path}/../gru_8p.py \ + --addr=$(hostname -I |awk '{print $1}') \ + --data-dir $data_path \ + --seed 123456 \ + --workers 80 \ + --print-freq 1 \ + --dist-url 'tcp://127.0.0.1:50000' \ + --dist-backend 'hccl' \ + --multiprocessing-distributed \ + --world-size 1 \ + --batch-size 12288 \ + --epoch 10 \ + --rank 0 \ + --npu $i \ + --device-list '0,1,2,3,4,5,6,7' \ + --amp > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + done +fi +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS ${cur_path}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk '{sum+=$1} END {print sum/NR}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep -a '* Acc@1' train_0.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` + +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 
+ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep npu |awk -F 'Loss' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/LICENSE b/PyTorch/built-in/nlp/Transformer_for_PyTorch/LICENSE index 8e39b8ee60855474c9e07a20a21389fe4e4f08b1..5dc227be93b21e2678575f82aa2a5b7924e99847 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/LICENSE +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/LICENSE @@ -1,32 +1,32 @@ -BSD License - -For fairseq software - -Copyright (c) 2017-present, Facebook, Inc. All rights reserved. -Copyright (c) 2019-present, NVIDIA CORPORATION. All rights reserved. -Copyright 2020 Huawei Technologies Co., Ltd - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +BSD License + +For fairseq software + +Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +Copyright (c) 2019-present, NVIDIA CORPORATION. All rights reserved. 
+Copyright 2020 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/PATENTS b/PyTorch/built-in/nlp/Transformer_for_PyTorch/PATENTS index 3e13bdabcbff1fdb7779a3274208a0543e2fdccd..1e75c56d30063c184fc927a5a22c143689af251d 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/PATENTS +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/PATENTS @@ -1,33 +1,33 @@ -Additional Grant of Patent Rights Version 2 - -"Software" means the fairseq software distributed by Facebook, Inc. - -Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software -("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable -(subject to the termination provision below) license under any Necessary -Claims, to make, have made, use, sell, offer to sell, import, and otherwise -transfer the Software. For avoidance of doubt, no license is granted under -Facebook’s rights in any patent claims that are infringed by (i) modifications -to the Software made by you or any third party or (ii) the Software in -combination with any software or other technology. - -The license granted hereunder will terminate, automatically and without notice, -if you (or any of your subsidiaries, corporate affiliates or agents) initiate -directly or indirectly, or take a direct financial interest in, any Patent -Assertion: (i) against Facebook or any of its subsidiaries or corporate -affiliates, (ii) against any party if such Patent Assertion arises in whole or -in part from any software, technology, product or service of Facebook or any of -its subsidiaries or corporate affiliates, or (iii) against any party relating -to the Software. 
Notwithstanding the foregoing, if Facebook or any of its -subsidiaries or corporate affiliates files a lawsuit alleging patent -infringement against you in the first instance, and you respond by filing a -patent infringement counterclaim in that lawsuit against that party that is -unrelated to the Software, the license granted hereunder will not terminate -under section (i) of this paragraph due to such counterclaim. - -A "Necessary Claim" is a claim of a patent owned by Facebook that is -necessarily infringed by the Software standing alone. - -A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, -or contributory infringement or inducement to infringe any patent, including a +Additional Grant of Patent Rights Version 2 + +"Software" means the fairseq software distributed by Facebook, Inc. + +Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software +("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable +(subject to the termination provision below) license under any Necessary +Claims, to make, have made, use, sell, offer to sell, import, and otherwise +transfer the Software. For avoidance of doubt, no license is granted under +Facebook’s rights in any patent claims that are infringed by (i) modifications +to the Software made by you or any third party or (ii) the Software in +combination with any software or other technology. + +The license granted hereunder will terminate, automatically and without notice, +if you (or any of your subsidiaries, corporate affiliates or agents) initiate +directly or indirectly, or take a direct financial interest in, any Patent +Assertion: (i) against Facebook or any of its subsidiaries or corporate +affiliates, (ii) against any party if such Patent Assertion arises in whole or +in part from any software, technology, product or service of Facebook or any of +its subsidiaries or corporate affiliates, or (iii) against any party relating +to the Software. Notwithstanding the foregoing, if Facebook or any of its +subsidiaries or corporate affiliates files a lawsuit alleging patent +infringement against you in the first instance, and you respond by filing a +patent infringement counterclaim in that lawsuit against that party that is +unrelated to the Software, the license granted hereunder will not terminate +under section (i) of this paragraph due to such counterclaim. + +A "Necessary Claim" is a claim of a patent owned by Facebook that is +necessarily infringed by the Software standing alone. + +A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, +or contributory infringement or inducement to infringe any patent, including a cross-claim or counterclaim. \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/README.md b/PyTorch/built-in/nlp/Transformer_for_PyTorch/README.md index 4a080bbc3e00ebc88a32cb580287d70fb04e4d0e..ca56f84cfda2de30b0fab05f98f9a0db1c438a38 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/README.md +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/README.md @@ -1,84 +1,84 @@ -# Machine Translation with Transformer - -## Requirements -* NPU配套的run包安装 -* Python 3.7.5 -* PyTorch(NPU版本) -* apex(NPU版本) -* dllogger - - -## Dataset Prepare -1. 运行sh run_preprocessing.sh下载数据集,并处理 - -## 1P -1. 编辑 train_1p.sh device-id(NPU设备号) DATA_DIR(数据集目录) MODELDIR(日志和模型保存目录) -2. 
运行 sh train_1p.sh -``` -python3 -u train_1p.py \ - ./data/dataset/wmt14_en_de_joined_dict/ \ - --device-id 7\ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400\ - --seed 1 \ - --save-dir $MODELDIR \ - --save-interval 1\ - --update-freq 8\ - --log-interval 1\ - --stat-file $STAT_FILE\ - --distributed-world-size 1\ - --amp\ - --amp-level O2 - -``` -## 8P -1. 编辑 train_8p.sh device-id(NPU设备号) DATA_DIR(数据集目录) MODELDIR(日志和模型保存目录) addr(本机设备ip) -2. 运行 sh train_8p.sh - -``` - -python3 train_np.py $DATA_DIR \ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --addr 'XX.XXX.XXX.XXX' \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400 \ - --seed 1 \ - --save-dir $MODELDIR \ - --stat-file $STAT_FILE\ - --log-interval 1\ - --amp\ - --amp-level O2 - -``` - +# Machine Translation with Transformer + +## Requirements +* NPU配套的run包安装 +* Python 3.7.5 +* PyTorch(NPU版本) +* apex(NPU版本) +* dllogger + + +## Dataset Prepare +1. 运行sh run_preprocessing.sh下载数据集,并处理 + +## 1P +1. 编辑 train_1p.sh device-id(NPU设备号) DATA_DIR(数据集目录) MODELDIR(日志和模型保存目录) +2. 运行 sh train_1p.sh +``` +python3 -u train_1p.py \ + ./data/dataset/wmt14_en_de_joined_dict/ \ + --device-id 7\ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400\ + --seed 1 \ + --save-dir $MODELDIR \ + --save-interval 1\ + --update-freq 8\ + --log-interval 1\ + --stat-file $STAT_FILE\ + --distributed-world-size 1\ + --amp\ + --amp-level O2 + +``` +## 8P +1. 编辑 train_8p.sh device-id(NPU设备号) DATA_DIR(数据集目录) MODELDIR(日志和模型保存目录) addr(本机设备ip) +2. 
运行 sh train_8p.sh + +``` + +python3 train_np.py $DATA_DIR \ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --addr 'XX.XXX.XXX.XXX' \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400 \ + --seed 1 \ + --save-dir $MODELDIR \ + --stat-file $STAT_FILE\ + --log-interval 1\ + --amp\ + --amp-level O2 + +``` + diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/docker_start.sh b/PyTorch/built-in/nlp/Transformer_for_PyTorch/docker_start.sh index d29feef43922181514e2b4157c7cfd31d067e453..f2f5cfcc8d287868931a3d0710a44720d24092ae 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/docker_start.sh +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/docker_start.sh @@ -1,26 +1,26 @@ - -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ + +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py index ba5e353d0e44ca13cc2374477533f2ea1ae94a23..9c2484f701f83ae038ff20d6c52c4b7a7de1eef4 100644 --- 
a/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/modules/multihead_attention.py @@ -1,375 +1,375 @@ -# Copyright (c) 2017-present, Facebook, Inc. -# All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# This source code is licensed under the license found in the LICENSE file in -# the root directory of this source tree. An additional grant of patent rights -# can be found in the PATENTS file in the same directory. -# -# ------------------------------------------------------------------------- -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from typing import Dict, Optional -import torch -from torch import nn, Tensor -from torch.nn import Parameter -import torch.nn.functional as F -from torch.autograd.variable import Variable -from utils import utils - -class QueryLinear(torch.autograd.Function): - @staticmethod - def forward(ctx, input, weights_q, scale_cpu, scale_npu): - - ctx.save_for_backward(input, weights_q, scale_cpu, scale_npu) - q = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), - input.view(input.size(0) * input.size(1), input.size(2)), weights_q, beta=0.0, alpha=scale_cpu) - q = q.view(input.size(0), input.size(1), input.size(2)) - return q.detach() - - @staticmethod - def backward(ctx, q_grad): - input, weights_q, scale_cpu, scale_npu = ctx.saved_tensors - input = input.view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) - q = torch.addmm(q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), - q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), weights_q.transpose(0, 1), - beta=0.0, alpha=scale_cpu) - q = q.view(q_grad.size(0), q_grad.size(1), q_grad.size(2)) - q_grad = q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)) - weights_q_grad = scale_npu.type_as(input)*torch.mm(input, q_grad) - return q, weights_q_grad, None, None - - -class KeyValueLinears(torch.autograd.Function): - @staticmethod - def forward(ctx, input, weights_k, weights_v): - ctx.save_for_backward(input, weights_k, weights_v) - k = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), - input.view(input.size(0) * input.size(1), input.size(2)), weights_k, beta=0.0, alpha=1.0) - k = k.view(input.size(0), input.size(1), input.size(2)) - v = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), - input.view(input.size(0) * input.size(1), input.size(2)), weights_v, beta=0.0, alpha=1.0) - v = v.view(input.size(0), input.size(1), input.size(2)) - return k.detach(), v.detach() - - @staticmethod - def backward(ctx, k_grad, v_grad): - input, weights_k, weights_v = ctx.saved_tensors - input = input.view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) - k = torch.addmm(k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), - k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), weights_k.transpose(0, 1), - beta=0.0) - k_grad = 
k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)) - weights_k_grad = torch.mm(input, k_grad) - v = k.addmm_(v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)), weights_v.transpose(0, 1), beta=1.0) - v = v.view(v_grad.size(0), v_grad.size(1), v_grad.size(2)) - v_grad = v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)) - weights_v_grad = torch.mm(input, v_grad) - return v, weights_k_grad, weights_v_grad - - -class SelfAttentionLinears(torch.autograd.Function): - @staticmethod - def forward(ctx, input, weights_q, weights_k, weights_v, scale_cpu, scale_npu): - ctx.save_for_backward(input, weights_q, weights_k, weights_v, scale_cpu, scale_npu) - q = torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), - input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_q, beta=0.0, - alpha=scale_cpu) - q = q.view(input.size(0), input.size(1), input.size(2)) - k = torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), - input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_k, beta=0.0, - alpha=1.0) - k = k.view(input.size(0), input.size(1), input.size(2)) - v = torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), - input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_v, beta=0.0, - alpha=1.0) - v = v.view(input.size(0), input.size(1), input.size(2)) - return q.detach(), k.detach(), v.detach() - - @staticmethod - def backward(ctx, q_grad, k_grad, v_grad): - input, weights_q, weights_k, weights_v, scale_cpu, scale_npu = ctx.saved_tensors - input = input.contiguous().view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) - - q = torch.addmm(q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), - q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), weights_q.transpose(0, 1), - beta=0.0, alpha=scale_cpu) - q_grad = q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)) - weights_q_grad = scale_npu.type_as(input)*torch.mm(input,q_grad) - k = q.addmm_(k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), weights_k.transpose(0, 1), beta=1.0) - k_grad = k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)) - weights_k_grad = torch.mm(input, k_grad) - v = k.addmm_(v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)), weights_v.transpose(0, 1), beta=1.0) - v = v.view(v_grad.size(0), v_grad.size(1), v_grad.size(2)) - v_grad = v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)) - weights_v_grad = torch.mm(input, v_grad) - return v, weights_q_grad, weights_k_grad, weights_v_grad, None, None - -class StridedBmm1Func(torch.autograd.Function): - @staticmethod - def forward(ctx, input1, input2): - ctx.save_for_backward(input1, input2) - output = torch.bmm(input1, input2) - return output.detach() - - @staticmethod - def backward(ctx, grad_output): - input1, input2 = ctx.saved_tensors - grad_output = grad_output.clone() - grad_input1 = torch.bmm(grad_output, input2.transpose(1, 2)) - grad_input2 = torch.bmm(grad_output.transpose(1, 2), input1).transpose(1, 2) - return grad_input1, grad_input2 - - -class StridedBmm2Func(torch.autograd.Function): - @staticmethod - def forward(ctx, input1, input2): - ctx.save_for_backward(input1, input2) - output = torch.bmm(input1, input2) - return output.detach() - - @staticmethod - def backward(ctx, grad_output): - input1, input2 = ctx.saved_tensors - grad_output = grad_output.clone() - grad_input1 = torch.bmm(grad_output, 
input2.transpose(1, 2)) - grad_input2 = torch.bmm(input1.transpose(1, 2), grad_output) - return grad_input1, grad_input2 - - -def query_linear(input: Tensor, weights_q: Tensor, scale_cpu: Tensor, scale_npu: Tensor): - return QueryLinear.apply(input, weights_q, scale_cpu, scale_npu) - -def key_value_linears(input: Tensor, weights_k: Tensor, weights_v: Tensor): - return KeyValueLinears.apply(input, weights_k, weights_v) - -def self_attn_linears(input: Tensor, weights_q: Tensor, weights_k: Tensor, weights_v: Tensor, scale_cpu: Tensor, scale_npu: Tensor): - return SelfAttentionLinears.apply(input, weights_q, weights_k, weights_v, scale_cpu, scale_npu) - -def strided_bmm1(input1: Tensor, input2: Tensor): - return StridedBmm1Func.apply(input1, input2) - -def strided_bmm2(input1: Tensor, input2: Tensor): - return StridedBmm2Func.apply(input1, input2) - -class MultiheadAttention(nn.Module): - """Multi-headed attention. - - See "Attention Is All You Need" for more details. - """ - - def __init__(self, embed_dim, num_heads, dropout=0., bias=False, seed=0): - super().__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - self.dropout = dropout - self.seed = seed - self.head_dim = embed_dim // num_heads - assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" - self.scaling = self.head_dim ** -0.5 - self.scaling_cpu = Variable(torch.tensor(self.scaling)) - self.scaling_npu = self.scaling_cpu.npu() - self._mask = torch.empty(0) - self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) - self.k_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) - self.v_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) - if bias: - self.in_proj_bias_q = Parameter(torch.Tensor(embed_dim)) - self.in_proj_bias_k = Parameter(torch.Tensor(embed_dim)) - self.in_proj_bias_v = Parameter(torch.Tensor(embed_dim)) - else: - self.register_parameter('in_proj_bias_k', None) - self.register_parameter('in_proj_bias_q', None) - self.register_parameter('in_proj_bias_v', None) - - self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias) - - self.cache_id = str(id(self)) - self.reset_parameters() - - def reset_parameters(self): - nn.init.xavier_uniform_(self.q_proj_weight) - nn.init.xavier_uniform_(self.k_proj_weight) - nn.init.xavier_uniform_(self.v_proj_weight) - nn.init.xavier_uniform_(self.out_proj.weight) - if self.in_proj_bias_k is not None: - nn.init.constant_(self.in_proj_bias_q, 0.) - nn.init.constant_(self.in_proj_bias_k, 0.) - nn.init.constant_(self.in_proj_bias_v, 0.) - nn.init.constant_(self.out_proj.bias, 0.) - - def forward(self, query: Tensor, key: Tensor, value: Tensor, - mask_future_timesteps: bool, - key_padding_mask: Optional[Tensor], - incremental_state: Optional[Dict[str, Dict[str, Tensor]]], - need_weights: bool, - static_kv: bool): - """Input shape: Time x Batch x Channel - - Self-attention can be implemented by passing in the same arguments for - query, key and value. Future timesteps can be masked with the - `mask_future_timesteps` argument. Padding elements can be excluded from - the key by passing a binary ByteTensor (`key_padding_mask`) with shape: - batch x src_len, where padding elements are indicated by 1s. 
- """ - qkv_same, kv_same = self._fast_same_check(query, key, value) - - tgt_len, bsz, embed_dim = query.size() - assert embed_dim == self.embed_dim - assert list(query.size()) == [tgt_len, bsz, embed_dim] - assert key.size() == value.size() - - k = v = query.new_empty(0) - if incremental_state is not None: - saved_state = self._get_input_buffer(incremental_state) - else: - saved_state = None - - if qkv_same: - q, k, v = self_attn_linears(query, self.q_proj_weight,self.k_proj_weight, self.v_proj_weight, - self.scaling_cpu, self.scaling_npu) - elif kv_same: - q = query_linear(query, self.q_proj_weight, self.scaling_cpu, self.scaling_npu) - if not (saved_state is not None and 'prev_key' in saved_state and static_kv): - k, v = key_value_linears(key, self.k_proj_weight, self.v_proj_weight) - else: - q = torch.addmm(query.view(query.size(0) * query.size(1), query.size(2)), - query.view(query.size(0) * query.size(1), query.size(2)), self.q_proj_weight, beta=0.0, - alpha=self.scaling) - if not (saved_state is not None and 'prev_key' in saved_state and static_kv): - k = F.linear(key, self.k_proj_weight, self.in_proj_bias_k) - v = F.linear(value, self.v_proj_weight, self.in_proj_bias_v) - - if saved_state is not None: - if 'prev_key' in saved_state: - k = torch.cat((saved_state['prev_key'], k), dim=0) - if 'prev_value' in saved_state: - v = torch.cat((saved_state['prev_value'], v), dim=0) - saved_state['prev_key'] = k - saved_state['prev_value'] = v - self._set_input_buffer(incremental_state, saved_state) - - src_len = k.size(0) - - if key_padding_mask is not None: - assert key_padding_mask.size(0) == bsz - assert key_padding_mask.size(1) == src_len - - q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() - k = k.contiguous().view(src_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() - v = v.contiguous().view(src_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() - - attn_weights = strided_bmm1(q, k.transpose(1, 2)) - - assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] - - # only apply masking at training time (when incremental state is None) - if mask_future_timesteps and incremental_state is None: - assert query.size() == key.size(), \ - 'mask_future_timesteps only applies to self-attention' - attn_weights += self.buffered_mask(attn_weights).unsqueeze(0) - if key_padding_mask is not None: - # don't attend to padding symbols - attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) - - attn_weights = attn_weights.float().masked_fill( - key_padding_mask.unsqueeze(1).unsqueeze(2), - torch.finfo(torch.float32).min, - ).type_as(attn_weights) - - attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) - - attn_weights = F.softmax(attn_weights, dim=-1) - if self.training: - attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) - - attn = strided_bmm2(attn_weights, v) - assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] - attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) - - # linear - attn = self.out_proj(attn) - - if need_weights: - # average attention weights over heads - attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) - attn_weights = attn_weights.sum(dim=1) / self.num_heads - else: - attn_weights = attn_weights.new_empty(0) # Can't set to None because jit script reasons - - return attn, attn_weights - - - def in_proj_qkv(self, query): - 
return self._in_proj(query).chunk(3, dim=-1) - - def in_proj_kv(self, key): - return self._in_proj(key, start=self.embed_dim).chunk(2, dim=-1) - - def in_proj_q(self, query): - return self._in_proj(query, end=self.embed_dim) - - def in_proj_k(self, key): - return self._in_proj(key, start=self.embed_dim, end=2 * self.embed_dim) - - def in_proj_v(self, value): - return self._in_proj(value, start=2 * self.embed_dim) - - def _in_proj(self, input, start=None, end=None): - weight = self.in_proj_weight - bias = self.in_proj_bias - if end is not None: - weight = weight[:end, :] - if bias is not None: - bias = bias[:end] - if start is not None: - weight = weight[start:, :] - if bias is not None: - bias = bias[start:] - - res = F.linear(input, weight, bias) - return res - - def buffered_mask(self, tensor): - dim = tensor.size(-1) - if self._mask.size(0) == 0: - self._mask = torch.triu(utils.fill_with_neg_inf(tensor.new_empty(dim, dim)), 1) - if self._mask.size(0) < dim: - self._mask = torch.triu(utils.fill_with_neg_inf(self._mask.resize_(dim, dim)), 1) - return self._mask[:dim, :dim] - - def reorder_incremental_state(self, incremental_state, new_order): - """Reorder buffered internal state (for incremental generation).""" - input_buffer = self._get_input_buffer(incremental_state) - if input_buffer is not None: - for k in input_buffer.keys(): - input_buffer[k] = input_buffer[k].index_select(1, new_order) - self._set_input_buffer(incremental_state, input_buffer) - - def _get_input_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Tensor]]]): - if incremental_state is None or self.cache_id not in incremental_state: - return {} - return incremental_state[self.cache_id] - - def _set_input_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Tensor]]], buffer: Dict[str, Tensor]): - if incremental_state is not None: - incremental_state[self.cache_id] = buffer - - def _fast_same_check(self, q, k, v): - qkv_same = q.data_ptr() == k.data_ptr() == v.data_ptr() - kv_same = k.data_ptr() == v.data_ptr() - return qkv_same, kv_same - +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. +# +# ------------------------------------------------------------------------- +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
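The QueryLinear / KeyValueLinears / SelfAttentionLinears Functions in this file replace the usual F.linear projections with torch.addmm calls whose alpha folds the attention scale into the query projection (beta=0.0, alpha=scale). A quick numerical sanity check of that equivalence, as an illustrative sketch only, assuming an Ascend NPU build of PyTorch and that the module is importable as modules.multihead_attention per the repo layout:

```
# Illustrative sanity check, not part of the patch: the fused projections
# compute alpha * (x @ W) via addmm(beta=0.0), so they should match plain
# matmuls. Assumes an Ascend NPU device and the repo's module path.
import torch
from modules.multihead_attention import self_attn_linears

T, B, E = 5, 2, 64                          # time, batch, embed_dim
x = torch.randn(T, B, E).npu()
wq, wk, wv = (torch.randn(E, E).npu() for _ in range(3))
scale_cpu = torch.tensor(E ** -0.5)         # stands in for self.scaling_cpu
scale_npu = scale_cpu.npu()                 # stands in for self.scaling_npu

q, k, v = self_attn_linears(x, wq, wk, wv, scale_cpu, scale_npu)

ref_q = (x.reshape(T * B, E) @ wq).reshape(T, B, E) * scale_cpu.item()
ref_k = (x.reshape(T * B, E) @ wk).reshape(T, B, E)
assert torch.allclose(q, ref_q, atol=1e-5)
assert torch.allclose(k, ref_k, atol=1e-5)
```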
+ + +from typing import Dict, Optional +import torch +from torch import nn, Tensor +from torch.nn import Parameter +import torch.nn.functional as F +from torch.autograd.variable import Variable +from utils import utils + +class QueryLinear(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weights_q, scale_cpu, scale_npu): + + ctx.save_for_backward(input, weights_q, scale_cpu, scale_npu) + q = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), + input.view(input.size(0) * input.size(1), input.size(2)), weights_q, beta=0.0, alpha=scale_cpu) + q = q.view(input.size(0), input.size(1), input.size(2)) + return q.detach() + + @staticmethod + def backward(ctx, q_grad): + input, weights_q, scale_cpu, scale_npu = ctx.saved_tensors + input = input.view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) + q = torch.addmm(q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), + q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), weights_q.transpose(0, 1), + beta=0.0, alpha=scale_cpu) + q = q.view(q_grad.size(0), q_grad.size(1), q_grad.size(2)) + q_grad = q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)) + weights_q_grad = scale_npu.type_as(input)*torch.mm(input, q_grad) + return q, weights_q_grad, None, None + + +class KeyValueLinears(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weights_k, weights_v): + ctx.save_for_backward(input, weights_k, weights_v) + k = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), + input.view(input.size(0) * input.size(1), input.size(2)), weights_k, beta=0.0, alpha=1.0) + k = k.view(input.size(0), input.size(1), input.size(2)) + v = torch.addmm(input.view(input.size(0) * input.size(1), input.size(2)), + input.view(input.size(0) * input.size(1), input.size(2)), weights_v, beta=0.0, alpha=1.0) + v = v.view(input.size(0), input.size(1), input.size(2)) + return k.detach(), v.detach() + + @staticmethod + def backward(ctx, k_grad, v_grad): + input, weights_k, weights_v = ctx.saved_tensors + input = input.view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) + k = torch.addmm(k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), + k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), weights_k.transpose(0, 1), + beta=0.0) + k_grad = k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)) + weights_k_grad = torch.mm(input, k_grad) + v = k.addmm_(v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)), weights_v.transpose(0, 1), beta=1.0) + v = v.view(v_grad.size(0), v_grad.size(1), v_grad.size(2)) + v_grad = v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)) + weights_v_grad = torch.mm(input, v_grad) + return v, weights_k_grad, weights_v_grad + + +class SelfAttentionLinears(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weights_q, weights_k, weights_v, scale_cpu, scale_npu): + ctx.save_for_backward(input, weights_q, weights_k, weights_v, scale_cpu, scale_npu) + q = torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), + input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_q, beta=0.0, + alpha=scale_cpu) + q = q.view(input.size(0), input.size(1), input.size(2)) + k = torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), + input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_k, beta=0.0, + alpha=1.0) + k = k.view(input.size(0), input.size(1), input.size(2)) + v = 
torch.addmm(input.contiguous().view(input.size(0) * input.size(1), input.size(2)), + input.contiguous().view(input.size(0) * input.size(1), input.size(2)), weights_v, beta=0.0, + alpha=1.0) + v = v.view(input.size(0), input.size(1), input.size(2)) + return q.detach(), k.detach(), v.detach() + + @staticmethod + def backward(ctx, q_grad, k_grad, v_grad): + input, weights_q, weights_k, weights_v, scale_cpu, scale_npu = ctx.saved_tensors + input = input.contiguous().view(input.size(0) * input.size(1), input.size(2)).transpose(0, 1) + + q = torch.addmm(q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), + q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)), weights_q.transpose(0, 1), + beta=0.0, alpha=scale_cpu) + q_grad = q_grad.view(q_grad.size(0) * q_grad.size(1), q_grad.size(2)) + weights_q_grad = scale_npu.type_as(input)*torch.mm(input,q_grad) + k = q.addmm_(k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)), weights_k.transpose(0, 1), beta=1.0) + k_grad = k_grad.view(k_grad.size(0) * k_grad.size(1), k_grad.size(2)) + weights_k_grad = torch.mm(input, k_grad) + v = k.addmm_(v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)), weights_v.transpose(0, 1), beta=1.0) + v = v.view(v_grad.size(0), v_grad.size(1), v_grad.size(2)) + v_grad = v_grad.view(v_grad.size(0) * v_grad.size(1), v_grad.size(2)) + weights_v_grad = torch.mm(input, v_grad) + return v, weights_q_grad, weights_k_grad, weights_v_grad, None, None + +class StridedBmm1Func(torch.autograd.Function): + @staticmethod + def forward(ctx, input1, input2): + ctx.save_for_backward(input1, input2) + output = torch.bmm(input1, input2) + return output.detach() + + @staticmethod + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + grad_output = grad_output.clone() + grad_input1 = torch.bmm(grad_output, input2.transpose(1, 2)) + grad_input2 = torch.bmm(grad_output.transpose(1, 2), input1).transpose(1, 2) + return grad_input1, grad_input2 + + +class StridedBmm2Func(torch.autograd.Function): + @staticmethod + def forward(ctx, input1, input2): + ctx.save_for_backward(input1, input2) + output = torch.bmm(input1, input2) + return output.detach() + + @staticmethod + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + grad_output = grad_output.clone() + grad_input1 = torch.bmm(grad_output, input2.transpose(1, 2)) + grad_input2 = torch.bmm(input1.transpose(1, 2), grad_output) + return grad_input1, grad_input2 + + +def query_linear(input: Tensor, weights_q: Tensor, scale_cpu: Tensor, scale_npu: Tensor): + return QueryLinear.apply(input, weights_q, scale_cpu, scale_npu) + +def key_value_linears(input: Tensor, weights_k: Tensor, weights_v: Tensor): + return KeyValueLinears.apply(input, weights_k, weights_v) + +def self_attn_linears(input: Tensor, weights_q: Tensor, weights_k: Tensor, weights_v: Tensor, scale_cpu: Tensor, scale_npu: Tensor): + return SelfAttentionLinears.apply(input, weights_q, weights_k, weights_v, scale_cpu, scale_npu) + +def strided_bmm1(input1: Tensor, input2: Tensor): + return StridedBmm1Func.apply(input1, input2) + +def strided_bmm2(input1: Tensor, input2: Tensor): + return StridedBmm2Func.apply(input1, input2) + +class MultiheadAttention(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__(self, embed_dim, num_heads, dropout=0., bias=False, seed=0): + super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + self.dropout = dropout + self.seed = seed + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + self.scaling_cpu = Variable(torch.tensor(self.scaling)) + self.scaling_npu = self.scaling_cpu.npu() + self._mask = torch.empty(0) + self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + if bias: + self.in_proj_bias_q = Parameter(torch.Tensor(embed_dim)) + self.in_proj_bias_k = Parameter(torch.Tensor(embed_dim)) + self.in_proj_bias_v = Parameter(torch.Tensor(embed_dim)) + else: + self.register_parameter('in_proj_bias_k', None) + self.register_parameter('in_proj_bias_q', None) + self.register_parameter('in_proj_bias_v', None) + + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias) + + self.cache_id = str(id(self)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.xavier_uniform_(self.q_proj_weight) + nn.init.xavier_uniform_(self.k_proj_weight) + nn.init.xavier_uniform_(self.v_proj_weight) + nn.init.xavier_uniform_(self.out_proj.weight) + if self.in_proj_bias_k is not None: + nn.init.constant_(self.in_proj_bias_q, 0.) + nn.init.constant_(self.in_proj_bias_k, 0.) + nn.init.constant_(self.in_proj_bias_v, 0.) + nn.init.constant_(self.out_proj.bias, 0.) + + def forward(self, query: Tensor, key: Tensor, value: Tensor, + mask_future_timesteps: bool, + key_padding_mask: Optional[Tensor], + incremental_state: Optional[Dict[str, Dict[str, Tensor]]], + need_weights: bool, + static_kv: bool): + """Input shape: Time x Batch x Channel + + Self-attention can be implemented by passing in the same arguments for + query, key and value. Future timesteps can be masked with the + `mask_future_timesteps` argument. Padding elements can be excluded from + the key by passing a binary ByteTensor (`key_padding_mask`) with shape: + batch x src_len, where padding elements are indicated by 1s. 
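A minimal usage sketch of the forward contract documented above: Time x Batch x Channel inputs, a boolean key_padding_mask with 1s on padding positions, and the fused self-attention path triggered by passing the same tensor for query, key and value. Illustrative only; it assumes an Ascend NPU build, since the module moves its scaling constant to NPU in __init__ and calls torch.npu_dropoutV2 while training.

```
# Minimal sketch, not part of the patch.
import torch
from modules.multihead_attention import MultiheadAttention

tgt_len, bsz, embed_dim, num_heads = 10, 4, 512, 8
attn = MultiheadAttention(embed_dim, num_heads, dropout=0.1).npu()

x = torch.randn(tgt_len, bsz, embed_dim).npu()             # (time, batch, channel)
pad = torch.zeros(bsz, tgt_len, dtype=torch.bool).npu()    # no padding in this toy batch

out, weights = attn(
    query=x, key=x, value=x,        # identical tensors -> fused qkv_same path
    mask_future_timesteps=True,     # causal masking (training time only)
    key_padding_mask=pad,
    incremental_state=None,
    need_weights=True,
    static_kv=False,
)
assert out.shape == (tgt_len, bsz, embed_dim)
assert weights.shape == (bsz, tgt_len, tgt_len)            # averaged over heads
```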
+ """ + qkv_same, kv_same = self._fast_same_check(query, key, value) + + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + assert key.size() == value.size() + + k = v = query.new_empty(0) + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + else: + saved_state = None + + if qkv_same: + q, k, v = self_attn_linears(query, self.q_proj_weight,self.k_proj_weight, self.v_proj_weight, + self.scaling_cpu, self.scaling_npu) + elif kv_same: + q = query_linear(query, self.q_proj_weight, self.scaling_cpu, self.scaling_npu) + if not (saved_state is not None and 'prev_key' in saved_state and static_kv): + k, v = key_value_linears(key, self.k_proj_weight, self.v_proj_weight) + else: + q = torch.addmm(query.view(query.size(0) * query.size(1), query.size(2)), + query.view(query.size(0) * query.size(1), query.size(2)), self.q_proj_weight, beta=0.0, + alpha=self.scaling) + if not (saved_state is not None and 'prev_key' in saved_state and static_kv): + k = F.linear(key, self.k_proj_weight, self.in_proj_bias_k) + v = F.linear(value, self.v_proj_weight, self.in_proj_bias_v) + + if saved_state is not None: + if 'prev_key' in saved_state: + k = torch.cat((saved_state['prev_key'], k), dim=0) + if 'prev_value' in saved_state: + v = torch.cat((saved_state['prev_value'], v), dim=0) + saved_state['prev_key'] = k + saved_state['prev_value'] = v + self._set_input_buffer(incremental_state, saved_state) + + src_len = k.size(0) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() + k = k.contiguous().view(src_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() + v = v.contiguous().view(src_len, bsz * self.num_heads, self.head_dim).clone().transpose(0, 1).contiguous() + + attn_weights = strided_bmm1(q, k.transpose(1, 2)) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + # only apply masking at training time (when incremental state is None) + if mask_future_timesteps and incremental_state is None: + assert query.size() == key.size(), \ + 'mask_future_timesteps only applies to self-attention' + attn_weights += self.buffered_mask(attn_weights).unsqueeze(0) + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + + attn_weights = attn_weights.float().masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + torch.finfo(torch.float32).min, + ).type_as(attn_weights) + + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_weights = F.softmax(attn_weights, dim=-1) + if self.training: + attn_weights, _, _ = torch.npu_dropoutV2(attn_weights, self.seed, p=self.dropout) + + attn = strided_bmm2(attn_weights, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + + # linear + attn = self.out_proj(attn) + + if need_weights: + # average attention weights over heads + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.sum(dim=1) / self.num_heads + else: + attn_weights = attn_weights.new_empty(0) # Can't set to None because jit script reasons + + return attn, attn_weights + + + def in_proj_qkv(self, query): + 
return self._in_proj(query).chunk(3, dim=-1) + + def in_proj_kv(self, key): + return self._in_proj(key, start=self.embed_dim).chunk(2, dim=-1) + + def in_proj_q(self, query): + return self._in_proj(query, end=self.embed_dim) + + def in_proj_k(self, key): + return self._in_proj(key, start=self.embed_dim, end=2 * self.embed_dim) + + def in_proj_v(self, value): + return self._in_proj(value, start=2 * self.embed_dim) + + def _in_proj(self, input, start=None, end=None): + weight = self.in_proj_weight + bias = self.in_proj_bias + if end is not None: + weight = weight[:end, :] + if bias is not None: + bias = bias[:end] + if start is not None: + weight = weight[start:, :] + if bias is not None: + bias = bias[start:] + + res = F.linear(input, weight, bias) + return res + + def buffered_mask(self, tensor): + dim = tensor.size(-1) + if self._mask.size(0) == 0: + self._mask = torch.triu(utils.fill_with_neg_inf(tensor.new_empty(dim, dim)), 1) + if self._mask.size(0) < dim: + self._mask = torch.triu(utils.fill_with_neg_inf(self._mask.resize_(dim, dim)), 1) + return self._mask[:dim, :dim] + + def reorder_incremental_state(self, incremental_state, new_order): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer[k] = input_buffer[k].index_select(1, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + def _get_input_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Tensor]]]): + if incremental_state is None or self.cache_id not in incremental_state: + return {} + return incremental_state[self.cache_id] + + def _set_input_buffer(self, incremental_state: Optional[Dict[str, Dict[str, Tensor]]], buffer: Dict[str, Tensor]): + if incremental_state is not None: + incremental_state[self.cache_id] = buffer + + def _fast_same_check(self, q, k, v): + qkv_same = q.data_ptr() == k.data_ptr() == v.data_ptr() + kv_same = k.data_ptr() == v.data_ptr() + return qkv_same, kv_same + diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/combined_adam_v3.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/combined_adam_v3.py index 61f80b98e172af97af398680561da2ddd2aeaa4b..2b4edb0ab91ab15563a868bbdc1fdccc0b801ea4 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/combined_adam_v3.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/optim/combined_adam_v3.py @@ -1,257 +1,257 @@ -# Copyright (c) 2017-present, Facebook, Inc. -# All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# This source code is licensed under the license found in the LICENSE file in -# the root directory of this source tree. An additional grant of patent rights -# can be found in the PATENTS file in the same directory. -# -#------------------------------------------------------------------------- -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
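combine_tensor() and recombine_tensor() below fuse many small parameter or gradient tensors onto one contiguous NPU buffer by re-pointing their storage through the custom change_data_ptr extension, so the Adam update touches a handful of large tensors instead of one small tensor per parameter. The same idea expressed in plain PyTorch, with explicit copies standing in for the zero-copy aliasing, purely as an illustration:

```
# Conceptual illustration only (plain PyTorch, runs on CPU): the patch gets the
# scatter-back step for free because change_data_ptr makes the original tensors
# alias slices of the fused buffer.
import torch

params = [torch.randn(n, requires_grad=True) for n in (3, 5, 7)]
for p in params:
    p.grad = torch.randn_like(p)

# 1. gather every gradient into one contiguous buffer
flat_grad = torch.cat([p.grad.reshape(-1) for p in params])

# 2. one fused op stands in for the per-parameter optimizer math
flat_grad.mul_(0.1)

# 3. scatter results back to the individual gradients
offset = 0
for p in params:
    n = p.numel()
    p.grad.copy_(flat_grad[offset:offset + n].view_as(p.grad))
    offset += n
```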
- -import torch -import math -from torch.optim.optimizer import Optimizer, required -from change_data_ptr import change_data_ptr - - -def combine_tensor(list_of_tensor, copy_back=True): - total_numel = 0 - for tensor in list_of_tensor: - total_numel += tensor.storage().size() - combined_tensor = torch.randn(total_numel).npu().to(list_of_tensor[0].dtype) - - idx = 0 - if copy_back: - for tensor in list_of_tensor: - temp = tensor.clone() - temp.copy_(tensor) - change_data_ptr(tensor, combined_tensor, idx) - temp_data = tensor.data - temp_data.copy_(temp) - idx += temp.storage().size() - else: - for tensor in list_of_tensor: - change_data_ptr(tensor, combined_tensor, idx) - idx += tensor.storage().size() - return combined_tensor - - -def recombine_tensor(size, combined_tensor, index=0): - temp_grad = torch.zeros(size).npu().to(combined_tensor.dtype) - change_data_ptr(temp_grad, combined_tensor, index) - return temp_grad - - -class CombinedAdam(torch.optim.Optimizer): - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False, combine_grad=True): - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter at index 0".format(betas[0])) - if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter at index 1".format(betas[1])) - defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay, amsgrad=amsgrad) - super(CombinedAdam, self).__init__(params, defaults) - - self.combined = combine_grad - self.init_combine = False - self.first_init = True - self.opt_level_O2_has_bn = False - self.combined_grad = [] - self.combined_weight = [] - - def __setstate__(self, state): - super(CombinedAdam, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('amsgrad', False) - - def split_combined_tensors(self, input_combined_grad_1, input_combined_grad_2=None): - if len(self.combined_weight) > 0: - # has big tensor before, release storage - for tensor in self.combined_weight: - tensor = None - self.first_init = False - self.combined_grad = [] - self.combined_weight = [] - - index_ops, index_bn = 0, 0 - for param_group in self.param_groups: - size_ops, size_bn = 0, 0 - ord_param_list = [] - spe_param_list = [] - check_param_size = 0 - for param in param_group["params"]: - if param.requires_grad and param.grad is not None: - temp_size = param.grad.storage().size() - check_param_size += param.storage().size() - if input_combined_grad_1.data_ptr() <= param.grad.data_ptr() < input_combined_grad_1.data_ptr() + input_combined_grad_1.numel() * input_combined_grad_1.element_size(): - size_ops += temp_size - ord_param_list.append(param) - else: - size_bn += temp_size - spe_param_list.append(param) - self.combined_grad.append(recombine_tensor(size_ops, input_combined_grad_1, index_ops)) - self.combined_weight.append(combine_tensor(ord_param_list, copy_back=True)) - - index_ops += size_ops - if input_combined_grad_2 is not None: - self.combined_grad.append(recombine_tensor(size_bn, input_combined_grad_2, index_bn)) - self.combined_weight.append(combine_tensor(spe_param_list, copy_back=True)) - index_bn += size_bn - - def _init_combined(self): - if not self.init_combine: - if hasattr(self, "_amp_stash"): - stash = self._amp_stash - if hasattr(stash, "all_fp32_params"): - if len(stash.grads_list) == 0: - raise RuntimeError("When use CombinedAdam, Apex O1 need to use 
combine_grad=True module!") - self.split_combined_tensors(stash.grads_list[-1]) - self.init_combine = True - elif hasattr(stash, "all_fp32_from_fp16_params"): - if len(stash.grads_list) == 0: - raise RuntimeError("When use CombinedAdam, Apex O2 need to usecombine_grad=True module!") - if stash.grads_list[1] is not None: - if stash.grads_list[2] is None: - self.split_combined_tensors(stash.grads_list[1]) - else: - self.split_combined_tensors(stash.grads_list[1], stash.grads_list[2]) - self.opt_level_O2_has_bn = True - else: - raise RuntimeError("Inapproperiate network which only have batchnorm layers!") - self.init_combine = True - else: - for param_group in self.param_groups: - lst_grad = [] - lst_weight = [] - for param in param_group["params"]: - if param.requires_grad and param.grad is not None: - lst_grad.append(param.grad) - lst_weight.append(param) - if len(lst_grad) > 0: - self.combined_grad.append(combine_tensor(lst_grad, True)) - self.combined_weight.append(combine_tensor(lst_weight, True)) - self.combined_momentum.append(torch.zeros_like(self.combined_grad[-1])) - self.init_combine = True - for idx, tensor in enumerate(self.combined_weight): - state = self.state[tensor] - if len(state) == 0: - state["step"] = 0 - state["exp_avg"] = torch.zeros_like(self.combined_weight[idx]) - state["exp_avg_sq"] = torch.zeros_like(self.combined_weight[idx]) - - def step_combined(self, idx, state, group): - combined_weight = self.combined_weight[idx] - combined_grad = self.combined_grad[idx] - exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] - - amsgrad = group['amsgrad'] - if amsgrad: - if state["step"] == 0: - state["max_exp_avg_sq"] = torch.zeros_like(combined_weight) - beta1, beta2 = group['betas'] - - state["step"] += 1 - bias_correction1 = 1 - beta1 ** state["step"] - bias_correction2 = 1 - beta2 ** state["step"] - - exp_avg.mul_(beta1).add_(combined_grad, alpha=1-beta1) - exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1-beta2) - - if amsgrad: - max_exp_avg_sq = state["max_exp_avg_sq"] - max_exp_avg_sq = torch.max(max_exp_avg_sq, exp_avg_sq) - denom = max_exp_avg_sq.sqrt().add_(group["eps"]) - else: - denom = exp_avg_sq.sqrt().add_(group["eps"]) - step_size = group["lr"] * math.sqrt(bias_correction2) / bias_correction1 - if group["weight_decay"] != 0: - combined_weight.data.add_(-group["weight_decay"] * group["lr"], combined_weight.data) - - combined_weight.data.addcdiv_(exp_avg, denom, value=-step_size) - - @torch.no_grad() - def step(self, closure=None, enable=True): - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - - idx = 0 - for group in self.param_groups: - if self.combined: - self._init_combined() - state = self.state[self.combined_weight[idx]] - self.step_combined(idx, state, group) - if self.opt_level_O2_has_bn: - idx += 1 - state = self.state[self.combined_weight[idx]] - self.step_combined(idx, state, group) - else: - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data - if grad.is_sparse: - raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') - amsgrad = group['amsgrad'] - - state = self.state[p] - - # State initialization - if len(state) == 0: - state['step'] = 0 - # Exponential moving average of gradient values - state['exp_avg'] = torch.zeros_like(p.data) - # Exponential moving average of squared gradient values - state['exp_avg_sq'] = torch.zeros_like(p.data) - if amsgrad: - # Maintains max of all exp. moving avg. of sq. grad. 
values - state['max_exp_avg_sq'] = torch.zeros_like(p.data) - - exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] - if amsgrad: - max_exp_avg_sq = state['max_exp_avg_sq'] - beta1, beta2 = group['betas'] - - state['step'] += 1 - - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) - exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) - - if amsgrad: - # Maintains the maximum of all 2nd moment running avg. till now - max_exp_avg_sq = torch.max(max_exp_avg_sq, exp_avg_sq) - # Use the max. for normalizing running avg. of gradient - denom = max_exp_avg_sq.sqrt().add_(group['eps']) - else: - denom = exp_avg_sq.sqrt().add_(group['eps']) - - bias_correction1 = 1 - beta1 ** state['step'] - bias_correction2 = 1 - beta2 ** state['step'] - step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 - - if group['weight_decay'] != 0: - p.data.add_(-group['weight_decay'] * group['lr'], p.data) - - # p.data.addcdiv_(-step_size, exp_avg, denom) - p.data.addcdiv_(exp_avg, denom, value=-step_size) - idx += 1 +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. +# +#------------------------------------------------------------------------- +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
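CombinedAdam is meant to run under the NPU apex fork: _init_combined() looks for the _amp_stash that apex attaches to the optimizer and for the fused grads_list produced when gradients are combined on the apex side, and the training commands in this repo's README pass --amp --amp-level O2. A wiring sketch using the README's hyperparameters; the combine_grad=True argument to amp.initialize is an assumption about the Ascend apex build, and the toy model and shapes are placeholders.

```
# Sketch only: expected wiring of CombinedAdam with NPU apex AMP (O2).
import torch
from apex import amp                      # Ascend apex fork assumed
from optim.combined_adam_v3 import CombinedAdam

model = torch.nn.Linear(1024, 1024).npu()                  # placeholder model
optimizer = CombinedAdam(model.parameters(), lr=6e-4,
                         betas=(0.9, 0.997), eps=1e-9, weight_decay=0.0)
model, optimizer = amp.initialize(model, optimizer,
                                  opt_level="O2", combine_grad=True)

loss = model(torch.randn(8, 1024).npu()).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()        # first step splits the fused buffers per param group
optimizer.zero_grad()
```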
+ +import torch +import math +from torch.optim.optimizer import Optimizer, required +from change_data_ptr import change_data_ptr + + +def combine_tensor(list_of_tensor, copy_back=True): + total_numel = 0 + for tensor in list_of_tensor: + total_numel += tensor.storage().size() + combined_tensor = torch.randn(total_numel).npu().to(list_of_tensor[0].dtype) + + idx = 0 + if copy_back: + for tensor in list_of_tensor: + temp = tensor.clone() + temp.copy_(tensor) + change_data_ptr(tensor, combined_tensor, idx) + temp_data = tensor.data + temp_data.copy_(temp) + idx += temp.storage().size() + else: + for tensor in list_of_tensor: + change_data_ptr(tensor, combined_tensor, idx) + idx += tensor.storage().size() + return combined_tensor + + +def recombine_tensor(size, combined_tensor, index=0): + temp_grad = torch.zeros(size).npu().to(combined_tensor.dtype) + change_data_ptr(temp_grad, combined_tensor, index) + return temp_grad + + +class CombinedAdam(torch.optim.Optimizer): + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False, combine_grad=True): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1".format(betas[1])) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad) + super(CombinedAdam, self).__init__(params, defaults) + + self.combined = combine_grad + self.init_combine = False + self.first_init = True + self.opt_level_O2_has_bn = False + self.combined_grad = [] + self.combined_weight = [] + + def __setstate__(self, state): + super(CombinedAdam, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + def split_combined_tensors(self, input_combined_grad_1, input_combined_grad_2=None): + if len(self.combined_weight) > 0: + # has big tensor before, release storage + for tensor in self.combined_weight: + tensor = None + self.first_init = False + self.combined_grad = [] + self.combined_weight = [] + + index_ops, index_bn = 0, 0 + for param_group in self.param_groups: + size_ops, size_bn = 0, 0 + ord_param_list = [] + spe_param_list = [] + check_param_size = 0 + for param in param_group["params"]: + if param.requires_grad and param.grad is not None: + temp_size = param.grad.storage().size() + check_param_size += param.storage().size() + if input_combined_grad_1.data_ptr() <= param.grad.data_ptr() < input_combined_grad_1.data_ptr() + input_combined_grad_1.numel() * input_combined_grad_1.element_size(): + size_ops += temp_size + ord_param_list.append(param) + else: + size_bn += temp_size + spe_param_list.append(param) + self.combined_grad.append(recombine_tensor(size_ops, input_combined_grad_1, index_ops)) + self.combined_weight.append(combine_tensor(ord_param_list, copy_back=True)) + + index_ops += size_ops + if input_combined_grad_2 is not None: + self.combined_grad.append(recombine_tensor(size_bn, input_combined_grad_2, index_bn)) + self.combined_weight.append(combine_tensor(spe_param_list, copy_back=True)) + index_bn += size_bn + + def _init_combined(self): + if not self.init_combine: + if hasattr(self, "_amp_stash"): + stash = self._amp_stash + if hasattr(stash, "all_fp32_params"): + if len(stash.grads_list) == 0: + raise RuntimeError("When use CombinedAdam, Apex O1 need to use 
combine_grad=True module!") + self.split_combined_tensors(stash.grads_list[-1]) + self.init_combine = True + elif hasattr(stash, "all_fp32_from_fp16_params"): + if len(stash.grads_list) == 0: + raise RuntimeError("When use CombinedAdam, Apex O2 need to usecombine_grad=True module!") + if stash.grads_list[1] is not None: + if stash.grads_list[2] is None: + self.split_combined_tensors(stash.grads_list[1]) + else: + self.split_combined_tensors(stash.grads_list[1], stash.grads_list[2]) + self.opt_level_O2_has_bn = True + else: + raise RuntimeError("Inapproperiate network which only have batchnorm layers!") + self.init_combine = True + else: + for param_group in self.param_groups: + lst_grad = [] + lst_weight = [] + for param in param_group["params"]: + if param.requires_grad and param.grad is not None: + lst_grad.append(param.grad) + lst_weight.append(param) + if len(lst_grad) > 0: + self.combined_grad.append(combine_tensor(lst_grad, True)) + self.combined_weight.append(combine_tensor(lst_weight, True)) + self.combined_momentum.append(torch.zeros_like(self.combined_grad[-1])) + self.init_combine = True + for idx, tensor in enumerate(self.combined_weight): + state = self.state[tensor] + if len(state) == 0: + state["step"] = 0 + state["exp_avg"] = torch.zeros_like(self.combined_weight[idx]) + state["exp_avg_sq"] = torch.zeros_like(self.combined_weight[idx]) + + def step_combined(self, idx, state, group): + combined_weight = self.combined_weight[idx] + combined_grad = self.combined_grad[idx] + exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"] + + amsgrad = group['amsgrad'] + if amsgrad: + if state["step"] == 0: + state["max_exp_avg_sq"] = torch.zeros_like(combined_weight) + beta1, beta2 = group['betas'] + + state["step"] += 1 + bias_correction1 = 1 - beta1 ** state["step"] + bias_correction2 = 1 - beta2 ** state["step"] + + exp_avg.mul_(beta1).add_(combined_grad, alpha=1-beta1) + exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1-beta2) + + if amsgrad: + max_exp_avg_sq = state["max_exp_avg_sq"] + max_exp_avg_sq = torch.max(max_exp_avg_sq, exp_avg_sq) + denom = max_exp_avg_sq.sqrt().add_(group["eps"]) + else: + denom = exp_avg_sq.sqrt().add_(group["eps"]) + step_size = group["lr"] * math.sqrt(bias_correction2) / bias_correction1 + if group["weight_decay"] != 0: + combined_weight.data.add_(-group["weight_decay"] * group["lr"], combined_weight.data) + + combined_weight.data.addcdiv_(exp_avg, denom, value=-step_size) + + @torch.no_grad() + def step(self, closure=None, enable=True): + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + idx = 0 + for group in self.param_groups: + if self.combined: + self._init_combined() + state = self.state[self.combined_weight[idx]] + self.step_combined(idx, state, group) + if self.opt_level_O2_has_bn: + idx += 1 + state = self.state[self.combined_weight[idx]] + self.step_combined(idx, state, group) + else: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad.data + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + amsgrad = group['amsgrad'] + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p.data) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p.data) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. 
values + state['max_exp_avg_sq'] = torch.zeros_like(p.data) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + if amsgrad: + max_exp_avg_sq = state['max_exp_avg_sq'] + beta1, beta2 = group['betas'] + + state['step'] += 1 + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + + if amsgrad: + # Maintains the maximum of all 2nd moment running avg. till now + max_exp_avg_sq = torch.max(max_exp_avg_sq, exp_avg_sq) + # Use the max. for normalizing running avg. of gradient + denom = max_exp_avg_sq.sqrt().add_(group['eps']) + else: + denom = exp_avg_sq.sqrt().add_(group['eps']) + + bias_correction1 = 1 - beta1 ** state['step'] + bias_correction2 = 1 - beta2 ** state['step'] + step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 + + if group['weight_decay'] != 0: + p.data.add_(-group['weight_decay'] * group['lr'], p.data) + + # p.data.addcdiv_(-step_size, exp_avg, denom) + p.data.addcdiv_(exp_avg, denom, value=-step_size) + idx += 1 return loss \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Transformer_for_PyTorch/preprocess.py b/PyTorch/built-in/nlp/Transformer_for_PyTorch/preprocess.py index 61358b94a043ebb665d2d83c98562466512ef7e6..29ce6f999e7b052e464e8c944480e696f508dfb6 100644 --- a/PyTorch/built-in/nlp/Transformer_for_PyTorch/preprocess.py +++ b/PyTorch/built-in/nlp/Transformer_for_PyTorch/preprocess.py @@ -1,201 +1,201 @@ -#!/usr/bin/env python3 -# Copyright (c) 2017-present, Facebook, Inc. -# All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# This source code is licensed under the license found in the LICENSE file in -# the root directory of this source tree. An additional grant of patent rights -# can be found in the PATENTS file in the same directory. 
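preprocess.py below builds the dictionaries and binarized dataset that the README's training commands read from ./data/dataset/wmt14_en_de_joined_dict/; run_preprocessing.sh is the supported entry point. As an illustration, an equivalent direct call is sketched next, where the trainpref/validpref/testpref paths are placeholders and only the destination directory mirrors the README.

```
# Hypothetical direct invocation of the preprocessing step; paths other than
# --destdir are placeholders.
from preprocess import get_parser, main

args = get_parser().parse_args([
    "--source-lang", "en", "--target-lang", "de",
    "--trainpref", "wmt14/train",
    "--validpref", "wmt14/valid",
    "--testpref", "wmt14/test",
    "--destdir", "data/dataset/wmt14_en_de_joined_dict",
    "--joined-dictionary",
])
main(args)
```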
-# - -import argparse -from itertools import zip_longest -import os -import shutil - -from data import indexed_dataset, dictionary -from data.tokenizer import Tokenizer, tokenize_line - -def get_parser(): - parser = argparse.ArgumentParser( - description='Data pre-processing: Create dictionary and store data in binary format') - parser.add_argument('-s', '--source-lang', default=None, metavar='SRC', help='source language') - parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET', help='target language') - parser.add_argument('--trainpref', metavar='FP', default=None, help='train file prefix') - parser.add_argument('--validpref', metavar='FP', default=None, help='comma separated, valid file prefixes') - parser.add_argument('--testpref', metavar='FP', default=None, help='comma separated, test file prefixes') - parser.add_argument('--destdir', metavar='DIR', default='data-bin', help='destination dir') - parser.add_argument('--thresholdtgt', metavar='N', default=0, type=int, - help='map words appearing less than threshold times to unknown') - parser.add_argument('--thresholdsrc', metavar='N', default=0, type=int, - help='map words appearing less than threshold times to unknown') - parser.add_argument('--tgtdict', metavar='FP', help='reuse given target dictionary') - parser.add_argument('--srcdict', metavar='FP', help='reuse given source dictionary') - parser.add_argument('--nwordstgt', metavar='N', default=-1, type=int, help='number of target words to retain') - parser.add_argument('--nwordssrc', metavar='N', default=-1, type=int, help='number of source words to retain') - parser.add_argument('--alignfile', metavar='ALIGN', default=None, help='an alignment file (optional)') - parser.add_argument('--output-format', metavar='FORMAT', default='binary', choices=['binary', 'raw'], - help='output format (optional)') - parser.add_argument('--joined-dictionary', action='store_true', help='Generate joined dictionary') - parser.add_argument('--only-source', action='store_true', help='Only process the source language') - parser.add_argument('--padding-factor', metavar='N', default=8, type=int, - help='Pad dictionary size to be multiple of N') - return parser - - -def main(args): - print(args) - os.makedirs(args.destdir, exist_ok=True) - target = not args.only_source - - def build_dictionary(filenames): - d = dictionary.Dictionary() - for filename in filenames: - Tokenizer.add_file_to_dictionary(filename, d, tokenize_line) - return d - - def train_path(lang): - return '{}{}'.format(args.trainpref, ('.' 
+ lang) if lang else '') - - def file_name(prefix, lang): - fname = prefix - if lang is not None: - fname += f'.{lang}' - return fname - - def dest_path(prefix, lang): - return os.path.join(args.destdir, file_name(prefix, lang)) - - def dict_path(lang): - return dest_path('dict', lang) + '.txt' - - def dataset_dest_path(output_prefix, lang, extension): - base = f'{args.destdir}/{output_prefix}' - lang_part = f'.{args.source_lang}-{args.target_lang}.{lang}' if lang is not None else '' - return f'{base}{lang_part}.{extension}' - - if args.joined_dictionary: - assert not args.srcdict, 'cannot combine --srcdict and --joined-dictionary' - assert not args.tgtdict, 'cannot combine --tgtdict and --joined-dictionary' - src_dict = build_dictionary(set([ - train_path(lang) - for lang in [args.source_lang, args.target_lang] - ])) - tgt_dict = src_dict - else: - if args.srcdict: - src_dict = dictionary.Dictionary.load(args.srcdict) - else: - assert args.trainpref, "--trainpref must be set if --srcdict is not specified" - src_dict = build_dictionary([train_path(args.source_lang)]) - if target: - if args.tgtdict: - tgt_dict = dictionary.Dictionary.load(args.tgtdict) - else: - assert args.trainpref, "--trainpref must be set if --tgtdict is not specified" - tgt_dict = build_dictionary([train_path(args.target_lang)]) - - src_dict.finalize( - threshold=args.thresholdsrc, - nwords=args.nwordssrc, - padding_factor=args.padding_factor, - ) - src_dict.save(dict_path(args.source_lang)) - if target: - if not args.joined_dictionary: - tgt_dict.finalize( - threshold=args.thresholdtgt, - nwords=args.nwordstgt, - padding_factor=args.padding_factor, - ) - tgt_dict.save(dict_path(args.target_lang)) - - def make_binary_dataset(input_prefix, output_prefix, lang): - dict = dictionary.Dictionary.load(dict_path(lang)) - print('| [{}] Dictionary: {} types'.format(lang, len(dict) - 1)) - - ds = indexed_dataset.IndexedDatasetBuilder(dataset_dest_path(output_prefix, lang, 'bin')) - - def consumer(tensor): - ds.add_item(tensor) - - input_file = '{}{}'.format(input_prefix, ('.' 
+ lang) if lang is not None else '') - res = Tokenizer.binarize(input_file, dict, consumer) - print('| [{}] {}: {} sents, {} tokens, {:.3}% replaced by {}'.format( - lang, input_file, res['nseq'], res['ntok'], - 100 * res['nunk'] / res['ntok'], dict.unk_word)) - ds.finalize(dataset_dest_path(output_prefix, lang, 'idx')) - - def make_dataset(input_prefix, output_prefix, lang): - if args.output_format == 'binary': - make_binary_dataset(input_prefix, output_prefix, lang) - elif args.output_format == 'raw': - # Copy original text file to destination folder - output_text_file = dest_path( - output_prefix + '.{}-{}'.format(args.source_lang, args.target_lang), - lang, - ) - shutil.copyfile(file_name(input_prefix, lang), output_text_file) - - def make_all(lang): - if args.trainpref: - make_dataset(args.trainpref, 'train', lang) - if args.validpref: - for k, validpref in enumerate(args.validpref.split(',')): - outprefix = 'valid{}'.format(k) if k > 0 else 'valid' - make_dataset(validpref, outprefix, lang) - if args.testpref: - for k, testpref in enumerate(args.testpref.split(',')): - outprefix = 'test{}'.format(k) if k > 0 else 'test' - make_dataset(testpref, outprefix, lang) - - make_all(args.source_lang) - if target: - make_all(args.target_lang) - - print('| Wrote preprocessed data to {}'.format(args.destdir)) - - if args.alignfile: - assert args.trainpref, "--trainpref must be set if --alignfile is specified" - src_file_name = train_path(args.source_lang) - tgt_file_name = train_path(args.target_lang) - src_dict = dictionary.Dictionary.load(dict_path(args.source_lang)) - tgt_dict = dictionary.Dictionary.load(dict_path(args.target_lang)) - freq_map = {} - with open(args.alignfile, 'r') as align_file: - with open(src_file_name, 'r') as src_file: - with open(tgt_file_name, 'r') as tgt_file: - for a, s, t in zip_longest(align_file, src_file, tgt_file): - si = Tokenizer.tokenize(s, src_dict, add_if_not_exist=False) - ti = Tokenizer.tokenize(t, tgt_dict, add_if_not_exist=False) - ai = list(map(lambda x: tuple(x.split('-')), a.split())) - for sai, tai in ai: - srcidx = si[int(sai)] - tgtidx = ti[int(tai)] - if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk(): - assert srcidx != src_dict.pad() - assert srcidx != src_dict.eos() - assert tgtidx != tgt_dict.pad() - assert tgtidx != tgt_dict.eos() - - if srcidx not in freq_map: - freq_map[srcidx] = {} - if tgtidx not in freq_map[srcidx]: - freq_map[srcidx][tgtidx] = 1 - else: - freq_map[srcidx][tgtidx] += 1 - - align_dict = {} - for srcidx in freq_map.keys(): - align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get) - - with open(os.path.join(args.destdir, 'alignment.{}-{}.txt'.format( - args.source_lang, args.target_lang)), 'w') as f: - for k, v in align_dict.items(): - print('{} {}'.format(src_dict[k], tgt_dict[v]), file=f) - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() +#!/usr/bin/env python3 +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. 
+# + +import argparse +from itertools import zip_longest +import os +import shutil + +from data import indexed_dataset, dictionary +from data.tokenizer import Tokenizer, tokenize_line + +def get_parser(): + parser = argparse.ArgumentParser( + description='Data pre-processing: Create dictionary and store data in binary format') + parser.add_argument('-s', '--source-lang', default=None, metavar='SRC', help='source language') + parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET', help='target language') + parser.add_argument('--trainpref', metavar='FP', default=None, help='train file prefix') + parser.add_argument('--validpref', metavar='FP', default=None, help='comma separated, valid file prefixes') + parser.add_argument('--testpref', metavar='FP', default=None, help='comma separated, test file prefixes') + parser.add_argument('--destdir', metavar='DIR', default='data-bin', help='destination dir') + parser.add_argument('--thresholdtgt', metavar='N', default=0, type=int, + help='map words appearing less than threshold times to unknown') + parser.add_argument('--thresholdsrc', metavar='N', default=0, type=int, + help='map words appearing less than threshold times to unknown') + parser.add_argument('--tgtdict', metavar='FP', help='reuse given target dictionary') + parser.add_argument('--srcdict', metavar='FP', help='reuse given source dictionary') + parser.add_argument('--nwordstgt', metavar='N', default=-1, type=int, help='number of target words to retain') + parser.add_argument('--nwordssrc', metavar='N', default=-1, type=int, help='number of source words to retain') + parser.add_argument('--alignfile', metavar='ALIGN', default=None, help='an alignment file (optional)') + parser.add_argument('--output-format', metavar='FORMAT', default='binary', choices=['binary', 'raw'], + help='output format (optional)') + parser.add_argument('--joined-dictionary', action='store_true', help='Generate joined dictionary') + parser.add_argument('--only-source', action='store_true', help='Only process the source language') + parser.add_argument('--padding-factor', metavar='N', default=8, type=int, + help='Pad dictionary size to be multiple of N') + return parser + + +def main(args): + print(args) + os.makedirs(args.destdir, exist_ok=True) + target = not args.only_source + + def build_dictionary(filenames): + d = dictionary.Dictionary() + for filename in filenames: + Tokenizer.add_file_to_dictionary(filename, d, tokenize_line) + return d + + def train_path(lang): + return '{}{}'.format(args.trainpref, ('.' 
+ lang) if lang else '') + + def file_name(prefix, lang): + fname = prefix + if lang is not None: + fname += f'.{lang}' + return fname + + def dest_path(prefix, lang): + return os.path.join(args.destdir, file_name(prefix, lang)) + + def dict_path(lang): + return dest_path('dict', lang) + '.txt' + + def dataset_dest_path(output_prefix, lang, extension): + base = f'{args.destdir}/{output_prefix}' + lang_part = f'.{args.source_lang}-{args.target_lang}.{lang}' if lang is not None else '' + return f'{base}{lang_part}.{extension}' + + if args.joined_dictionary: + assert not args.srcdict, 'cannot combine --srcdict and --joined-dictionary' + assert not args.tgtdict, 'cannot combine --tgtdict and --joined-dictionary' + src_dict = build_dictionary(set([ + train_path(lang) + for lang in [args.source_lang, args.target_lang] + ])) + tgt_dict = src_dict + else: + if args.srcdict: + src_dict = dictionary.Dictionary.load(args.srcdict) + else: + assert args.trainpref, "--trainpref must be set if --srcdict is not specified" + src_dict = build_dictionary([train_path(args.source_lang)]) + if target: + if args.tgtdict: + tgt_dict = dictionary.Dictionary.load(args.tgtdict) + else: + assert args.trainpref, "--trainpref must be set if --tgtdict is not specified" + tgt_dict = build_dictionary([train_path(args.target_lang)]) + + src_dict.finalize( + threshold=args.thresholdsrc, + nwords=args.nwordssrc, + padding_factor=args.padding_factor, + ) + src_dict.save(dict_path(args.source_lang)) + if target: + if not args.joined_dictionary: + tgt_dict.finalize( + threshold=args.thresholdtgt, + nwords=args.nwordstgt, + padding_factor=args.padding_factor, + ) + tgt_dict.save(dict_path(args.target_lang)) + + def make_binary_dataset(input_prefix, output_prefix, lang): + dict = dictionary.Dictionary.load(dict_path(lang)) + print('| [{}] Dictionary: {} types'.format(lang, len(dict) - 1)) + + ds = indexed_dataset.IndexedDatasetBuilder(dataset_dest_path(output_prefix, lang, 'bin')) + + def consumer(tensor): + ds.add_item(tensor) + + input_file = '{}{}'.format(input_prefix, ('.' 
+ lang) if lang is not None else '') + res = Tokenizer.binarize(input_file, dict, consumer) + print('| [{}] {}: {} sents, {} tokens, {:.3}% replaced by {}'.format( + lang, input_file, res['nseq'], res['ntok'], + 100 * res['nunk'] / res['ntok'], dict.unk_word)) + ds.finalize(dataset_dest_path(output_prefix, lang, 'idx')) + + def make_dataset(input_prefix, output_prefix, lang): + if args.output_format == 'binary': + make_binary_dataset(input_prefix, output_prefix, lang) + elif args.output_format == 'raw': + # Copy original text file to destination folder + output_text_file = dest_path( + output_prefix + '.{}-{}'.format(args.source_lang, args.target_lang), + lang, + ) + shutil.copyfile(file_name(input_prefix, lang), output_text_file) + + def make_all(lang): + if args.trainpref: + make_dataset(args.trainpref, 'train', lang) + if args.validpref: + for k, validpref in enumerate(args.validpref.split(',')): + outprefix = 'valid{}'.format(k) if k > 0 else 'valid' + make_dataset(validpref, outprefix, lang) + if args.testpref: + for k, testpref in enumerate(args.testpref.split(',')): + outprefix = 'test{}'.format(k) if k > 0 else 'test' + make_dataset(testpref, outprefix, lang) + + make_all(args.source_lang) + if target: + make_all(args.target_lang) + + print('| Wrote preprocessed data to {}'.format(args.destdir)) + + if args.alignfile: + assert args.trainpref, "--trainpref must be set if --alignfile is specified" + src_file_name = train_path(args.source_lang) + tgt_file_name = train_path(args.target_lang) + src_dict = dictionary.Dictionary.load(dict_path(args.source_lang)) + tgt_dict = dictionary.Dictionary.load(dict_path(args.target_lang)) + freq_map = {} + with open(args.alignfile, 'r') as align_file: + with open(src_file_name, 'r') as src_file: + with open(tgt_file_name, 'r') as tgt_file: + for a, s, t in zip_longest(align_file, src_file, tgt_file): + si = Tokenizer.tokenize(s, src_dict, add_if_not_exist=False) + ti = Tokenizer.tokenize(t, tgt_dict, add_if_not_exist=False) + ai = list(map(lambda x: tuple(x.split('-')), a.split())) + for sai, tai in ai: + srcidx = si[int(sai)] + tgtidx = ti[int(tai)] + if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk(): + assert srcidx != src_dict.pad() + assert srcidx != src_dict.eos() + assert tgtidx != tgt_dict.pad() + assert tgtidx != tgt_dict.eos() + + if srcidx not in freq_map: + freq_map[srcidx] = {} + if tgtidx not in freq_map[srcidx]: + freq_map[srcidx][tgtidx] = 1 + else: + freq_map[srcidx][tgtidx] += 1 + + align_dict = {} + for srcidx in freq_map.keys(): + align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get) + + with open(os.path.join(args.destdir, 'alignment.{}-{}.txt'.format( + args.source_lang, args.target_lang)), 'w') as f: + for k, v in align_dict.items(): + print('{} {}'.format(src_dict[k], tgt_dict[v]), file=f) + + +if __name__ == '__main__': + parser = get_parser() + args = parser.parse_args() main(args) \ No newline at end of file diff --git a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_full_1p.sh index b7d8c17f16759f48bb892b21a3ba6b87d89860ea..4abf4787ff11a8e8fa23b749591ca85d59ec0058 100644 --- a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_full_1p.sh @@ -1,217 +1,217 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID - -#集合通信参数,不需要修改 -export RANK_SIZE=1 -export JOB_ID=10087 
-RANK_ID_START=0 - - -export NNPU=1 - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 -export RANK=0 -export WORLD_SIZE=1 - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="XLM_ID0740_for_PyTorch" -#训练epoch -train_epochs=180 -#训练batch_size -batch_size=16 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.495 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不h需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - - -#sed -i "s|./data|$data_path|g" examples/cats_and_dogs.py -#sed -i "s|epochs = 20|epochs = 1|g" examples/cats_and_dogs.py -#sed -i "s|pass|break|g" train.py - -#python3 setup.py install -#mkdir -p checkpoints -#mkdir -p /root/.cache/torch/hub/checkpoints -#cp $data_path/fcn_* /root/.cache/torch/hub/checkpoints - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - nohup python3 train.py \ - --data_path $data_path/50k \ - --exp_name xlm_en_zh \ - --dump_path ./dumped \ - --lgs 'en-zh' \ - --clm_steps '' \ - --mlm_steps 'en,zh' \ - --emb_dim 1024 \ - --n_layers 12 \ - --n_heads 16 \ - --dropout 0.1 \ - --attention_dropout 0.1 \ - --gelu_activation true \ - --batch_size $batch_size \ - --bptt 256 \ - --optimizer npu_fused_adam_v2,lr=0.00005 \ - --epoch_size 300000 \ - --max_epoch $train_epochs \ - 
--validation_metrics _valid_mlm_ppl \ - --stopping_criterion _valid_mlm_ppl,8 \ - --fp16 true \ - --amp 2 \ - --seed 1 \ - --local_rank $ASCEND_DEVICE_ID \ - --max_epoch 1 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -done -wait - -#恢复参数 -#sed -i "s|break|pass|g" train.py - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep "sent/s" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "sent/s -" '{print $2}'|awk '{print $1}'|tail -n +2|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${perf}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -train_accuracy=`grep "vaild_en_mlm_acc" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "vaild_en_mlm_acc ->" '{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "sent/s" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "MLM-en: " '{print $2}'|awk '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID + +#集合通信参数,不需要修改 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +export NNPU=1 + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 +export RANK=0 +export WORLD_SIZE=1 + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="XLM_ID0740_for_PyTorch" +#训练epoch +train_epochs=180 +#训练batch_size +batch_size=16 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False 
+data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不h需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + + +#sed -i "s|./data|$data_path|g" examples/cats_and_dogs.py +#sed -i "s|epochs = 20|epochs = 1|g" examples/cats_and_dogs.py +#sed -i "s|pass|break|g" train.py + +#python3 setup.py install +#mkdir -p checkpoints +#mkdir -p /root/.cache/torch/hub/checkpoints +#cp $data_path/fcn_* /root/.cache/torch/hub/checkpoints + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + nohup python3 train.py \ + --data_path $data_path/50k \ + --exp_name xlm_en_zh \ + --dump_path ./dumped \ + --lgs 'en-zh' \ + --clm_steps '' \ + --mlm_steps 'en,zh' \ + --emb_dim 1024 \ + --n_layers 12 \ + --n_heads 16 \ + --dropout 0.1 \ + --attention_dropout 0.1 \ + --gelu_activation true \ + --batch_size $batch_size \ + --bptt 256 \ + --optimizer npu_fused_adam_v2,lr=0.00005 \ + --epoch_size 300000 \ + --max_epoch $train_epochs \ + --validation_metrics _valid_mlm_ppl \ + --stopping_criterion _valid_mlm_ppl,8 \ + --fp16 true \ + --amp 2 \ + --seed 1 \ + --local_rank $ASCEND_DEVICE_ID \ + --max_epoch 1 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +done +wait + +#恢复参数 +#sed -i "s|break|pass|g" train.py + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep "sent/s" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "sent/s -" '{print $2}'|awk '{print 
$1}'|tail -n +2|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${perf}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +train_accuracy=`grep "vaild_en_mlm_acc" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "vaild_en_mlm_acc ->" '{print $2}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "sent/s" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "MLM-en: " '{print $2}'|awk '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_performance_1p.sh index d155368b54b0b29a73c2f377e11ea1800d9daaff..4e873e4c4029c2cd4e8cc1c670d1dff21029ba71 100644 --- a/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch/test/train_performance_1p.sh @@ -1,216 +1,216 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID - -#集合通信参数,不需要修改 -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -export NNPU=1 - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 -export RANK=0 -export WORLD_SIZE=1 - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="XLM_ID0740_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=16 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.495 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False 
-autotune=False - -# 帮助信息,不h需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - - -#sed -i "s|./data|$data_path|g" examples/cats_and_dogs.py -#sed -i "s|epochs = 20|epochs = 1|g" examples/cats_and_dogs.py -sed -i "s|pass|break|g" train.py - -#python3 setup.py install -#mkdir -p checkpoints -#mkdir -p /root/.cache/torch/hub/checkpoints -#cp $data_path/fcn_* /root/.cache/torch/hub/checkpoints - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - nohup python3 train.py \ - --data_path $data_path/50k \ - --exp_name xlm_en_zh \ - --dump_path ./dumped \ - --lgs 'en-zh' \ - --clm_steps '' \ - --mlm_steps 'en,zh' \ - --emb_dim 1024 \ - --n_layers 12 \ - --n_heads 16 \ - --dropout 0.1 \ - --attention_dropout 0.1 \ - --gelu_activation true \ - --batch_size $batch_size \ - --bptt 256 \ - --optimizer npu_fused_adam_v2,lr=0.00005 \ - --epoch_size 300000 \ - --max_epoch $train_epochs \ - --validation_metrics _valid_mlm_ppl \ - --stopping_criterion _valid_mlm_ppl,8 \ - --fp16 true \ - --amp 2 \ - --seed 1 \ - --local_rank $ASCEND_DEVICE_ID \ - --max_epoch 1 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -done -wait - -#恢复参数 -sed -i "s|break|pass|g" train.py - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep "sent/s" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "sent/s -" '{print $2}'|awk '{print $1}'|tail -n +2|awk '{sum+=$1} END 
{print"",sum/NR}'|sed s/[[:space:]]//g` -#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${perf}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -#echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "sent/s" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "MLM-en: " '{print $2}'|awk '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID + +#集合通信参数,不需要修改 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +export NNPU=1 + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 +export RANK=0 +export WORLD_SIZE=1 + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="XLM_ID0740_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=16 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不h需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump 
+ mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + + +#sed -i "s|./data|$data_path|g" examples/cats_and_dogs.py +#sed -i "s|epochs = 20|epochs = 1|g" examples/cats_and_dogs.py +sed -i "s|pass|break|g" train.py + +#python3 setup.py install +#mkdir -p checkpoints +#mkdir -p /root/.cache/torch/hub/checkpoints +#cp $data_path/fcn_* /root/.cache/torch/hub/checkpoints + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + nohup python3 train.py \ + --data_path $data_path/50k \ + --exp_name xlm_en_zh \ + --dump_path ./dumped \ + --lgs 'en-zh' \ + --clm_steps '' \ + --mlm_steps 'en,zh' \ + --emb_dim 1024 \ + --n_layers 12 \ + --n_heads 16 \ + --dropout 0.1 \ + --attention_dropout 0.1 \ + --gelu_activation true \ + --batch_size $batch_size \ + --bptt 256 \ + --optimizer npu_fused_adam_v2,lr=0.00005 \ + --epoch_size 300000 \ + --max_epoch $train_epochs \ + --validation_metrics _valid_mlm_ppl \ + --stopping_criterion _valid_mlm_ppl,8 \ + --fp16 true \ + --amp 2 \ + --seed 1 \ + --local_rank $ASCEND_DEVICE_ID \ + --max_epoch 1 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +done +wait + +#恢复参数 +sed -i "s|break|pass|g" train.py + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep "sent/s" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "sent/s -" '{print $2}'|awk '{print $1}'|tail -n +2|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${perf}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +#echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep 
"sent/s" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "MLM-en: " '{print $2}'|awk '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/set_conda.sh b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/set_conda.sh index 55087d8622f46e055bb105a2acd1fe7006d4af07..febb0fa34937160c9ad16c92dcb67cd4005e2f57 100644 --- a/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/set_conda.sh +++ b/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch/test/set_conda.sh @@ -1,2 +1,2 @@ -export PATH=/home/anaconda3/bin:$PATH +export PATH=/home/anaconda3/bin:$PATH export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/LICENSE b/PyTorch/built-in/others/WDL_for_PyTorch/LICENSE index deeea2d8ccdb1354f351a6ea02ed456849d51422..b09cd7856d58590578ee1a4f3ad45d1310a97f87 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/LICENSE +++ b/PyTorch/built-in/others/WDL_for_PyTorch/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
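For reference, a minimal sketch of the boilerplate notice above as it would look once the bracketed fields are filled in and the text is wrapped in Python comment syntax; the year and copyright owner shown here are illustrative, following the headers used elsewhere in this repository:

```python
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```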
diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/criteo_preprocess.py b/PyTorch/built-in/others/WDL_for_PyTorch/criteo_preprocess.py index a1c942a4275137a5918f0d24a412a24c4470fd16..ef2d419dcf06d3bec388b69a298421866c5b0345 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/criteo_preprocess.py +++ b/PyTorch/built-in/others/WDL_for_PyTorch/criteo_preprocess.py @@ -1,54 +1,54 @@ -# -*- coding: utf-8 -*- - -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# less required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import sys -import os -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder, MinMaxScaler - - -if __name__ == '__main__': - data_path = os.path.abspath(sys.argv[1]) - - SPARSE_FEATURES_NUM = 27 - DENSE_FEATURES_NUM = 14 - - sparse_features = ['C' + str(i) for i in range(1, SPARSE_FEATURES_NUM)] - dense_features = ['I' + str(i) for i in range(1, DENSE_FEATURES_NUM)] - target = ['label'] - - name_column = target + dense_features + sparse_features - - data = pd.read_csv(data_path, names=name_column, sep='\t') - data[sparse_features] = data[sparse_features].fillna('-1', ) - data[dense_features] = data[dense_features].fillna(0, ) - - for feat in sparse_features: - print(feat) - lbe = LabelEncoder() - data[feat] = lbe.fit_transform(data[feat]) - mms = MinMaxScaler(feature_range=(0, 1)) - data[dense_features] = mms.fit_transform(data[dense_features]) - train, test = train_test_split(data, test_size=0.1, random_state=2020) - - pd.DataFrame(train, columns=name_column).to_pickle(os.path.dirname(data_path) + '/wdl_trainval.pkl') - pd.DataFrame(test, columns=name_column).to_pickle(os.path.dirname(data_path) + '/wdl_test.pkl') - - # the val dataset for inferring - infer_column = target + sparse_features + dense_features - pd.DataFrame(test, columns=infer_column).to_csv(os.path.dirname(data_path) + '/wdl_infer.txt', index=False) +# -*- coding: utf-8 -*- + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# less required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import sys +import os +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder, MinMaxScaler + + +if __name__ == '__main__': + data_path = os.path.abspath(sys.argv[1]) + + SPARSE_FEATURES_NUM = 27 + DENSE_FEATURES_NUM = 14 + + sparse_features = ['C' + str(i) for i in range(1, SPARSE_FEATURES_NUM)] + dense_features = ['I' + str(i) for i in range(1, DENSE_FEATURES_NUM)] + target = ['label'] + + name_column = target + dense_features + sparse_features + + data = pd.read_csv(data_path, names=name_column, sep='\t') + data[sparse_features] = data[sparse_features].fillna('-1', ) + data[dense_features] = data[dense_features].fillna(0, ) + + for feat in sparse_features: + print(feat) + lbe = LabelEncoder() + data[feat] = lbe.fit_transform(data[feat]) + mms = MinMaxScaler(feature_range=(0, 1)) + data[dense_features] = mms.fit_transform(data[dense_features]) + train, test = train_test_split(data, test_size=0.1, random_state=2020) + + pd.DataFrame(train, columns=name_column).to_pickle(os.path.dirname(data_path) + '/wdl_trainval.pkl') + pd.DataFrame(test, columns=name_column).to_pickle(os.path.dirname(data_path) + '/wdl_test.pkl') + + # the val dataset for inferring + infer_column = target + sparse_features + dense_features + pd.DataFrame(test, columns=infer_column).to_csv(os.path.dirname(data_path) + '/wdl_infer.txt', index=False) diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/infer/Dockerfile b/PyTorch/built-in/others/WDL_for_PyTorch/infer/Dockerfile index 499c927d66a694289181f04ae8c0e8ca8d5aaf9a..45963fec3c75661ef40ed69d3b29f536162dce91 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/infer/Dockerfile +++ b/PyTorch/built-in/others/WDL_for_PyTorch/infer/Dockerfile @@ -1,30 +1,30 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# less required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -ARG SDK_PKG - -RUN ln -s /usr/local/python3.7.5/bin/python3.7 /usr/bin/python - -COPY requirements.txt . -RUN pip3.7 install -r requirements.txt - -COPY $SDK_PKG . -RUN ls -hrlt -RUN chmod +x ${SDK_PKG} && \ - ./${SDK_PKG} --install-path=/home/run --install && \ - bash -c "source ~/.bashrc" +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# less required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +ARG SDK_PKG + +RUN ln -s /usr/local/python3.7.5/bin/python3.7 /usr/bin/python + +COPY requirements.txt . +RUN pip3.7 install -r requirements.txt + +COPY $SDK_PKG . +RUN ls -hrlt +RUN chmod +x ${SDK_PKG} && \ + ./${SDK_PKG} --install-path=/home/run --install && \ + bash -c "source ~/.bashrc" diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/infer/requirements.txt b/PyTorch/built-in/others/WDL_for_PyTorch/infer/requirements.txt index 614cfb822e968a89b2c13553bb2fc867ffa07fec..2377a25ade659e02764560acb9d13e8d472fc96c 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/infer/requirements.txt +++ b/PyTorch/built-in/others/WDL_for_PyTorch/infer/requirements.txt @@ -1,4 +1,4 @@ -sklearn -pandas -tqdm +sklearn +pandas +tqdm numpy \ No newline at end of file diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/others/WDL_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/built-in/others/WDL_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/requirements.txt b/PyTorch/built-in/others/WDL_for_PyTorch/requirements.txt index 0dda92456944604ae2c952524765cafa9722500e..83cfd38dfb33665b0293688c94f26d1e152d0751 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/requirements.txt +++ b/PyTorch/built-in/others/WDL_for_PyTorch/requirements.txt @@ -1,4 +1,4 @@ -sklearn -pandas==1.3.3 -tqdm +sklearn +pandas==1.3.3 +tqdm numpy \ No newline at end of file diff --git a/PyTorch/built-in/others/WDL_for_PyTorch/run_classification_criteo_wdl.py b/PyTorch/built-in/others/WDL_for_PyTorch/run_classification_criteo_wdl.py index 077ebd6ff94bfa81509a9c9bea61a1c880e5825f..57bc0ed23b893da5cdf2e620f54603f88c3c7aaa 100644 --- a/PyTorch/built-in/others/WDL_for_PyTorch/run_classification_criteo_wdl.py +++ b/PyTorch/built-in/others/WDL_for_PyTorch/run_classification_criteo_wdl.py @@ -1,141 +1,141 @@ -# -*- coding: utf-8 -*- - -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# less required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - - -import os -import time -import random -import argparse - -import numpy as np -import pandas as pd - -import sklearn -import torch - -from sklearn.metrics import log_loss, roc_auc_score - -from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names -from deepctr_torch.models import WDL - - -def args_parser(): - parser = argparse.ArgumentParser(description='Wide&Deep') - parser.add_argument('--seed', default=1234, type=int, - help='seed for initializing training.') - parser.add_argument('--device_id', default=0, type=int, help='device id') - parser.add_argument('--rank', default=0, type=int, help='node rank for distributed training') - parser.add_argument('--dist', default=False, action='store_true', help='8p distributed training') - parser.add_argument('--device_num', default=1, type=int, - help='num of npu device for training') - parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') - parser.add_argument('--loss_scale', default=1024, type=float, - help='loss scale using in amp, default -1 means dynamic') - parser.add_argument('--opt_level', default='O1', type=str, - help='apex opt level') - parser.add_argument('--data_path', required=True, type=str, help='train data, and is to be') - parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') - parser.add_argument('--checkpoint_save_path', default='./', type=str, metavar='PATH', - help='path to save latest checkpoint') - parser.add_argument('--lr', default=0.0001, type=float, help='learning rate for training') - parser.add_argument('--batch_size', default=1024, type=int, help='batch size for training') - parser.add_argument('--eval_batch_size', default=16000, type=int, help='batch size for testing') - parser.add_argument('--epochs', default=3, type=int, help='epochs for training') - parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='record of the start epoch to run') - parser.add_argument('--sparse_embed_dim', default=4, type=int, help='The embedding dims for sparse features') - parser.add_argument('--steps', default=0, type=int, help='steps for training') - - parser_args, _ = parser.parse_known_args() - return parser_args - - -def fix_random(seed): - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - np.random.seed(seed) - random.seed(seed) - - -if __name__ == "__main__": - args = args_parser() - print(args) - - fix_random(args.seed) - - sparse_features = ['C' + str(i) for i in range(1, 27)] - dense_features = ['I' + str(i) for i in range(1, 14)] - target = ['label'] - - # count #unique features for each sparse field,and record dense feature field name - start_time = time.time() - - data_trainval = pd.read_pickle(os.path.join(args.data_path, 'wdl_trainval.pkl')) - data_test = pd.read_pickle(os.path.join(args.data_path, 'wdl_test.pkl')) - - print('Data loaded in {}s'.format(time.time() - start_time)) - - sparse_nunique = [1460, 583, 10131227, 2202608, 305, 24, 12517, 633, 3, 93145, 5683, 8351593, 3194, 27, 14992, - 5461306, 10, 5652, 2173, 4, 7046547, 18, 15, 286181, 105, 142572] - fixlen_feature_columns = [SparseFeat(feat, sparse_nunique[idx], embedding_dim=args.sparse_embed_dim) - for idx, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, ) - for feat in dense_features] - print(fixlen_feature_columns) - - dnn_feature_columns = fixlen_feature_columns - 
linear_feature_columns = fixlen_feature_columns - - feature_names = get_feature_names( - linear_feature_columns + dnn_feature_columns) - - # generate input data for model - print('Generating input data for model...') - start_time = time.time() - train_model_input = {name: data_trainval[name].astype(float) for name in feature_names} - test_model_input = {name: data_test[name].astype(float) for name in feature_names} - print('Input data generated in {}s'.format(time.time() - start_time)) - - # Define Model,train,predict and evaluate - args.device_num = int(os.environ["RANK_SIZE"]) - if args.dist: - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29680' - - args.rank = args.device_id - torch.distributed.init_process_group(backend='hccl', world_size=args.device_num, rank=args.rank) - print('distributed train enabled') - - device = 'npu:' + str(args.device_id) - torch.npu.set_device(device) - print('train on: ', device) - - model = WDL(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, - task='binary', dnn_hidden_units=(512, 256, 128), dnn_dropout=0.5, device=device, l2_reg_linear=1e-4, - l2_reg_embedding=1e-4, dist=args.dist) - - model.compile("adam", "binary_crossentropy", - metrics=["binary_crossentropy", "auc"], lr=args.lr, args=args) - - history = model.fit(train_model_input, data_trainval[target].values, batch_size=args.batch_size, epochs=args.epochs, - verbose=2, - validation_split=0.3, args=args) - - pred_ans = model.predict(test_model_input, args.eval_batch_size) - print("test LogLoss", round(log_loss(data_test[target].values, pred_ans), 4)) - print("test AUC", round(roc_auc_score(data_test[target].values, pred_ans), 4)) +# -*- coding: utf-8 -*- + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# less required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + + +import os +import time +import random +import argparse + +import numpy as np +import pandas as pd + +import sklearn +import torch + +from sklearn.metrics import log_loss, roc_auc_score + +from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names +from deepctr_torch.models import WDL + + +def args_parser(): + parser = argparse.ArgumentParser(description='Wide&Deep') + parser.add_argument('--seed', default=1234, type=int, + help='seed for initializing training.') + parser.add_argument('--device_id', default=0, type=int, help='device id') + parser.add_argument('--rank', default=0, type=int, help='node rank for distributed training') + parser.add_argument('--dist', default=False, action='store_true', help='8p distributed training') + parser.add_argument('--device_num', default=1, type=int, + help='num of npu device for training') + parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') + parser.add_argument('--loss_scale', default=1024, type=float, + help='loss scale using in amp, default -1 means dynamic') + parser.add_argument('--opt_level', default='O1', type=str, + help='apex opt level') + parser.add_argument('--data_path', required=True, type=str, help='train data, and is to be') + parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') + parser.add_argument('--checkpoint_save_path', default='./', type=str, metavar='PATH', + help='path to save latest checkpoint') + parser.add_argument('--lr', default=0.0001, type=float, help='learning rate for training') + parser.add_argument('--batch_size', default=1024, type=int, help='batch size for training') + parser.add_argument('--eval_batch_size', default=16000, type=int, help='batch size for testing') + parser.add_argument('--epochs', default=3, type=int, help='epochs for training') + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='record of the start epoch to run') + parser.add_argument('--sparse_embed_dim', default=4, type=int, help='The embedding dims for sparse features') + parser.add_argument('--steps', default=0, type=int, help='steps for training') + + parser_args, _ = parser.parse_known_args() + return parser_args + + +def fix_random(seed): + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + np.random.seed(seed) + random.seed(seed) + + +if __name__ == "__main__": + args = args_parser() + print(args) + + fix_random(args.seed) + + sparse_features = ['C' + str(i) for i in range(1, 27)] + dense_features = ['I' + str(i) for i in range(1, 14)] + target = ['label'] + + # count #unique features for each sparse field,and record dense feature field name + start_time = time.time() + + data_trainval = pd.read_pickle(os.path.join(args.data_path, 'wdl_trainval.pkl')) + data_test = pd.read_pickle(os.path.join(args.data_path, 'wdl_test.pkl')) + + print('Data loaded in {}s'.format(time.time() - start_time)) + + sparse_nunique = [1460, 583, 10131227, 2202608, 305, 24, 12517, 633, 3, 93145, 5683, 8351593, 3194, 27, 14992, + 5461306, 10, 5652, 2173, 4, 7046547, 18, 15, 286181, 105, 142572] + fixlen_feature_columns = [SparseFeat(feat, sparse_nunique[idx], embedding_dim=args.sparse_embed_dim) + for idx, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, ) + for feat in dense_features] + print(fixlen_feature_columns) + + dnn_feature_columns = fixlen_feature_columns + 
linear_feature_columns = fixlen_feature_columns + + feature_names = get_feature_names( + linear_feature_columns + dnn_feature_columns) + + # generate input data for model + print('Generating input data for model...') + start_time = time.time() + train_model_input = {name: data_trainval[name].astype(float) for name in feature_names} + test_model_input = {name: data_test[name].astype(float) for name in feature_names} + print('Input data generated in {}s'.format(time.time() - start_time)) + + # Define Model,train,predict and evaluate + args.device_num = int(os.environ["RANK_SIZE"]) + if args.dist: + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29680' + + args.rank = args.device_id + torch.distributed.init_process_group(backend='hccl', world_size=args.device_num, rank=args.rank) + print('distributed train enabled') + + device = 'npu:' + str(args.device_id) + torch.npu.set_device(device) + print('train on: ', device) + + model = WDL(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, + task='binary', dnn_hidden_units=(512, 256, 128), dnn_dropout=0.5, device=device, l2_reg_linear=1e-4, + l2_reg_embedding=1e-4, dist=args.dist) + + model.compile("adam", "binary_crossentropy", + metrics=["binary_crossentropy", "auc"], lr=args.lr, args=args) + + history = model.fit(train_model_input, data_trainval[target].values, batch_size=args.batch_size, epochs=args.epochs, + verbose=2, + validation_split=0.3, args=args) + + pred_ans = model.predict(test_model_input, args.eval_batch_size) + print("test LogLoss", round(log_loss(data_test[target].values, pred_ans), 4)) + print("test AUC", round(roc_auc_score(data_test[target].values, pred_ans), 4)) diff --git a/PyTorch/contrib/CONTRIBUTING.md b/PyTorch/contrib/CONTRIBUTING.md index 886fae1007aa07d5711d863a1941c4cd7356bdb9..dc9d11e8ea435028f1e4dd85543104c7547b7469 100644 --- a/PyTorch/contrib/CONTRIBUTING.md +++ b/PyTorch/contrib/CONTRIBUTING.md @@ -1,314 +1,314 @@ - **介绍** - -Ascend ModelZoo,欢迎各位开发者 - - **贡献要求** - -开发者提交的模型包括源码、readme、参考模型license文件、测试用例和readme,并遵循以下标准 - -请贡献者在提交代码之前签署CLA协议,“个人签署”,[链接](https://clasign.osinfra.cn/sign/Z2l0ZWUlMkZhc2NlbmQ=) - -如您完成签署,可在自己提交的PR评论区输入/check-cla进行核实校验 - - **一、源码** - -1、训练及在线推理请使用python代码实现,Ascend平台离线推理请使用C++或python代码,符合第四部分编码规范 - -2、参考[sample](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) - -3、贡献者模型代码目录规则:"modelzoo/contrib/框架/Research/应用领域(nlp、cv、audio等)/网络名_IDxxx_for_TensorFlow"(以tf为例,社区管理团队会在贡献完成进行整合) - -4、从其他开源迁移的代码,请增加License声明 - - **二、License规则** - -* TensorFlow - - 迁移场景 - - 1、迁移TensorFlow模型中若源项目已包含License文件则必须拷贝引用,否则在模型顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) - - 2、迁移TensorFlow框架开发的模型,需要在模型目录下每个源文件附上源社区TensorFlow Apache 2.0 License头部声明,并在其下追加新增完整华为公司License声明 - - ``` - # Copyright 2017 The TensorFlow Authors. All Rights Reserved. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- # ============================================================================ - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` - 开发场景 - - 1、基于TensorFlow框架开发模型,需在模型项目顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) - - 2、基于TensorFlow框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` -* PyTorch - - 迁移场景 - - 1、迁移PyTorch模型中若源项目录已包含PyTorch License文件则必须拷贝引用,否则在模型顶层目录下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) - - 2、迁移PyTorch第三方框架开发的模型,需要在模型目录下每个源文件附上源社区PyTorch BSD-3 License头部声明,并在其下追加新增一行华为公司License声明 - ``` - # BSD 3-Clause License - # - # Copyright (c) 2017 xxxx - # All rights reserved. - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Redistribution and use in source and binary forms, with or without - # modification, are permitted provided that the following conditions are met: - # - # * Redistributions of source code must retain the above copyright notice, this - # list of conditions and the following disclaimer. - # - # * Redistributions in binary form must reproduce the above copyright notice, - # this list of conditions and the following disclaimer in the documentation - # and/or other materials provided with the distribution. - # - # * Neither the name of the copyright holder nor the names of its - # contributors may be used to endorse or promote products derived from - # this software without specific prior written permission. - # - # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- # ============================================================================ - ``` - - 开发场景 - - 1、基于PyTorch框架开发模型,需在模型项目下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) - - 2、基于PyTorch框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the BSD 3-Clause License (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # https://opensource.org/licenses/BSD-3-Clause - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` - -* MindSpore/ACL - - 1、迁移或开发场景下MindSpore/ACL模型顶层目录下需要包含华为公司 License [华为公司 License链接](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) - - 2、迁移或开发场景下MindSpore/ACL模型,需要在模型目录下每个源文件中添加区华为公司Apache 2.0 License头部声明 - ``` - # Copyright 2021 Huawei Technologies Co., Ltd - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - ``` - -> 关于License声明时间,应注意: 2021年新建的文件,应该是Copyright 2021 Huawei Technologies Co., Ltd 2020年创建年份,2020年修改年份,应该是Copyright 2020 Huawei Technologies Co., Ltd - - **三、readme** - -readme用于指导用户理解和部署样例,要包含如下内容: - -- 简介: - -1、模型的来源及原理; - -2、模型复现的步骤,含训练、eval、在线/离线推理等,入口请封装成`.sh`、`.py`; - -- 关键要求: - -1、模型的出处、对数据的要求、免责声明等,开源代码文件修改需要增加版权说明; - -2、模型转换得到的离线模型对输入数据的要求; - -3、环境变量设置,依赖的第三方软件包和库,以及安装方法; - -4、精度和性能达成要求:尽量达到原始模型水平; - -5、预训练checkpoint、结果checkpoint请提供归档OBS、网盘链接,如来自开源需明确来源地址 - -6、数据集说明 - -- [ ] 不允许直接提供数据集的下载链接,可使用词汇:用户自行准备好数据集,可选用“XXX”,“XXX”,“XXX” - - 例如:请用户自行准备好数据集,包含训练集和验证集两部分,可选用的数据集包括ImageNet2012,CIFAR10、Flower等,包含train和val两部分。 - -- [ ] 脚本中不允许提供链接下载数据集,如果开源脚本上存在对应的链接,请修改或者删除对应的脚本 - -训练ReadMe写作可参考下面两个链接: - -[Readme example1](https://gitee.com/ascend/modelzoo/blob/master/built-in/TensorFlow/Official/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md) - -[Readme example2](https://www.hiascend.com/zh/software/modelzoo/detail/C/093ed0219cb14f068af33784c62cf7ec) - -离线推理ReadMe写作可参考下面链接: - -[Readme example1](https://gitee.com/ascend/modelzoo/tree/master/contrib/ACL_TensorFlow/Research/cv/AdvancedEAST_ID0130_for_ACL/README.md) - - **四、自测试用例** - -提供模型的自测试用例和readme,提交PR需要门禁及模型测试用例通过,性能和精度检查通过 - -- 简介: - -1、不同于完整的训练过程和全量数据集的推理,自测试用例的目的是验证提交代码基本功能可用,执行时长控制在10min之内(推理或训练只需执行有限的图片或step); - -2、提交PR中训练用例入口`train_testcase.sh`, 在线推理用例入口`online_inference_testcase.sh`, 离线推理用例入口`offline_inference_testcase.sh`; - -3、提交PR后,会自动触发门禁流水,后台会根据用例入口shell,自动将代码分发到对应执行环境; - -4、Jenkins预置账号:登录账号请联系华为工程师/接口人获取,登录之后,可以查看到用例执行日志 - -5、如果提交失败,请查看日志,修复代码或其他问题后,在你当前的PR中,评论“compile”即可重新触发用例执行 - -- 关键要求: - -1、自测试用例命名严格按照上述简介2要求来书写,否则门禁会校验失败; - -2、用例应当包含功能、精度(Loss值)、性能检查,检查通过打印"Run testcase success!",失败则打印"Run testcase failed!"; - 
-3、执行环境已预装软件包和Python3.7.5环境,调用命令"python3"、"python3.7"、"python3.7.5"均可,安装第三方库依赖使用"pip3"、"pip3.7"均可; - -4、数据集和模型:小于500M的文件,建议使用`obsutil`命令下载(已预装),过大的文件,建议提交Issue,注明数据集和下载地址,会提前下载到执行环境上, - -已预置数据集&python第三方库: [Environments](https://gitee.com/ascend/modelzoo/blob/master/contrib/ENVIRONMENTS.md) - -5、环境和其他问题,请提交Issue跟踪; - -6、测试用例开发参考: -- [训练](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) -- [离线推理](https://gitee.com/alexcheng88/modelzoo/tree/master/contrib/TensorFlow/Research/cv/efficientnet-b8/ATC_efficientnet-b8_tf_nkxiaolei) - - **五、PR提交** - -- 关键要求: - -1、请将modelzoo仓fork到个人分支,基于个人分支新增、修改和提交PR; - -2、PR标题:线上活动,请在标题注明[线上贡献];高校活动,请注明[xxx学校][高校贡献]; - -3、built-in用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含三个关键字段:FuncStatus(OK-流程通过/ **NOK-流程失败,不允许模型代码提交主仓** );PerfStatus(OK-持平GPU/POK-0.5倍GPU/NOK-小于0.5倍GPU/PERFECT-1.2倍GPU);PrecisionStatus(OK-精度达标,POK-Loss拟合但精度未实施, **NOK-Loss不拟合,不允许模型代码提交主仓** );内容格式如下所示(注:“:”两侧无需空格,英文格式;): - -``` - FuncStatus:OK/NOK - PerfStatus:PERFECT/OK/POK/NOK - PrecisionStatus:OK/POK/NOK -``` -4、contrib用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含关键字段:GPUStatus(OK-GPU复现/NOK-GPU未复现); NPUMigrationStatus(OK-自动迁移成功/POK-自动迁移失败, 手写规避成功/NOK-均失败); FuncStatus(OK-基础功能打通/NOK-基础功能失败,不允许模型代码提交到master); PrecisionStatus(OK-精度达标/POK-Loss拟合但精度未完全达标/NOK-精度不达标, 不允许模型代码提交到master); AutoTune(OK-性能持平或高于GPU/POK-性能有提升但低于GPU/NOK-性能无提升或者功能失败); PerfStatus(训练:PERFECT-性能1.2倍GPU/OK-性能持平GPU/POK-性能0.5倍GPU/NOK-性能小于0.5倍GPU;推理:OK-4*310单卡>GPU/NOK-其它); ModelConvert:OK/NOK(仅推理, OK-om转换成功/NOK-om转换失败); QuantStatus:OK/NOK(仅推理, OK-精度损失1%以内,性能有提升/POK-性能有提升但未达标/NOK-量化失败); - -样例:modelzoo_level.txt文件 - ------仅限训练----- - - -``` -GPUStatus:OK/NOK -NPUMigrationStatus:OK/POK/NOK -``` - - ------仅限推理----- - -``` -ModelConvert:OK/POK/NOK -QuantStatus:OK/POK/NOK -``` - ------通用部分----- - -``` -FuncStatus:OK/NOK -PrecisionStatus:OK/POK/NOK -AutoTune:OK/POK/NOK -PerfStatus:PERFECT/OK/POK/NOK -``` -5、网络名称命名规范:*_for_框架,注:*代表任意内容,如网络名称或网络名称+网络ID; - - **六、编程规范** - -- 规范标准 - -1、C++代码遵循google编程规范:Google C++ Coding Guidelines;单元测测试遵循规范: Googletest Primer。 - -2、Python代码遵循PEP8规范:Python PEP 8 Coding Style;单元测试遵循规范: pytest - -- 规范备注 - -1、优先使用string类型,避免使用char*; - -2、禁止使用printf,一律使用cout; - -3、内存管理尽量使用智能指针; - -4、不准在函数里调用exit; - -5、禁止使用IDE等工具自动生成代码; - -6、控制第三方库依赖,如果引入第三方依赖,则需要提供第三方依赖安装和使用指导书; - -7、一律使用英文注释,注释率30%--40%,鼓励自注释; - -8、函数头必须有注释,说明函数作用,入参、出参; - -9、统一错误码,通过错误码可以确认那个分支返回错误; - -10、禁止出现打印一堆无影响的错误级别的日志; + **介绍** + +Ascend ModelZoo,欢迎各位开发者 + + **贡献要求** + +开发者提交的模型包括源码、readme、参考模型license文件、测试用例和readme,并遵循以下标准 + +请贡献者在提交代码之前签署CLA协议,“个人签署”,[链接](https://clasign.osinfra.cn/sign/Z2l0ZWUlMkZhc2NlbmQ=) + +如您完成签署,可在自己提交的PR评论区输入/check-cla进行核实校验 + + **一、源码** + +1、训练及在线推理请使用python代码实现,Ascend平台离线推理请使用C++或python代码,符合第四部分编码规范 + +2、参考[sample](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) + +3、贡献者模型代码目录规则:"modelzoo/contrib/框架/Research/应用领域(nlp、cv、audio等)/网络名_IDxxx_for_TensorFlow"(以tf为例,社区管理团队会在贡献完成进行整合) + +4、从其他开源迁移的代码,请增加License声明 + + **二、License规则** + +* TensorFlow + + 迁移场景 + + 1、迁移TensorFlow模型中若源项目已包含License文件则必须拷贝引用,否则在模型顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) + + 2、迁移TensorFlow框架开发的模型,需要在模型目录下每个源文件附上源社区TensorFlow Apache 2.0 License头部声明,并在其下追加新增完整华为公司License声明 + + ``` + # Copyright 2017 The TensorFlow Authors. All Rights Reserved. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. 
+ # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # ============================================================================ + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` + 开发场景 + + 1、基于TensorFlow框架开发模型,需在模型项目顶层目录下添加TensorFlow Apache 2.0 License [TensorFlow License链接](https://github.com/tensorflow/tensorflow/blob/master/LICENSE) + + 2、基于TensorFlow框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` +* PyTorch + + 迁移场景 + + 1、迁移PyTorch模型中若源项目录已包含PyTorch License文件则必须拷贝引用,否则在模型顶层目录下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) + + 2、迁移PyTorch第三方框架开发的模型,需要在模型目录下每个源文件附上源社区PyTorch BSD-3 License头部声明,并在其下追加新增一行华为公司License声明 + ``` + # BSD 3-Clause License + # + # Copyright (c) 2017 xxxx + # All rights reserved. + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # * Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. + # + # * Redistributions in binary form must reproduce the above copyright notice, + # this list of conditions and the following disclaimer in the documentation + # and/or other materials provided with the distribution. + # + # * Neither the name of the copyright holder nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # ============================================================================ + ``` + + 开发场景 + + 1、基于PyTorch框架开发模型,需在模型项目下添加PyTorch BSD-3 License [PyTorch License链接](https://github.com/pytorch/examples/blob/master/LICENSE) + + 2、基于PyTorch框架开发模型,需要在模型目录下每个源文件附上源社区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the BSD 3-Clause License (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # https://opensource.org/licenses/BSD-3-Clause + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + ``` + +* MindSpore/ACL + + 1、迁移或开发场景下MindSpore/ACL模型顶层目录下需要包含华为公司 License [华为公司 License链接](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) + + 2、迁移或开发场景下MindSpore/ACL模型,需要在模型目录下每个源文件中添加区华为公司Apache 2.0 License头部声明 + ``` + # Copyright 2021 Huawei Technologies Co., Ltd + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ ``` + +> 关于License声明时间,应注意: 2021年新建的文件,应该是Copyright 2021 Huawei Technologies Co., Ltd 2020年创建年份,2020年修改年份,应该是Copyright 2020 Huawei Technologies Co., Ltd + + **三、readme** + +readme用于指导用户理解和部署样例,要包含如下内容: + +- 简介: + +1、模型的来源及原理; + +2、模型复现的步骤,含训练、eval、在线/离线推理等,入口请封装成`.sh`、`.py`; + +- 关键要求: + +1、模型的出处、对数据的要求、免责声明等,开源代码文件修改需要增加版权说明; + +2、模型转换得到的离线模型对输入数据的要求; + +3、环境变量设置,依赖的第三方软件包和库,以及安装方法; + +4、精度和性能达成要求:尽量达到原始模型水平; + +5、预训练checkpoint、结果checkpoint请提供归档OBS、网盘链接,如来自开源需明确来源地址 + +6、数据集说明 + +- [ ] 不允许直接提供数据集的下载链接,可使用词汇:用户自行准备好数据集,可选用“XXX”,“XXX”,“XXX” + + 例如:请用户自行准备好数据集,包含训练集和验证集两部分,可选用的数据集包括ImageNet2012,CIFAR10、Flower等,包含train和val两部分。 + +- [ ] 脚本中不允许提供链接下载数据集,如果开源脚本上存在对应的链接,请修改或者删除对应的脚本 + +训练ReadMe写作可参考下面两个链接: + +[Readme example1](https://gitee.com/ascend/modelzoo/blob/master/built-in/TensorFlow/Official/cv/image_classification/DenseNet121_ID0067_for_TensorFlow/README.md) + +[Readme example2](https://www.hiascend.com/zh/software/modelzoo/detail/C/093ed0219cb14f068af33784c62cf7ec) + +离线推理ReadMe写作可参考下面链接: + +[Readme example1](https://gitee.com/ascend/modelzoo/tree/master/contrib/ACL_TensorFlow/Research/cv/AdvancedEAST_ID0130_for_ACL/README.md) + + **四、自测试用例** + +提供模型的自测试用例和readme,提交PR需要门禁及模型测试用例通过,性能和精度检查通过 + +- 简介: + +1、不同于完整的训练过程和全量数据集的推理,自测试用例的目的是验证提交代码基本功能可用,执行时长控制在10min之内(推理或训练只需执行有限的图片或step); + +2、提交PR中训练用例入口`train_testcase.sh`, 在线推理用例入口`online_inference_testcase.sh`, 离线推理用例入口`offline_inference_testcase.sh`; + +3、提交PR后,会自动触发门禁流水,后台会根据用例入口shell,自动将代码分发到对应执行环境; + +4、Jenkins预置账号:登录账号请联系华为工程师/接口人获取,登录之后,可以查看到用例执行日志 + +5、如果提交失败,请查看日志,修复代码或其他问题后,在你当前的PR中,评论“compile”即可重新触发用例执行 + +- 关键要求: + +1、自测试用例命名严格按照上述简介2要求来书写,否则门禁会校验失败; + +2、用例应当包含功能、精度(Loss值)、性能检查,检查通过打印"Run testcase success!",失败则打印"Run testcase failed!"; + +3、执行环境已预装软件包和Python3.7.5环境,调用命令"python3"、"python3.7"、"python3.7.5"均可,安装第三方库依赖使用"pip3"、"pip3.7"均可; + +4、数据集和模型:小于500M的文件,建议使用`obsutil`命令下载(已预装),过大的文件,建议提交Issue,注明数据集和下载地址,会提前下载到执行环境上, + +已预置数据集&python第三方库: [Environments](https://gitee.com/ascend/modelzoo/blob/master/contrib/ENVIRONMENTS.md) + +5、环境和其他问题,请提交Issue跟踪; + +6、测试用例开发参考: +- [训练](https://gitee.com/ascend/modelzoo/tree/master/built-in/TensorFlow/Official/nlp/Transformer_for_TensorFlow) +- [离线推理](https://gitee.com/alexcheng88/modelzoo/tree/master/contrib/TensorFlow/Research/cv/efficientnet-b8/ATC_efficientnet-b8_tf_nkxiaolei) + + **五、PR提交** + +- 关键要求: + +1、请将modelzoo仓fork到个人分支,基于个人分支新增、修改和提交PR; + +2、PR标题:线上活动,请在标题注明[线上贡献];高校活动,请注明[xxx学校][高校贡献]; + +3、built-in用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含三个关键字段:FuncStatus(OK-流程通过/ **NOK-流程失败,不允许模型代码提交主仓** );PerfStatus(OK-持平GPU/POK-0.5倍GPU/NOK-小于0.5倍GPU/PERFECT-1.2倍GPU);PrecisionStatus(OK-精度达标,POK-Loss拟合但精度未实施, **NOK-Loss不拟合,不允许模型代码提交主仓** );内容格式如下所示(注:“:”两侧无需空格,英文格式;): + +``` + FuncStatus:OK/NOK + PerfStatus:PERFECT/OK/POK/NOK + PrecisionStatus:OK/POK/NOK +``` +4、contrib用户根据网络状态必须配置modelzoo_level.txt文件,且文件内容包含关键字段:GPUStatus(OK-GPU复现/NOK-GPU未复现); NPUMigrationStatus(OK-自动迁移成功/POK-自动迁移失败, 手写规避成功/NOK-均失败); FuncStatus(OK-基础功能打通/NOK-基础功能失败,不允许模型代码提交到master); PrecisionStatus(OK-精度达标/POK-Loss拟合但精度未完全达标/NOK-精度不达标, 不允许模型代码提交到master); AutoTune(OK-性能持平或高于GPU/POK-性能有提升但低于GPU/NOK-性能无提升或者功能失败); PerfStatus(训练:PERFECT-性能1.2倍GPU/OK-性能持平GPU/POK-性能0.5倍GPU/NOK-性能小于0.5倍GPU;推理:OK-4*310单卡>GPU/NOK-其它); ModelConvert:OK/NOK(仅推理, OK-om转换成功/NOK-om转换失败); QuantStatus:OK/NOK(仅推理, OK-精度损失1%以内,性能有提升/POK-性能有提升但未达标/NOK-量化失败); + +样例:modelzoo_level.txt文件 + +-----仅限训练----- + + +``` +GPUStatus:OK/NOK +NPUMigrationStatus:OK/POK/NOK +``` + + +-----仅限推理----- + +``` +ModelConvert:OK/POK/NOK +QuantStatus:OK/POK/NOK +``` + +-----通用部分----- + 
+``` +FuncStatus:OK/NOK +PrecisionStatus:OK/POK/NOK +AutoTune:OK/POK/NOK +PerfStatus:PERFECT/OK/POK/NOK +``` +5、网络名称命名规范:*_for_框架,注:*代表任意内容,如网络名称或网络名称+网络ID; + + **六、编程规范** + +- 规范标准 + +1、C++代码遵循google编程规范:Google C++ Coding Guidelines;单元测测试遵循规范: Googletest Primer。 + +2、Python代码遵循PEP8规范:Python PEP 8 Coding Style;单元测试遵循规范: pytest + +- 规范备注 + +1、优先使用string类型,避免使用char*; + +2、禁止使用printf,一律使用cout; + +3、内存管理尽量使用智能指针; + +4、不准在函数里调用exit; + +5、禁止使用IDE等工具自动生成代码; + +6、控制第三方库依赖,如果引入第三方依赖,则需要提供第三方依赖安装和使用指导书; + +7、一律使用英文注释,注释率30%--40%,鼓励自注释; + +8、函数头必须有注释,说明函数作用,入参、出参; + +9、统一错误码,通过错误码可以确认那个分支返回错误; + +10、禁止出现打印一堆无影响的错误级别的日志; diff --git a/PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py b/PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py index 9baf4a6c31ccb0e0e038190d55ce38a93d4ad3ee..0fe133abd4fdf5d8397ab2bdebbd96a61e443959 100644 --- a/PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py +++ b/PyTorch/contrib/audio/FastPitch/fastpitch/loss_function.py @@ -1,108 +1,108 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch -import torch.nn.functional as F -from torch import nn - -from common.utils import mask_from_lens -from fastpitch.attn_loss_function import AttentionCTCLoss - - -class FastPitchLoss(nn.Module): - def __init__(self, dur_predictor_loss_scale=1.0, - pitch_predictor_loss_scale=1.0, attn_loss_scale=1.0, - energy_predictor_loss_scale=0.1): - super(FastPitchLoss, self).__init__() - self.dur_predictor_loss_scale = dur_predictor_loss_scale - self.pitch_predictor_loss_scale = pitch_predictor_loss_scale - self.energy_predictor_loss_scale = energy_predictor_loss_scale - self.attn_loss_scale = attn_loss_scale - self.attn_ctc_loss = AttentionCTCLoss() - - def forward(self, model_out, targets, is_training=True, meta_agg='mean'): - (mel_out, dec_mask, dur_pred, log_dur_pred, pitch_pred, pitch_tgt, - energy_pred, energy_tgt, attn_soft, attn_hard, attn_dur, - attn_logprob) = model_out - - (mel_tgt, in_lens, out_lens) = targets - - dur_tgt = attn_dur - dur_lens = in_lens - - mel_tgt.requires_grad = False - # (B,H,T) => (B,T,H) - mel_tgt = mel_tgt.transpose(1, 2) - - dur_mask = mask_from_lens(dur_lens, max_len=dur_tgt.size(1)) - dur_mask_sum = dur_mask.sum() - - log_dur_tgt = torch.log(dur_tgt.float() + 1) - loss_fn = F.mse_loss - dur_pred_loss = loss_fn(log_dur_pred, log_dur_tgt, reduction='none') - dur_pred_loss = (dur_pred_loss * dur_mask).sum() / dur_mask_sum - - ldiff = mel_tgt.size(1) - mel_out.size(1) - mel_out = F.pad(mel_out, (0, 0, 0, ldiff, 0, 0), value=0.0) - - mel_mask = mel_tgt.ne(0).float() - mel_mask_sum = mel_mask.sum() - - loss_fn = F.mse_loss - mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') - mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum - - ldiff = pitch_tgt.size(2) - pitch_pred.size(2) - pitch_pred = F.pad(pitch_pred, (0, ldiff, 0, 0, 0, 0), value=0.0) - pitch_loss = F.mse_loss(pitch_tgt, pitch_pred, 
reduction='none') - pitch_loss = (pitch_loss * dur_mask.unsqueeze(1)).sum() / dur_mask_sum - - if energy_pred is not None: - energy_pred = F.pad(energy_pred, (0, ldiff, 0, 0), value=0.0) - energy_loss = F.mse_loss(energy_tgt, energy_pred, reduction='none') - energy_loss = (energy_loss * dur_mask).sum() / dur_mask_sum - else: - energy_loss = 0 - - # Attention loss - attn_loss = self.attn_ctc_loss(attn_logprob, in_lens, out_lens) - - loss = (mel_loss - + dur_pred_loss * self.dur_predictor_loss_scale - + pitch_loss * self.pitch_predictor_loss_scale - + energy_loss * self.energy_predictor_loss_scale - + attn_loss * self.attn_loss_scale) - - meta = { - 'loss': loss.clone().detach(), - 'mel_loss': mel_loss.clone().detach(), - 'duration_predictor_loss': dur_pred_loss.clone().detach(), - 'pitch_loss': pitch_loss.clone().detach(), - 'energy_loss': energy_loss.clone().detach(), - 'attn_loss': attn_loss.clone().detach(), - 'dur_mask_sum': dur_mask_sum.clone().detach(), - 'mel_mask_sum': mel_mask_sum.clone().detach(), - 'dur_error': (torch.abs(dur_pred - dur_tgt).sum() - / dur_mask_sum).detach(), - } - - if energy_pred is not None: - meta['energy_loss'] = energy_loss.clone().detach() - - assert meta_agg in ('sum', 'mean') - if meta_agg == 'sum': - bsz = mel_out.size(0) - meta = {k: v * bsz for k, v in meta.items()} - return loss, meta +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import torch +import torch.nn.functional as F +from torch import nn + +from common.utils import mask_from_lens +from fastpitch.attn_loss_function import AttentionCTCLoss + + +class FastPitchLoss(nn.Module): + def __init__(self, dur_predictor_loss_scale=1.0, + pitch_predictor_loss_scale=1.0, attn_loss_scale=1.0, + energy_predictor_loss_scale=0.1): + super(FastPitchLoss, self).__init__() + self.dur_predictor_loss_scale = dur_predictor_loss_scale + self.pitch_predictor_loss_scale = pitch_predictor_loss_scale + self.energy_predictor_loss_scale = energy_predictor_loss_scale + self.attn_loss_scale = attn_loss_scale + self.attn_ctc_loss = AttentionCTCLoss() + + def forward(self, model_out, targets, is_training=True, meta_agg='mean'): + (mel_out, dec_mask, dur_pred, log_dur_pred, pitch_pred, pitch_tgt, + energy_pred, energy_tgt, attn_soft, attn_hard, attn_dur, + attn_logprob) = model_out + + (mel_tgt, in_lens, out_lens) = targets + + dur_tgt = attn_dur + dur_lens = in_lens + + mel_tgt.requires_grad = False + # (B,H,T) => (B,T,H) + mel_tgt = mel_tgt.transpose(1, 2) + + dur_mask = mask_from_lens(dur_lens, max_len=dur_tgt.size(1)) + dur_mask_sum = dur_mask.sum() + + log_dur_tgt = torch.log(dur_tgt.float() + 1) + loss_fn = F.mse_loss + dur_pred_loss = loss_fn(log_dur_pred, log_dur_tgt, reduction='none') + dur_pred_loss = (dur_pred_loss * dur_mask).sum() / dur_mask_sum + + ldiff = mel_tgt.size(1) - mel_out.size(1) + mel_out = F.pad(mel_out, (0, 0, 0, ldiff, 0, 0), value=0.0) + + mel_mask = mel_tgt.ne(0).float() + mel_mask_sum = mel_mask.sum() + + loss_fn = F.mse_loss + mel_loss = loss_fn(mel_out, mel_tgt, reduction='none') + mel_loss = (mel_loss * mel_mask).sum() / mel_mask_sum + + ldiff = pitch_tgt.size(2) - pitch_pred.size(2) + pitch_pred = F.pad(pitch_pred, (0, ldiff, 0, 0, 0, 0), value=0.0) + pitch_loss = F.mse_loss(pitch_tgt, pitch_pred, reduction='none') + pitch_loss = (pitch_loss * dur_mask.unsqueeze(1)).sum() / dur_mask_sum + + if energy_pred is not None: + energy_pred = F.pad(energy_pred, (0, ldiff, 0, 0), value=0.0) + energy_loss = F.mse_loss(energy_tgt, energy_pred, reduction='none') + energy_loss = (energy_loss * dur_mask).sum() / dur_mask_sum + else: + energy_loss = 0 + + # Attention loss + attn_loss = self.attn_ctc_loss(attn_logprob, in_lens, out_lens) + + loss = (mel_loss + + dur_pred_loss * self.dur_predictor_loss_scale + + pitch_loss * self.pitch_predictor_loss_scale + + energy_loss * self.energy_predictor_loss_scale + + attn_loss * self.attn_loss_scale) + + meta = { + 'loss': loss.clone().detach(), + 'mel_loss': mel_loss.clone().detach(), + 'duration_predictor_loss': dur_pred_loss.clone().detach(), + 'pitch_loss': pitch_loss.clone().detach(), + 'energy_loss': energy_loss.clone().detach(), + 'attn_loss': attn_loss.clone().detach(), + 'dur_mask_sum': dur_mask_sum.clone().detach(), + 'mel_mask_sum': mel_mask_sum.clone().detach(), + 'dur_error': (torch.abs(dur_pred - dur_tgt).sum() + / dur_mask_sum).detach(), + } + + if energy_pred is not None: + meta['energy_loss'] = energy_loss.clone().detach() + + assert meta_agg in ('sum', 'mean') + if meta_agg == 'sum': + bsz = mel_out.size(0) + meta = {k: v * bsz for k, v in meta.items()} + return loss, meta diff --git a/PyTorch/contrib/audio/baseline-rawnet/README.md b/PyTorch/contrib/audio/baseline-rawnet/README.md index ecd0232b7e518572e6567b012f2bc8e5778090fd..91569eb766f104a7839db4961633291f8cac3b51 100644 --- a/PyTorch/contrib/audio/baseline-rawnet/README.md +++ 
b/PyTorch/contrib/audio/baseline-rawnet/README.md @@ -1,128 +1,128 @@ -# RawNet2 - -This implements training of RawNet2 on the VoxCeleb1&2 datasets of YouTube. - -- Reference implementation: - -``` -url=https://github.com/Jungjee/RawNet -dir=RawNet-master/python/RawNet2 -``` - -## Baseline-RawNet2 Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. Therefore, RawNet2 is re-implemented using semantics such as custom OP. - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` - -## DataSet - -``` -url: http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ -``` - -- The training datasets are VoxCeleb2, the evaluation dataset is VoxCeleb1H & VoxCeleb1E. The datasets are large. Please ensure sufficient hard disk space when downloading and decompressing. -- Besides, the data in the VoxCeleb2 downloaded from the url above is in the format of .m4a. If you do not use the dataset which is converted already, you should firstly run the `m4a2wav.py`. -- You need to follow directory structure of the `data` as below. If you connect to the prepared data folder, you don't need to build the following directory. - -``` -${RawNet}/DB/VoxCeleb1/ -├── dev_wav -│   ├── id10001 -│   │   ├── 1zcIwhmdeo4 -│   │   │   ├── 00001.wav -│   │   │   ├── 00002.wav -│   │   │   └── 00003.wav -│   │   ├── 5ssVY9a5X-M -│   │   │   ├── 00001.wav -│   │   │   ├── 00002.wav -│   │   │   ├── 00003.wav -│   │   │   └── 00003.wav -│   └── ... -├── eval_wav -│   ├── id10270 -│   │   ├── 5r0dWxy17C8 -│   │   │   ├── 00001.wav -│   │   │   ├── 00002.wav -│   │   │   ├── 00003.wav -│   │   │   ├── 00004.wav -│   │   │   └── 00005.wav -│   └── ... -│   ├── _z_BR0ERa9g -│   ├── 00001.wav -│   ├── 00002.wav -│   └── 00003.wav -├── val_trial.txt -└── veri_test.txt - -${RawNet}/DB/VoxCeleb2/ -└── wav - ├── id00012 - │   ├── 21Uxsk56VDQ - │   │   ├── 00001.wav - │   │   ├── ... - │   │   └── 00059.wav - │   ├── 00-qODbtozw - │   │   ├── ... - │   │   ├── 00079.wav - │   │   └── 00080.wav - ├── ... - │   └── zw-4DTjqIA0 - │   ├── 00108.wav - │   └── 00109.wav - └── id09272 - └── u7VNkYraCw0 - ├── ... - └── 00027.wav -``` - -- You need to follow directory structure of the `output` as below. - -``` -${RawNet}/train/train_${device_count}P -|-- DNNS/${name}/ -| |-- models -| | |--best_opt_eval.pt ## The best perfomance model is saved here -| | |--TA_${epoch}_${eer}.pt ##The other model is saved here -| |-- results -| |-- log -| | |-- eval_epoch${epoch}.txt ## The training log is saved here -| |-- prof -| |-- eers.txt ##The eers is saved here -| |-- f_params.txt ##The params of the model are saved here -``` - -## Training # - -- Note that the `output` folder under the `test` directory will also save the code running log. 
-- To run the model, you should cd to the directory of test -- To train a model, run `train_1p.py` or `train_8p.py`: - -```bash -# 1p train perf -nohup bash train_performance_1p.sh --data_path=xxx & - -# 8p train perf -nohup bash train_performance_8p.sh --data_path=xxx & - -# 1p train full -nohup bash train_full_1p.sh --data_path=xxx & - -# 8p train full -nohup bash train_full_8p.sh --data_path=xxx & - -``` - -## RawNet2 training result - -| eer(percentage) | FPS(aver) | Npu_nums | Epochs | AMP_Type | -| :-------------------------: | :-------: | :------: | :----: | :------: | -| 0.14 | 7760 | 1 | 1 | O2 | -| 0.038(aver) and 0.035(high) | 8573 | 8 | 20 | O2 | - -### **Testing** - -The testing data in the paper is about the VoxCeleb1H and VoxCeleb1E. And here we use the dataset of the VoxCeleb1H, and the target of the eer in the paper is 0.0489. +# RawNet2 + +This implements training of RawNet2 on the VoxCeleb1&2 datasets of YouTube. + +- Reference implementation: + +``` +url=https://github.com/Jungjee/RawNet +dir=RawNet-master/python/RawNet2 +``` + +## Baseline-RawNet2 Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. Therefore, RawNet2 is re-implemented using semantics such as custom OP. + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` + +## DataSet + +``` +url: http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ +``` + +- The training datasets are VoxCeleb2, the evaluation dataset is VoxCeleb1H & VoxCeleb1E. The datasets are large. Please ensure sufficient hard disk space when downloading and decompressing. +- Besides, the data in the VoxCeleb2 downloaded from the url above is in the format of .m4a. If you do not use the dataset which is converted already, you should firstly run the `m4a2wav.py`. +- You need to follow directory structure of the `data` as below. If you connect to the prepared data folder, you don't need to build the following directory. + +``` +${RawNet}/DB/VoxCeleb1/ +├── dev_wav +│   ├── id10001 +│   │   ├── 1zcIwhmdeo4 +│   │   │   ├── 00001.wav +│   │   │   ├── 00002.wav +│   │   │   └── 00003.wav +│   │   ├── 5ssVY9a5X-M +│   │   │   ├── 00001.wav +│   │   │   ├── 00002.wav +│   │   │   ├── 00003.wav +│   │   │   └── 00003.wav +│   └── ... +├── eval_wav +│   ├── id10270 +│   │   ├── 5r0dWxy17C8 +│   │   │   ├── 00001.wav +│   │   │   ├── 00002.wav +│   │   │   ├── 00003.wav +│   │   │   ├── 00004.wav +│   │   │   └── 00005.wav +│   └── ... +│   ├── _z_BR0ERa9g +│   ├── 00001.wav +│   ├── 00002.wav +│   └── 00003.wav +├── val_trial.txt +└── veri_test.txt + +${RawNet}/DB/VoxCeleb2/ +└── wav + ├── id00012 + │   ├── 21Uxsk56VDQ + │   │   ├── 00001.wav + │   │   ├── ... + │   │   └── 00059.wav + │   ├── 00-qODbtozw + │   │   ├── ... + │   │   ├── 00079.wav + │   │   └── 00080.wav + ├── ... + │   └── zw-4DTjqIA0 + │   ├── 00108.wav + │   └── 00109.wav + └── id09272 + └── u7VNkYraCw0 + ├── ... + └── 00027.wav +``` + +- You need to follow directory structure of the `output` as below. 
+ +``` +${RawNet}/train/train_${device_count}P +|-- DNNS/${name}/ +| |-- models +| | |--best_opt_eval.pt ## The best perfomance model is saved here +| | |--TA_${epoch}_${eer}.pt ##The other model is saved here +| |-- results +| |-- log +| | |-- eval_epoch${epoch}.txt ## The training log is saved here +| |-- prof +| |-- eers.txt ##The eers is saved here +| |-- f_params.txt ##The params of the model are saved here +``` + +## Training # + +- Note that the `output` folder under the `test` directory will also save the code running log. +- To run the model, you should cd to the directory of test +- To train a model, run `train_1p.py` or `train_8p.py`: + +```bash +# 1p train perf +nohup bash train_performance_1p.sh --data_path=xxx & + +# 8p train perf +nohup bash train_performance_8p.sh --data_path=xxx & + +# 1p train full +nohup bash train_full_1p.sh --data_path=xxx & + +# 8p train full +nohup bash train_full_8p.sh --data_path=xxx & + +``` + +## RawNet2 training result + +| eer(percentage) | FPS(aver) | Npu_nums | Epochs | AMP_Type | +| :-------------------------: | :-------: | :------: | :----: | :------: | +| 0.14 | 7760 | 1 | 1 | O2 | +| 0.038(aver) and 0.035(high) | 8573 | 8 | 20 | O2 | + +### **Testing** + +The testing data in the paper is about the VoxCeleb1H and VoxCeleb1E. And here we use the dataset of the VoxCeleb1H, and the target of the eer in the paper is 0.0489. diff --git a/PyTorch/contrib/audio/baseline-rawnet/modelzoo_level.txt b/PyTorch/contrib/audio/baseline-rawnet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/audio/baseline-rawnet/modelzoo_level.txt +++ b/PyTorch/contrib/audio/baseline-rawnet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/audio/baseline-rawnet/requirements.txt b/PyTorch/contrib/audio/baseline-rawnet/requirements.txt index 3e7d7460a222239c1e0a3accc0acb113793ba8bd..45427a860f53981a780ff5bc1af4feba7dc7f74c 100644 --- a/PyTorch/contrib/audio/baseline-rawnet/requirements.txt +++ b/PyTorch/contrib/audio/baseline-rawnet/requirements.txt @@ -1,9 +1,9 @@ -torch==1.5.0+ascend.post3 -apex==0.1+ascend.20210930 -torchvision==0.2.2.post3 -numpy==1.21.2 -Pillow==8.3.2 -scikit-learn==1.0 -scipy==1.7.1 -tqdm==4.62.3 +torch==1.5.0+ascend.post3 +apex==0.1+ascend.20210930 +torchvision==0.2.2.post3 +numpy==1.21.2 +Pillow==8.3.2 +scikit-learn==1.0 +scipy==1.7.1 +tqdm==4.62.3 SoundFile==0.10.3.post1 \ No newline at end of file diff --git a/PyTorch/contrib/audio/deepspeech/README.md b/PyTorch/contrib/audio/deepspeech/README.md index 8e331e892399f9ba6d369b44e01f09e5aa783881..dce744d6576242af34015739c9fa5d4b816a1d1b 100644 --- a/PyTorch/contrib/audio/deepspeech/README.md +++ b/PyTorch/contrib/audio/deepspeech/README.md @@ -1,89 +1,89 @@ - - -# deepspeech.pytorch - -This implements training of deepspeech on NPU mainly modified from [deepspeech.pytorch](https://github.com/SeanNaren/deepspeech.pytorch) - -## installation - -### From Source - -install this fork Warp-CTC bindings: - -```shell -### npu环境变量 -source {deepspeech_root}/scripts/env_new.sh -git clone https://github.com/SeanNaren/warp-ctc.git -cd warp-ctc -git checkout -b pytorch_bindings origin/pytorch_bindings -mkdir build; cd build; cmake ..; make -cd ../pytorch_binding && python3.7 setup.py install -``` - -install requirements - -```shell -pip3 install -r requirements.txt -``` - -If you plan to use 
Multi-GPU/Multi-node training, you'll need etcd. Below is the command to install on Ubuntu. - -```shell -sudo apt-get install etcd -sudo apt-get install sox -``` - -## Training - -### Download Dataset - -All you need is entering the data directory and execute the follow scripts - -```shell -cd data -python3.7 an4.py -``` - -### Training a Model - -#### 1p training - -```shell -# The result will be placed in the current directory 1p_train.log -bash run_1p.sh -``` - -#### 8p training - -```shell -# The result will be placed in the current directory 8p_train.log -bash run_8p.sh -``` - -### Performance - -```shell -### 1p performance, the log will be placed in the current directory 1p_train_performance.log -bash train_performance_1p.sh -### 8p performance, the log will be placed in the current directory 8p_train_performance.log -bash train_performance_8p.sh -``` - -## Testing/Inference - -To evaluate a trained model on a test set (has to be in the same format as the training set): - -```shell -# if you want to see the final precision, you can execute the follow scripts after execute 1p or 8p training scripts -bash eval.sh -``` - -## Result - -| | WER | CER | Epochs | APEX | FPS | -| :---: | :----: | :----: | :----: | :--: | :--: | -| NPU1P | 9.444 | 5.723 | 70 | O2 | 4 | -| NPU8P | 17.464 | 10.926 | 70 | O2 | 22 | -| GPU1P | 10.349 | 7.076 | 70 | O2 | 94 | -| GPU8P | 15.265 | 9.834 | 70 | O2 | 377 | - + + +# deepspeech.pytorch + +This implements training of deepspeech on NPU mainly modified from [deepspeech.pytorch](https://github.com/SeanNaren/deepspeech.pytorch) + +## installation + +### From Source + +install this fork Warp-CTC bindings: + +```shell +### npu环境变量 +source {deepspeech_root}/scripts/env_new.sh +git clone https://github.com/SeanNaren/warp-ctc.git +cd warp-ctc +git checkout -b pytorch_bindings origin/pytorch_bindings +mkdir build; cd build; cmake ..; make +cd ../pytorch_binding && python3.7 setup.py install +``` + +install requirements + +```shell +pip3 install -r requirements.txt +``` + +If you plan to use Multi-GPU/Multi-node training, you'll need etcd. Below is the command to install on Ubuntu. 
+ +```shell +sudo apt-get install etcd +sudo apt-get install sox +``` + +## Training + +### Download Dataset + +All you need is entering the data directory and execute the follow scripts + +```shell +cd data +python3.7 an4.py +``` + +### Training a Model + +#### 1p training + +```shell +# The result will be placed in the current directory 1p_train.log +bash run_1p.sh +``` + +#### 8p training + +```shell +# The result will be placed in the current directory 8p_train.log +bash run_8p.sh +``` + +### Performance + +```shell +### 1p performance, the log will be placed in the current directory 1p_train_performance.log +bash train_performance_1p.sh +### 8p performance, the log will be placed in the current directory 8p_train_performance.log +bash train_performance_8p.sh +``` + +## Testing/Inference + +To evaluate a trained model on a test set (has to be in the same format as the training set): + +```shell +# if you want to see the final precision, you can execute the follow scripts after execute 1p or 8p training scripts +bash eval.sh +``` + +## Result + +| | WER | CER | Epochs | APEX | FPS | +| :---: | :----: | :----: | :----: | :--: | :--: | +| NPU1P | 9.444 | 5.723 | 70 | O2 | 4 | +| NPU8P | 17.464 | 10.926 | 70 | O2 | 22 | +| GPU1P | 10.349 | 7.076 | 70 | O2 | 94 | +| GPU8P | 15.265 | 9.834 | 70 | O2 | 377 | + diff --git a/PyTorch/contrib/audio/tdnn/README.md b/PyTorch/contrib/audio/tdnn/README.md index 83fd02c3f2e6e43b28570ec3119decc7a80dda87..3598158ca6fb2f059af7dd9ff1b56977ed3f5840 100644 --- a/PyTorch/contrib/audio/tdnn/README.md +++ b/PyTorch/contrib/audio/tdnn/README.md @@ -1,144 +1,144 @@ - - -## 概述 - -ECAPA-TDNN基于传统TDNN模型进行了改进,主要有三个方面的优化,分别是:增加了一维SE残差模块(1-Dimensional Squeeze-Excitation Res2Block);多层特征融合(Multi-layer feature aggregation and summation);通道和上下文相关的统计池化(Channel- and context-dependent statistics pooling) - - - -- 参考实现: -[https://github.com/speechbrain/speechbrain/tree/develop/templates/speaker_id](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fspeechbrain%2Fspeechbrain%2Ftree%2Fdevelop%2Ftemplates%2Fspeaker_id) - - - - - -## 支持特性 - -| 特性列表 | 是否支持 | -| ---------- | -------- | -| 分布式训练 | 是 | -| 混合精度 | 是 | -| 并行数据 | 是 | - -## 混合精度训练 - -昇腾910 AI处理器提供自动混合精度功能,模型使用 opt_level="O1", loss_scale=128, combine_grad=True的配置进行amp.initialize - -脚本已默认开启混合精度,设置如下。 - - ``` - parser.add_argument( - "--auto_mix_prec", - action="store_true", - help="This flag enables training with automatic mixed-precision.", - ) - ``` - - -

-## 训练环境准备

- -CANN版本:5.0.2 - -昇腾torch版本:1.5.0 - -#### speechbrain环境配置 - -(详情请参考speechbrain官方文档安装方法。) - -1. 安装torch 1.5.0 - -2. 安装torchaudio,npu安装方法请参考 - - https://e.gitee.com/HUAWEI-ASCEND/dashboard?issue=I48AZM - -3. cd tdnn - - pip install -r requirement.txt - - pip install --editable . - - -

-## 快速上手

- -- 数据集准备 - -模型训练使用rirs_noises、train-clean-5数据集,数据集请用户自行获取。 - -- 模型训练 - -选择合适的下载方式下载源码包。 - -Before training, modify the data_folder in these scripts. - -```bash -# training 1p loss -bash ./test/train_full_1p.sh --data_folder="" - -# training 1p performance -bash ./test/train_performance_1p.sh --data_folder="" - -# training 8p loss -bash ./test/train_full_8p.sh --data_folder="" - -# training 8p performance -bash ./test/train_performance_8p.sh --data_folder="" -``` - -``` -Log path: - test/output/train_full_1p.log # 1p training result log - test/output/train_performance_1p.log # 1p training performance result log - test/output/train_full_8p.log # 8p training result log - test/output/train_performance_8p.log # 8p training performance result log -``` - -## 训练结果 - -| acc | FPS | Npu_nums | Epochs | AMP_Type | -| :--------: | :----: | :------: | :----: | :------: | -| - | 8.25 | 1 | 1 | O1 | -| 0.9062 | 43.863 | 8 | 5 | O1 | - -

-## 高级参考

- -### 脚本和示例代码 - -``` -├── README.md //代码说明文档 -├── speechbrain //框架支持文件 -├── templates/speaker_id -│ ├──test //测试脚本 -│ ├──custom_model.py //简易TDNN模块 -| ├──mini_librispeech_prepare.py //数据清单文件 -│ ├──run_1p.sh //单卡运行启动脚本 -│ ├──run_8p.sh //8卡运行启动脚本 -│ ├──train.py //网络训练与测试代码 -│ ├──train.yaml //网络训练参数脚本 -``` - -### 脚本参数 - -``` ---seed 制作参数对象seed ---rank_size 使用NPU卡数量,默认:1 ---number_of_epochs 训练epoch次数,默认:5 ---data_folder 数据集路径,默认:./data ---output_folder 结果输出保存的文件路径,默认:./results/speaker_id/ ---batch_size 每个NPU的batch size,默认:64 -``` - - -## 训练过程 - -1. 通过“模型训练”中的训练指令启动单卡或者多卡训练。单卡和多卡通过运行不同脚本,支持单卡、8卡网络训练。 - -2. 参考脚本的模型存储路径为results//save,训练脚本log中包括如下信息。 - -``` -Epoch: 1, lr: 1.00e-03 - train loss: 2.70 - valid loss: 3.39, valid error: 9.47e-01 -Epoch loaded: 1 - test loss: 3.43, test error: 9.54e-01 -``` -## 注意事项 - + + +## 概述 + +ECAPA-TDNN基于传统TDNN模型进行了改进,主要有三个方面的优化,分别是:增加了一维SE残差模块(1-Dimensional Squeeze-Excitation Res2Block);多层特征融合(Multi-layer feature aggregation and summation);通道和上下文相关的统计池化(Channel- and context-dependent statistics pooling) + + + +- 参考实现: +[https://github.com/speechbrain/speechbrain/tree/develop/templates/speaker_id](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fspeechbrain%2Fspeechbrain%2Ftree%2Fdevelop%2Ftemplates%2Fspeaker_id) + + + + + +## 支持特性 + +| 特性列表 | 是否支持 | +| ---------- | -------- | +| 分布式训练 | 是 | +| 混合精度 | 是 | +| 并行数据 | 是 | + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,模型使用 opt_level="O1", loss_scale=128, combine_grad=True的配置进行amp.initialize + +脚本已默认开启混合精度,设置如下。 + + ``` + parser.add_argument( + "--auto_mix_prec", + action="store_true", + help="This flag enables training with automatic mixed-precision.", + ) + ``` + + +

+## 训练环境准备

+ +CANN版本:5.0.2 + +昇腾torch版本:1.5.0 + +#### speechbrain环境配置 + +(详情请参考speechbrain官方文档安装方法。) + +1. 安装torch 1.5.0 + +2. 安装torchaudio,npu安装方法请参考 + + https://e.gitee.com/HUAWEI-ASCEND/dashboard?issue=I48AZM + +3. cd tdnn + + pip install -r requirement.txt + + pip install --editable . + + +

+## 快速上手

+ +- 数据集准备 + +模型训练使用rirs_noises、train-clean-5数据集,数据集请用户自行获取。 + +- 模型训练 + +选择合适的下载方式下载源码包。 + +Before training, modify the data_folder in these scripts. + +```bash +# training 1p loss +bash ./test/train_full_1p.sh --data_folder="" + +# training 1p performance +bash ./test/train_performance_1p.sh --data_folder="" + +# training 8p loss +bash ./test/train_full_8p.sh --data_folder="" + +# training 8p performance +bash ./test/train_performance_8p.sh --data_folder="" +``` + +``` +Log path: + test/output/train_full_1p.log # 1p training result log + test/output/train_performance_1p.log # 1p training performance result log + test/output/train_full_8p.log # 8p training result log + test/output/train_performance_8p.log # 8p training performance result log +``` + +## 训练结果 + +| acc | FPS | Npu_nums | Epochs | AMP_Type | +| :--------: | :----: | :------: | :----: | :------: | +| - | 8.25 | 1 | 1 | O1 | +| 0.9062 | 43.863 | 8 | 5 | O1 | + +

+## 高级参考

+ +### 脚本和示例代码 + +``` +├── README.md //代码说明文档 +├── speechbrain //框架支持文件 +├── templates/speaker_id +│ ├──test //测试脚本 +│ ├──custom_model.py //简易TDNN模块 +| ├──mini_librispeech_prepare.py //数据清单文件 +│ ├──run_1p.sh //单卡运行启动脚本 +│ ├──run_8p.sh //8卡运行启动脚本 +│ ├──train.py //网络训练与测试代码 +│ ├──train.yaml //网络训练参数脚本 +``` + +### 脚本参数 + +``` +--seed 制作参数对象seed +--rank_size 使用NPU卡数量,默认:1 +--number_of_epochs 训练epoch次数,默认:5 +--data_folder 数据集路径,默认:./data +--output_folder 结果输出保存的文件路径,默认:./results/speaker_id/ +--batch_size 每个NPU的batch size,默认:64 +``` + + +## 训练过程 + +1. 通过“模型训练”中的训练指令启动单卡或者多卡训练。单卡和多卡通过运行不同脚本,支持单卡、8卡网络训练。 + +2. 参考脚本的模型存储路径为results//save,训练脚本log中包括如下信息。 + +``` +Epoch: 1, lr: 1.00e-03 - train loss: 2.70 - valid loss: 3.39, valid error: 9.47e-01 +Epoch loaded: 1 - test loss: 3.43, test error: 9.54e-01 +``` +## 注意事项 + **该模型为了固定shape,修改了1、/speechbrain/dataio/dataio.py read_audio函数 2、/speechbrain/templates/speaker_id/train.py prepare_features函数 3、/speechbrain/core.py _train_loader_specifics里的sampler。其中第三个修改是因为数据集不足固定shape,实际使用模型务必还原回去。** \ No newline at end of file diff --git a/PyTorch/contrib/audio/tdnn/modelzoo_level.txt b/PyTorch/contrib/audio/tdnn/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/audio/tdnn/modelzoo_level.txt +++ b/PyTorch/contrib/audio/tdnn/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/audio/tdnn/speechbrain/lobes/models/MetricGAN.py b/PyTorch/contrib/audio/tdnn/speechbrain/lobes/models/MetricGAN.py index a7af8cccaabe473c6865e80a952067e7d38062ee..38c2b82d53f54c4df7ab251d5544b9870646a1a7 100644 --- a/PyTorch/contrib/audio/tdnn/speechbrain/lobes/models/MetricGAN.py +++ b/PyTorch/contrib/audio/tdnn/speechbrain/lobes/models/MetricGAN.py @@ -13,175 +13,175 @@ # limitations under the License. # -"""Generator and discriminator used in MetricGAN - -Authors: -* Szu-Wei Fu 2020 -""" -import torch -import speechbrain as sb -from torch import nn -from torch.nn.utils import spectral_norm - - -def xavier_init_layer( - in_size, out_size=None, spec_norm=True, layer_type=nn.Linear, **kwargs -): - "Create a layer with spectral norm, xavier uniform init and zero bias" - if out_size is None: - out_size = in_size - - layer = layer_type(in_size, out_size, **kwargs) - if spec_norm: - layer = spectral_norm(layer) - - # Perform initialization - nn.init.xavier_uniform_(layer.weight, gain=1.0) - nn.init.zeros_(layer.bias) - - return layer - - -def shifted_sigmoid(x): - return 1.2 / (1 + torch.exp(-(1 / 1.6) * x)) - - -class Learnable_sigmoid(nn.Module): - def __init__(self, in_features=257): - super().__init__() - self.slope = nn.Parameter(torch.ones(in_features)) - self.slope.requiresGrad = True # set requiresGrad to true! - - # self.scale = nn.Parameter(torch.ones(1)) - # self.scale.requiresGrad = True # set requiresGrad to true! - - def forward(self, x): - return 1.2 / (1 + torch.exp(-(self.slope) * x)) - - -class EnhancementGenerator(nn.Module): - """Simple LSTM for enhancement with custom initialization. - - Arguments - --------- - input_size : int - Size of the input tensor's last dimension. - hidden_size : int - Number of neurons to use in the LSTM layers. - num_layers : int - Number of layers to use in the LSTM. - dropout : int - Fraction of neurons to drop during training. 
- """ - - def __init__( - self, input_size=257, hidden_size=200, num_layers=2, dropout=0, - ): - super().__init__() - self.activation = nn.LeakyReLU(negative_slope=0.3) - - self.blstm = sb.nnet.RNN.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - bidirectional=True, - ) - """ - Use orthogonal init for recurrent layers, xavier uniform for input layers - Bias is 0 - """ - for name, param in self.blstm.named_parameters(): - if "bias" in name: - nn.init.zeros_(param) - elif "weight_ih" in name: - nn.init.xavier_uniform_(param) - elif "weight_hh" in name: - nn.init.orthogonal_(param) - - self.linear1 = xavier_init_layer(400, 300, spec_norm=False) - self.linear2 = xavier_init_layer(300, 257, spec_norm=False) - - self.Learnable_sigmoid = Learnable_sigmoid() - self.sigmoid = nn.Sigmoid() - - def forward(self, x, lengths): - out, _ = self.blstm(x, lengths=lengths) - - out = self.linear1(out) - out = self.activation(out) - - out = self.linear2(out) - out = self.Learnable_sigmoid(out) - - return out - - -class MetricDiscriminator(nn.Module): - """Metric estimator for enhancement training. - - Consists of: - * four 2d conv layers - * channel averaging - * three linear layers - - Arguments - --------- - kernel_size : tuple - The dimensions of the 2-d kernel used for convolution. - base_channels : int - Number of channels used in each conv layer. - """ - - def __init__( - self, kernel_size=(5, 5), base_channels=15, activation=nn.LeakyReLU, - ): - super().__init__() - - self.activation = activation(negative_slope=0.3) - - self.BN = nn.BatchNorm2d(num_features=2, momentum=0.01) - - self.conv1 = xavier_init_layer( - 2, base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size - ) - self.conv2 = xavier_init_layer( - base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size - ) - self.conv3 = xavier_init_layer( - base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size - ) - self.conv4 = xavier_init_layer( - base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size - ) - - self.Linear1 = xavier_init_layer(base_channels, out_size=50) - self.Linear2 = xavier_init_layer(in_size=50, out_size=10) - self.Linear3 = xavier_init_layer(in_size=10, out_size=1) - - def forward(self, x): - out = self.BN(x) - - out = self.conv1(out) - out = self.activation(out) - - out = self.conv2(out) - out = self.activation(out) - - out = self.conv3(out) - out = self.activation(out) - - out = self.conv4(out) - out = self.activation(out) - - out = torch.mean(out, (2, 3)) - - out = self.Linear1(out) - out = self.activation(out) - - out = self.Linear2(out) - out = self.activation(out) - - out = self.Linear3(out) - - return out +"""Generator and discriminator used in MetricGAN + +Authors: +* Szu-Wei Fu 2020 +""" +import torch +import speechbrain as sb +from torch import nn +from torch.nn.utils import spectral_norm + + +def xavier_init_layer( + in_size, out_size=None, spec_norm=True, layer_type=nn.Linear, **kwargs +): + "Create a layer with spectral norm, xavier uniform init and zero bias" + if out_size is None: + out_size = in_size + + layer = layer_type(in_size, out_size, **kwargs) + if spec_norm: + layer = spectral_norm(layer) + + # Perform initialization + nn.init.xavier_uniform_(layer.weight, gain=1.0) + nn.init.zeros_(layer.bias) + + return layer + + +def shifted_sigmoid(x): + return 1.2 / (1 + torch.exp(-(1 / 1.6) * x)) + + +class Learnable_sigmoid(nn.Module): + def __init__(self, in_features=257): + super().__init__() + self.slope = 
nn.Parameter(torch.ones(in_features)) + self.slope.requiresGrad = True # set requiresGrad to true! + + # self.scale = nn.Parameter(torch.ones(1)) + # self.scale.requiresGrad = True # set requiresGrad to true! + + def forward(self, x): + return 1.2 / (1 + torch.exp(-(self.slope) * x)) + + +class EnhancementGenerator(nn.Module): + """Simple LSTM for enhancement with custom initialization. + + Arguments + --------- + input_size : int + Size of the input tensor's last dimension. + hidden_size : int + Number of neurons to use in the LSTM layers. + num_layers : int + Number of layers to use in the LSTM. + dropout : int + Fraction of neurons to drop during training. + """ + + def __init__( + self, input_size=257, hidden_size=200, num_layers=2, dropout=0, + ): + super().__init__() + self.activation = nn.LeakyReLU(negative_slope=0.3) + + self.blstm = sb.nnet.RNN.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + dropout=dropout, + bidirectional=True, + ) + """ + Use orthogonal init for recurrent layers, xavier uniform for input layers + Bias is 0 + """ + for name, param in self.blstm.named_parameters(): + if "bias" in name: + nn.init.zeros_(param) + elif "weight_ih" in name: + nn.init.xavier_uniform_(param) + elif "weight_hh" in name: + nn.init.orthogonal_(param) + + self.linear1 = xavier_init_layer(400, 300, spec_norm=False) + self.linear2 = xavier_init_layer(300, 257, spec_norm=False) + + self.Learnable_sigmoid = Learnable_sigmoid() + self.sigmoid = nn.Sigmoid() + + def forward(self, x, lengths): + out, _ = self.blstm(x, lengths=lengths) + + out = self.linear1(out) + out = self.activation(out) + + out = self.linear2(out) + out = self.Learnable_sigmoid(out) + + return out + + +class MetricDiscriminator(nn.Module): + """Metric estimator for enhancement training. + + Consists of: + * four 2d conv layers + * channel averaging + * three linear layers + + Arguments + --------- + kernel_size : tuple + The dimensions of the 2-d kernel used for convolution. + base_channels : int + Number of channels used in each conv layer. 
+ """ + + def __init__( + self, kernel_size=(5, 5), base_channels=15, activation=nn.LeakyReLU, + ): + super().__init__() + + self.activation = activation(negative_slope=0.3) + + self.BN = nn.BatchNorm2d(num_features=2, momentum=0.01) + + self.conv1 = xavier_init_layer( + 2, base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size + ) + self.conv2 = xavier_init_layer( + base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size + ) + self.conv3 = xavier_init_layer( + base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size + ) + self.conv4 = xavier_init_layer( + base_channels, layer_type=nn.Conv2d, kernel_size=kernel_size + ) + + self.Linear1 = xavier_init_layer(base_channels, out_size=50) + self.Linear2 = xavier_init_layer(in_size=50, out_size=10) + self.Linear3 = xavier_init_layer(in_size=10, out_size=1) + + def forward(self, x): + out = self.BN(x) + + out = self.conv1(out) + out = self.activation(out) + + out = self.conv2(out) + out = self.activation(out) + + out = self.conv3(out) + out = self.activation(out) + + out = self.conv4(out) + out = self.activation(out) + + out = torch.mean(out, (2, 3)) + + out = self.Linear1(out) + out = self.activation(out) + + out = self.Linear2(out) + out = self.activation(out) + + out = self.Linear3(out) + + return out diff --git a/PyTorch/contrib/audio/tdnn/speechbrain/nnet/loss/stoi_loss.py b/PyTorch/contrib/audio/tdnn/speechbrain/nnet/loss/stoi_loss.py index e98acf3a965807d4aac08b9fdf12375fc879904d..96e1fc5583acb7d22b430634da415dc02602f650 100644 --- a/PyTorch/contrib/audio/tdnn/speechbrain/nnet/loss/stoi_loss.py +++ b/PyTorch/contrib/audio/tdnn/speechbrain/nnet/loss/stoi_loss.py @@ -13,198 +13,198 @@ # limitations under the License. # -# ################################ -# From paper: "End-to-End Waveform Utterance Enhancement for Direct Evaluation -# Metrics Optimization by Fully Convolutional Neural Networks", TASLP, 2018 -# Authors: Szu-Wei, Fu 2020 -# ################################ - -import torch -import torchaudio -import numpy as np -from speechbrain.utils.torch_audio_backend import get_torchaudio_backend - -torchaudio_backend = get_torchaudio_backend() -torchaudio.set_audio_backend(torchaudio_backend) -smallVal = np.finfo("float").eps # To avoid divide by zero - - -def thirdoct(fs, nfft, num_bands, min_freq): - """Returns the 1/3 octave band matrix. - - Arguments - --------- - fs : int - Sampling rate. - nfft : int - FFT size. - num_bands : int - Number of 1/3 octave bands. - min_freq : int - Center frequency of the lowest 1/3 octave band. - - Returns - ------- - obm : tensor - Octave Band Matrix. 
- """ - - f = torch.linspace(0, fs, nfft + 1) - f = f[: int(nfft / 2) + 1] - k = torch.from_numpy(np.array(range(num_bands)).astype(float)) - cf = torch.pow(2.0 ** (1.0 / 3), k) * min_freq - freq_low = min_freq * torch.pow(2.0, (2 * k - 1) / 6) - freq_high = min_freq * torch.pow(2.0, (2 * k + 1) / 6) - obm = torch.zeros(num_bands, len(f)) # a verifier - - for i in range(len(cf)): - # Match 1/3 oct band freq with fft frequency bin - f_bin = torch.argmin(torch.square(f - freq_low[i])) - freq_low[i] = f[f_bin] - fl_ii = f_bin - f_bin = torch.argmin(torch.square(f - freq_high[i])) - freq_high[i] = f[f_bin] - fh_ii = f_bin - # Assign to the octave band matrix - obm[i, fl_ii:fh_ii] = 1 - return obm - - -def removeSilentFrames(x, y, dyn_range=40, N=256, K=128): - w = torch.unsqueeze(torch.from_numpy(np.hanning(256)), 0).to(torch.float) - - X1 = x[0 : int(x.shape[0]) // N * N].reshape(int(x.shape[0]) // N, N).T - X2 = ( - x[128 : (int(x.shape[0]) - 128) // N * N + 128] - .reshape((int(x.shape[0]) - 128) // N, N) - .T - ) - X = torch.zeros(N, X1.shape[1] + X2.shape[1]) - X[:, 0::2] = X1 - X[:, 1::2] = X2 - - energy = 20 * torch.log10( - torch.sqrt(torch.matmul(w ** 2, X ** 2)) / 16.0 + smallVal - ) - - Max_energy = torch.max(energy) - msk = torch.squeeze((energy - Max_energy + dyn_range > 0)) - - Y1 = y[0 : int(y.shape[0]) // N * N].reshape(int(y.shape[0]) // N, N).T - Y2 = ( - y[128 : (int(y.shape[0]) - 128) // N * N + 128] - .reshape((int(y.shape[0]) - 128) // N, N) - .T - ) - Y = torch.zeros(N, Y1.shape[1] + Y2.shape[1]) - Y[:, 0::2] = Y1 - Y[:, 1::2] = Y2 - - x_sil = w.T.repeat(1, X[:, msk].shape[-1]) * X[:, msk] - y_sil = w.T.repeat(1, X[:, msk].shape[-1]) * Y[:, msk] - - x_sil = torch.cat( - ( - x_sil[0:128, 0], - (x_sil[0:128, 1:] + x_sil[128:, 0:-1]).T.flatten(), - x_sil[128:256, -1], - ), - axis=0, - ) - y_sil = torch.cat( - ( - y_sil[0:128, 0], - (y_sil[0:128, 1:] + y_sil[128:, 0:-1]).T.flatten(), - y_sil[128:256, -1], - ), - axis=0, - ) - - return [x_sil, y_sil] - - -def stoi_loss(y_pred_batch, y_true_batch, lens, reduction="mean"): - """Compute the STOI score and return -1 * that score. - - This function can be used as a loss function for training - with SGD-based updates. - - Arguments - --------- - y_pred_batch : torch.Tensor - The degraded (enhanced) waveforms. - y_true_batch : torch.Tensor - The clean (reference) waveforms. - lens : torch.Tensor - The relative lengths of the waveforms within the batch. - reduction : str - The type of reduction ("mean" or "batch") to use. - - Example - ------- - >>> a = torch.sin(torch.arange(16000, dtype=torch.float32)).unsqueeze(0) - >>> b = a + 0.001 - >>> -stoi_loss(b, a, torch.ones(1)) - tensor(0.7...) 
- """ - - y_pred_batch = torch.squeeze(y_pred_batch, dim=-1) - y_true_batch = torch.squeeze(y_true_batch, dim=-1) - - batch_size = y_pred_batch.shape[0] - - fs = 16000 # Sampling rate - N = 30 # length of temporal envelope vectors - J = 15.0 # Number of one-third octave bands - - octave_band = thirdoct(fs=10000, nfft=512, num_bands=15, min_freq=150) - c = 5.62341325 # 10^(-Beta/20) with Beta = -15 - D = torch.zeros(batch_size) - resampler = torchaudio.transforms.Resample(fs, 10000) - for i in range(0, batch_size): # Run over mini-batches - y_true = y_true_batch[i, 0 : int(lens[i] * y_pred_batch.shape[1])] - y_pred = y_pred_batch[i, 0 : int(lens[i] * y_pred_batch.shape[1])] - - y_true, y_pred = resampler(y_true), resampler(y_pred) - - [y_sil_true, y_sil_pred] = removeSilentFrames(y_true, y_pred) - - stft_true = torchaudio.transforms.Spectrogram( - n_fft=512, win_length=256, hop_length=128, power=2 - )(y_sil_true) - stft_pred = torchaudio.transforms.Spectrogram( - n_fft=512, win_length=256, hop_length=128, power=2 - )(y_sil_pred) - - OCT_true = torch.sqrt(torch.matmul(octave_band, stft_true) + 1e-14) - OCT_pred = torch.sqrt(torch.matmul(octave_band, stft_pred) + 1e-14) - - M = int( - stft_pred.shape[-1] - (N - 1) - ) # number of temporal envelope vectors - - X = torch.zeros(15 * M, 30) - Y = torch.zeros(15 * M, 30) - for m in range(0, M): # Run over temporal envelope vectors - X[m * 15 : (m + 1) * 15, :] = OCT_true[:, m : m + N] - Y[m * 15 : (m + 1) * 15, :] = OCT_pred[:, m : m + N] - - alpha = torch.norm(X, dim=-1, keepdim=True) / ( - torch.norm(Y, dim=-1, keepdim=True) + smallVal - ) - - ay = Y * alpha - y = torch.min(ay, X + X * c) - - xn = X - torch.mean(X, dim=-1, keepdim=True) - xn = xn / (torch.norm(xn, dim=-1, keepdim=True) + smallVal) - - yn = y - torch.mean(y, dim=-1, keepdim=True) - yn = yn / (torch.norm(yn, dim=-1, keepdim=True) + smallVal) - d = torch.sum(xn * yn) - D[i] = d / (J * M) - - if reduction == "mean": - return -D.mean() - - return -D +# ################################ +# From paper: "End-to-End Waveform Utterance Enhancement for Direct Evaluation +# Metrics Optimization by Fully Convolutional Neural Networks", TASLP, 2018 +# Authors: Szu-Wei, Fu 2020 +# ################################ + +import torch +import torchaudio +import numpy as np +from speechbrain.utils.torch_audio_backend import get_torchaudio_backend + +torchaudio_backend = get_torchaudio_backend() +torchaudio.set_audio_backend(torchaudio_backend) +smallVal = np.finfo("float").eps # To avoid divide by zero + + +def thirdoct(fs, nfft, num_bands, min_freq): + """Returns the 1/3 octave band matrix. + + Arguments + --------- + fs : int + Sampling rate. + nfft : int + FFT size. + num_bands : int + Number of 1/3 octave bands. + min_freq : int + Center frequency of the lowest 1/3 octave band. + + Returns + ------- + obm : tensor + Octave Band Matrix. 
+ """ + + f = torch.linspace(0, fs, nfft + 1) + f = f[: int(nfft / 2) + 1] + k = torch.from_numpy(np.array(range(num_bands)).astype(float)) + cf = torch.pow(2.0 ** (1.0 / 3), k) * min_freq + freq_low = min_freq * torch.pow(2.0, (2 * k - 1) / 6) + freq_high = min_freq * torch.pow(2.0, (2 * k + 1) / 6) + obm = torch.zeros(num_bands, len(f)) # a verifier + + for i in range(len(cf)): + # Match 1/3 oct band freq with fft frequency bin + f_bin = torch.argmin(torch.square(f - freq_low[i])) + freq_low[i] = f[f_bin] + fl_ii = f_bin + f_bin = torch.argmin(torch.square(f - freq_high[i])) + freq_high[i] = f[f_bin] + fh_ii = f_bin + # Assign to the octave band matrix + obm[i, fl_ii:fh_ii] = 1 + return obm + + +def removeSilentFrames(x, y, dyn_range=40, N=256, K=128): + w = torch.unsqueeze(torch.from_numpy(np.hanning(256)), 0).to(torch.float) + + X1 = x[0 : int(x.shape[0]) // N * N].reshape(int(x.shape[0]) // N, N).T + X2 = ( + x[128 : (int(x.shape[0]) - 128) // N * N + 128] + .reshape((int(x.shape[0]) - 128) // N, N) + .T + ) + X = torch.zeros(N, X1.shape[1] + X2.shape[1]) + X[:, 0::2] = X1 + X[:, 1::2] = X2 + + energy = 20 * torch.log10( + torch.sqrt(torch.matmul(w ** 2, X ** 2)) / 16.0 + smallVal + ) + + Max_energy = torch.max(energy) + msk = torch.squeeze((energy - Max_energy + dyn_range > 0)) + + Y1 = y[0 : int(y.shape[0]) // N * N].reshape(int(y.shape[0]) // N, N).T + Y2 = ( + y[128 : (int(y.shape[0]) - 128) // N * N + 128] + .reshape((int(y.shape[0]) - 128) // N, N) + .T + ) + Y = torch.zeros(N, Y1.shape[1] + Y2.shape[1]) + Y[:, 0::2] = Y1 + Y[:, 1::2] = Y2 + + x_sil = w.T.repeat(1, X[:, msk].shape[-1]) * X[:, msk] + y_sil = w.T.repeat(1, X[:, msk].shape[-1]) * Y[:, msk] + + x_sil = torch.cat( + ( + x_sil[0:128, 0], + (x_sil[0:128, 1:] + x_sil[128:, 0:-1]).T.flatten(), + x_sil[128:256, -1], + ), + axis=0, + ) + y_sil = torch.cat( + ( + y_sil[0:128, 0], + (y_sil[0:128, 1:] + y_sil[128:, 0:-1]).T.flatten(), + y_sil[128:256, -1], + ), + axis=0, + ) + + return [x_sil, y_sil] + + +def stoi_loss(y_pred_batch, y_true_batch, lens, reduction="mean"): + """Compute the STOI score and return -1 * that score. + + This function can be used as a loss function for training + with SGD-based updates. + + Arguments + --------- + y_pred_batch : torch.Tensor + The degraded (enhanced) waveforms. + y_true_batch : torch.Tensor + The clean (reference) waveforms. + lens : torch.Tensor + The relative lengths of the waveforms within the batch. + reduction : str + The type of reduction ("mean" or "batch") to use. + + Example + ------- + >>> a = torch.sin(torch.arange(16000, dtype=torch.float32)).unsqueeze(0) + >>> b = a + 0.001 + >>> -stoi_loss(b, a, torch.ones(1)) + tensor(0.7...) 
+ """ + + y_pred_batch = torch.squeeze(y_pred_batch, dim=-1) + y_true_batch = torch.squeeze(y_true_batch, dim=-1) + + batch_size = y_pred_batch.shape[0] + + fs = 16000 # Sampling rate + N = 30 # length of temporal envelope vectors + J = 15.0 # Number of one-third octave bands + + octave_band = thirdoct(fs=10000, nfft=512, num_bands=15, min_freq=150) + c = 5.62341325 # 10^(-Beta/20) with Beta = -15 + D = torch.zeros(batch_size) + resampler = torchaudio.transforms.Resample(fs, 10000) + for i in range(0, batch_size): # Run over mini-batches + y_true = y_true_batch[i, 0 : int(lens[i] * y_pred_batch.shape[1])] + y_pred = y_pred_batch[i, 0 : int(lens[i] * y_pred_batch.shape[1])] + + y_true, y_pred = resampler(y_true), resampler(y_pred) + + [y_sil_true, y_sil_pred] = removeSilentFrames(y_true, y_pred) + + stft_true = torchaudio.transforms.Spectrogram( + n_fft=512, win_length=256, hop_length=128, power=2 + )(y_sil_true) + stft_pred = torchaudio.transforms.Spectrogram( + n_fft=512, win_length=256, hop_length=128, power=2 + )(y_sil_pred) + + OCT_true = torch.sqrt(torch.matmul(octave_band, stft_true) + 1e-14) + OCT_pred = torch.sqrt(torch.matmul(octave_band, stft_pred) + 1e-14) + + M = int( + stft_pred.shape[-1] - (N - 1) + ) # number of temporal envelope vectors + + X = torch.zeros(15 * M, 30) + Y = torch.zeros(15 * M, 30) + for m in range(0, M): # Run over temporal envelope vectors + X[m * 15 : (m + 1) * 15, :] = OCT_true[:, m : m + N] + Y[m * 15 : (m + 1) * 15, :] = OCT_pred[:, m : m + N] + + alpha = torch.norm(X, dim=-1, keepdim=True) / ( + torch.norm(Y, dim=-1, keepdim=True) + smallVal + ) + + ay = Y * alpha + y = torch.min(ay, X + X * c) + + xn = X - torch.mean(X, dim=-1, keepdim=True) + xn = xn / (torch.norm(xn, dim=-1, keepdim=True) + smallVal) + + yn = y - torch.mean(y, dim=-1, keepdim=True) + yn = yn / (torch.norm(yn, dim=-1, keepdim=True) + smallVal) + d = torch.sum(xn * yn) + D[i] = d / (J * M) + + if reduction == "mean": + return -D.mean() + + return -D diff --git a/PyTorch/contrib/audio/tdnn/templates/speaker_id/mini_librispeech_prepare.py b/PyTorch/contrib/audio/tdnn/templates/speaker_id/mini_librispeech_prepare.py index 7a59d8259d130f4afde972e05b3795404efecf5b..adb9e51765a581fc49f5a2340c3b1a32445da2b0 100644 --- a/PyTorch/contrib/audio/tdnn/templates/speaker_id/mini_librispeech_prepare.py +++ b/PyTorch/contrib/audio/tdnn/templates/speaker_id/mini_librispeech_prepare.py @@ -13,209 +13,209 @@ # limitations under the License. # -""" -Downloads and creates data manifest files for Mini LibriSpeech (spk-id). -For speaker-id, different sentences of the same speaker must appear in train, -validation, and test sets. In this case, these sets are thus derived from -splitting the original training set intothree chunks. - -Authors: - * Mirco Ravanelli, 2021 -""" - -import os -import json -import shutil -import random -import logging -from speechbrain.utils.data_utils import get_all_files, download_file -from speechbrain.dataio.dataio import read_audio - -logger = logging.getLogger(__name__) -MINILIBRI_TRAIN_URL = "http://www.openslr.org/resources/31/train-clean-5.tar.gz" -SAMPLERATE = 16000 - - -def prepare_mini_librispeech( - data_folder, - save_json_train, - save_json_valid, - save_json_test, - split_ratio=[80, 10, 10], - batch_size=32, -): - """ - Prepares the json files for the Mini Librispeech dataset. - - Downloads the dataset if it is not found in the `data_folder`. - - Arguments - --------- - data_folder : str - Path to the folder where the Mini Librispeech dataset is stored. 
- save_json_train : str - Path where the train data specification file will be saved. - save_json_valid : str - Path where the validation data specification file will be saved. - save_json_test : str - Path where the test data specification file will be saved. - split_ratio: list - List composed of three integers that sets split ratios for train, valid, - and test sets, respectively. For instance split_ratio=[80, 10, 10] will - assign 80% of the sentences to training, 10% for validation, and 10% - for test. - - Example - ------- - >>> data_folder = '/path/to/mini_librispeech' - >>> prepare_mini_librispeech(data_folder, 'train.json', 'valid.json', 'test.json') - """ - - # Check if this phase is already done (if so, skip it) - - if skip(save_json_train, save_json_valid, save_json_test): - logger.info("Preparation completed in previous run, skipping.") - return - - # If the dataset doesn't exist yet, download it - train_folder = os.path.join(data_folder, "LibriSpeech", "train-clean-5") - if not check_folders(train_folder): - download_mini_librispeech(data_folder) - - # List files and create manifest from list - logger.info( - f"Creating {save_json_train}, {save_json_valid}, and {save_json_test}" - ) - extension = [".flac"] - wav_list = get_all_files(train_folder, match_and=extension) - - # Random split the signal list into train, valid, and test sets. - data_split = split_sets(wav_list, split_ratio, batch_size) - - # Creating json files - create_json(data_split["train"], save_json_train) - create_json(data_split["valid"], save_json_valid) - create_json(data_split["test"], save_json_test) - - -def create_json(wav_list, json_file): - """ - Creates the json file given a list of wav files. - - Arguments - --------- - wav_list : list of str - The list of wav files. - json_file : str - The path of the output json file - """ - # Processing all the wav files in the list - json_dict = {} - for wav_file in wav_list: - - # Reading the signal (to retrieve duration in seconds) - signal = read_audio(wav_file) - duration = signal.shape[0] / SAMPLERATE - - # Manipulate path to get relative path and uttid - path_parts = wav_file.split(os.path.sep) - uttid, _ = os.path.splitext(path_parts[-1]) - relative_path = os.path.join("{data_root}", *path_parts[-5:]) - - # Getting speaker-id from utterance-id - spk_id = uttid.split("-")[0] - - # Create entry for this utterance - json_dict[uttid] = { - "wav": relative_path, - "length": duration, - "spk_id": spk_id, - } - - # Writing the dictionary to the json file - with open(json_file, mode="w") as json_f: - json.dump(json_dict, json_f, indent=2) - - logger.info(f"{json_file} successfully created!") - - -def skip(*filenames): - """ - Detects if the data preparation has been already done. - If the preparation has been done, we can skip it. - - Returns - ------- - bool - if True, the preparation phase can be skipped. - if False, it must be done. - """ - for filename in filenames: - if not os.path.isfile(filename): - return False - return True - - -def check_folders(*folders): - """Returns False if any passed folder does not exist.""" - for folder in folders: - if not os.path.exists(folder): - return False - return True - - -def split_sets(wav_list, split_ratio, batch_size): - """Randomly splits the wav list into training, validation, and test lists. - Note that a better approach is to make sure that all the classes have the - same proportion of samples (e.g, spk01 should have 80% of samples in - training, 10% validation, 10% test, the same for speaker2 etc.). 
This - is the approach followed in some recipes such as the Voxceleb one. For - simplicity, we here simply split the full list without necessarily respecting - the split ratio within each class. - - Arguments - --------- - wav_lst : list - list of all the signals in the dataset - split_ratio: list - List composed of three integers that sets split ratios for train, valid, - and test sets, respectively. For instance split_ratio=[80, 10, 10] will - assign 80% of the sentences to training, 10% for validation, and 10% - for test. - - Returns - ------ - dictionary containing train, valid, and test splits. - """ - # Random shuffle of the list - random.shuffle(wav_list) - tot_split = sum(split_ratio) - tot_snts = len(wav_list) - data_split = {} - splits = ["train", "valid"] - - # for i, split in enumerate(splits): - # n_snts = int(tot_snts * split_ratio[i] / tot_split) - # data_split[split] = wav_list[0:n_snts] - # del wav_list[0:n_snts] - # data_split["test"] = wav_list - - for i, split in enumerate(splits): - n_snts = int(tot_snts * split_ratio[i] / tot_split // batch_size * batch_size) - data_split[split] = wav_list[0:n_snts] - del wav_list[0:n_snts] - data_split["test"] = wav_list[0:n_snts] - return data_split - - -def download_mini_librispeech(destination): - """Download dataset and unpack it. - - Arguments - --------- - destination : str - Place to put dataset. - """ - train_archive = os.path.join(destination, "train-clean-5.tar.gz") - download_file(MINILIBRI_TRAIN_URL, train_archive) - shutil.unpack_archive(train_archive, destination) +""" +Downloads and creates data manifest files for Mini LibriSpeech (spk-id). +For speaker-id, different sentences of the same speaker must appear in train, +validation, and test sets. In this case, these sets are thus derived from +splitting the original training set intothree chunks. + +Authors: + * Mirco Ravanelli, 2021 +""" + +import os +import json +import shutil +import random +import logging +from speechbrain.utils.data_utils import get_all_files, download_file +from speechbrain.dataio.dataio import read_audio + +logger = logging.getLogger(__name__) +MINILIBRI_TRAIN_URL = "http://www.openslr.org/resources/31/train-clean-5.tar.gz" +SAMPLERATE = 16000 + + +def prepare_mini_librispeech( + data_folder, + save_json_train, + save_json_valid, + save_json_test, + split_ratio=[80, 10, 10], + batch_size=32, +): + """ + Prepares the json files for the Mini Librispeech dataset. + + Downloads the dataset if it is not found in the `data_folder`. + + Arguments + --------- + data_folder : str + Path to the folder where the Mini Librispeech dataset is stored. + save_json_train : str + Path where the train data specification file will be saved. + save_json_valid : str + Path where the validation data specification file will be saved. + save_json_test : str + Path where the test data specification file will be saved. + split_ratio: list + List composed of three integers that sets split ratios for train, valid, + and test sets, respectively. For instance split_ratio=[80, 10, 10] will + assign 80% of the sentences to training, 10% for validation, and 10% + for test. 
+ + Example + ------- + >>> data_folder = '/path/to/mini_librispeech' + >>> prepare_mini_librispeech(data_folder, 'train.json', 'valid.json', 'test.json') + """ + + # Check if this phase is already done (if so, skip it) + + if skip(save_json_train, save_json_valid, save_json_test): + logger.info("Preparation completed in previous run, skipping.") + return + + # If the dataset doesn't exist yet, download it + train_folder = os.path.join(data_folder, "LibriSpeech", "train-clean-5") + if not check_folders(train_folder): + download_mini_librispeech(data_folder) + + # List files and create manifest from list + logger.info( + f"Creating {save_json_train}, {save_json_valid}, and {save_json_test}" + ) + extension = [".flac"] + wav_list = get_all_files(train_folder, match_and=extension) + + # Random split the signal list into train, valid, and test sets. + data_split = split_sets(wav_list, split_ratio, batch_size) + + # Creating json files + create_json(data_split["train"], save_json_train) + create_json(data_split["valid"], save_json_valid) + create_json(data_split["test"], save_json_test) + + +def create_json(wav_list, json_file): + """ + Creates the json file given a list of wav files. + + Arguments + --------- + wav_list : list of str + The list of wav files. + json_file : str + The path of the output json file + """ + # Processing all the wav files in the list + json_dict = {} + for wav_file in wav_list: + + # Reading the signal (to retrieve duration in seconds) + signal = read_audio(wav_file) + duration = signal.shape[0] / SAMPLERATE + + # Manipulate path to get relative path and uttid + path_parts = wav_file.split(os.path.sep) + uttid, _ = os.path.splitext(path_parts[-1]) + relative_path = os.path.join("{data_root}", *path_parts[-5:]) + + # Getting speaker-id from utterance-id + spk_id = uttid.split("-")[0] + + # Create entry for this utterance + json_dict[uttid] = { + "wav": relative_path, + "length": duration, + "spk_id": spk_id, + } + + # Writing the dictionary to the json file + with open(json_file, mode="w") as json_f: + json.dump(json_dict, json_f, indent=2) + + logger.info(f"{json_file} successfully created!") + + +def skip(*filenames): + """ + Detects if the data preparation has been already done. + If the preparation has been done, we can skip it. + + Returns + ------- + bool + if True, the preparation phase can be skipped. + if False, it must be done. + """ + for filename in filenames: + if not os.path.isfile(filename): + return False + return True + + +def check_folders(*folders): + """Returns False if any passed folder does not exist.""" + for folder in folders: + if not os.path.exists(folder): + return False + return True + + +def split_sets(wav_list, split_ratio, batch_size): + """Randomly splits the wav list into training, validation, and test lists. + Note that a better approach is to make sure that all the classes have the + same proportion of samples (e.g, spk01 should have 80% of samples in + training, 10% validation, 10% test, the same for speaker2 etc.). This + is the approach followed in some recipes such as the Voxceleb one. For + simplicity, we here simply split the full list without necessarily respecting + the split ratio within each class. + + Arguments + --------- + wav_lst : list + list of all the signals in the dataset + split_ratio: list + List composed of three integers that sets split ratios for train, valid, + and test sets, respectively. 
For instance split_ratio=[80, 10, 10] will + assign 80% of the sentences to training, 10% for validation, and 10% + for test. + + Returns + ------ + dictionary containing train, valid, and test splits. + """ + # Random shuffle of the list + random.shuffle(wav_list) + tot_split = sum(split_ratio) + tot_snts = len(wav_list) + data_split = {} + splits = ["train", "valid"] + + # for i, split in enumerate(splits): + # n_snts = int(tot_snts * split_ratio[i] / tot_split) + # data_split[split] = wav_list[0:n_snts] + # del wav_list[0:n_snts] + # data_split["test"] = wav_list + + for i, split in enumerate(splits): + n_snts = int(tot_snts * split_ratio[i] / tot_split // batch_size * batch_size) + data_split[split] = wav_list[0:n_snts] + del wav_list[0:n_snts] + data_split["test"] = wav_list[0:n_snts] + return data_split + + +def download_mini_librispeech(destination): + """Download dataset and unpack it. + + Arguments + --------- + destination : str + Place to put dataset. + """ + train_archive = os.path.join(destination, "train-clean-5.tar.gz") + download_file(MINILIBRI_TRAIN_URL, train_archive) + shutil.unpack_archive(train_archive, destination) diff --git a/PyTorch/contrib/audio/tdnn/templates/speaker_id/modelzoo_level.txt b/PyTorch/contrib/audio/tdnn/templates/speaker_id/modelzoo_level.txt index b8f3c5693dfc8cbc81345111dae9c4f739edb244..c7d1879e5eedecc37bb4bafa3a15ee16f7c25cda 100644 --- a/PyTorch/contrib/audio/tdnn/templates/speaker_id/modelzoo_level.txt +++ b/PyTorch/contrib/audio/tdnn/templates/speaker_id/modelzoo_level.txt @@ -1,6 +1,6 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:NOK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:NOK PerfStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_full_8p.sh b/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_full_8p.sh index 702c19b5d9ab11d5e7c59c0a7acb416ab3c08ddf..d7f81cce38a697d9a9b58f9a377605ba511c6f58 100644 --- a/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_full_8p.sh +++ b/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_full_8p.sh @@ -1,159 +1,159 @@ -##################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 删除之前结果 -rm -rf ../results - -#集合通信参数,不需要修改 -export RANK_SIZE=8 - -# 数据集路径,保持为空,不需要修改 -data_folder="" -#网络名称,同目录名称,需要模型审视修改 -Network="Tdnn" - -#训练batch_size,,需要模型审视修改 -batch_size=64 - -#训练epoch -train_epochs=15 -# 指定训练所使用的npu device卡id -device_id=0 - - -# 参数校验,data_folder为必传参数, 其他参数的增删由模型自身决定;此处若新增参数需在上面有定义并赋值; -for para in $* -do - if [[ $para == --workers* ]];then - workers=`echo ${para#*=}` - elif [[ $para == --data_folder* ]];then - data_folder=`echo ${para#*=}` - fi -done - -# 校验是否传入data_folder,不需要修改 -if [[ $data_folder == "" ]];then - echo "[Error] para \"data_folder\" must be config" - exit 1 -fi - -##################指定训练脚本执行路径################## -# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" - ln -s source dest -elif [ ${device_id} ]; then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - echo "[Error] device id must be confing" - exit 1 -fi - - - -#################指定训练脚本执行路径################## -# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ]; then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - -##################创建日志输出目录,不需要修改################## -ASCEND_DEVICE_ID=${device_id} -if [ -d ${test_path_dir}/output/$ASCEND_DEVICE_ID ];then - rm -rf ${test_path_dir}/output/$ASCEND_DEVICE_ID - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -##################启动训练脚本################## -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# source 环境变量 -source ${test_path_dir}/env.sh -export WORLD_SIZE=8 -export MASTER_ADDR='127.0.0.1' -export MASTER_PORT='211225' - -for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log 2>&1 & - else - python3.7 train.py train.yaml \ - --distributed_launch \ - --distributed_backend=hccl \ - --local_rank ${RANK_ID} \ - --batch_size=$batch_size \ - --number_of_epochs=$train_epochs \ - --data_folder=$data_folder > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log 2>&1 & - fi -done -wait - - -##################获取训练数据################## -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F " " '{print $NF}'|awk 'END {print}'|cut -d ")" -f1` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep "Epoch loaded:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F "test error:" '{print $NF}'|awk '{printf("%0.4f\n",1-$1)}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "npu id:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log - +##################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network 
batch_size RANK_SIZE +# 删除之前结果 +rm -rf ../results + +#集合通信参数,不需要修改 +export RANK_SIZE=8 + +# 数据集路径,保持为空,不需要修改 +data_folder="" +#网络名称,同目录名称,需要模型审视修改 +Network="Tdnn" + +#训练batch_size,,需要模型审视修改 +batch_size=64 + +#训练epoch +train_epochs=15 +# 指定训练所使用的npu device卡id +device_id=0 + + +# 参数校验,data_folder为必传参数, 其他参数的增删由模型自身决定;此处若新增参数需在上面有定义并赋值; +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_folder* ]];then + data_folder=`echo ${para#*=}` + fi +done + +# 校验是否传入data_folder,不需要修改 +if [[ $data_folder == "" ]];then + echo "[Error] para \"data_folder\" must be config" + exit 1 +fi + +##################指定训练脚本执行路径################## +# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" + ln -s source dest +elif [ ${device_id} ]; then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + echo "[Error] device id must be confing" + exit 1 +fi + + + +#################指定训练脚本执行路径################## +# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ]; then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +##################创建日志输出目录,不需要修改################## +ASCEND_DEVICE_ID=${device_id} +if [ -d ${test_path_dir}/output/$ASCEND_DEVICE_ID ];then + rm -rf ${test_path_dir}/output/$ASCEND_DEVICE_ID + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +##################启动训练脚本################## +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# source 环境变量 +source ${test_path_dir}/env.sh +export WORLD_SIZE=8 +export MASTER_ADDR='127.0.0.1' +export MASTER_PORT='211225' + +for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log 2>&1 & + else + python3.7 train.py train.yaml \ + --distributed_launch \ + --distributed_backend=hccl \ + --local_rank ${RANK_ID} \ + --batch_size=$batch_size \ + --number_of_epochs=$train_epochs \ + --data_folder=$data_folder > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log 2>&1 & + fi +done +wait + + +##################获取训练数据################## +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F " " '{print $NF}'|awk 'END {print}'|cut -d ")" -f1` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep "Epoch loaded:" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F "test error:" '{print $NF}'|awk '{printf("%0.4f\n",1-$1)}'` +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "npu id:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}_full_8p.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log + diff --git a/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_performance_8p.sh b/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_performance_8p.sh index 3d8549d6cf017e9b85e706e1b232b6d1ed0c3ccf..94bd08c5aca12532ce26561196174cca52d4da7e 100644 --- a/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_performance_8p.sh +++ b/PyTorch/contrib/audio/tdnn/templates/speaker_id/test/train_performance_8p.sh @@ -1,154 +1,154 @@ -##################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 删除之前结果 -rm -rf ../results - -#集合通信参数,不需要修改 -export RANK_SIZE=8 - -# 数据集路径,保持为空,不需要修改 -data_folder="" -#网络名称,同目录名称,需要模型审视修改 -Network="Tdnn" - -#训练batch_size,,需要模型审视修改 -batch_size=64 - -#训练epoch -train_epochs=2 -# 指定训练所使用的npu device卡id -device_id=0 - - -# 参数校验,data_folder为必传参数, 其他参数的增删由模型自身决定;此处若新增参数需在上面有定义并赋值; -for para in $* -do - if [[ $para == --workers* ]];then - workers=`echo ${para#*=}` - elif [[ $para == --data_folder* ]];then - data_folder=`echo ${para#*=}` - fi -done - -# 校验是否传入data_folder,不需要修改 -if [[ $data_folder == "" ]];then - echo "[Error] para \"data_folder\" must be config" - exit 1 -fi - -##################指定训练脚本执行路径################## -# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" - ln -s source dest -elif [ ${device_id} ]; then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - echo "[Error] device id must be confing" - exit 1 -fi - - - -#################指定训练脚本执行路径################## -# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ]; then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - -##################创建日志输出目录,不需要修改################## -ASCEND_DEVICE_ID=${device_id} -if [ -d ${test_path_dir}/output/$ASCEND_DEVICE_ID ];then - rm -rf ${test_path_dir}/output/$ASCEND_DEVICE_ID - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -##################启动训练脚本################## -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# source 环境变量 -source ${test_path_dir}/env.sh -export WORLD_SIZE=8 -export MASTER_ADDR='127.0.0.1' -export MASTER_PORT='211225' - -for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log 2>&1 & - else - python3.7 train.py train.yaml \ - --distributed_launch \ - --distributed_backend=hccl \ - --local_rank ${RANK_ID} \ - --batch_size=$batch_size \ - --number_of_epochs=$train_epochs \ - --data_folder=$data_folder > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log 2>&1 & - fi -done -wait - - -##################获取训练数据################## -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log|awk -F "(" '{print $NF}'|awk 'END {print}'|cut -d ")" -f1` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "npu id:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}_perf_8p.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log - +##################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 删除之前结果 +rm -rf ../results + +#集合通信参数,不需要修改 +export RANK_SIZE=8 + +# 数据集路径,保持为空,不需要修改 +data_folder="" +#网络名称,同目录名称,需要模型审视修改 +Network="Tdnn" + +#训练batch_size,,需要模型审视修改 +batch_size=64 + +#训练epoch +train_epochs=2 +# 指定训练所使用的npu device卡id +device_id=0 + + +# 参数校验,data_folder为必传参数, 其他参数的增删由模型自身决定;此处若新增参数需在上面有定义并赋值; +for para in $* +do + if [[ $para == 
--workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_folder* ]];then + data_folder=`echo ${para#*=}` + fi +done + +# 校验是否传入data_folder,不需要修改 +if [[ $data_folder == "" ]];then + echo "[Error] para \"data_folder\" must be config" + exit 1 +fi + +##################指定训练脚本执行路径################## +# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" + ln -s source dest +elif [ ${device_id} ]; then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + echo "[Error] device id must be confing" + exit 1 +fi + + + +#################指定训练脚本执行路径################## +# cd到与test文件同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ]; then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +##################创建日志输出目录,不需要修改################## +ASCEND_DEVICE_ID=${device_id} +if [ -d ${test_path_dir}/output/$ASCEND_DEVICE_ID ];then + rm -rf ${test_path_dir}/output/$ASCEND_DEVICE_ID + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +##################启动训练脚本################## +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# source 环境变量 +source ${test_path_dir}/env.sh +export WORLD_SIZE=8 +export MASTER_ADDR='127.0.0.1' +export MASTER_PORT='211225' + +for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log 2>&1 & + else + python3.7 train.py train.yaml \ + --distributed_launch \ + --distributed_backend=hccl \ + --local_rank ${RANK_ID} \ + --batch_size=$batch_size \ + --number_of_epochs=$train_epochs \ + --data_folder=$data_folder > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log 2>&1 & + fi +done +wait + + +##################获取训练数据################## +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}_perf_8p.log|awk -F "(" '{print $NF}'|awk 'END {print}'|cut -d ")" -f1` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "npu id:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}_perf_8p.log|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +# 最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log + diff --git a/PyTorch/contrib/cv/classification/3d_attention_net/train.py b/PyTorch/contrib/cv/classification/3d_attention_net/train.py index fa3c27864012e21389862c9ab4e4d78ca069351d..30cb7368f276e99eba819d745d4e262e09c0aa64 100644 --- a/PyTorch/contrib/cv/classification/3d_attention_net/train.py +++ b/PyTorch/contrib/cv/classification/3d_attention_net/train.py @@ -1,236 +1,236 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function, division -from apex import amp -from apex.optimizers import NpuFusedSGD -import torch -import argparse -import torch.nn as nn -import torch.optim as optim -from torch.autograd import Variable -from torch.utils.data import Dataset, DataLoader -import numpy as np -import torchvision -from torchvision import transforms, datasets, models -import os -import cv2 -import time -import torch.npu -import torch.utils.data.distributed -from collections import OrderedDict -from torch.nn.parallel import DistributedDataParallel -from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel - -device = None -args = None -log_file = None - -def parse_args(): - parser = argparse.ArgumentParser(description='3D Attention Net') - parser.add_argument('--device_type', type=str) - parser.add_argument('--device_id', type=int) - parser.add_argument('--device_num', type=int) - parser.add_argument('--total_epochs', type=int) - parser.add_argument('--is_train', type=str) - parser.add_argument('--is_pretrain', type=str) - parser.add_argument('--num_classes', type=int) - parser.add_argument('--dist_url', type=str) - parser.add_argument('--train_batch_size', type=int) - parser.add_argument('--test_batch_size', type=int) - args = parser.parse_args() - return args - -# for test -def test(model, test_loader): - # Test - classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') - model.eval() - correct = 0 - total = 0 - class_correct = list(0. for i in range(10)) - class_total = list(0. 
for i in range(10)) - - for images, labels in test_loader: - images = Variable(images.to(device)) - labels = Variable(labels.to(device)) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels.data).sum() - c = (predicted == labels.data).squeeze() - for i in range(len(labels.data)): - label = labels.data[i] - class_correct[label] += c[i] - class_total[label] += 1 - if args.device_id == 0: - print(f"Accuracy of the model(on device: {args.device_id}) on the test images: {100 * float(correct) / total} %") - write_log('Accuracy of the model on the test images: %d %%\n' % (100 * float(correct) / total)) - write_log(f'Accuracy of the model on the test images: {float(correct)/total} \n') - return float(correct) / total - -def write_log(output): - if log_file is not None: - log_file.write(output) - - -def main(): - global args - global device - args = parse_args() - print(args) - model_file = 'model_92_sgd.pkl' - train_batch_size = args.train_batch_size - test_batch_size = args.test_batch_size - lr = 0.1 - is_train = args.is_train == "True" - is_pretrain = args.is_pretrain == "True" - acc_best = 0 - total_epoch = args.total_epochs - distribute = args.device_num > 1 - if(args.device_type == "GPU"): - device = torch.device("cuda", args.device_id) - if distribute: - torch.cuda.set_device(args.device_id) - torch.distributed.init_process_group(backend="nccl", init_method=args.dist_url, world_size=args.device_num, rank=args.device_id) - else: - device = f"npu:{args.device_id}" - if distribute: - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '49876' - torch.npu.set_device(device) - print("rank:",args.device_id) - torch.distributed.init_process_group(backend="hccl", world_size=args.device_num, rank=args.device_id) - - # Image Preprocessing - transform = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.RandomCrop((32, 32), padding=4), - transforms.ToTensor() - ]) - test_transform = transforms.Compose([ - transforms.ToTensor() - ]) - - train_dataset = datasets.CIFAR10(root='./data/', train=True, transform=transform, download=False) - test_dataset = datasets.CIFAR10(root='./data/', train=False, transform=test_transform) - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if distribute else None - train_loader = torch.utils.data.DataLoader(dataset=train_dataset, \ - batch_size=train_batch_size, \ - shuffle=(train_sampler is None), \ - num_workers=8, \ - pin_memory=False, \ - sampler = train_sampler if is_train else None, \ - drop_last = True) - test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=test_batch_size, shuffle=False) - - model = ResidualAttentionModel(args.num_classes).to(device) - criterion = nn.CrossEntropyLoss().to(device) - optimizer = None - if args.device_type == "GPU": - optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001) - else: - optimizer = NpuFusedSGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001) - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0) - if distribute: - if args.device_type == "GPU": - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device_id]) - else: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device_id], broadcast_buffers=False) - global log_file - if args.device_id == 0: - log_file = open("train_log_file" if is_train else 
"test_log_file", mode="w+") - if is_train is True: - if is_pretrain == True: - base_weights = torch.load(model_file, map_location="cpu") - print('Loading base network...') - new_state_dict = OrderedDict() - for k, v in base_weights.items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - if "fc.weight" in new_state_dict: - print("pop fc layer weight") - new_state_dict.pop("fc.weight") - new_state_dict.pop("fc.bias") - model.load_state_dict(new_state_dict, strict=False) - - # Training - total_tims = 0 - total_samples = 0 - for epoch in range(total_epoch): - model.train() - tims = time.time() - epoch_samples = 0 - if train_sampler is not None: # is distributed - train_sampler.set_epoch(epoch) - for i, (images, labels) in enumerate(train_loader): - epoch_samples += images.shape[0] - if i == 5: - tims = time.time() - images = Variable(images.to(device)) - labels = Variable(labels.to(device)) - - # Forward + Backward + Optimize - optimizer.zero_grad() - outputs = model(images) - loss = criterion(outputs, labels) - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - - if (i+1) % 20 == 0 and args.device_id == 0: - print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, total_epoch, i+1, len(train_loader), loss.item())) - write_log("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f \n" %(epoch+1, total_epoch, i+1, len(train_loader), loss.item())) - total_tims += time.time() - tims - total_samples += epoch_samples - if args.device_id == 0: - print(f'the epoch {epoch+1} takes time:',time.time()-tims) - print(f"epoch {epoch+1} FPS: {(epoch_samples - 5 * train_batch_size)* args.device_num / (time.time()-tims)}") - print('evaluate test set:') - write_log(f'the epoch {epoch+1} takes time: {time.time()-tims} \n') - write_log(f"epoch {epoch+1} FPS: {(epoch_samples - 5 * train_batch_size)* args.device_num / (time.time()-tims)} \n") - acc = test(model, test_loader) - if acc > acc_best: - acc_best = acc - print('current best acc,', acc_best) - if args.device_id == 0: - torch.save(model.state_dict(), model_file) - # Decaying Learning Rate - if (epoch+1) / float(total_epoch) == 0.3 or (epoch+1) / float(total_epoch) == 0.6 or (epoch+1) / float(total_epoch) == 0.9: - lr /= 10 - print('reset learning rate to:', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - print(param_group['lr']) - # Save the Model - if args.device_id == 0: - torch.save(model.state_dict(), 'last_model_92_sgd.pkl') - elif args.device_id == 0: - base_weights = torch.load(model_file, map_location="cpu") - print('Loading base network...') - new_state_dict = OrderedDict() - for k, v in base_weights.items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - test(model, test_loader) - -if __name__ == "__main__": - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function, division +from apex import amp +from apex.optimizers import NpuFusedSGD +import torch +import argparse +import torch.nn as nn +import torch.optim as optim +from torch.autograd import Variable +from torch.utils.data import Dataset, DataLoader +import numpy as np +import torchvision +from torchvision import transforms, datasets, models +import os +import cv2 +import time +import torch.npu +import torch.utils.data.distributed +from collections import OrderedDict +from torch.nn.parallel import DistributedDataParallel +from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel + +device = None +args = None +log_file = None + +def parse_args(): + parser = argparse.ArgumentParser(description='3D Attention Net') + parser.add_argument('--device_type', type=str) + parser.add_argument('--device_id', type=int) + parser.add_argument('--device_num', type=int) + parser.add_argument('--total_epochs', type=int) + parser.add_argument('--is_train', type=str) + parser.add_argument('--is_pretrain', type=str) + parser.add_argument('--num_classes', type=int) + parser.add_argument('--dist_url', type=str) + parser.add_argument('--train_batch_size', type=int) + parser.add_argument('--test_batch_size', type=int) + args = parser.parse_args() + return args + +# for test +def test(model, test_loader): + # Test + classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') + model.eval() + correct = 0 + total = 0 + class_correct = list(0. for i in range(10)) + class_total = list(0. for i in range(10)) + + for images, labels in test_loader: + images = Variable(images.to(device)) + labels = Variable(labels.to(device)) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels.data).sum() + c = (predicted == labels.data).squeeze() + for i in range(len(labels.data)): + label = labels.data[i] + class_correct[label] += c[i] + class_total[label] += 1 + if args.device_id == 0: + print(f"Accuracy of the model(on device: {args.device_id}) on the test images: {100 * float(correct) / total} %") + write_log('Accuracy of the model on the test images: %d %%\n' % (100 * float(correct) / total)) + write_log(f'Accuracy of the model on the test images: {float(correct)/total} \n') + return float(correct) / total + +def write_log(output): + if log_file is not None: + log_file.write(output) + + +def main(): + global args + global device + args = parse_args() + print(args) + model_file = 'model_92_sgd.pkl' + train_batch_size = args.train_batch_size + test_batch_size = args.test_batch_size + lr = 0.1 + is_train = args.is_train == "True" + is_pretrain = args.is_pretrain == "True" + acc_best = 0 + total_epoch = args.total_epochs + distribute = args.device_num > 1 + if(args.device_type == "GPU"): + device = torch.device("cuda", args.device_id) + if distribute: + torch.cuda.set_device(args.device_id) + torch.distributed.init_process_group(backend="nccl", init_method=args.dist_url, world_size=args.device_num, rank=args.device_id) + else: + device = f"npu:{args.device_id}" + if distribute: + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '49876' + torch.npu.set_device(device) + print("rank:",args.device_id) + torch.distributed.init_process_group(backend="hccl", world_size=args.device_num, rank=args.device_id) + + # Image Preprocessing + transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomCrop((32, 
32), padding=4), + transforms.ToTensor() + ]) + test_transform = transforms.Compose([ + transforms.ToTensor() + ]) + + train_dataset = datasets.CIFAR10(root='./data/', train=True, transform=transform, download=False) + test_dataset = datasets.CIFAR10(root='./data/', train=False, transform=test_transform) + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if distribute else None + train_loader = torch.utils.data.DataLoader(dataset=train_dataset, \ + batch_size=train_batch_size, \ + shuffle=(train_sampler is None), \ + num_workers=8, \ + pin_memory=False, \ + sampler = train_sampler if is_train else None, \ + drop_last = True) + test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=test_batch_size, shuffle=False) + + model = ResidualAttentionModel(args.num_classes).to(device) + criterion = nn.CrossEntropyLoss().to(device) + optimizer = None + if args.device_type == "GPU": + optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001) + else: + optimizer = NpuFusedSGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001) + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0) + if distribute: + if args.device_type == "GPU": + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device_id]) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device_id], broadcast_buffers=False) + global log_file + if args.device_id == 0: + log_file = open("train_log_file" if is_train else "test_log_file", mode="w+") + if is_train is True: + if is_pretrain == True: + base_weights = torch.load(model_file, map_location="cpu") + print('Loading base network...') + new_state_dict = OrderedDict() + for k, v in base_weights.items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + if "fc.weight" in new_state_dict: + print("pop fc layer weight") + new_state_dict.pop("fc.weight") + new_state_dict.pop("fc.bias") + model.load_state_dict(new_state_dict, strict=False) + + # Training + total_tims = 0 + total_samples = 0 + for epoch in range(total_epoch): + model.train() + tims = time.time() + epoch_samples = 0 + if train_sampler is not None: # is distributed + train_sampler.set_epoch(epoch) + for i, (images, labels) in enumerate(train_loader): + epoch_samples += images.shape[0] + if i == 5: + tims = time.time() + images = Variable(images.to(device)) + labels = Variable(labels.to(device)) + + # Forward + Backward + Optimize + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + + if (i+1) % 20 == 0 and args.device_id == 0: + print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, total_epoch, i+1, len(train_loader), loss.item())) + write_log("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f \n" %(epoch+1, total_epoch, i+1, len(train_loader), loss.item())) + total_tims += time.time() - tims + total_samples += epoch_samples + if args.device_id == 0: + print(f'the epoch {epoch+1} takes time:',time.time()-tims) + print(f"epoch {epoch+1} FPS: {(epoch_samples - 5 * train_batch_size)* args.device_num / (time.time()-tims)}") + print('evaluate test set:') + write_log(f'the epoch {epoch+1} takes time: {time.time()-tims} \n') + write_log(f"epoch {epoch+1} FPS: {(epoch_samples - 5 * train_batch_size)* args.device_num / (time.time()-tims)} \n") + acc = test(model, test_loader) + 
if acc > acc_best: + acc_best = acc + print('current best acc,', acc_best) + if args.device_id == 0: + torch.save(model.state_dict(), model_file) + # Decaying Learning Rate + if (epoch+1) / float(total_epoch) == 0.3 or (epoch+1) / float(total_epoch) == 0.6 or (epoch+1) / float(total_epoch) == 0.9: + lr /= 10 + print('reset learning rate to:', lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print(param_group['lr']) + # Save the Model + if args.device_id == 0: + torch.save(model.state_dict(), 'last_model_92_sgd.pkl') + elif args.device_id == 0: + base_weights = torch.load(model_file, map_location="cpu") + print('Loading base network...') + new_state_dict = OrderedDict() + for k, v in base_weights.items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + test(model, test_loader) + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/classification/AlignedReID/modelzoo_level.txt b/PyTorch/contrib/cv/classification/AlignedReID/modelzoo_level.txt index 9e95396651cc4382fe60ee1ee053674f527a448c..27e6c78b37535fe4f5a17029546fe257ad164d34 100644 --- a/PyTorch/contrib/cv/classification/AlignedReID/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/AlignedReID/modelzoo_level.txt @@ -1,4 +1,4 @@ -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Deit_Small/mixup.py b/PyTorch/contrib/cv/classification/Deit_Small/mixup.py index 3e3b35b157764a66f6bf1e733e0e4a1697cd40e5..dbc166e4f56ffc3c87d35967cb356753dccf6de5 100644 --- a/PyTorch/contrib/cv/classification/Deit_Small/mixup.py +++ b/PyTorch/contrib/cv/classification/Deit_Small/mixup.py @@ -1,330 +1,330 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" Mixup and Cutmix - -Papers: -mixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412) - -CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features (https://arxiv.org/abs/1905.04899) - -Code Reference: -CutMix: https://github.com/clovaai/CutMix-PyTorch - -Hacked together by / Copyright 2020 Ross Wightman -""" -import numpy as np -import torch - - -def one_hot(x, num_classes, on_value=1., off_value=0., device='cuda'): - x = x.long().view(-1, 1) - return torch.full((x.size()[0], num_classes), off_value, device=device).scatter_(1, x, on_value) - - -def mixup_target(target, num_classes, lam=1., smoothing=0.0, device='cuda'): - off_value = smoothing / num_classes - on_value = 1. - smoothing + off_value - y1 = one_hot(target, num_classes, on_value=on_value, off_value=off_value, device=device) - y2 = one_hot(target.flip(0), num_classes, on_value=on_value, off_value=off_value, device=device) - return y1 * lam + y2 * (1. 
- lam) - - -def rand_bbox(img_shape, lam, margin=0., count=None): - """ Standard CutMix bounding-box - Generates a random square bbox based on lambda value. This impl includes - support for enforcing a border margin as percent of bbox dimensions. - - Args: - img_shape (tuple): Image shape as tuple - lam (float): Cutmix lambda value - margin (float): Percentage of bbox dimension to enforce as margin (reduce amount of box outside image) - count (int): Number of bbox to generate - """ - ratio = np.sqrt(1 - lam) - img_h, img_w = img_shape[-2:] - cut_h, cut_w = int(img_h * ratio), int(img_w * ratio) - margin_y, margin_x = int(margin * cut_h), int(margin * cut_w) - cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count) - cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count) - yl = np.clip(cy - cut_h // 2, 0, img_h) - yh = np.clip(cy + cut_h // 2, 0, img_h) - xl = np.clip(cx - cut_w // 2, 0, img_w) - xh = np.clip(cx + cut_w // 2, 0, img_w) - return yl, yh, xl, xh - - -def rand_bbox_minmax(img_shape, minmax, count=None): - """ Min-Max CutMix bounding-box - Inspired by Darknet cutmix impl, generates a random rectangular bbox - based on min/max percent values applied to each dimension of the input image. - - Typical defaults for minmax are usually in the .2-.3 for min and .8-.9 range for max. - - Args: - img_shape (tuple): Image shape as tuple - minmax (tuple or list): Min and max bbox ratios (as percent of image size) - count (int): Number of bbox to generate - """ - assert len(minmax) == 2 - img_h, img_w = img_shape[-2:] - cut_h = np.random.randint(int(img_h * minmax[0]), int(img_h * minmax[1]), size=count) - cut_w = np.random.randint(int(img_w * minmax[0]), int(img_w * minmax[1]), size=count) - yl = np.random.randint(0, img_h - cut_h, size=count) - xl = np.random.randint(0, img_w - cut_w, size=count) - yu = yl + cut_h - xu = xl + cut_w - return yl, yu, xl, xu - - -def cutmix_bbox_and_lam(img_shape, lam, ratio_minmax=None, correct_lam=True, count=None): - """ Generate bbox and apply lambda correction. - """ - if ratio_minmax is not None: - yl, yu, xl, xu = rand_bbox_minmax(img_shape, ratio_minmax, count=count) - else: - yl, yu, xl, xu = rand_bbox(img_shape, lam, count=count) - if correct_lam or ratio_minmax is not None: - bbox_area = (yu - yl) * (xu - xl) - lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1]) - return (yl, yu, xl, xu), lam - - -class Mixup: - """ Mixup/Cutmix that applies different params to each element or whole batch - - Args: - mixup_alpha (float): mixup alpha value, mixup is active if > 0. - cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0. - cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None. 
- prob (float): probability of applying mixup or cutmix per batch or element - switch_prob (float): probability of switching to cutmix instead of mixup when both are active - mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), 'elem' (element) - correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders - label_smoothing (float): apply label smoothing to the mixed target tensor - num_classes (int): number of classes for target - """ - def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None, prob=1.0, switch_prob=0.5, - mode='batch', correct_lam=True, label_smoothing=0.1, num_classes=1000): - self.mixup_alpha = mixup_alpha - self.cutmix_alpha = cutmix_alpha - self.cutmix_minmax = cutmix_minmax - if self.cutmix_minmax is not None: - assert len(self.cutmix_minmax) == 2 - # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe - self.cutmix_alpha = 1.0 - self.mix_prob = prob - self.switch_prob = switch_prob - self.label_smoothing = label_smoothing - self.num_classes = num_classes - self.mode = mode - self.correct_lam = correct_lam # correct lambda based on clipped area for cutmix - self.mixup_enabled = True # set to false to disable mixing (intended tp be set by train loop) - - def _params_per_elem(self, batch_size): - lam = np.ones(batch_size, dtype=np.float32) - use_cutmix = np.zeros(batch_size, dtype=np.bool) - if self.mixup_enabled: - if self.mixup_alpha > 0. and self.cutmix_alpha > 0.: - use_cutmix = np.random.rand(batch_size) < self.switch_prob - lam_mix = np.where( - use_cutmix, - np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size), - np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)) - elif self.mixup_alpha > 0.: - lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size) - elif self.cutmix_alpha > 0.: - use_cutmix = np.ones(batch_size, dtype=np.bool) - lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size) - else: - assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." - lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam) - return lam, use_cutmix - - def _params_per_batch(self): - lam = 1. - use_cutmix = False - if self.mixup_enabled and np.random.rand() < self.mix_prob: - if self.mixup_alpha > 0. and self.cutmix_alpha > 0.: - use_cutmix = np.random.rand() < self.switch_prob - lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) if use_cutmix else \ - np.random.beta(self.mixup_alpha, self.mixup_alpha) - elif self.mixup_alpha > 0.: - lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha) - elif self.cutmix_alpha > 0.: - use_cutmix = True - lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) - else: - assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." 
- lam = float(lam_mix) - return lam, use_cutmix - - def _mix_elem(self, x): - batch_size = len(x) - lam_batch, use_cutmix = self._params_per_elem(batch_size) - x_orig = x.clone() # need to keep an unmodified original for mixing source - for i in range(batch_size): - j = batch_size - i - 1 - lam = lam_batch[i] - if lam != 1.: - if use_cutmix[i]: - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] - lam_batch[i] = lam - else: - x[i] = x[i] * lam + x_orig[j] * (1 - lam) - return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) - - def _mix_pair(self, x): - batch_size = len(x) - lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) - x_orig = x.clone() # need to keep an unmodified original for mixing source - for i in range(batch_size // 2): - j = batch_size - i - 1 - lam = lam_batch[i] - if lam != 1.: - if use_cutmix[i]: - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] - x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh] - lam_batch[i] = lam - else: - x[i] = x[i] * lam + x_orig[j] * (1 - lam) - x[j] = x[j] * lam + x_orig[i] * (1 - lam) - lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) - return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) - - def _mix_batch(self, x): - lam, use_cutmix = self._params_per_batch() - if lam == 1.: - return 1. - if use_cutmix: - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - x[:, :, yl:yh, xl:xh] = x.flip(0)[:, :, yl:yh, xl:xh] - else: - x_flipped = x.flip(0).mul_(1. - lam) - x.mul_(lam).add_(x_flipped) - return lam - - def __call__(self, x, target): - assert len(x) % 2 == 0, 'Batch size should be even when using this' - if self.mode == 'elem': - lam = self._mix_elem(x) - elif self.mode == 'pair': - lam = self._mix_pair(x) - else: - lam = self._mix_batch(x) - target = mixup_target(target, self.num_classes, lam, self.label_smoothing,device='npu') - return x, target - - -class FastCollateMixup(Mixup): - """ Fast Collate w/ Mixup/Cutmix that applies different params to each element or whole batch - - A Mixup impl that's performed while collating the batches. 
- """ - - def _mix_elem_collate(self, output, batch, half=False): - batch_size = len(batch) - num_elem = batch_size // 2 if half else batch_size - assert len(output) == num_elem - lam_batch, use_cutmix = self._params_per_elem(num_elem) - for i in range(num_elem): - j = batch_size - i - 1 - lam = lam_batch[i] - mixed = batch[i][0] - if lam != 1.: - if use_cutmix[i]: - if not half: - mixed = mixed.copy() - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] - lam_batch[i] = lam - else: - mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) - np.rint(mixed, out=mixed) - output[i] += torch.from_numpy(mixed.astype(np.uint8)) - if half: - lam_batch = np.concatenate((lam_batch, np.ones(num_elem))) - return torch.tensor(lam_batch).unsqueeze(1) - - def _mix_pair_collate(self, output, batch): - batch_size = len(batch) - lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) - for i in range(batch_size // 2): - j = batch_size - i - 1 - lam = lam_batch[i] - mixed_i = batch[i][0] - mixed_j = batch[j][0] - assert 0 <= lam <= 1.0 - if lam < 1.: - if use_cutmix[i]: - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - patch_i = mixed_i[:, yl:yh, xl:xh].copy() - mixed_i[:, yl:yh, xl:xh] = mixed_j[:, yl:yh, xl:xh] - mixed_j[:, yl:yh, xl:xh] = patch_i - lam_batch[i] = lam - else: - mixed_temp = mixed_i.astype(np.float32) * lam + mixed_j.astype(np.float32) * (1 - lam) - mixed_j = mixed_j.astype(np.float32) * lam + mixed_i.astype(np.float32) * (1 - lam) - mixed_i = mixed_temp - np.rint(mixed_j, out=mixed_j) - np.rint(mixed_i, out=mixed_i) - output[i] += torch.from_numpy(mixed_i.astype(np.uint8)) - output[j] += torch.from_numpy(mixed_j.astype(np.uint8)) - lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) - return torch.tensor(lam_batch).unsqueeze(1) - - def _mix_batch_collate(self, output, batch): - batch_size = len(batch) - lam, use_cutmix = self._params_per_batch() - if use_cutmix: - (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( - output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) - for i in range(batch_size): - j = batch_size - i - 1 - mixed = batch[i][0] - if lam != 1.: - if use_cutmix: - mixed = mixed.copy() # don't want to modify the original while iterating - mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] - else: - mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) - np.rint(mixed, out=mixed) - output[i] += torch.from_numpy(mixed.astype(np.uint8)) - return lam - - def __call__(self, batch, _=None): - batch_size = len(batch) - assert batch_size % 2 == 0, 'Batch size should be even when using this' - half = 'half' in self.mode - if half: - batch_size //= 2 - output = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8) - if self.mode == 'elem' or self.mode == 'half': - lam = self._mix_elem_collate(output, batch, half=half) - elif self.mode == 'pair': - lam = self._mix_pair_collate(output, batch) - else: - lam = self._mix_batch_collate(output, batch) - target = torch.tensor([b[1] for b in batch], dtype=torch.int64) - target = mixup_target(target, self.num_classes, lam, self.label_smoothing, device='cpu') - target = target[:batch_size] - return output, target - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" Mixup and Cutmix + +Papers: +mixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412) + +CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features (https://arxiv.org/abs/1905.04899) + +Code Reference: +CutMix: https://github.com/clovaai/CutMix-PyTorch + +Hacked together by / Copyright 2020 Ross Wightman +""" +import numpy as np +import torch + + +def one_hot(x, num_classes, on_value=1., off_value=0., device='cuda'): + x = x.long().view(-1, 1) + return torch.full((x.size()[0], num_classes), off_value, device=device).scatter_(1, x, on_value) + + +def mixup_target(target, num_classes, lam=1., smoothing=0.0, device='cuda'): + off_value = smoothing / num_classes + on_value = 1. - smoothing + off_value + y1 = one_hot(target, num_classes, on_value=on_value, off_value=off_value, device=device) + y2 = one_hot(target.flip(0), num_classes, on_value=on_value, off_value=off_value, device=device) + return y1 * lam + y2 * (1. - lam) + + +def rand_bbox(img_shape, lam, margin=0., count=None): + """ Standard CutMix bounding-box + Generates a random square bbox based on lambda value. This impl includes + support for enforcing a border margin as percent of bbox dimensions. + + Args: + img_shape (tuple): Image shape as tuple + lam (float): Cutmix lambda value + margin (float): Percentage of bbox dimension to enforce as margin (reduce amount of box outside image) + count (int): Number of bbox to generate + """ + ratio = np.sqrt(1 - lam) + img_h, img_w = img_shape[-2:] + cut_h, cut_w = int(img_h * ratio), int(img_w * ratio) + margin_y, margin_x = int(margin * cut_h), int(margin * cut_w) + cy = np.random.randint(0 + margin_y, img_h - margin_y, size=count) + cx = np.random.randint(0 + margin_x, img_w - margin_x, size=count) + yl = np.clip(cy - cut_h // 2, 0, img_h) + yh = np.clip(cy + cut_h // 2, 0, img_h) + xl = np.clip(cx - cut_w // 2, 0, img_w) + xh = np.clip(cx + cut_w // 2, 0, img_w) + return yl, yh, xl, xh + + +def rand_bbox_minmax(img_shape, minmax, count=None): + """ Min-Max CutMix bounding-box + Inspired by Darknet cutmix impl, generates a random rectangular bbox + based on min/max percent values applied to each dimension of the input image. + + Typical defaults for minmax are usually in the .2-.3 for min and .8-.9 range for max. 
+ + Args: + img_shape (tuple): Image shape as tuple + minmax (tuple or list): Min and max bbox ratios (as percent of image size) + count (int): Number of bbox to generate + """ + assert len(minmax) == 2 + img_h, img_w = img_shape[-2:] + cut_h = np.random.randint(int(img_h * minmax[0]), int(img_h * minmax[1]), size=count) + cut_w = np.random.randint(int(img_w * minmax[0]), int(img_w * minmax[1]), size=count) + yl = np.random.randint(0, img_h - cut_h, size=count) + xl = np.random.randint(0, img_w - cut_w, size=count) + yu = yl + cut_h + xu = xl + cut_w + return yl, yu, xl, xu + + +def cutmix_bbox_and_lam(img_shape, lam, ratio_minmax=None, correct_lam=True, count=None): + """ Generate bbox and apply lambda correction. + """ + if ratio_minmax is not None: + yl, yu, xl, xu = rand_bbox_minmax(img_shape, ratio_minmax, count=count) + else: + yl, yu, xl, xu = rand_bbox(img_shape, lam, count=count) + if correct_lam or ratio_minmax is not None: + bbox_area = (yu - yl) * (xu - xl) + lam = 1. - bbox_area / float(img_shape[-2] * img_shape[-1]) + return (yl, yu, xl, xu), lam + + +class Mixup: + """ Mixup/Cutmix that applies different params to each element or whole batch + + Args: + mixup_alpha (float): mixup alpha value, mixup is active if > 0. + cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0. + cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None. + prob (float): probability of applying mixup or cutmix per batch or element + switch_prob (float): probability of switching to cutmix instead of mixup when both are active + mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), 'elem' (element) + correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders + label_smoothing (float): apply label smoothing to the mixed target tensor + num_classes (int): number of classes for target + """ + def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None, prob=1.0, switch_prob=0.5, + mode='batch', correct_lam=True, label_smoothing=0.1, num_classes=1000): + self.mixup_alpha = mixup_alpha + self.cutmix_alpha = cutmix_alpha + self.cutmix_minmax = cutmix_minmax + if self.cutmix_minmax is not None: + assert len(self.cutmix_minmax) == 2 + # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe + self.cutmix_alpha = 1.0 + self.mix_prob = prob + self.switch_prob = switch_prob + self.label_smoothing = label_smoothing + self.num_classes = num_classes + self.mode = mode + self.correct_lam = correct_lam # correct lambda based on clipped area for cutmix + self.mixup_enabled = True # set to false to disable mixing (intended tp be set by train loop) + + def _params_per_elem(self, batch_size): + lam = np.ones(batch_size, dtype=np.float32) + use_cutmix = np.zeros(batch_size, dtype=np.bool) + if self.mixup_enabled: + if self.mixup_alpha > 0. 
and self.cutmix_alpha > 0.: + use_cutmix = np.random.rand(batch_size) < self.switch_prob + lam_mix = np.where( + use_cutmix, + np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size), + np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)) + elif self.mixup_alpha > 0.: + lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size) + elif self.cutmix_alpha > 0.: + use_cutmix = np.ones(batch_size, dtype=np.bool) + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size) + else: + assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." + lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam) + return lam, use_cutmix + + def _params_per_batch(self): + lam = 1. + use_cutmix = False + if self.mixup_enabled and np.random.rand() < self.mix_prob: + if self.mixup_alpha > 0. and self.cutmix_alpha > 0.: + use_cutmix = np.random.rand() < self.switch_prob + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) if use_cutmix else \ + np.random.beta(self.mixup_alpha, self.mixup_alpha) + elif self.mixup_alpha > 0.: + lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha) + elif self.cutmix_alpha > 0.: + use_cutmix = True + lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha) + else: + assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true." + lam = float(lam_mix) + return lam, use_cutmix + + def _mix_elem(self, x): + batch_size = len(x) + lam_batch, use_cutmix = self._params_per_elem(batch_size) + x_orig = x.clone() # need to keep an unmodified original for mixing source + for i in range(batch_size): + j = batch_size - i - 1 + lam = lam_batch[i] + if lam != 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + x[i] = x[i] * lam + x_orig[j] * (1 - lam) + return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) + + def _mix_pair(self, x): + batch_size = len(x) + lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) + x_orig = x.clone() # need to keep an unmodified original for mixing source + for i in range(batch_size // 2): + j = batch_size - i - 1 + lam = lam_batch[i] + if lam != 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh] + x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + x[i] = x[i] * lam + x_orig[j] * (1 - lam) + x[j] = x[j] * lam + x_orig[i] * (1 - lam) + lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) + return torch.tensor(lam_batch, device=x.device, dtype=x.dtype).unsqueeze(1) + + def _mix_batch(self, x): + lam, use_cutmix = self._params_per_batch() + if lam == 1.: + return 1. + if use_cutmix: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + x[:, :, yl:yh, xl:xh] = x.flip(0)[:, :, yl:yh, xl:xh] + else: + x_flipped = x.flip(0).mul_(1. 
- lam) + x.mul_(lam).add_(x_flipped) + return lam + + def __call__(self, x, target): + assert len(x) % 2 == 0, 'Batch size should be even when using this' + if self.mode == 'elem': + lam = self._mix_elem(x) + elif self.mode == 'pair': + lam = self._mix_pair(x) + else: + lam = self._mix_batch(x) + target = mixup_target(target, self.num_classes, lam, self.label_smoothing,device='npu') + return x, target + + +class FastCollateMixup(Mixup): + """ Fast Collate w/ Mixup/Cutmix that applies different params to each element or whole batch + + A Mixup impl that's performed while collating the batches. + """ + + def _mix_elem_collate(self, output, batch, half=False): + batch_size = len(batch) + num_elem = batch_size // 2 if half else batch_size + assert len(output) == num_elem + lam_batch, use_cutmix = self._params_per_elem(num_elem) + for i in range(num_elem): + j = batch_size - i - 1 + lam = lam_batch[i] + mixed = batch[i][0] + if lam != 1.: + if use_cutmix[i]: + if not half: + mixed = mixed.copy() + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] + lam_batch[i] = lam + else: + mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) + np.rint(mixed, out=mixed) + output[i] += torch.from_numpy(mixed.astype(np.uint8)) + if half: + lam_batch = np.concatenate((lam_batch, np.ones(num_elem))) + return torch.tensor(lam_batch).unsqueeze(1) + + def _mix_pair_collate(self, output, batch): + batch_size = len(batch) + lam_batch, use_cutmix = self._params_per_elem(batch_size // 2) + for i in range(batch_size // 2): + j = batch_size - i - 1 + lam = lam_batch[i] + mixed_i = batch[i][0] + mixed_j = batch[j][0] + assert 0 <= lam <= 1.0 + if lam < 1.: + if use_cutmix[i]: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + patch_i = mixed_i[:, yl:yh, xl:xh].copy() + mixed_i[:, yl:yh, xl:xh] = mixed_j[:, yl:yh, xl:xh] + mixed_j[:, yl:yh, xl:xh] = patch_i + lam_batch[i] = lam + else: + mixed_temp = mixed_i.astype(np.float32) * lam + mixed_j.astype(np.float32) * (1 - lam) + mixed_j = mixed_j.astype(np.float32) * lam + mixed_i.astype(np.float32) * (1 - lam) + mixed_i = mixed_temp + np.rint(mixed_j, out=mixed_j) + np.rint(mixed_i, out=mixed_i) + output[i] += torch.from_numpy(mixed_i.astype(np.uint8)) + output[j] += torch.from_numpy(mixed_j.astype(np.uint8)) + lam_batch = np.concatenate((lam_batch, lam_batch[::-1])) + return torch.tensor(lam_batch).unsqueeze(1) + + def _mix_batch_collate(self, output, batch): + batch_size = len(batch) + lam, use_cutmix = self._params_per_batch() + if use_cutmix: + (yl, yh, xl, xh), lam = cutmix_bbox_and_lam( + output.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam) + for i in range(batch_size): + j = batch_size - i - 1 + mixed = batch[i][0] + if lam != 1.: + if use_cutmix: + mixed = mixed.copy() # don't want to modify the original while iterating + mixed[:, yl:yh, xl:xh] = batch[j][0][:, yl:yh, xl:xh] + else: + mixed = mixed.astype(np.float32) * lam + batch[j][0].astype(np.float32) * (1 - lam) + np.rint(mixed, out=mixed) + output[i] += torch.from_numpy(mixed.astype(np.uint8)) + return lam + + def __call__(self, batch, _=None): + batch_size = len(batch) + assert batch_size % 2 == 0, 'Batch size should be even when using this' + half = 'half' in self.mode + if half: + batch_size //= 2 + output = torch.zeros((batch_size, 
*batch[0][0].shape), dtype=torch.uint8) + if self.mode == 'elem' or self.mode == 'half': + lam = self._mix_elem_collate(output, batch, half=half) + elif self.mode == 'pair': + lam = self._mix_pair_collate(output, batch) + else: + lam = self._mix_batch_collate(output, batch) + target = torch.tensor([b[1] for b in batch], dtype=torch.int64) + target = mixup_target(target, self.num_classes, lam, self.label_smoothing, device='cpu') + target = target[:batch_size] + return output, target + diff --git a/PyTorch/contrib/cv/classification/Deit_Small/npu_fused_adamw.py b/PyTorch/contrib/cv/classification/Deit_Small/npu_fused_adamw.py index bd9f5d6a4d21ab8b3f6b58c7f6a8bacda4fa3ce5..d52d4e2d5ee5644990a601b234cecd7697d242a0 100644 --- a/PyTorch/contrib/cv/classification/Deit_Small/npu_fused_adamw.py +++ b/PyTorch/contrib/cv/classification/Deit_Small/npu_fused_adamw.py @@ -1,257 +1,257 @@ -# Copyright (c) 2020, Huawei Technologies. -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -from collections import defaultdict - -import torch -from torch.optim.optimizer import Optimizer - -from apex.contrib.combine_tensors import combine_npu - - -class NpuFusedAdamW(Optimizer): - """Implements AdamW algorithm. - - Currently NPU-only. Requires Apex to be installed via - ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. - - This version of NPU fused AdamW implements 1 fusions. - - * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters - into one or a few kernel launches. - - :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - ... - opt.step() - - :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` - with Amp, only ``opt_level O1 and O2`` can be choosed:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - model, opt = amp.initialize(model, opt, opt_level="O2") - ... - opt.step() - - - The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. - The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. - - Arguments: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float, optional, default: 1e-3): learning rate - betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used - for computing running averages of gradient and its square - eps (float, optional, default: 1e-8): term added to the denominator to improve - numerical stability - weight_decay (float, optional, default: 1e-2): weight decay coefficient - amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of - this algorithm from the paper `On the Convergence of Adam and Beyond`_ - - .. 
_Adam\: A Method for Stochastic Optimization: - https://arxiv.org/abs/1412.6980 - .. _Decoupled Weight Decay Regularization: - https://arxiv.org/abs/1711.05101 - .. _On the Convergence of Adam and Beyond: - https://openreview.net/forum?id=ryQu7f-RZ - """ - - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, - weight_decay=1e-2, amsgrad=False): - if lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if eps < 0.0: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if betas[0] < 0.0 or betas[0] >= 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if betas[1] < 0.0 or betas[1] >= 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - if weight_decay < 0.0: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay, amsgrad=amsgrad) - self.is_npu_fused_optimizer = True - super(NpuFusedAdamW, self).__init__(params, defaults) - - def __setstate__(self, state): - super(NpuFusedAdamW, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('amsgrad', False) - - def _init_param_state(self, p, amsgrad): - state = self.state[p] - # State initialization - if len(state) == 0: - state['step'] = 0 - # Exponential moving average of gradient values - state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) - # Exponential moving average of squared gradient values - state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - if amsgrad: - # Maintains max of all exp. moving avg. of sq. grad. values - state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - else: - exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_tmp.copy_(state['exp_avg']) - state['exp_avg'] = exp_avg_tmp - - exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_sq_tmp.copy_(state['exp_avg_sq']) - state['exp_avg_sq'] = exp_avg_sq_tmp - - if amsgrad: - max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) - state['max_exp_avg_sq'] = max_exp_avg_sq_tmp - - def _combine_group_param_states(self, group_index): - group = self.param_groups[group_index] - stash = self._amp_stash - group_params_list = stash.params_lists_indexed_by_group[group_index] - - amsgrad = group['amsgrad'] - - combined_param_states = [] - for params in group_params_list: - step_list = [] - exp_avg_list = [] - exp_avg_sq_list = [] - max_exp_avg_sq_list = [] - - for p in params: - if p.grad is None: - continue - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - - self._init_param_state(p, amsgrad) - state = self.state[p] - step_list.append(state['step']) - exp_avg_list.append(state['exp_avg']) - exp_avg_sq_list.append(state['exp_avg_sq']) - if amsgrad: - max_exp_avg_sq_list.append(state['max_exp_avg_sq']) - - combined_step = 0 - combined_exp_avg = None - combined_exp_avg_sq = None - combined_max_exp_avg_sq = None - - if len(exp_avg_list) > 0: - combined_step = step_list[0] - combined_exp_avg = combine_npu(exp_avg_list) - combined_exp_avg_sq = combine_npu(exp_avg_sq_list) - combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) - - combined_state = defaultdict(dict) - combined_state['step'] = combined_step - combined_state['exp_avg'] = combined_exp_avg - 
combined_state['exp_avg_sq'] = combined_exp_avg_sq - combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq - combined_param_states.append(combined_state) - stash.combined_param_states_indexed_by_group[group_index] = combined_param_states - - def _combine_param_states_by_group(self): - stash = self._amp_stash - if stash.param_states_are_combined_by_group: - return - - stash.combined_param_states_indexed_by_group = [] - for _ in self.param_groups: - stash.combined_param_states_indexed_by_group.append([]) - - for i, _ in enumerate(self.param_groups): - self._combine_group_param_states(i) - stash.param_states_are_combined_by_group = True - - def _group_step(self, group_index): - group = self.param_groups[group_index] - for p in group['params']: - if p.grad is None: - continue - - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - state_p = self.state[p] - state_p['step'] += 1 - - amsgrad = group['amsgrad'] - beta1, beta2 = group['betas'] - - stash = self._amp_stash - combined_group_params = stash.combined_params_indexed_by_group[group_index] - combined_group_grads = stash.combined_grads_indexed_by_group[group_index] - combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] - - for combined_param, combined_grad, combined_param_state in zip(combined_group_params, - combined_group_grads, - combined_group_param_states): - if combined_param is None or combined_grad is None: - continue - - # Perform stepweight decay. The fused method is used here to speed up the calculation - combined_param.mul_(1 - group['lr'] * group['weight_decay']) - - exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] - if amsgrad: - max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] - - combined_param_state['step'] += 1 - bias_correction1 = 1 - beta1 ** combined_param_state['step'] - bias_correction2 = 1 - beta2 ** combined_param_state['step'] - - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) - exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) - if amsgrad: - # Maintains the maximum of all 2nd moment running avg. till now - torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) - # Use the max. for normalizing running avg. of gradient - denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - else: - denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - - step_size = group['lr'] / bias_correction1 - - combined_param.addcdiv_(exp_avg, denom, value=-step_size) - - @torch.no_grad() - def step(self, closure=None): - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') - - self._check_already_combined_params_and_grads() - # combine params and grads first - self._combine_params_and_grads_by_group() - # then combine param states - self._combine_param_states_by_group() - - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - - for i, _ in enumerate(self.param_groups): - self._group_step(i) - - return loss +# Copyright (c) 2020, Huawei Technologies. +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from collections import defaultdict + +import torch +from torch.optim.optimizer import Optimizer + +from apex.contrib.combine_tensors import combine_npu + + +class NpuFusedAdamW(Optimizer): + """Implements AdamW algorithm. + + Currently NPU-only. Requires Apex to be installed via + ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. + + This version of NPU fused AdamW implements 1 fusions. + + * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters + into one or a few kernel launches. + + :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + ... + opt.step() + + :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` + with Amp, only ``opt_level O1 and O2`` can be choosed:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + model, opt = amp.initialize(model, opt, opt_level="O2") + ... + opt.step() + + + The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. + The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. + + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional, default: 1e-3): learning rate + betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used + for computing running averages of gradient and its square + eps (float, optional, default: 1e-8): term added to the denominator to improve + numerical stability + weight_decay (float, optional, default: 1e-2): weight decay coefficient + amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of + this algorithm from the paper `On the Convergence of Adam and Beyond`_ + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _Decoupled Weight Decay Regularization: + https://arxiv.org/abs/1711.05101 + .. 
_On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=1e-2, amsgrad=False): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if eps < 0.0: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if betas[0] < 0.0 or betas[0] >= 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if betas[1] < 0.0 or betas[1] >= 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad) + self.is_npu_fused_optimizer = True + super(NpuFusedAdamW, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NpuFusedAdamW, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + def _init_param_state(self, p, amsgrad): + state = self.state[p] + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + else: + exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_tmp.copy_(state['exp_avg']) + state['exp_avg'] = exp_avg_tmp + + exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_sq_tmp.copy_(state['exp_avg_sq']) + state['exp_avg_sq'] = exp_avg_sq_tmp + + if amsgrad: + max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) + state['max_exp_avg_sq'] = max_exp_avg_sq_tmp + + def _combine_group_param_states(self, group_index): + group = self.param_groups[group_index] + stash = self._amp_stash + group_params_list = stash.params_lists_indexed_by_group[group_index] + + amsgrad = group['amsgrad'] + + combined_param_states = [] + for params in group_params_list: + step_list = [] + exp_avg_list = [] + exp_avg_sq_list = [] + max_exp_avg_sq_list = [] + + for p in params: + if p.grad is None: + continue + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + + self._init_param_state(p, amsgrad) + state = self.state[p] + step_list.append(state['step']) + exp_avg_list.append(state['exp_avg']) + exp_avg_sq_list.append(state['exp_avg_sq']) + if amsgrad: + max_exp_avg_sq_list.append(state['max_exp_avg_sq']) + + combined_step = 0 + combined_exp_avg = None + combined_exp_avg_sq = None + combined_max_exp_avg_sq = None + + if len(exp_avg_list) > 0: + combined_step = step_list[0] + combined_exp_avg = combine_npu(exp_avg_list) + combined_exp_avg_sq = combine_npu(exp_avg_sq_list) + combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) + + combined_state = defaultdict(dict) + combined_state['step'] = combined_step + combined_state['exp_avg'] = combined_exp_avg + combined_state['exp_avg_sq'] = combined_exp_avg_sq + combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq + combined_param_states.append(combined_state) + 
stash.combined_param_states_indexed_by_group[group_index] = combined_param_states + + def _combine_param_states_by_group(self): + stash = self._amp_stash + if stash.param_states_are_combined_by_group: + return + + stash.combined_param_states_indexed_by_group = [] + for _ in self.param_groups: + stash.combined_param_states_indexed_by_group.append([]) + + for i, _ in enumerate(self.param_groups): + self._combine_group_param_states(i) + stash.param_states_are_combined_by_group = True + + def _group_step(self, group_index): + group = self.param_groups[group_index] + for p in group['params']: + if p.grad is None: + continue + + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + state_p = self.state[p] + state_p['step'] += 1 + + amsgrad = group['amsgrad'] + beta1, beta2 = group['betas'] + + stash = self._amp_stash + combined_group_params = stash.combined_params_indexed_by_group[group_index] + combined_group_grads = stash.combined_grads_indexed_by_group[group_index] + combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] + + for combined_param, combined_grad, combined_param_state in zip(combined_group_params, + combined_group_grads, + combined_group_param_states): + if combined_param is None or combined_grad is None: + continue + + # Perform stepweight decay. The fused method is used here to speed up the calculation + combined_param.mul_(1 - group['lr'] * group['weight_decay']) + + exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] + if amsgrad: + max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] + + combined_param_state['step'] += 1 + bias_correction1 = 1 - beta1 ** combined_param_state['step'] + bias_correction2 = 1 - beta2 ** combined_param_state['step'] + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) + if amsgrad: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + else: + denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + + step_size = group['lr'] / bias_correction1 + + combined_param.addcdiv_(exp_avg, denom, value=-step_size) + + @torch.no_grad() + def step(self, closure=None): + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') + + self._check_already_combined_params_and_grads() + # combine params and grads first + self._combine_params_and_grads_by_group() + # then combine param states + self._combine_param_states_by_group() + + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for i, _ in enumerate(self.param_groups): + self._group_step(i) + + return loss diff --git a/PyTorch/contrib/cv/classification/DnCNN/.keep b/PyTorch/contrib/cv/classification/DnCNN/.keep old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/Dockerfile b/PyTorch/contrib/cv/classification/DnCNN/Dockerfile old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/README.md b/PyTorch/contrib/cv/classification/DnCNN/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/dataset.py b/PyTorch/contrib/cv/classification/DnCNN/dataset.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/demo.py b/PyTorch/contrib/cv/classification/DnCNN/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/docker_start.sh b/PyTorch/contrib/cv/classification/DnCNN/docker_start.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/evalOnePic.py b/PyTorch/contrib/cv/classification/DnCNN/evalOnePic.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/models.py b/PyTorch/contrib/cv/classification/DnCNN/models.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/modelzoo_level.txt b/PyTorch/contrib/cv/classification/DnCNN/modelzoo_level.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/pth2onnx.py b/PyTorch/contrib/cv/classification/DnCNN/pth2onnx.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/requirements.txt b/PyTorch/contrib/cv/classification/DnCNN/requirements.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/demo.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/demo.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/eval.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/pth2onnx.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/pth2onnx.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/set_npu_env.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/set_npu_env.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/train_1p.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/scripts/train_8p.sh b/PyTorch/contrib/cv/classification/DnCNN/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git 
a/PyTorch/contrib/cv/classification/DnCNN/test/demo.sh b/PyTorch/contrib/cv/classification/DnCNN/test/demo.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/pth2onnx.sh b/PyTorch/contrib/cv/classification/DnCNN/test/pth2onnx.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/DnCNN/test/train_eval_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/DnCNN/test/train_full_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/DnCNN/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/DnCNN/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/DnCNN/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/train_1p.py b/PyTorch/contrib/cv/classification/DnCNN/train_1p.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/train_8p.py b/PyTorch/contrib/cv/classification/DnCNN/train_8p.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/DnCNN/utils.py b/PyTorch/contrib/cv/classification/DnCNN/utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/ImageNet_val_split.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/ImageNet_val_split.py index 60d468b73e666df16329511c05fec52b730c8fca..31376e467a15774fb37fc01bad22e8e2b9e66796 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/ImageNet_val_split.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/ImageNet_val_split.py @@ -1,61 +1,61 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import os -import scipy -import shutil - - -def move_valimg(val_dir='./val', devkit_dir='./ILSVRC2012_devkit_t12'): - """ - move valimg to correspongding folders. 
- val_id(start from 1) -> ILSVRC_ID(start from 1) -> WIND - organize like: - /val - /n01440764 - images - /n01443537 - images - ..... - """ - # load synset, val ground truth and val images list - synset = scipy.io.loadmat(os.path.join(devkit_dir, 'data', 'meta.mat')) - - ground_truth = open(os.path.join(devkit_dir, 'data', 'ILSVRC2012_validation_ground_truth.txt')) - lines = ground_truth.readlines() - labels = [int(line[:-1]) for line in lines] - - root, _, filenames = next(os.walk(val_dir)) - for filename in filenames: - # val image name -> ILSVRC ID -> WIND - val_id = int(filename.split('.')[0].split('_')[-1]) - ILSVRC_ID = labels[val_id-1] - WIND = synset['synsets'][ILSVRC_ID-1][0][1][0] - print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ILSVRC_ID, WIND)) - - # move val images - output_dir = os.path.join(root, WIND) - if os.path.isdir(output_dir): - pass - else: - os.mkdir(output_dir) - shutil.move(os.path.join(root, filename), os.path.join(output_dir, filename)) - -def main(val_path, devkit_path): - move_valimg(val_path, devkit_path) - -if __name__ == '__main__': - val_path = sys.argv[1] - devkit_path = sys.argv[2] +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import os +import scipy +import shutil + + +def move_valimg(val_dir='./val', devkit_dir='./ILSVRC2012_devkit_t12'): + """ + move valimg to correspongding folders. + val_id(start from 1) -> ILSVRC_ID(start from 1) -> WIND + organize like: + /val + /n01440764 + images + /n01443537 + images + ..... 
+ """ + # load synset, val ground truth and val images list + synset = scipy.io.loadmat(os.path.join(devkit_dir, 'data', 'meta.mat')) + + ground_truth = open(os.path.join(devkit_dir, 'data', 'ILSVRC2012_validation_ground_truth.txt')) + lines = ground_truth.readlines() + labels = [int(line[:-1]) for line in lines] + + root, _, filenames = next(os.walk(val_dir)) + for filename in filenames: + # val image name -> ILSVRC ID -> WIND + val_id = int(filename.split('.')[0].split('_')[-1]) + ILSVRC_ID = labels[val_id-1] + WIND = synset['synsets'][ILSVRC_ID-1][0][1][0] + print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ILSVRC_ID, WIND)) + + # move val images + output_dir = os.path.join(root, WIND) + if os.path.isdir(output_dir): + pass + else: + os.mkdir(output_dir) + shutil.move(os.path.join(root, filename), os.path.join(output_dir, filename)) + +def main(val_path, devkit_path): + move_valimg(val_path, devkit_path) + +if __name__ == '__main__': + val_path = sys.argv[1] + devkit_path = sys.argv[2] main(val_path, devkit_path) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/README.md b/PyTorch/contrib/cv/classification/EfficientNet-B1/README.md index 007a91b9f417fa4bd92d7adbe7bd7c0c862c6580..065d061eb2833c61e3cb58dbdacfcd54d9a2eb0c 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/README.md +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/README.md @@ -1,79 +1,79 @@ -# EfficientNet-B1 - -This implements training of Efficientnet-B1 on the ImageNet dataset, mainly modified from [pycls](https://github.com/facebookresearch/pycls). - -## EfficientNet-B1 Detail - -For details, see[pycls](https://github.com/facebookresearch/pycls). - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- apt install bc -- python3 -m pip install --upgrade Pillow -- git clone https://github.com/facebookresearch/pycls -- pip install -r requirements.txt -- Download the ImageNet2012 dataset - - train set:138GB - val set:6.3GB - ILSVRC2012_devkit_t12:2.45MB - - - Then, and move validation images to labeled subfolders, using ImageNet_val_split.py need download imageNet val and ILSVRC2012_devkit_t12 - - ```python - python3.7 ImageNet_val_split.py ./val ./ILSVRC2012_devkit_t12 - ``` - ``` - move valimg to correspongding folders. - official download the organize like: - /val - images - images - ...... - after the move the organize like: - - /val - /n01440764 - images - /n01443537 - images - ..... 
- ``` -## Training - -To train a model, run scripts with the desired model architecture and the path to the ImageNet dataset: - -```bash -# 1p training 1p -bash test/train_full_1p.sh --data_path=imageNet_root_path - -# 8p training 8p -bash test/train_full_8p.sh --data_path=imageNet_root_path - -# To ONNX -python3.7 Efficient-B1_pth2onnx.py ./Efficient-b1.onnx - -# eval default 8p, should support 1p -bash test/train_eval_8p.sh --data_path=imageNet_root_path - -# test performer -bash test/train_performance_1p.sh --data_path=imageNet_root_path -bash test/train_performance_8p.sh --data_path=imageNet_root_path - -# online inference demo -python3.7.5 demo.py - -``` - - -## EfficientNet-B1 training result - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :----: | :--: | :------: | :----: | :------: | -| - | 451 | 1 | 100 | O2 | -| 74.445 | 2073 | 8 | 100 | O2 | - -FPS = BatchSize * num_devices / time_avg - +# EfficientNet-B1 + +This implements training of Efficientnet-B1 on the ImageNet dataset, mainly modified from [pycls](https://github.com/facebookresearch/pycls). + +## EfficientNet-B1 Detail + +For details, see[pycls](https://github.com/facebookresearch/pycls). + + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- apt install bc +- python3 -m pip install --upgrade Pillow +- git clone https://github.com/facebookresearch/pycls +- pip install -r requirements.txt +- Download the ImageNet2012 dataset + + train set:138GB + val set:6.3GB + ILSVRC2012_devkit_t12:2.45MB + + - Then, and move validation images to labeled subfolders, using ImageNet_val_split.py need download imageNet val and ILSVRC2012_devkit_t12 + + ```python + python3.7 ImageNet_val_split.py ./val ./ILSVRC2012_devkit_t12 + ``` + ``` + move valimg to correspongding folders. + official download the organize like: + /val + images + images + ...... + after the move the organize like: + + /val + /n01440764 + images + /n01443537 + images + ..... 
+ ``` +## Training + +To train a model, run scripts with the desired model architecture and the path to the ImageNet dataset: + +```bash +# 1p training 1p +bash test/train_full_1p.sh --data_path=imageNet_root_path + +# 8p training 8p +bash test/train_full_8p.sh --data_path=imageNet_root_path + +# To ONNX +python3.7 Efficient-B1_pth2onnx.py ./Efficient-b1.onnx + +# eval default 8p, should support 1p +bash test/train_eval_8p.sh --data_path=imageNet_root_path + +# test performer +bash test/train_performance_1p.sh --data_path=imageNet_root_path +bash test/train_performance_8p.sh --data_path=imageNet_root_path + +# online inference demo +python3.7.5 demo.py + +``` + + +## EfficientNet-B1 training result + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :----: | :--: | :------: | :----: | :------: | +| - | 451 | 1 | 100 | O2 | +| 74.445 | 2073 | 8 | 100 | O2 | + +FPS = BatchSize * num_devices / time_avg + diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-110_nds_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-110_nds_1gpu.yaml index 74cdc86425e232c226b4bccf4b03f4531f5977f0..6eadd72f4373320d690aecb3460a2f71c1fcd449 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-110_nds_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-110_nds_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 10 -ANYNET: - STEM_TYPE: res_stem_cifar - STEM_W: 16 - BLOCK_TYPE: res_basic_block - DEPTHS: [18, 18, 18] - WIDTHS: [16, 32, 64] - STRIDES: [1, 2, 2] -BN: - USE_PRECISE_STATS: True - NUM_SAMPLES_PRECISE: 1024 -OPTIM: - BASE_LR: 0.1 - LR_POLICY: cos - MAX_EPOCH: 200 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0005 -TRAIN: - DATASET: cifar10 - SPLIT: train - BATCH_SIZE: 128 - IM_SIZE: 32 -TEST: - DATASET: cifar10 - SPLIT: test - BATCH_SIZE: 200 - IM_SIZE: 32 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: False -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 10 +ANYNET: + STEM_TYPE: res_stem_cifar + STEM_W: 16 + BLOCK_TYPE: res_basic_block + DEPTHS: [18, 18, 18] + WIDTHS: [16, 32, 64] + STRIDES: [1, 2, 2] +BN: + USE_PRECISE_STATS: True + NUM_SAMPLES_PRECISE: 1024 +OPTIM: + BASE_LR: 0.1 + LR_POLICY: cos + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0005 +TRAIN: + DATASET: cifar10 + SPLIT: train + BATCH_SIZE: 128 + IM_SIZE: 32 +TEST: + DATASET: cifar10 + SPLIT: test + BATCH_SIZE: 200 + IM_SIZE: 32 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: False +OUT_DIR: . 
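The EfficientNet-B1 README above reports throughput as `FPS = BatchSize * num_devices / time_avg`. A minimal sketch of that calculation is below; the function name and the input numbers are illustrative only, not values taken from the results table.

```python
# Sketch of the README's throughput formula: FPS = BatchSize * num_devices / time_avg.
# The example inputs are made-up, not measured results.
def throughput_fps(batch_size: int, num_devices: int, time_avg_s: float) -> float:
    """Images processed per second across all devices for one step."""
    return batch_size * num_devices / time_avg_s


if __name__ == "__main__":
    # e.g. per-device batch of 256 on 8 devices with a 1.0 s average step time
    print(throughput_fps(batch_size=256, num_devices=8, time_avg_s=1.0))  # 2048.0
```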
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-56_nds_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-56_nds_1gpu.yaml index f03502a530fe8be9d545633f16e63c4acd0cc17b..1052cf8da16be2a4561f07c9e946ff82c07bbca1 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-56_nds_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/R-56_nds_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 10 -ANYNET: - STEM_TYPE: res_stem_cifar - STEM_W: 16 - BLOCK_TYPE: res_basic_block - DEPTHS: [9, 9, 9] - WIDTHS: [16, 32, 64] - STRIDES: [1, 2, 2] -BN: - USE_PRECISE_STATS: True - NUM_SAMPLES_PRECISE: 1024 -OPTIM: - BASE_LR: 0.1 - LR_POLICY: cos - MAX_EPOCH: 200 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0005 -TRAIN: - DATASET: cifar10 - SPLIT: train - BATCH_SIZE: 128 - IM_SIZE: 32 -TEST: - DATASET: cifar10 - SPLIT: test - BATCH_SIZE: 200 - IM_SIZE: 32 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: False -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 10 +ANYNET: + STEM_TYPE: res_stem_cifar + STEM_W: 16 + BLOCK_TYPE: res_basic_block + DEPTHS: [9, 9, 9] + WIDTHS: [16, 32, 64] + STRIDES: [1, 2, 2] +BN: + USE_PRECISE_STATS: True + NUM_SAMPLES_PRECISE: 1024 +OPTIM: + BASE_LR: 0.1 + LR_POLICY: cos + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0005 +TRAIN: + DATASET: cifar10 + SPLIT: train + BATCH_SIZE: 128 + IM_SIZE: 32 +TEST: + DATASET: cifar10 + SPLIT: test + BATCH_SIZE: 200 + IM_SIZE: 32 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: False +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/V-56_nds_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/V-56_nds_1gpu.yaml index f360b943e2dd501d5cf0b36af4362e6e5996d173..615da0ba4e90b6f7474ee481eeb324b923f00bfa 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/V-56_nds_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/anynet/V-56_nds_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 10 -ANYNET: - STEM_TYPE: res_stem_cifar - STEM_W: 16 - BLOCK_TYPE: vanilla_block - DEPTHS: [9, 9, 9] - WIDTHS: [16, 32, 64] - STRIDES: [1, 2, 2] -BN: - USE_PRECISE_STATS: True - NUM_SAMPLES_PRECISE: 1024 -OPTIM: - BASE_LR: 0.1 - LR_POLICY: cos - MAX_EPOCH: 200 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0005 -TRAIN: - DATASET: cifar10 - SPLIT: train - BATCH_SIZE: 128 - IM_SIZE: 32 -TEST: - DATASET: cifar10 - SPLIT: test - BATCH_SIZE: 200 - IM_SIZE: 32 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: False -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 10 +ANYNET: + STEM_TYPE: res_stem_cifar + STEM_W: 16 + BLOCK_TYPE: vanilla_block + DEPTHS: [9, 9, 9] + WIDTHS: [16, 32, 64] + STRIDES: [1, 2, 2] +BN: + USE_PRECISE_STATS: True + NUM_SAMPLES_PRECISE: 1024 +OPTIM: + BASE_LR: 0.1 + LR_POLICY: cos + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0005 +TRAIN: + DATASET: cifar10 + SPLIT: train + BATCH_SIZE: 128 + IM_SIZE: 32 +TEST: + DATASET: cifar10 + SPLIT: test + BATCH_SIZE: 200 + IM_SIZE: 32 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: False +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-110_nds_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-110_nds_1gpu.yaml index b9ba81d33afa469e22de2c4fa78e1a62572fd329..50948cbc4da8f60f493318d44a2f24fa008f9ff2 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-110_nds_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-110_nds_1gpu.yaml @@ -1,32 +1,32 @@ -MODEL: - TYPE: resnet - DEPTH: 110 - NUM_CLASSES: 10 -RESNET: - TRANS_FUN: basic_transform -BN: - USE_PRECISE_STATS: True - NUM_SAMPLES_PRECISE: 1024 -OPTIM: - BASE_LR: 0.1 - LR_POLICY: cos - MAX_EPOCH: 200 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0005 -TRAIN: - DATASET: cifar10 - SPLIT: train - BATCH_SIZE: 128 - IM_SIZE: 32 -TEST: - DATASET: cifar10 - SPLIT: test - BATCH_SIZE: 200 - IM_SIZE: 32 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: False -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 110 + NUM_CLASSES: 10 +RESNET: + TRANS_FUN: basic_transform +BN: + USE_PRECISE_STATS: True + NUM_SAMPLES_PRECISE: 1024 +OPTIM: + BASE_LR: 0.1 + LR_POLICY: cos + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0005 +TRAIN: + DATASET: cifar10 + SPLIT: train + BATCH_SIZE: 128 + IM_SIZE: 32 +TEST: + DATASET: cifar10 + SPLIT: test + BATCH_SIZE: 200 + IM_SIZE: 32 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: False +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-56_nds_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-56_nds_1gpu.yaml index 8867c9ec3863dd15bf60cbd82160b73e27c4c666..afcf4901cf25efe929b75e9795c85a061680bb6e 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-56_nds_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/cifar/resnet/R-56_nds_1gpu.yaml @@ -1,32 +1,32 @@ -MODEL: - TYPE: resnet - DEPTH: 56 - NUM_CLASSES: 10 -RESNET: - TRANS_FUN: basic_transform -BN: - USE_PRECISE_STATS: True - NUM_SAMPLES_PRECISE: 1024 -OPTIM: - BASE_LR: 0.1 - LR_POLICY: cos - MAX_EPOCH: 200 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0005 -TRAIN: - DATASET: cifar10 - SPLIT: train - BATCH_SIZE: 128 - IM_SIZE: 32 -TEST: - DATASET: cifar10 - SPLIT: test - BATCH_SIZE: 200 - IM_SIZE: 32 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: False -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 56 + NUM_CLASSES: 10 +RESNET: + TRANS_FUN: basic_transform +BN: + USE_PRECISE_STATS: True + NUM_SAMPLES_PRECISE: 1024 +OPTIM: + BASE_LR: 0.1 + LR_POLICY: cos + MAX_EPOCH: 200 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0005 +TRAIN: + DATASET: cifar10 + SPLIT: train + BATCH_SIZE: 128 + IM_SIZE: 32 +TEST: + DATASET: cifar10 + SPLIT: test + BATCH_SIZE: 200 + IM_SIZE: 32 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: False +OUT_DIR: . 
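The CIFAR configs above all set `LR_POLICY: cos` with `BASE_LR: 0.1` and `MAX_EPOCH: 200`. As a rough sketch, assuming the usual half-cosine annealing that pycls applies for this policy (the helper name below is illustrative):

```python
import math


def cos_lr(cur_epoch: float, base_lr: float = 0.1, max_epoch: int = 200) -> float:
    """Half-cosine decay from base_lr to 0 over max_epoch epochs
    (assumed behaviour of LR_POLICY: cos in the configs above)."""
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * cur_epoch / max_epoch))


# start, midpoint, and end of the 200-epoch CIFAR schedule
print(cos_lr(0), cos_lr(100), cos_lr(200))  # 0.1, 0.05, ~0.0
```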
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_1gpu.yaml index cd102977a9c7247f7da7237feb2ab6e3545c45e7..a1c05a9120138e3c1364cca8586adfa270348158 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_1gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_2gpu.yaml index e372857d2e312963701b27510e2aa09d12c23716..3ce5a54347e8f33209597cc1c175b5c49e5e250c 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_2gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . 
+MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_8gpu.yaml index bf6bfa173f592b2f236839601f3f177f2be4782d..688dd1dc3e5e0b8fd33d7d11bca20a4b1a34f3ff 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-101-1x64d_step_8gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
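The ImageNet R-101 configs above pair a stepped policy (`STEPS: [0, 30, 60, 90]`, `LR_MULT: 0.1`) with a base LR that grows linearly with the global batch: 0.0125 at batch 32 (1 GPU), 0.025 at batch 64 (2 GPUs), 0.1 at batch 256 (8 GPUs). A small sketch of both conventions follows; the function names are illustrative, and the step-schedule interpretation is an assumption based on the config fields rather than the library's exact code.

```python
def scaled_base_lr(total_batch_size: int, lr_per_256: float = 0.1) -> float:
    """Linear scaling implied by the configs: 0.1 at batch 256,
    hence 0.0125 at 32 and 0.025 at 64."""
    return lr_per_256 * total_batch_size / 256


def step_lr(cur_epoch: int, base_lr: float,
            steps=(0, 30, 60, 90), lr_mult: float = 0.1) -> float:
    """Piecewise-constant schedule: decay by lr_mult at each step boundary
    (assumed meaning of LR_POLICY: steps / LR_MULT in the configs above)."""
    ind = sum(1 for s in steps if cur_epoch >= s) - 1
    return base_lr * (lr_mult ** ind)


print(scaled_base_lr(32), scaled_base_lr(64), scaled_base_lr(256))  # 0.0125 0.025 0.1
print(step_lr(10, 0.1), step_lr(45, 0.1), step_lr(95, 0.1))         # 0.1 0.01 0.0001
```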
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_1gpu.yaml index 993c43b5f4d28a5fd2772aeeccd99c022d32e6b9..a43a96606da528729c9a9d4a7c4b09daf7be0c40 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_1gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_2gpu.yaml index 428e4cdb5d873e5e356e67040aca60b290ad0d0e..064992f3a362d22f8638305602cb2e3a4335355c 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_2gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . 
+MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_8gpu.yaml index adae774a7400a44c8b41789b910c39ce5448f8f6..abe4b8c9ee2d1a96d2c4a61dcc453702afc6228c 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/R-50-1x64d_step_8gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.25, 0.25, 0.25, 0.25] - GROUP_WS: [64, 128, 256, 512] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.25, 0.25, 0.25, 0.25] + GROUP_WS: [64, 128, 256, 512] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_1gpu.yaml index 845ea5fe30cc70029669b65d6afc7db2e39d20cb..26b7b1f64e10f8d479c8024a8a0220f8412ba092 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_1gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_2gpu.yaml index c4e15b8dfce21ef578cac98389bc3502c7c1d0df..317b4e88e2a57b9b140a67f6b87cc37818c1e172 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_2gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . 
+MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_8gpu.yaml index 8969c2270cedbe3be27faf1163b52d601404f8fc..c4c0ff040b315e034e9b13dedafec704be33f198 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-101-32x4d_step_8gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 23, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 23, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_1gpu.yaml index 042570a12e307ec75824c6fa6e43f29fc41df050..2ad42223567bdd0f20a4290b471a1fed30fea940 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_1gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_2gpu.yaml index df1123c51d5ca9db8bbe0e41ca527b8976c533bf..a899bb9513d9efeb779131e625647a2ace01231d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_2gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . 
+MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_8gpu.yaml index 9496b464c59c424a4e934d5470b979e367baccef..9746fac79a52a97c6f5227faaef45a6cd8f7f4f0 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/anynet/X-50-32x4d_step_8gpu.yaml @@ -1,39 +1,39 @@ -MODEL: - TYPE: anynet - NUM_CLASSES: 1000 -ANYNET: - STEM_TYPE: res_stem_in - STEM_W: 64 - BLOCK_TYPE: res_bottleneck_block - DEPTHS: [3, 4, 6, 3] - WIDTHS: [256, 512, 1024, 2048] - STRIDES: [1, 2, 2, 2] - BOT_MULS: [0.5, 0.5, 0.5, 0.5] - GROUP_WS: [4, 8, 16, 32] -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: anynet + NUM_CLASSES: 1000 +ANYNET: + STEM_TYPE: res_stem_in + STEM_W: 64 + BLOCK_TYPE: res_bottleneck_block + DEPTHS: [3, 4, 6, 3] + WIDTHS: [256, 512, 1024, 2048] + STRIDES: [1, 2, 2, 2] + BOT_MULS: [0.5, 0.5, 0.5, 0.5] + GROUP_WS: [4, 8, 16, 32] +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_1gpu.yaml index f8e3c54657dfdbd8d403e65cc3bb114d66c06eac..757d69e49dd0505ebb625218d083d2c2870b618d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_2gpu.yaml index c48ce042f083eca2f3e2a516d6c9d10d441d0282..e31acbc14638576c758b9f49074d9f4f899b054b 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_2gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_8gpu.yaml index 5f6cfa8394ee1f8db440ef2bcb538abeef0af73c..473a15b5fe6fc165b268e0c9e90f788f7617b1e4 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-101-1x64d_step_8gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_1gpu.yaml index b1c97d7ddc7cdf6e9967a69ce0555493a8b7caba..327e5a59719752e9dace4b16d1fae544c7842c9e 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_2gpu.yaml index 973c3ffd4ece4bd1f6168831374c0b4f7a18b79d..1278d9c69c0a077b9eb3f3263bd87ce7d46bc468 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_2gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_8gpu.yaml index fabb4ec183de68a892b40623202801a9cdb1e5e3..e6739d7cab06f01818a3a661e7a751b4f43cd21d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/R-50-1x64d_step_8gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 1 - WIDTH_PER_GROUP: 64 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 1 + WIDTH_PER_GROUP: 64 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_1gpu.yaml index def7d1561f5d5ecd9e1af7a0cf3c9df088694052..91604235f804ea4755ac98a56510781ee3d1c85f 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_2gpu.yaml index 5b69a7b4e5d28a0f0a2b1c562223ced27bdfc250..f051c81b25470e8b52389a1db3e2baf9305a105d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_2gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_8gpu.yaml index 60bcd42800929bd86469e0b27edda23d351f9578..c34fedbb03e1aaa8331b854284c1b40d221cc8b7 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-101-32x4d_step_8gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 101 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 101 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_1gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_1gpu.yaml index a0449e8d8abbba4f94a0d29e87c61e3a1bab7fb4..130b619512cf483b3b0906b0bbd3584d641938a0 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_1gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_1gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.0125 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 32 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 25 - IM_SIZE: 256 -NUM_GPUS: 1 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.0125 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 32 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 25 + IM_SIZE: 256 +NUM_GPUS: 1 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_2gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_2gpu.yaml index 717365d00f1688bc1c5d59a0d7010a19905448e2..621d9f3a72e6e46debb31e1572f8ce186474cecf 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_2gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_2gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.025 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 64 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 50 - IM_SIZE: 256 -NUM_GPUS: 2 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.025 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 64 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 50 + IM_SIZE: 256 +NUM_GPUS: 2 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_8gpu.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_8gpu.yaml index 14cac7547c10f78ccaf3c19e6fe6c335a668242b..2a64873219a9ca75ebad4b29e420875d04805f7b 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_8gpu.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/archive/imagenet/resnet/X-50-32x4d_step_8gpu.yaml @@ -1,36 +1,36 @@ -MODEL: - TYPE: resnet - DEPTH: 50 - NUM_CLASSES: 1000 -RESNET: - TRANS_FUN: bottleneck_transform - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 4 - STRIDE_1X1: False -BN: - ZERO_INIT_FINAL_GAMMA: True -OPTIM: - BASE_LR: 0.1 - LR_POLICY: steps - STEPS: [0, 30, 60, 90] - LR_MULT: 0.1 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - NESTEROV: True - WEIGHT_DECAY: 0.0001 -TRAIN: - DATASET: imagenet - SPLIT: train - BATCH_SIZE: 256 - IM_SIZE: 224 -TEST: - DATASET: imagenet - SPLIT: val - BATCH_SIZE: 200 - IM_SIZE: 256 -NUM_GPUS: 8 -DATA_LOADER: - NUM_WORKERS: 4 -CUDNN: - BENCHMARK: True -OUT_DIR: . +MODEL: + TYPE: resnet + DEPTH: 50 + NUM_CLASSES: 1000 +RESNET: + TRANS_FUN: bottleneck_transform + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 4 + STRIDE_1X1: False +BN: + ZERO_INIT_FINAL_GAMMA: True +OPTIM: + BASE_LR: 0.1 + LR_POLICY: steps + STEPS: [0, 30, 60, 90] + LR_MULT: 0.1 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + NESTEROV: True + WEIGHT_DECAY: 0.0001 +TRAIN: + DATASET: imagenet + SPLIT: train + BATCH_SIZE: 256 + IM_SIZE: 224 +TEST: + DATASET: imagenet + SPLIT: val + BATCH_SIZE: 200 + IM_SIZE: 256 +NUM_GPUS: 8 +DATA_LOADER: + NUM_WORKERS: 4 +CUDNN: + BENCHMARK: True +OUT_DIR: . 
diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_full.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_full.yaml index c8580e9cccdd7011faabb2106c8f9d40ad032ad6..973db49bdc297ba4af8d70e05cdfc5b7ceb17c38 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_full.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_full.yaml @@ -1,28 +1,28 @@ -MODEL: - TYPE: effnet - NUM_CLASSES: 1000 - ACTIVATION_FUN: silu -EN: - STEM_W: 32 - STRIDES: [1, 2, 2, 2, 1, 2, 1] - DEPTHS: [2, 3, 3, 4, 4, 5, 2] - WIDTHS: [16, 24, 40, 80, 112, 192, 320] - EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] - KERNELS: [3, 3, 5, 3, 5, 5, 3] - HEAD_W: 1280 -OPTIM: - LR_POLICY: cos - BASE_LR: 0.4 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - WEIGHT_DECAY: 1e-5 -TRAIN: - DATASET: imagenet - IM_SIZE: 240 - BATCH_SIZE: 256 -TEST: - DATASET: imagenet - IM_SIZE: 274 - BATCH_SIZE: 200 -NUM_GPUS: 1 +MODEL: + TYPE: effnet + NUM_CLASSES: 1000 + ACTIVATION_FUN: silu +EN: + STEM_W: 32 + STRIDES: [1, 2, 2, 2, 1, 2, 1] + DEPTHS: [2, 3, 3, 4, 4, 5, 2] + WIDTHS: [16, 24, 40, 80, 112, 192, 320] + EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] + KERNELS: [3, 3, 5, 3, 5, 5, 3] + HEAD_W: 1280 +OPTIM: + LR_POLICY: cos + BASE_LR: 0.4 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + WEIGHT_DECAY: 1e-5 +TRAIN: + DATASET: imagenet + IM_SIZE: 240 + BATCH_SIZE: 256 +TEST: + DATASET: imagenet + IM_SIZE: 274 + BATCH_SIZE: 200 +NUM_GPUS: 1 OUT_DIR: . \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_perf.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_perf.yaml index 33497cb51cec09825ecf17779e482ce770932f81..1cd1550453a8750b46c2d81ee62cde95aae888f7 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_perf.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_1npu_perf.yaml @@ -1,28 +1,28 @@ -MODEL: - TYPE: effnet - NUM_CLASSES: 1000 - ACTIVATION_FUN: silu -EN: - STEM_W: 32 - STRIDES: [1, 2, 2, 2, 1, 2, 1] - DEPTHS: [2, 3, 3, 4, 4, 5, 2] - WIDTHS: [16, 24, 40, 80, 112, 192, 320] - EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] - KERNELS: [3, 3, 5, 3, 5, 5, 3] - HEAD_W: 1280 -OPTIM: - LR_POLICY: cos - BASE_LR: 0.4 - MAX_EPOCH: 1 - MOMENTUM: 0.9 - WEIGHT_DECAY: 1e-5 -TRAIN: - DATASET: imagenet - IM_SIZE: 240 - BATCH_SIZE: 256 -TEST: - DATASET: imagenet - IM_SIZE: 274 - BATCH_SIZE: 200 -NUM_GPUS: 1 +MODEL: + TYPE: effnet + NUM_CLASSES: 1000 + ACTIVATION_FUN: silu +EN: + STEM_W: 32 + STRIDES: [1, 2, 2, 2, 1, 2, 1] + DEPTHS: [2, 3, 3, 4, 4, 5, 2] + WIDTHS: [16, 24, 40, 80, 112, 192, 320] + EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] + KERNELS: [3, 3, 5, 3, 5, 5, 3] + HEAD_W: 1280 +OPTIM: + LR_POLICY: cos + BASE_LR: 0.4 + MAX_EPOCH: 1 + MOMENTUM: 0.9 + WEIGHT_DECAY: 1e-5 +TRAIN: + DATASET: imagenet + IM_SIZE: 240 + BATCH_SIZE: 256 +TEST: + DATASET: imagenet + IM_SIZE: 274 + BATCH_SIZE: 200 +NUM_GPUS: 1 OUT_DIR: . 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_full.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_full.yaml index 989ace38859db0ef9047331e2ebb0e5cd808a82f..22bbe5937e28e0e1c75890b26da438f6a74a1daf 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_full.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_full.yaml @@ -1,28 +1,28 @@ -MODEL: - TYPE: effnet - NUM_CLASSES: 1000 - ACTIVATION_FUN: silu -EN: - STEM_W: 32 - STRIDES: [1, 2, 2, 2, 1, 2, 1] - DEPTHS: [2, 3, 3, 4, 4, 5, 2] - WIDTHS: [16, 24, 40, 80, 112, 192, 320] - EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] - KERNELS: [3, 3, 5, 3, 5, 5, 3] - HEAD_W: 1280 -OPTIM: - LR_POLICY: cos - BASE_LR: 0.4 - MAX_EPOCH: 100 - MOMENTUM: 0.9 - WEIGHT_DECAY: 1e-5 -TRAIN: - DATASET: imagenet - IM_SIZE: 240 - BATCH_SIZE: 1280 -TEST: - DATASET: imagenet - IM_SIZE: 274 - BATCH_SIZE: 800 -NUM_GPUS: 8 +MODEL: + TYPE: effnet + NUM_CLASSES: 1000 + ACTIVATION_FUN: silu +EN: + STEM_W: 32 + STRIDES: [1, 2, 2, 2, 1, 2, 1] + DEPTHS: [2, 3, 3, 4, 4, 5, 2] + WIDTHS: [16, 24, 40, 80, 112, 192, 320] + EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] + KERNELS: [3, 3, 5, 3, 5, 5, 3] + HEAD_W: 1280 +OPTIM: + LR_POLICY: cos + BASE_LR: 0.4 + MAX_EPOCH: 100 + MOMENTUM: 0.9 + WEIGHT_DECAY: 1e-5 +TRAIN: + DATASET: imagenet + IM_SIZE: 240 + BATCH_SIZE: 1280 +TEST: + DATASET: imagenet + IM_SIZE: 274 + BATCH_SIZE: 800 +NUM_GPUS: 8 OUT_DIR: . \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_perf.yaml b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_perf.yaml index 434bdc23b408510d813f447912dbe98fb2cf624d..91530e4977703ce226dac114a421d26ea793e1e2 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_perf.yaml +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/configs/dds_baselines/effnet/EN-B1_dds_8npu_perf.yaml @@ -1,28 +1,28 @@ -MODEL: - TYPE: effnet - NUM_CLASSES: 1000 - ACTIVATION_FUN: silu -EN: - STEM_W: 32 - STRIDES: [1, 2, 2, 2, 1, 2, 1] - DEPTHS: [2, 3, 3, 4, 4, 5, 2] - WIDTHS: [16, 24, 40, 80, 112, 192, 320] - EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] - KERNELS: [3, 3, 5, 3, 5, 5, 3] - HEAD_W: 1280 -OPTIM: - LR_POLICY: cos - BASE_LR: 0.4 - MAX_EPOCH: 1 - MOMENTUM: 0.9 - WEIGHT_DECAY: 1e-5 -TRAIN: - DATASET: imagenet - IM_SIZE: 240 - BATCH_SIZE: 1280 -TEST: - DATASET: imagenet - IM_SIZE: 274 - BATCH_SIZE: 800 -NUM_GPUS: 8 +MODEL: + TYPE: effnet + NUM_CLASSES: 1000 + ACTIVATION_FUN: silu +EN: + STEM_W: 32 + STRIDES: [1, 2, 2, 2, 1, 2, 1] + DEPTHS: [2, 3, 3, 4, 4, 5, 2] + WIDTHS: [16, 24, 40, 80, 112, 192, 320] + EXP_RATIOS: [1, 6, 6, 6, 6, 6, 6] + KERNELS: [3, 3, 5, 3, 5, 5, 3] + HEAD_W: 1280 +OPTIM: + LR_POLICY: cos + BASE_LR: 0.4 + MAX_EPOCH: 1 + MOMENTUM: 0.9 + WEIGHT_DECAY: 1e-5 +TRAIN: + DATASET: imagenet + IM_SIZE: 240 + BATCH_SIZE: 1280 +TEST: + DATASET: imagenet + IM_SIZE: 274 + BATCH_SIZE: 800 +NUM_GPUS: 8 OUT_DIR: . 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/demo.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/demo.py index ffbbd4b77787faf9f39714d487dd699455da4366..dcfc3292a779d8e192e5eda5178e969bdf5e5d82 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/demo.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/demo.py @@ -1,64 +1,64 @@ -#!/usr/bin/env python3 -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import pycls.core.config as config -from pycls.core.config import cfg -import pycls.datasets.transforms as transforms -from pycls.models.effnet import EffNet - - -def build_model(): - config.merge_from_file('configs/dds_baselines/effnet/EN-B1_dds_8npu.yaml') - cfg.freeze() - model = EffNet() - checkpoint = torch.load('result/model.pyth') - model.load_state_dict(checkpoint["model_state"], False) - model.eval() - return model - - -def get_raw_data(): - from PIL import Image - from urllib.request import urlretrieve - IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' - urlretrieve(IMAGE_URL, 'tmp.jpg') - img = Image.open("tmp.jpg") - img = img.convert('RGB') - return img - - -def pre_process(raw_data): - from torchvision import transforms - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - transforms_list = transforms.Compose([ - transforms.Resize(274), - transforms.CenterCrop(240), - transforms.ToTensor(), - normalize - ]) - input_data = transforms_list(raw_data) - return input_data.unsqueeze(0) - - -if __name__ == '__main__': - raw_data = get_raw_data() - model = build_model() - input_tensor = pre_process(raw_data) - output_tensor = model(input_tensor) - _, pred = output_tensor.topk(1, 1, True, True) +#!/usr/bin/env python3 +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import numpy as np +import pycls.core.config as config +from pycls.core.config import cfg +import pycls.datasets.transforms as transforms +from pycls.models.effnet import EffNet + + +def build_model(): + config.merge_from_file('configs/dds_baselines/effnet/EN-B1_dds_8npu.yaml') + cfg.freeze() + model = EffNet() + checkpoint = torch.load('result/model.pyth') + model.load_state_dict(checkpoint["model_state"], False) + model.eval() + return model + + +def get_raw_data(): + from PIL import Image + from urllib.request import urlretrieve + IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' + urlretrieve(IMAGE_URL, 'tmp.jpg') + img = Image.open("tmp.jpg") + img = img.convert('RGB') + return img + + +def pre_process(raw_data): + from torchvision import transforms + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + transforms_list = transforms.Compose([ + transforms.Resize(274), + transforms.CenterCrop(240), + transforms.ToTensor(), + normalize + ]) + input_data = transforms_list(raw_data) + return input_data.unsqueeze(0) + + +if __name__ == '__main__': + raw_data = get_raw_data() + model = build_model() + input_tensor = pre_process(raw_data) + output_tensor = model(input_tensor) + _, pred = output_tensor.topk(1, 1, True, True) print("class: ", pred[0][0].item()) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/benchmark.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/benchmark.py index 25b78e5cad331b4db212445ff82eede950081817..cbad8985085771a741762f428dc01fc81cdc4091 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/benchmark.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/benchmark.py @@ -1,167 +1,167 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - - -"""Benchmarking functions.""" - -import pycls.core.logging as logging -import pycls.core.net as net -import pycls.datasets.loader as loader -import torch -from apex import amp -from pycls.core.config import cfg -from pycls.core.timer import Timer -import pycls.core.optimizer as optim -import torch.npu - -logger = logging.get_logger(__name__) - - -@torch.no_grad() -def compute_time_eval(model): - """Computes precise model forward test time using dummy data.""" - # Use eval mode - model.eval() - # Generate a dummy mini-batch and copy data to GPU - im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS) - #inputs = torch.zeros(batch_size, 3, im_size, im_size).cuda(non_blocking=False) - inputs = torch.zeros(batch_size, 3, im_size, im_size).npu() - inputs = inputs.to(non_blocking=False) - # Compute precise forward pass time - timer = Timer() - total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER - for cur_iter in range(total_iter): - # Reset the timers after the warmup phase - if cur_iter == cfg.PREC_TIME.WARMUP_ITER: - timer.reset() - # Forward - timer.tic() - model(inputs) - torch.npu.synchronize() - timer.toc() - return timer.average_time - - -def compute_time_train(model, loss_fun): - """Computes precise model forward + backward time using dummy data.""" - # Use train mode - model.train() - # Generate a dummy mini-batch and copy data to GPU - im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS) - inputs = torch.rand(batch_size, 3, im_size, im_size).npu() - inputs = inputs.to(non_blocking=False) - labels = torch.zeros(batch_size, dtype=torch.int32) - labels_one_hot = net.smooth_one_hot_labels(labels) - labels_one_hot = labels_one_hot.npu() - labels_one_hot = labels_one_hot.to(non_blocking=False) - # Cache BatchNorm2D running stats - bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)] - bn_stats = [[bn.running_mean.clone(), bn.running_var.clone()] for bn in bns] - # Create a GradScaler for mixed precision training - #scaler = ApexScaler() - # Compute precise forward backward pass time - fw_timer, bw_timer = Timer(), Timer() - total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER - for cur_iter in range(total_iter): - # Reset the timers after the warmup phase - if cur_iter == cfg.PREC_TIME.WARMUP_ITER: - fw_timer.reset() - bw_timer.reset() - # Forward - fw_timer.tic() - preds = model(inputs) - loss = loss_fun(preds, labels_one_hot) - torch.npu.synchronize() - fw_timer.toc() - # Backward - bw_timer.tic() - loss.backward() - torch.npu.synchronize() - bw_timer.toc() - # Restore BatchNorm2D running stats - for bn, (mean, var) in zip(bns, bn_stats): - bn.running_mean, bn.running_var = mean, var - return fw_timer.average_time, bw_timer.average_time - - -def compute_time_loader(data_loader): - """Computes loader time.""" - timer = Timer() - loader.shuffle(data_loader, 0) - data_loader_iterator = iter(data_loader) - total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER - total_iter = min(total_iter, len(data_loader)) - for cur_iter in range(total_iter): - if cur_iter == cfg.PREC_TIME.WARMUP_ITER: - timer.reset() - timer.tic() - next(data_loader_iterator) - timer.toc() - return timer.average_time - - -def compute_time_model(model, loss_fun): - """Times model.""" - logger.info("Computing model timings only...") - # Compute timings - test_fw_time = compute_time_eval(model) - train_fw_time, train_bw_time = compute_time_train(model, loss_fun) - train_fw_bw_time = train_fw_time + train_bw_time - 
# Output iter timing - iter_times = { - "test_fw_time": test_fw_time, - "train_fw_time": train_fw_time, - "train_bw_time": train_bw_time, - "train_fw_bw_time": train_fw_bw_time, - } - logger.info(logging.dump_log_data(iter_times, "iter_times")) - - -def compute_time_full(model, loss_fun, train_loader, test_loader): - """Times model and data loader.""" - logger.info("Computing model and loader timings...") - # Compute timings - test_fw_time = compute_time_eval(model) - train_fw_time, train_bw_time = compute_time_train(model, loss_fun) - train_fw_bw_time = train_fw_time + train_bw_time - train_loader_time = compute_time_loader(train_loader) - # Output iter timing - iter_times = { - "test_fw_time": test_fw_time, - "train_fw_time": train_fw_time, - "train_bw_time": train_bw_time, - "train_fw_bw_time": train_fw_bw_time, - "train_loader_time": train_loader_time, - } - logger.info(logging.dump_log_data(iter_times, "iter_times")) - # Output epoch timing - epoch_times = { - "test_fw_time": test_fw_time * len(test_loader), - "train_fw_time": train_fw_time * len(train_loader), - "train_bw_time": train_bw_time * len(train_loader), - "train_fw_bw_time": train_fw_bw_time * len(train_loader), - "train_loader_time": train_loader_time * len(train_loader), - } - logger.info(logging.dump_log_data(epoch_times, "epoch_times")) - # Compute data loader overhead (assuming DATA_LOADER.NUM_WORKERS>1) - overhead = max(0, train_loader_time - train_fw_bw_time) / train_fw_bw_time - logger.info("Overhead of data loader is {:.2f}%".format(overhead * 100)) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + + +"""Benchmarking functions.""" + +import pycls.core.logging as logging +import pycls.core.net as net +import pycls.datasets.loader as loader +import torch +from apex import amp +from pycls.core.config import cfg +from pycls.core.timer import Timer +import pycls.core.optimizer as optim +import torch.npu + +logger = logging.get_logger(__name__) + + +@torch.no_grad() +def compute_time_eval(model): + """Computes precise model forward test time using dummy data.""" + # Use eval mode + model.eval() + # Generate a dummy mini-batch and copy data to GPU + im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS) + #inputs = torch.zeros(batch_size, 3, im_size, im_size).cuda(non_blocking=False) + inputs = torch.zeros(batch_size, 3, im_size, im_size).npu() + inputs = inputs.to(non_blocking=False) + # Compute precise forward pass time + timer = Timer() + total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER + for cur_iter in range(total_iter): + # Reset the timers after the warmup phase + if cur_iter == cfg.PREC_TIME.WARMUP_ITER: + timer.reset() + # Forward + timer.tic() + model(inputs) + torch.npu.synchronize() + timer.toc() + return timer.average_time + + +def compute_time_train(model, loss_fun): + """Computes precise model forward + backward time using dummy data.""" + # Use train mode + model.train() + # Generate a dummy mini-batch and copy data to GPU + im_size, batch_size = cfg.TRAIN.IM_SIZE, int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS) + inputs = torch.rand(batch_size, 3, im_size, im_size).npu() + inputs = inputs.to(non_blocking=False) + labels = torch.zeros(batch_size, dtype=torch.int32) + labels_one_hot = net.smooth_one_hot_labels(labels) + labels_one_hot = labels_one_hot.npu() + labels_one_hot = labels_one_hot.to(non_blocking=False) + # Cache BatchNorm2D running stats + bns = [m for m in model.modules() if isinstance(m, torch.nn.BatchNorm2d)] + bn_stats = [[bn.running_mean.clone(), bn.running_var.clone()] for bn in bns] + # Create a GradScaler for mixed precision training + #scaler = ApexScaler() + # Compute precise forward backward pass time + fw_timer, bw_timer = Timer(), Timer() + total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER + for cur_iter in range(total_iter): + # Reset the timers after the warmup phase + if cur_iter == cfg.PREC_TIME.WARMUP_ITER: + fw_timer.reset() + bw_timer.reset() + # Forward + fw_timer.tic() + preds = model(inputs) + loss = loss_fun(preds, labels_one_hot) + torch.npu.synchronize() + fw_timer.toc() + # Backward + bw_timer.tic() + loss.backward() + torch.npu.synchronize() + bw_timer.toc() + # Restore BatchNorm2D running stats + for bn, (mean, var) in zip(bns, bn_stats): + bn.running_mean, bn.running_var = mean, var + return fw_timer.average_time, bw_timer.average_time + + +def compute_time_loader(data_loader): + """Computes loader time.""" + timer = Timer() + loader.shuffle(data_loader, 0) + data_loader_iterator = iter(data_loader) + total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER + total_iter = min(total_iter, len(data_loader)) + for cur_iter in range(total_iter): + if cur_iter == cfg.PREC_TIME.WARMUP_ITER: + timer.reset() + timer.tic() + next(data_loader_iterator) + timer.toc() + return timer.average_time + + +def compute_time_model(model, loss_fun): + """Times model.""" + logger.info("Computing model timings only...") + # Compute timings + test_fw_time = compute_time_eval(model) + train_fw_time, train_bw_time = compute_time_train(model, loss_fun) + train_fw_bw_time = train_fw_time + train_bw_time + 
# Output iter timing + iter_times = { + "test_fw_time": test_fw_time, + "train_fw_time": train_fw_time, + "train_bw_time": train_bw_time, + "train_fw_bw_time": train_fw_bw_time, + } + logger.info(logging.dump_log_data(iter_times, "iter_times")) + + +def compute_time_full(model, loss_fun, train_loader, test_loader): + """Times model and data loader.""" + logger.info("Computing model and loader timings...") + # Compute timings + test_fw_time = compute_time_eval(model) + train_fw_time, train_bw_time = compute_time_train(model, loss_fun) + train_fw_bw_time = train_fw_time + train_bw_time + train_loader_time = compute_time_loader(train_loader) + # Output iter timing + iter_times = { + "test_fw_time": test_fw_time, + "train_fw_time": train_fw_time, + "train_bw_time": train_bw_time, + "train_fw_bw_time": train_fw_bw_time, + "train_loader_time": train_loader_time, + } + logger.info(logging.dump_log_data(iter_times, "iter_times")) + # Output epoch timing + epoch_times = { + "test_fw_time": test_fw_time * len(test_loader), + "train_fw_time": train_fw_time * len(train_loader), + "train_bw_time": train_bw_time * len(train_loader), + "train_fw_bw_time": train_fw_bw_time * len(train_loader), + "train_loader_time": train_loader_time * len(train_loader), + } + logger.info(logging.dump_log_data(epoch_times, "epoch_times")) + # Compute data loader overhead (assuming DATA_LOADER.NUM_WORKERS>1) + overhead = max(0, train_loader_time - train_fw_bw_time) / train_fw_bw_time + logger.info("Overhead of data loader is {:.2f}%".format(overhead * 100)) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/builders.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/builders.py index 8e2688e96a50b44e16e6d8f4eb49e2c411ec303b..a40983713d02bd2e981f76fbc5f3479986ddde87 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/builders.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/builders.py @@ -1,68 +1,68 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -"""Model and loss construction functions.""" - -from pycls.core.config import cfg -from pycls.core.net import SoftCrossEntropyLoss -from pycls.models.effnet import EffNet - - -# Supported models -_models = {"effnet": EffNet} - -# Supported loss functions -_loss_funs = {"cross_entropy": SoftCrossEntropyLoss} - - -def get_model(): - """Gets the model class specified in the config.""" - err_str = "Model type '{}' not supported" - assert cfg.MODEL.TYPE in _models.keys(), err_str.format(cfg.MODEL.TYPE) - return _models[cfg.MODEL.TYPE] - - -def get_loss_fun(): - """Gets the loss function class specified in the config.""" - err_str = "Loss function type '{}' not supported" - assert cfg.MODEL.LOSS_FUN in _loss_funs.keys(), err_str.format(cfg.TRAIN.LOSS) - return _loss_funs[cfg.MODEL.LOSS_FUN] - - -def build_model(): - """Builds the model.""" - return get_model()() - - -def build_loss_fun(): - """Build the loss function.""" - return get_loss_fun()() - - -def register_model(name, ctor): - """Registers a model dynamically.""" - _models[name] = ctor - - -def register_loss_fun(name, ctor): - """Registers a loss function dynamically.""" - _loss_funs[name] = ctor +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Model and loss construction functions.""" + +from pycls.core.config import cfg +from pycls.core.net import SoftCrossEntropyLoss +from pycls.models.effnet import EffNet + + +# Supported models +_models = {"effnet": EffNet} + +# Supported loss functions +_loss_funs = {"cross_entropy": SoftCrossEntropyLoss} + + +def get_model(): + """Gets the model class specified in the config.""" + err_str = "Model type '{}' not supported" + assert cfg.MODEL.TYPE in _models.keys(), err_str.format(cfg.MODEL.TYPE) + return _models[cfg.MODEL.TYPE] + + +def get_loss_fun(): + """Gets the loss function class specified in the config.""" + err_str = "Loss function type '{}' not supported" + assert cfg.MODEL.LOSS_FUN in _loss_funs.keys(), err_str.format(cfg.TRAIN.LOSS) + return _loss_funs[cfg.MODEL.LOSS_FUN] + + +def build_model(): + """Builds the model.""" + return get_model()() + + +def build_loss_fun(): + """Build the loss function.""" + return get_loss_fun()() + + +def register_model(name, ctor): + """Registers a model dynamically.""" + _models[name] = ctor + + +def register_loss_fun(name, ctor): + """Registers a loss function dynamically.""" + _loss_funs[name] = ctor diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/distributed.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/distributed.py index 55a6652b47b65a7cf1855bca2d84428995b24249..3eb1dc083b9bb3935f6ebbd549c71bf328b579f3 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/distributed.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/distributed.py @@ -1,186 +1,186 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Distributed helpers.""" - -import multiprocessing -import os -import random -import signal -import threading -import traceback -import torch.npu -import torch -from pycls.core.config import cfg - - -# Make work w recent PyTorch versions (https://github.com/pytorch/pytorch/issues/37377) -os.environ["MKL_THREADING_LAYER"] = "GNU" - - -def is_master_proc(): - """Determines if the current process is the master process. - - Master process is responsible for logging, writing and loading checkpoints. In - the multi GPU setting, we assign the master role to the rank 0 process. When - training using a single GPU, there is a single process which is considered master. 
- """ - return cfg.NUM_GPUS == 1 or torch.distributed.get_rank() == 0 - - -def init_process_group(proc_rank, world_size, port='29588'): - """Initializes the default process group.""" - # Set the GPU to use - print("begin init_process_group") - torch.npu.set_device(proc_rank) - print("[npu id:", proc_rank, "]", "Use NPU: {} for training".format(proc_rank)) - - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29588' - - - # Initialize the process group - torch.distributed.init_process_group( - backend=cfg.DIST_BACKEND, - world_size=world_size, - rank=proc_rank, - ) - print("after init_process_group") - - -def destroy_process_group(): - """Destroys the default process group.""" - torch.distributed.destroy_process_group() - - -def scaled_all_reduce(tensors): - """Performs the scaled all_reduce operation on the provided tensors. - - The input tensors are modified in-place. Currently supports only the sum - reduction operator. The reduced values are scaled by the inverse size of the - process group (equivalent to cfg.NUM_GPUS). - """ - # There is no need for reduction in the single-proc case - if cfg.NUM_GPUS == 1: - return tensors - # Queue the reductions - reductions = [] - for tensor in tensors: - reduction = torch.distributed.all_reduce(tensor, async_op=True) - reductions.append(reduction) - # Wait for reductions to finish - for reduction in reductions: - reduction.wait() - # Scale the results - for tensor in tensors: - tensor.mul_(1.0 / cfg.NUM_GPUS) - return tensors - - -class ChildException(Exception): - """Wraps an exception from a child process.""" - - def __init__(self, child_trace): - super(ChildException, self).__init__(child_trace) - - -class ErrorHandler(object): - """Multiprocessing error handler (based on fairseq's). - - Listens for errors in child processes and propagates the tracebacks to the parent. 
- """ - - def __init__(self, error_queue): - # Shared error queue - self.error_queue = error_queue - # Children processes sharing the error queue - self.children_pids = [] - # Start a thread listening to errors - self.error_listener = threading.Thread(target=self.listen, daemon=True) - self.error_listener.start() - # Register the signal handler - signal.signal(signal.SIGUSR1, self.signal_handler) - - def add_child(self, pid): - """Registers a child process.""" - self.children_pids.append(pid) - - def listen(self): - """Listens for errors in the error queue.""" - # Wait until there is an error in the queue - child_trace = self.error_queue.get() - # Put the error back for the signal handler - self.error_queue.put(child_trace) - # Invoke the signal handler - os.kill(os.getpid(), signal.SIGUSR1) - - def signal_handler(self, _sig_num, _stack_frame): - """Signal handler.""" - # Kill children processes - for pid in self.children_pids: - os.kill(pid, signal.SIGINT) - # Propagate the error from the child process - raise ChildException(self.error_queue.get()) - - -def run(proc_rank, world_size, port, error_queue, fun, fun_args, fun_kwargs): - """Runs a function from a child process.""" - try: - # Initialize the process group - init_process_group(proc_rank, world_size, port) - # Run the function - fun(*fun_args, **fun_kwargs) - except KeyboardInterrupt: - # Killed by the parent process - pass - except Exception: - # Propagate exception to the parent process - error_queue.put(traceback.format_exc()) - finally: - # Destroy the process group - destroy_process_group() - - -def multi_proc_run(num_proc, fun, fun_args=(), fun_kwargs=None): - """Runs a function in a multi-proc setting (unless num_proc == 1).""" - # There is no need for multi-proc in the single-proc case - fun_kwargs = fun_kwargs if fun_kwargs else {} - if num_proc == 1: - fun(*fun_args, **fun_kwargs) - return - # Handle errors from training subprocesses - error_queue = multiprocessing.SimpleQueue() - error_handler = ErrorHandler(error_queue) - # Get a random port to use (without using global random number generator) - port = random.Random().randint(cfg.PORT_RANGE[0], cfg.PORT_RANGE[1]) - # Run each training subprocess - ps = [] - for i in range(num_proc): - p_i = multiprocessing.Process( - target=run, args=(i, num_proc, port, error_queue, fun, fun_args, fun_kwargs) - ) - ps.append(p_i) - p_i.start() - error_handler.add_child(p_i.pid) - # Wait for each subprocess to finish - for p in ps: - p.join() +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Distributed helpers.""" + +import multiprocessing +import os +import random +import signal +import threading +import traceback +import torch.npu +import torch +from pycls.core.config import cfg + + +# Make work w recent PyTorch versions (https://github.com/pytorch/pytorch/issues/37377) +os.environ["MKL_THREADING_LAYER"] = "GNU" + + +def is_master_proc(): + """Determines if the current process is the master process. + + Master process is responsible for logging, writing and loading checkpoints. In + the multi GPU setting, we assign the master role to the rank 0 process. When + training using a single GPU, there is a single process which is considered master. + """ + return cfg.NUM_GPUS == 1 or torch.distributed.get_rank() == 0 + + +def init_process_group(proc_rank, world_size, port='29588'): + """Initializes the default process group.""" + # Set the GPU to use + print("begin init_process_group") + torch.npu.set_device(proc_rank) + print("[npu id:", proc_rank, "]", "Use NPU: {} for training".format(proc_rank)) + + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29588' + + + # Initialize the process group + torch.distributed.init_process_group( + backend=cfg.DIST_BACKEND, + world_size=world_size, + rank=proc_rank, + ) + print("after init_process_group") + + +def destroy_process_group(): + """Destroys the default process group.""" + torch.distributed.destroy_process_group() + + +def scaled_all_reduce(tensors): + """Performs the scaled all_reduce operation on the provided tensors. + + The input tensors are modified in-place. Currently supports only the sum + reduction operator. The reduced values are scaled by the inverse size of the + process group (equivalent to cfg.NUM_GPUS). + """ + # There is no need for reduction in the single-proc case + if cfg.NUM_GPUS == 1: + return tensors + # Queue the reductions + reductions = [] + for tensor in tensors: + reduction = torch.distributed.all_reduce(tensor, async_op=True) + reductions.append(reduction) + # Wait for reductions to finish + for reduction in reductions: + reduction.wait() + # Scale the results + for tensor in tensors: + tensor.mul_(1.0 / cfg.NUM_GPUS) + return tensors + + +class ChildException(Exception): + """Wraps an exception from a child process.""" + + def __init__(self, child_trace): + super(ChildException, self).__init__(child_trace) + + +class ErrorHandler(object): + """Multiprocessing error handler (based on fairseq's). + + Listens for errors in child processes and propagates the tracebacks to the parent. 
+ """ + + def __init__(self, error_queue): + # Shared error queue + self.error_queue = error_queue + # Children processes sharing the error queue + self.children_pids = [] + # Start a thread listening to errors + self.error_listener = threading.Thread(target=self.listen, daemon=True) + self.error_listener.start() + # Register the signal handler + signal.signal(signal.SIGUSR1, self.signal_handler) + + def add_child(self, pid): + """Registers a child process.""" + self.children_pids.append(pid) + + def listen(self): + """Listens for errors in the error queue.""" + # Wait until there is an error in the queue + child_trace = self.error_queue.get() + # Put the error back for the signal handler + self.error_queue.put(child_trace) + # Invoke the signal handler + os.kill(os.getpid(), signal.SIGUSR1) + + def signal_handler(self, _sig_num, _stack_frame): + """Signal handler.""" + # Kill children processes + for pid in self.children_pids: + os.kill(pid, signal.SIGINT) + # Propagate the error from the child process + raise ChildException(self.error_queue.get()) + + +def run(proc_rank, world_size, port, error_queue, fun, fun_args, fun_kwargs): + """Runs a function from a child process.""" + try: + # Initialize the process group + init_process_group(proc_rank, world_size, port) + # Run the function + fun(*fun_args, **fun_kwargs) + except KeyboardInterrupt: + # Killed by the parent process + pass + except Exception: + # Propagate exception to the parent process + error_queue.put(traceback.format_exc()) + finally: + # Destroy the process group + destroy_process_group() + + +def multi_proc_run(num_proc, fun, fun_args=(), fun_kwargs=None): + """Runs a function in a multi-proc setting (unless num_proc == 1).""" + # There is no need for multi-proc in the single-proc case + fun_kwargs = fun_kwargs if fun_kwargs else {} + if num_proc == 1: + fun(*fun_args, **fun_kwargs) + return + # Handle errors from training subprocesses + error_queue = multiprocessing.SimpleQueue() + error_handler = ErrorHandler(error_queue) + # Get a random port to use (without using global random number generator) + port = random.Random().randint(cfg.PORT_RANGE[0], cfg.PORT_RANGE[1]) + # Run each training subprocess + ps = [] + for i in range(num_proc): + p_i = multiprocessing.Process( + target=run, args=(i, num_proc, port, error_queue, fun, fun_args, fun_kwargs) + ) + ps.append(p_i) + p_i.start() + error_handler.add_child(p_i.pid) + # Wait for each subprocess to finish + for p in ps: + p.join() diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/env.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/env.py index c0d31579a4faec62c630b88ca1a66168f790dfce..48a8164b31bd510231ee34260f183f7317e99ffa 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/env.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/env.py @@ -1,56 +1,56 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random - -import numpy as np -import pycls.core.config as config -import pycls.core.distributed as dist -import pycls.core.logging as logging -import torch -from iopath.common.file_io import g_pathmgr -from pycls.core.config import cfg - - -logger = logging.get_logger(__name__) - - -def setup_env(): - """Sets up environment for training or testing.""" - if dist.is_master_proc(): - # Ensure that the output dir exists - g_pathmgr.mkdirs(cfg.OUT_DIR) - # Save the config - config.dump_cfg() - # Setup logging - logging.setup_logging() - # Log torch, cuda, and cudnn versions - version = [torch.__version__, torch.version.cuda, torch.backends.cudnn.version()] - logger.info("PyTorch Version: torch={}, cuda={}, cudnn={}".format(*version)) - # Log the config as both human readable and as a json - logger.info("Config:\n{}".format(cfg)) if cfg.VERBOSE else () - logger.info(logging.dump_log_data(cfg, "cfg", None)) - # Fix the RNG seeds (see RNG comment in core/config.py for discussion) - np.random.seed(cfg.RNG_SEED) - torch.manual_seed(cfg.RNG_SEED) - random.seed(cfg.RNG_SEED) - # Configure the CUDNN backend - torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
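
The reproducibility part of setup_env() boils down to seeding all three RNG sources with cfg.RNG_SEED; a standalone sketch with an illustrative seed value:

    import random

    import numpy as np
    import torch

    RNG_SEED = 1  # stands in for cfg.RNG_SEED
    np.random.seed(RNG_SEED)
    torch.manual_seed(RNG_SEED)
    random.seed(RNG_SEED)
    print(np.random.rand(), torch.rand(1).item(), random.random())
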
+ +import random + +import numpy as np +import pycls.core.config as config +import pycls.core.distributed as dist +import pycls.core.logging as logging +import torch +from iopath.common.file_io import g_pathmgr +from pycls.core.config import cfg + + +logger = logging.get_logger(__name__) + + +def setup_env(): + """Sets up environment for training or testing.""" + if dist.is_master_proc(): + # Ensure that the output dir exists + g_pathmgr.mkdirs(cfg.OUT_DIR) + # Save the config + config.dump_cfg() + # Setup logging + logging.setup_logging() + # Log torch, cuda, and cudnn versions + version = [torch.__version__, torch.version.cuda, torch.backends.cudnn.version()] + logger.info("PyTorch Version: torch={}, cuda={}, cudnn={}".format(*version)) + # Log the config as both human readable and as a json + logger.info("Config:\n{}".format(cfg)) if cfg.VERBOSE else () + logger.info(logging.dump_log_data(cfg, "cfg", None)) + # Fix the RNG seeds (see RNG comment in core/config.py for discussion) + np.random.seed(cfg.RNG_SEED) + torch.manual_seed(cfg.RNG_SEED) + random.seed(cfg.RNG_SEED) + # Configure the CUDNN backend + torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/io.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/io.py index 8bba8a2d45b9f41aa6e31a74b29a2fa15ec59dde..51a1f8292126ca611ec5e85811bdf558a502edd7 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/io.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/io.py @@ -1,93 +1,93 @@ -#!/usr/bin/env python3 - - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""IO utilities (adapted from Detectron)""" - -import logging -import os -import re -import sys -from urllib import request as urlrequest - -from iopath.common.file_io import g_pathmgr - - -logger = logging.getLogger(__name__) - -_PYCLS_BASE_URL = "https://dl.fbaipublicfiles.com/pycls" - - -def cache_url(url_or_file, cache_dir, base_url=_PYCLS_BASE_URL): - """Download the file specified by the URL to the cache_dir and return the path to - the cached file. If the argument is not a URL, simply return it as is. 
- """ - is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None - if not is_url: - return url_or_file - url = url_or_file - assert url.startswith(base_url), "url must start with: {}".format(base_url) - cache_file_path = url.replace(base_url, cache_dir) - if g_pathmgr.exists(cache_file_path): - return cache_file_path - cache_file_dir = os.path.dirname(cache_file_path) - if not g_pathmgr.exists(cache_file_dir): - g_pathmgr.mkdirs(cache_file_dir) - logger.info("Downloading remote file {} to {}".format(url, cache_file_path)) - download_url(url, cache_file_path) - return cache_file_path - - -def _progress_bar(count, total): - """Report download progress. Credit: - https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 - """ - bar_len = 60 - filled_len = int(round(bar_len * count / float(total))) - percents = round(100.0 * count / float(total), 1) - bar = "=" * filled_len + "-" * (bar_len - filled_len) - sys.stdout.write( - " [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024) - ) - sys.stdout.flush() - if count >= total: - sys.stdout.write("\n") - - -def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar): - """Download url and write it to dst_file_path. Credit: - https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook - """ - req = urlrequest.Request(url) - response = urlrequest.urlopen(req) - total_size = response.info().get("Content-Length").strip() - total_size = int(total_size) - bytes_so_far = 0 - with g_pathmgr.open(dst_file_path, "wb") as f: - while 1: - chunk = response.read(chunk_size) - bytes_so_far += len(chunk) - if not chunk: - break - if progress_hook: - progress_hook(bytes_so_far, total_size) - f.write(chunk) - return bytes_so_far +#!/usr/bin/env python3 + + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""IO utilities (adapted from Detectron)""" + +import logging +import os +import re +import sys +from urllib import request as urlrequest + +from iopath.common.file_io import g_pathmgr + + +logger = logging.getLogger(__name__) + +_PYCLS_BASE_URL = "https://dl.fbaipublicfiles.com/pycls" + + +def cache_url(url_or_file, cache_dir, base_url=_PYCLS_BASE_URL): + """Download the file specified by the URL to the cache_dir and return the path to + the cached file. If the argument is not a URL, simply return it as is. 
+ """ + is_url = re.match(r"^(?:http)s?://", url_or_file, re.IGNORECASE) is not None + if not is_url: + return url_or_file + url = url_or_file + assert url.startswith(base_url), "url must start with: {}".format(base_url) + cache_file_path = url.replace(base_url, cache_dir) + if g_pathmgr.exists(cache_file_path): + return cache_file_path + cache_file_dir = os.path.dirname(cache_file_path) + if not g_pathmgr.exists(cache_file_dir): + g_pathmgr.mkdirs(cache_file_dir) + logger.info("Downloading remote file {} to {}".format(url, cache_file_path)) + download_url(url, cache_file_path) + return cache_file_path + + +def _progress_bar(count, total): + """Report download progress. Credit: + https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 + """ + bar_len = 60 + filled_len = int(round(bar_len * count / float(total))) + percents = round(100.0 * count / float(total), 1) + bar = "=" * filled_len + "-" * (bar_len - filled_len) + sys.stdout.write( + " [{}] {}% of {:.1f}MB file \r".format(bar, percents, total / 1024 / 1024) + ) + sys.stdout.flush() + if count >= total: + sys.stdout.write("\n") + + +def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar): + """Download url and write it to dst_file_path. Credit: + https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + """ + req = urlrequest.Request(url) + response = urlrequest.urlopen(req) + total_size = response.info().get("Content-Length").strip() + total_size = int(total_size) + bytes_so_far = 0 + with g_pathmgr.open(dst_file_path, "wb") as f: + while 1: + chunk = response.read(chunk_size) + bytes_so_far += len(chunk) + if not chunk: + break + if progress_hook: + progress_hook(bytes_so_far, total_size) + f.write(chunk) + return bytes_so_far diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/logging.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/logging.py index fc0f2aa70d519553c2ae2a1616444dfc4cb8d054..f5374e51c1e11079188782070ab7d3bd10cbcee1 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/logging.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/logging.py @@ -1,153 +1,153 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Logging.""" - -import builtins -import decimal -import logging -import os -import sys - -import pycls.core.distributed as dist -import simplejson -from iopath.common.file_io import g_pathmgr -from pycls.core.config import cfg - - -# Show filename and line number in logs -_FORMAT = "[%(filename)s: %(lineno)3d]: %(message)s" - -# Log file name (for cfg.LOG_DEST = 'file') -_LOG_FILE = "stdout.log" - -# Data output with dump_log_data(data, data_type) will be tagged w/ this -_TAG = "json_stats: " - -# Data output with dump_log_data(data, data_type) will have data[_TYPE]=data_type -_TYPE = "_type" - - -def _suppress_print(): - """Suppresses printing from the current process.""" - - def ignore(*_objects, _sep=" ", _end="\n", _file=sys.stdout, _flush=False): - pass - - builtins.print = ignore - - -def setup_logging(): - """Sets up the logging.""" - # Enable logging only for the master process - if dist.is_master_proc(): - # Clear the root logger to prevent any existing logging config - # (e.g. set by another module) from messing with our setup - logging.root.handlers = [] - # Construct logging configuration - logging_config = {"level": logging.INFO, "format": _FORMAT} - # Log either to stdout or to a file - if cfg.LOG_DEST == "stdout": - logging_config["stream"] = sys.stdout - else: - logging_config["filename"] = os.path.join(cfg.OUT_DIR, _LOG_FILE) - # Configure logging - logging.basicConfig(**logging_config) - else: - _suppress_print() - - -def get_logger(name): - """Retrieves the logger.""" - return logging.getLogger(name) - - -def dump_log_data(data, data_type, prec=4): - """Covert data (a dictionary) into tagged json string for logging.""" - data[_TYPE] = data_type - data = float_to_decimal(data, prec) - data_json = simplejson.dumps(data, sort_keys=True, use_decimal=True) - return "{:s}{:s}".format(_TAG, data_json) - - -def float_to_decimal(data, prec=4): - """Convert floats to decimals which allows for fixed width json.""" - if prec and isinstance(data, dict): - return {k: float_to_decimal(v, prec) for k, v in data.items()} - if prec and isinstance(data, float): - return decimal.Decimal(("{:." 
+ str(prec) + "f}").format(data)) - else: - return data - - -def get_log_files(log_dir, name_filter="", log_file=_LOG_FILE): - """Get all log files in directory containing subdirs of trained models.""" - names = [n for n in sorted(g_pathmgr.ls(log_dir)) if name_filter in n] - files = [os.path.join(log_dir, n, log_file) for n in names] - f_n_ps = [(f, n) for (f, n) in zip(files, names) if g_pathmgr.exists(f)] - files, names = zip(*f_n_ps) if f_n_ps else ([], []) - return files, names - - -def load_log_data(log_file, data_types_to_skip=()): - """Loads log data into a dictionary of the form data[data_type][metric][index].""" - # Load log_file - assert g_pathmgr.exists(log_file), "Log file not found: {}".format(log_file) - with g_pathmgr.open(log_file, "r") as f: - lines = f.readlines() - # Extract and parse lines that start with _TAG and have a type specified - lines = [l[l.find(_TAG) + len(_TAG) :] for l in lines if _TAG in l] - lines = [simplejson.loads(l) for l in lines] - lines = [l for l in lines if _TYPE in l and not l[_TYPE] in data_types_to_skip] - # Generate data structure accessed by data[data_type][index][metric] - data_types = [l[_TYPE] for l in lines] - data = {t: [] for t in data_types} - for t, line in zip(data_types, lines): - del line[_TYPE] - data[t].append(line) - # Generate data structure accessed by data[data_type][metric][index] - for t in data: - metrics = sorted(data[t][0].keys()) - err_str = "Inconsistent metrics in log for _type={}: {}".format(t, metrics) - assert all(sorted(d.keys()) == metrics for d in data[t]), err_str - data[t] = {m: [d[m] for d in data[t]] for m in metrics} - return data - - -def sort_log_data(data): - """Sort each data[data_type][metric] by epoch or keep only first instance.""" - for t in data: - if "epoch" in data[t]: - assert "epoch_ind" not in data[t] and "epoch_max" not in data[t] - data[t]["epoch_ind"] = [int(e.split("/")[0]) for e in data[t]["epoch"]] - data[t]["epoch_max"] = [int(e.split("/")[1]) for e in data[t]["epoch"]] - epoch = data[t]["epoch_ind"] - if "iter" in data[t]: - assert "iter_ind" not in data[t] and "iter_max" not in data[t] - data[t]["iter_ind"] = [int(i.split("/")[0]) for i in data[t]["iter"]] - data[t]["iter_max"] = [int(i.split("/")[1]) for i in data[t]["iter"]] - itr = zip(epoch, data[t]["iter_ind"], data[t]["iter_max"]) - epoch = [e + (i_ind - 1) / i_max for e, i_ind, i_max in itr] - for m in data[t]: - data[t][m] = [v for _, v in sorted(zip(epoch, data[t][m]))] - else: - data[t] = {m: d[0] for m, d in data[t].items()} - return data +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Logging.""" + +import builtins +import decimal +import logging +import os +import sys + +import pycls.core.distributed as dist +import simplejson +from iopath.common.file_io import g_pathmgr +from pycls.core.config import cfg + + +# Show filename and line number in logs +_FORMAT = "[%(filename)s: %(lineno)3d]: %(message)s" + +# Log file name (for cfg.LOG_DEST = 'file') +_LOG_FILE = "stdout.log" + +# Data output with dump_log_data(data, data_type) will be tagged w/ this +_TAG = "json_stats: " + +# Data output with dump_log_data(data, data_type) will have data[_TYPE]=data_type +_TYPE = "_type" + + +def _suppress_print(): + """Suppresses printing from the current process.""" + + def ignore(*_objects, _sep=" ", _end="\n", _file=sys.stdout, _flush=False): + pass + + builtins.print = ignore + + +def setup_logging(): + """Sets up the logging.""" + # Enable logging only for the master process + if dist.is_master_proc(): + # Clear the root logger to prevent any existing logging config + # (e.g. set by another module) from messing with our setup + logging.root.handlers = [] + # Construct logging configuration + logging_config = {"level": logging.INFO, "format": _FORMAT} + # Log either to stdout or to a file + if cfg.LOG_DEST == "stdout": + logging_config["stream"] = sys.stdout + else: + logging_config["filename"] = os.path.join(cfg.OUT_DIR, _LOG_FILE) + # Configure logging + logging.basicConfig(**logging_config) + else: + _suppress_print() + + +def get_logger(name): + """Retrieves the logger.""" + return logging.getLogger(name) + + +def dump_log_data(data, data_type, prec=4): + """Covert data (a dictionary) into tagged json string for logging.""" + data[_TYPE] = data_type + data = float_to_decimal(data, prec) + data_json = simplejson.dumps(data, sort_keys=True, use_decimal=True) + return "{:s}{:s}".format(_TAG, data_json) + + +def float_to_decimal(data, prec=4): + """Convert floats to decimals which allows for fixed width json.""" + if prec and isinstance(data, dict): + return {k: float_to_decimal(v, prec) for k, v in data.items()} + if prec and isinstance(data, float): + return decimal.Decimal(("{:." 
+ str(prec) + "f}").format(data)) + else: + return data + + +def get_log_files(log_dir, name_filter="", log_file=_LOG_FILE): + """Get all log files in directory containing subdirs of trained models.""" + names = [n for n in sorted(g_pathmgr.ls(log_dir)) if name_filter in n] + files = [os.path.join(log_dir, n, log_file) for n in names] + f_n_ps = [(f, n) for (f, n) in zip(files, names) if g_pathmgr.exists(f)] + files, names = zip(*f_n_ps) if f_n_ps else ([], []) + return files, names + + +def load_log_data(log_file, data_types_to_skip=()): + """Loads log data into a dictionary of the form data[data_type][metric][index].""" + # Load log_file + assert g_pathmgr.exists(log_file), "Log file not found: {}".format(log_file) + with g_pathmgr.open(log_file, "r") as f: + lines = f.readlines() + # Extract and parse lines that start with _TAG and have a type specified + lines = [l[l.find(_TAG) + len(_TAG) :] for l in lines if _TAG in l] + lines = [simplejson.loads(l) for l in lines] + lines = [l for l in lines if _TYPE in l and not l[_TYPE] in data_types_to_skip] + # Generate data structure accessed by data[data_type][index][metric] + data_types = [l[_TYPE] for l in lines] + data = {t: [] for t in data_types} + for t, line in zip(data_types, lines): + del line[_TYPE] + data[t].append(line) + # Generate data structure accessed by data[data_type][metric][index] + for t in data: + metrics = sorted(data[t][0].keys()) + err_str = "Inconsistent metrics in log for _type={}: {}".format(t, metrics) + assert all(sorted(d.keys()) == metrics for d in data[t]), err_str + data[t] = {m: [d[m] for d in data[t]] for m in metrics} + return data + + +def sort_log_data(data): + """Sort each data[data_type][metric] by epoch or keep only first instance.""" + for t in data: + if "epoch" in data[t]: + assert "epoch_ind" not in data[t] and "epoch_max" not in data[t] + data[t]["epoch_ind"] = [int(e.split("/")[0]) for e in data[t]["epoch"]] + data[t]["epoch_max"] = [int(e.split("/")[1]) for e in data[t]["epoch"]] + epoch = data[t]["epoch_ind"] + if "iter" in data[t]: + assert "iter_ind" not in data[t] and "iter_max" not in data[t] + data[t]["iter_ind"] = [int(i.split("/")[0]) for i in data[t]["iter"]] + data[t]["iter_max"] = [int(i.split("/")[1]) for i in data[t]["iter"]] + itr = zip(epoch, data[t]["iter_ind"], data[t]["iter_max"]) + epoch = [e + (i_ind - 1) / i_max for e, i_ind, i_max in itr] + for m in data[t]: + data[t][m] = [v for _, v in sorted(zip(epoch, data[t][m]))] + else: + data[t] = {m: d[0] for m, d in data[t].items()} + return data diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/plotting.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/plotting.py index 608f6ee442b3b8070e2bb9908be2e53e6489c657..bf7c18a6defe2040b9b5ae9ee25155e9986b66a2 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/plotting.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/plotting.py @@ -1,146 +1,146 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Plotting functions.""" - -import colorlover as cl -import matplotlib.pyplot as plt -import plotly.graph_objs as go -import plotly.offline as offline -import pycls.core.logging as logging - - -def get_plot_colors(max_colors, color_format="pyplot"): - """Generate colors for plotting.""" - colors = cl.scales["11"]["qual"]["Paired"] - if max_colors > len(colors): - colors = cl.to_rgb(cl.interp(colors, max_colors)) - if color_format == "pyplot": - return [[j / 255.0 for j in c] for c in cl.to_numeric(colors)] - return colors - - -def prepare_plot_data(log_files, names, metric="top1_err"): - """Load logs and extract data for plotting error curves.""" - plot_data = [] - for file, name in zip(log_files, names): - d, data = {}, logging.sort_log_data(logging.load_log_data(file)) - for phase in ["train", "test"]: - x = data[phase + "_epoch"]["epoch_ind"] - y = data[phase + "_epoch"][metric] - d["x_" + phase], d["y_" + phase] = x, y - d[phase + "_label"] = "[{:5.2f}] ".format(min(y) if y else 0) + name - plot_data.append(d) - assert len(plot_data) > 0, "No data to plot" - return plot_data - - -def plot_error_curves_plotly(log_files, names, filename, metric="top1_err"): - """Plot error curves using plotly and save to file.""" - plot_data = prepare_plot_data(log_files, names, metric) - colors = get_plot_colors(len(plot_data), "plotly") - # Prepare data for plots (3 sets, train duplicated w and w/o legend) - data = [] - for i, d in enumerate(plot_data): - s = str(i) - line_train = {"color": colors[i], "dash": "dashdot", "width": 1.5} - line_test = {"color": colors[i], "dash": "solid", "width": 1.5} - data.append( - go.Scatter( - x=d["x_train"], - y=d["y_train"], - mode="lines", - name=d["train_label"], - line=line_train, - legendgroup=s, - visible=True, - showlegend=False, - ) - ) - data.append( - go.Scatter( - x=d["x_test"], - y=d["y_test"], - mode="lines", - name=d["test_label"], - line=line_test, - legendgroup=s, - visible=True, - showlegend=True, - ) - ) - data.append( - go.Scatter( - x=d["x_train"], - y=d["y_train"], - mode="lines", - name=d["train_label"], - line=line_train, - legendgroup=s, - visible=False, - showlegend=True, - ) - ) - # Prepare layout w ability to toggle 'all', 'train', 'test' - titlefont = {"size": 18, "color": "#7f7f7f"} - vis = [[True, True, False], [False, False, True], [False, True, False]] - buttons = zip(["all", "train", "test"], [[{"visible": v}] for v in vis]) - buttons = [{"label": b, "args": v, "method": "update"} for b, v in buttons] - layout = go.Layout( - title=metric + " vs. epoch
[dash=train, solid=test]", - xaxis={"title": "epoch", "titlefont": titlefont}, - yaxis={"title": metric, "titlefont": titlefont}, - showlegend=True, - hoverlabel={"namelength": -1}, - updatemenus=[ - { - "buttons": buttons, - "direction": "down", - "showactive": True, - "x": 1.02, - "xanchor": "left", - "y": 1.08, - "yanchor": "top", - } - ], - ) - # Create plotly plot - offline.plot({"data": data, "layout": layout}, filename=filename) - - -def plot_error_curves_pyplot(log_files, names, filename=None, metric="top1_err"): - """Plot error curves using matplotlib.pyplot and save to file.""" - plot_data = prepare_plot_data(log_files, names, metric) - colors = get_plot_colors(len(names)) - for ind, d in enumerate(plot_data): - c, lbl = colors[ind], d["test_label"] - plt.plot(d["x_train"], d["y_train"], "--", c=c, alpha=0.8) - plt.plot(d["x_test"], d["y_test"], "-", c=c, alpha=0.8, label=lbl) - plt.title(metric + " vs. epoch\n[dash=train, solid=test]", fontsize=14) - plt.xlabel("epoch", fontsize=14) - plt.ylabel(metric, fontsize=14) - plt.grid(alpha=0.4) - plt.legend() - if filename: - plt.savefig(filename) - plt.clf() - else: - plt.show() +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Plotting functions.""" + +import colorlover as cl +import matplotlib.pyplot as plt +import plotly.graph_objs as go +import plotly.offline as offline +import pycls.core.logging as logging + + +def get_plot_colors(max_colors, color_format="pyplot"): + """Generate colors for plotting.""" + colors = cl.scales["11"]["qual"]["Paired"] + if max_colors > len(colors): + colors = cl.to_rgb(cl.interp(colors, max_colors)) + if color_format == "pyplot": + return [[j / 255.0 for j in c] for c in cl.to_numeric(colors)] + return colors + + +def prepare_plot_data(log_files, names, metric="top1_err"): + """Load logs and extract data for plotting error curves.""" + plot_data = [] + for file, name in zip(log_files, names): + d, data = {}, logging.sort_log_data(logging.load_log_data(file)) + for phase in ["train", "test"]: + x = data[phase + "_epoch"]["epoch_ind"] + y = data[phase + "_epoch"][metric] + d["x_" + phase], d["y_" + phase] = x, y + d[phase + "_label"] = "[{:5.2f}] ".format(min(y) if y else 0) + name + plot_data.append(d) + assert len(plot_data) > 0, "No data to plot" + return plot_data + + +def plot_error_curves_plotly(log_files, names, filename, metric="top1_err"): + """Plot error curves using plotly and save to file.""" + plot_data = prepare_plot_data(log_files, names, metric) + colors = get_plot_colors(len(plot_data), "plotly") + # Prepare data for plots (3 sets, train duplicated w and w/o legend) + data = [] + for i, d in enumerate(plot_data): + s = str(i) + line_train = {"color": colors[i], "dash": "dashdot", "width": 1.5} + line_test = {"color": colors[i], "dash": "solid", "width": 1.5} + data.append( + go.Scatter( + x=d["x_train"], + y=d["y_train"], + mode="lines", + name=d["train_label"], + line=line_train, + legendgroup=s, + visible=True, + showlegend=False, + ) + ) + data.append( + go.Scatter( + x=d["x_test"], + y=d["y_test"], + mode="lines", + name=d["test_label"], + line=line_test, + legendgroup=s, + visible=True, + showlegend=True, + ) + ) + data.append( + go.Scatter( + x=d["x_train"], + y=d["y_train"], + mode="lines", + name=d["train_label"], + line=line_train, + legendgroup=s, + visible=False, + showlegend=True, + ) + ) + # Prepare layout w ability to toggle 'all', 'train', 'test' + titlefont = {"size": 18, "color": "#7f7f7f"} + vis = [[True, True, False], [False, False, True], [False, True, False]] + buttons = zip(["all", "train", "test"], [[{"visible": v}] for v in vis]) + buttons = [{"label": b, "args": v, "method": "update"} for b, v in buttons] + layout = go.Layout( + title=metric + " vs. epoch
[dash=train, solid=test]", + xaxis={"title": "epoch", "titlefont": titlefont}, + yaxis={"title": metric, "titlefont": titlefont}, + showlegend=True, + hoverlabel={"namelength": -1}, + updatemenus=[ + { + "buttons": buttons, + "direction": "down", + "showactive": True, + "x": 1.02, + "xanchor": "left", + "y": 1.08, + "yanchor": "top", + } + ], + ) + # Create plotly plot + offline.plot({"data": data, "layout": layout}, filename=filename) + + +def plot_error_curves_pyplot(log_files, names, filename=None, metric="top1_err"): + """Plot error curves using matplotlib.pyplot and save to file.""" + plot_data = prepare_plot_data(log_files, names, metric) + colors = get_plot_colors(len(names)) + for ind, d in enumerate(plot_data): + c, lbl = colors[ind], d["test_label"] + plt.plot(d["x_train"], d["y_train"], "--", c=c, alpha=0.8) + plt.plot(d["x_test"], d["y_test"], "-", c=c, alpha=0.8, label=lbl) + plt.title(metric + " vs. epoch\n[dash=train, solid=test]", fontsize=14) + plt.xlabel("epoch", fontsize=14) + plt.ylabel(metric, fontsize=14) + plt.grid(alpha=0.4) + plt.legend() + if filename: + plt.savefig(filename) + plt.clf() + else: + plt.show() diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/sgd.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/sgd.py index d390654bc77c64f774d232635a2a6c544cf3d401..5f6800a198dd609f7f2100a23c4b01855b906622 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/sgd.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/sgd.py @@ -1,212 +1,212 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch.optim.optimizer import Optimizer, required -from collections import defaultdict -from pycls.core.combine_tensors import combine_npu - -class NpuFusedSGD(Optimizer): - r"""Implements stochastic gradient descent (optionally with momentum). - - Currently NPU-only. Requires Apex to be installed via - ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. - - This version of fused SGD implements 1 fusions. - - * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters into one or a few kernel launches. - - :class:`apex.optimizers.NpuFusedSGD` may be used as a drop-in replacement for ``torch.optim.SGD``:: - - opt = apex.optimizers.NpuFusedSGD(model.parameters(), lr = ....) - ... - opt.step() - - :class:`apex.optimizers.FusedSGD` should be used with Amp. Currently, if you wish to use :class:`NpuFusedSGD` with Amp, - only ``opt_level O2`` can be choosed:: - - opt = apex.optimizers.NpuFusedSGD(model.parameters(), lr = ....) - model, opt = amp.initialize(model, opt, opt_level="O2") - ... 
- opt.step() - - Nesterov momentum is based on the formula from - `On the importance of initialization and momentum in deep learning`__. - - Args: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float): learning rate - momentum (float, optional): momentum factor (default: 0) - weight_decay (float, optional): weight decay (L2 penalty) (default: 0) - dampening (float, optional): dampening for momentum (default: 0) - nesterov (bool, optional): enables Nesterov momentum (default: False) - - Example: - >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) - >>> optimizer.zero_grad() - >>> loss_fn(model(input), target).backward() - >>> optimizer.step() - - __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf - - .. note:: - The implementation of SGD with Momentum/Nesterov subtly differs from - Sutskever et. al. and implementations in some other frameworks. - - Considering the specific case of Momentum, the update can be written as - - .. math:: - \begin{aligned} - v_{t+1} & = \mu * v_{t} + g_{t+1}, \\ - p_{t+1} & = p_{t} - \text{lr} * v_{t+1}, - \end{aligned} - - where :math:`p`, :math:`g`, :math:`v` and :math:`\mu` denote the - parameters, gradient, velocity, and momentum respectively. - - This is in contrast to Sutskever et. al. and - other frameworks which employ an update of the form - - .. math:: - \begin{aligned} - v_{t+1} & = \mu * v_{t} + \text{lr} * g_{t+1}, \\ - p_{t+1} & = p_{t} - v_{t+1}. - \end{aligned} - - The Nesterov version is analogously modified. - """ - - def __init__(self, params, lr=required, momentum=0, dampening=0, - weight_decay=0, nesterov=False): - if lr is not required and lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if momentum < 0.0: - raise ValueError("Invalid momentum value: {}".format(momentum)) - if weight_decay < 0.0: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - - defaults = dict(lr=lr, momentum=momentum, dampening=dampening, - weight_decay=weight_decay, nesterov=nesterov) - if nesterov and (momentum <= 0 or dampening != 0): - raise ValueError("Nesterov momentum requires a momentum and zero dampening") - super(NpuFusedSGD, self).__init__(params, defaults) - - def __setstate__(self, state): - super(NpuFusedSGD, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('nesterov', False) - - def combine_param_state_by_group(self, momentum_buffer_in_state_before): - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedSGD should be used with AMP.') - - momentum_buffer_in_state_before = True - - stash = self._amp_stash - if stash.param_state_combined: - return - - for group in self.param_groups: - weight_decay = group['weight_decay'] - momentum = group['momentum'] - if momentum == 0: - state_combined = defaultdict(dict) - state_combined['momentum_buffer'] = None - stash.param_state_combined_list.append(state_combined) - continue - - momentum_buffer_list = [] - for p in group['params']: - if p.grad is None: - continue - - d_p = p.grad - state = self.state[p] - if 'momentum_buffer' not in state: - momentum_buffer_in_state_before = False - if weight_decay != 0: - d_p = d_p.add(p, alpha=weight_decay) - state['momentum_buffer'] = torch.clone(d_p).detach() - else: - temp = torch.clone(d_p).detach() - temp.copy_(state['momentum_buffer']) - state['momentum_buffer'] = temp - - momentum_buffer_list.append(state['momentum_buffer']) - - momentum_buffer_combined = None - if 
len(momentum_buffer_list) > 0: - momentum_buffer_combined = combine_npu(momentum_buffer_list) - - state_combined = defaultdict(dict) - state_combined['momentum_buffer'] = momentum_buffer_combined - stash.param_state_combined_list.append(state_combined) - - stash.param_state_combined = True - - @torch.no_grad() - def step(self, closure=None): - """Performs a single optimization step. - - Arguments: - closure (callable, optional): A closure that reevaluates the model - and returns the loss. - """ - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedSGD should be used with AMP.') - - momentum_buffer_in_state_before = True - self._combine_params_and_grads_by_group() - self.combine_param_state_by_group(momentum_buffer_in_state_before) - - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - - stash = self._amp_stash - - for i, group in enumerate(self.param_groups): - weight_decay = group['weight_decay'] - momentum = group['momentum'] - dampening = group['dampening'] - nesterov = group['nesterov'] - - params_combined = stash.params_combined_list[i] - grads_combined = stash.grads_combined_list[i] - if params_combined is None or grads_combined is None: - continue - - if weight_decay != 0: - grads_combined = grads_combined.add(params_combined, alpha=weight_decay) - if momentum != 0: - param_state = stash.param_state_combined_list[i] - buf = param_state['momentum_buffer'] - if momentum_buffer_in_state_before: - buf.mul_(momentum).add_(grads_combined, alpha=1 - dampening) - - if nesterov: - grads_combined = grads_combined.add(buf, alpha=momentum) - else: - grads_combined = buf - - params_combined.add_(grads_combined, alpha=-group['lr']) - - return loss +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.optim.optimizer import Optimizer, required +from collections import defaultdict +from pycls.core.combine_tensors import combine_npu + +class NpuFusedSGD(Optimizer): + r"""Implements stochastic gradient descent (optionally with momentum). + + Currently NPU-only. Requires Apex to be installed via + ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. + + This version of fused SGD implements 1 fusions. + + * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters into one or a few kernel launches. + + :class:`apex.optimizers.NpuFusedSGD` may be used as a drop-in replacement for ``torch.optim.SGD``:: + + opt = apex.optimizers.NpuFusedSGD(model.parameters(), lr = ....) + ... + opt.step() + + :class:`apex.optimizers.FusedSGD` should be used with Amp. 
Currently, if you wish to use :class:`NpuFusedSGD` with Amp, + only ``opt_level O2`` can be choosed:: + + opt = apex.optimizers.NpuFusedSGD(model.parameters(), lr = ....) + model, opt = amp.initialize(model, opt, opt_level="O2") + ... + opt.step() + + Nesterov momentum is based on the formula from + `On the importance of initialization and momentum in deep learning`__. + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float): learning rate + momentum (float, optional): momentum factor (default: 0) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + dampening (float, optional): dampening for momentum (default: 0) + nesterov (bool, optional): enables Nesterov momentum (default: False) + + Example: + >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) + >>> optimizer.zero_grad() + >>> loss_fn(model(input), target).backward() + >>> optimizer.step() + + __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf + + .. note:: + The implementation of SGD with Momentum/Nesterov subtly differs from + Sutskever et. al. and implementations in some other frameworks. + + Considering the specific case of Momentum, the update can be written as + + .. math:: + \begin{aligned} + v_{t+1} & = \mu * v_{t} + g_{t+1}, \\ + p_{t+1} & = p_{t} - \text{lr} * v_{t+1}, + \end{aligned} + + where :math:`p`, :math:`g`, :math:`v` and :math:`\mu` denote the + parameters, gradient, velocity, and momentum respectively. + + This is in contrast to Sutskever et. al. and + other frameworks which employ an update of the form + + .. math:: + \begin{aligned} + v_{t+1} & = \mu * v_{t} + \text{lr} * g_{t+1}, \\ + p_{t+1} & = p_{t} - v_{t+1}. + \end{aligned} + + The Nesterov version is analogously modified. 
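
The PyTorch-convention update spelled out in the note can be checked numerically with the stock torch.optim.SGD on a quadratic loss, where the gradient is easy to track by hand (learning rate and momentum values are illustrative):

    import torch

    lr, mu = 0.1, 0.9
    p = torch.tensor([1.0], requires_grad=True)
    opt = torch.optim.SGD([p], lr=lr, momentum=mu)

    for _ in range(2):
        opt.zero_grad()
        loss = (p ** 2).sum()  # gradient is 2 * p
        loss.backward()
        opt.step()
    # step 1: v = 2.0,                 p = 1.0 - 0.1 * 2.0 = 0.80
    # step 2: v = 0.9 * 2.0 + 1.6 = 3.4, p = 0.8 - 0.1 * 3.4 = 0.46
    print(p)  # tensor([0.4600], requires_grad=True)
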
+ """ + + def __init__(self, params, lr=required, momentum=0, dampening=0, + weight_decay=0, nesterov=False): + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if momentum < 0.0: + raise ValueError("Invalid momentum value: {}".format(momentum)) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + + defaults = dict(lr=lr, momentum=momentum, dampening=dampening, + weight_decay=weight_decay, nesterov=nesterov) + if nesterov and (momentum <= 0 or dampening != 0): + raise ValueError("Nesterov momentum requires a momentum and zero dampening") + super(NpuFusedSGD, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NpuFusedSGD, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('nesterov', False) + + def combine_param_state_by_group(self, momentum_buffer_in_state_before): + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedSGD should be used with AMP.') + + momentum_buffer_in_state_before = True + + stash = self._amp_stash + if stash.param_state_combined: + return + + for group in self.param_groups: + weight_decay = group['weight_decay'] + momentum = group['momentum'] + if momentum == 0: + state_combined = defaultdict(dict) + state_combined['momentum_buffer'] = None + stash.param_state_combined_list.append(state_combined) + continue + + momentum_buffer_list = [] + for p in group['params']: + if p.grad is None: + continue + + d_p = p.grad + state = self.state[p] + if 'momentum_buffer' not in state: + momentum_buffer_in_state_before = False + if weight_decay != 0: + d_p = d_p.add(p, alpha=weight_decay) + state['momentum_buffer'] = torch.clone(d_p).detach() + else: + temp = torch.clone(d_p).detach() + temp.copy_(state['momentum_buffer']) + state['momentum_buffer'] = temp + + momentum_buffer_list.append(state['momentum_buffer']) + + momentum_buffer_combined = None + if len(momentum_buffer_list) > 0: + momentum_buffer_combined = combine_npu(momentum_buffer_list) + + state_combined = defaultdict(dict) + state_combined['momentum_buffer'] = momentum_buffer_combined + stash.param_state_combined_list.append(state_combined) + + stash.param_state_combined = True + + @torch.no_grad() + def step(self, closure=None): + """Performs a single optimization step. + + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedSGD should be used with AMP.') + + momentum_buffer_in_state_before = True + self._combine_params_and_grads_by_group() + self.combine_param_state_by_group(momentum_buffer_in_state_before) + + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + stash = self._amp_stash + + for i, group in enumerate(self.param_groups): + weight_decay = group['weight_decay'] + momentum = group['momentum'] + dampening = group['dampening'] + nesterov = group['nesterov'] + + params_combined = stash.params_combined_list[i] + grads_combined = stash.grads_combined_list[i] + if params_combined is None or grads_combined is None: + continue + + if weight_decay != 0: + grads_combined = grads_combined.add(params_combined, alpha=weight_decay) + if momentum != 0: + param_state = stash.param_state_combined_list[i] + buf = param_state['momentum_buffer'] + if momentum_buffer_in_state_before: + buf.mul_(momentum).add_(grads_combined, alpha=1 - dampening) + + if nesterov: + grads_combined = grads_combined.add(buf, alpha=momentum) + else: + grads_combined = buf + + params_combined.add_(grads_combined, alpha=-group['lr']) + + return loss diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/timer.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/timer.py index f2c0c2284e0bed46d3f24b5edd65c003cd4de568..1478b7efaac7b4e00a9db6302d790f0dbcc51f9d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/timer.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/core/timer.py @@ -1,53 +1,53 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Timer.""" - -import time - - -class Timer(object): - """A simple timer (adapted from Detectron).""" - - def __init__(self): - self.total_time = None - self.calls = None - self.start_time = None - self.diff = None - self.average_time = None - self.reset() - - def tic(self): - # using time.time as time.clock does not normalize for multithreading - self.start_time = time.time() - - def toc(self): - self.diff = time.time() - self.start_time - self.total_time += self.diff - self.calls += 1 - self.average_time = self.total_time / self.calls - - def reset(self): - self.total_time = 0.0 - self.calls = 0 - self.start_time = 0.0 - self.diff = 0.0 - self.average_time = 0.0 +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Timer.""" + +import time + + +class Timer(object): + """A simple timer (adapted from Detectron).""" + + def __init__(self): + self.total_time = None + self.calls = None + self.start_time = None + self.diff = None + self.average_time = None + self.reset() + + def tic(self): + # using time.time as time.clock does not normalize for multithreading + self.start_time = time.time() + + def toc(self): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + + def reset(self): + self.total_time = 0.0 + self.calls = 0 + self.start_time = 0.0 + self.diff = 0.0 + self.average_time = 0.0 diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/augment.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/augment.py index 4f29c66d77679562ed1fb2991e87ccf3caeff8c8..ba5e1426ed5276e63312be57621d68a4a265bb27 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/augment.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/augment.py @@ -1,248 +1,248 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Lightweight and simple implementation of AutoAugment and RandAugment. - -AutoAugment - https://arxiv.org/abs/1805.09501 -RandAugment - https://arxiv.org/abs/1909.13719 - -http://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py -Note that the official implementation varies substantially from the papers :-( - -Our AutoAugment policy should be fairly identical to the official AutoAugment policy. -The main difference is we set POSTERIZE_MIN = 1, which avoids degenerate (all 0) images. -Our RandAugment policy differs, and uses transforms that increase in intensity with -increasing magnitude. This allows for a more natural control of the magnitude. That is, -setting magnitude = 0 results in ops that leaves the image unchanged, if possible. -We also set the range of the magnitude to be 0 to 1 to avoid setting a "max level". 
- -Our implementation is inspired by and uses policies that are the similar to those in: -https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/auto_augment.py -Specifically our implementation can be *numerically identical* as the implementation in -timm if using timm's "v0" policy for AutoAugment and "inc" transforms for RandAugment -and if we set POSTERIZE_MIN = 0 (although as noted our default is POSTERIZE_MIN = 1). -Note that magnitude in our code ranges from 0 to 1 (compared to 0 to 10 in timm). - -Specifically, given the same seeds, the functions from timm: - out_auto = auto_augment_transform("v0", {"interpolation": 2})(im) - out_rand = rand_augment_transform("rand-inc1-n2-m05", {"interpolation": 2})(im) -Are numerically equivalent to: - POSTERIZE_MIN = 0 - out_auto = auto_augment(im) - out_rand = rand_augment(im, prob=0.5, n_ops=2, magnitude=0.5) -Tested as of 10/07/2020. Can alter corresponding params for both and should match. - -Finally, the ops and augmentations can be visualized as follows: - from PIL import Image - import pycls.datasets.augment as augment - im = Image.open("scratch.jpg") - im_ops = augment.visualize_ops(im) - im_rand = augment.visualize_aug(im, augment=augment.rand_augment, magnitude=0.5) - im_auto = augment.visualize_aug(im, augment=augment.auto_augment) - im_ops.show() - im_auto.show() - im_rand.show() -""" - -import random - -import numpy as np -from PIL import Image, ImageEnhance, ImageOps - - -# Minimum value for posterize (0 in EfficientNet implementation) -POSTERIZE_MIN = 1 - -# Parameters for affine warping and rotation -WARP_PARAMS = {"fillcolor": (128, 128, 128), "resample": Image.BILINEAR} - - -def affine_warp(im, data): - """Applies affine transform to image.""" - return im.transform(im.size, Image.AFFINE, data, **WARP_PARAMS) - - -OP_FUNCTIONS = { - # Each op takes an image x and a level v and returns an augmented image. - "auto_contrast": lambda x, _: ImageOps.autocontrast(x), - "equalize": lambda x, _: ImageOps.equalize(x), - "invert": lambda x, _: ImageOps.invert(x), - "rotate": lambda x, v: x.rotate(v, **WARP_PARAMS), - "posterize": lambda x, v: ImageOps.posterize(x, max(POSTERIZE_MIN, int(v))), - "posterize_inc": lambda x, v: ImageOps.posterize(x, max(POSTERIZE_MIN, 4 - int(v))), - "solarize": lambda x, v: x.point(lambda i: i if i < int(v) else 255 - i), - "solarize_inc": lambda x, v: x.point(lambda i: i if i < 256 - v else 255 - i), - "solarize_add": lambda x, v: x.point(lambda i: min(255, v + i) if i < 128 else i), - "color": lambda x, v: ImageEnhance.Color(x).enhance(v), - "contrast": lambda x, v: ImageEnhance.Contrast(x).enhance(v), - "brightness": lambda x, v: ImageEnhance.Brightness(x).enhance(v), - "sharpness": lambda x, v: ImageEnhance.Sharpness(x).enhance(v), - "color_inc": lambda x, v: ImageEnhance.Color(x).enhance(1 + v), - "contrast_inc": lambda x, v: ImageEnhance.Contrast(x).enhance(1 + v), - "brightness_inc": lambda x, v: ImageEnhance.Brightness(x).enhance(1 + v), - "sharpness_inc": lambda x, v: ImageEnhance.Sharpness(x).enhance(1 + v), - "shear_x": lambda x, v: affine_warp(x, (1, v, 0, 0, 1, 0)), - "shear_y": lambda x, v: affine_warp(x, (1, 0, 0, v, 1, 0)), - "trans_x": lambda x, v: affine_warp(x, (1, 0, v * x.size[0], 0, 1, 0)), - "trans_y": lambda x, v: affine_warp(x, (1, 0, 0, 0, 1, v * x.size[1])), -} - - -OP_RANGES = { - # Ranges for each op in the form of a (min, max, negate). 
- "auto_contrast": (0, 1, False), - "equalize": (0, 1, False), - "invert": (0, 1, False), - "rotate": (0.0, 30.0, True), - "posterize": (0, 4, False), - "posterize_inc": (0, 4, False), - "solarize": (0, 256, False), - "solarize_inc": (0, 256, False), - "solarize_add": (0, 110, False), - "color": (0.1, 1.9, False), - "contrast": (0.1, 1.9, False), - "brightness": (0.1, 1.9, False), - "sharpness": (0.1, 1.9, False), - "color_inc": (0, 0.9, True), - "contrast_inc": (0, 0.9, True), - "brightness_inc": (0, 0.9, True), - "sharpness_inc": (0, 0.9, True), - "shear_x": (0.0, 0.3, True), - "shear_y": (0.0, 0.3, True), - "trans_x": (0.0, 0.45, True), - "trans_y": (0.0, 0.45, True), -} - - -AUTOAUG_POLICY = [ - # AutoAugment "policy_v0" in form of (op, prob, magnitude), where magnitude <= 1. - [("equalize", 0.8, 0.1), ("shear_y", 0.8, 0.4)], - [("color", 0.4, 0.9), ("equalize", 0.6, 0.3)], - [("color", 0.4, 0.1), ("rotate", 0.6, 0.8)], - [("solarize", 0.8, 0.3), ("equalize", 0.4, 0.7)], - [("solarize", 0.4, 0.2), ("solarize", 0.6, 0.2)], - [("color", 0.2, 0.0), ("equalize", 0.8, 0.8)], - [("equalize", 0.4, 0.8), ("solarize_add", 0.8, 0.3)], - [("shear_x", 0.2, 0.9), ("rotate", 0.6, 0.8)], - [("color", 0.6, 0.1), ("equalize", 1.0, 0.2)], - [("invert", 0.4, 0.9), ("rotate", 0.6, 0.0)], - [("equalize", 1.0, 0.9), ("shear_y", 0.6, 0.3)], - [("color", 0.4, 0.7), ("equalize", 0.6, 0.0)], - [("posterize", 0.4, 0.6), ("auto_contrast", 0.4, 0.7)], - [("solarize", 0.6, 0.8), ("color", 0.6, 0.9)], - [("solarize", 0.2, 0.4), ("rotate", 0.8, 0.9)], - [("rotate", 1.0, 0.7), ("trans_y", 0.8, 0.9)], - [("shear_x", 0.0, 0.0), ("solarize", 0.8, 0.4)], - [("shear_y", 0.8, 0.0), ("color", 0.6, 0.4)], - [("color", 1.0, 0.0), ("rotate", 0.6, 0.2)], - [("equalize", 0.8, 0.4), ("equalize", 0.0, 0.8)], - [("equalize", 1.0, 0.4), ("auto_contrast", 0.6, 0.2)], - [("shear_y", 0.4, 0.7), ("solarize_add", 0.6, 0.7)], - [("posterize", 0.8, 0.2), ("solarize", 0.6, 1.0)], - [("solarize", 0.6, 0.8), ("equalize", 0.6, 0.1)], - [("color", 0.8, 0.6), ("rotate", 0.4, 0.5)], -] - - -RANDAUG_OPS = [ - # RandAugment list of operations using "increasing" transforms. - "auto_contrast", - "equalize", - "invert", - "rotate", - "posterize_inc", - "solarize_inc", - "solarize_add", - "color_inc", - "contrast_inc", - "brightness_inc", - "sharpness_inc", - "shear_x", - "shear_y", - "trans_x", - "trans_y", -] - - -def apply_op(im, op, prob, magnitude): - """Apply the selected op to image with given probability and magnitude.""" - # The magnitude is converted to an absolute value v for an op (some ops use -v or v) - assert 0 <= magnitude <= 1 - assert op in OP_RANGES and op in OP_FUNCTIONS, "unknown op " + op - if prob < 1 and random.random() > prob: - return im - min_v, max_v, negate = OP_RANGES[op] - v = magnitude * (max_v - min_v) + min_v - v = -v if negate and random.random() > 0.5 else v - return OP_FUNCTIONS[op](im, v) - - -def rand_augment(im, magnitude, ops=None, n_ops=2, prob=1.0): - """Applies random augmentation to an image.""" - ops = ops if ops else RANDAUG_OPS - for op in np.random.choice(ops, int(n_ops)): - im = apply_op(im, op, prob, magnitude) - return im - - -def auto_augment(im, policy=None): - """Apply auto augmentation to an image.""" - policy = policy if policy else AUTOAUG_POLICY - for op, prob, magnitude in random.choice(policy): - im = apply_op(im, op, prob, magnitude) - return im - - -def make_augment(augment_str): - """Generate augmentation function from separated parameter string. 
- The parameter string augment_str may be either "AutoAugment" or "RandAugment". - Undocumented use allows for specifying extra params, e.g. "RandAugment_N2_M0.5".""" - params = augment_str.split("_") - names = {"N": "n_ops", "M": "magnitude", "P": "prob"} - assert params[0] in ["RandAugment", "AutoAugment"] - assert all(p[0] in names for p in params[1:]) - keys = [names[p[0]] for p in params[1:]] - vals = [float(p[1:]) for p in params[1:]] - augment = rand_augment if params[0] == "RandAugment" else auto_augment - return lambda im: augment(im, **dict(zip(keys, vals))) - - -def visualize_ops(im, ops=None, num_steps=10): - """Visualize ops by applying each op by varying amounts.""" - ops = ops if ops else RANDAUG_OPS - w, h, magnitudes = im.size[0], im.size[1], np.linspace(0, 1, num_steps) - output = Image.new("RGB", (w * num_steps, h * len(ops))) - for i, op in enumerate(ops): - for j, m in enumerate(magnitudes): - out = apply_op(im, op, prob=1.0, magnitude=m) - output.paste(out, (j * w, i * h)) - return output - - -def visualize_aug(im, augment=rand_augment, num_trials=10, **kwargs): - """Visualize augmentation by applying random augmentations.""" - w, h = im.size[0], im.size[1] - output = Image.new("RGB", (w * num_trials, h * num_trials)) - for i in range(num_trials): - for j in range(num_trials): - output.paste(augment(im, **kwargs), (j * w, i * h)) - return output +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Lightweight and simple implementation of AutoAugment and RandAugment. + +AutoAugment - https://arxiv.org/abs/1805.09501 +RandAugment - https://arxiv.org/abs/1909.13719 + +http://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py +Note that the official implementation varies substantially from the papers :-( + +Our AutoAugment policy should be fairly identical to the official AutoAugment policy. +The main difference is we set POSTERIZE_MIN = 1, which avoids degenerate (all 0) images. +Our RandAugment policy differs, and uses transforms that increase in intensity with +increasing magnitude. This allows for a more natural control of the magnitude. That is, +setting magnitude = 0 results in ops that leaves the image unchanged, if possible. +We also set the range of the magnitude to be 0 to 1 to avoid setting a "max level". + +Our implementation is inspired by and uses policies that are the similar to those in: +https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/auto_augment.py +Specifically our implementation can be *numerically identical* as the implementation in +timm if using timm's "v0" policy for AutoAugment and "inc" transforms for RandAugment +and if we set POSTERIZE_MIN = 0 (although as noted our default is POSTERIZE_MIN = 1). 
+Note that magnitude in our code ranges from 0 to 1 (compared to 0 to 10 in timm). + +Specifically, given the same seeds, the functions from timm: + out_auto = auto_augment_transform("v0", {"interpolation": 2})(im) + out_rand = rand_augment_transform("rand-inc1-n2-m05", {"interpolation": 2})(im) +Are numerically equivalent to: + POSTERIZE_MIN = 0 + out_auto = auto_augment(im) + out_rand = rand_augment(im, prob=0.5, n_ops=2, magnitude=0.5) +Tested as of 10/07/2020. Can alter corresponding params for both and should match. + +Finally, the ops and augmentations can be visualized as follows: + from PIL import Image + import pycls.datasets.augment as augment + im = Image.open("scratch.jpg") + im_ops = augment.visualize_ops(im) + im_rand = augment.visualize_aug(im, augment=augment.rand_augment, magnitude=0.5) + im_auto = augment.visualize_aug(im, augment=augment.auto_augment) + im_ops.show() + im_auto.show() + im_rand.show() +""" + +import random + +import numpy as np +from PIL import Image, ImageEnhance, ImageOps + + +# Minimum value for posterize (0 in EfficientNet implementation) +POSTERIZE_MIN = 1 + +# Parameters for affine warping and rotation +WARP_PARAMS = {"fillcolor": (128, 128, 128), "resample": Image.BILINEAR} + + +def affine_warp(im, data): + """Applies affine transform to image.""" + return im.transform(im.size, Image.AFFINE, data, **WARP_PARAMS) + + +OP_FUNCTIONS = { + # Each op takes an image x and a level v and returns an augmented image. + "auto_contrast": lambda x, _: ImageOps.autocontrast(x), + "equalize": lambda x, _: ImageOps.equalize(x), + "invert": lambda x, _: ImageOps.invert(x), + "rotate": lambda x, v: x.rotate(v, **WARP_PARAMS), + "posterize": lambda x, v: ImageOps.posterize(x, max(POSTERIZE_MIN, int(v))), + "posterize_inc": lambda x, v: ImageOps.posterize(x, max(POSTERIZE_MIN, 4 - int(v))), + "solarize": lambda x, v: x.point(lambda i: i if i < int(v) else 255 - i), + "solarize_inc": lambda x, v: x.point(lambda i: i if i < 256 - v else 255 - i), + "solarize_add": lambda x, v: x.point(lambda i: min(255, v + i) if i < 128 else i), + "color": lambda x, v: ImageEnhance.Color(x).enhance(v), + "contrast": lambda x, v: ImageEnhance.Contrast(x).enhance(v), + "brightness": lambda x, v: ImageEnhance.Brightness(x).enhance(v), + "sharpness": lambda x, v: ImageEnhance.Sharpness(x).enhance(v), + "color_inc": lambda x, v: ImageEnhance.Color(x).enhance(1 + v), + "contrast_inc": lambda x, v: ImageEnhance.Contrast(x).enhance(1 + v), + "brightness_inc": lambda x, v: ImageEnhance.Brightness(x).enhance(1 + v), + "sharpness_inc": lambda x, v: ImageEnhance.Sharpness(x).enhance(1 + v), + "shear_x": lambda x, v: affine_warp(x, (1, v, 0, 0, 1, 0)), + "shear_y": lambda x, v: affine_warp(x, (1, 0, 0, v, 1, 0)), + "trans_x": lambda x, v: affine_warp(x, (1, 0, v * x.size[0], 0, 1, 0)), + "trans_y": lambda x, v: affine_warp(x, (1, 0, 0, 0, 1, v * x.size[1])), +} + + +OP_RANGES = { + # Ranges for each op in the form of a (min, max, negate). 
+ "auto_contrast": (0, 1, False), + "equalize": (0, 1, False), + "invert": (0, 1, False), + "rotate": (0.0, 30.0, True), + "posterize": (0, 4, False), + "posterize_inc": (0, 4, False), + "solarize": (0, 256, False), + "solarize_inc": (0, 256, False), + "solarize_add": (0, 110, False), + "color": (0.1, 1.9, False), + "contrast": (0.1, 1.9, False), + "brightness": (0.1, 1.9, False), + "sharpness": (0.1, 1.9, False), + "color_inc": (0, 0.9, True), + "contrast_inc": (0, 0.9, True), + "brightness_inc": (0, 0.9, True), + "sharpness_inc": (0, 0.9, True), + "shear_x": (0.0, 0.3, True), + "shear_y": (0.0, 0.3, True), + "trans_x": (0.0, 0.45, True), + "trans_y": (0.0, 0.45, True), +} + + +AUTOAUG_POLICY = [ + # AutoAugment "policy_v0" in form of (op, prob, magnitude), where magnitude <= 1. + [("equalize", 0.8, 0.1), ("shear_y", 0.8, 0.4)], + [("color", 0.4, 0.9), ("equalize", 0.6, 0.3)], + [("color", 0.4, 0.1), ("rotate", 0.6, 0.8)], + [("solarize", 0.8, 0.3), ("equalize", 0.4, 0.7)], + [("solarize", 0.4, 0.2), ("solarize", 0.6, 0.2)], + [("color", 0.2, 0.0), ("equalize", 0.8, 0.8)], + [("equalize", 0.4, 0.8), ("solarize_add", 0.8, 0.3)], + [("shear_x", 0.2, 0.9), ("rotate", 0.6, 0.8)], + [("color", 0.6, 0.1), ("equalize", 1.0, 0.2)], + [("invert", 0.4, 0.9), ("rotate", 0.6, 0.0)], + [("equalize", 1.0, 0.9), ("shear_y", 0.6, 0.3)], + [("color", 0.4, 0.7), ("equalize", 0.6, 0.0)], + [("posterize", 0.4, 0.6), ("auto_contrast", 0.4, 0.7)], + [("solarize", 0.6, 0.8), ("color", 0.6, 0.9)], + [("solarize", 0.2, 0.4), ("rotate", 0.8, 0.9)], + [("rotate", 1.0, 0.7), ("trans_y", 0.8, 0.9)], + [("shear_x", 0.0, 0.0), ("solarize", 0.8, 0.4)], + [("shear_y", 0.8, 0.0), ("color", 0.6, 0.4)], + [("color", 1.0, 0.0), ("rotate", 0.6, 0.2)], + [("equalize", 0.8, 0.4), ("equalize", 0.0, 0.8)], + [("equalize", 1.0, 0.4), ("auto_contrast", 0.6, 0.2)], + [("shear_y", 0.4, 0.7), ("solarize_add", 0.6, 0.7)], + [("posterize", 0.8, 0.2), ("solarize", 0.6, 1.0)], + [("solarize", 0.6, 0.8), ("equalize", 0.6, 0.1)], + [("color", 0.8, 0.6), ("rotate", 0.4, 0.5)], +] + + +RANDAUG_OPS = [ + # RandAugment list of operations using "increasing" transforms. + "auto_contrast", + "equalize", + "invert", + "rotate", + "posterize_inc", + "solarize_inc", + "solarize_add", + "color_inc", + "contrast_inc", + "brightness_inc", + "sharpness_inc", + "shear_x", + "shear_y", + "trans_x", + "trans_y", +] + + +def apply_op(im, op, prob, magnitude): + """Apply the selected op to image with given probability and magnitude.""" + # The magnitude is converted to an absolute value v for an op (some ops use -v or v) + assert 0 <= magnitude <= 1 + assert op in OP_RANGES and op in OP_FUNCTIONS, "unknown op " + op + if prob < 1 and random.random() > prob: + return im + min_v, max_v, negate = OP_RANGES[op] + v = magnitude * (max_v - min_v) + min_v + v = -v if negate and random.random() > 0.5 else v + return OP_FUNCTIONS[op](im, v) + + +def rand_augment(im, magnitude, ops=None, n_ops=2, prob=1.0): + """Applies random augmentation to an image.""" + ops = ops if ops else RANDAUG_OPS + for op in np.random.choice(ops, int(n_ops)): + im = apply_op(im, op, prob, magnitude) + return im + + +def auto_augment(im, policy=None): + """Apply auto augmentation to an image.""" + policy = policy if policy else AUTOAUG_POLICY + for op, prob, magnitude in random.choice(policy): + im = apply_op(im, op, prob, magnitude) + return im + + +def make_augment(augment_str): + """Generate augmentation function from separated parameter string. 
+ The parameter string augment_str may be either "AutoAugment" or "RandAugment". + Undocumented use allows for specifying extra params, e.g. "RandAugment_N2_M0.5".""" + params = augment_str.split("_") + names = {"N": "n_ops", "M": "magnitude", "P": "prob"} + assert params[0] in ["RandAugment", "AutoAugment"] + assert all(p[0] in names for p in params[1:]) + keys = [names[p[0]] for p in params[1:]] + vals = [float(p[1:]) for p in params[1:]] + augment = rand_augment if params[0] == "RandAugment" else auto_augment + return lambda im: augment(im, **dict(zip(keys, vals))) + + +def visualize_ops(im, ops=None, num_steps=10): + """Visualize ops by applying each op by varying amounts.""" + ops = ops if ops else RANDAUG_OPS + w, h, magnitudes = im.size[0], im.size[1], np.linspace(0, 1, num_steps) + output = Image.new("RGB", (w * num_steps, h * len(ops))) + for i, op in enumerate(ops): + for j, m in enumerate(magnitudes): + out = apply_op(im, op, prob=1.0, magnitude=m) + output.paste(out, (j * w, i * h)) + return output + + +def visualize_aug(im, augment=rand_augment, num_trials=10, **kwargs): + """Visualize augmentation by applying random augmentations.""" + w, h = im.size[0], im.size[1] + output = Image.new("RGB", (w * num_trials, h * num_trials)) + for i in range(num_trials): + for j in range(num_trials): + output.paste(augment(im, **kwargs), (j * w, i * h)) + return output diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/cifar10.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/cifar10.py index f2ed7434b2e6050b205406c4c75119016f497425..9922ca032af8bb394ca3eeebea25dc54d2a58668 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/cifar10.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/cifar10.py @@ -1,95 +1,95 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""CIFAR10 dataset.""" - -import os -import pickle - -import numpy as np -import pycls.core.logging as logging -import torch.utils.data -from iopath.common.file_io import g_pathmgr -from pycls.core.config import cfg - - -logger = logging.get_logger(__name__) - -# Per-channel mean and standard deviation values on CIFAR -_MEAN = [125.3, 123.0, 113.9] -_STD = [63.0, 62.1, 66.7] - - -class Cifar10(torch.utils.data.Dataset): - """CIFAR-10 dataset.""" - - def __init__(self, data_path, split): - assert g_pathmgr.exists(data_path), "Data path '{}' not found".format(data_path) - splits = ["train", "test"] - assert split in splits, "Split '{}' not supported for cifar".format(split) - logger.info("Constructing CIFAR-10 {}...".format(split)) - self._im_size = cfg.TRAIN.IM_SIZE - self._data_path, self._split = data_path, split - self._inputs, self._labels = self._load_data() - - def _load_data(self): - """Loads data into memory.""" - logger.info("{} data path: {}".format(self._split, self._data_path)) - # Compute data batch names - if self._split == "train": - batch_names = ["data_batch_{}".format(i) for i in range(1, 6)] - else: - batch_names = ["test_batch"] - # Load data batches - inputs, labels = [], [] - for batch_name in batch_names: - batch_path = os.path.join(self._data_path, batch_name) - with g_pathmgr.open(batch_path, "rb") as f: - data = pickle.load(f, encoding="bytes") - inputs.append(data[b"data"]) - labels += data[b"labels"] - # Combine and reshape the inputs - inputs = np.vstack(inputs).astype(np.float32) - inputs = inputs.reshape((-1, 3, self._im_size, self._im_size)) - return inputs, labels - - def _prepare_im(self, im): - """Prepares the image for network input.""" - for i in range(3): - # Perform per-channel normalization on CHW image - im[i] = (im[i] - _MEAN[i]) / _STD[i] - if self._split == "train": - # Randomly flip and crop center patch from CHW image - size = self._im_size - im = im[:, :, ::-1] if np.random.uniform() < 0.5 else im - im = np.pad(im, ((0, 0), (4, 4), (4, 4)), mode="constant") - y = np.random.randint(0, im.shape[1] - size) - x = np.random.randint(0, im.shape[2] - size) - im = im[:, y : (y + size), x : (x + size)] - return im - - def __getitem__(self, index): - im, label = self._inputs[index, ...].copy(), self._labels[index] - im = self._prepare_im(im) - return im, label - - def __len__(self): - return self._inputs.shape[0] +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""CIFAR10 dataset.""" + +import os +import pickle + +import numpy as np +import pycls.core.logging as logging +import torch.utils.data +from iopath.common.file_io import g_pathmgr +from pycls.core.config import cfg + + +logger = logging.get_logger(__name__) + +# Per-channel mean and standard deviation values on CIFAR +_MEAN = [125.3, 123.0, 113.9] +_STD = [63.0, 62.1, 66.7] + + +class Cifar10(torch.utils.data.Dataset): + """CIFAR-10 dataset.""" + + def __init__(self, data_path, split): + assert g_pathmgr.exists(data_path), "Data path '{}' not found".format(data_path) + splits = ["train", "test"] + assert split in splits, "Split '{}' not supported for cifar".format(split) + logger.info("Constructing CIFAR-10 {}...".format(split)) + self._im_size = cfg.TRAIN.IM_SIZE + self._data_path, self._split = data_path, split + self._inputs, self._labels = self._load_data() + + def _load_data(self): + """Loads data into memory.""" + logger.info("{} data path: {}".format(self._split, self._data_path)) + # Compute data batch names + if self._split == "train": + batch_names = ["data_batch_{}".format(i) for i in range(1, 6)] + else: + batch_names = ["test_batch"] + # Load data batches + inputs, labels = [], [] + for batch_name in batch_names: + batch_path = os.path.join(self._data_path, batch_name) + with g_pathmgr.open(batch_path, "rb") as f: + data = pickle.load(f, encoding="bytes") + inputs.append(data[b"data"]) + labels += data[b"labels"] + # Combine and reshape the inputs + inputs = np.vstack(inputs).astype(np.float32) + inputs = inputs.reshape((-1, 3, self._im_size, self._im_size)) + return inputs, labels + + def _prepare_im(self, im): + """Prepares the image for network input.""" + for i in range(3): + # Perform per-channel normalization on CHW image + im[i] = (im[i] - _MEAN[i]) / _STD[i] + if self._split == "train": + # Randomly flip and crop center patch from CHW image + size = self._im_size + im = im[:, :, ::-1] if np.random.uniform() < 0.5 else im + im = np.pad(im, ((0, 0), (4, 4), (4, 4)), mode="constant") + y = np.random.randint(0, im.shape[1] - size) + x = np.random.randint(0, im.shape[2] - size) + im = im[:, y : (y + size), x : (x + size)] + return im + + def __getitem__(self, index): + im, label = self._inputs[index, ...].copy(), self._labels[index] + im = self._prepare_im(im) + return im, label + + def __len__(self): + return self._inputs.shape[0] diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/imagenet.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/imagenet.py index 8234ec4a4032308941ca61f1148d8aed5bd058cf..31edfb723f19cb7c9329e73a4b4e16fba02af144 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/imagenet.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/imagenet.py @@ -1,116 +1,116 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""ImageNet dataset.""" - -import os -import re - -import cv2 -cv2.setNumThreads(1) -import numpy as np -import pycls.core.logging as logging -import pycls.datasets.transforms as transforms -import torch.utils.data -from pycls.core.config import cfg - - -logger = logging.get_logger(__name__) - -# Per-channel mean and standard deviation values on ImageNet (in RGB order) -# https://github.com/facebookarchive/fb.resnet.torch/blob/master/datasets/imagenet.lua -_MEAN = [0.485, 0.456, 0.406] -_STD = [0.229, 0.224, 0.225] - -# Constants for lighting normalization on ImageNet (in RGB order) -# https://github.com/facebookarchive/fb.resnet.torch/blob/master/datasets/imagenet.lua -_EIG_VALS = [[0.2175, 0.0188, 0.0045]] -_EIG_VECS = [ - [-0.5675, 0.7192, 0.4009], - [-0.5808, -0.0045, -0.8140], - [-0.5836, -0.6948, 0.4203], -] - - -class ImageNet(torch.utils.data.Dataset): - """ImageNet dataset.""" - - def __init__(self, data_path, split): - assert os.path.exists(data_path), "Data path '{}' not found".format(data_path) - splits = ["train", "val"] - assert split in splits, "Split '{}' not supported for ImageNet".format(split) - logger.info("Constructing ImageNet {}...".format(split)) - self._data_path, self._split = data_path, split - self._construct_imdb() - - def _construct_imdb(self): - """Constructs the imdb.""" - # Compile the split data path - split_path = os.path.join(self._data_path, self._split) - logger.info("{} data path: {}".format(self._split, split_path)) - # Images are stored per class in subdirs (format: n) - split_files = os.listdir(split_path) - self._class_ids = sorted(f for f in split_files if re.match(r"^n[0-9]+$", f)) - # Map ImageNet class ids to contiguous ids - self._class_id_cont_id = {v: i for i, v in enumerate(self._class_ids)} - # Construct the image db - self._imdb = [] - for class_id in self._class_ids: - cont_id = self._class_id_cont_id[class_id] - im_dir = os.path.join(split_path, class_id) - for im_name in os.listdir(im_dir): - im_path = os.path.join(im_dir, im_name) - self._imdb.append({"im_path": im_path, "class": cont_id}) - logger.info("Number of images: {}".format(len(self._imdb))) - logger.info("Number of classes: {}".format(len(self._class_ids))) - - def _prepare_im(self, im): - """Prepares the image for network input (HWC/BGR/int -> CHW/BGR/float).""" - # Convert HWC/BGR/int to HWC/RGB/float format for applying transforms - im = im[:, :, ::-1].astype(np.float32) / 255 - # Train and test setups differ - train_size, test_size = cfg.TRAIN.IM_SIZE, cfg.TEST.IM_SIZE - if self._split == "train": - # For training use random_sized_crop, horizontal_flip, augment, lighting - im = transforms.random_sized_crop(im, train_size) - im = transforms.horizontal_flip(im, prob=0.5) - im = transforms.augment(im, cfg.TRAIN.AUGMENT) - im = transforms.lighting(im, cfg.TRAIN.PCA_STD, _EIG_VALS, _EIG_VECS) - else: - # For testing use scale and center crop - im = transforms.scale_and_center_crop(im, test_size, train_size) - # For training and testing use color normalization - im = transforms.color_norm(im, _MEAN, _STD) - # Convert HWC/RGB/float to CHW/BGR/float format - im = np.ascontiguousarray(im[:, :, ::-1].transpose([2, 0, 1])) - return im - - def __getitem__(self, index): - # Load the image - im = cv2.imread(self._imdb[index]["im_path"]) - # Prepare the image for training / testing - im = self._prepare_im(im) - # Retrieve the label - label = self._imdb[index]["class"] - return 
im, label - - def __len__(self): - return len(self._imdb) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ImageNet dataset.""" + +import os +import re + +import cv2 +cv2.setNumThreads(1) +import numpy as np +import pycls.core.logging as logging +import pycls.datasets.transforms as transforms +import torch.utils.data +from pycls.core.config import cfg + + +logger = logging.get_logger(__name__) + +# Per-channel mean and standard deviation values on ImageNet (in RGB order) +# https://github.com/facebookarchive/fb.resnet.torch/blob/master/datasets/imagenet.lua +_MEAN = [0.485, 0.456, 0.406] +_STD = [0.229, 0.224, 0.225] + +# Constants for lighting normalization on ImageNet (in RGB order) +# https://github.com/facebookarchive/fb.resnet.torch/blob/master/datasets/imagenet.lua +_EIG_VALS = [[0.2175, 0.0188, 0.0045]] +_EIG_VECS = [ + [-0.5675, 0.7192, 0.4009], + [-0.5808, -0.0045, -0.8140], + [-0.5836, -0.6948, 0.4203], +] + + +class ImageNet(torch.utils.data.Dataset): + """ImageNet dataset.""" + + def __init__(self, data_path, split): + assert os.path.exists(data_path), "Data path '{}' not found".format(data_path) + splits = ["train", "val"] + assert split in splits, "Split '{}' not supported for ImageNet".format(split) + logger.info("Constructing ImageNet {}...".format(split)) + self._data_path, self._split = data_path, split + self._construct_imdb() + + def _construct_imdb(self): + """Constructs the imdb.""" + # Compile the split data path + split_path = os.path.join(self._data_path, self._split) + logger.info("{} data path: {}".format(self._split, split_path)) + # Images are stored per class in subdirs (format: n) + split_files = os.listdir(split_path) + self._class_ids = sorted(f for f in split_files if re.match(r"^n[0-9]+$", f)) + # Map ImageNet class ids to contiguous ids + self._class_id_cont_id = {v: i for i, v in enumerate(self._class_ids)} + # Construct the image db + self._imdb = [] + for class_id in self._class_ids: + cont_id = self._class_id_cont_id[class_id] + im_dir = os.path.join(split_path, class_id) + for im_name in os.listdir(im_dir): + im_path = os.path.join(im_dir, im_name) + self._imdb.append({"im_path": im_path, "class": cont_id}) + logger.info("Number of images: {}".format(len(self._imdb))) + logger.info("Number of classes: {}".format(len(self._class_ids))) + + def _prepare_im(self, im): + """Prepares the image for network input (HWC/BGR/int -> CHW/BGR/float).""" + # Convert HWC/BGR/int to HWC/RGB/float format for applying transforms + im = im[:, :, ::-1].astype(np.float32) / 255 + # Train and test setups differ + train_size, test_size = cfg.TRAIN.IM_SIZE, cfg.TEST.IM_SIZE + if self._split == "train": + # For training use random_sized_crop, horizontal_flip, augment, lighting + im = transforms.random_sized_crop(im, train_size) 
+ im = transforms.horizontal_flip(im, prob=0.5) + im = transforms.augment(im, cfg.TRAIN.AUGMENT) + im = transforms.lighting(im, cfg.TRAIN.PCA_STD, _EIG_VALS, _EIG_VECS) + else: + # For testing use scale and center crop + im = transforms.scale_and_center_crop(im, test_size, train_size) + # For training and testing use color normalization + im = transforms.color_norm(im, _MEAN, _STD) + # Convert HWC/RGB/float to CHW/BGR/float format + im = np.ascontiguousarray(im[:, :, ::-1].transpose([2, 0, 1])) + return im + + def __getitem__(self, index): + # Load the image + im = cv2.imread(self._imdb[index]["im_path"]) + # Prepare the image for training / testing + im = self._prepare_im(im) + # Retrieve the label + label = self._imdb[index]["class"] + return im, label + + def __len__(self): + return len(self._imdb) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/loader.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/loader.py index 72d411ddb141d9e752035d19d53da0aac4dbde33..6b772eccd4e825de41d28f091593cb6568af1889 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/loader.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/loader.py @@ -1,96 +1,96 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Data loader.""" - -import os - -import torch -from pycls.core.config import cfg -from pycls.datasets.cifar10 import Cifar10 -from pycls.datasets.imagenet import ImageNet -from torch.utils.data.distributed import DistributedSampler -from torch.utils.data.sampler import RandomSampler - - -# Supported datasets -_DATASETS = {"cifar10": Cifar10, "imagenet": ImageNet} - -# Default data directory (/path/pycls/pycls/datasets/data) -_DATA_DIR = "./pycls/datasets/data" - -# Relative data paths to default data directory -_PATHS = {"cifar10": "cifar10", "imagenet": "imagenet"} - - -def _construct_loader(dataset_name, split, batch_size, shuffle, drop_last): - """Constructs the data loader for the given dataset.""" - err_str = "Dataset '{}' not supported".format(dataset_name) - assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str - # Retrieve the data path for the dataset - data_path = os.path.join(_DATA_DIR, _PATHS[dataset_name]) - # Construct the dataset - dataset = _DATASETS[dataset_name](data_path, split) - # Create a sampler for multi-process training - sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None - # Create a loader - loader = torch.utils.data.DataLoader( - dataset, - batch_size=batch_size, - shuffle=(False if sampler else shuffle), - sampler=sampler, - num_workers=cfg.DATA_LOADER.NUM_WORKERS, - pin_memory=cfg.DATA_LOADER.PIN_MEMORY, - drop_last=drop_last, - ) - return loader - - -def construct_train_loader(): - """Train loader wrapper.""" - return _construct_loader( - dataset_name=cfg.TRAIN.DATASET, - split=cfg.TRAIN.SPLIT, - batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS), - shuffle=True, - drop_last=True, - ) - - -def construct_test_loader(): - """Test loader wrapper.""" - return _construct_loader( - dataset_name=cfg.TEST.DATASET, - split=cfg.TEST.SPLIT, - batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS), - shuffle=False, - drop_last=False, - ) - - -def shuffle(loader, cur_epoch): - """"Shuffles the data.""" - err_str = "Sampler type '{}' not supported".format(type(loader.sampler)) - assert isinstance(loader.sampler, (RandomSampler, DistributedSampler)), err_str - # RandomSampler handles shuffling automatically - if isinstance(loader.sampler, DistributedSampler): - # DistributedSampler shuffles data based on epoch - loader.sampler.set_epoch(cur_epoch) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Data loader.""" + +import os + +import torch +from pycls.core.config import cfg +from pycls.datasets.cifar10 import Cifar10 +from pycls.datasets.imagenet import ImageNet +from torch.utils.data.distributed import DistributedSampler +from torch.utils.data.sampler import RandomSampler + + +# Supported datasets +_DATASETS = {"cifar10": Cifar10, "imagenet": ImageNet} + +# Default data directory (/path/pycls/pycls/datasets/data) +_DATA_DIR = "./pycls/datasets/data" + +# Relative data paths to default data directory +_PATHS = {"cifar10": "cifar10", "imagenet": "imagenet"} + + +def _construct_loader(dataset_name, split, batch_size, shuffle, drop_last): + """Constructs the data loader for the given dataset.""" + err_str = "Dataset '{}' not supported".format(dataset_name) + assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str + # Retrieve the data path for the dataset + data_path = os.path.join(_DATA_DIR, _PATHS[dataset_name]) + # Construct the dataset + dataset = _DATASETS[dataset_name](data_path, split) + # Create a sampler for multi-process training + sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None + # Create a loader + loader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=(False if sampler else shuffle), + sampler=sampler, + num_workers=cfg.DATA_LOADER.NUM_WORKERS, + pin_memory=cfg.DATA_LOADER.PIN_MEMORY, + drop_last=drop_last, + ) + return loader + + +def construct_train_loader(): + """Train loader wrapper.""" + return _construct_loader( + dataset_name=cfg.TRAIN.DATASET, + split=cfg.TRAIN.SPLIT, + batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS), + shuffle=True, + drop_last=True, + ) + + +def construct_test_loader(): + """Test loader wrapper.""" + return _construct_loader( + dataset_name=cfg.TEST.DATASET, + split=cfg.TEST.SPLIT, + batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS), + shuffle=False, + drop_last=False, + ) + + +def shuffle(loader, cur_epoch): + """"Shuffles the data.""" + err_str = "Sampler type '{}' not supported".format(type(loader.sampler)) + assert isinstance(loader.sampler, (RandomSampler, DistributedSampler)), err_str + # RandomSampler handles shuffling automatically + if isinstance(loader.sampler, DistributedSampler): + # DistributedSampler shuffles data based on epoch + loader.sampler.set_epoch(cur_epoch) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/transforms.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/transforms.py index 4af2bcddb7fa21c25ab71662475304d5af92e78f..cd63be489ad00fe1eeb51c18e8b26bc6304d3402 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/transforms.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/datasets/transforms.py @@ -1,94 +1,94 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Image transformations on HWC float images with RGB channel order.""" - -from math import ceil, sqrt - -import cv2 -import numpy as np -from PIL import Image -from pycls.datasets.augment import make_augment - - -def scale_and_center_crop(im, scale_size, crop_size): - """Performs scaling and center cropping (used for testing).""" - h, w = im.shape[:2] - if w < h and w != scale_size: - w, h = scale_size, int(h / w * scale_size) - im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) - elif h <= w and h != scale_size: - w, h = int(w / h * scale_size), scale_size - im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) - x = ceil((w - crop_size) / 2) - y = ceil((h - crop_size) / 2) - return im[y : (y + crop_size), x : (x + crop_size), :] - - -def random_sized_crop(im, size, area_frac=0.08, max_iter=10): - """Performs Inception-style cropping (used for training).""" - h, w = im.shape[:2] - area = h * w - for _ in range(max_iter): - target_area = np.random.uniform(area_frac, 1.0) * area - aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0) - w_crop = round(sqrt(target_area * aspect_ratio)) - h_crop = round(sqrt(target_area / aspect_ratio)) - if np.random.uniform() < 0.5: - w_crop, h_crop = h_crop, w_crop - if h_crop <= h and w_crop <= w: - y = 0 if h_crop == h else np.random.randint(0, h - h_crop) - x = 0 if w_crop == w else np.random.randint(0, w - w_crop) - im = im[y : (y + h_crop), x : (x + w_crop), :] - return cv2.resize(im, (size, size), interpolation=cv2.INTER_LINEAR) - return scale_and_center_crop(im, size, size) - - -def horizontal_flip(im, prob=0.5): - """Performs horizontal flip (used for training).""" - return im[:, ::-1, :] if np.random.uniform() < prob else im - - -def augment(im, augment_str): - """Augments image (used for training).""" - if augment_str: - im = Image.fromarray((im * 255).astype(np.uint8)) - im = make_augment(augment_str)(im) - im = np.asarray(im).astype(np.float32) / 255 - return im - - -def lighting(im, alpha_std, eig_val, eig_vec): - """Performs AlexNet-style PCA jitter (used for training).""" - alpha = np.random.normal(0, alpha_std, size=(1, 3)) - alpha = np.repeat(alpha, 3, axis=0) - eig_val = np.repeat(np.array(eig_val), 3, axis=0) - rgb = np.sum(np.array(eig_vec) * alpha * eig_val, axis=1) - for i in range(3): - im[:, :, i] = im[:, :, i] + rgb[i] - return im - - -def color_norm(im, mean, std): - """Performs per-channel normalization (used for training and testing).""" - for i in range(3): - im[:, :, i] = (im[:, :, i] - mean[i]) / std[i] - return im +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Image transformations on HWC float images with RGB channel order.""" + +from math import ceil, sqrt + +import cv2 +import numpy as np +from PIL import Image +from pycls.datasets.augment import make_augment + + +def scale_and_center_crop(im, scale_size, crop_size): + """Performs scaling and center cropping (used for testing).""" + h, w = im.shape[:2] + if w < h and w != scale_size: + w, h = scale_size, int(h / w * scale_size) + im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + elif h <= w and h != scale_size: + w, h = int(w / h * scale_size), scale_size + im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + x = ceil((w - crop_size) / 2) + y = ceil((h - crop_size) / 2) + return im[y : (y + crop_size), x : (x + crop_size), :] + + +def random_sized_crop(im, size, area_frac=0.08, max_iter=10): + """Performs Inception-style cropping (used for training).""" + h, w = im.shape[:2] + area = h * w + for _ in range(max_iter): + target_area = np.random.uniform(area_frac, 1.0) * area + aspect_ratio = np.random.uniform(3.0 / 4.0, 4.0 / 3.0) + w_crop = round(sqrt(target_area * aspect_ratio)) + h_crop = round(sqrt(target_area / aspect_ratio)) + if np.random.uniform() < 0.5: + w_crop, h_crop = h_crop, w_crop + if h_crop <= h and w_crop <= w: + y = 0 if h_crop == h else np.random.randint(0, h - h_crop) + x = 0 if w_crop == w else np.random.randint(0, w - w_crop) + im = im[y : (y + h_crop), x : (x + w_crop), :] + return cv2.resize(im, (size, size), interpolation=cv2.INTER_LINEAR) + return scale_and_center_crop(im, size, size) + + +def horizontal_flip(im, prob=0.5): + """Performs horizontal flip (used for training).""" + return im[:, ::-1, :] if np.random.uniform() < prob else im + + +def augment(im, augment_str): + """Augments image (used for training).""" + if augment_str: + im = Image.fromarray((im * 255).astype(np.uint8)) + im = make_augment(augment_str)(im) + im = np.asarray(im).astype(np.float32) / 255 + return im + + +def lighting(im, alpha_std, eig_val, eig_vec): + """Performs AlexNet-style PCA jitter (used for training).""" + alpha = np.random.normal(0, alpha_std, size=(1, 3)) + alpha = np.repeat(alpha, 3, axis=0) + eig_val = np.repeat(np.array(eig_val), 3, axis=0) + rgb = np.sum(np.array(eig_vec) * alpha * eig_val, axis=1) + for i in range(3): + im[:, :, i] = im[:, :, i] + rgb[i] + return im + + +def color_norm(im, mean, std): + """Performs per-channel normalization (used for training and testing).""" + for i in range(3): + im[:, :, i] = (im[:, :, i] - mean[i]) / std[i] + return im diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/__init__.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/__init__.py index 1f38943d17290e09b00f8976fd0aae38990a7886..9995f91fe8876cf4fe8e6f96163d6dc61083d2ad 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/__init__.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/__init__.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -"""Expose model zoo constructors.""" - -from pycls.models.model_zoo import effnet, regnetx, regnety, resnet, resnext +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Expose model zoo constructors.""" + +from pycls.models.model_zoo import effnet, regnetx, regnety, resnet, resnext diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/anynet.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/anynet.py index 3b34dc2c31fc0c6deb6ca808d7175052675f79d9..f25ef7fe227fc10fa3e36621c9ecbe2679ca3972 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/anynet.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/anynet.py @@ -1,375 +1,375 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AnyNet models.""" - -from pycls.core.config import cfg -from pycls.models.blocks import ( - SE, - activation, - conv2d, - conv2d_cx, - gap2d, - gap2d_cx, - init_weights, - linear, - linear_cx, - norm2d, - norm2d_cx, - pool2d, - pool2d_cx, -) -from torch.nn import Module - - -def get_stem_fun(stem_type): - """Retrieves the stem function by name.""" - stem_funs = { - "res_stem_cifar": ResStemCifar, - "res_stem_in": ResStem, - "simple_stem_in": SimpleStem, - } - err_str = "Stem type '{}' not supported" - assert stem_type in stem_funs.keys(), err_str.format(stem_type) - return stem_funs[stem_type] - - -def get_block_fun(block_type): - """Retrieves the block function by name.""" - block_funs = { - "vanilla_block": VanillaBlock, - "res_basic_block": ResBasicBlock, - "res_bottleneck_block": ResBottleneckBlock, - } - err_str = "Block type '{}' not supported" - assert block_type in block_funs.keys(), err_str.format(block_type) - return block_funs[block_type] - - -class AnyHead(Module): - """AnyNet head: AvgPool, 1x1.""" - - def __init__(self, w_in, num_classes): - super(AnyHead, self).__init__() - self.avg_pool = gap2d(w_in) - self.fc = linear(w_in, num_classes, bias=True) - - def forward(self, x): - x = self.avg_pool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - return x - - @staticmethod - def complexity(cx, w_in, num_classes): - cx = gap2d_cx(cx, w_in) - cx = linear_cx(cx, w_in, num_classes, bias=True) - return cx - - -class VanillaBlock(Module): - """Vanilla block: [3x3 conv, BN, Relu] x2.""" - - def __init__(self, w_in, w_out, stride, _params): - super(VanillaBlock, self).__init__() - self.a = conv2d(w_in, w_out, 3, stride=stride) - self.a_bn = norm2d(w_out) - self.a_af = activation() - self.b = conv2d(w_out, w_out, 3) - self.b_bn = norm2d(w_out) - self.b_af = activation() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, _params): - cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) - cx = norm2d_cx(cx, w_out) - cx = conv2d_cx(cx, w_out, w_out, 3) - cx = norm2d_cx(cx, w_out) - return cx - - -class BasicTransform(Module): - """Basic transformation: [3x3 conv, 
BN, Relu] x2.""" - - def __init__(self, w_in, w_out, stride, _params): - super(BasicTransform, self).__init__() - self.a = conv2d(w_in, w_out, 3, stride=stride) - self.a_bn = norm2d(w_out) - self.a_af = activation() - self.b = conv2d(w_out, w_out, 3) - self.b_bn = norm2d(w_out) - self.b_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, _params): - cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) - cx = norm2d_cx(cx, w_out) - cx = conv2d_cx(cx, w_out, w_out, 3) - cx = norm2d_cx(cx, w_out) - return cx - - -class ResBasicBlock(Module): - """Residual basic block: x + f(x), f = basic transform.""" - - def __init__(self, w_in, w_out, stride, params): - super(ResBasicBlock, self).__init__() - self.proj, self.bn = None, None - if (w_in != w_out) or (stride != 1): - self.proj = conv2d(w_in, w_out, 1, stride=stride) - self.bn = norm2d(w_out) - self.f = BasicTransform(w_in, w_out, stride, params) - self.af = activation() - - def forward(self, x): - x_p = self.bn(self.proj(x)) if self.proj else x - return self.af(x_p + self.f(x)) - - @staticmethod - def complexity(cx, w_in, w_out, stride, params): - if (w_in != w_out) or (stride != 1): - h, w = cx["h"], cx["w"] - cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) - cx = norm2d_cx(cx, w_out) - cx["h"], cx["w"] = h, w - cx = BasicTransform.complexity(cx, w_in, w_out, stride, params) - return cx - - -class BottleneckTransform(Module): - """Bottleneck transformation: 1x1, 3x3 [+SE], 1x1.""" - - def __init__(self, w_in, w_out, stride, params): - super(BottleneckTransform, self).__init__() - w_b = int(round(w_out * params["bot_mul"])) - w_se = int(round(w_in * params["se_r"])) - groups = w_b // params["group_w"] - self.a = conv2d(w_in, w_b, 1) - self.a_bn = norm2d(w_b) - self.a_af = activation() - self.b = conv2d(w_b, w_b, 3, stride=stride, groups=groups) - self.b_bn = norm2d(w_b) - self.b_af = activation() - self.se = SE(w_b, w_se) if w_se else None - self.c = conv2d(w_b, w_out, 1) - self.c_bn = norm2d(w_out) - self.c_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, params): - w_b = int(round(w_out * params["bot_mul"])) - w_se = int(round(w_in * params["se_r"])) - groups = w_b // params["group_w"] - cx = conv2d_cx(cx, w_in, w_b, 1) - cx = norm2d_cx(cx, w_b) - cx = conv2d_cx(cx, w_b, w_b, 3, stride=stride, groups=groups) - cx = norm2d_cx(cx, w_b) - cx = SE.complexity(cx, w_b, w_se) if w_se else cx - cx = conv2d_cx(cx, w_b, w_out, 1) - cx = norm2d_cx(cx, w_out) - return cx - - -class ResBottleneckBlock(Module): - """Residual bottleneck block: x + f(x), f = bottleneck transform.""" - - def __init__(self, w_in, w_out, stride, params): - super(ResBottleneckBlock, self).__init__() - self.proj, self.bn = None, None - if (w_in != w_out) or (stride != 1): - self.proj = conv2d(w_in, w_out, 1, stride=stride) - self.bn = norm2d(w_out) - self.f = BottleneckTransform(w_in, w_out, stride, params) - self.af = activation() - - def forward(self, x): - x_p = self.bn(self.proj(x)) if self.proj else x - return self.af(x_p + self.f(x)) - - @staticmethod - def complexity(cx, w_in, w_out, stride, params): - if (w_in != w_out) or (stride != 1): - h, w = cx["h"], cx["w"] - cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) - cx = norm2d_cx(cx, w_out) - cx["h"], cx["w"] = h, w - cx = BottleneckTransform.complexity(cx, w_in, w_out, stride, params) - return cx - 
- -class ResStemCifar(Module): - """ResNet stem for CIFAR: 3x3, BN, AF.""" - - def __init__(self, w_in, w_out): - super(ResStemCifar, self).__init__() - self.conv = conv2d(w_in, w_out, 3) - self.bn = norm2d(w_out) - self.af = activation() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 3) - cx = norm2d_cx(cx, w_out) - return cx - - -class ResStem(Module): - """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" - - def __init__(self, w_in, w_out): - super(ResStem, self).__init__() - self.conv = conv2d(w_in, w_out, 7, stride=2) - self.bn = norm2d(w_out) - self.af = activation() - self.pool = pool2d(w_out, 3, stride=2) - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 7, stride=2) - cx = norm2d_cx(cx, w_out) - cx = pool2d_cx(cx, w_out, 3, stride=2) - return cx - - -class SimpleStem(Module): - """Simple stem for ImageNet: 3x3, BN, AF.""" - - def __init__(self, w_in, w_out): - super(SimpleStem, self).__init__() - self.conv = conv2d(w_in, w_out, 3, stride=2) - self.bn = norm2d(w_out) - self.af = activation() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 3, stride=2) - cx = norm2d_cx(cx, w_out) - return cx - - -class AnyStage(Module): - """AnyNet stage (sequence of blocks w/ the same output shape).""" - - def __init__(self, w_in, w_out, stride, d, block_fun, params): - super(AnyStage, self).__init__() - for i in range(d): - block = block_fun(w_in, w_out, stride, params) - self.add_module("b{}".format(i + 1), block) - stride, w_in = 1, w_out - - def forward(self, x): - for block in self.children(): - x = block(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, d, block_fun, params): - for _ in range(d): - cx = block_fun.complexity(cx, w_in, w_out, stride, params) - stride, w_in = 1, w_out - return cx - - -class AnyNet(Module): - """AnyNet model.""" - - @staticmethod - def get_params(): - nones = [None for _ in cfg.ANYNET.DEPTHS] - return { - "stem_type": cfg.ANYNET.STEM_TYPE, - "stem_w": cfg.ANYNET.STEM_W, - "block_type": cfg.ANYNET.BLOCK_TYPE, - "depths": cfg.ANYNET.DEPTHS, - "widths": cfg.ANYNET.WIDTHS, - "strides": cfg.ANYNET.STRIDES, - "bot_muls": cfg.ANYNET.BOT_MULS if cfg.ANYNET.BOT_MULS else nones, - "group_ws": cfg.ANYNET.GROUP_WS if cfg.ANYNET.GROUP_WS else nones, - "se_r": cfg.ANYNET.SE_R if cfg.ANYNET.SE_ON else 0, - "num_classes": cfg.MODEL.NUM_CLASSES, - } - - def __init__(self, params=None): - super(AnyNet, self).__init__() - p = AnyNet.get_params() if not params else params - stem_fun = get_stem_fun(p["stem_type"]) - block_fun = get_block_fun(p["block_type"]) - self.stem = stem_fun(3, p["stem_w"]) - prev_w = p["stem_w"] - keys = ["depths", "widths", "strides", "bot_muls", "group_ws"] - for i, (d, w, s, b, g) in enumerate(zip(*[p[k] for k in keys])): - params = {"bot_mul": b, "group_w": g, "se_r": p["se_r"]} - stage = AnyStage(prev_w, w, s, d, block_fun, params) - self.add_module("s{}".format(i + 1), stage) - prev_w = w - self.head = AnyHead(prev_w, p["num_classes"]) - self.apply(init_weights) - - def forward(self, x): - for module in self.children(): - x = module(x) - return x - - @staticmethod - def complexity(cx, params=None): - """Computes model complexity (if you alter the model, make sure to 
update).""" - p = AnyNet.get_params() if not params else params - stem_fun = get_stem_fun(p["stem_type"]) - block_fun = get_block_fun(p["block_type"]) - cx = stem_fun.complexity(cx, 3, p["stem_w"]) - prev_w = p["stem_w"] - keys = ["depths", "widths", "strides", "bot_muls", "group_ws"] - for d, w, s, b, g in zip(*[p[k] for k in keys]): - params = {"bot_mul": b, "group_w": g, "se_r": p["se_r"]} - cx = AnyStage.complexity(cx, prev_w, w, s, d, block_fun, params) - prev_w = w - cx = AnyHead.complexity(cx, prev_w, p["num_classes"]) - return cx +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AnyNet models.""" + +from pycls.core.config import cfg +from pycls.models.blocks import ( + SE, + activation, + conv2d, + conv2d_cx, + gap2d, + gap2d_cx, + init_weights, + linear, + linear_cx, + norm2d, + norm2d_cx, + pool2d, + pool2d_cx, +) +from torch.nn import Module + + +def get_stem_fun(stem_type): + """Retrieves the stem function by name.""" + stem_funs = { + "res_stem_cifar": ResStemCifar, + "res_stem_in": ResStem, + "simple_stem_in": SimpleStem, + } + err_str = "Stem type '{}' not supported" + assert stem_type in stem_funs.keys(), err_str.format(stem_type) + return stem_funs[stem_type] + + +def get_block_fun(block_type): + """Retrieves the block function by name.""" + block_funs = { + "vanilla_block": VanillaBlock, + "res_basic_block": ResBasicBlock, + "res_bottleneck_block": ResBottleneckBlock, + } + err_str = "Block type '{}' not supported" + assert block_type in block_funs.keys(), err_str.format(block_type) + return block_funs[block_type] + + +class AnyHead(Module): + """AnyNet head: AvgPool, 1x1.""" + + def __init__(self, w_in, num_classes): + super(AnyHead, self).__init__() + self.avg_pool = gap2d(w_in) + self.fc = linear(w_in, num_classes, bias=True) + + def forward(self, x): + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + + @staticmethod + def complexity(cx, w_in, num_classes): + cx = gap2d_cx(cx, w_in) + cx = linear_cx(cx, w_in, num_classes, bias=True) + return cx + + +class VanillaBlock(Module): + """Vanilla block: [3x3 conv, BN, Relu] x2.""" + + def __init__(self, w_in, w_out, stride, _params): + super(VanillaBlock, self).__init__() + self.a = conv2d(w_in, w_out, 3, stride=stride) + self.a_bn = norm2d(w_out) + self.a_af = activation() + self.b = conv2d(w_out, w_out, 3) + self.b_bn = norm2d(w_out) + self.b_af = activation() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, _params): + cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) + cx = norm2d_cx(cx, w_out) + cx = conv2d_cx(cx, w_out, w_out, 3) + cx = norm2d_cx(cx, w_out) + return cx + + +class BasicTransform(Module): + """Basic transformation: [3x3 conv, BN, Relu] 
x2.""" + + def __init__(self, w_in, w_out, stride, _params): + super(BasicTransform, self).__init__() + self.a = conv2d(w_in, w_out, 3, stride=stride) + self.a_bn = norm2d(w_out) + self.a_af = activation() + self.b = conv2d(w_out, w_out, 3) + self.b_bn = norm2d(w_out) + self.b_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, _params): + cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) + cx = norm2d_cx(cx, w_out) + cx = conv2d_cx(cx, w_out, w_out, 3) + cx = norm2d_cx(cx, w_out) + return cx + + +class ResBasicBlock(Module): + """Residual basic block: x + f(x), f = basic transform.""" + + def __init__(self, w_in, w_out, stride, params): + super(ResBasicBlock, self).__init__() + self.proj, self.bn = None, None + if (w_in != w_out) or (stride != 1): + self.proj = conv2d(w_in, w_out, 1, stride=stride) + self.bn = norm2d(w_out) + self.f = BasicTransform(w_in, w_out, stride, params) + self.af = activation() + + def forward(self, x): + x_p = self.bn(self.proj(x)) if self.proj else x + return self.af(x_p + self.f(x)) + + @staticmethod + def complexity(cx, w_in, w_out, stride, params): + if (w_in != w_out) or (stride != 1): + h, w = cx["h"], cx["w"] + cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) + cx = norm2d_cx(cx, w_out) + cx["h"], cx["w"] = h, w + cx = BasicTransform.complexity(cx, w_in, w_out, stride, params) + return cx + + +class BottleneckTransform(Module): + """Bottleneck transformation: 1x1, 3x3 [+SE], 1x1.""" + + def __init__(self, w_in, w_out, stride, params): + super(BottleneckTransform, self).__init__() + w_b = int(round(w_out * params["bot_mul"])) + w_se = int(round(w_in * params["se_r"])) + groups = w_b // params["group_w"] + self.a = conv2d(w_in, w_b, 1) + self.a_bn = norm2d(w_b) + self.a_af = activation() + self.b = conv2d(w_b, w_b, 3, stride=stride, groups=groups) + self.b_bn = norm2d(w_b) + self.b_af = activation() + self.se = SE(w_b, w_se) if w_se else None + self.c = conv2d(w_b, w_out, 1) + self.c_bn = norm2d(w_out) + self.c_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, params): + w_b = int(round(w_out * params["bot_mul"])) + w_se = int(round(w_in * params["se_r"])) + groups = w_b // params["group_w"] + cx = conv2d_cx(cx, w_in, w_b, 1) + cx = norm2d_cx(cx, w_b) + cx = conv2d_cx(cx, w_b, w_b, 3, stride=stride, groups=groups) + cx = norm2d_cx(cx, w_b) + cx = SE.complexity(cx, w_b, w_se) if w_se else cx + cx = conv2d_cx(cx, w_b, w_out, 1) + cx = norm2d_cx(cx, w_out) + return cx + + +class ResBottleneckBlock(Module): + """Residual bottleneck block: x + f(x), f = bottleneck transform.""" + + def __init__(self, w_in, w_out, stride, params): + super(ResBottleneckBlock, self).__init__() + self.proj, self.bn = None, None + if (w_in != w_out) or (stride != 1): + self.proj = conv2d(w_in, w_out, 1, stride=stride) + self.bn = norm2d(w_out) + self.f = BottleneckTransform(w_in, w_out, stride, params) + self.af = activation() + + def forward(self, x): + x_p = self.bn(self.proj(x)) if self.proj else x + return self.af(x_p + self.f(x)) + + @staticmethod + def complexity(cx, w_in, w_out, stride, params): + if (w_in != w_out) or (stride != 1): + h, w = cx["h"], cx["w"] + cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) + cx = norm2d_cx(cx, w_out) + cx["h"], cx["w"] = h, w + cx = BottleneckTransform.complexity(cx, w_in, w_out, stride, params) + return cx + + +class 
ResStemCifar(Module): + """ResNet stem for CIFAR: 3x3, BN, AF.""" + + def __init__(self, w_in, w_out): + super(ResStemCifar, self).__init__() + self.conv = conv2d(w_in, w_out, 3) + self.bn = norm2d(w_out) + self.af = activation() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 3) + cx = norm2d_cx(cx, w_out) + return cx + + +class ResStem(Module): + """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" + + def __init__(self, w_in, w_out): + super(ResStem, self).__init__() + self.conv = conv2d(w_in, w_out, 7, stride=2) + self.bn = norm2d(w_out) + self.af = activation() + self.pool = pool2d(w_out, 3, stride=2) + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 7, stride=2) + cx = norm2d_cx(cx, w_out) + cx = pool2d_cx(cx, w_out, 3, stride=2) + return cx + + +class SimpleStem(Module): + """Simple stem for ImageNet: 3x3, BN, AF.""" + + def __init__(self, w_in, w_out): + super(SimpleStem, self).__init__() + self.conv = conv2d(w_in, w_out, 3, stride=2) + self.bn = norm2d(w_out) + self.af = activation() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 3, stride=2) + cx = norm2d_cx(cx, w_out) + return cx + + +class AnyStage(Module): + """AnyNet stage (sequence of blocks w/ the same output shape).""" + + def __init__(self, w_in, w_out, stride, d, block_fun, params): + super(AnyStage, self).__init__() + for i in range(d): + block = block_fun(w_in, w_out, stride, params) + self.add_module("b{}".format(i + 1), block) + stride, w_in = 1, w_out + + def forward(self, x): + for block in self.children(): + x = block(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, d, block_fun, params): + for _ in range(d): + cx = block_fun.complexity(cx, w_in, w_out, stride, params) + stride, w_in = 1, w_out + return cx + + +class AnyNet(Module): + """AnyNet model.""" + + @staticmethod + def get_params(): + nones = [None for _ in cfg.ANYNET.DEPTHS] + return { + "stem_type": cfg.ANYNET.STEM_TYPE, + "stem_w": cfg.ANYNET.STEM_W, + "block_type": cfg.ANYNET.BLOCK_TYPE, + "depths": cfg.ANYNET.DEPTHS, + "widths": cfg.ANYNET.WIDTHS, + "strides": cfg.ANYNET.STRIDES, + "bot_muls": cfg.ANYNET.BOT_MULS if cfg.ANYNET.BOT_MULS else nones, + "group_ws": cfg.ANYNET.GROUP_WS if cfg.ANYNET.GROUP_WS else nones, + "se_r": cfg.ANYNET.SE_R if cfg.ANYNET.SE_ON else 0, + "num_classes": cfg.MODEL.NUM_CLASSES, + } + + def __init__(self, params=None): + super(AnyNet, self).__init__() + p = AnyNet.get_params() if not params else params + stem_fun = get_stem_fun(p["stem_type"]) + block_fun = get_block_fun(p["block_type"]) + self.stem = stem_fun(3, p["stem_w"]) + prev_w = p["stem_w"] + keys = ["depths", "widths", "strides", "bot_muls", "group_ws"] + for i, (d, w, s, b, g) in enumerate(zip(*[p[k] for k in keys])): + params = {"bot_mul": b, "group_w": g, "se_r": p["se_r"]} + stage = AnyStage(prev_w, w, s, d, block_fun, params) + self.add_module("s{}".format(i + 1), stage) + prev_w = w + self.head = AnyHead(prev_w, p["num_classes"]) + self.apply(init_weights) + + def forward(self, x): + for module in self.children(): + x = module(x) + return x + + @staticmethod + def complexity(cx, params=None): + """Computes model complexity (if you alter the model, make sure to 
update).""" + p = AnyNet.get_params() if not params else params + stem_fun = get_stem_fun(p["stem_type"]) + block_fun = get_block_fun(p["block_type"]) + cx = stem_fun.complexity(cx, 3, p["stem_w"]) + prev_w = p["stem_w"] + keys = ["depths", "widths", "strides", "bot_muls", "group_ws"] + for d, w, s, b, g in zip(*[p[k] for k in keys]): + params = {"bot_mul": b, "group_w": g, "se_r": p["se_r"]} + cx = AnyStage.complexity(cx, prev_w, w, s, d, block_fun, params) + prev_w = w + cx = AnyHead.complexity(cx, prev_w, p["num_classes"]) + return cx diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/blocks.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/blocks.py index 6c7773d188f52dd082510047081c450b71d8a0bd..eb14c7d70767892b1666cee1e472984630823979 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/blocks.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/blocks.py @@ -1,201 +1,201 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Common model blocks.""" - -import numpy as np -import torch -import torch.nn as nn -from pycls.core.config import cfg -from torch.nn import Module - - -# ----------------------- Shortcuts for common torch.nn layers ----------------------- # - - -def conv2d(w_in, w_out, k, *, stride=1, groups=1, bias=False): - """Helper for building a conv2d layer.""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." - s, p, g, b = stride, (k - 1) // 2, groups, bias - return nn.Conv2d(w_in, w_out, k, stride=s, padding=p, groups=g, bias=b) - - -def norm2d(w_in): - """Helper for building a norm2d layer.""" - return nn.BatchNorm2d(num_features=w_in, eps=cfg.BN.EPS, momentum=cfg.BN.MOM) - - -def pool2d(_w_in, k, *, stride=1): - """Helper for building a pool2d layer.""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
- return nn.MaxPool2d(k, stride=stride, padding=(k - 1) // 2) - - -def gap2d(_w_in): - """Helper for building a gap2d layer.""" - return nn.AdaptiveAvgPool2d((1, 1)) - - -def linear(w_in, w_out, *, bias=False): - """Helper for building a linear layer.""" - return nn.Linear(w_in, w_out, bias=bias) - - -def activation(): - """Helper for building an activation layer.""" - activation_fun = cfg.MODEL.ACTIVATION_FUN.lower() - if activation_fun == "relu": - return nn.ReLU(inplace=cfg.MODEL.ACTIVATION_INPLACE) - elif activation_fun == "silu" or activation_fun == "swish": - try: - return torch.nn.SiLU() - except AttributeError: - return SiLU() - else: - raise AssertionError("Unknown MODEL.ACTIVATION_FUN: " + activation_fun) - - -# --------------------------- Complexity (cx) calculations --------------------------- # - - -def conv2d_cx(cx, w_in, w_out, k, *, stride=1, groups=1, bias=False): - """Accumulates complexity of conv2d into cx = (h, w, flops, params, acts).""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." - h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] - h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 - flops += k * k * w_in * w_out * h * w // groups + (w_out if bias else 0) - params += k * k * w_in * w_out // groups + (w_out if bias else 0) - acts += w_out * h * w - return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} - - -def norm2d_cx(cx, w_in): - """Accumulates complexity of norm2d into cx = (h, w, flops, params, acts).""" - h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] - params += 2 * w_in - return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} - - -def pool2d_cx(cx, w_in, k, *, stride=1): - """Accumulates complexity of pool2d into cx = (h, w, flops, params, acts).""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." - h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] - h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 - acts += w_in * h * w - return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} - - -def gap2d_cx(cx, _w_in): - """Accumulates complexity of gap2d into cx = (h, w, flops, params, acts).""" - flops, params, acts = cx["flops"], cx["params"], cx["acts"] - return {"h": 1, "w": 1, "flops": flops, "params": params, "acts": acts} - - -def linear_cx(cx, w_in, w_out, *, bias=False): - """Accumulates complexity of linear into cx = (h, w, flops, params, acts).""" - h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] - flops += w_in * w_out + (w_out if bias else 0) - params += w_in * w_out + (w_out if bias else 0) - acts += w_out - return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} - - -# ---------------------------------- Shared blocks ----------------------------------- # - - -class SiLU(Module): - """SiLU activation function (also known as Swish): x * sigmoid(x).""" - - # Note: will be part of Pytorch 1.7, at which point can remove this. 
- - def __init__(self): - super(SiLU, self).__init__() - - def forward(self, x): - return x * torch.sigmoid(x) - - -class SE(Module): - """Squeeze-and-Excitation (SE) block: AvgPool, FC, Act, FC, Sigmoid.""" - - def __init__(self, w_in, w_se): - super(SE, self).__init__() - self.avg_pool = gap2d(w_in) - self.f_ex = nn.Sequential( - conv2d(w_in, w_se, 1, bias=True), - activation(), - conv2d(w_se, w_in, 1, bias=True), - nn.Sigmoid(), - ) - - def forward(self, x): - return x * self.f_ex(self.avg_pool(x)) - - @staticmethod - def complexity(cx, w_in, w_se): - h, w = cx["h"], cx["w"] - cx = gap2d_cx(cx, w_in) - cx = conv2d_cx(cx, w_in, w_se, 1, bias=True) - cx = conv2d_cx(cx, w_se, w_in, 1, bias=True) - cx["h"], cx["w"] = h, w - return cx - - -# ---------------------------------- Miscellaneous ----------------------------------- # - - -def adjust_block_compatibility(ws, bs, gs): - """Adjusts the compatibility of widths, bottlenecks, and groups.""" - assert len(ws) == len(bs) == len(gs) - assert all(w > 0 and b > 0 and g > 0 for w, b, g in zip(ws, bs, gs)) - vs = [int(max(1, w * b)) for w, b in zip(ws, bs)] - gs = [int(min(g, v)) for g, v in zip(gs, vs)] - ms = [np.lcm(g, b) if b > 1 else g for g, b in zip(gs, bs)] - vs = [max(m, int(round(v / m) * m)) for v, m in zip(vs, ms)] - ws = [int(v / b) for v, b in zip(vs, bs)] - assert all(w * b % g == 0 for w, b, g in zip(ws, bs, gs)) - return ws, bs, gs - - -def init_weights(m): - """Performs ResNet-style weight initialization.""" - if isinstance(m, nn.Conv2d): - # Note that there is no bias due to BN - fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) - elif isinstance(m, nn.BatchNorm2d): - zero_init_gamma = cfg.BN.ZERO_INIT_FINAL_GAMMA - zero_init_gamma = hasattr(m, "final_bn") and m.final_bn and zero_init_gamma - m.weight.data.fill_(0.0 if zero_init_gamma else 1.0) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - m.weight.data.normal_(mean=0.0, std=0.01) - m.bias.data.zero_() - - -def drop_connect(x, drop_ratio): - """Drop connect (adapted from DARTS).""" - keep_ratio = 1.0 - drop_ratio - mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device) - mask.bernoulli_(keep_ratio) - x.div_(keep_ratio) - x.mul_(mask) - return x +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common model blocks.""" + +import numpy as np +import torch +import torch.nn as nn +from pycls.core.config import cfg +from torch.nn import Module + + +# ----------------------- Shortcuts for common torch.nn layers ----------------------- # + + +def conv2d(w_in, w_out, k, *, stride=1, groups=1, bias=False): + """Helper for building a conv2d layer.""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
+ s, p, g, b = stride, (k - 1) // 2, groups, bias + return nn.Conv2d(w_in, w_out, k, stride=s, padding=p, groups=g, bias=b) + + +def norm2d(w_in): + """Helper for building a norm2d layer.""" + return nn.BatchNorm2d(num_features=w_in, eps=cfg.BN.EPS, momentum=cfg.BN.MOM) + + +def pool2d(_w_in, k, *, stride=1): + """Helper for building a pool2d layer.""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." + return nn.MaxPool2d(k, stride=stride, padding=(k - 1) // 2) + + +def gap2d(_w_in): + """Helper for building a gap2d layer.""" + return nn.AdaptiveAvgPool2d((1, 1)) + + +def linear(w_in, w_out, *, bias=False): + """Helper for building a linear layer.""" + return nn.Linear(w_in, w_out, bias=bias) + + +def activation(): + """Helper for building an activation layer.""" + activation_fun = cfg.MODEL.ACTIVATION_FUN.lower() + if activation_fun == "relu": + return nn.ReLU(inplace=cfg.MODEL.ACTIVATION_INPLACE) + elif activation_fun == "silu" or activation_fun == "swish": + try: + return torch.nn.SiLU() + except AttributeError: + return SiLU() + else: + raise AssertionError("Unknown MODEL.ACTIVATION_FUN: " + activation_fun) + + +# --------------------------- Complexity (cx) calculations --------------------------- # + + +def conv2d_cx(cx, w_in, w_out, k, *, stride=1, groups=1, bias=False): + """Accumulates complexity of conv2d into cx = (h, w, flops, params, acts).""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." + h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] + h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 + flops += k * k * w_in * w_out * h * w // groups + (w_out if bias else 0) + params += k * k * w_in * w_out // groups + (w_out if bias else 0) + acts += w_out * h * w + return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} + + +def norm2d_cx(cx, w_in): + """Accumulates complexity of norm2d into cx = (h, w, flops, params, acts).""" + h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] + params += 2 * w_in + return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} + + +def pool2d_cx(cx, w_in, k, *, stride=1): + """Accumulates complexity of pool2d into cx = (h, w, flops, params, acts).""" + assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." + h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] + h, w = (h - 1) // stride + 1, (w - 1) // stride + 1 + acts += w_in * h * w + return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} + + +def gap2d_cx(cx, _w_in): + """Accumulates complexity of gap2d into cx = (h, w, flops, params, acts).""" + flops, params, acts = cx["flops"], cx["params"], cx["acts"] + return {"h": 1, "w": 1, "flops": flops, "params": params, "acts": acts} + + +def linear_cx(cx, w_in, w_out, *, bias=False): + """Accumulates complexity of linear into cx = (h, w, flops, params, acts).""" + h, w, flops, params, acts = cx["h"], cx["w"], cx["flops"], cx["params"], cx["acts"] + flops += w_in * w_out + (w_out if bias else 0) + params += w_in * w_out + (w_out if bias else 0) + acts += w_out + return {"h": h, "w": w, "flops": flops, "params": params, "acts": acts} + + +# ---------------------------------- Shared blocks ----------------------------------- # + + +class SiLU(Module): + """SiLU activation function (also known as Swish): x * sigmoid(x).""" + + # Note: will be part of Pytorch 1.7, at which point can remove this. 
+ + def __init__(self): + super(SiLU, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) + + +class SE(Module): + """Squeeze-and-Excitation (SE) block: AvgPool, FC, Act, FC, Sigmoid.""" + + def __init__(self, w_in, w_se): + super(SE, self).__init__() + self.avg_pool = gap2d(w_in) + self.f_ex = nn.Sequential( + conv2d(w_in, w_se, 1, bias=True), + activation(), + conv2d(w_se, w_in, 1, bias=True), + nn.Sigmoid(), + ) + + def forward(self, x): + return x * self.f_ex(self.avg_pool(x)) + + @staticmethod + def complexity(cx, w_in, w_se): + h, w = cx["h"], cx["w"] + cx = gap2d_cx(cx, w_in) + cx = conv2d_cx(cx, w_in, w_se, 1, bias=True) + cx = conv2d_cx(cx, w_se, w_in, 1, bias=True) + cx["h"], cx["w"] = h, w + return cx + + +# ---------------------------------- Miscellaneous ----------------------------------- # + + +def adjust_block_compatibility(ws, bs, gs): + """Adjusts the compatibility of widths, bottlenecks, and groups.""" + assert len(ws) == len(bs) == len(gs) + assert all(w > 0 and b > 0 and g > 0 for w, b, g in zip(ws, bs, gs)) + vs = [int(max(1, w * b)) for w, b in zip(ws, bs)] + gs = [int(min(g, v)) for g, v in zip(gs, vs)] + ms = [np.lcm(g, b) if b > 1 else g for g, b in zip(gs, bs)] + vs = [max(m, int(round(v / m) * m)) for v, m in zip(vs, ms)] + ws = [int(v / b) for v, b in zip(vs, bs)] + assert all(w * b % g == 0 for w, b, g in zip(ws, bs, gs)) + return ws, bs, gs + + +def init_weights(m): + """Performs ResNet-style weight initialization.""" + if isinstance(m, nn.Conv2d): + # Note that there is no bias due to BN + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) + elif isinstance(m, nn.BatchNorm2d): + zero_init_gamma = cfg.BN.ZERO_INIT_FINAL_GAMMA + zero_init_gamma = hasattr(m, "final_bn") and m.final_bn and zero_init_gamma + m.weight.data.fill_(0.0 if zero_init_gamma else 1.0) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(mean=0.0, std=0.01) + m.bias.data.zero_() + + +def drop_connect(x, drop_ratio): + """Drop connect (adapted from DARTS).""" + keep_ratio = 1.0 - drop_ratio + mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device) + mask.bernoulli_(keep_ratio) + x.div_(keep_ratio) + x.mul_(mask) + return x diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/effnet.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/effnet.py index 53826fdae79fa23b0205b98123740f338dd21b4b..f58649ccde03d6778dba4dafd8f0c1344319389e 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/effnet.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/effnet.py @@ -1,211 +1,211 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""EfficientNet models.""" - -from pycls.core.config import cfg -from pycls.models.blocks import ( - SE, - activation, - conv2d, - conv2d_cx, - drop_connect, - gap2d, - gap2d_cx, - init_weights, - linear, - linear_cx, - norm2d, - norm2d_cx, -) -from torch.nn import Dropout, Module - - -class EffHead(Module): - """EfficientNet head: 1x1, BN, AF, AvgPool, Dropout, FC.""" - - def __init__(self, w_in, w_out, num_classes): - super(EffHead, self).__init__() - dropout_ratio = cfg.EN.DROPOUT_RATIO - self.conv = conv2d(w_in, w_out, 1) - self.conv_bn = norm2d(w_out) - self.conv_af = activation() - self.avg_pool = gap2d(w_out) - self.dropout = Dropout(p=dropout_ratio) if dropout_ratio > 0 else None - self.fc = linear(w_out, num_classes, bias=True) - - def forward(self, x): - x = self.conv_af(self.conv_bn(self.conv(x))) - x = self.avg_pool(x) - x = x.view(x.size(0), -1) - x = self.dropout(x) if self.dropout else x - x = self.fc(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, num_classes): - cx = conv2d_cx(cx, w_in, w_out, 1) - cx = norm2d_cx(cx, w_out) - cx = gap2d_cx(cx, w_out) - cx = linear_cx(cx, w_out, num_classes, bias=True) - return cx - - -class MBConv(Module): - """Mobile inverted bottleneck block with SE.""" - - def __init__(self, w_in, exp_r, k, stride, se_r, w_out): - # Expansion, kxk dwise, BN, AF, SE, 1x1, BN, skip_connection - super(MBConv, self).__init__() - self.exp = None - w_exp = int(w_in * exp_r) - if w_exp != w_in: - self.exp = conv2d(w_in, w_exp, 1) - self.exp_bn = norm2d(w_exp) - self.exp_af = activation() - self.dwise = conv2d(w_exp, w_exp, k, stride=stride, groups=w_exp) - self.dwise_bn = norm2d(w_exp) - self.dwise_af = activation() - self.se = SE(w_exp, int(w_in * se_r)) - self.lin_proj = conv2d(w_exp, w_out, 1) - self.lin_proj_bn = norm2d(w_out) - self.has_skip = stride == 1 and w_in == w_out - - def forward(self, x): - f_x = self.exp_af(self.exp_bn(self.exp(x))) if self.exp else x - f_x = self.dwise_af(self.dwise_bn(self.dwise(f_x))) - f_x = self.se(f_x) - f_x = self.lin_proj_bn(self.lin_proj(f_x)) - if self.has_skip: - if self.training and cfg.EN.DC_RATIO > 0.0: - f_x = drop_connect(f_x, cfg.EN.DC_RATIO) - f_x = x + f_x - return f_x - - @staticmethod - def complexity(cx, w_in, exp_r, k, stride, se_r, w_out): - w_exp = int(w_in * exp_r) - if w_exp != w_in: - cx = conv2d_cx(cx, w_in, w_exp, 1) - cx = norm2d_cx(cx, w_exp) - cx = conv2d_cx(cx, w_exp, w_exp, k, stride=stride, groups=w_exp) - cx = norm2d_cx(cx, w_exp) - cx = SE.complexity(cx, w_exp, int(w_in * se_r)) - cx = conv2d_cx(cx, w_exp, w_out, 1) - cx = norm2d_cx(cx, w_out) - return cx - - -class EffStage(Module): - """EfficientNet stage.""" - - def __init__(self, w_in, exp_r, k, stride, se_r, w_out, d): - super(EffStage, self).__init__() - for i in range(d): - block = MBConv(w_in, exp_r, k, stride, se_r, w_out) - self.add_module("b{}".format(i + 1), block) - stride, w_in = 1, w_out - - def forward(self, x): - for block in self.children(): - x = block(x) - return x - - @staticmethod - def complexity(cx, w_in, exp_r, k, stride, se_r, w_out, d): - for _ in range(d): - cx = MBConv.complexity(cx, w_in, exp_r, k, stride, se_r, w_out) - stride, w_in = 1, w_out - return cx - - -class StemIN(Module): - """EfficientNet stem for ImageNet: 3x3, BN, AF.""" - - def __init__(self, w_in, w_out): - super(StemIN, self).__init__() - self.conv = conv2d(w_in, w_out, 3, stride=2) - self.bn = norm2d(w_out) - self.af = activation() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - 
@staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 3, stride=2) - cx = norm2d_cx(cx, w_out) - return cx - - -class EffNet(Module): - """EfficientNet model.""" - - @staticmethod - def get_params(): - return { - "sw": cfg.EN.STEM_W, - "ds": cfg.EN.DEPTHS, - "ws": cfg.EN.WIDTHS, - "exp_rs": cfg.EN.EXP_RATIOS, - "se_r": cfg.EN.SE_R, - "ss": cfg.EN.STRIDES, - "ks": cfg.EN.KERNELS, - "hw": cfg.EN.HEAD_W, - "nc": cfg.MODEL.NUM_CLASSES, - } - - def __init__(self, params=None): - super(EffNet, self).__init__() - p = EffNet.get_params() if not params else params - vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"] - sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs] - stage_params = list(zip(ds, ws, exp_rs, ss, ks)) - self.stem = StemIN(3, sw) - prev_w = sw - for i, (d, w, exp_r, stride, k) in enumerate(stage_params): - stage = EffStage(prev_w, exp_r, k, stride, se_r, w, d) - self.add_module("s{}".format(i + 1), stage) - prev_w = w - self.head = EffHead(prev_w, hw, nc) - self.apply(init_weights) - - def forward(self, x): - for module in self.children(): - x = module(x) - return x - - @staticmethod - def complexity(cx, params=None): - """Computes model complexity (if you alter the model, make sure to update).""" - p = EffNet.get_params() if not params else params - vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"] - sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs] - stage_params = list(zip(ds, ws, exp_rs, ss, ks)) - cx = StemIN.complexity(cx, 3, sw) - prev_w = sw - for d, w, exp_r, stride, k in stage_params: - cx = EffStage.complexity(cx, prev_w, exp_r, k, stride, se_r, w, d) - prev_w = w - cx = EffHead.complexity(cx, prev_w, hw, nc) - return cx +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""EfficientNet models.""" + +from pycls.core.config import cfg +from pycls.models.blocks import ( + SE, + activation, + conv2d, + conv2d_cx, + drop_connect, + gap2d, + gap2d_cx, + init_weights, + linear, + linear_cx, + norm2d, + norm2d_cx, +) +from torch.nn import Dropout, Module + + +class EffHead(Module): + """EfficientNet head: 1x1, BN, AF, AvgPool, Dropout, FC.""" + + def __init__(self, w_in, w_out, num_classes): + super(EffHead, self).__init__() + dropout_ratio = cfg.EN.DROPOUT_RATIO + self.conv = conv2d(w_in, w_out, 1) + self.conv_bn = norm2d(w_out) + self.conv_af = activation() + self.avg_pool = gap2d(w_out) + self.dropout = Dropout(p=dropout_ratio) if dropout_ratio > 0 else None + self.fc = linear(w_out, num_classes, bias=True) + + def forward(self, x): + x = self.conv_af(self.conv_bn(self.conv(x))) + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + x = self.dropout(x) if self.dropout else x + x = self.fc(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, num_classes): + cx = conv2d_cx(cx, w_in, w_out, 1) + cx = norm2d_cx(cx, w_out) + cx = gap2d_cx(cx, w_out) + cx = linear_cx(cx, w_out, num_classes, bias=True) + return cx + + +class MBConv(Module): + """Mobile inverted bottleneck block with SE.""" + + def __init__(self, w_in, exp_r, k, stride, se_r, w_out): + # Expansion, kxk dwise, BN, AF, SE, 1x1, BN, skip_connection + super(MBConv, self).__init__() + self.exp = None + w_exp = int(w_in * exp_r) + if w_exp != w_in: + self.exp = conv2d(w_in, w_exp, 1) + self.exp_bn = norm2d(w_exp) + self.exp_af = activation() + self.dwise = conv2d(w_exp, w_exp, k, stride=stride, groups=w_exp) + self.dwise_bn = norm2d(w_exp) + self.dwise_af = activation() + self.se = SE(w_exp, int(w_in * se_r)) + self.lin_proj = conv2d(w_exp, w_out, 1) + self.lin_proj_bn = norm2d(w_out) + self.has_skip = stride == 1 and w_in == w_out + + def forward(self, x): + f_x = self.exp_af(self.exp_bn(self.exp(x))) if self.exp else x + f_x = self.dwise_af(self.dwise_bn(self.dwise(f_x))) + f_x = self.se(f_x) + f_x = self.lin_proj_bn(self.lin_proj(f_x)) + if self.has_skip: + if self.training and cfg.EN.DC_RATIO > 0.0: + f_x = drop_connect(f_x, cfg.EN.DC_RATIO) + f_x = x + f_x + return f_x + + @staticmethod + def complexity(cx, w_in, exp_r, k, stride, se_r, w_out): + w_exp = int(w_in * exp_r) + if w_exp != w_in: + cx = conv2d_cx(cx, w_in, w_exp, 1) + cx = norm2d_cx(cx, w_exp) + cx = conv2d_cx(cx, w_exp, w_exp, k, stride=stride, groups=w_exp) + cx = norm2d_cx(cx, w_exp) + cx = SE.complexity(cx, w_exp, int(w_in * se_r)) + cx = conv2d_cx(cx, w_exp, w_out, 1) + cx = norm2d_cx(cx, w_out) + return cx + + +class EffStage(Module): + """EfficientNet stage.""" + + def __init__(self, w_in, exp_r, k, stride, se_r, w_out, d): + super(EffStage, self).__init__() + for i in range(d): + block = MBConv(w_in, exp_r, k, stride, se_r, w_out) + self.add_module("b{}".format(i + 1), block) + stride, w_in = 1, w_out + + def forward(self, x): + for block in self.children(): + x = block(x) + return x + + @staticmethod + def complexity(cx, w_in, exp_r, k, stride, se_r, w_out, d): + for _ in range(d): + cx = MBConv.complexity(cx, w_in, exp_r, k, stride, se_r, w_out) + stride, w_in = 1, w_out + return cx + + +class StemIN(Module): + """EfficientNet stem for ImageNet: 3x3, BN, AF.""" + + def __init__(self, w_in, w_out): + super(StemIN, self).__init__() + self.conv = conv2d(w_in, w_out, 3, stride=2) + self.bn = norm2d(w_out) + self.af = activation() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + 
@staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 3, stride=2) + cx = norm2d_cx(cx, w_out) + return cx + + +class EffNet(Module): + """EfficientNet model.""" + + @staticmethod + def get_params(): + return { + "sw": cfg.EN.STEM_W, + "ds": cfg.EN.DEPTHS, + "ws": cfg.EN.WIDTHS, + "exp_rs": cfg.EN.EXP_RATIOS, + "se_r": cfg.EN.SE_R, + "ss": cfg.EN.STRIDES, + "ks": cfg.EN.KERNELS, + "hw": cfg.EN.HEAD_W, + "nc": cfg.MODEL.NUM_CLASSES, + } + + def __init__(self, params=None): + super(EffNet, self).__init__() + p = EffNet.get_params() if not params else params + vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"] + sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs] + stage_params = list(zip(ds, ws, exp_rs, ss, ks)) + self.stem = StemIN(3, sw) + prev_w = sw + for i, (d, w, exp_r, stride, k) in enumerate(stage_params): + stage = EffStage(prev_w, exp_r, k, stride, se_r, w, d) + self.add_module("s{}".format(i + 1), stage) + prev_w = w + self.head = EffHead(prev_w, hw, nc) + self.apply(init_weights) + + def forward(self, x): + for module in self.children(): + x = module(x) + return x + + @staticmethod + def complexity(cx, params=None): + """Computes model complexity (if you alter the model, make sure to update).""" + p = EffNet.get_params() if not params else params + vs = ["sw", "ds", "ws", "exp_rs", "se_r", "ss", "ks", "hw", "nc"] + sw, ds, ws, exp_rs, se_r, ss, ks, hw, nc = [p[v] for v in vs] + stage_params = list(zip(ds, ws, exp_rs, ss, ks)) + cx = StemIN.complexity(cx, 3, sw) + prev_w = sw + for d, w, exp_r, stride, k in stage_params: + cx = EffStage.complexity(cx, prev_w, exp_r, k, stride, se_r, w, d) + prev_w = w + cx = EffHead.complexity(cx, prev_w, hw, nc) + return cx diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/model_zoo.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/model_zoo.py index 301e3396deea1683f00f1e31a761213f5db1cde4..1391e1d00a70cf5fc85208585f82fed6b9de1693 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/model_zoo.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/model_zoo.py @@ -1,196 +1,196 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Model zoo.""" - -import os - -import pycls.core.builders as builders -import pycls.core.checkpoint as cp -from pycls.core.config import cfg, reset_cfg -from pycls.core.io import cache_url - - -# URL prefix for pretrained models -_URL_WEIGHTS = "https://dl.fbaipublicfiles.com/pycls" - -# URL prefix for model config files -_URL_CONFIGS = "https://raw.githubusercontent.com/facebookresearch/pycls/master/configs" - -# Model weights download cache directory -_DOWNLOAD_CACHE = "/tmp/pycls-download-cache" - -# Predefined model config files -_MODEL_ZOO_CONFIGS = { - "RegNetX-200MF": "dds_baselines/regnetx/RegNetX-200MF_dds_8gpu.yaml", - "RegNetX-400MF": "dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml", - "RegNetX-600MF": "dds_baselines/regnetx/RegNetX-600MF_dds_8gpu.yaml", - "RegNetX-800MF": "dds_baselines/regnetx/RegNetX-800MF_dds_8gpu.yaml", - "RegNetX-1.6GF": "dds_baselines/regnetx/RegNetX-1.6GF_dds_8gpu.yaml", - "RegNetX-3.2GF": "dds_baselines/regnetx/RegNetX-3.2GF_dds_8gpu.yaml", - "RegNetX-4.0GF": "dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml", - "RegNetX-6.4GF": "dds_baselines/regnetx/RegNetX-6.4GF_dds_8gpu.yaml", - "RegNetX-8.0GF": "dds_baselines/regnetx/RegNetX-8.0GF_dds_8gpu.yaml", - "RegNetX-12GF": "dds_baselines/regnetx/RegNetX-12GF_dds_8gpu.yaml", - "RegNetX-16GF": "dds_baselines/regnetx/RegNetX-16GF_dds_8gpu.yaml", - "RegNetX-32GF": "dds_baselines/regnetx/RegNetX-32GF_dds_8gpu.yaml", - "RegNetY-200MF": "dds_baselines/regnety/RegNetY-200MF_dds_8gpu.yaml", - "RegNetY-400MF": "dds_baselines/regnety/RegNetY-400MF_dds_8gpu.yaml", - "RegNetY-600MF": "dds_baselines/regnety/RegNetY-600MF_dds_8gpu.yaml", - "RegNetY-800MF": "dds_baselines/regnety/RegNetY-800MF_dds_8gpu.yaml", - "RegNetY-1.6GF": "dds_baselines/regnety/RegNetY-1.6GF_dds_8gpu.yaml", - "RegNetY-3.2GF": "dds_baselines/regnety/RegNetY-3.2GF_dds_8gpu.yaml", - "RegNetY-4.0GF": "dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml", - "RegNetY-6.4GF": "dds_baselines/regnety/RegNetY-6.4GF_dds_8gpu.yaml", - "RegNetY-8.0GF": "dds_baselines/regnety/RegNetY-8.0GF_dds_8gpu.yaml", - "RegNetY-12GF": "dds_baselines/regnety/RegNetY-12GF_dds_8gpu.yaml", - "RegNetY-16GF": "dds_baselines/regnety/RegNetY-16GF_dds_8gpu.yaml", - "RegNetY-32GF": "dds_baselines/regnety/RegNetY-32GF_dds_8gpu.yaml", - "ResNet-50": "dds_baselines/resnet/R-50-1x64d_dds_8gpu.yaml", - "ResNet-101": "dds_baselines/resnet/R-101-1x64d_dds_8gpu.yaml", - "ResNet-152": "dds_baselines/resnet/R-152-1x64d_dds_8gpu.yaml", - "ResNeXt-50": "dds_baselines/resnext/X-50-32x4d_dds_8gpu.yaml", - "ResNeXt-101": "dds_baselines/resnext/X-101-32x4d_dds_8gpu.yaml", - "ResNeXt-152": "dds_baselines/resnext/X-152-32x4d_dds_8gpu.yaml", - "EfficientNet-B0": "dds_baselines/effnet/EN-B0_dds_8gpu.yaml", - "EfficientNet-B1": "dds_baselines/effnet/EN-B1_dds_8gpu.yaml", - "EfficientNet-B2": "dds_baselines/effnet/EN-B2_dds_8gpu.yaml", - "EfficientNet-B3": "dds_baselines/effnet/EN-B3_dds_8gpu.yaml", - "EfficientNet-B4": "dds_baselines/effnet/EN-B4_dds_8gpu.yaml", - "EfficientNet-B5": "dds_baselines/effnet/EN-B5_dds_8gpu.yaml", -} - -# Predefined model weight files -_MODEL_ZOO_WEIGHTS = { - "RegNetX-200MF": "dds_baselines/160905981/RegNetX-200MF_dds_8gpu.pyth", - "RegNetX-400MF": "dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth", - "RegNetX-600MF": "dds_baselines/160906442/RegNetX-600MF_dds_8gpu.pyth", - "RegNetX-800MF": "dds_baselines/160906036/RegNetX-800MF_dds_8gpu.pyth", - "RegNetX-1.6GF": "dds_baselines/160990626/RegNetX-1.6GF_dds_8gpu.pyth", - "RegNetX-3.2GF": 
"dds_baselines/160906139/RegNetX-3.2GF_dds_8gpu.pyth", - "RegNetX-4.0GF": "dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth", - "RegNetX-6.4GF": "dds_baselines/161116590/RegNetX-6.4GF_dds_8gpu.pyth", - "RegNetX-8.0GF": "dds_baselines/161107726/RegNetX-8.0GF_dds_8gpu.pyth", - "RegNetX-12GF": "dds_baselines/160906020/RegNetX-12GF_dds_8gpu.pyth", - "RegNetX-16GF": "dds_baselines/158460855/RegNetX-16GF_dds_8gpu.pyth", - "RegNetX-32GF": "dds_baselines/158188473/RegNetX-32GF_dds_8gpu.pyth", - "RegNetY-200MF": "dds_baselines/176245422/RegNetY-200MF_dds_8gpu.pyth", - "RegNetY-400MF": "dds_baselines/160906449/RegNetY-400MF_dds_8gpu.pyth", - "RegNetY-600MF": "dds_baselines/160981443/RegNetY-600MF_dds_8gpu.pyth", - "RegNetY-800MF": "dds_baselines/160906567/RegNetY-800MF_dds_8gpu.pyth", - "RegNetY-1.6GF": "dds_baselines/160906681/RegNetY-1.6GF_dds_8gpu.pyth", - "RegNetY-3.2GF": "dds_baselines/160906834/RegNetY-3.2GF_dds_8gpu.pyth", - "RegNetY-4.0GF": "dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth", - "RegNetY-6.4GF": "dds_baselines/160907112/RegNetY-6.4GF_dds_8gpu.pyth", - "RegNetY-8.0GF": "dds_baselines/161160905/RegNetY-8.0GF_dds_8gpu.pyth", - "RegNetY-12GF": "dds_baselines/160907100/RegNetY-12GF_dds_8gpu.pyth", - "RegNetY-16GF": "dds_baselines/161303400/RegNetY-16GF_dds_8gpu.pyth", - "RegNetY-32GF": "dds_baselines/161277763/RegNetY-32GF_dds_8gpu.pyth", - "ResNet-50": "dds_baselines/161235311/R-50-1x64d_dds_8gpu.pyth", - "ResNet-101": "dds_baselines/161167170/R-101-1x64d_dds_8gpu.pyth", - "ResNet-152": "dds_baselines/161167467/R-152-1x64d_dds_8gpu.pyth", - "ResNeXt-50": "dds_baselines/161167411/X-50-32x4d_dds_8gpu.pyth", - "ResNeXt-101": "dds_baselines/161167590/X-101-32x4d_dds_8gpu.pyth", - "ResNeXt-152": "dds_baselines/162471172/X-152-32x4d_dds_8gpu.pyth", - "EfficientNet-B0": "dds_baselines/161305613/EN-B0_dds_8gpu.pyth", - "EfficientNet-B1": "dds_baselines/161304979/EN-B1_dds_8gpu.pyth", - "EfficientNet-B2": "dds_baselines/161305015/EN-B2_dds_8gpu.pyth", - "EfficientNet-B3": "dds_baselines/161305060/EN-B3_dds_8gpu.pyth", - "EfficientNet-B4": "dds_baselines/161305098/EN-B4_dds_8gpu.pyth", - "EfficientNet-B5": "dds_baselines/161305138/EN-B5_dds_8gpu.pyth", -} - - -def get_model_list(): - """Get list of all valid models in model zoo.""" - return _MODEL_ZOO_WEIGHTS.keys() - - -def get_config_file(name): - """Get file with model config (downloads if necessary).""" - err_str = "Model {} not found in the model zoo.".format(name) - assert name in _MODEL_ZOO_CONFIGS.keys(), err_str - config_url = os.path.join(_URL_CONFIGS, _MODEL_ZOO_CONFIGS[name]) - return cache_url(config_url, _DOWNLOAD_CACHE, _URL_CONFIGS) - - -def get_weights_file(name): - """Get file with model weights (downloads if necessary).""" - err_str = "Model {} not found in the model zoo.".format(name) - assert name in _MODEL_ZOO_WEIGHTS.keys(), err_str - weights_url = os.path.join(_URL_WEIGHTS, _MODEL_ZOO_WEIGHTS[name]) - return cache_url(weights_url, _DOWNLOAD_CACHE, _URL_WEIGHTS) - - -def get_model_info(name): - """Return model info (useful for debugging).""" - config_url = _MODEL_ZOO_CONFIGS[name] - weight_url = _MODEL_ZOO_WEIGHTS[name] - model_id = weight_url.split("/")[1] - config_url_full = os.path.join(_URL_CONFIGS, _MODEL_ZOO_CONFIGS[name]) - weight_url_full = os.path.join(_URL_WEIGHTS, _MODEL_ZOO_WEIGHTS[name]) - return config_url, weight_url, model_id, config_url_full, weight_url_full - - -def build_model(name, pretrained=False, cfg_list=()): - """Constructs a predefined model (note: loads global config as well).""" - # 
Load the config - reset_cfg() - config_file = get_config_file(name) - cfg.merge_from_file(config_file) - cfg.merge_from_list(cfg_list) - # Construct model - model = builders.build_model() - # Load pretrained weights - if pretrained: - weights_file = get_weights_file(name) - cp.load_checkpoint(weights_file, model) - return model - - -def regnetx(name, pretrained=False, cfg_list=()): - """Constructs a RegNetX model (note: loads global config as well).""" - name = name if "RegNetX-" in name else "RegNetX-" + name - return build_model(name, pretrained, cfg_list) - - -def regnety(name, pretrained=False, cfg_list=()): - """Constructs a RegNetY model (note: loads global config as well).""" - name = name if "RegNetY-" in name else "RegNetY-" + name - return build_model(name, pretrained, cfg_list) - - -def resnet(name, pretrained=False, cfg_list=()): - """Constructs a ResNet model (note: loads global config as well).""" - name = name if "ResNet-" in name else "ResNet-" + name - return build_model(name, pretrained, cfg_list) - - -def resnext(name, pretrained=False, cfg_list=()): - """Constructs a ResNeXt model (note: loads global config as well).""" - name = name if "ResNeXt-" in name else "ResNeXt-" + name - return build_model(name, pretrained, cfg_list) - - -def effnet(name, pretrained=False, cfg_list=()): - """Constructs an EfficientNet model (note: loads global config as well).""" - name = name if "EfficientNet-" in name else "EfficientNet-" + name - return build_model(name, pretrained, cfg_list) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Model zoo.""" + +import os + +import pycls.core.builders as builders +import pycls.core.checkpoint as cp +from pycls.core.config import cfg, reset_cfg +from pycls.core.io import cache_url + + +# URL prefix for pretrained models +_URL_WEIGHTS = "https://dl.fbaipublicfiles.com/pycls" + +# URL prefix for model config files +_URL_CONFIGS = "https://raw.githubusercontent.com/facebookresearch/pycls/master/configs" + +# Model weights download cache directory +_DOWNLOAD_CACHE = "/tmp/pycls-download-cache" + +# Predefined model config files +_MODEL_ZOO_CONFIGS = { + "RegNetX-200MF": "dds_baselines/regnetx/RegNetX-200MF_dds_8gpu.yaml", + "RegNetX-400MF": "dds_baselines/regnetx/RegNetX-400MF_dds_8gpu.yaml", + "RegNetX-600MF": "dds_baselines/regnetx/RegNetX-600MF_dds_8gpu.yaml", + "RegNetX-800MF": "dds_baselines/regnetx/RegNetX-800MF_dds_8gpu.yaml", + "RegNetX-1.6GF": "dds_baselines/regnetx/RegNetX-1.6GF_dds_8gpu.yaml", + "RegNetX-3.2GF": "dds_baselines/regnetx/RegNetX-3.2GF_dds_8gpu.yaml", + "RegNetX-4.0GF": "dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml", + "RegNetX-6.4GF": "dds_baselines/regnetx/RegNetX-6.4GF_dds_8gpu.yaml", + "RegNetX-8.0GF": "dds_baselines/regnetx/RegNetX-8.0GF_dds_8gpu.yaml", + "RegNetX-12GF": "dds_baselines/regnetx/RegNetX-12GF_dds_8gpu.yaml", + "RegNetX-16GF": "dds_baselines/regnetx/RegNetX-16GF_dds_8gpu.yaml", + "RegNetX-32GF": "dds_baselines/regnetx/RegNetX-32GF_dds_8gpu.yaml", + "RegNetY-200MF": "dds_baselines/regnety/RegNetY-200MF_dds_8gpu.yaml", + "RegNetY-400MF": "dds_baselines/regnety/RegNetY-400MF_dds_8gpu.yaml", + "RegNetY-600MF": "dds_baselines/regnety/RegNetY-600MF_dds_8gpu.yaml", + "RegNetY-800MF": "dds_baselines/regnety/RegNetY-800MF_dds_8gpu.yaml", + "RegNetY-1.6GF": "dds_baselines/regnety/RegNetY-1.6GF_dds_8gpu.yaml", + "RegNetY-3.2GF": "dds_baselines/regnety/RegNetY-3.2GF_dds_8gpu.yaml", + "RegNetY-4.0GF": "dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml", + "RegNetY-6.4GF": "dds_baselines/regnety/RegNetY-6.4GF_dds_8gpu.yaml", + "RegNetY-8.0GF": "dds_baselines/regnety/RegNetY-8.0GF_dds_8gpu.yaml", + "RegNetY-12GF": "dds_baselines/regnety/RegNetY-12GF_dds_8gpu.yaml", + "RegNetY-16GF": "dds_baselines/regnety/RegNetY-16GF_dds_8gpu.yaml", + "RegNetY-32GF": "dds_baselines/regnety/RegNetY-32GF_dds_8gpu.yaml", + "ResNet-50": "dds_baselines/resnet/R-50-1x64d_dds_8gpu.yaml", + "ResNet-101": "dds_baselines/resnet/R-101-1x64d_dds_8gpu.yaml", + "ResNet-152": "dds_baselines/resnet/R-152-1x64d_dds_8gpu.yaml", + "ResNeXt-50": "dds_baselines/resnext/X-50-32x4d_dds_8gpu.yaml", + "ResNeXt-101": "dds_baselines/resnext/X-101-32x4d_dds_8gpu.yaml", + "ResNeXt-152": "dds_baselines/resnext/X-152-32x4d_dds_8gpu.yaml", + "EfficientNet-B0": "dds_baselines/effnet/EN-B0_dds_8gpu.yaml", + "EfficientNet-B1": "dds_baselines/effnet/EN-B1_dds_8gpu.yaml", + "EfficientNet-B2": "dds_baselines/effnet/EN-B2_dds_8gpu.yaml", + "EfficientNet-B3": "dds_baselines/effnet/EN-B3_dds_8gpu.yaml", + "EfficientNet-B4": "dds_baselines/effnet/EN-B4_dds_8gpu.yaml", + "EfficientNet-B5": "dds_baselines/effnet/EN-B5_dds_8gpu.yaml", +} + +# Predefined model weight files +_MODEL_ZOO_WEIGHTS = { + "RegNetX-200MF": "dds_baselines/160905981/RegNetX-200MF_dds_8gpu.pyth", + "RegNetX-400MF": "dds_baselines/160905967/RegNetX-400MF_dds_8gpu.pyth", + "RegNetX-600MF": "dds_baselines/160906442/RegNetX-600MF_dds_8gpu.pyth", + "RegNetX-800MF": "dds_baselines/160906036/RegNetX-800MF_dds_8gpu.pyth", + "RegNetX-1.6GF": "dds_baselines/160990626/RegNetX-1.6GF_dds_8gpu.pyth", + "RegNetX-3.2GF": 
"dds_baselines/160906139/RegNetX-3.2GF_dds_8gpu.pyth", + "RegNetX-4.0GF": "dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth", + "RegNetX-6.4GF": "dds_baselines/161116590/RegNetX-6.4GF_dds_8gpu.pyth", + "RegNetX-8.0GF": "dds_baselines/161107726/RegNetX-8.0GF_dds_8gpu.pyth", + "RegNetX-12GF": "dds_baselines/160906020/RegNetX-12GF_dds_8gpu.pyth", + "RegNetX-16GF": "dds_baselines/158460855/RegNetX-16GF_dds_8gpu.pyth", + "RegNetX-32GF": "dds_baselines/158188473/RegNetX-32GF_dds_8gpu.pyth", + "RegNetY-200MF": "dds_baselines/176245422/RegNetY-200MF_dds_8gpu.pyth", + "RegNetY-400MF": "dds_baselines/160906449/RegNetY-400MF_dds_8gpu.pyth", + "RegNetY-600MF": "dds_baselines/160981443/RegNetY-600MF_dds_8gpu.pyth", + "RegNetY-800MF": "dds_baselines/160906567/RegNetY-800MF_dds_8gpu.pyth", + "RegNetY-1.6GF": "dds_baselines/160906681/RegNetY-1.6GF_dds_8gpu.pyth", + "RegNetY-3.2GF": "dds_baselines/160906834/RegNetY-3.2GF_dds_8gpu.pyth", + "RegNetY-4.0GF": "dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth", + "RegNetY-6.4GF": "dds_baselines/160907112/RegNetY-6.4GF_dds_8gpu.pyth", + "RegNetY-8.0GF": "dds_baselines/161160905/RegNetY-8.0GF_dds_8gpu.pyth", + "RegNetY-12GF": "dds_baselines/160907100/RegNetY-12GF_dds_8gpu.pyth", + "RegNetY-16GF": "dds_baselines/161303400/RegNetY-16GF_dds_8gpu.pyth", + "RegNetY-32GF": "dds_baselines/161277763/RegNetY-32GF_dds_8gpu.pyth", + "ResNet-50": "dds_baselines/161235311/R-50-1x64d_dds_8gpu.pyth", + "ResNet-101": "dds_baselines/161167170/R-101-1x64d_dds_8gpu.pyth", + "ResNet-152": "dds_baselines/161167467/R-152-1x64d_dds_8gpu.pyth", + "ResNeXt-50": "dds_baselines/161167411/X-50-32x4d_dds_8gpu.pyth", + "ResNeXt-101": "dds_baselines/161167590/X-101-32x4d_dds_8gpu.pyth", + "ResNeXt-152": "dds_baselines/162471172/X-152-32x4d_dds_8gpu.pyth", + "EfficientNet-B0": "dds_baselines/161305613/EN-B0_dds_8gpu.pyth", + "EfficientNet-B1": "dds_baselines/161304979/EN-B1_dds_8gpu.pyth", + "EfficientNet-B2": "dds_baselines/161305015/EN-B2_dds_8gpu.pyth", + "EfficientNet-B3": "dds_baselines/161305060/EN-B3_dds_8gpu.pyth", + "EfficientNet-B4": "dds_baselines/161305098/EN-B4_dds_8gpu.pyth", + "EfficientNet-B5": "dds_baselines/161305138/EN-B5_dds_8gpu.pyth", +} + + +def get_model_list(): + """Get list of all valid models in model zoo.""" + return _MODEL_ZOO_WEIGHTS.keys() + + +def get_config_file(name): + """Get file with model config (downloads if necessary).""" + err_str = "Model {} not found in the model zoo.".format(name) + assert name in _MODEL_ZOO_CONFIGS.keys(), err_str + config_url = os.path.join(_URL_CONFIGS, _MODEL_ZOO_CONFIGS[name]) + return cache_url(config_url, _DOWNLOAD_CACHE, _URL_CONFIGS) + + +def get_weights_file(name): + """Get file with model weights (downloads if necessary).""" + err_str = "Model {} not found in the model zoo.".format(name) + assert name in _MODEL_ZOO_WEIGHTS.keys(), err_str + weights_url = os.path.join(_URL_WEIGHTS, _MODEL_ZOO_WEIGHTS[name]) + return cache_url(weights_url, _DOWNLOAD_CACHE, _URL_WEIGHTS) + + +def get_model_info(name): + """Return model info (useful for debugging).""" + config_url = _MODEL_ZOO_CONFIGS[name] + weight_url = _MODEL_ZOO_WEIGHTS[name] + model_id = weight_url.split("/")[1] + config_url_full = os.path.join(_URL_CONFIGS, _MODEL_ZOO_CONFIGS[name]) + weight_url_full = os.path.join(_URL_WEIGHTS, _MODEL_ZOO_WEIGHTS[name]) + return config_url, weight_url, model_id, config_url_full, weight_url_full + + +def build_model(name, pretrained=False, cfg_list=()): + """Constructs a predefined model (note: loads global config as well).""" + # 
Load the config + reset_cfg() + config_file = get_config_file(name) + cfg.merge_from_file(config_file) + cfg.merge_from_list(cfg_list) + # Construct model + model = builders.build_model() + # Load pretrained weights + if pretrained: + weights_file = get_weights_file(name) + cp.load_checkpoint(weights_file, model) + return model + + +def regnetx(name, pretrained=False, cfg_list=()): + """Constructs a RegNetX model (note: loads global config as well).""" + name = name if "RegNetX-" in name else "RegNetX-" + name + return build_model(name, pretrained, cfg_list) + + +def regnety(name, pretrained=False, cfg_list=()): + """Constructs a RegNetY model (note: loads global config as well).""" + name = name if "RegNetY-" in name else "RegNetY-" + name + return build_model(name, pretrained, cfg_list) + + +def resnet(name, pretrained=False, cfg_list=()): + """Constructs a ResNet model (note: loads global config as well).""" + name = name if "ResNet-" in name else "ResNet-" + name + return build_model(name, pretrained, cfg_list) + + +def resnext(name, pretrained=False, cfg_list=()): + """Constructs a ResNeXt model (note: loads global config as well).""" + name = name if "ResNeXt-" in name else "ResNeXt-" + name + return build_model(name, pretrained, cfg_list) + + +def effnet(name, pretrained=False, cfg_list=()): + """Constructs an EfficientNet model (note: loads global config as well).""" + name = name if "EfficientNet-" in name else "EfficientNet-" + name + return build_model(name, pretrained, cfg_list) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/regnet.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/regnet.py index 646336bbf572d23f759e103b0170eabaa880f593..271deeb4fa9be7aee39d5b39a9200c9b88753a2b 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/regnet.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/regnet.py @@ -1,83 +1,83 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
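[Editor's note: illustrative usage sketch, not part of the patch. The model-zoo helpers added above (get_model_list, get_config_file/get_weights_file, build_model and the per-family wrappers) are exercised roughly as follows; this assumes the bundled pycls package from the EfficientNet-B1 tree is importable and that the matching YAML config can be fetched into the download cache.]

    from pycls.models import model_zoo  # assumption: pycls/ from this directory is on PYTHONPATH

    print(sorted(model_zoo.get_model_list()))             # keys of _MODEL_ZOO_WEIGHTS above
    model = model_zoo.regnety("800MF", pretrained=False)  # resolves to "RegNetY-800MF"; fetches its YAML config
    model.eval()                                          # build_model also resets and reloads the global cfg
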
- -"""RegNet models.""" - -import numpy as np -import pycls.models.blocks as bk -from pycls.core.config import cfg -from pycls.models.anynet import AnyNet - - -def generate_regnet(w_a, w_0, w_m, d, q=8): - """Generates per stage widths and depths from RegNet parameters.""" - assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 - # Generate continuous per-block ws - ws_cont = np.arange(d) * w_a + w_0 - # Generate quantized per-block ws - ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) - ws_all = w_0 * np.power(w_m, ks) - ws_all = np.round(np.divide(ws_all, q)).astype(int) * q - # Generate per stage ws and ds (assumes ws_all are sorted) - ws, ds = np.unique(ws_all, return_counts=True) - # Compute number of actual stages and total possible stages - num_stages, total_stages = len(ws), ks.max() + 1 - # Convert numpy arrays to lists and return - ws, ds, ws_all, ws_cont = (x.tolist() for x in (ws, ds, ws_all, ws_cont)) - return ws, ds, num_stages, total_stages, ws_all, ws_cont - - -class RegNet(AnyNet): - """RegNet model.""" - - @staticmethod - def get_params(): - """Convert RegNet to AnyNet parameter format.""" - # Generates per stage ws, ds, gs, bs, and ss from RegNet parameters - w_a, w_0, w_m, d = cfg.REGNET.WA, cfg.REGNET.W0, cfg.REGNET.WM, cfg.REGNET.DEPTH - ws, ds = generate_regnet(w_a, w_0, w_m, d)[0:2] - ss = [cfg.REGNET.STRIDE for _ in ws] - bs = [cfg.REGNET.BOT_MUL for _ in ws] - gs = [cfg.REGNET.GROUP_W for _ in ws] - ws, bs, gs = bk.adjust_block_compatibility(ws, bs, gs) - # Get AnyNet arguments defining the RegNet - return { - "stem_type": cfg.REGNET.STEM_TYPE, - "stem_w": cfg.REGNET.STEM_W, - "block_type": cfg.REGNET.BLOCK_TYPE, - "depths": ds, - "widths": ws, - "strides": ss, - "bot_muls": bs, - "group_ws": gs, - "se_r": cfg.REGNET.SE_R if cfg.REGNET.SE_ON else 0, - "num_classes": cfg.MODEL.NUM_CLASSES, - } - - def __init__(self): - params = RegNet.get_params() - super(RegNet, self).__init__(params) - - @staticmethod - def complexity(cx, params=None): - """Computes model complexity (if you alter the model, make sure to update).""" - params = RegNet.get_params() if not params else params - return AnyNet.complexity(cx, params) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""RegNet models.""" + +import numpy as np +import pycls.models.blocks as bk +from pycls.core.config import cfg +from pycls.models.anynet import AnyNet + + +def generate_regnet(w_a, w_0, w_m, d, q=8): + """Generates per stage widths and depths from RegNet parameters.""" + assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 + # Generate continuous per-block ws + ws_cont = np.arange(d) * w_a + w_0 + # Generate quantized per-block ws + ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) + ws_all = w_0 * np.power(w_m, ks) + ws_all = np.round(np.divide(ws_all, q)).astype(int) * q + # Generate per stage ws and ds (assumes ws_all are sorted) + ws, ds = np.unique(ws_all, return_counts=True) + # Compute number of actual stages and total possible stages + num_stages, total_stages = len(ws), ks.max() + 1 + # Convert numpy arrays to lists and return + ws, ds, ws_all, ws_cont = (x.tolist() for x in (ws, ds, ws_all, ws_cont)) + return ws, ds, num_stages, total_stages, ws_all, ws_cont + + +class RegNet(AnyNet): + """RegNet model.""" + + @staticmethod + def get_params(): + """Convert RegNet to AnyNet parameter format.""" + # Generates per stage ws, ds, gs, bs, and ss from RegNet parameters + w_a, w_0, w_m, d = cfg.REGNET.WA, cfg.REGNET.W0, cfg.REGNET.WM, cfg.REGNET.DEPTH + ws, ds = generate_regnet(w_a, w_0, w_m, d)[0:2] + ss = [cfg.REGNET.STRIDE for _ in ws] + bs = [cfg.REGNET.BOT_MUL for _ in ws] + gs = [cfg.REGNET.GROUP_W for _ in ws] + ws, bs, gs = bk.adjust_block_compatibility(ws, bs, gs) + # Get AnyNet arguments defining the RegNet + return { + "stem_type": cfg.REGNET.STEM_TYPE, + "stem_w": cfg.REGNET.STEM_W, + "block_type": cfg.REGNET.BLOCK_TYPE, + "depths": ds, + "widths": ws, + "strides": ss, + "bot_muls": bs, + "group_ws": gs, + "se_r": cfg.REGNET.SE_R if cfg.REGNET.SE_ON else 0, + "num_classes": cfg.MODEL.NUM_CLASSES, + } + + def __init__(self): + params = RegNet.get_params() + super(RegNet, self).__init__(params) + + @staticmethod + def complexity(cx, params=None): + """Computes model complexity (if you alter the model, make sure to update).""" + params = RegNet.get_params() if not params else params + return AnyNet.complexity(cx, params) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/resnet.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/resnet.py index 5f3dbdde7b97cd2751534a657a0eaa58c2c60a84..2a26bbe8267eb14bb2c8f7e55c4e2aeef037d443 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/resnet.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/pycls/models/resnet.py @@ -1,301 +1,301 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
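[Editor's note: worked example, not part of the patch. The quantization in generate_regnet() above maps a linear per-block width ramp onto a few discrete stage widths. With the default RegNet parameters that appear later in this diff (WA=5.0, W0=32, WM=2.5, DEPTH=10, q=8), the ten per-block widths collapse into two stages of widths 32 and 80 with depths 4 and 6.]

    from pycls.models.regnet import generate_regnet  # assumption: bundled pycls is importable

    ws, ds, num_stages, _, ws_all, _ = generate_regnet(w_a=5.0, w_0=32, w_m=2.5, d=10)
    assert (ws, ds, num_stages) == ([32, 80], [4, 6], 2)
    print(ws_all)  # per-block widths: [32, 32, 32, 32, 80, 80, 80, 80, 80, 80]
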
- -"""ResNe(X)t models.""" - -from pycls.core.config import cfg -from pycls.models.blocks import ( - activation, - conv2d, - conv2d_cx, - gap2d, - gap2d_cx, - init_weights, - linear, - linear_cx, - norm2d, - norm2d_cx, - pool2d, - pool2d_cx, -) -from torch.nn import Module - - -# Stage depths for ImageNet models -_IN_STAGE_DS = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)} - - -def get_trans_fun(name): - """Retrieves the transformation function by name.""" - trans_funs = { - "basic_transform": BasicTransform, - "bottleneck_transform": BottleneckTransform, - } - err_str = "Transformation function '{}' not supported" - assert name in trans_funs.keys(), err_str.format(name) - return trans_funs[name] - - -class ResHead(Module): - """ResNet head: AvgPool, 1x1.""" - - def __init__(self, w_in, num_classes): - super(ResHead, self).__init__() - self.avg_pool = gap2d(w_in) - self.fc = linear(w_in, num_classes, bias=True) - - def forward(self, x): - x = self.avg_pool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - return x - - @staticmethod - def complexity(cx, w_in, num_classes): - cx = gap2d_cx(cx, w_in) - cx = linear_cx(cx, w_in, num_classes, bias=True) - return cx - - -class BasicTransform(Module): - """Basic transformation: 3x3, BN, AF, 3x3, BN.""" - - def __init__(self, w_in, w_out, stride, w_b=None, groups=1): - err_str = "Basic transform does not support w_b and groups options" - assert w_b is None and groups == 1, err_str - super(BasicTransform, self).__init__() - self.a = conv2d(w_in, w_out, 3, stride=stride) - self.a_bn = norm2d(w_out) - self.a_af = activation() - self.b = conv2d(w_out, w_out, 3) - self.b_bn = norm2d(w_out) - self.b_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, w_b=None, groups=1): - err_str = "Basic transform does not support w_b and groups options" - assert w_b is None and groups == 1, err_str - cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) - cx = norm2d_cx(cx, w_out) - cx = conv2d_cx(cx, w_out, w_out, 3) - cx = norm2d_cx(cx, w_out) - return cx - - -class BottleneckTransform(Module): - """Bottleneck transformation: 1x1, BN, AF, 3x3, BN, AF, 1x1, BN.""" - - def __init__(self, w_in, w_out, stride, w_b, groups): - super(BottleneckTransform, self).__init__() - # MSRA -> stride=2 is on 1x1; TH/C2 -> stride=2 is on 3x3 - (s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride) - self.a = conv2d(w_in, w_b, 1, stride=s1) - self.a_bn = norm2d(w_b) - self.a_af = activation() - self.b = conv2d(w_b, w_b, 3, stride=s3, groups=groups) - self.b_bn = norm2d(w_b) - self.b_af = activation() - self.c = conv2d(w_b, w_out, 1) - self.c_bn = norm2d(w_out) - self.c_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, w_b, groups): - (s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride) - cx = conv2d_cx(cx, w_in, w_b, 1, stride=s1) - cx = norm2d_cx(cx, w_b) - cx = conv2d_cx(cx, w_b, w_b, 3, stride=s3, groups=groups) - cx = norm2d_cx(cx, w_b) - cx = conv2d_cx(cx, w_b, w_out, 1) - cx = norm2d_cx(cx, w_out) - return cx - - -class ResBlock(Module): - """Residual block: x + f(x).""" - - def __init__(self, w_in, w_out, stride, trans_fun, w_b=None, groups=1): - super(ResBlock, self).__init__() - self.proj, self.bn = None, None - if (w_in != w_out) or (stride != 1): - self.proj = conv2d(w_in, w_out, 1, stride=stride) - self.bn = norm2d(w_out) 
- self.f = trans_fun(w_in, w_out, stride, w_b, groups) - self.af = activation() - - def forward(self, x): - x_p = self.bn(self.proj(x)) if self.proj else x - return self.af(x_p + self.f(x)) - - @staticmethod - def complexity(cx, w_in, w_out, stride, trans_fun, w_b, groups): - if (w_in != w_out) or (stride != 1): - h, w = cx["h"], cx["w"] - cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) - cx = norm2d_cx(cx, w_out) - cx["h"], cx["w"] = h, w - cx = trans_fun.complexity(cx, w_in, w_out, stride, w_b, groups) - return cx - - -class ResStage(Module): - """Stage of ResNet.""" - - def __init__(self, w_in, w_out, stride, d, w_b=None, groups=1): - super(ResStage, self).__init__() - for i in range(d): - b_stride = stride if i == 0 else 1 - b_w_in = w_in if i == 0 else w_out - trans_fun = get_trans_fun(cfg.RESNET.TRANS_FUN) - res_block = ResBlock(b_w_in, w_out, b_stride, trans_fun, w_b, groups) - self.add_module("b{}".format(i + 1), res_block) - - def forward(self, x): - for block in self.children(): - x = block(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out, stride, d, w_b=None, groups=1): - for i in range(d): - b_stride = stride if i == 0 else 1 - b_w_in = w_in if i == 0 else w_out - trans_f = get_trans_fun(cfg.RESNET.TRANS_FUN) - cx = ResBlock.complexity(cx, b_w_in, w_out, b_stride, trans_f, w_b, groups) - return cx - - -class ResStemCifar(Module): - """ResNet stem for CIFAR: 3x3, BN, AF.""" - - def __init__(self, w_in, w_out): - super(ResStemCifar, self).__init__() - self.conv = conv2d(w_in, w_out, 3) - self.bn = norm2d(w_out) - self.af = activation() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 3) - cx = norm2d_cx(cx, w_out) - return cx - - -class ResStemIN(Module): - """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" - - def __init__(self, w_in, w_out): - super(ResStemIN, self).__init__() - self.conv = conv2d(w_in, w_out, 7, stride=2) - self.bn = norm2d(w_out) - self.af = activation() - self.pool = pool2d(w_out, 3, stride=2) - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - @staticmethod - def complexity(cx, w_in, w_out): - cx = conv2d_cx(cx, w_in, w_out, 7, stride=2) - cx = norm2d_cx(cx, w_out) - cx = pool2d_cx(cx, w_out, 3, stride=2) - return cx - - -class ResNet(Module): - """ResNet model.""" - - def __init__(self): - datasets = ["cifar10", "imagenet"] - err_str = "Dataset {} is not supported" - assert cfg.TRAIN.DATASET in datasets, err_str.format(cfg.TRAIN.DATASET) - assert cfg.TEST.DATASET in datasets, err_str.format(cfg.TEST.DATASET) - super(ResNet, self).__init__() - if "cifar" in cfg.TRAIN.DATASET: - self._construct_cifar() - else: - self._construct_imagenet() - self.apply(init_weights) - - def _construct_cifar(self): - err_str = "Model depth should be of the format 6n + 2 for cifar" - assert (cfg.MODEL.DEPTH - 2) % 6 == 0, err_str - d = int((cfg.MODEL.DEPTH - 2) / 6) - self.stem = ResStemCifar(3, 16) - self.s1 = ResStage(16, 16, stride=1, d=d) - self.s2 = ResStage(16, 32, stride=2, d=d) - self.s3 = ResStage(32, 64, stride=2, d=d) - self.head = ResHead(64, cfg.MODEL.NUM_CLASSES) - - def _construct_imagenet(self): - g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP - (d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH] - w_b = gw * g - self.stem = ResStemIN(3, 64) - self.s1 = ResStage(64, 256, stride=1, d=d1, w_b=w_b, groups=g) - self.s2 = ResStage(256, 512, stride=2, d=d2, w_b=w_b * 2, groups=g) - 
self.s3 = ResStage(512, 1024, stride=2, d=d3, w_b=w_b * 4, groups=g) - self.s4 = ResStage(1024, 2048, stride=2, d=d4, w_b=w_b * 8, groups=g) - self.head = ResHead(2048, cfg.MODEL.NUM_CLASSES) - - def forward(self, x): - for module in self.children(): - x = module(x) - return x - - @staticmethod - def complexity(cx): - """Computes model complexity. If you alter the model, make sure to update.""" - if "cifar" in cfg.TRAIN.DATASET: - d = int((cfg.MODEL.DEPTH - 2) / 6) - cx = ResStemCifar.complexity(cx, 3, 16) - cx = ResStage.complexity(cx, 16, 16, stride=1, d=d) - cx = ResStage.complexity(cx, 16, 32, stride=2, d=d) - cx = ResStage.complexity(cx, 32, 64, stride=2, d=d) - cx = ResHead.complexity(cx, 64, cfg.MODEL.NUM_CLASSES) - else: - g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP - (d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH] - w_b = gw * g - cx = ResStemIN.complexity(cx, 3, 64) - cx = ResStage.complexity(cx, 64, 256, 1, d=d1, w_b=w_b, groups=g) - cx = ResStage.complexity(cx, 256, 512, 2, d=d2, w_b=w_b * 2, groups=g) - cx = ResStage.complexity(cx, 512, 1024, 2, d=d3, w_b=w_b * 4, groups=g) - cx = ResStage.complexity(cx, 1024, 2048, 2, d=d4, w_b=w_b * 8, groups=g) - cx = ResHead.complexity(cx, 2048, cfg.MODEL.NUM_CLASSES) - return cx +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ResNe(X)t models.""" + +from pycls.core.config import cfg +from pycls.models.blocks import ( + activation, + conv2d, + conv2d_cx, + gap2d, + gap2d_cx, + init_weights, + linear, + linear_cx, + norm2d, + norm2d_cx, + pool2d, + pool2d_cx, +) +from torch.nn import Module + + +# Stage depths for ImageNet models +_IN_STAGE_DS = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)} + + +def get_trans_fun(name): + """Retrieves the transformation function by name.""" + trans_funs = { + "basic_transform": BasicTransform, + "bottleneck_transform": BottleneckTransform, + } + err_str = "Transformation function '{}' not supported" + assert name in trans_funs.keys(), err_str.format(name) + return trans_funs[name] + + +class ResHead(Module): + """ResNet head: AvgPool, 1x1.""" + + def __init__(self, w_in, num_classes): + super(ResHead, self).__init__() + self.avg_pool = gap2d(w_in) + self.fc = linear(w_in, num_classes, bias=True) + + def forward(self, x): + x = self.avg_pool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + + @staticmethod + def complexity(cx, w_in, num_classes): + cx = gap2d_cx(cx, w_in) + cx = linear_cx(cx, w_in, num_classes, bias=True) + return cx + + +class BasicTransform(Module): + """Basic transformation: 3x3, BN, AF, 3x3, BN.""" + + def __init__(self, w_in, w_out, stride, w_b=None, groups=1): + err_str = "Basic transform does not support w_b and groups options" + assert w_b is None and groups == 1, err_str + super(BasicTransform, self).__init__() + self.a = conv2d(w_in, w_out, 3, stride=stride) + self.a_bn = norm2d(w_out) + self.a_af = activation() + self.b = conv2d(w_out, w_out, 3) + self.b_bn = norm2d(w_out) + self.b_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, w_b=None, groups=1): + err_str = "Basic transform does not support w_b and groups options" + assert w_b is None and groups == 1, err_str + cx = conv2d_cx(cx, w_in, w_out, 3, stride=stride) + cx = norm2d_cx(cx, w_out) + cx = conv2d_cx(cx, w_out, w_out, 3) + cx = norm2d_cx(cx, w_out) + return cx + + +class BottleneckTransform(Module): + """Bottleneck transformation: 1x1, BN, AF, 3x3, BN, AF, 1x1, BN.""" + + def __init__(self, w_in, w_out, stride, w_b, groups): + super(BottleneckTransform, self).__init__() + # MSRA -> stride=2 is on 1x1; TH/C2 -> stride=2 is on 3x3 + (s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride) + self.a = conv2d(w_in, w_b, 1, stride=s1) + self.a_bn = norm2d(w_b) + self.a_af = activation() + self.b = conv2d(w_b, w_b, 3, stride=s3, groups=groups) + self.b_bn = norm2d(w_b) + self.b_af = activation() + self.c = conv2d(w_b, w_out, 1) + self.c_bn = norm2d(w_out) + self.c_bn.final_bn = True + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, w_b, groups): + (s1, s3) = (stride, 1) if cfg.RESNET.STRIDE_1X1 else (1, stride) + cx = conv2d_cx(cx, w_in, w_b, 1, stride=s1) + cx = norm2d_cx(cx, w_b) + cx = conv2d_cx(cx, w_b, w_b, 3, stride=s3, groups=groups) + cx = norm2d_cx(cx, w_b) + cx = conv2d_cx(cx, w_b, w_out, 1) + cx = norm2d_cx(cx, w_out) + return cx + + +class ResBlock(Module): + """Residual block: x + f(x).""" + + def __init__(self, w_in, w_out, stride, trans_fun, w_b=None, groups=1): + super(ResBlock, self).__init__() + self.proj, self.bn = None, None + if (w_in != w_out) or (stride != 1): + self.proj = conv2d(w_in, w_out, 1, stride=stride) + self.bn = norm2d(w_out) 
+ self.f = trans_fun(w_in, w_out, stride, w_b, groups) + self.af = activation() + + def forward(self, x): + x_p = self.bn(self.proj(x)) if self.proj else x + return self.af(x_p + self.f(x)) + + @staticmethod + def complexity(cx, w_in, w_out, stride, trans_fun, w_b, groups): + if (w_in != w_out) or (stride != 1): + h, w = cx["h"], cx["w"] + cx = conv2d_cx(cx, w_in, w_out, 1, stride=stride) + cx = norm2d_cx(cx, w_out) + cx["h"], cx["w"] = h, w + cx = trans_fun.complexity(cx, w_in, w_out, stride, w_b, groups) + return cx + + +class ResStage(Module): + """Stage of ResNet.""" + + def __init__(self, w_in, w_out, stride, d, w_b=None, groups=1): + super(ResStage, self).__init__() + for i in range(d): + b_stride = stride if i == 0 else 1 + b_w_in = w_in if i == 0 else w_out + trans_fun = get_trans_fun(cfg.RESNET.TRANS_FUN) + res_block = ResBlock(b_w_in, w_out, b_stride, trans_fun, w_b, groups) + self.add_module("b{}".format(i + 1), res_block) + + def forward(self, x): + for block in self.children(): + x = block(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out, stride, d, w_b=None, groups=1): + for i in range(d): + b_stride = stride if i == 0 else 1 + b_w_in = w_in if i == 0 else w_out + trans_f = get_trans_fun(cfg.RESNET.TRANS_FUN) + cx = ResBlock.complexity(cx, b_w_in, w_out, b_stride, trans_f, w_b, groups) + return cx + + +class ResStemCifar(Module): + """ResNet stem for CIFAR: 3x3, BN, AF.""" + + def __init__(self, w_in, w_out): + super(ResStemCifar, self).__init__() + self.conv = conv2d(w_in, w_out, 3) + self.bn = norm2d(w_out) + self.af = activation() + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 3) + cx = norm2d_cx(cx, w_out) + return cx + + +class ResStemIN(Module): + """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" + + def __init__(self, w_in, w_out): + super(ResStemIN, self).__init__() + self.conv = conv2d(w_in, w_out, 7, stride=2) + self.bn = norm2d(w_out) + self.af = activation() + self.pool = pool2d(w_out, 3, stride=2) + + def forward(self, x): + for layer in self.children(): + x = layer(x) + return x + + @staticmethod + def complexity(cx, w_in, w_out): + cx = conv2d_cx(cx, w_in, w_out, 7, stride=2) + cx = norm2d_cx(cx, w_out) + cx = pool2d_cx(cx, w_out, 3, stride=2) + return cx + + +class ResNet(Module): + """ResNet model.""" + + def __init__(self): + datasets = ["cifar10", "imagenet"] + err_str = "Dataset {} is not supported" + assert cfg.TRAIN.DATASET in datasets, err_str.format(cfg.TRAIN.DATASET) + assert cfg.TEST.DATASET in datasets, err_str.format(cfg.TEST.DATASET) + super(ResNet, self).__init__() + if "cifar" in cfg.TRAIN.DATASET: + self._construct_cifar() + else: + self._construct_imagenet() + self.apply(init_weights) + + def _construct_cifar(self): + err_str = "Model depth should be of the format 6n + 2 for cifar" + assert (cfg.MODEL.DEPTH - 2) % 6 == 0, err_str + d = int((cfg.MODEL.DEPTH - 2) / 6) + self.stem = ResStemCifar(3, 16) + self.s1 = ResStage(16, 16, stride=1, d=d) + self.s2 = ResStage(16, 32, stride=2, d=d) + self.s3 = ResStage(32, 64, stride=2, d=d) + self.head = ResHead(64, cfg.MODEL.NUM_CLASSES) + + def _construct_imagenet(self): + g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP + (d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH] + w_b = gw * g + self.stem = ResStemIN(3, 64) + self.s1 = ResStage(64, 256, stride=1, d=d1, w_b=w_b, groups=g) + self.s2 = ResStage(256, 512, stride=2, d=d2, w_b=w_b * 2, groups=g) + 
self.s3 = ResStage(512, 1024, stride=2, d=d3, w_b=w_b * 4, groups=g) + self.s4 = ResStage(1024, 2048, stride=2, d=d4, w_b=w_b * 8, groups=g) + self.head = ResHead(2048, cfg.MODEL.NUM_CLASSES) + + def forward(self, x): + for module in self.children(): + x = module(x) + return x + + @staticmethod + def complexity(cx): + """Computes model complexity. If you alter the model, make sure to update.""" + if "cifar" in cfg.TRAIN.DATASET: + d = int((cfg.MODEL.DEPTH - 2) / 6) + cx = ResStemCifar.complexity(cx, 3, 16) + cx = ResStage.complexity(cx, 16, 16, stride=1, d=d) + cx = ResStage.complexity(cx, 16, 32, stride=2, d=d) + cx = ResStage.complexity(cx, 32, 64, stride=2, d=d) + cx = ResHead.complexity(cx, 64, cfg.MODEL.NUM_CLASSES) + else: + g, gw = cfg.RESNET.NUM_GROUPS, cfg.RESNET.WIDTH_PER_GROUP + (d1, d2, d3, d4) = _IN_STAGE_DS[cfg.MODEL.DEPTH] + w_b = gw * g + cx = ResStemIN.complexity(cx, 3, 64) + cx = ResStage.complexity(cx, 64, 256, 1, d=d1, w_b=w_b, groups=g) + cx = ResStage.complexity(cx, 256, 512, 2, d=d2, w_b=w_b * 2, groups=g) + cx = ResStage.complexity(cx, 512, 1024, 2, d=d3, w_b=w_b * 4, groups=g) + cx = ResStage.complexity(cx, 1024, 2048, 2, d=d4, w_b=w_b * 8, groups=g) + cx = ResHead.complexity(cx, 2048, cfg.MODEL.NUM_CLASSES) + return cx diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/test_net.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/test_net.py index 626ee91bf4a87b8c102d864ca74010eada35ad24..c56c75efe7e957ac482c9791a4b8b71fe3ca9992 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/test_net.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/test_net.py @@ -1,40 +1,40 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Test a trained classification model.""" -import sys -sys.path.append("./") -import pycls.core.config as config -import pycls.core.distributed as dist -import pycls.core.trainer as trainer -from pycls.core.config import cfg - - -def main(): - config.load_cfg_fom_args("Test a trained classification model.") - config.assert_and_infer_cfg() - cfg.freeze() - dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Test a trained classification model.""" +import sys +sys.path.append("./") +import pycls.core.config as config +import pycls.core.distributed as dist +import pycls.core.trainer as trainer +from pycls.core.config import cfg + + +def main(): + config.load_cfg_fom_args("Test a trained classification model.") + config.assert_and_infer_cfg() + cfg.freeze() + dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model) + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/time_net.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/time_net.py index ac6495a1db816fc80c2b325216626a22acd8656a..6ae03883617385c7fa14134309cb632200f40c08 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/time_net.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/time_net.py @@ -1,39 +1,39 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Compute model and loader timings.""" - -import pycls.core.config as config -import pycls.core.distributed as dist -import pycls.core.trainer as trainer -from pycls.core.config import cfg - - -def main(): - config.load_cfg_fom_args("Compute model and loader timings.") - config.assert_and_infer_cfg() - cfg.freeze() - dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Compute model and loader timings.""" + +import pycls.core.config as config +import pycls.core.distributed as dist +import pycls.core.trainer as trainer +from pycls.core.config import cfg + + +def main(): + config.load_cfg_fom_args("Compute model and loader timings.") + config.assert_and_infer_cfg() + cfg.freeze() + dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model) + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/train_net.py b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/train_net.py index 58be5ef71172d6df4feeed4e8cefa621dbabb929..b0277737d830dc3603f4043c7179c18f2dc53ff0 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/train_net.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B1/tools/train_net.py @@ -1,106 +1,106 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -"""Train a classification model.""" -import sys -sys.path.append("./") -import pycls.core.config as config -import pycls.core.distributed as dist -import pycls.core.trainer as trainer -from pycls.core.config import cfg -import argparse,sys,os,torch -import torch - -def init_process_group(proc_rank, world_size, device_type="npu", port="29588"): - """Initializes the default process group.""" - - # Initialize the process group - print("==================================") - print('Begin init_process_group') - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = port - if device_type == "npu": - torch.distributed.init_process_group( - backend=cfg.DIST_BACKEND, - world_size=world_size, - rank=proc_rank - ) - elif device_type == "gpu": - torch.distributed.init_process_group( - backend=cfg.DIST_BACKEND, - init_method="tcp://{}:{}".format("127.0.0.1", port), - world_size=world_size, - rank=proc_rank - ) - - print("==================================") - print("Done init_process_group") - - # Set the GPU to use - #torch.cuda.set_device(proc_rank) - if device_type == "npu": - torch.npu.set_device(proc_rank) - elif device_type == "gpu": - torch.cuda.set_device(proc_rank) - print('Done set device', device_type, cfg.DIST_BACKEND, world_size, proc_rank) - -def main(): - """Load config from command line arguments and set any specified options.""" - parser = argparse.ArgumentParser(description="Config file options.") - parser.add_argument("--device", help="gpu or npu", default="npu", type=str) - parser.add_argument("--profperf", help="0 or 1", default=0, type=int) - help_s = "Config file location" - parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) - parser.add_argument("--rank_id", dest="rank_id", default=0, type=int) - parser.add_argument("--device_id", dest="device_id", default=0, type=int) - help_s = "See 
pycls/core/config.py for all options" - parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - print(args) - - config.merge_from_file(args.cfg_file) - config._C.merge_from_list(args.opts) - config.assert_and_infer_cfg() - cfg.freeze() - - if cfg.NUM_GPUS > 1: - init_process_group(proc_rank=args.rank_id, world_size=cfg.NUM_GPUS, device_type=args.device) - elif args.device == "npu": - torch.npu.set_device(args.device_id) - elif args.device == "gpu": - torch.cuda.set_device(args.device_id) - - if args.device == "npu": - cur_device = torch.npu.current_device() - elif args.device == "gpu": - cur_device = torch.cuda.current_device() - print('cur_device: ', cur_device) - - trainer.train_model(args.device, args.profperf) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +"""Train a classification model.""" +import sys +sys.path.append("./") +import pycls.core.config as config +import pycls.core.distributed as dist +import pycls.core.trainer as trainer +from pycls.core.config import cfg +import argparse,sys,os,torch +import torch + +def init_process_group(proc_rank, world_size, device_type="npu", port="29588"): + """Initializes the default process group.""" + + # Initialize the process group + print("==================================") + print('Begin init_process_group') + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = port + if device_type == "npu": + torch.distributed.init_process_group( + backend=cfg.DIST_BACKEND, + world_size=world_size, + rank=proc_rank + ) + elif device_type == "gpu": + torch.distributed.init_process_group( + backend=cfg.DIST_BACKEND, + init_method="tcp://{}:{}".format("127.0.0.1", port), + world_size=world_size, + rank=proc_rank + ) + + print("==================================") + print("Done init_process_group") + + # Set the GPU to use + #torch.cuda.set_device(proc_rank) + if device_type == "npu": + torch.npu.set_device(proc_rank) + elif device_type == "gpu": + torch.cuda.set_device(proc_rank) + print('Done set device', device_type, cfg.DIST_BACKEND, world_size, proc_rank) + +def main(): + """Load config from command line arguments and set any specified options.""" + parser = argparse.ArgumentParser(description="Config file options.") + parser.add_argument("--device", help="gpu or npu", default="npu", type=str) + parser.add_argument("--profperf", help="0 or 1", default=0, type=int) + help_s = "Config file location" + parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) + parser.add_argument("--rank_id", dest="rank_id", default=0, type=int) + parser.add_argument("--device_id", dest="device_id", default=0, type=int) + 
help_s = "See pycls/core/config.py for all options" + parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + print(args) + + config.merge_from_file(args.cfg_file) + config._C.merge_from_list(args.opts) + config.assert_and_infer_cfg() + cfg.freeze() + + if cfg.NUM_GPUS > 1: + init_process_group(proc_rank=args.rank_id, world_size=cfg.NUM_GPUS, device_type=args.device) + elif args.device == "npu": + torch.npu.set_device(args.device_id) + elif args.device == "gpu": + torch.cuda.set_device(args.device_id) + + if args.device == "npu": + cur_device = torch.npu.current_device() + elif args.device == "gpu": + cur_device = torch.cuda.current_device() + print('cur_device: ', cur_device) + + trainer.train_model(args.device, args.profperf) + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B3/modelzoo_level.txt b/PyTorch/contrib/cv/classification/EfficientNet-B3/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B3/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/EfficientNet-B3/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/config.py b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/config.py index 8bef12be1a1c99e8c056e77e80ac73faf2bf60a6..5cfeb0d2aeb590064287bcde0e2ae718cca5ae90 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/config.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/config.py @@ -1,429 +1,429 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
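[Editor's note: illustrative sketch, not part of the patch. With NUM_GPUS == 1 the patched tools/train_net.py above skips init_process_group and only binds the requested device, so the single-NPU code path reduces to the few lines below (a GPU run would use torch.cuda instead). The --cfg path in the comment is hypothetical.]

    # Typical single-device invocation of the patched script:
    #   python tools/train_net.py --device npu --device_id 0 \
    #       --cfg configs/dds_baselines/effnet/EN-B1_dds_8gpu.yaml NUM_GPUS 1
    import torch  # assumption: the Ascend PyTorch adapter provides torch.npu

    torch.npu.set_device(0)
    print("cur_device:", torch.npu.current_device())
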
- -"""Configuration file (powered by YACS).""" - -import argparse,sys,os,torch -from iopath.common.file_io import g_pathmgr -from pycls.core.io import cache_url -from yacs.config import CfgNode as CfgNode - -# Global config object (example usage: from core.config import cfg) -_C = CfgNode() -cfg = _C - - -# ---------------------------------- Model options ----------------------------------- # -_C.MODEL = CfgNode() - -# Model type -_C.MODEL.TYPE = "" - -# Number of weight layers -_C.MODEL.DEPTH = 0 - -# Number of classes -_C.MODEL.NUM_CLASSES = 10 - -# Loss function (see pycls/models/loss.py for options) -_C.MODEL.LOSS_FUN = "cross_entropy" - -# Activation function (relu or silu/swish) -_C.MODEL.ACTIVATION_FUN = "relu" - -# Perform activation inplace if implemented -_C.MODEL.ACTIVATION_INPLACE = True - - -# ---------------------------------- ResNet options ---------------------------------- # -_C.RESNET = CfgNode() - -# Transformation function (see pycls/models/resnet.py for options) -_C.RESNET.TRANS_FUN = "basic_transform" - -# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt) -_C.RESNET.NUM_GROUPS = 1 - -# Width of each group (64 -> ResNet; 4 -> ResNeXt) -_C.RESNET.WIDTH_PER_GROUP = 64 - -# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch) -_C.RESNET.STRIDE_1X1 = True - - -# ---------------------------------- AnyNet options ---------------------------------- # -_C.ANYNET = CfgNode() - -# Stem type -_C.ANYNET.STEM_TYPE = "simple_stem_in" - -# Stem width -_C.ANYNET.STEM_W = 32 - -# Block type -_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block" - -# Depth for each stage (number of blocks in the stage) -_C.ANYNET.DEPTHS = [] - -# Width for each stage (width of each block in the stage) -_C.ANYNET.WIDTHS = [] - -# Strides for each stage (applies to the first block of each stage) -_C.ANYNET.STRIDES = [] - -# Bottleneck multipliers for each stage (applies to bottleneck block) -_C.ANYNET.BOT_MULS = [] - -# Group widths for each stage (applies to bottleneck block) -_C.ANYNET.GROUP_WS = [] - -# Whether SE is enabled for res_bottleneck_block -_C.ANYNET.SE_ON = False - -# SE ratio -_C.ANYNET.SE_R = 0.25 - - -# ---------------------------------- RegNet options ---------------------------------- # -_C.REGNET = CfgNode() - -# Stem type -_C.REGNET.STEM_TYPE = "simple_stem_in" - -# Stem width -_C.REGNET.STEM_W = 32 - -# Block type -_C.REGNET.BLOCK_TYPE = "res_bottleneck_block" - -# Stride of each stage -_C.REGNET.STRIDE = 2 - -# Squeeze-and-Excitation (RegNetY) -_C.REGNET.SE_ON = False -_C.REGNET.SE_R = 0.25 - -# Depth -_C.REGNET.DEPTH = 10 - -# Initial width -_C.REGNET.W0 = 32 - -# Slope -_C.REGNET.WA = 5.0 - -# Quantization -_C.REGNET.WM = 2.5 - -# Group width -_C.REGNET.GROUP_W = 16 - -# Bottleneck multiplier (bm = 1 / b from the paper) -_C.REGNET.BOT_MUL = 1.0 - - -# ------------------------------- EfficientNet options ------------------------------- # -_C.EN = CfgNode() - -# Stem width -_C.EN.STEM_W = 32 - -# Depth for each stage (number of blocks in the stage) -_C.EN.DEPTHS = [] - -# Width for each stage (width of each block in the stage) -_C.EN.WIDTHS = [] - -# Expansion ratios for MBConv blocks in each stage -_C.EN.EXP_RATIOS = [] - -# Squeeze-and-Excitation (SE) ratio -_C.EN.SE_R = 0.25 - -# Strides for each stage (applies to the first block of each stage) -_C.EN.STRIDES = [] - -# Kernel sizes for each stage -_C.EN.KERNELS = [] - -# Head width -_C.EN.HEAD_W = 1280 - -# Drop connect ratio -_C.EN.DC_RATIO = 0.0 - -# Dropout ratio -_C.EN.DROPOUT_RATIO = 0.0 - - -# 
-------------------------------- Batch norm options -------------------------------- # -_C.BN = CfgNode() - -# BN epsilon -_C.BN.EPS = 1e-5 - -# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2) -_C.BN.MOM = 0.1 - -# Precise BN stats -_C.BN.USE_PRECISE_STATS = True -_C.BN.NUM_SAMPLES_PRECISE = 8192 - -# Initialize the gamma of the final BN of each block to zero -_C.BN.ZERO_INIT_FINAL_GAMMA = False - -# Use a different weight decay for BN layers -_C.BN.USE_CUSTOM_WEIGHT_DECAY = False -_C.BN.CUSTOM_WEIGHT_DECAY = 0.0 - - -# -------------------------------- Optimizer options --------------------------------- # -_C.OPTIM = CfgNode() - -# Learning rate ranges from BASE_LR to MIN_LR*BASE_LR according to the LR_POLICY -_C.OPTIM.BASE_LR = 0.1 -_C.OPTIM.MIN_LR = 0.0 - -# Learning rate policy select from {'cos', 'exp', 'lin', 'steps'} -_C.OPTIM.LR_POLICY = "cos" - -# Steps for 'steps' policy (in epochs) -_C.OPTIM.STEPS = [] - -# Learning rate multiplier for 'steps' policy -_C.OPTIM.LR_MULT = 0.1 - -# Maximal number of epochs -_C.OPTIM.MAX_EPOCH = 200 - -# Momentum -_C.OPTIM.MOMENTUM = 0.9 - -# Momentum dampening -_C.OPTIM.DAMPENING = 0.0 - -# Nesterov momentum -_C.OPTIM.NESTEROV = True - -# L2 regularization -_C.OPTIM.WEIGHT_DECAY = 5e-4 - -# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR -_C.OPTIM.WARMUP_FACTOR = 0.1 - -# Gradually warm up the OPTIM.BASE_LR over this number of epochs -_C.OPTIM.WARMUP_EPOCHS = 0 - - -# --------------------------------- Training options --------------------------------- # -_C.TRAIN = CfgNode() - -# Dataset and split -_C.TRAIN.DATASET = "" -_C.TRAIN.SPLIT = "train" - -# Total mini-batch size -_C.TRAIN.BATCH_SIZE = 128 - -# Image size -_C.TRAIN.IM_SIZE = 224 - -# Resume training from the latest checkpoint in the output directory -_C.TRAIN.AUTO_RESUME = True - -# Weights to start training from -_C.TRAIN.WEIGHTS = "" -_C.TRAIN.PRETRAINED = "" - -# If True train using mixed precision -_C.TRAIN.MIXED_PRECISION = False - -# Label smoothing value in 0 to 1 where (0 gives no smoothing) -_C.TRAIN.LABEL_SMOOTHING = 0.0 - -# Batch mixup regularization value in 0 to 1 (0 gives no mixup) -_C.TRAIN.MIXUP_ALPHA = 0.0 - -# Standard deviation for AlexNet-style PCA jitter (0 gives no PCA jitter) -_C.TRAIN.PCA_STD = 0.1 - -# Data augmentation to use ("", "AutoAugment", "RandAugment_N2_M0.5", etc.) 
-_C.TRAIN.AUGMENT = "" - - -# --------------------------------- Testing options ---------------------------------- # -_C.TEST = CfgNode() - -# Dataset and split -_C.TEST.DATASET = "" -_C.TEST.SPLIT = "val" - -# Total mini-batch size -_C.TEST.BATCH_SIZE = 200 - -# Image size -_C.TEST.IM_SIZE = 256 - -# Weights to use for testing -_C.TEST.WEIGHTS = "" - - -# ------------------------------- Data loader options -------------------------------- # -_C.DATA_LOADER = CfgNode() - -# Number of data loader workers per process -_C.DATA_LOADER.NUM_WORKERS = 8 - -# Load data to pinned host memory -_C.DATA_LOADER.PIN_MEMORY = True - - -# ---------------------------------- CUDNN options ----------------------------------- # -_C.CUDNN = CfgNode() - -# Perform benchmarking to select fastest CUDNN algorithms (best for fixed input sizes) -_C.CUDNN.BENCHMARK = True - - -# ------------------------------- Precise time options ------------------------------- # -_C.PREC_TIME = CfgNode() - -# Number of iterations to warm up the caches -_C.PREC_TIME.WARMUP_ITER = 3 - -# Number of iterations to compute avg time -_C.PREC_TIME.NUM_ITER = 30 - - -# ----------------------------------- Misc options ----------------------------------- # -# Optional description of a config -_C.DESC = "" - -# If True output additional info to log -_C.VERBOSE = True - -# Number of GPUs to use (applies to both training and testing) -_C.NUM_GPUS = 1 - -# Output directory -_C.OUT_DIR = "/tmp" - -# Config destination (in OUT_DIR) -_C.CFG_DEST = "config.yaml" - -# Note that non-determinism is still be present due to non-deterministic GPU ops -_C.RNG_SEED = 1 - -# Log destination ('stdout' or 'file') -_C.LOG_DEST = "stdout" - -# Log period in iters -_C.LOG_PERIOD = 10 - -# Distributed backend -_C.DIST_BACKEND = "hccl" - -# Hostname and port range for multi-process groups (actual port selected randomly) -_C.HOST = "localhost" -_C.PORT_RANGE = [10000, 65000] - -# Models weights referred to by URL are downloaded to this local cache -_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache" - -# ---------------------------------- Default config ---------------------------------- # -_CFG_DEFAULT = _C.clone() -_CFG_DEFAULT.freeze() - - -# --------------------------------- Deprecated keys ---------------------------------- # -_C.register_deprecated_key("MEM") -_C.register_deprecated_key("MEM.RELU_INPLACE") -_C.register_deprecated_key("OPTIM.GAMMA") -_C.register_deprecated_key("PREC_TIME.BATCH_SIZE") -_C.register_deprecated_key("PREC_TIME.ENABLED") -_C.register_deprecated_key("PORT") -_C.register_deprecated_key("TRAIN.EVAL_PERIOD") -_C.register_deprecated_key("TRAIN.CHECKPOINT_PERIOD") - -# --------------------------------- Model Arts ---------------------------------- # -_C.data_url = "" -_C.train_url = "" - -def assert_and_infer_cfg(cache_urls=True): - """Checks config values invariants.""" - err_str = "The first lr step must start at 0" - assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str - data_splits = ["train", "val", "test"] - err_str = "Data split '{}' not supported" - assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT) - assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT) - err_str = "Mini-batch size should be a multiple of NUM_GPUS." 
- assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str - assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str - err_str = "Log destination '{}' not supported" - assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST) - if cache_urls: - cache_cfg_urls() - - -def cache_cfg_urls(): - """Download URLs in config, cache them, and rewrite cfg to use cached file.""" - _C.TRAIN.WEIGHTS = cache_url(_C.TRAIN.WEIGHTS, _C.DOWNLOAD_CACHE) - _C.TEST.WEIGHTS = cache_url(_C.TEST.WEIGHTS, _C.DOWNLOAD_CACHE) - - -def merge_from_file(cfg_file): - with g_pathmgr.open(cfg_file, "r") as f: - cfg = _C.load_cfg(f) - _C.merge_from_other_cfg(cfg) - - -def dump_cfg(): - """Dumps the config to the output directory.""" - cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST) - with g_pathmgr.open(cfg_file, "w") as f: - _C.dump(stream=f) - - -def load_cfg(out_dir, cfg_dest="config.yaml"): - """Loads config from specified output directory.""" - cfg_file = os.path.join(out_dir, cfg_dest) - merge_from_file(cfg_file) - - -def reset_cfg(): - """Reset config to initial state.""" - cfg.merge_from_other_cfg(_CFG_DEFAULT) - - -def load_cfg_fom_args(description="Config file options."): - """Load config from command line arguments and set any specified options.""" - parser = argparse.ArgumentParser(description=description) - help_s = "Config file location" - parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) - help_s = "See pycls/core/config.py for all options" - # modelarts modification - parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") - parser.add_argument('--data_url', - metavar='DIR', - default='/cache/data_url', - help='path to dataset') - parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - merge_from_file(args.cfg_file) - _C.merge_from_list(args.opts) - +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Configuration file (powered by YACS).""" + +import argparse,sys,os,torch +from iopath.common.file_io import g_pathmgr +from pycls.core.io import cache_url +from yacs.config import CfgNode as CfgNode + +# Global config object (example usage: from core.config import cfg) +_C = CfgNode() +cfg = _C + + +# ---------------------------------- Model options ----------------------------------- # +_C.MODEL = CfgNode() + +# Model type +_C.MODEL.TYPE = "" + +# Number of weight layers +_C.MODEL.DEPTH = 0 + +# Number of classes +_C.MODEL.NUM_CLASSES = 10 + +# Loss function (see pycls/models/loss.py for options) +_C.MODEL.LOSS_FUN = "cross_entropy" + +# Activation function (relu or silu/swish) +_C.MODEL.ACTIVATION_FUN = "relu" + +# Perform activation inplace if implemented +_C.MODEL.ACTIVATION_INPLACE = True + + +# ---------------------------------- ResNet options ---------------------------------- # +_C.RESNET = CfgNode() + +# Transformation function (see pycls/models/resnet.py for options) +_C.RESNET.TRANS_FUN = "basic_transform" + +# Number of groups to use (1 -> ResNet; > 1 -> ResNeXt) +_C.RESNET.NUM_GROUPS = 1 + +# Width of each group (64 -> ResNet; 4 -> ResNeXt) +_C.RESNET.WIDTH_PER_GROUP = 64 + +# Apply stride to 1x1 conv (True -> MSRA; False -> fb.torch) +_C.RESNET.STRIDE_1X1 = True + + +# ---------------------------------- AnyNet options ---------------------------------- # +_C.ANYNET = CfgNode() + +# Stem type +_C.ANYNET.STEM_TYPE = "simple_stem_in" + +# Stem width +_C.ANYNET.STEM_W = 32 + +# Block type +_C.ANYNET.BLOCK_TYPE = "res_bottleneck_block" + +# Depth for each stage (number of blocks in the stage) +_C.ANYNET.DEPTHS = [] + +# Width for each stage (width of each block in the stage) +_C.ANYNET.WIDTHS = [] + +# Strides for each stage (applies to the first block of each stage) +_C.ANYNET.STRIDES = [] + +# Bottleneck multipliers for each stage (applies to bottleneck block) +_C.ANYNET.BOT_MULS = [] + +# Group widths for each stage (applies to bottleneck block) +_C.ANYNET.GROUP_WS = [] + +# Whether SE is enabled for res_bottleneck_block +_C.ANYNET.SE_ON = False + +# SE ratio +_C.ANYNET.SE_R = 0.25 + + +# ---------------------------------- RegNet options ---------------------------------- # +_C.REGNET = CfgNode() + +# Stem type +_C.REGNET.STEM_TYPE = "simple_stem_in" + +# Stem width +_C.REGNET.STEM_W = 32 + +# Block type +_C.REGNET.BLOCK_TYPE = "res_bottleneck_block" + +# Stride of each stage +_C.REGNET.STRIDE = 2 + +# Squeeze-and-Excitation (RegNetY) +_C.REGNET.SE_ON = False +_C.REGNET.SE_R = 0.25 + +# Depth +_C.REGNET.DEPTH = 10 + +# Initial width +_C.REGNET.W0 = 32 + +# Slope +_C.REGNET.WA = 5.0 + +# Quantization +_C.REGNET.WM = 2.5 + +# Group width +_C.REGNET.GROUP_W = 16 + +# Bottleneck multiplier (bm = 1 / b from the paper) +_C.REGNET.BOT_MUL = 1.0 + + +# ------------------------------- EfficientNet options ------------------------------- # +_C.EN = CfgNode() + +# Stem width +_C.EN.STEM_W = 32 + +# Depth for each stage (number of blocks in the stage) +_C.EN.DEPTHS = [] + +# Width for each stage (width of each block in the stage) +_C.EN.WIDTHS = [] + +# Expansion ratios for MBConv blocks in each stage +_C.EN.EXP_RATIOS = [] + +# Squeeze-and-Excitation (SE) ratio +_C.EN.SE_R = 0.25 + +# Strides for each stage (applies to the first block of each stage) +_C.EN.STRIDES = [] + +# Kernel sizes for each stage +_C.EN.KERNELS = [] + +# Head width +_C.EN.HEAD_W = 1280 + +# Drop connect ratio +_C.EN.DC_RATIO = 0.0 + +# Dropout ratio +_C.EN.DROPOUT_RATIO = 0.0 + + +# 
-------------------------------- Batch norm options -------------------------------- # +_C.BN = CfgNode() + +# BN epsilon +_C.BN.EPS = 1e-5 + +# BN momentum (BN momentum in PyTorch = 1 - BN momentum in Caffe2) +_C.BN.MOM = 0.1 + +# Precise BN stats +_C.BN.USE_PRECISE_STATS = True +_C.BN.NUM_SAMPLES_PRECISE = 8192 + +# Initialize the gamma of the final BN of each block to zero +_C.BN.ZERO_INIT_FINAL_GAMMA = False + +# Use a different weight decay for BN layers +_C.BN.USE_CUSTOM_WEIGHT_DECAY = False +_C.BN.CUSTOM_WEIGHT_DECAY = 0.0 + + +# -------------------------------- Optimizer options --------------------------------- # +_C.OPTIM = CfgNode() + +# Learning rate ranges from BASE_LR to MIN_LR*BASE_LR according to the LR_POLICY +_C.OPTIM.BASE_LR = 0.1 +_C.OPTIM.MIN_LR = 0.0 + +# Learning rate policy select from {'cos', 'exp', 'lin', 'steps'} +_C.OPTIM.LR_POLICY = "cos" + +# Steps for 'steps' policy (in epochs) +_C.OPTIM.STEPS = [] + +# Learning rate multiplier for 'steps' policy +_C.OPTIM.LR_MULT = 0.1 + +# Maximal number of epochs +_C.OPTIM.MAX_EPOCH = 200 + +# Momentum +_C.OPTIM.MOMENTUM = 0.9 + +# Momentum dampening +_C.OPTIM.DAMPENING = 0.0 + +# Nesterov momentum +_C.OPTIM.NESTEROV = True + +# L2 regularization +_C.OPTIM.WEIGHT_DECAY = 5e-4 + +# Start the warm up from OPTIM.BASE_LR * OPTIM.WARMUP_FACTOR +_C.OPTIM.WARMUP_FACTOR = 0.1 + +# Gradually warm up the OPTIM.BASE_LR over this number of epochs +_C.OPTIM.WARMUP_EPOCHS = 0 + + +# --------------------------------- Training options --------------------------------- # +_C.TRAIN = CfgNode() + +# Dataset and split +_C.TRAIN.DATASET = "" +_C.TRAIN.SPLIT = "train" + +# Total mini-batch size +_C.TRAIN.BATCH_SIZE = 128 + +# Image size +_C.TRAIN.IM_SIZE = 224 + +# Resume training from the latest checkpoint in the output directory +_C.TRAIN.AUTO_RESUME = True + +# Weights to start training from +_C.TRAIN.WEIGHTS = "" +_C.TRAIN.PRETRAINED = "" + +# If True train using mixed precision +_C.TRAIN.MIXED_PRECISION = False + +# Label smoothing value in 0 to 1 where (0 gives no smoothing) +_C.TRAIN.LABEL_SMOOTHING = 0.0 + +# Batch mixup regularization value in 0 to 1 (0 gives no mixup) +_C.TRAIN.MIXUP_ALPHA = 0.0 + +# Standard deviation for AlexNet-style PCA jitter (0 gives no PCA jitter) +_C.TRAIN.PCA_STD = 0.1 + +# Data augmentation to use ("", "AutoAugment", "RandAugment_N2_M0.5", etc.) 
+_C.TRAIN.AUGMENT = "" + + +# --------------------------------- Testing options ---------------------------------- # +_C.TEST = CfgNode() + +# Dataset and split +_C.TEST.DATASET = "" +_C.TEST.SPLIT = "val" + +# Total mini-batch size +_C.TEST.BATCH_SIZE = 200 + +# Image size +_C.TEST.IM_SIZE = 256 + +# Weights to use for testing +_C.TEST.WEIGHTS = "" + + +# ------------------------------- Data loader options -------------------------------- # +_C.DATA_LOADER = CfgNode() + +# Number of data loader workers per process +_C.DATA_LOADER.NUM_WORKERS = 8 + +# Load data to pinned host memory +_C.DATA_LOADER.PIN_MEMORY = True + + +# ---------------------------------- CUDNN options ----------------------------------- # +_C.CUDNN = CfgNode() + +# Perform benchmarking to select fastest CUDNN algorithms (best for fixed input sizes) +_C.CUDNN.BENCHMARK = True + + +# ------------------------------- Precise time options ------------------------------- # +_C.PREC_TIME = CfgNode() + +# Number of iterations to warm up the caches +_C.PREC_TIME.WARMUP_ITER = 3 + +# Number of iterations to compute avg time +_C.PREC_TIME.NUM_ITER = 30 + + +# ----------------------------------- Misc options ----------------------------------- # +# Optional description of a config +_C.DESC = "" + +# If True output additional info to log +_C.VERBOSE = True + +# Number of GPUs to use (applies to both training and testing) +_C.NUM_GPUS = 1 + +# Output directory +_C.OUT_DIR = "/tmp" + +# Config destination (in OUT_DIR) +_C.CFG_DEST = "config.yaml" + +# Note that non-determinism is still be present due to non-deterministic GPU ops +_C.RNG_SEED = 1 + +# Log destination ('stdout' or 'file') +_C.LOG_DEST = "stdout" + +# Log period in iters +_C.LOG_PERIOD = 10 + +# Distributed backend +_C.DIST_BACKEND = "hccl" + +# Hostname and port range for multi-process groups (actual port selected randomly) +_C.HOST = "localhost" +_C.PORT_RANGE = [10000, 65000] + +# Models weights referred to by URL are downloaded to this local cache +_C.DOWNLOAD_CACHE = "/tmp/pycls-download-cache" + +# ---------------------------------- Default config ---------------------------------- # +_CFG_DEFAULT = _C.clone() +_CFG_DEFAULT.freeze() + + +# --------------------------------- Deprecated keys ---------------------------------- # +_C.register_deprecated_key("MEM") +_C.register_deprecated_key("MEM.RELU_INPLACE") +_C.register_deprecated_key("OPTIM.GAMMA") +_C.register_deprecated_key("PREC_TIME.BATCH_SIZE") +_C.register_deprecated_key("PREC_TIME.ENABLED") +_C.register_deprecated_key("PORT") +_C.register_deprecated_key("TRAIN.EVAL_PERIOD") +_C.register_deprecated_key("TRAIN.CHECKPOINT_PERIOD") + +# --------------------------------- Model Arts ---------------------------------- # +_C.data_url = "" +_C.train_url = "" + +def assert_and_infer_cfg(cache_urls=True): + """Checks config values invariants.""" + err_str = "The first lr step must start at 0" + assert not _C.OPTIM.STEPS or _C.OPTIM.STEPS[0] == 0, err_str + data_splits = ["train", "val", "test"] + err_str = "Data split '{}' not supported" + assert _C.TRAIN.SPLIT in data_splits, err_str.format(_C.TRAIN.SPLIT) + assert _C.TEST.SPLIT in data_splits, err_str.format(_C.TEST.SPLIT) + err_str = "Mini-batch size should be a multiple of NUM_GPUS." 
+ assert _C.TRAIN.BATCH_SIZE % _C.NUM_GPUS == 0, err_str + assert _C.TEST.BATCH_SIZE % _C.NUM_GPUS == 0, err_str + err_str = "Log destination '{}' not supported" + assert _C.LOG_DEST in ["stdout", "file"], err_str.format(_C.LOG_DEST) + if cache_urls: + cache_cfg_urls() + + +def cache_cfg_urls(): + """Download URLs in config, cache them, and rewrite cfg to use cached file.""" + _C.TRAIN.WEIGHTS = cache_url(_C.TRAIN.WEIGHTS, _C.DOWNLOAD_CACHE) + _C.TEST.WEIGHTS = cache_url(_C.TEST.WEIGHTS, _C.DOWNLOAD_CACHE) + + +def merge_from_file(cfg_file): + with g_pathmgr.open(cfg_file, "r") as f: + cfg = _C.load_cfg(f) + _C.merge_from_other_cfg(cfg) + + +def dump_cfg(): + """Dumps the config to the output directory.""" + cfg_file = os.path.join(_C.OUT_DIR, _C.CFG_DEST) + with g_pathmgr.open(cfg_file, "w") as f: + _C.dump(stream=f) + + +def load_cfg(out_dir, cfg_dest="config.yaml"): + """Loads config from specified output directory.""" + cfg_file = os.path.join(out_dir, cfg_dest) + merge_from_file(cfg_file) + + +def reset_cfg(): + """Reset config to initial state.""" + cfg.merge_from_other_cfg(_CFG_DEFAULT) + + +def load_cfg_fom_args(description="Config file options."): + """Load config from command line arguments and set any specified options.""" + parser = argparse.ArgumentParser(description=description) + help_s = "Config file location" + parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) + help_s = "See pycls/core/config.py for all options" + # modelarts modification + parser.add_argument('--train_url', + default="/cache/training", + type=str, + help="setting dir of training output") + parser.add_argument('--data_url', + metavar='DIR', + default='/cache/data_url', + help='path to dataset') + parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") + parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + merge_from_file(args.cfg_file) + _C.merge_from_list(args.opts) + diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/loader.py b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/loader.py index b94e82d09f81640a2e6b2df4af9408e61e9ce408..4bf77a6c88939de6ed1506c1f27212c0c812a50d 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/loader.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/loader.py @@ -1,98 +1,98 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -"""Data loader.""" - -import os - -import torch -from pycls.core.config import cfg -from pycls.datasets.cifar10 import Cifar10 -from pycls.datasets.imagenet import ImageNet -from torch.utils.data.distributed import DistributedSampler -from torch.utils.data.sampler import RandomSampler - -import argparse -import moxing as mox - -# Supported datasets -_DATASETS = {"cifar10": Cifar10, "imagenet2012": ImageNet} - -# Default data directory (/path/pycls/pycls/datasets/data) -#_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") - -# Relative data paths to default data directory -_PATHS = {"cifar10": "cifar10", "imagenet2012": "imagenet2012"} - -def _construct_loader(args, dataset_name, split, batch_size, shuffle, drop_last): - """Constructs the data loader for the given dataset.""" - err_str = "Dataset '{}' not supported".format(dataset_name) - assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str - # Retrieve the data path for the dataset - #data_path = os.path.join(_DATA_DIR, _PATHS[dataset_name]) - # modelarts modification - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - print(args) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." % real_path) - data_path = os.path.join(real_path, _PATHS[dataset_name]) - print("real_path:",real_path) - print("real_path 子文件:", os.listdir(real_path)) - print("data_path:",data_path) - print("data_path 子文件:", os.listdir(data_path)) - # modelarts modification - # Construct the dataset - dataset = _DATASETS[dataset_name](data_path, split) - # Create a sampler for multi-process training - sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None - # Create a loader - loader = torch.utils.data.DataLoader( - dataset, - batch_size=batch_size, - shuffle=(False if sampler else shuffle), - sampler=sampler, - num_workers=cfg.DATA_LOADER.NUM_WORKERS, - pin_memory=cfg.DATA_LOADER.PIN_MEMORY, - drop_last=drop_last, - ) - return loader - - -def construct_train_loader(args): - """Train loader wrapper.""" - return _construct_loader( - args, - dataset_name=cfg.TRAIN.DATASET, - split=cfg.TRAIN.SPLIT, - batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS), - shuffle=True, - drop_last=True, - ) - - -def construct_test_loader(args): - """Test loader wrapper.""" - return _construct_loader( - args, - dataset_name=cfg.TEST.DATASET, - split=cfg.TEST.SPLIT, - batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS), - shuffle=False, - drop_last=False, - ) - - -def shuffle(loader, cur_epoch): - """"Shuffles the data.""" - err_str = "Sampler type '{}' not supported".format(type(loader.sampler)) - assert isinstance(loader.sampler, (RandomSampler, DistributedSampler)), err_str - # RandomSampler handles shuffling automatically - if isinstance(loader.sampler, DistributedSampler): - # DistributedSampler shuffles data based on epoch - loader.sampler.set_epoch(cur_epoch) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Data loader.""" + +import os + +import torch +from pycls.core.config import cfg +from pycls.datasets.cifar10 import Cifar10 +from pycls.datasets.imagenet import ImageNet +from torch.utils.data.distributed import DistributedSampler +from torch.utils.data.sampler import RandomSampler + +import argparse +import moxing as mox + +# Supported datasets +_DATASETS = {"cifar10": Cifar10, "imagenet2012": ImageNet} + +# Default data directory (/path/pycls/pycls/datasets/data) +#_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") + +# Relative data paths to default data directory +_PATHS = {"cifar10": "cifar10", "imagenet2012": "imagenet2012"} + +def _construct_loader(args, dataset_name, split, batch_size, shuffle, drop_last): + """Constructs the data loader for the given dataset.""" + err_str = "Dataset '{}' not supported".format(dataset_name) + assert dataset_name in _DATASETS and dataset_name in _PATHS, err_str + # Retrieve the data path for the dataset + #data_path = os.path.join(_DATA_DIR, _PATHS[dataset_name]) + # modelarts modification + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + print(args) + mox.file.copy_parallel(args.data_url, real_path) + print("training data finish copy to %s." % real_path) + data_path = os.path.join(real_path, _PATHS[dataset_name]) + print("real_path:",real_path) + print("real_path 子文件:", os.listdir(real_path)) + print("data_path:",data_path) + print("data_path 子文件:", os.listdir(data_path)) + # modelarts modification + # Construct the dataset + dataset = _DATASETS[dataset_name](data_path, split) + # Create a sampler for multi-process training + sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None + # Create a loader + loader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=(False if sampler else shuffle), + sampler=sampler, + num_workers=cfg.DATA_LOADER.NUM_WORKERS, + pin_memory=cfg.DATA_LOADER.PIN_MEMORY, + drop_last=drop_last, + ) + return loader + + +def construct_train_loader(args): + """Train loader wrapper.""" + return _construct_loader( + args, + dataset_name=cfg.TRAIN.DATASET, + split=cfg.TRAIN.SPLIT, + batch_size=int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS), + shuffle=True, + drop_last=True, + ) + + +def construct_test_loader(args): + """Test loader wrapper.""" + return _construct_loader( + args, + dataset_name=cfg.TEST.DATASET, + split=cfg.TEST.SPLIT, + batch_size=int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS), + shuffle=False, + drop_last=False, + ) + + +def shuffle(loader, cur_epoch): + """"Shuffles the data.""" + err_str = "Sampler type '{}' not supported".format(type(loader.sampler)) + assert isinstance(loader.sampler, (RandomSampler, DistributedSampler)), err_str + # RandomSampler handles shuffling automatically + if isinstance(loader.sampler, DistributedSampler): + # DistributedSampler shuffles data based on epoch + loader.sampler.set_epoch(cur_epoch) diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/train_net.py b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/train_net.py index 862b2d3ba8ad16af6b15fea4699d29bbc0021312..14485f09e82a96b44d47647547eeee65cc5d87da 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/train_net.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/train_net.py @@ -1,106 +1,106 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. 
-# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train a classification model.""" -import argparse,sys,os,torch -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) - -import pycls.core.config as config -import pycls.core.distributed as dist -import pycls.core.trainer as trainer -import pycls.datasets.loader as data_loader - -from pycls.core.config import cfg - -def init_process_group(proc_rank, world_size, device_id, port='29588'): - """Initializes the default process group.""" - - # Initialize the process group - print('Initialize the process group') - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29588' - torch.distributed.init_process_group( - backend=cfg.DIST_BACKEND, - #init_method="tcp://{}:{}".format(cfg.HOST, port), - world_size=world_size, - rank=proc_rank, - #rank=0 - ) - print("init_process_group done") - - # Set the GPU to use - #torch.cuda.set_device(proc_rank) - torch.npu.set_device(device_id) - print('set_device done.', cfg.DIST_BACKEND, world_size, proc_rank) - -def main(): - config.load_cfg_fom_args("Train a classification model.") - config.assert_and_infer_cfg() - cfg.freeze() - - dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model) - - -if __name__ == "__main__": - """Load config from command line arguments and set any specified options.""" - parser = argparse.ArgumentParser(description="Config file options.") - help_s = "Config file location" - parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) - parser.add_argument("--rank_id", dest="rank_id", default=0, type=int) - parser.add_argument("--device_id", dest="device_id", default=0, type=int) - help_s = "See pycls/core/config.py for all options" - parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) - print("===============1================") - # modelarts modification - parser.add_argument('--data_url', - metavar='DIR', - default='/cache/data_url', - help='path to dataset') - parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") - parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - parser.add_argument('--npu', - default=None, - type=int, - help='NPU id to use.') - print("===============2================") - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - print(args) - print("args.data_url:",args.data_url) - print('cur dir:', os.listdir('./')) - config.merge_from_file(args.cfg_file) - config._C.merge_from_list(args.opts) - config.assert_and_infer_cfg() - cfg.freeze() - - init_process_group(proc_rank=args.rank_id, world_size=cfg.NUM_GPUS, device_id=args.device_id) - cur_device = torch.npu.current_device() - print('cur_device: ', cur_device) - - trainer.train_model(args) - 
trainer.convert_pth_to_onnx(args) - +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Train a classification model.""" +import argparse,sys,os,torch +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) + +import pycls.core.config as config +import pycls.core.distributed as dist +import pycls.core.trainer as trainer +import pycls.datasets.loader as data_loader + +from pycls.core.config import cfg + +def init_process_group(proc_rank, world_size, device_id, port='29588'): + """Initializes the default process group.""" + + # Initialize the process group + print('Initialize the process group') + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29588' + torch.distributed.init_process_group( + backend=cfg.DIST_BACKEND, + #init_method="tcp://{}:{}".format(cfg.HOST, port), + world_size=world_size, + rank=proc_rank, + #rank=0 + ) + print("init_process_group done") + + # Set the GPU to use + #torch.cuda.set_device(proc_rank) + torch.npu.set_device(device_id) + print('set_device done.', cfg.DIST_BACKEND, world_size, proc_rank) + +def main(): + config.load_cfg_fom_args("Train a classification model.") + config.assert_and_infer_cfg() + cfg.freeze() + + dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model) + + +if __name__ == "__main__": + """Load config from command line arguments and set any specified options.""" + parser = argparse.ArgumentParser(description="Config file options.") + help_s = "Config file location" + parser.add_argument("--cfg", dest="cfg_file", help=help_s, required=True, type=str) + parser.add_argument("--rank_id", dest="rank_id", default=0, type=int) + parser.add_argument("--device_id", dest="device_id", default=0, type=int) + help_s = "See pycls/core/config.py for all options" + parser.add_argument("opts", help=help_s, default=None, nargs=argparse.REMAINDER) + print("===============1================") + # modelarts modification + parser.add_argument('--data_url', + metavar='DIR', + default='/cache/data_url', + help='path to dataset') + parser.add_argument('--train_url', + default="/cache/training", + type=str, + help="setting dir of training output") + parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") + parser.add_argument('--npu', + default=None, + type=int, + help='NPU id to use.') + print("===============2================") + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + print(args) + print("args.data_url:",args.data_url) + print('cur dir:', os.listdir('./')) + config.merge_from_file(args.cfg_file) + config._C.merge_from_list(args.opts) + config.assert_and_infer_cfg() + cfg.freeze() + + init_process_group(proc_rank=args.rank_id, world_size=cfg.NUM_GPUS, 
device_id=args.device_id) + cur_device = torch.npu.current_device() + print('cur_device: ', cur_device) + + trainer.train_model(args) + trainer.convert_pth_to_onnx(args) + diff --git a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/trainer.py b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/trainer.py index 8c838dce87455d2d9362144a66a7bc80833811ff..34c479386eceab795813d3ca0ff3edbd12277256 100644 --- a/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/trainer.py +++ b/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch/modelasrts/trainer.py @@ -1,306 +1,306 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tools for training and testing a model.""" -import numpy as np -import pycls.core.benchmark as benchmark -import pycls.core.builders as builders -import pycls.core.checkpoint as cp -import pycls.core.distributed as dist -import pycls.core.env as env -import pycls.core.logging as logging -import pycls.core.meters as meters -import pycls.core.net as net -import pycls.core.optimizer as optim -import pycls.datasets.loader as data_loader -import torch -import torch.npu - -from pycls.core.net import unwrap_model -import glob -from apex import amp -from pycls.core.config import cfg -from pycls.core.cuda import ApexScaler -import os -import shutil - -import moxing as mox - -import torch.onnx -from collections import OrderedDict -from pycls.models.effnet import EffNet -from iopath.common.file_io import g_pathmgr -import pycls.core.config as config - -logger = logging.get_logger(__name__) - -cur_step = 0 -CACHE_TRAINING_URL = "/cache/training/" -CACHE_DATA_URL = "/cache/data_url" -CACHE_MODEL_URL = "/cache/model" - -def setup_model(): - - """Sets up a model for training or testing and log the results.""" - # Build the model - model = builders.build_model() - logger.info("Model:\n{}".format(model)) if cfg.VERBOSE else () - # Log model complexity - logger.info(logging.dump_log_data(net.complexity(model), "complexity")) - # Transfer the model to the current GPU device - err_str = "Cannot use more GPU devices than available" - #assert cfg.NUM_GPUS <= torch.cuda.device_count(), err_str - assert cfg.NUM_GPUS <= torch.npu.device_count(), err_str - cur_device = torch.npu.current_device() - model = model.to(cur_device) - optimizer = optim.construct_optimizer(model) - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128) - if cfg.NUM_GPUS > 1: - #Make model replica operate on the current device - ddp = torch.nn.parallel.DistributedDataParallel - model = ddp(model, device_ids=[cur_device], broadcast_buffers=False) - - return model,optimizer - - -def train_epoch(loader, model, loss_fun, optimizer, scaler, meter, 
cur_epoch): - """Performs one epoch of training.""" - # Shuffle the data - data_loader.shuffle(loader, cur_epoch) - # Update the learning rate - lr = optim.get_epoch_lr(cur_epoch) - optim.set_lr(optimizer, lr) - # Enable training mode - model.train() - meter.reset() - meter.iter_tic() - for cur_iter, (inputs, labels) in enumerate(loader): - # Transfer the data to the current GPU device - inputs = inputs.npu() - labels = labels.to(torch.int32).npu() - labels = labels.to(non_blocking=False) - # Convert labels to smoothed one-hot vector - p_labels = labels[:] - labels_one_hot = net.smooth_one_hot_labels(labels).npu() - # Apply mixup to the batch (no effect if mixup alpha is 0) - inputs, labels_one_hot, labels = net.mixup(inputs, labels_one_hot) - # Perform the forward pass and compute the loss - preds = model(inputs) - loss = loss_fun(preds, labels_one_hot) - # Perform the backward pass and update the parameters - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - # Compute the errors - top1_err, top5_err = meters.topk_errors(preds, p_labels, [1, 5]) - # Combine the errors across the GPUs (no reduction if 1 GPU used) - # Combine the stats across the GPUs (no reduction if 1 GPU used) - # loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err]) - # Copy the stats from GPU to CPU (sync point) - loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item() - meter.iter_toc() - # Update and log stats - mb_size = inputs.size(0) * cfg.NUM_GPUS - meter.update_stats(top1_err, top5_err, loss, lr, mb_size) - meter.log_iter_stats(cur_epoch, cur_iter) - meter.iter_tic() - # Log epoch stats - meter.log_epoch_stats(cur_epoch) - -@torch.no_grad() -def test_epoch(loader, model, meter, cur_epoch): - """Evaluates the model on the test set.""" - # Enable eval mode - model.eval() - meter.reset() - meter.iter_tic() - for cur_iter, (inputs, labels) in enumerate(loader): - # Transfer the data to the current GPU device - inputs = inputs.npu() - labels = labels.to(torch.int32).npu() - labels = labels.to(non_blocking=False) - # Compute the predictions - preds = model(inputs) - # Compute the errors - top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5]) - # Combine the errors across the GPUs (no reduction if 1 GPU used) - top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err]) - # Copy the errors from GPU to CPU (sync point) - top1_err, top5_err = top1_err.item(), top5_err.item() - meter.iter_toc() - # Update and log stats - meter.update_stats(top1_err, top5_err, inputs.size(0) * cfg.NUM_GPUS) - meter.log_iter_stats(cur_epoch, cur_iter) - meter.iter_tic() - # Log epoch stats - meter.log_epoch_stats(cur_epoch) - -def train_model(args): - """Trains the model.""" - # Setup training/testing environment - env.setup_env() - # Construct the model, loss_fun, and optimizer - model,optimizer = setup_model() - loss_fun = builders.build_loss_fun().npu() - # Load checkpoint or initial weights - start_epoch = 0 - if cfg.TRAIN.PRETRAINED: - cp.pretrained_load_checkpoint(cfg.TRAIN.PRETRAINED, model) - logger.info("Loaded pretrained initial weights from: {}".format(cfg.TRAIN.PRETRAINED)) - elif cfg.TRAIN.AUTO_RESUME and cp.has_checkpoint(): - file = cp.get_last_checkpoint() - epoch = cp.load_checkpoint(file, model, optimizer) - logger.info("Loaded checkpoint from: {}".format(file)) - start_epoch = epoch + 1 - elif cfg.TRAIN.WEIGHTS: - #cp.load_checkpoint(cfg.TRAIN.WEIGHTS, model) - CACHE_MODEL_URL = "/cache/model" - 
os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(cfg.TRAIN.WEIGHTS, os.path.join(CACHE_MODEL_URL, "model.pyth")) - pretrained_weight = os.path.join(CACHE_MODEL_URL, "model.pyth") - cp.load_checkpoint(pretrained_weight, model) - logger.info("Loaded initial weights from: {}".format(cfg.TRAIN.WEIGHTS)) - # Create data loaders and meters - train_loader = data_loader.construct_train_loader(args) - test_loader = data_loader.construct_test_loader(args) - # modelarts modification - ''' - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." % real_path) - train_loader = os.path.join(args.data, 'train') - test_loader = os.path.join(real_path, 'val') - ''' - # modelarts modification - train_meter = meters.TrainMeter(len(train_loader)) - test_meter = meters.TestMeter(len(test_loader)) - # Create a GradScaler for mixed precision training - scaler = ApexScaler() - # Compute model and loader timings - if start_epoch == 0 and cfg.PREC_TIME.NUM_ITER > 0: - benchmark.compute_time_full(model, loss_fun, train_loader, test_loader) - # Perform the training loop - logger.info("Start epoch: {}".format(start_epoch + 1)) - best_err = np.inf - - for cur_epoch in range(start_epoch, cfg.OPTIM.MAX_EPOCH): - # Train for one epoch - params = (train_loader, model, loss_fun, optimizer, scaler, train_meter) - train_epoch(*params, cur_epoch) - # Compute precise BN stats - if cfg.BN.USE_PRECISE_STATS: - net.compute_precise_bn_stats(model, train_loader) - # Evaluate the model - test_epoch(test_loader, model, test_meter, cur_epoch) - # Check if checkpoint is best so far (note: should checkpoint meters as well) - stats = test_meter.get_epoch_stats(cur_epoch) - best = stats["top1_err"] <= best_err - best_err = min(stats["top1_err"], best_err) - # Save a checkpoint - file = cp.save_checkpoint(model, optimizer, cur_epoch, best) - logger.info("Wrote checkpoint to: {}".format(file)) - - # --------------modelarts modification---------- - if args.onnx: - convert_pth_to_onnx(args) - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - -def test_model(): - """Evaluates a trained model.""" - # Setup training/testing environment - env.setup_env() - # Construct the model - model,optimizer = setup_model() - # Load model weights - cp.load_checkpoint(cfg.TEST.WEIGHTS, model) - logger.info("Loaded model weights from: {}".format(cfg.TEST.WEIGHTS)) - # Create data loaders and meters - test_loader = data_loader.construct_test_loader() - test_meter = meters.TestMeter(len(test_loader)) - # Evaluate the model - test_epoch(test_loader, model, test_meter, 0) - - -def time_model(): - """Times model.""" - # Setup training/testing environment - env.setup_env() - # Construct the model and loss_fun - model = setup_model() - loss_fun = builders.build_loss_fun().npu() - # Compute model and loader timings - benchmark.compute_time_model(model, loss_fun) - - -def time_model_and_loader(): - """Times model and data loader.""" - # Setup training/testing environment - env.setup_env() - # Construct the model and loss_fun - model = setup_model() - loss_fun = builders.build_loss_fun().npu() - # Create data loaders - train_loader = data_loader.construct_train_loader() - test_loader = data_loader.construct_test_loader() - # Compute model and loader timings - benchmark.compute_time_full(model, loss_fun, train_loader, test_loader) - -def 
convert_pth_to_onnx(args): - os.makedirs(CACHE_TRAINING_URL, exist_ok=True) - shutil.copy(os.path.join(cfg.OUT_DIR, 'model.pyth'), CACHE_TRAINING_URL) - pth_pattern = os.path.join(CACHE_TRAINING_URL, 'model.pyth') - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print("can't find pth {pth_pattern}") - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path) - -def proc_node_module(checkpoint, attr_name): - new_model_state = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_model_state[name] = v - return new_model_state - -def convert(pth_file_path, onnx_file_path): - """Sets up a model for training or testing and log the results.""" - loc = 'cpu' - with g_pathmgr.open(pth_file_path, "rb") as f: - checkpoint = torch.load(f, map_location=loc) - cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../configs/dds_baselines/effnet/EN-B5_dds_1npu_full.yaml") - config.merge_from_file(cfg_file) - cfg.freeze() - model = EffNet().to(loc) - checkpoint['model_state'] = proc_node_module(checkpoint, 'model_state') - model.load_state_dict(checkpoint["model_state"], False) - model = model.to(loc) - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(1, 3, 456, 456).to(loc) +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tools for training and testing a model.""" +import numpy as np +import pycls.core.benchmark as benchmark +import pycls.core.builders as builders +import pycls.core.checkpoint as cp +import pycls.core.distributed as dist +import pycls.core.env as env +import pycls.core.logging as logging +import pycls.core.meters as meters +import pycls.core.net as net +import pycls.core.optimizer as optim +import pycls.datasets.loader as data_loader +import torch +import torch.npu + +from pycls.core.net import unwrap_model +import glob +from apex import amp +from pycls.core.config import cfg +from pycls.core.cuda import ApexScaler +import os +import shutil + +import moxing as mox + +import torch.onnx +from collections import OrderedDict +from pycls.models.effnet import EffNet +from iopath.common.file_io import g_pathmgr +import pycls.core.config as config + +logger = logging.get_logger(__name__) + +cur_step = 0 +CACHE_TRAINING_URL = "/cache/training/" +CACHE_DATA_URL = "/cache/data_url" +CACHE_MODEL_URL = "/cache/model" + +def setup_model(): + + """Sets up a model for training or testing and log the results.""" + # Build the model + model = builders.build_model() + logger.info("Model:\n{}".format(model)) if cfg.VERBOSE else () + # Log model complexity + logger.info(logging.dump_log_data(net.complexity(model), "complexity")) + # Transfer the model to the current GPU device + err_str = "Cannot use more GPU devices than available" + #assert cfg.NUM_GPUS <= torch.cuda.device_count(), err_str + assert cfg.NUM_GPUS <= torch.npu.device_count(), err_str + cur_device = torch.npu.current_device() + model = model.to(cur_device) + optimizer = optim.construct_optimizer(model) + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128) + if cfg.NUM_GPUS > 1: + #Make model replica operate on the current device + ddp = torch.nn.parallel.DistributedDataParallel + model = ddp(model, device_ids=[cur_device], broadcast_buffers=False) + + return model,optimizer + + +def train_epoch(loader, model, loss_fun, optimizer, scaler, meter, cur_epoch): + """Performs one epoch of training.""" + # Shuffle the data + data_loader.shuffle(loader, cur_epoch) + # Update the learning rate + lr = optim.get_epoch_lr(cur_epoch) + optim.set_lr(optimizer, lr) + # Enable training mode + model.train() + meter.reset() + meter.iter_tic() + for cur_iter, (inputs, labels) in enumerate(loader): + # Transfer the data to the current GPU device + inputs = inputs.npu() + labels = labels.to(torch.int32).npu() + labels = labels.to(non_blocking=False) + # Convert labels to smoothed one-hot vector + p_labels = labels[:] + labels_one_hot = net.smooth_one_hot_labels(labels).npu() + # Apply mixup to the batch (no effect if mixup alpha is 0) + inputs, labels_one_hot, labels = net.mixup(inputs, labels_one_hot) + # Perform the forward pass and compute the loss + preds = model(inputs) + loss = loss_fun(preds, labels_one_hot) + # Perform the backward pass and update the parameters + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + # Compute the errors + top1_err, top5_err = meters.topk_errors(preds, p_labels, [1, 5]) + # Combine the errors across the GPUs (no reduction if 1 GPU used) + # Combine the stats across the GPUs (no reduction if 1 GPU used) + # loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err]) + # Copy the stats from GPU to CPU (sync point) + loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item() + 
meter.iter_toc() + # Update and log stats + mb_size = inputs.size(0) * cfg.NUM_GPUS + meter.update_stats(top1_err, top5_err, loss, lr, mb_size) + meter.log_iter_stats(cur_epoch, cur_iter) + meter.iter_tic() + # Log epoch stats + meter.log_epoch_stats(cur_epoch) + +@torch.no_grad() +def test_epoch(loader, model, meter, cur_epoch): + """Evaluates the model on the test set.""" + # Enable eval mode + model.eval() + meter.reset() + meter.iter_tic() + for cur_iter, (inputs, labels) in enumerate(loader): + # Transfer the data to the current GPU device + inputs = inputs.npu() + labels = labels.to(torch.int32).npu() + labels = labels.to(non_blocking=False) + # Compute the predictions + preds = model(inputs) + # Compute the errors + top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5]) + # Combine the errors across the GPUs (no reduction if 1 GPU used) + top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err]) + # Copy the errors from GPU to CPU (sync point) + top1_err, top5_err = top1_err.item(), top5_err.item() + meter.iter_toc() + # Update and log stats + meter.update_stats(top1_err, top5_err, inputs.size(0) * cfg.NUM_GPUS) + meter.log_iter_stats(cur_epoch, cur_iter) + meter.iter_tic() + # Log epoch stats + meter.log_epoch_stats(cur_epoch) + +def train_model(args): + """Trains the model.""" + # Setup training/testing environment + env.setup_env() + # Construct the model, loss_fun, and optimizer + model,optimizer = setup_model() + loss_fun = builders.build_loss_fun().npu() + # Load checkpoint or initial weights + start_epoch = 0 + if cfg.TRAIN.PRETRAINED: + cp.pretrained_load_checkpoint(cfg.TRAIN.PRETRAINED, model) + logger.info("Loaded pretrained initial weights from: {}".format(cfg.TRAIN.PRETRAINED)) + elif cfg.TRAIN.AUTO_RESUME and cp.has_checkpoint(): + file = cp.get_last_checkpoint() + epoch = cp.load_checkpoint(file, model, optimizer) + logger.info("Loaded checkpoint from: {}".format(file)) + start_epoch = epoch + 1 + elif cfg.TRAIN.WEIGHTS: + #cp.load_checkpoint(cfg.TRAIN.WEIGHTS, model) + CACHE_MODEL_URL = "/cache/model" + os.makedirs(CACHE_MODEL_URL, exist_ok=True) + mox.file.copy_parallel(cfg.TRAIN.WEIGHTS, os.path.join(CACHE_MODEL_URL, "model.pyth")) + pretrained_weight = os.path.join(CACHE_MODEL_URL, "model.pyth") + cp.load_checkpoint(pretrained_weight, model) + logger.info("Loaded initial weights from: {}".format(cfg.TRAIN.WEIGHTS)) + # Create data loaders and meters + train_loader = data_loader.construct_train_loader(args) + test_loader = data_loader.construct_test_loader(args) + # modelarts modification + ''' + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + mox.file.copy_parallel(args.data_url, real_path) + print("training data finish copy to %s." 
% real_path) + train_loader = os.path.join(args.data, 'train') + test_loader = os.path.join(real_path, 'val') + ''' + # modelarts modification + train_meter = meters.TrainMeter(len(train_loader)) + test_meter = meters.TestMeter(len(test_loader)) + # Create a GradScaler for mixed precision training + scaler = ApexScaler() + # Compute model and loader timings + if start_epoch == 0 and cfg.PREC_TIME.NUM_ITER > 0: + benchmark.compute_time_full(model, loss_fun, train_loader, test_loader) + # Perform the training loop + logger.info("Start epoch: {}".format(start_epoch + 1)) + best_err = np.inf + + for cur_epoch in range(start_epoch, cfg.OPTIM.MAX_EPOCH): + # Train for one epoch + params = (train_loader, model, loss_fun, optimizer, scaler, train_meter) + train_epoch(*params, cur_epoch) + # Compute precise BN stats + if cfg.BN.USE_PRECISE_STATS: + net.compute_precise_bn_stats(model, train_loader) + # Evaluate the model + test_epoch(test_loader, model, test_meter, cur_epoch) + # Check if checkpoint is best so far (note: should checkpoint meters as well) + stats = test_meter.get_epoch_stats(cur_epoch) + best = stats["top1_err"] <= best_err + best_err = min(stats["top1_err"], best_err) + # Save a checkpoint + file = cp.save_checkpoint(model, optimizer, cur_epoch, best) + logger.info("Wrote checkpoint to: {}".format(file)) + + # --------------modelarts modification---------- + if args.onnx: + convert_pth_to_onnx(args) + mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) + # --------------modelarts modification end---------- + +def test_model(): + """Evaluates a trained model.""" + # Setup training/testing environment + env.setup_env() + # Construct the model + model,optimizer = setup_model() + # Load model weights + cp.load_checkpoint(cfg.TEST.WEIGHTS, model) + logger.info("Loaded model weights from: {}".format(cfg.TEST.WEIGHTS)) + # Create data loaders and meters + test_loader = data_loader.construct_test_loader() + test_meter = meters.TestMeter(len(test_loader)) + # Evaluate the model + test_epoch(test_loader, model, test_meter, 0) + + +def time_model(): + """Times model.""" + # Setup training/testing environment + env.setup_env() + # Construct the model and loss_fun + model = setup_model() + loss_fun = builders.build_loss_fun().npu() + # Compute model and loader timings + benchmark.compute_time_model(model, loss_fun) + + +def time_model_and_loader(): + """Times model and data loader.""" + # Setup training/testing environment + env.setup_env() + # Construct the model and loss_fun + model = setup_model() + loss_fun = builders.build_loss_fun().npu() + # Create data loaders + train_loader = data_loader.construct_train_loader() + test_loader = data_loader.construct_test_loader() + # Compute model and loader timings + benchmark.compute_time_full(model, loss_fun, train_loader, test_loader) + +def convert_pth_to_onnx(args): + os.makedirs(CACHE_TRAINING_URL, exist_ok=True) + shutil.copy(os.path.join(cfg.OUT_DIR, 'model.pyth'), CACHE_TRAINING_URL) + pth_pattern = os.path.join(CACHE_TRAINING_URL, 'model.pyth') + pth_file_list = glob.glob(pth_pattern) + if not pth_file_list: + print("can't find pth {pth_pattern}") + pth_file = pth_file_list[0] + onnx_path = pth_file.split(".")[0] + '.onnx' + convert(pth_file, onnx_path) + +def proc_node_module(checkpoint, attr_name): + new_model_state = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_model_state[name] = v + return new_model_state + +def convert(pth_file_path, 
onnx_file_path): + """Sets up a model for training or testing and log the results.""" + loc = 'cpu' + with g_pathmgr.open(pth_file_path, "rb") as f: + checkpoint = torch.load(f, map_location=loc) + cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../configs/dds_baselines/effnet/EN-B5_dds_1npu_full.yaml") + config.merge_from_file(cfg_file) + cfg.freeze() + model = EffNet().to(loc) + checkpoint['model_state'] = proc_node_module(checkpoint, 'model_state') + model.load_state_dict(checkpoint["model_state"], False) + model = model.to(loc) + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 456, 456).to(loc) torch.onnx.export(model, dummy_input, onnx_file_path, input_names = input_names, output_names = output_names, opset_version=11) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/LICENSE b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/LICENSE +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md index 3a09b50b40d753d58f86cc14dc7a9ac75e2c6df5..05e2f31ba924106e50f850408b068a858967caaf 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/README.md @@ -1,59 +1,59 @@ -# GENet - -This implements training of GENET on the cifar10 dataset, mainly modified from [pytorch/examples](https://github.com/BayesWatch/pytorch-GENet). - -## GENet Details - -The configuration process and operation method of GENet are described in detail below. - -## SoftWare Package -CANN 5.0.2 - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- Download the cifar10 dataset by referring the original [repository](https://github.com/BayesWatch/pytorch-GENet) - - You can also without downloading them in advance. The cifar10 interface provided by torchvision will automatically download them for you. - -## Training - -To train a model, run `train.py` with the desired model architecture and the path to the cifar10 dataset. -Note: assuming that your dataset path is:**/opt/gpu/cifar10-batches-py/**, the real_data_path should be **/opt/gpu/** - -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=real_data_path - -# training 1p with pretrained model -bash ./test/train_finetune_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=real_data_path - -# evaluating 8p performance -bash ./test/train_eval_8p.sh --data_path=real_data_path -``` -### Remarks: -All bash instructions output log files correctly. - -### Log path: -**training detail log:** - test/output/devie_id/train_${device_id}.log -**8p training performance result log:** test/output/devie_id/GENet_bs128_8p_perf.log -**8p training accuracy result log :** test/output/devie_id/GENet_bs128_8p_acc.log - -## GENET training result - -| Acc@1 | FPS | Device Type| Device Nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: |:------: -| 94.73 | 1894.827 | NPU | 1 | 300 | O2 | -| 95.23 | 7858.025 |NPU |8 | 300 | O2 | -| 94.76 | 1350.074 |GPU |1 | 300 | O2 | +# GENet + +This implements training of GENET on the cifar10 dataset, mainly modified from [pytorch/examples](https://github.com/BayesWatch/pytorch-GENet). + +## GENet Details + +The configuration process and operation method of GENet are described in detail below. + +## SoftWare Package +CANN 5.0.2 + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- Download the cifar10 dataset by referring the original [repository](https://github.com/BayesWatch/pytorch-GENet) + - You can also without downloading them in advance. The cifar10 interface provided by torchvision will automatically download them for you. + +## Training + +To train a model, run `train.py` with the desired model architecture and the path to the cifar10 dataset. 
+Note: assuming that your dataset path is:**/opt/gpu/cifar10-batches-py/**, the real_data_path should be **/opt/gpu/** + +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=real_data_path + +# training 1p with pretrained model +bash ./test/train_finetune_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=real_data_path + +# evaluating 8p performance +bash ./test/train_eval_8p.sh --data_path=real_data_path +``` +### Remarks: +All bash instructions output log files correctly. + +### Log path: +**training detail log:** + test/output/devie_id/train_${device_id}.log +**8p training performance result log:** test/output/devie_id/GENet_bs128_8p_perf.log +**8p training accuracy result log :** test/output/devie_id/GENet_bs128_8p_acc.log + +## GENET training result + +| Acc@1 | FPS | Device Type| Device Nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: |:------: +| 94.73 | 1894.827 | NPU | 1 | 300 | O2 | +| 95.23 | 7858.025 |NPU |8 | 300 | O2 | +| 94.76 | 1350.074 |GPU |1 | 300 | O2 | | 94.81 | 6536.289 |GPU |8 | 300 | O2 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/modelzoo_level.txt index 19bb24fce376e3e5b3c73914daa4c57db1ab7d74..24cbc51bb95d445da366254d926b471ffada7b4a 100644 --- a/PyTorch/contrib/cv/classification/GENet_for_Pytorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/GENet_for_Pytorch/modelzoo_level.txt @@ -1,6 +1,6 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:POK -AutoTune:OK -PerfStatus:OK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:POK +AutoTune:OK +PerfStatus:OK diff --git a/PyTorch/contrib/cv/classification/GhostNet/LICENSE b/PyTorch/contrib/cv/classification/GhostNet/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/LICENSE +++ b/PyTorch/contrib/cv/classification/GhostNet/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GhostNet/ghostnet/ghostnet_pytorch/ghostnet.py b/PyTorch/contrib/cv/classification/GhostNet/ghostnet/ghostnet_pytorch/ghostnet.py index eaedf42a5aaad2e9829040f0951635af1ced86ad..887f6318e6ed0135ae006e93e22fea247f22a19e 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/ghostnet/ghostnet_pytorch/ghostnet.py +++ b/PyTorch/contrib/cv/classification/GhostNet/ghostnet/ghostnet_pytorch/ghostnet.py @@ -1,260 +1,260 @@ -# 2020.06.09-Changed for building GhostNet -# Huawei Technologies Co., Ltd. -""" -Creates a GhostNet Model as defined in: -GhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu. -https://arxiv.org/abs/1911.11907 -Modified from https://github.com/d-li14/mobilenetv3.pytorch and https://github.com/rwightman/pytorch-image-models -""" -import torch -import torch.nn as nn -import torch.nn.functional as F -import math - - -__all__ = ['ghost_net'] - - -def _make_divisible(v, divisor, min_value=None): - """ - This function is taken from the original tf repo. - It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. 
- if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def hard_sigmoid(x, inplace: bool = False): - if inplace: - return x.add_(3.).clamp_(0., 6.).div_(6.) - else: - return F.relu6(x + 3.) / 6. - - -class SqueezeExcite(nn.Module): - def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, - act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_): - super(SqueezeExcite, self).__init__() - self.gate_fn = gate_fn - reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor) - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) - self.act1 = act_layer(inplace=True) - self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) - - def forward(self, x): - x_se = self.avg_pool(x) - x_se = self.conv_reduce(x_se) - x_se = self.act1(x_se) - x_se = self.conv_expand(x_se) - x = x * self.gate_fn(x_se) - return x - - -class ConvBnAct(nn.Module): - def __init__(self, in_chs, out_chs, kernel_size, - stride=1, act_layer=nn.ReLU): - super(ConvBnAct, self).__init__() - self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size//2, bias=False) - self.bn1 = nn.BatchNorm2d(out_chs) - self.act1 = act_layer(inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn1(x) - x = self.act1(x) - return x - - -class GhostModule(nn.Module): - def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): - super(GhostModule, self).__init__() - self.oup = oup - init_channels = math.ceil(oup / ratio) - new_channels = init_channels*(ratio-1) - - self.primary_conv = nn.Sequential( - nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False), - nn.BatchNorm2d(init_channels), - nn.ReLU(inplace=True) if relu else nn.Sequential(), - ) - - self.cheap_operation = nn.Sequential( - nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False), - nn.BatchNorm2d(new_channels), - nn.ReLU(inplace=True) if relu else nn.Sequential(), - ) - - def forward(self, x): - x1 = self.primary_conv(x) - x2 = self.cheap_operation(x1) - out = torch.cat([x1,x2], dim=1) - # For avoiding OOM, refine raw code now: return out[:,:self.oup,:,:] - return out - - -class GhostBottleneck(nn.Module): - """ Ghost bottleneck w/ optional SE""" - - def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3, - stride=1, act_layer=nn.ReLU, se_ratio=0.): - super(GhostBottleneck, self).__init__() - has_se = se_ratio is not None and se_ratio > 0. 
- self.stride = stride - - # Point-wise expansion - self.ghost1 = GhostModule(in_chs, mid_chs, relu=True) - - # Depth-wise convolution - if self.stride > 1: - self.conv_dw = nn.Conv2d(mid_chs, mid_chs, dw_kernel_size, stride=stride, - padding=(dw_kernel_size-1)//2, - groups=mid_chs, bias=False) - self.bn_dw = nn.BatchNorm2d(mid_chs) - - # Squeeze-and-excitation - if has_se: - self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) - else: - self.se = None - - # Point-wise linear projection - self.ghost2 = GhostModule(mid_chs, out_chs, relu=False) - - # shortcut - if (in_chs == out_chs and self.stride == 1): - self.shortcut = nn.Sequential() - else: - self.shortcut = nn.Sequential( - nn.Conv2d(in_chs, in_chs, dw_kernel_size, stride=stride, - padding=(dw_kernel_size-1)//2, groups=in_chs, bias=False), - nn.BatchNorm2d(in_chs), - nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(out_chs), - ) - - - def forward(self, x): - residual = x - - # 1st ghost bottleneck - x = self.ghost1(x) - - # Depth-wise convolution - if self.stride > 1: - x = self.conv_dw(x) - x = self.bn_dw(x) - - # Squeeze-and-excitation - if self.se is not None: - x = self.se(x) - - # 2nd ghost bottleneck - x = self.ghost2(x) - - x += self.shortcut(residual) - return x - - -class GhostNet(nn.Module): - def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2): - super(GhostNet, self).__init__() - # setting of inverted residual blocks - self.cfgs = cfgs - self.dropout = dropout - - # building first layer - output_channel = _make_divisible(16 * width, 4) - self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False) - self.bn1 = nn.BatchNorm2d(output_channel) - self.act1 = nn.ReLU(inplace=True) - input_channel = output_channel - - # building inverted residual blocks - stages = [] - block = GhostBottleneck - for cfg in self.cfgs: - layers = [] - for k, exp_size, c, se_ratio, s in cfg: - output_channel = _make_divisible(c * width, 4) - hidden_channel = _make_divisible(exp_size * width, 4) - layers.append(block(input_channel, hidden_channel, output_channel, k, s, - se_ratio=se_ratio)) - input_channel = output_channel - stages.append(nn.Sequential(*layers)) - - output_channel = _make_divisible(exp_size * width, 4) - stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1))) - input_channel = output_channel - - self.blocks = nn.Sequential(*stages) - - # building last several layers - output_channel = 1280 - self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) - self.conv_head = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) - self.act2 = nn.ReLU(inplace=True) - self.classifier = nn.Linear(output_channel, num_classes) - - def forward(self, x): - x = self.conv_stem(x) - x = self.bn1(x) - x = self.act1(x) - x = self.blocks(x) - x = self.global_pool(x) - x = self.conv_head(x) - x = self.act2(x) - x = x.view(x.size(0), -1) - if self.dropout > 0.: - x = F.dropout(x, p=self.dropout, training=self.training) - x = self.classifier(x) - return x - - -def ghostnet(**kwargs): - """ - Constructs a GhostNet model - """ - cfgs = [ - # k, t, c, SE, s - # stage1 - [[3, 16, 16, 0, 1]], - # stage2 - [[3, 48, 24, 0, 2]], - [[3, 72, 24, 0, 1]], - # stage3 - [[5, 72, 40, 0.25, 2]], - [[5, 120, 40, 0.25, 1]], - # stage4 - [[3, 240, 80, 0, 2]], - [[3, 200, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 184, 80, 0, 1], - [3, 480, 112, 0.25, 1], - [3, 672, 112, 0.25, 1] - ], - # stage5 - [[5, 672, 160, 0.25, 2]], - [[5, 960, 160, 0, 1], - [5, 960, 160, 0.25, 1], - [5, 960, 160, 0, 1], - [5, 960, 160, 
0.25, 1] - ] - ] - return GhostNet(cfgs, **kwargs) - - -if __name__=='__main__': - model = ghostnet() - model.eval() - print(model) - input = torch.randn(32,3,320,256) - y = model(input) +# 2020.06.09-Changed for building GhostNet +# Huawei Technologies Co., Ltd. +""" +Creates a GhostNet Model as defined in: +GhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu. +https://arxiv.org/abs/1911.11907 +Modified from https://github.com/d-li14/mobilenetv3.pytorch and https://github.com/rwightman/pytorch-image-models +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + + +__all__ = ['ghost_net'] + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def hard_sigmoid(x, inplace: bool = False): + if inplace: + return x.add_(3.).clamp_(0., 6.).div_(6.) + else: + return F.relu6(x + 3.) / 6. + + +class SqueezeExcite(nn.Module): + def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, + act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_): + super(SqueezeExcite, self).__init__() + self.gate_fn = gate_fn + reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor) + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) + self.act1 = act_layer(inplace=True) + self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) + + def forward(self, x): + x_se = self.avg_pool(x) + x_se = self.conv_reduce(x_se) + x_se = self.act1(x_se) + x_se = self.conv_expand(x_se) + x = x * self.gate_fn(x_se) + return x + + +class ConvBnAct(nn.Module): + def __init__(self, in_chs, out_chs, kernel_size, + stride=1, act_layer=nn.ReLU): + super(ConvBnAct, self).__init__() + self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size//2, bias=False) + self.bn1 = nn.BatchNorm2d(out_chs) + self.act1 = act_layer(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn1(x) + x = self.act1(x) + return x + + +class GhostModule(nn.Module): + def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): + super(GhostModule, self).__init__() + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels*(ratio-1) + + self.primary_conv = nn.Sequential( + nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False), + nn.BatchNorm2d(init_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + self.cheap_operation = nn.Sequential( + nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False), + nn.BatchNorm2d(new_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + def forward(self, x): + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1,x2], dim=1) + # For avoiding OOM, refine raw code now: return out[:,:self.oup,:,:] + return out + + +class GhostBottleneck(nn.Module): + """ Ghost bottleneck w/ optional SE""" + + def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3, + 
stride=1, act_layer=nn.ReLU, se_ratio=0.): + super(GhostBottleneck, self).__init__() + has_se = se_ratio is not None and se_ratio > 0. + self.stride = stride + + # Point-wise expansion + self.ghost1 = GhostModule(in_chs, mid_chs, relu=True) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d(mid_chs, mid_chs, dw_kernel_size, stride=stride, + padding=(dw_kernel_size-1)//2, + groups=mid_chs, bias=False) + self.bn_dw = nn.BatchNorm2d(mid_chs) + + # Squeeze-and-excitation + if has_se: + self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio) + else: + self.se = None + + # Point-wise linear projection + self.ghost2 = GhostModule(mid_chs, out_chs, relu=False) + + # shortcut + if (in_chs == out_chs and self.stride == 1): + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d(in_chs, in_chs, dw_kernel_size, stride=stride, + padding=(dw_kernel_size-1)//2, groups=in_chs, bias=False), + nn.BatchNorm2d(in_chs), + nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_chs), + ) + + + def forward(self, x): + residual = x + + # 1st ghost bottleneck + x = self.ghost1(x) + + # Depth-wise convolution + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + + # Squeeze-and-excitation + if self.se is not None: + x = self.se(x) + + # 2nd ghost bottleneck + x = self.ghost2(x) + + x += self.shortcut(residual) + return x + + +class GhostNet(nn.Module): + def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2): + super(GhostNet, self).__init__() + # setting of inverted residual blocks + self.cfgs = cfgs + self.dropout = dropout + + # building first layer + output_channel = _make_divisible(16 * width, 4) + self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False) + self.bn1 = nn.BatchNorm2d(output_channel) + self.act1 = nn.ReLU(inplace=True) + input_channel = output_channel + + # building inverted residual blocks + stages = [] + block = GhostBottleneck + for cfg in self.cfgs: + layers = [] + for k, exp_size, c, se_ratio, s in cfg: + output_channel = _make_divisible(c * width, 4) + hidden_channel = _make_divisible(exp_size * width, 4) + layers.append(block(input_channel, hidden_channel, output_channel, k, s, + se_ratio=se_ratio)) + input_channel = output_channel + stages.append(nn.Sequential(*layers)) + + output_channel = _make_divisible(exp_size * width, 4) + stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1))) + input_channel = output_channel + + self.blocks = nn.Sequential(*stages) + + # building last several layers + output_channel = 1280 + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.conv_head = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True) + self.act2 = nn.ReLU(inplace=True) + self.classifier = nn.Linear(output_channel, num_classes) + + def forward(self, x): + x = self.conv_stem(x) + x = self.bn1(x) + x = self.act1(x) + x = self.blocks(x) + x = self.global_pool(x) + x = self.conv_head(x) + x = self.act2(x) + x = x.view(x.size(0), -1) + if self.dropout > 0.: + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.classifier(x) + return x + + +def ghostnet(**kwargs): + """ + Constructs a GhostNet model + """ + cfgs = [ + # k, t, c, SE, s + # stage1 + [[3, 16, 16, 0, 1]], + # stage2 + [[3, 48, 24, 0, 2]], + [[3, 72, 24, 0, 1]], + # stage3 + [[5, 72, 40, 0.25, 2]], + [[5, 120, 40, 0.25, 1]], + # stage4 + [[3, 240, 80, 0, 2]], + [[3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 0.25, 1], + [3, 672, 112, 0.25, 1] + 
], + # stage5 + [[5, 672, 160, 0.25, 2]], + [[5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 0.25, 1] + ] + ] + return GhostNet(cfgs, **kwargs) + + +if __name__=='__main__': + model = ghostnet() + model.eval() + print(model) + input = torch.randn(32,3,320,256) + y = model(input) print(y.size()) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/GhostNet/modelarts/train_start.py b/PyTorch/contrib/cv/classification/GhostNet/modelarts/train_start.py index 4335340a1458eabf210cd37febbd2c6e92c0d9d0..d0a33069ef6c28b5ba401cc6f44174a008a17bc3 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/modelarts/train_start.py +++ b/PyTorch/contrib/cv/classification/GhostNet/modelarts/train_start.py @@ -1,778 +1,778 @@ -#!/usr/bin/env python -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" ImageNet Training Script -This is intended to be a lean and easily modifiable ImageNet training script that reproduces ImageNet -training results with some of the latest networks and training techniques. It favours canonical PyTorch -and standard Python style over trying to be able to 'do it all.' That said, it offers quite a few speed -and training result improvements over the usual PyTorch example scripts. Repurpose as you see fit. 
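Referring back to the ghostnet.py hunk that ends above: two small pieces of arithmetic drive the architecture, the width multiplier rounded by _make_divisible and the primary/cheap channel split inside GhostModule. Below is a minimal illustrative sketch that mirrors those helpers; the example width values are illustrative only, not settings used by this repository.

```python
import math

# Mirrors _make_divisible from the ghostnet.py hunk above
# (the model builder calls it with divisor=4).
def make_divisible(v, divisor=4, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

# Mirrors the channel split inside GhostModule: the primary 1x1 conv produces
# ceil(oup / ratio) channels, the cheap depthwise conv produces the rest,
# and the two outputs are concatenated.
def ghost_channels(oup, ratio=2):
    init_channels = math.ceil(oup / ratio)
    new_channels = init_channels * (ratio - 1)
    return init_channels, new_channels

print(make_divisible(16 * 1.0))   # 16  -> stem channels at width=1.0
print(make_divisible(112 * 1.3))  # 144 -> 145.6 rounded to the nearest multiple of 4
print(ghost_channels(16))         # (8, 8)   -> concatenated back to 16 channels
print(ghost_channels(960))        # (480, 480)
```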
-This script was started from an early version of the PyTorch ImageNet example -(https://github.com/pytorch/examples/tree/master/imagenet) -NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples -(https://github.com/NVIDIA/apex/tree/master/examples/imagenet) -Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) -""" -import argparse -import time -import yaml -import os -import logging -from collections import OrderedDict -from contextlib import suppress -from datetime import datetime -import glob -import shutil - -import torch -import torch.nn as nn -import torchvision.utils -from torch.nn.parallel import DistributedDataParallel as NativeDDP -import torch.onnx - -from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset -from timm.data import create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset -from timm.models import create_model, resume_checkpoint, load_checkpoint, convert_splitbn_model -from timm.utils import * -from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy, JsdCrossEntropy -from timm.optim import create_optimizer -from timm.scheduler import create_scheduler -from timm.utils import ApexScaler, NativeScaler -from ghostnet.ghostnet_pytorch.ghostnet import ghostnet -import torch.npu - -# modelarts modification -import moxing as mox - -CALCULATE_DEVICE = "npu:0" - -try: - from apex import amp - from apex.parallel import DistributedDataParallel as ApexDDP - from apex.parallel import convert_syncbn_model - - has_apex = True -except ImportError: - has_apex = False - -has_native_amp = False -try: - if getattr(torch.cuda.amp, 'autocast') is not None: - has_native_amp = True -except AttributeError: - pass - -torch.backends.cudnn.benchmark = True -_logger = logging.getLogger('train') - -# The first arg parser parses out only the --config argument, this argument is used to -# load a yaml file containing key-values that override the defaults for the main parser below -config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False) -parser.add_argument('-c', '--config', default='', type=str, metavar='FILE', - help='YAML config file specifying default arguments') -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -# Dataset / Model parameters -parser.add_argument('data_dir', metavar='DIR', - help='path to dataset') -parser.add_argument('--dataset', '-d', metavar='NAME', default='', - help='dataset type (default: ImageFolder/ImageTar if empty)') -parser.add_argument('--train-split', metavar='NAME', default='train', - help='dataset train split (default: train)') -parser.add_argument('--val-split', metavar='NAME', default='validation', - help='dataset validation split (default: validation)') -parser.add_argument('--model', default='resnet101', type=str, metavar='MODEL', - help='Name of model to train (default: "countception"') -parser.add_argument('--pretrained', action='store_true', default=False, - help='Start with pretrained version of specified network (if avail)') -parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', - help='Initialize model from this checkpoint (default: none)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='Resume full model and optimizer state from checkpoint (default: none)') -parser.add_argument('--no-resume-opt', action='store_true', default=False, - help='prevent resume of optimizer state when resuming model') 
-parser.add_argument('--num-classes', type=int, default=None, metavar='N', - help='number of label classes (Model default if None)') -parser.add_argument('--gp', default=None, type=str, metavar='POOL', - help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') -parser.add_argument('--img-size', type=int, default=None, metavar='N', - help='Image patch size (default: None => model default)') -parser.add_argument('--input-size', default=None, nargs=3, type=int, - metavar='N N N', - help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty') -parser.add_argument('--crop-pct', default=None, type=float, - metavar='N', help='Input image center crop percent (for validation only)') -parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', - help='Override mean pixel value of dataset') -parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', - help='Override std deviation of of dataset') -parser.add_argument('--interpolation', default='', type=str, metavar='NAME', - help='Image resize interpolation type (overrides model)') -parser.add_argument('-b', '--batch-size', type=int, default=1024, metavar='N', - help='input batch size for training (default: 32)') -parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N', - help='ratio of validation batch size to training batch size (default: 1)') - -# Optimizer parameters -parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', - help='Optimizer (default: "sgd"') -parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', - help='Optimizer Epsilon (default: None, use opt default)') -parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', - help='Optimizer Betas (default: None, use opt default)') -parser.add_argument('--momentum', type=float, default=0.9, metavar='M', - help='Optimizer momentum (default: 0.9)') -parser.add_argument('--weight-decay', type=float, default=0.0001, - help='weight decay (default: 0.0001)') -parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', - help='Clip gradient norm (default: None, no clipping)') - -# Learning rate schedule parameters -parser.add_argument('--sched', default='step', type=str, metavar='SCHEDULER', - help='LR scheduler (default: "step"') -parser.add_argument('--lr', type=float, default=0.4, metavar='LR', - help='learning rate (default: 0.01)') -parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', - help='learning rate noise on/off epoch percentages') -parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', - help='learning rate noise limit percent (default: 0.67)') -parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', - help='learning rate noise std-dev (default: 1.0)') -parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', - help='learning rate cycle len multiplier (default: 1.0)') -parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', - help='learning rate cycle limit') -parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', - help='warmup learning rate (default: 0.0001)') -parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', - help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') -parser.add_argument('--epochs', type=int, default=200, metavar='N', - help='number of epochs 
to train (default: 2)') -parser.add_argument('--start-epoch', default=None, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', - help='epoch interval to decay LR') -parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N', - help='epochs to warmup LR, if scheduler supports') -parser.add_argument('--cooldown-epochs', type=int, default=0, metavar='N', - help='epochs to cooldown LR at min_lr, after cyclic schedule ends') -parser.add_argument('--patience-epochs', type=int, default=0, metavar='N', - help='patience epochs for Plateau LR scheduler (default: 10') -parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', - help='LR decay rate (default: 0.1)') - -# Augmentation & regularization parameters -parser.add_argument('--no-aug', action='store_true', default=False, - help='Disable all training augmentation, override other train aug args') -parser.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', - help='Random resize scale (default: 0.08 1.0)') -parser.add_argument('--ratio', type=float, nargs='+', default=[3. / 4., 4. / 3.], metavar='RATIO', - help='Random resize aspect ratio (default: 0.75 1.33)') -parser.add_argument('--hflip', type=float, default=0.5, - help='Horizontal flip training aug probability') -parser.add_argument('--vflip', type=float, default=0., - help='Vertical flip training aug probability') -parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', - help='Color jitter factor (default: 0.4)') -parser.add_argument('--aug-splits', type=int, default=0, - help='Number of augmentation splits (default: 0, valid: 0 or >=2)') -parser.add_argument('--jsd', action='store_true', default=False, - help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') -parser.add_argument('--recount', type=int, default=1, - help='Random erase count (default: 1)') -parser.add_argument('--resplit', action='store_true', default=False, - help='Do not random erase first (clean) augmentation split') -parser.add_argument('--mixup', type=float, default=0.0, - help='mixup alpha, mixup enabled if > 0. (default: 0.)') -parser.add_argument('--cutmix', type=float, default=0.0, - help='cutmix alpha, cutmix enabled if > 0. (default: 0.)') -parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, - help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') -parser.add_argument('--mixup-prob', type=float, default=1.0, - help='Probability of performing mixup or cutmix when either/both is enabled') -parser.add_argument('--mixup-switch-prob', type=float, default=0.5, - help='Probability of switching to cutmix when both mixup and cutmix enabled') -parser.add_argument('--mixup-mode', type=str, default='batch', - help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') -parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', - help='Turn off mixup after this epoch, disabled if 0 (default: 0)') -parser.add_argument('--smoothing', type=float, default=0.1, - help='Label smoothing (default: 0.1)') -parser.add_argument('--train-interpolation', type=str, default='random', - help='Training interpolation (random, bilinear, bicubic default: "random")') -parser.add_argument('--drop', type=float, default=0.2, metavar='PCT', - help='Dropout rate (default: 0.)') -parser.add_argument('--drop-path', type=float, default=None, metavar='PCT', - help='Drop path rate (default: None)') -parser.add_argument('--drop-block', type=float, default=None, metavar='PCT', - help='Drop block rate (default: None)') -parser.add_argument('--bn-tf', action='store_true', default=False, - help='Use Tensorflow BatchNorm defaults for models that support it (default: False)') -parser.add_argument('--bn-momentum', type=float, default=None, - help='BatchNorm momentum override (if not None)') -parser.add_argument('--bn-eps', type=float, default=None, - help='BatchNorm epsilon override (if not None)') -parser.add_argument('--sync-bn', action='store_true', - help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') -parser.add_argument('--dist-bn', type=str, default='', - help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') -parser.add_argument('--split-bn', action='store_true', - help='Enable separate BN layers per augmentation split.') -# Misc -parser.add_argument('--seed', type=int, default=42, metavar='S', - help='random seed (default: 42)') -parser.add_argument('--log-interval', type=int, default=50, metavar='N', - help='how many batches to wait before logging training status') -parser.add_argument('--recovery-interval', type=int, default=0, metavar='N', - help='how many batches to wait before writing recovery checkpoint') -parser.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', - help='number of checkpoints to keep (default: 10)') -parser.add_argument('-j', '--workers', type=int, default=4, metavar='N', - help='how many training processes to use (default: 1)') -parser.add_argument('--save-images', action='store_true', default=False, - help='save images of input bathes every log interval for debugging') -parser.add_argument('--amp', action='store_true', default=False, - help='use NVIDIA Apex AMP or Native AMP for mixed precision training') -parser.add_argument('--apex-amp', action='store_true', default=False, - help='Use NVIDIA Apex AMP mixed precision') -parser.add_argument('--native-amp', action='store_true', default=False, - help='Use Native Torch AMP mixed precision') -parser.add_argument('--channels-last', action='store_true', default=False, - help='Use channels_last memory layout') -parser.add_argument('--pin-mem', action='store_true', default=False, - help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') -parser.add_argument('--no-prefetcher', action='store_true', default=True, - help='disable fast prefetcher') -parser.add_argument('--output', default='', type=str, metavar='PATH', - help='path to output folder (default: none, current dir)') -parser.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', - help='Best metric (default: "top1"') -parser.add_argument('--tta', type=int, default=0, metavar='N', - help='Test/inference time augmentation (oversampling) factor. 
0=None (default: 0)') -parser.add_argument("--local_rank", default=0, type=int) -parser.add_argument('--use-multi-epochs-loader', action='store_true', default=False, - help='use the multi-epochs-loader to save time at the beginning of every epoch') -parser.add_argument('--torchscript', dest='torchscript', action='store_true', - help='convert model torchscript for inference') -parser.add_argument('--width', type=float, default=1.0, - help='Width ratio (default: 1.0)') -parser.add_argument('--dist-url', default='tcp://127.0.0.1:50000', type=str, - help='url used to set up pretained training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--npu', default=None, type=int, - help='NPU id to use.') -#modelarts -parser.add_argument('--modelarts_mod', action='store_true', default=False, - help='Enable modelarts mode loss function to train') -parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") -parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', - help='path to pretrained weight') -parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") -parser.add_argument('--data_url', - type=str, - default='/cache/data_url', - help='the training data') - -CACHE_TRAINING_URL = "/cache/training" -def _parse_args(): - # Do we have a config file to parse? - args_config, remaining = config_parser.parse_known_args() - if args_config.config: - with open(args_config.config, 'r') as f: - cfg = yaml.safe_load(f) - parser.set_defaults(**cfg) - # The main arg parser parses the rest of the args, the usual - # defaults will have been overridden if config file specified. - args = parser.parse_args(remaining) - print(args) - # Cache the args as a text string to save them in the output dir later - args_text = yaml.safe_dump(args.__dict__, default_flow_style=False) - return args, args_text - - -def main(): - setup_default_logging() - args, args_text = _parse_args() - - args.prefetcher = not args.no_prefetcher - args.distributed = False - if args.npu is None: - args.npu = 0 - CALCULATE_DEVICE = "npu:{}".format(args.npu) - torch.npu.set_device(CALCULATE_DEVICE) - print("use ", CALCULATE_DEVICE) - - if 'WORLD_SIZE' in os.environ: - args.distributed = int(os.environ['WORLD_SIZE']) > 1 - args.device = 'npu:0' - args.world_size = 1 - args.rank = 0 # global rank - _logger.info('Training with a single process on 1 NPUs.') - assert args.rank >= 0 - - # resolve AMP arguments based on PyTorch / Apex availability - use_amp = None - if args.amp: - # for backwards compat, `--amp` arg tries apex before native amp - if has_apex: - args.apex_amp = True - elif has_native_amp: - args.native_amp = True - if args.apex_amp and has_apex: - use_amp = 'apex' - elif args.native_amp and has_native_amp: - use_amp = 'native' - elif args.apex_amp or args.native_amp: - _logger.warning("Neither APEX or native Torch AMP is available, using float32. 
" - "Install NVIDA apex or upgrade to PyTorch 1.6") - - torch.manual_seed(args.seed + args.rank) - model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.drop) - - if args.pretrained: - CACHE_MODEL_URL = "/cache/model" - os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "model_best.pth.tar")) - pretrained_weight = os.path.join(CACHE_MODEL_URL, "model_best.pth.tar") - pretrained_dict = torch.load(pretrained_weight)["state_dict"] - pretrained_model = {k.replace('module.', ''): v for k, v in pretrained_dict.items()} - if "classifier.weight" in pretrained_model: - pretrained_model.pop('classifier.weight') - pretrained_model.pop('classifier.bias') - model.load_state_dict(pretrained_model, strict=False) - - for param in model.parameters(): - param.requires_grad = False - - for param in model.classifier.parameters(): - param.requires_grad = True - - if args.num_classes is None: - assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.' - args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly - - if args.local_rank == 0: - _logger.info('Model %s created, param count: %d' % - (args.model, sum([m.numel() for m in model.parameters()]))) - - data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) - - # setup augmentation batch splits for contrastive loss or split bn - num_aug_splits = 0 - if args.aug_splits > 0: - assert args.aug_splits > 1, 'A split of 1 makes no sense' - num_aug_splits = args.aug_splits - - # enable split bn (separate bn stats per batch-portion) - if args.split_bn: - assert num_aug_splits > 1 or args.resplit - model = convert_splitbn_model(model, max(num_aug_splits, 2)) - - # move model to GPU, enable channels last layout if set - model = model.to(CALCULATE_DEVICE) - if args.channels_last: - model = model.to(memory_format=torch.channels_last) - - if args.torchscript: - assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model' - assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model' - model = torch.jit.script(model) - - optimizer = create_optimizer(args, model) - - # setup automatic mixed-precision (AMP) loss scaling and op casting - amp_autocast = suppress # do nothing - loss_scaler = None - if use_amp == 'apex': - model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=128) - loss_scaler = ApexScaler() - if args.local_rank == 0: - _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') - elif use_amp == 'native': - amp_autocast = torch.cuda.amp.autocast - loss_scaler = NativeScaler() - if args.local_rank == 0: - _logger.info('Using native Torch AMP. Training in mixed precision.') - else: - if args.local_rank == 0: - _logger.info('AMP not enabled. 
Training in float32.') - - # optionally resume from a checkpoint - resume_epoch = None - if args.resume: - resume_epoch = resume_checkpoint( - model, args.resume, - optimizer=None if args.no_resume_opt else optimizer, - loss_scaler=None if args.no_resume_opt else loss_scaler, - log_info=args.local_rank == 0) - - # setup exponential moving average of model weights, SWA could be used here too - model_ema = None - # setup learning rate schedule and starting epoch - lr_scheduler, num_epochs = create_scheduler(args, optimizer) - start_epoch = 0 - if args.start_epoch is not None: - # a specified start_epoch will always override the resume epoch - start_epoch = args.start_epoch - elif resume_epoch is not None: - start_epoch = resume_epoch - if lr_scheduler is not None and start_epoch > 0: - lr_scheduler.step(start_epoch) - - if args.local_rank == 0: - _logger.info('Scheduled epochs: {}'.format(num_epochs)) - - # create the train and eval datasets - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." % real_path) - dataset_train = create_dataset( - args.dataset, root=real_path, split=args.train_split, is_training=True, batch_size=args.batch_size) - dataset_eval = create_dataset( - args.dataset, root=real_path, split=args.val_split, is_training=False, batch_size=args.batch_size) - # setup mixup / cutmix - collate_fn = None - mixup_fn = None - mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None - if mixup_active: - mixup_args = dict( - mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, - prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode, - label_smoothing=args.smoothing, num_classes=args.num_classes) - if args.prefetcher: - assert not num_aug_splits # collate conflict (need to support deinterleaving in collate mixup) - collate_fn = FastCollateMixup(**mixup_args) - else: - mixup_fn = Mixup(**mixup_args) - - # wrap dataset in AugMix helper - if num_aug_splits > 1: - dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) - - # create data loaders w/ augmentation pipeiine - train_interpolation = args.train_interpolation - if args.no_aug or not train_interpolation: - train_interpolation = data_config['interpolation'] - loader_train = create_loader( - dataset_train, - input_size=data_config['input_size'], - batch_size=args.batch_size, - is_training=True, - use_prefetcher=not args.no_prefetcher, - no_aug=args.no_aug, - re_count=args.recount, - re_split=args.resplit, - scale=args.scale, - ratio=args.ratio, - hflip=args.hflip, - vflip=args.vflip, - color_jitter=args.color_jitter, - num_aug_splits=num_aug_splits, - interpolation=train_interpolation, - mean=data_config['mean'], - std=data_config['std'], - num_workers=args.workers, - collate_fn=collate_fn, - pin_memory=args.pin_mem, - use_multi_epochs_loader=args.use_multi_epochs_loader - ) - - loader_eval = create_loader( - dataset_eval, - input_size=data_config['input_size'], - batch_size=args.validation_batch_size_multiplier * args.batch_size, - is_training=False, - use_prefetcher=not args.no_prefetcher, - interpolation=data_config['interpolation'], - mean=data_config['mean'], - std=data_config['std'], - num_workers=args.workers, - crop_pct=data_config['crop_pct'], - pin_memory=args.pin_mem, - ) - - # setup loss function - if args.jsd: - #assert num_aug_splits > 1 # JSD only valid with aug splits set - train_loss_fn = 
JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).to(CALCULATE_DEVICE) - elif mixup_active: - # smoothing is handled with mixup target transform - train_loss_fn = SoftTargetCrossEntropy().to(CALCULATE_DEVICE) - elif args.smoothing: - train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).to(CALCULATE_DEVICE) - else: - train_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) - if args.modelarts_mod: - train_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) - - validate_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) - # setup checkpoint saver and eval metric tracking - eval_metric = args.eval_metric - best_metric = None - best_epoch = None - saver = None - output_dir = '' - if args.local_rank == 0: - output_base = args.output if args.output else './output' - exp_name = '-'.join([ - datetime.now().strftime("%Y%m%d-%H%M%S"), - args.model, - str(data_config['input_size'][-1]) - ]) - output_dir = get_outdir(output_base, 'train', exp_name) - decreasing = True if eval_metric == 'loss' else False - saver = CheckpointSaver( - model=model, optimizer=optimizer, args=args, amp_scaler=loss_scaler, - checkpoint_dir=output_dir, recovery_dir=output_dir, decreasing=decreasing, max_history=args.checkpoint_hist) - - with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: - f.write(args_text) - - try: - for epoch in range(start_epoch, num_epochs): - - train_metrics = train_one_epoch( - epoch, model, loader_train, optimizer, train_loss_fn, args, - lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir, - amp_autocast=amp_autocast, loss_scaler=loss_scaler, mixup_fn=mixup_fn) - - eval_metrics = validate(model, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast) - - if lr_scheduler is not None: - # step LR for next epoch - lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) - - update_summary( - epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), - write_header=best_metric is None) - - if saver is not None: - # save proper checkpoint with eval metric - save_metric = eval_metrics[eval_metric] - best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric) - - except KeyboardInterrupt: - pass - if best_metric is not None: - _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) - - if args.onnx: - os.makedirs(CACHE_TRAINING_URL, exist_ok=True) - print("abspath:",os.path.abspath(output_dir)) - print("output_dir:",os.listdir(output_dir)) - shutil.copy(os.path.join(os.path.abspath(output_dir), 'model_best.pth.tar'), CACHE_TRAINING_URL) - pth_pattern = os.path.join(CACHE_TRAINING_URL, 'model_best.pth.tar') - print("pth_pattern:",os.path.abspath(pth_pattern)) - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print(f"can't find pth {pth_pattern}") - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path) - # --------------modelarts modification---------- - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - print("CACHE_TRAINING_URL:",os.listdir(CACHE_TRAINING_URL)) - -def train_one_epoch( - epoch, model, loader, optimizer, loss_fn, args, - lr_scheduler=None, saver=None, output_dir='', amp_autocast=suppress, - loss_scaler=None, mixup_fn=None): - if args.mixup_off_epoch and epoch >= args.mixup_off_epoch: - if args.prefetcher and loader.mixup_enabled: - loader.mixup_enabled = False - elif mixup_fn is not None: - mixup_fn.mixup_enabled = False - - second_order = 
hasattr(optimizer, 'is_second_order') and optimizer.is_second_order - batch_time_m = AverageMeter() - data_time_m = AverageMeter() - losses_m = AverageMeter(start_count_index=0) - - model.train() - - end = time.time() - last_idx = len(loader) - 1 - num_updates = epoch * len(loader) - for batch_idx, (input, target) in enumerate(loader): - last_batch = batch_idx == last_idx - data_time_m.update(time.time() - end) - if not args.prefetcher: - input, target = input.npu(), target.npu() - if mixup_fn is not None: - input, target = mixup_fn(input, target) - if args.channels_last: - input = input.contiguous(memory_format=torch.channels_last) - - with amp_autocast(): - output = model(input) - target = target.to(torch.int32) - loss = loss_fn(output, target) - - losses_m.update(loss.item(), input.size(0)) - - optimizer.zero_grad() - if loss_scaler is not None: - loss_scaler( - loss, optimizer, clip_grad=args.clip_grad, parameters=model.parameters(), create_graph=second_order) - else: - loss.backward(create_graph=second_order) - if args.clip_grad is not None: - torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad) - optimizer.step() - - num_updates += 1 - batch_time_m.update(time.time() - end) - if last_batch or batch_idx % args.log_interval == 0: - lrl = [param_group['lr'] for param_group in optimizer.param_groups] - lr = sum(lrl) / len(lrl) - - if args.local_rank == 0: - if batch_time_m.avg > 0: - _logger.info( - 'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' - 'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) ' - 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' - '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' - 'LR: {lr:.3e} ' - 'Data: {data_time.val:.3f} ({data_time.avg:.3f})' - 'fps: {fps:.3f} ' - 'Batch_Size:{batch_size:.1f} '.format( - epoch, - batch_idx, len(loader), - 100. 
* batch_idx / last_idx, - loss=losses_m, - batch_time=batch_time_m, - rate=input.size(0) * args.world_size / batch_time_m.val, - rate_avg=input.size(0) * args.world_size / batch_time_m.avg, - lr=lr, - data_time=data_time_m, - fps=args.batch_size / batch_time_m.avg, - batch_size=args.batch_size)) - - if args.save_images and output_dir: - torchvision.utils.save_image( - input, - os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx), - padding=0, - normalize=True) - - if saver is not None and args.recovery_interval and ( - last_batch or (batch_idx + 1) % args.recovery_interval == 0): - saver.save_recovery(epoch, batch_idx=batch_idx) - - if lr_scheduler is not None: - lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg) - - end = time.time() - # end for - - if hasattr(optimizer, 'sync_lookahead'): - optimizer.sync_lookahead() - - return OrderedDict([('loss', losses_m.avg)]) - - -def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix=''): - batch_time_m = AverageMeter() - losses_m = AverageMeter(start_count_index=0) - top1_m = AverageMeter(start_count_index=0) - top5_m = AverageMeter(start_count_index=0) - model.eval() - - end = time.time() - last_idx = len(loader) - 1 - with torch.no_grad(): - for batch_idx, (input, target) in enumerate(loader): - last_batch = batch_idx == last_idx - if args.no_prefetcher: - input = input.npu() - target = target.npu() - if args.channels_last: - input = input.contiguous(memory_format=torch.channels_last) - - with amp_autocast(): - output = model(input) - if isinstance(output, (tuple, list)): - output = output[0] - - # augmentation reduction - reduce_factor = args.tta - if reduce_factor > 1: - output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2) - target = target[0:target.size(0):reduce_factor] - - target = target.to(torch.int32) - loss = loss_fn(output, target) - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - - reduced_loss = loss.data - - losses_m.update(reduced_loss.item(), input.size(0)) - top1_m.update(acc1.item(), output.size(0)) - top5_m.update(acc5.item(), output.size(0)) - - batch_time_m.update(time.time() - end) - end = time.time() - if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0): - if batch_time_m.avg > 0: - log_name = 'Test' + log_suffix - _logger.info( - '{0}: [{1:>4d}/{2}] ' - 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' - 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' - 'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) ' - 'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})' - 'fps: {fps:.3f} ' - 'Batch_Size:{batch_size:.1f} '.format( - log_name, batch_idx, last_idx, batch_time=batch_time_m, - loss=losses_m, top1=top1_m, top5=top5_m, fps=args.batch_size / batch_time_m.avg, - batch_size=args.batch_size)) - - metrics = OrderedDict([('loss', losses_m.avg), ('top1', top1_m.avg), ('top5', top5_m.avg)]) - - return metrics - -def proc_node_module(checkpoint, attr_name): - new_model_state = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_model_state[name] = v - return new_model_state - -def convert(pth_file_path, onnx_file_path): - args, args_text = _parse_args() - checkpoint = torch.load(pth_file_path, map_location='cpu') - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.drop) - model.load_state_dict(checkpoint['state_dict'], False) - model.eval() - - input_names = ["image"] - 
output_names = ["output1"] - dummy_input = torch.randn(2, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, opset_version=11) - -if __name__ == '__main__': - main() - +#!/usr/bin/env python +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" ImageNet Training Script +This is intended to be a lean and easily modifiable ImageNet training script that reproduces ImageNet +training results with some of the latest networks and training techniques. It favours canonical PyTorch +and standard Python style over trying to be able to 'do it all.' That said, it offers quite a few speed +and training result improvements over the usual PyTorch example scripts. Repurpose as you see fit. +This script was started from an early version of the PyTorch ImageNet example +(https://github.com/pytorch/examples/tree/master/imagenet) +NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples +(https://github.com/NVIDIA/apex/tree/master/examples/imagenet) +Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) +""" +import argparse +import time +import yaml +import os +import logging +from collections import OrderedDict +from contextlib import suppress +from datetime import datetime +import glob +import shutil + +import torch +import torch.nn as nn +import torchvision.utils +from torch.nn.parallel import DistributedDataParallel as NativeDDP +import torch.onnx + +from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset +from timm.data import create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset +from timm.models import create_model, resume_checkpoint, load_checkpoint, convert_splitbn_model +from timm.utils import * +from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy, JsdCrossEntropy +from timm.optim import create_optimizer +from timm.scheduler import create_scheduler +from timm.utils import ApexScaler, NativeScaler +from ghostnet.ghostnet_pytorch.ghostnet import ghostnet +import torch.npu + +# modelarts modification +import moxing as mox + +CALCULATE_DEVICE = "npu:0" + +try: + from apex import amp + from apex.parallel import DistributedDataParallel as ApexDDP + from apex.parallel import convert_syncbn_model + + has_apex = True +except ImportError: + has_apex = False + +has_native_amp = False +try: + if getattr(torch.cuda.amp, 'autocast') is not None: + has_native_amp = True +except AttributeError: + pass + +torch.backends.cudnn.benchmark = True +_logger = logging.getLogger('train') + +# The first arg parser parses out only the --config argument, this argument is used to +# load a yaml file containing key-values that override the defaults for the main parser below +config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False) +parser.add_argument('-c', '--config', default='', type=str, metavar='FILE', + help='YAML config file specifying default 
arguments') +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +# Dataset / Model parameters +parser.add_argument('data_dir', metavar='DIR', + help='path to dataset') +parser.add_argument('--dataset', '-d', metavar='NAME', default='', + help='dataset type (default: ImageFolder/ImageTar if empty)') +parser.add_argument('--train-split', metavar='NAME', default='train', + help='dataset train split (default: train)') +parser.add_argument('--val-split', metavar='NAME', default='validation', + help='dataset validation split (default: validation)') +parser.add_argument('--model', default='resnet101', type=str, metavar='MODEL', + help='Name of model to train (default: "countception"') +parser.add_argument('--pretrained', action='store_true', default=False, + help='Start with pretrained version of specified network (if avail)') +parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', + help='Initialize model from this checkpoint (default: none)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='Resume full model and optimizer state from checkpoint (default: none)') +parser.add_argument('--no-resume-opt', action='store_true', default=False, + help='prevent resume of optimizer state when resuming model') +parser.add_argument('--num-classes', type=int, default=None, metavar='N', + help='number of label classes (Model default if None)') +parser.add_argument('--gp', default=None, type=str, metavar='POOL', + help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') +parser.add_argument('--img-size', type=int, default=None, metavar='N', + help='Image patch size (default: None => model default)') +parser.add_argument('--input-size', default=None, nargs=3, type=int, + metavar='N N N', + help='Input all image dimensions (d h w, e.g. 
--input-size 3 224 224), uses model default if empty') +parser.add_argument('--crop-pct', default=None, type=float, + metavar='N', help='Input image center crop percent (for validation only)') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('-b', '--batch-size', type=int, default=1024, metavar='N', + help='input batch size for training (default: 32)') +parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N', + help='ratio of validation batch size to training batch size (default: 1)') + +# Optimizer parameters +parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "sgd"') +parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: None, use opt default)') +parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') +parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='Optimizer momentum (default: 0.9)') +parser.add_argument('--weight-decay', type=float, default=0.0001, + help='weight decay (default: 0.0001)') +parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') + +# Learning rate schedule parameters +parser.add_argument('--sched', default='step', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "step"') +parser.add_argument('--lr', type=float, default=0.4, metavar='LR', + help='learning rate (default: 0.01)') +parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') +parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') +parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') +parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', + help='learning rate cycle len multiplier (default: 1.0)') +parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', + help='learning rate cycle limit') +parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', + help='warmup learning rate (default: 0.0001)') +parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') +parser.add_argument('--epochs', type=int, default=200, metavar='N', + help='number of epochs to train (default: 2)') +parser.add_argument('--start-epoch', default=None, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', + help='epoch interval to decay LR') +parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N', + help='epochs to warmup LR, if scheduler supports') +parser.add_argument('--cooldown-epochs', type=int, default=0, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') 
+parser.add_argument('--patience-epochs', type=int, default=0, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') +parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + +# Augmentation & regularization parameters +parser.add_argument('--no-aug', action='store_true', default=False, + help='Disable all training augmentation, override other train aug args') +parser.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', + help='Random resize scale (default: 0.08 1.0)') +parser.add_argument('--ratio', type=float, nargs='+', default=[3. / 4., 4. / 3.], metavar='RATIO', + help='Random resize aspect ratio (default: 0.75 1.33)') +parser.add_argument('--hflip', type=float, default=0.5, + help='Horizontal flip training aug probability') +parser.add_argument('--vflip', type=float, default=0., + help='Vertical flip training aug probability') +parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') +parser.add_argument('--aug-splits', type=int, default=0, + help='Number of augmentation splits (default: 0, valid: 0 or >=2)') +parser.add_argument('--jsd', action='store_true', default=False, + help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') +parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') +parser.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') +parser.add_argument('--mixup', type=float, default=0.0, + help='mixup alpha, mixup enabled if > 0. (default: 0.)') +parser.add_argument('--cutmix', type=float, default=0.0, + help='cutmix alpha, cutmix enabled if > 0. (default: 0.)') +parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') +parser.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') +parser.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') +parser.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') +parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', + help='Turn off mixup after this epoch, disabled if 0 (default: 0)') +parser.add_argument('--smoothing', type=float, default=0.1, + help='Label smoothing (default: 0.1)') +parser.add_argument('--train-interpolation', type=str, default='random', + help='Training interpolation (random, bilinear, bicubic default: "random")') +parser.add_argument('--drop', type=float, default=0.2, metavar='PCT', + help='Dropout rate (default: 0.)') +parser.add_argument('--drop-path', type=float, default=None, metavar='PCT', + help='Drop path rate (default: None)') +parser.add_argument('--drop-block', type=float, default=None, metavar='PCT', + help='Drop block rate (default: None)') +parser.add_argument('--bn-tf', action='store_true', default=False, + help='Use Tensorflow BatchNorm defaults for models that support it (default: False)') +parser.add_argument('--bn-momentum', type=float, default=None, + help='BatchNorm momentum override (if not None)') +parser.add_argument('--bn-eps', type=float, default=None, + help='BatchNorm epsilon override (if not None)') +parser.add_argument('--sync-bn', action='store_true', + help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') +parser.add_argument('--dist-bn', type=str, default='', + help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') +parser.add_argument('--split-bn', action='store_true', + help='Enable separate BN layers per augmentation split.') +# Misc +parser.add_argument('--seed', type=int, default=42, metavar='S', + help='random seed (default: 42)') +parser.add_argument('--log-interval', type=int, default=50, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--recovery-interval', type=int, default=0, metavar='N', + help='how many batches to wait before writing recovery checkpoint') +parser.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', + help='number of checkpoints to keep (default: 10)') +parser.add_argument('-j', '--workers', type=int, default=4, metavar='N', + help='how many training processes to use (default: 1)') +parser.add_argument('--save-images', action='store_true', default=False, + help='save images of input bathes every log interval for debugging') +parser.add_argument('--amp', action='store_true', default=False, + help='use NVIDIA Apex AMP or Native AMP for mixed precision training') +parser.add_argument('--apex-amp', action='store_true', default=False, + help='Use NVIDIA Apex AMP mixed precision') +parser.add_argument('--native-amp', action='store_true', default=False, + help='Use Native Torch AMP mixed precision') +parser.add_argument('--channels-last', action='store_true', default=False, + help='Use channels_last memory layout') +parser.add_argument('--pin-mem', action='store_true', default=False, + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') +parser.add_argument('--no-prefetcher', action='store_true', default=True, + help='disable fast prefetcher') +parser.add_argument('--output', default='', type=str, metavar='PATH', + help='path to output folder (default: none, current dir)') +parser.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', + help='Best metric (default: "top1"') +parser.add_argument('--tta', type=int, default=0, metavar='N', + help='Test/inference time augmentation (oversampling) factor. 
0=None (default: 0)') +parser.add_argument("--local_rank", default=0, type=int) +parser.add_argument('--use-multi-epochs-loader', action='store_true', default=False, + help='use the multi-epochs-loader to save time at the beginning of every epoch') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') +parser.add_argument('--width', type=float, default=1.0, + help='Width ratio (default: 1.0)') +parser.add_argument('--dist-url', default='tcp://127.0.0.1:50000', type=str, + help='url used to set up pretained training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--npu', default=None, type=int, + help='NPU id to use.') +#modelarts +parser.add_argument('--modelarts_mod', action='store_true', default=False, + help='Enable modelarts mode loss function to train') +parser.add_argument('--train_url', + default="/cache/training", + type=str, + help="setting dir of training output") +parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', + help='path to pretrained weight') +parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") +parser.add_argument('--data_url', + type=str, + default='/cache/data_url', + help='the training data') + +CACHE_TRAINING_URL = "/cache/training" +def _parse_args(): + # Do we have a config file to parse? + args_config, remaining = config_parser.parse_known_args() + if args_config.config: + with open(args_config.config, 'r') as f: + cfg = yaml.safe_load(f) + parser.set_defaults(**cfg) + # The main arg parser parses the rest of the args, the usual + # defaults will have been overridden if config file specified. + args = parser.parse_args(remaining) + print(args) + # Cache the args as a text string to save them in the output dir later + args_text = yaml.safe_dump(args.__dict__, default_flow_style=False) + return args, args_text + + +def main(): + setup_default_logging() + args, args_text = _parse_args() + + args.prefetcher = not args.no_prefetcher + args.distributed = False + if args.npu is None: + args.npu = 0 + CALCULATE_DEVICE = "npu:{}".format(args.npu) + torch.npu.set_device(CALCULATE_DEVICE) + print("use ", CALCULATE_DEVICE) + + if 'WORLD_SIZE' in os.environ: + args.distributed = int(os.environ['WORLD_SIZE']) > 1 + args.device = 'npu:0' + args.world_size = 1 + args.rank = 0 # global rank + _logger.info('Training with a single process on 1 NPUs.') + assert args.rank >= 0 + + # resolve AMP arguments based on PyTorch / Apex availability + use_amp = None + if args.amp: + # for backwards compat, `--amp` arg tries apex before native amp + if has_apex: + args.apex_amp = True + elif has_native_amp: + args.native_amp = True + if args.apex_amp and has_apex: + use_amp = 'apex' + elif args.native_amp and has_native_amp: + use_amp = 'native' + elif args.apex_amp or args.native_amp: + _logger.warning("Neither APEX or native Torch AMP is available, using float32. 
" + "Install NVIDA apex or upgrade to PyTorch 1.6") + + torch.manual_seed(args.seed + args.rank) + model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.drop) + + if args.pretrained: + CACHE_MODEL_URL = "/cache/model" + os.makedirs(CACHE_MODEL_URL, exist_ok=True) + mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "model_best.pth.tar")) + pretrained_weight = os.path.join(CACHE_MODEL_URL, "model_best.pth.tar") + pretrained_dict = torch.load(pretrained_weight)["state_dict"] + pretrained_model = {k.replace('module.', ''): v for k, v in pretrained_dict.items()} + if "classifier.weight" in pretrained_model: + pretrained_model.pop('classifier.weight') + pretrained_model.pop('classifier.bias') + model.load_state_dict(pretrained_model, strict=False) + + for param in model.parameters(): + param.requires_grad = False + + for param in model.classifier.parameters(): + param.requires_grad = True + + if args.num_classes is None: + assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.' + args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly + + if args.local_rank == 0: + _logger.info('Model %s created, param count: %d' % + (args.model, sum([m.numel() for m in model.parameters()]))) + + data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) + + # setup augmentation batch splits for contrastive loss or split bn + num_aug_splits = 0 + if args.aug_splits > 0: + assert args.aug_splits > 1, 'A split of 1 makes no sense' + num_aug_splits = args.aug_splits + + # enable split bn (separate bn stats per batch-portion) + if args.split_bn: + assert num_aug_splits > 1 or args.resplit + model = convert_splitbn_model(model, max(num_aug_splits, 2)) + + # move model to GPU, enable channels last layout if set + model = model.to(CALCULATE_DEVICE) + if args.channels_last: + model = model.to(memory_format=torch.channels_last) + + if args.torchscript: + assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model' + assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model' + model = torch.jit.script(model) + + optimizer = create_optimizer(args, model) + + # setup automatic mixed-precision (AMP) loss scaling and op casting + amp_autocast = suppress # do nothing + loss_scaler = None + if use_amp == 'apex': + model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=128) + loss_scaler = ApexScaler() + if args.local_rank == 0: + _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') + elif use_amp == 'native': + amp_autocast = torch.cuda.amp.autocast + loss_scaler = NativeScaler() + if args.local_rank == 0: + _logger.info('Using native Torch AMP. Training in mixed precision.') + else: + if args.local_rank == 0: + _logger.info('AMP not enabled. 
Training in float32.') + + # optionally resume from a checkpoint + resume_epoch = None + if args.resume: + resume_epoch = resume_checkpoint( + model, args.resume, + optimizer=None if args.no_resume_opt else optimizer, + loss_scaler=None if args.no_resume_opt else loss_scaler, + log_info=args.local_rank == 0) + + # setup exponential moving average of model weights, SWA could be used here too + model_ema = None + # setup learning rate schedule and starting epoch + lr_scheduler, num_epochs = create_scheduler(args, optimizer) + start_epoch = 0 + if args.start_epoch is not None: + # a specified start_epoch will always override the resume epoch + start_epoch = args.start_epoch + elif resume_epoch is not None: + start_epoch = resume_epoch + if lr_scheduler is not None and start_epoch > 0: + lr_scheduler.step(start_epoch) + + if args.local_rank == 0: + _logger.info('Scheduled epochs: {}'.format(num_epochs)) + + # create the train and eval datasets + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + mox.file.copy_parallel(args.data_url, real_path) + print("training data finish copy to %s." % real_path) + dataset_train = create_dataset( + args.dataset, root=real_path, split=args.train_split, is_training=True, batch_size=args.batch_size) + dataset_eval = create_dataset( + args.dataset, root=real_path, split=args.val_split, is_training=False, batch_size=args.batch_size) + # setup mixup / cutmix + collate_fn = None + mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None + if mixup_active: + mixup_args = dict( + mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode, + label_smoothing=args.smoothing, num_classes=args.num_classes) + if args.prefetcher: + assert not num_aug_splits # collate conflict (need to support deinterleaving in collate mixup) + collate_fn = FastCollateMixup(**mixup_args) + else: + mixup_fn = Mixup(**mixup_args) + + # wrap dataset in AugMix helper + if num_aug_splits > 1: + dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) + + # create data loaders w/ augmentation pipeiine + train_interpolation = args.train_interpolation + if args.no_aug or not train_interpolation: + train_interpolation = data_config['interpolation'] + loader_train = create_loader( + dataset_train, + input_size=data_config['input_size'], + batch_size=args.batch_size, + is_training=True, + use_prefetcher=not args.no_prefetcher, + no_aug=args.no_aug, + re_count=args.recount, + re_split=args.resplit, + scale=args.scale, + ratio=args.ratio, + hflip=args.hflip, + vflip=args.vflip, + color_jitter=args.color_jitter, + num_aug_splits=num_aug_splits, + interpolation=train_interpolation, + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + collate_fn=collate_fn, + pin_memory=args.pin_mem, + use_multi_epochs_loader=args.use_multi_epochs_loader + ) + + loader_eval = create_loader( + dataset_eval, + input_size=data_config['input_size'], + batch_size=args.validation_batch_size_multiplier * args.batch_size, + is_training=False, + use_prefetcher=not args.no_prefetcher, + interpolation=data_config['interpolation'], + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + crop_pct=data_config['crop_pct'], + pin_memory=args.pin_mem, + ) + + # setup loss function + if args.jsd: + #assert num_aug_splits > 1 # JSD only valid with aug splits set + train_loss_fn = 
JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).to(CALCULATE_DEVICE) + elif mixup_active: + # smoothing is handled with mixup target transform + train_loss_fn = SoftTargetCrossEntropy().to(CALCULATE_DEVICE) + elif args.smoothing: + train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).to(CALCULATE_DEVICE) + else: + train_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) + if args.modelarts_mod: + train_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) + + validate_loss_fn = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) + # setup checkpoint saver and eval metric tracking + eval_metric = args.eval_metric + best_metric = None + best_epoch = None + saver = None + output_dir = '' + if args.local_rank == 0: + output_base = args.output if args.output else './output' + exp_name = '-'.join([ + datetime.now().strftime("%Y%m%d-%H%M%S"), + args.model, + str(data_config['input_size'][-1]) + ]) + output_dir = get_outdir(output_base, 'train', exp_name) + decreasing = True if eval_metric == 'loss' else False + saver = CheckpointSaver( + model=model, optimizer=optimizer, args=args, amp_scaler=loss_scaler, + checkpoint_dir=output_dir, recovery_dir=output_dir, decreasing=decreasing, max_history=args.checkpoint_hist) + + with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: + f.write(args_text) + + try: + for epoch in range(start_epoch, num_epochs): + + train_metrics = train_one_epoch( + epoch, model, loader_train, optimizer, train_loss_fn, args, + lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir, + amp_autocast=amp_autocast, loss_scaler=loss_scaler, mixup_fn=mixup_fn) + + eval_metrics = validate(model, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast) + + if lr_scheduler is not None: + # step LR for next epoch + lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) + + update_summary( + epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), + write_header=best_metric is None) + + if saver is not None: + # save proper checkpoint with eval metric + save_metric = eval_metrics[eval_metric] + best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric) + + except KeyboardInterrupt: + pass + if best_metric is not None: + _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) + + if args.onnx: + os.makedirs(CACHE_TRAINING_URL, exist_ok=True) + print("abspath:",os.path.abspath(output_dir)) + print("output_dir:",os.listdir(output_dir)) + shutil.copy(os.path.join(os.path.abspath(output_dir), 'model_best.pth.tar'), CACHE_TRAINING_URL) + pth_pattern = os.path.join(CACHE_TRAINING_URL, 'model_best.pth.tar') + print("pth_pattern:",os.path.abspath(pth_pattern)) + pth_file_list = glob.glob(pth_pattern) + if not pth_file_list: + print(f"can't find pth {pth_pattern}") + pth_file = pth_file_list[0] + onnx_path = pth_file.split(".")[0] + '.onnx' + convert(pth_file, onnx_path) + # --------------modelarts modification---------- + mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) + # --------------modelarts modification end---------- + print("CACHE_TRAINING_URL:",os.listdir(CACHE_TRAINING_URL)) + +def train_one_epoch( + epoch, model, loader, optimizer, loss_fn, args, + lr_scheduler=None, saver=None, output_dir='', amp_autocast=suppress, + loss_scaler=None, mixup_fn=None): + if args.mixup_off_epoch and epoch >= args.mixup_off_epoch: + if args.prefetcher and loader.mixup_enabled: + loader.mixup_enabled = False + elif mixup_fn is not None: + mixup_fn.mixup_enabled = False + + second_order = 
hasattr(optimizer, 'is_second_order') and optimizer.is_second_order + batch_time_m = AverageMeter() + data_time_m = AverageMeter() + losses_m = AverageMeter(start_count_index=0) + + model.train() + + end = time.time() + last_idx = len(loader) - 1 + num_updates = epoch * len(loader) + for batch_idx, (input, target) in enumerate(loader): + last_batch = batch_idx == last_idx + data_time_m.update(time.time() - end) + if not args.prefetcher: + input, target = input.npu(), target.npu() + if mixup_fn is not None: + input, target = mixup_fn(input, target) + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + with amp_autocast(): + output = model(input) + target = target.to(torch.int32) + loss = loss_fn(output, target) + + losses_m.update(loss.item(), input.size(0)) + + optimizer.zero_grad() + if loss_scaler is not None: + loss_scaler( + loss, optimizer, clip_grad=args.clip_grad, parameters=model.parameters(), create_graph=second_order) + else: + loss.backward(create_graph=second_order) + if args.clip_grad is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad) + optimizer.step() + + num_updates += 1 + batch_time_m.update(time.time() - end) + if last_batch or batch_idx % args.log_interval == 0: + lrl = [param_group['lr'] for param_group in optimizer.param_groups] + lr = sum(lrl) / len(lrl) + + if args.local_rank == 0: + if batch_time_m.avg > 0: + _logger.info( + 'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' + 'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) ' + 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' + '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' + 'LR: {lr:.3e} ' + 'Data: {data_time.val:.3f} ({data_time.avg:.3f})' + 'fps: {fps:.3f} ' + 'Batch_Size:{batch_size:.1f} '.format( + epoch, + batch_idx, len(loader), + 100. 
* batch_idx / last_idx, + loss=losses_m, + batch_time=batch_time_m, + rate=input.size(0) * args.world_size / batch_time_m.val, + rate_avg=input.size(0) * args.world_size / batch_time_m.avg, + lr=lr, + data_time=data_time_m, + fps=args.batch_size / batch_time_m.avg, + batch_size=args.batch_size)) + + if args.save_images and output_dir: + torchvision.utils.save_image( + input, + os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx), + padding=0, + normalize=True) + + if saver is not None and args.recovery_interval and ( + last_batch or (batch_idx + 1) % args.recovery_interval == 0): + saver.save_recovery(epoch, batch_idx=batch_idx) + + if lr_scheduler is not None: + lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg) + + end = time.time() + # end for + + if hasattr(optimizer, 'sync_lookahead'): + optimizer.sync_lookahead() + + return OrderedDict([('loss', losses_m.avg)]) + + +def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix=''): + batch_time_m = AverageMeter() + losses_m = AverageMeter(start_count_index=0) + top1_m = AverageMeter(start_count_index=0) + top5_m = AverageMeter(start_count_index=0) + model.eval() + + end = time.time() + last_idx = len(loader) - 1 + with torch.no_grad(): + for batch_idx, (input, target) in enumerate(loader): + last_batch = batch_idx == last_idx + if args.no_prefetcher: + input = input.npu() + target = target.npu() + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + with amp_autocast(): + output = model(input) + if isinstance(output, (tuple, list)): + output = output[0] + + # augmentation reduction + reduce_factor = args.tta + if reduce_factor > 1: + output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2) + target = target[0:target.size(0):reduce_factor] + + target = target.to(torch.int32) + loss = loss_fn(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + reduced_loss = loss.data + + losses_m.update(reduced_loss.item(), input.size(0)) + top1_m.update(acc1.item(), output.size(0)) + top5_m.update(acc5.item(), output.size(0)) + + batch_time_m.update(time.time() - end) + end = time.time() + if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0): + if batch_time_m.avg > 0: + log_name = 'Test' + log_suffix + _logger.info( + '{0}: [{1:>4d}/{2}] ' + 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' + 'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) ' + 'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})' + 'fps: {fps:.3f} ' + 'Batch_Size:{batch_size:.1f} '.format( + log_name, batch_idx, last_idx, batch_time=batch_time_m, + loss=losses_m, top1=top1_m, top5=top5_m, fps=args.batch_size / batch_time_m.avg, + batch_size=args.batch_size)) + + metrics = OrderedDict([('loss', losses_m.avg), ('top1', top1_m.avg), ('top5', top5_m.avg)]) + + return metrics + +def proc_node_module(checkpoint, attr_name): + new_model_state = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_model_state[name] = v + return new_model_state + +def convert(pth_file_path, onnx_file_path): + args, args_text = _parse_args() + checkpoint = torch.load(pth_file_path, map_location='cpu') + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.drop) + model.load_state_dict(checkpoint['state_dict'], False) + model.eval() + + input_names = ["image"] + 
output_names = ["output1"] + dummy_input = torch.randn(2, 3, 224, 224) + torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, opset_version=11) + +if __name__ == '__main__': + main() + diff --git a/PyTorch/contrib/cv/classification/GhostNet/pthtar2onx.py b/PyTorch/contrib/cv/classification/GhostNet/pthtar2onx.py index bada64a75e5dc83e8a519293c46053c60c009c84..5d6c9459ad130b15514084e23def49b2aa70c06b 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/pthtar2onx.py +++ b/PyTorch/contrib/cv/classification/GhostNet/pthtar2onx.py @@ -1,47 +1,47 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import torch.onnx - -from collections import OrderedDict -from ghostnet.ghostnet_pytorch.ghostnet import ghostnet - -def proc_nodes_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if (k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -def convert(): - checkpoint = torch.load("model_best.pth.tar", map_location='cpu') - checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') - - model = ghostnet() - model.load_state_dict(checkpoint['state_dict'], strict=False) - model.eval() - - input_names = ["image"] - output_names = ["output1"] - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, "ghostnet_b.onnx", input_names=input_names, output_names=output_names, opset_version=11) - - -if __name__ == "__main__": - convert() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import torch +import torch.onnx + +from collections import OrderedDict +from ghostnet.ghostnet_pytorch.ghostnet import ghostnet + +def proc_nodes_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if (k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +def convert(): + checkpoint = torch.load("model_best.pth.tar", map_location='cpu') + checkpoint['state_dict'] = proc_nodes_module(checkpoint, 'state_dict') + + model = ghostnet() + model.load_state_dict(checkpoint['state_dict'], strict=False) + model.eval() + + input_names = ["image"] + output_names = ["output1"] + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, "ghostnet_b.onnx", input_names=input_names, output_names=output_names, opset_version=11) + + +if __name__ == "__main__": + convert() diff --git a/PyTorch/contrib/cv/classification/GhostNet/requirements.txt b/PyTorch/contrib/cv/classification/GhostNet/requirements.txt index 30a76bac06b0edf95756ac1d337bbd951967cf40..2d29a27ced7a4e86323fada114fb2dfa7157dd2a 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/requirements.txt +++ b/PyTorch/contrib/cv/classification/GhostNet/requirements.txt @@ -1,3 +1,3 @@ -torch>=1.4.0 -torchvision>=0.5.0 -pyyaml +torch>=1.4.0 +torchvision>=0.5.0 +pyyaml diff --git a/PyTorch/contrib/cv/classification/GhostNet/setup.cfg b/PyTorch/contrib/cv/classification/GhostNet/setup.cfg index b1ddcb2b793f98f4995bcb47b130f1c5d8a69f7f..6289c6c3a16da8a53297b8aa7b59a85277b5116e 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/setup.cfg +++ b/PyTorch/contrib/cv/classification/GhostNet/setup.cfg @@ -1,5 +1,5 @@ -[dist_conda] - -conda_name_differences = 'torch:pytorch' -channels = pytorch -noarch = True +[dist_conda] + +conda_name_differences = 'torch:pytorch' +channels = pytorch +noarch = True diff --git a/PyTorch/contrib/cv/classification/GhostNet/setup.py b/PyTorch/contrib/cv/classification/GhostNet/setup.py index d0a9cfba3c9b18aad53a3a648c3c3af3c95e791b..882ed467a35a73aa99239d50eedea9c6cb7ce2fb 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/setup.py +++ b/PyTorch/contrib/cv/classification/GhostNet/setup.py @@ -1,48 +1,48 @@ -""" Setup -""" -from setuptools import setup, find_packages -from codecs import open -from os import path - -here = path.abspath(path.dirname(__file__)) - -# Get the long description from the README file -with open(path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -exec(open('timm/version.py').read()) -setup( - name='timm', - version=__version__, - description='(Unofficial) PyTorch Image Models', - long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/rwightman/pytorch-image-models', - author='Ross Wightman', - author_email='hello@rwightman.com', - classifiers=[ - # How mature is this project? 
Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - - # Note that this is a string of words separated by whitespace, not a list. - keywords='pytorch pretrained models efficientnet mobilenetv3 mnasnet', - packages=find_packages(exclude=['convert', 'tests', 'results']), - include_package_data=True, - install_requires=['torch >= 1.4', 'torchvision'], - python_requires='>=3.6', -) +""" Setup +""" +from setuptools import setup, find_packages +from codecs import open +from os import path + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +exec(open('timm/version.py').read()) +setup( + name='timm', + version=__version__, + description='(Unofficial) PyTorch Image Models', + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/rwightman/pytorch-image-models', + author='Ross Wightman', + author_email='hello@rwightman.com', + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + + # Note that this is a string of words separated by whitespace, not a list. 
+ keywords='pytorch pretrained models efficientnet mobilenetv3 mnasnet', + packages=find_packages(exclude=['convert', 'tests', 'results']), + include_package_data=True, + install_requires=['torch >= 1.4', 'torchvision'], + python_requires='>=3.6', +) diff --git a/PyTorch/contrib/cv/classification/GhostNet/timm/loss/asymmetric_loss.py b/PyTorch/contrib/cv/classification/GhostNet/timm/loss/asymmetric_loss.py index 96a977882b9fa534990bfb1c8321e4c822c602ca..a8b10f9c797c2cb3b2652302717b592dada216f3 100644 --- a/PyTorch/contrib/cv/classification/GhostNet/timm/loss/asymmetric_loss.py +++ b/PyTorch/contrib/cv/classification/GhostNet/timm/loss/asymmetric_loss.py @@ -1,97 +1,97 @@ -import torch -import torch.nn as nn - - -class AsymmetricLossMultiLabel(nn.Module): - def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): - super(AsymmetricLossMultiLabel, self).__init__() - - self.gamma_neg = gamma_neg - self.gamma_pos = gamma_pos - self.clip = clip - self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss - self.eps = eps - - def forward(self, x, y): - """" - Parameters - ---------- - x: input logits - y: targets (multi-label binarized vector) - """ - - # Calculating Probabilities - x_sigmoid = torch.sigmoid(x) - xs_pos = x_sigmoid - xs_neg = 1 - x_sigmoid - - # Asymmetric Clipping - if self.clip is not None and self.clip > 0: - xs_neg = (xs_neg + self.clip).clamp(max=1) - - # Basic CE calculation - los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) - los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) - loss = los_pos + los_neg - - # Asymmetric Focusing - if self.gamma_neg > 0 or self.gamma_pos > 0: - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(False) - pt0 = xs_pos * y - pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p - pt = pt0 + pt1 - one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) - one_sided_w = torch.pow(1 - pt, one_sided_gamma) - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(True) - loss *= one_sided_w - - return -loss.sum() - - -class AsymmetricLossSingleLabel(nn.Module): - def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): - super(AsymmetricLossSingleLabel, self).__init__() - - self.eps = eps - self.logsoftmax = nn.LogSoftmax(dim=-1) - self.targets_classes = [] # prevent gpu repeated memory allocation - self.gamma_pos = gamma_pos - self.gamma_neg = gamma_neg - self.reduction = reduction - - def forward(self, inputs, target, reduction=None): - """" - Parameters - ---------- - x: input logits - y: targets (1-hot vector) - """ - - num_classes = inputs.size()[-1] - log_preds = self.logsoftmax(inputs) - self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) - - # ASL weights - targets = self.targets_classes - anti_targets = 1 - targets - xs_pos = torch.exp(log_preds) - xs_neg = 1 - xs_pos - xs_pos = xs_pos * targets - xs_neg = xs_neg * anti_targets - asymmetric_w = torch.pow(1 - xs_pos - xs_neg, - self.gamma_pos * targets + self.gamma_neg * anti_targets) - log_preds = log_preds * asymmetric_w - - if self.eps > 0: # label smoothing - self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) - - # loss calculation - loss = - self.targets_classes.mul(log_preds) - - loss = loss.sum(dim=-1) - if self.reduction == 'mean': - loss = loss.mean() - - return loss +import torch +import torch.nn as nn + + +class AsymmetricLossMultiLabel(nn.Module): + def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, 
eps=1e-8, disable_torch_grad_focal_loss=False): + super(AsymmetricLossMultiLabel, self).__init__() + + self.gamma_neg = gamma_neg + self.gamma_pos = gamma_pos + self.clip = clip + self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss + self.eps = eps + + def forward(self, x, y): + """" + Parameters + ---------- + x: input logits + y: targets (multi-label binarized vector) + """ + + # Calculating Probabilities + x_sigmoid = torch.sigmoid(x) + xs_pos = x_sigmoid + xs_neg = 1 - x_sigmoid + + # Asymmetric Clipping + if self.clip is not None and self.clip > 0: + xs_neg = (xs_neg + self.clip).clamp(max=1) + + # Basic CE calculation + los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) + los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) + loss = los_pos + los_neg + + # Asymmetric Focusing + if self.gamma_neg > 0 or self.gamma_pos > 0: + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(False) + pt0 = xs_pos * y + pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p + pt = pt0 + pt1 + one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) + one_sided_w = torch.pow(1 - pt, one_sided_gamma) + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(True) + loss *= one_sided_w + + return -loss.sum() + + +class AsymmetricLossSingleLabel(nn.Module): + def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): + super(AsymmetricLossSingleLabel, self).__init__() + + self.eps = eps + self.logsoftmax = nn.LogSoftmax(dim=-1) + self.targets_classes = [] # prevent gpu repeated memory allocation + self.gamma_pos = gamma_pos + self.gamma_neg = gamma_neg + self.reduction = reduction + + def forward(self, inputs, target, reduction=None): + """" + Parameters + ---------- + x: input logits + y: targets (1-hot vector) + """ + + num_classes = inputs.size()[-1] + log_preds = self.logsoftmax(inputs) + self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) + + # ASL weights + targets = self.targets_classes + anti_targets = 1 - targets + xs_pos = torch.exp(log_preds) + xs_neg = 1 - xs_pos + xs_pos = xs_pos * targets + xs_neg = xs_neg * anti_targets + asymmetric_w = torch.pow(1 - xs_pos - xs_neg, + self.gamma_pos * targets + self.gamma_neg * anti_targets) + log_preds = log_preds * asymmetric_w + + if self.eps > 0: # label smoothing + self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) + + # loss calculation + loss = - self.targets_classes.mul(log_preds) + + loss = loss.sum(dim=-1) + if self.reduction == 'mean': + loss = loss.mean() + + return loss diff --git a/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/README.md b/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/README.md index 1d61885fec1287177de1df7f2d84e75182a91c35..543986bf797a9f4d50dd45e2bc998373210c03f3 100644 --- a/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/README.md +++ 
b/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/README.md @@ -1,381 +1,381 @@ -# 交付件基本信息 - -**发布者(Publisher)**:Huawei - -**应用领域(Application Domain)**:Image Classification - -**版本(Version)**:1.2 - -**修改时间(Modified)**:2021.09.10 - -**大小(Size)**:onnx/163MB \(om\) - -**框架(Framework)**:PyTorch (1.5.0) - -**模型格式(Model Format)**:pth.tar/onnx/om - -**精度(Precision)**:O2(训练)、FP16(推理) - -**处理器(Processor)**:昇腾910/昇腾310 - -**应用级别(Categories)**:Released - -**描述(Description):**基于Pytorch框架的InceptionV4图像分类网络模型训练并保存模型,通过ATC工具转换,可在昇腾AI设备上运行 - -**CANN 版本**: CANN 5.0.2 B058 - -**Python 版本**: Python 3.7.5 - -**操作系统版本**: Ubuntu 18.04 - -# 推理 - -### 操作步骤 - -1. 单击“下载模型脚本”和“下载模型”,下载所需软件包。 - - - - ![img](https://r.huaweistatic.com/s/ascendstatic/lst/modelZooImg/public_sys-resources/note_3.0-zh-cn.png) - - - 下载模型脚本:下载训练和推理的脚本文件。 - - 下载模型:下载模型文件。 - - - -2. 将源码上传至推理服务器任意目录并解压(如:“/home/test”)。 - - - - ``` - # 在环境上执行 - unzip InceptionV4_ID1778_for_PyTorch.zip - cd {code_unzip_path}/InceptionV4_ID1778_for_PyTorch/infer - ``` - - ![img](https://r.huaweistatic.com/s/ascendstatic/lst/modelZooImg/public_sys-resources/note_3.0-zh-cn.png) - - - code_unzip_path:代码解压目录,以“/home/test”为例。 - - version:为模型版本。 - - - - - -3. 数据准备。 - - - - 由于后续推理均在容器中进行,因此需要把用于推理的图片、数据集、模型文件、代码等均映射到容器中 - - ``` - /home/data - ├── imagenet_val # 推理图片用于精度测试的数据集 - │ ├── ILSVRC2012_val_00016652.JPEG - │ └── ILSVRC2012_val_00024989.JPEG - ``` - - - - - -4. 文件格式转换 - - ``` - find ./ -name "*.sh" | xargs dos2unix - ``` - - - -5. 启动容器 - - - 进入到代码根目录,执行以下命令,启动容器。 - - **bash scripts/docker_start_infer.sh** *infer_image* *data_path* - - | 参数 | 说明 | - | ------------- | ----------------------------- | - | *infer_image* | 推理镜像名称,根据实际写入。 | - | *data_path* | 数据路径。如:“/home/test/”。 | - - 启动容器时会将推理芯片和数据路径挂载到容器中。 - - ``` - # 切换到代码跟目录下 - cd /home/path/to/InceptionV4_ID1778_for_PyTorch/ - bash scripts/docker_start_infer.sh infer:21.0.1 /home/path/to/path - ``` - -## 模型转换 - -### 前提条件 - -已进入推理容器环境。具体操作请参见“准备容器环境”。 - -### 操作步骤 - -1. 准备onnx模型文件。 - - - - onnx模型为在昇腾910服务器上导出的模型,导出onnx模型的详细步骤请参见“模型训练”。 - - 将模型文件保存在/path/to/InceptionV4_ID1778_for_PyTorch/infer/data 路径下 - - - -2. 执行以下命令,进行模型转换。 - - - - 模型转换时,可选择不同的AIPP配置文件进行转换。转换详细信息可查看转换脚本和对应的AIPP配置文件,转换命令如下。 - - **bash convert/onnx2om.sh** model_path output_model_name - - | 参数 | 说明 | - | ----------------- | ------------------------------------------------- | - | model_path | 转换脚本onnx文件路径。 | - | output_model_name | 生成的OM文件名,转换脚本会在此基础上添加.om后缀。 | - - 转换示例如下所示。 - - ``` - bash convert/onnx2om.sh ./data/InceptionV4_npu_1.onnx data/InceptionV4-pt - ``` - -## mxBase推理 - -### 前提条件 - -已进入推理容器环境。具体操作请参见“准备容器环境”。 - -### 操作步骤 - -1. 配置环境变量。 - - ``` - export ASCEND_HOME=/usr/local/Ascend - export ASCEND_VERSION=nnrt/latest - export ARCH_PATTERN=. - export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib/modelpostprocessors:${LD_LIBRARY_PATH} - ``` - - - -2. 修改配置文件。 - - - - 可根据实际情况修改,配置文件位于“mxbase/src/main.cpp”中,可修改参数如下。 - - 若使用迁移学习生成的模型进行推理,请修改CLASS_NUM为迁移学习训练任务数据集的类别数量。如:修改CLASS_NUM=1001;om的模型路径;labelPath的路径 - - ``` - ... - namespace { - - const uint32_t CLASS_NUM = 1001; // 推理类别总数 - } - int main(int argc, char* argv[]){ - .... - initParam.labelPath = "../data/imagenet1000_clsidx_to_labels.names"; - initParam.topk = 5; - initParam.softmax = false; - initParam.checkTensor = true; - initParam.modelPath = "../data/inception-pt.om"; - .... - } - ... - ``` - - - -3. 编译工程。 - - ``` - cd /home/path/to/InceptionV4_ID1778_for_PyTorch/infer/mxbase - bash build.sh - ``` - -4. 
运行推理服务 - - - - **./build/inceptionv4 *image_path* - - | 参数 | 说明 | - | ---------- | ---------------------------------------------- | - | image_path | 推理图片所在文件的路径。如:“../data/images”。 | - - - -5. 观察结果。 - - 分类结果会以*.txt的格式保存在result文件中。 - - ![image-20210929155913474](./image/image-20210929155913474.png) - - - -## MindX SDK 推理 - -### 前提条件 - -已进入推理容器环境。具体操作请参见“准备容器环境”。 - -### 操作步骤 - -1. 修改配置文件。可根据实际情况修改pipeline文件。 - - ``` - vim ../data/inceptionv4_opencv.pipeline - ``` - - 以inceptionv4_opencv.pipeline文件为例,作简要说明。 - - ``` - { - "im_inceptionv4": { - "stream_config": { - "deviceId": "0" # 指定推理卡,要与挂载到容器中的卡一致 - }, - "appsrc0": { - "props": { - "blocksize": "409600" - }, - "factory": "appsrc", - "next": "mxpi_imagedecoder0" - }, - ""mxpi_imagedecoder0"": { - "props": { - "handleMethod": "opencv" - }, - "factory": "mxpi_imagedecoder", - "next": "mxpi_imageresize0" - }, - "mxpi_imageresize0": { - "props": { - "handleMethod": "opencv", - "resizeHeight": "355", - "resizeWidth": "355", - "resizeType": "Resizer_Stretch" - }, - "factory": "mxpi_imageresize", - "next": "mxpi_opencvcentercrop0" - }, - "mxpi_opencvcentercrop0": { - "props": { - "dataSource": "mxpi_imageresize0", - "cropHeight": "299", - "cropWidth": "299" - }, - "factory": "mxpi_opencvcentercrop", - "next": "mxpi_tensorinfer0" - }, - - "mxpi_tensorinfer0": { - "props": { - "dataSource": "mxpi_opencvcentercrop0", - "modelPath": "../data/inceptionV4_pt_cfg.om", #模型存放路径 - "waitingTime": "2000", - "outputDeviceId": "-1" - }, - "factory": "mxpi_tensorinfer", - "next": "mxpi_classpostprocessor0" - }, - "mxpi_classpostprocessor0": { - "props": { - "dataSource": "mxpi_tensorinfer0", - "postProcessConfigPath": "../data/inceptionv4_aipp.cfg", #后处理的配置文件 - "labelPath": "../data/imagenet1000_clsidx_to_labels.names", #标签路径 - "postProcessLibPath": "../../../../mxVision-2.0.2/lib/modelpostprocessors/libresnet50postprocess.so" #后处理模块 - }, - "factory": "mxpi_classpostprocessor", - "next": "mxpi_dataserialize0" - }, - - .... - } - } - ``` - - - -2. 运行推理服务。 - - ``` - cd infer/sdk - bash run.sh /path/to/testImageDir /path/to/saveResultDir - ``` - - setp 1、查看推理结果。 - - 若设置推理结果路径为“infer/sdk/result”,示例如下所示。 - - ``` - root@97a4c6ab6482:/home/path/to/InceptionV4_ID1778_for_PyTorch/infer/sdk/result# ll - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042663_1.txt - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042820_1.txt - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042855_1.txt - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00043055_1.txt - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00043439_1.txt - -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00044460_1.txt - ``` - -3. 性能统计 - - step 1、打开性能统计开关。将“enable_ps”参数设置为true,“ps_interval_time”参数设置为6 - - **vi** */home/HwHiAiUser/mxManufacture/config/sdk.conf* - - ``` - # MindX SDK configuration file - - # whether to enable performance statistics, default is false [dynamic config] - - enable_ps=true - ... - ps_interval_time=6 - ... - ``` - - step 2、执行run.sh脚本。 - - ``` - cd infer/sdk - bash run.sh /path/to/testImageDir /path/to/saveResultDir - ``` - - step 3、在日志目录 “/home/HwHiAiUser/mxManufacture/logs/” 查看性能统计结果。 - - ``` - performance—statistics.log.e2e.xxx - performance—statistics.log.plugin.xxx - performance—statistics.log.tpr.xxx - ``` - - 其中e2e日志统计端到端时间,plugin日志统计单插件时间。 - -4. 执行精度测试。 - - 精度结果是在imageNet上进行的,使用classification_task_metric.py 进行测试。修改classification_task_metric.py - -``` -python3.7 classfication_task_metric.py result/ ./val_label.txt . 
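The `.pipeline` file edited above is plain JSON once the inline `#` annotations shown in the snippet are removed (they are documentation notes, not part of the file); a small sketch that loads it and verifies the paths referenced by the inference and post-processing plugins:

```
import json
import os

# Assumes the on-disk pipeline file is valid JSON; key names follow the
# inceptionv4_opencv.pipeline snippet shown above.
with open("../data/inceptionv4_opencv.pipeline", "r") as f:
    stream = json.load(f)["im_inceptionv4"]

infer_props = stream["mxpi_tensorinfer0"]["props"]
post_props = stream["mxpi_classpostprocessor0"]["props"]

for key, path in [("modelPath", infer_props["modelPath"]),
                  ("postProcessConfigPath", post_props["postProcessConfigPath"]),
                  ("labelPath", post_props["labelPath"]),
                  ("postProcessLibPath", post_props["postProcessLibPath"])]:
    print(f"{key}: {path} -> {'found' if os.path.exists(path) else 'MISSING'}")
```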
./result.json" -``` - -参数 1 : prediction file path -参数 2 : ground truth file -参数 3 : result store path -参数 4 : json file name - -查看精度结果。 - -``` -cat result.json -``` - - - -![image-20210929111906059](./image/image-20210929111906059.png) +# 交付件基本信息 + +**发布者(Publisher)**:Huawei + +**应用领域(Application Domain)**:Image Classification + +**版本(Version)**:1.2 + +**修改时间(Modified)**:2021.09.10 + +**大小(Size)**:onnx/163MB \(om\) + +**框架(Framework)**:PyTorch (1.5.0) + +**模型格式(Model Format)**:pth.tar/onnx/om + +**精度(Precision)**:O2(训练)、FP16(推理) + +**处理器(Processor)**:昇腾910/昇腾310 + +**应用级别(Categories)**:Released + +**描述(Description):**基于Pytorch框架的InceptionV4图像分类网络模型训练并保存模型,通过ATC工具转换,可在昇腾AI设备上运行 + +**CANN 版本**: CANN 5.0.2 B058 + +**Python 版本**: Python 3.7.5 + +**操作系统版本**: Ubuntu 18.04 + +# 推理 + +### 操作步骤 + +1. 单击“下载模型脚本”和“下载模型”,下载所需软件包。 + + + + ![img](https://r.huaweistatic.com/s/ascendstatic/lst/modelZooImg/public_sys-resources/note_3.0-zh-cn.png) + + - 下载模型脚本:下载训练和推理的脚本文件。 + - 下载模型:下载模型文件。 + + + +2. 将源码上传至推理服务器任意目录并解压(如:“/home/test”)。 + + + + ``` + # 在环境上执行 + unzip InceptionV4_ID1778_for_PyTorch.zip + cd {code_unzip_path}/InceptionV4_ID1778_for_PyTorch/infer + ``` + + ![img](https://r.huaweistatic.com/s/ascendstatic/lst/modelZooImg/public_sys-resources/note_3.0-zh-cn.png) + + - code_unzip_path:代码解压目录,以“/home/test”为例。 + - version:为模型版本。 + + + + + +3. 数据准备。 + + + + 由于后续推理均在容器中进行,因此需要把用于推理的图片、数据集、模型文件、代码等均映射到容器中 + + ``` + /home/data + ├── imagenet_val # 推理图片用于精度测试的数据集 + │ ├── ILSVRC2012_val_00016652.JPEG + │ └── ILSVRC2012_val_00024989.JPEG + ``` + + + + + +4. 文件格式转换 + + ``` + find ./ -name "*.sh" | xargs dos2unix + ``` + + + +5. 启动容器 + + + 进入到代码根目录,执行以下命令,启动容器。 + + **bash scripts/docker_start_infer.sh** *infer_image* *data_path* + + | 参数 | 说明 | + | ------------- | ----------------------------- | + | *infer_image* | 推理镜像名称,根据实际写入。 | + | *data_path* | 数据路径。如:“/home/test/”。 | + + 启动容器时会将推理芯片和数据路径挂载到容器中。 + + ``` + # 切换到代码跟目录下 + cd /home/path/to/InceptionV4_ID1778_for_PyTorch/ + bash scripts/docker_start_infer.sh infer:21.0.1 /home/path/to/path + ``` + +## 模型转换 + +### 前提条件 + +已进入推理容器环境。具体操作请参见“准备容器环境”。 + +### 操作步骤 + +1. 准备onnx模型文件。 + + + + onnx模型为在昇腾910服务器上导出的模型,导出onnx模型的详细步骤请参见“模型训练”。 + + 将模型文件保存在/path/to/InceptionV4_ID1778_for_PyTorch/infer/data 路径下 + + + +2. 执行以下命令,进行模型转换。 + + + + 模型转换时,可选择不同的AIPP配置文件进行转换。转换详细信息可查看转换脚本和对应的AIPP配置文件,转换命令如下。 + + **bash convert/onnx2om.sh** model_path output_model_name + + | 参数 | 说明 | + | ----------------- | ------------------------------------------------- | + | model_path | 转换脚本onnx文件路径。 | + | output_model_name | 生成的OM文件名,转换脚本会在此基础上添加.om后缀。 | + + 转换示例如下所示。 + + ``` + bash convert/onnx2om.sh ./data/InceptionV4_npu_1.onnx data/InceptionV4-pt + ``` + +## mxBase推理 + +### 前提条件 + +已进入推理容器环境。具体操作请参见“准备容器环境”。 + +### 操作步骤 + +1. 配置环境变量。 + + ``` + export ASCEND_HOME=/usr/local/Ascend + export ASCEND_VERSION=nnrt/latest + export ARCH_PATTERN=. + export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib/modelpostprocessors:${LD_LIBRARY_PATH} + ``` + + + +2. 修改配置文件。 + + + + 可根据实际情况修改,配置文件位于“mxbase/src/main.cpp”中,可修改参数如下。 + + 若使用迁移学习生成的模型进行推理,请修改CLASS_NUM为迁移学习训练任务数据集的类别数量。如:修改CLASS_NUM=1001;om的模型路径;labelPath的路径 + + ``` + ... + namespace { + + const uint32_t CLASS_NUM = 1001; // 推理类别总数 + } + int main(int argc, char* argv[]){ + .... + initParam.labelPath = "../data/imagenet1000_clsidx_to_labels.names"; + initParam.topk = 5; + initParam.softmax = false; + initParam.checkTensor = true; + initParam.modelPath = "../data/inception-pt.om"; + .... + } + ... + ``` + + + +3. 
编译工程。 + + ``` + cd /home/path/to/InceptionV4_ID1778_for_PyTorch/infer/mxbase + bash build.sh + ``` + +4. 运行推理服务 + + + + **./build/inceptionv4 *image_path* + + | 参数 | 说明 | + | ---------- | ---------------------------------------------- | + | image_path | 推理图片所在文件的路径。如:“../data/images”。 | + + + +5. 观察结果。 + + 分类结果会以*.txt的格式保存在result文件中。 + + ![image-20210929155913474](./image/image-20210929155913474.png) + + + +## MindX SDK 推理 + +### 前提条件 + +已进入推理容器环境。具体操作请参见“准备容器环境”。 + +### 操作步骤 + +1. 修改配置文件。可根据实际情况修改pipeline文件。 + + ``` + vim ../data/inceptionv4_opencv.pipeline + ``` + + 以inceptionv4_opencv.pipeline文件为例,作简要说明。 + + ``` + { + "im_inceptionv4": { + "stream_config": { + "deviceId": "0" # 指定推理卡,要与挂载到容器中的卡一致 + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_imagedecoder0" + }, + ""mxpi_imagedecoder0"": { + "props": { + "handleMethod": "opencv" + }, + "factory": "mxpi_imagedecoder", + "next": "mxpi_imageresize0" + }, + "mxpi_imageresize0": { + "props": { + "handleMethod": "opencv", + "resizeHeight": "355", + "resizeWidth": "355", + "resizeType": "Resizer_Stretch" + }, + "factory": "mxpi_imageresize", + "next": "mxpi_opencvcentercrop0" + }, + "mxpi_opencvcentercrop0": { + "props": { + "dataSource": "mxpi_imageresize0", + "cropHeight": "299", + "cropWidth": "299" + }, + "factory": "mxpi_opencvcentercrop", + "next": "mxpi_tensorinfer0" + }, + + "mxpi_tensorinfer0": { + "props": { + "dataSource": "mxpi_opencvcentercrop0", + "modelPath": "../data/inceptionV4_pt_cfg.om", #模型存放路径 + "waitingTime": "2000", + "outputDeviceId": "-1" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_classpostprocessor0" + }, + "mxpi_classpostprocessor0": { + "props": { + "dataSource": "mxpi_tensorinfer0", + "postProcessConfigPath": "../data/inceptionv4_aipp.cfg", #后处理的配置文件 + "labelPath": "../data/imagenet1000_clsidx_to_labels.names", #标签路径 + "postProcessLibPath": "../../../../mxVision-2.0.2/lib/modelpostprocessors/libresnet50postprocess.so" #后处理模块 + }, + "factory": "mxpi_classpostprocessor", + "next": "mxpi_dataserialize0" + }, + + .... + } + } + ``` + + + +2. 运行推理服务。 + + ``` + cd infer/sdk + bash run.sh /path/to/testImageDir /path/to/saveResultDir + ``` + + setp 1、查看推理结果。 + + 若设置推理结果路径为“infer/sdk/result”,示例如下所示。 + + ``` + root@97a4c6ab6482:/home/path/to/InceptionV4_ID1778_for_PyTorch/infer/sdk/result# ll + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042663_1.txt + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042820_1.txt + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00042855_1.txt + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00043055_1.txt + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00043439_1.txt + -rw-r--r-- 1 root root 21 Aug 24 07:09 ILSVRC2012_val_00044460_1.txt + ``` + +3. 性能统计 + + step 1、打开性能统计开关。将“enable_ps”参数设置为true,“ps_interval_time”参数设置为6 + + **vi** */home/HwHiAiUser/mxManufacture/config/sdk.conf* + + ``` + # MindX SDK configuration file + + # whether to enable performance statistics, default is false [dynamic config] + + enable_ps=true + ... + ps_interval_time=6 + ... + ``` + + step 2、执行run.sh脚本。 + + ``` + cd infer/sdk + bash run.sh /path/to/testImageDir /path/to/saveResultDir + ``` + + step 3、在日志目录 “/home/HwHiAiUser/mxManufacture/logs/” 查看性能统计结果。 + + ``` + performance—statistics.log.e2e.xxx + performance—statistics.log.plugin.xxx + performance—statistics.log.tpr.xxx + ``` + + 其中e2e日志统计端到端时间,plugin日志统计单插件时间。 + +4. 
执行精度测试。 + + 精度结果是在imageNet上进行的,使用classification_task_metric.py 进行测试。修改classification_task_metric.py + +``` +python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json" +``` + +参数 1 : prediction file path +参数 2 : ground truth file +参数 3 : result store path +参数 4 : json file name + +查看精度结果。 + +``` +cat result.json +``` + + + +![image-20210929111906059](./image/image-20210929111906059.png) diff --git a/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/sdk/classification_task_metric.py b/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/sdk/classification_task_metric.py index 22385f773a97d0cee63f966357f2a49034ba6239..3987002029518061fc4eaf1777488f10439d8df2 100644 --- a/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/sdk/classification_task_metric.py +++ b/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch/infer/sdk/classification_task_metric.py @@ -1,188 +1,188 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - data_vec[ind] = np.int32(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - 
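The two parsers above expect very simple text formats: each line of `val_label.txt` holds an image name and its label separated by a space, and each prediction file holds one line of space-separated class indices. A tiny illustration (the label and prediction values are made up):

```
# Ground-truth line as cre_groundtruth_dict_fromtxt reads it (label value hypothetical):
line = "ILSVRC2012_val_00042663.JPEG 65"
name, label = line.strip().split(" ")
print(name.split(".")[0], label)      # -> ILSVRC2012_val_00042663 65

# A prediction file such as ILSVRC2012_val_00042663_1.txt containing "65 970 230 809 516"
# is parsed by load_statistical_predict_result into
#   data_vec = [65., 970., 230., 809., 516.]   (top-5 class indices), n_label = 5
```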
""" - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - #print("img_name:{}, filepath:{}, ret:{} ".format(img_name, filepath, ret)) - prediction = ret[0] - n_labels = ret[1] - - gt = img_gt_dict[img_name] - #print("gt:", gt) - if n_labels == 1000: - real_label = int(gt) - elif n_labels == 1001: - real_label = int(gt) + 1 - else: - real_label = int(gt) - 1 - - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter target file folder | groud truth file | result folder | result json file name, such as" - "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json") - exit(1) - - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - exit() - - if not os.path.exists(annotation_file_path): - - print("Ground truth file does not exist.") - exit() - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - exit() - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) +#coding = utf-8 +#Copyright 2020 Huawei Technologies Co., Ltd +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + img_name = temp[0].split(".")[0] + img_lab = temp[1] + img_gt_dict[img_name] = img_lab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(temp): + data_vec[ind] = np.int32(cls_ind) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + + ret = load_statistical_predict_result(filepath) + #print("img_name:{}, filepath:{}, ret:{} ".format(img_name, filepath, ret)) + prediction = ret[0] + n_labels = ret[1] + + gt = img_gt_dict[img_name] + #print("gt:", gt) + if n_labels == 1000: + real_label = int(gt) + elif n_labels == 1001: + real_label = int(gt) + 1 + else: + real_label = int(gt) - 1 + + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if str(real_label) == str(int(prediction[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + 
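Two details of the accuracy accounting above are worth spelling out: the ground-truth index is shifted by +1 when the model uses a 1001-class (background-offset) head, and `count_hit[i]` only records the rank of the first correct prediction, so top-N accuracy is the cumulative sum of those counts divided by the image count. A worked illustration with made-up tallies:

```
import numpy as np

# Label adjustment, as in the branch above: a 1001-class model shifts labels by +1.
gt, n_labels = 64, 1001                                      # hypothetical ground truth
real_label = int(gt) + 1 if n_labels == 1001 else int(gt)    # -> 65

# count_hit[i] = number of images whose first hit occurred at rank i (values made up).
count_hit = np.array([700., 120., 50., 20., 10.])
count = 1000
topn_accuracy = np.cumsum(count_hit) / count
print(topn_accuracy)     # [0.70 0.82 0.87 0.89 0.90] -> Top1 70.0 %, Top5 90.0 %
```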
json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Please enter target file folder | groud truth file | result folder | result json file name, such as" + "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json") + exit(1) + + if not os.path.exists(folder_davinci_target): + print("target file folder does not exist.") + exit() + + if not os.path.exists(annotation_file_path): + + print("Ground truth file does not exist.") + exit() + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + exit() + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) diff --git a/PyTorch/contrib/cv/classification/MGN/README.md b/PyTorch/contrib/cv/classification/MGN/README.md index 4cb4c8a500c3e7db591323fe5a264ecc5c9e88b1..65b55576c8fed9e263569697817cca6ab430a6e5 100644 --- a/PyTorch/contrib/cv/classification/MGN/README.md +++ b/PyTorch/contrib/cv/classification/MGN/README.md @@ -1,58 +1,58 @@ -# MGN - -This implements training of MGN on the Market-1501 dataset, mainly modified from [GNAYUOHZ/ReID-MGN](https://github.com/GNAYUOHZ/ReID-MGN). - -## MGN Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, MGN is re-implemented using semantics such as custom OP. - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) - -- `pip install -r requirements.txt` - -- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 - - - ~~~shell - unzip Market-1501-v15.09.15.zip - ~~~ - -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the market dataset: - -```bash -# training 1p accuracy -bash test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash test/train_performance_8p.sh --data_path=real_data_path - -# finetune -bash test/train_finetune_1p.sh --data_path=real_data_path --weights=real_weight_path - -# Online inference demo -python demo.py --data_path real_data_path - -# To ONNX -python pthtar2onnx.py -``` - -## MGN training result - - -| | mAP | AMP_Type | Epochs | FPS | -| :----: | :---: | :------: | :----: | :-----: | -| 1p-GPU | - | O2 | 1 | 71.408 | -| 1p-NPU | - | O2 | 1 | 29.408 | -| 8p-GPU | 93.35 | O2 | 500 | 771.818 | -| 8p-NPU | 93.83 | O2 | 500 | 200.024 | - +# MGN + +This implements training of MGN on the Market-1501 dataset, mainly modified from [GNAYUOHZ/ReID-MGN](https://github.com/GNAYUOHZ/ReID-MGN). + +## MGN Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, MGN is re-implemented using semantics such as custom OP. 
+ + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) + +- `pip install -r requirements.txt` + +- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 + + - ~~~shell + unzip Market-1501-v15.09.15.zip + ~~~ + +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the market dataset: + +```bash +# training 1p accuracy +bash test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash test/train_performance_8p.sh --data_path=real_data_path + +# finetune +bash test/train_finetune_1p.sh --data_path=real_data_path --weights=real_weight_path + +# Online inference demo +python demo.py --data_path real_data_path + +# To ONNX +python pthtar2onnx.py +``` + +## MGN training result + + +| | mAP | AMP_Type | Epochs | FPS | +| :----: | :---: | :------: | :----: | :-----: | +| 1p-GPU | - | O2 | 1 | 71.408 | +| 1p-NPU | - | O2 | 1 | 29.408 | +| 8p-GPU | 93.35 | O2 | 500 | 771.818 | +| 8p-NPU | 93.83 | O2 | 500 | 200.024 | + diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/convert/aipp.aipconfig b/PyTorch/contrib/cv/classification/MnasNet/infer/convert/aipp.aipconfig old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/convert/onnx2om.sh b/PyTorch/contrib/cv/classification/MnasNet/infer/convert/onnx2om.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/CMakeLists.txt b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/CMakeLists.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/Mnasnet.cpp b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/Mnasnet.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/Mnasnet.h b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/Mnasnet.h old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/build.sh b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/build.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/main.cpp b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/main.cpp old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/run.sh b/PyTorch/contrib/cv/classification/MnasNet/infer/mxbase/run.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/main.py b/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/main.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/mnasnet_aipp.cfg b/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/mnasnet_aipp.cfg old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/mnasnet_opencv.pipeline b/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/mnasnet_opencv.pipeline old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/run.sh b/PyTorch/contrib/cv/classification/MnasNet/infer/sdk/run.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MnasNet/infer/util/classification_task_metric.py b/PyTorch/contrib/cv/classification/MnasNet/infer/util/classification_task_metric.py old 
mode 100755 new mode 100644 index 972b7fa0ecf66c8ce762023c63110f15b07a9fd4..089aee8a40a3c6db2ea5b0eaada954a329b7426a --- a/PyTorch/contrib/cv/classification/MnasNet/infer/util/classification_task_metric.py +++ b/PyTorch/contrib/cv/classification/MnasNet/infer/util/classification_task_metric.py @@ -1,186 +1,186 @@ -# coding = utf-8 -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label, in_type, color - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - data_vec[ind] = np.int32(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = 
load_statistical_predict_result(filepath) - - prediction = ret[0] - n_labels = ret[1] - gt = img_gt_dict[img_name] - if n_labels == 1000: - real_label = int(gt) - elif n_labels == 1001: - real_label = int(gt) + 1 - else: - real_label = int(gt) - 1 - - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter target file folder | groud truth file | result folder | result json file name, such as" - "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json") - exit(1) - - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - exit() - - if not os.path.exists(annotation_file_path): - - print("Ground truth file does not exist.") - exit() - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - exit() - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - - elapsed = (time.time() - start) +# coding = utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import json +import numpy as np +import time + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def gen_file_name(img_name): + full_name = img_name.split('/')[-1] + index = full_name.rfind('.') + return full_name[:index] + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if (gtfile != LABEL_FILE): + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + ret = gt["image"]["annotations"][0]["category_id"] + img_gt_dict[gen_file_name(gtfile)] = ret + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + temp = line.strip().split(" ") + img_name = temp[0].split(".")[0] + img_lab = temp[1] + img_gt_dict[img_name] = img_lab + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label, in_type, color + """ + with open(filepath, 'r')as f: + data = f.readline() + temp = data.strip().split(" ") + n_label = len(temp) + data_vec = np.zeros((n_label), dtype=np.float32) + in_type = '' + color = '' + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(temp): + data_vec[ind] = np.int32(cls_ind) + return data_vec, n_label, in_type, color + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + + ret = load_statistical_predict_result(filepath) + + prediction = ret[0] + n_labels = ret[1] + gt = img_gt_dict[img_name] + if n_labels == 1000: + real_label = int(gt) + elif n_labels == 1001: + real_label = int(gt) + 1 + else: + real_label = int(gt) - 1 + + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if str(real_label) == str(int(prediction[i])): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + if count == 0: + accuracy = 0 + else: + accuracy = np.cumsum(count_hit) / count + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + start = time.time() + try: + # 
txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + except IndexError: + print("Please enter target file folder | groud truth file | result folder | result json file name, such as" + "Such as: python3.7 classfication_task_metric.py result/ ./val_label.txt . ./result.json") + exit(1) + + if not os.path.exists(folder_davinci_target): + print("target file folder does not exist.") + exit() + + if not os.path.exists(annotation_file_path): + + print("Ground truth file does not exist.") + exit() + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + exit() + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + + elapsed = (time.time() - start) diff --git a/PyTorch/contrib/cv/classification/MnasNet/modelArts/pth2onnx.py b/PyTorch/contrib/cv/classification/MnasNet/modelArts/pth2onnx.py index 2dece3cb8d3d2e38128ec503a8668ba578147aa6..f86ce4b281b092af31e7c18d31ab6863a2cc6ad4 100644 --- a/PyTorch/contrib/cv/classification/MnasNet/modelArts/pth2onnx.py +++ b/PyTorch/contrib/cv/classification/MnasNet/modelArts/pth2onnx.py @@ -1,98 +1,98 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import random -import shutil -import time -import warnings -import math -import glob -import numpy as np -import sys - -import torch -import torch.npu -import torch.nn as nn -from collections import OrderedDict -import torch.onnx -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) -import mnasnet - -# modelarts modification -import moxing as mox - - -CACHE_TRAINING_URL = "/cache/training" -CACHE_MODEL_URL = "/cache/model" - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(pth_file, onnx_path, class_num, train_url, npu): - - loc = 'npu:{}'.format(npu) - checkpoint = torch.load(pth_file, map_location=loc) - - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = mnasnet.mnasnet1_0(num_classes=class_num) - - model.to(loc) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - dummy_input = dummy_input.to(loc, non_blocking=False) - torch.onnx.export(model, dummy_input, onnx_path, input_names=input_names, output_names=output_names, opset_version=11) - mox.file.copy_parallel(onnx_path, train_url + 'model.onnx') - -def convert_pth_to_onnx(config_args): - mox.file.copy_parallel(config_args.is_best_name, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) - pth_pattern = os.path.join(CACHE_MODEL_URL, 'checkpoint.pth.tar') - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print(f"can't find pth {pth_pattern}") - return - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path, config_args.class_num, config_args.train_url, config_args.npu) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') - # modelarts - parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', help='path to dataset') - parser.add_argument('--train_url', default="/cache/training", - type=str, - help="setting dir of training output") - parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - parser.add_argument('--class_num', default=1000, type=int, - help='number of class') - parser.add_argument('-a', '--arch', metavar='ARCH', default='mnasnet1_0') - parser.add_argument('--is_best_name', dest='is_best_name', - help=' weight dir') - args = parser.parse_args() - print('===========================') - print(args) - print('===========================') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
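`proc_node_module` above strips the `module.` prefix that DistributedDataParallel adds to parameter names, so a checkpoint saved from a wrapped model can be loaded into a bare one. A minimal illustration with a fabricated state dict:

```
from collections import OrderedDict

# Fabricated checkpoint keys, purely to show the renaming performed above.
state_dict = OrderedDict([
    ("module.layers.0.weight", 0),   # saved from a DDP-wrapped model
    ("classifier.1.bias", 1),        # keys without the prefix pass through unchanged
])

new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[7:] if k.startswith("module.") else k
    new_state_dict[name] = v

print(list(new_state_dict))          # ['layers.0.weight', 'classifier.1.bias']
```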
+ +import argparse +import os +import random +import shutil +import time +import warnings +import math +import glob +import numpy as np +import sys + +import torch +import torch.npu +import torch.nn as nn +from collections import OrderedDict +import torch.onnx +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) +import mnasnet + +# modelarts modification +import moxing as mox + + +CACHE_TRAINING_URL = "/cache/training" +CACHE_MODEL_URL = "/cache/model" + +def proc_node_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert(pth_file, onnx_path, class_num, train_url, npu): + + loc = 'npu:{}'.format(npu) + checkpoint = torch.load(pth_file, map_location=loc) + + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + model = mnasnet.mnasnet1_0(num_classes=class_num) + + model.to(loc) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(16, 3, 224, 224) + dummy_input = dummy_input.to(loc, non_blocking=False) + torch.onnx.export(model, dummy_input, onnx_path, input_names=input_names, output_names=output_names, opset_version=11) + mox.file.copy_parallel(onnx_path, train_url + 'model.onnx') + +def convert_pth_to_onnx(config_args): + mox.file.copy_parallel(config_args.is_best_name, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) + pth_pattern = os.path.join(CACHE_MODEL_URL, 'checkpoint.pth.tar') + pth_file_list = glob.glob(pth_pattern) + if not pth_file_list: + print(f"can't find pth {pth_pattern}") + return + pth_file = pth_file_list[0] + onnx_path = pth_file.split(".")[0] + '.onnx' + convert(pth_file, onnx_path, config_args.class_num, config_args.train_url, config_args.npu) +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') + # modelarts + parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', help='path to dataset') + parser.add_argument('--train_url', default="/cache/training", + type=str, + help="setting dir of training output") + parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") + parser.add_argument('--class_num', default=1000, type=int, + help='number of class') + parser.add_argument('-a', '--arch', metavar='ARCH', default='mnasnet1_0') + parser.add_argument('--is_best_name', dest='is_best_name', + help=' weight dir') + args = parser.parse_args() + print('===========================') + print(args) + print('===========================') convert_pth_to_onnx(args) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/MnasNet/modelArts/train-modelarts.py b/PyTorch/contrib/cv/classification/MnasNet/modelArts/train-modelarts.py index 26ad841ce203b8aaef0dabdfd61f163c9cf31029..9f090b46b41f14c95f67da7b68e15fbf75691ed8 100644 --- a/PyTorch/contrib/cv/classification/MnasNet/modelArts/train-modelarts.py +++ b/PyTorch/contrib/cv/classification/MnasNet/modelArts/train-modelarts.py @@ -1,735 +1,735 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
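After `torch.onnx.export`, a quick parity check between the PyTorch forward pass and the exported graph catches export problems early. The sketch below is an assumption-laden stand-in: it uses torchvision's `mnasnet1_0` in place of the repository's local `mnasnet` module, a local file name of its own, and requires `onnxruntime`; only the input/output names, dummy shape and opset mirror the export call above.

```
import numpy as np
import onnxruntime as ort
import torch
from torchvision.models import mnasnet1_0   # stand-in for the repo's mnasnet module

model = mnasnet1_0(num_classes=1000).eval()
dummy = torch.randn(16, 3, 224, 224)

torch.onnx.export(model, dummy, "check.onnx",
                  input_names=["actual_input_1"], output_names=["output1"],
                  opset_version=11)

with torch.no_grad():
    torch_out = model(dummy).numpy()

sess = ort.InferenceSession("check.onnx", providers=["CPUExecutionProvider"])
(onnx_out,) = sess.run(None, {"actual_input_1": dummy.numpy()})
print("max abs diff:", np.abs(torch_out - onnx_out).max())   # expect roughly 1e-5 or smaller
```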
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import random -import shutil -import time -import warnings -import math -import glob -import numpy as np -import sys - -import torch -import torch.npu -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.nn.functional as F -import torch.optim -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -from collections import OrderedDict -import torch.onnx -import pth2onnx -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) -import mnasnet - -# modelarts modification -import moxing as mox - - -try: - from apex.parallel import DistributedDataParallel as DDP - from apex.fp16_utils import * - from apex import amp, optimizers -except ImportError: - raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.") - - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data', metavar='DIR', - help='path to dataset') -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all NPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') - -parser.add_argument('--label-smoothing', '--ls', default=0.1, type=float) - -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for 
distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=1, type=int, - help='seed for initializing training. ') -parser.add_argument('--npu', default=0, type=int, - help='NPU id to use.') -parser.add_argument('--warmup', default=0, type=int, - help='Warmup epochs.') -parser.add_argument('--local_rank', default=0, type=int, - help="rank id of process") -parser.add_argument('--run-prof', action='store_true', help='only for prof') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N NPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') - -parser.add_argument('--pretrained_weight', dest='pretrained_weight', - help='pretrained weight dir') - - -# modelarts -parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', help='path to dataset') -parser.add_argument('--train_url', default="/cache/training", - type=str, - help="setting dir of training output") -parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") -parser.add_argument('--class_num', default=1000, type=int, - help='number of class') - - -best_acc1 = 0 -CALCULATE_DEVICE = "npu:0" -CACHE_TRAINING_URL = "/cache/training" -is_best_name = "checkpoint.pth.tar" -def main(): - args = parser.parse_args() - print('===========================') - print(args) - print('===========================') - - if args.npu is None: - args.npu = 0 - global CALCULATE_DEVICE - if 'npu' in CALCULATE_DEVICE: - torch.npu.set_device(CALCULATE_DEVICE) - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.npu is not None: - warnings.warn('You have chosen a specific NPU. 
This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - # ngpus_per_node = torch.cuda.device_count() - args.process_device_map = device_id_to_process_device_map(args.device_list) - if args.device == 'npu': - ngpus_per_node = len(args.process_device_map) - else: - ngpus_per_node = torch.cuda.device_count() - - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - main_worker(args.local_rank, ngpus_per_node, args) - else: - # Simply call main_worker function - main_worker(args.npu, ngpus_per_node, args) - - - -def main_worker(npu, ngpus_per_node, args): - global best_acc1 - # args.npu = npu - - # args.npu = args.process_device_map[npu] - - if args.npu is not None: - print("Use NPU: {} for training".format(args.npu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + npu - if args.device == 'npu': - os.environ['MASTER_ADDR'] = '127.0.0.1' # args.addr - os.environ['MASTER_PORT'] = '29688' - dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - else: - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # dist.init_process_group(backend=args.dist_backend, world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - CACHE_MODEL_URL = "/cache/model" - # ------------------modelarts modification---------------------- - os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth")) - # ------------------modelarts modification--------------------- - pth = os.path.join(CACHE_MODEL_URL, "checkpoint.pth") - - pretrained_dict = torch.load(pth, map_location="cpu") - - model = mnasnet.mnasnet1_0(num_classes=args.class_num) - if "classifier.1.weight" in pretrained_dict: - pretrained_dict.pop("classifier.1.weight") - pretrained_dict.pop("classifier.1.bias") - if "module.classifier.1.weight" in pretrained_dict: - pretrained_dict.pop("module.classifier.1.weight") - pretrained_dict.pop("module.classifier.1.bias") - model.load_state_dict(pretrained_dict, strict=False) - - else: - print("=> creating model '{}'".format('mansnet')) - model = mnasnet.mnasnet1_0(num_classes=args.class_num) - # model = models.__dict__[args.arch]() - - args.loss_scale = 128 - - loc = 'npu:{}'.format(args.npu) - torch.npu.set_device(loc) - - args.batch_size = int(args.batch_size / ngpus_per_node) - - - # -------modelarts modification------- - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." 
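The distributed bookkeeping above reduces to three pieces of integer arithmetic: the world size is multiplied by the number of devices per node, each process gets global rank `node_rank * ngpus_per_node + local_device`, and the global batch size is split evenly across processes. A worked example for a single 8-NPU node with the default batch size of 256:

```
# Worked example of the arithmetic above (single node, values illustrative).
ngpus_per_node = 8
node_world_size = 1        # --world-size before scaling
node_rank = 0              # --rank of this node
local_device = 3           # this process's NPU index on the node

world_size = ngpus_per_node * node_world_size              # -> 8 processes in total
global_rank = node_rank * ngpus_per_node + local_device    # -> 3
per_process_batch = int(256 / ngpus_per_node)              # -> 32 images per NPU per step
print(world_size, global_rank, per_process_batch)
```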
% real_path) - # ---------modelarts modification----- - # Data loading code - traindir = os.path.join(real_path, 'train') - valdir = os.path.join(real_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), #Too slow - normalize, - ])) - val_dataset = datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - - train_sampler = None - val_sampler = None - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - # val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, - pin_memory=False, - sampler=train_sampler, - # collate_fn=fast_collate, - drop_last=True) - - val_loader = torch.utils.data.DataLoader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, pin_memory=False, - sampler=val_sampler, - # collate_fn=fast_collate, - drop_last=True) - - model = model.to(loc) - # define loss function (criterion) and optimizer - # criterion = nn.CrossEntropyLoss().to(loc) - criterion = LabelSmoothingCrossEntropy().to(loc) - - optimizer = torch.optim.SGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay, - nesterov=True) - lr_schedule = CosineWithWarmup(optimizer, args.warmup, 0.1, args.epochs) - - model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale=args.loss_scale) - if args.multiprocessing_distributed: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - lr_schedule.step(epoch) - if args.rank == 0: - print('lr = ', lr_schedule.get_lr()[0]) - file = open('log.txt', 'a') - print('lr = ', lr_schedule.get_lr()[0], file=file) - file.close() - - if args.run_prof: - runprof(train_loader, model, criterion, optimizer, epoch, args) - print('output to output.prof') - return - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 >= best_acc1 - best_acc1 = max(acc1, best_acc1) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 
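`CosineWithWarmup(optimizer, args.warmup, 0.1, args.epochs)` is referenced above but its implementation is not part of this hunk. Purely to illustrate the kind of schedule such a class usually provides (a linear ramp over the warmup epochs followed by cosine decay), here is a generic stand-in built on `LambdaLR`; it is an assumption about the schedule's shape, not the repository's actual class.

```
import math
import torch

# Generic linear-warmup + cosine-decay schedule; a stand-in sketch only.
def cosine_with_warmup(optimizer, warmup_epochs, total_epochs, min_scale=0.0):
    def scale(epoch):
        if warmup_epochs > 0 and epoch < warmup_epochs:
            return (epoch + 1) / warmup_epochs                      # linear ramp-up
        progress = (epoch - warmup_epochs) / max(1, total_epochs - warmup_epochs)
        return min_scale + (1 - min_scale) * 0.5 * (1 + math.cos(math.pi * progress))
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=scale)

# Usage mirrors the epoch loop above: advance the schedule once per epoch.
opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1, momentum=0.9)
sched = cosine_with_warmup(opt, warmup_epochs=5, total_epochs=90)
for epoch in range(90):
    # ... train one epoch ...
    sched.step()
```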
args.arch, - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best, args) - - if args.onnx: - pth2onnx.convert_pth_to_onnx(args) - # --------------modelarts modification---------- - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - -def train(train_loader, model, criterion, optimizer, epoch, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - FPS = AverageMeter('FPS', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5, FPS], - prefix="Epoch: [{}]".format(epoch), - fpath='./log.txt') - - # switch to train mode - model.train() - - end = time.time() - # prefetcher = data_prefetcher(train_loader) - # images, target = prefetcher.next() - # i = -1 - # while images is not None: - for i, (images, target) in enumerate(train_loader): - # i += 1 - # measure data loading time - data_time.update(time.time() - end) - - loc = 'npu:{}'.format(args.npu) - target = target.to(torch.int32) - images, target = images.to(loc, non_blocking=False), target.to(loc, non_blocking=False) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - #loss.backward() - - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - - optimizer.step() - # measure elapsed time - batch_time_nw = time.time() - end; - if i >= 5: - batch_time.update(batch_time_nw) - if i >= 2: - batch_size = images.size(0) - FPS.update(batch_size / batch_time_nw * args.world_size) - - end = time.time() - if i % args.print_freq == 0 and args.rank == 0: - progress.display(i) - # images, target = prefetcher.next() - print('NPU: {}, solve {} batchs'.format(args.rank, i)) - - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ', - fpath='./log.txt') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - - # prefetcher = data_prefetcher(val_loader) - # images, target = prefetcher.next() - # i = -1 - # while images is not None: - for i, (images, target) in enumerate(val_loader): - # i += 1 - loc = 'npu:{}'.format(args.npu) - target = target.to(torch.int32) - images, target = images.to(loc, non_blocking=False), target.to(loc, non_blocking=False) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % args.print_freq == 0 and args.rank == 0: - progress.display(i) - # images, target = prefetcher.next() - - print(' * Acc@1 {top1.avg:.3f} Acc@5 
{top5.avg:.3f}' - .format(top1=top1, top5=top5)) - file = open('log.txt', 'a') - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5), - file=file) - file.close() - return top1.avg - - -def runprof(train_loader, model, criterion, optimizer, epoch, args): - # switch to train mode - model.train() - prefetcher = data_prefetcher(train_loader) - images, target = prefetcher.next() - i = -1 - while images is not None: - i += 1 - if args.npu is not None: - images = images.cuda(args.npu, non_blocking=True) - - if 'npu' in CALCULATE_DEVICE: - target = target.to(torch.int32) - images, target = images.to(CALCULATE_DEVICE, non_blocking=True), target.to(CALCULATE_DEVICE, non_blocking=True) - - if i >= 5: - with torch.autograd.profiler.profile(use_cuda=True) as prof: - out = model(images) - loss = criterion(out, target) - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - prof.export_chrome_trace("output.prof") - return - else: - output = model(images) - loss = criterion(output, target) - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - images, target = prefetcher.next() - - -def save_checkpoint(state, is_best, args, filename='checkpoint.pth.tar'): - if not os.path.exists(CACHE_TRAINING_URL): - os.makedirs(CACHE_TRAINING_URL, 0o755) - - checkpoint_save_path = os.path.join(CACHE_TRAINING_URL, filename) - torch.save(state, checkpoint_save_path) - if is_best: - # shutil.copyfile(filename, 'model_best.pth.tar') - args.is_best_name = args.train_url + 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch']) - mox.file.copy_parallel(checkpoint_save_path, args.is_best_name) - - -class LabelSmoothingCrossEntropy(nn.Module): - def __init__(self, eps=0.1, reduction='mean'): - super(LabelSmoothingCrossEntropy, self).__init__() - self.eps = eps - self.reduction = reduction - - def forward(self, output, target): - c = output.size()[-1] - log_preds = F.log_softmax(output, dim=-1) - if self.reduction == 'sum': - loss = -log_preds.sum() - else: - loss = -log_preds.sum(dim=-1) - if self.reduction == 'mean': - loss = loss.mean() - return loss * self.eps / c + (1 - self.eps) * F.nll_loss(log_preds, target, reduction=self.reduction) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix="", fpath=None): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - if fpath is not None: - self.file = open(fpath, 'a') - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - if self.file is not None: - self.file.write('\t'.join(entries)) - self.file.write('\n') - self.file.flush() - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + 
fmt.format(num_batches) + ']' - - def close(self): - if self.file is not None: - self.file.close() - - -class CosineWithWarmup(torch.optim.lr_scheduler._LRScheduler): - """ Implements a schedule where the first few epochs are linear warmup, and - then there's cosine annealing after that.""" - - def __init__(self, optimizer: torch.optim.Optimizer, warmup_len: int, - warmup_start_multiplier: float, max_epochs: int, - last_epoch: int = -1): - if warmup_len < 0: - raise ValueError("Warmup can't be less than 0.") - self.warmup_len = warmup_len - if not (0.0 <= warmup_start_multiplier <= 1.0): - raise ValueError( - "Warmup start multiplier must be within [0.0, 1.0].") - self.warmup_start_multiplier = warmup_start_multiplier - if max_epochs < 1 or max_epochs < warmup_len: - raise ValueError("Max epochs must be longer than warm-up.") - self.max_epochs = max_epochs - self.cosine_len = self.max_epochs - self.warmup_len - self.eta_min = 0.0 # Final LR multiplier of cosine annealing - super().__init__(optimizer, last_epoch) - - def get_lr(self): - if self.last_epoch > self.max_epochs: - raise ValueError( - "Epoch may not be greater than max_epochs={}.".format( - self.max_epochs)) - if self.last_epoch < self.warmup_len or self.cosine_len == 0: - # We're in warm-up, increase LR linearly. End multiplier is implicit 1.0. - slope = (1.0 - self.warmup_start_multiplier) / self.warmup_len - lr_multiplier = self.warmup_start_multiplier + slope * self.last_epoch - else: - # We're in the cosine annealing part. Note that the implementation - # is different from the paper in that there's no additive part and - # the "low" LR is not limited by eta_min. Instead, eta_min is - # treated as a multiplier as well. The paper implementation is - # designed for SGDR. - cosine_epoch = self.last_epoch - self.warmup_len - lr_multiplier = self.eta_min + (1.0 - self.eta_min) * ( - 1 + math.cos(math.pi * cosine_epoch / self.cosine_len)) / 2 - assert lr_multiplier >= 0.0 - return [base_lr * lr_multiplier for base_lr in self.base_lrs] - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def fast_collate(batch): - imgs = [img[0] for img in batch] - targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) - w = imgs[0].size[0] - h = imgs[0].size[1] - tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8) - for i, img in enumerate(imgs): - nump_array = np.asarray(img, dtype=np.uint8) - tens = torch.from_numpy(nump_array) - if (nump_array.ndim < 3): - nump_array = np.expand_dims(nump_array, axis=-1) - nump_array = np.rollaxis(nump_array, 2) - - tensor[i] += torch.from_numpy(nump_array) - - return tensor, targets - - -class data_prefetcher(): - def __init__(self, loader): - self.loader = iter(loader) - self.stream = torch.npu.Stream() - self.mean = torch.tensor([0.485 * 255, 0.456 * 
255, 0.406 * 255]).npu().view(1, 3, 1, 1) - self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1) - # With Amp, it isn't necessary to manually convert data to half. - # if args.fp16: - # self.mean = self.mean.half() - # self.std = self.std.half() - self.preload() - - def preload(self): - try: - self.next_input, self.next_target = next(self.loader) - except StopIteration: - self.next_input = None - self.next_target = None - return - with torch.npu.stream(self.stream): - self.next_input = self.next_input.npu(non_blocking=True) - self.next_target = self.next_target.npu(non_blocking=True) - # With Amp, it isn't necessary to manually convert data to half. - # if args.fp16: - # self.next_input = self.next_input.half() - # else: - self.next_input = self.next_input.float() - self.next_input = self.next_input.sub_(self.mean).div_(self.std) - - def next(self): - torch.npu.current_stream().wait_stream(self.stream) - input = self.next_input - target = self.next_target - self.preload() - return input, target - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import random +import shutil +import time +import warnings +import math +import glob +import numpy as np +import sys + +import torch +import torch.npu +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.nn.functional as F +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +from collections import OrderedDict +import torch.onnx +import pth2onnx +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) +import mnasnet + +# modelarts modification +import moxing as mox + + +try: + from apex.parallel import DistributedDataParallel as DDP + from apex.fp16_utils import * + from apex import amp, optimizers +except ImportError: + raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.") + + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data', metavar='DIR', + help='path to dataset') +parser.add_argument('--device', default='npu', type=str, help='npu or gpu') +parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + 
help='mini-batch size (default: 256), this is the total ' + 'batch size of all NPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') + +parser.add_argument('--label-smoothing', '--ls', default=0.1, type=float) + +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=1, type=int, + help='seed for initializing training. ') +parser.add_argument('--npu', default=0, type=int, + help='NPU id to use.') +parser.add_argument('--warmup', default=0, type=int, + help='Warmup epochs.') +parser.add_argument('--local_rank', default=0, type=int, + help="rank id of process") +parser.add_argument('--run-prof', action='store_true', help='only for prof') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N NPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + +parser.add_argument('--pretrained_weight', dest='pretrained_weight', + help='pretrained weight dir') + + +# modelarts +parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', help='path to dataset') +parser.add_argument('--train_url', default="/cache/training", + type=str, + help="setting dir of training output") +parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") +parser.add_argument('--class_num', default=1000, type=int, + help='number of class') + + +best_acc1 = 0 +CALCULATE_DEVICE = "npu:0" +CACHE_TRAINING_URL = "/cache/training" +is_best_name = "checkpoint.pth.tar" +def main(): + args = parser.parse_args() + print('===========================') + print(args) + print('===========================') + + if args.npu is None: + args.npu = 0 + global CALCULATE_DEVICE + if 'npu' in CALCULATE_DEVICE: + torch.npu.set_device(CALCULATE_DEVICE) + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! 
' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.npu is not None: + warnings.warn('You have chosen a specific NPU. This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + # ngpus_per_node = torch.cuda.device_count() + args.process_device_map = device_id_to_process_device_map(args.device_list) + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + ngpus_per_node = torch.cuda.device_count() + + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + main_worker(args.local_rank, ngpus_per_node, args) + else: + # Simply call main_worker function + main_worker(args.npu, ngpus_per_node, args) + + + +def main_worker(npu, ngpus_per_node, args): + global best_acc1 + # args.npu = npu + + # args.npu = args.process_device_map[npu] + + if args.npu is not None: + print("Use NPU: {} for training".format(args.npu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + npu + if args.device == 'npu': + os.environ['MASTER_ADDR'] = '127.0.0.1' # args.addr + os.environ['MASTER_PORT'] = '29688' + dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # dist.init_process_group(backend=args.dist_backend, world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + CACHE_MODEL_URL = "/cache/model" + # ------------------modelarts modification---------------------- + os.makedirs(CACHE_MODEL_URL, exist_ok=True) + mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth")) + # ------------------modelarts modification--------------------- + pth = os.path.join(CACHE_MODEL_URL, "checkpoint.pth") + + pretrained_dict = torch.load(pth, map_location="cpu") + + model = mnasnet.mnasnet1_0(num_classes=args.class_num) + if "classifier.1.weight" in pretrained_dict: + pretrained_dict.pop("classifier.1.weight") + pretrained_dict.pop("classifier.1.bias") + if "module.classifier.1.weight" in pretrained_dict: + pretrained_dict.pop("module.classifier.1.weight") + pretrained_dict.pop("module.classifier.1.bias") + model.load_state_dict(pretrained_dict, strict=False) + + else: + print("=> creating model '{}'".format('mansnet')) + model = mnasnet.mnasnet1_0(num_classes=args.class_num) + # model = models.__dict__[args.arch]() + + args.loss_scale = 128 + + loc = 'npu:{}'.format(args.npu) + torch.npu.set_device(loc) + + args.batch_size = int(args.batch_size / ngpus_per_node) + + + # -------modelarts modification------- + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + mox.file.copy_parallel(args.data_url, real_path) + 
print("training data finish copy to %s." % real_path) + # ---------modelarts modification----- + # Data loading code + traindir = os.path.join(real_path, 'train') + valdir = os.path.join(real_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), #Too slow + normalize, + ])) + val_dataset = datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])) + + train_sampler = None + val_sampler = None + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + # val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, + pin_memory=False, + sampler=train_sampler, + # collate_fn=fast_collate, + drop_last=True) + + val_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=False, + sampler=val_sampler, + # collate_fn=fast_collate, + drop_last=True) + + model = model.to(loc) + # define loss function (criterion) and optimizer + # criterion = nn.CrossEntropyLoss().to(loc) + criterion = LabelSmoothingCrossEntropy().to(loc) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + nesterov=True) + lr_schedule = CosineWithWarmup(optimizer, args.warmup, 0.1, args.epochs) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale=args.loss_scale) + if args.multiprocessing_distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + lr_schedule.step(epoch) + if args.rank == 0: + print('lr = ', lr_schedule.get_lr()[0]) + file = open('log.txt', 'a') + print('lr = ', lr_schedule.get_lr()[0], file=file) + file.close() + + if args.run_prof: + runprof(train_loader, model, criterion, optimizer, epoch, args) + print('output to output.prof') + return + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + # remember best acc@1 and save checkpoint + is_best = acc1 >= best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + save_checkpoint({ 
+ 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best, args) + + if args.onnx: + pth2onnx.convert_pth_to_onnx(args) + # --------------modelarts modification---------- + mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) + # --------------modelarts modification end---------- + +def train(train_loader, model, criterion, optimizer, epoch, args): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + FPS = AverageMeter('FPS', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5, FPS], + prefix="Epoch: [{}]".format(epoch), + fpath='./log.txt') + + # switch to train mode + model.train() + + end = time.time() + # prefetcher = data_prefetcher(train_loader) + # images, target = prefetcher.next() + # i = -1 + # while images is not None: + for i, (images, target) in enumerate(train_loader): + # i += 1 + # measure data loading time + data_time.update(time.time() - end) + + loc = 'npu:{}'.format(args.npu) + target = target.to(torch.int32) + images, target = images.to(loc, non_blocking=False), target.to(loc, non_blocking=False) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + #loss.backward() + + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + + optimizer.step() + # measure elapsed time + batch_time_nw = time.time() - end; + if i >= 5: + batch_time.update(batch_time_nw) + if i >= 2: + batch_size = images.size(0) + FPS.update(batch_size / batch_time_nw * args.world_size) + + end = time.time() + if i % args.print_freq == 0 and args.rank == 0: + progress.display(i) + # images, target = prefetcher.next() + print('NPU: {}, solve {} batchs'.format(args.rank, i)) + + +def validate(val_loader, model, criterion, args): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ', + fpath='./log.txt') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + + # prefetcher = data_prefetcher(val_loader) + # images, target = prefetcher.next() + # i = -1 + # while images is not None: + for i, (images, target) in enumerate(val_loader): + # i += 1 + loc = 'npu:{}'.format(args.npu) + target = target.to(torch.int32) + images, target = images.to(loc, non_blocking=False), target.to(loc, non_blocking=False) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0 and args.rank == 0: + progress.display(i) + # images, target = prefetcher.next() + + print(' * 
Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + file = open('log.txt', 'a') + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5), + file=file) + file.close() + return top1.avg + + +def runprof(train_loader, model, criterion, optimizer, epoch, args): + # switch to train mode + model.train() + prefetcher = data_prefetcher(train_loader) + images, target = prefetcher.next() + i = -1 + while images is not None: + i += 1 + if args.npu is not None: + images = images.cuda(args.npu, non_blocking=True) + + if 'npu' in CALCULATE_DEVICE: + target = target.to(torch.int32) + images, target = images.to(CALCULATE_DEVICE, non_blocking=True), target.to(CALCULATE_DEVICE, non_blocking=True) + + if i >= 5: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + out = model(images) + loss = criterion(out, target) + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + prof.export_chrome_trace("output.prof") + return + else: + output = model(images) + loss = criterion(output, target) + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + images, target = prefetcher.next() + + +def save_checkpoint(state, is_best, args, filename='checkpoint.pth.tar'): + if not os.path.exists(CACHE_TRAINING_URL): + os.makedirs(CACHE_TRAINING_URL, 0o755) + + checkpoint_save_path = os.path.join(CACHE_TRAINING_URL, filename) + torch.save(state, checkpoint_save_path) + if is_best: + # shutil.copyfile(filename, 'model_best.pth.tar') + args.is_best_name = args.train_url + 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch']) + mox.file.copy_parallel(checkpoint_save_path, args.is_best_name) + + +class LabelSmoothingCrossEntropy(nn.Module): + def __init__(self, eps=0.1, reduction='mean'): + super(LabelSmoothingCrossEntropy, self).__init__() + self.eps = eps + self.reduction = reduction + + def forward(self, output, target): + c = output.size()[-1] + log_preds = F.log_softmax(output, dim=-1) + if self.reduction == 'sum': + loss = -log_preds.sum() + else: + loss = -log_preds.sum(dim=-1) + if self.reduction == 'mean': + loss = loss.mean() + return loss * self.eps / c + (1 - self.eps) * F.nll_loss(log_preds, target, reduction=self.reduction) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f'): + self.name = name + self.fmt = fmt + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix="", fpath=None): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + if fpath is not None: + self.file = open(fpath, 'a') + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + if self.file is not None: + self.file.write('\t'.join(entries)) + self.file.write('\n') + self.file.flush() + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' 
+ fmt + '/' + fmt.format(num_batches) + ']' + + def close(self): + if self.file is not None: + self.file.close() + + +class CosineWithWarmup(torch.optim.lr_scheduler._LRScheduler): + """ Implements a schedule where the first few epochs are linear warmup, and + then there's cosine annealing after that.""" + + def __init__(self, optimizer: torch.optim.Optimizer, warmup_len: int, + warmup_start_multiplier: float, max_epochs: int, + last_epoch: int = -1): + if warmup_len < 0: + raise ValueError("Warmup can't be less than 0.") + self.warmup_len = warmup_len + if not (0.0 <= warmup_start_multiplier <= 1.0): + raise ValueError( + "Warmup start multiplier must be within [0.0, 1.0].") + self.warmup_start_multiplier = warmup_start_multiplier + if max_epochs < 1 or max_epochs < warmup_len: + raise ValueError("Max epochs must be longer than warm-up.") + self.max_epochs = max_epochs + self.cosine_len = self.max_epochs - self.warmup_len + self.eta_min = 0.0 # Final LR multiplier of cosine annealing + super().__init__(optimizer, last_epoch) + + def get_lr(self): + if self.last_epoch > self.max_epochs: + raise ValueError( + "Epoch may not be greater than max_epochs={}.".format( + self.max_epochs)) + if self.last_epoch < self.warmup_len or self.cosine_len == 0: + # We're in warm-up, increase LR linearly. End multiplier is implicit 1.0. + slope = (1.0 - self.warmup_start_multiplier) / self.warmup_len + lr_multiplier = self.warmup_start_multiplier + slope * self.last_epoch + else: + # We're in the cosine annealing part. Note that the implementation + # is different from the paper in that there's no additive part and + # the "low" LR is not limited by eta_min. Instead, eta_min is + # treated as a multiplier as well. The paper implementation is + # designed for SGDR. + cosine_epoch = self.last_epoch - self.warmup_len + lr_multiplier = self.eta_min + (1.0 - self.eta_min) * ( + 1 + math.cos(math.pi * cosine_epoch / self.cosine_len)) / 2 + assert lr_multiplier >= 0.0 + return [base_lr * lr_multiplier for base_lr in self.base_lrs] + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def fast_collate(batch): + imgs = [img[0] for img in batch] + targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) + w = imgs[0].size[0] + h = imgs[0].size[1] + tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8) + for i, img in enumerate(imgs): + nump_array = np.asarray(img, dtype=np.uint8) + tens = torch.from_numpy(nump_array) + if (nump_array.ndim < 3): + nump_array = np.expand_dims(nump_array, axis=-1) + nump_array = np.rollaxis(nump_array, 2) + + tensor[i] += torch.from_numpy(nump_array) + + return tensor, targets + + +class data_prefetcher(): + def __init__(self, loader): + self.loader = iter(loader) + self.stream = torch.npu.Stream() + self.mean = torch.tensor([0.485 * 
255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1) + self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1) + # With Amp, it isn't necessary to manually convert data to half. + # if args.fp16: + # self.mean = self.mean.half() + # self.std = self.std.half() + self.preload() + + def preload(self): + try: + self.next_input, self.next_target = next(self.loader) + except StopIteration: + self.next_input = None + self.next_target = None + return + with torch.npu.stream(self.stream): + self.next_input = self.next_input.npu(non_blocking=True) + self.next_target = self.next_target.npu(non_blocking=True) + # With Amp, it isn't necessary to manually convert data to half. + # if args.fp16: + # self.next_input = self.next_input.half() + # else: + self.next_input = self.next_input.float() + self.next_input = self.next_input.sub_(self.mean).div_(self.std) + + def next(self): + torch.npu.current_stream().wait_stream(self.stream) + input = self.next_input + target = self.next_target + self.preload() + return input, target + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/MobileNet/Dockerfile b/PyTorch/contrib/cv/classification/MobileNet/Dockerfile old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/LICENSE b/PyTorch/contrib/cv/classification/MobileNet/LICENSE old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/README.md b/PyTorch/contrib/cv/classification/MobileNet/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/demo.py b/PyTorch/contrib/cv/classification/MobileNet/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/docker_start.sh b/PyTorch/contrib/cv/classification/MobileNet/docker_start.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/mobilenet.py b/PyTorch/contrib/cv/classification/MobileNet/mobilenet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/modelzoo_level.txt b/PyTorch/contrib/cv/classification/MobileNet/modelzoo_level.txt old mode 100755 new mode 100644 index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 --- a/PyTorch/contrib/cv/classification/MobileNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/MobileNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/MobileNet/pthtar2onnx.py b/PyTorch/contrib/cv/classification/MobileNet/pthtar2onnx.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/requirements.txt b/PyTorch/contrib/cv/classification/MobileNet/requirements.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/scripts/eval.sh b/PyTorch/contrib/cv/classification/MobileNet/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/scripts/run_2onnx.sh b/PyTorch/contrib/cv/classification/MobileNet/scripts/run_2onnx.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/scripts/run_demo.sh b/PyTorch/contrib/cv/classification/MobileNet/scripts/run_demo.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/MobileNet/scripts/train_1p.sh 
b/PyTorch/contrib/cv/classification/MobileNet/scripts/train_1p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/scripts/train_8p.sh b/PyTorch/contrib/cv/classification/MobileNet/scripts/train_8p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/env_npu.sh b/PyTorch/contrib/cv/classification/MobileNet/test/env_npu.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/run_2onnx.sh b/PyTorch/contrib/cv/classification/MobileNet/test/run_2onnx.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/run_demo.sh b/PyTorch/contrib/cv/classification/MobileNet/test/run_demo.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/MobileNet/test/train_eval_8p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/MobileNet/test/train_full_1p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/MobileNet/test/train_full_8p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/MobileNet/test/train_performance_1p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/MobileNet/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/MobileNet/test/train_performance_8p.sh
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/classification/OSNet/README.md b/PyTorch/contrib/cv/classification/OSNet/README.md
index 25282368758f9ef3c8c1fc56b28bc62a2906c33f..d3978104b1af8d287f53d6ed92150293bb47a4cb 100644
--- a/PyTorch/contrib/cv/classification/OSNet/README.md
+++ b/PyTorch/contrib/cv/classification/OSNet/README.md
@@ -1,72 +1,72 @@
-# OSNet
-
-This implements training of OSNet on the Market-1501 dataset, mainly modified from [KaiyangZhou/deep-person-reid](https://github.com/KaiyangZhou/deep-person-reid).
-
-## OSNet Detail
-
-As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, OSNet is re-implemented using semantics such as custom OP.
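The "contiguous operations" remark above recurs across the model ports in this patch. As a hedged illustration only (not code from this repository), the rewrite it refers to usually amounts to making strided views explicit with `.contiguous()`, so the permuted layout is materialised once instead of being resolved repeatedly by later NPU kernels; the function name and shapes below are illustrative assumptions.

```python
import torch

def channel_shuffle(x: torch.Tensor, groups: int) -> torch.Tensor:
    """Illustrative sketch: reorder channels with one explicit contiguous copy.

    Assumes x has shape (N, C, H, W) and C is divisible by `groups`.
    """
    n, c, h, w = x.size()
    x = x.view(n, groups, c // groups, h, w)
    # .contiguous() materialises the permuted layout up front, which is the
    # kind of "contiguous operation" rewrite these READMEs allude to.
    x = x.permute(0, 2, 1, 3, 4).contiguous()
    return x.view(n, c, h, w)

if __name__ == "__main__":
    print(channel_shuffle(torch.randn(2, 8, 4, 4), groups=2).shape)  # torch.Size([2, 8, 4, 4])
```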
- - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) - - -- `pip install -r requirements.txt` - -- Install torchreid - - - ~~~python - python setup.py develop - ~~~ - -- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 - - - ~~~shell - unzip Market-1501-v15.09.15.zip - ~~~ - -- Move Market-1501 dataset to 'reid-data' path - - - ~~~shell - mkdir path_to_osnet/reid-data/ - mv Market-1501-v15.09.15 path_to_osnet/reid-data/market1501 - ~~~ -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -```bash -# training 1p accuracy -bash test/train_full_1p.sh - -# training 1p performance -bash test/train_performance_1p.sh - -# training 8p accuracy -bash test/train_full_8p.sh - -# training 8p performance -bash test/train_performance_8p.sh - -# finetuning -bash test/train_finetune_1p.sh --data_path=real_data_path --weight=real_weight_path - -# Online inference demo -python demo.py -## 备注: 识别前后图片保存到 `inference/` 文件夹下 - -# To ONNX -python pthtar2onnx.py -``` - -## OSNet training result - - -| | mAP | AMP_Type | Epochs | FPS | -| :----: | :--: | :------: | :----: | :------: | -| 1p-GPU | - | O2 | 1 | 371.383 | -| 1p-NPU | - | O2 | 1 | 366.464 | -| 8p-GPU | 80.3 | O2 | 350 | 1045.535 | -| 8p-NPU | 80.2 | O2 | 350 | 1091.358 | - +# OSNet + +This implements training of OSNet on the Market-1501 dataset, mainly modified from [KaiyangZhou/deep-person-reid](https://github.com/KaiyangZhou/deep-person-reid). + +## OSNet Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, OSNet is re-implemented using semantics such as custom OP. + + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) + + +- `pip install -r requirements.txt` + +- Install torchreid + + - ~~~python + python setup.py develop + ~~~ + +- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 + + - ~~~shell + unzip Market-1501-v15.09.15.zip + ~~~ + +- Move Market-1501 dataset to 'reid-data' path + + - ~~~shell + mkdir path_to_osnet/reid-data/ + mv Market-1501-v15.09.15 path_to_osnet/reid-data/market1501 + ~~~ +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + +```bash +# training 1p accuracy +bash test/train_full_1p.sh + +# training 1p performance +bash test/train_performance_1p.sh + +# training 8p accuracy +bash test/train_full_8p.sh + +# training 8p performance +bash test/train_performance_8p.sh + +# finetuning +bash test/train_finetune_1p.sh --data_path=real_data_path --weight=real_weight_path + +# Online inference demo +python demo.py +## 备注: 识别前后图片保存到 `inference/` 文件夹下 + +# To ONNX +python pthtar2onnx.py +``` + +## OSNet training result + + +| | mAP | AMP_Type | Epochs | FPS | +| :----: | :--: | :------: | :----: | :------: | +| 1p-GPU | - | O2 | 1 | 371.383 | +| 1p-NPU | - | O2 | 1 | 366.464 | +| 8p-GPU | 80.3 | O2 | 350 | 1045.535 | +| 8p-NPU | 80.2 | O2 | 350 | 1091.358 | + diff --git a/PyTorch/contrib/cv/classification/OSNet/main.py b/PyTorch/contrib/cv/classification/OSNet/main.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/OSNet/torchreid/metrics/rank_cylib/__init__.py b/PyTorch/contrib/cv/classification/OSNet/torchreid/metrics/rank_cylib/__init__.py index ec62d364d8b38dbbf3b84560aa33dccabf2ca1a7..f3bbfaaf4fd8806d74ba09433bd0b9d3dbee28d4 100644 --- 
a/PyTorch/contrib/cv/classification/OSNet/torchreid/metrics/rank_cylib/__init__.py +++ b/PyTorch/contrib/cv/classification/OSNet/torchreid/metrics/rank_cylib/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/PCB/README.md b/PyTorch/contrib/cv/classification/PCB/README.md index 1ea64639bac4bb35baec79266531e0cdc0c4c6a2..5bbfba757ea60c5dc5627687b1838750196655c2 100644 --- a/PyTorch/contrib/cv/classification/PCB/README.md +++ b/PyTorch/contrib/cv/classification/PCB/README.md @@ -1,61 +1,61 @@ -# PCB - -This implements training of PCB on the Market-1501 dataset, mainly modified from [syfafterzy/PCB_RPP_for_reID](https://github.com/syfafterzy/PCB_RPP_for_reID). - -## PCB Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, PCB is re-implemented using semantics such as custom OP. - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) - - - ~~~ - PyTorch版本:CANN 5.0.T205 PT>=20210618 - ~~~ - -- `pip install -r requirements.txt` - -- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 - - - ~~~shell - unzip Market-1501-v15.09.15.zip - ~~~ - -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -```bash -# training 1p accuracy -bash scripts/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash scripts/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash scripts/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash scripts/train_performance_8p.sh --data_path=real_data_path - -# Online inference demo -python demo.py --data_path real_data_path --device npu -## 备注: 识别前后图片保存到 `inference/` 文件夹下 - -# To ONNX -python pthtar2onnx.py - -``` - -## PCB training result - - -| | mAP | AMP_Type | Epochs | FPS | -| :----: | :--: | :------: | :----: | :------: | -| 1p-GPU | - | O2 | 1 | 568.431 | -| 1p-NPU | - | O2 | 1 | 571.723 | -| 8p-GPU | 77.2 | O2 | 60 | 3600.983 | -| 8p-NPU | 77.5 | O2 | 60 | 2750.401 | - +# PCB + +This implements training of PCB on the Market-1501 dataset, mainly modified from [syfafterzy/PCB_RPP_for_reID](https://github.com/syfafterzy/PCB_RPP_for_reID). + +## PCB Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, PCB is re-implemented using semantics such as custom OP. 
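The AMP_Type column in the result tables below refers to apex mixed precision, which the training scripts in this patch drive through `amp.initialize` and `amp.scale_loss` (see the MnasNet ModelArts script earlier in this diff). The sketch below is a hedged reduction of that pattern: the model, optimizer, device string and opt_level are placeholders rather than code from this repository, and it assumes a working apex install on an Ascend PyTorch build (`torch.npu`) or CUDA device.

```python
import torch
import torch.nn as nn
from apex import amp  # assumes apex is installed, as these ports require

def make_amp_train_step(device="npu:0", opt_level="O2", loss_scale=128):
    """Hedged sketch of the amp.initialize / amp.scale_loss loop used in this patch.

    The MnasNet script above uses opt_level "O1" with a static loss_scale of 128,
    while the README tables report O2 runs. Model and optimizer are placeholders.
    """
    model = nn.Linear(128, 10).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    criterion = nn.CrossEntropyLoss().to(device)
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=opt_level, loss_scale=loss_scale)

    def step(features, target):
        # the NPU ports additionally cast targets to int32 before moving them
        output = model(features.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()  # gradients are unscaled when the context exits
        optimizer.step()
        return loss.item()

    return step
```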
+ + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) + + - ~~~ + PyTorch版本:CANN 5.0.T205 PT>=20210618 + ~~~ + +- `pip install -r requirements.txt` + +- Download the Market-1501 dataset from https://paperswithcode.com/dataset/market-1501 + + - ~~~shell + unzip Market-1501-v15.09.15.zip + ~~~ + +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + +```bash +# training 1p accuracy +bash scripts/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash scripts/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash scripts/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash scripts/train_performance_8p.sh --data_path=real_data_path + +# Online inference demo +python demo.py --data_path real_data_path --device npu +## 备注: 识别前后图片保存到 `inference/` 文件夹下 + +# To ONNX +python pthtar2onnx.py + +``` + +## PCB training result + + +| | mAP | AMP_Type | Epochs | FPS | +| :----: | :--: | :------: | :----: | :------: | +| 1p-GPU | - | O2 | 1 | 568.431 | +| 1p-NPU | - | O2 | 1 | 571.723 | +| 8p-GPU | 77.2 | O2 | 60 | 3600.983 | +| 8p-NPU | 77.5 | O2 | 60 | 2750.401 | + diff --git a/PyTorch/contrib/cv/classification/PCB/README_raw.md b/PyTorch/contrib/cv/classification/PCB/README_raw.md index fb1ecd0cbc8ea48dd783d712cc9b6ad8d5548b0b..4cad0051b786313fd28ce14850cb45befdfeda20 100644 --- a/PyTorch/contrib/cv/classification/PCB/README_raw.md +++ b/PyTorch/contrib/cv/classification/PCB/README_raw.md @@ -1,51 +1,51 @@ -# Part-based Convolutional Baseline for Person Retrieval and the Refined Part Pooling - -Code for the paper [Beyond Part Models: Person Retrieval with Refined Part Pooling (and A Strong Convolutional Baseline)](https://arxiv.org/pdf/1711.09349.pdf). - -**This code is ONLY** released for academic use. - -## Preparation - - -**Prerequisite: Python 2.7 and Pytorch 0.3+** - -1. Install [Pytorch](https://pytorch.org/) - -2. Download dataset - a. Market-1501 [BaiduYun](https://pan.baidu.com/s/1ntIi2Op?errno=0&errmsg=Auth%20Login%20Sucess&&bduss=&ssnerror=0&traceid=) - b. DukeMTMC-reID[BaiduYun](https://pan.baidu.com/share/init?surl=jS0XM7Var5nQGcbf9xUztw) (password:bhbh) - c. Move them to ```~/datasets/Market-1501/(DukeMTMC-reID)``` - - -## train PCB - - -```sh train_PCB.sh``` -With Pytorch 0.4.0, we shall get about 93.0% rank-1 accuracy and 78.0% mAP on Market-1501. - - -## train RPP - - -```sh train_RPP.sh``` -With Pytorch 0.4.0, we shall get about 93.5% rank-1 accuracy and 81.5% mAP on Market-1501. - - -## Citiaion - - -Please cite this paper in your publications if it helps your research: - - -``` -@inproceedings{sun2018PCB, - author = {Yifan Sun and - Liang Zheng and - Yi Yang and - Qi Tian and - Shengjin Wang}, - title = {Beyond Part Models: Person Retrieval with Refined Part Pooling (and A Strong Convolutional Baseline)}, - booktitle = {ECCV}, - year = {2018}, -} -``` +# Part-based Convolutional Baseline for Person Retrieval and the Refined Part Pooling + +Code for the paper [Beyond Part Models: Person Retrieval with Refined Part Pooling (and A Strong Convolutional Baseline)](https://arxiv.org/pdf/1711.09349.pdf). + +**This code is ONLY** released for academic use. + +## Preparation + + +**Prerequisite: Python 2.7 and Pytorch 0.3+** + +1. Install [Pytorch](https://pytorch.org/) + +2. Download dataset + a. Market-1501 [BaiduYun](https://pan.baidu.com/s/1ntIi2Op?errno=0&errmsg=Auth%20Login%20Sucess&&bduss=&ssnerror=0&traceid=) + b. 
DukeMTMC-reID[BaiduYun](https://pan.baidu.com/share/init?surl=jS0XM7Var5nQGcbf9xUztw) (password:bhbh) + c. Move them to ```~/datasets/Market-1501/(DukeMTMC-reID)``` + + +## train PCB + + +```sh train_PCB.sh``` +With Pytorch 0.4.0, we shall get about 93.0% rank-1 accuracy and 78.0% mAP on Market-1501. + + +## train RPP + + +```sh train_RPP.sh``` +With Pytorch 0.4.0, we shall get about 93.5% rank-1 accuracy and 81.5% mAP on Market-1501. + + +## Citiaion + + +Please cite this paper in your publications if it helps your research: + + +``` +@inproceedings{sun2018PCB, + author = {Yifan Sun and + Liang Zheng and + Yi Yang and + Qi Tian and + Shengjin Wang}, + title = {Beyond Part Models: Person Retrieval with Refined Part Pooling (and A Strong Convolutional Baseline)}, + booktitle = {ECCV}, + year = {2018}, +} +``` diff --git a/PyTorch/contrib/cv/classification/RegNetY-1.6GF/utils/progress/setup.py b/PyTorch/contrib/cv/classification/RegNetY-1.6GF/utils/progress/setup.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/RegNetY-1.6GF/utils/progress/test_progress.py b/PyTorch/contrib/cv/classification/RegNetY-1.6GF/utils/progress/test_progress.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/RepVGG/arch.PNG b/PyTorch/contrib/cv/classification/RepVGG/arch.PNG deleted file mode 100644 index a17a7ac3926baf8c1108dee5bbcdaad2e2bf6a53..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/classification/RepVGG/arch.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/classification/RepVGG/speed_acc.PNG b/PyTorch/contrib/cv/classification/RepVGG/speed_acc.PNG deleted file mode 100644 index b785cfc6f159049a89281a940a894bbe0f542f68..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/classification/RepVGG/speed_acc.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/classification/RepVGG/table.PNG b/PyTorch/contrib/cv/classification/RepVGG/table.PNG deleted file mode 100644 index 242979935344fde11954ab33778132918a1a802f..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/classification/RepVGG/table.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/LICENSE b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/LICENSE index 48eadfa5dfed22d5fdc95dfef2ca0625bc71338c..dfcc682b4b265c524b676eea5c382472c09f42c4 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/LICENSE +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/README.md index 8b557e9a40453be00dea57388ff47cbd92dceed2..c668ce4dedb2535b4c990e3064c13c9fd64c059f 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/README.md @@ -1,50 +1,50 @@ -# ResNeXt-101-32x8d - -This implements training of ResNeXt-101-32x8d on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/pytorch/examples/tree/master/imagenet). - -## ResNeXt-101-32x8d Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, ResNeXt-101-32x8d is re-implemented using semantics such as custom OP. - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- Download the ImageNet dataset from http://www.image-net.org/ - - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) - -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -# 1p training 1p -bash ./test/train_full_1p.sh --data_path=xxx # training accuracy - -bash ./test/train_performance_1p.sh --data_path=xxx # training performance - -# 8p training 8p -bash ./test/train_full_8p.sh --data_path=xxx # training accuracy - -bash ./test/train_performance_8p.sh --data_path=xxx # training performance - -# eval default 8p, should support 1p -bash ./test/train_eval_8p.sh --data_path=xxx - -# O2 online inference demo -source scripts/set_npu_env.sh -python3.7.5 demo.py - -# O2 To ONNX -source scripts/set_npu_env.sh -python3.7.5 pthtar2onnx.py - - -``` - -## ResNeXt-101-32x8d training result - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 345 | 1 | 1 | O2 | -| 79.173 | 2673 | 8 | 90 | O2 | +# ResNeXt-101-32x8d + +This implements training of ResNeXt-101-32x8d on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/pytorch/examples/tree/master/imagenet). + +## ResNeXt-101-32x8d Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, ResNeXt-101-32x8d is re-implemented using semantics such as custom OP. 
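
The note above about contiguous operations can be made concrete with a small, self-contained sketch (not part of this repository; the shapes and the use of `permute` are illustrative assumptions): a transposed view costs nothing, while the explicit `.contiguous()` call that operations requiring contiguous memory trigger performs a full copy, and that copy is the overhead the custom-OP re-implementation tries to avoid.

```python
import time

import torch

# A permute returns a non-contiguous view without moving data; the explicit
# .contiguous() afterwards performs the full copy that the README flags as
# expensive on Ascend devices. Shapes here are arbitrary.
x = torch.randn(64, 256, 56, 56)
y = x.permute(0, 2, 3, 1)             # view only, no data movement yet
print(y.is_contiguous())              # False

start = time.time()
y = y.contiguous()                    # the real copy happens here
print(f"copy took {time.time() - start:.4f}s, contiguous={y.is_contiguous()}")
```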
+ + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- Download the ImageNet dataset from http://www.image-net.org/ + - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) + +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + +# 1p training 1p +bash ./test/train_full_1p.sh --data_path=xxx # training accuracy + +bash ./test/train_performance_1p.sh --data_path=xxx # training performance + +# 8p training 8p +bash ./test/train_full_8p.sh --data_path=xxx # training accuracy + +bash ./test/train_performance_8p.sh --data_path=xxx # training performance + +# eval default 8p, should support 1p +bash ./test/train_eval_8p.sh --data_path=xxx + +# O2 online inference demo +source scripts/set_npu_env.sh +python3.7.5 demo.py + +# O2 To ONNX +source scripts/set_npu_env.sh +python3.7.5 pthtar2onnx.py + + +``` + +## ResNeXt-101-32x8d training result + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 345 | 1 | 1 | O2 | +| 79.173 | 2673 | 8 | 90 | O2 | diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/demo.py b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/demo.py index 52524242c67de7b77a8aa06dcec0d72e4883de55..aca3a3d39725aab089a0dae04cf05d936f51d068 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/demo.py +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/demo.py @@ -1,73 +1,73 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import argparse -import torch -import torchvision -from torchvision import datasets, transforms -import models.resnet_0_6_0 as resnet_0_6_0 -from collections import OrderedDict - -def proc_node_module(checkpoint, attr_name): - new_state_dict = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -def get_raw_data(): - from PIL import Image - from urllib.request import urlretrieve - IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' - urlretrieve(IMAGE_URL, 'tmp.jpg') - img = Image.open("tmp.jpg") - img = img.convert('RGB') - return img - -def test(): - loc = 'npu:0' - loc_cpu = 'cpu' - torch.npu.set_device(loc) - checkpoint = torch.load("./checkpoint.pth.tar", map_location=loc) - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = resnet_0_6_0.resnext101_32x8d() - model = model.to(loc) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - rd = get_raw_data() - data_transfrom = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize]) - - inputs = data_transfrom(rd) - inputs = inputs.unsqueeze(0) - inputs = inputs.to(loc) - output = model(inputs) - output = output.to(loc_cpu) - - _, pred = output.topk(1, 1, True, True) - result = torch.argmax(output, 1) - print("class: ", pred[0][0].item()) - print(result) - -if __name__ == "__main__": +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import argparse +import torch +import torchvision +from torchvision import datasets, transforms +import models.resnet_0_6_0 as resnet_0_6_0 +from collections import OrderedDict + +def proc_node_module(checkpoint, attr_name): + new_state_dict = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +def get_raw_data(): + from PIL import Image + from urllib.request import urlretrieve + IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' + urlretrieve(IMAGE_URL, 'tmp.jpg') + img = Image.open("tmp.jpg") + img = img.convert('RGB') + return img + +def test(): + loc = 'npu:0' + loc_cpu = 'cpu' + torch.npu.set_device(loc) + checkpoint = torch.load("./checkpoint.pth.tar", map_location=loc) + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + model = resnet_0_6_0.resnext101_32x8d() + model = model.to(loc) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + rd = get_raw_data() + data_transfrom = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + + inputs = data_transfrom(rd) + inputs = inputs.unsqueeze(0) + inputs = inputs.to(loc) + output = model(inputs) + output = output.to(loc_cpu) + + _, pred = output.topk(1, 1, True, True) + result = torch.argmax(output, 1) + print("class: ", pred[0][0].item()) + print(result) + +if __name__ == "__main__": test() \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/main.py b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/main.py index 8273d29d996b9ab65df5444047979f1c09249813..2eae3c6dc7276974225bb5e6838fe30febc7191c 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/main.py +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/main.py @@ -1,672 +1,672 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
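
The re-added `demo.py` above normalises checkpoint keys before `load_state_dict`, dropping the `module.` prefix that `DataParallel`/`DistributedDataParallel` prepends when a wrapped model is saved. A minimal sketch of that step in isolation (the commented-out usage, including the checkpoint path and model object, is hypothetical and not taken from this repository):

```python
from collections import OrderedDict


def strip_module_prefix(state_dict):
    """Drop the 'module.' prefix that (Distributed)DataParallel adds to keys."""
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        cleaned[key[7:] if key.startswith("module.") else key] = value
    return cleaned


# Hypothetical usage: restore a checkpoint saved from a wrapped model into a
# bare (unwrapped) model instance.
# checkpoint = torch.load("checkpoint.pth.tar", map_location="cpu")
# model.load_state_dict(strip_module_prefix(checkpoint["state_dict"]))
```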
-# ============================================================================ - -import warnings -import argparse -import os -import random -import shutil -import time -import warnings -import torch -import numpy as np -import apex -from apex import amp -import torch.nn as nn -import torch.nn.parallel -import torch.npu -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import models.resnet_0_6_0 as resnet_0_6_0 - -warnings.filterwarnings('ignore') -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default=None, type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. 
This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -## for ascend 910 -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--addr', default='10.136.181.115', - type=str, help='master addr') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', - type=str, help='device id list') -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') -parser.add_argument('--warm_up_epochs', default=5, type=int, - help='warm up') -best_acc1 = 0 - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def main(): - args = parser.parse_args() - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. 
This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - ngpus_per_node = len(args.process_device_map) - else: - if args.distributed: - ngpus_per_node = torch.cuda.device_count() - else: - ngpus_per_node = 1 - print('ngpus_per_node:', ngpus_per_node) - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - #mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - main_worker(args.gpu, ngpus_per_node, args) - - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = args.process_device_map[gpu] - - if args.distributed: - args.gpu = args.rank - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - - if args.device == 'npu': - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(loc) - dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - else: - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model resnext101_32x8d") - model = resnet_0_6_0.resnext101_32x8d(pretrained=False, progress=True) - print("Load my train models...") - pretrained_dict = \ - torch.load("checkpoint.pth.tar", map_location="cpu")["state_dict"] - model.load_state_dict({k.replace('module.', ''): v for k, v in pretrained_dict.items()}, strict=False) - if "fc.weight" in pretrained_dict: - pretrained_dict.pop('fc.weight') - pretrained_dict.pop('fc.bias') - if "module.fc.weight" in pretrained_dict: - pretrained_dict.pop('module.fc.weight') - pretrained_dict.pop('module.fc.bias') - for param in model.parameters(): - param.requires_grad = False - model.fc = nn.Linear(2048, 1000) - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model resnext101_32x8d") - model = resnet_0_6_0.resnext101_32x8d() - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / args.world_size) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - else: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = model.to(loc) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(args.gpu) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - print("before : model = torch.nn.DataParallel(model).cuda()") - - # define loss function (criterion) and optimizer - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - - if args.amp: - model, optimizer = amp.initialize( - model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - if args.pretrained: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, - find_unused_parameters=True) - else: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) - else: - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = torch.nn.DataParallel(model).to(loc) - else: - model = torch.nn.DataParallel(model).cuda() - print(torch.npu.synchronize(), "-- A 2.0 ------") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - criterion = nn.CrossEntropyLoss().to(loc) - else: - criterion = nn.CrossEntropyLoss().cuda(args.gpu) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=( - train_sampler is None), - num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=False, drop_last=True) - - if args.evaluate: - 
validate(val_loader, model, criterion, args, ngpus_per_node) - return - - if args.prof: - profiling(train_loader, model, criterion, optimizer, args) - return - - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - - adjust_learning_rate(optimizer, epoch, args) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - if args.device == 'npu' and args.gpu == 0 and epoch == 89: - print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - - ############## npu modify begin ############# - if args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'resnext101_32x8d', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'resnext101_32x8d', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - }, is_best) - ############## npu modify end ############# - - -def profiling(data_loader, model, criterion, optimizer, args): - # switch to train mode - model.train() - - def update(model, images, target, optimizer): - output = model(images) - loss = criterion(output, target) - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.zero_grad() - optimizer.step() - - for step, (images, target) in enumerate(data_loader): - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - if step < 5: - update(model, images, target, optimizer) - else: - if args.device == 'npu': - with torch.autograd.profiler.profile(use_npu=True) as prof: - update(model, images, target, optimizer) - else: - with torch.autograd.profiler.profile(use_cuda=True) as prof: - update(model, images, target, optimizer) - break - - prof.export_chrome_trace("output.prof") - - -def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = 
criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - if args.device == 'npu': - torch.npu.synchronize() - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, - 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size * args.world_size / batch_time.avg)) - - -def validate(val_loader, model, criterion, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc).to(torch.float) - else: - images = images.cuda(args.gpu, non_blocking=True) - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=2): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - 
self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - # lr = args.lr * (0.1 ** (epoch // (args.epochs//3 - 3))) - - if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: - lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) - else: - alpha = 0 - cosine_decay = 0.5 * ( - 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) - decayed = (1 - alpha) * cosine_decay + alpha - lr = args.lr * decayed - - print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if __name__ == '__main__': - main() +# -*- coding: utf-8 -*- +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import warnings +import argparse +import os +import random +import shutil +import time +import warnings +import torch +import numpy as np +import apex +from apex import amp +import torch.nn as nn +import torch.nn.parallel +import torch.npu +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import models.resnet_0_6_0 as resnet_0_6_0 + +warnings.filterwarnings('ignore') +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default=None, type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +## for ascend 910 +parser.add_argument('--device', default='npu', type=str, help='npu or gpu') +parser.add_argument('--addr', default='10.136.181.115', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--warm_up_epochs', default=5, type=int, + help='warm up') +best_acc1 = 0 + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + args = parser.parse_args() + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + #mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + main_worker(args.gpu, ngpus_per_node, args) + + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = args.process_device_map[gpu] + + if args.distributed: + args.gpu = args.rank + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + + if args.device == 'npu': + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model resnext101_32x8d") + model = resnet_0_6_0.resnext101_32x8d(pretrained=False, progress=True) + print("Load my train models...") + pretrained_dict = \ + torch.load("checkpoint.pth.tar", map_location="cpu")["state_dict"] + model.load_state_dict({k.replace('module.', ''): v for k, v in pretrained_dict.items()}, strict=False) + if "fc.weight" in pretrained_dict: + pretrained_dict.pop('fc.weight') + pretrained_dict.pop('fc.bias') + if "module.fc.weight" in pretrained_dict: + pretrained_dict.pop('module.fc.weight') + pretrained_dict.pop('module.fc.bias') + for param in model.parameters(): + param.requires_grad = False + model.fc = nn.Linear(2048, 1000) + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model resnext101_32x8d") + model = resnet_0_6_0.resnext101_32x8d() + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
+ if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + # define loss function (criterion) and optimizer + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + + if args.amp: + model, optimizer = amp.initialize( + model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
+ if args.gpu is not None: + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + if args.pretrained: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, + find_unused_parameters=True) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + else: + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = torch.nn.DataParallel(model).to(loc) + else: + model = torch.nn.DataParallel(model).cuda() + print(torch.npu.synchronize(), "-- A 2.0 ------") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + criterion = nn.CrossEntropyLoss().to(loc) + else: + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=( + train_sampler is None), + num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=False, drop_last=True) + + if args.evaluate: + 
validate(val_loader, model, criterion, args, ngpus_per_node) + return + + if args.prof: + profiling(train_loader, model, criterion, optimizer, args) + return + + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + if args.device == 'npu' and args.gpu == 0 and epoch == 89: + print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + + ############## npu modify begin ############# + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'resnext101_32x8d', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'resnext101_32x8d', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + }, is_best) + ############## npu modify end ############# + + +def profiling(data_loader, model, criterion, optimizer, args): + # switch to train mode + model.train() + + def update(model, images, target, optimizer): + output = model(images) + loss = criterion(output, target) + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.zero_grad() + optimizer.step() + + for step, (images, target) in enumerate(data_loader): + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + if step < 5: + update(model, images, target, optimizer) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update(model, images, target, optimizer) + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update(model, images, target, optimizer) + break + + prof.export_chrome_trace("output.prof") + + +def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = 
criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + if args.device == 'npu': + torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + + +def validate(val_loader, model, criterion, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc).to(torch.float) + else: + images = images.cuda(args.gpu, non_blocking=True) + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - 
self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + # lr = args.lr * (0.1 ** (epoch // (args.epochs//3 - 3))) + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/pthtar2onnx.py b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/pthtar2onnx.py index dca390378c2c425a65447c2e03add404adf5b733..df34ce630fca0072666293e61af49417c45a2c7d 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/pthtar2onnx.py +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/pthtar2onnx.py @@ -1,49 +1,49 @@ -""" -Copyright 2020 Huawei Technologies Co., Ltd -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
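
The `adjust_learning_rate` function in the re-added `main.py` above combines a linear warm-up over `--warm_up_epochs` with a cosine decay down to the final epoch. Below is a standalone sketch that reproduces the same schedule for sanity-checking or plotting before a long run; the helper name `lr_at_epoch` is mine, and the defaults mirror the argument parser (`lr=0.1`, `warm_up_epochs=5`, `epochs=90`).

```python
import numpy as np


def lr_at_epoch(epoch, base_lr=0.1, warm_up_epochs=5, total_epochs=90):
    """Reproduce the warm-up + cosine schedule used by adjust_learning_rate()."""
    if warm_up_epochs > 0 and epoch < warm_up_epochs:
        return base_lr * (epoch + 1) / (warm_up_epochs + 1)   # linear warm-up
    cosine_decay = 0.5 * (1 + np.cos(
        np.pi * (epoch - warm_up_epochs) / (total_epochs - warm_up_epochs)))
    return base_lr * cosine_decay                             # cosine decay, alpha = 0


if __name__ == "__main__":
    for e in (0, 4, 5, 45, 89):
        print(f"epoch {e:2d}: lr = {lr_at_epoch(e):.6f}")
```

The printed values should line up with the `=> Epoch[%d] Setting lr: %.4f` lines that the training script logs at the start of each epoch.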
-""" - -import torch -import torchvision -import models.resnet_0_6_0 as resnet_0_6_0 -import torch.onnx -from collections import OrderedDict - - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if(k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(): - checkpoint = torch.load("./checkpoint.pth.tar", map_location='cpu') - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = resnet_0_6_0.resnext101_32x8d() - model.load_state_dict(checkpoint['state_dict']) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, "resnext101_32x8d_npu_16.onnx" - , input_names=input_names, output_names=output_names - , opset_version=11) - - -if __name__ == "__main__": - convert() +""" +Copyright 2020 Huawei Technologies Co., Ltd +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import torch +import torchvision +import models.resnet_0_6_0 as resnet_0_6_0 +import torch.onnx +from collections import OrderedDict + + +def proc_node_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if(k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def convert(): + checkpoint = torch.load("./checkpoint.pth.tar", map_location='cpu') + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + model = resnet_0_6_0.resnext101_32x8d() + model.load_state_dict(checkpoint['state_dict']) + model.eval() + print(model) + + input_names = ["actual_input_1"] + output_names = ["output1"] + dummy_input = torch.randn(16, 3, 224, 224) + torch.onnx.export(model, dummy_input, "resnext101_32x8d_npu_16.onnx" + , input_names=input_names, output_names=output_names + , opset_version=11) + + +if __name__ == "__main__": + convert() diff --git a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/requirements.txt b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/requirements.txt index 7f2fa08f376ac735f3c61579352a67ef3adb404a..fcf09628836e14b8082e329b646e56fadf7a9f81 100644 --- a/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/requirements.txt +++ b/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch/requirements.txt @@ -1,4 +1,4 @@ -#torch==1.5.0 -#apex -torchvision==0.2.0 -onnx +#torch==1.5.0 +#apex +torchvision==0.2.0 +onnx diff --git a/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK 
-PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SE-ResNet-50/main.py b/PyTorch/contrib/cv/classification/SE-ResNet-50/main.py index 2e56931d84916b1e1bc81d398578628d28802dd1..4d87b412ee1925bf6033acf53a15aa3cd9aa840b 100644 --- a/PyTorch/contrib/cv/classification/SE-ResNet-50/main.py +++ b/PyTorch/contrib/cv/classification/SE-ResNet-50/main.py @@ -1,655 +1,655 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import warnings -warnings.filterwarnings('ignore') -import argparse -import os -import random -import shutil -import time -import warnings -import torch -import numpy as np -import apex -from apex import amp -import torch.nn as nn -import torch.nn.parallel -import torch.npu -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import senet - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='./checkpoint.pth.tar', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. 
This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -## for ascend 910 -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--addr', default='10.136.181.115', - type=str, help='master addr') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', - type=str, help='device id list') -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') -parser.add_argument('--warm_up_epochs', default=5, type=int, - help='warm up') -best_acc1 = 0 - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def main(): - args = parser.parse_args() - print(args.device_list) - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. 
This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - ngpus_per_node = len(args.process_device_map) - else: - if args.distributed: - ngpus_per_node = torch.cuda.device_count() - else: - ngpus_per_node = 1 - print('ngpus_per_node:', ngpus_per_node) - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, - args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = args.process_device_map[gpu] - - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - - if args.device == 'npu': - dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - else: - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model se_resnet50") - model = senet.se_resnet50(pretrained=True) - else: - print("=> creating model se_resnet50") - model = senet.se_resnet50() - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(loc) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / args.world_size) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - else: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = model.to(loc) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(args.gpu) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - print("before : model = torch.nn.DataParallel(model).cuda()") - - # define loss function (criterion) and optimizer - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - if args.amp: - model, optimizer = amp.initialize( - model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - if args.pretrained: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, - find_unused_parameters=True) - else: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) - else: - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = torch.nn.DataParallel(model).to(loc) - else: - model = torch.nn.DataParallel(model).cuda() - - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - criterion = nn.CrossEntropyLoss().to(loc) - else: - criterion = nn.CrossEntropyLoss().cuda(args.gpu) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=( - train_sampler is None), - num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=False, drop_last=True) - - if args.evaluate: - validate(val_loader, model, criterion, args, ngpus_per_node) - 
return - - if args.prof: - profiling(train_loader, model, criterion, optimizer, args) - return - - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - - adjust_learning_rate(optimizer, epoch, args) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - if args.device == 'npu' and args.gpu == 0 and epoch == 89: - print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - - ############## npu modify begin ############# - if args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'resnext101_32x8d', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'resnext101_32x8d', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - }, is_best) - ############## npu modify end ############# - - -def profiling(data_loader, model, criterion, optimizer, args): - # switch to train mode - model.train() - - def update(model, images, target, optimizer): - output = model(images) - loss = criterion(output, target) - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.zero_grad() - optimizer.step() - - for step, (images, target) in enumerate(data_loader): - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - if step < 5: - update(model, images, target, optimizer) - else: - if args.device == 'npu': - with torch.autograd.profiler.profile(use_npu=True) as prof: - update(model, images, target, optimizer) - else: - with torch.autograd.profiler.profile(use_cuda=True) as prof: - update(model, images, target, optimizer) - break - - prof.export_chrome_trace("output.prof") - - -def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, 
acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - if args.device == 'npu': - torch.npu.synchronize() - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, - 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size * args.world_size / batch_time.avg)) - - -def validate(val_loader, model, criterion, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc).to(torch.float) - else: - images = images.cuda(args.gpu, non_blocking=True) - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=2): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = 
'{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - # lr = args.lr * (0.1 ** (epoch // (args.epochs//3 - 3))) - - if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: - lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) - else: - alpha = 0 - cosine_decay = 0.5 * ( - 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) - decayed = (1 - alpha) * cosine_decay + alpha - lr = args.lr * decayed - - print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if __name__ == '__main__': - main() +# -*- coding: utf-8 -*- +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
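# --- Illustrative sketch, not part of the patch --------------------------------
# adjust_learning_rate() above combines a linear warm-up with cosine decay
# (alpha = 0). A minimal standalone version of that schedule; the values below
# (base_lr, epochs, warm_up_epochs) are placeholders, not taken from any run:
import numpy as np

def cosine_warmup_lr(epoch, base_lr=0.1, epochs=90, warm_up_epochs=5):
    """Linear warm-up for the first warm_up_epochs, then cosine decay towards 0."""
    if warm_up_epochs > 0 and epoch < warm_up_epochs:
        return base_lr * (epoch + 1) / (warm_up_epochs + 1)
    cosine_decay = 0.5 * (1 + np.cos(
        np.pi * (epoch - warm_up_epochs) / (epochs - warm_up_epochs)))
    return base_lr * cosine_decay

for e in (0, 4, 5, 45, 89):
    # ramps up towards base_lr, peaks right after warm-up, decays to near zero
    print(e, round(cosine_warmup_lr(e), 5))
# --------------------------------------------------------------------------------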
+# ============================================================================ +import warnings +warnings.filterwarnings('ignore') +import argparse +import os +import random +import shutil +import time +import warnings +import torch +import numpy as np +import apex +from apex import amp +import torch.nn as nn +import torch.nn.parallel +import torch.npu +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import senet + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='./checkpoint.pth.tar', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +## for ascend 910 +parser.add_argument('--device', default='npu', type=str, help='npu or gpu') +parser.add_argument('--addr', default='10.136.181.115', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--warm_up_epochs', default=5, type=int, + help='warm up') +best_acc1 = 0 + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + args = parser.parse_args() + print(args.device_list) + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = args.process_device_map[gpu] + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model se_resnet50") + model = senet.se_resnet50(pretrained=True) + else: + print("=> creating model se_resnet50") + model = senet.se_resnet50() + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
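# --- Illustrative sketch, not part of the patch --------------------------------
# How --device_list is turned into a per-process device map above, and how each
# worker spawned by mp.spawn() derives the global rank passed to
# dist.init_process_group(). Pure Python; the two-node example values are
# hypothetical:
def device_id_to_process_device_map(device_list):
    devices = sorted(int(x) for x in device_list.split(","))
    return {process_id: device_id for process_id, device_id in enumerate(devices)}

process_device_map = device_id_to_process_device_map("0,1,2,3")
ngpus_per_node = len(process_device_map)   # one spawned worker per listed device

for node_rank in (0, 1):                   # value given via --rank on each node
    for gpu in range(ngpus_per_node):      # local index handed to main_worker()
        global_rank = node_rank * ngpus_per_node + gpu
        print("node", node_rank, "local", gpu,
              "-> device npu:%d, global rank %d" % (process_device_map[gpu], global_rank))
# --------------------------------------------------------------------------------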
+ if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + # define loss function (criterion) and optimizer + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + if args.amp: + model, optimizer = amp.initialize( + model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
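# --- Illustrative sketch, not part of the patch --------------------------------
# The resume/save logic that follows keeps model, optimizer and (optionally)
# apex amp state in one dict. A CPU-only toy round trip with the same layout,
# without apex or an NPU (the 'amp' entry is simply left as None here):
import torch
import torch.nn as nn

model = nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

state = {
    'epoch': 1,
    'arch': 'toy_linear',             # this script records 'resnext101_32x8d'
    'state_dict': model.state_dict(),
    'best_acc1': 0.0,
    'optimizer': optimizer.state_dict(),
    'amp': None,                      # amp.state_dict() would be stored when --amp is set
}
torch.save(state, 'checkpoint.pth.tar')

checkpoint = torch.load('checkpoint.pth.tar', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
print("resumed at epoch", checkpoint['epoch'])
# --------------------------------------------------------------------------------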
+ if args.gpu is not None: + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + if args.pretrained: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, + find_unused_parameters=True) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + else: + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = torch.nn.DataParallel(model).to(loc) + else: + model = torch.nn.DataParallel(model).cuda() + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + criterion = nn.CrossEntropyLoss().to(loc) + else: + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=( + train_sampler is None), + num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=False, drop_last=True) + + if args.evaluate: + validate(val_loader, model, criterion, args, ngpus_per_node) + 
return + + if args.prof: + profiling(train_loader, model, criterion, optimizer, args) + return + + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + if args.device == 'npu' and args.gpu == 0 and epoch == 89: + print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + + ############## npu modify begin ############# + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'resnext101_32x8d', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'resnext101_32x8d', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + }, is_best) + ############## npu modify end ############# + + +def profiling(data_loader, model, criterion, optimizer, args): + # switch to train mode + model.train() + + def update(model, images, target, optimizer): + output = model(images) + loss = criterion(output, target) + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.zero_grad() + optimizer.step() + + for step, (images, target) in enumerate(data_loader): + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + if step < 5: + update(model, images, target, optimizer) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update(model, images, target, optimizer) + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update(model, images, target, optimizer) + break + + prof.export_chrome_trace("output.prof") + + +def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, 
acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + if args.device == 'npu': + torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + + +def validate(val_loader, model, criterion, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc).to(torch.float) + else: + images = images.cuda(args.gpu, non_blocking=True) + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = 
'{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + # lr = args.lr * (0.1 ** (epoch // (args.epochs//3 - 3))) + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/SE-ResNet-50/se_module.py b/PyTorch/contrib/cv/classification/SE-ResNet-50/se_module.py index b1638c0c67d9c175732a6551c5e494d46e70a08c..8b066534e58d3cc802450d608b8dcdd80ab0b1d1 100644 --- a/PyTorch/contrib/cv/classification/SE-ResNet-50/se_module.py +++ b/PyTorch/contrib/cv/classification/SE-ResNet-50/se_module.py @@ -1,34 +1,34 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
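# --- Illustrative sketch, not part of the patch --------------------------------
# The accuracy() helper above returns top-k percentages per batch. A small
# self-contained check (the helper is re-declared so the snippet runs on its
# own) with a hand-built batch of 4 samples and 5 classes:
import torch

def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

logits = torch.tensor([[0.1, 0.9, 0.0, 0.0, 0.0],   # top-1 = class 1 (matches target)
                       [0.8, 0.1, 0.1, 0.0, 0.0],   # top-1 = class 0 (target is 2)
                       [0.0, 0.0, 0.0, 1.0, 0.0],   # top-1 = class 3 (matches target)
                       [0.2, 0.3, 0.5, 0.0, 0.0]])  # top-1 = class 2 (target is 4)
target = torch.tensor([1, 2, 3, 4])
acc1, acc5 = accuracy(logits, target, topk=(1, 5))
print(acc1.item(), acc5.item())   # 50.0 and 100.0 for this toy batch
# --------------------------------------------------------------------------------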
-# ============================================================================ -from torch import nn - - -class SELayer(nn.Module): - def __init__(self, channel, reduction=16): - super(SELayer, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Sequential( - nn.Linear(channel, channel // reduction, bias=False), - nn.ReLU(inplace=True), - nn.Linear(channel // reduction, channel, bias=False), - nn.Sigmoid() - ) - - def forward(self, x): - b, c, _, _ = x.size() - y = self.avg_pool(x).view(b, c) - y = self.fc(y).view(b, c, 1, 1) +# -*- coding: utf-8 -*- +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from torch import nn + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) return x * y.expand_as(x) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SE-ResNet-50/senet.py b/PyTorch/contrib/cv/classification/SE-ResNet-50/senet.py index 01f3c76f3ac0279b047c21240480e73c07bbfaeb..6dc5047cfce1c64e94e0cc9b1ec26fa0d6b4b4a8 100644 --- a/PyTorch/contrib/cv/classification/SE-ResNet-50/senet.py +++ b/PyTorch/contrib/cv/classification/SE-ResNet-50/senet.py @@ -1,300 +1,300 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
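# --- Illustrative sketch, not part of the patch --------------------------------
# The SELayer above squeezes each channel to a scalar, passes it through a small
# bottleneck MLP, and rescales the input channel-wise. A shape check on random
# data, re-declaring the same layer so the snippet runs on its own:
import torch
from torch import nn

class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)      # (B, C): one scalar per channel
        y = self.fc(y).view(b, c, 1, 1)      # per-channel gate in (0, 1)
        return x * y.expand_as(x)            # recalibrated feature map

se = SELayer(channel=64, reduction=16)
x = torch.randn(2, 64, 56, 56)
out = se(x)
print(out.shape)    # torch.Size([2, 64, 56, 56]) -- same shape, channels rescaled
# --------------------------------------------------------------------------------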
-# ============================================================================ -import torch.nn as nn -from torch.hub import load_state_dict_from_url -from torchvision.models import ResNet -from se_module import SELayer - - -def conv3x3(in_planes, out_planes, stride=1): - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) - - -class SEBasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, - base_width=64, dilation=1, norm_layer=None, - *, reduction=16): - super(SEBasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes, 1) - self.bn2 = nn.BatchNorm2d(planes) - self.se = SELayer(planes, reduction) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.se(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class SEBottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, - base_width=64, dilation=1, norm_layer=None, - *, reduction=16): - super(SEBottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.se = SELayer(planes * 4, reduction) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - out = self.se(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -def se_resnet18(num_classes=1_000): - """Constructs a ResNet-18 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(SEBasicBlock, [2, 2, 2, 2], num_classes=num_classes) - model.avgpool = nn.AdaptiveAvgPool2d(1) - return model - - -def se_resnet34(num_classes=1_000): - """Constructs a ResNet-34 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(SEBasicBlock, [3, 4, 6, 3], num_classes=num_classes) - model.avgpool = nn.AdaptiveAvgPool2d(1) - return model - - -def se_resnet50(num_classes=1_000, pretrained=False): - """Constructs a ResNet-50 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(SEBottleneck, [3, 4, 6, 3], num_classes=num_classes) - model.avgpool = nn.AdaptiveAvgPool2d(1) - if pretrained: - model.load_state_dict(load_state_dict_from_url( - "https://github.com/moskomule/senet.pytorch/releases/download/archive/seresnet50-60a8950a85b2b.pkl")) - return model - - -def se_resnet101(num_classes=1_000): - """Constructs a ResNet-101 model. 
- Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(SEBottleneck, [3, 4, 23, 3], num_classes=num_classes) - model.avgpool = nn.AdaptiveAvgPool2d(1) - return model - - -def se_resnet152(num_classes=1_000): - """Constructs a ResNet-152 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(SEBottleneck, [3, 8, 36, 3], num_classes=num_classes) - model.avgpool = nn.AdaptiveAvgPool2d(1) - return model - - -class CifarSEBasicBlock(nn.Module): - def __init__(self, inplanes, planes, stride=1, reduction=16): - super(CifarSEBasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.se = SELayer(planes, reduction) - if inplanes != planes: - self.downsample = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes)) - else: - self.downsample = lambda x: x - self.stride = stride - - def forward(self, x): - residual = self.downsample(x) - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.se(out) - - out += residual - out = self.relu(out) - - return out - - -class CifarSEResNet(nn.Module): - def __init__(self, block, n_size, num_classes=10, reduction=16): - super(CifarSEResNet, self).__init__() - self.inplane = 16 - self.conv1 = nn.Conv2d( - 3, self.inplane, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(self.inplane) - self.relu = nn.ReLU(inplace=True) - self.layer1 = self._make_layer( - block, 16, blocks=n_size, stride=1, reduction=reduction) - self.layer2 = self._make_layer( - block, 32, blocks=n_size, stride=2, reduction=reduction) - self.layer3 = self._make_layer( - block, 64, blocks=n_size, stride=2, reduction=reduction) - self.avgpool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Linear(64, num_classes) - self.initialize() - - def initialize(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight) - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - def _make_layer(self, block, planes, blocks, stride, reduction): - strides = [stride] + [1] * (blocks - 1) - layers = [] - for stride in strides: - layers.append(block(self.inplane, planes, stride, reduction)) - self.inplane = planes - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -class CifarSEPreActResNet(CifarSEResNet): - def __init__(self, block, n_size, num_classes=10, reduction=16): - super(CifarSEPreActResNet, self).__init__( - block, n_size, num_classes, reduction) - self.bn1 = nn.BatchNorm2d(self.inplane) - self.initialize() - - def forward(self, x): - x = self.conv1(x) - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - - x = self.bn1(x) - x = self.relu(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - -def se_resnet20(**kwargs): - """Constructs a ResNet-18 model. - """ - model = CifarSEResNet(CifarSEBasicBlock, 3, **kwargs) - return model - - -def se_resnet32(**kwargs): - """Constructs a ResNet-34 model. 
- """ - model = CifarSEResNet(CifarSEBasicBlock, 5, **kwargs) - return model - - -def se_resnet56(**kwargs): - """Constructs a ResNet-34 model. - """ - model = CifarSEResNet(CifarSEBasicBlock, 9, **kwargs) - return model - - -def se_preactresnet20(**kwargs): - """Constructs a ResNet-18 model. - """ - model = CifarSEPreActResNet(CifarSEBasicBlock, 3, **kwargs) - return model - - -def se_preactresnet32(**kwargs): - """Constructs a ResNet-34 model. - """ - model = CifarSEPreActResNet(CifarSEBasicBlock, 5, **kwargs) - return model - - -def se_preactresnet56(**kwargs): - """Constructs a ResNet-34 model. - """ - model = CifarSEPreActResNet(CifarSEBasicBlock, 9, **kwargs) +# -*- coding: utf-8 -*- +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch.nn as nn +from torch.hub import load_state_dict_from_url +from torchvision.models import ResNet +from se_module import SELayer + + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class SEBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None, + *, reduction=16): + super(SEBasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, 1) + self.bn2 = nn.BatchNorm2d(planes) + self.se = SELayer(planes, reduction) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class SEBottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None, + *, reduction=16): + super(SEBottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se = SELayer(planes * 4, reduction) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + out = self.se(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + 
out = self.relu(out) + + return out + + +def se_resnet18(num_classes=1_000): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(SEBasicBlock, [2, 2, 2, 2], num_classes=num_classes) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def se_resnet34(num_classes=1_000): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(SEBasicBlock, [3, 4, 6, 3], num_classes=num_classes) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def se_resnet50(num_classes=1_000, pretrained=False): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(SEBottleneck, [3, 4, 6, 3], num_classes=num_classes) + model.avgpool = nn.AdaptiveAvgPool2d(1) + if pretrained: + model.load_state_dict(load_state_dict_from_url( + "https://github.com/moskomule/senet.pytorch/releases/download/archive/seresnet50-60a8950a85b2b.pkl")) + return model + + +def se_resnet101(num_classes=1_000): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(SEBottleneck, [3, 4, 23, 3], num_classes=num_classes) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +def se_resnet152(num_classes=1_000): + """Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(SEBottleneck, [3, 8, 36, 3], num_classes=num_classes) + model.avgpool = nn.AdaptiveAvgPool2d(1) + return model + + +class CifarSEBasicBlock(nn.Module): + def __init__(self, inplanes, planes, stride=1, reduction=16): + super(CifarSEBasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.se = SELayer(planes, reduction) + if inplanes != planes: + self.downsample = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes)) + else: + self.downsample = lambda x: x + self.stride = stride + + def forward(self, x): + residual = self.downsample(x) + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.se(out) + + out += residual + out = self.relu(out) + + return out + + +class CifarSEResNet(nn.Module): + def __init__(self, block, n_size, num_classes=10, reduction=16): + super(CifarSEResNet, self).__init__() + self.inplane = 16 + self.conv1 = nn.Conv2d( + 3, self.inplane, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(self.inplane) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_layer( + block, 16, blocks=n_size, stride=1, reduction=reduction) + self.layer2 = self._make_layer( + block, 32, blocks=n_size, stride=2, reduction=reduction) + self.layer3 = self._make_layer( + block, 64, blocks=n_size, stride=2, reduction=reduction) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Linear(64, num_classes) + self.initialize() + + def initialize(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride, reduction): + strides = [stride] + [1] * 
(blocks - 1) + layers = [] + for stride in strides: + layers.append(block(self.inplane, planes, stride, reduction)) + self.inplane = planes + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +class CifarSEPreActResNet(CifarSEResNet): + def __init__(self, block, n_size, num_classes=10, reduction=16): + super(CifarSEPreActResNet, self).__init__( + block, n_size, num_classes, reduction) + self.bn1 = nn.BatchNorm2d(self.inplane) + self.initialize() + + def forward(self, x): + x = self.conv1(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = self.bn1(x) + x = self.relu(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + +def se_resnet20(**kwargs): + """Constructs a ResNet-18 model. + """ + model = CifarSEResNet(CifarSEBasicBlock, 3, **kwargs) + return model + + +def se_resnet32(**kwargs): + """Constructs a ResNet-34 model. + """ + model = CifarSEResNet(CifarSEBasicBlock, 5, **kwargs) + return model + + +def se_resnet56(**kwargs): + """Constructs a ResNet-34 model. + """ + model = CifarSEResNet(CifarSEBasicBlock, 9, **kwargs) + return model + + +def se_preactresnet20(**kwargs): + """Constructs a ResNet-18 model. + """ + model = CifarSEPreActResNet(CifarSEBasicBlock, 3, **kwargs) + return model + + +def se_preactresnet32(**kwargs): + """Constructs a ResNet-34 model. + """ + model = CifarSEPreActResNet(CifarSEBasicBlock, 5, **kwargs) + return model + + +def se_preactresnet56(**kwargs): + """Constructs a ResNet-34 model. + """ + model = CifarSEPreActResNet(CifarSEBasicBlock, 9, **kwargs) return model \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SENet154/README.md b/PyTorch/contrib/cv/classification/SENet154/README.md old mode 100755 new mode 100644 index 3a719047ecd6d3461515e8e96844517619330e8f..3b0c8b7ca85cf10b84440bf4e2e25a020ec39387 --- a/PyTorch/contrib/cv/classification/SENet154/README.md +++ b/PyTorch/contrib/cv/classification/SENet154/README.md @@ -1,71 +1,71 @@ -# SENet154 - -This implements training of SENet154 on the ImageNet dataset. - -Code of SENet is mainly migrated and adjusted from [GitHub](https://github.com/Cadene/pretrained-models.pytorch#senet). - -## SENet154 Detail - -SENet involves group convolution, which may cause error on NPU platforms where group convolution is not well-supported. - -Label smoothing is required for qualified model accuracy. - -## Requirements -- pytorch_ascend, apex_ascend -- munch package, which can be installed via `pip install munch` -- Download the ImageNet dataset from http://www.image-net.org/ - - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) - -## Training - -### 单卡训练流程 -1. 安装环境 -2. 修改参数 - 1. `--data DIR`:ImageNet数据集的存储目录,训练集与验证集分别位于DIR/train和DIR/val - 1. `--log-file FILENAME`:自定义日志文件名 - 2. `--device DEVICE`:所使用的单卡训练设备,如cuda:0或npu:0 - 3. `--opt-level L`:apex混合精度优化等级,支持O2(默认)或O1 - 4. `--loss-scale S`:apex混合精度使用的loss scale,默认为128 - 5. `--scheduler`:训练使用的学习率调整器,支持`step`(对应StepLR)和`cosine`(对应CosineAnnealingLR) -3. 开始训练 - ``` - bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 - bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 - ``` - -### 多卡训练流程 -1. 安装环境 -2. 修改参数 - 1. `--device DEVICE`:所使用的多卡训练设备类别,支持cuda和npu - 2. 
`--distributed`:开启分布式训练模式 - 3. `--num-devices N`:参与训练的设备个数,设备ID依次为DEVICE:0 ... DEVICE:(N-1) - 4. `--batch-size N`:分配个每个设备的batch大小 -3. 开始训练 - ``` - bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 - bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 - ``` - -### 训练结果 -日志保存在 ./test/output/device-id 路径下 - -最终训练模型输出至./model.pth,训练过程中生成的存档点位于./models文件夹下 - -Profile结果输出至./output.prof - -## SENet154 Training Result -$E$为当前一轮的Epoch序号,从0开始 - -### GPU 8p -|Epochs|Learning rate |Optimization type|FPS |Acc@1 |Acc@5 | -|:----:|:------------------------------------:|:---------------:|:-----:|:----:|:----:| -|120 |$0.6\times 0.1^{\lfloor E/30 \rfloor}$|O2 |955.433|79.130|94.058| -|120 |$1\times 0.45^{\lfloor E/10 \rfloor}$ |O2 |954.725|78.341|93.945| -|120 |$0.6\times 0.93^{E}$ |O2 |949.309|78.100|94.010| -|120 |$0.3\times (1+\cos{\frac{E\pi}{120}})$|O2 |951.374|80.161|94.879| - -### NPU 8p -|Epochs|Learning rate |Optimization type|FPS |Acc@1 |Acc@5 | -|:----:|:------------------------------------:|:---------------:|:------:|:----:|:----:| -|120 |$0.6\times 0.1^{\lfloor E/30 \rfloor}$|O2 |1524.537|78.599|93.849| +# SENet154 + +This implements training of SENet154 on the ImageNet dataset. + +Code of SENet is mainly migrated and adjusted from [GitHub](https://github.com/Cadene/pretrained-models.pytorch#senet). + +## SENet154 Detail + +SENet involves group convolution, which may cause error on NPU platforms where group convolution is not well-supported. + +Label smoothing is required for qualified model accuracy. + +## Requirements +- pytorch_ascend, apex_ascend +- munch package, which can be installed via `pip install munch` +- Download the ImageNet dataset from http://www.image-net.org/ + - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) + +## Training + +### 单卡训练流程 +1. 安装环境 +2. 修改参数 + 1. `--data DIR`:ImageNet数据集的存储目录,训练集与验证集分别位于DIR/train和DIR/val + 1. `--log-file FILENAME`:自定义日志文件名 + 2. `--device DEVICE`:所使用的单卡训练设备,如cuda:0或npu:0 + 3. `--opt-level L`:apex混合精度优化等级,支持O2(默认)或O1 + 4. `--loss-scale S`:apex混合精度使用的loss scale,默认为128 + 5. `--scheduler`:训练使用的学习率调整器,支持`step`(对应StepLR)和`cosine`(对应CosineAnnealingLR) +3. 开始训练 + ``` + bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 + bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 + ``` + +### 多卡训练流程 +1. 安装环境 +2. 修改参数 + 1. `--device DEVICE`:所使用的多卡训练设备类别,支持cuda和npu + 2. `--distributed`:开启分布式训练模式 + 3. `--num-devices N`:参与训练的设备个数,设备ID依次为DEVICE:0 ... DEVICE:(N-1) + 4. `--batch-size N`:分配个每个设备的batch大小 +3. 
开始训练 + ``` + bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 + bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 + ``` + +### 训练结果 +日志保存在 ./test/output/device-id 路径下 + +最终训练模型输出至./model.pth,训练过程中生成的存档点位于./models文件夹下 + +Profile结果输出至./output.prof + +## SENet154 Training Result +$E$为当前一轮的Epoch序号,从0开始 + +### GPU 8p +|Epochs|Learning rate |Optimization type|FPS |Acc@1 |Acc@5 | +|:----:|:------------------------------------:|:---------------:|:-----:|:----:|:----:| +|120 |$0.6\times 0.1^{\lfloor E/30 \rfloor}$|O2 |955.433|79.130|94.058| +|120 |$1\times 0.45^{\lfloor E/10 \rfloor}$ |O2 |954.725|78.341|93.945| +|120 |$0.6\times 0.93^{E}$ |O2 |949.309|78.100|94.010| +|120 |$0.3\times (1+\cos{\frac{E\pi}{120}})$|O2 |951.374|80.161|94.879| + +### NPU 8p +|Epochs|Learning rate |Optimization type|FPS |Acc@1 |Acc@5 | +|:----:|:------------------------------------:|:---------------:|:------:|:----:|:----:| +|120 |$0.6\times 0.1^{\lfloor E/30 \rfloor}$|O2 |1524.537|78.599|93.849| |120 |$0.3\times (1+\cos{\frac{E\pi}{120}})$|O2 |1522.120|80.048|94.799| \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SENet154/checkpoint.py b/PyTorch/contrib/cv/classification/SENet154/checkpoint.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/data.py b/PyTorch/contrib/cv/classification/SENet154/data.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/demo.py b/PyTorch/contrib/cv/classification/SENet154/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/lsr.py b/PyTorch/contrib/cv/classification/SENet154/lsr.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/modelzoo_level.txt b/PyTorch/contrib/cv/classification/SENet154/modelzoo_level.txt old mode 100755 new mode 100644 index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 --- a/PyTorch/contrib/cv/classification/SENet154/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/SENet154/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SENet154/pth2onnx.py b/PyTorch/contrib/cv/classification/SENet154/pth2onnx.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/senet.py b/PyTorch/contrib/cv/classification/SENet154/senet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/env_npu.sh b/PyTorch/contrib/cv/classification/SENet154/test/env_npu.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/run_2onnx.sh b/PyTorch/contrib/cv/classification/SENet154/test/run_2onnx.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/run_demo.sh b/PyTorch/contrib/cv/classification/SENet154/test/run_demo.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/SENet154/test/train_eval_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/SENet154/test/train_full_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/SENet154/test/train_full_8p.sh old mode 100755 new mode 
100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SENet154/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SENet154/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/train.py b/PyTorch/contrib/cv/classification/SENet154/train.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SENet154/utils.py b/PyTorch/contrib/cv/classification/SENet154/utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/Dockerfile b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/Dockerfile old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/LICENSE b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/LICENSE old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/README.md b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/README.md old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/demo.py b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/docker_start.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/docker_start.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/main.py b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/main.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/modelzoo_level.txt b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/modelzoo_level.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/pthtar2onnx.py b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/pthtar2onnx.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/requirements.txt b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/requirements.txt old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/eval.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/npu_set_env.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/npu_set_env.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/train_1p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/train_8p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/senet.py b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/senet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/env_npu.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/env_npu.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_eval_8p.sh 
b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_eval_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_full_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/Dockerfile b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/Dockerfile +++ b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/docker_start.sh b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/docker_start.sh index 46ce9a02ec0532d6db324beaee7f7eab501b4565..944bca3cdac8e3f2d47ceb0e2b6eb181a405de11 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/docker_start.sh +++ b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/docker_start.sh @@ -1,25 +1,25 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v 
/var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/README.md index 8e6c6f3a8684d96a14ae038746a5f9c6eb8fdd0f..f9b639886e4534148bc8225ccc412a0c2d1ca9ee 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/README.md @@ -1,56 +1,56 @@ -# ShuffleNetV2Plus (size=Small) - -## ImageNet training with PyTorch - -This implements training of ShuffleNetV1 on the ImageNet dataset, mainly modified from [Github](https://github.com/pytorch/examples/tree/master/imagenet). - -## ShuffleNetV2Plus Detail - -Base version of the model from [the paper author's code on Github](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B). -The training script is adapted from [the ShuffleNetV2 script on Gitee](https://gitee.com/ascend/modelzoo/tree/master/built-in/PyTorch/Official/cv/image_classification/Shufflenetv2_for_PyTorch). - -## Requirements - -- pytorch_ascend, apex_ascend, tochvision -- Download the ImageNet dataset from http://www.image-net.org/ - - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh). - -## Training -一、训练流程: - -单卡训练流程: - - 1.安装环境 - 2.修改参数device_id(单卡训练所使用的device id),为训练配置device_id,比如device_id=0 - 3.开始训练 - bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 - bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 - - -多卡训练流程 - - 1.安装环境 - 2.修改参数device_id_list(多卡训练所使用的device id列表),为训练配置device_id,例如device_id=0,1,2,3,4,5,6,7 - 3.执行train_full_8p.sh开始训练 - bash ./test/train_full_8p.sh --data_path=数据集路径 # 精度训练 - bash ./test/train_performance_8p.sh --data_path=数据集路径 # 性能训练 - -二、测试结果 - -训练日志路径:网络脚本test下output文件夹内。例如: - - test/output/devie_id/train_${device_id}.log # 训练脚本原生日志 - test/output/devie_id/ShuffleNetV1_bs8192_8p_perf.log # 8p性能训练结果日志 - test/output/devie_id/ShuffleNetV1_bs8192_8p_acc.log # 8p精度训练结果日志 - -训练模型:训练生成的模型默认会写入到和test文件同一目录下。当训练正常结束时,checkpoint.pth.tar为最终结果。 - - - -## ShufflenetV2Plus training result - -| Acc@1 | FPS | Npu_nums| Epochs | Type | -| :------: | :------: | :------ | :------: | :------: | -| 73.132 | 6306 | 8 | 360 | O2 | - +# ShuffleNetV2Plus (size=Small) + +## ImageNet training with PyTorch + +This implements training of ShuffleNetV1 on the ImageNet dataset, mainly modified from [Github](https://github.com/pytorch/examples/tree/master/imagenet). 
+ +## ShuffleNetV2Plus Detail + +Base version of the model from [the paper author's code on Github](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B). +The training script is adapted from [the ShuffleNetV2 script on Gitee](https://gitee.com/ascend/modelzoo/tree/master/built-in/PyTorch/Official/cv/image_classification/Shufflenetv2_for_PyTorch). + +## Requirements + +- pytorch_ascend, apex_ascend, tochvision +- Download the ImageNet dataset from http://www.image-net.org/ + - Then, and move validation images to labeled subfolders, using [the following shell script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh). + +## Training +一、训练流程: + +单卡训练流程: + + 1.安装环境 + 2.修改参数device_id(单卡训练所使用的device id),为训练配置device_id,比如device_id=0 + 3.开始训练 + bash ./test/train_full_1p.sh --data_path=数据集路径 # 精度训练 + bash ./test/train_performance_1p.sh --data_path=数据集路径 # 性能训练 + + +多卡训练流程 + + 1.安装环境 + 2.修改参数device_id_list(多卡训练所使用的device id列表),为训练配置device_id,例如device_id=0,1,2,3,4,5,6,7 + 3.执行train_full_8p.sh开始训练 + bash ./test/train_full_8p.sh --data_path=数据集路径 # 精度训练 + bash ./test/train_performance_8p.sh --data_path=数据集路径 # 性能训练 + +二、测试结果 + +训练日志路径:网络脚本test下output文件夹内。例如: + + test/output/devie_id/train_${device_id}.log # 训练脚本原生日志 + test/output/devie_id/ShuffleNetV1_bs8192_8p_perf.log # 8p性能训练结果日志 + test/output/devie_id/ShuffleNetV1_bs8192_8p_acc.log # 8p精度训练结果日志 + +训练模型:训练生成的模型默认会写入到和test文件同一目录下。当训练正常结束时,checkpoint.pth.tar为最终结果。 + + + +## ShufflenetV2Plus training result + +| Acc@1 | FPS | Npu_nums| Epochs | Type | +| :------: | :------: | :------ | :------: | :------: | +| 73.132 | 6306 | 8 | 360 | O2 | + 备注:由于模型开发中发现NPU上clamp算子反向错误,以上结果为使用自行编写的clamp函数训练获得。见blocks.py中的注释掉的函数clamp。 \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/blocks.py b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/blocks.py index 5619d2624d2235e7a5ac3f8401bfccae4cf8c903..044cd05d70d3322580abd7deb46897da17379e48 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/blocks.py +++ b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/blocks.py @@ -1,249 +1,249 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - - -import torch -import torch.nn as nn - -try: - from .channel_shuffle import ChannelShuffle -except: - from channel_shuffle import ChannelShuffle - -''' -def clamp(x, low=0, high=6): - low_mask = x < low - high_mask = x > high - keep_mask = ~(low_mask|high_mask) - out = x * keep_mask.to(x) + high * high_mask.to(x) + low * low_mask.to(x) - return out -''' - -class SELayer(nn.Module): - - def __init__(self, inplanes, isTensor=True): - super(SELayer, self).__init__() - if isTensor: - # if the input is (N, C, H, W) - self.SE_opr = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d(inplanes, inplanes // 4, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(inplanes // 4), - nn.ReLU(inplace=True), - nn.Conv2d(inplanes // 4, inplanes, kernel_size=1, stride=1, bias=False), - ) - else: - # if the input is (N, C) - self.SE_opr = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Linear(inplanes, inplanes // 4, bias=False), - nn.BatchNorm1d(inplanes // 4), - nn.ReLU(inplace=True), - nn.Linear(inplanes // 4, inplanes, bias=False), - ) - - def forward(self, x): - atten = self.SE_opr(x) - #atten = clamp(atten + 3.0, 0.0, 6.0) / 6.0 - atten = torch.clamp(atten + 3, 0, 6) / 6 - return x * atten - - -class HS(nn.Module): - - def __init__(self): - super(HS, self).__init__() - - def forward(self, inputs): - clip = torch.clamp(inputs + 3, 0, 6) / 6 - #clip = clamp(inputs + 3.0, 0.0, 6.0) / 6.0 - return inputs * clip - - - -class Shufflenet(nn.Module): - - def __init__(self, inp, oup, base_mid_channels, *, ksize, stride, activation, useSE): - super(Shufflenet, self).__init__() - self.stride = stride - assert stride in [1, 2] - assert ksize in [3, 5, 7] - assert base_mid_channels == oup//2 - - self.base_mid_channel = base_mid_channels - self.ksize = ksize - pad = ksize // 2 - self.pad = pad - self.inp = inp - - outputs = oup - inp - - branch_main = [ - # pw - nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), - nn.BatchNorm2d(base_mid_channels), - None, - # dw - nn.Conv2d(base_mid_channels, base_mid_channels, ksize, stride, pad, groups=base_mid_channels, bias=False), - nn.BatchNorm2d(base_mid_channels), - # pw-linear - nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), - nn.BatchNorm2d(outputs), - None, - ] - if activation == 'ReLU': - assert useSE == False - '''This model should not have SE with ReLU''' - branch_main[2] = nn.ReLU(inplace=True) - branch_main[-1] = nn.ReLU(inplace=True) - else: - branch_main[2] = HS() - branch_main[-1] = HS() - if useSE: - branch_main.append(SELayer(outputs)) - self.branch_main = nn.Sequential(*branch_main) - - if stride == 2: - branch_proj = [ - # dw - nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), - nn.BatchNorm2d(inp), - # pw-linear - nn.Conv2d(inp, inp, 1, 1, 0, bias=False), - nn.BatchNorm2d(inp), - None, - ] - if activation == 'ReLU': - branch_proj[-1] = nn.ReLU(inplace=True) - else: - branch_proj[-1] = HS() - self.branch_proj = 
nn.Sequential(*branch_proj) - else: - self.branch_proj = None - - self.channel_shuffle = ChannelShuffle(inp*2) - - def forward(self, old_x): - if self.stride==1: - x_proj, x = self.channel_shuffle(old_x) - return torch.cat((x_proj, self.branch_main(x)), 1) - elif self.stride==2: - x_proj = old_x - x = old_x - - x1 = self.branch_proj(x_proj) - x2 = self.branch_main(x) - - return torch.cat((x1, x2), 1) - -class Shuffle_Xception(nn.Module): - - def __init__(self, inp, oup, base_mid_channels, *, stride, activation, useSE): - super(Shuffle_Xception, self).__init__() - - assert stride in [1, 2] - assert base_mid_channels == oup//2 - - self.base_mid_channel = base_mid_channels - self.stride = stride - self.ksize = 3 - self.pad = 1 - self.inp = inp - outputs = oup - inp - - branch_main = [ - # dw - nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), - nn.BatchNorm2d(inp), - # pw - nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), - nn.BatchNorm2d(base_mid_channels), - None, - # dw - nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), - nn.BatchNorm2d(base_mid_channels), - # pw - nn.Conv2d(base_mid_channels, base_mid_channels, 1, 1, 0, bias=False), - nn.BatchNorm2d(base_mid_channels), - None, - # dw - nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), - nn.BatchNorm2d(base_mid_channels), - # pw - nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), - nn.BatchNorm2d(outputs), - None, - ] - - if activation == 'ReLU': - branch_main[4] = nn.ReLU(inplace=True) - branch_main[9] = nn.ReLU(inplace=True) - branch_main[14] = nn.ReLU(inplace=True) - else: - branch_main[4] = HS() - branch_main[9] = HS() - branch_main[14] = HS() - assert None not in branch_main - - if useSE: - assert activation != 'ReLU' - branch_main.append(SELayer(outputs)) - - self.branch_main = nn.Sequential(*branch_main) - - if self.stride == 2: - branch_proj = [ - # dw - nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), - nn.BatchNorm2d(inp), - # pw-linear - nn.Conv2d(inp, inp, 1, 1, 0, bias=False), - nn.BatchNorm2d(inp), - None, - ] - if activation == 'ReLU': - branch_proj[-1] = nn.ReLU(inplace=True) - else: - branch_proj[-1] = HS() - self.branch_proj = nn.Sequential(*branch_proj) - - self.channel_shuffle = ChannelShuffle(inp*2) - - def forward(self, old_x): - if self.stride==1: - x_proj, x = self.channel_shuffle(old_x) - return torch.cat((x_proj, self.branch_main(x)), 1) - elif self.stride==2: - x_proj = old_x - x = old_x - return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) - +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + + +import torch +import torch.nn as nn + +try: + from .channel_shuffle import ChannelShuffle +except: + from channel_shuffle import ChannelShuffle + +''' +def clamp(x, low=0, high=6): + low_mask = x < low + high_mask = x > high + keep_mask = ~(low_mask|high_mask) + out = x * keep_mask.to(x) + high * high_mask.to(x) + low * low_mask.to(x) + return out +''' + +class SELayer(nn.Module): + + def __init__(self, inplanes, isTensor=True): + super(SELayer, self).__init__() + if isTensor: + # if the input is (N, C, H, W) + self.SE_opr = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(inplanes, inplanes // 4, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(inplanes // 4), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes // 4, inplanes, kernel_size=1, stride=1, bias=False), + ) + else: + # if the input is (N, C) + self.SE_opr = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Linear(inplanes, inplanes // 4, bias=False), + nn.BatchNorm1d(inplanes // 4), + nn.ReLU(inplace=True), + nn.Linear(inplanes // 4, inplanes, bias=False), + ) + + def forward(self, x): + atten = self.SE_opr(x) + #atten = clamp(atten + 3.0, 0.0, 6.0) / 6.0 + atten = torch.clamp(atten + 3, 0, 6) / 6 + return x * atten + + +class HS(nn.Module): + + def __init__(self): + super(HS, self).__init__() + + def forward(self, inputs): + clip = torch.clamp(inputs + 3, 0, 6) / 6 + #clip = clamp(inputs + 3.0, 0.0, 6.0) / 6.0 + return inputs * clip + + + +class Shufflenet(nn.Module): + + def __init__(self, inp, oup, base_mid_channels, *, ksize, stride, activation, useSE): + super(Shufflenet, self).__init__() + self.stride = stride + assert stride in [1, 2] + assert ksize in [3, 5, 7] + assert base_mid_channels == oup//2 + + self.base_mid_channel = base_mid_channels + self.ksize = ksize + pad = ksize // 2 + self.pad = pad + self.inp = inp + + outputs = oup - inp + + branch_main = [ + # pw + nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + nn.Conv2d(base_mid_channels, base_mid_channels, ksize, stride, pad, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw-linear + nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + None, + ] + if activation == 'ReLU': + assert useSE == False + '''This model should not have SE with ReLU''' + branch_main[2] = nn.ReLU(inplace=True) + branch_main[-1] = nn.ReLU(inplace=True) + else: + branch_main[2] = HS() + branch_main[-1] = HS() + if useSE: + branch_main.append(SELayer(outputs)) + self.branch_main = nn.Sequential(*branch_main) + + if stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, 
bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + None, + ] + if activation == 'ReLU': + branch_proj[-1] = nn.ReLU(inplace=True) + else: + branch_proj[-1] = HS() + self.branch_proj = nn.Sequential(*branch_proj) + else: + self.branch_proj = None + + self.channel_shuffle = ChannelShuffle(inp*2) + + def forward(self, old_x): + if self.stride==1: + x_proj, x = self.channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + + x1 = self.branch_proj(x_proj) + x2 = self.branch_main(x) + + return torch.cat((x1, x2), 1) + +class Shuffle_Xception(nn.Module): + + def __init__(self, inp, oup, base_mid_channels, *, stride, activation, useSE): + super(Shuffle_Xception, self).__init__() + + assert stride in [1, 2] + assert base_mid_channels == oup//2 + + self.base_mid_channel = base_mid_channels + self.stride = stride + self.ksize = 3 + self.pad = 1 + self.inp = inp + outputs = oup - inp + + branch_main = [ + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw + nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw + nn.Conv2d(base_mid_channels, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw + nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + None, + ] + + if activation == 'ReLU': + branch_main[4] = nn.ReLU(inplace=True) + branch_main[9] = nn.ReLU(inplace=True) + branch_main[14] = nn.ReLU(inplace=True) + else: + branch_main[4] = HS() + branch_main[9] = HS() + branch_main[14] = HS() + assert None not in branch_main + + if useSE: + assert activation != 'ReLU' + branch_main.append(SELayer(outputs)) + + self.branch_main = nn.Sequential(*branch_main) + + if self.stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + None, + ] + if activation == 'ReLU': + branch_proj[-1] = nn.ReLU(inplace=True) + else: + branch_proj[-1] = HS() + self.branch_proj = nn.Sequential(*branch_proj) + + self.channel_shuffle = ChannelShuffle(inp*2) + + def forward(self, old_x): + if self.stride==1: + x_proj, x = self.channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + diff --git a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/channel_shuffle.py b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/channel_shuffle.py index bda735f26f138fc412e020e4e69269cda732aeee..69d46a643aab83720d89c5424b70b50bc22f404e 100644 --- a/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/channel_shuffle.py +++ b/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch/channel_shuffle.py @@ -1,156 +1,156 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. 
-# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch -import torch.nn as nn - - -class ChannelShuffle(nn.Module): - r"""Applies an NPU compatible channel shuffle operation. - - The origin implement is https://github.com/pytorch/vision/blob/master/torchvision/models/shufflenetv2.py#L21 - - In order to avoid contiguous operation which is not efficient on npu, we replaced the original operation - with a rewrite of the same semantics. Two discontinuous operations are replaced, transpose and chunk. - - .. note:: - Only group=2 is implemented, modify other group scenarios yourself. - - Args: - in_channels (int): The total number of channels in the input tensors - groups (int): The number of shuffle groups. Default: 2 - split_shuffle (bool): Whether to execute the chunk after shuffle. Default: True - - Shape: - - Input: :math:`(N, C_{in}, L_{in})`, `(N, C_{in}, L_{in})` - - Output: :math:`(N, C_{out}, L_{out})` - - Examples:: - >>> x1 = torch.randn(2,32,7,7) - >>> x2 = torch.randn(2,32,7,7) - >>> m = ChannelShuffle(64, split_shuffle=True) - >>> output = m(x1, x2) - - """ - - def __init__(self, in_channels, groups=2): - super(ChannelShuffle, self).__init__() - self.group_len = in_channels // groups - - # init out_channels - self.out_channels = np.array(list(range(in_channels))).reshape(groups, self.group_len).transpose(1, 0).flatten() - self.out_channels = torch.from_numpy(self.out_channels).long() - - # init index used in fp & bp - # Only group=2 is implemented, modify other group scenarios yourself. - self.fp_index1 = self.out_channels[:self.group_len] - self.fp_index2 = self.out_channels[self.group_len:] - self.bp_index = torch.tensor(list(range(0, in_channels, 2)) + list(range(1, in_channels, 2))) - - self.checked = False - - def check_self(self, x): - r"""Check device equipment between tensors. 
- """ - if self.bp_index.device == x.device: - self.checked = True - return - - device = x.device - if str(device).startswith('npu'): - self.fp_index1 = self.fp_index1.int() - self.fp_index2 = self.fp_index2.int() - self.bp_index = self.bp_index.int() - - self.fp_index1 = self.fp_index1.to(device) - self.fp_index2 = self.fp_index2.to(device) - self.bp_index = self.bp_index.to(device) - - - def forward(self, x): - if not self.checked: - self.check_self(x) - if self.training: - return IndexSelectHalfImplementation.apply(x, self.fp_index1, self.fp_index2, self.bp_index) - else: - return IndexSelectHalfImplementationForward(x, self.fp_index1, self.fp_index2, self.bp_index) - -def IndexSelectHalfImplementationForward(x, fp_index1, fp_index2, bp_index): - return x.index_select(1, fp_index1), x.index_select(1, fp_index2) - -class IndexSelectHalfImplementation(torch.autograd.Function): - @staticmethod - def forward(ctx, x, fp_index1, fp_index2, bp_index): - ctx.bp_index = bp_index - return x.index_select(1, fp_index1), x.index_select(1, fp_index2) - - @staticmethod - def backward(ctx, grad_output1, grad_output2): - grad_output = torch.cat([grad_output1, grad_output2], 1) - out = grad_output.index_select(1, ctx.bp_index) - return out, None, None, None, None - -def channel_shuffle_torchvision(x, groups=2): - # type: (torch.Tensor, int) -> torch.Tensor - batchsize, num_channels, height, width = x.data.size() - channels_per_group = num_channels // groups - - # reshape - x = x.view(batchsize, groups, - channels_per_group, height, width) - - x = torch.transpose(x, 1, 2).contiguous() - - # flatten - x = x.view(batchsize, -1, height, width) - - return x.chunk(2, 1) - -def channel_shuffle_megvii(x): - batchsize, num_channels, height, width = x.data.size() - assert (num_channels % 4 == 0) - x = x.reshape(batchsize * num_channels // 2, 2, height * width) - x = x.permute(1, 0, 2) - x = x.reshape(2, -1, num_channels // 2, height, width) - return x[0], x[1] - -if __name__ == '__main__': - device = 'cpu' - - if device.startswith('npu'): - torch.npu.set_device(device) - - channels = 8 - BZ = 2 - H = 1 - W = 1 - - # x = torch.randn(BZ, channels, H, W) - x = torch.arange(BZ*channels*H*W).reshape(BZ, channels, H, W) - print(x) - cs_model = ChannelShuffle(channels) - - x = x.to(device) - cs_model = cs_model.to(device) - - output1 = channel_shuffle_megvii(x) - print(output1[0]) - output2 = channel_shuffle_torchvision(x) - print(output2[0]) - output3 = cs_model(x) - print('output1-output2',sum((i-j).abs().sum() for i, j in zip(output1, output2))) - print('output2-output3',sum((i-j).abs().sum() for i, j in zip(output2, output3))) - print('output1-output3',sum((i-j).abs().sum() for i, j in zip(output1, output3))) - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch.nn as nn + + +class ChannelShuffle(nn.Module): + r"""Applies an NPU compatible channel shuffle operation. 
+ + The origin implement is https://github.com/pytorch/vision/blob/master/torchvision/models/shufflenetv2.py#L21 + + In order to avoid contiguous operation which is not efficient on npu, we replaced the original operation + with a rewrite of the same semantics. Two discontinuous operations are replaced, transpose and chunk. + + .. note:: + Only group=2 is implemented, modify other group scenarios yourself. + + Args: + in_channels (int): The total number of channels in the input tensors + groups (int): The number of shuffle groups. Default: 2 + split_shuffle (bool): Whether to execute the chunk after shuffle. Default: True + + Shape: + - Input: :math:`(N, C_{in}, L_{in})`, `(N, C_{in}, L_{in})` + - Output: :math:`(N, C_{out}, L_{out})` + + Examples:: + >>> x1 = torch.randn(2,32,7,7) + >>> x2 = torch.randn(2,32,7,7) + >>> m = ChannelShuffle(64, split_shuffle=True) + >>> output = m(x1, x2) + + """ + + def __init__(self, in_channels, groups=2): + super(ChannelShuffle, self).__init__() + self.group_len = in_channels // groups + + # init out_channels + self.out_channels = np.array(list(range(in_channels))).reshape(groups, self.group_len).transpose(1, 0).flatten() + self.out_channels = torch.from_numpy(self.out_channels).long() + + # init index used in fp & bp + # Only group=2 is implemented, modify other group scenarios yourself. + self.fp_index1 = self.out_channels[:self.group_len] + self.fp_index2 = self.out_channels[self.group_len:] + self.bp_index = torch.tensor(list(range(0, in_channels, 2)) + list(range(1, in_channels, 2))) + + self.checked = False + + def check_self(self, x): + r"""Check device equipment between tensors. + """ + if self.bp_index.device == x.device: + self.checked = True + return + + device = x.device + if str(device).startswith('npu'): + self.fp_index1 = self.fp_index1.int() + self.fp_index2 = self.fp_index2.int() + self.bp_index = self.bp_index.int() + + self.fp_index1 = self.fp_index1.to(device) + self.fp_index2 = self.fp_index2.to(device) + self.bp_index = self.bp_index.to(device) + + + def forward(self, x): + if not self.checked: + self.check_self(x) + if self.training: + return IndexSelectHalfImplementation.apply(x, self.fp_index1, self.fp_index2, self.bp_index) + else: + return IndexSelectHalfImplementationForward(x, self.fp_index1, self.fp_index2, self.bp_index) + +def IndexSelectHalfImplementationForward(x, fp_index1, fp_index2, bp_index): + return x.index_select(1, fp_index1), x.index_select(1, fp_index2) + +class IndexSelectHalfImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x, fp_index1, fp_index2, bp_index): + ctx.bp_index = bp_index + return x.index_select(1, fp_index1), x.index_select(1, fp_index2) + + @staticmethod + def backward(ctx, grad_output1, grad_output2): + grad_output = torch.cat([grad_output1, grad_output2], 1) + out = grad_output.index_select(1, ctx.bp_index) + return out, None, None, None, None + +def channel_shuffle_torchvision(x, groups=2): + # type: (torch.Tensor, int) -> torch.Tensor + batchsize, num_channels, height, width = x.data.size() + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x.chunk(2, 1) + +def channel_shuffle_megvii(x): + batchsize, num_channels, height, width = x.data.size() + assert (num_channels % 4 == 0) + x = x.reshape(batchsize * num_channels // 2, 2, height * width) + x = x.permute(1, 0, 2) + x = 
x.reshape(2, -1, num_channels // 2, height, width) + return x[0], x[1] + +if __name__ == '__main__': + device = 'cpu' + + if device.startswith('npu'): + torch.npu.set_device(device) + + channels = 8 + BZ = 2 + H = 1 + W = 1 + + # x = torch.randn(BZ, channels, H, W) + x = torch.arange(BZ*channels*H*W).reshape(BZ, channels, H, W) + print(x) + cs_model = ChannelShuffle(channels) + + x = x.to(device) + cs_model = cs_model.to(device) + + output1 = channel_shuffle_megvii(x) + print(output1[0]) + output2 = channel_shuffle_torchvision(x) + print(output2[0]) + output3 = cs_model(x) + print('output1-output2',sum((i-j).abs().sum() for i, j in zip(output1, output2))) + print('output2-output3',sum((i-j).abs().sum() for i, j in zip(output2, output3))) + print('output1-output3',sum((i-j).abs().sum() for i, j in zip(output1, output3))) + diff --git a/PyTorch/contrib/cv/classification/SkresNet50/scripts/eval.sh b/PyTorch/contrib/cv/classification/SkresNet50/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SkresNet50/scripts/npu_setenv.sh b/PyTorch/contrib/cv/classification/SkresNet50/scripts/npu_setenv.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SkresNet50/scripts/train_1p.sh b/PyTorch/contrib/cv/classification/SkresNet50/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SkresNet50/scripts/train_8p.sh b/PyTorch/contrib/cv/classification/SkresNet50/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SkresNet50/utils/progress/setup.py b/PyTorch/contrib/cv/classification/SkresNet50/utils/progress/setup.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/SkresNet50/utils/progress/test_progress.py b/PyTorch/contrib/cv/classification/SkresNet50/utils/progress/test_progress.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/TNT/npu_fused_adamw.py b/PyTorch/contrib/cv/classification/TNT/npu_fused_adamw.py index 8a8e3f5a8de82a0a7ebfc6bb41393e09ec5b78be..df662e48c114ef6406b9e5fc7bc50d4475deaa96 100644 --- a/PyTorch/contrib/cv/classification/TNT/npu_fused_adamw.py +++ b/PyTorch/contrib/cv/classification/TNT/npu_fused_adamw.py @@ -1,257 +1,257 @@ -# Copyright (c) 2020, Huawei Technologies. -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -from collections import defaultdict - -import torch -from torch.optim.optimizer import Optimizer - -from apex.contrib.combine_tensors import combine_npu - - -class NpuFusedAdamW(Optimizer): - """Implements AdamW algorithm. - - Currently NPU-only. Requires Apex to be installed via - ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. - - This version of NPU fused AdamW implements 1 fusions. 
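The single fusion mentioned here is the combine-tensor apply: parameters and their optimizer states are packed so that one elementwise kernel launch can update all of them. `combine_npu` from `apex.contrib.combine_tensors` provides this on NPU hardware; the sketch below is only a CPU analogue of the packing idea, not that API.

```python
import torch

# Pack several parameters into one contiguous buffer, then hand out views into
# it, so a single elementwise update touches every parameter at once.
params = [torch.randn(3, 4), torch.randn(10), torch.randn(2, 2, 2)]

flat = torch.cat([p.reshape(-1) for p in params])      # one combined buffer
views, offset = [], 0
for p in params:
    n = p.numel()
    views.append(flat[offset:offset + n].view_as(p))   # shares storage with flat
    offset += n

flat.mul_(0.9)                                         # one "fused" update
assert torch.allclose(views[0], params[0] * 0.9)
```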
- - * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters - into one or a few kernel launches. - - :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - ... - opt.step() - - :class:`apex.optimizers.FusedAdamW` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` - with Amp, only ``opt_level O1 and O2`` can be choosed:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - model, opt = amp.initialize(model, opt, opt_level="O2") - ... - opt.step() - - - The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. - The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. - - Arguments: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float, optional, default: 1e-3): learning rate - betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used - for computing running averages of gradient and its square - eps (float, optional, default: 1e-8): term added to the denominator to improve - numerical stability - weight_decay (float, optional, default: 1e-2): weight decay coefficient - amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of - this algorithm from the paper `On the Convergence of Adam and Beyond`_ - - .. _Adam\: A Method for Stochastic Optimization: - https://arxiv.org/abs/1412.6980 - .. _Decoupled Weight Decay Regularization: - https://arxiv.org/abs/1711.05101 - .. _On the Convergence of Adam and Beyond: - https://openreview.net/forum?id=ryQu7f-RZ - """ - - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, - weight_decay=1e-2, amsgrad=False): - if lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if eps < 0.0: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if betas[0] < 0.0 or betas[0] >= 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if betas[1] < 0.0 or betas[1] >= 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - if weight_decay < 0.0: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay, amsgrad=amsgrad) - self.is_npu_fused_optimizer = True - super(NpuFusedAdamW, self).__init__(params, defaults) - - def __setstate__(self, state): - super(NpuFusedAdamW, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('amsgrad', False) - - def _init_param_state(self, p, amsgrad): - state = self.state[p] - # State initialization - if len(state) == 0: - state['step'] = 0 - # Exponential moving average of gradient values - state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) - # Exponential moving average of squared gradient values - state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - if amsgrad: - # Maintains max of all exp. moving avg. of sq. grad. 
values - state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - else: - exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_tmp.copy_(state['exp_avg']) - state['exp_avg'] = exp_avg_tmp - - exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_sq_tmp.copy_(state['exp_avg_sq']) - state['exp_avg_sq'] = exp_avg_sq_tmp - - if amsgrad: - max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) - state['max_exp_avg_sq'] = max_exp_avg_sq_tmp - - def _combine_group_param_states(self, group_index): - group = self.param_groups[group_index] - stash = self._amp_stash - group_params_list = stash.params_lists_indexed_by_group[group_index] - - amsgrad = group['amsgrad'] - - combined_param_states = [] - for params in group_params_list: - step_list = [] - exp_avg_list = [] - exp_avg_sq_list = [] - max_exp_avg_sq_list = [] - - for p in params: - if p.grad is None: - continue - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - - self._init_param_state(p, amsgrad) - state = self.state[p] - step_list.append(state['step']) - exp_avg_list.append(state['exp_avg']) - exp_avg_sq_list.append(state['exp_avg_sq']) - if amsgrad: - max_exp_avg_sq_list.append(state['max_exp_avg_sq']) - - combined_step = 0 - combined_exp_avg = None - combined_exp_avg_sq = None - combined_max_exp_avg_sq = None - - if len(exp_avg_list) > 0: - combined_step = step_list[0] - combined_exp_avg = combine_npu(exp_avg_list) - combined_exp_avg_sq = combine_npu(exp_avg_sq_list) - combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) - - combined_state = defaultdict(dict) - combined_state['step'] = combined_step - combined_state['exp_avg'] = combined_exp_avg - combined_state['exp_avg_sq'] = combined_exp_avg_sq - combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq - combined_param_states.append(combined_state) - stash.combined_param_states_indexed_by_group[group_index] = combined_param_states - - def _combine_param_states_by_group(self): - stash = self._amp_stash - if stash.param_states_are_combined_by_group: - return - - stash.combined_param_states_indexed_by_group = [] - for _ in self.param_groups: - stash.combined_param_states_indexed_by_group.append([]) - - for i, _ in enumerate(self.param_groups): - self._combine_group_param_states(i) - stash.param_states_are_combined_by_group = True - - def _group_step(self, group_index): - group = self.param_groups[group_index] - for p in group['params']: - if p.grad is None: - continue - - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - state_p = self.state[p] - state_p['step'] += 1 - - amsgrad = group['amsgrad'] - beta1, beta2 = group['betas'] - - stash = self._amp_stash - combined_group_params = stash.combined_params_indexed_by_group[group_index] - combined_group_grads = stash.combined_grads_indexed_by_group[group_index] - combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] - - for combined_param, combined_grad, combined_param_state in zip(combined_group_params, - combined_group_grads, - combined_group_param_states): - if combined_param is None or combined_grad is None: - continue - - # Perform stepweight decay. 
The fused method is used here to speed up the calculation - combined_param.mul_(1 - group['lr'] * group['weight_decay']) - - exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] - if amsgrad: - max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] - - combined_param_state['step'] += 1 - bias_correction1 = 1 - beta1 ** combined_param_state['step'] - bias_correction2 = 1 - beta2 ** combined_param_state['step'] - - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) - exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) - if amsgrad: - # Maintains the maximum of all 2nd moment running avg. till now - torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) - # Use the max. for normalizing running avg. of gradient - denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - else: - denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - - step_size = group['lr'] / bias_correction1 - - combined_param.addcdiv_(exp_avg, denom, value=-step_size) - - @torch.no_grad() - def step(self, closure=None): - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') - - self._check_already_combined_params_and_grads() - # combine params and grads first - self._combine_params_and_grads_by_group() - # then combine param states - self._combine_param_states_by_group() - - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - - for i, _ in enumerate(self.param_groups): - self._group_step(i) - - return loss +# Copyright (c) 2020, Huawei Technologies. +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from collections import defaultdict + +import torch +from torch.optim.optimizer import Optimizer + +from apex.contrib.combine_tensors import combine_npu + + +class NpuFusedAdamW(Optimizer): + """Implements AdamW algorithm. + + Currently NPU-only. Requires Apex to be installed via + ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. + + This version of NPU fused AdamW implements 1 fusions. + + * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters + into one or a few kernel launches. + + :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + ... + opt.step() + + :class:`apex.optimizers.FusedAdamW` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` + with Amp, only ``opt_level O1 and O2`` can be choosed:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + model, opt = amp.initialize(model, opt, opt_level="O2") + ... 
+ opt.step() + + + The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. + The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. + + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional, default: 1e-3): learning rate + betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used + for computing running averages of gradient and its square + eps (float, optional, default: 1e-8): term added to the denominator to improve + numerical stability + weight_decay (float, optional, default: 1e-2): weight decay coefficient + amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of + this algorithm from the paper `On the Convergence of Adam and Beyond`_ + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _Decoupled Weight Decay Regularization: + https://arxiv.org/abs/1711.05101 + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=1e-2, amsgrad=False): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if eps < 0.0: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if betas[0] < 0.0 or betas[0] >= 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if betas[1] < 0.0 or betas[1] >= 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad) + self.is_npu_fused_optimizer = True + super(NpuFusedAdamW, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NpuFusedAdamW, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + def _init_param_state(self, p, amsgrad): + state = self.state[p] + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. 
values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + else: + exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_tmp.copy_(state['exp_avg']) + state['exp_avg'] = exp_avg_tmp + + exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_sq_tmp.copy_(state['exp_avg_sq']) + state['exp_avg_sq'] = exp_avg_sq_tmp + + if amsgrad: + max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) + state['max_exp_avg_sq'] = max_exp_avg_sq_tmp + + def _combine_group_param_states(self, group_index): + group = self.param_groups[group_index] + stash = self._amp_stash + group_params_list = stash.params_lists_indexed_by_group[group_index] + + amsgrad = group['amsgrad'] + + combined_param_states = [] + for params in group_params_list: + step_list = [] + exp_avg_list = [] + exp_avg_sq_list = [] + max_exp_avg_sq_list = [] + + for p in params: + if p.grad is None: + continue + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + + self._init_param_state(p, amsgrad) + state = self.state[p] + step_list.append(state['step']) + exp_avg_list.append(state['exp_avg']) + exp_avg_sq_list.append(state['exp_avg_sq']) + if amsgrad: + max_exp_avg_sq_list.append(state['max_exp_avg_sq']) + + combined_step = 0 + combined_exp_avg = None + combined_exp_avg_sq = None + combined_max_exp_avg_sq = None + + if len(exp_avg_list) > 0: + combined_step = step_list[0] + combined_exp_avg = combine_npu(exp_avg_list) + combined_exp_avg_sq = combine_npu(exp_avg_sq_list) + combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) + + combined_state = defaultdict(dict) + combined_state['step'] = combined_step + combined_state['exp_avg'] = combined_exp_avg + combined_state['exp_avg_sq'] = combined_exp_avg_sq + combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq + combined_param_states.append(combined_state) + stash.combined_param_states_indexed_by_group[group_index] = combined_param_states + + def _combine_param_states_by_group(self): + stash = self._amp_stash + if stash.param_states_are_combined_by_group: + return + + stash.combined_param_states_indexed_by_group = [] + for _ in self.param_groups: + stash.combined_param_states_indexed_by_group.append([]) + + for i, _ in enumerate(self.param_groups): + self._combine_group_param_states(i) + stash.param_states_are_combined_by_group = True + + def _group_step(self, group_index): + group = self.param_groups[group_index] + for p in group['params']: + if p.grad is None: + continue + + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + state_p = self.state[p] + state_p['step'] += 1 + + amsgrad = group['amsgrad'] + beta1, beta2 = group['betas'] + + stash = self._amp_stash + combined_group_params = stash.combined_params_indexed_by_group[group_index] + combined_group_grads = stash.combined_grads_indexed_by_group[group_index] + combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] + + for combined_param, combined_grad, combined_param_state in zip(combined_group_params, + combined_group_grads, + combined_group_param_states): + if combined_param is None or combined_grad is None: + continue + + # Perform stepweight decay. 
The fused method is used here to speed up the calculation + combined_param.mul_(1 - group['lr'] * group['weight_decay']) + + exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] + if amsgrad: + max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] + + combined_param_state['step'] += 1 + bias_correction1 = 1 - beta1 ** combined_param_state['step'] + bias_correction2 = 1 - beta2 ** combined_param_state['step'] + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) + if amsgrad: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. of gradient + denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + else: + denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + + step_size = group['lr'] / bias_correction1 + + combined_param.addcdiv_(exp_avg, denom, value=-step_size) + + @torch.no_grad() + def step(self, closure=None): + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') + + self._check_already_combined_params_and_grads() + # combine params and grads first + self._combine_params_and_grads_by_group() + # then combine param states + self._combine_param_states_by_group() + + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for i, _ in enumerate(self.param_groups): + self._group_step(i) + + return loss diff --git a/PyTorch/contrib/cv/classification/VOLO/README.md b/PyTorch/contrib/cv/classification/VOLO/README.md index c752a0a80415d147bea0949296044ec4dfb4f03f..1dfba0b42eb7c0ce64d903f900d29eb607b096c2 100644 --- a/PyTorch/contrib/cv/classification/VOLO/README.md +++ b/PyTorch/contrib/cv/classification/VOLO/README.md @@ -1,64 +1,64 @@ -# VOLO - -This implements training of volo_d1 on the ImageNet-2012 dataset and token labeling, mainly modified from [sail-sg/volo](https://github.com/sail-sg/volo). - -## VOLO Detail - -There is an error of Col2im operator on NPU, and make it compute with the CPU. - - Because lacking of Roi_align on NPU, the function is re-implemented . - -if there is an error about `OP:ROIAlign`, please modify `/usr/local/Ascend/ascend-toolkit/5.0.x/x-linux/opp/op_impl/built-in/ai_core/tbe/impl/roi_align.py:line 2287` - -``` -#ori -with tik_instance.for_range(0, pool_h) as p_h: - with tik_instance.for_range(0, pool_w, thread_num=2) as p_w: -#right -with tik_instance.for_range(0, pool_h) as p_h: - with tik_instance.for_range(0, pool_w, thread_num=min(2, pool_w)) as p_w: -``` - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models -- Download the token label data, please refer to the [sail-sg/volo](https://github.com/sail-sg/volo). - - -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the Imagenet-2012 dataset, and : - -```bash -Modify the data_dir path/to/imagenet and label path/to/label_top5_train_nfnet in the shell file. 
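The `_group_step` above follows AdamW's decoupled weight decay (`combined_param.mul_(1 - lr * weight_decay)`) before the usual Adam moment update. A small sanity sketch, using only stock PyTorch, that writes the same single step out by hand and checks it against `torch.optim.AdamW`:

```python
import math
import torch

torch.manual_seed(0)
p_ref = torch.randn(5, requires_grad=True)
p_man = p_ref.detach().clone()
grad = torch.randn(5)

lr, wd, betas, eps = 1e-3, 1e-2, (0.9, 0.999), 1e-8

# one step of torch.optim.AdamW
opt = torch.optim.AdamW([p_ref], lr=lr, betas=betas, eps=eps, weight_decay=wd)
p_ref.grad = grad.clone()
opt.step()

# the same update written out by hand, mirroring _group_step
exp_avg = torch.zeros(5)
exp_avg_sq = torch.zeros(5)
step = 1
p_man.mul_(1 - lr * wd)                                   # decoupled weight decay
exp_avg.mul_(betas[0]).add_(grad, alpha=1 - betas[0])
exp_avg_sq.mul_(betas[1]).addcmul_(grad, grad, value=1 - betas[1])
bias1 = 1 - betas[0] ** step
bias2 = 1 - betas[1] ** step
denom = (exp_avg_sq.sqrt() / math.sqrt(bias2)).add_(eps)
p_man.addcdiv_(exp_avg, denom, value=-(lr / bias1))

assert torch.allclose(p_ref.detach(), p_man, atol=1e-6)
```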
-# training 1p accuracy -bash test/train_full_1p.sh - -# training 1p performance -bash test/train_performance_1p.sh - -# training 8p accuracy -bash test/train_full_8p.sh - -# training 8p performance -bash test/train_performance_8p.sh - -# finetune -bash test/train_finetune_1p.sh - -# Online inference demo -python demo.py --checkpoint real_checkpoint_path -``` - -## Volo training result - - -| | top1 | AMP_Type | Epochs | FPS | -| :----: | :---: | :------: | :----: | :-----: | -| 1p-GPU | - | O2 | 1 | 152.37 | -| 1p-NPU | - | O2 | 1 | 23.26 | -| 8p-GPU | 82.83 | O2 | 100 | 1080.81 | -| 8p-NPU | 81.79 | O2 | 100 | 180.31 | - +# VOLO + +This implements training of volo_d1 on the ImageNet-2012 dataset and token labeling, mainly modified from [sail-sg/volo](https://github.com/sail-sg/volo). + +## VOLO Detail + +There is an error of Col2im operator on NPU, and make it compute with the CPU. + + Because lacking of Roi_align on NPU, the function is re-implemented . + +if there is an error about `OP:ROIAlign`, please modify `/usr/local/Ascend/ascend-toolkit/5.0.x/x-linux/opp/op_impl/built-in/ai_core/tbe/impl/roi_align.py:line 2287` + +``` +#ori +with tik_instance.for_range(0, pool_h) as p_h: + with tik_instance.for_range(0, pool_w, thread_num=2) as p_w: +#right +with tik_instance.for_range(0, pool_h) as p_h: + with tik_instance.for_range(0, pool_w, thread_num=min(2, pool_w)) as p_w: +``` + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models +- Download the token label data, please refer to the [sail-sg/volo](https://github.com/sail-sg/volo). + + +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the Imagenet-2012 dataset, and : + +```bash +Modify the data_dir path/to/imagenet and label path/to/label_top5_train_nfnet in the shell file. +# training 1p accuracy +bash test/train_full_1p.sh + +# training 1p performance +bash test/train_performance_1p.sh + +# training 8p accuracy +bash test/train_full_8p.sh + +# training 8p performance +bash test/train_performance_8p.sh + +# finetune +bash test/train_finetune_1p.sh + +# Online inference demo +python demo.py --checkpoint real_checkpoint_path +``` + +## Volo training result + + +| | top1 | AMP_Type | Epochs | FPS | +| :----: | :---: | :------: | :----: | :-----: | +| 1p-GPU | - | O2 | 1 | 152.37 | +| 1p-NPU | - | O2 | 1 | 23.26 | +| 8p-GPU | 82.83 | O2 | 100 | 1080.81 | +| 8p-NPU | 81.79 | O2 | 100 | 180.31 | + diff --git a/PyTorch/contrib/cv/classification/VOLO/demo.py b/PyTorch/contrib/cv/classification/VOLO/demo.py index 07041b9dfe6e7aa6945526366ab424363964dc0e..7175a52cb14886a9dbfa39447fd02940d81ca1e2 100644 --- a/PyTorch/contrib/cv/classification/VOLO/demo.py +++ b/PyTorch/contrib/cv/classification/VOLO/demo.py @@ -1,137 +1,137 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import argparse -import torch -import torchvision -from torchvision import datasets, transforms -from collections import OrderedDict - -from timm.models import create_model -from timm.data.transforms import _pil_interp, RandomResizedCropAndInterpolation, ToNumpy, ToTensor - -DEFAULT_CROP_PCT = 0.875 - -parser = argparse.ArgumentParser(description='ImageNet Training') -parser.add_argument('--checkpoint', type=str, default='', - help='checkpoint path') -args = parser.parse_args() - -def proc_node_module(checkpoint, attr_name): - new_state_dict = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -def get_raw_data(): - from PIL import Image - from urllib.request import urlretrieve - IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' - urlretrieve(IMAGE_URL, 'tmp.jpg') - img = Image.open("tmp.jpg") - img = img.convert('RGB') - return img - -def test(): - loc = 'npu:0' - loc_cpu = 'cpu' - torch.npu.set_device(loc) - if args.checkpoint == '': - print("please give the checkpoint path using --checkpoint param") - exit(0) - checkpoint = torch.load(args.checkpoint, map_location=loc) - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - - model = create_model( - 'volo_d1', - pretrained=False, - num_classes=None, - drop_rate=0.0, - drop_connect_rate=None, # DEPRECATED, use drop_path - drop_path_rate=0.0, - drop_block_rate=None, - global_pool=None, - bn_tf=False, - bn_momentum=None, - bn_eps=None, - scriptable=False, - checkpoint_path='', - img_size=224) - - model = model.to(loc) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - - crop_pct = DEFAULT_CROP_PCT - img_size = 224 - scale_size = int(math.floor(img_size / crop_pct)) - interpolation = 'bilinear' - tfl = [ - transforms.Resize(scale_size, _pil_interp(interpolation)), - transforms.CenterCrop(img_size), - ] - tfl += [ToNumpy()] - 
data_transfrom = transforms.Compose(tfl) - - rd = get_raw_data() - - inputs = data_transfrom(rd) - inputs = inputs.unsqueeze(0) - inputs = inputs.to(loc) - output = model(inputs) - output = output.to(loc_cpu) - - _, pred = output.topk(1, 1, True, True) - result = torch.argmax(output, 1) - print("class: ", pred[0][0].item()) - print(result) - -if __name__ == "__main__": +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import argparse +import torch +import torchvision +from torchvision import datasets, transforms +from collections import OrderedDict + +from timm.models import create_model +from timm.data.transforms import _pil_interp, RandomResizedCropAndInterpolation, ToNumpy, ToTensor + +DEFAULT_CROP_PCT = 0.875 + +parser = argparse.ArgumentParser(description='ImageNet Training') +parser.add_argument('--checkpoint', type=str, default='', + help='checkpoint path') +args = parser.parse_args() + +def proc_node_module(checkpoint, attr_name): + new_state_dict = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +def get_raw_data(): + from PIL import Image + from urllib.request import urlretrieve + IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' + urlretrieve(IMAGE_URL, 'tmp.jpg') + img = Image.open("tmp.jpg") + img = img.convert('RGB') + return img + +def test(): + loc = 'npu:0' + loc_cpu = 'cpu' + torch.npu.set_device(loc) + if args.checkpoint == '': + print("please give the checkpoint path using --checkpoint param") + exit(0) + checkpoint = torch.load(args.checkpoint, map_location=loc) + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + + model = create_model( + 'volo_d1', + pretrained=False, + num_classes=None, + drop_rate=0.0, + drop_connect_rate=None, # DEPRECATED, use drop_path + drop_path_rate=0.0, + drop_block_rate=None, + global_pool=None, + bn_tf=False, + bn_momentum=None, + bn_eps=None, + scriptable=False, + checkpoint_path='', + img_size=224) + + model = model.to(loc) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + + crop_pct = DEFAULT_CROP_PCT + img_size = 224 + scale_size = int(math.floor(img_size / crop_pct)) + interpolation = 'bilinear' + tfl = [ + transforms.Resize(scale_size, _pil_interp(interpolation)), + transforms.CenterCrop(img_size), + ] + tfl += [ToNumpy()] + data_transfrom = transforms.Compose(tfl) + + rd = get_raw_data() + + inputs = data_transfrom(rd) + inputs = inputs.unsqueeze(0) + inputs = inputs.to(loc) + output = model(inputs) + output = output.to(loc_cpu) + + _, pred = output.topk(1, 1, True, True) + result = torch.argmax(output, 1) + print("class: ", pred[0][0].item()) + print(result) + +if __name__ == "__main__": test() \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/loss/__init__.py b/PyTorch/contrib/cv/classification/VOLO/loss/__init__.py index 1eab72cf3aceed4c70528224ee52c7bec3cdadff..658c84ebfab5a3f436a69ec43701e8a0372be5ca 100644 --- a/PyTorch/contrib/cv/classification/VOLO/loss/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/loss/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from .cross_entropy import TokenLabelGTCrossEntropy, TokenLabelSoftTargetCrossEntropy, TokenLabelCrossEntropy \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/models/__init__.py b/PyTorch/contrib/cv/classification/VOLO/models/__init__.py index fb1a0823dc0367759cb3f657d9fd48318517dbeb..658c948f3447a70ea47de99d5b5e04a283d57742 100644 --- a/PyTorch/contrib/cv/classification/VOLO/models/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/models/__init__.py @@ -1,48 +1,48 @@ - -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" + +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from .volo import * \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/prof_demo.sh b/PyTorch/contrib/cv/classification/VOLO/prof_demo.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VOLO/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/VOLO/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VOLO/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/VOLO/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VOLO/timm/loss/asymmetric_loss.py b/PyTorch/contrib/cv/classification/VOLO/timm/loss/asymmetric_loss.py index d3a0d8650f81e0d79e6dc4321355f120ae0f4a3b..726f9410e43a3462675e9e92ce7cbb768be1a7ed 100644 --- a/PyTorch/contrib/cv/classification/VOLO/timm/loss/asymmetric_loss.py +++ b/PyTorch/contrib/cv/classification/VOLO/timm/loss/asymmetric_loss.py @@ -1,143 +1,143 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -import torch.nn as nn - - -class AsymmetricLossMultiLabel(nn.Module): - def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): - super(AsymmetricLossMultiLabel, self).__init__() - - self.gamma_neg = gamma_neg - self.gamma_pos = gamma_pos - self.clip = clip - self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss - self.eps = eps - - def forward(self, x, y): - """" - Parameters - ---------- - x: input logits - y: targets (multi-label binarized vector) - """ - - # Calculating Probabilities - x_sigmoid = torch.sigmoid(x) - xs_pos = x_sigmoid - xs_neg = 1 - x_sigmoid - - # Asymmetric Clipping - if self.clip is not None and self.clip > 0: - xs_neg = (xs_neg + self.clip).clamp(max=1) - - # Basic CE calculation - los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) - los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) - loss = los_pos + los_neg - - # Asymmetric Focusing - if self.gamma_neg > 0 or self.gamma_pos > 0: - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(False) - pt0 = xs_pos * y - pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p - pt = pt0 + pt1 - one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) - one_sided_w = torch.pow(1 - pt, one_sided_gamma) - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(True) - loss *= one_sided_w - - return -loss.sum() - - -class AsymmetricLossSingleLabel(nn.Module): - def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): - super(AsymmetricLossSingleLabel, self).__init__() - - self.eps = eps - self.logsoftmax = nn.LogSoftmax(dim=-1) - self.targets_classes = [] # prevent gpu repeated memory allocation - self.gamma_pos = gamma_pos - self.gamma_neg = gamma_neg - self.reduction = reduction - - def forward(self, inputs, target, reduction=None): - """" - Parameters - ---------- - x: input logits - y: targets (1-hot vector) - """ - - num_classes = inputs.size()[-1] - log_preds = self.logsoftmax(inputs) - self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) - - # ASL weights - targets = self.targets_classes - anti_targets = 1 - targets - xs_pos = 
torch.exp(log_preds) - xs_neg = 1 - xs_pos - xs_pos = xs_pos * targets - xs_neg = xs_neg * anti_targets - asymmetric_w = torch.pow(1 - xs_pos - xs_neg, - self.gamma_pos * targets + self.gamma_neg * anti_targets) - log_preds = log_preds * asymmetric_w - - if self.eps > 0: # label smoothing - self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) - - # loss calculation - loss = - self.targets_classes.mul(log_preds) - - loss = loss.sum(dim=-1) - if self.reduction == 'mean': - loss = loss.mean() - - return loss +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import torch +import torch.nn as nn + + +class AsymmetricLossMultiLabel(nn.Module): + def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): + super(AsymmetricLossMultiLabel, self).__init__() + + self.gamma_neg = gamma_neg + self.gamma_pos = gamma_pos + self.clip = clip + self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss + self.eps = eps + + def forward(self, x, y): + """" + Parameters + ---------- + x: input logits + y: targets (multi-label binarized vector) + """ + + # Calculating Probabilities + x_sigmoid = torch.sigmoid(x) + xs_pos = x_sigmoid + xs_neg = 1 - x_sigmoid + + # Asymmetric Clipping + if self.clip is not None and self.clip > 0: + xs_neg = (xs_neg + self.clip).clamp(max=1) + + # Basic CE calculation + los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) + los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) + loss = los_pos + los_neg + + # Asymmetric Focusing + if self.gamma_neg > 0 or self.gamma_pos > 0: + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(False) + pt0 = xs_pos * y + pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p + pt = pt0 + pt1 + one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) + one_sided_w = torch.pow(1 - pt, one_sided_gamma) + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(True) + loss *= one_sided_w + + return -loss.sum() + + +class AsymmetricLossSingleLabel(nn.Module): + def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): + super(AsymmetricLossSingleLabel, self).__init__() + + self.eps = eps + self.logsoftmax = nn.LogSoftmax(dim=-1) + self.targets_classes = [] # prevent gpu repeated memory allocation + self.gamma_pos = gamma_pos + self.gamma_neg = gamma_neg + self.reduction = reduction + + def forward(self, inputs, target, reduction=None): + """" + Parameters + ---------- + x: input logits + y: targets (1-hot vector) + """ + + num_classes = inputs.size()[-1] + log_preds = self.logsoftmax(inputs) + self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) + + # ASL weights + targets = self.targets_classes + anti_targets = 1 - targets + xs_pos = torch.exp(log_preds) + xs_neg = 1 - xs_pos + xs_pos = xs_pos * targets + xs_neg = xs_neg * anti_targets + asymmetric_w = torch.pow(1 - xs_pos - xs_neg, + self.gamma_pos * targets + self.gamma_neg * anti_targets) + log_preds = log_preds * asymmetric_w + + if self.eps > 0: # label smoothing + self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) + + # loss calculation + loss = - self.targets_classes.mul(log_preds) + + loss = loss.sum(dim=-1) + if self.reduction == 'mean': + loss = loss.mean() + + return loss diff --git a/PyTorch/contrib/cv/classification/VOLO/tlt/__init__.py b/PyTorch/contrib/cv/classification/VOLO/tlt/__init__.py index a53b3e9888443e3fa26623b6c3de76b7b139151b..39dd95c737d325e40f517425d9cca3ffc0ee96d6 100644 --- a/PyTorch/contrib/cv/classification/VOLO/tlt/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/tlt/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from .utils import load_pretrained_weights \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/tlt/data/random_augment_label.py b/PyTorch/contrib/cv/classification/VOLO/tlt/data/random_augment_label.py index a6a95782e13a32b75485215b72716c0105872e63..005494f1609acb9eb14a935125e59755771312a9 100644 --- a/PyTorch/contrib/cv/classification/VOLO/tlt/data/random_augment_label.py +++ b/PyTorch/contrib/cv/classification/VOLO/tlt/data/random_augment_label.py @@ -1,606 +1,606 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -""" -Adapted from https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/auto_augment.py and modified for token labeling -AutoAugment, RandAugment -""" -import random -import math -import re -from PIL import Image, ImageOps, ImageEnhance, ImageChops -import PIL -import numpy as np -from scipy import ndimage -import torch - -_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) - -_FILL = (128, 128, 128) - - - -# This signifies the max integer that the controller RNN could predict for the -# augmentation scheme. -_MAX_LEVEL = 10. - -_HPARAMS_DEFAULT = dict( - translate_const=250, - img_mean=_FILL, -) - -_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) - - -def _interpolation(kwargs): - interpolation = kwargs.pop('resample', Image.BILINEAR) - if isinstance(interpolation, (list, tuple)): - return random.choice(interpolation) - else: - return interpolation - -def affine_label(label, matrix): - - # label: 2, k, H, W - # label[0] value, label[1] index - a,b,c,d,e,f = matrix - affine_matrix = [[1,0,0,0],[0,a,b,c],[0,d,e,f]] - value = ndimage.affine_transform(label[0],matrix=affine_matrix, order=0, mode="constant") - index = ndimage.affine_transform(label[1],matrix=affine_matrix, order=0, mode="nearest") - - return torch.from_numpy(np.stack([value, index],axis=0)) - -def _check_args_tf(kwargs): - if 'fillcolor' in kwargs and _PIL_VER < (5, 0): - kwargs.pop('fillcolor') - kwargs['resample'] = _interpolation(kwargs) - - -def shear_x(img, factor, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), **kwargs) - -def shear_y_label(label, factor): - return affine_label(label, (1, factor, 0, 0, 1, 0)) - - -def shear_y(img, factor, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), **kwargs) - -def shear_x_label(label, factor): - return affine_label(label, (1, 0, 0, factor, 1, 0)) - -def translate_x_rel(img, pct, **kwargs): - pixels = pct * img.size[0] - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) - -def translate_y_rel_label(label, pct): - pixels = pct * label.size(2) - return affine_label(label, (1, 0, pixels, 0, 1, 0)) - - -def translate_y_rel(img, pct, **kwargs): - pixels = pct * img.size[1] - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) - -def translate_x_rel_label(label, pct): - pixels = pct * label.size(3) - return affine_label(label, (1, 0, 0, 0, 1, pixels)) - - -def translate_x_abs(img, pixels, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) - - -def translate_y_abs(img, pixels, **kwargs): - _check_args_tf(kwargs) - return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) - - -def rotate(img, degrees, **kwargs): - _check_args_tf(kwargs) - if _PIL_VER >= (5, 2): - return img.rotate(degrees, **kwargs) - elif _PIL_VER >= (5, 0): - w, h = img.size - post_trans = (0, 0) - rotn_center = (w / 2.0, h / 2.0) - angle = -math.radians(degrees) - matrix = [ - round(math.cos(angle), 15), - round(math.sin(angle), 15), - 0.0, - round(-math.sin(angle), 15), - round(math.cos(angle), 15), - 0.0, - ] - - def transform(x, y, matrix): - (a, b, c, d, e, f) = matrix - return a * x + b * y + c, d * x + e * y + f - - matrix[2], matrix[5] = transform( - -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix - ) - matrix[2] += rotn_center[0] - 
matrix[5] += rotn_center[1] - return img.transform(img.size, Image.AFFINE, matrix, **kwargs) - else: - return img.rotate(degrees, resample=kwargs['resample']) - -def rotate_label(label, degrees): - _,_, w, h = label.size() - post_trans = (0, 0) - rotn_center = (w / 2.0, h / 2.0) - angle = math.radians(degrees) - matrix = [ - round(math.cos(angle), 15), - round(math.sin(angle), 15), - 0.0, - round(-math.sin(angle), 15), - round(math.cos(angle), 15), - 0.0, - ] - - def transform(x, y, matrix): - (a, b, c, d, e, f) = matrix - return a * x + b * y + c, d * x + e * y + f - - matrix[2], matrix[5] = transform( - -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix - ) - matrix[2] += rotn_center[0] - matrix[5] += rotn_center[1] - return affine_label(label, matrix) - - -def auto_contrast(img, **__): - return ImageOps.autocontrast(img) - - -def invert(img, **__): - return ImageOps.invert(img) - - -def equalize(img, **__): - return ImageOps.equalize(img) - - -def solarize(img, thresh, **__): - return ImageOps.solarize(img, thresh) - - -def solarize_add(img, add, thresh=128, **__): - lut = [] - for i in range(256): - if i < thresh: - lut.append(min(255, i + add)) - else: - lut.append(i) - if img.mode in ("L", "RGB"): - if img.mode == "RGB" and len(lut) == 256: - lut = lut + lut + lut - return img.point(lut) - else: - return img - - -def posterize(img, bits_to_keep, **__): - if bits_to_keep >= 8: - return img - return ImageOps.posterize(img, bits_to_keep) - - -def contrast(img, factor, **__): - return ImageEnhance.Contrast(img).enhance(factor) - - -def color(img, factor, **__): - return ImageEnhance.Color(img).enhance(factor) - - -def brightness(img, factor, **__): - return ImageEnhance.Brightness(img).enhance(factor) - - -def sharpness(img, factor, **__): - return ImageEnhance.Sharpness(img).enhance(factor) - - -def _randomly_negate(v): - """With 50% prob, negate the value""" - return -v if random.random() > 0.5 else v - - -def _rotate_level_to_arg(level, _hparams): - # range [-30, 30] - level = (level / _MAX_LEVEL) * 30. - level = _randomly_negate(level) - return level, - - -def _enhance_level_to_arg(level, _hparams): - # range [0.1, 1.9] - return (level / _MAX_LEVEL) * 1.8 + 0.1, - - -def _enhance_increasing_level_to_arg(level, _hparams): - # the 'no change' level is 1.0, moving away from that towards 0. 
or 2.0 increases the enhancement blend - # range [0.1, 1.9] - level = (level / _MAX_LEVEL) * .9 - level = 1.0 + _randomly_negate(level) - return level, - - -def _shear_level_to_arg(level, _hparams): - # range [-0.3, 0.3] - level = (level / _MAX_LEVEL) * 0.3 - level = _randomly_negate(level) - return level, - - -def _translate_abs_level_to_arg(level, hparams): - translate_const = hparams['translate_const'] - level = (level / _MAX_LEVEL) * float(translate_const) - level = _randomly_negate(level) - return level, - - -def _translate_rel_level_to_arg(level, hparams): - # default range [-0.45, 0.45] - translate_pct = hparams.get('translate_pct', 0.45) - level = (level / _MAX_LEVEL) * translate_pct - level = _randomly_negate(level) - return level, - - -def _posterize_level_to_arg(level, _hparams): - # As per Tensorflow TPU EfficientNet impl - # range [0, 4], 'keep 0 up to 4 MSB of original image' - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 4), - - -def _posterize_increasing_level_to_arg(level, hparams): - # As per Tensorflow models research and UDA impl - # range [4, 0], 'keep 4 down to 0 MSB of original image', - # intensity/severity of augmentation increases with level - return 4 - _posterize_level_to_arg(level, hparams)[0], - - -def _posterize_original_level_to_arg(level, _hparams): - # As per original AutoAugment paper description - # range [4, 8], 'keep 4 up to 8 MSB of image' - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 4) + 4, - - -def _solarize_level_to_arg(level, _hparams): - # range [0, 256] - # intensity/severity of augmentation decreases with level - return int((level / _MAX_LEVEL) * 256), - - -def _solarize_increasing_level_to_arg(level, _hparams): - # range [0, 256] - # intensity/severity of augmentation increases with level - return 256 - _solarize_level_to_arg(level, _hparams)[0], - - -def _solarize_add_level_to_arg(level, _hparams): - # range [0, 110] - return int((level / _MAX_LEVEL) * 110), - -class AugmentOp: - - def __init__(self, name, prob=0.5, magnitude=10, hparams=None): - hparams = hparams or _HPARAMS_DEFAULT - self.name = name - self.aug_fn = NAME_TO_OP[name] - self.label_fn = NAME_TO_LABELOP[name] - self.level_fn = LEVEL_TO_ARG[name] - self.prob = prob - self.magnitude = magnitude - self.hparams = hparams.copy() - self.kwargs = dict( - fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, - resample=hparams['interpolation'] if 'interpolation' in hparams else _RANDOM_INTERPOLATION, - ) - - # If magnitude_std is > 0, we introduce some randomness - # in the usually fixed policy and sample magnitude from a normal distribution - # with mean `magnitude` and std-dev of `magnitude_std`. - # NOTE This is my own hack, being tested, not in papers or reference impls. 
- self.magnitude_std = self.hparams.get('magnitude_std', 0) - - def __call__(self, img, label): - if self.prob < 1.0 and random.random() > self.prob: - return img, label - magnitude = self.magnitude - if self.magnitude_std and self.magnitude_std > 0: - magnitude = random.gauss(magnitude, self.magnitude_std) - magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range - level_args = self.level_fn(magnitude, self.hparams) if self.level_fn is not None else tuple() - if self.label_fn is not None: - - aug_label = self.label_fn(label, *level_args) - else: - aug_label = label - return self.aug_fn(img, *level_args, **self.kwargs), aug_label - -LEVEL_TO_ARG = { - 'AutoContrast': None, - 'Equalize': None, - 'Invert': None, - 'Rotate': _rotate_level_to_arg, - # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers - 'Posterize': _posterize_level_to_arg, - 'PosterizeIncreasing': _posterize_increasing_level_to_arg, - 'PosterizeOriginal': _posterize_original_level_to_arg, - 'Solarize': _solarize_level_to_arg, - 'SolarizeIncreasing': _solarize_increasing_level_to_arg, - 'SolarizeAdd': _solarize_add_level_to_arg, - 'Color': _enhance_level_to_arg, - 'ColorIncreasing': _enhance_increasing_level_to_arg, - 'Contrast': _enhance_level_to_arg, - 'ContrastIncreasing': _enhance_increasing_level_to_arg, - 'Brightness': _enhance_level_to_arg, - 'BrightnessIncreasing': _enhance_increasing_level_to_arg, - 'Sharpness': _enhance_level_to_arg, - 'SharpnessIncreasing': _enhance_increasing_level_to_arg, - 'ShearX': _shear_level_to_arg, - 'ShearY': _shear_level_to_arg, - 'TranslateX': _translate_abs_level_to_arg, - 'TranslateY': _translate_abs_level_to_arg, - 'TranslateXRel': _translate_rel_level_to_arg, - 'TranslateYRel': _translate_rel_level_to_arg, -} - - -NAME_TO_OP = { - 'AutoContrast': auto_contrast, - 'Equalize': equalize, - 'Invert': invert, - 'Rotate': rotate, - 'Posterize': posterize, - 'PosterizeIncreasing': posterize, - 'PosterizeOriginal': posterize, - 'Solarize': solarize, - 'SolarizeIncreasing': solarize, - 'SolarizeAdd': solarize_add, - 'Color': color, - 'ColorIncreasing': color, - 'Contrast': contrast, - 'ContrastIncreasing': contrast, - 'Brightness': brightness, - 'BrightnessIncreasing': brightness, - 'Sharpness': sharpness, - 'SharpnessIncreasing': sharpness, - 'ShearX': shear_x, - 'ShearY': shear_y, - 'TranslateX': translate_x_abs, - 'TranslateY': translate_y_abs, - 'TranslateXRel': translate_x_rel, - 'TranslateYRel': translate_y_rel, -} -# Remove TranslateX and TranslateY here since it is actually not used in random aug -# Only spatial op should be applied to the label map -NAME_TO_LABELOP = { - 'AutoContrast': None, - 'Equalize': None, - 'Invert': None, - 'Rotate': rotate_label, - 'Posterize': None, - 'PosterizeIncreasing': None, - 'PosterizeOriginal': None, - 'Solarize': None, - 'SolarizeIncreasing': None, - 'SolarizeAdd': None, - 'Color': None, - 'ColorIncreasing': None, - 'Contrast': None, - 'ContrastIncreasing': None, - 'Brightness': None, - 'BrightnessIncreasing': None, - 'Sharpness': None, - 'SharpnessIncreasing': None, - 'ShearX': shear_x_label, - 'ShearY': shear_y_label, - 'TranslateX': None, - 'TranslateY': None, - 'TranslateXRel': translate_x_rel_label, - 'TranslateYRel': translate_y_rel_label, -} - - -_RAND_TRANSFORMS = [ - 'AutoContrast', - 'Equalize', - 'Invert', - 'Rotate', - 'Posterize', - 'Solarize', - 'SolarizeAdd', - 'Color', - 'Contrast', - 'Brightness', - 'Sharpness', - 'ShearX', - 'ShearY', - 'TranslateXRel', - 
'TranslateYRel', - #'Cutout' -] - - -_RAND_INCREASING_TRANSFORMS = [ - 'AutoContrast', - 'Equalize', - 'Invert', - 'Rotate', - 'PosterizeIncreasing', - 'SolarizeIncreasing', - 'SolarizeAdd', - 'ColorIncreasing', - 'ContrastIncreasing', - 'BrightnessIncreasing', - 'SharpnessIncreasing', - 'ShearX', - 'ShearY', - 'TranslateXRel', - 'TranslateYRel', - #'Cutout' -] - - - -# These experimental weights are based loosely on the relative improvements mentioned in paper. -# They may not result in increased performance, but could likely be tuned to so. -_RAND_CHOICE_WEIGHTS_0 = { - 'Rotate': 0.3, - 'ShearX': 0.2, - 'ShearY': 0.2, - 'TranslateXRel': 0.1, - 'TranslateYRel': 0.1, - 'Color': .025, - 'Sharpness': 0.025, - 'AutoContrast': 0.025, - 'Solarize': .005, - 'SolarizeAdd': .005, - 'Contrast': .005, - 'Brightness': .005, - 'Equalize': .005, - 'Posterize': 0, - 'Invert': 0, -} - - -def _select_rand_weights(weight_idx=0, transforms=None): - transforms = transforms or _RAND_TRANSFORMS - assert weight_idx == 0 # only one set of weights currently - rand_weights = _RAND_CHOICE_WEIGHTS_0 - probs = [rand_weights[k] for k in transforms] - probs /= np.sum(probs) - return probs - - -def rand_augment_ops(magnitude=10, hparams=None, transforms=None): - hparams = hparams or _HPARAMS_DEFAULT - transforms = transforms or _RAND_TRANSFORMS - return [AugmentOp( - name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms] - - -class RandAugment: - ''' - Apply RandAug on both image and dense label map - ''' - def __init__(self, ops, num_layers=2, choice_weights=None): - self.ops = ops - self.num_layers = num_layers - self.choice_weights = choice_weights - - def __call__(self, img, label): - # no replacement when using weighted choice - ops = np.random.choice( - self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights) - for op in ops: - img, label = op(img, label) - return img, label - - -def rand_augment_transform(config_str, hparams): - """ - Create a RandAugment transform with label - :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). 
The remaining - sections, not order sepecific determine - 'm' - integer magnitude of rand augment - 'n' - integer num layers (number of transform ops selected per image) - 'w' - integer probabiliy weight index (index of a set of weights to influence choice of op) - 'mstd' - float std deviation of magnitude noise applied - 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) - Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 - 'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2 - - :param hparams: Other hparams (kwargs) for the RandAugmentation scheme - - :return: A PyTorch compatible Transform - """ - magnitude = _MAX_LEVEL # default to _MAX_LEVEL for magnitude (currently 10) - num_layers = 2 # default to 2 ops per image - weight_idx = None # default to no probability weights for op choice - transforms = _RAND_TRANSFORMS - config = config_str.split('-') - assert config[0] == 'rand' - config = config[1:] - for c in config: - cs = re.split(r'(\d.*)', c) - if len(cs) < 2: - continue - key, val = cs[:2] - if key == 'mstd': - # noise param injected via hparams for now - hparams.setdefault('magnitude_std', float(val)) - elif key == 'inc': - if bool(val): - transforms = _RAND_INCREASING_TRANSFORMS - elif key == 'm': - magnitude = int(val) - elif key == 'n': - num_layers = int(val) - elif key == 'w': - weight_idx = int(val) - else: - assert False, 'Unknown RandAugment config section' - ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms) - choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx) - return RandAugment(ra_ops, num_layers, choice_weights=choice_weights) - +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +""" +Adapted from https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/auto_augment.py and modified for token labeling +AutoAugment, RandAugment +""" +import random +import math +import re +from PIL import Image, ImageOps, ImageEnhance, ImageChops +import PIL +import numpy as np +from scipy import ndimage +import torch + +_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) + +_FILL = (128, 128, 128) + + + +# This signifies the max integer that the controller RNN could predict for the +# augmentation scheme. +_MAX_LEVEL = 10. + +_HPARAMS_DEFAULT = dict( + translate_const=250, + img_mean=_FILL, +) + +_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) + + +def _interpolation(kwargs): + interpolation = kwargs.pop('resample', Image.BILINEAR) + if isinstance(interpolation, (list, tuple)): + return random.choice(interpolation) + else: + return interpolation + +def affine_label(label, matrix): + + # label: 2, k, H, W + # label[0] value, label[1] index + a,b,c,d,e,f = matrix + affine_matrix = [[1,0,0,0],[0,a,b,c],[0,d,e,f]] + value = ndimage.affine_transform(label[0],matrix=affine_matrix, order=0, mode="constant") + index = ndimage.affine_transform(label[1],matrix=affine_matrix, order=0, mode="nearest") + + return torch.from_numpy(np.stack([value, index],axis=0)) + +def _check_args_tf(kwargs): + if 'fillcolor' in kwargs and _PIL_VER < (5, 0): + kwargs.pop('fillcolor') + kwargs['resample'] = _interpolation(kwargs) + + +def shear_x(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), **kwargs) + +def shear_y_label(label, factor): + return affine_label(label, (1, factor, 0, 0, 1, 0)) + + +def shear_y(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), **kwargs) + +def shear_x_label(label, factor): + return affine_label(label, (1, 0, 0, factor, 1, 0)) + +def translate_x_rel(img, pct, **kwargs): + pixels = pct * img.size[0] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + +def translate_y_rel_label(label, pct): + pixels = pct * label.size(2) + return affine_label(label, (1, 0, pixels, 0, 1, 0)) + + +def translate_y_rel(img, pct, **kwargs): + pixels = pct * img.size[1] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + +def translate_x_rel_label(label, pct): + pixels = pct * label.size(3) + return affine_label(label, (1, 0, 0, 0, 1, pixels)) + + +def translate_x_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), **kwargs) + + +def translate_y_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), **kwargs) + + +def rotate(img, degrees, **kwargs): + _check_args_tf(kwargs) + if _PIL_VER >= (5, 2): + return img.rotate(degrees, **kwargs) + elif _PIL_VER >= (5, 0): + w, h = img.size + post_trans = (0, 0) + rotn_center = (w / 2.0, h / 2.0) + angle = -math.radians(degrees) + matrix 
= [ + round(math.cos(angle), 15), + round(math.sin(angle), 15), + 0.0, + round(-math.sin(angle), 15), + round(math.cos(angle), 15), + 0.0, + ] + + def transform(x, y, matrix): + (a, b, c, d, e, f) = matrix + return a * x + b * y + c, d * x + e * y + f + + matrix[2], matrix[5] = transform( + -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix + ) + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + return img.transform(img.size, Image.AFFINE, matrix, **kwargs) + else: + return img.rotate(degrees, resample=kwargs['resample']) + +def rotate_label(label, degrees): + _,_, w, h = label.size() + post_trans = (0, 0) + rotn_center = (w / 2.0, h / 2.0) + angle = math.radians(degrees) + matrix = [ + round(math.cos(angle), 15), + round(math.sin(angle), 15), + 0.0, + round(-math.sin(angle), 15), + round(math.cos(angle), 15), + 0.0, + ] + + def transform(x, y, matrix): + (a, b, c, d, e, f) = matrix + return a * x + b * y + c, d * x + e * y + f + + matrix[2], matrix[5] = transform( + -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix + ) + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + return affine_label(label, matrix) + + +def auto_contrast(img, **__): + return ImageOps.autocontrast(img) + + +def invert(img, **__): + return ImageOps.invert(img) + + +def equalize(img, **__): + return ImageOps.equalize(img) + + +def solarize(img, thresh, **__): + return ImageOps.solarize(img, thresh) + + +def solarize_add(img, add, thresh=128, **__): + lut = [] + for i in range(256): + if i < thresh: + lut.append(min(255, i + add)) + else: + lut.append(i) + if img.mode in ("L", "RGB"): + if img.mode == "RGB" and len(lut) == 256: + lut = lut + lut + lut + return img.point(lut) + else: + return img + + +def posterize(img, bits_to_keep, **__): + if bits_to_keep >= 8: + return img + return ImageOps.posterize(img, bits_to_keep) + + +def contrast(img, factor, **__): + return ImageEnhance.Contrast(img).enhance(factor) + + +def color(img, factor, **__): + return ImageEnhance.Color(img).enhance(factor) + + +def brightness(img, factor, **__): + return ImageEnhance.Brightness(img).enhance(factor) + + +def sharpness(img, factor, **__): + return ImageEnhance.Sharpness(img).enhance(factor) + + +def _randomly_negate(v): + """With 50% prob, negate the value""" + return -v if random.random() > 0.5 else v + + +def _rotate_level_to_arg(level, _hparams): + # range [-30, 30] + level = (level / _MAX_LEVEL) * 30. + level = _randomly_negate(level) + return level, + + +def _enhance_level_to_arg(level, _hparams): + # range [0.1, 1.9] + return (level / _MAX_LEVEL) * 1.8 + 0.1, + + +def _enhance_increasing_level_to_arg(level, _hparams): + # the 'no change' level is 1.0, moving away from that towards 0. 
or 2.0 increases the enhancement blend + # range [0.1, 1.9] + level = (level / _MAX_LEVEL) * .9 + level = 1.0 + _randomly_negate(level) + return level, + + +def _shear_level_to_arg(level, _hparams): + # range [-0.3, 0.3] + level = (level / _MAX_LEVEL) * 0.3 + level = _randomly_negate(level) + return level, + + +def _translate_abs_level_to_arg(level, hparams): + translate_const = hparams['translate_const'] + level = (level / _MAX_LEVEL) * float(translate_const) + level = _randomly_negate(level) + return level, + + +def _translate_rel_level_to_arg(level, hparams): + # default range [-0.45, 0.45] + translate_pct = hparams.get('translate_pct', 0.45) + level = (level / _MAX_LEVEL) * translate_pct + level = _randomly_negate(level) + return level, + + +def _posterize_level_to_arg(level, _hparams): + # As per Tensorflow TPU EfficientNet impl + # range [0, 4], 'keep 0 up to 4 MSB of original image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4), + + +def _posterize_increasing_level_to_arg(level, hparams): + # As per Tensorflow models research and UDA impl + # range [4, 0], 'keep 4 down to 0 MSB of original image', + # intensity/severity of augmentation increases with level + return 4 - _posterize_level_to_arg(level, hparams)[0], + + +def _posterize_original_level_to_arg(level, _hparams): + # As per original AutoAugment paper description + # range [4, 8], 'keep 4 up to 8 MSB of image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4) + 4, + + +def _solarize_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 256), + + +def _solarize_increasing_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation increases with level + return 256 - _solarize_level_to_arg(level, _hparams)[0], + + +def _solarize_add_level_to_arg(level, _hparams): + # range [0, 110] + return int((level / _MAX_LEVEL) * 110), + +class AugmentOp: + + def __init__(self, name, prob=0.5, magnitude=10, hparams=None): + hparams = hparams or _HPARAMS_DEFAULT + self.name = name + self.aug_fn = NAME_TO_OP[name] + self.label_fn = NAME_TO_LABELOP[name] + self.level_fn = LEVEL_TO_ARG[name] + self.prob = prob + self.magnitude = magnitude + self.hparams = hparams.copy() + self.kwargs = dict( + fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, + resample=hparams['interpolation'] if 'interpolation' in hparams else _RANDOM_INTERPOLATION, + ) + + # If magnitude_std is > 0, we introduce some randomness + # in the usually fixed policy and sample magnitude from a normal distribution + # with mean `magnitude` and std-dev of `magnitude_std`. + # NOTE This is my own hack, being tested, not in papers or reference impls. 
+ self.magnitude_std = self.hparams.get('magnitude_std', 0) + + def __call__(self, img, label): + if self.prob < 1.0 and random.random() > self.prob: + return img, label + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn(magnitude, self.hparams) if self.level_fn is not None else tuple() + if self.label_fn is not None: + + aug_label = self.label_fn(label, *level_args) + else: + aug_label = label + return self.aug_fn(img, *level_args, **self.kwargs), aug_label + +LEVEL_TO_ARG = { + 'AutoContrast': None, + 'Equalize': None, + 'Invert': None, + 'Rotate': _rotate_level_to_arg, + # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers + 'Posterize': _posterize_level_to_arg, + 'PosterizeIncreasing': _posterize_increasing_level_to_arg, + 'PosterizeOriginal': _posterize_original_level_to_arg, + 'Solarize': _solarize_level_to_arg, + 'SolarizeIncreasing': _solarize_increasing_level_to_arg, + 'SolarizeAdd': _solarize_add_level_to_arg, + 'Color': _enhance_level_to_arg, + 'ColorIncreasing': _enhance_increasing_level_to_arg, + 'Contrast': _enhance_level_to_arg, + 'ContrastIncreasing': _enhance_increasing_level_to_arg, + 'Brightness': _enhance_level_to_arg, + 'BrightnessIncreasing': _enhance_increasing_level_to_arg, + 'Sharpness': _enhance_level_to_arg, + 'SharpnessIncreasing': _enhance_increasing_level_to_arg, + 'ShearX': _shear_level_to_arg, + 'ShearY': _shear_level_to_arg, + 'TranslateX': _translate_abs_level_to_arg, + 'TranslateY': _translate_abs_level_to_arg, + 'TranslateXRel': _translate_rel_level_to_arg, + 'TranslateYRel': _translate_rel_level_to_arg, +} + + +NAME_TO_OP = { + 'AutoContrast': auto_contrast, + 'Equalize': equalize, + 'Invert': invert, + 'Rotate': rotate, + 'Posterize': posterize, + 'PosterizeIncreasing': posterize, + 'PosterizeOriginal': posterize, + 'Solarize': solarize, + 'SolarizeIncreasing': solarize, + 'SolarizeAdd': solarize_add, + 'Color': color, + 'ColorIncreasing': color, + 'Contrast': contrast, + 'ContrastIncreasing': contrast, + 'Brightness': brightness, + 'BrightnessIncreasing': brightness, + 'Sharpness': sharpness, + 'SharpnessIncreasing': sharpness, + 'ShearX': shear_x, + 'ShearY': shear_y, + 'TranslateX': translate_x_abs, + 'TranslateY': translate_y_abs, + 'TranslateXRel': translate_x_rel, + 'TranslateYRel': translate_y_rel, +} +# Remove TranslateX and TranslateY here since it is actually not used in random aug +# Only spatial op should be applied to the label map +NAME_TO_LABELOP = { + 'AutoContrast': None, + 'Equalize': None, + 'Invert': None, + 'Rotate': rotate_label, + 'Posterize': None, + 'PosterizeIncreasing': None, + 'PosterizeOriginal': None, + 'Solarize': None, + 'SolarizeIncreasing': None, + 'SolarizeAdd': None, + 'Color': None, + 'ColorIncreasing': None, + 'Contrast': None, + 'ContrastIncreasing': None, + 'Brightness': None, + 'BrightnessIncreasing': None, + 'Sharpness': None, + 'SharpnessIncreasing': None, + 'ShearX': shear_x_label, + 'ShearY': shear_y_label, + 'TranslateX': None, + 'TranslateY': None, + 'TranslateXRel': translate_x_rel_label, + 'TranslateYRel': translate_y_rel_label, +} + + +_RAND_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + 'Rotate', + 'Posterize', + 'Solarize', + 'SolarizeAdd', + 'Color', + 'Contrast', + 'Brightness', + 'Sharpness', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 
'TranslateYRel', + #'Cutout' +] + + +_RAND_INCREASING_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + 'Rotate', + 'PosterizeIncreasing', + 'SolarizeIncreasing', + 'SolarizeAdd', + 'ColorIncreasing', + 'ContrastIncreasing', + 'BrightnessIncreasing', + 'SharpnessIncreasing', + 'ShearX', + 'ShearY', + 'TranslateXRel', + 'TranslateYRel', + #'Cutout' +] + + + +# These experimental weights are based loosely on the relative improvements mentioned in paper. +# They may not result in increased performance, but could likely be tuned to so. +_RAND_CHOICE_WEIGHTS_0 = { + 'Rotate': 0.3, + 'ShearX': 0.2, + 'ShearY': 0.2, + 'TranslateXRel': 0.1, + 'TranslateYRel': 0.1, + 'Color': .025, + 'Sharpness': 0.025, + 'AutoContrast': 0.025, + 'Solarize': .005, + 'SolarizeAdd': .005, + 'Contrast': .005, + 'Brightness': .005, + 'Equalize': .005, + 'Posterize': 0, + 'Invert': 0, +} + + +def _select_rand_weights(weight_idx=0, transforms=None): + transforms = transforms or _RAND_TRANSFORMS + assert weight_idx == 0 # only one set of weights currently + rand_weights = _RAND_CHOICE_WEIGHTS_0 + probs = [rand_weights[k] for k in transforms] + probs /= np.sum(probs) + return probs + + +def rand_augment_ops(magnitude=10, hparams=None, transforms=None): + hparams = hparams or _HPARAMS_DEFAULT + transforms = transforms or _RAND_TRANSFORMS + return [AugmentOp( + name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms] + + +class RandAugment: + ''' + Apply RandAug on both image and dense label map + ''' + def __init__(self, ops, num_layers=2, choice_weights=None): + self.ops = ops + self.num_layers = num_layers + self.choice_weights = choice_weights + + def __call__(self, img, label): + # no replacement when using weighted choice + ops = np.random.choice( + self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights) + for op in ops: + img, label = op(img, label) + return img, label + + +def rand_augment_transform(config_str, hparams): + """ + Create a RandAugment transform with label + :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). 
The remaining + sections, not order sepecific determine + 'm' - integer magnitude of rand augment + 'n' - integer num layers (number of transform ops selected per image) + 'w' - integer probabiliy weight index (index of a set of weights to influence choice of op) + 'mstd' - float std deviation of magnitude noise applied + 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) + Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 + 'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2 + + :param hparams: Other hparams (kwargs) for the RandAugmentation scheme + + :return: A PyTorch compatible Transform + """ + magnitude = _MAX_LEVEL # default to _MAX_LEVEL for magnitude (currently 10) + num_layers = 2 # default to 2 ops per image + weight_idx = None # default to no probability weights for op choice + transforms = _RAND_TRANSFORMS + config = config_str.split('-') + assert config[0] == 'rand' + config = config[1:] + for c in config: + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param injected via hparams for now + hparams.setdefault('magnitude_std', float(val)) + elif key == 'inc': + if bool(val): + transforms = _RAND_INCREASING_TRANSFORMS + elif key == 'm': + magnitude = int(val) + elif key == 'n': + num_layers = int(val) + elif key == 'w': + weight_idx = int(val) + else: + assert False, 'Unknown RandAugment config section' + ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms) + choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx) + return RandAugment(ra_ops, num_layers, choice_weights=choice_weights) + diff --git a/PyTorch/contrib/cv/classification/VOLO/tlt/loss/__init__.py b/PyTorch/contrib/cv/classification/VOLO/tlt/loss/__init__.py index 47166af5c809572250d0a7d76e2a8605b90c6540..e1019595216dc26960fcbae2e3c6154a075eda43 100644 --- a/PyTorch/contrib/cv/classification/VOLO/tlt/loss/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/tlt/loss/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" from .cross_entropy import TokenLabelCrossEntropy,TokenLabelSoftTargetCrossEntropy \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/tlt/utils/__init__.py b/PyTorch/contrib/cv/classification/VOLO/tlt/utils/__init__.py index a53b3e9888443e3fa26623b6c3de76b7b139151b..39dd95c737d325e40f517425d9cca3ffc0ee96d6 100644 --- a/PyTorch/contrib/cv/classification/VOLO/tlt/utils/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/tlt/utils/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" from .utils import load_pretrained_weights \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/VOLO/utils/__init__.py b/PyTorch/contrib/cv/classification/VOLO/utils/__init__.py index a53b3e9888443e3fa26623b6c3de76b7b139151b..39dd95c737d325e40f517425d9cca3ffc0ee96d6 100644 --- a/PyTorch/contrib/cv/classification/VOLO/utils/__init__.py +++ b/PyTorch/contrib/cv/classification/VOLO/utils/__init__.py @@ -1,47 +1,47 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" from .utils import load_pretrained_weights \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/README.md b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/README.md index 57d445ce4f69ffbe6eddeb7336e7eae0a8ac5d81..f2705c491b57bcb719d4d6381bdd7d9db6679c80 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/README.md +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/README.md @@ -1,55 +1,55 @@ -# Vit_small_patch16_224 - -This implements training of vit_small_patch16_224 on the ImageNet-2012 dataset, mainly modified from [rwightman/pytorch-image-models](https://github.com/rwightman/pytorch-image-models). - -## Vit_small_patch16_224 Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, vit_small_patch16_224 is re-implemented using semantics such as custom OP. - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) - -- `pip install -r requirements.txt` - -- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models - - -## Training - -To train a model, run `train.py` with the desired model architecture and the path to the Imagenet-2012 dataset: - -```bash -# training 1p accuracy -bash test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash test/train_performance_8p.sh --data_path=real_data_path - -# finetune -bash test/train_finetune_1p.sh --data_path=real_data_path --weight=real_weight_path - -# Online inference demo -python demo.py --checkpoint real_checkpoint_path - -# To ONNX -python pthtar2onnx.py -``` - -## Vit_small_patch16_224 training result - - -| | top1 | AMP_Type | Epochs | FPS | -| :----: | :---: | :------: | :----: | :-----: | -| 1p-GPU | - | O2 | 1 | 586.67 | -| 1p-NPU | - | O2 | 1 | 304.06 | -| 8p-GPU | 67.65 | O2 | 100 | 4556.28 | -| 8p-NPU | 67.67 | O2 | 100 | 2373.80 | - +# Vit_small_patch16_224 + +This implements training of vit_small_patch16_224 on the ImageNet-2012 dataset, mainly modified from [rwightman/pytorch-image-models](https://github.com/rwightman/pytorch-image-models). 
+ +## Vit_small_patch16_224 Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, vit_small_patch16_224 is re-implemented using semantics such as custom OP. + + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) + +- `pip install -r requirements.txt` + +- Download the Imagenet-2012 dataset. Refer to the original repository https://github.com/rwightman/pytorch-image-models + + +## Training + +To train a model, run `train.py` with the desired model architecture and the path to the Imagenet-2012 dataset: + +```bash +# training 1p accuracy +bash test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash test/train_performance_8p.sh --data_path=real_data_path + +# finetune +bash test/train_finetune_1p.sh --data_path=real_data_path --weight=real_weight_path + +# Online inference demo +python demo.py --checkpoint real_checkpoint_path + +# To ONNX +python pthtar2onnx.py +``` + +## Vit_small_patch16_224 training result + + +| | top1 | AMP_Type | Epochs | FPS | +| :----: | :---: | :------: | :----: | :-----: | +| 1p-GPU | - | O2 | 1 | 586.67 | +| 1p-NPU | - | O2 | 1 | 304.06 | +| 8p-GPU | 67.65 | O2 | 100 | 4556.28 | +| 8p-NPU | 67.67 | O2 | 100 | 2373.80 | + diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/demo.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/demo.py index fcad4e7d2ad11596dac9955bcc46f7eb8da7df3e..87fd6c849b7af4f31bf60c31cfa507129e032fb7 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/demo.py +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/demo.py @@ -1,85 +1,85 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import argparse -import torch -import torchvision -from torchvision import datasets, transforms -from collections import OrderedDict - -from timm.models import create_model - -parser = argparse.ArgumentParser(description='ImageNet Training') -parser.add_argument('--checkpoint', type=str, default='', - help='checkpoint path') -args = parser.parse_args() - -def proc_node_module(checkpoint, attr_name): - new_state_dict = OrderedDict() - for k, v in checkpoint[attr_name].items(): - if(k[0: 7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - -def get_raw_data(): - from PIL import Image - from urllib.request import urlretrieve - IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' - urlretrieve(IMAGE_URL, 'tmp.jpg') - img = Image.open("tmp.jpg") - img = img.convert('RGB') - return img - -def test(): - loc = 'npu:0' - loc_cpu = 'cpu' - torch.npu.set_device(loc) - if args.checkpoint == '': - print("please give the checkpoint path using --checkpoint param") - exit(0) - checkpoint = torch.load(args.checkpoint, map_location=loc) - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - - model = create_model('vit_small_patch16_224', pretrained=False) - - model = model.to(loc) - model.load_state_dict(checkpoint['state_dict']) - model.eval() - - normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]) - rd = get_raw_data() - data_transfrom = transforms.Compose([ - transforms.RandomResizedCrop(224,interpolation=3), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ]) - - inputs = data_transfrom(rd) - inputs = inputs.unsqueeze(0) - inputs = inputs.to(loc) - output = model(inputs) - output = output.to(loc_cpu) - - _, pred = output.topk(1, 1, True, True) - result = torch.argmax(output, 1) - print("class: ", pred[0][0].item()) - print(result) - -if __name__ == "__main__": +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import argparse +import torch +import torchvision +from torchvision import datasets, transforms +from collections import OrderedDict + +from timm.models import create_model + +parser = argparse.ArgumentParser(description='ImageNet Training') +parser.add_argument('--checkpoint', type=str, default='', + help='checkpoint path') +args = parser.parse_args() + +def proc_node_module(checkpoint, attr_name): + new_state_dict = OrderedDict() + for k, v in checkpoint[attr_name].items(): + if(k[0: 7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + +def get_raw_data(): + from PIL import Image + from urllib.request import urlretrieve + IMAGE_URL = 'https://bbs-img.huaweicloud.com/blogs/img/thumb/1591951315139_8989_1363.png' + urlretrieve(IMAGE_URL, 'tmp.jpg') + img = Image.open("tmp.jpg") + img = img.convert('RGB') + return img + +def test(): + loc = 'npu:0' + loc_cpu = 'cpu' + torch.npu.set_device(loc) + if args.checkpoint == '': + print("please give the checkpoint path using --checkpoint param") + exit(0) + checkpoint = torch.load(args.checkpoint, map_location=loc) + checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') + + model = create_model('vit_small_patch16_224', pretrained=False) + + model = model.to(loc) + model.load_state_dict(checkpoint['state_dict']) + model.eval() + + normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + rd = get_raw_data() + data_transfrom = transforms.Compose([ + transforms.RandomResizedCrop(224,interpolation=3), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ]) + + inputs = data_transfrom(rd) + inputs = inputs.unsqueeze(0) + inputs = inputs.to(loc) + output = model(inputs) + output = output.to(loc_cpu) + + _, pred = output.topk(1, 1, True, True) + result = torch.argmax(output, 1) + print("class: ", pred[0][0].item()) + print(result) + +if __name__ == "__main__": test() \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/distributed_train.sh b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/distributed_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/distributed_train_npu.sh b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/distributed_train_npu.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/pthtar2onnx.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/pthtar2onnx.py index d55c4c20c5d82404c50fc22b67773b9551f3e27d..ad98950f302d57c7c6a5b6a0a8e641ab9d3c65d6 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/pthtar2onnx.py +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/pthtar2onnx.py @@ -1,62 +1,62 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import torch -import timm - -def pth2onnx(model, output_file): - model.eval() - input_names = ["image"] - output_names = ["class"] - dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) - print("save to %s" % output_file) - -if __name__ == "__main__": - model = timm.create_model('vit_small_patch16_224', pretrained=False) - pth2onnx(model, "vit_small_patch16_224.onnx") +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import torch +import timm + +def pth2onnx(model, output_file): + model.eval() + input_names = ["image"] + output_names = ["class"] + dynamic_axes = {'image': {0: '-1'}, 'class': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + torch.onnx.export(model, dummy_input, output_file, input_names = input_names, dynamic_axes = dynamic_axes, output_names = output_names, opset_version=11, verbose=True) + print("save to %s" % output_file) + +if __name__ == "__main__": + model = timm.create_model('vit_small_patch16_224', pretrained=False) + pth2onnx(model, "vit_small_patch16_224.onnx") diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/loss/asymmetric_loss.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/loss/asymmetric_loss.py index d3a0d8650f81e0d79e6dc4321355f120ae0f4a3b..726f9410e43a3462675e9e92ce7cbb768be1a7ed 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/loss/asymmetric_loss.py +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/loss/asymmetric_loss.py @@ -1,143 +1,143 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import torch -import torch.nn as nn - - -class AsymmetricLossMultiLabel(nn.Module): - def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): - super(AsymmetricLossMultiLabel, self).__init__() - - self.gamma_neg = gamma_neg - self.gamma_pos = gamma_pos - self.clip = clip - self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss - self.eps = eps - - def forward(self, x, y): - """" - Parameters - ---------- - x: input logits - y: targets (multi-label binarized vector) - """ - - # Calculating Probabilities - x_sigmoid = torch.sigmoid(x) - xs_pos = x_sigmoid - xs_neg = 1 - x_sigmoid - - # Asymmetric Clipping - if self.clip is not None and self.clip > 0: - xs_neg = (xs_neg + self.clip).clamp(max=1) - - # Basic CE calculation - los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) - los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) - loss = los_pos + los_neg - - # Asymmetric Focusing - if self.gamma_neg > 0 or self.gamma_pos > 0: - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(False) - pt0 = xs_pos * y - pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p - pt = pt0 + pt1 - one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) - one_sided_w = torch.pow(1 - pt, one_sided_gamma) - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(True) - loss *= one_sided_w - - return -loss.sum() - - -class AsymmetricLossSingleLabel(nn.Module): - def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): - super(AsymmetricLossSingleLabel, self).__init__() - - self.eps = eps - self.logsoftmax = nn.LogSoftmax(dim=-1) - self.targets_classes = [] # prevent gpu repeated memory allocation - self.gamma_pos = gamma_pos - self.gamma_neg = gamma_neg - self.reduction = reduction - - def forward(self, inputs, target, reduction=None): - """" - Parameters - ---------- - x: input logits - y: targets (1-hot vector) - """ - - num_classes = inputs.size()[-1] - log_preds = self.logsoftmax(inputs) - self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) - - # ASL weights - targets = self.targets_classes - anti_targets = 1 - targets - xs_pos = torch.exp(log_preds) - xs_neg = 1 - xs_pos - xs_pos = xs_pos * targets - xs_neg = xs_neg * anti_targets - asymmetric_w = torch.pow(1 - xs_pos - xs_neg, - self.gamma_pos * targets + self.gamma_neg * anti_targets) - log_preds = log_preds * 
asymmetric_w - - if self.eps > 0: # label smoothing - self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) - - # loss calculation - loss = - self.targets_classes.mul(log_preds) - - loss = loss.sum(dim=-1) - if self.reduction == 'mean': - loss = loss.mean() - - return loss +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
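A hedged usage sketch for the `AsymmetricLossMultiLabel` class defined below; the shapes are illustrative and not taken from the training scripts. The loss expects raw logits and a binarized multi-label target of the same shape and returns a scalar:

```python
# Illustrative call of AsymmetricLossMultiLabel with made-up shapes (8 samples, 20 labels).
import torch
from timm.loss.asymmetric_loss import AsymmetricLossMultiLabel

loss_fn = AsymmetricLossMultiLabel(gamma_neg=4, gamma_pos=1, clip=0.05)
logits = torch.randn(8, 20)                      # raw model outputs
targets = torch.randint(0, 2, (8, 20)).float()   # multi-label binarized vector
loss = loss_fn(logits, targets)                  # scalar loss (negated sum over all elements)
```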
+""" +import torch +import torch.nn as nn + + +class AsymmetricLossMultiLabel(nn.Module): + def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): + super(AsymmetricLossMultiLabel, self).__init__() + + self.gamma_neg = gamma_neg + self.gamma_pos = gamma_pos + self.clip = clip + self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss + self.eps = eps + + def forward(self, x, y): + """" + Parameters + ---------- + x: input logits + y: targets (multi-label binarized vector) + """ + + # Calculating Probabilities + x_sigmoid = torch.sigmoid(x) + xs_pos = x_sigmoid + xs_neg = 1 - x_sigmoid + + # Asymmetric Clipping + if self.clip is not None and self.clip > 0: + xs_neg = (xs_neg + self.clip).clamp(max=1) + + # Basic CE calculation + los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) + los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) + loss = los_pos + los_neg + + # Asymmetric Focusing + if self.gamma_neg > 0 or self.gamma_pos > 0: + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(False) + pt0 = xs_pos * y + pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p + pt = pt0 + pt1 + one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) + one_sided_w = torch.pow(1 - pt, one_sided_gamma) + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(True) + loss *= one_sided_w + + return -loss.sum() + + +class AsymmetricLossSingleLabel(nn.Module): + def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): + super(AsymmetricLossSingleLabel, self).__init__() + + self.eps = eps + self.logsoftmax = nn.LogSoftmax(dim=-1) + self.targets_classes = [] # prevent gpu repeated memory allocation + self.gamma_pos = gamma_pos + self.gamma_neg = gamma_neg + self.reduction = reduction + + def forward(self, inputs, target, reduction=None): + """" + Parameters + ---------- + x: input logits + y: targets (1-hot vector) + """ + + num_classes = inputs.size()[-1] + log_preds = self.logsoftmax(inputs) + self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) + + # ASL weights + targets = self.targets_classes + anti_targets = 1 - targets + xs_pos = torch.exp(log_preds) + xs_neg = 1 - xs_pos + xs_pos = xs_pos * targets + xs_neg = xs_neg * anti_targets + asymmetric_w = torch.pow(1 - xs_pos - xs_neg, + self.gamma_pos * targets + self.gamma_neg * anti_targets) + log_preds = log_preds * asymmetric_w + + if self.eps > 0: # label smoothing + self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) + + # loss calculation + loss = - self.targets_classes.mul(log_preds) + + loss = loss.sum(dim=-1) + if self.reduction == 'mean': + loss = loss.mean() + + return loss diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamp.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamp.py index fc0bca28aac9ddacc2437bbbbbae7f8a0ee5e772..7f6a8f0c2fe96fd7aff72e5a473068cfa3f2bd40 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamp.py +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamp.py @@ -1,298 +1,298 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import math -from collections import defaultdict - -import torch -from torch.optim.optimizer import Optimizer -from apex.contrib.combine_tensors import combine_npu - - -class NpuFusedAdamP(Optimizer): - """Implements AdamP algorithm. - - Currently NPU-only. Requires Apex to be installed via - ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. - - This version of NPU fused AdamP implements 1 fusions. - - * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters - into one or a few kernel launches. - - :class:`apex.optimizers.NpuFusedAdamP` may be used as a drop-in replacement for ``torch.optim.AdamP``:: - - opt = apex.optimizers.NpuFusedAdamP(model.parameters(), lr = ....) - ... - opt.step() - - :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamP` - with Amp, only ``opt_level O1 and O2`` can be choosed:: - - opt = apex.optimizers.NpuFusedAdamP(model.parameters(), lr = ....) - model, opt = amp.initialize(model, opt, opt_level="O2") - ... - opt.step() - - - The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. - The AdamP variant was proposed in `Decoupled Weight Decay Regularization`_. 
- - Arguments: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float, optional, default: 1e-3): learning rate - betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used - for computing running averages of gradient and its square - eps (float, optional, default: 1e-8): term added to the denominator to improve - numerical stability - weight_decay (float, optional, default: 1e-2): weight decay coefficient - amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of - this algorithm from the paper `On the Convergence of Adam and Beyond`_ - - .. _Adam\: A Method for Stochastic Optimization: - https://arxiv.org/abs/1412.6980 - .. _Decoupled Weight Decay Regularization: - https://arxiv.org/abs/1711.05101 - .. _On the Convergence of Adam and Beyond: - https://openreview.net/forum?id=ryQu7f-RZ - """ - - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, - weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False): - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, - delta=delta, wd_ratio=wd_ratio, nesterov=nesterov) - self.is_npu_fused_optimizer = True - super(NpuFusedAdamP, self).__init__(params, defaults) - - def __setstate__(self, state): - super(NpuFusedAdamP, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('nesterov', False) - - def _channel_view(self, x): - return x.view(x.size(0), -1) - - def _layer_view(self, x): - return x.view(1, -1) - - def _cosine_similarity(self, x, y, eps, view_func): - x = view_func(x) - y = view_func(y) - - x_norm = x.norm(dim=1).add_(eps) - y_norm = y.norm(dim=1).add_(eps) - dot = (x * y).sum(dim=1) - - return dot.abs() / x_norm / y_norm - - def _projection(self, p, grad, perturb, delta, wd_ratio, eps): - wd = 1 - expand_size = [-1] + [1] * (len(p.shape) - 1) - for view_func in [self._channel_view, self._layer_view]: - - cosine_sim = self._cosine_similarity(grad, p, eps, view_func) - - if cosine_sim.max() < delta / math.sqrt(view_func(p).size(1)): - p_n = p / view_func(p).norm(dim=1).view(expand_size).add_(eps) - perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size) - wd = wd_ratio - - return perturb, wd - - return perturb, wd - - def _init_param_state(self, p): - state = self.state[p] - # State initialization - if len(state) == 0: - state['step'] = 0 - # Exponential moving average of gradient values - state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) - # Exponential moving average of squared gradient values - state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - else: - exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_tmp.copy_(state['exp_avg']) - state['exp_avg'] = exp_avg_tmp - - exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_sq_tmp.copy_(state['exp_avg_sq']) - state['exp_avg_sq'] = exp_avg_sq_tmp - - def _combine_group_param_states(self, group_index): - group = self.param_groups[group_index] - stash = self._amp_stash - group_params_list = stash.params_lists_indexed_by_group[group_index] - - combined_param_states = [] - for params in group_params_list: - step_list = [] - exp_avg_list = [] - exp_avg_sq_list = [] - max_exp_avg_sq_list = [] - - for p in params: - if p.grad is None: - continue - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamP does not support sparse gradients, ' - 'please consider SparseAdam instead') - - 
self._init_param_state(p) - state = self.state[p] - step_list.append(state['step']) - exp_avg_list.append(state['exp_avg']) - exp_avg_sq_list.append(state['exp_avg_sq']) - - combined_step = 0 - combined_exp_avg = None - combined_exp_avg_sq = None - combined_max_exp_avg_sq = None - - if len(exp_avg_list) > 0: - combined_step = step_list[0] - combined_exp_avg = combine_npu(exp_avg_list) - combined_exp_avg_sq = combine_npu(exp_avg_sq_list) - combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) - - combined_state = defaultdict(dict) - combined_state['step'] = combined_step - combined_state['exp_avg'] = combined_exp_avg - combined_state['exp_avg_sq'] = combined_exp_avg_sq - combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq - combined_param_states.append(combined_state) - stash.combined_param_states_indexed_by_group[group_index] = combined_param_states - - def _combine_param_states_by_group(self): - stash = self._amp_stash - if stash.param_states_are_combined_by_group: - return - - stash.combined_param_states_indexed_by_group = [] - for _ in self.param_groups: - stash.combined_param_states_indexed_by_group.append([]) - - for i, _ in enumerate(self.param_groups): - self._combine_group_param_states(i) - stash.param_states_are_combined_by_group = True - - def _group_step(self, group_index): - group = self.param_groups[group_index] - for p in group['params']: - if p.grad is None: - continue - - grad = p.grad.data - state = self.state[p] - state['step'] += 1 - - - beta1, beta2 = group['betas'] - nesterov = group['nesterov'] - - stash = self._amp_stash - combined_group_params = stash.combined_params_indexed_by_group[group_index] - combined_group_grads = stash.combined_grads_indexed_by_group[group_index] - combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] - - for combined_param, combined_grad, combined_param_state in zip(combined_group_params, - combined_group_grads, - combined_group_param_states): - if combined_param is None or combined_grad is None: - continue - - exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] - - combined_param_state['step'] += 1 - bias_correction1 = 1 - beta1 ** combined_param_state['step'] - bias_correction2 = 1 - beta2 ** combined_param_state['step'] - - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) - exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) - denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - step_size = group['lr'] / bias_correction1 - - if nesterov: - perturb = (beta1 * exp_avg + (1 - beta1) * combined_grad) / denom - else: - perturb = exp_avg / denom - - # Projection - wd_ratio = 1 - if len(combined_param.shape) > 1: - perturb, wd_ratio = self._projection(combined_param, combined_grad, perturb, group['delta'], group['wd_ratio'], group['eps']) - - # Weight decay - if group['weight_decay'] > 0: - combined_param.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio) - - # Step - combined_param.add_(-step_size, perturb) - - @torch.no_grad() - def step(self, closure=None): - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedAdamP should be used with AMP.') - - self._check_already_combined_params_and_grads() - # combine params and grads first - self._combine_params_and_grads_by_group() - # then combine param states - self._combine_param_states_by_group() - - loss = None - if closure is not None: - with 
torch.enable_grad(): - loss = closure() - - for i, _ in enumerate(self.param_groups): - self._group_step(i) - - return loss +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import math +from collections import defaultdict + +import torch +from torch.optim.optimizer import Optimizer +from apex.contrib.combine_tensors import combine_npu + + +class NpuFusedAdamP(Optimizer): + """Implements AdamP algorithm. + + Currently NPU-only. Requires Apex to be installed via + ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. + + This version of NPU fused AdamP implements 1 fusions. + + * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters + into one or a few kernel launches. + + :class:`apex.optimizers.NpuFusedAdamP` may be used as a drop-in replacement for ``torch.optim.AdamP``:: + + opt = apex.optimizers.NpuFusedAdamP(model.parameters(), lr = ....) + ... + opt.step() + + :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamP` + with Amp, only ``opt_level O1 and O2`` can be choosed:: + + opt = apex.optimizers.NpuFusedAdamP(model.parameters(), lr = ....) + model, opt = amp.initialize(model, opt, opt_level="O2") + ... + opt.step() + + + The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. + The AdamP variant was proposed in `Decoupled Weight Decay Regularization`_. 
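    For intuition, the elementwise update that the fused ``_group_step`` below applies to each
    combined tensor is equivalent to the following plain-PyTorch sketch for a single parameter
    (the channel/layer projection performed by ``_projection`` and the nesterov branch are
    elided; all names are local to this sketch)::

        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        denom = (exp_avg_sq.sqrt() / math.sqrt(1 - beta2 ** step)).add_(eps)
        step_size = lr / (1 - beta1 ** step)
        perturb = exp_avg / denom                       # nesterov=False case
        param.mul_(1 - lr * weight_decay * wd_ratio)    # decoupled weight decay
        param.add_(perturb, alpha=-step_size)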
+ + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional, default: 1e-3): learning rate + betas (Tuple[float, float], optional, default: (0.9, 0.999)): coefficients used + for computing running averages of gradient and its square + eps (float, optional, default: 1e-8): term added to the denominator to improve + numerical stability + weight_decay (float, optional, default: 1e-2): weight decay coefficient + amsgrad (boolean, optional, default: False): whether to use the AMSGrad variant of + this algorithm from the paper `On the Convergence of Adam and Beyond`_ + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _Decoupled Weight Decay Regularization: + https://arxiv.org/abs/1711.05101 + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False): + defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, + delta=delta, wd_ratio=wd_ratio, nesterov=nesterov) + self.is_npu_fused_optimizer = True + super(NpuFusedAdamP, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NpuFusedAdamP, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('nesterov', False) + + def _channel_view(self, x): + return x.view(x.size(0), -1) + + def _layer_view(self, x): + return x.view(1, -1) + + def _cosine_similarity(self, x, y, eps, view_func): + x = view_func(x) + y = view_func(y) + + x_norm = x.norm(dim=1).add_(eps) + y_norm = y.norm(dim=1).add_(eps) + dot = (x * y).sum(dim=1) + + return dot.abs() / x_norm / y_norm + + def _projection(self, p, grad, perturb, delta, wd_ratio, eps): + wd = 1 + expand_size = [-1] + [1] * (len(p.shape) - 1) + for view_func in [self._channel_view, self._layer_view]: + + cosine_sim = self._cosine_similarity(grad, p, eps, view_func) + + if cosine_sim.max() < delta / math.sqrt(view_func(p).size(1)): + p_n = p / view_func(p).norm(dim=1).view(expand_size).add_(eps) + perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size) + wd = wd_ratio + + return perturb, wd + + return perturb, wd + + def _init_param_state(self, p): + state = self.state[p] + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + else: + exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_tmp.copy_(state['exp_avg']) + state['exp_avg'] = exp_avg_tmp + + exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_sq_tmp.copy_(state['exp_avg_sq']) + state['exp_avg_sq'] = exp_avg_sq_tmp + + def _combine_group_param_states(self, group_index): + group = self.param_groups[group_index] + stash = self._amp_stash + group_params_list = stash.params_lists_indexed_by_group[group_index] + + combined_param_states = [] + for params in group_params_list: + step_list = [] + exp_avg_list = [] + exp_avg_sq_list = [] + max_exp_avg_sq_list = [] + + for p in params: + if p.grad is None: + continue + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamP does not support sparse gradients, ' + 'please consider SparseAdam instead') + + 
self._init_param_state(p) + state = self.state[p] + step_list.append(state['step']) + exp_avg_list.append(state['exp_avg']) + exp_avg_sq_list.append(state['exp_avg_sq']) + + combined_step = 0 + combined_exp_avg = None + combined_exp_avg_sq = None + combined_max_exp_avg_sq = None + + if len(exp_avg_list) > 0: + combined_step = step_list[0] + combined_exp_avg = combine_npu(exp_avg_list) + combined_exp_avg_sq = combine_npu(exp_avg_sq_list) + combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) + + combined_state = defaultdict(dict) + combined_state['step'] = combined_step + combined_state['exp_avg'] = combined_exp_avg + combined_state['exp_avg_sq'] = combined_exp_avg_sq + combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq + combined_param_states.append(combined_state) + stash.combined_param_states_indexed_by_group[group_index] = combined_param_states + + def _combine_param_states_by_group(self): + stash = self._amp_stash + if stash.param_states_are_combined_by_group: + return + + stash.combined_param_states_indexed_by_group = [] + for _ in self.param_groups: + stash.combined_param_states_indexed_by_group.append([]) + + for i, _ in enumerate(self.param_groups): + self._combine_group_param_states(i) + stash.param_states_are_combined_by_group = True + + def _group_step(self, group_index): + group = self.param_groups[group_index] + for p in group['params']: + if p.grad is None: + continue + + grad = p.grad.data + state = self.state[p] + state['step'] += 1 + + + beta1, beta2 = group['betas'] + nesterov = group['nesterov'] + + stash = self._amp_stash + combined_group_params = stash.combined_params_indexed_by_group[group_index] + combined_group_grads = stash.combined_grads_indexed_by_group[group_index] + combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] + + for combined_param, combined_grad, combined_param_state in zip(combined_group_params, + combined_group_grads, + combined_group_param_states): + if combined_param is None or combined_grad is None: + continue + + exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] + + combined_param_state['step'] += 1 + bias_correction1 = 1 - beta1 ** combined_param_state['step'] + bias_correction2 = 1 - beta2 ** combined_param_state['step'] + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) + denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + step_size = group['lr'] / bias_correction1 + + if nesterov: + perturb = (beta1 * exp_avg + (1 - beta1) * combined_grad) / denom + else: + perturb = exp_avg / denom + + # Projection + wd_ratio = 1 + if len(combined_param.shape) > 1: + perturb, wd_ratio = self._projection(combined_param, combined_grad, perturb, group['delta'], group['wd_ratio'], group['eps']) + + # Weight decay + if group['weight_decay'] > 0: + combined_param.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio) + + # Step + combined_param.add_(-step_size, perturb) + + @torch.no_grad() + def step(self, closure=None): + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedAdamP should be used with AMP.') + + self._check_already_combined_params_and_grads() + # combine params and grads first + self._combine_params_and_grads_by_group() + # then combine param states + self._combine_param_states_by_group() + + loss = None + if closure is not None: + with 
torch.enable_grad(): + loss = closure() + + for i, _ in enumerate(self.param_groups): + self._group_step(i) + + return loss diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamw.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamw.py index 043a281b3106cdbbea33ddd72b4edeb36d87e02a..87896fce404d5d897522f0364c114747194e1006 100644 --- a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamw.py +++ b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/timm/optim/npu_fused_adamw.py @@ -1,289 +1,289 @@ -""" -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the BSD 3-Clause License (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -https://spdx.org/licenses/BSD-3-Clause.html - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import math -from collections import defaultdict - -import torch -from torch.optim.optimizer import Optimizer - -from apex.contrib.combine_tensors import combine_npu - - -class NpuFusedAdamW(Optimizer): - """Implements AdamW algorithm. - - Currently NPU-only. Requires Apex to be installed via - ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. - - This version of NPU fused AdamW implements 1 fusions. - - * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters - into one or a few kernel launches. - - :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - ... 
- opt.step() - - :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` - with Amp, only ``opt_level O1 and O2`` can be choosed:: - - opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) - model, opt = amp.initialize(model, opt, opt_level="O2") - ... - opt.step() - - - The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. - The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. - - Arguments: - params (iterable): iterable of parameters to optimize or dicts defining - parameter groups - lr (float, optional): learning rate (default: 1e-3) - betas (Tuple[float, float], optional): coefficients used for computing - running averages of gradient and its square (default: (0.9, 0.999)) - eps (float, optional): term added to the denominator to improve - numerical stability (default: 1e-8) - weight_decay (float, optional): weight decay coefficient (default: 1e-2) - amsgrad (boolean, optional): whether to use the AMSGrad variant of this - algorithm from the paper `On the Convergence of Adam and Beyond`_ - (default: False) - - .. _Adam\: A Method for Stochastic Optimization: - https://arxiv.org/abs/1412.6980 - .. _Decoupled Weight Decay Regularization: - https://arxiv.org/abs/1711.05101 - .. _On the Convergence of Adam and Beyond: - https://openreview.net/forum?id=ryQu7f-RZ - """ - - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, - weight_decay=1e-2, amsgrad=False): - if lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if eps < 0.0: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if betas[0] < 0.0 or betas[0] >= 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if betas[1] < 0.0 or betas[1] >= 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - if weight_decay < 0.0: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay, amsgrad=amsgrad) - self.is_npu_fused_optimizer = True - super(NpuFusedAdamW, self).__init__(params, defaults) - - def __setstate__(self, state): - super(NpuFusedAdamW, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('amsgrad', False) - - def _init_param_state(self, p, amsgrad): - state = self.state[p] - # State initialization - if len(state) == 0: - state['step'] = 0 - # Exponential moving average of gradient values - state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) - # Exponential moving average of squared gradient values - state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - if amsgrad: - # Maintains max of all exp. moving avg. of sq. grad. 
values - state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) - else: - exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_tmp.copy_(state['exp_avg']) - state['exp_avg'] = exp_avg_tmp - - exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - exp_avg_sq_tmp.copy_(state['exp_avg_sq']) - state['exp_avg_sq'] = exp_avg_sq_tmp - - if amsgrad: - max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) - max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) - state['max_exp_avg_sq'] = max_exp_avg_sq_tmp - - def _combine_group_param_states(self, group_index): - group = self.param_groups[group_index] - stash = self._amp_stash - group_params_list = stash.params_lists_indexed_by_group[group_index] - - amsgrad = group['amsgrad'] - - combined_param_states = [] - for params in group_params_list: - step_list = [] - exp_avg_list = [] - exp_avg_sq_list = [] - max_exp_avg_sq_list = [] - - for p in params: - if p.grad is None: - continue - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - - self._init_param_state(p, amsgrad) - state = self.state[p] - step_list.append(state['step']) - exp_avg_list.append(state['exp_avg']) - exp_avg_sq_list.append(state['exp_avg_sq']) - if amsgrad: - max_exp_avg_sq_list.append(state['max_exp_avg_sq']) - - combined_step = 0 - combined_exp_avg = None - combined_exp_avg_sq = None - combined_max_exp_avg_sq = None - - if len(exp_avg_list) > 0: - combined_step = step_list[0] - combined_exp_avg = combine_npu(exp_avg_list) - combined_exp_avg_sq = combine_npu(exp_avg_sq_list) - combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) - - combined_state = defaultdict(dict) - combined_state['step'] = combined_step - combined_state['exp_avg'] = combined_exp_avg - combined_state['exp_avg_sq'] = combined_exp_avg_sq - combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq - combined_param_states.append(combined_state) - stash.combined_param_states_indexed_by_group[group_index] = combined_param_states - - def _combine_param_states_by_group(self): - stash = self._amp_stash - if stash.param_states_are_combined_by_group: - return - - stash.combined_param_states_indexed_by_group = [] - for _ in self.param_groups: - stash.combined_param_states_indexed_by_group.append([]) - - for i, _ in enumerate(self.param_groups): - self._combine_group_param_states(i) - stash.param_states_are_combined_by_group = True - - def _group_step(self, group_index): - group = self.param_groups[group_index] - for p in group['params']: - if p.grad is None: - continue - - grad = p.grad - if grad.is_sparse: - raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' - 'please consider SparseAdam instead') - state_p = self.state[p] - state_p['step'] += 1 - - amsgrad = group['amsgrad'] - beta1, beta2 = group['betas'] - - stash = self._amp_stash - combined_group_params = stash.combined_params_indexed_by_group[group_index] - combined_group_grads = stash.combined_grads_indexed_by_group[group_index] - combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] - - for combined_param, combined_grad, combined_param_state in zip(combined_group_params, - combined_group_grads, - combined_group_param_states): - if combined_param is None or combined_grad is None: - continue - - # Perform stepweight decay. 
The fused method is used here to speed up the calculation - combined_param.mul_(1 - group['lr'] * group['weight_decay']) - - exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] - if amsgrad: - max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] - - combined_param_state['step'] += 1 - bias_correction1 = 1 - beta1 ** combined_param_state['step'] - bias_correction2 = 1 - beta2 ** combined_param_state['step'] - - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) - exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) - if amsgrad: - # Maintains the maximum of all 2nd moment running avg. till now - torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) - # Use the max. for normalizing running avg. of gradient - denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - else: - denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) - - step_size = group['lr'] / bias_correction1 - - combined_param.addcdiv_(exp_avg, denom, value=-step_size) - - @torch.no_grad() - def step(self, closure=None): - if not hasattr(self, "_amp_stash"): - raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') - - self._check_already_combined_params_and_grads() - # combine params and grads first - self._combine_params_and_grads_by_group() - # then combine param states - self._combine_param_states_by_group() - - loss = None - if closure is not None: - with torch.enable_grad(): - loss = closure() - - for i, _ in enumerate(self.param_groups): - self._group_step(i) - - return loss +""" +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 2020 Huawei Technologies Co., Ltd + +Licensed under the BSD 3-Clause License (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +https://spdx.org/licenses/BSD-3-Clause.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import math +from collections import defaultdict + +import torch +from torch.optim.optimizer import Optimizer + +from apex.contrib.combine_tensors import combine_npu + + +class NpuFusedAdamW(Optimizer): + """Implements AdamW algorithm. + + Currently NPU-only. Requires Apex to be installed via + ``pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--npu_float_status" ./``. + + This version of NPU fused AdamW implements 1 fusions. + + * A combine-tensor apply launch that batches the elementwise updates applied to all the model's parameters + into one or a few kernel launches. + + :class:`apex.optimizers.NpuFusedAdamW` may be used as a drop-in replacement for ``torch.optim.AdamW``:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + ... + opt.step() + + :class:`apex.optimizers.FusedAdam` should be used with Amp. Currently, if you wish to use :class:`NpuFusedAdamW` + with Amp, only ``opt_level O1 and O2`` can be choosed:: + + opt = apex.optimizers.NpuFusedAdamW(model.parameters(), lr = ....) + model, opt = amp.initialize(model, opt, opt_level="O2") + ... + opt.step() + + + The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. + The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. + + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay coefficient (default: 1e-2) + amsgrad (boolean, optional): whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _Decoupled Weight Decay Regularization: + https://arxiv.org/abs/1711.05101 + .. 
_On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=1e-2, amsgrad=False): + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if eps < 0.0: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if betas[0] < 0.0 or betas[0] >= 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if betas[1] < 0.0 or betas[1] >= 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad) + self.is_npu_fused_optimizer = True + super(NpuFusedAdamW, self).__init__(params, defaults) + + def __setstate__(self, state): + super(NpuFusedAdamW, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + def _init_param_state(self, p, amsgrad): + state = self.state[p] + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + else: + exp_avg_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_tmp.copy_(state['exp_avg']) + state['exp_avg'] = exp_avg_tmp + + exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + exp_avg_sq_tmp.copy_(state['exp_avg_sq']) + state['exp_avg_sq'] = exp_avg_sq_tmp + + if amsgrad: + max_exp_avg_sq_tmp = torch.zeros_like(p, memory_format=torch.preserve_format) + max_exp_avg_sq_tmp.copy_(state['max_exp_avg_sq']) + state['max_exp_avg_sq'] = max_exp_avg_sq_tmp + + def _combine_group_param_states(self, group_index): + group = self.param_groups[group_index] + stash = self._amp_stash + group_params_list = stash.params_lists_indexed_by_group[group_index] + + amsgrad = group['amsgrad'] + + combined_param_states = [] + for params in group_params_list: + step_list = [] + exp_avg_list = [] + exp_avg_sq_list = [] + max_exp_avg_sq_list = [] + + for p in params: + if p.grad is None: + continue + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + + self._init_param_state(p, amsgrad) + state = self.state[p] + step_list.append(state['step']) + exp_avg_list.append(state['exp_avg']) + exp_avg_sq_list.append(state['exp_avg_sq']) + if amsgrad: + max_exp_avg_sq_list.append(state['max_exp_avg_sq']) + + combined_step = 0 + combined_exp_avg = None + combined_exp_avg_sq = None + combined_max_exp_avg_sq = None + + if len(exp_avg_list) > 0: + combined_step = step_list[0] + combined_exp_avg = combine_npu(exp_avg_list) + combined_exp_avg_sq = combine_npu(exp_avg_sq_list) + combined_max_exp_avg_sq = combine_npu(max_exp_avg_sq_list) + + combined_state = defaultdict(dict) + combined_state['step'] = combined_step + combined_state['exp_avg'] = combined_exp_avg + combined_state['exp_avg_sq'] = combined_exp_avg_sq + combined_state['max_exp_avg_sq'] = combined_max_exp_avg_sq + combined_param_states.append(combined_state) + 
stash.combined_param_states_indexed_by_group[group_index] = combined_param_states + + def _combine_param_states_by_group(self): + stash = self._amp_stash + if stash.param_states_are_combined_by_group: + return + + stash.combined_param_states_indexed_by_group = [] + for _ in self.param_groups: + stash.combined_param_states_indexed_by_group.append([]) + + for i, _ in enumerate(self.param_groups): + self._combine_group_param_states(i) + stash.param_states_are_combined_by_group = True + + def _group_step(self, group_index): + group = self.param_groups[group_index] + for p in group['params']: + if p.grad is None: + continue + + grad = p.grad + if grad.is_sparse: + raise RuntimeError('NpuFusedAdamW does not support sparse gradients, ' + 'please consider SparseAdam instead') + state_p = self.state[p] + state_p['step'] += 1 + + amsgrad = group['amsgrad'] + beta1, beta2 = group['betas'] + + stash = self._amp_stash + combined_group_params = stash.combined_params_indexed_by_group[group_index] + combined_group_grads = stash.combined_grads_indexed_by_group[group_index] + combined_group_param_states = stash.combined_param_states_indexed_by_group[group_index] + + for combined_param, combined_grad, combined_param_state in zip(combined_group_params, + combined_group_grads, + combined_group_param_states): + if combined_param is None or combined_grad is None: + continue + + # Perform stepweight decay. The fused method is used here to speed up the calculation + combined_param.mul_(1 - group['lr'] * group['weight_decay']) + + exp_avg, exp_avg_sq = combined_param_state['exp_avg'], combined_param_state['exp_avg_sq'] + if amsgrad: + max_exp_avg_sq = combined_param_state['max_exp_avg_sq'] + + combined_param_state['step'] += 1 + bias_correction1 = 1 - beta1 ** combined_param_state['step'] + bias_correction2 = 1 - beta2 ** combined_param_state['step'] + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(combined_grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(combined_grad, combined_grad, value=1 - beta2) + if amsgrad: + # Maintains the maximum of all 2nd moment running avg. till now + torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # Use the max. for normalizing running avg. 
of gradient + denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + else: + denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + + step_size = group['lr'] / bias_correction1 + + combined_param.addcdiv_(exp_avg, denom, value=-step_size) + + @torch.no_grad() + def step(self, closure=None): + if not hasattr(self, "_amp_stash"): + raise RuntimeError('apex.optimizers.NpuFusedAdamW should be used with AMP.') + + self._check_already_combined_params_and_grads() + # combine params and grads first + self._combine_params_and_grads_by_group() + # then combine param states + self._combine_param_states_by_group() + + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for i, _ in enumerate(self.param_groups): + self._group_step(i) + + return loss diff --git a/PyTorch/contrib/cv/classification/Vit_small_patch16_224/train.py b/PyTorch/contrib/cv/classification/Vit_small_patch16_224/train.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VoVNet39/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/VoVNet39/test/train_eval_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VoVNet39/test/train_finetune_1p.sh b/PyTorch/contrib/cv/classification/VoVNet39/test/train_finetune_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VoVNet39/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/VoVNet39/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VoVNet39/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/VoVNet39/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/VoVNet39/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/VoVNet39/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/Dockerfile b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/Dockerfile index 16a84a07a12d6f9beb7ec418ac01964421d38163..5889f351145fc5d4aa1a7a1b210cc33304226580 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/Dockerfile +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/Dockerfile @@ -1,22 +1,22 @@ -ARG FROM_IMAGE_NAME -FROM ${FROM_IMAGE_NAME} - -ARG SDK_PKG - -RUN ln -s /usr/local/python3.7.5/bin/python3.7 /usr/bin/python - -RUN apt-get update && \ - apt-get install libglib2.0-dev -y || \ - rm -rf /var/lib/dpkg/info && \ - mkdir /var/lib/dpkg/info && \ - apt-get install libglib2.0-dev -y && \ - pip install pytest-runner==5.3.0 -COPY sdk/requirements.txt . -RUN pip3.7 install -r requirements.txt - -# pip install sdk_run -COPY $SDK_PKG . -RUN ls -hrlt -RUN chmod +x ${SDK_PKG} && \ - ./${SDK_PKG} --install && \ +ARG FROM_IMAGE_NAME +FROM ${FROM_IMAGE_NAME} + +ARG SDK_PKG + +RUN ln -s /usr/local/python3.7.5/bin/python3.7 /usr/bin/python + +RUN apt-get update && \ + apt-get install libglib2.0-dev -y || \ + rm -rf /var/lib/dpkg/info && \ + mkdir /var/lib/dpkg/info && \ + apt-get install libglib2.0-dev -y && \ + pip install pytest-runner==5.3.0 +COPY sdk/requirements.txt . +RUN pip3.7 install -r requirements.txt + +# pip install sdk_run +COPY $SDK_PKG . 
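The NpuFusedAdamW hunk earlier in this patch batches every parameter's AdamW update into one operation over combined (contiguous) tensors. As a rough illustration of that idea only — not the apex implementation, using plain torch.cat as a stand-in for the NPU-specific combine_npu, and with an illustrative function name fused_adamw_step — a single AdamW step over one flattened buffer could look like this:

import math
import torch

def fused_adamw_step(flat_param, flat_grad, exp_avg, exp_avg_sq, step,
                     lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=1e-2):
    # One update over a single contiguous buffer holding all parameters,
    # with decoupled weight decay as in the hunk above.
    flat_param.mul_(1 - lr * weight_decay)
    exp_avg.mul_(beta1).add_(flat_grad, alpha=1 - beta1)
    exp_avg_sq.mul_(beta2).addcmul_(flat_grad, flat_grad, value=1 - beta2)
    bias_correction1 = 1 - beta1 ** step
    bias_correction2 = 1 - beta2 ** step
    denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps)
    flat_param.addcdiv_(exp_avg, denom, value=-lr / bias_correction1)

# Hypothetical usage: parameters flattened into one buffer. torch.cat copies,
# whereas combine_npu presumably returns a combined tensor backed by the same
# storage as the originals, which is what makes the fused update effective.
params = [torch.randn(4), torch.randn(3)]
grads = [torch.ones(4), torch.ones(3)]
flat_p = torch.cat([p.view(-1) for p in params])
flat_g = torch.cat([g.view(-1) for g in grads])
state_avg, state_avg_sq = torch.zeros_like(flat_p), torch.zeros_like(flat_p)
fused_adamw_step(flat_p, flat_g, state_avg, state_avg_sq, step=1)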
+RUN ls -hrlt +RUN chmod +x ${SDK_PKG} && \ + ./${SDK_PKG} --install && \ bash -c "source ~/.bashrc" \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/convert/xception_pt_aipp.config b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/convert/xception_pt_aipp.config index 5c8ab6938007edf566beff150883d61b4e0f2984..649639850c2f7bd6367fbb8a3befb29c0df6a82f 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/convert/xception_pt_aipp.config +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/convert/xception_pt_aipp.config @@ -1,16 +1,16 @@ -aipp_op { - aipp_mode: static - input_format : RGB888_U8 - csc_switch : false - rbuv_swap_switch : true - - mean_chn_0 : 0 - mean_chn_1 : 0 - mean_chn_2 : 0 - min_chn_0 : 127.5 - min_chn_1 : 127.5 - min_chn_2 : 127.5 - var_reci_chn_0 : 0.007843137254902 - var_reci_chn_1 : 0.007843137254902 - var_reci_chn_2 : 0.007843137254902 -} +aipp_op { + aipp_mode: static + input_format : RGB888_U8 + csc_switch : false + rbuv_swap_switch : true + + mean_chn_0 : 0 + mean_chn_1 : 0 + mean_chn_2 : 0 + min_chn_0 : 127.5 + min_chn_1 : 127.5 + min_chn_2 : 127.5 + var_reci_chn_0 : 0.007843137254902 + var_reci_chn_1 : 0.007843137254902 + var_reci_chn_2 : 0.007843137254902 +} diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/models/xception_aipp.cfg b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/models/xception_aipp.cfg index d01feefc8eb9859daa5cc76a4570d91c5371a3e5..75bba4acb3d512f91e3caa928c2969c66f0303c7 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/models/xception_aipp.cfg +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/models/xception_aipp.cfg @@ -1,3 +1,3 @@ -CLASS_NUM=1000 -SOFTMAX=True +CLASS_NUM=1000 +SOFTMAX=True TOP_K=5 \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/CMakeLists.txt b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/CMakeLists.txt index 53ff21c352aa0c3d77c69dad849176c13cc4673a..77b66042819ff3af8f26d215aa80fa4d8dc8f92b 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/CMakeLists.txt +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/CMakeLists.txt @@ -1,52 +1,52 @@ -cmake_minimum_required(VERSION 3.10.0) -project(xception) - -set(TARGET xception) - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) -add_definitions(-Dgoogle=mindxsdk_private) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) 
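The xception_pt_aipp.config hunk above sets mean 0, min 127.5 and var_reci 0.007843137254902 (which equals 1/127.5) per channel. Assuming the usual AIPP formula out = (in - mean - min) * var_reci, this maps RGB values from [0, 255] into roughly [-1, 1]; a quick check of that arithmetic, independent of the AIPP operator itself:

# Sanity check of the per-channel normalization implied by the config above
# (the formula below is an assumption about how AIPP applies these fields).
mean, minimum, var_reci = 0.0, 127.5, 0.007843137254902

def aipp_normalize(x):
    return (x - mean - minimum) * var_reci

print(aipp_normalize(0))      # ~ -1.0 (darkest pixel)
print(aipp_normalize(127.5))  # 0.0
print(aipp_normalize(255))    # ~ 1.0 (brightest pixel)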
-set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) - -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) -endif() -include_directories(/home/data/ics_yu/mxVision-2.0.2/lib/modelpostprocessors/) -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) - -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} main.cpp XceptionClassify.cpp) -target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs) - -install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) +cmake_minimum_required(VERSION 3.10.0) +project(xception) + +set(TARGET xception) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) + +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() +include_directories(/home/data/ics_yu/mxVision-2.0.2/lib/modelpostprocessors/) +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} main.cpp XceptionClassify.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.cpp b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.cpp index 929bd67d85952f104c3a1aece707c2f6c21ec953..61acb36242dc692ea7edf7a7231c54462140bbc8 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.cpp +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.cpp @@ -1,250 +1,250 @@ -/* - * Copyright 2021 Huawei Technologies Co., 
Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "XceptionClassify.h" -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; -namespace { -const uint32_t YUV_BYTE_NU = 3; -const uint32_t YUV_BYTE_DE = 2; -const uint32_t VPC_H_ALIGN = 2; -} - -APP_ERROR XceptionClassify::Init(const InitParam &initParam) -{ - deviceId_ = initParam.deviceId; - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - dvppWrapper_ = std::make_shared(); - ret = dvppWrapper_->Init(); - if (ret != APP_ERR_OK) { - LogError << "DvppWrapper init failed, ret=" << ret << "."; - return ret; - } - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - MxBase::ConfigData configData; - const std::string softmax = initParam.softmax ? "true" : "false"; - const std::string checkTensor = initParam.checkTensor ? 
"true" : "false"; - - configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); - configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); - configData.SetJsonValue("SOFTMAX", softmax); - configData.SetJsonValue("CHECK_MODEL", checkTensor); - - auto jsonStr = configData.GetCfgJson().serialize(); - std::map> config; - config["postProcessConfigContent"] = std::make_shared(jsonStr); - config["labelPath"] = std::make_shared(initParam.labelPath); - - post_ = std::make_shared(); - ret = post_->Init(config); - if (ret != APP_ERR_OK) { - LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR XceptionClassify::DeInit() -{ - dvppWrapper_->DeInit(); - model_->DeInit(); - post_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -void XceptionClassify::ReadImage(const std::string &imgPath, cv::Mat &imageMat) -{ - imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); -} - -void XceptionClassify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg) -{ - float central_fraction = 0.75; - int crop_x = img.cols * central_fraction; - int crop_y = img.rows * central_fraction; - int crop_x1 = (img.cols - crop_x) / 2; - int crop_y1 = (img.rows - crop_y) / 2; - - cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y); - LogInfo << "images crop_x1: " << crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y; - cropImg = img(myROI); -} - -void XceptionClassify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) -{ - static constexpr uint32_t resizeHeight = 299; - static constexpr uint32_t resizeWidth = 299; - - cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight)); -} - -APP_ERROR XceptionClassify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) -{ - const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; - MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); - MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); - - APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Memory malloc failed."; - return ret; - } - std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; - tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - -APP_ERROR XceptionClassify::Inference(const std::vector &inputs, std::vector &outputs) -{ - auto dtypes = model_->GetOutputDataType(); - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - auto startTime = std::chrono::high_resolution_clock::now(); // search for learning - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - auto endTime = std::chrono::high_resolution_clock::now(); - double costMs = std::chrono::duration(endTime - startTime).count(); - g_inferCost.push_back(costMs); - if 
(ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR XceptionClassify::PostProcess(const std::vector &inputs, std::vector> &clsInfos) -{ - - APP_ERROR ret = post_->Process(inputs, clsInfos); - - if (ret != APP_ERR_OK) { - LogError << "Process failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR XceptionClassify::SaveInferResult(const std::string &imagePath, std::vector> &batchClsInfos) -{ - uint32_t batchIndex = 0; - LogInfo << "image path: " << imagePath; - std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1); - size_t dot = fileName.find_last_of("."); - - std::string resultPathName = "result"; - if (access(resultPathName.c_str(), 0) != 0) { - APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); - if (ret != 0) { - LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; - return APP_ERR_COMM_FAILURE; - } - } - std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt"; - LogInfo << "file path for saving result: " << resFileName; - std::ofstream tfile(resFileName); - if (tfile.fail()) { - LogError << "Failed to open result file"; - return APP_ERR_COMM_FAILURE; - } - - for (auto clsInfos : batchClsInfos) { - std::string resultStr = ""; - for (auto clsInfo : clsInfos) { - LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className - << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId; - resultStr += std::to_string(clsInfo.classId) + " "; - } - tfile << resultStr << std::endl; - batchIndex += 1; - } - tfile.close(); - return APP_ERR_OK; -} - -APP_ERROR XceptionClassify::Process(const std::string &imgPath) -{ - cv::Mat imageMat; - - ReadImage(imgPath, imageMat); - CenterCropImage(imageMat, imageMat); - Resize(imageMat, imageMat); - - std::vector inputs = {}; - std::vector outputs = {}; - TensorBase tensorBase; - - APP_ERROR ret = CVMatToTensorBase(imageMat, tensorBase); - if (ret != APP_ERR_OK) { - LogError << "CVMatToTensorBase failed, ret=" << ret << "."; - return ret; - } - - inputs.push_back(tensorBase); - - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - - std::vector> BatchClsInfos = {}; - ret = PostProcess(outputs, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - - ret = SaveInferResult(imgPath, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "Save results failed, ret: " << ret << "."; - return ret; - } - - imageMat.release(); - return APP_ERR_OK; -} - +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "XceptionClassify.h" +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +using namespace MxBase; +namespace { +const uint32_t YUV_BYTE_NU = 3; +const uint32_t YUV_BYTE_DE = 2; +const uint32_t VPC_H_ALIGN = 2; +} + +APP_ERROR XceptionClassify::Init(const InitParam &initParam) +{ + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + dvppWrapper_ = std::make_shared(); + ret = dvppWrapper_->Init(); + if (ret != APP_ERR_OK) { + LogError << "DvppWrapper init failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + MxBase::ConfigData configData; + const std::string softmax = initParam.softmax ? "true" : "false"; + const std::string checkTensor = initParam.checkTensor ? "true" : "false"; + + configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); + configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); + configData.SetJsonValue("SOFTMAX", softmax); + configData.SetJsonValue("CHECK_MODEL", checkTensor); + + auto jsonStr = configData.GetCfgJson().serialize(); + std::map> config; + config["postProcessConfigContent"] = std::make_shared(jsonStr); + config["labelPath"] = std::make_shared(initParam.labelPath); + + post_ = std::make_shared(); + ret = post_->Init(config); + if (ret != APP_ERR_OK) { + LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR XceptionClassify::DeInit() +{ + dvppWrapper_->DeInit(); + model_->DeInit(); + post_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +void XceptionClassify::ReadImage(const std::string &imgPath, cv::Mat &imageMat) +{ + imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); +} + +void XceptionClassify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg) +{ + float central_fraction = 0.75; + int crop_x = img.cols * central_fraction; + int crop_y = img.rows * central_fraction; + int crop_x1 = (img.cols - crop_x) / 2; + int crop_y1 = (img.rows - crop_y) / 2; + + cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y); + LogInfo << "images crop_x1: " << crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y; + cropImg = img(myROI); +} + +void XceptionClassify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) +{ + static constexpr uint32_t resizeHeight = 299; + static constexpr uint32_t resizeWidth = 299; + + cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight)); +} + +APP_ERROR XceptionClassify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) +{ + const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; + MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); + MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); + + APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc failed."; + return ret; + } + 
std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; + tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); + return APP_ERR_OK; +} + +APP_ERROR XceptionClassify::Inference(const std::vector &inputs, std::vector &outputs) +{ + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs.push_back(tensor); + } + DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); // search for learning + APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration(endTime - startTime).count(); + g_inferCost.push_back(costMs); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR XceptionClassify::PostProcess(const std::vector &inputs, std::vector> &clsInfos) +{ + + APP_ERROR ret = post_->Process(inputs, clsInfos); + + if (ret != APP_ERR_OK) { + LogError << "Process failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR XceptionClassify::SaveInferResult(const std::string &imagePath, std::vector> &batchClsInfos) +{ + uint32_t batchIndex = 0; + LogInfo << "image path: " << imagePath; + std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1); + size_t dot = fileName.find_last_of("."); + + std::string resultPathName = "result"; + if (access(resultPathName.c_str(), 0) != 0) { + APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); + if (ret != 0) { + LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; + return APP_ERR_COMM_FAILURE; + } + } + std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt"; + LogInfo << "file path for saving result: " << resFileName; + std::ofstream tfile(resFileName); + if (tfile.fail()) { + LogError << "Failed to open result file"; + return APP_ERR_COMM_FAILURE; + } + + for (auto clsInfos : batchClsInfos) { + std::string resultStr = ""; + for (auto clsInfo : clsInfos) { + LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className + << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId; + resultStr += std::to_string(clsInfo.classId) + " "; + } + tfile << resultStr << std::endl; + batchIndex += 1; + } + tfile.close(); + return APP_ERR_OK; +} + +APP_ERROR XceptionClassify::Process(const std::string &imgPath) +{ + cv::Mat imageMat; + + ReadImage(imgPath, imageMat); + CenterCropImage(imageMat, imageMat); + Resize(imageMat, imageMat); + + std::vector inputs = {}; + std::vector outputs = {}; + TensorBase tensorBase; + + APP_ERROR ret = CVMatToTensorBase(imageMat, tensorBase); + if (ret != APP_ERR_OK) { + LogError << "CVMatToTensorBase failed, ret=" << ret << "."; + return ret; + } + + inputs.push_back(tensorBase); + + ret = Inference(inputs, outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, 
ret=" << ret << "."; + return ret; + } + + std::vector> BatchClsInfos = {}; + ret = PostProcess(outputs, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + + ret = SaveInferResult(imgPath, BatchClsInfos); + if (ret != APP_ERR_OK) { + LogError << "Save results failed, ret: " << ret << "."; + return ret; + } + + imageMat.release(); + return APP_ERR_OK; +} + diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.h b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.h index 4a857b681301df4a354bd67990eb43a618fa9ee5..6d37dae3c6d43d296064958d51b5ce91f8f9205e 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.h +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/XceptionClassify.h @@ -1,58 +1,58 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef MXBASE_XCEPTIONCLASSIFY_H -#define MXBASE_XCEPTIONCLASSIFY_H - -#include -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "ClassPostProcessors/Resnet50PostProcess.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -extern std::vector g_inferCost; - -struct InitParam { - uint32_t deviceId; - std::string labelPath; - uint32_t classNum; - uint32_t topk; - bool softmax; - bool checkTensor; - std::string modelPath; -}; - -class XceptionClassify { -public: - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - void ReadImage(const std::string &imgPath, cv::Mat &imageMat); - void Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); - void CenterCropImage(cv::Mat &img, cv::Mat &cropImg); - APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::vector &inputs,std::vector> &clsInfos); - APP_ERROR SaveInferResult(const std::string &imgPath,std::vector> &batchClsInfos); - APP_ERROR Process(const std::string &imgPath); -private: - std::shared_ptr dvppWrapper_; - std::shared_ptr model_; - std::shared_ptr post_; - MxBase::ModelDesc modelDesc_; - uint32_t deviceId_ = 0; -}; -#endif +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#ifndef MXBASE_XCEPTIONCLASSIFY_H +#define MXBASE_XCEPTIONCLASSIFY_H + +#include +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "ClassPostProcessors/Resnet50PostProcess.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector g_inferCost; + +struct InitParam { + uint32_t deviceId; + std::string labelPath; + uint32_t classNum; + uint32_t topk; + bool softmax; + bool checkTensor; + std::string modelPath; +}; + +class XceptionClassify { +public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + void ReadImage(const std::string &imgPath, cv::Mat &imageMat); + void Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); + void CenterCropImage(cv::Mat &img, cv::Mat &cropImg); + APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); + APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); + APP_ERROR PostProcess(const std::vector &inputs,std::vector> &clsInfos); + APP_ERROR SaveInferResult(const std::string &imgPath,std::vector> &batchClsInfos); + APP_ERROR Process(const std::string &imgPath); +private: + std::shared_ptr dvppWrapper_; + std::shared_ptr model_; + std::shared_ptr post_; + MxBase::ModelDesc modelDesc_; + uint32_t deviceId_ = 0; +}; +#endif diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/main.cpp b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/main.cpp index 2f231330e235f09f3bc538d73de9ec648ddb69f1..b616fe29208fd1f8102acd56ce913401f63b8581 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/main.cpp +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/mxbase/main.cpp @@ -1,69 +1,69 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include "XceptionClassify.h" -#include "MxBase/Log/Log.h" - -namespace fs = std::experimental::filesystem; -namespace { -const uint32_t CLASS_NUM = 1000; -} -std::vector g_inferCost; - -int main(int argc, char* argv[]) -{ - if (argc <= 1) { - LogWarn << "Please input image path, such as './build/xception ./image_dir'."; - return APP_ERR_OK; - } - - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.classNum = CLASS_NUM; - initParam.labelPath = "../sdk/models/imagenet1000_clsidx_to_labels.names"; - initParam.topk = 5; - initParam.softmax = true; - initParam.checkTensor = true; - initParam.modelPath = "../sdk/models/xception_pt_pytorch.om"; - auto xception = std::make_shared(); - APP_ERROR ret = xception->Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "XceptionClassify init failed, ret=" << ret << "."; - return ret; - } - - std::string imgDir = argv[1]; - for (auto & entry : fs::directory_iterator(imgDir)) { - LogInfo << "read image path " << entry.path(); - ret = xception->Process(entry.path()); - if (ret != APP_ERR_OK) { - LogError << "XceptionClassify process failed, ret=" << ret << "."; - xception->DeInit(); - return ret; - } - } - xception->DeInit(); - double costSum = 0; - for (unsigned int i = 0; i < g_inferCost.size(); i++) { - costSum += g_inferCost[i]; - } - LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; - LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec."; - return APP_ERR_OK; -} +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include "XceptionClassify.h" +#include "MxBase/Log/Log.h" + +namespace fs = std::experimental::filesystem; +namespace { +const uint32_t CLASS_NUM = 1000; +} +std::vector g_inferCost; + +int main(int argc, char* argv[]) +{ + if (argc <= 1) { + LogWarn << "Please input image path, such as './build/xception ./image_dir'."; + return APP_ERR_OK; + } + + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.classNum = CLASS_NUM; + initParam.labelPath = "../sdk/models/imagenet1000_clsidx_to_labels.names"; + initParam.topk = 5; + initParam.softmax = true; + initParam.checkTensor = true; + initParam.modelPath = "../sdk/models/xception_pt_pytorch.om"; + auto xception = std::make_shared(); + APP_ERROR ret = xception->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "XceptionClassify init failed, ret=" << ret << "."; + return ret; + } + + std::string imgDir = argv[1]; + for (auto & entry : fs::directory_iterator(imgDir)) { + LogInfo << "read image path " << entry.path(); + ret = xception->Process(entry.path()); + if (ret != APP_ERR_OK) { + LogError << "XceptionClassify process failed, ret=" << ret << "."; + xception->DeInit(); + return ret; + } + } + xception->DeInit(); + double costSum = 0; + for (unsigned int i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec."; + return APP_ERR_OK; +} diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/main_xception_opencv.py b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/main_xception_opencv.py index 7b970e106cc8b5331232b049a18d68342a83b337..3f88491da4540d39c321b57e64e5730b89e54040 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/main_xception_opencv.py +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/main_xception_opencv.py @@ -1,95 +1,95 @@ -# Copyright 2021 Huawei Technologies Co., Ltd. - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
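main.cpp above accumulates per-image inference latencies in g_inferCost (milliseconds) and reports throughput as image count * 1000 / total milliseconds. The same bookkeeping in a few lines of Python, with made-up example latencies, purely for reference:

# Throughput/latency summary equivalent to the loop at the end of main.cpp.
infer_cost_ms = [12.4, 11.9, 13.1]  # example per-image latencies in milliseconds

total_ms = sum(infer_cost_ms)
throughput = len(infer_cost_ms) * 1000 / total_ms  # images per second
print(f"Infer images sum {len(infer_cost_ms)}, cost total time: {total_ms} ms.")
print(f"The throughput: {throughput:.2f} images/sec.")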
- -from StreamManagerApi import StreamManagerApi, MxDataInput -import os -import cv2 -import json -import numpy as np -import MxpiDataType_pb2 as MxpiDataType -import datetime -import sys - -def run(): - # init stream manager - stream_manager_api = StreamManagerApi() - ret = stream_manager_api.InitManager() - if ret != 0: - print("Failed to init Stream manager, ret=%s" % str(ret)) - return - - # create streams by pipeline config file - with open("./pipeline/xception_opencv.pipeline", 'rb') as f: - pipeline_str = f.read() - ret = stream_manager_api.CreateMultipleStreams(pipeline_str) - - if ret != 0: - print("Failed to create Stream, ret=%s" % str(ret)) - return - - # Construct the input of the stream - data_input = MxDataInput() - - dir_name = './val_union/' - res_dir_name = 'xception_npu_result' - - file_list = os.listdir(dir_name) - if not os.path.exists(res_dir_name): - os.makedirs(res_dir_name) - for file_name in file_list: - print(file_name) - file_path = os.path.join(dir_name, file_name) - if not file_name.lower().endswith((".JPEG", ".jpeg", "JPG", "jpg")): - continue - portion = os.path.splitext(file_name) - with open(file_path, 'rb') as f: - data_input.data = f.read() - - empty_data = [] - - stream_name = b'im_xception' - in_plugin_id = 0 - uniqueId = stream_manager_api.SendData(stream_name, in_plugin_id, data_input) - if uniqueId < 0: - print("Failed to send data to stream.") - exit() - # Obtain the inference result by specifying stream_name and uniqueId. - start_time = datetime.datetime.now() - infer_result = stream_manager_api.GetResult(stream_name, uniqueId) - end_time = datetime.datetime.now() - print('sdk run time: {}'.format((end_time - start_time).microseconds)) - if infer_result.errorCode != 0: - print("GetResultWithUniqueId error. errorCode=%d, errorMsg=%s" % ( - infer_result.errorCode, infer_result.data.decode())) - exit() - # print the infer result - print(infer_result.data.decode()) - - load_dict = json.loads(infer_result.data.decode()) - if load_dict.get('MxpiClass') is None: - with open(res_dir_name + "/" + file_name[:-5] + '.txt', 'w') as f_write: - f_write.write("") - continue - res_vec = load_dict['MxpiClass'] - - with open(res_dir_name + "/" + file_name[:-5] + '_1.txt', 'w') as f_write: - list1 = [str(item.get("classId")) + " " for item in res_vec] - f_write.writelines(list1) - f_write.write('\n') - - # destroy streams - stream_manager_api.DestroyAllStreams() - -if __name__ == '__main__': - run() +# Copyright 2021 Huawei Technologies Co., Ltd. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from StreamManagerApi import StreamManagerApi, MxDataInput +import os +import cv2 +import json +import numpy as np +import MxpiDataType_pb2 as MxpiDataType +import datetime +import sys + +def run(): + # init stream manager + stream_manager_api = StreamManagerApi() + ret = stream_manager_api.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + return + + # create streams by pipeline config file + with open("./pipeline/xception_opencv.pipeline", 'rb') as f: + pipeline_str = f.read() + ret = stream_manager_api.CreateMultipleStreams(pipeline_str) + + if ret != 0: + print("Failed to create Stream, ret=%s" % str(ret)) + return + + # Construct the input of the stream + data_input = MxDataInput() + + dir_name = './val_union/' + res_dir_name = 'xception_npu_result' + + file_list = os.listdir(dir_name) + if not os.path.exists(res_dir_name): + os.makedirs(res_dir_name) + for file_name in file_list: + print(file_name) + file_path = os.path.join(dir_name, file_name) + if not file_name.lower().endswith((".JPEG", ".jpeg", "JPG", "jpg")): + continue + portion = os.path.splitext(file_name) + with open(file_path, 'rb') as f: + data_input.data = f.read() + + empty_data = [] + + stream_name = b'im_xception' + in_plugin_id = 0 + uniqueId = stream_manager_api.SendData(stream_name, in_plugin_id, data_input) + if uniqueId < 0: + print("Failed to send data to stream.") + exit() + # Obtain the inference result by specifying stream_name and uniqueId. + start_time = datetime.datetime.now() + infer_result = stream_manager_api.GetResult(stream_name, uniqueId) + end_time = datetime.datetime.now() + print('sdk run time: {}'.format((end_time - start_time).microseconds)) + if infer_result.errorCode != 0: + print("GetResultWithUniqueId error. 
errorCode=%d, errorMsg=%s" % ( + infer_result.errorCode, infer_result.data.decode())) + exit() + # print the infer result + print(infer_result.data.decode()) + + load_dict = json.loads(infer_result.data.decode()) + if load_dict.get('MxpiClass') is None: + with open(res_dir_name + "/" + file_name[:-5] + '.txt', 'w') as f_write: + f_write.write("") + continue + res_vec = load_dict['MxpiClass'] + + with open(res_dir_name + "/" + file_name[:-5] + '_1.txt', 'w') as f_write: + list1 = [str(item.get("classId")) + " " for item in res_vec] + f_write.writelines(list1) + f_write.write('\n') + + # destroy streams + stream_manager_api.DestroyAllStreams() + +if __name__ == '__main__': + run() diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/pipeline/xception_opencv.pipeline b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/pipeline/xception_opencv.pipeline index 2944e83d0252d21f6b14a68488cab850de492d1f..e89cf88bc0520b39bb97a1f52c86741d8eac59bb 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/pipeline/xception_opencv.pipeline +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/sdk/pipeline/xception_opencv.pipeline @@ -1,73 +1,73 @@ -{ - "im_xception": { - "stream_config": { - "deviceId": "0" - }, - "appsrc1": { - "props": { - "blocksize": "409600" - }, - "factory": "appsrc", - "next": "mxpi_imagedecoder0" - }, - "mxpi_imagedecoder0": { - "props": { - "handleMethod": "opencv" - }, - "factory": "mxpi_imagedecoder", - "next": "mxpi_imageresize0" - }, - "mxpi_imageresize0": { - "props": { - "handleMethod": "opencv", - "resizeType": "Resizer_Stretch", - "resizeHeight": "355", - "resizeWidth": "355" - }, - "factory": "mxpi_imageresize", - "next": "mxpi_opencvcentercrop0" - }, - "mxpi_opencvcentercrop0": { - "props": { - "dataSource": "mxpi_imageresize0", - "cropHeight": "299", - "cropWidth": "299" - }, - "factory": "mxpi_opencvcentercrop", - "next": "mxpi_tensorinfer0" - }, - "mxpi_tensorinfer0": { - "props": { - "dataSource": "mxpi_opencvcentercrop0", - "modelPath": "../models/xception_pt_pytorch.om", - "waitingTime": "2000", - "outputDeviceId": "-1" - }, - "factory": "mxpi_tensorinfer", - "next": "mxpi_classpostprocessor0" - }, - "mxpi_classpostprocessor0": { - "props": { - "dataSource": "mxpi_tensorinfer0", - "postProcessConfigPath": "../models/xception_aipp.cfg", - "labelPath": "../models/imagenet1000_clsidx_to_labels.names", - "postProcessLibPath": "/usr/local/sdk_home/mxManufacture/lib/modelpostprocessors/libresnet50postprocess.so" - }, - "factory": "mxpi_classpostprocessor", - "next": "mxpi_dataserialize0" - }, - "mxpi_dataserialize0": { - "props": { - "outputDataKeys": "mxpi_classpostprocessor0" - }, - "factory": "mxpi_dataserialize", - "next": "appsink0" - }, - "appsink0": { - "props": { - "blocksize": "4096000" - }, - "factory": "appsink" - } - } -} +{ + "im_xception": { + "stream_config": { + "deviceId": "0" + }, + "appsrc1": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_imagedecoder0" + }, + "mxpi_imagedecoder0": { + "props": { + "handleMethod": "opencv" + }, + "factory": "mxpi_imagedecoder", + "next": "mxpi_imageresize0" + }, + "mxpi_imageresize0": { + "props": { + "handleMethod": "opencv", + "resizeType": "Resizer_Stretch", + "resizeHeight": "355", + "resizeWidth": "355" + }, + "factory": "mxpi_imageresize", + "next": "mxpi_opencvcentercrop0" + }, + "mxpi_opencvcentercrop0": { + "props": { + "dataSource": "mxpi_imageresize0", + "cropHeight": 
"299", + "cropWidth": "299" + }, + "factory": "mxpi_opencvcentercrop", + "next": "mxpi_tensorinfer0" + }, + "mxpi_tensorinfer0": { + "props": { + "dataSource": "mxpi_opencvcentercrop0", + "modelPath": "../models/xception_pt_pytorch.om", + "waitingTime": "2000", + "outputDeviceId": "-1" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_classpostprocessor0" + }, + "mxpi_classpostprocessor0": { + "props": { + "dataSource": "mxpi_tensorinfer0", + "postProcessConfigPath": "../models/xception_aipp.cfg", + "labelPath": "../models/imagenet1000_clsidx_to_labels.names", + "postProcessLibPath": "/usr/local/sdk_home/mxManufacture/lib/modelpostprocessors/libresnet50postprocess.so" + }, + "factory": "mxpi_classpostprocessor", + "next": "mxpi_dataserialize0" + }, + "mxpi_dataserialize0": { + "props": { + "outputDataKeys": "mxpi_classpostprocessor0" + }, + "factory": "mxpi_dataserialize", + "next": "appsink0" + }, + "appsink0": { + "props": { + "blocksize": "4096000" + }, + "factory": "appsink" + } + } +} diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/utils/classification_task_metric.py b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/utils/classification_task_metric.py index 127542b272ad16eea79b90f39a370e0ad6000913..a12fd3a1c12d76850cbca6a5486f40590a441b34 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/utils/classification_task_metric.py +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/infer/utils/classification_task_metric.py @@ -1,162 +1,162 @@ -# coding=utf-8 -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import json -import os -import sys - -import numpy as np - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if gtfile != LABEL_FILE: - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - image_name = os.path.splitext(gtfile.split('/')[-1]) - img_gt_dict[image_name] = gt["image"]["annotations"][0]["category_id"] - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - image_line_info = line.strip().split(" ") - img_name = image_line_info[0].split(".")[0] - img_gt_dict[img_name] = image_line_info[1] - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - function: - the prediction esult file data extraction - input: - result file:filepath - output: - n_label:numble of label - data_vec: the probabilitie of prediction in the 1000 - :return: probabilities, numble of label - """ - with open(filepath, 'r')as f: - label_info = f.readline().strip().split(" ") - data_vec = np.zeros((len(label_info)), dtype=np.float32) - n_label = len(label_info) - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(label_info): - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, topn=5): - """ - :param prediction_file_path: - :param result_store_path: - :param json_file_name: - :param img_gt_dict: - :param topn: - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - n_labels = "" - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - prediction, n_labels = load_statistical_predict_result(filepath) - - if n_labels == 1001: - real_label = int(img_gt_dict[img_name]) + 1 - else: - real_label = int(img_gt_dict[img_name]) - - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if real_label == int(prediction[i]): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - accuracy = np.cumsum(count_hit) / count if count else 0 - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - if len(sys.argv) == 5: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = 
sys.argv[4] - else: - print("Please enter target file result folder | ground truth label file | result json file folder | " - "result json file name, such as ./result val_label.txt . result.json") - exit(1) - - if not os.path.exists(folder_davinci_target): - print("Target file folder does not exist.") - - if not os.path.exists(annotation_file_path): - print("Ground truth file does not exist.") - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, topn=5) - +# coding=utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import json +import os +import sys + +import numpy as np + +np.set_printoptions(threshold=sys.maxsize) + +LABEL_FILE = "HiAI_label.json" + + +def cre_groundtruth_dict(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + for gtfile in os.listdir(gtfile_path): + if gtfile != LABEL_FILE: + with open(os.path.join(gtfile_path, gtfile), 'r') as f: + gt = json.load(f) + image_name = os.path.splitext(gtfile.split('/')[-1]) + img_gt_dict[image_name] = gt["image"]["annotations"][0]["category_id"] + return img_gt_dict + + +def cre_groundtruth_dict_fromtxt(gtfile_path): + """ + :param filename: file contains the imagename and label number + :return: dictionary key imagename, value is label number + """ + img_gt_dict = {} + with open(gtfile_path, 'r')as f: + for line in f.readlines(): + image_line_info = line.strip().split(" ") + img_name = image_line_info[0].split(".")[0] + img_gt_dict[img_name] = image_line_info[1] + return img_gt_dict + + +def load_statistical_predict_result(filepath): + """ + function: + the prediction esult file data extraction + input: + result file:filepath + output: + n_label:numble of label + data_vec: the probabilitie of prediction in the 1000 + :return: probabilities, numble of label + """ + with open(filepath, 'r')as f: + label_info = f.readline().strip().split(" ") + data_vec = np.zeros((len(label_info)), dtype=np.float32) + n_label = len(label_info) + if n_label == 0: + in_type = f.readline() + color = f.readline() + else: + for ind, cls_ind in enumerate(label_info): + data_vec[ind] = np.int(cls_ind) + return data_vec, n_label + + +def create_visualization_statistical_result(prediction_file_path, + result_store_path, json_file_name, + img_gt_dict, topn=5): + """ + :param prediction_file_path: + :param result_store_path: + :param json_file_name: + :param img_gt_dict: + :param topn: + :return: + """ + writer = open(os.path.join(result_store_path, json_file_name), 'w') + table_dict = {} + table_dict["title"] = "Overall statistical evaluation" + table_dict["value"] = [] + + count = 0 + res_cnt = 0 + n_labels = "" + count_hit = np.zeros(topn) + for tfile_name in 
os.listdir(prediction_file_path): + count += 1 + temp = tfile_name.split('.')[0] + index = temp.rfind('_') + img_name = temp[:index] + filepath = os.path.join(prediction_file_path, tfile_name) + prediction, n_labels = load_statistical_predict_result(filepath) + + if n_labels == 1001: + real_label = int(img_gt_dict[img_name]) + 1 + else: + real_label = int(img_gt_dict[img_name]) + + res_cnt = min(len(prediction), topn) + for i in range(res_cnt): + if real_label == int(prediction[i]): + count_hit[i] += 1 + break + if 'value' not in table_dict.keys(): + print("the item value does not exist!") + else: + table_dict["value"].extend( + [{"key": "Number of images", "value": str(count)}, + {"key": "Number of classes", "value": str(n_labels)}]) + accuracy = np.cumsum(count_hit) / count if count else 0 + for i in range(res_cnt): + table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", + "value": str( + round(accuracy[i] * 100, 2)) + '%'}) + json.dump(table_dict, writer) + writer.close() + + +if __name__ == '__main__': + if len(sys.argv) == 5: + # txt file path + folder_davinci_target = sys.argv[1] + # annotation files path, "val_label.txt" + annotation_file_path = sys.argv[2] + # the path to store the results json path + result_json_path = sys.argv[3] + # result json file name + json_file_name = sys.argv[4] + else: + print("Please enter target file result folder | ground truth label file | result json file folder | " + "result json file name, such as ./result val_label.txt . result.json") + exit(1) + + if not os.path.exists(folder_davinci_target): + print("Target file folder does not exist.") + + if not os.path.exists(annotation_file_path): + print("Ground truth file does not exist.") + + if not os.path.exists(result_json_path): + print("Result folder doesn't exist.") + + img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) + create_visualization_statistical_result(folder_davinci_target, + result_json_path, json_file_name, + img_label_dict, topn=5) + diff --git a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/modelarts/train_start_xception.py b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/modelarts/train_start_xception.py index 9da0dbeec19a9f790d737f1c5bacbd593bb9cf96..3bac77ecee403b7a51e58a4de725dde59838ff8e 100644 --- a/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/modelarts/train_start_xception.py +++ b/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch/modelarts/train_start_xception.py @@ -1,596 +1,596 @@ -# Copyright (c) 2021, -# All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
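
The evaluation script above tallies, for every result file, the rank at which the ground-truth class first appears among the top-5 predictions, then turns the per-rank hit counts into cumulative Top-N accuracies with np.cumsum. A minimal, self-contained sketch of just that aggregation step (the helper name topn_accuracy and the toy predictions are illustrative and not part of the script; file parsing and JSON output are omitted):

import numpy as np

def topn_accuracy(predictions, labels, topn=5):
    """predictions: one array of class ids per image, sorted by confidence;
    labels: the matching ground-truth class ids."""
    count_hit = np.zeros(topn)
    for pred, label in zip(predictions, labels):
        for i in range(min(len(pred), topn)):
            if int(pred[i]) == int(label):
                count_hit[i] += 1  # first hit at rank i (0-based), then stop
                break
    # Top-N accuracy is cumulative: a Top-1 hit also counts towards Top-2..Top-N.
    return np.cumsum(count_hit) / max(len(labels), 1)

# toy data: three images, five predicted class ids each
preds = [np.array([3, 7, 1, 0, 9]), np.array([2, 4, 8, 5, 6]), np.array([1, 3, 2, 0, 7])]
gts = [7, 2, 9]
print(topn_accuracy(preds, gts))  # Top-1 = 1/3, Top-2..Top-5 = 2/3
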
- -import argparse -import os -import glob -import random -import shutil -import sys -import time -import warnings - -import torch -import numpy as np -from apex import amp - -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models - -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) -from pthtar2onx import convert -from xception import xception -import moxing as mox - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', - help='path to dataset') -parser.add_argument('-a', '--arch', - metavar='ARCH', - default='xception', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: xception)') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=3, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=64, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', - help='path to pretrained weight') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -parser.add_argument('--npu', default=0, type=int, - help='NPU id to use.') -parser.add_argument('--addr', default='10.136.181.115', - type=str, help='master addr') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', - type=str, help='device id list') -parser.add_argument('--warm_up_epochs', default=0, type=int, - help='warm up') -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') -parser.add_argument('--label-smoothing', - default=0.0, - type=float, - metavar='S', - help='label smoothing') -parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") -parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - -best_acc1 = 0 -CACHE_TRAINING_URL = "/cache/training" - -def main(): - args = parser.parse_args() - print(args.device_list) - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.npu is None: - args.npu = 0 - global CALCULATE_DEVICE - global best_acc1 - - #-----modelarts modification----------------- - CALCULATE_DEVICE = "npu:{}".format(args.npu) - #-----modelarts modification----------------- - torch.npu.set_device(CALCULATE_DEVICE) - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. 
This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - ngpus_per_node = torch.npu.device_count() - - print('ngpus_per_node:',ngpus_per_node) - - - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, - args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = gpu - - if args.gpu is not None: - print("Use NPU: {} for training".format(args.gpu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = xception() - CACHE_MODEL_URL = "/cache/model" - # ------------------modelarts modification---------------------- - os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) - # ------------------modelarts modification--------------------- - pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar") - pretrained_dict = torch.load(pretrained_weight, map_location="cpu")["state_dict"] - if "fc.weight" in pretrained_dict: - pretrained_dict.pop("fc.weight") - pretrained_dict.pop("fc.bias") - - model.load_state_dict(pretrained_dict, strict=False) - - else: - print("=> creating model '{}'".format(args.arch)) - model = xception() - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / args.world_size) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - - else: - model = model.to(CALCULATE_DEVICE) - - # define loss function (criterion) and optimizer - criterion = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) - - optimizer = torch.optim.SGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - if args.amp: - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) - - # optionally resume from a checkpoint - if args.resume : - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - checkpoint = torch.load(args.resume, map_location=CALCULATE_DEVICE) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # -------modelarts modification------- - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." 
% real_path) - # ---------modelarts modification----- - - # Data loading code - traindir = os.path.join(real_path, 'train') - valdir = os.path.join(real_path, 'val') - normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(299), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(342), - transforms.CenterCrop(299), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=False, drop_last=True) - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - - adjust_learning_rate(optimizer, epoch, args) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - if args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - }, is_best) - - if args.onnx: - convert_pth_to_onnx(args) - - # --------------modelarts modification---------- - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - -def convert_pth_to_onnx(config_args): - pth_pattern = os.path.join(CACHE_TRAINING_URL, 'checkpoint.pth.tar') - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print(f"can't find pth {pth_pattern}") - return - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path) - -def train(train_loader, model, criterion, optimizer, epoch, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - images = images.to(CALCULATE_DEVICE, non_blocking=False).to(torch.float) - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=False) - - # compute output - output = model(images) - loss = criterion(output, 
target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - # torch.npu.synchronize() - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - progress.display(i) - - print('* FPS@all {:.3f}'.format(args.batch_size / (batch_time.avg + 0.001))) - - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - - images = images.to(CALCULATE_DEVICE, non_blocking=False).to(torch.float) - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=False) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - progress.display(i) - - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5)) - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - if not os.path.exists(CACHE_TRAINING_URL): - os.makedirs(CACHE_TRAINING_URL, 0o755) - - checkpoint_save_path = os.path.join(CACHE_TRAINING_URL, filename) - torch.save(state, checkpoint_save_path) - if is_best: - shutil.copyfile(checkpoint_save_path, CACHE_TRAINING_URL + "/" + 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=2): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + 
fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - - if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: - lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) - else: - alpha = 0 - cosine_decay = 0.5 * ( - 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) - decayed = (1 - alpha) * cosine_decay + alpha - lr = args.lr * decayed - - print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -class LabelSmoothing(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, loc, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - - :param smoothing: label smoothing factor - """ - super(LabelSmoothing, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - self.device = loc - - def forward(self, x, target): - target = target.to(torch.int64) - - logprobs = torch.nn.functional.log_softmax(x, dim=-1) - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1).to(torch.int64)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() - -if __name__ == '__main__': - main() +# Copyright (c) 2021, +# All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
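
The adjust_learning_rate function defined above combines a short linear warm-up with cosine decay: during the first warm_up_epochs the rate grows as base_lr * (epoch + 1) / (warm_up_epochs + 1), and afterwards it follows base_lr * 0.5 * (1 + cos(pi * (epoch - warm_up_epochs) / (epochs - warm_up_epochs))) with alpha fixed at 0. A standalone sketch of the schedule, handy for checking the value a given epoch will get (the function name cosine_lr and the sample epoch counts are illustrative assumptions):

import numpy as np

def cosine_lr(epoch, base_lr=0.1, total_epochs=90, warm_up_epochs=5):
    """Learning rate for a given epoch under linear warm-up + cosine decay."""
    if warm_up_epochs > 0 and epoch < warm_up_epochs:
        # linear ramp, matching lr * ((epoch + 1) / (warm_up_epochs + 1))
        return base_lr * (epoch + 1) / (warm_up_epochs + 1)
    cosine_decay = 0.5 * (1 + np.cos(
        np.pi * (epoch - warm_up_epochs) / (total_epochs - warm_up_epochs)))
    return base_lr * cosine_decay  # alpha = 0, so the rate decays towards 0

for e in (0, 4, 5, 45, 89):
    print(e, round(cosine_lr(e), 5))
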
+ +import argparse +import os +import glob +import random +import shutil +import sys +import time +import warnings + +import torch +import numpy as np +from apex import amp + +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models + +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) +from pthtar2onx import convert +from xception import xception +import moxing as mox + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('--data_url', metavar='DIR', default='/cache/data_url', + help='path to dataset') +parser.add_argument('-a', '--arch', + metavar='ARCH', + default='xception', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: xception)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=3, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=64, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', + help='path to pretrained weight') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. 
') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--npu', default=0, type=int, + help='NPU id to use.') +parser.add_argument('--addr', default='10.136.181.115', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--warm_up_epochs', default=0, type=int, + help='warm up') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') +parser.add_argument('--label-smoothing', + default=0.0, + type=float, + metavar='S', + help='label smoothing') +parser.add_argument('--train_url', + default="/cache/training", + type=str, + help="setting dir of training output") +parser.add_argument('--onnx', default=True, action='store_true', + help="convert pth model to onnx") + +best_acc1 = 0 +CACHE_TRAINING_URL = "/cache/training" + +def main(): + args = parser.parse_args() + print(args.device_list) + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.npu is None: + args.npu = 0 + global CALCULATE_DEVICE + global best_acc1 + + #-----modelarts modification----------------- + CALCULATE_DEVICE = "npu:{}".format(args.npu) + #-----modelarts modification----------------- + torch.npu.set_device(CALCULATE_DEVICE) + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + ngpus_per_node = torch.npu.device_count() + + print('ngpus_per_node:',ngpus_per_node) + + + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + + if args.gpu is not None: + print("Use NPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = xception() + CACHE_MODEL_URL = "/cache/model" + # ------------------modelarts modification---------------------- + os.makedirs(CACHE_MODEL_URL, exist_ok=True) + mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) + # ------------------modelarts modification--------------------- + pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar") + pretrained_dict = torch.load(pretrained_weight, map_location="cpu")["state_dict"] + if "fc.weight" in pretrained_dict: + pretrained_dict.pop("fc.weight") + pretrained_dict.pop("fc.bias") + + model.load_state_dict(pretrained_dict, strict=False) + + else: + print("=> creating model '{}'".format(args.arch)) + model = xception() + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
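        # A brief illustration of the split performed just below (the concrete
        # numbers are made-up examples, not values taken from this script): with
        # a global batch size of 64, 32 loader workers and
        # world_size = ngpus_per_node = 8,
        #   int(64 / 8) -> 8 samples per process, and
        #   int((32 + 8 - 1) / 8) -> 4 loader workers per process (ceiling division),
        # so each spawned process drives one device with its own slice of the batch.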
+ if args.gpu is not None: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + model = model.to(CALCULATE_DEVICE) + + # define loss function (criterion) and optimizer + criterion = nn.CrossEntropyLoss().to(CALCULATE_DEVICE) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + if args.amp: + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale) + + # optionally resume from a checkpoint + if args.resume : + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume, map_location=CALCULATE_DEVICE) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # -------modelarts modification------- + real_path = '/cache/data_url' + if not os.path.exists(real_path): + os.makedirs(real_path) + mox.file.copy_parallel(args.data_url, real_path) + print("training data finish copy to %s." 
% real_path) + # ---------modelarts modification----- + + # Data loading code + traindir = os.path.join(real_path, 'train') + valdir = os.path.join(real_path, 'val') + normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(299), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(342), + transforms.CenterCrop(299), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=False, drop_last=True) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + }, is_best) + + if args.onnx: + convert_pth_to_onnx(args) + + # --------------modelarts modification---------- + mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) + # --------------modelarts modification end---------- + +def convert_pth_to_onnx(config_args): + pth_pattern = os.path.join(CACHE_TRAINING_URL, 'checkpoint.pth.tar') + pth_file_list = glob.glob(pth_pattern) + if not pth_file_list: + print(f"can't find pth {pth_pattern}") + return + pth_file = pth_file_list[0] + onnx_path = pth_file.split(".")[0] + '.onnx' + convert(pth_file, onnx_path) + +def train(train_loader, model, criterion, optimizer, epoch, args): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + images = images.to(CALCULATE_DEVICE, non_blocking=False).to(torch.float) + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=False) + + # compute output + output = model(images) + loss = criterion(output, 
target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + # torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + print('* FPS@all {:.3f}'.format(args.batch_size / (batch_time.avg + 0.001))) + + +def validate(val_loader, model, criterion, args): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + + images = images.to(CALCULATE_DEVICE, non_blocking=False).to(torch.float) + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=False) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + if not os.path.exists(CACHE_TRAINING_URL): + os.makedirs(CACHE_TRAINING_URL, 0o755) + + checkpoint_save_path = os.path.join(CACHE_TRAINING_URL, filename) + torch.save(state, checkpoint_save_path) + if is_best: + shutil.copyfile(checkpoint_save_path, CACHE_TRAINING_URL + "/" + 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + 
fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + +class LabelSmoothing(nn.Module): + """ + NLL loss with label smoothing. + """ + def __init__(self, loc, smoothing=0.0): + """ + Constructor for the LabelSmoothing module. + + :param smoothing: label smoothing factor + """ + super(LabelSmoothing, self).__init__() + self.confidence = 1.0 - smoothing + self.smoothing = smoothing + self.device = loc + + def forward(self, x, target): + target = target.to(torch.int64) + + logprobs = torch.nn.functional.log_softmax(x, dim=-1) + nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1).to(torch.int64)) + nll_loss = nll_loss.squeeze(1) + smooth_loss = -logprobs.mean(dim=-1) + loss = self.confidence * nll_loss + self.smoothing * smooth_loss + return loss.mean() + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/classification/csp_resnext50-mish/benchmark.py b/PyTorch/contrib/cv/classification/csp_resnext50-mish/benchmark.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/csp_resnext50-mish/timm/loss/asymmetric_loss.py b/PyTorch/contrib/cv/classification/csp_resnext50-mish/timm/loss/asymmetric_loss.py index 96a977882b9fa534990bfb1c8321e4c822c602ca..a8b10f9c797c2cb3b2652302717b592dada216f3 100644 --- a/PyTorch/contrib/cv/classification/csp_resnext50-mish/timm/loss/asymmetric_loss.py +++ b/PyTorch/contrib/cv/classification/csp_resnext50-mish/timm/loss/asymmetric_loss.py @@ -1,97 +1,97 @@ -import torch -import torch.nn as nn - - -class AsymmetricLossMultiLabel(nn.Module): - def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): - super(AsymmetricLossMultiLabel, self).__init__() - - self.gamma_neg = gamma_neg - self.gamma_pos = gamma_pos - self.clip = clip - self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss - self.eps = eps - - def forward(self, x, y): - """" - Parameters - ---------- - x: input logits - y: targets (multi-label binarized vector) - """ - - # Calculating Probabilities - x_sigmoid = torch.sigmoid(x) - xs_pos = x_sigmoid - xs_neg = 1 - x_sigmoid - - # Asymmetric Clipping - if self.clip is not None and self.clip > 0: - xs_neg = (xs_neg + self.clip).clamp(max=1) - - # Basic CE calculation - los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) - los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) - loss = los_pos + los_neg - - # Asymmetric Focusing - if self.gamma_neg > 0 or self.gamma_pos > 0: - if 
self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(False) - pt0 = xs_pos * y - pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p - pt = pt0 + pt1 - one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) - one_sided_w = torch.pow(1 - pt, one_sided_gamma) - if self.disable_torch_grad_focal_loss: - torch._C.set_grad_enabled(True) - loss *= one_sided_w - - return -loss.sum() - - -class AsymmetricLossSingleLabel(nn.Module): - def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): - super(AsymmetricLossSingleLabel, self).__init__() - - self.eps = eps - self.logsoftmax = nn.LogSoftmax(dim=-1) - self.targets_classes = [] # prevent gpu repeated memory allocation - self.gamma_pos = gamma_pos - self.gamma_neg = gamma_neg - self.reduction = reduction - - def forward(self, inputs, target, reduction=None): - """" - Parameters - ---------- - x: input logits - y: targets (1-hot vector) - """ - - num_classes = inputs.size()[-1] - log_preds = self.logsoftmax(inputs) - self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) - - # ASL weights - targets = self.targets_classes - anti_targets = 1 - targets - xs_pos = torch.exp(log_preds) - xs_neg = 1 - xs_pos - xs_pos = xs_pos * targets - xs_neg = xs_neg * anti_targets - asymmetric_w = torch.pow(1 - xs_pos - xs_neg, - self.gamma_pos * targets + self.gamma_neg * anti_targets) - log_preds = log_preds * asymmetric_w - - if self.eps > 0: # label smoothing - self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) - - # loss calculation - loss = - self.targets_classes.mul(log_preds) - - loss = loss.sum(dim=-1) - if self.reduction == 'mean': - loss = loss.mean() - - return loss +import torch +import torch.nn as nn + + +class AsymmetricLossMultiLabel(nn.Module): + def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False): + super(AsymmetricLossMultiLabel, self).__init__() + + self.gamma_neg = gamma_neg + self.gamma_pos = gamma_pos + self.clip = clip + self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss + self.eps = eps + + def forward(self, x, y): + """" + Parameters + ---------- + x: input logits + y: targets (multi-label binarized vector) + """ + + # Calculating Probabilities + x_sigmoid = torch.sigmoid(x) + xs_pos = x_sigmoid + xs_neg = 1 - x_sigmoid + + # Asymmetric Clipping + if self.clip is not None and self.clip > 0: + xs_neg = (xs_neg + self.clip).clamp(max=1) + + # Basic CE calculation + los_pos = y * torch.log(xs_pos.clamp(min=self.eps)) + los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps)) + loss = los_pos + los_neg + + # Asymmetric Focusing + if self.gamma_neg > 0 or self.gamma_pos > 0: + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(False) + pt0 = xs_pos * y + pt1 = xs_neg * (1 - y) # pt = p if t > 0 else 1-p + pt = pt0 + pt1 + one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y) + one_sided_w = torch.pow(1 - pt, one_sided_gamma) + if self.disable_torch_grad_focal_loss: + torch._C.set_grad_enabled(True) + loss *= one_sided_w + + return -loss.sum() + + +class AsymmetricLossSingleLabel(nn.Module): + def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reduction='mean'): + super(AsymmetricLossSingleLabel, self).__init__() + + self.eps = eps + self.logsoftmax = nn.LogSoftmax(dim=-1) + self.targets_classes = [] # prevent gpu repeated memory allocation + self.gamma_pos = gamma_pos + self.gamma_neg = gamma_neg + self.reduction = reduction + + def forward(self, 
inputs, target, reduction=None): + """" + Parameters + ---------- + x: input logits + y: targets (1-hot vector) + """ + + num_classes = inputs.size()[-1] + log_preds = self.logsoftmax(inputs) + self.targets_classes = torch.zeros_like(inputs).scatter_(1, target.long().unsqueeze(1), 1) + + # ASL weights + targets = self.targets_classes + anti_targets = 1 - targets + xs_pos = torch.exp(log_preds) + xs_neg = 1 - xs_pos + xs_pos = xs_pos * targets + xs_neg = xs_neg * anti_targets + asymmetric_w = torch.pow(1 - xs_pos - xs_neg, + self.gamma_pos * targets + self.gamma_neg * anti_targets) + log_preds = log_preds * asymmetric_w + + if self.eps > 0: # label smoothing + self.targets_classes.mul_(1 - self.eps).add_(self.eps / num_classes) + + # loss calculation + loss = - self.targets_classes.mul(log_preds) + + loss = loss.sum(dim=-1) + if self.reduction == 'mean': + loss = loss.mean() + + return loss diff --git a/PyTorch/contrib/cv/classification/csp_resnext50-mish/train-1p.py b/PyTorch/contrib/cv/classification/csp_resnext50-mish/train-1p.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/csp_resnext50-mish/train-8p.py b/PyTorch/contrib/cv/classification/csp_resnext50-mish/train-8p.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/csp_resnext50-mish/validate.py b/PyTorch/contrib/cv/classification/csp_resnext50-mish/validate.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/classification/pointnetCNN/LICENSE b/PyTorch/contrib/cv/classification/pointnetCNN/LICENSE index 657549b86065a3d34c7dd038edee91cedb8cb05a..dcc65541a1b5f985560b92c275b8328469d50742 100644 --- a/PyTorch/contrib/cv/classification/pointnetCNN/LICENSE +++ b/PyTorch/contrib/cv/classification/pointnetCNN/LICENSE @@ -1,30 +1,30 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. -Copyright 2020 Huawei Technologies Co., Ltd - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. 
+Copyright 2020 Huawei Technologies Co., Ltd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/pointnetCNN/README.md b/PyTorch/contrib/cv/classification/pointnetCNN/README.md index 0199ef0f24f0c1e053d49753b47eea4840c705a5..907e6f516e23590162abc936005bc183bd35dc75 100644 --- a/PyTorch/contrib/cv/classification/pointnetCNN/README.md +++ b/PyTorch/contrib/cv/classification/pointnetCNN/README.md @@ -1,48 +1,48 @@ -# PointNetCNN - -This implements training of PointNetCNN on the modelnet40_ply_hdf5_2048 dataset. - -## PointNetCNN Detail - - - -## Requirements - -- `pip install -r requirements.txt` -- Download the dataset from https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/modelnet40_ply_hdf5_2048.zip and unzip it to ./data - -## Training - -To train a model, run `train_pytorch.py` - -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ - - -# finetuning 1p -bash test/train_finetune_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ -``` - -Log path: - ./test/output/1/train_1.log # 8p training performance and loss log - ./test/output/1/train_1.log # 1p training performance and loss log - - - - -## PointNetCNN training result - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 20 | 1 | 1 | O1 | -| - | 160 | 8 | 250 | O1 | +# PointNetCNN + +This implements training of PointNetCNN on the modelnet40_ply_hdf5_2048 dataset. 
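
Before training, it can help to confirm that the HDF5 shards were unpacked correctly. A small inspection sketch, assuming the usual PointNet-style layout of modelnet40_ply_hdf5_2048 (each .h5 file holding a `data` array of 2048-point clouds and a `label` array); the file name below is only an example:

```python
import h5py

with h5py.File("./data/modelnet40_ply_hdf5_2048/ply_data_train0.h5", "r") as f:
    points = f["data"][:]   # expected shape: (N, 2048, 3) xyz coordinates
    labels = f["label"][:]  # expected shape: (N, 1) class ids in [0, 39]
print(points.shape, labels.shape)
```
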
+ +## PointNetCNN Detail + + + +## Requirements + +- `pip install -r requirements.txt` +- Download the dataset from https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E6%8E%A8%E7%90%86/cv/classfication/PointNetCNN/modelnet40_ply_hdf5_2048.zip and unzip it to ./data + +## Training + +To train a model, run `train_pytorch.py` + +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ + + +# finetuning 1p +bash test/train_finetune_1p.sh --data_path=./data/modelnet40_ply_hdf5_2048/ +``` + +Log path: + ./test/output/1/train_1.log # 8p training performance and loss log + ./test/output/1/train_1.log # 1p training performance and loss log + + + + +## PointNetCNN training result + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 20 | 1 | 1 | O1 | +| - | 160 | 8 | 250 | O1 | diff --git a/PyTorch/contrib/cv/classification/pointnetCNN/modelzoo_level.txt b/PyTorch/contrib/cv/classification/pointnetCNN/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/contrib/cv/classification/pointnetCNN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/pointnetCNN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/pointnetCNN/requirements.txt b/PyTorch/contrib/cv/classification/pointnetCNN/requirements.txt index 366725ddde2a0d4ce60c5008cd07757edb8a383b..04d04ad3fea859e240bdff5cf5d67fb646d3d751 100644 --- a/PyTorch/contrib/cv/classification/pointnetCNN/requirements.txt +++ b/PyTorch/contrib/cv/classification/pointnetCNN/requirements.txt @@ -1,4 +1,4 @@ -torch == 1.5.0 -h5py -scipy -sklearn +torch == 1.5.0 +h5py +scipy +sklearn diff --git a/PyTorch/contrib/cv/classification/vit_base_patch32_224/LICENSE b/PyTorch/contrib/cv/classification/vit_base_patch32_224/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/PyTorch/contrib/cv/classification/vit_base_patch32_224/LICENSE +++ b/PyTorch/contrib/cv/classification/vit_base_patch32_224/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/vit_base_patch32_224/README.md b/PyTorch/contrib/cv/classification/vit_base_patch32_224/README.md index 3a818ad9fb59402c7bbb96051f4bba3633dc7373..34984ac0d020f08fda86db10d0309fcb2ad77ced 100644 --- a/PyTorch/contrib/cv/classification/vit_base_patch32_224/README.md +++ b/PyTorch/contrib/cv/classification/vit_base_patch32_224/README.md @@ -1,31 +1,31 @@ -## Vit_base_patch32_224
-This implements training of Vit_base_patch32_224 on the ImageNet dataset
-## Vit_base_patch32_224 Detail
-This model is one of open-source models by rwightman. See the source code at https://github.com/rwightman/pytorch-image-models/tree/master/timm.
-The whole model has achieved the requirement of accuracy and performance.
-## requirement
-Install PyTorch
-```pip install timm```
-```torchvision==0.5.0(x86) && torchvision==0.2.0(arm)``` -Please prepare the dataset by yourself, including training set and verification set. The optional dataset includes imagenet2012, including train and val. -## Training
-To train a model, run ```vit_train.py``` with the desired model architecture and the path to the ImageNet dataset:
-```bash -# training 1p performance -bash test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash test/train_performance_8p.sh --data_path=real_data_path - -# finetuning 1p -bash test/train_finetune_1p.sh --data_path=real_data_path --model-path=real_pre_train_model_path -``` -Log path: output/devie_id/train_${device_id}.log
-## Vit_base_patch32_224 Detail
-| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 96.8 | 1 | 1 | O1 | +## Vit_base_patch32_224
+This implements training of Vit_base_patch32_224 on the ImageNet dataset.
+## Vit_base_patch32_224 Detail
+This model is one of the open-source models by rwightman. See the source code at https://github.com/rwightman/pytorch-image-models/tree/master/timm.
+The model meets the accuracy and performance requirements.
+## Requirements
+Install PyTorch
+```pip install timm```
+```torchvision==0.5.0(x86) && torchvision==0.2.0(arm)```
+Please prepare the dataset yourself, including the training and validation sets. A suitable dataset is imagenet2012 (ImageNet-2012), organized into train and val splits.
+## Training
+To train a model, run ```vit_train.py``` with the desired model architecture and the path to the ImageNet dataset:
+```bash
+# training 1p performance
+bash test/train_performance_1p.sh --data_path=real_data_path
+
+# training 8p accuracy
+bash test/train_full_8p.sh --data_path=real_data_path
+
+# training 8p performance
+bash test/train_performance_8p.sh --data_path=real_data_path
+
+# finetuning 1p
+bash test/train_finetune_1p.sh --data_path=real_data_path --model-path=real_pre_train_model_path
+```
+Log path: output/device_id/train_${device_id}.log
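+
+The scripts above save checkpoints through `save_checkpoint` in `vit_train.py`: the latest weights go to `checkpoint.pth.tar` and the best-accuracy weights are copied to `model_best.pth.tar`. A minimal sketch of reloading such a checkpoint for offline use is shown below (the checkpoint path is a placeholder, and it assumes `timm` is installed):
+```python
+# Minimal sketch: load a checkpoint written by vit_train.py (path is a placeholder).
+from collections import OrderedDict
+import torch
+import timm
+
+model = timm.create_model("vit_base_patch32_224", num_classes=1000)
+checkpoint = torch.load("model_best.pth.tar", map_location="cpu")
+# Strip the "module." prefix added by DistributedDataParallel, as proc_nodes_module does.
+state_dict = OrderedDict((k.replace("module.", ""), v)
+                         for k, v in checkpoint["state_dict"].items())
+model.load_state_dict(state_dict)
+model.eval()
+```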
+## Vit_base_patch32_224 training result
+| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 96.8 | 1 | 1 | O1 | | 80.64 | 2981 | 8 | 8 | O1 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/vit_base_patch32_224/modelzoo_level.txt b/PyTorch/contrib/cv/classification/vit_base_patch32_224/modelzoo_level.txt index 41bdc0f3b06335fa8e0bed2287baad4eaa28ceae..810a09268cf30040cffd446a4242b3fec735ee66 100644 --- a/PyTorch/contrib/cv/classification/vit_base_patch32_224/modelzoo_level.txt +++ b/PyTorch/contrib/cv/classification/vit_base_patch32_224/modelzoo_level.txt @@ -1,5 +1,5 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/vit_base_patch32_224/requirements.txt b/PyTorch/contrib/cv/classification/vit_base_patch32_224/requirements.txt index bd9dc1313f5b68f0ae9a7e96db0bbe77e5dca143..c08eaabc347c993998951b302256f01c727d4c78 100644 --- a/PyTorch/contrib/cv/classification/vit_base_patch32_224/requirements.txt +++ b/PyTorch/contrib/cv/classification/vit_base_patch32_224/requirements.txt @@ -1,5 +1,5 @@ -#torch=1.5.0 -#torchvision>=0.5.0 -#torchvision>=0.2.0 -pyyaml +#torch=1.5.0 +#torchvision>=0.5.0 +#torchvision>=0.2.0 +pyyaml timm \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/vit_base_patch32_224/vit_train.py b/PyTorch/contrib/cv/classification/vit_base_patch32_224/vit_train.py index 277732b7abbed3862b302284fe64cddc206998e1..86884921f94c61053ebd84385e40a83ae51959ea 100644 --- a/PyTorch/contrib/cv/classification/vit_base_patch32_224/vit_train.py +++ b/PyTorch/contrib/cv/classification/vit_base_patch32_224/vit_train.py @@ -1,903 +1,903 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import argparse -import os -import random -import shutil -import time -import warnings -import torch -import numpy as np -import apex -from apex import amp -import torch.nn as nn -import torch.nn.parallel -import torch.npu -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import timm -from timm.utils import ApexScaler -from timm.data import create_dataset,create_loader,resolve_data_config,Mixup, FastCollateMixup, AugMixDataset -from timm.models import model_parameters, create_model -from collections import OrderedDict - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('data_dir', metavar='DIR', - help='path to dataset') -parser.add_argument('--dataset', '-d', metavar='NAME', default='', - help='dataset type (default: ImageFolder/ImageTar if empty)') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. 
This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -parser.add_argument('--num-classes', default=1000, type=int, - help='The number of classes.') -## for ascend 910 -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--addr', default='10.136.181.115', - type=str, help='master addr') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', - type=str, help='device id list') -parser.add_argument('--warm_up_epochs', default=0, type=int, - help='warm up') -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=8, type=float, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') - -#Additional parameters from author -parser.add_argument('--train-split', metavar='NAME', default='train', - help='dataset train split (default: train)') -parser.add_argument('--val-split', metavar='NAME', default='val', - help='dataset validation split (default: validation)') -parser.add_argument('--model', default='resnet50', type=str, metavar='MODEL', - help='Name of model to train (default: "resnet50"') -parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', - help='Initialize model from this checkpoint (default: none)') -parser.add_argument('--no-resume-opt', action='store_true', default=False, - help='prevent resume of optimizer state when resuming model') -parser.add_argument('--gp', default=None, type=str, metavar='POOL', - help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') -parser.add_argument('--img-size', type=int, default=None, metavar='N', - help='Image patch size (default: None => model default)') -parser.add_argument('--input-size', default=None, nargs=3, type=int, - metavar='N N N', help='Input all image dimensions (d h w, e.g. 
--input-size 3 224 224), uses model default if empty') -parser.add_argument('--crop-pct', default=None, type=float, - metavar='N', help='Input image center crop percent (for validation only)') -parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', - help='Override mean pixel value of dataset') -parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', - help='Override std deviation of of dataset') -parser.add_argument('--interpolation', default='', type=str, metavar='NAME', - help='Image resize interpolation type (overrides model)') -parser.add_argument('-vb', '--validation-batch-size', type=int, default=None, metavar='N', - help='validation batch size override (default: None)') - -# Optimizer parameters -parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', - help='Optimizer (default: "sgd"') -parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', - help='Optimizer Epsilon (default: None, use opt default)') -parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', - help='Optimizer Betas (default: None, use opt default)') -parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', - help='Clip gradient norm (default: None, no clipping)') -parser.add_argument('--clip-mode', type=str, default='norm', - help='Gradient clipping mode. One of ("norm", "value", "agc")') - - -# Learning rate schedule parameters -parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', - help='LR scheduler (default: "step"') -parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', - help='learning rate noise on/off epoch percentages') -parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', - help='learning rate noise limit percent (default: 0.67)') -parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', - help='learning rate noise std-dev (default: 1.0)') -parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', - help='learning rate cycle len multiplier (default: 1.0)') -parser.add_argument('--lr-cycle-decay', type=float, default=0.5, metavar='MULT', - help='amount to decay each learning rate cycle (default: 0.5)') -parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', - help='learning rate cycle limit, cycles enabled if > 1') -parser.add_argument('--lr-k-decay', type=float, default=1.0, - help='learning rate k-decay for cosine/poly (default: 1.0)') -parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', - help='warmup learning rate (default: 0.0001)') -parser.add_argument('--min-lr', type=float, default=1e-6, metavar='LR', - help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') -parser.add_argument('--epoch-repeats', type=float, default=0., metavar='N', - help='epoch repeat multiplier (number of times to repeat dataset epoch per train epoch).') -parser.add_argument('--decay-epochs', type=float, default=100, metavar='N', - help='epoch interval to decay LR') -parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N', - help='epochs to warmup LR, if scheduler supports') -parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', - help='epochs to cooldown LR at min_lr, after cyclic schedule ends') -parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', - help='patience epochs for Plateau LR scheduler (default: 10') 
-parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', - help='LR decay rate (default: 0.1)') - -# Augmentation & regularization parameters -parser.add_argument('--no-aug', action='store_true', default=False, - help='Disable all training augmentation, override other train aug args') -parser.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', - help='Random resize scale (default: 0.08 1.0)') -parser.add_argument('--ratio', type=float, nargs='+', default=[3./4., 4./3.], metavar='RATIO', - help='Random resize aspect ratio (default: 0.75 1.33)') -parser.add_argument('--hflip', type=float, default=0.5, - help='Horizontal flip training aug probability') -parser.add_argument('--vflip', type=float, default=0., - help='Vertical flip training aug probability') -parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', - help='Color jitter factor (default: 0.4)') -parser.add_argument('--aa', type=str, default=None, metavar='NAME', - help='Use AutoAugment policy. "v0" or "original". (default: None)'), -parser.add_argument('--aug-repeats', type=int, default=0, - help='Number of augmentation repetitions (distributed training only) (default: 0)') -parser.add_argument('--aug-splits', type=int, default=0, - help='Number of augmentation splits (default: 0, valid: 0 or >=2)') -parser.add_argument('--jsd-loss', action='store_true', default=False, - help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') -parser.add_argument('--bce-loss', action='store_true', default=False, - help='Enable BCE loss w/ Mixup/CutMix use.') -parser.add_argument('--reprob', type=float, default=0., metavar='PCT', - help='Random erase prob (default: 0.)') -parser.add_argument('--remode', type=str, default='pixel', - help='Random erase mode (default: "pixel")') -parser.add_argument('--recount', type=int, default=1, - help='Random erase count (default: 1)') -parser.add_argument('--resplit', action='store_true', default=False, - help='Do not random erase first (clean) augmentation split') -parser.add_argument('--mixup', type=float, default=0.0, - help='mixup alpha, mixup enabled if > 0. (default: 0.)') -parser.add_argument('--cutmix', type=float, default=0.0, - help='cutmix alpha, cutmix enabled if > 0. (default: 0.)') -parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, - help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') -parser.add_argument('--mixup-prob', type=float, default=1.0, - help='Probability of performing mixup or cutmix when either/both is enabled') -parser.add_argument('--mixup-switch-prob', type=float, default=0.5, - help='Probability of switching to cutmix when both mixup and cutmix enabled') -parser.add_argument('--mixup-mode', type=str, default='batch', - help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') -parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', - help='Turn off mixup after this epoch, disabled if 0 (default: 0)') -parser.add_argument('--smoothing', type=float, default=0.1, - help='Label smoothing (default: 0.1)') -parser.add_argument('--train-interpolation', type=str, default='random', - help='Training interpolation (random, bilinear, bicubic default: "random")') -parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', - help='Dropout rate (default: 0.)') -parser.add_argument('--drop-connect', type=float, default=None, metavar='PCT', - help='Drop connect rate, DEPRECATED, use drop-path (default: None)') -parser.add_argument('--drop-path', type=float, default=None, metavar='PCT', - help='Drop path rate (default: None)') -parser.add_argument('--drop-block', type=float, default=None, metavar='PCT', - help='Drop block rate (default: None)') - -# Batch norm parameters (only works with gen_efficientnet based models currently) -parser.add_argument('--bn-tf', action='store_true', default=False, - help='Use Tensorflow BatchNorm defaults for models that support it (default: False)') -parser.add_argument('--bn-momentum', type=float, default=None, - help='BatchNorm momentum override (if not None)') -parser.add_argument('--bn-eps', type=float, default=None, - help='BatchNorm epsilon override (if not None)') -parser.add_argument('--sync-bn', action='store_true', - help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') -parser.add_argument('--dist-bn', type=str, default='reduce', - help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') -parser.add_argument('--split-bn', action='store_true', - help='Enable separate BN layers per augmentation split.') - -# Model Exponential Moving Average -parser.add_argument('--model-ema', action='store_true', default=False, - help='Enable tracking moving average of model weights') -parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, - help='Force ema to be tracked on CPU, rank=0 node only. 
Disables EMA validation.') -parser.add_argument('--model-ema-decay', type=float, default=0.9998, - help='decay factor for model weights moving average (default: 0.9998)') - -# Misc -parser.add_argument('--log-interval', type=int, default=50, metavar='N', - help='how many batches to wait before logging training status') -parser.add_argument('--recovery-interval', type=int, default=0, metavar='N', - help='how many batches to wait before writing recovery checkpoint') -parser.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', - help='number of checkpoints to keep (default: 10)') -parser.add_argument('--save-images', action='store_true', default=False, - help='save images of input bathes every log interval for debugging') -parser.add_argument('--apex-amp', action='store_true', default=False, - help='Use NVIDIA Apex AMP mixed precision') -parser.add_argument('--native-amp', action='store_true', default=False, - help='Use Native Torch AMP mixed precision') -parser.add_argument('--channels-last', action='store_true', default=False, - help='Use channels_last memory layout') -parser.add_argument('--pin-mem', action='store_true', default=False, - help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') -parser.add_argument('--no-prefetcher', action='store_true', default=False, - help='disable fast prefetcher') -parser.add_argument('--output', default='', type=str, metavar='PATH', - help='path to output folder (default: none, current dir)') -parser.add_argument('--experiment', default='', type=str, metavar='NAME', - help='name of train experiment, name of sub-folder for output') -parser.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', - help='Best metric (default: "top1"') -parser.add_argument('--tta', type=int, default=0, metavar='N', - help='Test/inference time augmentation (oversampling) factor. 0=None (default: 0)') -parser.add_argument("--local_rank", default=0, type=int) -parser.add_argument('--use-multi-epochs-loader', action='store_true', default=False, - help='use the multi-epochs-loader to save time at the beginning of every epoch') -parser.add_argument('--torchscript', dest='torchscript', action='store_true', - help='convert model torchscript for inference') -parser.add_argument('--log-wandb', action='store_true', default=False, - help='log training and validation metrics to wandb') -parser.add_argument('--model-path',type=str) -best_acc1 = 0 - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - -def proc_nodes_module(checkpoint,AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - - -def main(): - - args = parser.parse_args() - print(args.device_list) - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! 
' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - ngpus_per_node = len(args.process_device_map) - else: - if args.distributed: - ngpus_per_node = torch.cuda.device_count() - else: - ngpus_per_node = 1 - print('ngpus_per_node:', ngpus_per_node) - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, - args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = args.process_device_map[gpu] - - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) - print("Is distributed train available?",dist.is_available()) - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - print("According to url") - args.rank = int(os.environ["RANK"]) - print("endif") - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - - if args.device == 'npu': - print("enter npu dis") - dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - print("leave npu dist") - else: - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model vit_base_patch32") - model = create_model( - "vit_base_patch32_224_in21k", - pretrained=args.pretrained, - num_classes=1000, - drop_rate=args.drop, - drop_connect_rate=args.drop_connect, # DEPRECATED, use drop_path - drop_path_rate=args.drop_path, - drop_block_rate=args.drop_block, - global_pool=args.gp, - bn_tf=args.bn_tf, - bn_momentum=args.bn_momentum, - bn_eps=args.bn_eps, - scriptable=args.torchscript, - checkpoint_path=args.initial_checkpoint) - data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) - #model = timm.create_model("vit_base_patch32_224_in21k",pretrained=True,num_classes=1000) - #print("loading model of yours...") - #pretrained_dict = torch.load("./model_best.pth.tar", map_location="cpu")["state_dict"] - #model.load_state_dict({k.replace('module.',''):v for k, v in pretrained_dict.items()}) - #if "fc.weight" in pretrained_dict: - # pretrained_dict.pop('fc.weight') - # pretrained_dict.pop('fc.bias') - #model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model vit_base_patch32") - model = model = timm.create_model("vit_base_patch32_224",num_classes=1000) - pretrained_dict = torch.load(args.model_path, map_location="cpu") - pretrained_dict['state_dict']=proc_nodes_module(pretrained_dict,'state_dict') - 
model.load_state_dict(pretrained_dict['state_dict']) - model.head = nn.Linear(768, args.num_classes) if args.num_classes > 0 else nn.Identity() - - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. - if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(loc) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / args.world_size) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - else: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = model.to(loc) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - torch.npu.set_device(args.gpu) - model = model.to(loc) - else: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - print("before : model = torch.nn.DataParallel(model).cuda()") - - # define loss function (criterion) and optimizer - #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, - # momentum=args.momentum, - # weight_decay=args.weight_decay,nesterov=True) - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - - if args.amp: - model, optimizer = amp.initialize( - model, optimizer, opt_level='O1',combine_grad=True) - loss_scaler=ApexScaler() - - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. 
- if args.gpu is not None: - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - if args.pretrained: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, - find_unused_parameters=True) - else: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) - else: - print("[gpu id:", args.gpu, "]", - "============================test args.gpu is not None else==========================") - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - print("[gpu id:", args.gpu, "]", - "============================test elif args.gpu is not None:==========================") - else: - # DataParallel will divide and allocate batch_size to all available GPUs - print("[gpu id:", args.gpu, "]", "============================test 1==========================") - print("[gpu id:", args.gpu, "]", "============================test 3==========================") - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - model = torch.nn.DataParallel(model).to(loc) - else: - model = torch.nn.DataParallel(model).cuda() - - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - criterion = nn.CrossEntropyLoss().to(loc) - else: - criterion = nn.CrossEntropyLoss().cuda(args.gpu) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - else: - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # Data loading code - traindir = os.path.join(args.data_dir, 'train') - valdir = os.path.join(args.data_dir, 'val') - normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224,interpolation=3), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=( - train_sampler is None), - num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256,interpolation=3), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=False, drop_last=True) - if args.evaluate: - validate(val_loader, model, 
criterion, args, ngpus_per_node) - return - - if args.prof: - profiling(train_loader, model, criterion, optimizer, args) - return - - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - # if args.distributed and hasattr(train_loader.sampler, 'set_epoch'): - # train_loader.sampler.set_epoch(epoch) - - adjust_learning_rate(optimizer, epoch, args) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node,loss_scaler) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - if args.device == 'npu' and args.gpu == 0 and epoch == 89: - print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - - ############## npu modify begin ############# - if args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'wide_resnet50_2', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'wide_resnet50_2', - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - }, is_best) - ############## npu modify end ############# - - -def profiling(data_loader, model, criterion, optimizer, args): - # switch to train mode - model.train() - - def update(model, images, target, optimizer): - output = model(images) - loss = criterion(output, target) - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.zero_grad() - optimizer.step() - - for step, (images, target) in enumerate(data_loader): - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - if step < 5: - update(model, images, target, optimizer) - else: - if args.device == 'npu': - with torch.autograd.profiler.profile(use_npu=True) as prof: - update(model, images, target, optimizer) - else: - with torch.autograd.profiler.profile(use_cuda=True) as prof: - update(model, images, target, optimizer) - break - - prof.export_chrome_trace("output.prof") - - -def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node,loss_scaler): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc, non_blocking=True).to(torch.float) - target = target.to(torch.int32).to(loc, 
non_blocking=True) - else: - images = images.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - if args.amp: - loss_scaler( - loss, optimizer, - clip_grad=args.clip_grad, clip_mode=args.clip_mode, - parameters=model_parameters(model, exclude_head='agc' in args.clip_mode), - create_graph=second_order) - else: - loss.backward() - optimizer.step() - if args.device == 'npu': - torch.npu.synchronize() - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - if batch_time.avg: - print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, - 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size * args.world_size / batch_time.avg)) - - -def validate(val_loader, model, criterion, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - images = images.to(loc).to(torch.float) - else: - images = images.cuda(args.gpu, non_blocking=True) - if args.device == 'npu': - loc = 'npu:{}'.format(args.gpu) - target = target.to(torch.int32).to(loc, non_blocking=True) - else: - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - progress.display(i) - - if i % args.print_freq == 0: - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=2): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index 
= start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by cosine method""" - - if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: - lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) - else: - alpha = 0 - cosine_decay = 0.5 * ( - 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) - decayed = (1 - alpha) * cosine_decay + alpha - lr = args.lr * decayed - - print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if __name__ == '__main__': - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import argparse +import os +import random +import shutil +import time +import warnings +import torch +import numpy as np +import apex +from apex import amp +import torch.nn as nn +import torch.nn.parallel +import torch.npu +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import timm +from timm.utils import ApexScaler +from timm.data import create_dataset,create_loader,resolve_data_config,Mixup, FastCollateMixup, AugMixDataset +from timm.models import model_parameters, create_model +from collections import OrderedDict + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data_dir', metavar='DIR', + help='path to dataset') +parser.add_argument('--dataset', '-d', metavar='NAME', default='', + help='dataset type (default: ImageFolder/ImageTar if empty)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--num-classes', default=1000, type=int, + help='The number of classes.') +## for ascend 910 +parser.add_argument('--device', default='npu', type=str, help='npu or gpu') +parser.add_argument('--addr', default='10.136.181.115', + type=str, help='master addr') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--warm_up_epochs', default=0, type=int, + help='warm up') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=8, type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') + +#Additional parameters from author +parser.add_argument('--train-split', metavar='NAME', default='train', + help='dataset train split (default: train)') +parser.add_argument('--val-split', metavar='NAME', default='val', + help='dataset validation split (default: validation)') +parser.add_argument('--model', default='resnet50', type=str, metavar='MODEL', + help='Name of model to train (default: "resnet50"') +parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', + help='Initialize model from this checkpoint (default: none)') +parser.add_argument('--no-resume-opt', action='store_true', default=False, + help='prevent resume of optimizer state when resuming model') +parser.add_argument('--gp', default=None, type=str, metavar='POOL', + help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') +parser.add_argument('--img-size', type=int, default=None, metavar='N', + help='Image patch size (default: None => model default)') +parser.add_argument('--input-size', default=None, nargs=3, type=int, + metavar='N N N', help='Input all image dimensions (d h w, e.g. 
--input-size 3 224 224), uses model default if empty') +parser.add_argument('--crop-pct', default=None, type=float, + metavar='N', help='Input image center crop percent (for validation only)') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('-vb', '--validation-batch-size', type=int, default=None, metavar='N', + help='validation batch size override (default: None)') + +# Optimizer parameters +parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "sgd"') +parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: None, use opt default)') +parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') +parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') +parser.add_argument('--clip-mode', type=str, default='norm', + help='Gradient clipping mode. One of ("norm", "value", "agc")') + + +# Learning rate schedule parameters +parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "step"') +parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') +parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') +parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') +parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', + help='learning rate cycle len multiplier (default: 1.0)') +parser.add_argument('--lr-cycle-decay', type=float, default=0.5, metavar='MULT', + help='amount to decay each learning rate cycle (default: 0.5)') +parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', + help='learning rate cycle limit, cycles enabled if > 1') +parser.add_argument('--lr-k-decay', type=float, default=1.0, + help='learning rate k-decay for cosine/poly (default: 1.0)') +parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', + help='warmup learning rate (default: 0.0001)') +parser.add_argument('--min-lr', type=float, default=1e-6, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') +parser.add_argument('--epoch-repeats', type=float, default=0., metavar='N', + help='epoch repeat multiplier (number of times to repeat dataset epoch per train epoch).') +parser.add_argument('--decay-epochs', type=float, default=100, metavar='N', + help='epoch interval to decay LR') +parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N', + help='epochs to warmup LR, if scheduler supports') +parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') +parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') 
+parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + +# Augmentation & regularization parameters +parser.add_argument('--no-aug', action='store_true', default=False, + help='Disable all training augmentation, override other train aug args') +parser.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', + help='Random resize scale (default: 0.08 1.0)') +parser.add_argument('--ratio', type=float, nargs='+', default=[3./4., 4./3.], metavar='RATIO', + help='Random resize aspect ratio (default: 0.75 1.33)') +parser.add_argument('--hflip', type=float, default=0.5, + help='Horizontal flip training aug probability') +parser.add_argument('--vflip', type=float, default=0., + help='Vertical flip training aug probability') +parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') +parser.add_argument('--aa', type=str, default=None, metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". (default: None)'), +parser.add_argument('--aug-repeats', type=int, default=0, + help='Number of augmentation repetitions (distributed training only) (default: 0)') +parser.add_argument('--aug-splits', type=int, default=0, + help='Number of augmentation splits (default: 0, valid: 0 or >=2)') +parser.add_argument('--jsd-loss', action='store_true', default=False, + help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') +parser.add_argument('--bce-loss', action='store_true', default=False, + help='Enable BCE loss w/ Mixup/CutMix use.') +parser.add_argument('--reprob', type=float, default=0., metavar='PCT', + help='Random erase prob (default: 0.)') +parser.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') +parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') +parser.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') +parser.add_argument('--mixup', type=float, default=0.0, + help='mixup alpha, mixup enabled if > 0. (default: 0.)') +parser.add_argument('--cutmix', type=float, default=0.0, + help='cutmix alpha, cutmix enabled if > 0. (default: 0.)') +parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') +parser.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') +parser.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') +parser.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. 
Per "batch", "pair", or "elem"') +parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', + help='Turn off mixup after this epoch, disabled if 0 (default: 0)') +parser.add_argument('--smoothing', type=float, default=0.1, + help='Label smoothing (default: 0.1)') +parser.add_argument('--train-interpolation', type=str, default='random', + help='Training interpolation (random, bilinear, bicubic default: "random")') +parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', + help='Dropout rate (default: 0.)') +parser.add_argument('--drop-connect', type=float, default=None, metavar='PCT', + help='Drop connect rate, DEPRECATED, use drop-path (default: None)') +parser.add_argument('--drop-path', type=float, default=None, metavar='PCT', + help='Drop path rate (default: None)') +parser.add_argument('--drop-block', type=float, default=None, metavar='PCT', + help='Drop block rate (default: None)') + +# Batch norm parameters (only works with gen_efficientnet based models currently) +parser.add_argument('--bn-tf', action='store_true', default=False, + help='Use Tensorflow BatchNorm defaults for models that support it (default: False)') +parser.add_argument('--bn-momentum', type=float, default=None, + help='BatchNorm momentum override (if not None)') +parser.add_argument('--bn-eps', type=float, default=None, + help='BatchNorm epsilon override (if not None)') +parser.add_argument('--sync-bn', action='store_true', + help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') +parser.add_argument('--dist-bn', type=str, default='reduce', + help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') +parser.add_argument('--split-bn', action='store_true', + help='Enable separate BN layers per augmentation split.') + +# Model Exponential Moving Average +parser.add_argument('--model-ema', action='store_true', default=False, + help='Enable tracking moving average of model weights') +parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, + help='Force ema to be tracked on CPU, rank=0 node only. 
Disables EMA validation.') +parser.add_argument('--model-ema-decay', type=float, default=0.9998, + help='decay factor for model weights moving average (default: 0.9998)') + +# Misc +parser.add_argument('--log-interval', type=int, default=50, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--recovery-interval', type=int, default=0, metavar='N', + help='how many batches to wait before writing recovery checkpoint') +parser.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', + help='number of checkpoints to keep (default: 10)') +parser.add_argument('--save-images', action='store_true', default=False, + help='save images of input bathes every log interval for debugging') +parser.add_argument('--apex-amp', action='store_true', default=False, + help='Use NVIDIA Apex AMP mixed precision') +parser.add_argument('--native-amp', action='store_true', default=False, + help='Use Native Torch AMP mixed precision') +parser.add_argument('--channels-last', action='store_true', default=False, + help='Use channels_last memory layout') +parser.add_argument('--pin-mem', action='store_true', default=False, + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') +parser.add_argument('--no-prefetcher', action='store_true', default=False, + help='disable fast prefetcher') +parser.add_argument('--output', default='', type=str, metavar='PATH', + help='path to output folder (default: none, current dir)') +parser.add_argument('--experiment', default='', type=str, metavar='NAME', + help='name of train experiment, name of sub-folder for output') +parser.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', + help='Best metric (default: "top1"') +parser.add_argument('--tta', type=int, default=0, metavar='N', + help='Test/inference time augmentation (oversampling) factor. 0=None (default: 0)') +parser.add_argument("--local_rank", default=0, type=int) +parser.add_argument('--use-multi-epochs-loader', action='store_true', default=False, + help='use the multi-epochs-loader to save time at the beginning of every epoch') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') +parser.add_argument('--log-wandb', action='store_true', default=False, + help='log training and validation metrics to wandb') +parser.add_argument('--model-path',type=str) +best_acc1 = 0 + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + +def proc_nodes_module(checkpoint,AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + + +def main(): + + args = parser.parse_args() + print(args.device_list) + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! 
' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.distributed: + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = args.process_device_map[gpu] + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + print("Is distributed train available?",dist.is_available()) + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + print("According to url") + args.rank = int(os.environ["RANK"]) + print("endif") + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + + if args.device == 'npu': + print("enter npu dis") + dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + print("leave npu dist") + else: + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model vit_base_patch32") + model = create_model( + "vit_base_patch32_224_in21k", + pretrained=args.pretrained, + num_classes=1000, + drop_rate=args.drop, + drop_connect_rate=args.drop_connect, # DEPRECATED, use drop_path + drop_path_rate=args.drop_path, + drop_block_rate=args.drop_block, + global_pool=args.gp, + bn_tf=args.bn_tf, + bn_momentum=args.bn_momentum, + bn_eps=args.bn_eps, + scriptable=args.torchscript, + checkpoint_path=args.initial_checkpoint) + data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) + #model = timm.create_model("vit_base_patch32_224_in21k",pretrained=True,num_classes=1000) + #print("loading model of yours...") + #pretrained_dict = torch.load("./model_best.pth.tar", map_location="cpu")["state_dict"] + #model.load_state_dict({k.replace('module.',''):v for k, v in pretrained_dict.items()}) + #if "fc.weight" in pretrained_dict: + # pretrained_dict.pop('fc.weight') + # pretrained_dict.pop('fc.bias') + #model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model vit_base_patch32") + model = model = timm.create_model("vit_base_patch32_224",num_classes=1000) + pretrained_dict = torch.load(args.model_path, map_location="cpu") + pretrained_dict['state_dict']=proc_nodes_module(pretrained_dict,'state_dict') + 
model.load_state_dict(pretrained_dict['state_dict']) + model.head = nn.Linear(768, args.num_classes) if args.num_classes > 0 else nn.Identity() + + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(loc) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + args.batch_size = int(args.batch_size / args.world_size) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + else: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = model.to(loc) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.gpu) + model = model.to(loc) + else: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + print("before : model = torch.nn.DataParallel(model).cuda()") + + # define loss function (criterion) and optimizer + #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, + # momentum=args.momentum, + # weight_decay=args.weight_decay,nesterov=True) + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + + if args.amp: + model, optimizer = amp.initialize( + model, optimizer, opt_level='O1',combine_grad=True) + loss_scaler=ApexScaler() + + if args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. 
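+        # NOTE: the model is wrapped in DistributedDataParallel only after amp.initialize()
+        # has patched the model/optimizer pair (Apex's recommended order); broadcast_buffers
+        # is disabled, and find_unused_parameters=True is enabled only for the pretrained
+        # vit_base_patch32_224_in21k variant created above.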
+ if args.gpu is not None: + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + if args.pretrained: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False, + find_unused_parameters=True) + else: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False) + else: + print("[gpu id:", args.gpu, "]", + "============================test args.gpu is not None else==========================") + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + print("[gpu id:", args.gpu, "]", + "============================test elif args.gpu is not None:==========================") + else: + # DataParallel will divide and allocate batch_size to all available GPUs + print("[gpu id:", args.gpu, "]", "============================test 1==========================") + print("[gpu id:", args.gpu, "]", "============================test 3==========================") + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + model = torch.nn.DataParallel(model).to(loc) + else: + model = torch.nn.DataParallel(model).cuda() + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + criterion = nn.CrossEntropyLoss().to(loc) + else: + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + else: + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data_dir, 'train') + valdir = os.path.join(args.data_dir, 'val') + normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224,interpolation=3), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=( + train_sampler is None), + num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256,interpolation=3), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=False, drop_last=True) + if args.evaluate: + validate(val_loader, model, 
criterion, args, ngpus_per_node) + return + + if args.prof: + profiling(train_loader, model, criterion, optimizer, args) + return + + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + # if args.distributed and hasattr(train_loader.sampler, 'set_epoch'): + # train_loader.sampler.set_epoch(epoch) + + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node,loss_scaler) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args, ngpus_per_node) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + if args.device == 'npu' and args.gpu == 0 and epoch == 89: + print("Complete 90 epoch training, take time:{}h".format(round((time.time() - start_time) / 3600.0, 2))) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + + ############## npu modify begin ############# + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'wide_resnet50_2', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'wide_resnet50_2', + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + }, is_best) + ############## npu modify end ############# + + +def profiling(data_loader, model, criterion, optimizer, args): + # switch to train mode + model.train() + + def update(model, images, target, optimizer): + output = model(images) + loss = criterion(output, target) + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.zero_grad() + optimizer.step() + + for step, (images, target) in enumerate(data_loader): + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + if step < 5: + update(model, images, target, optimizer) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update(model, images, target, optimizer) + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update(model, images, target, optimizer) + break + + prof.export_chrome_trace("output.prof") + + +def train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node,loss_scaler): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc, non_blocking=True).to(torch.float) + target = target.to(torch.int32).to(loc, 
non_blocking=True) + else: + images = images.cuda(args.gpu, non_blocking=True) + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + loss_scaler( + loss, optimizer, + clip_grad=args.clip_grad, clip_mode=args.clip_mode, + parameters=model_parameters(model, exclude_head='agc' in args.clip_mode), + create_graph=second_order) + else: + loss.backward() + optimizer.step() + if args.device == 'npu': + torch.npu.synchronize() + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if batch_time.avg: + print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + + +def validate(val_loader, model, criterion, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if args.gpu is not None: + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + images = images.to(loc).to(torch.float) + else: + images = images.cuda(args.gpu, non_blocking=True) + if args.device == 'npu': + loc = 'npu:{}'.format(args.gpu) + target = target.to(torch.int32).to(loc, non_blocking=True) + else: + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + progress.display(i) + + if i % args.print_freq == 0: + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + print("[gpu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' + .format(top1=top1, top5=top5)) + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index 
= start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by cosine method""" + + if args.warm_up_epochs > 0 and epoch < args.warm_up_epochs: + lr = args.lr * ((epoch + 1) / (args.warm_up_epochs + 1)) + else: + alpha = 0 + cosine_decay = 0.5 * ( + 1 + np.cos(np.pi * (epoch - args.warm_up_epochs) / (args.epochs - args.warm_up_epochs))) + decayed = (1 - alpha) * cosine_decay + alpha + lr = args.lr * decayed + + print("=> Epoch[%d] Setting lr: %.4f" % (epoch, lr)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/detection/CascadedMaskRCNN/detectron2/structures/masks.py b/PyTorch/contrib/cv/detection/CascadedMaskRCNN/detectron2/structures/masks.py index a4d97d942eafbe3ea1bd71fad42b942e9aa5168d..3933471d084b42fc2e0e055d7452d1660e7dc0f1 100644 --- a/PyTorch/contrib/cv/detection/CascadedMaskRCNN/detectron2/structures/masks.py +++ b/PyTorch/contrib/cv/detection/CascadedMaskRCNN/detectron2/structures/masks.py @@ -1,442 +1,442 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import copy -import itertools -import numpy as np -from typing import Any, Iterator, List, Union -import pycocotools.mask as mask_util -import torch - -from detectron2.layers.roi_align import ROIAlign - -from .boxes import Boxes - - -def polygon_area(x, y): - # Using the shoelace formula - # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - -def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: - """ - Args: - polygons (list[ndarray]): each array has shape (Nx2,) - height, width (int) - - Returns: - ndarray: a bool mask of shape (height, width) - """ - assert len(polygons) > 0, "COCOAPI does not support empty polygons" - rles = mask_util.frPyObjects(polygons, height, width) - rle = mask_util.merge(rles) - return mask_util.decode(rle).astype(np.bool) - - -def rasterize_polygons_within_box( - polygons: List[np.ndarray], box: np.ndarray, mask_size: int -) -> torch.Tensor: - """ - Rasterize the polygons into a mask image and - crop the mask content in the given box. - The cropped mask is resized to (mask_size, mask_size). - - This function is used when generating training targets for mask head in Mask R-CNN. - Given original ground-truth masks for an image, new ground-truth mask - training targets in the size of `mask_size x mask_size` - must be provided for each predicted box. This function will be called to - produce such targets. - - Args: - polygons (list[ndarray[float]]): a list of polygons, which represents an instance. - box: 4-element numpy array - mask_size (int): - - Returns: - Tensor: BoolTensor of shape (mask_size, mask_size) - """ - # 1. Shift the polygons w.r.t the boxes - w, h = box[2] - box[0], box[3] - box[1] - - polygons = copy.deepcopy(polygons) - for p in polygons: - p[0::2] = p[0::2] - box[0] - p[1::2] = p[1::2] - box[1] - - # 2. Rescale the polygons to the new box size - # max() to avoid division by small number - ratio_h = mask_size / max(h, 0.1) - ratio_w = mask_size / max(w, 0.1) - - if ratio_h == ratio_w: - for p in polygons: - p *= ratio_h - else: - for p in polygons: - p[0::2] *= ratio_w - p[1::2] *= ratio_h - - # 3. Rasterize the polygons with coco api - mask = polygons_to_bitmask(polygons, mask_size, mask_size) - mask = torch.from_numpy(mask) - return mask - - -class BitMasks: - """ - This class stores the segmentation masks for all objects in one image, in - the form of bitmaps. - - Attributes: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - - def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Args: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) - assert tensor.dim() == 3, tensor.size() - self.image_size = tensor.shape[1:] - self.tensor = tensor - - def to(self, *args: Any, **kwargs: Any) -> "BitMasks": - return BitMasks(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": - """ - Returns: - BitMasks: Create a new :class:`BitMasks` by indexing. - - The following usage are allowed: - - 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. - 2. `new_masks = masks[2:10]`: return a slice of masks. - 3. 
`new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return BitMasks(self.tensor[item].view(1, -1)) - - if item.dtype == torch.int32: - m = self.tensor[item.long()] - else: - m = self.tensor[item] - - assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( - item, m.shape - ) - return BitMasks(m) - - def __iter__(self) -> torch.Tensor: - yield from self.tensor - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - def __len__(self) -> int: - return self.tensor.shape[0] - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: a BoolTensor which represents - whether each mask is empty (False) or non-empty (True). - """ - return self.tensor.flatten(1).any(dim=1) - - @staticmethod - def from_polygon_masks( - polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int - ) -> "BitMasks": - """ - Args: - polygon_masks (list[list[ndarray]] or PolygonMasks) - height, width (int) - """ - if isinstance(polygon_masks, PolygonMasks): - polygon_masks = polygon_masks.polygons - masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] - return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each bitmask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - It has less reconstruction error compared to rasterization with polygons. - However we observe no difference in accuracy, - but BitMasks requires more memory to store all the masks. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: - A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. - """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - device = self.tensor.device - - batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] - rois = torch.cat([batch_inds, boxes], dim=1) - bit_masks = self.tensor.to(dtype=torch.float32) - rois = rois.to(device=device) - - output = ( - ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) - .forward(bit_masks[:, None, :, :], rois) - .squeeze(1) - ) - output = output >= 0.5 - return output - - def get_bounding_boxes(self) -> None: - # not needed now - raise NotImplementedError - - @staticmethod - def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": - """ - Concatenates a list of BitMasks into a single BitMasks - - Arguments: - bitmasks_list (list[BitMasks]) - - Returns: - BitMasks: the concatenated BitMasks - """ - assert isinstance(bitmasks_list, (list, tuple)) - assert len(bitmasks_list) > 0 - assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) - - cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) - return cat_bitmasks - - -class PolygonMasks: - """ - This class stores the segmentation masks for all objects in one image, in the form of polygons. - - Attributes: - polygons: list[list[ndarray]]. 
Each ndarray is a float64 vector representing a polygon. - """ - - def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): - """ - Arguments: - polygons (list[list[np.ndarray]]): The first - level of the list correspond to individual instances, - the second level to all the polygons that compose the - instance, and the third level to the polygon coordinates. - The third level array should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - """ - assert isinstance(polygons, list), ( - "Cannot create PolygonMasks: Expect a list of list of polygons per image. " - "Got '{}' instead.".format(type(polygons)) - ) - - def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - # Use float64 for higher precision, because why not? - # Always put polygons on CPU (self.to is a no-op) since they - # are supposed to be small tensors. - # May need to change this assumption if GPU placement becomes useful - if isinstance(t, torch.Tensor): - t = t.cpu().numpy() - return np.asarray(t).astype("float64") - - def process_polygons( - polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] - ) -> List[np.ndarray]: - assert isinstance(polygons_per_instance, list), ( - "Cannot create polygons: Expect a list of polygons per instance. " - "Got '{}' instead.".format(type(polygons_per_instance)) - ) - # transform the polygon to a tensor - polygons_per_instance = [_make_array(p) for p in polygons_per_instance] - for polygon in polygons_per_instance: - assert len(polygon) % 2 == 0 and len(polygon) >= 6 - return polygons_per_instance - - self.polygons: List[List[np.ndarray]] = [ - process_polygons(polygons_per_instance) for polygons_per_instance in polygons - ] - - def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": - return self - - @property - def device(self) -> torch.device: - return torch.device("cpu") - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around polygon masks. - """ - boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) - for idx, polygons_per_instance in enumerate(self.polygons): - minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) - maxxy = torch.zeros(2, dtype=torch.float32) - for polygon in polygons_per_instance: - coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) - minxy = torch.min(minxy, torch.min(coords, dim=0).values) - maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) - boxes[idx, :2] = minxy - boxes[idx, 2:] = maxxy - return Boxes(boxes) - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: - a BoolTensor which represents whether each mask is empty (False) or not (True). - """ - keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] - return torch.from_numpy(np.asarray(keep, dtype=np.bool)) - - def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": - """ - Support indexing over the instances and return a `PolygonMasks` object. - `item` can be: - - 1. An integer. It will return an object with only one instance. - 2. A slice. It will return an object with the selected instances. - 3. A list[int]. It will return an object with the selected instances, - correpsonding to the indices in the list. - 4. A vector mask of type BoolTensor, whose length is num_instances. - It will return an object with the instances whose mask is nonzero. 
- """ - if isinstance(item, int): - selected_polygons = [self.polygons[item]] - elif isinstance(item, slice): - selected_polygons = self.polygons[item] - elif isinstance(item, list): - selected_polygons = [self.polygons[i] for i in item] - elif isinstance(item, torch.Tensor): - # Polygons is a list, so we have to move the indices back to CPU. - if item.dtype == torch.bool: - assert item.dim() == 1, item.shape - item = item.nonzero().squeeze(1).cpu().numpy().tolist() - elif item.dtype in [torch.int32, torch.int64]: - item = item.cpu().numpy().tolist() - else: - raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) - selected_polygons = [self.polygons[i] for i in item] - return PolygonMasks(selected_polygons) - - def __iter__(self) -> Iterator[List[np.ndarray]]: - """ - Yields: - list[ndarray]: the polygons for one instance. - Each Tensor is a float64 vector representing a polygon. - """ - return iter(self.polygons) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.polygons)) - return s - - def __len__(self) -> int: - return len(self.polygons) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each mask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. - """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - - device = boxes.device - # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise - # (several small tensors for representing a single instance mask) - boxes = boxes.to(torch.device("cpu")) - - results = [ - rasterize_polygons_within_box(poly, box.numpy(), mask_size) - for poly, box in zip(self.polygons, boxes) - ] - """ - poly: list[list[float]], the polygons for one instance - box: a tensor of shape (4,) - """ - if len(results) == 0: - return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) - return torch.stack(results, dim=0).to(device=device) - - def area(self): - """ - Computes area of the mask. - Only works with Polygons, using the shoelace formula: - https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - - Returns: - Tensor: a vector, area for each instance - """ - - area = [] - for polygons_per_instance in self.polygons: - area_per_instance = 0 - for p in polygons_per_instance: - area_per_instance += polygon_area(p[0::2], p[1::2]) - area.append(area_per_instance) - - return torch.tensor(area) - - @staticmethod - def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": - """ - Concatenates a list of PolygonMasks into a single PolygonMasks - - Arguments: - polymasks_list (list[PolygonMasks]) - - Returns: - PolygonMasks: the concatenated PolygonMasks - """ - assert isinstance(polymasks_list, (list, tuple)) - assert len(polymasks_list) > 0 - assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) - - cat_polymasks = type(polymasks_list[0])( - list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) - ) - return cat_polymasks +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import pycocotools.mask as mask_util +import torch + +from detectron2.layers.roi_align import ROIAlign + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. 
+ """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].view(1, -1)) + + if item.dtype == torch.int32: + m = self.tensor[item.long()] + else: + m = self.tensor[item] + + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> None: + # not needed now + raise NotImplementedError + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + assert isinstance(polygons, list), ( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + assert isinstance(polygons_per_instance, list), ( + "Cannot create polygons: Expect a list of polygons per instance. " + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform the polygon to a tensor + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + assert len(polygon) % 2 == 0 and len(polygon) >= 6 + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. 
+ """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=np.bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. + """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = item.nonzero().squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. + """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. 
+ """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. + Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks diff --git a/PyTorch/contrib/cv/detection/CenterFace/LICENSE b/PyTorch/contrib/cv/detection/CenterFace/LICENSE index 4ba4fdcab3dbdb4d64ce4cccdfd990698b4d596a..a0e03103591c1158a839681f3c404ee9118b182e 100644 --- a/PyTorch/contrib/cv/detection/CenterFace/LICENSE +++ b/PyTorch/contrib/cv/detection/CenterFace/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/CenterFace/modelzoo_level.txt b/PyTorch/contrib/cv/detection/CenterFace/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/contrib/cv/detection/CenterFace/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/CenterFace/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/CenterFace/src/8p_npu_main.py b/PyTorch/contrib/cv/detection/CenterFace/src/8p_npu_main.py index 1244c6f2c82a62f9e0ef915b6a112d51c67917e5..4dc05e1ea2784d5b6b956444ce568106b2090a75 100644 --- a/PyTorch/contrib/cv/detection/CenterFace/src/8p_npu_main.py +++ b/PyTorch/contrib/cv/detection/CenterFace/src/8p_npu_main.py @@ -1,127 +1,127 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import _init_paths - -import os - -import torch -import torch.utils.data -from opts_pose import opts -from models.model import create_model, load_model, save_model -from models.data_parallel import DataParallel -from datasets.dataset_factory import get_dataset -from trains.train_factory import train_factory -from datasets.sample.multi_pose import Multiposebatch -from apex import amp -import torch.npu -import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel as DDP - - -def main(opt, qtepoch=[0,]): - - torch.manual_seed(opt.seed) - torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test - Dataset = get_dataset(opt.dataset, opt.task) - opt = opts().update_dataset_info_and_set_heads(opt, Dataset) - if opt.local_rank ==0: - print(opt) - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = opt.port - device_id = int(opt.device_list.split(',')[int(opt.local_rank)]) - opt.device = 'npu:{}'.format(device_id) - - torch.npu.set_device(opt.device) - - dist.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.local_rank) - print('process{},device:{}'.format(opt.local_rank,opt.device)) - print('Creating model...') - model = create_model(opt.arch, opt.heads, opt.head_conv) - model = model.to(opt.device) - if opt.pretrained: - checkpoint = torch.load(opt.pretrained_weight_path, map_location='cpu') - if 'module.' in list(checkpoint['state_dict'].keys())[0]: - checkpoint['state_dict'] = {k.replace('module.', ''): v for k, v in checkpoint['state_dict'].items()} - model.load_state_dict(checkpoint['state_dict'], strict=False) - - optimizer = torch.optim.Adam(model.parameters(), opt.lr) - model, optimizer = amp.initialize(model, optimizer, opt_level="O1",loss_scale=19.0) - start_epoch = 0 - if opt.load_model != '': - model, optimizer, start_epoch = load_model( - model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) - print('start_epoch:{}'.format(start_epoch)) - Trainer = train_factory[opt.task] - trainer = Trainer(opt, model, optimizer) - trainer.set_device(opt.device_list, opt.chunk_sizes, opt.device) - print('Setting up data...') - train_sampler = torch.utils.data.distributed.DistributedSampler(Dataset(opt, 'train')) - train_loader = torch.utils.data.DataLoader( - Dataset(opt, 'train'), - batch_size=opt.batch_size, - shuffle=(train_sampler is None), - num_workers=opt.num_workers, - sampler=train_sampler, - pin_memory=True, - drop_last=True, - collate_fn=Multiposebatch - ) - - print('Starting training...') - best = 1e10 - for epoch in range(start_epoch + 1, opt.num_epochs + 1): - qtepoch.append(epoch) - train_sampler.set_epoch(epoch) - mark = epoch if opt.save_all else 'last' - log_dict_train, _ = trainer.train(epoch, train_loader) - if opt.local_rank == 0: - str1 ='epoch:{}|'.format(epoch) - for k, v in log_dict_train.items(): - str2 ='{} {:8f}|'.format(k,v) - str1 = str1 +str2 - print(str1) - if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: - save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), - epoch, model, optimizer) - - print('best:{} metric:{} epotchs:{}'.format(best,log_dict_train[opt.metric],epoch)) - - if log_dict_train[opt.metric] < best: - best = log_dict_train[opt.metric] - save_model(os.path.join(opt.save_dir, 'model_best.pth'), - epoch, model) - else: - 
save_model(os.path.join(opt.save_dir, 'model_last.pth'), - epoch, model, optimizer) - - - if epoch in opt.lr_step: - - if opt.local_rank == 0: - save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) - lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) - if opt.local_rank == 0: - print('Drop LR to', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -if __name__ == '__main__': - opt = opts().parse() - main(opt) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import _init_paths + +import os + +import torch +import torch.utils.data +from opts_pose import opts +from models.model import create_model, load_model, save_model +from models.data_parallel import DataParallel +from datasets.dataset_factory import get_dataset +from trains.train_factory import train_factory +from datasets.sample.multi_pose import Multiposebatch +from apex import amp +import torch.npu +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP + + +def main(opt, qtepoch=[0,]): + + torch.manual_seed(opt.seed) + torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test + Dataset = get_dataset(opt.dataset, opt.task) + opt = opts().update_dataset_info_and_set_heads(opt, Dataset) + if opt.local_rank ==0: + print(opt) + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = opt.port + device_id = int(opt.device_list.split(',')[int(opt.local_rank)]) + opt.device = 'npu:{}'.format(device_id) + + torch.npu.set_device(opt.device) + + dist.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.local_rank) + print('process{},device:{}'.format(opt.local_rank,opt.device)) + print('Creating model...') + model = create_model(opt.arch, opt.heads, opt.head_conv) + model = model.to(opt.device) + if opt.pretrained: + checkpoint = torch.load(opt.pretrained_weight_path, map_location='cpu') + if 'module.' 
in list(checkpoint['state_dict'].keys())[0]: + checkpoint['state_dict'] = {k.replace('module.', ''): v for k, v in checkpoint['state_dict'].items()} + model.load_state_dict(checkpoint['state_dict'], strict=False) + + optimizer = torch.optim.Adam(model.parameters(), opt.lr) + model, optimizer = amp.initialize(model, optimizer, opt_level="O1",loss_scale=19.0) + start_epoch = 0 + if opt.load_model != '': + model, optimizer, start_epoch = load_model( + model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) + print('start_epoch:{}'.format(start_epoch)) + Trainer = train_factory[opt.task] + trainer = Trainer(opt, model, optimizer) + trainer.set_device(opt.device_list, opt.chunk_sizes, opt.device) + print('Setting up data...') + train_sampler = torch.utils.data.distributed.DistributedSampler(Dataset(opt, 'train')) + train_loader = torch.utils.data.DataLoader( + Dataset(opt, 'train'), + batch_size=opt.batch_size, + shuffle=(train_sampler is None), + num_workers=opt.num_workers, + sampler=train_sampler, + pin_memory=True, + drop_last=True, + collate_fn=Multiposebatch + ) + + print('Starting training...') + best = 1e10 + for epoch in range(start_epoch + 1, opt.num_epochs + 1): + qtepoch.append(epoch) + train_sampler.set_epoch(epoch) + mark = epoch if opt.save_all else 'last' + log_dict_train, _ = trainer.train(epoch, train_loader) + if opt.local_rank == 0: + str1 ='epoch:{}|'.format(epoch) + for k, v in log_dict_train.items(): + str2 ='{} {:8f}|'.format(k,v) + str1 = str1 +str2 + print(str1) + if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: + save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), + epoch, model, optimizer) + + print('best:{} metric:{} epotchs:{}'.format(best,log_dict_train[opt.metric],epoch)) + + if log_dict_train[opt.metric] < best: + best = log_dict_train[opt.metric] + save_model(os.path.join(opt.save_dir, 'model_best.pth'), + epoch, model) + else: + save_model(os.path.join(opt.save_dir, 'model_last.pth'), + epoch, model, optimizer) + + + if epoch in opt.lr_step: + + if opt.local_rank == 0: + save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) + lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) + if opt.local_rank == 0: + print('Drop LR to', lr) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +if __name__ == '__main__': + opt = opts().parse() + main(opt) diff --git a/PyTorch/contrib/cv/detection/CenterFace/src/test/env_npu.sh b/PyTorch/contrib/cv/detection/CenterFace/src/test/env_npu.sh index 0ea66b3bc64619af088c962decb46161f26d4455..2b12ee99de31bca6c2e1120b98be597251e695d1 100644 --- a/PyTorch/contrib/cv/detection/CenterFace/src/test/env_npu.sh +++ b/PyTorch/contrib/cv/detection/CenterFace/src/test/env_npu.sh @@ -1,68 +1,68 @@ -#!/bin/bash -export install_path=/usr/local/Ascend - -if [ -d ${install_path}/toolkit ]; then - export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp -else - if [ -d 
${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi -fi - - -#将Host日志输出到串口,0-关闭/1-开启 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 -export TASK_QUEUE_ENABLE=1 -#设置是否开启PTCopy,0-关闭/1-开启 -export PTCOPY_ENABLE=1 -#设置是否开启combined标志,0-关闭/1-开启 -export COMBINED_ENABLE=1 -#设置特殊场景是否需要重新编译,不需要修改 -export DYNAMIC_OP="ADD#MUL" -#HCCL白名单开关,1-关闭/0-开启 -export HCCL_WHITELIST_DISABLE=1 -export HCCL_IF_IP=$(hostname -I |awk '{print $1}') - -ulimit -SHn 512000 - -path_lib=$(python3.7 -c """ -import sys -import re -result='' -for index in range(len(sys.path)): - match_sit = re.search('-packages', sys.path[index]) - if match_sit is not None: - match_lib = re.search('lib', sys.path[index]) - - if match_lib is not None: - end=match_lib.span()[1] - result += sys.path[index][0:end] + ':' - - result+=sys.path[index] + '/torch/lib:' -print(result)""" -) - -echo ${path_lib} - -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export 
LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +ulimit -SHn 512000 + 
+path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/detection/CenterNet/modelzoo_level.txt b/PyTorch/contrib/cv/detection/CenterNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/detection/CenterNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/CenterNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/base_detector.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/base_detector.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/ctdet.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/ctdet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/ddd.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/ddd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/exdet.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/exdet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/multi_pose.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/detectors/multi_pose.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/DCNv2/make.sh b/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/DCNv2/make.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py index 7850c278e65ea707d7e63436edc5dd9a52379bdc..4c1f102339d84f22e54aacbb983f859c785bd5d8 100644 --- a/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py +++ b/PyTorch/contrib/cv/detection/CenterNet/src/lib/models/networks/dcn/modules/deform_conv.py @@ -1,235 +1,235 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
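The `path_lib` one-liner embedded in `env_npu.sh` above is dense to read inside the shell quoting; the following is only a commented restatement of the same `sys.path` scan, written out as a plain script so the intent is easier to follow (it is not meant to replace the shell helper):

```python
import re
import sys

result = ''
for entry in sys.path:
    # Only site-packages / dist-packages style entries are of interest.
    if re.search('-packages', entry) is None:
        continue
    # Keep the prefix up to and including the first 'lib' component, e.g.
    # '/usr/local/python3.7.5/lib' out of '.../lib/python3.7/site-packages'.
    match_lib = re.search('lib', entry)
    if match_lib is not None:
        result += entry[:match_lib.span()[1]] + ':'
    # Also expose the torch shared libraries bundled with the package dir.
    result += entry + '/torch/lib:'

print(result)  # env_npu.sh appends this to LD_LIBRARY_PATH
```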
- -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair, _single -import math - - -class ModulatedDeformConv2dFunction(Function): - - @staticmethod - def forward(ctx, - input_tensor, - offset_ori, - mask, - weight, - bias=None, - with_bias=False, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - sort_index_for_npu_fp=None, - sort_index_for_npu_bp=None, - ): - - input_tensor = input_tensor.float() - offset_ori = offset_ori.float() - mask = mask.float() - - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.sort_index_for_npu_bp = sort_index_for_npu_bp - ctx.with_bias = with_bias - - offset = offset_ori.index_select(1, sort_index_for_npu_fp) - offset_all = torch.cat([offset, mask], dim=1) - output, offset_out = torch.npu_deformable_conv2d( - input_tensor, weight, offset_all, bias, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, - modulated=True) - if weight.requires_grad or mask.requires_grad or offset.requires_grad \ - or input_tensor.requires_grad: - ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) - return output - - @staticmethod - def backward(ctx, grad_output): - input_tensor, weight, offset_out, offset_all = ctx.saved_tensors - grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( - input_tensor, grad_output, offset_out, weight, offset_all, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) - grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) - grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] - if not ctx.with_bias: - grad_bias = None - - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, - None, None, None, None, None, None, None, None) - - -class ModulatedDeformConv(nn.Module): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - pack=True, - ): - - r"""Applies an NPU based Modulated Deformable 2D convolution operation. - - Paper link: - [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) - - Reference implementation link: - https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py - - The implementation of this ModulatedDeformConv is mainly based - on the implementation of mmcv for design and reconstruction. - Through the modular ModulatedDeformConvFunction, the forward and reverse are customized, - and the input is reconstructed in combination with the NPU underlying operator IR, - and finally the function is completed. - - It is worth mentioning that deformconv (DCNv1) is also implemented - by setting modulated = False. Due to the difference between input - and initialization, there is no additional implementation here. - - - .. note:: - ModulatedDeformConv only implements operations under fp32 data types. - Notice, conv_ Weight and bias of offset must be initialized to 0. 
- - Args: - in_channels (int): Number of channels in the input image. - out_channels (int): Number of channels produced by the convolution. - kernel_size(int, tuple): Size of the convolving kernel. - stride(int, tuple): Stride of the convolution. Default: 1. - padding (int or tuple): Zero-padding added to both sides of the input. - Default: 0. - dilation (int or tuple): Spacing between kernel elements. Default: 1. - groups (int): Number of blocked connections from input. - channels to output channels. Default: 1. - deform_groups (int): Number of deformable group partitions. - bias (bool): If True, adds a learnable bias to the output. Default: False. - pack (bool): If True, conv_offset and mask will be included in this module. Default: True. - - Examples:: - >>> m = ModulatedDeformConv(32, 32, 1) - >>> input_tensor = torch.randn(2, 32, 5, 5) - >>> output = m(input_tensor) - """ - - super(ModulatedDeformConv, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.pack = pack - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = torch.zeros(self.weight.shape[0]) - - if self.pack: - self.conv_offset = nn.Conv2d( - self.in_channels, - self.deformable_groups * 3 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - bias=True) - - self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] - sort_index_for_npu = list(range(self.split_num)) - sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] - sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} - sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] - self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) - self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) - self.sort_index_for_npu_todevice = False - - self.init_param() - - def init_param(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. / math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.zero_() - - if self.pack: - self.conv_offset.weight.data.zero_() - self.conv_offset.bias.data.zero_() - - def forward(self, x): - if self.pack: - out = self.conv_offset(x) - offset = out[:, :self.split_num, ...] 
- mask = torch.sigmoid(out[:, self.split_num:, ...]) - else: - x, offset, mask = x - - if not self.sort_index_for_npu_todevice: - self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) - self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) - self.bias = self.bias.to(x.device) - self.sort_index_for_npu_todevice = True - - return ModulatedDeformConv2dFunction.apply( - x, offset, mask, self.weight, self.bias, self.with_bias, - self.stride, self.padding, self.dilation, - self.groups, self.deformable_groups, - self.sort_index_for_npu_fp, - self.sort_index_for_npu_bp, - ) - - -DCNv2 = ModulatedDeformConv - -if __name__ == "__main__": - x = torch.randn(2, 32, 7, 7) - model = DCNv2(32, 32, 3, 2, 1) - - torch.npu.set_device(0) - x = x.npu() - model = model.npu() - - o = model(x) - l = o.sum() - l.backward() - print(l) +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair, _single +import math + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def forward(ctx, + input_tensor, + offset_ori, + mask, + weight, + bias=None, + with_bias=False, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + sort_index_for_npu_fp=None, + sort_index_for_npu_bp=None, + ): + + input_tensor = input_tensor.float() + offset_ori = offset_ori.float() + mask = mask.float() + + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.sort_index_for_npu_bp = sort_index_for_npu_bp + ctx.with_bias = with_bias + + offset = offset_ori.index_select(1, sort_index_for_npu_fp) + offset_all = torch.cat([offset, mask], dim=1) + output, offset_out = torch.npu_deformable_conv2d( + input_tensor, weight, offset_all, bias, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, + modulated=True) + if weight.requires_grad or mask.requires_grad or offset.requires_grad \ + or input_tensor.requires_grad: + ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) + return output + + @staticmethod + def backward(ctx, grad_output): + input_tensor, weight, offset_out, offset_all = ctx.saved_tensors + grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( + input_tensor, grad_output, offset_out, weight, offset_all, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) + grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) + grad_mask = 
grad_offset_all[:, grad_offset.shape[1]:, :, :] + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None, None, None, None) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + pack=True, + ): + + r"""Applies an NPU based Modulated Deformable 2D convolution operation. + + Paper link: + [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) + + Reference implementation link: + https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py + + The implementation of this ModulatedDeformConv is mainly based + on the implementation of mmcv for design and reconstruction. + Through the modular ModulatedDeformConvFunction, the forward and reverse are customized, + and the input is reconstructed in combination with the NPU underlying operator IR, + and finally the function is completed. + + It is worth mentioning that deformconv (DCNv1) is also implemented + by setting modulated = False. Due to the difference between input + and initialization, there is no additional implementation here. + + + .. note:: + ModulatedDeformConv only implements operations under fp32 data types. + Notice, conv_ Weight and bias of offset must be initialized to 0. + + Args: + in_channels (int): Number of channels in the input image. + out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. Default: False. + pack (bool): If True, conv_offset and mask will be included in this module. Default: True. 
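The `index_select` reordering applied in `ModulatedDeformConv2dFunction.forward` above relies on the two permutations built in `__init__` below: `sort_index_for_npu_fp` regroups the offset channels (odd-indexed channels first, then even-indexed) before calling the NPU operator, and `sort_index_for_npu_bp` is its inverse, used to put the offset gradients back into the original order. A small standalone sketch, with a deliberately tiny channel count chosen only for illustration, checks that the backward index really inverts the forward one:

```python
import torch

# Toy setting: deformable_groups=1 with a 3x1 kernel gives 6 offset channels.
split_num = 1 * 2 * 3 * 1  # deformable_groups * 2 * kh * kw

sort_index = list(range(split_num))              # [0, 1, 2, 3, 4, 5]
fp = sort_index[1::2] + sort_index[::2]          # [1, 3, 5, 0, 2, 4]
bp_dict = {i: idx for idx, i in enumerate(fp)}
bp = [bp_dict[i] for i in sort_index]            # [3, 0, 4, 1, 5, 2], the inverse permutation

offset = torch.arange(split_num)
reordered = offset[torch.tensor(fp)]             # what forward() feeds to the NPU op
restored = reordered[torch.tensor(bp)]           # what backward() does to the gradients
assert torch.equal(restored, offset)             # bp undoes fp
print(fp, bp)
```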
+ + Examples:: + >>> m = ModulatedDeformConv(32, 32, 1) + >>> input_tensor = torch.randn(2, 32, 5, 5) + >>> output = m(input_tensor) + """ + + super(ModulatedDeformConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.pack = pack + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = torch.zeros(self.weight.shape[0]) + + if self.pack: + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deformable_groups * 3 * self.kernel_size[0] * + self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + bias=True) + + self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] + sort_index_for_npu = list(range(self.split_num)) + sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] + sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} + sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] + self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) + self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) + self.sort_index_for_npu_todevice = False + + self.init_param() + + def init_param(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + if self.pack: + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + if self.pack: + out = self.conv_offset(x) + offset = out[:, :self.split_num, ...] 
+ mask = torch.sigmoid(out[:, self.split_num:, ...]) + else: + x, offset, mask = x + + if not self.sort_index_for_npu_todevice: + self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) + self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) + self.bias = self.bias.to(x.device) + self.sort_index_for_npu_todevice = True + + return ModulatedDeformConv2dFunction.apply( + x, offset, mask, self.weight, self.bias, self.with_bias, + self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups, + self.sort_index_for_npu_fp, + self.sort_index_for_npu_bp, + ) + + +DCNv2 = ModulatedDeformConv + +if __name__ == "__main__": + x = torch.randn(2, 32, 7, 7) + model = DCNv2(32, 32, 3, 2, 1) + + torch.npu.set_device(0) + x = x.npu() + model = model.npu() + + o = model(x) + l = o.sum() + l.backward() + print(l) diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/opts.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/opts.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/base_trainer.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/base_trainer.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/ctdet.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/ctdet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/ddd.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/ddd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/exdet.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/exdet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/multi_pose.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/trains/multi_pose.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/utils/__init__.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/utils/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/lib/utils/image.py b/PyTorch/contrib/cv/detection/CenterNet/src/lib/utils/image.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/main.py b/PyTorch/contrib/cv/detection/CenterNet/src/main.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/test.py b/PyTorch/contrib/cv/detection/CenterNet/src/test.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/src/tools/reval.py b/PyTorch/contrib/cv/detection/CenterNet/src/tools/reval.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/CenterNet/test/modelzoo_level.txt b/PyTorch/contrib/cv/detection/CenterNet/test/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/detection/CenterNet/test/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/CenterNet/test/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FCOS/README_raw.md b/PyTorch/contrib/cv/detection/FCOS/README_raw.md index 0b2c85262737aa6cae68a33d50ac113fd5603384..a58b943634f87935df007bbee3554ac38555064d 100644 --- a/PyTorch/contrib/cv/detection/FCOS/README_raw.md +++ b/PyTorch/contrib/cv/detection/FCOS/README_raw.md @@ -1,157 +1,157 @@ -# Warning -- 
当前README为原生mmdetection自带的,请参考[README_raw.md](https://gitee.com/ascend/modelzoo/blob/master/contrib/PyTorch/Research/cv/image_object_detection/FCOS/README_raw.md)来进行FCOS模型训练 - - - -
- -
- -**News**: We released the technical report on [ArXiv](https://arxiv.org/abs/1906.07155). - -Documentation: https://mmdetection.readthedocs.io/ - -## Introduction - -MMDetection is an open source object detection toolbox based on PyTorch. It is -a part of the OpenMMLab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/). - -The master branch works with **PyTorch 1.3 to 1.6**. -The old v1.x branch works with PyTorch 1.1 to 1.4, but v2.0 is strongly recommended for faster speed, higher performance, better design and more friendly usage. - -![demo image](resources/coco_test_12510.jpg) - -### Major features - -- **Modular Design** - - We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules. - -- **Support of multiple frameworks out of box** - - The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc. - -- **High efficiency** - - All basic bbox and mask operations run on GPUs. The training speed is faster than or comparable to other codebases, including [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet). - -- **State of the art** - - The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward. - -Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox. - -## License - -This project is released under the [Apache 2.0 license](LICENSE). - -## Changelog - -v2.6.0 was released in 1/11/2020. -Please refer to [changelog.md](docs/changelog.md) for details and release history. -A comparison between v1.x and v2.0 codebases can be found in [compatibility.md](docs/compatibility.md). - -## Benchmark and model zoo - -Results and models are available in the [model zoo](docs/model_zoo.md). 
- -Supported backbones: -- [x] ResNet -- [x] ResNeXt -- [x] VGG -- [x] HRNet -- [x] RegNet -- [x] Res2Net -- [x] ResNeSt - -Supported methods: -- [x] [RPN](configs/rpn) -- [x] [Fast R-CNN](configs/fast_rcnn) -- [x] [Faster R-CNN](configs/faster_rcnn) -- [x] [Mask R-CNN](configs/mask_rcnn) -- [x] [Cascade R-CNN](configs/cascade_rcnn) -- [x] [Cascade Mask R-CNN](configs/cascade_rcnn) -- [x] [SSD](configs/ssd) -- [x] [RetinaNet](configs/retinanet) -- [x] [GHM](configs/ghm) -- [x] [Mask Scoring R-CNN](configs/ms_rcnn) -- [x] [Double-Head R-CNN](configs/double_heads) -- [x] [Hybrid Task Cascade](configs/htc) -- [x] [Libra R-CNN](configs/libra_rcnn) -- [x] [Guided Anchoring](configs/guided_anchoring) -- [x] [FCOS](configs/fcos) -- [x] [RepPoints](configs/reppoints) -- [x] [Foveabox](configs/foveabox) -- [x] [FreeAnchor](configs/free_anchor) -- [x] [NAS-FPN](configs/nas_fpn) -- [x] [ATSS](configs/atss) -- [x] [FSAF](configs/fsaf) -- [x] [PAFPN](configs/pafpn) -- [x] [Dynamic R-CNN](configs/dynamic_rcnn) -- [x] [PointRend](configs/point_rend) -- [x] [CARAFE](configs/carafe/README.md) -- [x] [DCNv2](configs/dcn/README.md) -- [x] [Group Normalization](configs/gn/README.md) -- [x] [Weight Standardization](configs/gn+ws/README.md) -- [x] [OHEM](configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py) -- [x] [Soft-NMS](configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py) -- [x] [Generalized Attention](configs/empirical_attention/README.md) -- [x] [GCNet](configs/gcnet/README.md) -- [x] [Mixed Precision (FP16) Training](configs/fp16/README.md) -- [x] [InstaBoost](configs/instaboost/README.md) -- [x] [GRoIE](configs/groie/README.md) -- [x] [DetectoRS](configs/detectors/README.md) -- [x] [Generalized Focal Loss](configs/gfl/README.md) -- [x] [CornerNet](configs/cornernet/README.md) -- [x] [Side-Aware Boundary Localization](configs/sabl/README.md) -- [x] [YOLOv3](configs/yolo/README.md) -- [x] [PAA](configs/paa/README.md) -- [x] [YOLACT](configs/yolact/README.md) -- [x] [CentripetalNet](configs/centripetalnet/README.md) -- [x] [VFNet](configs/vfnet/README.md) - -Some other methods are also supported in [projects using MMDetection](./docs/projects.md). - -## Installation - -Please refer to [get_started.md](docs/get_started.md) for installation. - -## Getting Started - -Please see [get_started.md](docs/get_started.md) for the basic usage of MMDetection. -We provide [colab tutorial](demo/MMDet_Tutorial.ipynb), and full guidance for quick run [with existing dataset](docs/1_exist_data_model.md) and [with new dataset](docs/2_new_data_model.md) for beginners. -There are also tutorials for [finetuning models](docs/tutorials/finetune.md), [adding new dataset](docs/tutorials/new_dataset.md), [designing data pipeline](docs/tutorials/data_pipeline.md), [customizing models](docs/tutorials/customize_models.md), [customizing runtime settings](docs/tutorials/customize_runtime.md) and [useful tools](docs/useful_tools.md). - -Please refer to [FAQ](docs/faq.md) for frequently asked questions. - -## Contributing - -We appreciate all contributions to improve MMDetection. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline. - -## Acknowledgement - -MMDetection is an open source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks. 
-We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors. - -## Citation - -If you use this toolbox or benchmark in your research, please cite this project. - -``` -@article{mmdetection, - title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark}, - author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and - Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and - Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and - Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and - Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong - and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua}, - journal= {arXiv preprint arXiv:1906.07155}, - year={2019} -} -``` - -## Contact - -This repo is currently maintained by Kai Chen ([@hellock](http://github.com/hellock)), Yuhang Cao ([@yhcao6](https://github.com/yhcao6)), Wenwei Zhang ([@ZwwWayne](https://github.com/ZwwWayne)), -Jiarui Xu ([@xvjiarui](https://github.com/xvjiarui)). Other core developers include Jiangmiao Pang ([@OceanPang](https://github.com/OceanPang)) and Jiaqi Wang ([@myownskyW7](https://github.com/myownskyW7)). +# Warning +- 当前README为原生mmdetection自带的,请参考[README_raw.md](https://gitee.com/ascend/modelzoo/blob/master/contrib/PyTorch/Research/cv/image_object_detection/FCOS/README_raw.md)来进行FCOS模型训练 + + + +
+ +
+ +**News**: We released the technical report on [ArXiv](https://arxiv.org/abs/1906.07155). + +Documentation: https://mmdetection.readthedocs.io/ + +## Introduction + +MMDetection is an open source object detection toolbox based on PyTorch. It is +a part of the OpenMMLab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/). + +The master branch works with **PyTorch 1.3 to 1.6**. +The old v1.x branch works with PyTorch 1.1 to 1.4, but v2.0 is strongly recommended for faster speed, higher performance, better design and more friendly usage. + +![demo image](resources/coco_test_12510.jpg) + +### Major features + +- **Modular Design** + + We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules. + +- **Support of multiple frameworks out of box** + + The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc. + +- **High efficiency** + + All basic bbox and mask operations run on GPUs. The training speed is faster than or comparable to other codebases, including [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet). + +- **State of the art** + + The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward. + +Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox. + +## License + +This project is released under the [Apache 2.0 license](LICENSE). + +## Changelog + +v2.6.0 was released in 1/11/2020. +Please refer to [changelog.md](docs/changelog.md) for details and release history. +A comparison between v1.x and v2.0 codebases can be found in [compatibility.md](docs/compatibility.md). + +## Benchmark and model zoo + +Results and models are available in the [model zoo](docs/model_zoo.md). 
+ +Supported backbones: +- [x] ResNet +- [x] ResNeXt +- [x] VGG +- [x] HRNet +- [x] RegNet +- [x] Res2Net +- [x] ResNeSt + +Supported methods: +- [x] [RPN](configs/rpn) +- [x] [Fast R-CNN](configs/fast_rcnn) +- [x] [Faster R-CNN](configs/faster_rcnn) +- [x] [Mask R-CNN](configs/mask_rcnn) +- [x] [Cascade R-CNN](configs/cascade_rcnn) +- [x] [Cascade Mask R-CNN](configs/cascade_rcnn) +- [x] [SSD](configs/ssd) +- [x] [RetinaNet](configs/retinanet) +- [x] [GHM](configs/ghm) +- [x] [Mask Scoring R-CNN](configs/ms_rcnn) +- [x] [Double-Head R-CNN](configs/double_heads) +- [x] [Hybrid Task Cascade](configs/htc) +- [x] [Libra R-CNN](configs/libra_rcnn) +- [x] [Guided Anchoring](configs/guided_anchoring) +- [x] [FCOS](configs/fcos) +- [x] [RepPoints](configs/reppoints) +- [x] [Foveabox](configs/foveabox) +- [x] [FreeAnchor](configs/free_anchor) +- [x] [NAS-FPN](configs/nas_fpn) +- [x] [ATSS](configs/atss) +- [x] [FSAF](configs/fsaf) +- [x] [PAFPN](configs/pafpn) +- [x] [Dynamic R-CNN](configs/dynamic_rcnn) +- [x] [PointRend](configs/point_rend) +- [x] [CARAFE](configs/carafe/README.md) +- [x] [DCNv2](configs/dcn/README.md) +- [x] [Group Normalization](configs/gn/README.md) +- [x] [Weight Standardization](configs/gn+ws/README.md) +- [x] [OHEM](configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py) +- [x] [Soft-NMS](configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py) +- [x] [Generalized Attention](configs/empirical_attention/README.md) +- [x] [GCNet](configs/gcnet/README.md) +- [x] [Mixed Precision (FP16) Training](configs/fp16/README.md) +- [x] [InstaBoost](configs/instaboost/README.md) +- [x] [GRoIE](configs/groie/README.md) +- [x] [DetectoRS](configs/detectors/README.md) +- [x] [Generalized Focal Loss](configs/gfl/README.md) +- [x] [CornerNet](configs/cornernet/README.md) +- [x] [Side-Aware Boundary Localization](configs/sabl/README.md) +- [x] [YOLOv3](configs/yolo/README.md) +- [x] [PAA](configs/paa/README.md) +- [x] [YOLACT](configs/yolact/README.md) +- [x] [CentripetalNet](configs/centripetalnet/README.md) +- [x] [VFNet](configs/vfnet/README.md) + +Some other methods are also supported in [projects using MMDetection](./docs/projects.md). + +## Installation + +Please refer to [get_started.md](docs/get_started.md) for installation. + +## Getting Started + +Please see [get_started.md](docs/get_started.md) for the basic usage of MMDetection. +We provide [colab tutorial](demo/MMDet_Tutorial.ipynb), and full guidance for quick run [with existing dataset](docs/1_exist_data_model.md) and [with new dataset](docs/2_new_data_model.md) for beginners. +There are also tutorials for [finetuning models](docs/tutorials/finetune.md), [adding new dataset](docs/tutorials/new_dataset.md), [designing data pipeline](docs/tutorials/data_pipeline.md), [customizing models](docs/tutorials/customize_models.md), [customizing runtime settings](docs/tutorials/customize_runtime.md) and [useful tools](docs/useful_tools.md). + +Please refer to [FAQ](docs/faq.md) for frequently asked questions. + +## Contributing + +We appreciate all contributions to improve MMDetection. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline. + +## Acknowledgement + +MMDetection is an open source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks. 
+We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors. + +## Citation + +If you use this toolbox or benchmark in your research, please cite this project. + +``` +@article{mmdetection, + title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark}, + author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and + Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and + Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and + Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and + Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong + and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua}, + journal= {arXiv preprint arXiv:1906.07155}, + year={2019} +} +``` + +## Contact + +This repo is currently maintained by Kai Chen ([@hellock](http://github.com/hellock)), Yuhang Cao ([@yhcao6](https://github.com/yhcao6)), Wenwei Zhang ([@ZwwWayne](https://github.com/ZwwWayne)), +Jiarui Xu ([@xvjiarui](https://github.com/xvjiarui)). Other core developers include Jiangmiao Pang ([@OceanPang](https://github.com/OceanPang)) and Jiaqi Wang ([@myownskyW7](https://github.com/myownskyW7)). diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/convert/fcos_aipp.cfg b/PyTorch/contrib/cv/detection/FCOS/infer/convert/fcos_aipp.cfg index 6ad5dc2c5988175025c112a796089f01eeb29bb5..73eb52fe14cd900e66427c5d0956cb43510577f5 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/convert/fcos_aipp.cfg +++ b/PyTorch/contrib/cv/detection/FCOS/infer/convert/fcos_aipp.cfg @@ -1,16 +1,16 @@ -aipp_op { - aipp_mode: static - input_format: RGB888_U8 - related_input_rank: 0 - csc_switch: false - rbuv_swap_switch: false - - mean_chn_0: 103 - mean_chn_1: 116 - mean_chn_2: 123 - var_reci_chn_0: 1.0 - var_reci_chn_1: 1.0 - var_reci_chn_2: 1.0 - - -} +aipp_op { + aipp_mode: static + input_format: RGB888_U8 + related_input_rank: 0 + csc_switch: false + rbuv_swap_switch: false + + mean_chn_0: 103 + mean_chn_1: 116 + mean_chn_2: 123 + var_reci_chn_0: 1.0 + var_reci_chn_1: 1.0 + var_reci_chn_2: 1.0 + + +} diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/docker_start_infer.sh b/PyTorch/contrib/cv/detection/FCOS/infer/docker_start_infer.sh index 07b61defd1489b9c21337b7d7ac6c911bf422e78..022ff222968145977df53c32a26d31fde5429d39 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/docker_start_infer.sh +++ b/PyTorch/contrib/cv/detection/FCOS/infer/docker_start_infer.sh @@ -1,48 +1,48 @@ -#!/bin/bash - -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
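The `fcos_aipp.cfg` above folds the per-channel mean subtraction into the model's static AIPP stage (means 103/116/123, reciprocal variances 1.0). If that normalization had to be reproduced on the host, for example to sanity-check inference inputs, a rough equivalent would look like the sketch below; the image size and channel order are assumptions for illustration, and the authoritative behaviour is defined by the AIPP configuration itself:

```python
import numpy as np

# Values taken from fcos_aipp.cfg above.
MEANS = np.array([103.0, 116.0, 123.0], dtype=np.float32)   # mean_chn_0/1/2
VAR_RECIP = np.array([1.0, 1.0, 1.0], dtype=np.float32)     # var_reci_chn_0/1/2

def aipp_like_normalize(image_u8: np.ndarray) -> np.ndarray:
    """Approximate the static AIPP step: (pixel - mean) * var_reci, per channel."""
    assert image_u8.ndim == 3 and image_u8.shape[2] == 3, "expects an HWC 3-channel image"
    return (image_u8.astype(np.float32) - MEANS) * VAR_RECIP

# Hypothetical usage with a random HWC image (size is made up for the example).
dummy = np.random.randint(0, 256, size=(800, 1216, 3), dtype=np.uint8)
print(aipp_like_normalize(dummy).shape, aipp_like_normalize(dummy).dtype)
```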
- -docker_image=$1 -data_dir=$2 - -function show_help() { - echo "Usage: docker_start.sh docker_image data_dir" -} - -function param_check() { - if [ -z "${docker_image}" ]; then - echo "please input docker_image" - show_help - exit 1 - fi - - if [ -z "${data_dir}" ]; then - echo "please input data_dir" - show_help - exit 1 - fi -} - -param_check - -docker run -it \ - --device=/dev/davinci0 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm \ - --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v ${data_dir}:${data_dir} \ - ${docker_image} \ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +data_dir=$2 + +function show_help() { + echo "Usage: docker_start.sh docker_image data_dir" +} + +function param_check() { + if [ -z "${docker_image}" ]; then + echo "please input docker_image" + show_help + exit 1 + fi + + if [ -z "${data_dir}" ]; then + echo "please input data_dir" + show_help + exit 1 + fi +} + +param_check + +docker run -it \ + --device=/dev/davinci0 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v ${data_dir}:${data_dir} \ + ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/CMakeLists.txt b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/CMakeLists.txt index a690071accd34528776dc5ffe080fdb5a5b339b8..732aa7ab6d0b958959b598d0bc5e9efe19cba08e 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/CMakeLists.txt +++ b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/CMakeLists.txt @@ -1,52 +1,52 @@ -cmake_minimum_required(VERSION 3.14.0) -project(fcos) -set(TARGET fcos) - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) -add_definitions(-Dgoogle=mindxsdk_private) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() - -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) -set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) - - -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR 
${MXBASE_ROOT_DIR}/opensource) -endif() -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} ./src/main.cpp ./src/fcos.cpp) - -target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) - +cmake_minimum_required(VERSION 3.14.0) +project(fcos) +set(TARGET fcos) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() + +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) + + +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} ./src/main.cpp ./src/fcos.cpp) + +target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) + install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/fcos.cpp b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/fcos.cpp index 9a3594115852f4f89825c98cba0ea63aef54017f..7b88e0267325a6b52e3506d8eeb584f976ca7528 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/fcos.cpp +++ b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/fcos.cpp @@ -1,313 +1,313 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "fcos.h" -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; - -std::vector classes = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", "dog", - "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", - "baseball glove", "skateboard", "surfboard", "tennis racket", - "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", "carrot", - "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", - "mouse", "remote", "keyboard", "cell phone", "microwave", - "oven", "toaster", "sink", "refrigerator", "book", "clock", - "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; - -APP_ERROR FCOS::Init(const InitParam& initParam) { - // Equipment initialization - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - // Context initialization - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - - // Load model - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR FCOS::DeInit() { - model_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -std::vector FCOS::GetFileList(const std::string &dirPath) { - /* - This function is getting data from dataset on the path. 
- - :param dirpath: a string of dataset path - :return: a collection of file paths - - */ - struct dirent *ptr; - DIR *dir = opendir(dirPath.c_str()); - std::vector files; - while ((ptr = readdir(dir)) != NULL) { - if (ptr->d_name[0] == '.') continue; - files.push_back(ptr->d_name); - } - closedir(dir); - return files; -} - -APP_ERROR FCOS::ReadImage(const std::string& imgPath, cv::Mat& imageMat, int& height, int& width) { - imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); - width = imageMat.cols; - height = imageMat.rows; - - return APP_ERR_OK; -} - -APP_ERROR FCOS::ResizeImage(const cv::Mat& srcImageMat, cv::Mat& dstImageMat) { - float resizeHeight = 800; - float resizeWidth = 1333; - float scale = std::min(resizeWidth / srcImageMat.cols, resizeHeight / srcImageMat.rows); - int new_width = srcImageMat.cols * scale; - int new_height = srcImageMat.rows * scale; - const int average = 2; - int pad_w = resizeWidth - new_width; - int pad_h = resizeHeight - new_height; - int pad_left = pad_w / average; - int pad_right = pad_w - pad_left; - int pad_top = pad_h / average; - int pad_bottom = pad_h - pad_top; - - cv::resize(srcImageMat, dstImageMat, cv::Size(new_width,new_height), 0, 0, cv::INTER_CUBIC); //指定常量像素填充 - cv::copyMakeBorder(dstImageMat, dstImageMat, pad_top, pad_bottom, pad_left, pad_right, - cv::BorderTypes::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); - - return APP_ERR_OK; -} - -APP_ERROR FCOS::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) -{ - const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; - MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); - MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); - - APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Memory malloc failed."; - return ret; - } - - std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; - tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - - -APP_ERROR FCOS::Inference(const std::vector& inputs, std::vector& outputs) { - auto dtypes = model_->GetOutputDataType(); - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - if (ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR FCOS::PostProcess(const std::string& imgPath, std::vector& inputs, - const std::string &resultPath, int& height, int& width, const std::string& name, - std::string &showPath, float& PROB_THRES) { - MxBase::TensorBase& tensor = inputs[1]; //1*100 - int ret = tensor.ToHost(); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Tensor_1 deploy to host failed."; - return ret; - } - std::vector shape = tensor.GetShape(); - - auto labels = reinterpret_cast(tensor.GetBuffer()); 
//1*100 - - int label[100] = {0}; - for(int i = 0; i < 100; i++){ - label[i] = labels[i]; - } - - tensor = inputs[0]; //1*100*5 - ret = tensor.ToHost(); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Tensor_0 deploy to host failed."; - return ret; - } - - auto bbox = reinterpret_cast(tensor.GetBuffer()); - - // get infer coordinates - float image_size_w = width; - float image_size_h = height; - float net_input_width = 1333; - float net_input_height = 800; - float scale = std::min(net_input_width / image_size_w, net_input_height / image_size_h); - - int pad_w = net_input_width - image_size_w * scale; - int pad_h = net_input_height - image_size_h * scale; - int pad_left = pad_w / 2; - int pad_right = pad_w -pad_left; - int pad_top = pad_h / 2; - int pad_bottom = pad_h -pad_top; - - float prob_thres = PROB_THRES; - float ppbox[100][5] = {0}; - - for (int j = 0; j < 100; j++) { - ppbox[j][0] = (bbox[j][0]-pad_left)/scale; - ppbox[j][1] = (bbox[j][1]-pad_top)/scale; - ppbox[j][2] = (bbox[j][2]-pad_right)/scale; - ppbox[j][3] = (bbox[j][3]-pad_bottom)/scale; - ppbox[j][4] = bbox[j][4]; - - if (ppbox[j][0]<0) ppbox[j][0] = 0; - if (ppbox[j][1]<0) ppbox[j][1] = 0; - if (ppbox[j][2]>image_size_w) ppbox[j][2] = image_size_w; - if (ppbox[j][3]>image_size_h) ppbox[j][3] = image_size_h; - - } - - std::ofstream out(resultPath); - cv::Mat imgCur = cv::imread(imgPath); - for (int j = 0;j<100;j++) { - if (float(ppbox[j][4])80 ) { - continue; - } - - std::string class_name = classes[int(label[j])]; - std::string det_results_str = ""; - std::ostringstream oss; - oss< dirFileList = GetFileList(dirPath); - std::vector names, paths; - int i = 0; - for (auto imgFile : dirFileList) { - std::string imgPath = dirPath + "/" + imgFile; - std::string name = imgFile.substr(0, imgFile.find(".")); - std::string subresultPath = resultPath+"/"+name+".txt"; - cv::Mat imageMat; - int height, width; - APP_ERROR ret = ReadImage(imgPath, imageMat, height, width); - if (ret != APP_ERR_OK) { - LogError << "ReadImage failed, ret=" << ret << "."; - return ret; - } - ResizeImage(imageMat, imageMat); - TensorBase tensorBase; - ret = CVMatToTensorBase(imageMat, tensorBase); - if (ret != APP_ERR_OK) { - LogError << "CVMatToTensorBase failed, ret=" << ret << "."; - return ret; - } - std::vector inputs = {}; - std::vector outputs = {}; - inputs.push_back(tensorBase); - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - ret = PostProcess(imgPath, outputs, subresultPath, height, width,name,showPath,PROB_THRES); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - i++; - LogInfo< +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "fcos.h" +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/Log/Log.h" + +using namespace MxBase; + +std::vector classes = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", + "train", "truck", "boat", "traffic light", "fire hydrant", + "stop sign", "parking meter", "bench", "bird", "cat", "dog", + "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", + "banana", "apple", "sandwich", "orange", 
"broccoli", "carrot", + "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", + "mouse", "remote", "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; + +APP_ERROR FCOS::Init(const InitParam& initParam) { + // Equipment initialization + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + // Context initialization + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + + // Load model + model_ = std::make_shared(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR FCOS::DeInit() { + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +std::vector FCOS::GetFileList(const std::string &dirPath) { + /* + This function is getting data from dataset on the path. + + :param dirpath: a string of dataset path + :return: a collection of file paths + + */ + struct dirent *ptr; + DIR *dir = opendir(dirPath.c_str()); + std::vector files; + while ((ptr = readdir(dir)) != NULL) { + if (ptr->d_name[0] == '.') continue; + files.push_back(ptr->d_name); + } + closedir(dir); + return files; +} + +APP_ERROR FCOS::ReadImage(const std::string& imgPath, cv::Mat& imageMat, int& height, int& width) { + imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); + width = imageMat.cols; + height = imageMat.rows; + + return APP_ERR_OK; +} + +APP_ERROR FCOS::ResizeImage(const cv::Mat& srcImageMat, cv::Mat& dstImageMat) { + float resizeHeight = 800; + float resizeWidth = 1333; + float scale = std::min(resizeWidth / srcImageMat.cols, resizeHeight / srcImageMat.rows); + int new_width = srcImageMat.cols * scale; + int new_height = srcImageMat.rows * scale; + const int average = 2; + int pad_w = resizeWidth - new_width; + int pad_h = resizeHeight - new_height; + int pad_left = pad_w / average; + int pad_right = pad_w - pad_left; + int pad_top = pad_h / average; + int pad_bottom = pad_h - pad_top; + + cv::resize(srcImageMat, dstImageMat, cv::Size(new_width,new_height), 0, 0, cv::INTER_CUBIC); //指定常量像素填充 + cv::copyMakeBorder(dstImageMat, dstImageMat, pad_top, pad_bottom, pad_left, pad_right, + cv::BorderTypes::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + + return APP_ERR_OK; +} + +APP_ERROR FCOS::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) +{ + const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; + MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); + MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); + + APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc failed."; + return ret; + } + + std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; + tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); + return APP_ERR_OK; +} + + +APP_ERROR FCOS::Inference(const std::vector& inputs, std::vector& outputs) { + auto dtypes = model_->GetOutputDataType(); + for 
(size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs.push_back(tensor); + } + DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; + APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR FCOS::PostProcess(const std::string& imgPath, std::vector& inputs, + const std::string &resultPath, int& height, int& width, const std::string& name, + std::string &showPath, float& PROB_THRES) { + MxBase::TensorBase& tensor = inputs[1]; //1*100 + int ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor_1 deploy to host failed."; + return ret; + } + std::vector shape = tensor.GetShape(); + + auto labels = reinterpret_cast(tensor.GetBuffer()); //1*100 + + int label[100] = {0}; + for(int i = 0; i < 100; i++){ + label[i] = labels[i]; + } + + tensor = inputs[0]; //1*100*5 + ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor_0 deploy to host failed."; + return ret; + } + + auto bbox = reinterpret_cast(tensor.GetBuffer()); + + // get infer coordinates + float image_size_w = width; + float image_size_h = height; + float net_input_width = 1333; + float net_input_height = 800; + float scale = std::min(net_input_width / image_size_w, net_input_height / image_size_h); + + int pad_w = net_input_width - image_size_w * scale; + int pad_h = net_input_height - image_size_h * scale; + int pad_left = pad_w / 2; + int pad_right = pad_w -pad_left; + int pad_top = pad_h / 2; + int pad_bottom = pad_h -pad_top; + + float prob_thres = PROB_THRES; + float ppbox[100][5] = {0}; + + for (int j = 0; j < 100; j++) { + ppbox[j][0] = (bbox[j][0]-pad_left)/scale; + ppbox[j][1] = (bbox[j][1]-pad_top)/scale; + ppbox[j][2] = (bbox[j][2]-pad_right)/scale; + ppbox[j][3] = (bbox[j][3]-pad_bottom)/scale; + ppbox[j][4] = bbox[j][4]; + + if (ppbox[j][0]<0) ppbox[j][0] = 0; + if (ppbox[j][1]<0) ppbox[j][1] = 0; + if (ppbox[j][2]>image_size_w) ppbox[j][2] = image_size_w; + if (ppbox[j][3]>image_size_h) ppbox[j][3] = image_size_h; + + } + + std::ofstream out(resultPath); + cv::Mat imgCur = cv::imread(imgPath); + for (int j = 0;j<100;j++) { + if (float(ppbox[j][4])80 ) { + continue; + } + + std::string class_name = classes[int(label[j])]; + std::string det_results_str = ""; + std::ostringstream oss; + oss< dirFileList = GetFileList(dirPath); + std::vector names, paths; + int i = 0; + for (auto imgFile : dirFileList) { + std::string imgPath = dirPath + "/" + imgFile; + std::string name = imgFile.substr(0, imgFile.find(".")); + std::string subresultPath = resultPath+"/"+name+".txt"; + cv::Mat imageMat; + int height, width; + APP_ERROR ret = ReadImage(imgPath, imageMat, height, width); + if (ret != APP_ERR_OK) { + LogError << "ReadImage failed, ret=" << ret << "."; + return ret; + } + ResizeImage(imageMat, imageMat); + TensorBase tensorBase; + ret = CVMatToTensorBase(imageMat, tensorBase); + if (ret != APP_ERR_OK) { + LogError << "CVMatToTensorBase failed, 
ret=" << ret << "."; + return ret; + } + std::vector inputs = {}; + std::vector outputs = {}; + inputs.push_back(tensorBase); + ret = Inference(inputs, outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + ret = PostProcess(imgPath, outputs, subresultPath, height, width,name,showPath,PROB_THRES); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + i++; + LogInfo< -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "MxBase/PostProcessBases/PostProcessDataType.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -struct InitParam { - uint32_t deviceId; - std::string modelPath; -}; - -class FCOS { -public: - - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat, int& height, int& width); - APP_ERROR ResizeImage(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); - APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); - APP_ERROR VectorToTensorBase(int* transMat, MxBase::TensorBase& tensorBase); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::string& imgPath, std::vector &inputs, - const std::string &subresultPath,int& height, int& width,const std::string& name, - std::string &showPath,float& PROB_THRES); - APP_ERROR Process(const std::string &dirPath, std::string &resultPath,std::string &showPath,float& PROB_THRES); - -private: - std::shared_ptr model_; - std::vector GetFileList(const std::string &dirPath); - MxBase::ModelDesc modelDesc_; - const int device_id = 0; - uint32_t deviceId_ = device_id; -}; -#endif +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FCOS_H +#define FCOS_H + +#include +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/PostProcessBases/PostProcessDataType.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +struct InitParam { + uint32_t deviceId; + std::string modelPath; +}; + +class FCOS { +public: + + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat, int& height, int& width); + APP_ERROR ResizeImage(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); + APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); + APP_ERROR VectorToTensorBase(int* transMat, MxBase::TensorBase& tensorBase); + APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); + APP_ERROR PostProcess(const std::string& imgPath, std::vector &inputs, + const std::string &subresultPath,int& height, int& width,const std::string& name, + std::string &showPath,float& PROB_THRES); + APP_ERROR Process(const std::string &dirPath, std::string &resultPath,std::string &showPath,float& PROB_THRES); + +private: + std::shared_ptr model_; + std::vector GetFileList(const std::string &dirPath); + MxBase::ModelDesc modelDesc_; + const int device_id = 0; + uint32_t deviceId_ = device_id; +}; +#endif diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/main.cpp b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/main.cpp index 6acf366700e8ece4c5653fe7204001ad31124e7b..d3a1d13c09e80f31f455ee95eb74d134c66c1a07 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/main.cpp +++ b/PyTorch/contrib/cv/detection/FCOS/infer/mxbase/src/main.cpp @@ -1,45 +1,45 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MxBase/Log/Log.h" -#include "fcos.h" - -int main() { - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.modelPath = "../../data/model/fcos.om"; - FCOS fcos; - APP_ERROR ret = fcos.Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "FCOS init failed, ret=" << ret << "."; - return ret; - } - - std::string imgPath = "../../data/input/COCO2017/val2017"; - std::string resultPath = "../data/infer_result"; - std::string showPath = "../data/show_result"; - float PROB_THRES = 0.05; - - ret = fcos.Process(imgPath,resultPath,showPath,PROB_THRES); - if (ret != APP_ERR_OK) { - LogError << "FCOS process failed, ret=" << ret << "."; - fcos.DeInit(); - return ret; - } - - fcos.DeInit(); - return APP_ERR_OK; -} +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MxBase/Log/Log.h" +#include "fcos.h" + +int main() { + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.modelPath = "../../data/model/fcos.om"; + FCOS fcos; + APP_ERROR ret = fcos.Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "FCOS init failed, ret=" << ret << "."; + return ret; + } + + std::string imgPath = "../../data/input/COCO2017/val2017"; + std::string resultPath = "../data/infer_result"; + std::string showPath = "../data/show_result"; + float PROB_THRES = 0.05; + + ret = fcos.Process(imgPath,resultPath,showPath,PROB_THRES); + if (ret != APP_ERR_OK) { + LogError << "FCOS process failed, ret=" << ret << "."; + fcos.DeInit(); + return ret; + } + + fcos.DeInit(); + return APP_ERR_OK; +} diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/sdk/fcos_opencv_all.py b/PyTorch/contrib/cv/detection/FCOS/infer/sdk/fcos_opencv_all.py index 2108426f9fe5c4dd60f15408878161638ec5640d..92551e0984479401b7c5ba901816f76fc2854625 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/sdk/fcos_opencv_all.py +++ b/PyTorch/contrib/cv/detection/FCOS/infer/sdk/fcos_opencv_all.py @@ -1,289 +1,289 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import base64 -import json -import os - -import cv2 -import numpy as np -from StreamManagerApi import MxDataInput -from StreamManagerApi import StreamManagerApi -from skimage import io - - -def parse_arg(): - parser = argparse.ArgumentParser(description="FCOS infer") - parser.add_argument("--dataset", default="../data/input/COCO2017/val2017") - parser.add_argument("--pipeline", default="../data/config/fcos.pipeline") - parser.add_argument("--test_annotation", - default="../data/input/COCO2017/coco2017.info") - parser.add_argument("--det_results_path", default="./data/infer_result") - parser.add_argument("--show_results_path", default="./data/show_result") - parser.add_argument("--net_input_width", default=1333) - parser.add_argument("--net_input_height", default=800) - parser.add_argument("--prob_thres", default=0.05) - parser.add_argument( - "--ifShowDetObj", - default="True", - action="store_true", - help="if input the para means True, neither False.") - - flags = parser.parse_args() - return flags - - -def get_dataset(path): - """ - This function is getting data from dataset on the path. - - :param path: a string of dataset path - - """ - for root, dirs, files in os.walk(path): - for file_name in files: - if file_name.endswith('jpg') or file_name.endswith('JPG'): - yield os.path.join(path, file_name) - break - - -def get_stream_manager(pipeline_path): - """ - This function is using stream_manager_api. 
- - :param pipeline_path: a string of pipeline path - :return: a stream manager - - """ - stream_manager_api = StreamManagerApi() - ret = stream_manager_api.InitManager() - if ret != 0: - print("Failed to init Stream manager, ret=%s" % str(ret)) - exit() - - with open(pipeline_path, 'rb') as f: - pipeline_content = f.read() - - ret = stream_manager_api.CreateMultipleStreams(pipeline_content) - if ret != 0: - print("Failed to create stream, ret=%s" % str(ret)) - exit() - return stream_manager_api - - -def do_infer_image(stream_manager_api, image_path): - """ - This function is executing the inference of images. - - :param stream_manager_api: a stream manager - :param image_path: a string of image path - :return: bbox, labels - - bbox,labels: (1,100,5),(1,100) - The model has two output tensors: - bbox:(x0, y0, x1, y1,confidence) - #the upper left and lower right coordinates of the detection boxes - labels: probability of 80 classes - """ - stream_name = b'im_fcos' - data_input = MxDataInput() - with open(image_path, 'rb') as f: - data_input.data = f.read() - - unique_id = stream_manager_api.SendData(stream_name, 0, data_input) - if unique_id < 0: - print("Failed to send data to stream.") - exit() - - infer_result = stream_manager_api.GetResult(stream_name, unique_id) - if infer_result.errorCode != 0: - print(f"GetResult error. errorCode={infer_result.errorCode}," - f"errorMsg={infer_result.data.decode()}") - exit() - - infer_result_json = json.loads(infer_result.data.decode()) - content = json.loads(infer_result_json['metaData'][0]['content']) - # print the infer result - print(infer_result.data.decode()) - infer_result_json = json.loads(infer_result.data.decode()) - content = json.loads(infer_result_json['metaData'][0]['content']) - tensor_vec = content['tensorPackageVec'][0]['tensorVec'][0] - data_str = tensor_vec['dataStr'] - tensor_shape = tensor_vec['tensorShape'] - bbox = np.frombuffer(base64.b64decode(data_str), dtype=np.float32) - bbox = np.reshape(bbox, tensor_shape[1:]) - # [bbox,labels] (1,100,5);(1,100) - - print("---------------------------bbox---------------------------") - print(bbox) - print() - print(bbox.shape) - print("-----------------------------------------------------------------") - - tensor_vec = content['tensorPackageVec'][0]['tensorVec'][1] - data_str = tensor_vec['dataStr'] - tensor_shape = tensor_vec['tensorShape'] - labels = np.frombuffer(base64.b64decode(data_str), dtype=np.int64) - labels = np.reshape(labels, tensor_shape[1:]) - print("---------------------------labels---------------------------") - print(labels) - print() - print(labels.shape) - print("-----------------------------------------------------------------") - return bbox, labels - - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 
'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - - -def coco_postprocess(bbox: np.ndarray, image_size, - net_input_width, net_input_height): - """ - This function is postprocessing for FCOS output. - - Before calling this function, reshape the raw output of FCOS to - following form - numpy.ndarray: - [x0, y0, x1, y1, confidence, probability of 80 classes] - shape: (100,) - The postprocessing restore the bounding rectangles of FCOS output - to origin scale and filter with non-maximum suppression. - - :param bbox: a numpy array of the FCOS output - :param image_path: a string of image path - :return: three list for best bound, class and score - """ - w = image_size[0] - h = image_size[1] - scale = min(net_input_width / w, net_input_height / h) - - pad_w = net_input_width - w * scale - pad_h = net_input_height - h * scale - pad_left = pad_w // 2 - pad_top = pad_h // 2 - - # cal predict box on the image src - pbox = bbox - - pbox[:, 0] = (bbox[:, 0] - pad_left) / scale - pbox[:, 1] = (bbox[:, 1] - pad_top) / scale - pbox[:, 2] = (bbox[:, 2] - pad_left) / scale - pbox[:, 3] = (bbox[:, 3] - pad_top) / scale - - # make pbboxes value in valid range - pbox[:, 0] = np.maximum(pbox[:, 0], 0) - pbox[:, 1] = np.maximum(pbox[:, 1], 0) - pbox[:, 2] = np.minimum(pbox[:, 2], w) - pbox[:, 3] = np.minimum(pbox[:, 3], h) - - return pbox - - -def main(args): - i = 0 - path = args.dataset - print(args.ifShowDetObj, type(args.ifShowDetObj)) - det_results_path = args.det_results_path - show_results_path = args.show_results_path - os.makedirs(det_results_path, exist_ok=True) - os.makedirs(show_results_path, exist_ok=True) - stream_manager_api = get_stream_manager(args.pipeline) - img_size_dict = dict() - with open(args.test_annotation)as f: - for line in f.readlines(): - temp = line.split(" ") - img_file_path = temp[1] - img_name = temp[1].split("/")[-1].split(".")[0] - img_width = int(temp[2]) - img_height = int(temp[3]) - img_size_dict[img_name] = (img_width, img_height, img_file_path) - - for img_path in get_dataset(path): - image_1 = io.imread(img_path) - if len(image_1.shape) == 3: - if image_1.shape[2] != 3: - continue - file_name1 = os.path.basename(img_path) - file_name = file_name1.split('.')[0] - print(file_name1) - delete_img_name = ['000000374551.jpg', '000000003661.jpg', - '000000309391.jpg', '000000070254.jpg'] - if file_name1 in delete_img_name: - continue - - bbox, labels = do_infer_image(stream_manager_api, img_path) - - res_buff = [] - res_buff.append(bbox) - labels = np.reshape(labels, [100, 1]) - res_buff.append(labels) - res_tensor = np.concatenate(res_buff, axis=1) - current_img_size = img_size_dict[file_name] - print("[TEST]---------------------------concat{} imgsize{}".format( - len(res_tensor), current_img_size)) - predbox = coco_postprocess( - res_tensor, current_img_size, args.net_input_width, args.net_input_height) - - if args.ifShowDetObj == True: - imgCur = cv2.imread(img_path) - - det_results_str = '' - for idx, class_ind in enumerate(predbox[:, 5]): - if float(predbox[idx][4]) < float(args.prob_thres): - continue - # skip negative class index - if class_ind < 0 or class_ind > 80: - continue - - class_name = CLASSES[int(class_ind)] - det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], - predbox[idx][1], predbox[idx][2], predbox[idx][3]) - if args.ifShowDetObj == True: - imgCur = cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), - (int(predbox[idx][2]), int(predbox[idx][3])), (0, 255, 0), 1) - 
imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), - (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) - # Image, text content, coordinates, font, size, color and font thickness. - - if args.ifShowDetObj == True: - print(os.path.join(show_results_path, file_name + '.jpg')) - cv2.imwrite(os.path.join(show_results_path, file_name + - '.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY), 70]) - - det_results_file = os.path.join(det_results_path, file_name + ".txt") - with open(det_results_file, "w") as detf: - detf.write(det_results_str) - print(det_results_str) - i = i+1 - print(i) - - stream_manager_api.DestroyAllStreams() - - -if __name__ == "__main__": - args = parse_arg() - main(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import base64 +import json +import os + +import cv2 +import numpy as np +from StreamManagerApi import MxDataInput +from StreamManagerApi import StreamManagerApi +from skimage import io + + +def parse_arg(): + parser = argparse.ArgumentParser(description="FCOS infer") + parser.add_argument("--dataset", default="../data/input/COCO2017/val2017") + parser.add_argument("--pipeline", default="../data/config/fcos.pipeline") + parser.add_argument("--test_annotation", + default="../data/input/COCO2017/coco2017.info") + parser.add_argument("--det_results_path", default="./data/infer_result") + parser.add_argument("--show_results_path", default="./data/show_result") + parser.add_argument("--net_input_width", default=1333) + parser.add_argument("--net_input_height", default=800) + parser.add_argument("--prob_thres", default=0.05) + parser.add_argument( + "--ifShowDetObj", + default="True", + action="store_true", + help="if input the para means True, neither False.") + + flags = parser.parse_args() + return flags + + +def get_dataset(path): + """ + This function is getting data from dataset on the path. + + :param path: a string of dataset path + + """ + for root, dirs, files in os.walk(path): + for file_name in files: + if file_name.endswith('jpg') or file_name.endswith('JPG'): + yield os.path.join(path, file_name) + break + + +def get_stream_manager(pipeline_path): + """ + This function is using stream_manager_api. + + :param pipeline_path: a string of pipeline path + :return: a stream manager + + """ + stream_manager_api = StreamManagerApi() + ret = stream_manager_api.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + exit() + + with open(pipeline_path, 'rb') as f: + pipeline_content = f.read() + + ret = stream_manager_api.CreateMultipleStreams(pipeline_content) + if ret != 0: + print("Failed to create stream, ret=%s" % str(ret)) + exit() + return stream_manager_api + + +def do_infer_image(stream_manager_api, image_path): + """ + This function is executing the inference of images. 
+ + :param stream_manager_api: a stream manager + :param image_path: a string of image path + :return: bbox, labels + + bbox,labels: (1,100,5),(1,100) + The model has two output tensors: + bbox:(x0, y0, x1, y1,confidence) + #the upper left and lower right coordinates of the detection boxes + labels: probability of 80 classes + """ + stream_name = b'im_fcos' + data_input = MxDataInput() + with open(image_path, 'rb') as f: + data_input.data = f.read() + + unique_id = stream_manager_api.SendData(stream_name, 0, data_input) + if unique_id < 0: + print("Failed to send data to stream.") + exit() + + infer_result = stream_manager_api.GetResult(stream_name, unique_id) + if infer_result.errorCode != 0: + print(f"GetResult error. errorCode={infer_result.errorCode}," + f"errorMsg={infer_result.data.decode()}") + exit() + + infer_result_json = json.loads(infer_result.data.decode()) + content = json.loads(infer_result_json['metaData'][0]['content']) + # print the infer result + print(infer_result.data.decode()) + infer_result_json = json.loads(infer_result.data.decode()) + content = json.loads(infer_result_json['metaData'][0]['content']) + tensor_vec = content['tensorPackageVec'][0]['tensorVec'][0] + data_str = tensor_vec['dataStr'] + tensor_shape = tensor_vec['tensorShape'] + bbox = np.frombuffer(base64.b64decode(data_str), dtype=np.float32) + bbox = np.reshape(bbox, tensor_shape[1:]) + # [bbox,labels] (1,100,5);(1,100) + + print("---------------------------bbox---------------------------") + print(bbox) + print() + print(bbox.shape) + print("-----------------------------------------------------------------") + + tensor_vec = content['tensorPackageVec'][0]['tensorVec'][1] + data_str = tensor_vec['dataStr'] + tensor_shape = tensor_vec['tensorShape'] + labels = np.frombuffer(base64.b64decode(data_str), dtype=np.int64) + labels = np.reshape(labels, tensor_shape[1:]) + print("---------------------------labels---------------------------") + print(labels) + print() + print(labels.shape) + print("-----------------------------------------------------------------") + return bbox, labels + + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + + +def coco_postprocess(bbox: np.ndarray, image_size, + net_input_width, net_input_height): + """ + This function is postprocessing for FCOS output. + + Before calling this function, reshape the raw output of FCOS to + following form + numpy.ndarray: + [x0, y0, x1, y1, confidence, probability of 80 classes] + shape: (100,) + The postprocessing restore the bounding rectangles of FCOS output + to origin scale and filter with non-maximum suppression. 
+ + :param bbox: a numpy array of the FCOS output + :param image_path: a string of image path + :return: three list for best bound, class and score + """ + w = image_size[0] + h = image_size[1] + scale = min(net_input_width / w, net_input_height / h) + + pad_w = net_input_width - w * scale + pad_h = net_input_height - h * scale + pad_left = pad_w // 2 + pad_top = pad_h // 2 + + # cal predict box on the image src + pbox = bbox + + pbox[:, 0] = (bbox[:, 0] - pad_left) / scale + pbox[:, 1] = (bbox[:, 1] - pad_top) / scale + pbox[:, 2] = (bbox[:, 2] - pad_left) / scale + pbox[:, 3] = (bbox[:, 3] - pad_top) / scale + + # make pbboxes value in valid range + pbox[:, 0] = np.maximum(pbox[:, 0], 0) + pbox[:, 1] = np.maximum(pbox[:, 1], 0) + pbox[:, 2] = np.minimum(pbox[:, 2], w) + pbox[:, 3] = np.minimum(pbox[:, 3], h) + + return pbox + + +def main(args): + i = 0 + path = args.dataset + print(args.ifShowDetObj, type(args.ifShowDetObj)) + det_results_path = args.det_results_path + show_results_path = args.show_results_path + os.makedirs(det_results_path, exist_ok=True) + os.makedirs(show_results_path, exist_ok=True) + stream_manager_api = get_stream_manager(args.pipeline) + img_size_dict = dict() + with open(args.test_annotation)as f: + for line in f.readlines(): + temp = line.split(" ") + img_file_path = temp[1] + img_name = temp[1].split("/")[-1].split(".")[0] + img_width = int(temp[2]) + img_height = int(temp[3]) + img_size_dict[img_name] = (img_width, img_height, img_file_path) + + for img_path in get_dataset(path): + image_1 = io.imread(img_path) + if len(image_1.shape) == 3: + if image_1.shape[2] != 3: + continue + file_name1 = os.path.basename(img_path) + file_name = file_name1.split('.')[0] + print(file_name1) + delete_img_name = ['000000374551.jpg', '000000003661.jpg', + '000000309391.jpg', '000000070254.jpg'] + if file_name1 in delete_img_name: + continue + + bbox, labels = do_infer_image(stream_manager_api, img_path) + + res_buff = [] + res_buff.append(bbox) + labels = np.reshape(labels, [100, 1]) + res_buff.append(labels) + res_tensor = np.concatenate(res_buff, axis=1) + current_img_size = img_size_dict[file_name] + print("[TEST]---------------------------concat{} imgsize{}".format( + len(res_tensor), current_img_size)) + predbox = coco_postprocess( + res_tensor, current_img_size, args.net_input_width, args.net_input_height) + + if args.ifShowDetObj == True: + imgCur = cv2.imread(img_path) + + det_results_str = '' + for idx, class_ind in enumerate(predbox[:, 5]): + if float(predbox[idx][4]) < float(args.prob_thres): + continue + # skip negative class index + if class_ind < 0 or class_ind > 80: + continue + + class_name = CLASSES[int(class_ind)] + det_results_str += "{} {} {} {} {} {}\n".format(class_name, str(predbox[idx][4]), predbox[idx][0], + predbox[idx][1], predbox[idx][2], predbox[idx][3]) + if args.ifShowDetObj == True: + imgCur = cv2.rectangle(imgCur, (int(predbox[idx][0]), int(predbox[idx][1])), + (int(predbox[idx][2]), int(predbox[idx][3])), (0, 255, 0), 1) + imgCur = cv2.putText(imgCur, class_name+'|'+str(predbox[idx][4]), + (int(predbox[idx][0]), int(predbox[idx][1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) + # Image, text content, coordinates, font, size, color and font thickness. 
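Both the C++ `PostProcess` and the Python `coco_postprocess` in this patch undo the same letterbox transform: the image is scaled by `min(1333/w, 800/h)`, padded symmetrically to 1333x800, and predicted boxes are mapped back by subtracting the left/top padding, dividing by the scale, and clipping to the original image size. A small self-contained sketch of that mapping, following the Python version (which subtracts the same left/top padding from both corners); the 1333x800 network size and the formulas come from the patch, the function names are illustrative:

```python
import numpy as np

NET_W, NET_H = 1333, 800  # network input size used throughout the patch

def letterbox_params(w: int, h: int):
    """Scale and left/top padding used when letterboxing a w x h image to NET_W x NET_H."""
    scale = min(NET_W / w, NET_H / h)
    pad_left = (NET_W - w * scale) // 2
    pad_top = (NET_H - h * scale) // 2
    return scale, pad_left, pad_top

def boxes_to_original(boxes: np.ndarray, w: int, h: int) -> np.ndarray:
    """Map (N, 4) boxes in x0,y0,x1,y1 form from network coordinates back to the source image."""
    scale, pad_left, pad_top = letterbox_params(w, h)
    out = boxes.astype(np.float32).copy()
    out[:, [0, 2]] = (out[:, [0, 2]] - pad_left) / scale  # undo horizontal padding and scale
    out[:, [1, 3]] = (out[:, [1, 3]] - pad_top) / scale   # undo vertical padding and scale
    out[:, [0, 2]] = out[:, [0, 2]].clip(0, w)            # clip x to image width
    out[:, [1, 3]] = out[:, [1, 3]].clip(0, h)            # clip y to image height
    return out

# Example: a box predicted on the padded 1333x800 input of a 640x480 image.
print(boxes_to_original(np.array([[100.0, 120.0, 400.0, 360.0]]), 640, 480))
```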
+ + if args.ifShowDetObj == True: + print(os.path.join(show_results_path, file_name + '.jpg')) + cv2.imwrite(os.path.join(show_results_path, file_name + + '.jpg'), imgCur, [int(cv2.IMWRITE_JPEG_QUALITY), 70]) + + det_results_file = os.path.join(det_results_path, file_name + ".txt") + with open(det_results_file, "w") as detf: + detf.write(det_results_str) + print(det_results_str) + i = i+1 + print(i) + + stream_manager_api.DestroyAllStreams() + + +if __name__ == "__main__": + args = parse_arg() + main(args) diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/util/coco_eval.py b/PyTorch/contrib/cv/detection/FCOS/infer/util/coco_eval.py index 9ffdd575d3a923036698837ada5260ad1bcde18e..d5376996c9d9349c5ee0bec7334d5379e812aaaf 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/util/coco_eval.py +++ b/PyTorch/contrib/cv/detection/FCOS/infer/util/coco_eval.py @@ -1,96 +1,96 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.import argparse - -import numpy as np -import argparse -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -def coco_evaluation(annotation_json, result_json): - cocoGt = COCO(annotation_json) - cocoDt = cocoGt.loadRes(result_json) - iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) - iou_type = 'bbox' - - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.catIds = cocoGt.getCatIds(catNms=CLASSES) - cocoEval.params.imgIds = cocoGt.getImgIds() - delete_id=[3661,70254,309391,374551,190007] - for did in delete_id: - cocoEval.params.imgIds.remove(did) - - cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
- cocoEval.params.iouThrs = iou_thrs - - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - # mapping of cocoEval.stats - coco_metric_names = { - 'mAP': 0, - 'mAP_50': 1, - 'mAP_75': 2, - 'mAP_s': 3, - 'mAP_m': 4, - 'mAP_l': 5, - 'AR@100': 6, - 'AR@300': 7, - 'AR@1000': 8, - 'AR_s@1000': 9, - 'AR_m@1000': 10, - 'AR_l@1000': 11 - } - - metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] - eval_results = {} - - for metric_item in metric_items: - key = f'bbox_{metric_item}' - val = float( - f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' - ) - eval_results[key] = val - ap = cocoEval.stats[:6] - eval_results['bbox_mAP_copypaste'] = ( - f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' - f'{ap[4]:.3f} {ap[5]:.3f}') - - return eval_results - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--ground_truth", default="../data/input/COCO2017/annotations/instances_val2017.json") - parser.add_argument("--detection_result", default="coco_detection_result.json") - args = parser.parse_args() - result = coco_evaluation(args.ground_truth, args.detection_result) - print(result) - with open('./coco_detection_result.txt', 'w') as f: - for key, value in result.items(): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.import argparse + +import numpy as np +import argparse +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +def coco_evaluation(annotation_json, result_json): + cocoGt = COCO(annotation_json) + cocoDt = cocoGt.loadRes(result_json) + iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + iou_type = 'bbox' + + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.catIds = cocoGt.getCatIds(catNms=CLASSES) + cocoEval.params.imgIds = cocoGt.getImgIds() + delete_id=[3661,70254,309391,374551,190007] + for did in delete_id: + cocoEval.params.imgIds.remove(did) + + cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
+ cocoEval.params.iouThrs = iou_thrs + + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # mapping of cocoEval.stats + coco_metric_names = { + 'mAP': 0, + 'mAP_50': 1, + 'mAP_75': 2, + 'mAP_s': 3, + 'mAP_m': 4, + 'mAP_l': 5, + 'AR@100': 6, + 'AR@300': 7, + 'AR@1000': 8, + 'AR_s@1000': 9, + 'AR_m@1000': 10, + 'AR_l@1000': 11 + } + + metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] + eval_results = {} + + for metric_item in metric_items: + key = f'bbox_{metric_item}' + val = float( + f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' + ) + eval_results[key] = val + ap = cocoEval.stats[:6] + eval_results['bbox_mAP_copypaste'] = ( + f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' + f'{ap[4]:.3f} {ap[5]:.3f}') + + return eval_results + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth", default="../data/input/COCO2017/annotations/instances_val2017.json") + parser.add_argument("--detection_result", default="coco_detection_result.json") + args = parser.parse_args() + result = coco_evaluation(args.ground_truth, args.detection_result) + print(result) + with open('./coco_detection_result.txt', 'w') as f: + for key, value in result.items(): f.write(key + ': ' + str(value) + '\n') \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FCOS/infer/util/txt_to_json.py b/PyTorch/contrib/cv/detection/FCOS/infer/util/txt_to_json.py index 9895c2156520ca6361030fd98c44c524864d98b2..5044a128f5c9a61f33082b99412e8b84f960c22d 100644 --- a/PyTorch/contrib/cv/detection/FCOS/infer/util/txt_to_json.py +++ b/PyTorch/contrib/cv/detection/FCOS/infer/util/txt_to_json.py @@ -1,112 +1,112 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
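The `coco_eval.py` hunk above follows the standard pycocotools flow: load the ground truth, load the detection JSON with `loadRes`, restrict `imgIds`/`catIds`, then call `evaluate()`, `accumulate()` and `summarize()`, and read mAP values out of `cocoEval.stats`. A stripped-down sketch of that flow, assuming the annotation and result files exist at the hypothetical paths shown:

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Hypothetical paths; the patch uses instances_val2017.json and coco_detection_result.json.
ANN_FILE = "instances_val2017.json"
RES_FILE = "coco_detection_result.json"

coco_gt = COCO(ANN_FILE)
coco_dt = coco_gt.loadRes(RES_FILE)            # detections in COCO result format

coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.params.imgIds = coco_gt.getImgIds()  # evaluate on all (remaining) images
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

# stats[0] is mAP@[.5:.95] and stats[1] is mAP@0.5, matching coco_metric_names in the patch.
print("bbox_mAP:", round(float(coco_eval.stats[0]), 3))
```

The patch additionally removes a handful of image ids from `params.imgIds` before evaluating, so its reported numbers exclude those images.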
- -import glob -import os -import sys -import argparse -import mmcv - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, -24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, -48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, -72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - -''' - 0,0 ------> x (width) - | - | (Left,Top) - | *_________ - | | | - | | - y |_________| - (height) * - (Right,Bottom) -''' - -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - - -def error(msg): - print(msg) - sys.exit(0) - - -def get_predict_list(file_path, gt_classes): - dr_files_list = glob.glob(file_path + '/*.txt') - dr_files_list.sort() - - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - sl = line.split() - if len(sl) > 6: - class_name = sl[0] + ' ' + sl[1] - scores, left, top, right, bottom = sl[2:] - else: - class_name, scores, left, top, right, bottom = sl - if float(scores) < 0.05: - continue - except ValueError: - error_msg = "Error: File " + txt_file + " wrong format.\n" - error_msg += " Expected: \n" - error_msg += " Received: " + line - error(error_msg) - - left = float(left) - right = float(right) - top = float(top) - bottom = float(bottom) - bbox = [left, top, right-left, bottom-top] - bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, - "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) - - return bounding_boxes - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('mAp calculate') - parser.add_argument('--npu_txt_path', default="../sdk/data/infer_result/", - help='the path of the predict result') - parser.add_argument("--json_output_file", default="coco_detection_result") - args = parser.parse_args() - - res_bbox = get_predict_list(args.npu_txt_path, CLASSES) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import sys +import argparse +import mmcv + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, +24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, +72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' + +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + + +def error(msg): + print(msg) + sys.exit(0) + + +def get_predict_list(file_path, gt_classes): + dr_files_list = glob.glob(file_path + '/*.txt') + dr_files_list.sort() + + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + sl = line.split() + if len(sl) > 6: + class_name = sl[0] + ' ' + sl[1] + scores, left, top, right, bottom = sl[2:] + else: + class_name, scores, left, top, right, bottom = sl + if float(scores) < 0.05: + continue + except ValueError: + error_msg = "Error: File " + txt_file + " wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + + left = float(left) + right = float(right) + top = float(top) + bottom = float(bottom) + bbox = [left, top, right-left, bottom-top] + bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, + "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) + + return bounding_boxes + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('mAp calculate') + parser.add_argument('--npu_txt_path', default="../sdk/data/infer_result/", + help='the path of the predict result') + parser.add_argument("--json_output_file", default="coco_detection_result") + args = parser.parse_args() + + res_bbox = get_predict_list(args.npu_txt_path, CLASSES) 
mmcv.dump(res_bbox, args.json_output_file + '.json') \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/base_runner.py b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/base_runner.py index 02c2d21c80a781114d195d3a51ded7b2c956456a..e7027b1a3797420cad21a0076268e07c55839a38 100644 --- a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/base_runner.py +++ b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/base_runner.py @@ -1,474 +1,474 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import logging -import os.path as osp -import warnings -from abc import ABCMeta, abstractmethod - -import torch -from torch.optim import Optimizer - -import mmcv -from ..parallel import is_module_wrapper -from .checkpoint import load_checkpoint -from .dist_utils import get_dist_info -from .hooks import HOOKS, Hook, IterTimerHook -from .log_buffer import LogBuffer -from .priority import get_priority -from .utils import get_time_str - - -class BaseRunner(metaclass=ABCMeta): - """The base class of Runner, a training helper for PyTorch. - - All subclasses should implement the following APIs: - - - ``run()`` - - ``train()`` - - ``val()`` - - ``save_checkpoint()`` - - Args: - model (:obj:`torch.nn.Module`): The model to be run. - batch_processor (callable): A callable method that process a data - batch. The interface of this method should be - `batch_processor(model, data, train_mode) -> dict` - optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an - optimizer (in most cases) or a dict of optimizers (in models that - requires more than one optimizer, e.g., GAN). - work_dir (str, optional): The working directory to save checkpoints - and logs. Defaults to None. - logger (:obj:`logging.Logger`): Logger used during training. - Defaults to None. 
(The default value is just for backward - compatibility) - meta (dict | None): A dict records some import information such as - environment info and seed, which will be logged in logger hook. - Defaults to None. - max_epochs (int, optional): Total training epochs. - max_iters (int, optional): Total training iterations. - """ - - def __init__(self, - model, - batch_processor=None, - optimizer=None, - work_dir=None, - logger=None, - meta=None, - max_iters=None, - max_epochs=None, - samples_per_gpu=2, # added by jyl - num_of_gpus=8): # added by jyl - if batch_processor is not None: - if not callable(batch_processor): - raise TypeError('batch_processor must be callable, ' - f'but got {type(batch_processor)}') - warnings.warn('batch_processor is deprecated, please implement ' - 'train_step() and val_step() in the model instead.') - # raise an error is `batch_processor` is not None and - # `model.train_step()` exists. - if is_module_wrapper(model): - _model = model.module - else: - _model = model - if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): - raise RuntimeError( - 'batch_processor and model.train_step()/model.val_step() ' - 'cannot be both available.') - else: - assert hasattr(model, 'train_step') - - # check the type of `optimizer` - if isinstance(optimizer, dict): - for name, optim in optimizer.items(): - if not isinstance(optim, Optimizer): - raise TypeError( - f'optimizer must be a dict of torch.optim.Optimizers, ' - f'but optimizer["{name}"] is a {type(optim)}') - elif not isinstance(optimizer, Optimizer) and optimizer is not None: - raise TypeError( - f'optimizer must be a torch.optim.Optimizer object ' - f'or dict or None, but got {type(optimizer)}') - - # check the type of `logger` - if not isinstance(logger, logging.Logger): - raise TypeError(f'logger must be a logging.Logger object, ' - f'but got {type(logger)}') - - # check the type of `meta` - if meta is not None and not isinstance(meta, dict): - raise TypeError( - f'meta must be a dict or None, but got {type(meta)}') - - self.model = model - self.batch_processor = batch_processor - self.optimizer = optimizer - self.logger = logger - self.meta = meta - self.samples_per_gpu = samples_per_gpu # added by jyl - self.num_of_gpus = num_of_gpus # added by jyl - - # create work_dir - if mmcv.is_str(work_dir): - self.work_dir = osp.abspath(work_dir) - mmcv.mkdir_or_exist(self.work_dir) - elif work_dir is None: - self.work_dir = None - else: - raise TypeError('"work_dir" must be a str or None') - - # get model name from the model class - if hasattr(self.model, 'module'): - self._model_name = self.model.module.__class__.__name__ - else: - self._model_name = self.model.__class__.__name__ - - self._rank, self._world_size = get_dist_info() - self.timestamp = get_time_str() - self.mode = None - self._hooks = [] - self._epoch = 0 - self._iter = 0 - self._inner_iter = 0 - - if max_epochs is not None and max_iters is not None: - raise ValueError( - 'Only one of `max_epochs` or `max_iters` can be set.') - - self._max_epochs = max_epochs - self._max_iters = max_iters - # TODO: Redesign LogBuffer, it is not flexible and elegant enough - self.log_buffer = LogBuffer() - - self.iter_timer_hook = IterTimerHook() # added by jyl - - @property - def model_name(self): - """str: Name of the model, usually the module class name.""" - return self._model_name - - @property - def rank(self): - """int: Rank of current process. 
(distributed training)""" - return self._rank - - @property - def world_size(self): - """int: Number of processes participating in the job. - (distributed training)""" - return self._world_size - - @property - def hooks(self): - """list[:obj:`Hook`]: A list of registered hooks.""" - return self._hooks - - @property - def epoch(self): - """int: Current epoch.""" - return self._epoch - - @property - def iter(self): - """int: Current iteration.""" - return self._iter - - @property - def inner_iter(self): - """int: Iteration in an epoch.""" - return self._inner_iter - - @property - def max_epochs(self): - """int: Maximum training epochs.""" - return self._max_epochs - - @property - def max_iters(self): - """int: Maximum training iterations.""" - return self._max_iters - - @abstractmethod - def train(self): - pass - - @abstractmethod - def val(self): - pass - - @abstractmethod - def run(self, data_loaders, workflow, **kwargs): - pass - - @abstractmethod - def save_checkpoint(self, - out_dir, - filename_tmpl, - save_optimizer=True, - meta=None, - create_symlink=True): - pass - - def current_lr(self): - """Get current learning rates. - - Returns: - list[float] | dict[str, list[float]]: Current learning rates of all - param groups. If the runner has a dict of optimizers, this - method will return a dict. - """ - if isinstance(self.optimizer, torch.optim.Optimizer): - lr = [group['lr'] for group in self.optimizer.param_groups] - elif isinstance(self.optimizer, dict): - lr = dict() - for name, optim in self.optimizer.items(): - lr[name] = [group['lr'] for group in optim.param_groups] - else: - raise RuntimeError( - 'lr is not applicable because optimizer does not exist.') - return lr - - def current_momentum(self): - """Get current momentums. - - Returns: - list[float] | dict[str, list[float]]: Current momentums of all - param groups. If the runner has a dict of optimizers, this - method will return a dict. - """ - - def _get_momentum(optimizer): - momentums = [] - for group in optimizer.param_groups: - if 'momentum' in group.keys(): - momentums.append(group['momentum']) - elif 'betas' in group.keys(): - momentums.append(group['betas'][0]) - else: - momentums.append(0) - return momentums - - if self.optimizer is None: - raise RuntimeError( - 'momentum is not applicable because optimizer does not exist.') - elif isinstance(self.optimizer, torch.optim.Optimizer): - momentums = _get_momentum(self.optimizer) - elif isinstance(self.optimizer, dict): - momentums = dict() - for name, optim in self.optimizer.items(): - momentums[name] = _get_momentum(optim) - return momentums - - def register_hook(self, hook, priority='NORMAL'): - """Register a hook into the hook list. - - The hook will be inserted into a priority queue, with the specified - priority (See :class:`Priority` for details of priorities). - For hooks with the same priority, they will be triggered in the same - order as they are registered. - - Args: - hook (:obj:`Hook`): The hook to be registered. - priority (int or str or :obj:`Priority`): Hook priority. - Lower value means higher priority. 
- """ - assert isinstance(hook, Hook) - if hasattr(hook, 'priority'): - raise ValueError('"priority" is a reserved attribute for hooks') - priority = get_priority(priority) - hook.priority = priority - # insert the hook to a sorted list - inserted = False - for i in range(len(self._hooks) - 1, -1, -1): - if priority >= self._hooks[i].priority: - self._hooks.insert(i + 1, hook) - inserted = True - break - if not inserted: - self._hooks.insert(0, hook) - - def register_hook_from_cfg(self, hook_cfg): - """Register a hook from its cfg. - - Args: - hook_cfg (dict): Hook config. It should have at least keys 'type' - and 'priority' indicating its type and priority. - - Notes: - The specific hook class to register should not use 'type' and - 'priority' arguments during initialization. - """ - hook_cfg = hook_cfg.copy() - priority = hook_cfg.pop('priority', 'NORMAL') - hook = mmcv.build_from_cfg(hook_cfg, HOOKS) - self.register_hook(hook, priority=priority) - - def call_hook(self, fn_name): - """Call all hooks. - - Args: - fn_name (str): The function name in each hook to be called, such as - "before_train_epoch". - """ - for hook in self._hooks: - getattr(hook, fn_name)(self) - - def load_checkpoint(self, filename, map_location='cpu', strict=False): - self.logger.info('load checkpoint from %s', filename) - return load_checkpoint(self.model, filename, map_location, strict, - self.logger) - - def resume(self, - checkpoint, - resume_optimizer=True, - map_location='default'): - if map_location == 'default': - if torch.cuda.is_available(): - device_id = torch.cuda.current_device() - checkpoint = self.load_checkpoint( - checkpoint, - map_location=lambda storage, loc: storage.cuda(device_id)) - else: - checkpoint = self.load_checkpoint(checkpoint) - else: - checkpoint = self.load_checkpoint( - checkpoint, map_location=map_location) - - self._epoch = checkpoint['meta']['epoch'] - self._iter = checkpoint['meta']['iter'] - if 'optimizer' in checkpoint and resume_optimizer: - if isinstance(self.optimizer, Optimizer): - self.optimizer.load_state_dict(checkpoint['optimizer']) - elif isinstance(self.optimizer, dict): - for k in self.optimizer.keys(): - self.optimizer[k].load_state_dict( - checkpoint['optimizer'][k]) - else: - raise TypeError( - 'Optimizer should be dict or torch.optim.Optimizer ' - f'but got {type(self.optimizer)}') - - self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) - - def register_lr_hook(self, lr_config): - if isinstance(lr_config, dict): - assert 'policy' in lr_config - policy_type = lr_config.pop('policy') - # If the type of policy is all in lower case, e.g., 'cyclic', - # then its first letter will be capitalized, e.g., to be 'Cyclic'. - # This is for the convenient usage of Lr updater. - # Since this is not applicable for ` - # CosineAnnealingLrUpdater`, - # the string will not be changed if it contains capital letters. - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'LrUpdaterHook' - lr_config['type'] = hook_type - hook = mmcv.build_from_cfg(lr_config, HOOKS) - else: - hook = lr_config - self.register_hook(hook) - - def register_momentum_hook(self, momentum_config): - if momentum_config is None: - return - if isinstance(momentum_config, dict): - assert 'policy' in momentum_config - policy_type = momentum_config.pop('policy') - # If the type of policy is all in lower case, e.g., 'cyclic', - # then its first letter will be capitalized, e.g., to be 'Cyclic'. 
- # This is for the convenient usage of momentum updater. - # Since this is not applicable for - # `CosineAnnealingMomentumUpdater`, - # the string will not be changed if it contains capital letters. - if policy_type == policy_type.lower(): - policy_type = policy_type.title() - hook_type = policy_type + 'MomentumUpdaterHook' - momentum_config['type'] = hook_type - hook = mmcv.build_from_cfg(momentum_config, HOOKS) - else: - hook = momentum_config - self.register_hook(hook) - - def register_optimizer_hook(self, optimizer_config): - if optimizer_config is None: - return - if isinstance(optimizer_config, dict): - optimizer_config.setdefault('type', 'OptimizerHook') - hook = mmcv.build_from_cfg(optimizer_config, HOOKS) - else: - hook = optimizer_config - self.register_hook(hook) - - def register_checkpoint_hook(self, checkpoint_config): - if checkpoint_config is None: - return - if isinstance(checkpoint_config, dict): - checkpoint_config.setdefault('type', 'CheckpointHook') - hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) - else: - hook = checkpoint_config - self.register_hook(hook) - - def register_logger_hooks(self, log_config): - if log_config is None: - return - log_interval = log_config['interval'] - for info in log_config['hooks']: - logger_hook = mmcv.build_from_cfg( - info, HOOKS, default_args=dict(interval=log_interval)) - self.register_hook(logger_hook, priority='VERY_LOW') - - def register_training_hooks(self, - lr_config, - optimizer_config=None, - checkpoint_config=None, - log_config=None, - momentum_config=None): - """Register default hooks for training. - - Default hooks include: - - - LrUpdaterHook - - MomentumUpdaterHook - - OptimizerStepperHook - - CheckpointSaverHook - - IterTimerHook - - LoggerHook(s) - """ - self.register_lr_hook(lr_config) - self.register_momentum_hook(momentum_config) - self.register_optimizer_hook(optimizer_config) - self.register_checkpoint_hook(checkpoint_config) - # self.register_hook(IterTimerHook()) # changed by jyl - self.register_hook(self.iter_timer_hook) - self.register_logger_hooks(log_config) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import logging +import os.path as osp +import warnings +from abc import ABCMeta, abstractmethod + +import torch +from torch.optim import Optimizer + +import mmcv +from ..parallel import is_module_wrapper +from .checkpoint import load_checkpoint +from .dist_utils import get_dist_info +from .hooks import HOOKS, Hook, IterTimerHook +from .log_buffer import LogBuffer +from .priority import get_priority +from .utils import get_time_str + + +class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable): A callable method that process a data + batch. The interface of this method should be + `batch_processor(model, data, train_mode) -> dict` + optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an + optimizer (in most cases) or a dict of optimizers (in models that + requires more than one optimizer, e.g., GAN). + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict | None): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. + max_iters (int, optional): Total training iterations. + """ + + def __init__(self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None, + samples_per_gpu=2, # added by jyl + num_of_gpus=8): # added by jyl + if batch_processor is not None: + if not callable(batch_processor): + raise TypeError('batch_processor must be callable, ' + f'but got {type(batch_processor)}') + warnings.warn('batch_processor is deprecated, please implement ' + 'train_step() and val_step() in the model instead.') + # raise an error is `batch_processor` is not None and + # `model.train_step()` exists. 
+ if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' + 'cannot be both available.') + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}') + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' + f'or dict or None, but got {type(optimizer)}') + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' + f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError( + f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + self.samples_per_gpu = samples_per_gpu # added by jyl + self.num_of_gpus = num_of_gpus # added by jyl + + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError( + 'Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + self.iter_timer_hook = IterTimerHook() # added by jyl + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. 
+ (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, + out_dir, + filename_tmpl, + save_optimizer=True, + meta=None, + create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. + + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError( + 'momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. 
It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". + """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def load_checkpoint(self, filename, map_location='cpu', strict=False): + self.logger.info('load checkpoint from %s', filename) + return load_checkpoint(self.model, filename, map_location, strict, + self.logger) + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook) + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. 
+ if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook) + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook) + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook) + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg( + info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None): + """Register default hooks for training. + + Default hooks include: + + - LrUpdaterHook + - MomentumUpdaterHook + - OptimizerStepperHook + - CheckpointSaverHook + - IterTimerHook + - LoggerHook(s) + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + # self.register_hook(IterTimerHook()) # changed by jyl + self.register_hook(self.iter_timer_hook) + self.register_logger_hooks(log_config) diff --git a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/epoch_based_runner.py b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/epoch_based_runner.py index 33e5ab36f2c05cb21f223dc34dd81647c8062120..0700575b0f36a6a2f9527422486a0e3d629620d3 100644 --- a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/epoch_based_runner.py +++ b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/epoch_based_runner.py @@ -1,223 +1,223 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import os.path as osp -import platform -import shutil -import time -import warnings -import sys # myy add - -import torch - -import mmcv -from .base_runner import BaseRunner -from .builder import RUNNERS -from .checkpoint import save_checkpoint -from .utils import get_host_info - - -@RUNNERS.register_module() -class EpochBasedRunner(BaseRunner): - """Epoch-based Runner. - - This runner train models epoch by epoch. - """ - - def run_iter(self, data_batch, train_mode, **kwargs): - if self.batch_processor is not None: - outputs = self.batch_processor( - self.model, data_batch, train_mode=train_mode, **kwargs) - elif train_mode: - outputs = self.model.train_step(data_batch, self.optimizer, - **kwargs) - else: - outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) - if not isinstance(outputs, dict): - raise TypeError('"batch_processor()" or "model.train_step()"' - 'and "model.val_step()" must return a dict') - if 'log_vars' in outputs: - self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) - self.outputs = outputs - - def train(self, data_loader, **kwargs): - self.model.train() - self.mode = 'train' - self.data_loader = data_loader - self._max_iters = self._max_epochs * len(self.data_loader) - self.call_hook('before_train_epoch') - time.sleep(2) # Prevent possible deadlock during epoch transition - for i, data_batch in enumerate(self.data_loader): - self._inner_iter = i - # if i==50: - # with torch.autograd.profiler.profile(use_npu=True) as prof: - # self.call_hook('before_train_iter') - # self.run_iter(data_batch, train_mode=True) - # self.call_hook('after_train_iter') - # prof.export_chrome_trace("output.prof") - # else: - self.call_hook('before_train_iter') - self.run_iter(data_batch, train_mode=True) - self.call_hook('after_train_iter') - self._iter += 1 - # added by jyl - self.logger.info('FPS: ' + str(self.samples_per_gpu * self.num_of_gpus / self.iter_timer_hook.time_all * (len(self.data_loader) - 5))) - - self.call_hook('after_train_epoch') - self._epoch += 1 - - def val(self, data_loader, **kwargs): - self.model.eval() - self.mode = 'val' - self.data_loader = data_loader - self.call_hook('before_val_epoch') - time.sleep(2) # Prevent possible deadlock during epoch transition - for i, data_batch in enumerate(self.data_loader): - self._inner_iter = i - self.call_hook('before_val_iter') - with torch.no_grad(): - self.run_iter(data_batch, train_mode=False) - self.call_hook('after_val_iter') - - self.call_hook('after_val_epoch') - - def run(self, data_loaders, workflow, max_epochs=None, **kwargs): - """Start running. - - Args: - data_loaders (list[:obj:`DataLoader`]): Dataloaders for training - and validation. - workflow (list[tuple]): A list of (phase, epochs) to specify the - running order and epochs. E.g, [('train', 2), ('val', 1)] means - running 2 epochs for training and 1 epoch for validation, - iteratively. 
- """ - assert isinstance(data_loaders, list) - assert mmcv.is_list_of(workflow, tuple) - assert len(data_loaders) == len(workflow) - if max_epochs is not None: - warnings.warn( - 'setting max_epochs in run is deprecated, ' - 'please set max_epochs in runner_config', DeprecationWarning) - self._max_epochs = max_epochs - - assert self._max_epochs is not None, ( - 'max_epochs must be specified during instantiation') - - for i, flow in enumerate(workflow): - mode, epochs = flow - if mode == 'train': - self._max_iters = self._max_epochs * len(data_loaders[i]) - break - - work_dir = self.work_dir if self.work_dir is not None else 'NONE' - self.logger.info('Start running, host: %s, work_dir: %s', - get_host_info(), work_dir) - self.logger.info('workflow: %s, max: %d epochs', workflow, - self._max_epochs) - self.call_hook('before_run') - - while self.epoch < self._max_epochs: - for i, flow in enumerate(workflow): - mode, epochs = flow - if isinstance(mode, str): # self.train() - if not hasattr(self, mode): - raise ValueError( - f'runner has no method named "{mode}" to run an ' - 'epoch') - epoch_runner = getattr(self, mode) - else: - raise TypeError( - 'mode in workflow must be a str, but got {}'.format( - type(mode))) - - for _ in range(epochs): - if mode == 'train' and self.epoch >= self._max_epochs: - break - epoch_runner(data_loaders[i], **kwargs) - - time.sleep(1) # wait for some hooks like loggers to finish - self.call_hook('after_run') - - def save_checkpoint(self, - out_dir, - filename_tmpl='epoch_{}.pth', - save_optimizer=True, - meta=None, - create_symlink=True): - """Save the checkpoint. - - Args: - out_dir (str): The directory that checkpoints are saved. - filename_tmpl (str, optional): The checkpoint filename template, - which contains a placeholder for the epoch number. - Defaults to 'epoch_{}.pth'. - save_optimizer (bool, optional): Whether to save the optimizer to - the checkpoint. Defaults to True. - meta (dict, optional): The meta information to be saved in the - checkpoint. Defaults to None. - create_symlink (bool, optional): Whether to create a symlink - "latest.pth" to point to the latest checkpoint. - Defaults to True. - """ - if meta is None: - meta = dict(epoch=self.epoch + 1, iter=self.iter) - elif isinstance(meta, dict): - meta.update(epoch=self.epoch + 1, iter=self.iter) - else: - raise TypeError( - f'meta should be a dict or None, but got {type(meta)}') - if self.meta is not None: - meta.update(self.meta) - - filename = filename_tmpl.format(self.epoch + 1) - filepath = osp.join(out_dir, filename) - optimizer = self.optimizer if save_optimizer else None - save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) - # in some environments, `os.symlink` is not supported, you may need to - # set `create_symlink` to False - if create_symlink: - dst_file = osp.join(out_dir, 'latest.pth') - if platform.system() != 'Windows': - mmcv.symlink(filename, dst_file) - else: - shutil.copy(filepath, dst_file) - - -@RUNNERS.register_module() -class Runner(EpochBasedRunner): - """Deprecated name of EpochBasedRunner.""" - - def __init__(self, *args, **kwargs): - warnings.warn( - 'Runner was deprecated, please use EpochBasedRunner instead') - super().__init__(*args, **kwargs) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import os.path as osp +import platform +import shutil +import time +import warnings +import sys # myy add + +import torch + +import mmcv +from .base_runner import BaseRunner +from .builder import RUNNERS +from .checkpoint import save_checkpoint +from .utils import get_host_info + + +@RUNNERS.register_module() +class EpochBasedRunner(BaseRunner): + """Epoch-based Runner. + + This runner train models epoch by epoch. 
+ """ + + def run_iter(self, data_batch, train_mode, **kwargs): + if self.batch_processor is not None: + outputs = self.batch_processor( + self.model, data_batch, train_mode=train_mode, **kwargs) + elif train_mode: + outputs = self.model.train_step(data_batch, self.optimizer, + **kwargs) + else: + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + if not isinstance(outputs, dict): + raise TypeError('"batch_processor()" or "model.train_step()"' + 'and "model.val_step()" must return a dict') + if 'log_vars' in outputs: + self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) + self.outputs = outputs + + def train(self, data_loader, **kwargs): + self.model.train() + self.mode = 'train' + self.data_loader = data_loader + self._max_iters = self._max_epochs * len(self.data_loader) + self.call_hook('before_train_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + # if i==50: + # with torch.autograd.profiler.profile(use_npu=True) as prof: + # self.call_hook('before_train_iter') + # self.run_iter(data_batch, train_mode=True) + # self.call_hook('after_train_iter') + # prof.export_chrome_trace("output.prof") + # else: + self.call_hook('before_train_iter') + self.run_iter(data_batch, train_mode=True) + self.call_hook('after_train_iter') + self._iter += 1 + # added by jyl + self.logger.info('FPS: ' + str(self.samples_per_gpu * self.num_of_gpus / self.iter_timer_hook.time_all * (len(self.data_loader) - 5))) + + self.call_hook('after_train_epoch') + self._epoch += 1 + + def val(self, data_loader, **kwargs): + self.model.eval() + self.mode = 'val' + self.data_loader = data_loader + self.call_hook('before_val_epoch') + time.sleep(2) # Prevent possible deadlock during epoch transition + for i, data_batch in enumerate(self.data_loader): + self._inner_iter = i + self.call_hook('before_val_iter') + with torch.no_grad(): + self.run_iter(data_batch, train_mode=False) + self.call_hook('after_val_iter') + + self.call_hook('after_val_epoch') + + def run(self, data_loaders, workflow, max_epochs=None, **kwargs): + """Start running. + + Args: + data_loaders (list[:obj:`DataLoader`]): Dataloaders for training + and validation. + workflow (list[tuple]): A list of (phase, epochs) to specify the + running order and epochs. E.g, [('train', 2), ('val', 1)] means + running 2 epochs for training and 1 epoch for validation, + iteratively. 
+ """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' + 'please set max_epochs in runner_config', DeprecationWarning) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, ( + 'max_epochs must be specified during instantiation') + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('workflow: %s, max: %d epochs', workflow, + self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError( + f'runner has no method named "{mode}" to run an ' + 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError( + 'mode in workflow must be a str, but got {}'.format( + type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint(self, + out_dir, + filename_tmpl='epoch_{}.pth', + save_optimizer=True, + meta=None, + create_symlink=True): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. 
+ """ + if meta is None: + meta = dict(epoch=self.epoch + 1, iter=self.iter) + elif isinstance(meta, dict): + meta.update(epoch=self.epoch + 1, iter=self.iter) + else: + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + + filename = filename_tmpl.format(self.epoch + 1) + filepath = osp.join(out_dir, filename) + optimizer = self.optimizer if save_optimizer else None + save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) + # in some environments, `os.symlink` is not supported, you may need to + # set `create_symlink` to False + if create_symlink: + dst_file = osp.join(out_dir, 'latest.pth') + if platform.system() != 'Windows': + mmcv.symlink(filename, dst_file) + else: + shutil.copy(filepath, dst_file) + + +@RUNNERS.register_module() +class Runner(EpochBasedRunner): + """Deprecated name of EpochBasedRunner.""" + + def __init__(self, *args, **kwargs): + warnings.warn( + 'Runner was deprecated, please use EpochBasedRunner instead') + super().__init__(*args, **kwargs) diff --git a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/iter_timer.py b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/iter_timer.py index 0e89820c66ad1523cb15b18344711f719e867a3e..899293cb13c8272cb496ca2cee5f5416adf985a1 100644 --- a/PyTorch/contrib/cv/detection/FCOS/mmcv_need/iter_timer.py +++ b/PyTorch/contrib/cv/detection/FCOS/mmcv_need/iter_timer.py @@ -1,56 +1,56 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -import time - -from .hook import HOOKS, Hook - - -@HOOKS.register_module() -class IterTimerHook(Hook): - - def before_epoch(self, runner): - self.t = time.time() - self.skip_step = 0 # added by jyl - self.time_all = 0 # added by jyl - - def before_iter(self, runner): - runner.log_buffer.update({'data_time': time.time() - self.t}) - - def after_iter(self, runner): - # runner.log_buffer.update({'time': time.time() - self.t}) # annoated by jyl - cur_time = time.time() # added by jyl - runner.log_buffer.update({'time': cur_time - self.t}) # added by jyl - if self.skip_step >= 5: # added by jyl - self.time_all += cur_time - self.t # added by jyl - self.skip_step += 1 # added by jyl - self.t = time.time() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ + +import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + + def before_epoch(self, runner): + self.t = time.time() + self.skip_step = 0 # added by jyl + self.time_all = 0 # added by jyl + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + # runner.log_buffer.update({'time': time.time() - self.t}) # annoated by jyl + cur_time = time.time() # added by jyl + runner.log_buffer.update({'time': cur_time - self.t}) # added by jyl + if self.skip_step >= 5: # added by jyl + self.time_all += cur_time - self.t # added by jyl + self.skip_step += 1 # added by jyl + self.t = time.time() diff --git a/PyTorch/contrib/cv/detection/FCOS/modelarts/fcos_r50_caffe_fpn_4x4_1x_coco_new.py b/PyTorch/contrib/cv/detection/FCOS/modelarts/fcos_r50_caffe_fpn_4x4_1x_coco_new.py index b1acab0f4356720c8447b225a8c4013659763f95..917c30db9139c692470688c02ee34ea24e0b22a4 100644 --- a/PyTorch/contrib/cv/detection/FCOS/modelarts/fcos_r50_caffe_fpn_4x4_1x_coco_new.py +++ b/PyTorch/contrib/cv/detection/FCOS/modelarts/fcos_r50_caffe_fpn_4x4_1x_coco_new.py @@ -1,138 +1,138 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
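The IterTimerHook change above accumulates per-iteration wall time in `time_all`, but only once `skip_step` has passed five iterations, so warm-up steps do not skew the average. Below is a minimal sketch of how that accumulator could be reported as an average step time and FPS at the end of an epoch; the `after_epoch` hook, the `samples_per_gpu` default, and the class name are illustrative assumptions and are not part of the patch.

```python
# Illustrative sketch (not part of the patch): report average step time and FPS
# from the same warm-up-skipping counters used in the modified IterTimerHook.
import time

from mmcv.runner import HOOKS, Hook


@HOOKS.register_module()
class AvgIterTimerHook(Hook):
    """Hypothetical variant that also logs average step time and FPS."""

    def __init__(self, samples_per_gpu=2, warmup_iters=5):
        self.samples_per_gpu = samples_per_gpu  # assumed batch size per device
        self.warmup_iters = warmup_iters        # iterations excluded from the average

    def before_epoch(self, runner):
        self.t = time.time()
        self.skip_step = 0
        self.time_all = 0.0

    def before_iter(self, runner):
        runner.log_buffer.update({'data_time': time.time() - self.t})

    def after_iter(self, runner):
        cur_time = time.time()
        runner.log_buffer.update({'time': cur_time - self.t})
        if self.skip_step >= self.warmup_iters:
            self.time_all += cur_time - self.t
        self.skip_step += 1
        self.t = time.time()

    def after_epoch(self, runner):
        timed_iters = self.skip_step - self.warmup_iters
        if timed_iters > 0:
            avg_step = self.time_all / timed_iters
            runner.logger.info('avg step time: %.4fs, FPS: %.2f',
                               avg_step, self.samples_per_gpu / avg_step)
```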
- -_base_ = [ - '../_base_/datasets/coco_detection.py', - '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' -] -# model settings -model = dict( - type='FCOS', - pretrained='open-mmlab://detectron/resnet50_caffe',#'open-mmlab://detectron/resnet50_caffe''torchvision://resnet50' myy change resnet50_caffe to resnet50 - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe'), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs=True, - extra_convs_on_inputs=False, # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=5, #change class number - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - norm_cfg=None, - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) - -classes = ('person', 'car', 'cat', 'dog', 'train') #change class name - -# training and testing settings -train_cfg = dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False) -test_cfg = dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100) -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=1344), # change 32 to 1344 - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=1344), # change 32 toto 1344 - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] - -data = dict( - samples_per_gpu=2, # change 4 to 2 - workers_per_gpu=4, - train=dict(pipeline=train_pipeline,classes=classes), - val=dict(pipeline=test_pipeline,classes=classes), - test=dict(pipeline=test_pipeline,classes=classes)) -# optimizer -optimizer = dict( - lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) -optimizer_config = dict( - _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='constant', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[8, 11]) -total_epochs = 12 -# add for print log -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook') -]) - -amp = True # add for apex -dist_params = dict(backend='hccl') # add for npu - - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FCOS', + pretrained='open-mmlab://detectron/resnet50_caffe',#'open-mmlab://detectron/resnet50_caffe''torchvision://resnet50' myy change resnet50_caffe to resnet50 + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + extra_convs_on_inputs=False, # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='FCOSHead', + num_classes=5, #change class number + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + norm_cfg=None, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))) + +classes = ('person', 'car', 'cat', 'dog', 'train') #change class name + +# training and testing settings +train_cfg = dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1344), # change 32 to 1344 + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1344), # change 32 toto 1344 + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=2, # change 4 to 2 + workers_per_gpu=4, + train=dict(pipeline=train_pipeline,classes=classes), + val=dict(pipeline=test_pipeline,classes=classes), + test=dict(pipeline=test_pipeline,classes=classes)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='constant', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +total_epochs = 12 +# add for print log +log_config = dict( + 
interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') +]) + +amp = True # add for apex +dist_params = dict(backend='hccl') # add for npu + + + + diff --git a/PyTorch/contrib/cv/detection/FCOS/scripts/train_1p.sh b/PyTorch/contrib/cv/detection/FCOS/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FCOS/scripts/train_8p.sh b/PyTorch/contrib/cv/detection/FCOS/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FCOS/tools/dist_train.sh b/PyTorch/contrib/cv/detection/FCOS/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FOTS/LICENSE b/PyTorch/contrib/cv/detection/FOTS/LICENSE index 185404d5515c393add9ecfbdd7cd83596e8a4b26..5b4cf39445b7b24f2e5d38062c3b9cca89ad8a90 100644 --- a/PyTorch/contrib/cv/detection/FOTS/LICENSE +++ b/PyTorch/contrib/cv/detection/FOTS/LICENSE @@ -1,204 +1,204 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. -Copyright 2021 Huawei Technologies Co., Ltd - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
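The config above remaps FCOS to five classes and pads every input up to a multiple of 1344, so the network always sees a fixed 1344x1344 shape after the 1333x800 resize. A hedged sketch of a pre-training sanity check using mmcv's `Config.fromfile` follows; the config path is an assumption and the check itself is not part of the repository.

```python
# Illustrative sanity check for the modified FCOS config (not part of the patch).
# Assumption: the path below points at the config shown in the diff and its
# _base_ files are available on disk.
import math

from mmcv import Config

cfg_path = 'modelarts/fcos_r50_caffe_fpn_4x4_1x_coco_new.py'  # hypothetical path
cfg = Config.fromfile(cfg_path)

# num_classes in the bbox head must match the custom classes tuple.
assert cfg.model.bbox_head.num_classes == len(cfg.classes), \
    'num_classes must match the classes tuple'

# Pad(size_divisor=1344) rounds each spatial dim up to a multiple of 1344,
# so a 1333x800 resized image becomes a single fixed 1344x1344 input.
size_divisor = 1344
h, w = 800, 1333
padded = (math.ceil(h / size_divisor) * size_divisor,
          math.ceil(w / size_divisor) * size_divisor)
print('padded input shape:', padded)  # -> (1344, 1344)
```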
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. +Copyright 2021 Huawei Technologies Co., Ltd + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/PyTorch/contrib/cv/detection/FOTS/README.md b/PyTorch/contrib/cv/detection/FOTS/README.md index e1dd7e515c082bbb51181613c2d9a027e4e3f982..73e72d2fddf3caf9561ca0813f92791a1652aa03 100644 --- a/PyTorch/contrib/cv/detection/FOTS/README.md +++ b/PyTorch/contrib/cv/detection/FOTS/README.md @@ -1,60 +1,60 @@ -# FOTS: Fast Oriented Text Spotting with a Unified Network text detection branch reimplementation (PyTorch) -# − 参考实现: - ``` - url=https://github.com/Wovchena/text-detection-fots.pytorch - ``` - -## FOTS Detail - -A unified end-to-end trainable Fast Oriented TextSpotting (FOTS) network for simultaneous detection andrecognition, sharing computation and visual information among the two complementary tasks. - - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- Install packages required -- Prepare dataset: 1. SynthText for pretrain - 2. ICDAR2015 for finetune - -## Training - -To train a model, run `train.py` with the desired model architecture -1. pretrain with SynthText for 9 epochs -2. finetune with ICDAR2015 for 583 epochs - -```bash -# training 1p pretrain accuracy -bash ./test/train_full_pretrain_1p.sh - -# training 1p finetune accuracy -bash ./test/train_full_finetune_1p.sh - -# training 1p performance -bash ./test/train_performance_1p.sh - -# training 8p pretrain accuracy -bash ./test/train_full_pretrain_8p.sh - -# training 8p finetune accuracy -bash ./test/train_full_finetune_8p.sh - -# training 8p performance -bash ./test/train_performance_8p.sh - -#test 8p accuracy -bash ./test/test_8p.sh - -#test 1p accuracy -bash ./test/test_1p.sh -``` - -Log path: - FOTS/*.log - - -## FOTS training result - -| Acc@1 | Recall | Hmean | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | :------: | :------: | -| - | - | - | 16.101 | 1 | 20 | O2 | +# FOTS: Fast Oriented Text Spotting with a Unified Network text detection branch reimplementation (PyTorch) +# − 参考实现: + ``` + url=https://github.com/Wovchena/text-detection-fots.pytorch + ``` + +## FOTS Detail + +A unified end-to-end trainable Fast Oriented TextSpotting (FOTS) network for simultaneous detection andrecognition, sharing computation and visual information among the two complementary tasks. + + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- Install packages required +- Prepare dataset: 1. SynthText for pretrain + 2. ICDAR2015 for finetune + +## Training + +To train a model, run `train.py` with the desired model architecture +1. pretrain with SynthText for 9 epochs +2. 
finetune with ICDAR2015 for 583 epochs + +```bash +# training 1p pretrain accuracy +bash ./test/train_full_pretrain_1p.sh + +# training 1p finetune accuracy +bash ./test/train_full_finetune_1p.sh + +# training 1p performance +bash ./test/train_performance_1p.sh + +# training 8p pretrain accuracy +bash ./test/train_full_pretrain_8p.sh + +# training 8p finetune accuracy +bash ./test/train_full_finetune_8p.sh + +# training 8p performance +bash ./test/train_performance_8p.sh + +#test 8p accuracy +bash ./test/test_8p.sh + +#test 1p accuracy +bash ./test/test_1p.sh +``` + +Log path: + FOTS/*.log + + +## FOTS training result + +| Acc@1 | Recall | Hmean | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | :------: | :------: | +| - | - | - | 16.101 | 1 | 20 | O2 | | 85.046 | 78.864 | 81.838 | 77.614 | 8 | 583 | O2 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FOTS/requirments.txt b/PyTorch/contrib/cv/detection/FOTS/requirments.txt index 1dadf1794e53f2e75a9a13d9aa3dd7fefa6f61df..b46f8737697217f54cfcd4db129722fd570427a4 100644 --- a/PyTorch/contrib/cv/detection/FOTS/requirments.txt +++ b/PyTorch/contrib/cv/detection/FOTS/requirments.txt @@ -1,5 +1,5 @@ -torch==1.5.0 -shapely==1.6.4.post2 -opencv-python==3.4.3.18 -tqdm==4.31.1 +torch==1.5.0 +shapely==1.6.4.post2 +opencv-python==3.4.3.18 +tqdm==4.31.1 Polygon3 \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/data/config/fsaf.pipeline b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/data/config/fsaf.pipeline old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/docker_start_infer.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/docker_start_infer.sh index 07b61defd1489b9c21337b7d7ac6c911bf422e78..022ff222968145977df53c32a26d31fde5429d39 100644 --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/docker_start_infer.sh +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/docker_start_infer.sh @@ -1,48 +1,48 @@ -#!/bin/bash - -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -docker_image=$1 -data_dir=$2 - -function show_help() { - echo "Usage: docker_start.sh docker_image data_dir" -} - -function param_check() { - if [ -z "${docker_image}" ]; then - echo "please input docker_image" - show_help - exit 1 - fi - - if [ -z "${data_dir}" ]; then - echo "please input data_dir" - show_help - exit 1 - fi -} - -param_check - -docker run -it \ - --device=/dev/davinci0 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm \ - --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v ${data_dir}:${data_dir} \ - ${docker_image} \ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
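The FOTS requirments.txt above pins torch 1.5.0, shapely 1.6.4.post2, opencv-python 3.4.3.18 and tqdm 4.31.1. The snippet below is an optional sketch, not part of the repository, for confirming those pins are actually installed before launching the training scripts listed in the README.

```python
# Optional environment check (illustration only): compare installed package
# versions against the pins in FOTS/requirments.txt.
from importlib.metadata import PackageNotFoundError, version

pinned = {
    'torch': '1.5.0',
    'shapely': '1.6.4.post2',
    'opencv-python': '3.4.3.18',
    'tqdm': '4.31.1',
}

for pkg, want in pinned.items():
    try:
        got = version(pkg)
    except PackageNotFoundError:
        print(f'{pkg}: not installed (expected {want})')
        continue
    status = 'OK' if got == want else f'mismatch (expected {want})'
    print(f'{pkg}: {got} {status}')
```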
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +data_dir=$2 + +function show_help() { + echo "Usage: docker_start.sh docker_image data_dir" +} + +function param_check() { + if [ -z "${docker_image}" ]; then + echo "please input docker_image" + show_help + exit 1 + fi + + if [ -z "${data_dir}" ]; then + echo "please input data_dir" + show_help + exit 1 + fi +} + +param_check + +docker run -it \ + --device=/dev/davinci0 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v ${data_dir}:${data_dir} \ + ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/CMakeLists.txt b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/CMakeLists.txt old mode 100755 new mode 100644 index d6916791922570cb48fd6195f749ba511cd0f1d9..f03048071a7d2e36730588b52a1c262a8ced8aca --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/CMakeLists.txt +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/CMakeLists.txt @@ -1,57 +1,57 @@ -cmake_minimum_required(VERSION 3.14.0) - -project(fsaf) - -set(TARGET fsaf) - -SET(CMAKE_BUILD_TYPE "Debug") -SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") -SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) -add_definitions(-Dgoogle=mindxsdk_private) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() - -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) -set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) - - -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) -endif() -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} ./src/main.cpp ./src/fsaf.cpp) -target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) - -install(TARGETS 
${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) +cmake_minimum_required(VERSION 3.14.0) + +project(fsaf) + +set(TARGET fsaf) + +SET(CMAKE_BUILD_TYPE "Debug") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() + +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) + + +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} ./src/main.cpp ./src/fsaf.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/build.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/build.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.cpp b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.cpp old mode 100755 new mode 100644 index 36b7c50fb852b488d86275a4e9af48438e048082..7ad6a695100d6d2126071e6a8464c0d6913216ba --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.cpp +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.cpp @@ -1,427 +1,427 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
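The mxbase CMakeLists above aborts if `ASCEND_HOME` is undefined, only warns on missing `ASCEND_VERSION` and `ARCH_PATTERN`, and resolves SDK include/link paths through `MX_SDK_HOME`. The small check below is an illustration, not part of the repository, that can be run before `build.sh` to surface missing variables early.

```python
# Illustrative pre-build check (not part of the repo): confirm the environment
# variables read by infer/mxbase/CMakeLists.txt are set before running build.sh.
import os
import sys

required = ['ASCEND_HOME', 'MX_SDK_HOME']         # build cannot succeed without these
recommended = ['ASCEND_VERSION', 'ARCH_PATTERN']  # CMake only warns if missing

missing = [name for name in required if not os.environ.get(name)]
if missing:
    sys.exit(f'missing required environment variables: {", ".join(missing)}')

for name in recommended:
    if not os.environ.get(name):
        print(f'warning: {name} is not set; CMake will emit a warning')

print('environment looks OK; run infer/mxbase/build.sh next')
```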
- */ - -#include "fsaf.h" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; - -// Complete all initialization work. When you are going to -// use FSAF class, you should invoke this method immediately. -// -// you need construct InitParam object for Init. -APP_ERROR FSAF::Init(const InitParam& initParam) { - // initialize device - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - // initialize context - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - - // load model - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - - return APP_ERR_OK; -} - -APP_ERROR FSAF::DeInit() { - model_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -// Get all files under a directory. -// Parameters: -// dirPath: the directory path -// Return: -// a vector of filename, including the suffix. -std::vector FSAF::GetFileList(const std::string &dirPath) { - struct dirent *ptr; - DIR *dir = opendir(dirPath.c_str()); - std::vector files; - while ((ptr = readdir(dir)) != NULL) { - if (ptr->d_name[0] == '.') { - continue; - } - files.push_back(ptr->d_name); - } - closedir(dir); - return files; -} - -// Read image from a image path. -// Parameters: -// imgPath: string of image path -// imageMat: opencv Mat object, for storging image as matrix -// height: int, storge the image height -// width: int, storge the image width -// Return: -// APP_ERROR object, if read image successfully, return APP_ERR_OK. -APP_ERROR FSAF::ReadImage(const std::string& imgPath, cv::Mat& imageMat, int& height, int& width) { - imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); - - // BGR -> RGB - cv::cvtColor(imageMat, imageMat, cv::COLOR_BGR2RGB); - - width = imageMat.cols; - height = imageMat.rows; - - return APP_ERR_OK; -} - -// Resize image to fix size. -// We use RightDown padding style. -// Parameters: -// srcImageMat: source opencv Mat object, image matrix -// dstImageMat: storge destination opencv Mat object -// resizedImageInfo: contain infomation about the image, including before and after scaling -//Return: -// APP_ERROR object, if resize image successfully, return APP_ERR_OK. 
-APP_ERROR FSAF::ResizeImage(const cv::Mat& srcImageMat, cv::Mat& dstImageMat, - MxBase::ResizedImageInfo& resizedImageInfo) { - float resizeHeight = 800; - float resizeWidth = 1216; - float scale = std::min(resizeWidth / srcImageMat.cols, resizeHeight / srcImageMat.rows); - int new_width = srcImageMat.cols * scale; - int new_height = srcImageMat.rows * scale; - - // calculate the padding - int pad_w = resizeWidth - new_width; - int pad_h = resizeHeight - new_height; - - resizedImageInfo.heightOriginal = srcImageMat.rows; - resizedImageInfo.heightResize = resizeHeight; - resizedImageInfo.widthOriginal = srcImageMat.cols; - resizedImageInfo.widthResize = resizeWidth; - resizedImageInfo.resizeType = RESIZER_MS_KEEP_ASPECT_RATIO; - - // invoke opencv method to resize and pad - cv::resize(srcImageMat, dstImageMat, cv::Size(new_width, new_height), 0, 0, cv::INTER_CUBIC); - cv::copyMakeBorder(dstImageMat, dstImageMat, 0, pad_h, 0, pad_w, - cv::BorderTypes::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); - - return APP_ERR_OK; -} - -APP_ERROR FSAF::Normalize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) -{ - constexpr size_t ALPHA_AND_BETA_SIZE = 3; - cv::Mat float32Mat; - srcImageMat.convertTo(float32Mat, CV_32FC3); - - std::vector tmp; - cv::split(float32Mat, tmp); - - const std::vector mean = {103.53, 116.28, 123.675}; - const std::vector std = {57.375, 57.120, 58.395}; - for (size_t i = 0; i < ALPHA_AND_BETA_SIZE; ++i) { - tmp[i].convertTo(tmp[i], CV_32FC3, 1 / std[i], - mean[i] / std[i]); - } - cv::merge(tmp, dstImageMat); - return APP_ERR_OK; -} - -// Convert Mat to Tensor. -// Parameters: -// imageMat: input image matrix -// tensorBase: storge image as tensor -// Return: -// APP_ERROR object, if convert image successfully, return APP_ERR_OK. -APP_ERROR FSAF::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) { - // calculate the data size: width * height * depth - const uint32_t dataSize = imageMat.cols * imageMat.rows * imageMat.channels(); - // allocate memory - MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); - MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); - - APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Memory malloc failed."; - return ret; - } - // get tensor shape - // std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; - std::vector shape = { - static_cast(imageMat.rows), - static_cast(imageMat.cols), - static_cast(imageMat.channels())}; - - // tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); - tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_FLOAT32); - return APP_ERR_OK; -} - -// Model inference. -// Parameters: -// inputs: input image tensor -// outputs: result tensor of inference result -// Return: -// APP_ERROR object, if convert image successfully, return APP_ERR_OK. 
-APP_ERROR FSAF::Inference(const std::vector& inputs, std::vector& outputs) { - auto dtypes = model_->GetOutputDataType(); - - // modelDesc_ get the output tensor size through Init() - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - - // infer the result according to the input tensor - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - if (ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - - return APP_ERR_OK; -} - -// Post process for inference result. -// Scale the bbox to the origin image size. -// Parameters: -// imgPath: input image path -// inputs: tensor of image after inference -// resultPath: the path of storaging infer_result -// height, width: image's height and width -// name: image name, not including suffix -// showPath: the path of storaging visualizition result -// Return: -// APP_ERROR object, if post process image successfully, return APP_ERR_OK. -APP_ERROR FSAF::PostProcess(const std::string& imgPath, std::vector& inputs, - const std::string &resultPath, int& height, int& width, const std::string& name, std::string &showPath) { - // object num - int tensor_size = 100; - - MxBase::TensorBase& tensor = inputs[1]; // 1*100 - - int ret = tensor.ToHost(); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Tensor_1 deploy to host failed."; - return ret; - } - std::vector shape = tensor.GetShape(); - - auto labels = reinterpret_cast(tensor.GetBuffer()); // 1*100 - - std::cout << "---------------------------labels---------------------------" << std::endl; - int label[tensor_size] = {0}; - for(int i = 0; i < tensor_size; i++){ - std::cout << labels[i] << " "; - label[i] = labels[i]; - } - - tensor = inputs[0]; // 1*100*5 - ret = tensor.ToHost(); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Tensor_0 deploy to host failed."; - return ret; - } - - auto bbox = reinterpret_cast(tensor.GetBuffer()); - std::cout << "\n---------------------------bboxes--------------------------" << std::endl; - for(int i = 0; i < tensor_size; i++){ - std::cout << bbox[i][0] << ", " << bbox[i][1] << ", " << bbox[i][2] << ", " - << bbox[i][3] << ", " << bbox[i][4] << std::endl; - } - - // get infer coordinates - float image_size_w = width; - float image_size_h = height; - float net_input_width = 1216; - float net_input_height = 800; - float scale = std::min(net_input_width / (float)width, net_input_height / (float)height); - - int new_width = image_size_w * scale; - - float n_scale = (float)new_width / image_size_w; - - // probability threshold and all classes for label - float prob_thres = 0.05; - std::vector classes = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", "dog", - "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", 
"sports ball", "kite", "baseball bat", - "baseball glove", "skateboard", "surfboard", "tennis racket", - "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", "carrot", - "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", - "mouse", "remote", "keyboard", "cell phone", "microwave", - "oven", "toaster", "sink", "refrigerator", "book", "clock", - "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; - - // storge bbox after post processing - float ppbox[tensor_size][5] = {0}; - - for (int j = 0; j < tensor_size; ++j) { - // scale bbox - ppbox[j][0] = (bbox[j][0]) / n_scale; - ppbox[j][1] = (bbox[j][1]) / n_scale; - ppbox[j][2] = (bbox[j][2]) / n_scale; - ppbox[j][3] = (bbox[j][3]) / n_scale; - ppbox[j][4] = bbox[j][4]; - // limit bbox in a valid range - ppbox[j][0] = std::max((float)0, ppbox[j][0]); - ppbox[j][1] = std::max((float)0, ppbox[j][1]); - ppbox[j][2] = std::min(image_size_w, ppbox[j][2]); - ppbox[j][3] = std::min(image_size_h, ppbox[j][3]); - } - - std::ofstream out(resultPath); - cv::Mat imgCur = cv::imread(imgPath); - - for (int j = 0; j < tensor_size; ++j) { - if(float(ppbox[j][4]) < float(prob_thres)) { - continue; - } - if(label[j] < 0 || label[j] > 80) { - continue; - } - - // standard the output result - std::string class_name = classes[int(label[j])]; - std::string det_results_str = ""; - std::ostringstream oss; - oss << ppbox[j][4]; - std::string confidence(oss.str()); - char strff1[21], strff2[21], strff3[21], strff4[21], strff0[21]; - memset(strff1, 0, sizeof(strff1)); - memset(strff2, 0, sizeof(strff2)); - memset(strff3, 0, sizeof(strff3)); - memset(strff4, 0, sizeof(strff4)); - memset(strff0, 0, sizeof(strff0)); - // print ppbox to char* - sprintf(strff0, "%.8f", ppbox[j][0]); - sprintf(strff1, "%.8f", ppbox[j][1]); - sprintf(strff2, "%.8f", ppbox[j][2]); - sprintf(strff3, "%.8f", ppbox[j][3]); - sprintf(strff4, "%.8f", ppbox[j][4]); - det_results_str = det_results_str + class_name + " " + strff4 + " " + strff0 + " " + strff1 + " " - + strff2 + " " + strff3 + "\n"; - - out << det_results_str; - std::cout << det_results_str; - // visualization on the origin image - cv::Point p3((ppbox[j][0]), (ppbox[j][1])); - cv::Point p4((ppbox[j][2]), (ppbox[j][3])); - cv::Scalar colorRectangle1(0, 255, 1); - int thicknessRectangle1 = 1; - cv::rectangle(imgCur, p3, p4, colorRectangle1, thicknessRectangle1); - cv::putText(imgCur, class_name + "|" + confidence, p3, cv::FONT_HERSHEY_SIMPLEX, - 0.5, colorRectangle1, 1, 1, false); - } - out.close(); - cv::imwrite(showPath + "/" + name + ".jpg", imgCur); - - return APP_ERR_OK; -} - -// Primary method for process all images. 
-APP_ERROR FSAF::Process(const std::string &dirPath, std::string &resultPath, std::string &showPath) { - std::vector dirFileList = GetFileList(dirPath); - std::vector names, paths; - // for debug counting - int i = 0; - // process every image - for(auto imgFile : dirFileList) { - std::string imgPath = dirPath + "/" + imgFile; - std::string name = imgFile.substr(0, imgFile.find(".")); - std::string subresultPath = resultPath + "/" + name + ".txt"; - - cv::Mat imageMat; - int height = 0; - int width = 0; - // get image infomation - APP_ERROR ret = ReadImage(imgPath, imageMat, height, width); - if (ret != APP_ERR_OK) { - LogError << "ReadImage failed, ret=" << ret << "."; - return ret; - } - // resize image and pad it - ResizedImageInfo resizedImageInfo; - ResizeImage(imageMat, imageMat, resizedImageInfo); - - // convert image matrix to tensor - TensorBase tensorBase; - ret = CVMatToTensorBase(imageMat, tensorBase); - if (ret != APP_ERR_OK) { - LogError << "CVMatToTensorBase failed, ret=" << ret << "."; - return ret; - } - - std::vector inputs = {}; - std::vector outputs = {}; - inputs.push_back(tensorBase); - // infer and get output tensor - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - // post process the bbox to the origin image, - // and implement visualizition. - ret = PostProcess(imgPath, outputs, subresultPath, height, width, name, showPath); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - // add count - i++; - std::cout << i << std::endl; - } - - return APP_ERR_OK; - -} +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fsaf.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/Log/Log.h" + +using namespace MxBase; + +// Complete all initialization work. When you are going to +// use FSAF class, you should invoke this method immediately. +// +// you need construct InitParam object for Init. 
+APP_ERROR FSAF::Init(const InitParam& initParam) { + // initialize device + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + // initialize context + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + + // load model + model_ = std::make_shared(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + + return APP_ERR_OK; +} + +APP_ERROR FSAF::DeInit() { + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +// Get all files under a directory. +// Parameters: +// dirPath: the directory path +// Return: +// a vector of filename, including the suffix. +std::vector FSAF::GetFileList(const std::string &dirPath) { + struct dirent *ptr; + DIR *dir = opendir(dirPath.c_str()); + std::vector files; + while ((ptr = readdir(dir)) != NULL) { + if (ptr->d_name[0] == '.') { + continue; + } + files.push_back(ptr->d_name); + } + closedir(dir); + return files; +} + +// Read image from a image path. +// Parameters: +// imgPath: string of image path +// imageMat: opencv Mat object, for storging image as matrix +// height: int, storge the image height +// width: int, storge the image width +// Return: +// APP_ERROR object, if read image successfully, return APP_ERR_OK. +APP_ERROR FSAF::ReadImage(const std::string& imgPath, cv::Mat& imageMat, int& height, int& width) { + imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); + + // BGR -> RGB + cv::cvtColor(imageMat, imageMat, cv::COLOR_BGR2RGB); + + width = imageMat.cols; + height = imageMat.rows; + + return APP_ERR_OK; +} + +// Resize image to fix size. +// We use RightDown padding style. +// Parameters: +// srcImageMat: source opencv Mat object, image matrix +// dstImageMat: storge destination opencv Mat object +// resizedImageInfo: contain infomation about the image, including before and after scaling +//Return: +// APP_ERROR object, if resize image successfully, return APP_ERR_OK. 
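Editor's note: the comment block above describes ResizeImage's keep-aspect-ratio resize with right/bottom padding; the C++ implementation below uses a fixed 800x1216 (H x W) network input. The same scale-and-pad arithmetic, sketched in Python for clarity (illustrative only; assumes `opencv-python` and `numpy` are installed):

```python
# Illustrative sketch of the keep-ratio resize + right/bottom padding performed by
# ResizeImage below with a fixed 800x1216 target. Not part of the patch itself.
import cv2
import numpy as np

def resize_keep_ratio_pad(img, target_h=800, target_w=1216):
    h, w = img.shape[:2]
    scale = min(target_w / w, target_h / h)          # one scale for both axes
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    # pad only on the right and bottom so the top-left origin is unchanged
    padded = cv2.copyMakeBorder(resized, 0, target_h - new_h, 0, target_w - new_w,
                                cv2.BORDER_CONSTANT, value=(0, 0, 0))
    return padded, scale

dummy = np.zeros((400, 608, 3), dtype=np.uint8)      # stand-in input image
out, scale = resize_keep_ratio_pad(dummy)
print(out.shape, round(scale, 4))                    # (800, 1216, 3) 2.0
```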
+APP_ERROR FSAF::ResizeImage(const cv::Mat& srcImageMat, cv::Mat& dstImageMat, + MxBase::ResizedImageInfo& resizedImageInfo) { + float resizeHeight = 800; + float resizeWidth = 1216; + float scale = std::min(resizeWidth / srcImageMat.cols, resizeHeight / srcImageMat.rows); + int new_width = srcImageMat.cols * scale; + int new_height = srcImageMat.rows * scale; + + // calculate the padding + int pad_w = resizeWidth - new_width; + int pad_h = resizeHeight - new_height; + + resizedImageInfo.heightOriginal = srcImageMat.rows; + resizedImageInfo.heightResize = resizeHeight; + resizedImageInfo.widthOriginal = srcImageMat.cols; + resizedImageInfo.widthResize = resizeWidth; + resizedImageInfo.resizeType = RESIZER_MS_KEEP_ASPECT_RATIO; + + // invoke opencv method to resize and pad + cv::resize(srcImageMat, dstImageMat, cv::Size(new_width, new_height), 0, 0, cv::INTER_CUBIC); + cv::copyMakeBorder(dstImageMat, dstImageMat, 0, pad_h, 0, pad_w, + cv::BorderTypes::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + + return APP_ERR_OK; +} + +APP_ERROR FSAF::Normalize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) +{ + constexpr size_t ALPHA_AND_BETA_SIZE = 3; + cv::Mat float32Mat; + srcImageMat.convertTo(float32Mat, CV_32FC3); + + std::vector tmp; + cv::split(float32Mat, tmp); + + const std::vector mean = {103.53, 116.28, 123.675}; + const std::vector std = {57.375, 57.120, 58.395}; + for (size_t i = 0; i < ALPHA_AND_BETA_SIZE; ++i) { + tmp[i].convertTo(tmp[i], CV_32FC3, 1 / std[i], - mean[i] / std[i]); + } + cv::merge(tmp, dstImageMat); + return APP_ERR_OK; +} + +// Convert Mat to Tensor. +// Parameters: +// imageMat: input image matrix +// tensorBase: storge image as tensor +// Return: +// APP_ERROR object, if convert image successfully, return APP_ERR_OK. +APP_ERROR FSAF::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) { + // calculate the data size: width * height * depth + const uint32_t dataSize = imageMat.cols * imageMat.rows * imageMat.channels(); + // allocate memory + MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); + MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); + + APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc failed."; + return ret; + } + // get tensor shape + // std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; + std::vector shape = { + static_cast(imageMat.rows), + static_cast(imageMat.cols), + static_cast(imageMat.channels())}; + + // tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); + tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_FLOAT32); + return APP_ERR_OK; +} + +// Model inference. +// Parameters: +// inputs: input image tensor +// outputs: result tensor of inference result +// Return: +// APP_ERROR object, if convert image successfully, return APP_ERR_OK. 
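Editor's note: Normalize above applies `convertTo(alpha = 1/std, beta = -mean/std)` to each channel, which is exactly the usual `(pixel - mean) / std` normalization written as one affine transform. A quick check in Python (illustrative only; assumes `numpy` is available):

```python
# Illustrative check that the alpha/beta form used in Normalize equals (x - mean) / std.
import numpy as np

mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
std = np.array([57.375, 57.120, 58.395], dtype=np.float32)

img = np.random.randint(0, 256, size=(4, 4, 3)).astype(np.float32)  # H x W x C

direct = (img - mean) / std                 # textbook form
affine = img * (1.0 / std) + (-mean / std)  # alpha/beta form used by convertTo

print(np.allclose(direct, affine, atol=1e-4))  # True
```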
+APP_ERROR FSAF::Inference(const std::vector& inputs, std::vector& outputs) { + auto dtypes = model_->GetOutputDataType(); + + // modelDesc_ get the output tensor size through Init() + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs.push_back(tensor); + } + + DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; + + // infer the result according to the input tensor + APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + + return APP_ERR_OK; +} + +// Post process for inference result. +// Scale the bbox to the origin image size. +// Parameters: +// imgPath: input image path +// inputs: tensor of image after inference +// resultPath: the path of storaging infer_result +// height, width: image's height and width +// name: image name, not including suffix +// showPath: the path of storaging visualizition result +// Return: +// APP_ERROR object, if post process image successfully, return APP_ERR_OK. +APP_ERROR FSAF::PostProcess(const std::string& imgPath, std::vector& inputs, + const std::string &resultPath, int& height, int& width, const std::string& name, std::string &showPath) { + // object num + int tensor_size = 100; + + MxBase::TensorBase& tensor = inputs[1]; // 1*100 + + int ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor_1 deploy to host failed."; + return ret; + } + std::vector shape = tensor.GetShape(); + + auto labels = reinterpret_cast(tensor.GetBuffer()); // 1*100 + + std::cout << "---------------------------labels---------------------------" << std::endl; + int label[tensor_size] = {0}; + for(int i = 0; i < tensor_size; i++){ + std::cout << labels[i] << " "; + label[i] = labels[i]; + } + + tensor = inputs[0]; // 1*100*5 + ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor_0 deploy to host failed."; + return ret; + } + + auto bbox = reinterpret_cast(tensor.GetBuffer()); + std::cout << "\n---------------------------bboxes--------------------------" << std::endl; + for(int i = 0; i < tensor_size; i++){ + std::cout << bbox[i][0] << ", " << bbox[i][1] << ", " << bbox[i][2] << ", " + << bbox[i][3] << ", " << bbox[i][4] << std::endl; + } + + // get infer coordinates + float image_size_w = width; + float image_size_h = height; + float net_input_width = 1216; + float net_input_height = 800; + float scale = std::min(net_input_width / (float)width, net_input_height / (float)height); + + int new_width = image_size_w * scale; + + float n_scale = (float)new_width / image_size_w; + + // probability threshold and all classes for label + float prob_thres = 0.05; + std::vector classes = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", + "train", "truck", "boat", "traffic light", "fire hydrant", + "stop sign", "parking meter", "bench", "bird", "cat", "dog", + "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", 
"sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", + "banana", "apple", "sandwich", "orange", "broccoli", "carrot", + "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", + "mouse", "remote", "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; + + // storge bbox after post processing + float ppbox[tensor_size][5] = {0}; + + for (int j = 0; j < tensor_size; ++j) { + // scale bbox + ppbox[j][0] = (bbox[j][0]) / n_scale; + ppbox[j][1] = (bbox[j][1]) / n_scale; + ppbox[j][2] = (bbox[j][2]) / n_scale; + ppbox[j][3] = (bbox[j][3]) / n_scale; + ppbox[j][4] = bbox[j][4]; + // limit bbox in a valid range + ppbox[j][0] = std::max((float)0, ppbox[j][0]); + ppbox[j][1] = std::max((float)0, ppbox[j][1]); + ppbox[j][2] = std::min(image_size_w, ppbox[j][2]); + ppbox[j][3] = std::min(image_size_h, ppbox[j][3]); + } + + std::ofstream out(resultPath); + cv::Mat imgCur = cv::imread(imgPath); + + for (int j = 0; j < tensor_size; ++j) { + if(float(ppbox[j][4]) < float(prob_thres)) { + continue; + } + if(label[j] < 0 || label[j] > 80) { + continue; + } + + // standard the output result + std::string class_name = classes[int(label[j])]; + std::string det_results_str = ""; + std::ostringstream oss; + oss << ppbox[j][4]; + std::string confidence(oss.str()); + char strff1[21], strff2[21], strff3[21], strff4[21], strff0[21]; + memset(strff1, 0, sizeof(strff1)); + memset(strff2, 0, sizeof(strff2)); + memset(strff3, 0, sizeof(strff3)); + memset(strff4, 0, sizeof(strff4)); + memset(strff0, 0, sizeof(strff0)); + // print ppbox to char* + sprintf(strff0, "%.8f", ppbox[j][0]); + sprintf(strff1, "%.8f", ppbox[j][1]); + sprintf(strff2, "%.8f", ppbox[j][2]); + sprintf(strff3, "%.8f", ppbox[j][3]); + sprintf(strff4, "%.8f", ppbox[j][4]); + det_results_str = det_results_str + class_name + " " + strff4 + " " + strff0 + " " + strff1 + " " + + strff2 + " " + strff3 + "\n"; + + out << det_results_str; + std::cout << det_results_str; + // visualization on the origin image + cv::Point p3((ppbox[j][0]), (ppbox[j][1])); + cv::Point p4((ppbox[j][2]), (ppbox[j][3])); + cv::Scalar colorRectangle1(0, 255, 1); + int thicknessRectangle1 = 1; + cv::rectangle(imgCur, p3, p4, colorRectangle1, thicknessRectangle1); + cv::putText(imgCur, class_name + "|" + confidence, p3, cv::FONT_HERSHEY_SIMPLEX, + 0.5, colorRectangle1, 1, 1, false); + } + out.close(); + cv::imwrite(showPath + "/" + name + ".jpg", imgCur); + + return APP_ERR_OK; +} + +// Primary method for process all images. 
+APP_ERROR FSAF::Process(const std::string &dirPath, std::string &resultPath, std::string &showPath) { + std::vector dirFileList = GetFileList(dirPath); + std::vector names, paths; + // for debug counting + int i = 0; + // process every image + for(auto imgFile : dirFileList) { + std::string imgPath = dirPath + "/" + imgFile; + std::string name = imgFile.substr(0, imgFile.find(".")); + std::string subresultPath = resultPath + "/" + name + ".txt"; + + cv::Mat imageMat; + int height = 0; + int width = 0; + // get image infomation + APP_ERROR ret = ReadImage(imgPath, imageMat, height, width); + if (ret != APP_ERR_OK) { + LogError << "ReadImage failed, ret=" << ret << "."; + return ret; + } + // resize image and pad it + ResizedImageInfo resizedImageInfo; + ResizeImage(imageMat, imageMat, resizedImageInfo); + + // convert image matrix to tensor + TensorBase tensorBase; + ret = CVMatToTensorBase(imageMat, tensorBase); + if (ret != APP_ERR_OK) { + LogError << "CVMatToTensorBase failed, ret=" << ret << "."; + return ret; + } + + std::vector inputs = {}; + std::vector outputs = {}; + inputs.push_back(tensorBase); + // infer and get output tensor + ret = Inference(inputs, outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + // post process the bbox to the origin image, + // and implement visualizition. + ret = PostProcess(imgPath, outputs, subresultPath, height, width, name, showPath); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + // add count + i++; + std::cout << i << std::endl; + } + + return APP_ERR_OK; + +} diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.h b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.h old mode 100755 new mode 100644 index 8f20a8db67955250538d95660e6cfd3ef0ff1e33..571154919db9b742592df337a9f87d3aa8ebc870 --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.h +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/fsaf.h @@ -1,65 +1,65 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef UNET_SEGMENTATION_H -#define UNET_SEGMENTATION_H - -#include - -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "MxBase/PostProcessBases/PostProcessDataType.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -// parameters struct for initialize Model -struct InitParam { - // npu device id - uint32_t deviceId; - // model path of .om - std::string modelPath; -}; - -// FSAF Model class -// Example: -// ... -// FSAF fsaf; -// APP_ERROR ret = fsaf.Init(initParam); -// ret = fsaf.Process(imgPath,resultPath,showPath); -// ... 
-// fsaf.DeInit(); -class FSAF { -public: - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat, int& height, int& width); - APP_ERROR ResizeImage(const cv::Mat &srcImageMat, cv::Mat &dstImageMat, - MxBase::ResizedImageInfo &resziedImageInfo); - APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); - APP_ERROR VectorToTensorBase(int* transMat, MxBase::TensorBase& tensorBase); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::string& imgPath, std::vector &inputs, - const std::string &subresultPath,int& height, int& width,const std::string& name,std::string &showPath); - APP_ERROR Process(const std::string &dirPath, std::string &resultPath, std::string &showPath); - APP_ERROR Normalize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); - -private: - std::shared_ptr model_; - MxBase::ModelDesc modelDesc_; - uint32_t deviceId_ = 0; - - std::vector GetFileList(const std::string &dirPath); -}; - -#endif +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UNET_SEGMENTATION_H +#define UNET_SEGMENTATION_H + +#include + +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/PostProcessBases/PostProcessDataType.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +// parameters struct for initialize Model +struct InitParam { + // npu device id + uint32_t deviceId; + // model path of .om + std::string modelPath; +}; + +// FSAF Model class +// Example: +// ... +// FSAF fsaf; +// APP_ERROR ret = fsaf.Init(initParam); +// ret = fsaf.Process(imgPath,resultPath,showPath); +// ... 
+// fsaf.DeInit(); +class FSAF { +public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat, int& height, int& width); + APP_ERROR ResizeImage(const cv::Mat &srcImageMat, cv::Mat &dstImageMat, + MxBase::ResizedImageInfo &resziedImageInfo); + APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); + APP_ERROR VectorToTensorBase(int* transMat, MxBase::TensorBase& tensorBase); + APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); + APP_ERROR PostProcess(const std::string& imgPath, std::vector &inputs, + const std::string &subresultPath,int& height, int& width,const std::string& name,std::string &showPath); + APP_ERROR Process(const std::string &dirPath, std::string &resultPath, std::string &showPath); + APP_ERROR Normalize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); + +private: + std::shared_ptr model_; + MxBase::ModelDesc modelDesc_; + uint32_t deviceId_ = 0; + + std::vector GetFileList(const std::string &dirPath); +}; + +#endif diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/main.cpp b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/main.cpp old mode 100755 new mode 100644 index 141ab3f167070a2dfe448b92cccef71ec32ab01c..4d85ce5862a41e4651f90526ee791dff095daaff --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/main.cpp +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/mxbase/src/main.cpp @@ -1,53 +1,53 @@ -/* - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fsaf.h" - -#include "MxBase/Log/Log.h" - -int main() { - // config the parameters for fsaf model - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.modelPath = "../../data/model/fsaf.om"; - - // declare and initialize the fsaf model - FSAF fsaf; - APP_ERROR ret = fsaf.Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "FSAF init failed, ret=" << ret << "."; - return ret; - } - - // coco2017 validation set for object detection - std::string imgPath = "../../data/input/val2017"; - - // directories for saving result - std::string outputPath = "../output/"; - std::string resultPath = outputPath + "infer_result"; - std::string showPath = outputPath + "show_result"; - - // call the process of fsaf model - ret = fsaf.Process(imgPath, resultPath, showPath); - if (ret != APP_ERR_OK) { - LogError << "FSAF process failed, ret=" << ret << "."; - fsaf.DeInit(); - return ret; - } - - fsaf.DeInit(); - return APP_ERR_OK; -} +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the BSD 3-Clause License (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fsaf.h" + +#include "MxBase/Log/Log.h" + +int main() { + // config the parameters for fsaf model + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.modelPath = "../../data/model/fsaf.om"; + + // declare and initialize the fsaf model + FSAF fsaf; + APP_ERROR ret = fsaf.Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "FSAF init failed, ret=" << ret << "."; + return ret; + } + + // coco2017 validation set for object detection + std::string imgPath = "../../data/input/val2017"; + + // directories for saving result + std::string outputPath = "../output/"; + std::string resultPath = outputPath + "infer_result"; + std::string showPath = outputPath + "show_result"; + + // call the process of fsaf model + ret = fsaf.Process(imgPath, resultPath, showPath); + if (ret != APP_ERR_OK) { + LogError << "FSAF process failed, ret=" << ret << "."; + fsaf.DeInit(); + return ret; + } + + fsaf.DeInit(); + return APP_ERR_OK; +} diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/sdk/run.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/sdk/run.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/coco_eval.py b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/coco_eval.py old mode 100755 new mode 100644 index 92e70605ac1f99b0befcb4270abc4bcc4da3d181..47904787ce72531e2f5c510c1cf6bfa5330cffff --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/coco_eval.py +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/coco_eval.py @@ -1,97 +1,97 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License.import argparse - -import argparse -import numpy as np -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -def coco_evaluation(annotation_json, result_json): - cocoGt = COCO(annotation_json) - cocoDt = cocoGt.loadRes(result_json) - iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) - iou_type = 'bbox' - - cocoEval = COCOeval(cocoGt, cocoDt, iou_type) - cocoEval.params.catIds = cocoGt.getCatIds(catNms=CLASSES) - cocoEval.params.imgIds = cocoGt.getImgIds() - delete_id=[3661,70254,309391,374551,190007] - for did in delete_id: - cocoEval.params.imgIds.remove(did) - - cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. - cocoEval.params.iouThrs = iou_thrs - - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - # mapping of cocoEval.stats - coco_metric_names = { - 'mAP': 0, - 'mAP_50': 1, - 'mAP_75': 2, - 'mAP_s': 3, - 'mAP_m': 4, - 'mAP_l': 5, - 'AR@100': 6, - 'AR@300': 7, - 'AR@1000': 8, - 'AR_s@1000': 9, - 'AR_m@1000': 10, - 'AR_l@1000': 11 - } - - metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] - eval_results = {} - - for metric_item in metric_items: - key = f'bbox_{metric_item}' - val = float( - f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' - ) - eval_results[key] = val - ap = cocoEval.stats[:6] - eval_results['bbox_mAP_copypaste'] = ( - f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' - f'{ap[4]:.3f} {ap[5]:.3f}') - - return eval_results - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--ground_truth_file", default="../data/input/annotations/instances_val2017.json") - parser.add_argument("--detection_result_file", default="../sdk/output/coco_detection_result.json") - parser.add_argument("--eval_result_file", default="../sdk/output/eval_map_result.txt") - args = parser.parse_args() - result = coco_evaluation(args.ground_truth_file, args.detection_result_file) - print(result) - with open(args.eval_result_file, 'w') as f: - for key, value in result.items(): +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.import argparse + +import argparse +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +def coco_evaluation(annotation_json, result_json): + cocoGt = COCO(annotation_json) + cocoDt = cocoGt.loadRes(result_json) + iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + iou_type = 'bbox' + + cocoEval = COCOeval(cocoGt, cocoDt, iou_type) + cocoEval.params.catIds = cocoGt.getCatIds(catNms=CLASSES) + cocoEval.params.imgIds = cocoGt.getImgIds() + delete_id=[3661,70254,309391,374551,190007] + for did in delete_id: + cocoEval.params.imgIds.remove(did) + + cocoEval.params.maxDets = [100, 300, 1000] # proposal number for evaluating recalls/mAPs. 
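Editor's note: the `iou_thrs` expression built a few lines above is the standard COCO grid of 10 IoU thresholds from 0.50 to 0.95 in steps of 0.05; `maxDets = [100, 300, 1000]` only changes the detection counts used for the recall metrics. A quick standalone check of the threshold grid (illustrative, not part of the patch):

```python
# Quick check of the IoU-threshold grid constructed in coco_eval.py above.
import numpy as np

iou_thrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
print(len(iou_thrs))   # 10
print(iou_thrs)        # [0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85 0.9  0.95]
```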
+ cocoEval.params.iouThrs = iou_thrs + + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # mapping of cocoEval.stats + coco_metric_names = { + 'mAP': 0, + 'mAP_50': 1, + 'mAP_75': 2, + 'mAP_s': 3, + 'mAP_m': 4, + 'mAP_l': 5, + 'AR@100': 6, + 'AR@300': 7, + 'AR@1000': 8, + 'AR_s@1000': 9, + 'AR_m@1000': 10, + 'AR_l@1000': 11 + } + + metric_items = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'] + eval_results = {} + + for metric_item in metric_items: + key = f'bbox_{metric_item}' + val = float( + f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}' + ) + eval_results[key] = val + ap = cocoEval.stats[:6] + eval_results['bbox_mAP_copypaste'] = ( + f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} ' + f'{ap[4]:.3f} {ap[5]:.3f}') + + return eval_results + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth_file", default="../data/input/annotations/instances_val2017.json") + parser.add_argument("--detection_result_file", default="../sdk/output/coco_detection_result.json") + parser.add_argument("--eval_result_file", default="../sdk/output/eval_map_result.txt") + args = parser.parse_args() + result = coco_evaluation(args.ground_truth_file, args.detection_result_file) + print(result) + with open(args.eval_result_file, 'w') as f: + for key, value in result.items(): f.write(key + ': ' + str(value) + '\n') \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/txt_to_json.py b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/txt_to_json.py old mode 100755 new mode 100644 index f713ec1c85067431d490c9bf88b9820d61e78cc6..31fe719e3ce4c70b133c6c1210100cd51cc878e0 --- a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/txt_to_json.py +++ b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/infer/util/txt_to_json.py @@ -1,113 +1,113 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License.import argparse - -import glob -import os -import sys -import argparse -import mmcv - -''' - 0,0 ------> x (width) - | - | (Left,Top) - | *_________ - | | | - | | - y |_________| - (height) * - (Right,Bottom) -''' -CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - -cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, -24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, -48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, -72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - - -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - - -def error(msg): - print(msg) - sys.exit(0) - - -def get_predict_list(file_path, gt_classes): - dr_files_list = glob.glob(file_path + '/*.txt') - dr_files_list.sort() - - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - sl = line.split() - if len(sl) > 6: - class_name = sl[0] + ' ' + sl[1] - scores, left, top, right, bottom = sl[2:] - else: - class_name, scores, left, top, right, bottom = sl - if float(scores) < 0.05: - continue - except ValueError: - error_msg = "Error: File " + txt_file + " wrong format.\n" - error_msg += " Expected: \n" - error_msg += " Received: " + line - error(error_msg) - - # bbox = left + " " + top + " " + right + " " + bottom - left = float(left) - right = float(right) - top = float(top) - bottom = float(bottom) - bbox = [left, top, right-left, bottom-top] - bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, - "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) - # sort detection-results by decreasing scores - # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) - return bounding_boxes - - -if __name__ == '__main__': - parser = argparse.ArgumentParser('mAp calculate') - parser.add_argument('--infer_result_path', default="../sdk/output/infer_result/", - help='the path of the predict result') - parser.add_argument("--json_output_filename", default="../sdk/output/coco_detection_result") - args = parser.parse_args() - - res_bbox = get_predict_list(args.infer_result_path, CLASSES) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.import argparse + +import glob +import os +import sys +import argparse +import mmcv + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' +CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', + 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] + +cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, +24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, +72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + + +def error(msg): + print(msg) + sys.exit(0) + + +def get_predict_list(file_path, gt_classes): + dr_files_list = glob.glob(file_path + '/*.txt') + dr_files_list.sort() + + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + sl = line.split() + if len(sl) > 6: + class_name = sl[0] + ' ' + sl[1] + scores, left, top, right, bottom = sl[2:] + else: + class_name, scores, left, top, right, bottom = sl + if float(scores) < 0.05: + continue + except ValueError: + error_msg = "Error: File " + txt_file + " wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + + # bbox = left + " " + top + " " + right + " " + bottom + left = float(left) + right = float(right) + top = float(top) + bottom = float(bottom) + bbox = [left, top, right-left, bottom-top] + bounding_boxes.append({"image_id": int(file_id), "bbox": bbox, + "score": float(scores), "category_id": cat_ids[CLASSES.index(class_name)]}) + # sort detection-results by decreasing scores + # bounding_boxes.sort(key=lambda x: float(x['score']), reverse=True) + return bounding_boxes + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('mAp calculate') + parser.add_argument('--infer_result_path', 
default="../sdk/output/infer_result/", + help='the path of the predict result') + parser.add_argument("--json_output_filename", default="../sdk/output/coco_detection_result") + args = parser.parse_args() + + res_bbox = get_predict_list(args.infer_result_path, CLASSES) mmcv.dump(res_bbox, args.json_output_filename + '.json') \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/dist_train.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/slurm_test.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/slurm_test.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/slurm_train.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/mmdetection/tools/slurm_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/scripts/train_8p.sh b/PyTorch/contrib/cv/detection/FSAF_for_Pytorch/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/FaceBoxes/README.md b/PyTorch/contrib/cv/detection/FaceBoxes/README.md index ef40a2e1a1e7038d8a2ee634ec1e22a49de6ba86..b3677532f3fac69acf1ac19bb44f7ba5b188c556 100644 --- a/PyTorch/contrib/cv/detection/FaceBoxes/README.md +++ b/PyTorch/contrib/cv/detection/FaceBoxes/README.md @@ -1,117 +1,117 @@ -# FaceBoxes - -本项目实现了FaceBoxes从GPU到NPU上训练的迁移,源开源代码仓[FaceBoxes.Pytorch](https://github.com/zisianw/FaceBoxes.PyTorch) - -## FaceBoxes Detail - -本项目对于FaceBoxes.Pytorch做出了如下更改: - -1. 将设备从Nvidia GPU迁移到Huawei NPU上。 -2. 在源代码的基础上添加了Apex混合精度进行优化。 -3. 在模型迁移到NPU上后一些不支持或性能较低的算子放到CPU上进行规避。 -4. 针对测试结果添加了新的评估脚本。 - -## Requirements - -```bash -pip install -r requirements.txt -``` - -- NPU 配套的run包安装 -- Python3.7.5 -- PyTorch(NPU版本) -- Apex(NPU版本) - -### 导入环境变量 - -```bash -source scripts/npu_set_env.sh -``` - -### 编译 - -```bash -git clone https://github.com/Levi0223/FDDB_Evaluation.git -cd FDDB_Evaluation -python3 setup.py build_ext --inplace -mv ../convert.py ../split.py ./ -``` - -### 准备数据集 -数据集下载参考源代码仓 - -1. 下载[WIDER_FACE](https://github.com/zisianw/FaceBoxes.PyTorch)数据集,将图片放在这个目录下(数据集包含32203张图片): - - ```bash - $FaceBoxes_ROOT/data/WIDER_FACE/images/ - ``` - - 下载转换后的[标注文件](https://github.com/zisianw/FaceBoxes.PyTorch),将他们放在这个目录下: - - ```bash - $FaceBoxes_ROOT/data/WIDER_FACE/annotations/ - ``` - - 最终数据集目录结构如下: - - ![输入图片说明](https://images.gitee.com/uploads/images/2021/0927/121855_9a16b40b_6515416.png "屏幕截图.png") - -2. 
下载[FDDB](https://github.com/zisianw/FaceBoxes.PyTorch)数据集,将图片放在这个目录下(数据集包含2845张图片): - - ```bash - $FaceBoxes_ROOT/data/FDDB/images/ - ``` - - 最终数据集目录结构如下: - - ![输入图片说明](https://images.gitee.com/uploads/images/2021/0927/121924_9f00b12c_6515416.png "屏幕截图.png") - -## Trainning - -### 单卡性能评估 - -```bash -### 输出单卡FPS -bash scripts/train_performance_1p.sh -``` - -### 单卡训练 - -```bash -### 单卡全量训练 -bash scripts/train_1p.sh -## 日志文件在当前目录下的1p_train.log -``` - -### 多卡性能评估 - -```bash -### 输出多卡FPS -bash scripts/train_performance_8p.sh -``` - -### 多卡训练 - -```bash -### 多卡全量训练 -bash scripts/train_8p.sh -## 日志文件在当前目录下的8p_train.log -``` - -### Test - -```bash -### 测试训练得到的权重文件,生成FDDB_dets.txt -bash test.sh -## 日志文件在当前目录下的test.log -### 解析FDDB_dets.txt文件,打印最终精度 -bash eval.sh -``` - -## Performance - -| | AP | APEX | lOSS_SCALE | EPOCH | -| :-----: | :----: | :--: | :--------: | :---: | -| **GPU** | 0.9440 | O2 | 128 | 300 | -| **NPU** | 0.9396 | O2 | 128 | 300 | - +# FaceBoxes + +本项目实现了FaceBoxes从GPU到NPU上训练的迁移,源开源代码仓[FaceBoxes.Pytorch](https://github.com/zisianw/FaceBoxes.PyTorch) + +## FaceBoxes Detail + +本项目对于FaceBoxes.Pytorch做出了如下更改: + +1. 将设备从Nvidia GPU迁移到Huawei NPU上。 +2. 在源代码的基础上添加了Apex混合精度进行优化。 +3. 在模型迁移到NPU上后一些不支持或性能较低的算子放到CPU上进行规避。 +4. 针对测试结果添加了新的评估脚本。 + +## Requirements + +```bash +pip install -r requirements.txt +``` + +- NPU 配套的run包安装 +- Python3.7.5 +- PyTorch(NPU版本) +- Apex(NPU版本) + +### 导入环境变量 + +```bash +source scripts/npu_set_env.sh +``` + +### 编译 + +```bash +git clone https://github.com/Levi0223/FDDB_Evaluation.git +cd FDDB_Evaluation +python3 setup.py build_ext --inplace +mv ../convert.py ../split.py ./ +``` + +### 准备数据集 +数据集下载参考源代码仓 + +1. 下载[WIDER_FACE](https://github.com/zisianw/FaceBoxes.PyTorch)数据集,将图片放在这个目录下(数据集包含32203张图片): + + ```bash + $FaceBoxes_ROOT/data/WIDER_FACE/images/ + ``` + + 下载转换后的[标注文件](https://github.com/zisianw/FaceBoxes.PyTorch),将他们放在这个目录下: + + ```bash + $FaceBoxes_ROOT/data/WIDER_FACE/annotations/ + ``` + + 最终数据集目录结构如下: + + ![输入图片说明](https://images.gitee.com/uploads/images/2021/0927/121855_9a16b40b_6515416.png "屏幕截图.png") + +2. 
下载[FDDB](https://github.com/zisianw/FaceBoxes.PyTorch)数据集,将图片放在这个目录下(数据集包含2845张图片): + + ```bash + $FaceBoxes_ROOT/data/FDDB/images/ + ``` + + 最终数据集目录结构如下: + + ![输入图片说明](https://images.gitee.com/uploads/images/2021/0927/121924_9f00b12c_6515416.png "屏幕截图.png") + +## Trainning + +### 单卡性能评估 + +```bash +### 输出单卡FPS +bash scripts/train_performance_1p.sh +``` + +### 单卡训练 + +```bash +### 单卡全量训练 +bash scripts/train_1p.sh +## 日志文件在当前目录下的1p_train.log +``` + +### 多卡性能评估 + +```bash +### 输出多卡FPS +bash scripts/train_performance_8p.sh +``` + +### 多卡训练 + +```bash +### 多卡全量训练 +bash scripts/train_8p.sh +## 日志文件在当前目录下的8p_train.log +``` + +### Test + +```bash +### 测试训练得到的权重文件,生成FDDB_dets.txt +bash test.sh +## 日志文件在当前目录下的test.log +### 解析FDDB_dets.txt文件,打印最终精度 +bash eval.sh +``` + +## Performance + +| | AP | APEX | lOSS_SCALE | EPOCH | +| :-----: | :----: | :--: | :--------: | :---: | +| **GPU** | 0.9440 | O2 | 128 | 300 | +| **NPU** | 0.9396 | O2 | 128 | 300 | + diff --git a/PyTorch/contrib/cv/detection/FaceBoxes/README_raw.md b/PyTorch/contrib/cv/detection/FaceBoxes/README_raw.md index c36af43fb8d9445d151b82a3ff34bc1e60184e40..3ae1866b888230654a79c13e77ef820a1e8fab22 100644 --- a/PyTorch/contrib/cv/detection/FaceBoxes/README_raw.md +++ b/PyTorch/contrib/cv/detection/FaceBoxes/README_raw.md @@ -1,92 +1,92 @@ -# FaceBoxes in PyTorch - -[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) - -By [Zisian Wong](https://github.com/zisianw), [Shifeng Zhang](http://www.cbsr.ia.ac.cn/users/sfzhang/) - -A [PyTorch](https://pytorch.org/) implementation of [FaceBoxes: A CPU Real-time Face Detector with High Accuracy](https://arxiv.org/abs/1708.05234). The official code in Caffe can be found [here](https://github.com/sfzhang15/FaceBoxes). - -## Performance -| Dataset | Original Caffe | PyTorch Implementation | -|:-|:-:|:-:| -| AFW | 98.98 % | 98.55% | -| PASCAL | 96.77 % | 97.05% | -| FDDB | 95.90 % | 96.00% | - -## Citation -Please cite the paper in your publications if it helps your research: - - @inproceedings{zhang2017faceboxes, - title = {Faceboxes: A CPU Real-time Face Detector with High Accuracy}, - author = {Zhang, Shifeng and Zhu, Xiangyu and Lei, Zhen and Shi, Hailin and Wang, Xiaobo and Li, Stan Z.}, - booktitle = {IJCB}, - year = {2017} - } - -### Contents -- [Installation](#installation) -- [Training](#training) -- [Evaluation](#evaluation) -- [References](#references) - -## Installation -1. Install [PyTorch](https://pytorch.org/) >= v1.0.0 following official instruction. - -2. Clone this repository. We will call the cloned directory as `$FaceBoxes_ROOT`. -```Shell -git clone https://github.com/zisianw/FaceBoxes.PyTorch.git -``` - -3. Compile the nms: -```Shell -./make.sh -``` - -_Note: Codes are based on Python 3+._ - -## Training -1. Download [WIDER FACE](https://github.com/zisianw/FaceBoxes.PyTorch) dataset, place the images under this directory: - ```Shell - $FaceBoxes_ROOT/data/WIDER_FACE/images - ``` -2. Convert WIDER FACE annotations to VOC format or download [our converted annotations](https://github.com/zisianw/FaceBoxes.PyTorch), place them under this directory: - ```Shell - $FaceBoxes_ROOT/data/WIDER_FACE/annotations - ``` - -3. Train the model using WIDER FACE: - ```Shell - cd $FaceBoxes_ROOT/ - python3 train.py - ``` - -If you do not wish to train the model, you can download [our pre-trained model](https://github.com/zisianw/FaceBoxes.PyTorch) and save it in `$FaceBoxes_ROOT/weights`. - - -## Evaluation -1. 
Download the images of [AFW](https://github.com/zisianw/FaceBoxes.PyTorch), [PASCAL Face](https://github.com/zisianw/FaceBoxes.PyTorch) and [FDDB]( -https://github.com/zisianw/FaceBoxes.PyTorch) to: -```Shell -$FaceBoxes_ROOT/data/AFW/images/ -$FaceBoxes_ROOT/data/PASCAL/images/ -$FaceBoxes_ROOT/data/FDDB/images/ -``` - -2. Evaluate the trained model using: -```Shell -# dataset choices = ['AFW', 'PASCAL', 'FDDB'] -python3 test.py --dataset FDDB -# evaluate using cpu -python3 test.py --cpu -# visualize detection results -python3 test.py -s --vis_thres 0.3 -``` - -3. Download [eval_tool](https://github.com/sfzhang15/face-eval) to evaluate the performance. - -## References -- [Official release (Caffe)](https://github.com/sfzhang15/FaceBoxes) -- A huge thank you to SSD ports in PyTorch that have been helpful: - * [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch), [RFBNet](https://github.com/ruinmessi/RFBNet) - - _Note: If you can not download the converted annotations, the provided images and the trained model through the above links, you can download them through [BaiduYun](https://pan.baidu.com/s/1HoW3wbldnbmgW2PS4i4Irw)._ +# FaceBoxes in PyTorch + +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) + +By [Zisian Wong](https://github.com/zisianw), [Shifeng Zhang](http://www.cbsr.ia.ac.cn/users/sfzhang/) + +A [PyTorch](https://pytorch.org/) implementation of [FaceBoxes: A CPU Real-time Face Detector with High Accuracy](https://arxiv.org/abs/1708.05234). The official code in Caffe can be found [here](https://github.com/sfzhang15/FaceBoxes). + +## Performance +| Dataset | Original Caffe | PyTorch Implementation | +|:-|:-:|:-:| +| AFW | 98.98 % | 98.55% | +| PASCAL | 96.77 % | 97.05% | +| FDDB | 95.90 % | 96.00% | + +## Citation +Please cite the paper in your publications if it helps your research: + + @inproceedings{zhang2017faceboxes, + title = {Faceboxes: A CPU Real-time Face Detector with High Accuracy}, + author = {Zhang, Shifeng and Zhu, Xiangyu and Lei, Zhen and Shi, Hailin and Wang, Xiaobo and Li, Stan Z.}, + booktitle = {IJCB}, + year = {2017} + } + +### Contents +- [Installation](#installation) +- [Training](#training) +- [Evaluation](#evaluation) +- [References](#references) + +## Installation +1. Install [PyTorch](https://pytorch.org/) >= v1.0.0 following official instruction. + +2. Clone this repository. We will call the cloned directory as `$FaceBoxes_ROOT`. +```Shell +git clone https://github.com/zisianw/FaceBoxes.PyTorch.git +``` + +3. Compile the nms: +```Shell +./make.sh +``` + +_Note: Codes are based on Python 3+._ + +## Training +1. Download [WIDER FACE](https://github.com/zisianw/FaceBoxes.PyTorch) dataset, place the images under this directory: + ```Shell + $FaceBoxes_ROOT/data/WIDER_FACE/images + ``` +2. Convert WIDER FACE annotations to VOC format or download [our converted annotations](https://github.com/zisianw/FaceBoxes.PyTorch), place them under this directory: + ```Shell + $FaceBoxes_ROOT/data/WIDER_FACE/annotations + ``` + +3. Train the model using WIDER FACE: + ```Shell + cd $FaceBoxes_ROOT/ + python3 train.py + ``` + +If you do not wish to train the model, you can download [our pre-trained model](https://github.com/zisianw/FaceBoxes.PyTorch) and save it in `$FaceBoxes_ROOT/weights`. + + +## Evaluation +1. 
Download the images of [AFW](https://github.com/zisianw/FaceBoxes.PyTorch), [PASCAL Face](https://github.com/zisianw/FaceBoxes.PyTorch) and [FDDB]( +https://github.com/zisianw/FaceBoxes.PyTorch) to: +```Shell +$FaceBoxes_ROOT/data/AFW/images/ +$FaceBoxes_ROOT/data/PASCAL/images/ +$FaceBoxes_ROOT/data/FDDB/images/ +``` + +2. Evaluate the trained model using: +```Shell +# dataset choices = ['AFW', 'PASCAL', 'FDDB'] +python3 test.py --dataset FDDB +# evaluate using cpu +python3 test.py --cpu +# visualize detection results +python3 test.py -s --vis_thres 0.3 +``` + +3. Download [eval_tool](https://github.com/sfzhang15/face-eval) to evaluate the performance. + +## References +- [Official release (Caffe)](https://github.com/sfzhang15/FaceBoxes) +- A huge thank you to SSD ports in PyTorch that have been helpful: + * [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch), [RFBNet](https://github.com/ruinmessi/RFBNet) + + _Note: If you can not download the converted annotations, the provided images and the trained model through the above links, you can download them through [BaiduYun](https://pan.baidu.com/s/1HoW3wbldnbmgW2PS4i4Irw)._ diff --git a/PyTorch/contrib/cv/detection/GFocalV2/test/train_finetune_1p.sh b/PyTorch/contrib/cv/detection/GFocalV2/test/train_finetune_1p.sh index 68fe36b8b3753849b65b9e204523945bcc15f7e3..cc995eaca336a4c860b8cb7d76639d792a9a1a3e 100644 --- a/PyTorch/contrib/cv/detection/GFocalV2/test/train_finetune_1p.sh +++ b/PyTorch/contrib/cv/detection/GFocalV2/test/train_finetune_1p.sh @@ -1,136 +1,136 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 -export RANK_SIZE=1 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#网络名称,同目录名称,需要模型审视修改 -Network="GFocal" - -#训练batch_size,,需要模型审视修改 -batch_size=8 -device_id=0 -checkpoint="./work_dirs/gfocal_r50_fpn_1x/latest.pth" -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --checkpoint* ]];then - checkpoint=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#创建DeviceID输出目录,不需要修改 -if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID -fi - -#非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${cur_path}/test/env_npu.sh -fi - -#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -export NPUID=0 -export RANK=0 -python3.7 ./tools/train.py configs/gfocal/gfocal_r50_fpn_1x.py \ - --cfg-options \ - optimizer.lr=0.005 total_epochs=1 data_root=$data_path \ - --seed 0 \ - --gpu-ids 0 \ - --opt-level O1 \ - --checkpoint $checkpoint > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS: " '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'bbox_mAP' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "bbox_mAP: " '{print $NF}'|awk -F "," '{print $1}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep eta:|awk -F "loss: " '{print $NF}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export RANK_SIZE=1 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#网络名称,同目录名称,需要模型审视修改 +Network="GFocal" + +#训练batch_size,,需要模型审视修改 +batch_size=8 +device_id=0 +checkpoint="./work_dirs/gfocal_r50_fpn_1x/latest.pth" +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --checkpoint* ]];then + checkpoint=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#创建DeviceID输出目录,不需要修改 +if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${cur_path}/test/env_npu.sh +fi + +#执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +export NPUID=0 +export RANK=0 +python3.7 ./tools/train.py configs/gfocal/gfocal_r50_fpn_1x.py \ + --cfg-options \ + optimizer.lr=0.005 total_epochs=1 data_root=$data_path \ + --seed 0 \ + --gpu-ids 0 \ + --opt-level O1 \ + --checkpoint $checkpoint > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS: " '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'bbox_mAP' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "bbox_mAP: " '{print $NF}'|awk -F "," '{print $1}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +#打印,不需要修改 +echo "Final Train Accuracy : 
${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep eta:|awk -F "loss: " '{print $NF}' | awk -F "," '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pointnetplus/README.md b/PyTorch/contrib/cv/detection/Pointnetplus/README.md index 3885666ce38e368d980041a12c4651785e3eb03a..74370c2a9b07455b86db2458046ea1088db8fbf9 100644 --- a/PyTorch/contrib/cv/detection/Pointnetplus/README.md +++ b/PyTorch/contrib/cv/detection/Pointnetplus/README.md @@ -1,49 +1,49 @@ -# Pointnetplus - -This implements training of Pointnetplus on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/yanx27/Pointnet_Pointnet2_pytorch). - -## Pointnetplus Detail - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, Pointnetplus is re-implemented using semantics such as custom OP. 
- - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip3 install -r requirements.txt` -- Download the ImageNet dataset from (https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) - -## Training - -To train a model, run `train_classification_xP.py` with the desired model architecture and the path to the ImageNet dataset: - then add [24,512,32,3],[24,1,128,259] transpose op to white list .pth is "/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py" -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data=real_data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data=real_data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data=real_data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data=real_data_path - -# finetuning 1p -bash test/train_finetune_1p.sh --data=real_data_path --model_pth=real_pre_train_model_path -``` - -Log path: - test/output/devie_id/train_${ASCEND_DEVICE_ID}_${Network}.log # training detail log - test/output/devie_id/train_${ASCEND_DEVICE_ID}_${Network}.log # 8p training performance result log - test/output/devie_id/Pointnetplus_bs24_1p_acc.log # 8p training accuracy result log - - - -## WideResnet50_2 training result - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 10.04 | 1 | 1 | O2 | +# Pointnetplus + +This implements training of Pointnetplus on the ImageNet dataset, mainly modified from [pytorch/examples](https://github.com/yanx27/Pointnet_Pointnet2_pytorch). + +## Pointnetplus Detail + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations.Therefore, Pointnetplus is re-implemented using semantics such as custom OP. 
+ + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip3 install -r requirements.txt` +- Download the ImageNet dataset from (https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) + +## Training + +To train a model, run `train_classification_xP.py` with the desired model architecture and the path to the ImageNet dataset: + then add [24,512,32,3],[24,1,128,259] transpose op to white list .pth is "/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py" +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data=real_data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data=real_data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data=real_data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data=real_data_path + +# finetuning 1p +bash test/train_finetune_1p.sh --data=real_data_path --model_pth=real_pre_train_model_path +``` + +Log path: + test/output/devie_id/train_${ASCEND_DEVICE_ID}_${Network}.log # training detail log + test/output/devie_id/train_${ASCEND_DEVICE_ID}_${Network}.log # 8p training performance result log + test/output/devie_id/Pointnetplus_bs24_1p_acc.log # 8p training accuracy result log + + + +## WideResnet50_2 training result + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 10.04 | 1 | 1 | O2 | | 92.0 | 64.1 | 8 | 200 | O2 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pointnetplus/finetune.py b/PyTorch/contrib/cv/detection/Pointnetplus/finetune.py index d429106fc708e6ea8d138ac6ea334ec643334706..ade55c9fb235bc11f5431f5ec9343e3ca52b50bb 100644 --- a/PyTorch/contrib/cv/detection/Pointnetplus/finetune.py +++ b/PyTorch/contrib/cv/detection/Pointnetplus/finetune.py @@ -1,74 +1,74 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import torch -import numpy as np -import argparse -from tqdm import tqdm -import models.provider -from models.ModelNetDataLoader import ModelNetDataLoader -import models.pointnet2_cls_ssg as models - -def inplace_relu(m): - classname = m.__class__.__name__ - if classname.find('ReLU') != -1: - m.inplace=True - -def parse_args(): - '''PARAMETERS''' - parser = argparse.ArgumentParser('training') - parser.add_argument('--batch_size', type=int, default=24, help='batch size in training') - parser.add_argument('--num_category', default=40, type=int, choices=[10, 40], help='training on ModelNet10/40') - parser.add_argument('--epoch', default=2, type=int, help='number of epoch in training') - parser.add_argument('--num_points', type=int, default=1024, help='Point Number') - parser.add_argument('--use_normals', action='store_true', default=False, help='use normals') - parser.add_argument('--process_data', action='store_true', default=True, help='save data offline') - parser.add_argument('--use_uniform_sample', action='store_true', default=False, help='use uniform sampiling') - parser.add_argument('--device', type=str,default='cpu',help='which device to use') - parser.add_argument('--data',type=str, default='./modelnet40_normal_resampled', help='data_path') - parser.add_argument('--num_class',type=int,default=41,help='num of class') - parser.add_argument('--num_point', type=int, default=1024, help='Point Number') - parser.add_argument('--model_pth', type=str, default='./log/classification/pointnet2_cls_ssg/checkpoints/', help='Point Number') - parser.add_argument('--worker', type=int, default=1, help='number ofs workers') - - return parser.parse_args() - -def main(args): - test_dataset = ModelNetDataLoader(root=args.data, args=args, split='test', process_data=args.process_data) - testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.worker) - model_dict = torch.load(str(args.model_pth)+'best_model.pth', map_location='cpu')['model_state_dict'] - model_dict.pop('fc3.weight') - model_dict.pop('fc3.bias') - classifier = models.get_model(args.num_class, normal_channel=args.use_normals) - classifier.apply(inplace_relu) - if args.device !='cpu': - classifier = classifier.npu() - classifier.load_state_dict(model_dict, strict=False) - - for epoch in range(args.epoch): - for batch_id,(points, target) in tqdm(enumerate(testDataLoader, 0), total=len(testDataLoader)): - points = points.transpose(2, 1) - if args.device !='cpu': - points, target = points.npu(), target.npu() - pred, trans_feat = classifier(points) - pred_choice = pred.data.max(1)[1] - print("output class is",pred_choice) - -if __name__ == '__main__': - args = parse_args() - main(args) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import torch +import numpy as np +import argparse +from tqdm import tqdm +import models.provider +from models.ModelNetDataLoader import ModelNetDataLoader +import models.pointnet2_cls_ssg as models + +def inplace_relu(m): + classname = m.__class__.__name__ + if classname.find('ReLU') != -1: + m.inplace=True + +def parse_args(): + '''PARAMETERS''' + parser = argparse.ArgumentParser('training') + parser.add_argument('--batch_size', type=int, default=24, help='batch size in training') + parser.add_argument('--num_category', default=40, type=int, choices=[10, 40], help='training on ModelNet10/40') + parser.add_argument('--epoch', default=2, type=int, help='number of epoch in training') + parser.add_argument('--num_points', type=int, default=1024, help='Point Number') + parser.add_argument('--use_normals', action='store_true', default=False, help='use normals') + parser.add_argument('--process_data', action='store_true', default=True, help='save data offline') + parser.add_argument('--use_uniform_sample', action='store_true', default=False, help='use uniform sampiling') + parser.add_argument('--device', type=str,default='cpu',help='which device to use') + parser.add_argument('--data',type=str, default='./modelnet40_normal_resampled', help='data_path') + parser.add_argument('--num_class',type=int,default=41,help='num of class') + parser.add_argument('--num_point', type=int, default=1024, help='Point Number') + parser.add_argument('--model_pth', type=str, default='./log/classification/pointnet2_cls_ssg/checkpoints/', help='Point Number') + parser.add_argument('--worker', type=int, default=1, help='number ofs workers') + + return parser.parse_args() + +def main(args): + test_dataset = ModelNetDataLoader(root=args.data, args=args, split='test', process_data=args.process_data) + testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.worker) + model_dict = torch.load(str(args.model_pth)+'best_model.pth', map_location='cpu')['model_state_dict'] + model_dict.pop('fc3.weight') + model_dict.pop('fc3.bias') + classifier = models.get_model(args.num_class, normal_channel=args.use_normals) + classifier.apply(inplace_relu) + if args.device !='cpu': + classifier = classifier.npu() + classifier.load_state_dict(model_dict, strict=False) + + for epoch in range(args.epoch): + for batch_id,(points, target) in tqdm(enumerate(testDataLoader, 0), total=len(testDataLoader)): + points = points.transpose(2, 1) + if args.device !='cpu': + points, target = points.npu(), target.npu() + pred, trans_feat = classifier(points) + pred_choice = pred.data.max(1)[1] + print("output class is",pred_choice) + +if __name__ == '__main__': + args = parse_args() + main(args) + + diff --git a/PyTorch/contrib/cv/detection/Pointnetplus/modelzoo_level.txt b/PyTorch/contrib/cv/detection/Pointnetplus/modelzoo_level.txt index dd917ea530560d786765ef72fa9d4c05b10a170e..08016761e0d79f535349f23e5caf5783de1deea4 100644 --- a/PyTorch/contrib/cv/detection/Pointnetplus/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/Pointnetplus/modelzoo_level.txt @@ -1,5 +1,5 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:NOK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pointnetplus/pointnetplus_pth2onnx.py b/PyTorch/contrib/cv/detection/Pointnetplus/pointnetplus_pth2onnx.py index 
bb92b279f8849a1865e791bc60a632c473db756f..4728535261abf781fb2c64b17a541e62015b5fad 100644 --- a/PyTorch/contrib/cv/detection/Pointnetplus/pointnetplus_pth2onnx.py +++ b/PyTorch/contrib/cv/detection/Pointnetplus/pointnetplus_pth2onnx.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import OrderedDict -import torch -import torch.onnx -import argparse -import sys -sys.path.append('./models/models') -import pointnet2_cls_ssg as pointnet2_cls -from pointnet2_utils import farthest_point_sample -from pointnet2_utils import sample_and_group - - -def parse_args(): - '''PARAMETERS''' - parser = argparse.ArgumentParser('off_line_pred') - parser.add_argument('--target_model', type=int, default=1, - required=True, help='target trans_models') - parser.add_argument('--pth_dir', type=str, default='', - required=False, help='target trans_models') - parser.add_argument('--batch_size', type=int, default=1, - required=False, help='batch size') - return parser.parse_args() - - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if k[0:7] == "module.": - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def model_convert(dir): - experiment_dir = dir - dummy_input = torch.randn(args.batch_size, 3, 1024) - checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') - checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') - model = pointnet2_cls.get_model_part1(normal_channel=False) - model.load_state_dict(checkpoint['model_state_dict']) - model.eval() - npoint = 512 - radius = 0.2 - nsample = 32 - points = None - test_input = dummy_input.permute(0, 2, 1) - centroid = farthest_point_sample(test_input, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, points, centroid) - new_points = new_points.permute(0, 3, 2, 1) - input_names = ["xyz", "samp_points"] - output_names = ["l1_xyz", "l1_point"] - torch.onnx.export(model, (new_xyz, new_points), - "Pointnetplus_part1_bs{}.onnx".format(args.batch_size), - input_names=input_names, verbose=True, output_names=output_names, opset_version=11) - - -def model_convert2(dir): - experiment_dir = dir - dummy_xyz_input = torch.randn(args.batch_size, 3, 512) - dummy_point_input = torch.randn(args.batch_size, 128, 512) - checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') - checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') - model = pointnet2_cls.get_model_part2(normal_channel=False) - model.load_state_dict(checkpoint['model_state_dict']) - model.eval() - npoint = 128 - radius = 0.4 - nsample = 64 - points = None - test_input = dummy_xyz_input.permute(0, 2, 1) - test_points = dummy_point_input.permute(0, 2, 1) - centroid = farthest_point_sample(test_input, npoint) - new_xyz, new_points = sample_and_group(npoint, radius, nsample, 
test_input, test_points, centroid) - new_points = new_points.permute(0, 3, 2, 1) - new_xyz = new_xyz.permute(0, 2, 1) - input_names = ["l1_xyz", "l1_points"] - output_names = ["class", "l3_point"] - - torch.onnx.export(model, (new_xyz, new_points), - "Pointnetplus_part2_bs{}.onnx".format(args.batch_size), - input_names=input_names, verbose=True, output_names=output_names, opset_version=11) - - -if __name__ == '__main__': - args = parse_args() - if(args.target_model == 1): - model_convert(args.pth_dir) - else: - model_convert2(args.pth_dir) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +import torch +import torch.onnx +import argparse +import sys +sys.path.append('./models/models') +import pointnet2_cls_ssg as pointnet2_cls +from pointnet2_utils import farthest_point_sample +from pointnet2_utils import sample_and_group + + +def parse_args(): + '''PARAMETERS''' + parser = argparse.ArgumentParser('off_line_pred') + parser.add_argument('--target_model', type=int, default=1, + required=True, help='target trans_models') + parser.add_argument('--pth_dir', type=str, default='', + required=False, help='target trans_models') + parser.add_argument('--batch_size', type=int, default=1, + required=False, help='batch size') + return parser.parse_args() + + +def proc_node_module(checkpoint, AttrName): + new_state_dict = OrderedDict() + for k, v in checkpoint[AttrName].items(): + if k[0:7] == "module.": + name = k[7:] + else: + name = k[0:] + new_state_dict[name] = v + return new_state_dict + + +def model_convert(dir): + experiment_dir = dir + dummy_input = torch.randn(args.batch_size, 3, 1024) + checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') + checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') + model = pointnet2_cls.get_model_part1(normal_channel=False) + model.load_state_dict(checkpoint['model_state_dict']) + model.eval() + npoint = 512 + radius = 0.2 + nsample = 32 + points = None + test_input = dummy_input.permute(0, 2, 1) + centroid = farthest_point_sample(test_input, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, points, centroid) + new_points = new_points.permute(0, 3, 2, 1) + input_names = ["xyz", "samp_points"] + output_names = ["l1_xyz", "l1_point"] + torch.onnx.export(model, (new_xyz, new_points), + "Pointnetplus_part1_bs{}.onnx".format(args.batch_size), + input_names=input_names, verbose=True, output_names=output_names, opset_version=11) + + +def model_convert2(dir): + experiment_dir = dir + dummy_xyz_input = torch.randn(args.batch_size, 3, 512) + dummy_point_input = torch.randn(args.batch_size, 128, 512) + checkpoint = torch.load(str(experiment_dir) + '/best_model.pth',map_location = 'cpu') + checkpoint['model_state_dict'] = proc_node_module(checkpoint,'model_state_dict') + model = pointnet2_cls.get_model_part2(normal_channel=False) + model.load_state_dict(checkpoint['model_state_dict']) + model.eval() + 
npoint = 128 + radius = 0.4 + nsample = 64 + points = None + test_input = dummy_xyz_input.permute(0, 2, 1) + test_points = dummy_point_input.permute(0, 2, 1) + centroid = farthest_point_sample(test_input, npoint) + new_xyz, new_points = sample_and_group(npoint, radius, nsample, test_input, test_points, centroid) + new_points = new_points.permute(0, 3, 2, 1) + new_xyz = new_xyz.permute(0, 2, 1) + input_names = ["l1_xyz", "l1_points"] + output_names = ["class", "l3_point"] + + torch.onnx.export(model, (new_xyz, new_points), + "Pointnetplus_part2_bs{}.onnx".format(args.batch_size), + input_names=input_names, verbose=True, output_names=output_names, opset_version=11) + + +if __name__ == '__main__': + args = parse_args() + if(args.target_model == 1): + model_convert(args.pth_dir) + else: + model_convert2(args.pth_dir) diff --git a/PyTorch/contrib/cv/detection/Pointnetplus/requirements.txt b/PyTorch/contrib/cv/detection/Pointnetplus/requirements.txt index d9fdcd1a8753a518f8423fc1b0d7555d7aa2333e..309763d470004b757237e3a619fe97ba33408dc3 100644 --- a/PyTorch/contrib/cv/detection/Pointnetplus/requirements.txt +++ b/PyTorch/contrib/cv/detection/Pointnetplus/requirements.txt @@ -1,6 +1,6 @@ -apex -numpy==4.62.3 -torch==1.5.0 -torchvision==0.6.0 -onnx +apex +numpy==4.62.3 +torch==1.5.0 +torchvision==0.6.0 +onnx tqdm==4.62.3 \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/README.md b/PyTorch/contrib/cv/detection/Pyramidbox/README.md index d11beb2f2613928a41c9e894f6fed1a3c1c1918b..17b9e57acdfc1523505a5d65857c2b02a9914a92 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/README.md +++ b/PyTorch/contrib/cv/detection/Pyramidbox/README.md @@ -1,138 +1,138 @@ -## Pyramidbox - -​ A Context-assisted Single Shot Face Detector. - - - -### Requirements - -``` -根据requirements.txt安装依赖环境 -- Download the WIDER_FACE dataset from http://shuoyang1213.me/WIDERFACE/ -``` - - - -在代码仓下创建WIDER_FACE目录,存放数据集文件 - -``` -|-WIDER_FACE - |-wider_face_split - |-wider_face_test.mat - |-wider_face_test_filelist.txt - |-wider_face_train.mat - |-wider_face_train_bbx_gt.txt - |-wider_face_val.mat - |-wider_face_val_bbx_gt.txt - |-WIDER_train - |-images - |-0--Parade - |-1--Handshaking - ... - |-WIDER_val - |-images - |-0--Parade - |-1--Handshaking - ... 
-``` - - - -参照原代码仓README下载vgg权重,放在weights目录下 - -``` -|-Pyramidbox - |-weights - |-vgg16_reducedfc.pth -``` - - - -### prepare - -运行prepare_wider_data.py - -``` -python prepare_wider_data.py --data_path='数据集路径' -``` - - - -### Training - -单卡训练 - -``` -python train.py --batch_size=8 --lr=5e-4 -``` - -多卡训练 - -``` -python -m torch.distributed.launch --nproc_per_node=8 train.py --world_size=8 --batch_size=8 --lr=5e-4 --multinpu=True --device_list='0,1,2,3,4,5,6,7' -``` - - - -### Test - -在运行wider_test.py前,应先做以下修改: - -``` -1、修改第53行 -sys.path.append("/home/wch/Pyramidbox/") #根据代码仓实际所在位置进行修改 -``` - -修改后,运行wider_test.py - -``` -python tools/wider_test.py --model="/home/wch/Pyramidbox/weights/pyramidbox.pth" --data_path='数据集路径' -#model参数根据模型权重文件保存位置进行修改 -``` - -运行以下脚本,评估精度 - -``` -cd evaluate -python setup.py build_ext --inplace -python evaluation.py --pred ../output/pyramidbox1_val/ --gt '数据集路径/wider_face_split' -``` - - - -### 启动脚本 - -8卡训练,并显示性能和精度 - -``` -bash ./test/train_full_8p.sh --data_path='数据集路径' -``` - -测试单卡训练性能 - -``` -bash ./test/train_performance_1p.sh --data_path='数据集路径' -``` - -测试多卡训练性能 - -``` -bash ./test/train_performance_8p.sh --data_path='数据集路径' -``` - -模型迁移脚本,注意脚本中的resume参数只能指定为保存的“pyramidbox_checkpoint.pth”权重 - -``` -bash ./test/train_finetune_1p.sh --data_path='数据集路径' -``` - -精度数据 - -``` -==================== Results ==================== -Easy Val AP: 0.9519612346942784 -Medium Val AP: 0.9446576258551937 -Hard Val AP: 0.9053749943031708 -================================================= -``` - +## Pyramidbox + +​ A Context-assisted Single Shot Face Detector. + + + +### Requirements + +``` +根据requirements.txt安装依赖环境 +- Download the WIDER_FACE dataset from http://shuoyang1213.me/WIDERFACE/ +``` + + + +在代码仓下创建WIDER_FACE目录,存放数据集文件 + +``` +|-WIDER_FACE + |-wider_face_split + |-wider_face_test.mat + |-wider_face_test_filelist.txt + |-wider_face_train.mat + |-wider_face_train_bbx_gt.txt + |-wider_face_val.mat + |-wider_face_val_bbx_gt.txt + |-WIDER_train + |-images + |-0--Parade + |-1--Handshaking + ... + |-WIDER_val + |-images + |-0--Parade + |-1--Handshaking + ... 
+``` + + + +参照原代码仓README下载vgg权重,放在weights目录下 + +``` +|-Pyramidbox + |-weights + |-vgg16_reducedfc.pth +``` + + + +### prepare + +运行prepare_wider_data.py + +``` +python prepare_wider_data.py --data_path='数据集路径' +``` + + + +### Training + +单卡训练 + +``` +python train.py --batch_size=8 --lr=5e-4 +``` + +多卡训练 + +``` +python -m torch.distributed.launch --nproc_per_node=8 train.py --world_size=8 --batch_size=8 --lr=5e-4 --multinpu=True --device_list='0,1,2,3,4,5,6,7' +``` + + + +### Test + +在运行wider_test.py前,应先做以下修改: + +``` +1、修改第53行 +sys.path.append("/home/wch/Pyramidbox/") #根据代码仓实际所在位置进行修改 +``` + +修改后,运行wider_test.py + +``` +python tools/wider_test.py --model="/home/wch/Pyramidbox/weights/pyramidbox.pth" --data_path='数据集路径' +#model参数根据模型权重文件保存位置进行修改 +``` + +运行以下脚本,评估精度 + +``` +cd evaluate +python setup.py build_ext --inplace +python evaluation.py --pred ../output/pyramidbox1_val/ --gt '数据集路径/wider_face_split' +``` + + + +### 启动脚本 + +8卡训练,并显示性能和精度 + +``` +bash ./test/train_full_8p.sh --data_path='数据集路径' +``` + +测试单卡训练性能 + +``` +bash ./test/train_performance_1p.sh --data_path='数据集路径' +``` + +测试多卡训练性能 + +``` +bash ./test/train_performance_8p.sh --data_path='数据集路径' +``` + +模型迁移脚本,注意脚本中的resume参数只能指定为保存的“pyramidbox_checkpoint.pth”权重 + +``` +bash ./test/train_finetune_1p.sh --data_path='数据集路径' +``` + +精度数据 + +``` +==================== Results ==================== +Easy Val AP: 0.9519612346942784 +Medium Val AP: 0.9446576258551937 +Hard Val AP: 0.9053749943031708 +================================================= +``` + diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/README_raw.md b/PyTorch/contrib/cv/detection/Pyramidbox/README_raw.md index 9adcd839ae7bbd38696d2b0c875528e59362f1b4..94496c1511e40a88c3f8da3f61d9b8855f40577e 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/README_raw.md +++ b/PyTorch/contrib/cv/detection/Pyramidbox/README_raw.md @@ -1,72 +1,72 @@ -## PyramidBox: A Context-assisted Single Shot Face Detector.## -[A PyTorch Implementation of PyramidBox](https://arxiv.org/abs/1803.07737) - - -### Description -I train pyramidbox with pytorch and the result approaches the original paper result,the pretrained model can be downloaded in [vgg](https://pan.baidu.com/s/1Q-YqoxJyqvln6KTcIck1tQ),the final model can be downloaded in [Pyramidbox](https://pan.baidu.com/s/1VtzgB9srkJY4SUtVM3n8tw).the AP in WIDER FACE as following: - -| | Easy MAP | Medium MAP | hard MAP | -| -------- | ---------|------------| --------- | -| origin paper| 0.960 | 0.948 | 0.888 | -| this repo | 0.948 | 0.938 | 0.880 | - -the AP in AFW,PASCAL,FDDB as following: - -| AFW | PASCAL | FDDB | -| --------- |-----------| ---------| -| 99.65 | 99.02 | 0.983 | - -the gap is small with origin paper,I train 120k batch_size 4 which is different from paper,which maybe cause the gap,if you have more gpu ,the final result maybe better. - -### Requirement -* pytorch 0.3 -* opencv -* numpy -* easydict - -### Prepare data -1. download WIDER face dataset -2. modify data/config.py -3. ``` python prepare_wider_data.py``` - - -### Train -``` -python train.py --batch_size 4 - --lr 5e-4 -``` - -### Evalution -according to yourself dataset path,modify data/config.py -1. Evaluate on AFW. -``` -python tools/afw_test.py -``` -2. Evaluate on FDDB -``` -python tools/fddb_test.py -``` -3. Evaluate on PASCAL face -``` -python tools/pascal_test.py -``` -4. test on WIDER FACE -``` -python tools/wider_test.py -``` -### Demo -you can test yourself image -``` -python demo.py -``` - -### Result -
-demo -demo -
- -### References -* [PyramidBox: A Context-assisted Single Shot Face Detector](https://arxiv.org/abs/1803.07737) -* [PyramidBox model](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/face_detection) -* [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch) +## PyramidBox: A Context-assisted Single Shot Face Detector.## +[A PyTorch Implementation of PyramidBox](https://arxiv.org/abs/1803.07737) + + +### Description +I train pyramidbox with pytorch and the result approaches the original paper result,the pretrained model can be downloaded in [vgg](https://pan.baidu.com/s/1Q-YqoxJyqvln6KTcIck1tQ),the final model can be downloaded in [Pyramidbox](https://pan.baidu.com/s/1VtzgB9srkJY4SUtVM3n8tw).the AP in WIDER FACE as following: + +| | Easy MAP | Medium MAP | hard MAP | +| -------- | ---------|------------| --------- | +| origin paper| 0.960 | 0.948 | 0.888 | +| this repo | 0.948 | 0.938 | 0.880 | + +the AP in AFW,PASCAL,FDDB as following: + +| AFW | PASCAL | FDDB | +| --------- |-----------| ---------| +| 99.65 | 99.02 | 0.983 | + +the gap is small with origin paper,I train 120k batch_size 4 which is different from paper,which maybe cause the gap,if you have more gpu ,the final result maybe better. + +### Requirement +* pytorch 0.3 +* opencv +* numpy +* easydict + +### Prepare data +1. download WIDER face dataset +2. modify data/config.py +3. ``` python prepare_wider_data.py``` + + +### Train +``` +python train.py --batch_size 4 + --lr 5e-4 +``` + +### Evalution +according to yourself dataset path,modify data/config.py +1. Evaluate on AFW. +``` +python tools/afw_test.py +``` +2. Evaluate on FDDB +``` +python tools/fddb_test.py +``` +3. Evaluate on PASCAL face +``` +python tools/pascal_test.py +``` +4. test on WIDER FACE +``` +python tools/wider_test.py +``` +### Demo +you can test yourself image +``` +python demo.py +``` + +### Result +
+demo +demo +
+ +### References +* [PyramidBox: A Context-assisted Single Shot Face Detector](https://arxiv.org/abs/1803.07737) +* [PyramidBox model](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/face_detection) +* [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/data/config.py b/PyTorch/contrib/cv/detection/Pyramidbox/data/config.py index 10a42e3329ac64db39fca5aebee792fab019f2f6..5cead837e5760bcb9ea1acb480febb4bce2a894f 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/data/config.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/data/config.py @@ -1,87 +1,87 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -from easydict import EasyDict -import numpy as np - - -_C = EasyDict() -cfg = _C -# data augument config -_C.expand_prob = 0.5 -_C.expand_max_ratio = 4 -_C.hue_prob = 0.5 -_C.hue_delta = 18 -_C.contrast_prob = 0.5 -_C.contrast_delta = 0.5 -_C.saturation_prob = 0.5 -_C.saturation_delta = 0.5 -_C.brightness_prob = 0.5 -_C.brightness_delta = 0.125 -_C.data_anchor_sampling_prob = 0.5 -_C.min_face_size = 6.0 -_C.apply_distort = True -_C.apply_expand = False -_C.img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype( - 'float32') -_C.resize_width = 640 -_C.resize_height = 640 -_C.scale = 1 / 127.0 -_C.anchor_sampling = True -_C.filter_min_face = True - -# train config -_C.LR_STEPS = (80000,100000,120000) -_C.MAX_STEPS = 150000 -_C.EPOCHES = 100 - -# anchor config -_C.FEATURE_MAPS = [160, 80, 40, 20, 10, 5] -_C.INPUT_SIZE = 640 -_C.STEPS = [4, 8, 16, 32, 64, 128] -_C.ANCHOR_SIZES = [16, 32, 64, 128, 256, 512] -_C.CLIP = False -_C.VARIANCE = [0.1, 0.2] - -# loss config -_C.NUM_CLASSES = 2 -_C.OVERLAP_THRESH = 0.35 -_C.NEG_POS_RATIOS = 3 - - -# detection config -_C.NMS_THRESH = 0.3 -_C.TOP_K = 5000 -_C.KEEP_TOP_K = 750 -_C.CONF_THRESH = 0.05 - - -# dataset config -#_C.HOME = '/home/wch/Pyramidbox/' - -# face config -_C.FACE = EasyDict() -_C.FACE.TRAIN_FILE = 'data/face_train.txt' -_C.FACE.VAL_FILE = 'data/face_val.txt' -_C.FACE.FDDB_DIR = '/home/data/lj/FDDB' -_C.FACE.WIDER_DIR = '/home/wch/Pyramidbox/WIDER_FACE/' -_C.FACE.AFW_DIR = '/home/data/lj/AFW' -_C.FACE.PASCAL_DIR = '/home/data/lj/PASCAL_FACE' -_C.FACE.OVERLAP_THRESH = 0.35 - +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +from easydict import EasyDict +import numpy as np + + +_C = EasyDict() +cfg = _C +# data augument config +_C.expand_prob = 0.5 +_C.expand_max_ratio = 4 +_C.hue_prob = 0.5 +_C.hue_delta = 18 +_C.contrast_prob = 0.5 +_C.contrast_delta = 0.5 +_C.saturation_prob = 0.5 +_C.saturation_delta = 0.5 +_C.brightness_prob = 0.5 +_C.brightness_delta = 0.125 +_C.data_anchor_sampling_prob = 0.5 +_C.min_face_size = 6.0 +_C.apply_distort = True +_C.apply_expand = False +_C.img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype( + 'float32') +_C.resize_width = 640 +_C.resize_height = 640 +_C.scale = 1 / 127.0 +_C.anchor_sampling = True +_C.filter_min_face = True + +# train config +_C.LR_STEPS = (80000,100000,120000) +_C.MAX_STEPS = 150000 +_C.EPOCHES = 100 + +# anchor config +_C.FEATURE_MAPS = [160, 80, 40, 20, 10, 5] +_C.INPUT_SIZE = 640 +_C.STEPS = [4, 8, 16, 32, 64, 128] +_C.ANCHOR_SIZES = [16, 32, 64, 128, 256, 512] +_C.CLIP = False +_C.VARIANCE = [0.1, 0.2] + +# loss config +_C.NUM_CLASSES = 2 +_C.OVERLAP_THRESH = 0.35 +_C.NEG_POS_RATIOS = 3 + + +# detection config +_C.NMS_THRESH = 0.3 +_C.TOP_K = 5000 +_C.KEEP_TOP_K = 750 +_C.CONF_THRESH = 0.05 + + +# dataset config +#_C.HOME = '/home/wch/Pyramidbox/' + +# face config +_C.FACE = EasyDict() +_C.FACE.TRAIN_FILE = 'data/face_train.txt' +_C.FACE.VAL_FILE = 'data/face_val.txt' +_C.FACE.FDDB_DIR = '/home/data/lj/FDDB' +_C.FACE.WIDER_DIR = '/home/wch/Pyramidbox/WIDER_FACE/' +_C.FACE.AFW_DIR = '/home/data/lj/AFW' +_C.FACE.PASCAL_DIR = '/home/data/lj/PASCAL_FACE' +_C.FACE.OVERLAP_THRESH = 0.35 + diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/data/widerface.py b/PyTorch/contrib/cv/detection/Pyramidbox/data/widerface.py index 5b6b67835f65f8202d613dbf2f6897b66dc46cfa..e81a461df6cf845db9bf191376035e3a3749a23c 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/data/widerface.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/data/widerface.py @@ -1,172 +1,172 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import torch -from PIL import Image, ImageDraw -import torch.utils.data as data -import numpy as np -import random -from utils.augmentations import preprocess - - -class WIDERDetection(data.Dataset): - """docstring for WIDERDetection""" - - def __init__(self, list_file, mode='train'): - super(WIDERDetection, self).__init__() - self.mode = mode - self.fnames = [] - self.boxes = [] - self.labels = [] - - with open(list_file) as f: - lines = f.readlines() - - for line in lines: - line = line.strip().split() - num_faces = int(line[1]) - box = [] - label = [] - for i in range(num_faces): - x = float(line[2 + 5 * i]) - y = float(line[3 + 5 * i]) - w = float(line[4 + 5 * i]) - h = float(line[5 + 5 * i]) - c = int(line[6 + 5 * i]) - if w <= 0 or h <= 0: - continue - box.append([x, y, x + w, y + h]) - label.append(c) - if len(box) > 0: - self.fnames.append(line[0]) - self.boxes.append(box) - self.labels.append(label) - - self.num_samples = len(self.boxes) - - def __len__(self): - return self.num_samples - - def __getitem__(self, index): - img, face_target,head_target = self.pull_item(index) - return img, face_target,head_target - - def pull_item(self, index): - while True: - image_path = self.fnames[index] - img = Image.open(image_path) - if img.mode == 'L': - img = img.convert('RGB') - - im_width, im_height = img.size - boxes = self.annotransform( - np.array(self.boxes[index]), im_width, im_height) - label = np.array(self.labels[index]) - bbox_labels = np.hstack((label[:, np.newaxis], boxes)).tolist() - img, sample_labels = preprocess( - img, bbox_labels, self.mode, image_path) - sample_labels = np.array(sample_labels) - if len(sample_labels) > 0: - face_target = np.hstack( - (sample_labels[:, 1:], sample_labels[:, 0][:, np.newaxis])) - - assert (face_target[:, 2] > face_target[:, 0]).any() - assert (face_target[:, 3] > face_target[:, 1]).any() - - #img = img.astype(np.float32) - face_box = face_target[:, :-1] - head_box = self.expand_bboxes(face_box) - head_target = np.hstack((head_box, face_target[ - :, -1][:, np.newaxis])) - break - else: - index = random.randrange(0, self.num_samples) - - - #img = Image.fromarray(img) - ''' - draw = ImageDraw.Draw(img) - w,h = img.size - for bbox in sample_labels: - bbox = (bbox[1:] * np.array([w, h, w, h])).tolist() - - draw.rectangle(bbox,outline='red') - img.save('image.jpg') - ''' - return torch.from_numpy(img), face_target, head_target - - - def annotransform(self, boxes, im_width, im_height): - boxes[:, 0] /= im_width - boxes[:, 1] /= im_height - boxes[:, 2] /= im_width - boxes[:, 3] /= im_height - return boxes - - def expand_bboxes(self, - bboxes, - expand_left=2., - expand_up=2., - expand_right=2., - expand_down=2.): - expand_bboxes = [] - for bbox in bboxes: - xmin = bbox[0] - ymin = bbox[1] - xmax = bbox[2] - ymax = bbox[3] - w = xmax - xmin - h = ymax - ymin - ex_xmin = max(xmin - w / expand_left, 0.) - ex_ymin = max(ymin - h / expand_up, 0.) - ex_xmax = max(xmax + w / expand_right, 0.) - ex_ymax = max(ymax + h / expand_down, 0.) - expand_bboxes.append([ex_xmin, ex_ymin, ex_xmax, ex_ymax]) - expand_bboxes = np.array(expand_bboxes) - return expand_bboxes - -def detection_collate(batch): - """Custom collate fn for dealing with batches of images that have a different - number of associated object annotations (bounding boxes). 
- - Arguments: - batch: (tuple) A tuple of tensor images and lists of annotations - - Return: - A tuple containing: - 1) (tensor) batch of images stacked on their 0 dim - 2) (list of tensors) annotations for a given image are stacked on - 0 dim - """ - face_targets = [] - head_targets = [] - - imgs = [] - for sample in batch: - imgs.append(sample[0]) - face_targets.append(torch.FloatTensor(sample[1])) - head_targets.append(torch.FloatTensor(sample[2])) - return torch.stack(imgs, 0), face_targets,head_targets - - - -if __name__ == '__main__': - from config import cfg - dataset = WIDERDetection(cfg.FACE.TRAIN_FILE) - #for i in range(len(dataset)): - dataset.pull_item(14) +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch +from PIL import Image, ImageDraw +import torch.utils.data as data +import numpy as np +import random +from utils.augmentations import preprocess + + +class WIDERDetection(data.Dataset): + """docstring for WIDERDetection""" + + def __init__(self, list_file, mode='train'): + super(WIDERDetection, self).__init__() + self.mode = mode + self.fnames = [] + self.boxes = [] + self.labels = [] + + with open(list_file) as f: + lines = f.readlines() + + for line in lines: + line = line.strip().split() + num_faces = int(line[1]) + box = [] + label = [] + for i in range(num_faces): + x = float(line[2 + 5 * i]) + y = float(line[3 + 5 * i]) + w = float(line[4 + 5 * i]) + h = float(line[5 + 5 * i]) + c = int(line[6 + 5 * i]) + if w <= 0 or h <= 0: + continue + box.append([x, y, x + w, y + h]) + label.append(c) + if len(box) > 0: + self.fnames.append(line[0]) + self.boxes.append(box) + self.labels.append(label) + + self.num_samples = len(self.boxes) + + def __len__(self): + return self.num_samples + + def __getitem__(self, index): + img, face_target,head_target = self.pull_item(index) + return img, face_target,head_target + + def pull_item(self, index): + while True: + image_path = self.fnames[index] + img = Image.open(image_path) + if img.mode == 'L': + img = img.convert('RGB') + + im_width, im_height = img.size + boxes = self.annotransform( + np.array(self.boxes[index]), im_width, im_height) + label = np.array(self.labels[index]) + bbox_labels = np.hstack((label[:, np.newaxis], boxes)).tolist() + img, sample_labels = preprocess( + img, bbox_labels, self.mode, image_path) + sample_labels = np.array(sample_labels) + if len(sample_labels) > 0: + face_target = np.hstack( + (sample_labels[:, 1:], sample_labels[:, 0][:, np.newaxis])) + + assert (face_target[:, 2] > face_target[:, 0]).any() + assert (face_target[:, 3] > face_target[:, 1]).any() + + #img = img.astype(np.float32) + face_box = face_target[:, :-1] + head_box = self.expand_bboxes(face_box) + head_target = np.hstack((head_box, face_target[ + :, -1][:, np.newaxis])) + break + else: + index = random.randrange(0, self.num_samples) + + + #img = 
Image.fromarray(img) + ''' + draw = ImageDraw.Draw(img) + w,h = img.size + for bbox in sample_labels: + bbox = (bbox[1:] * np.array([w, h, w, h])).tolist() + + draw.rectangle(bbox,outline='red') + img.save('image.jpg') + ''' + return torch.from_numpy(img), face_target, head_target + + + def annotransform(self, boxes, im_width, im_height): + boxes[:, 0] /= im_width + boxes[:, 1] /= im_height + boxes[:, 2] /= im_width + boxes[:, 3] /= im_height + return boxes + + def expand_bboxes(self, + bboxes, + expand_left=2., + expand_up=2., + expand_right=2., + expand_down=2.): + expand_bboxes = [] + for bbox in bboxes: + xmin = bbox[0] + ymin = bbox[1] + xmax = bbox[2] + ymax = bbox[3] + w = xmax - xmin + h = ymax - ymin + ex_xmin = max(xmin - w / expand_left, 0.) + ex_ymin = max(ymin - h / expand_up, 0.) + ex_xmax = max(xmax + w / expand_right, 0.) + ex_ymax = max(ymax + h / expand_down, 0.) + expand_bboxes.append([ex_xmin, ex_ymin, ex_xmax, ex_ymax]) + expand_bboxes = np.array(expand_bboxes) + return expand_bboxes + +def detection_collate(batch): + """Custom collate fn for dealing with batches of images that have a different + number of associated object annotations (bounding boxes). + + Arguments: + batch: (tuple) A tuple of tensor images and lists of annotations + + Return: + A tuple containing: + 1) (tensor) batch of images stacked on their 0 dim + 2) (list of tensors) annotations for a given image are stacked on + 0 dim + """ + face_targets = [] + head_targets = [] + + imgs = [] + for sample in batch: + imgs.append(sample[0]) + face_targets.append(torch.FloatTensor(sample[1])) + head_targets.append(torch.FloatTensor(sample[2])) + return torch.stack(imgs, 0), face_targets,head_targets + + + +if __name__ == '__main__': + from config import cfg + dataset = WIDERDetection(cfg.FACE.TRAIN_FILE) + #for i in range(len(dataset)): + dataset.pull_item(14) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/demo.py b/PyTorch/contrib/cv/detection/Pyramidbox/demo.py index 53ee7dbbd64a70c25f504e138cfd0ff8821e12fc..44a9271b12266d07da5ae6ded8b60591b64d04f4 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/demo.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/demo.py @@ -1,141 +1,141 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -import os -import argparse -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -from torch.autograd import Variable - -import torch.npu -import cv2 -import time -import numpy as np -from PIL import Image - -from data.config import cfg -from pyramidbox import build_net -from utils.augmentations import to_chw_bgr - -parser = argparse.ArgumentParser(description='pyramidbox demo') -parser.add_argument('--save_dir', - type=str, default='tmp/', - help='Directory for detect result') -parser.add_argument('--model', - type=str, default='/home/wch/Pyramidbox.pytorch-master/weights/pyramidbox_120000_99.02.pth', - help='trained model') -parser.add_argument('--thresh', - default=0.4, type=float, - help='Final confidence threshold') -args = parser.parse_args() - - -if not os.path.exists(args.save_dir): - os.makedirs(args.save_dir) - -use_npu = torch.npu.is_available() - -if use_npu: - device=f'npu:0' - torch.npu.set_device(device) - - -def detect(net, img_path, thresh): - img = Image.open(img_path) - if img.mode == 'L': - img = img.convert('RGB') - - img = np.array(img) - height, width, _ = img.shape - max_im_shrink = np.sqrt( - 1200 * 1100 / (img.shape[0] * img.shape[1])) - image = cv2.resize(img, None, None, fx=max_im_shrink, - fy=max_im_shrink, interpolation=cv2.INTER_LINEAR) - - x = to_chw_bgr(image) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - #x = x * cfg.scale - - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if use_npu: - x = x.npu() - t1 = time.time() - y = net(x) - detections = y.data - scale = torch.Tensor([img.shape[1], img.shape[0], - img.shape[1], img.shape[0]]) - - for i in range(detections.size(1)): - j = 0 - while detections[0, i, j, 0] >= thresh: - score = detections[0, i, j, 0] - pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int) - left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3]) - j += 1 - cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2) - conf = "{:.2f}".format(score) - text_size, baseline = cv2.getTextSize( - conf, cv2.FONT_HERSHEY_SIMPLEX, 0.3, 1) - p1 = (left_up[0], left_up[1] - text_size[1]) - cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline), - (p1[0] + text_size[0], p1[1] + text_size[1]), [255, 0, 0], -1) - cv2.putText(img, conf, (p1[0], p1[ - 1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8) - - t2 = time.time() - print('detect:{} timer:{}'.format(img_path, t2 - t1)) - - cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img) - - -if __name__ == '__main__': - net = build_net('test', cfg.NUM_CLASSES) - net.load_state_dict(torch.load(args.model,map_location=lambda storage, loc: storage)) - net.eval() - - if use_npu: - net.npu() - cudnn.benckmark = True - - img_path = './img' - img_list = [os.path.join(img_path, x) - for x in 
os.listdir(img_path) if x.endswith('jpg')] - for path in img_list: - detect(net, path, args.thresh) +#-*- coding:utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import os +import argparse +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +from torch.autograd import Variable + +import torch.npu +import cv2 +import time +import numpy as np +from PIL import Image + +from data.config import cfg +from pyramidbox import build_net +from utils.augmentations import to_chw_bgr + +parser = argparse.ArgumentParser(description='pyramidbox demo') +parser.add_argument('--save_dir', + type=str, default='tmp/', + help='Directory for detect result') +parser.add_argument('--model', + type=str, default='/home/wch/Pyramidbox.pytorch-master/weights/pyramidbox_120000_99.02.pth', + help='trained model') +parser.add_argument('--thresh', + default=0.4, type=float, + help='Final confidence threshold') +args = parser.parse_args() + + +if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + +use_npu = torch.npu.is_available() + +if use_npu: + device=f'npu:0' + torch.npu.set_device(device) + + +def detect(net, img_path, thresh): + img = Image.open(img_path) + if img.mode == 'L': + img = img.convert('RGB') + + img = np.array(img) + height, width, _ = img.shape + max_im_shrink = np.sqrt( + 1200 * 1100 / (img.shape[0] * img.shape[1])) + image = cv2.resize(img, None, None, fx=max_im_shrink, + fy=max_im_shrink, interpolation=cv2.INTER_LINEAR) + + x = to_chw_bgr(image) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + #x = x * cfg.scale + + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_npu: + x = x.npu() + t1 = time.time() + y = net(x) + detections = y.data + scale = torch.Tensor([img.shape[1], img.shape[0], + 
img.shape[1], img.shape[0]]) + + for i in range(detections.size(1)): + j = 0 + while detections[0, i, j, 0] >= thresh: + score = detections[0, i, j, 0] + pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int) + left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3]) + j += 1 + cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2) + conf = "{:.2f}".format(score) + text_size, baseline = cv2.getTextSize( + conf, cv2.FONT_HERSHEY_SIMPLEX, 0.3, 1) + p1 = (left_up[0], left_up[1] - text_size[1]) + cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline), + (p1[0] + text_size[0], p1[1] + text_size[1]), [255, 0, 0], -1) + cv2.putText(img, conf, (p1[0], p1[ + 1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8) + + t2 = time.time() + print('detect:{} timer:{}'.format(img_path, t2 - t1)) + + cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img) + + +if __name__ == '__main__': + net = build_net('test', cfg.NUM_CLASSES) + net.load_state_dict(torch.load(args.model,map_location=lambda storage, loc: storage)) + net.eval() + + if use_npu: + net.npu() + cudnn.benckmark = True + + img_path = './img' + img_list = [os.path.join(img_path, x) + for x in os.listdir(img_path) if x.endswith('jpg')] + for path in img_list: + detect(net, path, args.thresh) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/__init__.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/__init__.py index 1c1dae6697759d04d081f0430b13b014fca73da1..f58076f3a350eb20c90909108b22a3f78f3e400f 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/__init__.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/__init__.py @@ -1,9 +1,9 @@ -#-*- coding:utf-8 -*- - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -from .functions import * -from .modules import * +#-*- coding:utf-8 -*- + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .functions import * +from .modules import * diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/bbox_utils.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/bbox_utils.py index 6381fc9ea2928f0b771db757117a95bb0c158f6f..fb7a1987be428f2fabac8fcd96fdc1a2ea2d16a0 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/bbox_utils.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/bbox_utils.py @@ -1,339 +1,339 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -import torch -import torch.npu - -def point_form(boxes): - """ Convert prior_boxes to (xmin, ymin, xmax, ymax) - representation for comparison to point form ground truth data. - Args: - boxes: (tensor) center-size default boxes from priorbox layers. - Return: - boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
- """ - return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin - boxes[:, :2] + boxes[:, 2:] / 2), 1) # xmax, ymax - - -def center_size(boxes): - """ Convert prior_boxes to (cx, cy, w, h) - representation for comparison to center-size form ground truth data. - Args: - boxes: (tensor) point_form boxes - Return: - boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. - """ - return torch.cat([(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy - boxes[:, 2:] - boxes[:, :2]], 1) # w, h - - -def intersect(box_a, box_b): - """ We resize both tensors to [A,B,2] without new malloc: - [A,2] -> [A,1,2] -> [A,B,2] - [B,2] -> [1,B,2] -> [A,B,2] - Then we compute the area of intersect between box_a and box_b. - Args: - box_a: (tensor) bounding boxes, Shape: [A,4]. - box_b: (tensor) bounding boxes, Shape: [B,4]. - Return: - (tensor) intersection area, Shape: [A,B]. - """ - A = box_a.size(0) - B = box_b.size(0) - max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), - box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) - min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), - box_b[:, :2].unsqueeze(0).expand(A, B, 2)) - inter = torch.clamp((max_xy - min_xy), min=0) - return inter[:, :, 0] * inter[:, :, 1] - - -def jaccard(box_a, box_b): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. Here we operate on - ground truth boxes and default boxes. - E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] - box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] - Return: - jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] - """ - #when run in npu ,box_b is still in cpu - #box_b = box_b.npu() - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2] - box_a[:, 0]) * - (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] - area_b = ((box_b[:, 2] - box_b[:, 0]) * - (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] - union = area_a + area_b - inter - return inter / union # [A,B] - - -def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): - """Match each prior box with the ground truth box of the highest jaccard - overlap, encode the bounding boxes, then return the matched indices - corresponding to both confidence and location preds. - Args: - threshold: (float) The overlap threshold used when mathing boxes. - truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. - priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. - variances: (tensor) Variances corresponding to each prior coord, - Shape: [num_priors, 4]. - labels: (tensor) All the class labels for the image, Shape: [num_obj]. - loc_t: (tensor) Tensor to be filled w/ endcoded location targets. - conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. - idx: (int) current batch index - Return: - The matched indices corresponding to 1)location and 2)confidence preds. 
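# --- Editor's note: illustrative IoU arithmetic, not a line of the patch. ---
# jaccard() above evaluates A ∩ B / (area(A) + area(B) - A ∩ B) for every
# (ground-truth, prior) pair. For two point-form boxes:
#   A = [0, 0, 2, 2] (area 4) and B = [1, 1, 3, 3] (area 4)
#   intersection = (2 - 1) * (2 - 1) = 1, union = 4 + 4 - 1 = 7, IoU = 1/7 ≈ 0.143
import torch

box_a = torch.tensor([[0., 0., 2., 2.]])
box_b = torch.tensor([[1., 1., 3., 3.]])
max_xy = torch.min(box_a[:, 2:], box_b[:, 2:])        # [2., 2.]
min_xy = torch.max(box_a[:, :2], box_b[:, :2])        # [1., 1.]
inter = torch.clamp(max_xy - min_xy, min=0).prod(1)   # tensor([1.])
area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
iou = inter / (area_a + area_b - inter)               # tensor([0.1429])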
- """ - # jaccard index - overlaps = jaccard( - truths, - point_form(priors) - ) - # (Bipartite Matching) - # [1,num_objects] best prior for each ground truth - best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) - # [1,num_priors] best ground truth for each prior - best_truth_overlap, best_truth_idx = overlaps.max( - 0, keepdim=True) # 0-2000 - best_truth_idx.squeeze_(0) - best_truth_overlap.squeeze_(0) - best_prior_idx.squeeze_(1) - best_prior_overlap.squeeze_(1) - best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior - # TODO refactor: index best_prior_idx with long tensor - # ensure every gt matches with its prior of max overlap - for j in range(best_prior_idx.size(0)): - best_truth_idx[best_prior_idx[j]] = j - _th1, _th2, _th3 = threshold # _th1 = 0.1 ,_th2 = 0.35,_th3 = 0.5 - - N = (torch.sum(best_prior_overlap >= _th2) + - torch.sum(best_prior_overlap >= _th3)) // 2 - matches = truths[best_truth_idx] # Shape: [num_priors,4] - conf = labels[best_truth_idx] # Shape: [num_priors] - conf[best_truth_overlap < _th2] = 0 # label as background - - best_truth_overlap_clone = best_truth_overlap.clone() - add_idx = best_truth_overlap_clone.gt( - _th1).eq(best_truth_overlap_clone.lt(_th2)) - best_truth_overlap_clone[1 - add_idx] = 0 - stage2_overlap, stage2_idx = best_truth_overlap_clone.sort(descending=True) - - stage2_overlap = stage2_overlap.gt(_th1) - - if N > 0: - N = torch.sum(stage2_overlap[:N]) if torch.sum( - stage2_overlap[:N]) < N else N - conf[stage2_idx[:N]] += 1 - - loc = encode(matches, priors, variances) - loc_t[idx] = loc # [num_priors,4] encoded offsets to learn - conf_t[idx] = conf # [num_priors] top class label for each prior - - -def match_ssd(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): - """Match each prior box with the ground truth box of the highest jaccard - overlap, encode the bounding boxes, then return the matched indices - corresponding to both confidence and location preds. - Args: - threshold: (float) The overlap threshold used when mathing boxes. - truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. - priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. - variances: (tensor) Variances corresponding to each prior coord, - Shape: [num_priors, 4]. - labels: (tensor) All the class labels for the image, Shape: [num_obj]. - loc_t: (tensor) Tensor to be filled w/ endcoded location targets. - conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. - idx: (int) current batch index - Return: - The matched indices corresponding to 1)location and 2)confidence preds. 
- """ - # jaccard index - overlaps = jaccard(truths,point_form(priors)) - # (Bipartite Matching) - # [1,num_objects] best prior for each ground truth - best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) - # [1,num_priors] best ground truth for each prior - best_truth_overlap, best_truth_idx = overlaps.max( - 0, keepdim=True) # 0-2000 - best_truth_idx.squeeze_(0) - best_truth_overlap.squeeze_(0) - best_prior_idx.squeeze_(1) - best_prior_overlap.squeeze_(1) - best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior - # TODO refactor: index best_prior_idx with long tensor - # ensure every gt matches with its prior of max overlap - for j in range(best_prior_idx.size(0)): - best_truth_idx[best_prior_idx[j]] = j - matches = truths[best_truth_idx] # Shape: [num_priors,4] - conf = labels[best_truth_idx] # Shape: [num_priors] - conf[best_truth_overlap < threshold] = 0 # label as background - loc = encode(matches, priors, variances) - loc_t[idx] = loc # [num_priors,4] encoded offsets to learn - conf_t[idx] = conf # [num_priors] top class label for each prior - - -def encode(matched, priors, variances): - """Encode the variances from the priorbox layers into the ground truth boxes - we have matched (based on jaccard overlap) with the prior boxes. - Args: - matched: (tensor) Coords of ground truth for each prior in point-form - Shape: [num_priors, 4]. - priors: (tensor) Prior boxes in center-offset form - Shape: [num_priors,4]. - variances: (list[float]) Variances of priorboxes - Return: - encoded boxes (tensor), Shape: [num_priors, 4] - """ - - # dist b/t match center and prior's center - g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] - # encode variance - g_cxcy /= (variances[0] * priors[:, 2:]) - # match wh / prior wh - g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] - #g_wh = torch.log(g_wh) / variances[1] - g_wh = torch.log(g_wh) / variances[1] - # return target for smooth_l1_loss - return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] - - -# Adapted from https://github.com/Hakuyume/chainer-ssd -def decode(loc, priors, variances): - """Decode locations from predictions using priors to undo - the encoding we did for offset regression at train time. - Args: - loc (tensor): location predictions for loc layers, - Shape: [num_priors,4] - priors (tensor): Prior boxes in center-offset form. - Shape: [num_priors,4]. - variances: (list[float]) Variances of priorboxes - Return: - decoded bounding box predictions - """ - - boxes = torch.cat(( - priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], - priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) - boxes[:, :2] -= boxes[:, 2:] / 2 - boxes[:, 2:] += boxes[:, :2] - return boxes - - -def log_sum_exp(x): - """Utility function for computing log_sum_exp while determining - This will be used to determine unaveraged confidence loss across - all examples in a batch. - Args: - x (Variable(tensor)): conf_preds from conf layers - """ - x_max = x.data.max() - return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max - - -# Original author: Francisco Massa: -# https://github.com/fmassa/object-detection.torch -# Ported to PyTorch by Max deGroot (02/01/2017) -def nms(boxes, scores, overlap=0.5, top_k=200): - """Apply non-maximum suppression at test time to avoid detecting too many - overlapping bounding boxes for a given object. - Args: - boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. - scores: (tensor) The class predscores for the img, Shape:[num_priors]. 
- overlap: (float) The overlap thresh for suppressing unnecessary boxes. - top_k: (int) The Maximum number of box preds to consider. - Return: - The indices of the kept boxes with respect to num_priors. - """ - - keep = scores.new(scores.size(0)).zero_().long() - if boxes.numel() == 0: - return keep - x1 = boxes[:, 0] - y1 = boxes[:, 1] - x2 = boxes[:, 2] - y2 = boxes[:, 3] - area = torch.mul(x2 - x1, y2 - y1) - v, idx = scores.sort(0) # sort in ascending order - # I = I[v >= 0.01] - idx = idx[-top_k:] # indices of the top-k largest vals - xx1 = boxes.new() - yy1 = boxes.new() - xx2 = boxes.new() - yy2 = boxes.new() - w = boxes.new() - h = boxes.new() - - # keep = torch.Tensor() - count = 0 - while idx.numel() > 0: - i = idx[-1] # index of current largest val - # keep.append(i) - keep[count] = i - count += 1 - if idx.size(0) == 1: - break - idx = idx[:-1] # remove kept element from view - #add code--------------------------------------- - idx= torch.autograd.Variable(idx, requires_grad=False) - idx = idx.data - x1 = torch.autograd.Variable(x1, requires_grad=False) - x1 = x1.data - y1 = torch.autograd.Variable(y1, requires_grad=False) - y1 = y1.data - x2 = torch.autograd.Variable(x2, requires_grad=False) - x2 = x2.data - y2 = torch.autograd.Variable(y2, requires_grad=False) - y2 = y2.data - #add code end-------------------------------------- - # load bboxes of next highest vals - torch.index_select(x1, 0, idx, out=xx1) - torch.index_select(y1, 0, idx, out=yy1) - torch.index_select(x2, 0, idx, out=xx2) - torch.index_select(y2, 0, idx, out=yy2) - # store element-wise max with next highest score - xx1 = torch.clamp(xx1, min=x1[i]) - yy1 = torch.clamp(yy1, min=y1[i]) - xx2 = torch.clamp(xx2, max=x2[i]) - yy2 = torch.clamp(yy2, max=y2[i]) - w.resize_as_(xx2) - h.resize_as_(yy2) - w = xx2 - xx1 - h = yy2 - yy1 - # check sizes of xx1 and xx2.. after each iteration - w = torch.clamp(w, min=0.0) - h = torch.clamp(h, min=0.0) - inter = w * h - #add code--------------------------------- - area = torch.autograd.Variable(area, requires_grad=False) - area = area.data - idx= torch.autograd.Variable(idx, requires_grad=False) - idx = idx.data - #add code end ----------------------------------- - # IoU = i / (area(a) + area(b) - i) - rem_areas = torch.index_select(area, 0, idx) # load remaining areas) - union = (rem_areas - inter) + area[i] - IoU = inter / union # store result in iou - # keep only elements with an IoU <= overlap - idx = idx[IoU.le(overlap)] - return keep, count +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import torch +import torch.npu + +def point_form(boxes): + """ Convert prior_boxes to (xmin, ymin, xmax, ymax) + representation for comparison to point form ground truth data. + Args: + boxes: (tensor) center-size default boxes from priorbox layers. 
+ Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin + boxes[:, :2] + boxes[:, 2:] / 2), 1) # xmax, ymax + + +def center_size(boxes): + """ Convert prior_boxes to (cx, cy, w, h) + representation for comparison to center-size form ground truth data. + Args: + boxes: (tensor) point_form boxes + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat([(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy + boxes[:, 2:] - boxes[:, :2]], 1) # w, h + + +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [A,4]. + box_b: (tensor) bounding boxes, Shape: [B,4]. + Return: + (tensor) intersection area, Shape: [A,B]. + """ + A = box_a.size(0) + B = box_b.size(0) + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), + box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), + box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + return inter[:, :, 0] * inter[:, :, 1] + + +def jaccard(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + #when run in npu ,box_b is still in cpu + #box_b = box_b.npu() + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2] - box_a[:, 0]) * + (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2] - box_b[:, 0]) * + (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): + """Match each prior box with the ground truth box of the highest jaccard + overlap, encode the bounding boxes, then return the matched indices + corresponding to both confidence and location preds. + Args: + threshold: (float) The overlap threshold used when mathing boxes. + truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. + priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. + variances: (tensor) Variances corresponding to each prior coord, + Shape: [num_priors, 4]. + labels: (tensor) All the class labels for the image, Shape: [num_obj]. + loc_t: (tensor) Tensor to be filled w/ endcoded location targets. + conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. + idx: (int) current batch index + Return: + The matched indices corresponding to 1)location and 2)confidence preds. 
+ """ + # jaccard index + overlaps = jaccard( + truths, + point_form(priors) + ) + # (Bipartite Matching) + # [1,num_objects] best prior for each ground truth + best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) + # [1,num_priors] best ground truth for each prior + best_truth_overlap, best_truth_idx = overlaps.max( + 0, keepdim=True) # 0-2000 + best_truth_idx.squeeze_(0) + best_truth_overlap.squeeze_(0) + best_prior_idx.squeeze_(1) + best_prior_overlap.squeeze_(1) + best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior + # TODO refactor: index best_prior_idx with long tensor + # ensure every gt matches with its prior of max overlap + for j in range(best_prior_idx.size(0)): + best_truth_idx[best_prior_idx[j]] = j + _th1, _th2, _th3 = threshold # _th1 = 0.1 ,_th2 = 0.35,_th3 = 0.5 + + N = (torch.sum(best_prior_overlap >= _th2) + + torch.sum(best_prior_overlap >= _th3)) // 2 + matches = truths[best_truth_idx] # Shape: [num_priors,4] + conf = labels[best_truth_idx] # Shape: [num_priors] + conf[best_truth_overlap < _th2] = 0 # label as background + + best_truth_overlap_clone = best_truth_overlap.clone() + add_idx = best_truth_overlap_clone.gt( + _th1).eq(best_truth_overlap_clone.lt(_th2)) + best_truth_overlap_clone[1 - add_idx] = 0 + stage2_overlap, stage2_idx = best_truth_overlap_clone.sort(descending=True) + + stage2_overlap = stage2_overlap.gt(_th1) + + if N > 0: + N = torch.sum(stage2_overlap[:N]) if torch.sum( + stage2_overlap[:N]) < N else N + conf[stage2_idx[:N]] += 1 + + loc = encode(matches, priors, variances) + loc_t[idx] = loc # [num_priors,4] encoded offsets to learn + conf_t[idx] = conf # [num_priors] top class label for each prior + + +def match_ssd(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): + """Match each prior box with the ground truth box of the highest jaccard + overlap, encode the bounding boxes, then return the matched indices + corresponding to both confidence and location preds. + Args: + threshold: (float) The overlap threshold used when mathing boxes. + truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. + priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. + variances: (tensor) Variances corresponding to each prior coord, + Shape: [num_priors, 4]. + labels: (tensor) All the class labels for the image, Shape: [num_obj]. + loc_t: (tensor) Tensor to be filled w/ endcoded location targets. + conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. + idx: (int) current batch index + Return: + The matched indices corresponding to 1)location and 2)confidence preds. 
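# --- Editor's note: illustrative sketch of the offset encoding implemented by encode() and
# decode() in the hunk below; not a line of the patch. Variances of [0.1, 0.2] are the usual
# SSD defaults and are only an assumption here -- the real values come from cfg.VARIANCE. ---
import torch

variances = [0.1, 0.2]
prior = torch.tensor([[0.50, 0.50, 0.20, 0.20]])    # cx, cy, w, h
gt    = torch.tensor([[0.45, 0.45, 0.65, 0.75]])    # matched ground-truth box, point form
g_cxcy = ((gt[:, :2] + gt[:, 2:]) / 2 - prior[:, :2]) / (variances[0] * prior[:, 2:])
g_wh   = torch.log((gt[:, 2:] - gt[:, :2]) / prior[:, 2:]) / variances[1]
target = torch.cat([g_cxcy, g_wh], 1)               # regression target for the smooth L1 loss
# decode() applies the inverse transform to the network's predictions at test time, so
# decoding `target` with the same prior and variances recovers `gt` up to float error.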
+ """ + # jaccard index + overlaps = jaccard(truths,point_form(priors)) + # (Bipartite Matching) + # [1,num_objects] best prior for each ground truth + best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) + # [1,num_priors] best ground truth for each prior + best_truth_overlap, best_truth_idx = overlaps.max( + 0, keepdim=True) # 0-2000 + best_truth_idx.squeeze_(0) + best_truth_overlap.squeeze_(0) + best_prior_idx.squeeze_(1) + best_prior_overlap.squeeze_(1) + best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior + # TODO refactor: index best_prior_idx with long tensor + # ensure every gt matches with its prior of max overlap + for j in range(best_prior_idx.size(0)): + best_truth_idx[best_prior_idx[j]] = j + matches = truths[best_truth_idx] # Shape: [num_priors,4] + conf = labels[best_truth_idx] # Shape: [num_priors] + conf[best_truth_overlap < threshold] = 0 # label as background + loc = encode(matches, priors, variances) + loc_t[idx] = loc # [num_priors,4] encoded offsets to learn + conf_t[idx] = conf # [num_priors] top class label for each prior + + +def encode(matched, priors, variances): + """Encode the variances from the priorbox layers into the ground truth boxes + we have matched (based on jaccard overlap) with the prior boxes. + Args: + matched: (tensor) Coords of ground truth for each prior in point-form + Shape: [num_priors, 4]. + priors: (tensor) Prior boxes in center-offset form + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + encoded boxes (tensor), Shape: [num_priors, 4] + """ + + # dist b/t match center and prior's center + g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, 2:]) + # match wh / prior wh + g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] + #g_wh = torch.log(g_wh) / variances[1] + g_wh = torch.log(g_wh) / variances[1] + # return target for smooth_l1_loss + return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] + + +# Adapted from https://github.com/Hakuyume/chainer-ssd +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat(( + priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes + + +def log_sum_exp(x): + """Utility function for computing log_sum_exp while determining + This will be used to determine unaveraged confidence loss across + all examples in a batch. + Args: + x (Variable(tensor)): conf_preds from conf layers + """ + x_max = x.data.max() + return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max + + +# Original author: Francisco Massa: +# https://github.com/fmassa/object-detection.torch +# Ported to PyTorch by Max deGroot (02/01/2017) +def nms(boxes, scores, overlap=0.5, top_k=200): + """Apply non-maximum suppression at test time to avoid detecting too many + overlapping bounding boxes for a given object. + Args: + boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. + scores: (tensor) The class predscores for the img, Shape:[num_priors]. 
+ overlap: (float) The overlap thresh for suppressing unnecessary boxes. + top_k: (int) The Maximum number of box preds to consider. + Return: + The indices of the kept boxes with respect to num_priors. + """ + + keep = scores.new(scores.size(0)).zero_().long() + if boxes.numel() == 0: + return keep + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + area = torch.mul(x2 - x1, y2 - y1) + v, idx = scores.sort(0) # sort in ascending order + # I = I[v >= 0.01] + idx = idx[-top_k:] # indices of the top-k largest vals + xx1 = boxes.new() + yy1 = boxes.new() + xx2 = boxes.new() + yy2 = boxes.new() + w = boxes.new() + h = boxes.new() + + # keep = torch.Tensor() + count = 0 + while idx.numel() > 0: + i = idx[-1] # index of current largest val + # keep.append(i) + keep[count] = i + count += 1 + if idx.size(0) == 1: + break + idx = idx[:-1] # remove kept element from view + #add code--------------------------------------- + idx= torch.autograd.Variable(idx, requires_grad=False) + idx = idx.data + x1 = torch.autograd.Variable(x1, requires_grad=False) + x1 = x1.data + y1 = torch.autograd.Variable(y1, requires_grad=False) + y1 = y1.data + x2 = torch.autograd.Variable(x2, requires_grad=False) + x2 = x2.data + y2 = torch.autograd.Variable(y2, requires_grad=False) + y2 = y2.data + #add code end-------------------------------------- + # load bboxes of next highest vals + torch.index_select(x1, 0, idx, out=xx1) + torch.index_select(y1, 0, idx, out=yy1) + torch.index_select(x2, 0, idx, out=xx2) + torch.index_select(y2, 0, idx, out=yy2) + # store element-wise max with next highest score + xx1 = torch.clamp(xx1, min=x1[i]) + yy1 = torch.clamp(yy1, min=y1[i]) + xx2 = torch.clamp(xx2, max=x2[i]) + yy2 = torch.clamp(yy2, max=y2[i]) + w.resize_as_(xx2) + h.resize_as_(yy2) + w = xx2 - xx1 + h = yy2 - yy1 + # check sizes of xx1 and xx2.. 
after each iteration + w = torch.clamp(w, min=0.0) + h = torch.clamp(h, min=0.0) + inter = w * h + #add code--------------------------------- + area = torch.autograd.Variable(area, requires_grad=False) + area = area.data + idx= torch.autograd.Variable(idx, requires_grad=False) + idx = idx.data + #add code end ----------------------------------- + # IoU = i / (area(a) + area(b) - i) + rem_areas = torch.index_select(area, 0, idx) # load remaining areas) + union = (rem_areas - inter) + area[i] + IoU = inter / union # store result in iou + # keep only elements with an IoU <= overlap + idx = idx[IoU.le(overlap)] + return keep, count diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/__init__.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/__init__.py index 5d4c8d36bbd7d73676d51d087aa928ec2328bc64..d8d93bae0453c74f51fe56dee8098a1ef816f0e0 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/__init__.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/__init__.py @@ -1,5 +1,5 @@ -from .prior_box import PriorBox -from .detection import Detect - -__all__=['Detect','PriorBox'] - +from .prior_box import PriorBox +from .detection import Detect + +__all__=['Detect','PriorBox'] + diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/detection.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/detection.py index a37ae6bb94dbfa4dc57588f8648f8b1e749ebfb6..b68c7fb405e0bbccacd1c6d7f113cf66120a7fab 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/detection.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/detection.py @@ -1,82 +1,82 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import torch - -from ..bbox_utils import decode, nms -from torch.autograd import Function - - -class Detect(Function): - """At test time, Detect is the final layer of SSD. Decode location preds, - apply non-maximum suppression to location predictions based on conf - scores and threshold to a top_k number of output predictions for both - confidence score and locations. 
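# --- Editor's note: illustrative sketch of the greedy NMS step that Detect relies on; not a
# line of the patch. torchvision is used only as an independent reference implementation
# and is an extra dependency that the patch itself does not require. ---
# With overlap = 0.5 and three detections of the same face:
#   box 0 (score 0.9), box 1 (score 0.8, IoU ≈ 0.82 with box 0), box 2 (score 0.6, disjoint)
# the loop keeps box 0, suppresses box 1, then keeps box 2 -> keep = [0, 2], count = 2.
import torch
from torchvision.ops import nms as reference_nms

boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 52., 52.],
                      [80., 80., 120., 120.]])
scores = torch.tensor([0.9, 0.8, 0.6])
keep = reference_nms(boxes, scores, iou_threshold=0.5)   # tensor([0, 2])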
- """ - - def __init__(self, cfg): - self.num_classes = cfg.NUM_CLASSES - self.top_k = cfg.TOP_K - self.nms_thresh = cfg.NMS_THRESH - self.conf_thresh = cfg.CONF_THRESH - self.variance = cfg.VARIANCE - - def forward(self, loc_data, conf_data, prior_data): - """ - Args: - loc_data: (tensor) Loc preds from loc layers - Shape: [batch,num_priors*4] - conf_data: (tensor) Shape: Conf preds from conf layers - Shape: [batch*num_priors,num_classes] - prior_data: (tensor) Prior boxes and variances from priorbox layers - Shape: [1,num_priors,4] - """ - #print('loc_data device:{}'.format(loc_data.device)) - #print('conf_data device:{}'.format(conf_data.device)) - #print('prior_data device:{}'.format(prior_data.device)) - num = loc_data.size(0) - num_priors = prior_data.size(0) - - conf_preds = conf_data.view( - num, num_priors, self.num_classes).transpose(2, 1) - batch_priors = prior_data.view(-1, num_priors, - 4).expand(num, num_priors, 4) - batch_priors = batch_priors.contiguous().view(-1, 4) - - decoded_boxes = decode(loc_data.view(-1, 4), - batch_priors, self.variance) - decoded_boxes = decoded_boxes.view(num, num_priors, 4) - - output = torch.zeros(num, self.num_classes, self.top_k, 5) - - for i in range(num): - boxes = decoded_boxes[i].clone() - conf_scores = conf_preds[i].clone() - for cl in range(1, self.num_classes): - c_mask = conf_scores[cl].gt(self.conf_thresh) - scores = conf_scores[cl][c_mask] - #change code - if scores.numel() == 0: - continue - l_mask = c_mask.unsqueeze(1).expand_as(boxes) - boxes_ = boxes[l_mask].view(-1, 4) - ids, count = nms(boxes_, scores, self.nms_thresh, self.top_k) - output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), - boxes_[ids[:count]]), 1) - - return output +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import torch + +from ..bbox_utils import decode, nms +from torch.autograd import Function + + +class Detect(Function): + """At test time, Detect is the final layer of SSD. Decode location preds, + apply non-maximum suppression to location predictions based on conf + scores and threshold to a top_k number of output predictions for both + confidence score and locations. 
+ """ + + def __init__(self, cfg): + self.num_classes = cfg.NUM_CLASSES + self.top_k = cfg.TOP_K + self.nms_thresh = cfg.NMS_THRESH + self.conf_thresh = cfg.CONF_THRESH + self.variance = cfg.VARIANCE + + def forward(self, loc_data, conf_data, prior_data): + """ + Args: + loc_data: (tensor) Loc preds from loc layers + Shape: [batch,num_priors*4] + conf_data: (tensor) Shape: Conf preds from conf layers + Shape: [batch*num_priors,num_classes] + prior_data: (tensor) Prior boxes and variances from priorbox layers + Shape: [1,num_priors,4] + """ + #print('loc_data device:{}'.format(loc_data.device)) + #print('conf_data device:{}'.format(conf_data.device)) + #print('prior_data device:{}'.format(prior_data.device)) + num = loc_data.size(0) + num_priors = prior_data.size(0) + + conf_preds = conf_data.view( + num, num_priors, self.num_classes).transpose(2, 1) + batch_priors = prior_data.view(-1, num_priors, + 4).expand(num, num_priors, 4) + batch_priors = batch_priors.contiguous().view(-1, 4) + + decoded_boxes = decode(loc_data.view(-1, 4), + batch_priors, self.variance) + decoded_boxes = decoded_boxes.view(num, num_priors, 4) + + output = torch.zeros(num, self.num_classes, self.top_k, 5) + + for i in range(num): + boxes = decoded_boxes[i].clone() + conf_scores = conf_preds[i].clone() + for cl in range(1, self.num_classes): + c_mask = conf_scores[cl].gt(self.conf_thresh) + scores = conf_scores[cl][c_mask] + #change code + if scores.numel() == 0: + continue + l_mask = c_mask.unsqueeze(1).expand_as(boxes) + boxes_ = boxes[l_mask].view(-1, 4) + ids, count = nms(boxes_, scores, self.nms_thresh, self.top_k) + output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), + boxes_[ids[:count]]), 1) + + return output diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/prior_box.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/prior_box.py index 48bd01be69ac66cfbb9f083fb56759247bf2de4f..b09f5707e04a6d8005dfa63e06e4718722d11ace 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/prior_box.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/functions/prior_box.py @@ -1,73 +1,73 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import torch -from itertools import product as product -import math - - -class PriorBox(object): - """Compute priorbox coordinates in center-offset form for each source - feature map. 
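# --- Editor's note: illustrative arithmetic, not a line of the patch. PriorBox places one
# square anchor per feature-map cell, centred at ((j + 0.5) / f_kw, (i + 0.5) / f_kh) and
# sized min_size_k / im{w,h}, so the prior count is simply the sum of feath * featw over the
# detection levels. The strides below are an assumption for illustration only; the values
# actually used come from cfg.STEPS and the feature_maps list passed into PriorBox. ---
assumed_steps = (4, 8, 16, 32, 64, 128)
feature_maps = [(640 // s, 640 // s) for s in assumed_steps]
num_priors = sum(h * w for h, w in feature_maps)
# 160*160 + 80*80 + 40*40 + 20*20 + 10*10 + 5*5 = 34125 rows in the (num_priors, 4) output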
- """ - - def __init__(self, input_size, feature_maps,cfg): - super(PriorBox, self).__init__() - self.imh = input_size[0] - self.imw = input_size[1] - - # number of priors for feature map location (either 4 or 6) - self.variance = cfg.VARIANCE or [0.1] - #self.feature_maps = cfg.FEATURE_MAPS - self.min_sizes = cfg.ANCHOR_SIZES - self.steps = cfg.STEPS - self.clip = cfg.CLIP - for v in self.variance: - if v <= 0: - raise ValueError('Variances must be greater than 0') - self.feature_maps = feature_maps - - - def forward(self): - mean = [] - for k in range(len(self.feature_maps)): - feath = self.feature_maps[k][0] - featw = self.feature_maps[k][1] - for i, j in product(range(feath), range(featw)): - f_kw = self.imw / self.steps[k] - f_kh = self.imh / self.steps[k] - - cx = (j + 0.5) / f_kw - cy = (i + 0.5) / f_kh - - s_kw = self.min_sizes[k] / self.imw - s_kh = self.min_sizes[k] / self.imh - - mean += [cx, cy, s_kw, s_kh] - - output = torch.Tensor(mean).view(-1, 4) - if self.clip: - output.clamp_(max=1, min=0) - return output - - -if __name__ == '__main__': - from data.config import cfg - p = PriorBox([640, 640], cfg) - out = p.forward() - print(out.size()) +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import torch +from itertools import product as product +import math + + +class PriorBox(object): + """Compute priorbox coordinates in center-offset form for each source + feature map. 
+ """ + + def __init__(self, input_size, feature_maps,cfg): + super(PriorBox, self).__init__() + self.imh = input_size[0] + self.imw = input_size[1] + + # number of priors for feature map location (either 4 or 6) + self.variance = cfg.VARIANCE or [0.1] + #self.feature_maps = cfg.FEATURE_MAPS + self.min_sizes = cfg.ANCHOR_SIZES + self.steps = cfg.STEPS + self.clip = cfg.CLIP + for v in self.variance: + if v <= 0: + raise ValueError('Variances must be greater than 0') + self.feature_maps = feature_maps + + + def forward(self): + mean = [] + for k in range(len(self.feature_maps)): + feath = self.feature_maps[k][0] + featw = self.feature_maps[k][1] + for i, j in product(range(feath), range(featw)): + f_kw = self.imw / self.steps[k] + f_kh = self.imh / self.steps[k] + + cx = (j + 0.5) / f_kw + cy = (i + 0.5) / f_kh + + s_kw = self.min_sizes[k] / self.imw + s_kh = self.min_sizes[k] / self.imh + + mean += [cx, cy, s_kw, s_kh] + + output = torch.Tensor(mean).view(-1, 4) + if self.clip: + output.clamp_(max=1, min=0) + return output + + +if __name__ == '__main__': + from data.config import cfg + p = PriorBox([640, 640], cfg) + out = p.forward() + print(out.size()) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/__init__.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/__init__.py index d1433bff7ffa09ad46d4a46e8c28bc2b170ef072..9abf8e13503525d87011d79acb5a2dc0bf4b7673 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/__init__.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/__init__.py @@ -1,12 +1,12 @@ -#-*- coding:utf-8 -*- - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -from .l2norm import L2Norm -from .multibox_loss import MultiBoxLoss - -__all__ = ['L2Norm', 'MultiBoxLoss'] - +#-*- coding:utf-8 -*- + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +from .l2norm import L2Norm +from .multibox_loss import MultiBoxLoss + +__all__ = ['L2Norm', 'MultiBoxLoss'] + diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/l2norm.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/l2norm.py index 0c77185ed8c281ec7a78d139e8d426d5b8dd562b..ca04e0bb3fbe4c17a434b353f5c40fb1a683bfa6 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/l2norm.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/l2norm.py @@ -1,48 +1,48 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
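# --- Editor's note: minimal sketch of what the L2Norm module defined in the hunk below
# computes; not a line of the patch. The scale of 10 is only an illustration -- the real
# value is whatever `scale` the network passes to L2Norm. ---
import torch

x = torch.randn(1, 256, 8, 8)                            # conv feature map
weight = torch.full((256,), 10.0)                        # learned per-channel gamma
norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + 1e-10  # channel-wise L2 norm per location
out = weight.view(1, -1, 1, 1) * (x / norm)
# After the division every (h, w) position carries a unit-length channel vector; the learned
# weight then rescales it. The usual motivation (as in SSD/ParseNet) is to keep shallow
# feature magnitudes comparable to deeper ones before they reach the detection heads.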
-from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -import torch -import torch.nn as nn -import torch.nn.init as init -from torch.autograd import Function -from torch.autograd import Variable - - - -class L2Norm(nn.Module): - def __init__(self,n_channels, scale): - super(L2Norm,self).__init__() - self.n_channels = n_channels - self.gamma = scale or None - self.eps = 1e-10 - self.weight = nn.Parameter(torch.Tensor(self.n_channels)) - self.reset_parameters() - - def reset_parameters(self): - init.constant_(self.weight,self.gamma) - - def forward(self, x): - norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps - #x /= norm - x = torch.div(x,norm) - out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x - return out - - +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + + +import torch +import torch.nn as nn +import torch.nn.init as init +from torch.autograd import Function +from torch.autograd import Variable + + + +class L2Norm(nn.Module): + def __init__(self,n_channels, scale): + super(L2Norm,self).__init__() + self.n_channels = n_channels + self.gamma = scale or None + self.eps = 1e-10 + self.weight = nn.Parameter(torch.Tensor(self.n_channels)) + self.reset_parameters() + + def reset_parameters(self): + init.constant_(self.weight,self.gamma) + + def forward(self, x): + norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps + #x /= norm + x = torch.div(x,norm) + out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x + return out + + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/multibox_loss.py b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/multibox_loss.py index 18d2ddff33f4363036e76534623577de44221e0d..9b8b1d1078825ed8bc36fb8de90f7ffff1628262 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/multibox_loss.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/layers/modules/multibox_loss.py @@ -1,159 +1,159 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -import torch.npu - -from ..bbox_utils import match, log_sum_exp, match_ssd - - -class MultiBoxLoss(nn.Module): - """SSD Weighted Loss Function - Compute Targets: - 1) Produce Confidence Target Indices by matching ground truth boxes - with (default) 'priorboxes' that have jaccard index > threshold parameter - (default threshold: 0.5). - 2) Produce localization target by 'encoding' variance into offsets of ground - truth boxes and their matched 'priorboxes'. - 3) Hard negative mining to filter the excessive number of negative examples - that comes with using a large number of default bounding boxes. - (default negative:positive ratio 3:1) - Objective Loss: - L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N - Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss - weighted by α which is set to 1 by cross val. - Args: - c: class confidences, - l: predicted boxes, - g: ground truth boxes - N: number of matched default boxes - See: https://arxiv.org/pdf/1512.02325.pdf for more details. - """ - - def __init__(self, - cfg, - use_npu=True, - use_head_loss=False): - super(MultiBoxLoss, self).__init__() - self.use_npu = use_npu - self.num_classes = cfg.NUM_CLASSES - self.negpos_ratio = cfg.NEG_POS_RATIOS - self.variance = cfg.VARIANCE - self.use_head_loss = use_head_loss - self.threshold = cfg.FACE.OVERLAP_THRESH - self.match = match_ssd - - def forward(self, - predictions, - targets): - """Multibox Loss - Args: - predictions (tuple): A tuple containing loc preds, conf preds, - and prior boxes from SSD net. - conf shape: torch.size(batch_size,num_priors,num_classes) - loc shape: torch.size(batch_size,num_priors,4) - priors shape: torch.size(num_priors,4) - - targets (tensor): Ground truth boxes and labels for a batch, - shape: [batch_size,num_objs,5] (last idx is the label). 
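# --- Editor's note: illustrative arithmetic for the hard-negative-mining step in the hunk
# below; not a line of the patch. ---
# With the 3:1 negative:positive ratio described in the class docstring (cfg.NEG_POS_RATIOS),
# an image that matches 20 priors as positives keeps at most
#   num_neg = min(3 * 20, num_priors - 1) = 60
# background priors, chosen as the ones with the highest confidence loss. Both loss terms
# are finally divided by N, the number of matched (positive) priors in the batch, giving
# L(x, c, l, g) = (Lconf + alpha * Lloc) / N with alpha fixed to 1.
num_pos, num_priors, negpos_ratio = 20, 34125, 3          # example values only
num_neg = min(negpos_ratio * num_pos, num_priors - 1)     # 60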
- """ - if self.use_head_loss: - _, _, loc_data, conf_data, priors = predictions - else: - loc_data, conf_data, _, _, priors = predictions - loc_data = loc_data.cpu() - conf_data = conf_data.cpu() - priors = priors.cpu() - num = loc_data.size(0) - priors = priors[:loc_data.size(1), :] - num_priors = (priors.size(0)) - num_classes = self.num_classes - # match priors (default boxes) and ground truth boxes - loc_t = torch.Tensor(num, num_priors, 4) - conf_t = torch.LongTensor(num, num_priors) - for idx in range(num): - truths = targets[idx][:, :-1].data - labels = targets[idx][:, -1].data - defaults = priors.data - self.match(self.threshold, truths, defaults, self.variance, labels, - loc_t, conf_t, idx) - # wrap targets - loc_t = Variable(loc_t, requires_grad=False) - conf_t = Variable(conf_t, requires_grad=False) - - pos = conf_t > 0 - num_pos = pos.sum(dim=1, keepdim=True) - # Localization Loss (Smooth L1) - # Shape: [batch,num_priors,4] - pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) - loc_p = loc_data[pos_idx].view(-1, 4) - loc_t = loc_t[pos_idx].view(-1, 4) - loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) - batch_conf = conf_data.view(-1, self.num_classes) - loss_c = log_sum_exp(batch_conf) - \ - batch_conf.gather(1, conf_t.view(-1, 1)) - - # Hard Negative Mining - loss_c =loss_c.cpu() - pos1 = pos.view(-1,1) - loss_c[pos1] = 0 # filter out pos boxes for now - loss_c = loss_c.view(num, -1) - _, loss_idx = loss_c.sort(1, descending=True) - _, idx_rank = loss_idx.sort(1) - - num_pos = pos.long().sum(1, keepdim=True) - num_neg = torch.clamp(self.negpos_ratio * - num_pos, max=pos.size(1) - 1) - neg = idx_rank < num_neg.expand_as(idx_rank) - - # Confidence Loss Including Positive and Negative Examples - conf_data = conf_data.cpu() - pos_idx = pos.unsqueeze(2).expand_as(conf_data) - neg_idx = neg.unsqueeze(2).expand_as(conf_data) - conf_p = conf_data[(pos_idx + neg_idx).gt(0) - ].view(-1, self.num_classes) - targets_weighted = conf_t[(pos + neg).gt(0)] - loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) - - # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N - N = num_pos.data.sum() if num_pos.data.sum() > 0 else num - loss_l /= N - loss_c /= N - return loss_l, loss_c +#-*- coding:utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import torch.npu + +from ..bbox_utils import match, log_sum_exp, match_ssd + + +class MultiBoxLoss(nn.Module): + """SSD Weighted Loss Function + Compute Targets: + 1) Produce Confidence Target Indices by matching ground truth boxes + with (default) 'priorboxes' that have jaccard index > threshold parameter + (default threshold: 0.5). + 2) Produce localization target by 'encoding' variance into offsets of ground + truth boxes and their matched 'priorboxes'. + 3) Hard negative mining to filter the excessive number of negative examples + that comes with using a large number of default bounding boxes. + (default negative:positive ratio 3:1) + Objective Loss: + L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss + weighted by α which is set to 1 by cross val. + Args: + c: class confidences, + l: predicted boxes, + g: ground truth boxes + N: number of matched default boxes + See: https://arxiv.org/pdf/1512.02325.pdf for more details. + """ + + def __init__(self, + cfg, + use_npu=True, + use_head_loss=False): + super(MultiBoxLoss, self).__init__() + self.use_npu = use_npu + self.num_classes = cfg.NUM_CLASSES + self.negpos_ratio = cfg.NEG_POS_RATIOS + self.variance = cfg.VARIANCE + self.use_head_loss = use_head_loss + self.threshold = cfg.FACE.OVERLAP_THRESH + self.match = match_ssd + + def forward(self, + predictions, + targets): + """Multibox Loss + Args: + predictions (tuple): A tuple containing loc preds, conf preds, + and prior boxes from SSD net. + conf shape: torch.size(batch_size,num_priors,num_classes) + loc shape: torch.size(batch_size,num_priors,4) + priors shape: torch.size(num_priors,4) + + targets (tensor): Ground truth boxes and labels for a batch, + shape: [batch_size,num_objs,5] (last idx is the label). 
+ """ + if self.use_head_loss: + _, _, loc_data, conf_data, priors = predictions + else: + loc_data, conf_data, _, _, priors = predictions + loc_data = loc_data.cpu() + conf_data = conf_data.cpu() + priors = priors.cpu() + num = loc_data.size(0) + priors = priors[:loc_data.size(1), :] + num_priors = (priors.size(0)) + num_classes = self.num_classes + # match priors (default boxes) and ground truth boxes + loc_t = torch.Tensor(num, num_priors, 4) + conf_t = torch.LongTensor(num, num_priors) + for idx in range(num): + truths = targets[idx][:, :-1].data + labels = targets[idx][:, -1].data + defaults = priors.data + self.match(self.threshold, truths, defaults, self.variance, labels, + loc_t, conf_t, idx) + # wrap targets + loc_t = Variable(loc_t, requires_grad=False) + conf_t = Variable(conf_t, requires_grad=False) + + pos = conf_t > 0 + num_pos = pos.sum(dim=1, keepdim=True) + # Localization Loss (Smooth L1) + # Shape: [batch,num_priors,4] + pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) + loc_p = loc_data[pos_idx].view(-1, 4) + loc_t = loc_t[pos_idx].view(-1, 4) + loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) + batch_conf = conf_data.view(-1, self.num_classes) + loss_c = log_sum_exp(batch_conf) - \ + batch_conf.gather(1, conf_t.view(-1, 1)) + + # Hard Negative Mining + loss_c =loss_c.cpu() + pos1 = pos.view(-1,1) + loss_c[pos1] = 0 # filter out pos boxes for now + loss_c = loss_c.view(num, -1) + _, loss_idx = loss_c.sort(1, descending=True) + _, idx_rank = loss_idx.sort(1) + + num_pos = pos.long().sum(1, keepdim=True) + num_neg = torch.clamp(self.negpos_ratio * + num_pos, max=pos.size(1) - 1) + neg = idx_rank < num_neg.expand_as(idx_rank) + + # Confidence Loss Including Positive and Negative Examples + conf_data = conf_data.cpu() + pos_idx = pos.unsqueeze(2).expand_as(conf_data) + neg_idx = neg.unsqueeze(2).expand_as(conf_data) + conf_p = conf_data[(pos_idx + neg_idx).gt(0) + ].view(-1, self.num_classes) + targets_weighted = conf_t[(pos + neg).gt(0)] + loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False) + + # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + N = num_pos.data.sum() if num_pos.data.sum() > 0 else num + loss_l /= N + loss_c /= N + return loss_l, loss_c diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/modelzoo_level.txt b/PyTorch/contrib/cv/detection/Pyramidbox/modelzoo_level.txt index 7eeb8d729d7fb2dd94b91dcf79f8eabd5cfc5b77..a3e2322b3a5f07f4e0cccb874f54cc3d63326fc4 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/Pyramidbox/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PerfStatus:OK +PrecisionStatus:OK diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/prepare_wider_data.py b/PyTorch/contrib/cv/detection/Pyramidbox/prepare_wider_data.py index 59b5d3872b8ef13008c6b12f7ad33d759bcea495..03b9eec1eb2660823be64d0d014506af7ec21d90 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/prepare_wider_data.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/prepare_wider_data.py @@ -1,121 +1,121 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. 
-# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import argparse -import os -from data.config import cfg -#import cv2 - -parser = argparse.ArgumentParser( - description='Pyramidbox face Detector Training With Pytorch') -parser.add_argument('--data_path', - default=None, type=str, - help='data_path') -args = parser.parse_args() - -train_list_file = os.path.join(args.data_path, 'wider_face_split', - 'wider_face_train_bbx_gt.txt') -val_list_file = os.path.join(args.data_path, 'wider_face_split', - 'wider_face_val_bbx_gt.txt') - -WIDER_TRAIN = os.path.join(args.data_path, 'WIDER_train', 'images') -WIDER_VAL = os.path.join(args.data_path, 'WIDER_val', 'images') - - -def parse_wider_file(root, file): - with open(file, 'r') as fr: - lines = fr.readlines() - face_count = [] - img_paths = [] - face_loc = [] - img_faces = [] - count = 0 - flag = False - for k, line in enumerate(lines): - line = line.strip().strip('\n') - if count > 0: - line = line.split(' ') - count -= 1 - loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] - face_loc += [loc] - if flag: - face_count += [int(line)] - flag = False - count = int(line) - if 'jpg' in line: - img_paths += [os.path.join(root, line)] - flag = True - - total_face = 0 - for k in face_count: - face_ = [] - for x in range(total_face, total_face + k): - face_.append(face_loc[x]) - img_faces += [face_] - total_face += k - return img_paths, img_faces - - -def wider_data_file(): - img_paths, bbox = parse_wider_file(WIDER_TRAIN, train_list_file) - fw = open(cfg.FACE.TRAIN_FILE, 'w') - for index in range(len(img_paths)): - path = img_paths[index] - boxes = bbox[index] - fw.write(path) - fw.write(' {}'.format(len(boxes))) - for box in boxes: - data = ' {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 1) - fw.write(data) - fw.write('\n') - fw.close() - - img_paths, bbox = 
parse_wider_file(WIDER_VAL, val_list_file) - fw = open(cfg.FACE.VAL_FILE, 'w') - for index in range(len(img_paths)): - path = img_paths[index] - boxes = bbox[index] - fw.write(path) - fw.write(' {}'.format(len(boxes))) - for box in boxes: - data = ' {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 1) - fw.write(data) - fw.write('\n') - fw.close() - - -if __name__ == '__main__': - wider_data_file() +#-*- coding:utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import argparse +import os +from data.config import cfg +#import cv2 + +parser = argparse.ArgumentParser( + description='Pyramidbox face Detector Training With Pytorch') +parser.add_argument('--data_path', + default=None, type=str, + help='data_path') +args = parser.parse_args() + +train_list_file = os.path.join(args.data_path, 'wider_face_split', + 'wider_face_train_bbx_gt.txt') +val_list_file = os.path.join(args.data_path, 'wider_face_split', + 'wider_face_val_bbx_gt.txt') + +WIDER_TRAIN = os.path.join(args.data_path, 'WIDER_train', 'images') +WIDER_VAL = os.path.join(args.data_path, 'WIDER_val', 'images') + + +def parse_wider_file(root, file): + with open(file, 'r') as fr: + lines = fr.readlines() + face_count = [] + img_paths = [] + face_loc = [] + img_faces = [] + count = 0 + flag = False + for k, line in enumerate(lines): + line = line.strip().strip('\n') + if count > 0: + line = line.split(' ') + count -= 1 + loc = [int(line[0]), int(line[1]), int(line[2]), int(line[3])] + face_loc += [loc] + if flag: + face_count += [int(line)] + flag = False + count = int(line) + if 'jpg' in line: + img_paths += [os.path.join(root, line)] + flag = True + + total_face = 0 + for k in face_count: + face_ = [] + for x in range(total_face, total_face + k): + face_.append(face_loc[x]) + img_faces += [face_] + total_face += k + return img_paths, img_faces + + +def wider_data_file(): + img_paths, bbox = parse_wider_file(WIDER_TRAIN, train_list_file) + fw = open(cfg.FACE.TRAIN_FILE, 'w') + for index in range(len(img_paths)): + path = img_paths[index] + boxes = bbox[index] + fw.write(path) + fw.write(' {}'.format(len(boxes))) + for box in boxes: + data = ' {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 1) + fw.write(data) + fw.write('\n') + fw.close() + + img_paths, bbox = parse_wider_file(WIDER_VAL, val_list_file) + fw = open(cfg.FACE.VAL_FILE, 'w') + for index in range(len(img_paths)): + path = img_paths[index] + boxes = bbox[index] + fw.write(path) + fw.write(' {}'.format(len(boxes))) + for box in boxes: + data = ' {} {} {} {} {}'.format(box[0], box[1], box[2], box[3], 1) + fw.write(data) + fw.write('\n') + fw.close() + + +if __name__ == '__main__': + wider_data_file() diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/pyramidbox.py b/PyTorch/contrib/cv/detection/Pyramidbox/pyramidbox.py index 048fabf22016662110259873202f68273cf4df9d..f8ebd2e0fd56a2fd50a134d4c426b713de707133 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/pyramidbox.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/pyramidbox.py @@ -1,434 +1,434 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time -import torch -import torch.nn as nn -import torch.nn.init as init -from torch.autograd import Function -import torch.nn.functional as F -from torch.autograd import Variable -import os -from layers import * -from data.config import cfg -import numpy as np -import torch.npu -class conv_bn(nn.Module): - """docstring for conv""" - - def __init__(self, - in_plane, - out_plane, - kernel_size, - stride, - padding): - super(conv_bn, self).__init__() - self.conv1 = nn.Conv2d(in_plane, out_plane, - kernel_size=kernel_size, stride=stride, padding=padding) - self.bn1 = nn.BatchNorm2d(out_plane) - - def forward(self, x): - x = self.conv1(x) - return self.bn1(x) - - - -class CPM(nn.Module): - """docstring for CPM""" - - def __init__(self, in_plane): - super(CPM, self).__init__() - self.branch1 = conv_bn(in_plane, 1024, 1, 1, 0) - self.branch2a = conv_bn(in_plane, 256, 1, 1, 0) - self.branch2b = conv_bn(256, 256, 3, 1, 1) - self.branch2c = conv_bn(256, 1024, 1, 1, 0) - - self.ssh_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) - self.ssh_dimred = nn.Conv2d( - 1024, 128, kernel_size=3, stride=1, padding=1) - self.ssh_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) - self.ssh_3a = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) - self.ssh_3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) - - def forward(self, x): - out_residual = self.branch1(x) - x = F.relu(self.branch2a(x), inplace=True) - x = F.relu(self.branch2b(x), inplace=True) - x = self.branch2c(x) - rescomb = F.relu(x + out_residual, inplace=True) - ssh1 = self.ssh_1(rescomb) - ssh_dimred = F.relu(self.ssh_dimred(rescomb), inplace=True) - ssh_2 = self.ssh_2(ssh_dimred) - ssh_3a = F.relu(self.ssh_3a(ssh_dimred), inplace=True) - ssh_3b = self.ssh_3b(ssh_3a) - - ssh_out = torch.cat([ssh1, ssh_2, ssh_3b], dim=1) - ssh_out = F.relu(ssh_out, inplace=True) - return ssh_out - - -class PyramidBox(nn.Module): - """docstring for PyramidBox""" - - def __init__(self, - phase, - base, - extras, - lfpn_cpm, - head, - num_classes): - super(PyramidBox, self).__init__() - - self.vgg = nn.ModuleList(base) - self.extras = nn.ModuleList(extras) - self.bn64 = nn.BatchNorm2d(64) - self.L2Norm3_3 = L2Norm(256, 10) - self.L2Norm4_3 = L2Norm(512, 8) - self.L2Norm5_3 = L2Norm(512, 5) - - self.lfpn_topdown = nn.ModuleList(lfpn_cpm[0]) - self.lfpn_later = 
nn.ModuleList(lfpn_cpm[1]) - self.cpm = nn.ModuleList(lfpn_cpm[2]) - - self.loc_layers = nn.ModuleList(head[0]) - self.conf_layers = nn.ModuleList(head[1]) - - - - self.is_infer = False - if phase == 'test': - self.softmax = nn.Softmax(dim=-1) - self.detect = Detect(cfg) - self.is_infer = True - - def _upsample_prod(self, x, y): - _, _, H, W = y.size() - return F.interpolate(x, size=(H, W), mode='bilinear') * y - - def forward(self, x): - - use_npu = False - size = x.size()[2:] - bn_index = 0 - for k in range(16): - x = self.vgg[k](x) - if isinstance(self.vgg[k], nn.Conv2d): - if k == 2: - x = self.bn64(x) - conv3_3 = x - for k in range(16, 23): - x = self.vgg[k](x) - conv4_3 = x - for k in range(23, 30): - x = self.vgg[k](x) - - conv5_3 = x - - for k in range(30, len(self.vgg)): - x = self.vgg[k](x) - - convfc_7 = x - - for k in range(2): - x = F.relu(self.extras[k](x), inplace=True)#.npu() - conv6_2 = x - - for k in range(2, 4): - x = F.relu(self.extras[k](x), inplace=True)#.npu() - - conv7_2 = x - - x = F.relu(self.lfpn_topdown[0](convfc_7), inplace=True) - lfpn2_on_conv5 = F.relu(self._upsample_prod( - x, self.lfpn_later[0](conv5_3)), inplace=True) - - x = F.relu(self.lfpn_topdown[1](lfpn2_on_conv5), inplace=True) - lfpn1_on_conv4 = F.relu(self._upsample_prod( - x, self.lfpn_later[1](conv4_3)), inplace=True) - - - x = F.relu(self.lfpn_topdown[2](lfpn1_on_conv4), inplace=True) - lfpn0_on_conv3 = F.relu(self._upsample_prod( - x, self.lfpn_later[2](conv3_3)), inplace=True) - l2norm3 = self.L2Norm3_3(lfpn0_on_conv3) - l2norm4 = self.L2Norm4_3(lfpn1_on_conv4) - l2norm5 = self.L2Norm5_3(lfpn2_on_conv5) - - ssh_conv3_norm = self.cpm[0](l2norm3) - ssh_conv4_norm = self.cpm[1](l2norm4) - ssh_conv5_norm = self.cpm[2](l2norm5) - - ssh_convfc7 = self.cpm[3](convfc_7) - ssh_conv6 = self.cpm[4](conv6_2) - ssh_conv7 = self.cpm[5](conv7_2) - face_locs, face_confs = [], [] - head_locs, head_confs = [], [] - - N = ssh_conv3_norm.size(0) - - - mbox_loc = self.loc_layers[0](ssh_conv3_norm) - - face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1) - - face_loc = face_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) - if not self.is_infer: - head_loc = head_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) - - mbox_conf = self.conf_layers[0](ssh_conv3_norm) - - face_conf1 = mbox_conf[:, 3:4, :, :] - face_conf3_maxin, _ = torch.max(mbox_conf[:, 0:3, :, :], dim=1, keepdim=True) - - face_conf = torch.cat((face_conf3_maxin, face_conf1), dim=1) - face_conf = face_conf.permute(0, 2, 3, 1).contiguous().view(N, -1, 2) - if not self.is_infer: - head_conf3_maxin, _ = torch.max(mbox_conf[:, 4:7, :, :], dim=1, keepdim=True) - head_conf1 = mbox_conf[:, 7:, :, :] - - head_conf = torch.cat((head_conf3_maxin, head_conf1), dim=1) - head_conf = head_conf.permute(0, 2, 3, 1).contiguous().view(N, -1, 2) - - face_locs.append(face_loc) - face_confs.append(face_conf) - if not self.is_infer: - head_locs.append(head_loc) - head_confs.append(head_conf) - - - inputs = [ssh_conv4_norm, ssh_conv5_norm, - ssh_convfc7, ssh_conv6, ssh_conv7] - - feature_maps = [] - feat_size = ssh_conv3_norm.size()[2:] - feature_maps.append([feat_size[0], feat_size[1]]) - - for i, feat in enumerate(inputs): - - feat_size = feat.size()[2:] - feature_maps.append([feat_size[0], feat_size[1]]) - mbox_loc = self.loc_layers[i + 1](feat) - - face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1) - - face_loc = face_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) - if not self.is_infer: - head_loc = head_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) - 
- mbox_conf = self.conf_layers[i + 1](feat) - - face_conf1 = mbox_conf[:, 0:1, :, :] - face_conf3_maxin, _ = torch.max( - mbox_conf[:, 1:4, :, :], dim=1, keepdim=True) - - face_conf = torch.cat((face_conf1, face_conf3_maxin), dim=1) - - face_conf = face_conf.permute( - 0, 2, 3, 1).contiguous().view(N, -1, 2) - - if not self.is_infer: - head_conf = mbox_conf[:, 4:, :, :].permute( - 0, 2, 3, 1).contiguous().view(N, -1, 2) - - face_locs.append(face_loc) - face_confs.append(face_conf) - - if not self.is_infer: - head_locs.append(head_loc) - head_confs.append(head_conf) - - face_mbox_loc = torch.cat(face_locs, dim=1) - face_mbox_conf = torch.cat(face_confs, dim=1) - - if not self.is_infer: - head_mbox_loc = torch.cat(head_locs, dim=1) - head_mbox_conf = torch.cat(head_confs, dim=1) - - - priors_boxes = PriorBox(size, feature_maps, cfg) - with torch.no_grad(): - priors = Variable(priors_boxes.forward()) - - if not self.is_infer: - output = (face_mbox_loc, face_mbox_conf, - head_mbox_loc, head_mbox_conf, priors) - else: - face_mbox_loc = face_mbox_loc.cpu() - face_mbox_conf = face_mbox_conf.cpu() - output = self.detect.forward(face_mbox_loc, - self.softmax(face_mbox_conf), - priors).cpu() - return output - - def load_weights(self, base_file): - other, ext = os.path.splitext(base_file) - if ext == '.pkl' or '.pth': - print('Loading weights into state dict...') - mdata = torch.load(base_file, - map_location=lambda storage, loc: storage) - weights = mdata['weight'] - epoch = mdata['epoch'] - self.load_state_dict(weights) - print('Finished!') - else: - print('Sorry only .pth and .pkl files supported.') - return epoch - - def xavier(self, param): - with torch.no_grad(): - init.xavier_uniform(param) - - def weights_init(self, m): - if isinstance(m, nn.Conv2d): - - self.xavier(m.weight) - if 'bias' in m.state_dict().keys(): - m.bias.data.zero_() - - if isinstance(m, nn.ConvTranspose2d): - self.xavier(m.weight.data) - if 'bias' in m.state_dict().keys(): - m.bias.data.zero_() - - if isinstance(m, nn.BatchNorm2d): - m.weight.data[...] 
= 1 - m.bias.data.zero_() - - -vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', - 512, 512, 512, 'M'] - -extras_cfg = [256, 'S', 512, 128, 'S', 256] - -lfpn_cpm_cfg = [256, 512, 512, 1024, 512, 256] - -multibox_cfg = [512, 512, 512, 512, 512, 512] - - -def vgg(cfg, i, batch_norm=False): - layers = [] - in_channels = i - for v in cfg: - if v == 'M': - layers += [nn.MaxPool2d(kernel_size=2, stride=2)] - elif v == 'C': - layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] - else: - conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) - if batch_norm: - layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] - else: - layers += [conv2d, nn.ReLU(inplace=True)] - in_channels = v - conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) - conv7 = nn.Conv2d(1024, 1024, kernel_size=1) - layers += [conv6, - nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] - return layers - - -def add_extras(cfg, i, batch_norm=False): - # Extra layers added to VGG for feature scaling - layers = [] - in_channels = i - flag = False - for k, v in enumerate(cfg): - if in_channels != 'S': - if v == 'S': - layers += [nn.Conv2d(in_channels, cfg[k + 1], - kernel_size=(1, 3)[flag], stride=2, padding=1)] - else: - layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] - flag = not flag - in_channels = v - return layers - - -def add_lfpn_cpm(cfg): - lfpn_topdown_layers = [] - lfpn_latlayer = [] - cpm_layers = [] - - for k, v in enumerate(cfg): - cpm_layers.append(CPM(v)) - - fpn_list = cfg[::-1][2:] - for k, v in enumerate(fpn_list[:-1]): - lfpn_latlayer.append(nn.Conv2d( - fpn_list[k + 1], fpn_list[k + 1], kernel_size=1, stride=1, padding=0)) - lfpn_topdown_layers.append(nn.Conv2d( - v, fpn_list[k + 1], kernel_size=1, stride=1, padding=0)) - - return (lfpn_topdown_layers, lfpn_latlayer, cpm_layers) - - -def multibox(vgg, extra_layers): - loc_layers = [] - conf_layers = [] - vgg_source = [21, 28, -2] - i = 0 - loc_layers += [nn.Conv2d(multibox_cfg[i], - 8, kernel_size=3, padding=1)] - conf_layers += [nn.Conv2d(multibox_cfg[i], - 8, kernel_size=3, padding=1)] - i += 1 - for k, v in enumerate(vgg_source): - loc_layers += [nn.Conv2d(multibox_cfg[i], - 8, kernel_size=3, padding=1)] - conf_layers += [nn.Conv2d(multibox_cfg[i], - 6, kernel_size=3, padding=1)] - i += 1 - for k, v in enumerate(extra_layers[1::2], 2): - loc_layers += [nn.Conv2d(multibox_cfg[i], - 8, kernel_size=3, padding=1)] - conf_layers += [nn.Conv2d(multibox_cfg[i], - 6, kernel_size=3, padding=1)] - i += 1 - return vgg, extra_layers, (loc_layers, conf_layers) - - -def build_net(phase, num_classes=2): - base_, extras_, head_ = multibox( - vgg(vgg_cfg, 3), add_extras((extras_cfg), 1024)) - lfpn_cpm = add_lfpn_cpm(lfpn_cpm_cfg) - return PyramidBox(phase, base_, extras_, lfpn_cpm, head_, num_classes) - - -if __name__ == '__main__': - inputs = Variable(torch.randn(1, 3, 640, 640)) - net = build_net('train', num_classes=2) - print(net) - out = net(inputs) +#-*- coding:utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import torch +import torch.nn as nn +import torch.nn.init as init +from torch.autograd import Function +import torch.nn.functional as F +from torch.autograd import Variable +import os +from layers import * +from data.config import cfg +import numpy as np +import torch.npu +class conv_bn(nn.Module): + """docstring for conv""" + + def __init__(self, + in_plane, + out_plane, + kernel_size, + stride, + padding): + super(conv_bn, self).__init__() + self.conv1 = nn.Conv2d(in_plane, out_plane, + kernel_size=kernel_size, stride=stride, padding=padding) + self.bn1 = nn.BatchNorm2d(out_plane) + + def forward(self, x): + x = self.conv1(x) + return self.bn1(x) + + + +class CPM(nn.Module): + """docstring for CPM""" + + def __init__(self, in_plane): + super(CPM, self).__init__() + self.branch1 = conv_bn(in_plane, 1024, 1, 1, 0) + self.branch2a = conv_bn(in_plane, 256, 1, 1, 0) + self.branch2b = conv_bn(256, 256, 3, 1, 1) + self.branch2c = conv_bn(256, 1024, 1, 1, 0) + + self.ssh_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1) + self.ssh_dimred = nn.Conv2d( + 1024, 128, kernel_size=3, stride=1, padding=1) + self.ssh_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) + self.ssh_3a = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) + self.ssh_3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + out_residual = self.branch1(x) + x = F.relu(self.branch2a(x), inplace=True) + x = F.relu(self.branch2b(x), inplace=True) + x = self.branch2c(x) + rescomb = F.relu(x + out_residual, inplace=True) + ssh1 = self.ssh_1(rescomb) + ssh_dimred = F.relu(self.ssh_dimred(rescomb), inplace=True) + ssh_2 = self.ssh_2(ssh_dimred) + ssh_3a = F.relu(self.ssh_3a(ssh_dimred), inplace=True) + ssh_3b = self.ssh_3b(ssh_3a) + + ssh_out = torch.cat([ssh1, ssh_2, ssh_3b], dim=1) + ssh_out = F.relu(ssh_out, inplace=True) + return ssh_out + + +class PyramidBox(nn.Module): + """docstring for PyramidBox""" + + def __init__(self, + phase, + base, + extras, + lfpn_cpm, + head, + num_classes): + super(PyramidBox, self).__init__() + + self.vgg = nn.ModuleList(base) + self.extras = 
nn.ModuleList(extras) + self.bn64 = nn.BatchNorm2d(64) + self.L2Norm3_3 = L2Norm(256, 10) + self.L2Norm4_3 = L2Norm(512, 8) + self.L2Norm5_3 = L2Norm(512, 5) + + self.lfpn_topdown = nn.ModuleList(lfpn_cpm[0]) + self.lfpn_later = nn.ModuleList(lfpn_cpm[1]) + self.cpm = nn.ModuleList(lfpn_cpm[2]) + + self.loc_layers = nn.ModuleList(head[0]) + self.conf_layers = nn.ModuleList(head[1]) + + + + self.is_infer = False + if phase == 'test': + self.softmax = nn.Softmax(dim=-1) + self.detect = Detect(cfg) + self.is_infer = True + + def _upsample_prod(self, x, y): + _, _, H, W = y.size() + return F.interpolate(x, size=(H, W), mode='bilinear') * y + + def forward(self, x): + + use_npu = False + size = x.size()[2:] + bn_index = 0 + for k in range(16): + x = self.vgg[k](x) + if isinstance(self.vgg[k], nn.Conv2d): + if k == 2: + x = self.bn64(x) + conv3_3 = x + for k in range(16, 23): + x = self.vgg[k](x) + conv4_3 = x + for k in range(23, 30): + x = self.vgg[k](x) + + conv5_3 = x + + for k in range(30, len(self.vgg)): + x = self.vgg[k](x) + + convfc_7 = x + + for k in range(2): + x = F.relu(self.extras[k](x), inplace=True)#.npu() + conv6_2 = x + + for k in range(2, 4): + x = F.relu(self.extras[k](x), inplace=True)#.npu() + + conv7_2 = x + + x = F.relu(self.lfpn_topdown[0](convfc_7), inplace=True) + lfpn2_on_conv5 = F.relu(self._upsample_prod( + x, self.lfpn_later[0](conv5_3)), inplace=True) + + x = F.relu(self.lfpn_topdown[1](lfpn2_on_conv5), inplace=True) + lfpn1_on_conv4 = F.relu(self._upsample_prod( + x, self.lfpn_later[1](conv4_3)), inplace=True) + + + x = F.relu(self.lfpn_topdown[2](lfpn1_on_conv4), inplace=True) + lfpn0_on_conv3 = F.relu(self._upsample_prod( + x, self.lfpn_later[2](conv3_3)), inplace=True) + l2norm3 = self.L2Norm3_3(lfpn0_on_conv3) + l2norm4 = self.L2Norm4_3(lfpn1_on_conv4) + l2norm5 = self.L2Norm5_3(lfpn2_on_conv5) + + ssh_conv3_norm = self.cpm[0](l2norm3) + ssh_conv4_norm = self.cpm[1](l2norm4) + ssh_conv5_norm = self.cpm[2](l2norm5) + + ssh_convfc7 = self.cpm[3](convfc_7) + ssh_conv6 = self.cpm[4](conv6_2) + ssh_conv7 = self.cpm[5](conv7_2) + face_locs, face_confs = [], [] + head_locs, head_confs = [], [] + + N = ssh_conv3_norm.size(0) + + + mbox_loc = self.loc_layers[0](ssh_conv3_norm) + + face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1) + + face_loc = face_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) + if not self.is_infer: + head_loc = head_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) + + mbox_conf = self.conf_layers[0](ssh_conv3_norm) + + face_conf1 = mbox_conf[:, 3:4, :, :] + face_conf3_maxin, _ = torch.max(mbox_conf[:, 0:3, :, :], dim=1, keepdim=True) + + face_conf = torch.cat((face_conf3_maxin, face_conf1), dim=1) + face_conf = face_conf.permute(0, 2, 3, 1).contiguous().view(N, -1, 2) + if not self.is_infer: + head_conf3_maxin, _ = torch.max(mbox_conf[:, 4:7, :, :], dim=1, keepdim=True) + head_conf1 = mbox_conf[:, 7:, :, :] + + head_conf = torch.cat((head_conf3_maxin, head_conf1), dim=1) + head_conf = head_conf.permute(0, 2, 3, 1).contiguous().view(N, -1, 2) + + face_locs.append(face_loc) + face_confs.append(face_conf) + if not self.is_infer: + head_locs.append(head_loc) + head_confs.append(head_conf) + + + inputs = [ssh_conv4_norm, ssh_conv5_norm, + ssh_convfc7, ssh_conv6, ssh_conv7] + + feature_maps = [] + feat_size = ssh_conv3_norm.size()[2:] + feature_maps.append([feat_size[0], feat_size[1]]) + + for i, feat in enumerate(inputs): + + feat_size = feat.size()[2:] + feature_maps.append([feat_size[0], feat_size[1]]) + mbox_loc = self.loc_layers[i + 
1](feat) + + face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1) + + face_loc = face_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) + if not self.is_infer: + head_loc = head_loc.permute(0, 2, 3, 1).contiguous().view(N, -1, 4) + + mbox_conf = self.conf_layers[i + 1](feat) + + face_conf1 = mbox_conf[:, 0:1, :, :] + face_conf3_maxin, _ = torch.max( + mbox_conf[:, 1:4, :, :], dim=1, keepdim=True) + + face_conf = torch.cat((face_conf1, face_conf3_maxin), dim=1) + + face_conf = face_conf.permute( + 0, 2, 3, 1).contiguous().view(N, -1, 2) + + if not self.is_infer: + head_conf = mbox_conf[:, 4:, :, :].permute( + 0, 2, 3, 1).contiguous().view(N, -1, 2) + + face_locs.append(face_loc) + face_confs.append(face_conf) + + if not self.is_infer: + head_locs.append(head_loc) + head_confs.append(head_conf) + + face_mbox_loc = torch.cat(face_locs, dim=1) + face_mbox_conf = torch.cat(face_confs, dim=1) + + if not self.is_infer: + head_mbox_loc = torch.cat(head_locs, dim=1) + head_mbox_conf = torch.cat(head_confs, dim=1) + + + priors_boxes = PriorBox(size, feature_maps, cfg) + with torch.no_grad(): + priors = Variable(priors_boxes.forward()) + + if not self.is_infer: + output = (face_mbox_loc, face_mbox_conf, + head_mbox_loc, head_mbox_conf, priors) + else: + face_mbox_loc = face_mbox_loc.cpu() + face_mbox_conf = face_mbox_conf.cpu() + output = self.detect.forward(face_mbox_loc, + self.softmax(face_mbox_conf), + priors).cpu() + return output + + def load_weights(self, base_file): + other, ext = os.path.splitext(base_file) + if ext == '.pkl' or '.pth': + print('Loading weights into state dict...') + mdata = torch.load(base_file, + map_location=lambda storage, loc: storage) + weights = mdata['weight'] + epoch = mdata['epoch'] + self.load_state_dict(weights) + print('Finished!') + else: + print('Sorry only .pth and .pkl files supported.') + return epoch + + def xavier(self, param): + with torch.no_grad(): + init.xavier_uniform(param) + + def weights_init(self, m): + if isinstance(m, nn.Conv2d): + + self.xavier(m.weight) + if 'bias' in m.state_dict().keys(): + m.bias.data.zero_() + + if isinstance(m, nn.ConvTranspose2d): + self.xavier(m.weight.data) + if 'bias' in m.state_dict().keys(): + m.bias.data.zero_() + + if isinstance(m, nn.BatchNorm2d): + m.weight.data[...] 
= 1 + m.bias.data.zero_() + + +vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', + 512, 512, 512, 'M'] + +extras_cfg = [256, 'S', 512, 128, 'S', 256] + +lfpn_cpm_cfg = [256, 512, 512, 1024, 512, 256] + +multibox_cfg = [512, 512, 512, 512, 512, 512] + + +def vgg(cfg, i, batch_norm=False): + layers = [] + in_channels = i + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + elif v == 'C': + layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) + conv7 = nn.Conv2d(1024, 1024, kernel_size=1) + layers += [conv6, + nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] + return layers + + +def add_extras(cfg, i, batch_norm=False): + # Extra layers added to VGG for feature scaling + layers = [] + in_channels = i + flag = False + for k, v in enumerate(cfg): + if in_channels != 'S': + if v == 'S': + layers += [nn.Conv2d(in_channels, cfg[k + 1], + kernel_size=(1, 3)[flag], stride=2, padding=1)] + else: + layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] + flag = not flag + in_channels = v + return layers + + +def add_lfpn_cpm(cfg): + lfpn_topdown_layers = [] + lfpn_latlayer = [] + cpm_layers = [] + + for k, v in enumerate(cfg): + cpm_layers.append(CPM(v)) + + fpn_list = cfg[::-1][2:] + for k, v in enumerate(fpn_list[:-1]): + lfpn_latlayer.append(nn.Conv2d( + fpn_list[k + 1], fpn_list[k + 1], kernel_size=1, stride=1, padding=0)) + lfpn_topdown_layers.append(nn.Conv2d( + v, fpn_list[k + 1], kernel_size=1, stride=1, padding=0)) + + return (lfpn_topdown_layers, lfpn_latlayer, cpm_layers) + + +def multibox(vgg, extra_layers): + loc_layers = [] + conf_layers = [] + vgg_source = [21, 28, -2] + i = 0 + loc_layers += [nn.Conv2d(multibox_cfg[i], + 8, kernel_size=3, padding=1)] + conf_layers += [nn.Conv2d(multibox_cfg[i], + 8, kernel_size=3, padding=1)] + i += 1 + for k, v in enumerate(vgg_source): + loc_layers += [nn.Conv2d(multibox_cfg[i], + 8, kernel_size=3, padding=1)] + conf_layers += [nn.Conv2d(multibox_cfg[i], + 6, kernel_size=3, padding=1)] + i += 1 + for k, v in enumerate(extra_layers[1::2], 2): + loc_layers += [nn.Conv2d(multibox_cfg[i], + 8, kernel_size=3, padding=1)] + conf_layers += [nn.Conv2d(multibox_cfg[i], + 6, kernel_size=3, padding=1)] + i += 1 + return vgg, extra_layers, (loc_layers, conf_layers) + + +def build_net(phase, num_classes=2): + base_, extras_, head_ = multibox( + vgg(vgg_cfg, 3), add_extras((extras_cfg), 1024)) + lfpn_cpm = add_lfpn_cpm(lfpn_cpm_cfg) + return PyramidBox(phase, base_, extras_, lfpn_cpm, head_, num_classes) + + +if __name__ == '__main__': + inputs = Variable(torch.randn(1, 3, 640, 640)) + net = build_net('train', num_classes=2) + print(net) + out = net(inputs) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/requirements.txt b/PyTorch/contrib/cv/detection/Pyramidbox/requirements.txt index 570522f1dbe893f9ea9b5b92d490be7e6729a1e4..cc42ce9b3fe7d10a57d89ff98154f33342fe4263 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/requirements.txt +++ b/PyTorch/contrib/cv/detection/Pyramidbox/requirements.txt @@ -1,26 +1,26 @@ -apex 0.1+ascend.20210825 -backcall -certifi -Cython -easydict -ipython -matplotlib-inline -mpmath -numpy -opencv-python -parso -pexpect -pickleshare -Pillow -scipy -six -sympy 
-te -tensor-fused-plugin 0.1+ascend -topi 0.4.0 -torch 1.5.0+ascend.post3.20210825 -torchvision 0.6.0a0+b68adcf -tqdm -traitlets -urllib3 +apex 0.1+ascend.20210825 +backcall +certifi +Cython +easydict +ipython +matplotlib-inline +mpmath +numpy +opencv-python +parso +pexpect +pickleshare +Pillow +scipy +six +sympy +te +tensor-fused-plugin 0.1+ascend +topi 0.4.0 +torch 1.5.0+ascend.post3.20210825 +torchvision 0.6.0a0+b68adcf +tqdm +traitlets +urllib3 wcwidth \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/tools/afw_test.py b/PyTorch/contrib/cv/detection/Pyramidbox/tools/afw_test.py index 0402237e25bf5fd714f3775b32170ac40db0b69c..291c9342b9cf43472b2b98cf0fc196cc5adf7fd1 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/tools/afw_test.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/tools/afw_test.py @@ -1,140 +1,140 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -import torch -import argparse -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn -import torchvision.transforms as transforms -import os.path as osp - -import cv2 -import time -import numpy as np -from PIL import Image - -from data.config import cfg -from pyramidbox import build_net -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr - - -parser = argparse.ArgumentParser(description='pyramidbox evaluatuon afw') -parser.add_argument('--model', - type=str,default='weights/pyramidbox.pth', help='trained model') -parser.add_argument('--thresh', - default=0.1, type=float, - help='Final confidence threshold') -args = parser.parse_args() - -use_cuda = torch.cuda.is_available() - -if use_cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -AFW_IMG_DIR = os.path.join(cfg.FACE.AFW_DIR, 'images') -AFW_RESULT_DIR = os.path.join(cfg.FACE.AFW_DIR, 'pyramidbox') -AFW_RESULT_IMG_DIR = os.path.join(AFW_RESULT_DIR, 'images') - -if not os.path.exists(AFW_RESULT_IMG_DIR): - os.makedirs(AFW_RESULT_IMG_DIR) - - -def detect_face(net, img, thresh): - height, width, _ = img.shape - im_shrink = 640.0 / max(height, width) - image = cv2.resize(img, None, None, fx=im_shrink, - fy=im_shrink, interpolation=cv2.INTER_LINEAR).copy() - - x = to_chw_bgr(image) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if use_cuda: - x = x.cuda() - - y = net(x) - detections = y.data - scale = torch.Tensor([img.shape[1], img.shape[0], - img.shape[1], img.shape[0]]) - - bboxes = [] - for i in range(detections.size(1)): - j = 0 - while detections[0, i, j, 0] >= thresh: - box = [] - score = detections[0, i, j, 0] - pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(np.int) - j += 1 - box += [pt[0], pt[1], pt[2], pt[3], score] - 
box[1] += 0.2 * (box[3] - box[1] + 1) - bboxes += [box] - - return bboxes - - -if __name__ == '__main__': - net = build_net('test', cfg.NUM_CLASSES) - net.load_state_dict(torch.load(args.model)) - net.eval() - - if use_cuda: - net.cuda() - cudnn.benckmark = True - - #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) - - counter = 0 - txt_out = os.path.join(AFW_RESULT_DIR, 'pyramidbox_dets.txt') - txt_in = os.path.join('./tools/afw_img_list.txt') - - fout = open(txt_out, 'w') - fin = open(txt_in, 'r') - - for line in fin.readlines(): - line = line.strip() - img_file = os.path.join(AFW_IMG_DIR, line + '.jpg') - out_file = os.path.join(AFW_RESULT_IMG_DIR, line + '.jpg') - counter += 1 - t1 = time.time() - #img = cv2.imread(img_file, cv2.IMREAD_COLOR) - img = Image.open(img_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - bboxes = detect_face(net, img, args.thresh) - t2 = time.time() - print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) - for bbox in bboxes: - x1, y1, x2, y2, score = bbox - fout.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( - line, score, x1, y1, x2, y2)) - for bbox in bboxes: - x1, y1, x2, y2, score = bbox - x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) - cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.imwrite(out_file, img) - - fout.close() - fin.close() +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +import torch +import argparse +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import os.path as osp + +import cv2 +import time +import numpy as np +from PIL import Image + +from data.config import cfg +from pyramidbox import build_net +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr + + +parser = argparse.ArgumentParser(description='pyramidbox evaluatuon afw') +parser.add_argument('--model', + type=str,default='weights/pyramidbox.pth', help='trained model') +parser.add_argument('--thresh', + default=0.1, type=float, + help='Final confidence threshold') +args = parser.parse_args() + +use_cuda = torch.cuda.is_available() + +if use_cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +AFW_IMG_DIR = os.path.join(cfg.FACE.AFW_DIR, 'images') +AFW_RESULT_DIR = os.path.join(cfg.FACE.AFW_DIR, 'pyramidbox') +AFW_RESULT_IMG_DIR = os.path.join(AFW_RESULT_DIR, 'images') + +if not os.path.exists(AFW_RESULT_IMG_DIR): + os.makedirs(AFW_RESULT_IMG_DIR) + + +def detect_face(net, img, thresh): + height, width, _ = img.shape + im_shrink = 640.0 / max(height, width) + image = cv2.resize(img, None, None, fx=im_shrink, + fy=im_shrink, interpolation=cv2.INTER_LINEAR).copy() + + x = to_chw_bgr(image) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_cuda: + x = x.cuda() + + y = net(x) + detections = y.data + scale = torch.Tensor([img.shape[1], img.shape[0], + img.shape[1], img.shape[0]]) + + bboxes = [] + for i in range(detections.size(1)): + j = 0 + while detections[0, i, j, 0] >= thresh: + box = [] + score = detections[0, i, j, 0] + pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(np.int) + j += 1 + box += [pt[0], pt[1], pt[2], pt[3], score] + box[1] += 0.2 * (box[3] - box[1] + 1) + bboxes += [box] + + return bboxes + + +if __name__ == '__main__': + net = build_net('test', cfg.NUM_CLASSES) + net.load_state_dict(torch.load(args.model)) + net.eval() + + if use_cuda: + net.cuda() + cudnn.benckmark = True + + #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) + + counter = 0 + txt_out = os.path.join(AFW_RESULT_DIR, 'pyramidbox_dets.txt') + txt_in = os.path.join('./tools/afw_img_list.txt') + + fout = open(txt_out, 'w') + fin = open(txt_in, 'r') + + for line in fin.readlines(): + line = line.strip() + img_file = os.path.join(AFW_IMG_DIR, line + '.jpg') + out_file = os.path.join(AFW_RESULT_IMG_DIR, line + '.jpg') + counter += 1 + t1 = time.time() + #img = cv2.imread(img_file, cv2.IMREAD_COLOR) + img = Image.open(img_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + bboxes = detect_face(net, img, args.thresh) + t2 = time.time() + print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) + for bbox in bboxes: + x1, y1, x2, y2, score = bbox + fout.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( + line, score, x1, y1, x2, y2)) + for bbox in bboxes: + x1, y1, x2, y2, score = bbox + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.imwrite(out_file, img) + + fout.close() + fin.close() diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/tools/fddb_test.py 
b/PyTorch/contrib/cv/detection/Pyramidbox/tools/fddb_test.py index 4958a67effa71bea4f164cfca2c66d39b7382901..1fede8bd71a2a02b6f23b97ae4b5d28f2a434299 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/tools/fddb_test.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/tools/fddb_test.py @@ -1,156 +1,156 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import torch -import argparse -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn -import torchvision.transforms as transforms - -import cv2 -import time -import numpy as np -from PIL import Image - -from data.config import cfg -from pyramidbox import build_net -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr - -parser = argparse.ArgumentParser(description='pyramidbox evaluatuon fddb') -parser.add_argument('--model', - type=str, - default='weights/pyramidbox.pth', help='trained model') -parser.add_argument('--thresh', - default=0.1, type=float, - help='Final confidence threshold') -args = parser.parse_args() - - -use_cuda = torch.cuda.is_available() - -if use_cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - - -FDDB_IMG_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'images') -FDDB_FOLD_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'FDDB-folds') -FDDB_RESULT_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'pyramidbox') -FDDB_RESULT_IMG_DIR = os.path.join(FDDB_RESULT_DIR, 'images') - -if not os.path.exists(FDDB_RESULT_IMG_DIR): - os.makedirs(FDDB_RESULT_IMG_DIR) - - -def detect_face(net, img, thresh): - height, width, _ = img.shape - x = to_chw_bgr(img) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if use_cuda: - x = x.cuda() - - y = net(x) - detections = y.data - scale = torch.Tensor([img.shape[1], img.shape[0], - img.shape[1], img.shape[0]]) - - bboxes = [] - for i in range(detections.size(1)): - j = 0 - while detections[0, i, j, 0] >= thresh: - box = [] - score = detections[0, i, j, 0] - pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(np.int) - j += 1 - box += [pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1], score] - bboxes += [box] - - return bboxes - - -if __name__ == '__main__': - net = build_net('test', cfg.NUM_CLASSES) - net.load_state_dict(torch.load(args.model)) - net.eval() - - if use_cuda: - net.cuda() - cudnn.benckmark = True - - #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) - - counter = 0 - - for i in range(10): - txt_in = os.path.join(FDDB_FOLD_DIR, 'FDDB-fold-%02d.txt' % (i + 1)) - txt_out = os.path.join(FDDB_RESULT_DIR, 'fold-%02d-out.txt' % (i + 1)) - answer_in = os.path.join( - FDDB_FOLD_DIR, 'FDDB-fold-%02d-ellipseList.txt' % (i + 1)) - with open(txt_in, 'r') as fr: - lines = fr.readlines() - fout = 
open(txt_out, 'w') - ain = open(answer_in, 'r') - for line in lines: - line = line.strip() - img_file = os.path.join(FDDB_IMG_DIR, line + '.jpg') - out_file = os.path.join( - FDDB_RESULT_IMG_DIR, line.replace('/', '_') + '.jpg') - counter += 1 - t1 = time.time() - #img = cv2.imread(img_file, cv2.IMREAD_COLOR) - img = Image.open(img_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - bboxes = detect_face(net, img, args.thresh) - t2 = time.time() - print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) - fout.write('%s\n' % line) - fout.write('%d\n' % len(bboxes)) - for bbox in bboxes: - x1, y1, w, h, score = bbox - fout.write('%d %d %d %d %lf\n' % (x1, y1, w, h, score)) - ain.readline() - n = int(ain.readline().strip()) - for i in range(n): - line = ain.readline().strip() - line_data = [float(_) for _ in line.split(' ')[:5]] - major_axis_radius, minor_axis_radius, angle, center_x, center_y = line_data - angle = angle / 3.1415926 * 180. - center_x, center_y = int(center_x), int(center_y) - major_axis_radius, minor_axis_radius = int( - major_axis_radius), int(minor_axis_radius) - cv2.ellipse(img, (center_x, center_y), (major_axis_radius, - minor_axis_radius), angle, 0, 360, (255, 0, 0), 2) - - for bbox in bboxes: - x1, y1, w, h, score = bbox - x1, y1, x2, y2 = int(x1), int(y1), int(x1 + w), int(y1 + h) - cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.imwrite(out_file, img) - fout.close() - ain.close() +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +import sys +import torch +import argparse +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms + +import cv2 +import time +import numpy as np +from PIL import Image + +from data.config import cfg +from pyramidbox import build_net +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr + +parser = argparse.ArgumentParser(description='pyramidbox evaluatuon fddb') +parser.add_argument('--model', + type=str, + default='weights/pyramidbox.pth', help='trained model') +parser.add_argument('--thresh', + default=0.1, type=float, + help='Final confidence threshold') +args = parser.parse_args() + + +use_cuda = torch.cuda.is_available() + +if use_cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + + +FDDB_IMG_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'images') +FDDB_FOLD_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'FDDB-folds') +FDDB_RESULT_DIR = os.path.join(cfg.FACE.FDDB_DIR, 'pyramidbox') +FDDB_RESULT_IMG_DIR = os.path.join(FDDB_RESULT_DIR, 'images') + +if not os.path.exists(FDDB_RESULT_IMG_DIR): + os.makedirs(FDDB_RESULT_IMG_DIR) + + +def detect_face(net, img, thresh): + height, width, _ = img.shape + x = to_chw_bgr(img) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_cuda: + x = x.cuda() + + y = net(x) + detections = y.data + scale = torch.Tensor([img.shape[1], img.shape[0], + img.shape[1], img.shape[0]]) + + bboxes = [] + for i in range(detections.size(1)): + j = 0 + while detections[0, i, j, 0] >= thresh: + box = [] + score = detections[0, i, j, 0] + pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(np.int) + j += 1 + box += [pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1], score] + bboxes += [box] + + return bboxes + + +if __name__ == '__main__': + net = build_net('test', cfg.NUM_CLASSES) + net.load_state_dict(torch.load(args.model)) + net.eval() + + if use_cuda: + net.cuda() + cudnn.benckmark = True + + #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) + + counter = 0 + + for i in range(10): + txt_in = os.path.join(FDDB_FOLD_DIR, 'FDDB-fold-%02d.txt' % (i + 1)) + txt_out = os.path.join(FDDB_RESULT_DIR, 'fold-%02d-out.txt' % (i + 1)) + answer_in = os.path.join( + FDDB_FOLD_DIR, 'FDDB-fold-%02d-ellipseList.txt' % (i + 1)) + with open(txt_in, 'r') as fr: + lines = fr.readlines() + fout = open(txt_out, 'w') + ain = open(answer_in, 'r') + for line in lines: + line = line.strip() + img_file = os.path.join(FDDB_IMG_DIR, line + '.jpg') + out_file = os.path.join( + FDDB_RESULT_IMG_DIR, line.replace('/', '_') + '.jpg') + counter += 1 + t1 = time.time() + #img = cv2.imread(img_file, cv2.IMREAD_COLOR) + img = Image.open(img_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + bboxes = detect_face(net, img, args.thresh) + t2 = time.time() + print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) + fout.write('%s\n' % line) + fout.write('%d\n' % len(bboxes)) + for bbox in bboxes: + x1, y1, w, h, score = bbox + fout.write('%d %d %d %d %lf\n' % (x1, y1, w, h, score)) + ain.readline() + n = int(ain.readline().strip()) + for i in range(n): + line = ain.readline().strip() + line_data = [float(_) for _ in line.split(' ')[:5]] + major_axis_radius, minor_axis_radius, angle, center_x, center_y 
= line_data + angle = angle / 3.1415926 * 180. + center_x, center_y = int(center_x), int(center_y) + major_axis_radius, minor_axis_radius = int( + major_axis_radius), int(minor_axis_radius) + cv2.ellipse(img, (center_x, center_y), (major_axis_radius, + minor_axis_radius), angle, 0, 360, (255, 0, 0), 2) + + for bbox in bboxes: + x1, y1, w, h, score = bbox + x1, y1, x2, y2 = int(x1), int(y1), int(x1 + w), int(y1 + h) + cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.imwrite(out_file, img) + fout.close() + ain.close() diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/tools/pascal_test.py b/PyTorch/contrib/cv/detection/Pyramidbox/tools/pascal_test.py index c8f6a5429b4a58c0d775d18733609f9123d964c9..0a2f7b999add55d923f00e9f0079cdd4ae65c3ec 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/tools/pascal_test.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/tools/pascal_test.py @@ -1,140 +1,140 @@ -#-*- coding:utf-8 -*- -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -import torch -import argparse -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn -import torchvision.transforms as transforms -import os.path as osp - -import cv2 -import time -import numpy as np -from PIL import Image - -from data.config import cfg -from pyramidbox import build_net -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr - -parser = argparse.ArgumentParser(description='pyramidbox evaluatuon pascal') -parser.add_argument('--model', - type=str,default='weights/pyramidbox.pth', - help='trained model') -parser.add_argument('--thresh', - default=0.1, type=float, - help='Final confidence threshold') -args = parser.parse_args() - -use_cuda = torch.cuda.is_available() - -if use_cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -PASCAL_IMG_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 'images') -PASCAL_RESULT_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 'pyramidbox') -PASCAL_RESULT_IMG_DIR = os.path.join(PASCAL_RESULT_DIR, 'images') - -if not os.path.exists(PASCAL_RESULT_IMG_DIR): - os.makedirs(PASCAL_RESULT_IMG_DIR) - - -def detect_face(net, img, thresh): - height, width, _ = img.shape - im_shrink = 640.0 / max(height, width) - image = cv2.resize(img, None, None, fx=im_shrink, - fy=im_shrink, interpolation=cv2.INTER_LINEAR).copy() - - x = to_chw_bgr(image) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - - x = Variable(torch.from_numpy(x).unsqueeze(0)) - if use_cuda: - x = x.cuda() - - y = net(x) - detections = y.data - scale = torch.Tensor([img.shape[1], img.shape[0], - img.shape[1], img.shape[0]]) - - bboxes = [] - for i in range(detections.size(1)): - j = 0 - while detections[0, i, j, 0] >= thresh: - box = [] - score = detections[0, i, j, 0] - pt = (detections[0, i, j, 1:] * 
scale).cpu().numpy().astype(np.int) - j += 1 - box += [pt[0], pt[1], pt[2], pt[3], score] - box[1] += 0.2 * (box[3] - box[1] + 1) - bboxes += [box] - - return bboxes - - -if __name__ == '__main__': - net = build_net('test', cfg.NUM_CLASSES) - net.load_state_dict(torch.load(args.model)) - net.eval() - - if use_cuda: - net.cuda() - cudnn.benckmark = True - - #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) - - counter = 0 - txt_out = os.path.join(PASCAL_RESULT_DIR, 'pyramidbox_dets.txt') - txt_in = os.path.join('./tools/pascal_img_list.txt') - - fout = open(txt_out, 'w') - fin = open(txt_in, 'r') - - for line in fin.readlines(): - line = line.strip() - img_file = os.path.join(PASCAL_IMG_DIR, line) - out_file = os.path.join(PASCAL_RESULT_IMG_DIR, line) - counter += 1 - t1 = time.time() - #img = cv2.imread(img_file, cv2.IMREAD_COLOR) - img = Image.open(img_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - bboxes = detect_face(net, img, args.thresh) - t2 = time.time() - print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) - for bbox in bboxes: - x1, y1, x2, y2, score = bbox - fout.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( - line, score, x1, y1, x2, y2)) - for bbox in bboxes: - x1, y1, x2, y2, score = bbox - x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) - cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.imwrite(out_file, img) - - fout.close() - fin.close() +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +import torch +import argparse +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import os.path as osp + +import cv2 +import time +import numpy as np +from PIL import Image + +from data.config import cfg +from pyramidbox import build_net +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr + +parser = argparse.ArgumentParser(description='pyramidbox evaluatuon pascal') +parser.add_argument('--model', + type=str,default='weights/pyramidbox.pth', + help='trained model') +parser.add_argument('--thresh', + default=0.1, type=float, + help='Final confidence threshold') +args = parser.parse_args() + +use_cuda = torch.cuda.is_available() + +if use_cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +PASCAL_IMG_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 'images') +PASCAL_RESULT_DIR = os.path.join(cfg.FACE.PASCAL_DIR, 'pyramidbox') +PASCAL_RESULT_IMG_DIR = os.path.join(PASCAL_RESULT_DIR, 'images') + +if not os.path.exists(PASCAL_RESULT_IMG_DIR): + os.makedirs(PASCAL_RESULT_IMG_DIR) + + +def detect_face(net, img, thresh): + height, width, _ = img.shape + im_shrink = 640.0 / max(height, width) + image = cv2.resize(img, None, None, fx=im_shrink, + fy=im_shrink, interpolation=cv2.INTER_LINEAR).copy() + + x = to_chw_bgr(image) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + + x = Variable(torch.from_numpy(x).unsqueeze(0)) + if use_cuda: + x = x.cuda() + + y = net(x) + detections = y.data + scale = torch.Tensor([img.shape[1], img.shape[0], + img.shape[1], img.shape[0]]) + + bboxes = [] + for i in range(detections.size(1)): + j = 0 + while detections[0, i, j, 0] >= thresh: + box = [] + score = detections[0, i, j, 0] + pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(np.int) + j += 1 + box += [pt[0], pt[1], pt[2], pt[3], score] + box[1] += 0.2 * (box[3] - box[1] + 1) + bboxes += [box] + + return bboxes + + +if __name__ == '__main__': + net = build_net('test', cfg.NUM_CLASSES) + net.load_state_dict(torch.load(args.model)) + net.eval() + + if use_cuda: + net.cuda() + cudnn.benckmark = True + + #transform = S3FDBasicTransform(cfg.INPUT_SIZE, cfg.MEANS) + + counter = 0 + txt_out = os.path.join(PASCAL_RESULT_DIR, 'pyramidbox_dets.txt') + txt_in = os.path.join('./tools/pascal_img_list.txt') + + fout = open(txt_out, 'w') + fin = open(txt_in, 'r') + + for line in fin.readlines(): + line = line.strip() + img_file = os.path.join(PASCAL_IMG_DIR, line) + out_file = os.path.join(PASCAL_RESULT_IMG_DIR, line) + counter += 1 + t1 = time.time() + #img = cv2.imread(img_file, cv2.IMREAD_COLOR) + img = Image.open(img_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + bboxes = detect_face(net, img, args.thresh) + t2 = time.time() + print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) + for bbox in bboxes: + x1, y1, x2, y2, score = bbox + fout.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( + line, score, x1, y1, x2, y2)) + for bbox in bboxes: + x1, y1, x2, y2, score = bbox + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) + cv2.imwrite(out_file, img) + + fout.close() + fin.close() diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/tools/wider_test.py 
b/PyTorch/contrib/cv/detection/Pyramidbox/tools/wider_test.py index d79f4c8e0864f3bd3b17bc236b327c0c1d998eba..942d87bb7048de7b5900000de63ea918f2dac238 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/tools/wider_test.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/tools/wider_test.py @@ -1,283 +1,283 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - -import os -import torch -import argparse -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn -import torchvision.transforms as transforms -import os.path as osp -import time -import cv2 -import time -import numpy as np -from PIL import Image -import scipy.io as sio - -import sys -#sys.path.append("/home/wch/Pyramidbox/") -from data.config import cfg -from pyramidbox import build_net -from torch.autograd import Variable -from utils.augmentations import to_chw_bgr -import torch.npu - -parser = argparse.ArgumentParser(description='pyramidbox evaluatuon wider') -parser.add_argument('--model', type=str, - default="weights/pyramidbox.pth", help='trained model') -parser.add_argument('--thresh', default=0.05, type=float, - help='Final confidence threshold') -parser.add_argument('--data_path', - default=None, type=str, - help='data_path') -args = parser.parse_args() - - -use_npu = torch.npu.is_available() - -def detect_face(net, img, shrink): - if shrink != 1: - img = cv2.resize(img, None, None, fx=shrink, fy=shrink, - interpolation=cv2.INTER_LINEAR) - - x = to_chw_bgr(img) - x = x.astype('float32') - x -= cfg.img_mean - x = x[[2, 1, 0], :, :] - image_x = x.shape[1] - image_y = x.shape[2] - if image_x <2500 and image_y<2500: - x0 = torch.Tensor(x[0]) - x1 = torch.Tensor(x[1]) - x2 = torch.Tensor(x[2]) - pad = nn.ZeroPad2d(padding=(0,2500-image_y,0,2500-image_x)) - x0 = pad(x0) - x1 = pad(x1) - x2 = pad(x2) - x0 =np.array(x0) - x1 =np.array(x1) - x2 =np.array(x2) - x = np.array([x0,x1,x2]) - x = Variable(torch.from_numpy(x).unsqueeze(0)) - - if use_npu: - x = x.npu() - y = net(x) - detections = y.data - detections = detections.cpu().numpy() - - det_conf = detections[0, 1, :, 0] - if image_x < 2500 and image_y < 2500: - det_xmin = 2500 * detections[0, 1, :, 1] / shrink - det_ymin = 2500 * detections[0, 1, :, 2] / shrink - det_xmax = 2500 * detections[0, 1, :, 3] / shrink - det_ymax = 2500 * detections[0, 1, :, 4] / shrink - else: - det_xmin = img.shape[1] * detections[0, 1, :, 1] / shrink - det_ymin = img.shape[0] * detections[0, 1, :, 2] / shrink - det_xmax = img.shape[1] * detections[0, 1, :, 3] / shrink - det_ymax = img.shape[0] * detections[0, 1, :, 4] / shrink - det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) - - keep_index = np.where(det[:, 4] >= args.thresh)[0] - det = det[keep_index, :] - return det - - -def flip_test(net, image, shrink): - image_f = cv2.flip(image, 1) - det_f = detect_face(net, image_f, shrink) - det_t = np.zeros(det_f.shape) - det_t[:, 0] = image.shape[1] - det_f[:, 2] - det_t[:, 1] = det_f[:, 1] - det_t[:, 2] = image.shape[1] - det_f[:, 0] - det_t[:, 3] = det_f[:, 3] - det_t[:, 4] = det_f[:, 4] - return det_t - - -def multi_scale_test(net, image, max_im_shrink): - # shrink detecting and shrink only detect big face - st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink - det_s = detect_face(net, image, st) - index = np.where(np.maximum( - det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] - det_s = det_s[index, :] - - # enlarge one times - bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( - st + max_im_shrink) / 2 - det_b = detect_face(net, image, bt) - - # enlarge small image x times for small face - if max_im_shrink > 2: - bt *= 2 - while bt < max_im_shrink: - det_b = 
np.row_stack((det_b, detect_face(net, image, bt))) - bt *= 2 - det_b = np.row_stack((det_b, detect_face(net, image, max_im_shrink))) - - # enlarge only detect small face - if bt > 1: - index = np.where(np.minimum( - det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] - det_b = det_b[index, :] - else: - index = np.where(np.maximum( - det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] - det_b = det_b[index, :] - - return det_s, det_b - - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - - # get needed merge det and delete these det - merge_index = np.where(o >= 0.3)[0] - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - - if merge_index.shape[0] <= 1: - continue - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum( - det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - - try: - dets = dets[0:750, :] - except: - dets = det - - return dets - - -def get_data(): - subset = 'val' - if subset is 'val': - wider_face = sio.loadmat(args.data_path+'wider_face_split/wider_face_val.mat') - else: - wider_face = sio.loadmat(args.data_path+'wider_face_split/wider_face_test.mat') - event_list = wider_face['event_list'] - file_list = wider_face['file_list'] - del wider_face - - imgs_path = os.path.join( - args.data_path, 'WIDER_{}'.format(subset), 'images') - save_path = 'output/pyramidbox1_{}'.format(subset) - - return event_list, file_list, imgs_path, save_path - -if __name__ == '__main__': - event_list, file_list, imgs_path, save_path = get_data() - cfg.USE_NMS = False - net = build_net('test', cfg.NUM_CLASSES) - net.load_state_dict(torch.load(args.model,map_location='cpu')) - - - net.eval() - - if use_npu: - net.npu() - cudnn.benckmark = True - - counter = 0 - print('start in ........') - for index, event in enumerate(event_list): - filelist = file_list[index][0] - path = os.path.join(save_path, str(event[0][0])) - if not os.path.exists(path): - os.makedirs(path) - - for num, file in enumerate(filelist): - im_name = file[0][0] - in_file = os.path.join(imgs_path, event[0][0], str(im_name[:]) + '.jpg') - img = Image.open(in_file) - if img.mode == 'L': - img = img.convert('RGB') - img = np.array(img) - max_im_shrink = np.sqrt( - 1700 * 1000 / (img.shape[0] * img.shape[1])) - - shrink = max_im_shrink if max_im_shrink < 1 else 1 - counter += 1 - - t1 = time.time() - det0 = detect_face(net, img, shrink) - det1 = flip_test(net, img, shrink) # flip test - [det2, det3] = multi_scale_test(net, img, max_im_shrink) - det = np.row_stack((det0, det1, det2, det3)) - if det.shape[0] ==1: - dets =det - else: - dets = bbox_vote(det) - - t2 = time.time() - print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) - - fout = open(osp.join(save_path, str(event[0][ - 0]), im_name + '.txt'), 'w') - fout.write('{:s}\n'.format(str(event[0][0]) + '/' + im_name + '.jpg')) - 
fout.write('{:d}\n'.format(dets.shape[0])) - for i in range(dets.shape[0]): - xmin = dets[i][0] - ymin = dets[i][1] - xmax = dets[i][2] - ymax = dets[i][3] - score = dets[i][4] - fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. - format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) +#-*- coding:utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +import os +import torch +import argparse +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import os.path as osp +import time +import cv2 +import time +import numpy as np +from PIL import Image +import scipy.io as sio + +import sys +#sys.path.append("/home/wch/Pyramidbox/") +from data.config import cfg +from pyramidbox import build_net +from torch.autograd import Variable +from utils.augmentations import to_chw_bgr +import torch.npu + +parser = argparse.ArgumentParser(description='pyramidbox evaluatuon wider') +parser.add_argument('--model', type=str, + default="weights/pyramidbox.pth", help='trained model') +parser.add_argument('--thresh', default=0.05, type=float, + help='Final confidence threshold') +parser.add_argument('--data_path', + default=None, type=str, + help='data_path') +args = parser.parse_args() + + +use_npu = torch.npu.is_available() + +def detect_face(net, img, shrink): + if shrink != 1: + img = cv2.resize(img, None, None, fx=shrink, fy=shrink, + interpolation=cv2.INTER_LINEAR) + + x = to_chw_bgr(img) + x = x.astype('float32') + x -= cfg.img_mean + x = x[[2, 1, 0], :, :] + image_x = x.shape[1] + image_y = x.shape[2] + if image_x <2500 and image_y<2500: + x0 = torch.Tensor(x[0]) + x1 = torch.Tensor(x[1]) + x2 = torch.Tensor(x[2]) + pad = nn.ZeroPad2d(padding=(0,2500-image_y,0,2500-image_x)) + x0 = pad(x0) + x1 = pad(x1) + x2 = pad(x2) + x0 =np.array(x0) + x1 =np.array(x1) + x2 =np.array(x2) + x = np.array([x0,x1,x2]) + x = Variable(torch.from_numpy(x).unsqueeze(0)) + + if use_npu: + x = x.npu() + y = net(x) + detections = y.data + detections = detections.cpu().numpy() + + det_conf = detections[0, 1, :, 0] + if image_x < 2500 and image_y < 2500: + det_xmin = 2500 * detections[0, 1, :, 1] / shrink + det_ymin = 2500 * detections[0, 1, :, 2] / shrink + det_xmax = 2500 * detections[0, 1, :, 3] / shrink + det_ymax = 2500 * detections[0, 1, :, 4] / shrink + else: + det_xmin = img.shape[1] * detections[0, 1, :, 1] / shrink + det_ymin = img.shape[0] * detections[0, 1, :, 2] / shrink + det_xmax = img.shape[1] * detections[0, 1, :, 3] / shrink + det_ymax = img.shape[0] * detections[0, 1, :, 4] / shrink + det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) + + keep_index = np.where(det[:, 4] >= args.thresh)[0] + det = det[keep_index, :] + return det + + +def flip_test(net, image, shrink): + image_f = cv2.flip(image, 1) + det_f = detect_face(net, image_f, shrink) + det_t = np.zeros(det_f.shape) + det_t[:, 0] = image.shape[1] - det_f[:, 2] + det_t[:, 1] = det_f[:, 1] + det_t[:, 2] = image.shape[1] - det_f[:, 0] + det_t[:, 3] = det_f[:, 3] + det_t[:, 4] = det_f[:, 4] + return det_t + + +def multi_scale_test(net, image, max_im_shrink): + # shrink detecting and shrink only detect big face + st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink + det_s = detect_face(net, image, st) + index = np.where(np.maximum( + det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0] + det_s = det_s[index, :] + + # enlarge one times + bt = min(2, max_im_shrink) if max_im_shrink > 1 else ( + st + max_im_shrink) / 2 + det_b = detect_face(net, image, bt) + + # enlarge small image x times for small face + if max_im_shrink > 2: + bt *= 2 + while bt < max_im_shrink: + det_b = 
np.row_stack((det_b, detect_face(net, image, bt))) + bt *= 2 + det_b = np.row_stack((det_b, detect_face(net, image, max_im_shrink))) + + # enlarge only detect small face + if bt > 1: + index = np.where(np.minimum( + det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] + det_b = det_b[index, :] + else: + index = np.where(np.maximum( + det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] + det_b = det_b[index, :] + + return det_s, det_b + + +def bbox_vote(det): + order = det[:, 4].ravel().argsort()[::-1] + det = det[order, :] + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + + # get needed merge det and delete these det + merge_index = np.where(o >= 0.3)[0] + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + + if merge_index.shape[0] <= 1: + continue + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + det_accu_sum[:, 0:4] = np.sum( + det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + + try: + dets = dets[0:750, :] + except: + dets = det + + return dets + + +def get_data(): + subset = 'val' + if subset is 'val': + wider_face = sio.loadmat(args.data_path+'wider_face_split/wider_face_val.mat') + else: + wider_face = sio.loadmat(args.data_path+'wider_face_split/wider_face_test.mat') + event_list = wider_face['event_list'] + file_list = wider_face['file_list'] + del wider_face + + imgs_path = os.path.join( + args.data_path, 'WIDER_{}'.format(subset), 'images') + save_path = 'output/pyramidbox1_{}'.format(subset) + + return event_list, file_list, imgs_path, save_path + +if __name__ == '__main__': + event_list, file_list, imgs_path, save_path = get_data() + cfg.USE_NMS = False + net = build_net('test', cfg.NUM_CLASSES) + net.load_state_dict(torch.load(args.model,map_location='cpu')) + + + net.eval() + + if use_npu: + net.npu() + cudnn.benckmark = True + + counter = 0 + print('start in ........') + for index, event in enumerate(event_list): + filelist = file_list[index][0] + path = os.path.join(save_path, str(event[0][0])) + if not os.path.exists(path): + os.makedirs(path) + + for num, file in enumerate(filelist): + im_name = file[0][0] + in_file = os.path.join(imgs_path, event[0][0], str(im_name[:]) + '.jpg') + img = Image.open(in_file) + if img.mode == 'L': + img = img.convert('RGB') + img = np.array(img) + max_im_shrink = np.sqrt( + 1700 * 1000 / (img.shape[0] * img.shape[1])) + + shrink = max_im_shrink if max_im_shrink < 1 else 1 + counter += 1 + + t1 = time.time() + det0 = detect_face(net, img, shrink) + det1 = flip_test(net, img, shrink) # flip test + [det2, det3] = multi_scale_test(net, img, max_im_shrink) + det = np.row_stack((det0, det1, det2, det3)) + if det.shape[0] ==1: + dets =det + else: + dets = bbox_vote(det) + + t2 = time.time() + print('Detect %04d th image costs %.4f' % (counter, t2 - t1)) + + fout = open(osp.join(save_path, str(event[0][ + 0]), im_name + '.txt'), 'w') + fout.write('{:s}\n'.format(str(event[0][0]) + '/' + im_name + '.jpg')) + 
fout.write('{:d}\n'.format(dets.shape[0])) + for i in range(dets.shape[0]): + xmin = dets[i][0] + ymin = dets[i][1] + xmax = dets[i][2] + ymax = dets[i][3] + score = dets[i][4] + fout.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. + format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) diff --git a/PyTorch/contrib/cv/detection/Pyramidbox/train.py b/PyTorch/contrib/cv/detection/Pyramidbox/train.py index 65ba8997939c79990f9fa65a7379c2c74961c664..6307c7210fa51305824da6377aabb65a2175dc85 100644 --- a/PyTorch/contrib/cv/detection/Pyramidbox/train.py +++ b/PyTorch/contrib/cv/detection/Pyramidbox/train.py @@ -1,361 +1,361 @@ -#-*- coding:utf-8 -*- -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function - - -import torch.nn as nn -import torch.optim as optim -import torch.nn.init as init -import torch.utils.data as data - -import os -import time -import torch -import argparse - -import numpy as np -from torch.autograd import Variable -import torch.backends.cudnn as cudnn -import torch.npu -from data.config import cfg -from pyramidbox import build_net -from layers.modules import MultiBoxLoss -from data.widerface import WIDERDetection, detection_collate -from torch.nn.parallel import DistributedDataParallel as DDP -import apex -from apex import amp -import torch.distributed as dist -import torch.multiprocessing as mp - -parser = argparse.ArgumentParser( - description='Pyramidbox face Detector Training With Pytorch') -train_set = parser.add_mutually_exclusive_group() -parser.add_argument('--basenet', - default='vgg16_reducedfc.pth', - help='Pretrained base model') -parser.add_argument('--batch_size', - default=16, type=int, - help='Batch size for training') -parser.add_argument('--resume', - default=None, type=str, - help='Checkpoint state_dict file to resume training from') -parser.add_argument('--num_workers', - default=4, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--npu', - default=True, type=bool, - help='Use NPU to train model') -parser.add_argument('--performance', - default=False, type=bool, - help='performance to train') -parser.add_argument('--lr', '--learning-rate', - default=1e-3, type=float, - help='initial learning rate') -parser.add_argument('--momentum', - default=0.9, type=float, - help='Momentum value for optim') -parser.add_argument('--weight_decay', - default=5e-4, type=float, - help='Weight decay for SGD') -parser.add_argument('--gamma', - default=0.1, type=float, - help='Gamma update for SGD') -parser.add_argument('--multinpu', - default=False, type=bool, - help='Use mutil Gpu training') -parser.add_argument('--save_folder', - default='weights/', - help='Directory for saving checkpoint models') -parser.add_argument('--local_rank', - default=-1, type=int, - help='rank for current process') -parser.add_argument('--world_size', default=-1, type=int, - help='number of distributed processes') -parser.add_argument('--device_list', default='0', type=str, - help='NPU id to use.') -args = parser.parse_args() - -if args.npu: - if args.multinpu: - device_id = int(args.device_list.split(',')[args.local_rank]) - device = 'npu:{}'.format(device_id) - else: - device = 'npu:0' - torch.npu.set_device(device) - -if not os.path.exists(args.save_folder): - os.makedirs(args.save_folder) - -train_dataset = WIDERDetection(cfg.FACE.TRAIN_FILE, mode='train') - -val_dataset = WIDERDetection(cfg.FACE.VAL_FILE, mode='val') -val_batchsize = 1 -val_loader = data.DataLoader(val_dataset, val_batchsize, - num_workers=1, - shuffle=False, - collate_fn=detection_collate, - pin_memory=True) - -min_loss = np.inf -def train(): - # torch.set_num_threads(1) - iteration = 0 - start_epoch = 0 - step_index = 0 - per_epoch_size = len(train_dataset) // args.batch_size - if args.local_rank==0 or args.multinpu==False: - print('------build_net start-------') - pyramidbox = build_net('train', cfg.NUM_CLASSES) - if args.local_rank==0 or args.multinpu==False: - print('------build_net end-------') - net = pyramidbox - if args.multinpu: - train_sampler = 
torch.utils.data.distributed.DistributedSampler(train_dataset) - train_loader = torch.utils.data.DataLoader( - dataset=train_dataset, - batch_size=args.batch_size, - shuffle=(train_sampler is None), - num_workers=args.num_workers, - pin_memory=False, - sampler=train_sampler, - collate_fn=detection_collate, - drop_last=True) - else: - train_loader = data.DataLoader(train_dataset, args.batch_size, - num_workers=args.num_workers, - shuffle=False, - collate_fn=detection_collate, - pin_memory=True) - if args.resume: - print('Resuming training, loading {}...'.format(args.resume)) - start_epoch = net.load_weights(args.resume) - iteration = start_epoch * per_epoch_size - else: - vgg_weights = torch.load(args.save_folder + args.basenet) - if args.local_rank==0 or args.multinpu==False: - print('Load base network....') - net.vgg.load_state_dict(vgg_weights) - - if args.local_rank==0 or args.multinpu==False: - print('load base network end--------') - if not args.resume: - if args.local_rank==0 or args.multinpu==False: - print('Initializing weights...') - pyramidbox.bn64.apply(pyramidbox.weights_init) - pyramidbox.extras.apply(pyramidbox.weights_init) - pyramidbox.lfpn_topdown.apply(pyramidbox.weights_init) - pyramidbox.lfpn_later.apply(pyramidbox.weights_init) - pyramidbox.cpm.apply(pyramidbox.weights_init) - pyramidbox.loc_layers.apply(pyramidbox.weights_init) - pyramidbox.conf_layers.apply(pyramidbox.weights_init) - - optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) - - if args.npu: - net.npu() - net, optimizer = amp.initialize(net, optimizer, opt_level="O1",loss_scale=64.0)#,combine_grad=True) - if args.multinpu: - device_id = int(args.device_list.split(',')[args.local_rank]) - device = 'npu:{}'.format(device_id) - net = DDP(net, device_ids=[device_id],broadcast_buffers=False) - cudnn.benckmark = True - criterion1 = MultiBoxLoss(cfg, args.npu) - criterion2 = MultiBoxLoss(cfg, args.npu, use_head_loss=True) - if args.local_rank==0 or args.multinpu==False: - print('Loading wider dataset...') - print('Using the specified args:') - print(args) - warmup_steps = 1000 - net.train() - if args.local_rank==0 or args.multinpu==False: - print('start train--------') - for epoch in range(start_epoch, cfg.EPOCHES): - if args.multinpu: - train_sampler.set_epoch(epoch) - losses = 0 - for batch_idx, (images, face_targets, head_targets) in enumerate(train_loader): - - if args.npu: - images = Variable(images.npu()) - with torch.no_grad(): - face_targets = [Variable(ann) for ann in face_targets] - head_targets = [Variable(ann) for ann in head_targets] - else: - images = Variable(images) - with torch.no_grad(): - face_targets = [Variable(ann) for ann in face_targets] - head_targets = [Variable(ann) for ann in head_targets] - adjust_learning_rate(optimizer,iteration,warmup_steps,15000) - t0 = time.time() - out = net(images) - optimizer.zero_grad() - face_loss_l, face_loss_c = criterion1(out, face_targets) - head_loss_l, head_loss_c = criterion2(out, head_targets) - loss = face_loss_l + face_loss_c + head_loss_l + head_loss_c - losses += loss.item() - if args.npu: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - t1 = time.time() - face_loss = (face_loss_l + face_loss_c).item() - head_loss = (head_loss_l + head_loss_c).item() - - if args.performance: - if iteration == 50: - t50_0 = time.time() - if iteration == 100: - t100_0 = time.time() - if args.multinpu: - if args.local_rank==0: - 
print('cost time:{} batch_size:{} num_gpu:{} FPS:{}'.format(t100_0-t50_0,args.batch_size,args.world_size,(50*args.batch_size*args.world_size)/(t100_0-t50_0))) - else: - print('cost time:{} batch_size:{} FPS:{}'.format(t100_0-t50_0,args.batch_size,(50*args.batch_size)/(t100_0-t50_0))) - if iteration == 110: - break - iteration += 1 - continue - if iteration % 10 == 0 and (args.local_rank==0 or args.multinpu==False): - loss_ = losses / (batch_idx + 1) - print('Timer: {:.4f} sec.'.format(t1 - t0)) - print('epoch ' + repr(epoch) + ' iter ' + - repr(iteration) + ' || Loss:%.4f' % (loss_)) - print('->> face Loss: {:.4f} || head loss : {:.4f}'.format( - face_loss, head_loss)) - print('->> lr: {}'.format(optimizer.param_groups[0]['lr'])) - if args.multinpu: - print('iter:{} cost time:{} batch_size:{} num_gpu:{} FPS:{}'.format(iteration,t1-t0,args.batch_size,args.world_size,(args.batch_size*args.world_size)/(t1-t0))) - else: - print('iter:{} cost time:{} batch_size:{} FPS:{}'.format(iteration,t1-t0,args.batch_size,args.batch_size/(t1-t0))) - if iteration != 0 and iteration % 2000 == 0 and (args.local_rank==0 or args.multinpu==False): - print('Saving state, iter:', iteration) - file = 'pyramidbox_' + repr(iteration) + '.pth' - torch.save(pyramidbox.state_dict(), - os.path.join(args.save_folder, file)) - iteration += 1 - if args.performance: - break - if epoch>50 and epoch%5==0: - val(epoch, net, pyramidbox, criterion1, criterion2) - net.train() - -def val(epoch, - net, - pyramidbox, - criterion1, - criterion2): - net.eval() - face_losses = 0 - head_losses = 0 - step = 0 - t1 = time.time() - for batch_idx, (images, face_targets, head_targets) in enumerate(val_loader): - if args.npu: - images = Variable(images.npu()) - with torch.no_grad(): - face_targets = [Variable(ann) for ann in face_targets] - head_targets = [Variable(ann) for ann in head_targets] - - else: - images = Variable(images) - with torch.no_grad(): - face_targets = [Variable(ann) - for ann in face_targets] - head_targets = [Variable(ann) - for ann in head_targets] - - out = net(images) - face_loss_l, face_loss_c = criterion1(out, face_targets) - head_loss_l, head_loss_c = criterion2(out, head_targets) - - face_losses += (face_loss_l + face_loss_c).item() - head_losses += (head_loss_l + head_loss_c).item() - step += 1 - - tloss = face_losses / step - - t2 = time.time() - if args.local_rank==0: - print('test Timer:{:.4f} .sec'.format(t2 - t1)) - print('epoch ' + repr(epoch) + ' || Loss:%.4f' % (tloss)) - - global min_loss - if tloss < min_loss and args.local_rank==0: - print('Saving best state,epoch', epoch) - torch.save(pyramidbox.state_dict(), os.path.join( - args.save_folder, 'pyramidbox.pth')) - min_loss = tloss - - states = { - 'epoch': epoch, - 'weight': pyramidbox.state_dict(), - } - if args.local_rank==0: - torch.save(states, os.path.join( - args.save_folder, 'pyramidbox_checkpoint.pth')) - - -def lr_warmup(optimizer,step,base_lr,warmup_steps): - if not step > face Loss: {:.4f} || head loss : {:.4f}'.format( + face_loss, head_loss)) + print('->> lr: {}'.format(optimizer.param_groups[0]['lr'])) + if args.multinpu: + print('iter:{} cost time:{} batch_size:{} num_gpu:{} FPS:{}'.format(iteration,t1-t0,args.batch_size,args.world_size,(args.batch_size*args.world_size)/(t1-t0))) + else: + print('iter:{} cost time:{} batch_size:{} FPS:{}'.format(iteration,t1-t0,args.batch_size,args.batch_size/(t1-t0))) + if iteration != 0 and iteration % 2000 == 0 and (args.local_rank==0 or args.multinpu==False): + print('Saving state, iter:', iteration) 
+ file = 'pyramidbox_' + repr(iteration) + '.pth' + torch.save(pyramidbox.state_dict(), + os.path.join(args.save_folder, file)) + iteration += 1 + if args.performance: + break + if epoch>50 and epoch%5==0: + val(epoch, net, pyramidbox, criterion1, criterion2) + net.train() + +def val(epoch, + net, + pyramidbox, + criterion1, + criterion2): + net.eval() + face_losses = 0 + head_losses = 0 + step = 0 + t1 = time.time() + for batch_idx, (images, face_targets, head_targets) in enumerate(val_loader): + if args.npu: + images = Variable(images.npu()) + with torch.no_grad(): + face_targets = [Variable(ann) for ann in face_targets] + head_targets = [Variable(ann) for ann in head_targets] + + else: + images = Variable(images) + with torch.no_grad(): + face_targets = [Variable(ann) + for ann in face_targets] + head_targets = [Variable(ann) + for ann in head_targets] + + out = net(images) + face_loss_l, face_loss_c = criterion1(out, face_targets) + head_loss_l, head_loss_c = criterion2(out, head_targets) + + face_losses += (face_loss_l + face_loss_c).item() + head_losses += (head_loss_l + head_loss_c).item() + step += 1 + + tloss = face_losses / step + + t2 = time.time() + if args.local_rank==0: + print('test Timer:{:.4f} .sec'.format(t2 - t1)) + print('epoch ' + repr(epoch) + ' || Loss:%.4f' % (tloss)) + + global min_loss + if tloss < min_loss and args.local_rank==0: + print('Saving best state,epoch', epoch) + torch.save(pyramidbox.state_dict(), os.path.join( + args.save_folder, 'pyramidbox.pth')) + min_loss = tloss + + states = { + 'epoch': epoch, + 'weight': pyramidbox.state_dict(), + } + if args.local_rank==0: + torch.save(states, os.path.join( + args.save_folder, 'pyramidbox_checkpoint.pth')) + + +def lr_warmup(optimizer,step,base_lr,warmup_steps): + if not step 0.5: - img = random_brightness(img) - img = random_contrast(img) - img = random_saturation(img) - img = random_hue(img) - else: - img = random_brightness(img) - img = random_saturation(img) - img = random_hue(img) - img = random_contrast(img) - return img - - -def meet_emit_constraint(src_bbox, sample_bbox): - center_x = (src_bbox.xmax + src_bbox.xmin) / 2 - center_y = (src_bbox.ymax + src_bbox.ymin) / 2 - if center_x >= sample_bbox.xmin and \ - center_x <= sample_bbox.xmax and \ - center_y >= sample_bbox.ymin and \ - center_y <= sample_bbox.ymax: - return True - return False - - -def project_bbox(object_bbox, sample_bbox): - if object_bbox.xmin >= sample_bbox.xmax or \ - object_bbox.xmax <= sample_bbox.xmin or \ - object_bbox.ymin >= sample_bbox.ymax or \ - object_bbox.ymax <= sample_bbox.ymin: - return False - else: - proj_bbox = bbox(0, 0, 0, 0) - sample_width = sample_bbox.xmax - sample_bbox.xmin - sample_height = sample_bbox.ymax - sample_bbox.ymin - proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width - proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height - proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width - proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height - proj_bbox = clip_bbox(proj_bbox) - if bbox_area(proj_bbox) > 0: - return proj_bbox - else: - return False - - -def transform_labels(bbox_labels, sample_bbox): - sample_labels = [] - for i in range(len(bbox_labels)): - sample_label = [] - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - if not meet_emit_constraint(object_bbox, sample_bbox): - continue - proj_bbox = project_bbox(object_bbox, sample_bbox) - if proj_bbox: - 
sample_label.append(bbox_labels[i][0]) - sample_label.append(float(proj_bbox.xmin)) - sample_label.append(float(proj_bbox.ymin)) - sample_label.append(float(proj_bbox.xmax)) - sample_label.append(float(proj_bbox.ymax)) - sample_label = sample_label + bbox_labels[i][5:] - sample_labels.append(sample_label) - return sample_labels - - -def expand_image(img, bbox_labels, img_width, img_height): - prob = np.random.uniform(0, 1) - if prob < cfg.expand_prob: - if cfg.expand_max_ratio - 1 >= 0.01: - expand_ratio = np.random.uniform(1, cfg.expand_max_ratio) - height = int(img_height * expand_ratio) - width = int(img_width * expand_ratio) - h_off = math.floor(np.random.uniform(0, height - img_height)) - w_off = math.floor(np.random.uniform(0, width - img_width)) - expand_bbox = bbox(-w_off / img_width, -h_off / img_height, - (width - w_off) / img_width, - (height - h_off) / img_height) - expand_img = np.ones((height, width, 3)) - expand_img = np.uint8(expand_img * np.squeeze(cfg.img_mean)) - expand_img = Image.fromarray(expand_img) - expand_img.paste(img, (int(w_off), int(h_off))) - bbox_labels = transform_labels(bbox_labels, expand_bbox) - return expand_img, bbox_labels, width, height - return img, bbox_labels, img_width, img_height - - -def clip_bbox(src_bbox): - src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0) - src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0) - src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0) - src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0) - return src_bbox - - -def bbox_area(src_bbox): - if src_bbox.xmax < src_bbox.xmin or src_bbox.ymax < src_bbox.ymin: - return 0. - else: - width = src_bbox.xmax - src_bbox.xmin - height = src_bbox.ymax - src_bbox.ymin - return width * height - - -def intersect_bbox(bbox1, bbox2): - if bbox2.xmin > bbox1.xmax or bbox2.xmax < bbox1.xmin or \ - bbox2.ymin > bbox1.ymax or bbox2.ymax < bbox1.ymin: - intersection_box = bbox(0.0, 0.0, 0.0, 0.0) - else: - intersection_box = bbox( - max(bbox1.xmin, bbox2.xmin), - max(bbox1.ymin, bbox2.ymin), - min(bbox1.xmax, bbox2.xmax), min(bbox1.ymax, bbox2.ymax)) - return intersection_box - - -def bbox_coverage(bbox1, bbox2): - inter_box = intersect_bbox(bbox1, bbox2) - intersect_size = bbox_area(inter_box) - - if intersect_size > 0: - bbox1_size = bbox_area(bbox1) - return intersect_size / bbox1_size - else: - return 0. 
- - -def generate_batch_random_samples(batch_sampler, bbox_labels, image_width, - image_height, scale_array, resize_width, - resize_height): - sampled_bbox = [] - for sampler in batch_sampler: - found = 0 - for i in range(sampler.max_trial): - if found >= sampler.max_sample: - break - sample_bbox = data_anchor_sampling( - sampler, bbox_labels, image_width, image_height, scale_array, - resize_width, resize_height) - if sample_bbox == 0: - break - if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - sampled_bbox.append(sample_bbox) - found = found + 1 - return sampled_bbox - - -def data_anchor_sampling(sampler, bbox_labels, image_width, image_height, - scale_array, resize_width, resize_height): - num_gt = len(bbox_labels) - # np.random.randint range: [low, high) - rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0 - - if num_gt != 0: - norm_xmin = bbox_labels[rand_idx][1] - norm_ymin = bbox_labels[rand_idx][2] - norm_xmax = bbox_labels[rand_idx][3] - norm_ymax = bbox_labels[rand_idx][4] - - xmin = norm_xmin * image_width - ymin = norm_ymin * image_height - wid = image_width * (norm_xmax - norm_xmin) - hei = image_height * (norm_ymax - norm_ymin) - range_size = 0 - - area = wid * hei - for scale_ind in range(0, len(scale_array) - 1): - if area > scale_array[scale_ind] ** 2 and area < \ - scale_array[scale_ind + 1] ** 2: - range_size = scale_ind + 1 - break - - if area > scale_array[len(scale_array) - 2]**2: - range_size = len(scale_array) - 2 - scale_choose = 0.0 - if range_size == 0: - rand_idx_size = 0 - else: - # np.random.randint range: [low, high) - rng_rand_size = np.random.randint(0, range_size + 1) - rand_idx_size = rng_rand_size % (range_size + 1) - - if rand_idx_size == range_size: - min_resize_val = scale_array[rand_idx_size] / 2.0 - max_resize_val = min(2.0 * scale_array[rand_idx_size], - 2 * math.sqrt(wid * hei)) - scale_choose = random.uniform(min_resize_val, max_resize_val) - else: - min_resize_val = scale_array[rand_idx_size] / 2.0 - max_resize_val = 2.0 * scale_array[rand_idx_size] - scale_choose = random.uniform(min_resize_val, max_resize_val) - - sample_bbox_size = wid * resize_width / scale_choose - - w_off_orig = 0.0 - h_off_orig = 0.0 - if sample_bbox_size < max(image_height, image_width): - if wid <= sample_bbox_size: - w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size, - xmin) - else: - w_off_orig = np.random.uniform(xmin, - xmin + wid - sample_bbox_size) - - if hei <= sample_bbox_size: - h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size, - ymin) - else: - h_off_orig = np.random.uniform(ymin, - ymin + hei - sample_bbox_size) - - else: - w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0) - h_off_orig = np.random.uniform( - image_height - sample_bbox_size, 0.0) - - w_off_orig = math.floor(w_off_orig) - h_off_orig = math.floor(h_off_orig) - - # Figure out top left coordinates. 
- w_off = 0.0 - h_off = 0.0 - w_off = float(w_off_orig / image_width) - h_off = float(h_off_orig / image_height) - - sampled_bbox = bbox(w_off, h_off, - w_off + float(sample_bbox_size / image_width), - h_off + float(sample_bbox_size / image_height)) - - return sampled_bbox - else: - return 0 - - -def jaccard_overlap(sample_bbox, object_bbox): - if sample_bbox.xmin >= object_bbox.xmax or \ - sample_bbox.xmax <= object_bbox.xmin or \ - sample_bbox.ymin >= object_bbox.ymax or \ - sample_bbox.ymax <= object_bbox.ymin: - return 0 - intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin) - intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin) - intersect_xmax = min(sample_bbox.xmax, object_bbox.xmax) - intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax) - intersect_size = (intersect_xmax - intersect_xmin) * ( - intersect_ymax - intersect_ymin) - sample_bbox_size = bbox_area(sample_bbox) - object_bbox_size = bbox_area(object_bbox) - overlap = intersect_size / ( - sample_bbox_size + object_bbox_size - intersect_size) - return overlap - - -def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0: - has_jaccard_overlap = False - else: - has_jaccard_overlap = True - if sampler.min_object_coverage == 0 and sampler.max_object_coverage == 0: - has_object_coverage = False - else: - has_object_coverage = True - - if not has_jaccard_overlap and not has_object_coverage: - return True - found = False - for i in range(len(bbox_labels)): - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - if has_jaccard_overlap: - overlap = jaccard_overlap(sample_bbox, object_bbox) - if sampler.min_jaccard_overlap != 0 and \ - overlap < sampler.min_jaccard_overlap: - continue - if sampler.max_jaccard_overlap != 0 and \ - overlap > sampler.max_jaccard_overlap: - continue - found = True - if has_object_coverage: - object_coverage = bbox_coverage(object_bbox, sample_bbox) - if sampler.min_object_coverage != 0 and \ - object_coverage < sampler.min_object_coverage: - continue - if sampler.max_object_coverage != 0 and \ - object_coverage > sampler.max_object_coverage: - continue - found = True - if found: - return True - return found - - -def crop_image_sampling(img, bbox_labels, sample_bbox, image_width, - image_height, resize_width, resize_height, - min_face_size): - # no clipping here - xmin = int(sample_bbox.xmin * image_width) - xmax = int(sample_bbox.xmax * image_width) - ymin = int(sample_bbox.ymin * image_height) - ymax = int(sample_bbox.ymax * image_height) - w_off = xmin - h_off = ymin - width = xmax - xmin - height = ymax - ymin - - cross_xmin = max(0.0, float(w_off)) - cross_ymin = max(0.0, float(h_off)) - cross_xmax = min(float(w_off + width - 1.0), float(image_width)) - cross_ymax = min(float(h_off + height - 1.0), float(image_height)) - cross_width = cross_xmax - cross_xmin - cross_height = cross_ymax - cross_ymin - - roi_xmin = 0 if w_off >= 0 else abs(w_off) - roi_ymin = 0 if h_off >= 0 else abs(h_off) - roi_width = cross_width - roi_height = cross_height - - roi_y1 = int(roi_ymin) - roi_y2 = int(roi_ymin + roi_height) - roi_x1 = int(roi_xmin) - roi_x2 = int(roi_xmin + roi_width) - - cross_y1 = int(cross_ymin) - cross_y2 = int(cross_ymin + cross_height) - cross_x1 = int(cross_xmin) - cross_x2 = int(cross_xmin + cross_width) - - sample_img = np.zeros((height, width, 3)) - # print(sample_img.shape) - sample_img[roi_y1 : roi_y2, roi_x1 : roi_x2] = \ - img[cross_y1: cross_y2, 
cross_x1: cross_x2] - sample_img = cv2.resize( - sample_img, (resize_width, resize_height), interpolation=cv2.INTER_AREA) - - resize_val = resize_width - sample_labels = transform_labels_sampling(bbox_labels, sample_bbox, - resize_val, min_face_size) - return sample_img, sample_labels - - -def transform_labels_sampling(bbox_labels, sample_bbox, resize_val, - min_face_size): - sample_labels = [] - for i in range(len(bbox_labels)): - sample_label = [] - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - if not meet_emit_constraint(object_bbox, sample_bbox): - continue - proj_bbox = project_bbox(object_bbox, sample_bbox) - if proj_bbox: - real_width = float((proj_bbox.xmax - proj_bbox.xmin) * resize_val) - real_height = float((proj_bbox.ymax - proj_bbox.ymin) * resize_val) - if real_width * real_height < float(min_face_size * min_face_size): - continue - else: - sample_label.append(bbox_labels[i][0]) - sample_label.append(float(proj_bbox.xmin)) - sample_label.append(float(proj_bbox.ymin)) - sample_label.append(float(proj_bbox.xmax)) - sample_label.append(float(proj_bbox.ymax)) - sample_label = sample_label + bbox_labels[i][5:] - sample_labels.append(sample_label) - - return sample_labels - - -def generate_sample(sampler, image_width, image_height): - scale = np.random.uniform(sampler.min_scale, sampler.max_scale) - aspect_ratio = np.random.uniform(sampler.min_aspect_ratio, - sampler.max_aspect_ratio) - aspect_ratio = max(aspect_ratio, (scale**2.0)) - aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) - - bbox_width = scale * (aspect_ratio**0.5) - bbox_height = scale / (aspect_ratio**0.5) - - # guarantee a squared image patch after cropping - if sampler.use_square: - if image_height < image_width: - bbox_width = bbox_height * image_height / image_width - else: - bbox_height = bbox_width * image_width / image_height - - xmin_bound = 1 - bbox_width - ymin_bound = 1 - bbox_height - xmin = np.random.uniform(0, xmin_bound) - ymin = np.random.uniform(0, ymin_bound) - xmax = xmin + bbox_width - ymax = ymin + bbox_height - sampled_bbox = bbox(xmin, ymin, xmax, ymax) - return sampled_bbox - - -def generate_batch_samples(batch_sampler, bbox_labels, image_width, - image_height): - sampled_bbox = [] - for sampler in batch_sampler: - found = 0 - for i in range(sampler.max_trial): - if found >= sampler.max_sample: - break - sample_bbox = generate_sample(sampler, image_width, image_height) - if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - sampled_bbox.append(sample_bbox) - found = found + 1 - return sampled_bbox - - -def crop_image(img, bbox_labels, sample_bbox, image_width, image_height, - resize_width, resize_height, min_face_size): - sample_bbox = clip_bbox(sample_bbox) - xmin = int(sample_bbox.xmin * image_width) - xmax = int(sample_bbox.xmax * image_width) - ymin = int(sample_bbox.ymin * image_height) - ymax = int(sample_bbox.ymax * image_height) - - sample_img = img[ymin:ymax, xmin:xmax] - resize_val = resize_width - sample_labels = transform_labels_sampling(bbox_labels, sample_bbox, - resize_val, min_face_size) - return sample_img, sample_labels - - -def to_chw_bgr(image): - """ - Transpose image from HWC to CHW and from RBG to BGR. - Args: - image (np.array): an image with HWC and RBG layout. 
- """ - # HWC to CHW - if len(image.shape) == 3: - image = np.swapaxes(image, 1, 2) - image = np.swapaxes(image, 1, 0) - # RBG to BGR - image = image[[2, 1, 0], :, :] - return image - - -def anchor_crop_image_sampling(img, - bbox_labels, - scale_array, - img_width, - img_height): - mean = np.array([104, 117, 123], dtype=np.float32) - maxSize = 12000 # max size - infDistance = 9999999 - bbox_labels = np.array(bbox_labels) - scale = np.array([img_width, img_height, img_width, img_height]) - - boxes = bbox_labels[:, 1:5] * scale - labels = bbox_labels[:, 0] - - boxArea = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) - # argsort = np.argsort(boxArea) - # rand_idx = random.randint(min(len(argsort),6)) - # print('rand idx',rand_idx) - rand_idx = np.random.randint(len(boxArea)) - rand_Side = boxArea[rand_idx] ** 0.5 - # rand_Side = min(boxes[rand_idx,2] - boxes[rand_idx,0] + 1, - # boxes[rand_idx,3] - boxes[rand_idx,1] + 1) - - distance = infDistance - anchor_idx = 5 - for i, anchor in enumerate(scale_array): - if abs(anchor - rand_Side) < distance: - distance = abs(anchor - rand_Side) - anchor_idx = i - - target_anchor = random.choice(scale_array[0:min(anchor_idx + 1, 5) + 1]) - ratio = float(target_anchor) / rand_Side - ratio = ratio * (2**random.uniform(-1, 1)) - - if int(img_height * ratio * img_width * ratio) > maxSize * maxSize: - ratio = (maxSize * maxSize / (img_height * img_width))**0.5 - - interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, - cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] - interp_method = random.choice(interp_methods) - image = cv2.resize(img, None, None, fx=ratio, - fy=ratio, interpolation=interp_method) - - boxes[:, 0] *= ratio - boxes[:, 1] *= ratio - boxes[:, 2] *= ratio - boxes[:, 3] *= ratio - - height, width, _ = image.shape - - sample_boxes = [] - - xmin = boxes[rand_idx, 0] - ymin = boxes[rand_idx, 1] - bw = (boxes[rand_idx, 2] - boxes[rand_idx, 0] + 1) - bh = (boxes[rand_idx, 3] - boxes[rand_idx, 1] + 1) - - w = h = 640 - - for _ in range(50): - if w < max(height, width): - if bw <= w: - w_off = random.uniform(xmin + bw - w, xmin) - else: - w_off = random.uniform(xmin, xmin + bw - w) - - if bh <= h: - h_off = random.uniform(ymin + bh - h, ymin) - else: - h_off = random.uniform(ymin, ymin + bh - h) - else: - w_off = random.uniform(width - w, 0) - h_off = random.uniform(height - h, 0) - - w_off = math.floor(w_off) - h_off = math.floor(h_off) - - # convert to integer rect x1,y1,x2,y2 - rect = np.array( - [int(w_off), int(h_off), int(w_off + w), int(h_off + h)]) - - # keep overlap with gt box IF center in sampled patch - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - # mask in all gt boxes that above and to the left of centers - m1 = (rect[0] <= boxes[:, 0]) * (rect[1] <= boxes[:, 1]) - # mask in all gt boxes that under and to the right of centers - m2 = (rect[2] >= boxes[:, 2]) * (rect[3] >= boxes[:, 3]) - # mask in that both m1 and m2 are true - mask = m1 * m2 - - overlap = jaccard_numpy(boxes, rect) - # have any valid boxes? 
try again if not - if not mask.any() and not overlap.max() > 0.7: - continue - else: - sample_boxes.append(rect) - - sampled_labels = [] - - if len(sample_boxes) > 0: - choice_idx = np.random.randint(len(sample_boxes)) - choice_box = sample_boxes[choice_idx] - # print('crop the box :',choice_box) - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - m1 = (choice_box[0] < centers[:, 0]) * \ - (choice_box[1] < centers[:, 1]) - m2 = (choice_box[2] > centers[:, 0]) * \ - (choice_box[3] > centers[:, 1]) - mask = m1 * m2 - current_boxes = boxes[mask, :].copy() - current_labels = labels[mask] - current_boxes[:, :2] -= choice_box[:2] - current_boxes[:, 2:] -= choice_box[:2] - - if choice_box[0] < 0 or choice_box[1] < 0: - new_img_width = width if choice_box[ - 0] >= 0 else width - choice_box[0] - new_img_height = height if choice_box[ - 1] >= 0 else height - choice_box[1] - image_pad = np.zeros( - (new_img_height, new_img_width, 3), dtype=float) - image_pad[:, :, :] = mean - start_left = 0 if choice_box[0] >= 0 else -choice_box[0] - start_top = 0 if choice_box[1] >= 0 else -choice_box[1] - image_pad[start_top:, start_left:, :] = image - - choice_box_w = choice_box[2] - choice_box[0] - choice_box_h = choice_box[3] - choice_box[1] - - start_left = choice_box[0] if choice_box[0] >= 0 else 0 - start_top = choice_box[1] if choice_box[1] >= 0 else 0 - end_right = start_left + choice_box_w - end_bottom = start_top + choice_box_h - current_image = image_pad[ - start_top:end_bottom, start_left:end_right, :].copy() - image_height, image_width, _ = current_image.shape - if cfg.filter_min_face: - bbox_w = current_boxes[:, 2] - current_boxes[:, 0] - bbox_h = current_boxes[:, 3] - current_boxes[:, 1] - bbox_area = bbox_w * bbox_h - mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) - current_boxes = current_boxes[mask] - current_labels = current_labels[mask] - for i in range(len(current_boxes)): - sample_label = [] - sample_label.append(current_labels[i]) - sample_label.append(current_boxes[i][0] / image_width) - sample_label.append(current_boxes[i][1] / image_height) - sample_label.append(current_boxes[i][2] / image_width) - sample_label.append(current_boxes[i][3] / image_height) - sampled_labels += [sample_label] - sampled_labels = np.array(sampled_labels) - else: - current_boxes /= np.array([image_width, - image_height, image_width, image_height]) - sampled_labels = np.hstack( - (current_labels[:, np.newaxis], current_boxes)) - - return current_image, sampled_labels - - current_image = image[choice_box[1]:choice_box[ - 3], choice_box[0]:choice_box[2], :].copy() - image_height, image_width, _ = current_image.shape - - if cfg.filter_min_face: - bbox_w = current_boxes[:, 2] - current_boxes[:, 0] - bbox_h = current_boxes[:, 3] - current_boxes[:, 1] - bbox_area = bbox_w * bbox_h - mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) - current_boxes = current_boxes[mask] - current_labels = current_labels[mask] - for i in range(len(current_boxes)): - sample_label = [] - sample_label.append(current_labels[i]) - sample_label.append(current_boxes[i][0] / image_width) - sample_label.append(current_boxes[i][1] / image_height) - sample_label.append(current_boxes[i][2] / image_width) - sample_label.append(current_boxes[i][3] / image_height) - sampled_labels += [sample_label] - sampled_labels = np.array(sampled_labels) - else: - current_boxes /= np.array([image_width, - image_height, image_width, image_height]) - sampled_labels = np.hstack( - (current_labels[:, np.newaxis], current_boxes)) - - return 
current_image, sampled_labels - else: - image_height, image_width, _ = image.shape - if cfg.filter_min_face: - bbox_w = boxes[:, 2] - boxes[:, 0] - bbox_h = boxes[:, 3] - boxes[:, 1] - bbox_area = bbox_w * bbox_h - mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) - boxes = boxes[mask] - labels = labels[mask] - for i in range(len(boxes)): - sample_label = [] - sample_label.append(labels[i]) - sample_label.append(boxes[i][0] / image_width) - sample_label.append(boxes[i][1] / image_height) - sample_label.append(boxes[i][2] / image_width) - sample_label.append(boxes[i][3] / image_height) - sampled_labels += [sample_label] - sampled_labels = np.array(sampled_labels) - else: - boxes /= np.array([image_width, image_height, - image_width, image_height]) - sampled_labels = np.hstack( - (labels[:, np.newaxis], boxes)) - - return image, sampled_labels - - -def preprocess(img, bbox_labels, mode, image_path): - img_width, img_height = img.size - sampled_labels = bbox_labels - if mode == 'train': - if cfg.apply_distort: - img = distort_image(img) - if cfg.apply_expand: - img, bbox_labels, img_width, img_height = expand_image( - img, bbox_labels, img_width, img_height) - - batch_sampler = [] - prob = np.random.uniform(0., 1.) - if prob > cfg.data_anchor_sampling_prob and cfg.anchor_sampling: - scale_array = np.array([16, 32, 64, 128, 256, 512]) - ''' - batch_sampler.append( - sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.6, 0.0, True)) - sampled_bbox = generate_batch_random_samples( - batch_sampler, bbox_labels, img_width, img_height, scale_array, - cfg.resize_width, cfg.resize_height) - ''' - img = np.array(img) - img, sampled_labels = anchor_crop_image_sampling( - img, bbox_labels, scale_array, img_width, img_height) - ''' - if len(sampled_bbox) > 0: - idx = int(np.random.uniform(0, len(sampled_bbox))) - img, sampled_labels = crop_image_sampling( - img, bbox_labels, sampled_bbox[idx], img_width, img_height, - cfg.resize_width, cfg.resize_height, cfg.min_face_size) - ''' - img = img.astype('uint8') - img = Image.fromarray(img) - else: - batch_sampler.append(sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, - 0.0, True)) - batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, - 0.0, True)) - batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, - 0.0, True)) - batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, - 0.0, True)) - batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, - 0.0, True)) - sampled_bbox = generate_batch_samples( - batch_sampler, bbox_labels, img_width, img_height) - - img = np.array(img) - if len(sampled_bbox) > 0: - idx = int(np.random.uniform(0, len(sampled_bbox))) - img, sampled_labels = crop_image( - img, bbox_labels, sampled_bbox[idx], img_width, img_height, - cfg.resize_width, cfg.resize_height, cfg.min_face_size) - - img = Image.fromarray(img) - - interp_mode = [ - Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC, - Image.LANCZOS - ] - interp_indx = np.random.randint(0, 5) - - img = img.resize((cfg.resize_width, cfg.resize_height), - resample=interp_mode[interp_indx]) - - img = np.array(img) - - if mode == 'train': - mirror = int(np.random.uniform(0, 2)) - if mirror == 1: - img = img[:, ::-1, :] - for i in six.moves.xrange(len(sampled_labels)): - tmp = sampled_labels[i][1] - sampled_labels[i][1] = 1 - sampled_labels[i][3] - sampled_labels[i][3] = 1 - tmp - - #img = Image.fromarray(img) - img = to_chw_bgr(img) - img = img.astype('float32') - img -= cfg.img_mean - img = img[[2, 1, 0], :, 
:] # to RGB - #img = img * cfg.scale - - return img, sampled_labels +#-*- coding:utf-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import torch +from torchvision import transforms +import cv2 +import numpy as np +import types +from PIL import Image, ImageEnhance, ImageDraw +import math +import six +from data.config import cfg +import random + + +class sampler(): + + def __init__(self, + max_sample, + max_trial, + min_scale, + max_scale, + min_aspect_ratio, + max_aspect_ratio, + min_jaccard_overlap, + max_jaccard_overlap, + min_object_coverage, + max_object_coverage, + use_square=False): + self.max_sample = max_sample + self.max_trial = max_trial + self.min_scale = min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + self.min_jaccard_overlap = min_jaccard_overlap + self.max_jaccard_overlap = max_jaccard_overlap + self.min_object_coverage = min_object_coverage + self.max_object_coverage = max_object_coverage + self.use_square = use_square + + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. 
+ E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2] - box_a[:, 0]) * + (box_a[:, 3] - box_a[:, 1])) # [A,B] + area_b = ((box_b[2] - box_b[0]) * + (box_b[3] - box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +class bbox(): + + def __init__(self, xmin, ymin, xmax, ymax): + self.xmin = xmin + self.ymin = ymin + self.xmax = xmax + self.ymax = ymax + + +def random_brightness(img): + prob = np.random.uniform(0, 1) + if prob < cfg.brightness_prob: + delta = np.random.uniform(-cfg.brightness_delta, + cfg.brightness_delta) + 1 + img = ImageEnhance.Brightness(img).enhance(delta) + return img + + +def random_contrast(img): + prob = np.random.uniform(0, 1) + if prob < cfg.contrast_prob: + delta = np.random.uniform(-cfg.contrast_delta, + cfg.contrast_delta) + 1 + img = ImageEnhance.Contrast(img).enhance(delta) + return img + + +def random_saturation(img): + prob = np.random.uniform(0, 1) + if prob < cfg.saturation_prob: + delta = np.random.uniform(-cfg.saturation_delta, + cfg.saturation_delta) + 1 + img = ImageEnhance.Color(img).enhance(delta) + return img + + +def random_hue(img): + prob = np.random.uniform(0, 1) + if prob < cfg.hue_prob: + delta = np.random.uniform(-cfg.hue_delta, cfg.hue_delta) + img_hsv = np.array(img.convert('HSV')) + img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta + img = Image.fromarray(img_hsv, mode='HSV').convert('RGB') + return img + + +def distort_image(img): + prob = np.random.uniform(0, 1) + # Apply different distort order + if prob > 0.5: + img = random_brightness(img) + img = random_contrast(img) + img = random_saturation(img) + img = random_hue(img) + else: + img = random_brightness(img) + img = random_saturation(img) + img = random_hue(img) + img = random_contrast(img) + return img + + +def meet_emit_constraint(src_bbox, sample_bbox): + center_x = (src_bbox.xmax + src_bbox.xmin) / 2 + center_y = (src_bbox.ymax + src_bbox.ymin) / 2 + if center_x >= sample_bbox.xmin and \ + center_x <= sample_bbox.xmax and \ + center_y >= sample_bbox.ymin and \ + center_y <= sample_bbox.ymax: + return True + return False + + +def project_bbox(object_bbox, sample_bbox): + if object_bbox.xmin >= sample_bbox.xmax or \ + object_bbox.xmax <= sample_bbox.xmin or \ + object_bbox.ymin >= sample_bbox.ymax or \ + object_bbox.ymax <= sample_bbox.ymin: + return False + else: + proj_bbox = bbox(0, 0, 0, 0) + sample_width = sample_bbox.xmax - sample_bbox.xmin + sample_height = sample_bbox.ymax - sample_bbox.ymin + proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width + proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height + proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width + proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height + proj_bbox = clip_bbox(proj_bbox) + if bbox_area(proj_bbox) > 0: + return proj_bbox + else: + return False + + +def transform_labels(bbox_labels, sample_bbox): + sample_labels = [] + for i in range(len(bbox_labels)): + sample_label = [] + object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], + bbox_labels[i][3], bbox_labels[i][4]) + if not meet_emit_constraint(object_bbox, sample_bbox): + continue + proj_bbox = project_bbox(object_bbox, sample_bbox) + if proj_bbox: + sample_label.append(bbox_labels[i][0]) + 
sample_label.append(float(proj_bbox.xmin)) + sample_label.append(float(proj_bbox.ymin)) + sample_label.append(float(proj_bbox.xmax)) + sample_label.append(float(proj_bbox.ymax)) + sample_label = sample_label + bbox_labels[i][5:] + sample_labels.append(sample_label) + return sample_labels + + +def expand_image(img, bbox_labels, img_width, img_height): + prob = np.random.uniform(0, 1) + if prob < cfg.expand_prob: + if cfg.expand_max_ratio - 1 >= 0.01: + expand_ratio = np.random.uniform(1, cfg.expand_max_ratio) + height = int(img_height * expand_ratio) + width = int(img_width * expand_ratio) + h_off = math.floor(np.random.uniform(0, height - img_height)) + w_off = math.floor(np.random.uniform(0, width - img_width)) + expand_bbox = bbox(-w_off / img_width, -h_off / img_height, + (width - w_off) / img_width, + (height - h_off) / img_height) + expand_img = np.ones((height, width, 3)) + expand_img = np.uint8(expand_img * np.squeeze(cfg.img_mean)) + expand_img = Image.fromarray(expand_img) + expand_img.paste(img, (int(w_off), int(h_off))) + bbox_labels = transform_labels(bbox_labels, expand_bbox) + return expand_img, bbox_labels, width, height + return img, bbox_labels, img_width, img_height + + +def clip_bbox(src_bbox): + src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0) + src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0) + src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0) + src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0) + return src_bbox + + +def bbox_area(src_bbox): + if src_bbox.xmax < src_bbox.xmin or src_bbox.ymax < src_bbox.ymin: + return 0. + else: + width = src_bbox.xmax - src_bbox.xmin + height = src_bbox.ymax - src_bbox.ymin + return width * height + + +def intersect_bbox(bbox1, bbox2): + if bbox2.xmin > bbox1.xmax or bbox2.xmax < bbox1.xmin or \ + bbox2.ymin > bbox1.ymax or bbox2.ymax < bbox1.ymin: + intersection_box = bbox(0.0, 0.0, 0.0, 0.0) + else: + intersection_box = bbox( + max(bbox1.xmin, bbox2.xmin), + max(bbox1.ymin, bbox2.ymin), + min(bbox1.xmax, bbox2.xmax), min(bbox1.ymax, bbox2.ymax)) + return intersection_box + + +def bbox_coverage(bbox1, bbox2): + inter_box = intersect_bbox(bbox1, bbox2) + intersect_size = bbox_area(inter_box) + + if intersect_size > 0: + bbox1_size = bbox_area(bbox1) + return intersect_size / bbox1_size + else: + return 0. 
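
The helpers above (`intersect`, `jaccard_numpy`, `bbox_area`, `bbox_coverage`) all reduce to the standard IoU/coverage arithmetic on `[xmin, ymin, xmax, ymax]` boxes. A minimal, self-contained sketch of the same computation (the function name here is illustrative, not part of this repository):

```python
import numpy as np

def iou_one_to_many(box_a, box_b):
    """IoU of each row of box_a (shape [N, 4]) against a single box_b (shape [4])."""
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter_wh = np.clip(max_xy - min_xy, a_min=0, a_max=None)   # clamp negative overlaps to 0
    inter = inter_wh[:, 0] * inter_wh[:, 1]
    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

boxes = np.array([[0., 0., 10., 10.],
                  [5., 5., 15., 15.]])
crop = np.array([0., 0., 10., 10.])
print(iou_one_to_many(boxes, crop))  # [1.0, 0.1428...]  -- 25 / (100 + 100 - 25)
```

`bbox_coverage` divides the same intersection by the area of the first box only, which is how `satisfy_sample_constraint` further down can accept a sampled crop that covers enough of a ground-truth face even when the plain IoU is low.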
+ + +def generate_batch_random_samples(batch_sampler, bbox_labels, image_width, + image_height, scale_array, resize_width, + resize_height): + sampled_bbox = [] + for sampler in batch_sampler: + found = 0 + for i in range(sampler.max_trial): + if found >= sampler.max_sample: + break + sample_bbox = data_anchor_sampling( + sampler, bbox_labels, image_width, image_height, scale_array, + resize_width, resize_height) + if sample_bbox == 0: + break + if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): + sampled_bbox.append(sample_bbox) + found = found + 1 + return sampled_bbox + + +def data_anchor_sampling(sampler, bbox_labels, image_width, image_height, + scale_array, resize_width, resize_height): + num_gt = len(bbox_labels) + # np.random.randint range: [low, high) + rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0 + + if num_gt != 0: + norm_xmin = bbox_labels[rand_idx][1] + norm_ymin = bbox_labels[rand_idx][2] + norm_xmax = bbox_labels[rand_idx][3] + norm_ymax = bbox_labels[rand_idx][4] + + xmin = norm_xmin * image_width + ymin = norm_ymin * image_height + wid = image_width * (norm_xmax - norm_xmin) + hei = image_height * (norm_ymax - norm_ymin) + range_size = 0 + + area = wid * hei + for scale_ind in range(0, len(scale_array) - 1): + if area > scale_array[scale_ind] ** 2 and area < \ + scale_array[scale_ind + 1] ** 2: + range_size = scale_ind + 1 + break + + if area > scale_array[len(scale_array) - 2]**2: + range_size = len(scale_array) - 2 + scale_choose = 0.0 + if range_size == 0: + rand_idx_size = 0 + else: + # np.random.randint range: [low, high) + rng_rand_size = np.random.randint(0, range_size + 1) + rand_idx_size = rng_rand_size % (range_size + 1) + + if rand_idx_size == range_size: + min_resize_val = scale_array[rand_idx_size] / 2.0 + max_resize_val = min(2.0 * scale_array[rand_idx_size], + 2 * math.sqrt(wid * hei)) + scale_choose = random.uniform(min_resize_val, max_resize_val) + else: + min_resize_val = scale_array[rand_idx_size] / 2.0 + max_resize_val = 2.0 * scale_array[rand_idx_size] + scale_choose = random.uniform(min_resize_val, max_resize_val) + + sample_bbox_size = wid * resize_width / scale_choose + + w_off_orig = 0.0 + h_off_orig = 0.0 + if sample_bbox_size < max(image_height, image_width): + if wid <= sample_bbox_size: + w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size, + xmin) + else: + w_off_orig = np.random.uniform(xmin, + xmin + wid - sample_bbox_size) + + if hei <= sample_bbox_size: + h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size, + ymin) + else: + h_off_orig = np.random.uniform(ymin, + ymin + hei - sample_bbox_size) + + else: + w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0) + h_off_orig = np.random.uniform( + image_height - sample_bbox_size, 0.0) + + w_off_orig = math.floor(w_off_orig) + h_off_orig = math.floor(h_off_orig) + + # Figure out top left coordinates. 
+ w_off = 0.0 + h_off = 0.0 + w_off = float(w_off_orig / image_width) + h_off = float(h_off_orig / image_height) + + sampled_bbox = bbox(w_off, h_off, + w_off + float(sample_bbox_size / image_width), + h_off + float(sample_bbox_size / image_height)) + + return sampled_bbox + else: + return 0 + + +def jaccard_overlap(sample_bbox, object_bbox): + if sample_bbox.xmin >= object_bbox.xmax or \ + sample_bbox.xmax <= object_bbox.xmin or \ + sample_bbox.ymin >= object_bbox.ymax or \ + sample_bbox.ymax <= object_bbox.ymin: + return 0 + intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin) + intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin) + intersect_xmax = min(sample_bbox.xmax, object_bbox.xmax) + intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax) + intersect_size = (intersect_xmax - intersect_xmin) * ( + intersect_ymax - intersect_ymin) + sample_bbox_size = bbox_area(sample_bbox) + object_bbox_size = bbox_area(object_bbox) + overlap = intersect_size / ( + sample_bbox_size + object_bbox_size - intersect_size) + return overlap + + +def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): + if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0: + has_jaccard_overlap = False + else: + has_jaccard_overlap = True + if sampler.min_object_coverage == 0 and sampler.max_object_coverage == 0: + has_object_coverage = False + else: + has_object_coverage = True + + if not has_jaccard_overlap and not has_object_coverage: + return True + found = False + for i in range(len(bbox_labels)): + object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], + bbox_labels[i][3], bbox_labels[i][4]) + if has_jaccard_overlap: + overlap = jaccard_overlap(sample_bbox, object_bbox) + if sampler.min_jaccard_overlap != 0 and \ + overlap < sampler.min_jaccard_overlap: + continue + if sampler.max_jaccard_overlap != 0 and \ + overlap > sampler.max_jaccard_overlap: + continue + found = True + if has_object_coverage: + object_coverage = bbox_coverage(object_bbox, sample_bbox) + if sampler.min_object_coverage != 0 and \ + object_coverage < sampler.min_object_coverage: + continue + if sampler.max_object_coverage != 0 and \ + object_coverage > sampler.max_object_coverage: + continue + found = True + if found: + return True + return found + + +def crop_image_sampling(img, bbox_labels, sample_bbox, image_width, + image_height, resize_width, resize_height, + min_face_size): + # no clipping here + xmin = int(sample_bbox.xmin * image_width) + xmax = int(sample_bbox.xmax * image_width) + ymin = int(sample_bbox.ymin * image_height) + ymax = int(sample_bbox.ymax * image_height) + w_off = xmin + h_off = ymin + width = xmax - xmin + height = ymax - ymin + + cross_xmin = max(0.0, float(w_off)) + cross_ymin = max(0.0, float(h_off)) + cross_xmax = min(float(w_off + width - 1.0), float(image_width)) + cross_ymax = min(float(h_off + height - 1.0), float(image_height)) + cross_width = cross_xmax - cross_xmin + cross_height = cross_ymax - cross_ymin + + roi_xmin = 0 if w_off >= 0 else abs(w_off) + roi_ymin = 0 if h_off >= 0 else abs(h_off) + roi_width = cross_width + roi_height = cross_height + + roi_y1 = int(roi_ymin) + roi_y2 = int(roi_ymin + roi_height) + roi_x1 = int(roi_xmin) + roi_x2 = int(roi_xmin + roi_width) + + cross_y1 = int(cross_ymin) + cross_y2 = int(cross_ymin + cross_height) + cross_x1 = int(cross_xmin) + cross_x2 = int(cross_xmin + cross_width) + + sample_img = np.zeros((height, width, 3)) + # print(sample_img.shape) + sample_img[roi_y1 : roi_y2, roi_x1 : roi_x2] = \ + img[cross_y1: cross_y2, 
cross_x1: cross_x2] + sample_img = cv2.resize( + sample_img, (resize_width, resize_height), interpolation=cv2.INTER_AREA) + + resize_val = resize_width + sample_labels = transform_labels_sampling(bbox_labels, sample_bbox, + resize_val, min_face_size) + return sample_img, sample_labels + + +def transform_labels_sampling(bbox_labels, sample_bbox, resize_val, + min_face_size): + sample_labels = [] + for i in range(len(bbox_labels)): + sample_label = [] + object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], + bbox_labels[i][3], bbox_labels[i][4]) + if not meet_emit_constraint(object_bbox, sample_bbox): + continue + proj_bbox = project_bbox(object_bbox, sample_bbox) + if proj_bbox: + real_width = float((proj_bbox.xmax - proj_bbox.xmin) * resize_val) + real_height = float((proj_bbox.ymax - proj_bbox.ymin) * resize_val) + if real_width * real_height < float(min_face_size * min_face_size): + continue + else: + sample_label.append(bbox_labels[i][0]) + sample_label.append(float(proj_bbox.xmin)) + sample_label.append(float(proj_bbox.ymin)) + sample_label.append(float(proj_bbox.xmax)) + sample_label.append(float(proj_bbox.ymax)) + sample_label = sample_label + bbox_labels[i][5:] + sample_labels.append(sample_label) + + return sample_labels + + +def generate_sample(sampler, image_width, image_height): + scale = np.random.uniform(sampler.min_scale, sampler.max_scale) + aspect_ratio = np.random.uniform(sampler.min_aspect_ratio, + sampler.max_aspect_ratio) + aspect_ratio = max(aspect_ratio, (scale**2.0)) + aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) + + bbox_width = scale * (aspect_ratio**0.5) + bbox_height = scale / (aspect_ratio**0.5) + + # guarantee a squared image patch after cropping + if sampler.use_square: + if image_height < image_width: + bbox_width = bbox_height * image_height / image_width + else: + bbox_height = bbox_width * image_width / image_height + + xmin_bound = 1 - bbox_width + ymin_bound = 1 - bbox_height + xmin = np.random.uniform(0, xmin_bound) + ymin = np.random.uniform(0, ymin_bound) + xmax = xmin + bbox_width + ymax = ymin + bbox_height + sampled_bbox = bbox(xmin, ymin, xmax, ymax) + return sampled_bbox + + +def generate_batch_samples(batch_sampler, bbox_labels, image_width, + image_height): + sampled_bbox = [] + for sampler in batch_sampler: + found = 0 + for i in range(sampler.max_trial): + if found >= sampler.max_sample: + break + sample_bbox = generate_sample(sampler, image_width, image_height) + if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): + sampled_bbox.append(sample_bbox) + found = found + 1 + return sampled_bbox + + +def crop_image(img, bbox_labels, sample_bbox, image_width, image_height, + resize_width, resize_height, min_face_size): + sample_bbox = clip_bbox(sample_bbox) + xmin = int(sample_bbox.xmin * image_width) + xmax = int(sample_bbox.xmax * image_width) + ymin = int(sample_bbox.ymin * image_height) + ymax = int(sample_bbox.ymax * image_height) + + sample_img = img[ymin:ymax, xmin:xmax] + resize_val = resize_width + sample_labels = transform_labels_sampling(bbox_labels, sample_bbox, + resize_val, min_face_size) + return sample_img, sample_labels + + +def to_chw_bgr(image): + """ + Transpose image from HWC to CHW and from RBG to BGR. + Args: + image (np.array): an image with HWC and RBG layout. 
+ """ + # HWC to CHW + if len(image.shape) == 3: + image = np.swapaxes(image, 1, 2) + image = np.swapaxes(image, 1, 0) + # RBG to BGR + image = image[[2, 1, 0], :, :] + return image + + +def anchor_crop_image_sampling(img, + bbox_labels, + scale_array, + img_width, + img_height): + mean = np.array([104, 117, 123], dtype=np.float32) + maxSize = 12000 # max size + infDistance = 9999999 + bbox_labels = np.array(bbox_labels) + scale = np.array([img_width, img_height, img_width, img_height]) + + boxes = bbox_labels[:, 1:5] * scale + labels = bbox_labels[:, 0] + + boxArea = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) + # argsort = np.argsort(boxArea) + # rand_idx = random.randint(min(len(argsort),6)) + # print('rand idx',rand_idx) + rand_idx = np.random.randint(len(boxArea)) + rand_Side = boxArea[rand_idx] ** 0.5 + # rand_Side = min(boxes[rand_idx,2] - boxes[rand_idx,0] + 1, + # boxes[rand_idx,3] - boxes[rand_idx,1] + 1) + + distance = infDistance + anchor_idx = 5 + for i, anchor in enumerate(scale_array): + if abs(anchor - rand_Side) < distance: + distance = abs(anchor - rand_Side) + anchor_idx = i + + target_anchor = random.choice(scale_array[0:min(anchor_idx + 1, 5) + 1]) + ratio = float(target_anchor) / rand_Side + ratio = ratio * (2**random.uniform(-1, 1)) + + if int(img_height * ratio * img_width * ratio) > maxSize * maxSize: + ratio = (maxSize * maxSize / (img_height * img_width))**0.5 + + interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, + cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] + interp_method = random.choice(interp_methods) + image = cv2.resize(img, None, None, fx=ratio, + fy=ratio, interpolation=interp_method) + + boxes[:, 0] *= ratio + boxes[:, 1] *= ratio + boxes[:, 2] *= ratio + boxes[:, 3] *= ratio + + height, width, _ = image.shape + + sample_boxes = [] + + xmin = boxes[rand_idx, 0] + ymin = boxes[rand_idx, 1] + bw = (boxes[rand_idx, 2] - boxes[rand_idx, 0] + 1) + bh = (boxes[rand_idx, 3] - boxes[rand_idx, 1] + 1) + + w = h = 640 + + for _ in range(50): + if w < max(height, width): + if bw <= w: + w_off = random.uniform(xmin + bw - w, xmin) + else: + w_off = random.uniform(xmin, xmin + bw - w) + + if bh <= h: + h_off = random.uniform(ymin + bh - h, ymin) + else: + h_off = random.uniform(ymin, ymin + bh - h) + else: + w_off = random.uniform(width - w, 0) + h_off = random.uniform(height - h, 0) + + w_off = math.floor(w_off) + h_off = math.floor(h_off) + + # convert to integer rect x1,y1,x2,y2 + rect = np.array( + [int(w_off), int(h_off), int(w_off + w), int(h_off + h)]) + + # keep overlap with gt box IF center in sampled patch + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + # mask in all gt boxes that above and to the left of centers + m1 = (rect[0] <= boxes[:, 0]) * (rect[1] <= boxes[:, 1]) + # mask in all gt boxes that under and to the right of centers + m2 = (rect[2] >= boxes[:, 2]) * (rect[3] >= boxes[:, 3]) + # mask in that both m1 and m2 are true + mask = m1 * m2 + + overlap = jaccard_numpy(boxes, rect) + # have any valid boxes? 
try again if not + if not mask.any() and not overlap.max() > 0.7: + continue + else: + sample_boxes.append(rect) + + sampled_labels = [] + + if len(sample_boxes) > 0: + choice_idx = np.random.randint(len(sample_boxes)) + choice_box = sample_boxes[choice_idx] + # print('crop the box :',choice_box) + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + m1 = (choice_box[0] < centers[:, 0]) * \ + (choice_box[1] < centers[:, 1]) + m2 = (choice_box[2] > centers[:, 0]) * \ + (choice_box[3] > centers[:, 1]) + mask = m1 * m2 + current_boxes = boxes[mask, :].copy() + current_labels = labels[mask] + current_boxes[:, :2] -= choice_box[:2] + current_boxes[:, 2:] -= choice_box[:2] + + if choice_box[0] < 0 or choice_box[1] < 0: + new_img_width = width if choice_box[ + 0] >= 0 else width - choice_box[0] + new_img_height = height if choice_box[ + 1] >= 0 else height - choice_box[1] + image_pad = np.zeros( + (new_img_height, new_img_width, 3), dtype=float) + image_pad[:, :, :] = mean + start_left = 0 if choice_box[0] >= 0 else -choice_box[0] + start_top = 0 if choice_box[1] >= 0 else -choice_box[1] + image_pad[start_top:, start_left:, :] = image + + choice_box_w = choice_box[2] - choice_box[0] + choice_box_h = choice_box[3] - choice_box[1] + + start_left = choice_box[0] if choice_box[0] >= 0 else 0 + start_top = choice_box[1] if choice_box[1] >= 0 else 0 + end_right = start_left + choice_box_w + end_bottom = start_top + choice_box_h + current_image = image_pad[ + start_top:end_bottom, start_left:end_right, :].copy() + image_height, image_width, _ = current_image.shape + if cfg.filter_min_face: + bbox_w = current_boxes[:, 2] - current_boxes[:, 0] + bbox_h = current_boxes[:, 3] - current_boxes[:, 1] + bbox_area = bbox_w * bbox_h + mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) + current_boxes = current_boxes[mask] + current_labels = current_labels[mask] + for i in range(len(current_boxes)): + sample_label = [] + sample_label.append(current_labels[i]) + sample_label.append(current_boxes[i][0] / image_width) + sample_label.append(current_boxes[i][1] / image_height) + sample_label.append(current_boxes[i][2] / image_width) + sample_label.append(current_boxes[i][3] / image_height) + sampled_labels += [sample_label] + sampled_labels = np.array(sampled_labels) + else: + current_boxes /= np.array([image_width, + image_height, image_width, image_height]) + sampled_labels = np.hstack( + (current_labels[:, np.newaxis], current_boxes)) + + return current_image, sampled_labels + + current_image = image[choice_box[1]:choice_box[ + 3], choice_box[0]:choice_box[2], :].copy() + image_height, image_width, _ = current_image.shape + + if cfg.filter_min_face: + bbox_w = current_boxes[:, 2] - current_boxes[:, 0] + bbox_h = current_boxes[:, 3] - current_boxes[:, 1] + bbox_area = bbox_w * bbox_h + mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) + current_boxes = current_boxes[mask] + current_labels = current_labels[mask] + for i in range(len(current_boxes)): + sample_label = [] + sample_label.append(current_labels[i]) + sample_label.append(current_boxes[i][0] / image_width) + sample_label.append(current_boxes[i][1] / image_height) + sample_label.append(current_boxes[i][2] / image_width) + sample_label.append(current_boxes[i][3] / image_height) + sampled_labels += [sample_label] + sampled_labels = np.array(sampled_labels) + else: + current_boxes /= np.array([image_width, + image_height, image_width, image_height]) + sampled_labels = np.hstack( + (current_labels[:, np.newaxis], current_boxes)) + + return 
current_image, sampled_labels + else: + image_height, image_width, _ = image.shape + if cfg.filter_min_face: + bbox_w = boxes[:, 2] - boxes[:, 0] + bbox_h = boxes[:, 3] - boxes[:, 1] + bbox_area = bbox_w * bbox_h + mask = bbox_area > (cfg.min_face_size * cfg.min_face_size) + boxes = boxes[mask] + labels = labels[mask] + for i in range(len(boxes)): + sample_label = [] + sample_label.append(labels[i]) + sample_label.append(boxes[i][0] / image_width) + sample_label.append(boxes[i][1] / image_height) + sample_label.append(boxes[i][2] / image_width) + sample_label.append(boxes[i][3] / image_height) + sampled_labels += [sample_label] + sampled_labels = np.array(sampled_labels) + else: + boxes /= np.array([image_width, image_height, + image_width, image_height]) + sampled_labels = np.hstack( + (labels[:, np.newaxis], boxes)) + + return image, sampled_labels + + +def preprocess(img, bbox_labels, mode, image_path): + img_width, img_height = img.size + sampled_labels = bbox_labels + if mode == 'train': + if cfg.apply_distort: + img = distort_image(img) + if cfg.apply_expand: + img, bbox_labels, img_width, img_height = expand_image( + img, bbox_labels, img_width, img_height) + + batch_sampler = [] + prob = np.random.uniform(0., 1.) + if prob > cfg.data_anchor_sampling_prob and cfg.anchor_sampling: + scale_array = np.array([16, 32, 64, 128, 256, 512]) + ''' + batch_sampler.append( + sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.6, 0.0, True)) + sampled_bbox = generate_batch_random_samples( + batch_sampler, bbox_labels, img_width, img_height, scale_array, + cfg.resize_width, cfg.resize_height) + ''' + img = np.array(img) + img, sampled_labels = anchor_crop_image_sampling( + img, bbox_labels, scale_array, img_width, img_height) + ''' + if len(sampled_bbox) > 0: + idx = int(np.random.uniform(0, len(sampled_bbox))) + img, sampled_labels = crop_image_sampling( + img, bbox_labels, sampled_bbox[idx], img_width, img_height, + cfg.resize_width, cfg.resize_height, cfg.min_face_size) + ''' + img = img.astype('uint8') + img = Image.fromarray(img) + else: + batch_sampler.append(sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, True)) + batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, True)) + batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, True)) + batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, True)) + batch_sampler.append(sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, + 0.0, True)) + sampled_bbox = generate_batch_samples( + batch_sampler, bbox_labels, img_width, img_height) + + img = np.array(img) + if len(sampled_bbox) > 0: + idx = int(np.random.uniform(0, len(sampled_bbox))) + img, sampled_labels = crop_image( + img, bbox_labels, sampled_bbox[idx], img_width, img_height, + cfg.resize_width, cfg.resize_height, cfg.min_face_size) + + img = Image.fromarray(img) + + interp_mode = [ + Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC, + Image.LANCZOS + ] + interp_indx = np.random.randint(0, 5) + + img = img.resize((cfg.resize_width, cfg.resize_height), + resample=interp_mode[interp_indx]) + + img = np.array(img) + + if mode == 'train': + mirror = int(np.random.uniform(0, 2)) + if mirror == 1: + img = img[:, ::-1, :] + for i in six.moves.xrange(len(sampled_labels)): + tmp = sampled_labels[i][1] + sampled_labels[i][1] = 1 - sampled_labels[i][3] + sampled_labels[i][3] = 1 - tmp + + #img = Image.fromarray(img) + img = to_chw_bgr(img) + img = img.astype('float32') + img -= cfg.img_mean + img = img[[2, 1, 0], :, 
:] # to RGB + #img = img * cfg.scale + + return img, sampled_labels diff --git a/PyTorch/contrib/cv/detection/RefineDet/README.md b/PyTorch/contrib/cv/detection/RefineDet/README.md index 9acc3e968d317ce8d1f20180ced4ce247e722926..b3fba97b024ea603c5b983721db33801ea333e36 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/README.md +++ b/PyTorch/contrib/cv/detection/RefineDet/README.md @@ -1,49 +1,49 @@ -#RefineDet模型PyTorch 训练指导 - -## 1 环境准备 - -1.安装必要的依赖 - -pip install -r requirements.txt - -2.获取数据集 - -``` -sh data/scripts/VOC2007.sh -sh data/scripts/VOC2012.sh -``` -下载好的数据集位于 ./data/VOCdevkit - - -## 2 训练 - -路径要写到 VOCdevkit - -``` -# npu env -source test/env_npu.sh - -# 1p train perf -bash test/train_performance_1p.sh --data_path=xxx - -# 路径要写到 VOCdevkit -# 例如 - -bash test/train_performance_1p.sh --data_path=./data/VOCdevkit - -# 8p train perf -bash test/train_performance_8p.sh --data_path=xxx - -# 8p train full -bash test/train_full_8p.sh --data_path=xxx - -# 8p eval -bash test/train_eval_8p.sh --data_path=xxx - -# finetuning -bash test/train_finetune_1p.sh --data_path=xxx - -# online inference demo -python3.7 demo.py - +#RefineDet模型PyTorch 训练指导 + +## 1 环境准备 + +1.安装必要的依赖 + +pip install -r requirements.txt + +2.获取数据集 + +``` +sh data/scripts/VOC2007.sh +sh data/scripts/VOC2012.sh +``` +下载好的数据集位于 ./data/VOCdevkit + + +## 2 训练 + +路径要写到 VOCdevkit + +``` +# npu env +source test/env_npu.sh + +# 1p train perf +bash test/train_performance_1p.sh --data_path=xxx + +# 路径要写到 VOCdevkit +# 例如 + +bash test/train_performance_1p.sh --data_path=./data/VOCdevkit + +# 8p train perf +bash test/train_performance_8p.sh --data_path=xxx + +# 8p train full +bash test/train_full_8p.sh --data_path=xxx + +# 8p eval +bash test/train_eval_8p.sh --data_path=xxx + +# finetuning +bash test/train_finetune_1p.sh --data_path=xxx + +# online inference demo +python3.7 demo.py + ``` \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RefineDet/demo.py b/PyTorch/contrib/cv/detection/RefineDet/demo.py index e1d12a5a1a6f2c3f91aec02b4b3d10c0f849d449..bce020fc3693c9c620b9083be0beea62f9a74aed 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/demo.py +++ b/PyTorch/contrib/cv/detection/RefineDet/demo.py @@ -1,22 +1,22 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from models.refinedet import build_refinedet -torch.npu.set_device('npu:0') -net = build_refinedet('test', 320, 21, batch_norm=True).npu() -input = torch.randn(size=(1, 3, 320, 320)).npu() -out = net(input) +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from models.refinedet import build_refinedet +torch.npu.set_device('npu:0') +net = build_refinedet('test', 320, 21, batch_norm=True).npu() +input = torch.randn(size=(1, 3, 320, 320)).npu() +out = net(input) print(out) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RefineDet/eval_refinedet.py b/PyTorch/contrib/cv/detection/RefineDet/eval_refinedet.py index 42bb8f17b29f5c65f937f722bf5421d0354e59ad..e960f367b07b4762eacc80e41d618a38a88d48d1 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/eval_refinedet.py +++ b/PyTorch/contrib/cv/detection/RefineDet/eval_refinedet.py @@ -1,466 +1,466 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Adapted from: - @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch - @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn - Licensed under The MIT License [see LICENSE for details] -""" - -from __future__ import print_function -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -from torch.autograd import Variable -from data import VOCAnnotationTransform, VOCDetection, BaseTransform -from data import VOC_CLASSES as labelmap -from data import voc_refinedet, detection_collate_test -import torch.utils.data as data - -from models.refinedet import build_refinedet - -import sys -import os -import time -import argparse -import numpy as np -import pickle -import cv2 -from apex import amp - -cfg = voc_refinedet['320'] - -if sys.version_info[0] == 2: - import xml.etree.cElementTree as ET -else: - import xml.etree.ElementTree as ET - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -class Timer(object): - """A simple timer.""" - def __init__(self): - self.total_time = 0. - self.calls = 0 - self.start_time = 0. - self.diff = 0. - self.average_time = 0. 
- - def tic(self): - self.start_time = time.time() - - def toc(self, average=True): - self.diff = time.time() - self.start_time - self.total_time += self.diff - self.calls += 1 - self.average_time = self.total_time / self.calls - if average: - return self.average_time - else: - return self.diff - - -def parse_rec(filename): - """ Parse a PASCAL VOC xml file """ - tree = ET.parse(filename) - objects = [] - for obj in tree.findall('object'): - obj_struct = {} - obj_struct['name'] = obj.find('name').text - obj_struct['pose'] = obj.find('pose').text - obj_struct['truncated'] = int(obj.find('truncated').text) - obj_struct['difficult'] = int(obj.find('difficult').text) - bbox = obj.find('bndbox') - obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1, - int(bbox.find('ymin').text) - 1, - int(bbox.find('xmax').text) - 1, - int(bbox.find('ymax').text) - 1] - objects.append(obj_struct) - - return objects - - -def get_output_dir(name, phase): - """Return the directory where experimental artifacts are placed. - If the directory does not exist, it is created. - A canonical path is built using the name from an imdb and a network - (if not None). - """ - filedir = os.path.join(name, phase) - if not os.path.exists(filedir): - os.makedirs(filedir) - return filedir - - -def get_voc_results_file_template(image_set, cls): - filename = 'det_' + image_set + '_%s.txt' % (cls) - filedir = os.path.join(devkit_path, 'results') - if not os.path.exists(filedir): - os.makedirs(filedir) - path = os.path.join(filedir, filename) - return path - - -def write_voc_results_file(all_boxes, dataset, set_type='test'): - for cls_ind, cls in enumerate(labelmap): - print('Writing {:s} VOC results file'.format(cls)) - filename = get_voc_results_file_template(set_type, cls) - with open(filename, 'wt') as f: - for im_ind, index in enumerate(dataset.ids): - dets = all_boxes[cls_ind+1][im_ind] - if dets == []: - continue - # the VOCdevkit expects 1-based indices - for k in range(dets.shape[0]): - f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. - format(index[1], dets[k, -1], - dets[k, 0] + 1, dets[k, 1] + 1, - dets[k, 2] + 1, dets[k, 3] + 1)) - - -def do_python_eval(output_dir='output', use_07=True, set_type='test'): - cachedir = os.path.join(devkit_path, 'annotations_cache') - aps = [] - # The PASCAL VOC metric changed in 2010 - use_07_metric = use_07 - print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - for i, cls in enumerate(labelmap): - filename = get_voc_results_file_template(set_type, cls) - rec, prec, ap = voc_eval( - filename, annopath, imgsetpath.format(set_type), cls, cachedir, - ovthresh=0.5, use_07_metric=use_07_metric) - aps += [ap] - print('AP for {} = {:.4f}'.format(cls, ap)) - with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: - pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) - mAp = np.mean(aps) - print('Mean AP on ' + set_type + ' set = {:.4f}'.format(mAp)) - return mAp.item() - - -def voc_ap(rec, prec, use_07_metric=True): - """ ap = voc_ap(rec, prec, [use_07_metric]) - Compute VOC AP given precision and recall. - If use_07_metric is true, uses the - VOC 07 11 point method (default:True). - """ - if use_07_metric: - # 11 point metric - ap = 0. - for t in np.arange(0., 1.1, 0.1): - if np.sum(rec >= t) == 0: - p = 0 - else: - p = np.max(prec[rec >= t]) - ap = ap + p / 11. 
- else: - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.], rec, [1.])) - mpre = np.concatenate(([0.], prec, [0.])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def voc_eval(detpath, - annopath, - imagesetfile, - classname, - cachedir, - ovthresh=0.5, - use_07_metric=True): - """rec, prec, ap = voc_eval(detpath, - annopath, - imagesetfile, - classname, - [ovthresh], - [use_07_metric]) -Top level function that does the PASCAL VOC evaluation. -detpath: Path to detections - detpath.format(classname) should produce the detection results file. -annopath: Path to annotations - annopath.format(imagename) should be the xml annotations file. -imagesetfile: Text file containing the list of images, one image per line. -classname: Category name (duh) -cachedir: Directory for caching the annotations -[ovthresh]: Overlap threshold (default = 0.5) -[use_07_metric]: Whether to use VOC07's 11 point AP computation - (default True) -""" -# assumes detections are in detpath.format(classname) -# assumes annotations are in annopath.format(imagename) -# assumes imagesetfile is a text file with each line an image name -# cachedir caches the annotations in a pickle file -# first load gt - if not os.path.isdir(cachedir): - os.mkdir(cachedir) - cachefile = os.path.join(cachedir, 'annots.pkl') - # read list of images - with open(imagesetfile, 'r') as f: - lines = f.readlines() - imagenames = [x.strip() for x in lines] - if not os.path.isfile(cachefile): - # load annots - recs = {} - for i, imagename in enumerate(imagenames): - recs[imagename] = parse_rec(annopath % (imagename)) - if i % 100 == 0: - print('Reading annotation for {:d}/{:d}'.format( - i + 1, len(imagenames))) - # save - print('Saving cached annotations to {:s}'.format(cachefile)) - with open(cachefile, 'wb') as f: - pickle.dump(recs, f) - else: - # load - with open(cachefile, 'rb') as f: - recs = pickle.load(f) - - # extract gt objects for this class - class_recs = {} - npos = 0 - for imagename in imagenames: - R = [obj for obj in recs[imagename] if obj['name'] == classname] - bbox = np.array([x['bbox'] for x in R]) - difficult = np.array([x['difficult'] for x in R]).astype(np.bool) - det = [False] * len(R) - npos = npos + sum(~difficult) - class_recs[imagename] = {'bbox': bbox, - 'difficult': difficult, - 'det': det} - - # read dets - detfile = detpath.format(classname) - with open(detfile, 'r') as f: - lines = f.readlines() - if any(lines) == 1: - - splitlines = [x.strip().split(' ') for x in lines] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[1]) for x in splitlines]) - BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - sorted_scores = np.sort(-confidence) - BB = BB[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - - # go down dets and mark TPs and FPs - nd = len(image_ids) - tp = np.zeros(nd) - fp = np.zeros(nd) - for d in range(nd): - R = class_recs[image_ids[d]] - bb = BB[d, :].astype(float) - ovmax = -np.inf - BBGT = R['bbox'].astype(float) - if BBGT.size > 0: - # compute overlaps - # intersection - ixmin = np.maximum(BBGT[:, 0], bb[0]) - iymin = 
np.maximum(BBGT[:, 1], bb[1]) - ixmax = np.minimum(BBGT[:, 2], bb[2]) - iymax = np.minimum(BBGT[:, 3], bb[3]) - iw = np.maximum(ixmax - ixmin, 0.) - ih = np.maximum(iymax - iymin, 0.) - inters = iw * ih - uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) + - (BBGT[:, 2] - BBGT[:, 0]) * - (BBGT[:, 3] - BBGT[:, 1]) - inters) - overlaps = inters / uni - ovmax = np.max(overlaps) - jmax = np.argmax(overlaps) - - if ovmax > ovthresh: - if not R['difficult'][jmax]: - if not R['det'][jmax]: - tp[d] = 1. - R['det'][jmax] = 1 - else: - fp[d] = 1. - else: - fp[d] = 1. - - # compute precision recall - fp = np.cumsum(fp) - tp = np.cumsum(tp) - rec = tp / float(npos) - # avoid divide by zero in case the first detection matches a difficult - # ground truth - prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) - ap = voc_ap(rec, prec, use_07_metric) - else: - rec = -1. - prec = -1. - ap = -1. - - return rec, prec, ap - - -def test_net(save_folder, net, cuda, dataset, dataloador, transform, top_k, - im_size=300, thresh=0.05, set_type='test'): - num_images = len(dataset) - # all detections are collected into: - # all_boxes[cls][image] = N x 5 array of detections in - # (x1, y1, x2, y2, score) - all_boxes = [[[] for _ in range(num_images)] - for _ in range(len(labelmap)+1)] - - # timers - _t = {'im_detect': Timer(), 'misc': Timer()} - output_dir = get_output_dir('ssd300_120000', set_type) - det_file = os.path.join(output_dir, 'detections.pkl') - - detection_list, h_list, w_list = [], [], [] - for i, item in enumerate(dataloador): - # im, gt, h, w = dataset.pull_item(i) - # print(im.shape,h,w) - - x, _, h, w = item - bs, _, _, _ = x.size() - # print(x.size()) - # x = Variable(im.unsqueeze(0)) - if cfg['cuda']: - x = x.cuda() - elif cfg['npu']: - x = x.npu() - _t['im_detect'].tic() - detections = net(x).data - detect_time = _t['im_detect'].toc(average=False) - detection_list.append(detections.cpu()) - h_list.extend(h) - w_list.extend(w) - print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, - len(dataloador), detect_time)) - # skip j = 0, because it's the background class - strat_time = time.time() - detections = torch.cat(detection_list, dim=0) - for idx in range(detections.size(0)): - h, w = h_list[idx], w_list[idx] - for j in range(1, detections.size(1)): - #dets = detections[0, j, :] - dets = detections[idx, j, :] - mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t() - dets = torch.masked_select(dets, mask).view(-1, 5) - if dets.size(0) == 0: - continue - boxes = dets[:, 1:] - boxes[:, 0] *= w - boxes[:, 2] *= w - boxes[:, 1] *= h - boxes[:, 3] *= h - scores = dets[:, 0].cpu().numpy() - cls_dets = np.hstack((boxes.cpu().numpy(), - scores[:, np.newaxis])).astype(np.float32, - copy=False) - # all_boxes[j][i] = cls_dets - all_boxes[j][idx] = cls_dets - end_time = time.time() - print('spend time: %.3fs'%(end_time-strat_time)) - - with open(det_file, 'wb') as f: - pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) - - print('Evaluating detections') - mAp = evaluate_detections(all_boxes, output_dir, dataset, set_type=set_type) - return mAp - - -def evaluate_detections(box_list, output_dir, dataset, set_type='test'): - write_voc_results_file(box_list, dataset, set_type=set_type) - mAp = do_python_eval(output_dir, set_type=set_type) - return mAp - - - - -if __name__ == '__main__': - pth_path, data_path = sys.argv[1:3] - if not os.path.exists(cfg['save_folder']): - os.makedirs(cfg['save_folder']) - - if torch.cuda.is_available(): - if cfg['cuda']: - torch.set_default_tensor_type('torch.cuda.FloatTensor') - if not 
cfg['cuda']: - print("WARNING: It looks like you have a CUDA device, but aren't using \ - CUDA. Run with --cuda for optimal eval speed.") - torch.set_default_tensor_type('torch.FloatTensor') - else: - torch.set_default_tensor_type('torch.FloatTensor') - - annopath = os.path.join(data_path, 'VOC2007', 'Annotations', '%s.xml') - imgpath = os.path.join(data_path, 'VOC2007', 'JPEGImages', '%s.jpg') - imgsetpath = os.path.join(data_path, 'VOC2007', 'ImageSets', - 'Main', '{:s}.txt') - YEAR = '2007' - devkit_path = data_path + 'VOC' + YEAR - dataset_mean = (104, 117, 123) - # load net - num_classes = len(labelmap) + 1 # +1 for background - net = build_refinedet('test', int(cfg['input_size']), num_classes, batch_norm=True) # initialize SSD - - - - # load data - set_type = 'test' - dataset = VOCDetection(root=data_path, - image_sets=[('2007', set_type)], - transform=BaseTransform(int(cfg['input_size']), dataset_mean), - target_transform=VOCAnnotationTransform(), - dataset_name='VOC07test') - - if cfg['cuda']: - net = net.cuda() - cudnn.benchmark = True - elif cfg['npu']: - net = net.npu() - cudnn.benchmark = True - - if cfg['amp']: - net = amp.initialize(net, opt_level='O1', loss_scale=128) - - - net.eval() - data_loader = data.DataLoader(dataset, - batch_size=128, - num_workers=16, - shuffle=False, - collate_fn=detection_collate_test, - pin_memory=True) - save_path = './RefineDet320_bn/RefineDet320_VOC_231.pth' - save_path = pth_path - net.load_state_dict(torch.load(save_path, map_location='cpu')) - print('Finished loading model! ' + save_path) - - # evaluation - with torch.no_grad(): - mAp = test_net(cfg['save_folder'], net, cfg['cuda'], dataset, data_loader, - BaseTransform(int(cfg['input_size']), dataset_mean), cfg['top_k'], int(cfg['input_size']), - thresh=cfg['confidence_threshold']) +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adapted from: + @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch + @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn + Licensed under The MIT License [see LICENSE for details] +""" + +from __future__ import print_function +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +from torch.autograd import Variable +from data import VOCAnnotationTransform, VOCDetection, BaseTransform +from data import VOC_CLASSES as labelmap +from data import voc_refinedet, detection_collate_test +import torch.utils.data as data + +from models.refinedet import build_refinedet + +import sys +import os +import time +import argparse +import numpy as np +import pickle +import cv2 +from apex import amp + +cfg = voc_refinedet['320'] + +if sys.version_info[0] == 2: + import xml.etree.cElementTree as ET +else: + import xml.etree.ElementTree as ET + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + +class Timer(object): + """A simple timer.""" + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. 
+ self.average_time = 0. + + def tic(self): + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + return self.average_time + else: + return self.diff + + +def parse_rec(filename): + """ Parse a PASCAL VOC xml file """ + tree = ET.parse(filename) + objects = [] + for obj in tree.findall('object'): + obj_struct = {} + obj_struct['name'] = obj.find('name').text + obj_struct['pose'] = obj.find('pose').text + obj_struct['truncated'] = int(obj.find('truncated').text) + obj_struct['difficult'] = int(obj.find('difficult').text) + bbox = obj.find('bndbox') + obj_struct['bbox'] = [int(bbox.find('xmin').text) - 1, + int(bbox.find('ymin').text) - 1, + int(bbox.find('xmax').text) - 1, + int(bbox.find('ymax').text) - 1] + objects.append(obj_struct) + + return objects + + +def get_output_dir(name, phase): + """Return the directory where experimental artifacts are placed. + If the directory does not exist, it is created. + A canonical path is built using the name from an imdb and a network + (if not None). + """ + filedir = os.path.join(name, phase) + if not os.path.exists(filedir): + os.makedirs(filedir) + return filedir + + +def get_voc_results_file_template(image_set, cls): + filename = 'det_' + image_set + '_%s.txt' % (cls) + filedir = os.path.join(devkit_path, 'results') + if not os.path.exists(filedir): + os.makedirs(filedir) + path = os.path.join(filedir, filename) + return path + + +def write_voc_results_file(all_boxes, dataset, set_type='test'): + for cls_ind, cls in enumerate(labelmap): + print('Writing {:s} VOC results file'.format(cls)) + filename = get_voc_results_file_template(set_type, cls) + with open(filename, 'wt') as f: + for im_ind, index in enumerate(dataset.ids): + dets = all_boxes[cls_ind+1][im_ind] + if dets == []: + continue + # the VOCdevkit expects 1-based indices + for k in range(dets.shape[0]): + f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. + format(index[1], dets[k, -1], + dets[k, 0] + 1, dets[k, 1] + 1, + dets[k, 2] + 1, dets[k, 3] + 1)) + + +def do_python_eval(output_dir='output', use_07=True, set_type='test'): + cachedir = os.path.join(devkit_path, 'annotations_cache') + aps = [] + # The PASCAL VOC metric changed in 2010 + use_07_metric = use_07 + print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + for i, cls in enumerate(labelmap): + filename = get_voc_results_file_template(set_type, cls) + rec, prec, ap = voc_eval( + filename, annopath, imgsetpath.format(set_type), cls, cachedir, + ovthresh=0.5, use_07_metric=use_07_metric) + aps += [ap] + print('AP for {} = {:.4f}'.format(cls, ap)) + with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: + pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) + mAp = np.mean(aps) + print('Mean AP on ' + set_type + ' set = {:.4f}'.format(mAp)) + return mAp.item() + + +def voc_ap(rec, prec, use_07_metric=True): + """ ap = voc_ap(rec, prec, [use_07_metric]) + Compute VOC AP given precision and recall. + If use_07_metric is true, uses the + VOC 07 11 point method (default:True). + """ + if use_07_metric: + # 11 point metric + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11. 
+ else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def voc_eval(detpath, + annopath, + imagesetfile, + classname, + cachedir, + ovthresh=0.5, + use_07_metric=True): + """rec, prec, ap = voc_eval(detpath, + annopath, + imagesetfile, + classname, + [ovthresh], + [use_07_metric]) +Top level function that does the PASCAL VOC evaluation. +detpath: Path to detections + detpath.format(classname) should produce the detection results file. +annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. +imagesetfile: Text file containing the list of images, one image per line. +classname: Category name (duh) +cachedir: Directory for caching the annotations +[ovthresh]: Overlap threshold (default = 0.5) +[use_07_metric]: Whether to use VOC07's 11 point AP computation + (default True) +""" +# assumes detections are in detpath.format(classname) +# assumes annotations are in annopath.format(imagename) +# assumes imagesetfile is a text file with each line an image name +# cachedir caches the annotations in a pickle file +# first load gt + if not os.path.isdir(cachedir): + os.mkdir(cachedir) + cachefile = os.path.join(cachedir, 'annots.pkl') + # read list of images + with open(imagesetfile, 'r') as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + if not os.path.isfile(cachefile): + # load annots + recs = {} + for i, imagename in enumerate(imagenames): + recs[imagename] = parse_rec(annopath % (imagename)) + if i % 100 == 0: + print('Reading annotation for {:d}/{:d}'.format( + i + 1, len(imagenames))) + # save + print('Saving cached annotations to {:s}'.format(cachefile)) + with open(cachefile, 'wb') as f: + pickle.dump(recs, f) + else: + # load + with open(cachefile, 'rb') as f: + recs = pickle.load(f) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj['name'] == classname] + bbox = np.array([x['bbox'] for x in R]) + difficult = np.array([x['difficult'] for x in R]).astype(np.bool) + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {'bbox': bbox, + 'difficult': difficult, + 'det': det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, 'r') as f: + lines = f.readlines() + if any(lines) == 1: + + splitlines = [x.strip().split(' ') for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R['bbox'].astype(float) + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = 
np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin, 0.) + ih = np.maximum(iymax - iymin, 0.) + inters = iw * ih + uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) + + (BBGT[:, 2] - BBGT[:, 0]) * + (BBGT[:, 3] - BBGT[:, 1]) - inters) + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R['difficult'][jmax]: + if not R['det'][jmax]: + tp[d] = 1. + R['det'][jmax] = 1 + else: + fp[d] = 1. + else: + fp[d] = 1. + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + else: + rec = -1. + prec = -1. + ap = -1. + + return rec, prec, ap + + +def test_net(save_folder, net, cuda, dataset, dataloador, transform, top_k, + im_size=300, thresh=0.05, set_type='test'): + num_images = len(dataset) + # all detections are collected into: + # all_boxes[cls][image] = N x 5 array of detections in + # (x1, y1, x2, y2, score) + all_boxes = [[[] for _ in range(num_images)] + for _ in range(len(labelmap)+1)] + + # timers + _t = {'im_detect': Timer(), 'misc': Timer()} + output_dir = get_output_dir('ssd300_120000', set_type) + det_file = os.path.join(output_dir, 'detections.pkl') + + detection_list, h_list, w_list = [], [], [] + for i, item in enumerate(dataloador): + # im, gt, h, w = dataset.pull_item(i) + # print(im.shape,h,w) + + x, _, h, w = item + bs, _, _, _ = x.size() + # print(x.size()) + # x = Variable(im.unsqueeze(0)) + if cfg['cuda']: + x = x.cuda() + elif cfg['npu']: + x = x.npu() + _t['im_detect'].tic() + detections = net(x).data + detect_time = _t['im_detect'].toc(average=False) + detection_list.append(detections.cpu()) + h_list.extend(h) + w_list.extend(w) + print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, + len(dataloador), detect_time)) + # skip j = 0, because it's the background class + strat_time = time.time() + detections = torch.cat(detection_list, dim=0) + for idx in range(detections.size(0)): + h, w = h_list[idx], w_list[idx] + for j in range(1, detections.size(1)): + #dets = detections[0, j, :] + dets = detections[idx, j, :] + mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t() + dets = torch.masked_select(dets, mask).view(-1, 5) + if dets.size(0) == 0: + continue + boxes = dets[:, 1:] + boxes[:, 0] *= w + boxes[:, 2] *= w + boxes[:, 1] *= h + boxes[:, 3] *= h + scores = dets[:, 0].cpu().numpy() + cls_dets = np.hstack((boxes.cpu().numpy(), + scores[:, np.newaxis])).astype(np.float32, + copy=False) + # all_boxes[j][i] = cls_dets + all_boxes[j][idx] = cls_dets + end_time = time.time() + print('spend time: %.3fs'%(end_time-strat_time)) + + with open(det_file, 'wb') as f: + pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) + + print('Evaluating detections') + mAp = evaluate_detections(all_boxes, output_dir, dataset, set_type=set_type) + return mAp + + +def evaluate_detections(box_list, output_dir, dataset, set_type='test'): + write_voc_results_file(box_list, dataset, set_type=set_type) + mAp = do_python_eval(output_dir, set_type=set_type) + return mAp + + + + +if __name__ == '__main__': + pth_path, data_path = sys.argv[1:3] + if not os.path.exists(cfg['save_folder']): + os.makedirs(cfg['save_folder']) + + if torch.cuda.is_available(): + if cfg['cuda']: + torch.set_default_tensor_type('torch.cuda.FloatTensor') + if not 
cfg['cuda']: + print("WARNING: It looks like you have a CUDA device, but aren't using \ + CUDA. Run with --cuda for optimal eval speed.") + torch.set_default_tensor_type('torch.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + annopath = os.path.join(data_path, 'VOC2007', 'Annotations', '%s.xml') + imgpath = os.path.join(data_path, 'VOC2007', 'JPEGImages', '%s.jpg') + imgsetpath = os.path.join(data_path, 'VOC2007', 'ImageSets', + 'Main', '{:s}.txt') + YEAR = '2007' + devkit_path = data_path + 'VOC' + YEAR + dataset_mean = (104, 117, 123) + # load net + num_classes = len(labelmap) + 1 # +1 for background + net = build_refinedet('test', int(cfg['input_size']), num_classes, batch_norm=True) # initialize SSD + + + + # load data + set_type = 'test' + dataset = VOCDetection(root=data_path, + image_sets=[('2007', set_type)], + transform=BaseTransform(int(cfg['input_size']), dataset_mean), + target_transform=VOCAnnotationTransform(), + dataset_name='VOC07test') + + if cfg['cuda']: + net = net.cuda() + cudnn.benchmark = True + elif cfg['npu']: + net = net.npu() + cudnn.benchmark = True + + if cfg['amp']: + net = amp.initialize(net, opt_level='O1', loss_scale=128) + + + net.eval() + data_loader = data.DataLoader(dataset, + batch_size=128, + num_workers=16, + shuffle=False, + collate_fn=detection_collate_test, + pin_memory=True) + save_path = './RefineDet320_bn/RefineDet320_VOC_231.pth' + save_path = pth_path + net.load_state_dict(torch.load(save_path, map_location='cpu')) + print('Finished loading model! ' + save_path) + + # evaluation + with torch.no_grad(): + mAp = test_net(cfg['save_folder'], net, cfg['cuda'], dataset, data_loader, + BaseTransform(int(cfg['input_size']), dataset_mean), cfg['top_k'], int(cfg['input_size']), + thresh=cfg['confidence_threshold']) diff --git a/PyTorch/contrib/cv/detection/RefineDet/models/timeAcc.py b/PyTorch/contrib/cv/detection/RefineDet/models/timeAcc.py index 3c81c2329ccb89b0d6fde50d3565f1418bbc9c18..91507f0850c6a73de3ca62c277b70e79b35695bd 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/models/timeAcc.py +++ b/PyTorch/contrib/cv/detection/RefineDet/models/timeAcc.py @@ -1,43 +1,43 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
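
The `AverageMeter` in `models/timeAcc.py` (removed and re-added unchanged in this hunk) deliberately waits until `start_count_index` batches have passed before it starts accumulating, so warm-up iterations do not distort the reported throughput. A hedged usage sketch, assuming the import path used by `train_1p.py` later in this diff; the `n=batch_size` weighting and the `time.sleep` stand-in are illustrative assumptions:

```python
import time
from models.timeAcc import AverageMeter  # module path as re-added in this diff

batch_size = 32
fps = AverageMeter('FPS', ':.1f', start_count_index=20)  # drop the first 20 batches

for step in range(100):
    start = time.time()
    time.sleep(0.01)                    # stand-in for one training step
    step_time = time.time() - start
    fps.update(batch_size / step_time, n=batch_size)  # assumed weighting by batch size

print(fps)  # e.g. "FPS 3170.5 (3168.2)" -- the average excludes the warm-up window
```

Excluding the warm-up window matters most on NPU, where the first iterations typically include operator compilation and run much slower than steady state.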
- -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=20): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=20): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' return fmtstr.format(**self.__dict__) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt b/PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/RefineDet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RefineDet/requirements.txt b/PyTorch/contrib/cv/detection/RefineDet/requirements.txt index 8b4a0d1903fad4657f9ca923c594109c5581dd24..9e1c0e66604f932c9a93457c02c0025ea8439c04 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/requirements.txt +++ b/PyTorch/contrib/cv/detection/RefineDet/requirements.txt @@ -1,10 +1,10 @@ -torch==1.5.0+ascend.post3 -apex==0.1+ascend -future==0.18.2 -numpy==1.21.2 -opencv-python==4.5.3.56 -torchvision==0.2.2 -pillow==8.3.2 -six==1.16.0 -decorator==4.4.2 +torch==1.5.0+ascend.post3 +apex==0.1+ascend +future==0.18.2 +numpy==1.21.2 +opencv-python==4.5.3.56 +torchvision==0.2.2 +pillow==8.3.2 +six==1.16.0 +decorator==4.4.2 sympy==1.4 \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/RefineDet/train_1p.py b/PyTorch/contrib/cv/detection/RefineDet/train_1p.py index fcc73899b3e61e459ed09350bd6504a91ecf7dfb..3a72f5378558e7e011ee2296bf126a4bc06afb5f 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/train_1p.py +++ 
b/PyTorch/contrib/cv/detection/RefineDet/train_1p.py @@ -1,284 +1,284 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=utf-8 -from models.timeAcc import AverageMeter -from data import VOC_CLASSES as labelmap -from data import VOCAnnotationTransform, VOCDetection, BaseTransform -from data import * -from utils.augmentations import SSDAugmentation -from layers.modules import RefineDetMultiBoxLoss -from models.refinedet import build_refinedet -from apex import amp -import apex -import os -import sys -import time -import torch -import torch.nn as nn -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torch.nn.init as init -import torch.utils.data as data -import numpy as np -import argparse -from utils.logging import Logger -from torch.hub import load_state_dict_from_url -import torch.npu -CALCULATE_DEVICE = "npu" - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -if sys.version_info[0] == 2: - import xml.etree.cElementTree as ET -else: - import xml.etree.ElementTree as ET -parser = argparse.ArgumentParser( - description='Single Shot MultiBox Detector Training With Pytorch') -train_set = parser.add_mutually_exclusive_group() -parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], - type=str, help='VOC or COCO') -parser.add_argument('--input_size', default='320', choices=['320', '512'], - type=str, help='RefineDet320 or RefineDet512') -parser.add_argument('--dataset_root', default='/home/ljh/refinedet/data/VOCdevkit/', - help='Dataset root directory path') -parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', - help='Pretrained base model') -parser.add_argument('--batch_size', default=32, type=int, - help='Batch size for training') -parser.add_argument('--resume', default=None, type=str, - help='Checkpoint state_dict file to resume training from') -parser.add_argument('--start_epoch', default=0, type=int, - help='Resume training at this epoch') -parser.add_argument('--num_epochs', default=232, type=int, - help='Total train epoch') -parser.add_argument('--num_workers', default=14, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--cuda', default=False, type=str2bool, - help='Use CUDA to train model') -parser.add_argument('--npu', default=True, type=str2bool, - help='Use NPU to train model') -parser.add_argument('--lr', '--learning-rate', default=0.00095, type=float, - help='initial learning rate') -parser.add_argument('--momentum', default=0.9, type=float, - help='Momentum value for optim') -parser.add_argument('--weight_decay', default=5e-4, type=float, - help='Weight decay for SGD') -parser.add_argument('--gamma', default=0.1, type=float, - help='Gamma update for SGD') -parser.add_argument('--visdom', default=False, type=str2bool, - help='Use visdom for loss visualization') -parser.add_argument('--save_folder', default='weights/', - help='Directory for saving checkpoint models') -parser.add_argument('--local_rank', 
default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--finetune', default=None, type=str, - help='pretrained weight path') -parser.add_argument('--train_1p', default=False, type=str2bool, - help='node rank for distributed training') -parser.add_argument('--device_id', default=0, type=str, - help='device_id') -parser.add_argument('--amp', default=True, type=str2bool, - help='whether to use amp') -parser.add_argument('--num_classes', default=-1, type=int, - help='num classes') -args = parser.parse_args() - - -if torch.cuda.is_available(): - if args.cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') - if not args.cuda: - print("WARNING: It looks like you have a CUDA device, but aren't " + - "using CUDA.\nRun with --cuda for optimal training speed.") - torch.set_default_tensor_type('torch.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -if not os.path.exists(args.save_folder): - os.mkdir(args.save_folder) - -sys.stdout = Logger(os.path.join(args.save_folder, 'log.txt')) - -def train(): - torch.npu.set_device('npu:' + str(args.device_id)) - - if args.dataset == 'VOC': - '''if args.dataset_root == COCO_ROOT: - parser.error('Must specify dataset if specifying dataset_root')''' - cfg = voc_refinedet[args.input_size] - dataset = VOCDetection(root=args.dataset_root, - transform=SSDAugmentation(cfg['min_dim'], MEANS)) # cfg['min_dim'] = 320 - - if args.finetune: - print('finetune numclass %d'%args.num_classes) - refinedet_net = build_refinedet('train', cfg['min_dim'], args.num_classes, batch_norm=True) - else: - refinedet_net = build_refinedet('train', cfg['min_dim'], cfg['num_classes'], batch_norm=True) - net = refinedet_net - if args.cuda: - net = net.cuda() - if args.npu: - net = net.npu() - if args.resume: - print('Resuming training, loading {}...'.format(args.resume)) - refinedet_net.load_weights(args.resume) - else: - print('Loading vgg...') - vgg_weights = load_state_dict_from_url('https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', - progress=True) - from collections import OrderedDict - new_vgg_weights = OrderedDict() - for k, v in vgg_weights.items(): - fc, num, wb = k.split('.') - if fc == 'classifier': - continue - new_k = num + '.' 
+ wb - new_vgg_weights[new_k] = v - refinedet_net.vgg.load_state_dict(new_vgg_weights, strict=False) - if not args.resume: - print('Initializing weights...') - refinedet_net.extras.apply(weights_init) - refinedet_net.arm_loc.apply(weights_init) - refinedet_net.arm_conf.apply(weights_init) - refinedet_net.odm_loc.apply(weights_init) - refinedet_net.odm_conf.apply(weights_init) - refinedet_net.tcb0.apply(weights_init) - refinedet_net.tcb1.apply(weights_init) - refinedet_net.tcb2.apply(weights_init) - optimizer = apex.optimizers.NpuFusedSGD(net.parameters(), lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) - arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, - False, args.cuda, npu_device=CALCULATE_DEVICE) - if args.finetune: - stat_dict = torch.load(args.finetune, map_location='cpu') - for k in stat_dict.keys(): - if 'odm_conf' in k: - stat_dict.pop(k) - net.load_state_dict(stat_dict, strict=False) - odm_criterion = RefineDetMultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5, - False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) - else: - odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, - False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) - if args.amp: - net, optimizer = amp.initialize(net, optimizer, opt_level='O1', loss_scale=128, combine_grad=True) - if args.cuda: - net = torch.nn.DataParallel(refinedet_net) - cudnn.benchmark = True - net.train() - arm_loc_loss = 0 - arm_conf_loss = 0 - odm_loc_loss = 0 - odm_conf_loss = 0 - print('Loading the dataset...') - epoch_size = len(dataset) // args.batch_size - if len(dataset) % args.batch_size != 0: - epoch_size += 1 - print('Training RefineDet on:', dataset.name) - print('Using the specified args:') - print(args) - step_index = 0 - data_loader = data.DataLoader(dataset, args.batch_size, - num_workers=args.num_workers, - shuffle=True, collate_fn=detection_collate, - pin_memory=True, - drop_last=True) - - if args.resume: - strat_iter = args.start_epoch * epoch_size - for step in cfg['lr_steps']: - if strat_iter > step: - step_index += 1 - adjust_learning_rate(optimizer, args.gamma, step_index) - - for epoch in range(args.start_epoch, args.num_epochs): - avg_time = AverageMeter('iter_time') - print('\n' + 'epoch ' + str(epoch)) - print('================================train model on trainval set================================') - for iteration, (images, targets) in zip(range(epoch * epoch_size, (epoch + 1) * epoch_size), data_loader): - if iteration in cfg['lr_steps']: - step_index += 1 - adjust_learning_rate(optimizer, args.gamma, step_index) - - if args.cuda: - images = images.cuda() - targets = [ann.cuda() for ann in targets] - elif args.npu: - images = images.to(CALCULATE_DEVICE) - targets = [ann.to(CALCULATE_DEVICE) for ann in targets] - else: - images = images - targets = [ann for ann in targets] - t0 = time.time() - out = net(images) - optimizer.zero_grad() - arm_loss_l, arm_loss_c = arm_criterion(out, targets) - odm_loss_l, odm_loss_c = odm_criterion(out, targets) - arm_loss = arm_loss_l + arm_loss_c - odm_loss = odm_loss_l + odm_loss_c - loss = arm_loss + odm_loss - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - t1 = time.time() - arm_loc_loss += arm_loss_l.item() - arm_conf_loss += arm_loss_c.item() - odm_loc_loss += odm_loss_l.item() - odm_conf_loss += odm_loss_c.item() - avg_time.update(t1 - t0) - if iteration % 10 == 0: - print('iter ' + 
repr( \ - iteration) + ' || ARM_L Loss: %.4f ARM_C Loss: %.4f ODM_L Loss: %.4f ODM_C Loss: %.4f ||' \ - % (arm_loss_l.item(), arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item()), end=' ') - print('timer: %.4f sec.' % (t1 - t0)) - - print('batch_size = ' + str(args.batch_size) + ' || num_devices = ' + '1' + ' || time_avg = %.4f' % avg_time.avg) - print('FPS = %.4f' % (args.batch_size / avg_time.avg)) - print('Saving state, iter:' + str(epoch_size * (epoch + 1) - 1) + ' , epoch:' + str(epoch)) - save_path = args.save_folder + '/RefineDet{}_{}_{}.pth'.format(args.input_size, args.dataset, epoch) - torch.save(refinedet_net.state_dict(), save_path) - -def adjust_learning_rate(optimizer, gamma, step): - """Sets the learning rate to the initial LR decayed by 10 at every - specified step - # Adapted from PyTorch Imagenet example: - # https://github.com/pytorch/examples/blob/master/imagenet/main.py - """ - lr = args.lr * (gamma ** (step)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - -def xavier(param): - init.xavier_uniform_(param) - -def weights_init(m): - if isinstance(m, nn.Conv2d): - with torch.no_grad(): - init.xavier_uniform_(m.weight) - with torch.no_grad(): - m.bias.zero_() - elif isinstance(m, nn.ConvTranspose2d): - xavier(m.weight.data) - with torch.no_grad(): - m.bias.zero_() -if __name__ == '__main__': - train() - - +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
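Before the re-added train_1p.py body below, note how its learning-rate schedule works: adjust_learning_rate sets the LR to the base LR multiplied by gamma**step_index, and step_index is incremented each time the iteration counter (train_1p.py) or epoch counter (train_8p.py) crosses one of the configured LR steps. A minimal standalone sketch of that step decay, with placeholder milestones and plain torch.optim.SGD standing in for the repo's apex NpuFusedSGD; illustration only, not part of the patch:

import torch

base_lr, gamma = 0.00095, 0.1                    # defaults from the argparse options below
lr_steps = (80000, 100000, 120000)               # placeholder milestones, not the repo's cfg['lr_steps']
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=base_lr, momentum=0.9, weight_decay=5e-4)

step_index = 0
for iteration in range(0, 120001, 10000):        # stand-in for the training loop's iteration counter
    if iteration in lr_steps:                    # same trigger as the scripts' `if iteration in cfg['lr_steps']`
        step_index += 1
        for param_group in optimizer.param_groups:   # body of adjust_learning_rate
            param_group['lr'] = base_lr * (gamma ** step_index)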
+ +# coding=utf-8 +from models.timeAcc import AverageMeter +from data import VOC_CLASSES as labelmap +from data import VOCAnnotationTransform, VOCDetection, BaseTransform +from data import * +from utils.augmentations import SSDAugmentation +from layers.modules import RefineDetMultiBoxLoss +from models.refinedet import build_refinedet +from apex import amp +import apex +import os +import sys +import time +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.nn.init as init +import torch.utils.data as data +import numpy as np +import argparse +from utils.logging import Logger +from torch.hub import load_state_dict_from_url +import torch.npu +CALCULATE_DEVICE = "npu" + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + +if sys.version_info[0] == 2: + import xml.etree.cElementTree as ET +else: + import xml.etree.ElementTree as ET +parser = argparse.ArgumentParser( + description='Single Shot MultiBox Detector Training With Pytorch') +train_set = parser.add_mutually_exclusive_group() +parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], + type=str, help='VOC or COCO') +parser.add_argument('--input_size', default='320', choices=['320', '512'], + type=str, help='RefineDet320 or RefineDet512') +parser.add_argument('--dataset_root', default='/home/ljh/refinedet/data/VOCdevkit/', + help='Dataset root directory path') +parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', + help='Pretrained base model') +parser.add_argument('--batch_size', default=32, type=int, + help='Batch size for training') +parser.add_argument('--resume', default=None, type=str, + help='Checkpoint state_dict file to resume training from') +parser.add_argument('--start_epoch', default=0, type=int, + help='Resume training at this epoch') +parser.add_argument('--num_epochs', default=232, type=int, + help='Total train epoch') +parser.add_argument('--num_workers', default=14, type=int, + help='Number of workers used in dataloading') +parser.add_argument('--cuda', default=False, type=str2bool, + help='Use CUDA to train model') +parser.add_argument('--npu', default=True, type=str2bool, + help='Use NPU to train model') +parser.add_argument('--lr', '--learning-rate', default=0.00095, type=float, + help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, + help='Momentum value for optim') +parser.add_argument('--weight_decay', default=5e-4, type=float, + help='Weight decay for SGD') +parser.add_argument('--gamma', default=0.1, type=float, + help='Gamma update for SGD') +parser.add_argument('--visdom', default=False, type=str2bool, + help='Use visdom for loss visualization') +parser.add_argument('--save_folder', default='weights/', + help='Directory for saving checkpoint models') +parser.add_argument('--local_rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--finetune', default=None, type=str, + help='pretrained weight path') +parser.add_argument('--train_1p', default=False, type=str2bool, + help='node rank for distributed training') +parser.add_argument('--device_id', default=0, type=str, + help='device_id') +parser.add_argument('--amp', default=True, type=str2bool, + help='whether to use amp') +parser.add_argument('--num_classes', default=-1, type=int, + help='num classes') +args = parser.parse_args() + + +if torch.cuda.is_available(): + if args.cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') + if not args.cuda: + 
print("WARNING: It looks like you have a CUDA device, but aren't " + + "using CUDA.\nRun with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +if not os.path.exists(args.save_folder): + os.mkdir(args.save_folder) + +sys.stdout = Logger(os.path.join(args.save_folder, 'log.txt')) + +def train(): + torch.npu.set_device('npu:' + str(args.device_id)) + + if args.dataset == 'VOC': + '''if args.dataset_root == COCO_ROOT: + parser.error('Must specify dataset if specifying dataset_root')''' + cfg = voc_refinedet[args.input_size] + dataset = VOCDetection(root=args.dataset_root, + transform=SSDAugmentation(cfg['min_dim'], MEANS)) # cfg['min_dim'] = 320 + + if args.finetune: + print('finetune numclass %d'%args.num_classes) + refinedet_net = build_refinedet('train', cfg['min_dim'], args.num_classes, batch_norm=True) + else: + refinedet_net = build_refinedet('train', cfg['min_dim'], cfg['num_classes'], batch_norm=True) + net = refinedet_net + if args.cuda: + net = net.cuda() + if args.npu: + net = net.npu() + if args.resume: + print('Resuming training, loading {}...'.format(args.resume)) + refinedet_net.load_weights(args.resume) + else: + print('Loading vgg...') + vgg_weights = load_state_dict_from_url('https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', + progress=True) + from collections import OrderedDict + new_vgg_weights = OrderedDict() + for k, v in vgg_weights.items(): + fc, num, wb = k.split('.') + if fc == 'classifier': + continue + new_k = num + '.' + wb + new_vgg_weights[new_k] = v + refinedet_net.vgg.load_state_dict(new_vgg_weights, strict=False) + if not args.resume: + print('Initializing weights...') + refinedet_net.extras.apply(weights_init) + refinedet_net.arm_loc.apply(weights_init) + refinedet_net.arm_conf.apply(weights_init) + refinedet_net.odm_loc.apply(weights_init) + refinedet_net.odm_conf.apply(weights_init) + refinedet_net.tcb0.apply(weights_init) + refinedet_net.tcb1.apply(weights_init) + refinedet_net.tcb2.apply(weights_init) + optimizer = apex.optimizers.NpuFusedSGD(net.parameters(), lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay) + arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, + False, args.cuda, npu_device=CALCULATE_DEVICE) + if args.finetune: + stat_dict = torch.load(args.finetune, map_location='cpu') + for k in stat_dict.keys(): + if 'odm_conf' in k: + stat_dict.pop(k) + net.load_state_dict(stat_dict, strict=False) + odm_criterion = RefineDetMultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5, + False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) + else: + odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, + False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) + if args.amp: + net, optimizer = amp.initialize(net, optimizer, opt_level='O1', loss_scale=128, combine_grad=True) + if args.cuda: + net = torch.nn.DataParallel(refinedet_net) + cudnn.benchmark = True + net.train() + arm_loc_loss = 0 + arm_conf_loss = 0 + odm_loc_loss = 0 + odm_conf_loss = 0 + print('Loading the dataset...') + epoch_size = len(dataset) // args.batch_size + if len(dataset) % args.batch_size != 0: + epoch_size += 1 + print('Training RefineDet on:', dataset.name) + print('Using the specified args:') + print(args) + step_index = 0 + data_loader = data.DataLoader(dataset, args.batch_size, + num_workers=args.num_workers, + shuffle=True, collate_fn=detection_collate, + pin_memory=True, + 
drop_last=True) + + if args.resume: + strat_iter = args.start_epoch * epoch_size + for step in cfg['lr_steps']: + if strat_iter > step: + step_index += 1 + adjust_learning_rate(optimizer, args.gamma, step_index) + + for epoch in range(args.start_epoch, args.num_epochs): + avg_time = AverageMeter('iter_time') + print('\n' + 'epoch ' + str(epoch)) + print('================================train model on trainval set================================') + for iteration, (images, targets) in zip(range(epoch * epoch_size, (epoch + 1) * epoch_size), data_loader): + if iteration in cfg['lr_steps']: + step_index += 1 + adjust_learning_rate(optimizer, args.gamma, step_index) + + if args.cuda: + images = images.cuda() + targets = [ann.cuda() for ann in targets] + elif args.npu: + images = images.to(CALCULATE_DEVICE) + targets = [ann.to(CALCULATE_DEVICE) for ann in targets] + else: + images = images + targets = [ann for ann in targets] + t0 = time.time() + out = net(images) + optimizer.zero_grad() + arm_loss_l, arm_loss_c = arm_criterion(out, targets) + odm_loss_l, odm_loss_c = odm_criterion(out, targets) + arm_loss = arm_loss_l + arm_loss_c + odm_loss = odm_loss_l + odm_loss_c + loss = arm_loss + odm_loss + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + t1 = time.time() + arm_loc_loss += arm_loss_l.item() + arm_conf_loss += arm_loss_c.item() + odm_loc_loss += odm_loss_l.item() + odm_conf_loss += odm_loss_c.item() + avg_time.update(t1 - t0) + if iteration % 10 == 0: + print('iter ' + repr( \ + iteration) + ' || ARM_L Loss: %.4f ARM_C Loss: %.4f ODM_L Loss: %.4f ODM_C Loss: %.4f ||' \ + % (arm_loss_l.item(), arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item()), end=' ') + print('timer: %.4f sec.' % (t1 - t0)) + + print('batch_size = ' + str(args.batch_size) + ' || num_devices = ' + '1' + ' || time_avg = %.4f' % avg_time.avg) + print('FPS = %.4f' % (args.batch_size / avg_time.avg)) + print('Saving state, iter:' + str(epoch_size * (epoch + 1) - 1) + ' , epoch:' + str(epoch)) + save_path = args.save_folder + '/RefineDet{}_{}_{}.pth'.format(args.input_size, args.dataset, epoch) + torch.save(refinedet_net.state_dict(), save_path) + +def adjust_learning_rate(optimizer, gamma, step): + """Sets the learning rate to the initial LR decayed by 10 at every + specified step + # Adapted from PyTorch Imagenet example: + # https://github.com/pytorch/examples/blob/master/imagenet/main.py + """ + lr = args.lr * (gamma ** (step)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + +def xavier(param): + init.xavier_uniform_(param) + +def weights_init(m): + if isinstance(m, nn.Conv2d): + with torch.no_grad(): + init.xavier_uniform_(m.weight) + with torch.no_grad(): + m.bias.zero_() + elif isinstance(m, nn.ConvTranspose2d): + xavier(m.weight.data) + with torch.no_grad(): + m.bias.zero_() +if __name__ == '__main__': + train() + + diff --git a/PyTorch/contrib/cv/detection/RefineDet/train_8p.py b/PyTorch/contrib/cv/detection/RefineDet/train_8p.py index b18c84cf9bc47160ad7069bd6ac875535d2c2466..94aeb972652ab46e0ed3f3a2df17c2b3f05292ac 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/train_8p.py +++ b/PyTorch/contrib/cv/detection/RefineDet/train_8p.py @@ -1,294 +1,294 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=utf-8 -import torchvision -from models.timeAcc import AverageMeter -from data import VOC_CLASSES as labelmap -from data import VOCAnnotationTransform, VOCDetection, BaseTransform -from data import * -from utils.augmentations import SSDAugmentation -from layers.modules import RefineDetMultiBoxLoss -from models.refinedet import build_refinedet -from apex import amp -import apex -from torch.hub import load_state_dict_from_url -import os -import sys -import time -import torch -import torch.nn as nn -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torch.nn.init as init -import torch.utils.data as data -import numpy as np -import argparse -from utils.logging import Logger -import torch.npu -import torch.distributed as dist -import torch.multiprocessing as mp -CALCULATE_DEVICE = "npu" -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") -if sys.version_info[0] == 2: - import xml.etree.cElementTree as ET -else: - import xml.etree.ElementTree as ET -parser = argparse.ArgumentParser( - description='Single Shot MultiBox Detector Training With Pytorch') -train_set = parser.add_mutually_exclusive_group() -parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], - type=str, help='VOC or COCO') -parser.add_argument('--input_size', default='320', choices=['320', '512'], - type=str, help='RefineDet320 or RefineDet512') -parser.add_argument('--data_path', default='./data/VOCdevkit', - help='Dataset root directory path') -parser.add_argument('--dataset_root', default='./data/VOCdevkit', - help='Dataset root directory path') -parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', - help='Pretrained base model') -parser.add_argument('--batch_size', default=32, type=int, - help='Batch size for training') -parser.add_argument('--resume', default=None, type=str, - help='Checkpoint state_dict file to resume training from') -parser.add_argument('--start_epoch', default=0, type=int, - help='Resume training at this epoch') -parser.add_argument('--num_epochs', default=232, type=int, - help='Total train epoch') -parser.add_argument('--num_workers', default=8, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--cuda', default=False, type=str2bool, - help='Use CUDA to train model') -parser.add_argument('--npu', default=True, type=str2bool, - help='Use NPU to train model') -parser.add_argument('--lr', '--learning-rate', default=0.00095, type=float, - help='initial learning rate') -parser.add_argument('--momentum', default=0.9, type=float, - help='Momentum value for optim') -parser.add_argument('--weight_decay', default=5e-4, type=float, - help='Weight decay for SGD') -parser.add_argument('--gamma', default=0.1, type=float, - help='Gamma update for SGD') -parser.add_argument('--visdom', default=False, type=str2bool, - help='Use visdom for loss visualization') -parser.add_argument('--save_folder', default='weights/', - help='Directory for saving checkpoint models') -parser.add_argument('--local_rank', default=0, type=int, - help='node rank for distributed training') -parser.add_argument('--world_size', 
default=8, type=int) -parser.add_argument('--bn', default=False, type=str2bool, - help='whether to use BN') -parser.add_argument('--amp', default=True, type=str2bool, - help='whether to use amp') -args = parser.parse_args() - -if torch.cuda.is_available(): - if args.cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') - if not args.cuda: - print("WARNING: It looks like you have a CUDA device, but aren't " + - "using CUDA.\nRun with --cuda for optimal training speed.") - torch.set_default_tensor_type('torch.FloatTensor') -else: - torch.set_default_tensor_type('torch.FloatTensor') - -if args.local_rank == 0 and os.path.exists(args.save_folder)==False: - os.mkdir(args.save_folder) - -sys.stdout = Logger(os.path.join(args.save_folder, 'log.txt')) - - - -def init_dist(backend='hccl', **kwargs): - os.environ['MASTER_ADDR'] = "127.0.0.1" - os.environ['MASTER_PORT'] = '29530' - rank = int(args.local_rank) - num_npus = torch.npu.device_count() - dist.init_process_group(backend=backend, world_size=args.world_size, rank=rank) - print(args.world_size, rank) - torch.npu.set_device(rank % num_npus) - torch.backends.cudnn.benchmark = True - args.lr = args.lr * 8 - print('lr = ', args.lr) - -def train(): - import warnings - warnings.filterwarnings('ignore', category=Warning) - init_dist() - if args.dataset == 'VOC': - '''if args.dataset_root == COCO_ROOT: - parser.error('Must specify dataset if specifying dataset_root')''' - cfg = voc_refinedet[args.input_size] - dataset = VOCDetection(root=args.dataset_root, - transform=SSDAugmentation(cfg['min_dim'], - MEANS)) - refinedet_net = build_refinedet('train', cfg['min_dim'], cfg['num_classes'], batch_norm=args.bn) - net = refinedet_net - if args.npu: - net = net.npu() - optimizer = apex.optimizers.NpuFusedSGD(net.parameters(), lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) - arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, - False, args.cuda, npu_device=CALCULATE_DEVICE) - odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, - False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) - if args.amp: - net, optimizer = amp.initialize(net, optimizer, opt_level='O1', loss_scale=128, combine_grad=True) - if torch.npu.device_count() > 1: - print('ddp') - net = nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank], broadcast_buffers=False) - if args.resume: - print('Resuming training, loading {}...'.format(args.resume)) - refinedet_net.load_weights(args.resume) - else: - print('Loading vgg...') - vgg_weights = load_state_dict_from_url('https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', - progress=True) - from collections import OrderedDict - new_vgg_weights = OrderedDict() - for k, v in vgg_weights.items(): - fc, num, wb = k.split('.') - if fc == 'classifier': - continue - new_k = num + '.' 
+ wb - new_vgg_weights[new_k] = v - refinedet_net.vgg.load_state_dict(new_vgg_weights, strict=False) - if not args.resume: - print('Initializing weights...') - refinedet_net.extras.apply(weights_init) - refinedet_net.arm_loc.apply(weights_init) - refinedet_net.arm_conf.apply(weights_init) - refinedet_net.odm_loc.apply(weights_init) - refinedet_net.odm_conf.apply(weights_init) - refinedet_net.tcb0.apply(weights_init) - refinedet_net.tcb1.apply(weights_init) - refinedet_net.tcb2.apply(weights_init) - - net.train() - arm_loc_loss = 0 - arm_conf_loss = 0 - odm_loc_loss = 0 - odm_conf_loss = 0 - epoch_size = len(dataset) // args.batch_size // args.world_size - if args.local_rank == 0: - print('Training RefineDet on:', dataset.name) - print('Using the specified args:') - print(args) - step_index = 0 - train_sampler = torch.utils.data.DistributedSampler(dataset) - data_loader = data.DataLoader(dataset=dataset, - batch_size=args.batch_size, - shuffle=False, - num_workers=args.num_workers, - pin_memory=False, - sampler=train_sampler, - collate_fn=detection_collate, - drop_last=True) - iteration = 0 - if args.resume: - iteration = args.start_epoch * epoch_size - for epoch_step in cfg['lr_step_epoch']: - if args.start_epoch > epoch_step: - step_index += 1 - adjust_learning_rate(optimizer, args.gamma, step_index) - for epoch in range(args.start_epoch, args.num_epochs): - train_sampler.set_epoch(epoch) - if epoch in cfg['lr_step_epoch']: - step_index += 1 - adjust_learning_rate(optimizer, args.gamma, step_index) - avg_time = AverageMeter('iter_time') - if args.local_rank == 0: - print('\n' + 'epoch ' + str(epoch)) - print('================================train model on trainval set================================') - for images, targets in data_loader: - if args.cuda: - images = images.cuda() - targets = [ann.cuda() for ann in targets] - elif args.npu: - images = images.to(CALCULATE_DEVICE) - targets = [ann.to(CALCULATE_DEVICE) for ann in targets] - else: - images = images - targets = [ann for ann in targets] - - t0 = time.time() - out = net(images) - optimizer.zero_grad() - arm_loss_l, arm_loss_c = arm_criterion(out, targets) - odm_loss_l, odm_loss_c = odm_criterion(out, targets) - arm_loss = arm_loss_l + arm_loss_c - odm_loss = odm_loss_l + odm_loss_c - loss = arm_loss + odm_loss - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - t1 = time.time() - arm_loc_loss += arm_loss_l.item() - arm_conf_loss += arm_loss_c.item() - odm_loc_loss += odm_loss_l.item() - odm_conf_loss += odm_loss_c.item() - avg_time.update(t1 - t0) - if iteration % 10 == 0 and args.local_rank == 0: - print('iter ' + repr( - iteration) + ' || ARM_L Loss: %.4f ARM_C Loss: %.4f ODM_L Loss: %.4f ODM_C Loss: %.4f ||'\ - % (arm_loss_l.item(), arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item()), end=' ') - print('timer: %.4f sec.' 
% (t1 - t0)) - iteration += 1 - if args.local_rank == 0: - print('batch_size = ' + str(args.batch_size) + ' || num_devices = ' + str( - torch.npu.device_count()) + ' || time_avg = %.4f' % avg_time.avg) - print('FPS = %.4f' % (args.batch_size * torch.npu.device_count() / avg_time.avg)) - print('Saving state, iter:' + str(iteration) + ' , epoch:' + str(epoch)) - save_path = args.save_folder + '/RefineDet{}_{}_{}.pth'.format(args.input_size, args.dataset, epoch) - torch.save(refinedet_net.state_dict(), save_path) - - -def adjust_learning_rate(optimizer, gamma, step): - """Sets the learning rate to the initial LR decayed by 10 at every - specified step - # Adapted from PyTorch Imagenet example: - # https://github.com/pytorch/examples/blob/master/imagenet/main.py - """ - lr = args.lr * (gamma ** (step)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def xavier(param): - init.xavier_uniform_(param) - - -def weights_init(m): - if isinstance(m, nn.Conv2d): - with torch.no_grad(): - init.xavier_uniform_(m.weight) - with torch.no_grad(): - m.bias.zero_() - - - elif isinstance(m, nn.ConvTranspose2d): - xavier(m.weight.data) - with torch.no_grad(): - m.bias.zero_() - -if __name__ == '__main__': - - train() - - +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# coding=utf-8 +import torchvision +from models.timeAcc import AverageMeter +from data import VOC_CLASSES as labelmap +from data import VOCAnnotationTransform, VOCDetection, BaseTransform +from data import * +from utils.augmentations import SSDAugmentation +from layers.modules import RefineDetMultiBoxLoss +from models.refinedet import build_refinedet +from apex import amp +import apex +from torch.hub import load_state_dict_from_url +import os +import sys +import time +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.nn.init as init +import torch.utils.data as data +import numpy as np +import argparse +from utils.logging import Logger +import torch.npu +import torch.distributed as dist +import torch.multiprocessing as mp +CALCULATE_DEVICE = "npu" +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") +if sys.version_info[0] == 2: + import xml.etree.cElementTree as ET +else: + import xml.etree.ElementTree as ET +parser = argparse.ArgumentParser( + description='Single Shot MultiBox Detector Training With Pytorch') +train_set = parser.add_mutually_exclusive_group() +parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], + type=str, help='VOC or COCO') +parser.add_argument('--input_size', default='320', choices=['320', '512'], + type=str, help='RefineDet320 or RefineDet512') +parser.add_argument('--data_path', default='./data/VOCdevkit', + help='Dataset root directory path') +parser.add_argument('--dataset_root', default='./data/VOCdevkit', + help='Dataset root directory path') +parser.add_argument('--basenet', default='weights/vgg16_reducedfc.pth', + help='Pretrained base model') +parser.add_argument('--batch_size', default=32, type=int, + help='Batch size for training') +parser.add_argument('--resume', default=None, type=str, + help='Checkpoint state_dict file to resume training from') +parser.add_argument('--start_epoch', default=0, type=int, + help='Resume training at this epoch') +parser.add_argument('--num_epochs', default=232, type=int, + help='Total train epoch') +parser.add_argument('--num_workers', default=8, type=int, + help='Number of workers used in dataloading') +parser.add_argument('--cuda', default=False, type=str2bool, + help='Use CUDA to train model') +parser.add_argument('--npu', default=True, type=str2bool, + help='Use NPU to train model') +parser.add_argument('--lr', '--learning-rate', default=0.00095, type=float, + help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, + help='Momentum value for optim') +parser.add_argument('--weight_decay', default=5e-4, type=float, + help='Weight decay for SGD') +parser.add_argument('--gamma', default=0.1, type=float, + help='Gamma update for SGD') +parser.add_argument('--visdom', default=False, type=str2bool, + help='Use visdom for loss visualization') +parser.add_argument('--save_folder', default='weights/', + help='Directory for saving checkpoint models') +parser.add_argument('--local_rank', default=0, type=int, + help='node rank for distributed training') +parser.add_argument('--world_size', default=8, type=int) +parser.add_argument('--bn', default=False, type=str2bool, + help='whether to use BN') +parser.add_argument('--amp', default=True, type=str2bool, + help='whether to use amp') +args = parser.parse_args() + +if torch.cuda.is_available(): + if args.cuda: + torch.set_default_tensor_type('torch.cuda.FloatTensor') + if not args.cuda: + print("WARNING: It looks like you have a CUDA device, but aren't 
" + + "using CUDA.\nRun with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') +else: + torch.set_default_tensor_type('torch.FloatTensor') + +if args.local_rank == 0 and os.path.exists(args.save_folder)==False: + os.mkdir(args.save_folder) + +sys.stdout = Logger(os.path.join(args.save_folder, 'log.txt')) + + + +def init_dist(backend='hccl', **kwargs): + os.environ['MASTER_ADDR'] = "127.0.0.1" + os.environ['MASTER_PORT'] = '29530' + rank = int(args.local_rank) + num_npus = torch.npu.device_count() + dist.init_process_group(backend=backend, world_size=args.world_size, rank=rank) + print(args.world_size, rank) + torch.npu.set_device(rank % num_npus) + torch.backends.cudnn.benchmark = True + args.lr = args.lr * 8 + print('lr = ', args.lr) + +def train(): + import warnings + warnings.filterwarnings('ignore', category=Warning) + init_dist() + if args.dataset == 'VOC': + '''if args.dataset_root == COCO_ROOT: + parser.error('Must specify dataset if specifying dataset_root')''' + cfg = voc_refinedet[args.input_size] + dataset = VOCDetection(root=args.dataset_root, + transform=SSDAugmentation(cfg['min_dim'], + MEANS)) + refinedet_net = build_refinedet('train', cfg['min_dim'], cfg['num_classes'], batch_norm=args.bn) + net = refinedet_net + if args.npu: + net = net.npu() + optimizer = apex.optimizers.NpuFusedSGD(net.parameters(), lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay) + arm_criterion = RefineDetMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, + False, args.cuda, npu_device=CALCULATE_DEVICE) + odm_criterion = RefineDetMultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, + False, args.cuda, use_ARM=True, npu_device=CALCULATE_DEVICE) + if args.amp: + net, optimizer = amp.initialize(net, optimizer, opt_level='O1', loss_scale=128, combine_grad=True) + if torch.npu.device_count() > 1: + print('ddp') + net = nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank], broadcast_buffers=False) + if args.resume: + print('Resuming training, loading {}...'.format(args.resume)) + refinedet_net.load_weights(args.resume) + else: + print('Loading vgg...') + vgg_weights = load_state_dict_from_url('https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', + progress=True) + from collections import OrderedDict + new_vgg_weights = OrderedDict() + for k, v in vgg_weights.items(): + fc, num, wb = k.split('.') + if fc == 'classifier': + continue + new_k = num + '.' 
+ wb + new_vgg_weights[new_k] = v + refinedet_net.vgg.load_state_dict(new_vgg_weights, strict=False) + if not args.resume: + print('Initializing weights...') + refinedet_net.extras.apply(weights_init) + refinedet_net.arm_loc.apply(weights_init) + refinedet_net.arm_conf.apply(weights_init) + refinedet_net.odm_loc.apply(weights_init) + refinedet_net.odm_conf.apply(weights_init) + refinedet_net.tcb0.apply(weights_init) + refinedet_net.tcb1.apply(weights_init) + refinedet_net.tcb2.apply(weights_init) + + net.train() + arm_loc_loss = 0 + arm_conf_loss = 0 + odm_loc_loss = 0 + odm_conf_loss = 0 + epoch_size = len(dataset) // args.batch_size // args.world_size + if args.local_rank == 0: + print('Training RefineDet on:', dataset.name) + print('Using the specified args:') + print(args) + step_index = 0 + train_sampler = torch.utils.data.DistributedSampler(dataset) + data_loader = data.DataLoader(dataset=dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + pin_memory=False, + sampler=train_sampler, + collate_fn=detection_collate, + drop_last=True) + iteration = 0 + if args.resume: + iteration = args.start_epoch * epoch_size + for epoch_step in cfg['lr_step_epoch']: + if args.start_epoch > epoch_step: + step_index += 1 + adjust_learning_rate(optimizer, args.gamma, step_index) + for epoch in range(args.start_epoch, args.num_epochs): + train_sampler.set_epoch(epoch) + if epoch in cfg['lr_step_epoch']: + step_index += 1 + adjust_learning_rate(optimizer, args.gamma, step_index) + avg_time = AverageMeter('iter_time') + if args.local_rank == 0: + print('\n' + 'epoch ' + str(epoch)) + print('================================train model on trainval set================================') + for images, targets in data_loader: + if args.cuda: + images = images.cuda() + targets = [ann.cuda() for ann in targets] + elif args.npu: + images = images.to(CALCULATE_DEVICE) + targets = [ann.to(CALCULATE_DEVICE) for ann in targets] + else: + images = images + targets = [ann for ann in targets] + + t0 = time.time() + out = net(images) + optimizer.zero_grad() + arm_loss_l, arm_loss_c = arm_criterion(out, targets) + odm_loss_l, odm_loss_c = odm_criterion(out, targets) + arm_loss = arm_loss_l + arm_loss_c + odm_loss = odm_loss_l + odm_loss_c + loss = arm_loss + odm_loss + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + t1 = time.time() + arm_loc_loss += arm_loss_l.item() + arm_conf_loss += arm_loss_c.item() + odm_loc_loss += odm_loss_l.item() + odm_conf_loss += odm_loss_c.item() + avg_time.update(t1 - t0) + if iteration % 10 == 0 and args.local_rank == 0: + print('iter ' + repr( + iteration) + ' || ARM_L Loss: %.4f ARM_C Loss: %.4f ODM_L Loss: %.4f ODM_C Loss: %.4f ||'\ + % (arm_loss_l.item(), arm_loss_c.item(), odm_loss_l.item(), odm_loss_c.item()), end=' ') + print('timer: %.4f sec.' 
% (t1 - t0)) + iteration += 1 + if args.local_rank == 0: + print('batch_size = ' + str(args.batch_size) + ' || num_devices = ' + str( + torch.npu.device_count()) + ' || time_avg = %.4f' % avg_time.avg) + print('FPS = %.4f' % (args.batch_size * torch.npu.device_count() / avg_time.avg)) + print('Saving state, iter:' + str(iteration) + ' , epoch:' + str(epoch)) + save_path = args.save_folder + '/RefineDet{}_{}_{}.pth'.format(args.input_size, args.dataset, epoch) + torch.save(refinedet_net.state_dict(), save_path) + + +def adjust_learning_rate(optimizer, gamma, step): + """Sets the learning rate to the initial LR decayed by 10 at every + specified step + # Adapted from PyTorch Imagenet example: + # https://github.com/pytorch/examples/blob/master/imagenet/main.py + """ + lr = args.lr * (gamma ** (step)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def xavier(param): + init.xavier_uniform_(param) + + +def weights_init(m): + if isinstance(m, nn.Conv2d): + with torch.no_grad(): + init.xavier_uniform_(m.weight) + with torch.no_grad(): + m.bias.zero_() + + + elif isinstance(m, nn.ConvTranspose2d): + xavier(m.weight.data) + with torch.no_grad(): + m.bias.zero_() + +if __name__ == '__main__': + + train() + + diff --git a/PyTorch/contrib/cv/detection/RefineDet/utils/augmentations.py b/PyTorch/contrib/cv/detection/RefineDet/utils/augmentations.py index d84874a8638b440184f53a2a4555b9d2a1949b51..dfbb0e36d400001e174a40a3bd6115d1ed39b7e9 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/utils/augmentations.py +++ b/PyTorch/contrib/cv/detection/RefineDet/utils/augmentations.py @@ -1,433 +1,433 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torchvision import transforms -import cv2 -import numpy as np -import types -from numpy import random -np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) - - -def intersect(box_a, box_b): - max_xy = np.minimum(box_a[:, 2:], box_b[2:]) - min_xy = np.maximum(box_a[:, :2], box_b[:2]) - inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) - return inter[:, 0] * inter[:, 1] - - -def jaccard_numpy(box_a, box_b): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. - E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: Multiple bounding boxes, Shape: [num_boxes,4] - box_b: Single bounding box, Shape: [4] - Return: - jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] - """ - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])) # [A,B] - area_b = ((box_b[2]-box_b[0]) * - (box_b[3]-box_b[1])) # [A,B] - union = area_a + area_b - inter - return inter / union # [A,B] - - -class Compose(object): - """Composes several augmentations together. - Args: - transforms (List[Transform]): list of transforms to compose. 
- Example: - >>> augmentations.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.ToTensor(), - >>> ]) - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, boxes=None, labels=None): - for t in self.transforms: - img, boxes, labels = t(img, boxes, labels) - return img, boxes, labels - - -class Lambda(object): - """Applies a lambda as a transform.""" - - def __init__(self, lambd): - assert isinstance(lambd, types.LambdaType) - self.lambd = lambd - - def __call__(self, img, boxes=None, labels=None): - return self.lambd(img, boxes, labels) - - -class ConvertFromInts(object): - def __call__(self, image, boxes=None, labels=None): - return image.astype(np.float32), boxes, labels - - -class SubtractMeans(object): - def __init__(self, mean): - self.mean = np.array(mean, dtype=np.float32) - - def __call__(self, image, boxes=None, labels=None): - image = image.astype(np.float32) - image -= self.mean - return image.astype(np.float32), boxes, labels - - -class ToAbsoluteCoords(object): - def __call__(self, image, boxes=None, labels=None): - height, width, channels = image.shape - boxes[:, 0] *= width - boxes[:, 2] *= width - boxes[:, 1] *= height - boxes[:, 3] *= height - - return image, boxes, labels - - -class ToPercentCoords(object): - def __call__(self, image, boxes=None, labels=None): - height, width, channels = image.shape - boxes[:, 0] /= width - boxes[:, 2] /= width - boxes[:, 1] /= height - boxes[:, 3] /= height - - return image, boxes, labels - - -class Resize(object): - def __init__(self, size=300): - self.size = size - - def __call__(self, image, boxes=None, labels=None): - image = cv2.resize(image, (self.size, - self.size)) - return image, boxes, labels - - -class RandomSaturation(object): - def __init__(self, lower=0.5, upper=1.5): - self.lower = lower - self.upper = upper - assert self.upper >= self.lower, "contrast upper must be >= lower." - assert self.lower >= 0, "contrast lower must be non-negative." 
- - def __call__(self, image, boxes=None, labels=None): - if random.randint(2): - image[:, :, 1] *= random.uniform(self.lower, self.upper) - - return image, boxes, labels - - -class RandomHue(object): - def __init__(self, delta=18.0): - assert delta >= 0.0 and delta <= 360.0 - self.delta = delta - - def __call__(self, image, boxes=None, labels=None): - if random.randint(2): - image[:, :, 0] += random.uniform(-self.delta, self.delta) - image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 - image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 - return image, boxes, labels - - -class RandomLightingNoise(object): - def __init__(self): - self.perms = ((0, 1, 2), (0, 2, 1), - (1, 0, 2), (1, 2, 0), - (2, 0, 1), (2, 1, 0)) - - def __call__(self, image, boxes=None, labels=None): - if random.randint(2): - swap = self.perms[random.randint(len(self.perms))] - shuffle = SwapChannels(swap) # shuffle channels - image = shuffle(image) - return image, boxes, labels - - -class ConvertColor(object): - def __init__(self, current='BGR', transform='HSV'): - self.transform = transform - self.current = current - - def __call__(self, image, boxes=None, labels=None): - if self.current == 'BGR' and self.transform == 'HSV': - image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - elif self.current == 'HSV' and self.transform == 'BGR': - image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) - else: - raise NotImplementedError - return image, boxes, labels - - -class RandomContrast(object): - def __init__(self, lower=0.5, upper=1.5): - self.lower = lower - self.upper = upper - assert self.upper >= self.lower, "contrast upper must be >= lower." - assert self.lower >= 0, "contrast lower must be non-negative." - - # expects float image - def __call__(self, image, boxes=None, labels=None): - if random.randint(2): - alpha = random.uniform(self.lower, self.upper) - image *= alpha - return image, boxes, labels - - -class RandomBrightness(object): - def __init__(self, delta=32): - assert delta >= 0.0 - assert delta <= 255.0 - self.delta = delta - - def __call__(self, image, boxes=None, labels=None): - if random.randint(2): - delta = random.uniform(-self.delta, self.delta) - image += delta - return image, boxes, labels - - -class ToCV2Image(object): - def __call__(self, tensor, boxes=None, labels=None): - return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels - - -class ToTensor(object): - def __call__(self, cvimage, boxes=None, labels=None): - return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels - - -class RandomSampleCrop(object): - """Crop - Arguments: - img (Image): the image being input during training - boxes (Tensor): the original bounding boxes in pt form - labels (Tensor): the class labels for each bbox - mode (float tuple): the min and max jaccard overlaps - Return: - (img, boxes, classes) - img (Image): the cropped image - boxes (Tensor): the adjusted bounding boxes in pt form - labels (Tensor): the class labels for each bbox - """ - def __init__(self): - self.sample_options = ( - # using entire original input image - None, - # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.4,.7,.9 - (0.1, None), - (0.3, None), - (0.7, None), - (0.9, None), - # randomly sample a patch - (None, None), - ) - - def __call__(self, image, boxes=None, labels=None): - height, width, _ = image.shape - while True: - # randomly choose a mode - mode = random.choice(self.sample_options) - if mode is None: - return image, boxes, labels - - min_iou, max_iou = mode - if min_iou is None: - min_iou = float('-inf') - if max_iou is None: - max_iou = float('inf') - - # max trails (50) - for _ in range(50): - current_image = image - - w = random.uniform(0.3 * width, width) - h = random.uniform(0.3 * height, height) - - # aspect ratio constraint b/t .5 & 2 - if h / w < 0.5 or h / w > 2: - continue - - left = random.uniform(width - w) - top = random.uniform(height - h) - - # convert to integer rect x1,y1,x2,y2 - rect = np.array([int(left), int(top), int(left+w), int(top+h)]) - - # calculate IoU (jaccard overlap) b/t the cropped and gt boxes - overlap = jaccard_numpy(boxes, rect) # boxes tensor x,y的绝对坐标 rect 剪裁的 x,y 坐标 - - # is min and max overlap constraint satisfied? if not try again - if overlap.min() < min_iou and max_iou < overlap.max(): - continue - - # cut the crop from the image - current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], - :] - - # keep overlap with gt box IF center in sampled patch - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - - # mask in all gt boxes that above and to the left of centers - m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) - - # mask in all gt boxes that under and to the right of centers - m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) - - # mask in that both m1 and m2 are true - mask = m1 * m2 - - # have any valid boxes? try again if not - if not mask.any(): - continue - - # take only matching gt boxes - current_boxes = boxes[mask, :].copy() - - # take only matching gt labels - current_labels = labels[mask] - - # should we use the box left and top corner or the crop's - current_boxes[:, :2] = np.maximum(current_boxes[:, :2], - rect[:2]) - # adjust to crop (by substracting crop's left,top) - current_boxes[:, :2] -= rect[:2] - - current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], - rect[2:]) - # adjust to crop (by substracting crop's left,top) - current_boxes[:, 2:] -= rect[:2] - - return current_image, current_boxes, current_labels - - -class Expand(object): - def __init__(self, mean): - self.mean = mean - - def __call__(self, image, boxes, labels): - if random.randint(2): - return image, boxes, labels - - height, width, depth = image.shape - ratio = random.uniform(1, 4) - left = random.uniform(0, width*ratio - width) - top = random.uniform(0, height*ratio - height) - - expand_image = np.zeros( - (int(height*ratio), int(width*ratio), depth), - dtype=image.dtype) - expand_image[:, :, :] = self.mean - expand_image[int(top):int(top + height), - int(left):int(left + width)] = image - image = expand_image - - boxes = boxes.copy() - boxes[:, :2] += (int(left), int(top)) - boxes[:, 2:] += (int(left), int(top)) - - return image, boxes, labels - - -class RandomMirror(object): - def __call__(self, image, boxes, classes): - _, width, _ = image.shape - if random.randint(2): - image = image[:, ::-1] - boxes = boxes.copy() - boxes[:, 0::2] = width - boxes[:, 2::-2] - return image, boxes, classes - - -class SwapChannels(object): - """Transforms a tensorized image by swapping the channels in the order - specified in the swap tuple. 
- Args: - swaps (int triple): final order of channels - eg: (2, 1, 0) - """ - - def __init__(self, swaps): - self.swaps = swaps - - def __call__(self, image): - """ - Args: - image (Tensor): image tensor to be transformed - Return: - a tensor with channels swapped according to swap - """ - # if torch.is_tensor(image): - # image = image.data.cpu().numpy() - # else: - # image = np.array(image) - image = image[:, :, self.swaps] - return image - - -class PhotometricDistort(object): - def __init__(self): - self.pd = [ - RandomContrast(), - ConvertColor(transform='HSV'), - RandomSaturation(), - RandomHue(), - ConvertColor(current='HSV', transform='BGR'), - RandomContrast() - ] - self.rand_brightness = RandomBrightness() - self.rand_light_noise = RandomLightingNoise() - - def __call__(self, image, boxes, labels): - im = image.copy() - im, boxes, labels = self.rand_brightness(im, boxes, labels) - if random.randint(2): - distort = Compose(self.pd[:-1]) - else: - distort = Compose(self.pd[1:]) - im, boxes, labels = distort(im, boxes, labels) - return self.rand_light_noise(im, boxes, labels) - - -class SSDAugmentation(object): - def __init__(self, size=300, mean=(104, 117, 123)): - self.mean = mean - self.size = size - self.augment = Compose([ - ConvertFromInts(), # int 转化为 float32 - ToAbsoluteCoords(), - PhotometricDistort(), - Expand(self.mean), - RandomSampleCrop(), - RandomMirror(), - ToPercentCoords(), - Resize(self.size), - SubtractMeans(self.mean) - ]) - - def __call__(self, img, boxes, labels): - return self.augment(img, boxes, labels) +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torchvision import transforms +import cv2 +import numpy as np +import types +from numpy import random +np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) + + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])) # [A,B] + area_b = ((box_b[2]-box_b[0]) * + (box_b[3]-box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +class Compose(object): + """Composes several augmentations together. + Args: + transforms (List[Transform]): list of transforms to compose. 
+ Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, boxes=None, labels=None): + for t in self.transforms: + img, boxes, labels = t(img, boxes, labels) + return img, boxes, labels + + +class Lambda(object): + """Applies a lambda as a transform.""" + + def __init__(self, lambd): + assert isinstance(lambd, types.LambdaType) + self.lambd = lambd + + def __call__(self, img, boxes=None, labels=None): + return self.lambd(img, boxes, labels) + + +class ConvertFromInts(object): + def __call__(self, image, boxes=None, labels=None): + return image.astype(np.float32), boxes, labels + + +class SubtractMeans(object): + def __init__(self, mean): + self.mean = np.array(mean, dtype=np.float32) + + def __call__(self, image, boxes=None, labels=None): + image = image.astype(np.float32) + image -= self.mean + return image.astype(np.float32), boxes, labels + + +class ToAbsoluteCoords(object): + def __call__(self, image, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] *= width + boxes[:, 2] *= width + boxes[:, 1] *= height + boxes[:, 3] *= height + + return image, boxes, labels + + +class ToPercentCoords(object): + def __call__(self, image, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] /= width + boxes[:, 2] /= width + boxes[:, 1] /= height + boxes[:, 3] /= height + + return image, boxes, labels + + +class Resize(object): + def __init__(self, size=300): + self.size = size + + def __call__(self, image, boxes=None, labels=None): + image = cv2.resize(image, (self.size, + self.size)) + return image, boxes, labels + + +class RandomSaturation(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
+ + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + image[:, :, 1] *= random.uniform(self.lower, self.upper) + + return image, boxes, labels + + +class RandomHue(object): + def __init__(self, delta=18.0): + assert delta >= 0.0 and delta <= 360.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + image[:, :, 0] += random.uniform(-self.delta, self.delta) + image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 + image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 + return image, boxes, labels + + +class RandomLightingNoise(object): + def __init__(self): + self.perms = ((0, 1, 2), (0, 2, 1), + (1, 0, 2), (1, 2, 0), + (2, 0, 1), (2, 1, 0)) + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + swap = self.perms[random.randint(len(self.perms))] + shuffle = SwapChannels(swap) # shuffle channels + image = shuffle(image) + return image, boxes, labels + + +class ConvertColor(object): + def __init__(self, current='BGR', transform='HSV'): + self.transform = transform + self.current = current + + def __call__(self, image, boxes=None, labels=None): + if self.current == 'BGR' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + elif self.current == 'HSV' and self.transform == 'BGR': + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + else: + raise NotImplementedError + return image, boxes, labels + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + # expects float image + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image *= alpha + return image, boxes, labels + + +class RandomBrightness(object): + def __init__(self, delta=32): + assert delta >= 0.0 + assert delta <= 255.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image += delta + return image, boxes, labels + + +class ToCV2Image(object): + def __call__(self, tensor, boxes=None, labels=None): + return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels + + +class ToTensor(object): + def __call__(self, cvimage, boxes=None, labels=None): + return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels + + +class RandomSampleCrop(object): + """Crop + Arguments: + img (Image): the image being input during training + boxes (Tensor): the original bounding boxes in pt form + labels (Tensor): the class labels for each bbox + mode (float tuple): the min and max jaccard overlaps + Return: + (img, boxes, classes) + img (Image): the cropped image + boxes (Tensor): the adjusted bounding boxes in pt form + labels (Tensor): the class labels for each bbox + """ + def __init__(self): + self.sample_options = ( + # using entire original input image + None, + # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.4,.7,.9 + (0.1, None), + (0.3, None), + (0.7, None), + (0.9, None), + # randomly sample a patch + (None, None), + ) + + def __call__(self, image, boxes=None, labels=None): + height, width, _ = image.shape + while True: + # randomly choose a mode + mode = random.choice(self.sample_options) + if mode is None: + return image, boxes, labels + + min_iou, max_iou = mode + if min_iou is None: + min_iou = float('-inf') + if max_iou is None: + max_iou = float('inf') + + # max trails (50) + for _ in range(50): + current_image = image + + w = random.uniform(0.3 * width, width) + h = random.uniform(0.3 * height, height) + + # aspect ratio constraint b/t .5 & 2 + if h / w < 0.5 or h / w > 2: + continue + + left = random.uniform(width - w) + top = random.uniform(height - h) + + # convert to integer rect x1,y1,x2,y2 + rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + + # calculate IoU (jaccard overlap) b/t the cropped and gt boxes + overlap = jaccard_numpy(boxes, rect) # boxes tensor x,y的绝对坐标 rect 剪裁的 x,y 坐标 + + # is min and max overlap constraint satisfied? if not try again + if overlap.min() < min_iou and max_iou < overlap.max(): + continue + + # cut the crop from the image + current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], + :] + + # keep overlap with gt box IF center in sampled patch + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + + # mask in all gt boxes that above and to the left of centers + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + + # mask in all gt boxes that under and to the right of centers + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + + # mask in that both m1 and m2 are true + mask = m1 * m2 + + # have any valid boxes? try again if not + if not mask.any(): + continue + + # take only matching gt boxes + current_boxes = boxes[mask, :].copy() + + # take only matching gt labels + current_labels = labels[mask] + + # should we use the box left and top corner or the crop's + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], + rect[:2]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, :2] -= rect[:2] + + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], + rect[2:]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, 2:] -= rect[:2] + + return current_image, current_boxes, current_labels + + +class Expand(object): + def __init__(self, mean): + self.mean = mean + + def __call__(self, image, boxes, labels): + if random.randint(2): + return image, boxes, labels + + height, width, depth = image.shape + ratio = random.uniform(1, 4) + left = random.uniform(0, width*ratio - width) + top = random.uniform(0, height*ratio - height) + + expand_image = np.zeros( + (int(height*ratio), int(width*ratio), depth), + dtype=image.dtype) + expand_image[:, :, :] = self.mean + expand_image[int(top):int(top + height), + int(left):int(left + width)] = image + image = expand_image + + boxes = boxes.copy() + boxes[:, :2] += (int(left), int(top)) + boxes[:, 2:] += (int(left), int(top)) + + return image, boxes, labels + + +class RandomMirror(object): + def __call__(self, image, boxes, classes): + _, width, _ = image.shape + if random.randint(2): + image = image[:, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, boxes, classes + + +class SwapChannels(object): + """Transforms a tensorized image by swapping the channels in the order + specified in the swap tuple. 
+ Args: + swaps (int triple): final order of channels + eg: (2, 1, 0) + """ + + def __init__(self, swaps): + self.swaps = swaps + + def __call__(self, image): + """ + Args: + image (Tensor): image tensor to be transformed + Return: + a tensor with channels swapped according to swap + """ + # if torch.is_tensor(image): + # image = image.data.cpu().numpy() + # else: + # image = np.array(image) + image = image[:, :, self.swaps] + return image + + +class PhotometricDistort(object): + def __init__(self): + self.pd = [ + RandomContrast(), + ConvertColor(transform='HSV'), + RandomSaturation(), + RandomHue(), + ConvertColor(current='HSV', transform='BGR'), + RandomContrast() + ] + self.rand_brightness = RandomBrightness() + self.rand_light_noise = RandomLightingNoise() + + def __call__(self, image, boxes, labels): + im = image.copy() + im, boxes, labels = self.rand_brightness(im, boxes, labels) + if random.randint(2): + distort = Compose(self.pd[:-1]) + else: + distort = Compose(self.pd[1:]) + im, boxes, labels = distort(im, boxes, labels) + return self.rand_light_noise(im, boxes, labels) + + +class SSDAugmentation(object): + def __init__(self, size=300, mean=(104, 117, 123)): + self.mean = mean + self.size = size + self.augment = Compose([ + ConvertFromInts(), # int 转化为 float32 + ToAbsoluteCoords(), + PhotometricDistort(), + Expand(self.mean), + RandomSampleCrop(), + RandomMirror(), + ToPercentCoords(), + Resize(self.size), + SubtractMeans(self.mean) + ]) + + def __call__(self, img, boxes, labels): + return self.augment(img, boxes, labels) diff --git a/PyTorch/contrib/cv/detection/RefineDet/utils/logging.py b/PyTorch/contrib/cv/detection/RefineDet/utils/logging.py index 50e75733a1ba3228c15f1b45bc95212ee28f12c8..838d1cf2605203721904775fc50df9d5e07ad657 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/utils/logging.py +++ b/PyTorch/contrib/cv/detection/RefineDet/utils/logging.py @@ -1,54 +1,54 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -import os -import sys - -from .osutils import mkdir_if_missing - - -class Logger(object): - def __init__(self, fpath=None): - self.console = sys.stdout - self.file = None - if fpath is not None: - mkdir_if_missing(os.path.dirname(fpath)) - self.file = open(fpath, 'w') - - def __del__(self): - self.close() - - def __enter__(self): - pass - - def __exit__(self, *args): - self.close() - - def write(self, msg): - self.console.write(msg) - if self.file is not None: - self.file.write(msg) - - def flush(self): - self.console.flush() - if self.file is not None: - self.file.flush() - os.fsync(self.file.fileno()) - - def close(self): - self.console.close() - if self.file is not None: - self.file.close() +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import os +import sys + +from .osutils import mkdir_if_missing + + +class Logger(object): + def __init__(self, fpath=None): + self.console = sys.stdout + self.file = None + if fpath is not None: + mkdir_if_missing(os.path.dirname(fpath)) + self.file = open(fpath, 'w') + + def __del__(self): + self.close() + + def __enter__(self): + pass + + def __exit__(self, *args): + self.close() + + def write(self, msg): + self.console.write(msg) + if self.file is not None: + self.file.write(msg) + + def flush(self): + self.console.flush() + if self.file is not None: + self.file.flush() + os.fsync(self.file.fileno()) + + def close(self): + self.console.close() + if self.file is not None: + self.file.close() diff --git a/PyTorch/contrib/cv/detection/RefineDet/utils/osutils.py b/PyTorch/contrib/cv/detection/RefineDet/utils/osutils.py index b91a51f7b33df39ccf67b8e5505d795cb71a2295..d1537c4e52ee8fadef5bc05b1f11b2f93be7edc8 100644 --- a/PyTorch/contrib/cv/detection/RefineDet/utils/osutils.py +++ b/PyTorch/contrib/cv/detection/RefineDet/utils/osutils.py @@ -1,26 +1,26 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -import os -import errno - - -def mkdir_if_missing(dir_path): - try: - os.makedirs(dir_path) - except OSError as e: - if e.errno != errno.EEXIST: - raise +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +import os +import errno + + +def mkdir_if_missing(dir_path): + try: + os.makedirs(dir_path) + except OSError as e: + if e.errno != errno.EEXIST: + raise diff --git a/PyTorch/contrib/cv/detection/Retinaface/modelzoo_level.txt b/PyTorch/contrib/cv/detection/Retinaface/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/detection/Retinaface/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/Retinaface/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SOLOv1/mmcv/README.rst b/PyTorch/contrib/cv/detection/SOLOv1/mmcv/README.rst deleted file mode 100644 index 663f6677e1a59bb4f39294670c97ac3d760b341c..0000000000000000000000000000000000000000 --- a/PyTorch/contrib/cv/detection/SOLOv1/mmcv/README.rst +++ /dev/null @@ -1,54 +0,0 @@ -MMCV -==== - -.. image:: https://travis-ci.com/open-mmlab/mmcv.svg?branch=master - :target: https://travis-ci.com/open-mmlab/mmcv - -.. image:: https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg - :target: https://codecov.io/gh/open-mmlab/mmcv - -.. image:: https://img.shields.io/github/license/open-mmlab/mmcv.svg - :target: https://github.com/open-mmlab/mmcv/blob/master/LICENSE - - -Introduction ------------- - -MMCV is a foundational python library for computer vision research and supports many -research projects in MMLAB, such as `MMDetection `_ -and `MMAction `_. - -It provides the following functionalities. - -- Universal IO APIs -- Image processing -- Video processing -- Image and annotation visualization -- Useful utilities (progress bar, timer, ...) -- PyTorch runner with hooking mechanism -- Various CNN architectures - -See the `documentation `_ for more features and usage. - - -Installation ------------- - -Try and start with - -.. code:: - - pip install mmcv - - -or install from source - -.. code:: - - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - pip install -e . - -Note: If you would like to use :code:`opencv-python-headless` instead of :code:`opencv-python`, -e.g., in a minimum container environment or servers without GUI, -you can first install it before installing MMCV to skip the installation of :code:`opencv-python`. \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SOLOv2/mmcv/README.rst b/PyTorch/contrib/cv/detection/SOLOv2/mmcv/README.rst deleted file mode 100644 index 663f6677e1a59bb4f39294670c97ac3d760b341c..0000000000000000000000000000000000000000 --- a/PyTorch/contrib/cv/detection/SOLOv2/mmcv/README.rst +++ /dev/null @@ -1,54 +0,0 @@ -MMCV -==== - -.. image:: https://travis-ci.com/open-mmlab/mmcv.svg?branch=master - :target: https://travis-ci.com/open-mmlab/mmcv - -.. image:: https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg - :target: https://codecov.io/gh/open-mmlab/mmcv - -.. image:: https://img.shields.io/github/license/open-mmlab/mmcv.svg - :target: https://github.com/open-mmlab/mmcv/blob/master/LICENSE - - -Introduction ------------- - -MMCV is a foundational python library for computer vision research and supports many -research projects in MMLAB, such as `MMDetection `_ -and `MMAction `_. - -It provides the following functionalities. - -- Universal IO APIs -- Image processing -- Video processing -- Image and annotation visualization -- Useful utilities (progress bar, timer, ...) 
-- PyTorch runner with hooking mechanism -- Various CNN architectures - -See the `documentation `_ for more features and usage. - - -Installation ------------- - -Try and start with - -.. code:: - - pip install mmcv - - -or install from source - -.. code:: - - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - pip install -e . - -Note: If you would like to use :code:`opencv-python-headless` instead of :code:`opencv-python`, -e.g., in a minimum container environment or servers without GUI, -you can first install it before installing MMCV to skip the installation of :code:`opencv-python`. \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/Dockerfile b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/Dockerfile old mode 100755 new mode 100644 index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/Dockerfile +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/LICENSE b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/LICENSE old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README.md b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README.md old mode 100755 new mode 100644 index 9eeb1a53abc21d0e2307fe830dfc22226893f993..87658addb0a99c52277a41d088d3ea31544322ee --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README.md +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README.md @@ -1,88 +1,88 @@ -## Requirements -```angular2html -pytorch==1.5 -apex -pandas -opencv-python -``` - -## 下载数据集 -```angular2html -wget http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar -wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar -wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar - -将3个数据集放在目录 VOC0712下,目录结构为: - VOC0712 - | - |———————— VOC2007_trainval - | |——————Annotations - | |——————ImageSets - | |——————JPEGImages - | |——————SegmentationClass - | |——————SegmentationObject - |———————— VOC2012_trainval - | |——————Annotations - | |——————ImageSets - | |——————JPEGImages - | |——————SegmentationClass - | |——————SegmentationObject - |———————— VOC2007_test - |——————Annotations - |——————ImageSets - |——————JPEGImages - |——————SegmentationClass - |——————SegmentationObject -``` -## 下载预训练模型到 models 目录下 -``` -wget -P models https://storage.googleapis.com/models-hao/mb2-imagenet-71_8.pth -``` -## 训练 -```angular2html -# 1p train perf -# 是否正确输出了性能log文件 -bash test/train_performance_1p.sh --data_path xxx - -# 1p train full -# 是否正确输出了性能精度log文件,是否正确保存了模型文件 -bash test/train_full_1p.sh --data_path xxx - -# 8p train perf -# 是否正确输出了性能log文件 -bash test/train_performance_8p.sh --data_path xxx - -# 8p train full -# 是否正确输出了性能精度log文件,是否正确保存了模型文件 -bash test/train_full_8p.sh --data_path xxx - -# finetuning -# 是否正确执行迁移学习 -bash test/train_finetune_1p.sh --data_path xxx - -# online inference demo -# 是否正确输出预测结果,请确保输入固定tensor多次运行的输出结果一致 -python3.7.5 demo.py -``` -### 一些参数说明 -```angular2html ---data_path 数据集路径 ---base_net 预训练模型存放路径 ---num_epochs 训练epoch ---validation_epochs 验证epoch ---checkpoint_folder 模型保存路径 ---eval_dir 模型验证时产生文件的存放路径 ---device 使用的设备,npu或gpu ---gpu 设备卡号,单卡时使用 ---device_list 默认为 '0,1,2,3,4,5,6,7',多卡时使用 -``` -## evaluate -```angular2html -bash 
scripts/eval.sh -``` -### 一些参数说明 -```angular2html ---dataset 测试数据集 ---eval_dir 模型验证时产生文件的存放路径 ---lable_file 类别文件,训练时会在模型保存文件夹生成 -``` +## Requirements +```angular2html +pytorch==1.5 +apex +pandas +opencv-python +``` + +## 下载数据集 +```angular2html +wget http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar +wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar +wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar + +将3个数据集放在目录 VOC0712下,目录结构为: + VOC0712 + | + |———————— VOC2007_trainval + | |——————Annotations + | |——————ImageSets + | |——————JPEGImages + | |——————SegmentationClass + | |——————SegmentationObject + |———————— VOC2012_trainval + | |——————Annotations + | |——————ImageSets + | |——————JPEGImages + | |——————SegmentationClass + | |——————SegmentationObject + |———————— VOC2007_test + |——————Annotations + |——————ImageSets + |——————JPEGImages + |——————SegmentationClass + |——————SegmentationObject +``` +## 下载预训练模型到 models 目录下 +``` +wget -P models https://storage.googleapis.com/models-hao/mb2-imagenet-71_8.pth +``` +## 训练 +```angular2html +# 1p train perf +# 是否正确输出了性能log文件 +bash test/train_performance_1p.sh --data_path xxx + +# 1p train full +# 是否正确输出了性能精度log文件,是否正确保存了模型文件 +bash test/train_full_1p.sh --data_path xxx + +# 8p train perf +# 是否正确输出了性能log文件 +bash test/train_performance_8p.sh --data_path xxx + +# 8p train full +# 是否正确输出了性能精度log文件,是否正确保存了模型文件 +bash test/train_full_8p.sh --data_path xxx + +# finetuning +# 是否正确执行迁移学习 +bash test/train_finetune_1p.sh --data_path xxx + +# online inference demo +# 是否正确输出预测结果,请确保输入固定tensor多次运行的输出结果一致 +python3.7.5 demo.py +``` +### 一些参数说明 +```angular2html +--data_path 数据集路径 +--base_net 预训练模型存放路径 +--num_epochs 训练epoch +--validation_epochs 验证epoch +--checkpoint_folder 模型保存路径 +--eval_dir 模型验证时产生文件的存放路径 +--device 使用的设备,npu或gpu +--gpu 设备卡号,单卡时使用 +--device_list 默认为 '0,1,2,3,4,5,6,7',多卡时使用 +``` +## evaluate +```angular2html +bash scripts/eval.sh +``` +### 一些参数说明 +```angular2html +--dataset 测试数据集 +--eval_dir 模型验证时产生文件的存放路径 +--lable_file 类别文件,训练时会在模型保存文件夹生成 +``` diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README_raw.md b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/README_raw.md old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/convert_to_caffe2_models.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/convert_to_caffe2_models.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/demo.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/demo.py old mode 100755 new mode 100644 index 582d0d536e8c28923379491069ac577966fbe7b7..f082419b38eb4792bca698dd9b26303b57699fcf --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/demo.py +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/demo.py @@ -1,103 +1,103 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import argparse -import sys -import torch -import cv2 - -from vision.utils.misc import Timer -from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor -from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor -from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor -from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor -from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor -from vision.ssd.mobilenetv3_ssd_lite import create_mobilenetv3_large_ssd_lite, create_mobilenetv3_small_ssd_lite - -parser = argparse.ArgumentParser() -parser.add_argument('--net', default="mb2-ssd-lite", - help="The network architecture, it should be of mb1-ssd, mb1-ssd-lite, mb2-ssd-lite or vgg16-ssd.") -parser.add_argument("--trained_model", default="models/1p/mb2-ssd-lite-Epoch-0-Loss-12.09216200136671.pth", type=str) -parser.add_argument('--img', default="demo.jpg", help="image file") -parser.add_argument("--label_file", default="models/1p/voc-model-labels.txt", type=str, help="The label file path.") -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--gpu', default=0, type=int, - help='GPU id to use.') -parser.add_argument("--nms_method", type=str, default="hard") -parser.add_argument('--mb2_width_mult', default=1.0, type=float, - help='Width Multiplifier for MobilenetV2') -timer = Timer() -if __name__ == '__main__': - args = parser.parse_args() - if args.device == 'npu': - args.device = 'npu:{}'.format(args.gpu) - torch.npu.set_device(args.device) - elif args.device == 'gpu': - args.device = 'cuda:{}'.format(args.gpu) - torch.backends.cudnn.benchmark = True - - if args.net == 'vgg16-ssd': - create_net = create_vgg_ssd - create_predictor = lambda net: create_vgg_ssd_predictor(net, nms_method=args.nms_method, device=args.device) - elif args.net == 'mb1-ssd': - create_net = create_mobilenetv1_ssd - create_predictor = lambda net: create_mobilenetv1_ssd_predictor(net, nms_method=args.nms_method, - device=args.device) - elif args.net == 'mb1-ssd-lite': - create_net = create_mobilenetv1_ssd_lite - create_predictor = lambda net: create_mobilenetv1_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - elif args.net == 'sq-ssd-lite': - create_net = create_squeezenet_ssd_lite - create_predictor = lambda net: create_squeezenet_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - elif args.net == 'mb2-ssd-lite': - create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult, device=args.device) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - elif args.net == 'mb3-large-ssd-lite': - create_net = lambda num: create_mobilenetv3_large_ssd_lite(num) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - elif args.net == 'mb3-small-ssd-lite': - create_net = lambda num: create_mobilenetv3_small_ssd_lite(num) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - else: - parser.print_help(sys.stderr) - sys.exit(1) - - # create model - class_names = [name.strip() for name in open(args.label_file).readlines()] - net = create_net(len(class_names)) - timer.start("Load Model") 
- pretrained_dic = torch.load(args.trained_model, map_location='cpu')['state_dict'] - pretrained_dic = {k.replace('module.', ''): v for k, v in pretrained_dic.items()} - net.load_state_dict(pretrained_dic) - - net = net.to(args.device) - print(f'It took {timer.end("Load Model")} seconds to load the model.') - - # create predictor - predictor = create_predictor(net) - - # load imge - image = cv2.imread(args.img) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - boxes, labels, probs = predictor.predict(image) - print('\n') - print('boxes: ', boxes) - print('lables: ', labels) - print('probs: ', probs) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import sys +import torch +import cv2 + +from vision.utils.misc import Timer +from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor +from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor +from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor +from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor +from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor +from vision.ssd.mobilenetv3_ssd_lite import create_mobilenetv3_large_ssd_lite, create_mobilenetv3_small_ssd_lite + +parser = argparse.ArgumentParser() +parser.add_argument('--net', default="mb2-ssd-lite", + help="The network architecture, it should be of mb1-ssd, mb1-ssd-lite, mb2-ssd-lite or vgg16-ssd.") +parser.add_argument("--trained_model", default="models/1p/mb2-ssd-lite-Epoch-0-Loss-12.09216200136671.pth", type=str) +parser.add_argument('--img', default="demo.jpg", help="image file") +parser.add_argument("--label_file", default="models/1p/voc-model-labels.txt", type=str, help="The label file path.") +parser.add_argument('--device', default='npu', type=str, help='npu or gpu') +parser.add_argument('--gpu', default=0, type=int, + help='GPU id to use.') +parser.add_argument("--nms_method", type=str, default="hard") +parser.add_argument('--mb2_width_mult', default=1.0, type=float, + help='Width Multiplifier for MobilenetV2') +timer = Timer() +if __name__ == '__main__': + args = parser.parse_args() + if args.device == 'npu': + args.device = 'npu:{}'.format(args.gpu) + torch.npu.set_device(args.device) + elif args.device == 'gpu': + args.device = 'cuda:{}'.format(args.gpu) + torch.backends.cudnn.benchmark = True + + if args.net == 'vgg16-ssd': + create_net = create_vgg_ssd + create_predictor = lambda net: create_vgg_ssd_predictor(net, nms_method=args.nms_method, device=args.device) + elif args.net == 'mb1-ssd': + create_net = create_mobilenetv1_ssd + create_predictor = lambda net: create_mobilenetv1_ssd_predictor(net, nms_method=args.nms_method, + device=args.device) + elif args.net == 'mb1-ssd-lite': + create_net = create_mobilenetv1_ssd_lite + create_predictor = lambda net: 
create_mobilenetv1_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + elif args.net == 'sq-ssd-lite': + create_net = create_squeezenet_ssd_lite + create_predictor = lambda net: create_squeezenet_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + elif args.net == 'mb2-ssd-lite': + create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult, device=args.device) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + elif args.net == 'mb3-large-ssd-lite': + create_net = lambda num: create_mobilenetv3_large_ssd_lite(num) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + elif args.net == 'mb3-small-ssd-lite': + create_net = lambda num: create_mobilenetv3_small_ssd_lite(num) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + else: + parser.print_help(sys.stderr) + sys.exit(1) + + # create model + class_names = [name.strip() for name in open(args.label_file).readlines()] + net = create_net(len(class_names)) + timer.start("Load Model") + pretrained_dic = torch.load(args.trained_model, map_location='cpu')['state_dict'] + pretrained_dic = {k.replace('module.', ''): v for k, v in pretrained_dic.items()} + net.load_state_dict(pretrained_dic) + + net = net.to(args.device) + print(f'It took {timer.end("Load Model")} seconds to load the model.') + + # create predictor + predictor = create_predictor(net) + + # load imge + image = cv2.imread(args.img) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + boxes, labels, probs = predictor.predict(image) + print('\n') + print('boxes: ', boxes) + print('lables: ', labels) + print('probs: ', probs) diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/docker_start.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/docker_start.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/draw_eval_results.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/draw_eval_results.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/eval_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/eval_ssd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/extract_tf_weights.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/extract_tf_weights.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/modelzoo_level.txt b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/modelzoo_level.txt old mode 100755 new mode 100644 index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/open_images_downloader.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/open_images_downloader.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/prune_alexnet.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/prune_alexnet.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/requirements.txt 
b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/requirements.txt old mode 100755 new mode 100644 index 38281151344fe0631267a2578d2779d745c2e7a3..9f9c71f54c4037e60ccda37bdf7f177dc1a86297 --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/requirements.txt +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/requirements.txt @@ -1,5 +1,5 @@ -torch==1.5.0 -apex -torchvision==0.6.0 -Pandas +torch==1.5.0 +apex +torchvision==0.6.0 +Pandas opencv-python \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_example.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_example.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_live_caffe2.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_live_caffe2.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_live_demo.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/run_ssd_live_demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/eval.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/set_npu_env.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/set_npu_env.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/train_1p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/train_8p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/env_npu.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/env_npu.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_finetune_1p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_finetune_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_full_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_performance_8p.sh b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/train_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/train_ssd.py old mode 100755 new mode 100644 index 248d41ef2728de73fd66a50f0141c6d1efeabc98..5da35feb7006a21940524507ccc7343197f92335 --- a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/train_ssd.py +++ b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/train_ssd.py @@ -1,493 +1,493 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import os -import logging -import pathlib -import sys -import itertools -import apex -import torch -from torch.utils.data import DataLoader, ConcatDataset -from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR, LambdaLR - -from vision.utils.misc import Timer, freeze_net_layers, store_labels -from vision.ssd.ssd import MatchPrior -from vision.ssd.mobilenetv3_ssd_lite import create_mobilenetv3_large_ssd_lite, create_mobilenetv3_small_ssd_lite -from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor -from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor -from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor -from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor -from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor -from vision.datasets.voc_dataset import VOCDataset -from vision.datasets.open_images import OpenImagesDataset -from vision.nn.multibox_loss import MultiboxLoss -from vision.ssd.config import vgg_ssd_config -from vision.ssd.config import mobilenetv1_ssd_config -from vision.ssd.config import squeezenet_ssd_config -from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform -from eval_ssd import predicate -from vision.utils.misc import str2bool -from apex import amp - -parser = argparse.ArgumentParser( - description='Single Shot MultiBox Detector Training With Pytorch') - -# dataset setting -parser.add_argument("--dataset_type", default="voc", type=str, - help='Specify dataset type. Currently support voc and open_images.') -parser.add_argument('--data_path', default='') -parser.add_argument('--datasets', default=[], help='Dataset directory path') -parser.add_argument('--validation_dataset', help='Dataset directory path') -parser.add_argument('--balance_data', action='store_true', - help="Balance training data by down-sampling more frequent labels.") - -# Params for loading pretrained basenet or checkpoints. 
-parser.add_argument('--base_net', default='', - help='Pretrained base model') -parser.add_argument('--pretrained_ssd', default='', help='Pre-trained base model') -parser.add_argument('--resume', default=None, type=str, - help='Checkpoint state_dict file to resume training from') -parser.add_argument('--net', default="vgg16-ssd", - help="The network architecture, it can be mb1-ssd, mb1-lite-ssd, mb2-ssd-lite, mb3-large-ssd-lite, mb3-small-ssd-lite or vgg16-ssd.") -parser.add_argument('--freeze_base_net', action='store_true', - help="Freeze base net layers.") -parser.add_argument('--freeze_net', action='store_true', - help="Freeze all the layers except the prediction head.") -parser.add_argument('--mb2_width_mult', default=1.0, type=float, - help='Width Multiplifier for MobilenetV2') - -# Params for SGD -parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, - help='initial learning rate') -parser.add_argument('--momentum', default=0.9, type=float, - help='Momentum value for optim') -parser.add_argument('--weight_decay', default=5e-4, type=float, - help='Weight decay for SGD') -parser.add_argument('--gamma', default=0.1, type=float, - help='Gamma update for SGD') -parser.add_argument('--base_net_lr', default=None, type=float, - help='initial learning rate for base net.') -parser.add_argument('--extra_layers_lr', default=None, type=float, - help='initial learning rate for the layers not in base net and prediction heads.') - -# Scheduler -parser.add_argument('--scheduler', default="multi-step", type=str, - help="Scheduler for SGD. It can one of multi-step and cosine") - -# Params for Multi-step Scheduler -parser.add_argument('--milestones', default="80,100", type=str, - help="milestones for MultiStepLR") - -# Params for Cosine Annealing -parser.add_argument('--t_max', default=120, type=float, - help='T_max value for Cosine Annealing Scheduler.') - -# Train params -parser.add_argument('--batch_size', default=32, type=int, - help='Batch size for training') -parser.add_argument('--num_epochs', default=120, type=int, - help='the number epochs') -parser.add_argument('--num_workers', default=4, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--validation_epochs', default=5, type=int, - help='the number epochs') -parser.add_argument('--debug_steps', default=100, type=int, - help='Set the debug log output frequency.') -parser.add_argument('--checkpoint_folder', default='models/', - help='Directory for saving checkpoint models') -# eval params -parser.add_argument("--nms_method", type=str, default="hard") -parser.add_argument("--use_2007_metric", type=str2bool, default=True) -parser.add_argument("--iou_threshold", type=float, default=0.5, help="The threshold of Intersection over Union.") -parser.add_argument("--eval_dir", default="eval_results", type=str, help="The directory to store evaluation results.") - -# distributed setting -parser.add_argument('--distributed', default=False, action='store_true', - help='Use multi-processing distributed training to launch ') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') -parser.add_argument('--device', default='gpu', type=str, help='npu or gpu') -parser.add_argument('--gpu', default=0, type=int, - help='GPU id to use.') -parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') -parser.add_argument('--world_size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist_url', default='', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist_backend', default='nccl', type=str, - help='distributed backend, nccl for GPU, hccl for NPU') -parser.add_argument('--addr', default='127.0.0.1', type=str, help='master addr') -parser.add_argument('--port', default='29688', type=str, help='master port') - -# apex setting -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--opt_level', default='O2', type=str, help='apex optimize level') -parser.add_argument('--loss_scale_value', default=128.0, type=int, help='static loss scale value') - -# learning rate setting -parser.add_argument('--warm_up', default=False, action='store_true', help='use warm_up or not') -parser.add_argument('--warm_up_epochs', default=5, type=int, help='warm up epochs') -parser.add_argument('--stay_lr', default=-1, type=int, help='Epoch with constant learning rate') - -logging.basicConfig(stream=sys.stdout, level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def train(loader, net, criterion, optimizer, args, timer, debug_steps=100, epoch=-1): - net.train(True) - running_loss = 0.0 - running_regression_loss = 0.0 - running_classification_loss = 0.0 - timer.start('batch_time') - timer.start('multi_step_time') - multi_step = 0 - for i, data in enumerate(loader): - images, boxes, labels = data - boxes, labels = boxes.to(args.device), labels.to(args.device) - optimizer.zero_grad() - confidence, locations = net.forward(images) - regression_loss, classification_loss = criterion(confidence, locations, labels, boxes) # TODO CHANGE BOXES - loss = (regression_loss + classification_loss) - if args.amp: - with amp.scale_loss(loss, optimizer) as scale_loss: - scale_loss.backward() - else: - loss.backward() - optimizer.step() - - running_loss += loss.item() - running_regression_loss += regression_loss.item() - running_classification_loss += classification_loss.item() - multi_step += 1 - if (i % debug_steps == 0 or i == len(loader) - 1) and (not args.distributed or args.rank == 0): - avg_loss = running_loss / (i + 1) - avg_reg_loss = running_regression_loss / (i + 1) - avg_clf_loss = running_classification_loss / (i + 1) - multi_step_time = timer.end('multi_step_time') - logging.info( - f"Epoch: {epoch}, Step: {i}, " + - f"multi_step_time: {multi_step_time:.4f}, " + - f"step_avg_time: {multi_step_time / multi_step:.4f}, " + - f"Average Regression Loss {avg_reg_loss:.4f}, " + - f"Average Classification Loss: {avg_clf_loss:.4f}, " + - f"Average Loss: {avg_loss:.4f}" - ) - multi_step = 0 - timer.start('multi_step_time') - if not args.distributed or args.rank == 0: - batch_time = 
timer.end('batch_time') - logging.info(f"Epoch: {epoch}, " + - f"batch_time: {batch_time:.4f}, " + - f"FPS: {args.batch_size * args.ngpu * len(loader) / batch_time:.4f} ") - - -def test(loader, net, criterion, args, epoch=-1): - net.eval() - running_loss = 0.0 - running_regression_loss = 0.0 - running_classification_loss = 0.0 - num = 0 - for i, data in enumerate(loader): - images, boxes, labels = data - num += 1 - with torch.no_grad(): - confidence, locations = net(images) - regression_loss, classification_loss = criterion(confidence.cpu(), locations.cpu(), labels, boxes) - loss = regression_loss + classification_loss - running_loss += loss.item() - running_regression_loss += regression_loss.item() - running_classification_loss += classification_loss.item() - if not args.distributed or args.rank == 0: - logging.info( - f"Epoch: {epoch}, Step: {i}, " + - f"Average Regression Loss {running_regression_loss / (i + 1):.4f}, " + - f"Average Classification Loss: {running_classification_loss / (i + 1):.4f}, " + - f"Average Loss: {running_loss / (i + 1):.4f}" - ) - return running_loss / num, running_regression_loss / num, running_classification_loss / num - - -def main_worker(gpu, timer, args): - args.gpu = args.process_device_map[gpu] - print(args.gpu) - if args.distributed: - if args.device == 'npu': - torch.distributed.init_process_group(backend=args.dist_backend, - world_size=args.ngpu, - rank=args.rank) - else: - torch.distributed.init_process_group(backend=args.dist_backend, - init_method="env://", - world_size=args.ngpu, - rank=args.rank) - if args.device == 'npu': - args.device = 'npu:{}'.format(args.gpu) - print(args.device) - torch.npu.set_device(args.device) - logging.info('use NPU, {}'.format(args.device)) - elif args.device == 'gpu': - args.device = 'cuda:{}'.format(args.gpu) - torch.backends.cudnn.benchmark = True - logging.info('use GPU, {}'.format(args.device)) - - if args.net == 'vgg16-ssd': - create_net = create_vgg_ssd - create_predictor = lambda net: create_vgg_ssd_predictor(net, nms_method=args.nms_method, device=args.device) - config = vgg_ssd_config - elif args.net == 'mb1-ssd': - create_net = create_mobilenetv1_ssd - create_predictor = lambda net: create_mobilenetv1_ssd_predictor(net, nms_method=args.nms_method, - device=args.device) - config = mobilenetv1_ssd_config - elif args.net == 'mb1-ssd-lite': - create_net = create_mobilenetv1_ssd_lite - create_predictor = lambda net: create_mobilenetv1_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - config = mobilenetv1_ssd_config - elif args.net == 'sq-ssd-lite': - create_net = create_squeezenet_ssd_lite - create_predictor = lambda net: create_squeezenet_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - config = squeezenet_ssd_config - elif args.net == 'mb2-ssd-lite': - create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult, device=args.device) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - config = mobilenetv1_ssd_config - elif args.net == 'mb3-large-ssd-lite': - create_net = lambda num: create_mobilenetv3_large_ssd_lite(num) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, - device=args.device) - config = mobilenetv1_ssd_config - elif args.net == 'mb3-small-ssd-lite': - create_net = lambda num: create_mobilenetv3_small_ssd_lite(num) - create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, 
nms_method=args.nms_method, - device=args.device) - config = mobilenetv1_ssd_config - else: - logging.fatal("The net type is wrong.") - parser.print_help(sys.stderr) - sys.exit(1) - train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) - target_transform = MatchPrior(config.priors, config.center_variance, - config.size_variance, 0.5) - - test_transform = TestTransform(config.image_size, config.image_mean, config.image_std) - logging.info("Prepare training datasets.") - datasets = [] - for dataset_path in args.datasets: - if args.dataset_type == 'voc': - dataset = VOCDataset(dataset_path, transform=train_transform, - target_transform=target_transform) - label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt") - store_labels(label_file, dataset.class_names) - num_classes = len(dataset.class_names) - elif args.dataset_type == 'open_images': - dataset = OpenImagesDataset(dataset_path, - transform=train_transform, target_transform=target_transform, - dataset_type="train", balance_data=args.balance_data) - label_file = os.path.join(args.checkpoint_folder, "open-images-model-labels.txt") - store_labels(label_file, dataset.class_names) - logging.info(dataset) - num_classes = len(dataset.class_names) - - else: - raise ValueError(f"Dataset type {args.dataset_type} is not supported.") - datasets.append(dataset) - logging.info(f"Stored labels into file {label_file}.") - train_dataset = ConcatDataset(datasets) - logging.info("Train dataset size: {}".format(len(train_dataset))) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - train_loader = DataLoader(train_dataset, - args.batch_size, - num_workers=args.num_workers, - sampler=train_sampler if args.distributed else None, - shuffle=False if args.distributed else True) - logging.info("Prepare Validation datasets.") - if args.dataset_type == "voc": - val_dataset = VOCDataset(args.validation_dataset, transform=test_transform, - target_transform=target_transform, is_test=True) - elif args.dataset_type == 'open_images': - val_dataset = OpenImagesDataset(dataset_path, - transform=test_transform, target_transform=target_transform, - dataset_type="test") - logging.info(val_dataset) - logging.info("validation dataset size: {}".format(len(val_dataset))) - - val_loader = DataLoader(val_dataset, args.batch_size, - num_workers=args.num_workers, - shuffle=False) - - logging.info("Build network.") - net = create_net(num_classes) - min_loss = -10000.0 - last_epoch = -1 - - base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr - extra_layers_lr = args.extra_layers_lr if args.extra_layers_lr is not None else args.lr - if args.freeze_base_net: - logging.info("Freeze base net.") - freeze_net_layers(net.base_net) - params = itertools.chain(net.source_layer_add_ons.parameters(), net.extras.parameters(), - net.regression_headers.parameters(), net.classification_headers.parameters()) - params = [ - {'params': itertools.chain( - net.source_layer_add_ons.parameters(), - net.extras.parameters() - ), 'lr': extra_layers_lr}, - {'params': itertools.chain( - net.regression_headers.parameters(), - net.classification_headers.parameters() - )} - ] - elif args.freeze_net: - freeze_net_layers(net.base_net) - freeze_net_layers(net.source_layer_add_ons) - freeze_net_layers(net.extras) - params = itertools.chain(net.regression_headers.parameters(), net.classification_headers.parameters()) - logging.info("Freeze all the layers except prediction heads.") - else: 
- params = [ - {'params': net.base_net.parameters(), 'lr': base_net_lr}, - {'params': itertools.chain( - net.source_layer_add_ons.parameters(), - net.extras.parameters() - ), 'lr': extra_layers_lr}, - {'params': itertools.chain( - net.regression_headers.parameters(), - net.classification_headers.parameters() - )} - ] - net.to(args.device) - criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3, - center_variance=0.1, size_variance=0.2, device=args.device) - # npu: NpuFusedSGD - if 'npu' in args.device: - optimizer = apex.optimizers.NpuFusedSGD(params, lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) - else: - optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay) - timer.start("Load Model") - if args.resume: - logging.info(f"Resume from the model {args.resume}") - checkpoint = torch.load(args.resume, map_location='cpu') - pretrained_dic = {k.replace('module.', ''): v for k, v in checkpoint['state_dict'].items()} - net.load_state_dict(pretrained_dic) - optimizer.load_state_dict(checkpoint['optimizer']) - last_epoch = checkpoint['epoch'] - elif args.base_net: - logging.info(f"Init from base net {args.base_net}") - net.init_from_base_net(args.base_net) - elif args.pretrained_ssd: - logging.info(f"Init from pretrained ssd {args.pretrained_ssd}") - net.init_from_pretrained_ssd(args.pretrained_ssd) - logging.info(f'Took {timer.end("Load Model"):.2f} seconds to load the model.') - - if args.amp: - net, optimizer = amp.initialize(net, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value) - if args.distributed: - net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.rank], - broadcast_buffers=False if 'npu' in args.device else True) - logging.info(f"Learning rate: {args.lr}, Base net learning rate: {base_net_lr}, " - + f"Extra Layers learning rate: {extra_layers_lr}.") - - if args.scheduler == 'multi-step': - logging.info("Uses MultiStepLR scheduler.") - milestones = [int(v.strip()) for v in args.milestones.split(",")] - scheduler = MultiStepLR(optimizer, milestones=milestones, - gamma=0.1, last_epoch=last_epoch) - elif args.scheduler == 'cosine': - logging.info("Uses CosineAnnealingLR scheduler.") - scheduler = CosineAnnealingLR(optimizer, args.t_max, last_epoch=last_epoch) - else: - logging.fatal(f"Unsupported Scheduler: {args.scheduler}.") - parser.print_help(sys.stderr) - sys.exit(1) - if args.warm_up: - warm_up_scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: epoch / args.warm_up_epochs) - - logging.info(f"Start training from epoch {last_epoch + 1}.") - for epoch in range(last_epoch + 1, args.num_epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - if args.warm_up and epoch < args.warm_up_epochs: - warm_up_scheduler.step() - else: - scheduler.step() - train(train_loader, net, criterion, optimizer, args, timer, - debug_steps=args.debug_steps, epoch=epoch) - if (epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1) and ( - not args.distributed or args.rank == 0): - val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, args, epoch) - logging.info( - f"Epoch: {epoch}, " + - f"Validation Regression Loss {val_regression_loss:.4f}, " + - f"Validation Classification Loss: {val_classification_loss:.4f}, " + - f"Validation Loss: {val_loss:.4f}" - ) - model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{val_loss}.pth") - torch.save({'state_dict': net.state_dict(), 'epoch': epoch, 
'optimizer': optimizer.state_dict()}, - model_path) - logging.info(f"Saved model {model_path}") - - # 默认只测最后一轮的精度 - predictor = create_predictor(net) - val_dataset = VOCDataset(args.validation_dataset, is_test=True) - accuracy = predicate(val_dataset, predictor, args, dataset.class_names) - logging.info(f'epoch: {epoch}, accuracy: {accuracy}') - - -if __name__ == '__main__': - timer = Timer() - args = parser.parse_args() - if args.device == 'npu': - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = args.port - - logging.info(args) - args.process_device_map = device_id_to_process_device_map(args.device_list) - - if not os.path.exists(args.eval_dir): - os.makedirs(args.eval_dir) - if not os.path.exists(args.checkpoint_folder): - os.makedirs(args.checkpoint_folder) - args.datasets = [os.path.join(args.data_path, 'VOC2007_trainval'), os.path.join(args.data_path, 'VOC2012_trainval')] - args.validation_dataset = os.path.join(args.data_path, 'VOC2007_test') - if args.distributed: - args.ngpu = int(os.environ['RANK_SIZE']) - main_worker(args.rank, timer, args) - else: - args.ngpu = 1 - main_worker(args.gpu, timer, args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import os +import logging +import pathlib +import sys +import itertools +import apex +import torch +from torch.utils.data import DataLoader, ConcatDataset +from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR, LambdaLR + +from vision.utils.misc import Timer, freeze_net_layers, store_labels +from vision.ssd.ssd import MatchPrior +from vision.ssd.mobilenetv3_ssd_lite import create_mobilenetv3_large_ssd_lite, create_mobilenetv3_small_ssd_lite +from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor +from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor +from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor +from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor +from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor +from vision.datasets.voc_dataset import VOCDataset +from vision.datasets.open_images import OpenImagesDataset +from vision.nn.multibox_loss import MultiboxLoss +from vision.ssd.config import vgg_ssd_config +from vision.ssd.config import mobilenetv1_ssd_config +from vision.ssd.config import squeezenet_ssd_config +from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform +from eval_ssd import predicate +from vision.utils.misc import str2bool +from apex import amp + +parser = argparse.ArgumentParser( + description='Single Shot MultiBox Detector Training With Pytorch') + +# dataset setting +parser.add_argument("--dataset_type", default="voc", type=str, + help='Specify dataset type. 
Currently support voc and open_images.') +parser.add_argument('--data_path', default='') +parser.add_argument('--datasets', default=[], help='Dataset directory path') +parser.add_argument('--validation_dataset', help='Dataset directory path') +parser.add_argument('--balance_data', action='store_true', + help="Balance training data by down-sampling more frequent labels.") + +# Params for loading pretrained basenet or checkpoints. +parser.add_argument('--base_net', default='', + help='Pretrained base model') +parser.add_argument('--pretrained_ssd', default='', help='Pre-trained base model') +parser.add_argument('--resume', default=None, type=str, + help='Checkpoint state_dict file to resume training from') +parser.add_argument('--net', default="vgg16-ssd", + help="The network architecture, it can be mb1-ssd, mb1-lite-ssd, mb2-ssd-lite, mb3-large-ssd-lite, mb3-small-ssd-lite or vgg16-ssd.") +parser.add_argument('--freeze_base_net', action='store_true', + help="Freeze base net layers.") +parser.add_argument('--freeze_net', action='store_true', + help="Freeze all the layers except the prediction head.") +parser.add_argument('--mb2_width_mult', default=1.0, type=float, + help='Width Multiplifier for MobilenetV2') + +# Params for SGD +parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, + help='initial learning rate') +parser.add_argument('--momentum', default=0.9, type=float, + help='Momentum value for optim') +parser.add_argument('--weight_decay', default=5e-4, type=float, + help='Weight decay for SGD') +parser.add_argument('--gamma', default=0.1, type=float, + help='Gamma update for SGD') +parser.add_argument('--base_net_lr', default=None, type=float, + help='initial learning rate for base net.') +parser.add_argument('--extra_layers_lr', default=None, type=float, + help='initial learning rate for the layers not in base net and prediction heads.') + +# Scheduler +parser.add_argument('--scheduler', default="multi-step", type=str, + help="Scheduler for SGD. 
It can one of multi-step and cosine") + +# Params for Multi-step Scheduler +parser.add_argument('--milestones', default="80,100", type=str, + help="milestones for MultiStepLR") + +# Params for Cosine Annealing +parser.add_argument('--t_max', default=120, type=float, + help='T_max value for Cosine Annealing Scheduler.') + +# Train params +parser.add_argument('--batch_size', default=32, type=int, + help='Batch size for training') +parser.add_argument('--num_epochs', default=120, type=int, + help='the number epochs') +parser.add_argument('--num_workers', default=4, type=int, + help='Number of workers used in dataloading') +parser.add_argument('--validation_epochs', default=5, type=int, + help='the number epochs') +parser.add_argument('--debug_steps', default=100, type=int, + help='Set the debug log output frequency.') +parser.add_argument('--checkpoint_folder', default='models/', + help='Directory for saving checkpoint models') +# eval params +parser.add_argument("--nms_method", type=str, default="hard") +parser.add_argument("--use_2007_metric", type=str2bool, default=True) +parser.add_argument("--iou_threshold", type=float, default=0.5, help="The threshold of Intersection over Union.") +parser.add_argument("--eval_dir", default="eval_results", type=str, help="The directory to store evaluation results.") + +# distributed setting +parser.add_argument('--distributed', default=False, action='store_true', + help='Use multi-processing distributed training to launch ') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--device', default='gpu', type=str, help='npu or gpu') +parser.add_argument('--gpu', default=0, type=int, + help='GPU id to use.') +parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list') +parser.add_argument('--world_size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist_url', default='', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist_backend', default='nccl', type=str, + help='distributed backend, nccl for GPU, hccl for NPU') +parser.add_argument('--addr', default='127.0.0.1', type=str, help='master addr') +parser.add_argument('--port', default='29688', type=str, help='master port') + +# apex setting +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--opt_level', default='O2', type=str, help='apex optimize level') +parser.add_argument('--loss_scale_value', default=128.0, type=int, help='static loss scale value') + +# learning rate setting +parser.add_argument('--warm_up', default=False, action='store_true', help='use warm_up or not') +parser.add_argument('--warm_up_epochs', default=5, type=int, help='warm up epochs') +parser.add_argument('--stay_lr', default=-1, type=int, help='Epoch with constant learning rate') + +logging.basicConfig(stream=sys.stdout, level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def train(loader, net, criterion, optimizer, args, timer, 
debug_steps=100, epoch=-1): + net.train(True) + running_loss = 0.0 + running_regression_loss = 0.0 + running_classification_loss = 0.0 + timer.start('batch_time') + timer.start('multi_step_time') + multi_step = 0 + for i, data in enumerate(loader): + images, boxes, labels = data + boxes, labels = boxes.to(args.device), labels.to(args.device) + optimizer.zero_grad() + confidence, locations = net.forward(images) + regression_loss, classification_loss = criterion(confidence, locations, labels, boxes) # TODO CHANGE BOXES + loss = (regression_loss + classification_loss) + if args.amp: + with amp.scale_loss(loss, optimizer) as scale_loss: + scale_loss.backward() + else: + loss.backward() + optimizer.step() + + running_loss += loss.item() + running_regression_loss += regression_loss.item() + running_classification_loss += classification_loss.item() + multi_step += 1 + if (i % debug_steps == 0 or i == len(loader) - 1) and (not args.distributed or args.rank == 0): + avg_loss = running_loss / (i + 1) + avg_reg_loss = running_regression_loss / (i + 1) + avg_clf_loss = running_classification_loss / (i + 1) + multi_step_time = timer.end('multi_step_time') + logging.info( + f"Epoch: {epoch}, Step: {i}, " + + f"multi_step_time: {multi_step_time:.4f}, " + + f"step_avg_time: {multi_step_time / multi_step:.4f}, " + + f"Average Regression Loss {avg_reg_loss:.4f}, " + + f"Average Classification Loss: {avg_clf_loss:.4f}, " + + f"Average Loss: {avg_loss:.4f}" + ) + multi_step = 0 + timer.start('multi_step_time') + if not args.distributed or args.rank == 0: + batch_time = timer.end('batch_time') + logging.info(f"Epoch: {epoch}, " + + f"batch_time: {batch_time:.4f}, " + + f"FPS: {args.batch_size * args.ngpu * len(loader) / batch_time:.4f} ") + + +def test(loader, net, criterion, args, epoch=-1): + net.eval() + running_loss = 0.0 + running_regression_loss = 0.0 + running_classification_loss = 0.0 + num = 0 + for i, data in enumerate(loader): + images, boxes, labels = data + num += 1 + with torch.no_grad(): + confidence, locations = net(images) + regression_loss, classification_loss = criterion(confidence.cpu(), locations.cpu(), labels, boxes) + loss = regression_loss + classification_loss + running_loss += loss.item() + running_regression_loss += regression_loss.item() + running_classification_loss += classification_loss.item() + if not args.distributed or args.rank == 0: + logging.info( + f"Epoch: {epoch}, Step: {i}, " + + f"Average Regression Loss {running_regression_loss / (i + 1):.4f}, " + + f"Average Classification Loss: {running_classification_loss / (i + 1):.4f}, " + + f"Average Loss: {running_loss / (i + 1):.4f}" + ) + return running_loss / num, running_regression_loss / num, running_classification_loss / num + + +def main_worker(gpu, timer, args): + args.gpu = args.process_device_map[gpu] + print(args.gpu) + if args.distributed: + if args.device == 'npu': + torch.distributed.init_process_group(backend=args.dist_backend, + world_size=args.ngpu, + rank=args.rank) + else: + torch.distributed.init_process_group(backend=args.dist_backend, + init_method="env://", + world_size=args.ngpu, + rank=args.rank) + if args.device == 'npu': + args.device = 'npu:{}'.format(args.gpu) + print(args.device) + torch.npu.set_device(args.device) + logging.info('use NPU, {}'.format(args.device)) + elif args.device == 'gpu': + args.device = 'cuda:{}'.format(args.gpu) + torch.backends.cudnn.benchmark = True + logging.info('use GPU, {}'.format(args.device)) + + if args.net == 'vgg16-ssd': + create_net = create_vgg_ssd + 
create_predictor = lambda net: create_vgg_ssd_predictor(net, nms_method=args.nms_method, device=args.device) + config = vgg_ssd_config + elif args.net == 'mb1-ssd': + create_net = create_mobilenetv1_ssd + create_predictor = lambda net: create_mobilenetv1_ssd_predictor(net, nms_method=args.nms_method, + device=args.device) + config = mobilenetv1_ssd_config + elif args.net == 'mb1-ssd-lite': + create_net = create_mobilenetv1_ssd_lite + create_predictor = lambda net: create_mobilenetv1_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + config = mobilenetv1_ssd_config + elif args.net == 'sq-ssd-lite': + create_net = create_squeezenet_ssd_lite + create_predictor = lambda net: create_squeezenet_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + config = squeezenet_ssd_config + elif args.net == 'mb2-ssd-lite': + create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult, device=args.device) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + config = mobilenetv1_ssd_config + elif args.net == 'mb3-large-ssd-lite': + create_net = lambda num: create_mobilenetv3_large_ssd_lite(num) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + config = mobilenetv1_ssd_config + elif args.net == 'mb3-small-ssd-lite': + create_net = lambda num: create_mobilenetv3_small_ssd_lite(num) + create_predictor = lambda net: create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, + device=args.device) + config = mobilenetv1_ssd_config + else: + logging.fatal("The net type is wrong.") + parser.print_help(sys.stderr) + sys.exit(1) + train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) + target_transform = MatchPrior(config.priors, config.center_variance, + config.size_variance, 0.5) + + test_transform = TestTransform(config.image_size, config.image_mean, config.image_std) + logging.info("Prepare training datasets.") + datasets = [] + for dataset_path in args.datasets: + if args.dataset_type == 'voc': + dataset = VOCDataset(dataset_path, transform=train_transform, + target_transform=target_transform) + label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt") + store_labels(label_file, dataset.class_names) + num_classes = len(dataset.class_names) + elif args.dataset_type == 'open_images': + dataset = OpenImagesDataset(dataset_path, + transform=train_transform, target_transform=target_transform, + dataset_type="train", balance_data=args.balance_data) + label_file = os.path.join(args.checkpoint_folder, "open-images-model-labels.txt") + store_labels(label_file, dataset.class_names) + logging.info(dataset) + num_classes = len(dataset.class_names) + + else: + raise ValueError(f"Dataset type {args.dataset_type} is not supported.") + datasets.append(dataset) + logging.info(f"Stored labels into file {label_file}.") + train_dataset = ConcatDataset(datasets) + logging.info("Train dataset size: {}".format(len(train_dataset))) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + train_loader = DataLoader(train_dataset, + args.batch_size, + num_workers=args.num_workers, + sampler=train_sampler if args.distributed else None, + shuffle=False if args.distributed else True) + logging.info("Prepare Validation datasets.") + if args.dataset_type == "voc": + val_dataset = 
VOCDataset(args.validation_dataset, transform=test_transform, + target_transform=target_transform, is_test=True) + elif args.dataset_type == 'open_images': + val_dataset = OpenImagesDataset(dataset_path, + transform=test_transform, target_transform=target_transform, + dataset_type="test") + logging.info(val_dataset) + logging.info("validation dataset size: {}".format(len(val_dataset))) + + val_loader = DataLoader(val_dataset, args.batch_size, + num_workers=args.num_workers, + shuffle=False) + + logging.info("Build network.") + net = create_net(num_classes) + min_loss = -10000.0 + last_epoch = -1 + + base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr + extra_layers_lr = args.extra_layers_lr if args.extra_layers_lr is not None else args.lr + if args.freeze_base_net: + logging.info("Freeze base net.") + freeze_net_layers(net.base_net) + params = itertools.chain(net.source_layer_add_ons.parameters(), net.extras.parameters(), + net.regression_headers.parameters(), net.classification_headers.parameters()) + params = [ + {'params': itertools.chain( + net.source_layer_add_ons.parameters(), + net.extras.parameters() + ), 'lr': extra_layers_lr}, + {'params': itertools.chain( + net.regression_headers.parameters(), + net.classification_headers.parameters() + )} + ] + elif args.freeze_net: + freeze_net_layers(net.base_net) + freeze_net_layers(net.source_layer_add_ons) + freeze_net_layers(net.extras) + params = itertools.chain(net.regression_headers.parameters(), net.classification_headers.parameters()) + logging.info("Freeze all the layers except prediction heads.") + else: + params = [ + {'params': net.base_net.parameters(), 'lr': base_net_lr}, + {'params': itertools.chain( + net.source_layer_add_ons.parameters(), + net.extras.parameters() + ), 'lr': extra_layers_lr}, + {'params': itertools.chain( + net.regression_headers.parameters(), + net.classification_headers.parameters() + )} + ] + net.to(args.device) + criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3, + center_variance=0.1, size_variance=0.2, device=args.device) + # npu: NpuFusedSGD + if 'npu' in args.device: + optimizer = apex.optimizers.NpuFusedSGD(params, lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay) + else: + optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay) + timer.start("Load Model") + if args.resume: + logging.info(f"Resume from the model {args.resume}") + checkpoint = torch.load(args.resume, map_location='cpu') + pretrained_dic = {k.replace('module.', ''): v for k, v in checkpoint['state_dict'].items()} + net.load_state_dict(pretrained_dic) + optimizer.load_state_dict(checkpoint['optimizer']) + last_epoch = checkpoint['epoch'] + elif args.base_net: + logging.info(f"Init from base net {args.base_net}") + net.init_from_base_net(args.base_net) + elif args.pretrained_ssd: + logging.info(f"Init from pretrained ssd {args.pretrained_ssd}") + net.init_from_pretrained_ssd(args.pretrained_ssd) + logging.info(f'Took {timer.end("Load Model"):.2f} seconds to load the model.') + + if args.amp: + net, optimizer = amp.initialize(net, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value) + if args.distributed: + net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.rank], + broadcast_buffers=False if 'npu' in args.device else True) + logging.info(f"Learning rate: {args.lr}, Base net learning rate: {base_net_lr}, " + + f"Extra Layers learning rate: {extra_layers_lr}.") + + if args.scheduler == 
'multi-step': + logging.info("Uses MultiStepLR scheduler.") + milestones = [int(v.strip()) for v in args.milestones.split(",")] + scheduler = MultiStepLR(optimizer, milestones=milestones, + gamma=0.1, last_epoch=last_epoch) + elif args.scheduler == 'cosine': + logging.info("Uses CosineAnnealingLR scheduler.") + scheduler = CosineAnnealingLR(optimizer, args.t_max, last_epoch=last_epoch) + else: + logging.fatal(f"Unsupported Scheduler: {args.scheduler}.") + parser.print_help(sys.stderr) + sys.exit(1) + if args.warm_up: + warm_up_scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: epoch / args.warm_up_epochs) + + logging.info(f"Start training from epoch {last_epoch + 1}.") + for epoch in range(last_epoch + 1, args.num_epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + if args.warm_up and epoch < args.warm_up_epochs: + warm_up_scheduler.step() + else: + scheduler.step() + train(train_loader, net, criterion, optimizer, args, timer, + debug_steps=args.debug_steps, epoch=epoch) + if (epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1) and ( + not args.distributed or args.rank == 0): + val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, args, epoch) + logging.info( + f"Epoch: {epoch}, " + + f"Validation Regression Loss {val_regression_loss:.4f}, " + + f"Validation Classification Loss: {val_classification_loss:.4f}, " + + f"Validation Loss: {val_loss:.4f}" + ) + model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{val_loss}.pth") + torch.save({'state_dict': net.state_dict(), 'epoch': epoch, 'optimizer': optimizer.state_dict()}, + model_path) + logging.info(f"Saved model {model_path}") + + # 默认只测最后一轮的精度 + predictor = create_predictor(net) + val_dataset = VOCDataset(args.validation_dataset, is_test=True) + accuracy = predicate(val_dataset, predictor, args, dataset.class_names) + logging.info(f'epoch: {epoch}, accuracy: {accuracy}') + + +if __name__ == '__main__': + timer = Timer() + args = parser.parse_args() + if args.device == 'npu': + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = args.port + + logging.info(args) + args.process_device_map = device_id_to_process_device_map(args.device_list) + + if not os.path.exists(args.eval_dir): + os.makedirs(args.eval_dir) + if not os.path.exists(args.checkpoint_folder): + os.makedirs(args.checkpoint_folder) + args.datasets = [os.path.join(args.data_path, 'VOC2007_trainval'), os.path.join(args.data_path, 'VOC2012_trainval')] + args.validation_dataset = os.path.join(args.data_path, 'VOC2007_test') + if args.distributed: + args.ngpu = int(os.environ['RANK_SIZE']) + main_worker(args.rank, timer, args) + else: + args.ngpu = 1 + main_worker(args.gpu, timer, args) diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/translate_tf_mobilenetv1.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/translate_tf_mobilenetv1.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/collation.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/collation.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/generate_vocdata.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/generate_vocdata.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/open_images.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/open_images.py old mode 100755 new mode 
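The patched SSD-MobilenetV2 training loop above steps a LambdaLR warm-up scheduler for the first --warm_up_epochs epochs and only afterwards hands the learning rate over to the MultiStepLR or CosineAnnealingLR schedule. Below is a minimal, self-contained sketch of that pattern, not the repository's code: the Linear model, the hyperparameter values and the printed epochs are made up for illustration.

```python
# Sketch of the warm-up -> main-scheduler hand-off used in the patched training
# loop above (dummy model, illustrative values only).
import torch
from torch.optim.lr_scheduler import LambdaLR, MultiStepLR

model = torch.nn.Linear(4, 2)                     # stand-in for the SSD network
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

warm_up_epochs = 5                                # --warm_up_epochs
scheduler = MultiStepLR(optimizer, milestones=[80, 100], gamma=0.1)          # --milestones "80,100"
warm_up_scheduler = LambdaLR(optimizer, lr_lambda=lambda e: e / warm_up_epochs)

for epoch in range(120):                          # --num_epochs
    if epoch < warm_up_epochs:
        warm_up_scheduler.step()                  # linear ramp toward the base lr
    else:
        scheduler.step()                          # regular multi-step decay afterwards
    if epoch in (0, 4, 5, 84, 104):
        print(f"epoch {epoch:3d}  lr {optimizer.param_groups[0]['lr']:.6f}")
    # one call to train(...) per epoch would run here
```

Note a side effect this sketch shares with the loop above: because each scheduler keeps its own epoch counter, the multi-step milestones are counted from the end of the warm-up rather than from epoch 0.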
100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/voc_dataset.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/datasets/voc_dataset.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/alexnet.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/alexnet.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenet.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenet.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenet_v2.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenet_v2.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenetv3.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/mobilenetv3.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/multibox_loss.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/multibox_loss.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/scaled_l2_norm.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/scaled_l2_norm.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/squeezenet.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/squeezenet.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/vgg.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/nn/vgg.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/prunning/prunner.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/prunning/prunner.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/mobilenetv1_ssd_config.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/mobilenetv1_ssd_config.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/squeezenet_ssd_config.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/squeezenet_ssd_config.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/vgg_ssd_config.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/config/vgg_ssd_config.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/data_preprocessing.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/data_preprocessing.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/fpn_mobilenetv1_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/fpn_mobilenetv1_ssd.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/fpn_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/fpn_ssd.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenet_v2_ssd_lite.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenet_v2_ssd_lite.py
old mode 100755
new mode 100644
diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv1_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv1_ssd.py
old mode 100755
new mode 100644
diff --git
a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv1_ssd_lite.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv1_ssd_lite.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv3_ssd_lite.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/mobilenetv3_ssd_lite.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/predictor.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/predictor.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/squeezenet_ssd_lite.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/squeezenet_ssd_lite.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/ssd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/vgg_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/ssd/vgg_ssd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/test/test_vgg_ssd.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/test/test_vgg_ssd.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/transforms/transforms.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/transforms/transforms.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/__init__.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/__init__.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/box_utils.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/box_utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/box_utils_numpy.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/box_utils_numpy.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/measurements.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/measurements.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/misc.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/misc.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/model_book.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/vision/utils/model_book.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-MobilenetV2/visual_tf_models.py b/PyTorch/contrib/cv/detection/SSD-MobilenetV2/visual_tf_models.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD-Resnet/README.md b/PyTorch/contrib/cv/detection/SSD-Resnet/README.md index aab3735567d0be24dd0c9d65b38b3e3669d8c903..96ba50816f429f14c4f6ce1a9a068ba743141a9f 100644 --- a/PyTorch/contrib/cv/detection/SSD-Resnet/README.md +++ b/PyTorch/contrib/cv/detection/SSD-Resnet/README.md @@ -1,61 +1,61 @@ -# #SSD-Resnet34 - -This implements training of SSD-Resnet34 on the 2017 COCO dataset. - - - -## #SSD-Resnet34 Detail - -On the basis of resnet34, a part of the feature layer is added for single target detection. 
- -## Requirements - -* Install Pytorch==1.5.0 and torchvision - -* Install requirements.txt - -* Steps to download pretrain-pth - - ``` - wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E8%AE%AD%E7%BB%83/cv/image_object_detection/SSD-ResNet34/resnet34-333f7ec4.pth - ``` - -* Steps to download data - - ``` - source download_dataset.sh - ``` - - - -# Training - -### To train a model, run `training.py` with the desired model architecture and the path to the coco dataset: - -``` -# training 1p accuracy -cd ./test -bash train_full_1p.sh --data_path=real_data_path -# training 1p performance -cd ./test -bash train_performance_1p.sh --data_path=real_data_path -# training 8p accuracy -cd ./test -bash train_full_8p.sh --data_path=real_data_path -# training 8p performance -cd ./test -bash train_performance_8p.sh --data_path=real_data_path -#test 8p accuracy -bash test/train_eval_8p.sh --data_path=real_data_path --checkpoint_path=real_pre_train_model_path -``` - -Log path: -test/output/{device_id}/train_{device_id}.log -test/output/{device_id}/train_performance_{device_id}.log - -## SSD-Resnet34 training result - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| ------ | ---- | -------- | ------ | -------- | -| - | 265 | 1 | 1 | O2 | +# #SSD-Resnet34 + +This implements training of SSD-Resnet34 on the 2017 COCO dataset. + + + +## #SSD-Resnet34 Detail + +On the basis of resnet34, a part of the feature layer is added for single target detection. + +## Requirements + +* Install Pytorch==1.5.0 and torchvision + +* Install requirements.txt + +* Steps to download pretrain-pth + + ``` + wget https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E9%AA%8C%E6%94%B6-%E8%AE%AD%E7%BB%83/cv/image_object_detection/SSD-ResNet34/resnet34-333f7ec4.pth + ``` + +* Steps to download data + + ``` + source download_dataset.sh + ``` + + + +# Training + +### To train a model, run `training.py` with the desired model architecture and the path to the coco dataset: + +``` +# training 1p accuracy +cd ./test +bash train_full_1p.sh --data_path=real_data_path +# training 1p performance +cd ./test +bash train_performance_1p.sh --data_path=real_data_path +# training 8p accuracy +cd ./test +bash train_full_8p.sh --data_path=real_data_path +# training 8p performance +cd ./test +bash train_performance_8p.sh --data_path=real_data_path +#test 8p accuracy +bash test/train_eval_8p.sh --data_path=real_data_path --checkpoint_path=real_pre_train_model_path +``` + +Log path: +test/output/{device_id}/train_{device_id}.log +test/output/{device_id}/train_performance_{device_id}.log + +## SSD-Resnet34 training result + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| ------ | ---- | -------- | ------ | -------- | +| - | 265 | 1 | 1 | O2 | | 0.2301 | 1700 | 8 | 90 | O2 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-Resnet/demo.py b/PyTorch/contrib/cv/detection/SSD-Resnet/demo.py index 5b2972956ba607caa4c7389434129293c4d254c7..4ac02fdea47926b6631d02f75102184ed3ed9ae5 100644 --- a/PyTorch/contrib/cv/detection/SSD-Resnet/demo.py +++ b/PyTorch/contrib/cv/detection/SSD-Resnet/demo.py @@ -1,489 +1,489 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
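Both training scripts touched by this patch reload checkpoints that may have been written from a DistributedDataParallel-wrapped model, so they strip the 'module.' prefix from the state-dict keys before calling load_state_dict: the SSD-MobilenetV2 script does it inline when --resume is given, and load_checkpoint in the SSD-Resnet demo.py diff below does the same (and additionally drops 'mbox.' keys). A minimal sketch of that save-and-reload round trip, with a dummy model and a hypothetical file name:

```python
# Save/reload pattern used by the patched scripts: a checkpoint written from a
# wrapped model carries 'module.'-prefixed keys that must be stripped before
# loading into the bare network. Dummy model and file name for illustration.
import torch

net = torch.nn.Linear(4, 2)
wrapped = torch.nn.DataParallel(net)          # stand-in for DistributedDataParallel

torch.save({'state_dict': wrapped.state_dict(), 'epoch': 0}, 'demo_checkpoint.pth')

checkpoint = torch.load('demo_checkpoint.pth', map_location='cpu')
state_dict = {k.replace('module.', ''): v for k, v in checkpoint['state_dict'].items()}
net.load_state_dict(state_dict)               # loads cleanly into the unwrapped model
print(sorted(state_dict))                     # ['bias', 'weight']
```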
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from base_model import Loss -import sys -import os -from opt_loss import OptLoss -from mlperf_logger import configure_logger, log_start, log_end, log_event, set_seeds, get_rank, barrier -from mlperf_logging.mllog import constants -import torch -from torch.autograd import Variable -import time -import numpy as np -import io -from bisect import bisect # for lr_scheduler -from apex import amp -from ssd300 import SSD300 -from master_params import create_flat_master -from parse_config import parse_args, validate_arguments, validate_group_bn -from data.build_pipeline import prebuild_pipeline, build_pipeline -from box_coder import dboxes300_coco, build_ssd300_coder -from async_evaluator import AsyncEvaluator -from eval import coco_eval -from apex.optimizers import NpuFusedSGD -import gc -from torch.nn.parallel import DistributedDataParallel -# necessary pytorch imports -import torch.utils.data.distributed -import torch.distributed as dist - -# Apex imports -try: - import apex_C - import apex - from apex.parallel.LARC import LARC - from apex.parallel import DistributedDataParallel as DDP - from apex.fp16_utils import * - from apex.multi_tensor_apply import multi_tensor_applier - #import amp_C -except ImportError: - raise ImportError("Please install APEX from https://github.com/nvidia/apex") - -from contextlib import redirect_stdout - -import logging - - -class Logger(object): - logfile = "" - - def __init__(self, filename=""): - self.logfile = filename - self.terminal = sys.stdout - # self.log = open(filename, "a") - return - - def write(self, message): - self.terminal.write(message) - if self.logfile != "": - try: - self.log = open(self.logfile, "a") - self.log.write(message) - self.log.close() - except: - pass - - def flush(self): - pass - - -def print_message(rank, *print_args): - if rank == 0: - print(*print_args) - -def load_checkpoint(model, checkpoint): - print("loading model checkpoint", checkpoint) - od = torch.load(checkpoint) - # remove proceeding 'module' from checkpoint - saved_model = od["model"] - for k in list(saved_model.keys()): - if k.startswith('module.'): - saved_model[k[7:]] = saved_model.pop(k) - if k.startswith('mbox.'): - saved_model.pop(k) - model.load_state_dict(saved_model,strict=False) - -def check_async_evals(args, evaluator, threshold): - finished = 0 - # Note: only one rank does COCOEval, so we need to check there if we've - # finished -- we'll broadcast that to a "finished" tensor to determine - # if we should stop - # Note2: ssd_print contains a barrier() call, implemented with all_reduce - # If we conditional on rank 0, then an ssd_print all_reduce matches with - # the finished all_reduce and all hell breaks loose. 
- if args.rank == 0: - for epoch, current_accuracy in evaluator.finished_tasks().items(): - # Note: Move to per-iter check - # EVAL_START should be prior to the accuracy/score evaluation but adding the missing EVAL_START here for now - log_start(key=constants.EVAL_START, metadata={'epoch_num' : epoch}) - log_event(key=constants.EVAL_ACCURACY, - value=current_accuracy, - metadata={'epoch_num' : epoch}) - log_end(key=constants.EVAL_STOP, metadata={'epoch_num' : epoch}) - if current_accuracy >= threshold: - finished = 1 - - # handle the non-distributed case -- don't need to bcast, just take local result - if not args.distributed: - return finished == 1 - - # Now we know from all ranks if they're done - reduce result - # Note: Already caught the non-distributed case above, can assume broadcast is available - with torch.no_grad(): - finish_tensor = torch.tensor([finished], dtype=torch.int32, device=torch.device('npu')) - # torch.distributed.all_reduce(finish_tensor) - torch.distributed.broadcast(finish_tensor, src=0) - - # >= 1 ranks has seen final accuracy - if finish_tensor.item() >= 1: - return True - - # Default case: No results, or no accuracte enough results - return False - -def lr_warmup(optim, warmup_iter, iter_num, epoch, base_lr, args): - if iter_num < warmup_iter: - warmup_step = base_lr / (warmup_iter * (2 ** args.warmup_factor)) - new_lr = base_lr - (warmup_iter - iter_num) * warmup_step - - for param_group in optim.param_groups: - param_group['lr'] = new_lr - -def setup_distributed(args): - # Setup multi-GPU if necessary - - args.distributed = False - if 'WORLD_SIZE' in os.environ: - args.distributed = int(os.environ['WORLD_SIZE']) > 1 - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - if args.distributed: - torch.npu.set_device(args.local_rank) - torch.distributed.init_process_group(backend='hccl', - world_size=int(os.environ['WORLD_SIZE']), - rank=args.local_rank, - ) - args.local_seed = set_seeds(args) - # start timing here - if args.distributed: - args.N_gpu = torch.distributed.get_world_size() - args.rank = torch.distributed.get_rank() - else: - args.N_gpu = 1 - args.rank = 0 - - validate_group_bn(args.bn_group) - - return args - -def train300_mlperf_coco(args): - - - args = setup_distributed(args) - - # Build the model - model_options = { - 'use_nhwc' : args.nhwc, - 'pad_input' : args.pad_input, - 'bn_group' : args.bn_group, - } - - ssd300 = SSD300(args, args.num_classes, **model_options) - if args.checkpoint is not None: - load_checkpoint(ssd300, args.checkpoint) - - ssd300.train() - ssd300.npu() - dboxes = dboxes300_coco() - # Note: No reason not to use optimised loss - #loss_func = OptLoss() - loss_func = Loss(dboxes) - loss_func.npu() - - # Create optimizer. This must also be done after network_to_half. 
- global_batch_size = (args.N_gpu * args.batch_size) - log_event(key=constants.MODEL_BN_SPAN, value=args.bn_group*args.batch_size) - log_event(key=constants.GLOBAL_BATCH_SIZE, value=global_batch_size) - - # mlperf only allows base_lr scaled by an integer - base_lr = 2.5e-3 - requested_lr_multiplier = args.lr / base_lr - adjusted_multiplier = max(1, round(requested_lr_multiplier * global_batch_size / 32)) - - current_lr = base_lr * adjusted_multiplier - current_momentum = 0.9 - current_weight_decay = args.wd - static_loss_scale = args.loss_scale - - - optim = apex.optimizers.NpuFusedSGD(ssd300.parameters(), - lr=current_lr, - momentum=current_momentum, - weight_decay=current_weight_decay) - ssd300, optim = amp.initialize(ssd300, optim, opt_level='O2', loss_scale=static_loss_scale,combine_grad=True) - # Parallelize. Need to do this after network_to_half. - if args.distributed: - if args.delay_allreduce: - print_message(args.local_rank, "Delaying allreduces to the end of backward()") - ssd300 = DistributedDataParallel(ssd300, device_ids=[args.local_rank]) - - log_event(key=constants.OPT_BASE_LR, value=current_lr) - log_event(key=constants.OPT_LR_DECAY_BOUNDARY_EPOCHS, value=args.lr_decay_epochs) - log_event(key=constants.OPT_LR_DECAY_STEPS, value=args.lr_decay_epochs) - log_event(key=constants.OPT_WEIGHT_DECAY, value=current_weight_decay) - if args.warmup is not None: - log_event(key=constants.OPT_LR_WARMUP_STEPS, value=args.warmup) - log_event(key=constants.OPT_LR_WARMUP_FACTOR, value=args.warmup_factor) - - # Model is completely finished -- need to create separate copies, preserve parameters across - # them, and jit - ssd300_eval = SSD300(args, args.num_classes, **model_options).npu() - - if args.use_fp16: - convert_network(ssd300_eval, torch.half) - - # Get the existant state from the train model - # * if we use distributed, then we want .module - train_model = ssd300.module if args.distributed else ssd300 - ssd300_eval.load_state_dict(train_model.state_dict()) - ssd300_eval.eval() - - - print_message(args.local_rank, "epoch", "nbatch", "loss") - - iter_num = args.iteration - avg_loss = 0.0 - - start_elapsed_time = time.time() - last_printed_iter = args.iteration - num_elapsed_samples = 0 - - input_c = 4 if args.pad_input else 3 - example_shape = [args.batch_size, 300, 300, input_c] if args.nhwc else [args.batch_size, input_c, 300, 300] - example_input = torch.randn(*example_shape).npu() - - if args.use_fp16: - example_input = example_input.half() - - if args.jit: - # DDP has some Python-side control flow. If we JIT the entire DDP-wrapped module, - # the resulting ScriptModule will elide this control flow, resulting in allreduce - # hooks not being called. If we're running distributed, we need to extract and JIT - # the wrapped .module. - # Replacing a DDP-ed ssd300 with a script_module might also cause the AccumulateGrad hooks - # to go out of scope, and therefore silently disappear. - module_to_jit = ssd300.module if args.distributed else ssd300 - if args.distributed: - ssd300.module = torch.jit.trace(module_to_jit, example_input, check_trace=False) - else: - ssd300 = torch.jit.trace(module_to_jit, example_input, check_trace=False) - # JIT the eval model too - ssd300_eval = torch.jit.trace(ssd300_eval, example_input, check_trace=False) - - # do a dummy fprop & bprop to make sure cudnnFind etc. 
are timed here - ploc, plabel = ssd300(example_input) - - # produce a single dummy "loss" to make things easier - loss = ploc[0,0,0] + plabel[0,0,0] - dloss = torch.randn_like(loss) - # Cause cudnnFind for dgrad, wgrad to run - loss.backward(dloss) - - # Necessary import in init - #from pycocotools.coco import COCO - - encoder = build_ssd300_coder() - - evaluator = AsyncEvaluator(num_threads=1) - - log_end(key=constants.INIT_STOP) - - ##### END INIT - - # This is the first place we touch anything related to data - ##### START DATA TOUCHING - barrier() - log_start(key=constants.RUN_START) - barrier() - - train_pipe = prebuild_pipeline(args) - - train_loader, epoch_size = build_pipeline(args, training=True, pipe=train_pipe) - if args.rank == 0: - print("epoch size is: ", epoch_size, " images") - - val_loader, inv_map, cocoGt = build_pipeline(args, training=False) - if args.profile_gc_off: - gc.disable() - gc.collect() - - ##### END DATA TOUCHING - i_eval = 0 - block_start_epoch = 1 - log_start(key=constants.BLOCK_START, - metadata={'first_epoch_num': block_start_epoch, - 'epoch_count': args.evaluation[i_eval]}) - for epoch in range(args.epochs): - optim.zero_grad() - - - if epoch in args.evaluation: - # Get the existant state from the train model - # * if we use distributed, then we want .module - train_model = ssd300.module if args.distributed else ssd300 - - if args.distributed and args.allreduce_running_stats: - if args.rank == 0: print("averaging bn running means and vars") - # make sure every node has the same running bn stats before - # using them to evaluate, or saving the model for inference - world_size = float(torch.distributed.get_world_size()) - for bn_name, bn_buf in train_model.named_buffers(recurse=True): - if ('running_mean' in bn_name) or ('running_var' in bn_name): - torch.distributed.all_reduce(bn_buf, op=dist.ReduceOp.SUM) - bn_buf /= world_size - - if args.rank == 0: - if args.save: - print("saving model...") - if not os.path.isdir('./models'): - os.mkdir('./models') - torch.save({"model" : ssd300.state_dict()}, "./models/iter_{}.pt".format(iter_num)) - - ssd300_eval.load_state_dict(train_model.state_dict()) - # Note: No longer returns, evaluation is abstracted away inside evaluator - coco_eval(args, - ssd300_eval, - val_loader, - cocoGt, - encoder, - inv_map, - epoch, - iter_num, - evaluator=evaluator) - log_end(key=constants.BLOCK_STOP, metadata={'first_epoch_num': block_start_epoch}) - if epoch != max(args.evaluation): - i_eval += 1 - block_start_epoch = epoch + 1 - log_start(key=constants.BLOCK_START, - metadata={'first_epoch_num': block_start_epoch, - 'epoch_count': (args.evaluation[i_eval] - - args.evaluation[i_eval - 1])}) - - if epoch in args.lr_decay_epochs: - current_lr *= args.lr_decay_factor - print_message(args.rank, "lr decay step #" + str(bisect(args.lr_decay_epochs, epoch))) - for param_group in optim.param_groups: - param_group['lr'] = current_lr - - log_start(key=constants.EPOCH_START, - metadata={'epoch_num': epoch + 1, - 'current_iter_nufm': iter_num}) - - for i, data in enumerate(train_loader): - (img, bbox, label, _) = data - img = img.npu() - bbox = bbox.npu() - label = label.npu() - if args.profile_start is not None and iter_num == args.profile_start: - torch.npu.profiler.start() - torch.npu.synchronize() - if args.profile_nvtx: - torch.autograd._enable_profiler(torch.autograd.ProfilerState.NVTX) - - if args.profile is not None and iter_num == args.profile: - if args.profile_start is not None and iter_num >=args.profile_start: - # we turned npu 
and nvtx profiling on, better turn it off too - if args.profile_nvtx: - torch.autograd._disable_profiler() - torch.npu.profiler.stop() - return - - if args.warmup is not None: - lr_warmup(optim, args.warmup, iter_num, epoch, current_lr, args) - - if (img is None) or (bbox is None) or (label is None): - print("No labels in batch") - continue - - ploc, plabel = ssd300(img) - ploc, plabel = ploc.float(), plabel.float() - - N = img.shape[0] - bbox.requires_grad = False - label.requires_grad = False - # reshape (N*8732X4 -> Nx8732x4) and transpose (Nx8732x4 -> Nx4x8732) - bbox = bbox.view(N, -1, 4).transpose(1,2).contiguous() - # reshape (N*8732 -> Nx8732) and cast to Long - label = label.view(N, -1).long() - loss = loss_func(ploc, plabel, bbox, label) - - if np.isfinite(loss.item()): - avg_loss = 0.999*avg_loss + 0.001*loss.item() - else: - print("model exploded (corrupted by Inf or Nan)") - sys.exit() - - num_elapsed_samples += N - # if args.rank == 0 and iter_num % args.print_interval == 0: - if args.rank == 0 and iter_num % args.print_interval == 0: - end_elapsed_time = time.time() - elapsed_time = end_elapsed_time - start_elapsed_time - - avg_samples_per_sec = num_elapsed_samples * args.N_gpu / elapsed_time - - print("Epoch:{:4d}, Iteration: {:6d}, Loss function: {:5.3f}, Average Loss: {:.3f}, avg. samples / sec: {:.2f}"\ - .format(epoch, iter_num, loss.item(), avg_loss, avg_samples_per_sec), end="\n") - - last_printed_iter = iter_num - start_elapsed_time = time.time() - num_elapsed_samples = 0 - - - with amp.scale_loss(loss, optim) as scaled_loss: - scaled_loss.backward() - - - optim.step() - - - # Likely a decent skew here, let's take this opportunity to set the - # gradients to None. After DALI integration, playing with the - # placement of this is worth trying. - - optim.zero_grad() - - # Don't check every iteration due to cost of broadcast - if iter_num % 20 == 0: - finished = check_async_evals(args, evaluator, args.threshold) - - if finished: - return True - - iter_num += 1 - - #train_loader.reset() - log_end(key=constants.EPOCH_STOP, metadata={'epoch_num': epoch + 1}) - - return False - -def main(): - # torch.multiprocessing.set_start_method('spawn') - configure_logger(constants.SSD) - log_start(key=constants.INIT_START, log_all_ranks=True) - args = parse_args() - sys.stdout = Logger("test/output/%s/%s_%s.log"%(args.device_id,args.tag,args.device_id)) - sys.stderr = Logger("test/output/%s/%s_%s.log"%(args.device_id,args.tag,args.device_id))#1p - if args.local_rank == 0: - print(args) - - # make sure the epoch lists are in sorted order - args.evaluation.sort() - args.lr_decay_epochs.sort() - - validate_arguments(args) - - torch.set_num_threads(1) - torch.backends.cudnn.benchmark = not args.profile_cudnn_get - - success = train300_mlperf_coco(args) - status = 'success' if success else 'aborted' - - # end timing here - log_end(key=constants.RUN_STOP, metadata={'status': status}) - - -if __name__ == "__main__": - - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from base_model import Loss +import sys +import os +from opt_loss import OptLoss +from mlperf_logger import configure_logger, log_start, log_end, log_event, set_seeds, get_rank, barrier +from mlperf_logging.mllog import constants +import torch +from torch.autograd import Variable +import time +import numpy as np +import io +from bisect import bisect # for lr_scheduler +from apex import amp +from ssd300 import SSD300 +from master_params import create_flat_master +from parse_config import parse_args, validate_arguments, validate_group_bn +from data.build_pipeline import prebuild_pipeline, build_pipeline +from box_coder import dboxes300_coco, build_ssd300_coder +from async_evaluator import AsyncEvaluator +from eval import coco_eval +from apex.optimizers import NpuFusedSGD +import gc +from torch.nn.parallel import DistributedDataParallel +# necessary pytorch imports +import torch.utils.data.distributed +import torch.distributed as dist + +# Apex imports +try: + import apex_C + import apex + from apex.parallel.LARC import LARC + from apex.parallel import DistributedDataParallel as DDP + from apex.fp16_utils import * + from apex.multi_tensor_apply import multi_tensor_applier + #import amp_C +except ImportError: + raise ImportError("Please install APEX from https://github.com/nvidia/apex") + +from contextlib import redirect_stdout + +import logging + + +class Logger(object): + logfile = "" + + def __init__(self, filename=""): + self.logfile = filename + self.terminal = sys.stdout + # self.log = open(filename, "a") + return + + def write(self, message): + self.terminal.write(message) + if self.logfile != "": + try: + self.log = open(self.logfile, "a") + self.log.write(message) + self.log.close() + except: + pass + + def flush(self): + pass + + +def print_message(rank, *print_args): + if rank == 0: + print(*print_args) + +def load_checkpoint(model, checkpoint): + print("loading model checkpoint", checkpoint) + od = torch.load(checkpoint) + # remove proceeding 'module' from checkpoint + saved_model = od["model"] + for k in list(saved_model.keys()): + if k.startswith('module.'): + saved_model[k[7:]] = saved_model.pop(k) + if k.startswith('mbox.'): + saved_model.pop(k) + model.load_state_dict(saved_model,strict=False) + +def check_async_evals(args, evaluator, threshold): + finished = 0 + # Note: only one rank does COCOEval, so we need to check there if we've + # finished -- we'll broadcast that to a "finished" tensor to determine + # if we should stop + # Note2: ssd_print contains a barrier() call, implemented with all_reduce + # If we conditional on rank 0, then an ssd_print all_reduce matches with + # the finished all_reduce and all hell breaks loose. 
+ if args.rank == 0: + for epoch, current_accuracy in evaluator.finished_tasks().items(): + # Note: Move to per-iter check + # EVAL_START should be prior to the accuracy/score evaluation but adding the missing EVAL_START here for now + log_start(key=constants.EVAL_START, metadata={'epoch_num' : epoch}) + log_event(key=constants.EVAL_ACCURACY, + value=current_accuracy, + metadata={'epoch_num' : epoch}) + log_end(key=constants.EVAL_STOP, metadata={'epoch_num' : epoch}) + if current_accuracy >= threshold: + finished = 1 + + # handle the non-distributed case -- don't need to bcast, just take local result + if not args.distributed: + return finished == 1 + + # Now we know from all ranks if they're done - reduce result + # Note: Already caught the non-distributed case above, can assume broadcast is available + with torch.no_grad(): + finish_tensor = torch.tensor([finished], dtype=torch.int32, device=torch.device('npu')) + # torch.distributed.all_reduce(finish_tensor) + torch.distributed.broadcast(finish_tensor, src=0) + + # >= 1 ranks has seen final accuracy + if finish_tensor.item() >= 1: + return True + + # Default case: No results, or no accuracte enough results + return False + +def lr_warmup(optim, warmup_iter, iter_num, epoch, base_lr, args): + if iter_num < warmup_iter: + warmup_step = base_lr / (warmup_iter * (2 ** args.warmup_factor)) + new_lr = base_lr - (warmup_iter - iter_num) * warmup_step + + for param_group in optim.param_groups: + param_group['lr'] = new_lr + +def setup_distributed(args): + # Setup multi-GPU if necessary + + args.distributed = False + if 'WORLD_SIZE' in os.environ: + args.distributed = int(os.environ['WORLD_SIZE']) > 1 + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + if args.distributed: + torch.npu.set_device(args.local_rank) + torch.distributed.init_process_group(backend='hccl', + world_size=int(os.environ['WORLD_SIZE']), + rank=args.local_rank, + ) + args.local_seed = set_seeds(args) + # start timing here + if args.distributed: + args.N_gpu = torch.distributed.get_world_size() + args.rank = torch.distributed.get_rank() + else: + args.N_gpu = 1 + args.rank = 0 + + validate_group_bn(args.bn_group) + + return args + +def train300_mlperf_coco(args): + + + args = setup_distributed(args) + + # Build the model + model_options = { + 'use_nhwc' : args.nhwc, + 'pad_input' : args.pad_input, + 'bn_group' : args.bn_group, + } + + ssd300 = SSD300(args, args.num_classes, **model_options) + if args.checkpoint is not None: + load_checkpoint(ssd300, args.checkpoint) + + ssd300.train() + ssd300.npu() + dboxes = dboxes300_coco() + # Note: No reason not to use optimised loss + #loss_func = OptLoss() + loss_func = Loss(dboxes) + loss_func.npu() + + # Create optimizer. This must also be done after network_to_half. 
+ global_batch_size = (args.N_gpu * args.batch_size) + log_event(key=constants.MODEL_BN_SPAN, value=args.bn_group*args.batch_size) + log_event(key=constants.GLOBAL_BATCH_SIZE, value=global_batch_size) + + # mlperf only allows base_lr scaled by an integer + base_lr = 2.5e-3 + requested_lr_multiplier = args.lr / base_lr + adjusted_multiplier = max(1, round(requested_lr_multiplier * global_batch_size / 32)) + + current_lr = base_lr * adjusted_multiplier + current_momentum = 0.9 + current_weight_decay = args.wd + static_loss_scale = args.loss_scale + + + optim = apex.optimizers.NpuFusedSGD(ssd300.parameters(), + lr=current_lr, + momentum=current_momentum, + weight_decay=current_weight_decay) + ssd300, optim = amp.initialize(ssd300, optim, opt_level='O2', loss_scale=static_loss_scale,combine_grad=True) + # Parallelize. Need to do this after network_to_half. + if args.distributed: + if args.delay_allreduce: + print_message(args.local_rank, "Delaying allreduces to the end of backward()") + ssd300 = DistributedDataParallel(ssd300, device_ids=[args.local_rank]) + + log_event(key=constants.OPT_BASE_LR, value=current_lr) + log_event(key=constants.OPT_LR_DECAY_BOUNDARY_EPOCHS, value=args.lr_decay_epochs) + log_event(key=constants.OPT_LR_DECAY_STEPS, value=args.lr_decay_epochs) + log_event(key=constants.OPT_WEIGHT_DECAY, value=current_weight_decay) + if args.warmup is not None: + log_event(key=constants.OPT_LR_WARMUP_STEPS, value=args.warmup) + log_event(key=constants.OPT_LR_WARMUP_FACTOR, value=args.warmup_factor) + + # Model is completely finished -- need to create separate copies, preserve parameters across + # them, and jit + ssd300_eval = SSD300(args, args.num_classes, **model_options).npu() + + if args.use_fp16: + convert_network(ssd300_eval, torch.half) + + # Get the existant state from the train model + # * if we use distributed, then we want .module + train_model = ssd300.module if args.distributed else ssd300 + ssd300_eval.load_state_dict(train_model.state_dict()) + ssd300_eval.eval() + + + print_message(args.local_rank, "epoch", "nbatch", "loss") + + iter_num = args.iteration + avg_loss = 0.0 + + start_elapsed_time = time.time() + last_printed_iter = args.iteration + num_elapsed_samples = 0 + + input_c = 4 if args.pad_input else 3 + example_shape = [args.batch_size, 300, 300, input_c] if args.nhwc else [args.batch_size, input_c, 300, 300] + example_input = torch.randn(*example_shape).npu() + + if args.use_fp16: + example_input = example_input.half() + + if args.jit: + # DDP has some Python-side control flow. If we JIT the entire DDP-wrapped module, + # the resulting ScriptModule will elide this control flow, resulting in allreduce + # hooks not being called. If we're running distributed, we need to extract and JIT + # the wrapped .module. + # Replacing a DDP-ed ssd300 with a script_module might also cause the AccumulateGrad hooks + # to go out of scope, and therefore silently disappear. + module_to_jit = ssd300.module if args.distributed else ssd300 + if args.distributed: + ssd300.module = torch.jit.trace(module_to_jit, example_input, check_trace=False) + else: + ssd300 = torch.jit.trace(module_to_jit, example_input, check_trace=False) + # JIT the eval model too + ssd300_eval = torch.jit.trace(ssd300_eval, example_input, check_trace=False) + + # do a dummy fprop & bprop to make sure cudnnFind etc. 
are timed here + ploc, plabel = ssd300(example_input) + + # produce a single dummy "loss" to make things easier + loss = ploc[0,0,0] + plabel[0,0,0] + dloss = torch.randn_like(loss) + # Cause cudnnFind for dgrad, wgrad to run + loss.backward(dloss) + + # Necessary import in init + #from pycocotools.coco import COCO + + encoder = build_ssd300_coder() + + evaluator = AsyncEvaluator(num_threads=1) + + log_end(key=constants.INIT_STOP) + + ##### END INIT + + # This is the first place we touch anything related to data + ##### START DATA TOUCHING + barrier() + log_start(key=constants.RUN_START) + barrier() + + train_pipe = prebuild_pipeline(args) + + train_loader, epoch_size = build_pipeline(args, training=True, pipe=train_pipe) + if args.rank == 0: + print("epoch size is: ", epoch_size, " images") + + val_loader, inv_map, cocoGt = build_pipeline(args, training=False) + if args.profile_gc_off: + gc.disable() + gc.collect() + + ##### END DATA TOUCHING + i_eval = 0 + block_start_epoch = 1 + log_start(key=constants.BLOCK_START, + metadata={'first_epoch_num': block_start_epoch, + 'epoch_count': args.evaluation[i_eval]}) + for epoch in range(args.epochs): + optim.zero_grad() + + + if epoch in args.evaluation: + # Get the existant state from the train model + # * if we use distributed, then we want .module + train_model = ssd300.module if args.distributed else ssd300 + + if args.distributed and args.allreduce_running_stats: + if args.rank == 0: print("averaging bn running means and vars") + # make sure every node has the same running bn stats before + # using them to evaluate, or saving the model for inference + world_size = float(torch.distributed.get_world_size()) + for bn_name, bn_buf in train_model.named_buffers(recurse=True): + if ('running_mean' in bn_name) or ('running_var' in bn_name): + torch.distributed.all_reduce(bn_buf, op=dist.ReduceOp.SUM) + bn_buf /= world_size + + if args.rank == 0: + if args.save: + print("saving model...") + if not os.path.isdir('./models'): + os.mkdir('./models') + torch.save({"model" : ssd300.state_dict()}, "./models/iter_{}.pt".format(iter_num)) + + ssd300_eval.load_state_dict(train_model.state_dict()) + # Note: No longer returns, evaluation is abstracted away inside evaluator + coco_eval(args, + ssd300_eval, + val_loader, + cocoGt, + encoder, + inv_map, + epoch, + iter_num, + evaluator=evaluator) + log_end(key=constants.BLOCK_STOP, metadata={'first_epoch_num': block_start_epoch}) + if epoch != max(args.evaluation): + i_eval += 1 + block_start_epoch = epoch + 1 + log_start(key=constants.BLOCK_START, + metadata={'first_epoch_num': block_start_epoch, + 'epoch_count': (args.evaluation[i_eval] - + args.evaluation[i_eval - 1])}) + + if epoch in args.lr_decay_epochs: + current_lr *= args.lr_decay_factor + print_message(args.rank, "lr decay step #" + str(bisect(args.lr_decay_epochs, epoch))) + for param_group in optim.param_groups: + param_group['lr'] = current_lr + + log_start(key=constants.EPOCH_START, + metadata={'epoch_num': epoch + 1, + 'current_iter_nufm': iter_num}) + + for i, data in enumerate(train_loader): + (img, bbox, label, _) = data + img = img.npu() + bbox = bbox.npu() + label = label.npu() + if args.profile_start is not None and iter_num == args.profile_start: + torch.npu.profiler.start() + torch.npu.synchronize() + if args.profile_nvtx: + torch.autograd._enable_profiler(torch.autograd.ProfilerState.NVTX) + + if args.profile is not None and iter_num == args.profile: + if args.profile_start is not None and iter_num >=args.profile_start: + # we turned npu 
and nvtx profiling on, better turn it off too + if args.profile_nvtx: + torch.autograd._disable_profiler() + torch.npu.profiler.stop() + return + + if args.warmup is not None: + lr_warmup(optim, args.warmup, iter_num, epoch, current_lr, args) + + if (img is None) or (bbox is None) or (label is None): + print("No labels in batch") + continue + + ploc, plabel = ssd300(img) + ploc, plabel = ploc.float(), plabel.float() + + N = img.shape[0] + bbox.requires_grad = False + label.requires_grad = False + # reshape (N*8732X4 -> Nx8732x4) and transpose (Nx8732x4 -> Nx4x8732) + bbox = bbox.view(N, -1, 4).transpose(1,2).contiguous() + # reshape (N*8732 -> Nx8732) and cast to Long + label = label.view(N, -1).long() + loss = loss_func(ploc, plabel, bbox, label) + + if np.isfinite(loss.item()): + avg_loss = 0.999*avg_loss + 0.001*loss.item() + else: + print("model exploded (corrupted by Inf or Nan)") + sys.exit() + + num_elapsed_samples += N + # if args.rank == 0 and iter_num % args.print_interval == 0: + if args.rank == 0 and iter_num % args.print_interval == 0: + end_elapsed_time = time.time() + elapsed_time = end_elapsed_time - start_elapsed_time + + avg_samples_per_sec = num_elapsed_samples * args.N_gpu / elapsed_time + + print("Epoch:{:4d}, Iteration: {:6d}, Loss function: {:5.3f}, Average Loss: {:.3f}, avg. samples / sec: {:.2f}"\ + .format(epoch, iter_num, loss.item(), avg_loss, avg_samples_per_sec), end="\n") + + last_printed_iter = iter_num + start_elapsed_time = time.time() + num_elapsed_samples = 0 + + + with amp.scale_loss(loss, optim) as scaled_loss: + scaled_loss.backward() + + + optim.step() + + + # Likely a decent skew here, let's take this opportunity to set the + # gradients to None. After DALI integration, playing with the + # placement of this is worth trying. + + optim.zero_grad() + + # Don't check every iteration due to cost of broadcast + if iter_num % 20 == 0: + finished = check_async_evals(args, evaluator, args.threshold) + + if finished: + return True + + iter_num += 1 + + #train_loader.reset() + log_end(key=constants.EPOCH_STOP, metadata={'epoch_num': epoch + 1}) + + return False + +def main(): + # torch.multiprocessing.set_start_method('spawn') + configure_logger(constants.SSD) + log_start(key=constants.INIT_START, log_all_ranks=True) + args = parse_args() + sys.stdout = Logger("test/output/%s/%s_%s.log"%(args.device_id,args.tag,args.device_id)) + sys.stderr = Logger("test/output/%s/%s_%s.log"%(args.device_id,args.tag,args.device_id))#1p + if args.local_rank == 0: + print(args) + + # make sure the epoch lists are in sorted order + args.evaluation.sort() + args.lr_decay_epochs.sort() + + validate_arguments(args) + + torch.set_num_threads(1) + torch.backends.cudnn.benchmark = not args.profile_cudnn_get + + success = train300_mlperf_coco(args) + status = 'success' if success else 'aborted' + + # end timing here + log_end(key=constants.RUN_STOP, metadata={'status': status}) + + +if __name__ == "__main__": + + main() diff --git a/PyTorch/contrib/cv/detection/SSD-Resnet/new.py b/PyTorch/contrib/cv/detection/SSD-Resnet/new.py index adebc268d6ea5f037f44d8257f79a3d474cf6b86..72f9d053b534e0cb0dc0f9bc1ad9a8805aa93a01 100644 --- a/PyTorch/contrib/cv/detection/SSD-Resnet/new.py +++ b/PyTorch/contrib/cv/detection/SSD-Resnet/new.py @@ -1,52 +1,52 @@ -# Copyright 2021 Huawei Technologies Co., Ltd - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -#sys.path.append('/share/home/litaotao/yzc/training_results_v0.7-master/NVIDIA/benchmarks/ssd/implementations/pytorch/')# -import os -#from base_model import Loss -from opt_loss import OptLoss -from mlperf_logger import configure_logger, log_start, log_end, log_event, set_seeds, get_rank, barrier -from mlperf_logging.mllog import constants -import torch -from torch.autograd import Variable -import time -import numpy as np -import io -from bisect import bisect # for lr_scheduler - -from ssd300 import SSD300 -from master_params import create_flat_master -from parse_config import parse_args, validate_arguments, validate_group_bn - -from async_evaluator import AsyncEvaluator -from eval import coco_eval - -#import sys -import gc -from data.native_pipeline import build_train_pipe -# necessary pytorch imports -import torch.utils.data.distributed -import torch.distributed as dist -configure_logger(constants.SSD) -log_start(key=constants.INIT_START, log_all_ranks=True) -args = parse_args() -# make sure the epoch lists are in sorted order -args.evaluation.sort() -args.lr_decay_epochs.sort() - -validate_arguments(args) - -torch.set_num_threads(1) -torch.backends.cudnn.benchmark = not args.profile_cudnn_get +# Copyright 2021 Huawei Technologies Co., Ltd + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys +#sys.path.append('/share/home/litaotao/yzc/training_results_v0.7-master/NVIDIA/benchmarks/ssd/implementations/pytorch/')# +import os +#from base_model import Loss +from opt_loss import OptLoss +from mlperf_logger import configure_logger, log_start, log_end, log_event, set_seeds, get_rank, barrier +from mlperf_logging.mllog import constants +import torch +from torch.autograd import Variable +import time +import numpy as np +import io +from bisect import bisect # for lr_scheduler + +from ssd300 import SSD300 +from master_params import create_flat_master +from parse_config import parse_args, validate_arguments, validate_group_bn + +from async_evaluator import AsyncEvaluator +from eval import coco_eval + +#import sys +import gc +from data.native_pipeline import build_train_pipe +# necessary pytorch imports +import torch.utils.data.distributed +import torch.distributed as dist +configure_logger(constants.SSD) +log_start(key=constants.INIT_START, log_all_ranks=True) +args = parse_args() +# make sure the epoch lists are in sorted order +args.evaluation.sort() +args.lr_decay_epochs.sort() + +validate_arguments(args) + +torch.set_num_threads(1) +torch.backends.cudnn.benchmark = not args.profile_cudnn_get build_train_pipe(args) \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SSD-Resnet/nms.py b/PyTorch/contrib/cv/detection/SSD-Resnet/nms.py index b6aa5577213272de7563a74808c1905af9e060e9..7a81227046624372527c6f326a998a7b26002b38 100644 --- a/PyTorch/contrib/cv/detection/SSD-Resnet/nms.py +++ b/PyTorch/contrib/cv/detection/SSD-Resnet/nms.py @@ -1,146 +1,146 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -def npu_multiclass_nms(multi_bboxes, - multi_scores, - score_thr=0.05, - nms_thr=0.45, - max_num=50, - score_factors=None): - """NMS for multi-class bboxes using npu api. - - Origin implement from mmdetection is - https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/post_processing/bbox_nms.py#L7 - - This interface is similar to the original interface, but not exactly the same. - - Args: - multi_bboxes (Tensor): shape (n, #class, 4) or (n, 4) - multi_scores (Tensor): shape (n, #class+1), where the last column - contains scores of the background class, but this will be ignored. - On npu, in order to keep the semantics unblocked, we will unify the dimensions - score_thr (float): bbox threshold, bboxes with scores lower than it - will not be considered. - nms_thr (float): NMS IoU threshold. In the original implementation, a dictionary of {"iou_threshold": 0.45} - was passed, which is simplified here. - max_num (int): if there are more than max_num bboxes after NMS, - only top max_num will be kept; if there are less than max_num bboxes after NMS, - the output will zero pad to max_num. 
On the NPU, the memory needs to be requested in advance, - so the current max_num cannot be set to -1 at present - score_factors (Tensor): The factors multiplied to scores before applying NMS - - Returns: - tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels are 0-based. - """ - - num_classes = multi_scores.size(1) - 1 - num_boxes = multi_scores.size(0) - if score_factors is not None: - multi_scores = multi_scores[:, :-1] * score_factors[:, None] - else: - multi_scores = multi_scores[:, :-1] - multi_bboxes = multi_bboxes.reshape(1, num_boxes, multi_bboxes.numel() // 4 // num_boxes, 4) - multi_scores = multi_scores.reshape(1, num_boxes, num_classes) - - nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), - score_thr, nms_thr, - max_num, max_num) - - nmsed_boxes = nmsed_boxes.reshape(nmsed_boxes.shape[1:]) - nmsed_scores = nmsed_scores.reshape(nmsed_scores.shape[1]) - nmsed_classes = nmsed_classes.reshape(nmsed_classes.shape[1]) - - return torch.cat([nmsed_boxes, nmsed_scores[:, None]], -1), nmsed_classes - - -def npu_batched_multiclass_nms( - multi_bboxes, - multi_scores, - score_thr=0.05, - nms_thr=0.45, - max_num=50, - score_factors=None): - """NMS for batched multi-class bboxes using npu api. - - Origin implement from mmdetection is - https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/post_processing/bbox_nms.py#L7 - - This interface is similar to the original interface, but not exactly the same. - This interface implements the nms method under batch. - - Args: - multi_bboxes (Tensor): shape (bs, n, #class, 4) or (bs, n, 4) - multi_scores (Tensor): shape (bs, n, #class+1), where the last column - contains scores of the background class, but this will be ignored. - On npu, in order to keep the semantics unblocked, we will unify the dimensions - score_thr (float): bbox threshold, bboxes with scores lower than it - will not be considered. - nms_thr (float): NMS IoU threshold. In the original implementation, a dictionary of {"iou_threshold": 0.45} - was passed, which is simplified here. - max_num (int): if there are more than max_num bboxes after NMS, - only top max_num will be kept; if there are less than max_num bboxes after NMS, - the output will zero pad to max_num. On the NPU, the memory needs to be requested in advance, - so the current max_num cannot be set to -1 at present - score_factors (Tensor): The factors multiplied to scores before applying NMS - - Returns: - tuple: (bboxes, labels), tensors of shape (bs, k, 5) and (bs, k, 1). Labels are 0-based. 
- """ - - num_classes = multi_scores.size(2) - 1 - num_boxes = multi_scores.size(1) - batch_size = multi_scores.size(0) - if score_factors is not None: - multi_scores = multi_scores[..., :-1] * score_factors[..., None] - else: - multi_scores = multi_scores[..., :-1] - multi_bboxes = multi_bboxes.reshape(batch_size, num_boxes, multi_bboxes.numel() // 4 // num_boxes // batch_size, 4) - multi_scores = multi_scores.reshape(batch_size, num_boxes, num_classes) - - nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), - score_thr, nms_thr, - max_num, max_num) - - return torch.cat([nmsed_boxes, nmsed_scores[..., None]], -1), nmsed_classes - - -if __name__ == '__main__': - print('test npu_multiclass_nms') - boxes = torch.randint(1, 255, size=(1000, 4)) - scores = torch.randn(1000, 81) - - torch.npu.set_device(0) - boxes = boxes.npu().half() - scores = scores.npu().half() - - det_bboxes, det_labels = npu_multiclass_nms(boxes, scores) - print(det_bboxes.shape) - print(det_labels.shape) - - - print('test npu_batched_multiclass_nms') - boxes = torch.randint(1, 255, size=(4, 200, 80, 4)) - scores = torch.randn(4, 200, 81) - - torch.npu.set_device(0) - boxes = boxes.npu().half() - scores = scores.npu().half() - - det_bboxes, det_labels = npu_batched_multiclass_nms(boxes, scores) - print(det_bboxes.shape) - print(det_labels.shape) - +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + + +def npu_multiclass_nms(multi_bboxes, + multi_scores, + score_thr=0.05, + nms_thr=0.45, + max_num=50, + score_factors=None): + """NMS for multi-class bboxes using npu api. + + Origin implement from mmdetection is + https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/post_processing/bbox_nms.py#L7 + + This interface is similar to the original interface, but not exactly the same. + + Args: + multi_bboxes (Tensor): shape (n, #class, 4) or (n, 4) + multi_scores (Tensor): shape (n, #class+1), where the last column + contains scores of the background class, but this will be ignored. + On npu, in order to keep the semantics unblocked, we will unify the dimensions + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms_thr (float): NMS IoU threshold. In the original implementation, a dictionary of {"iou_threshold": 0.45} + was passed, which is simplified here. + max_num (int): if there are more than max_num bboxes after NMS, + only top max_num will be kept; if there are less than max_num bboxes after NMS, + the output will zero pad to max_num. On the NPU, the memory needs to be requested in advance, + so the current max_num cannot be set to -1 at present + score_factors (Tensor): The factors multiplied to scores before applying NMS + + Returns: + tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels are 0-based. 
+ """ + + num_classes = multi_scores.size(1) - 1 + num_boxes = multi_scores.size(0) + if score_factors is not None: + multi_scores = multi_scores[:, :-1] * score_factors[:, None] + else: + multi_scores = multi_scores[:, :-1] + multi_bboxes = multi_bboxes.reshape(1, num_boxes, multi_bboxes.numel() // 4 // num_boxes, 4) + multi_scores = multi_scores.reshape(1, num_boxes, num_classes) + + nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), + score_thr, nms_thr, + max_num, max_num) + + nmsed_boxes = nmsed_boxes.reshape(nmsed_boxes.shape[1:]) + nmsed_scores = nmsed_scores.reshape(nmsed_scores.shape[1]) + nmsed_classes = nmsed_classes.reshape(nmsed_classes.shape[1]) + + return torch.cat([nmsed_boxes, nmsed_scores[:, None]], -1), nmsed_classes + + +def npu_batched_multiclass_nms( + multi_bboxes, + multi_scores, + score_thr=0.05, + nms_thr=0.45, + max_num=50, + score_factors=None): + """NMS for batched multi-class bboxes using npu api. + + Origin implement from mmdetection is + https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/post_processing/bbox_nms.py#L7 + + This interface is similar to the original interface, but not exactly the same. + This interface implements the nms method under batch. + + Args: + multi_bboxes (Tensor): shape (bs, n, #class, 4) or (bs, n, 4) + multi_scores (Tensor): shape (bs, n, #class+1), where the last column + contains scores of the background class, but this will be ignored. + On npu, in order to keep the semantics unblocked, we will unify the dimensions + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms_thr (float): NMS IoU threshold. In the original implementation, a dictionary of {"iou_threshold": 0.45} + was passed, which is simplified here. + max_num (int): if there are more than max_num bboxes after NMS, + only top max_num will be kept; if there are less than max_num bboxes after NMS, + the output will zero pad to max_num. On the NPU, the memory needs to be requested in advance, + so the current max_num cannot be set to -1 at present + score_factors (Tensor): The factors multiplied to scores before applying NMS + + Returns: + tuple: (bboxes, labels), tensors of shape (bs, k, 5) and (bs, k, 1). Labels are 0-based. 
+ """ + + num_classes = multi_scores.size(2) - 1 + num_boxes = multi_scores.size(1) + batch_size = multi_scores.size(0) + if score_factors is not None: + multi_scores = multi_scores[..., :-1] * score_factors[..., None] + else: + multi_scores = multi_scores[..., :-1] + multi_bboxes = multi_bboxes.reshape(batch_size, num_boxes, multi_bboxes.numel() // 4 // num_boxes // batch_size, 4) + multi_scores = multi_scores.reshape(batch_size, num_boxes, num_classes) + + nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = torch.npu_batch_nms(multi_bboxes.half(), multi_scores.half(), + score_thr, nms_thr, + max_num, max_num) + + return torch.cat([nmsed_boxes, nmsed_scores[..., None]], -1), nmsed_classes + + +if __name__ == '__main__': + print('test npu_multiclass_nms') + boxes = torch.randint(1, 255, size=(1000, 4)) + scores = torch.randn(1000, 81) + + torch.npu.set_device(0) + boxes = boxes.npu().half() + scores = scores.npu().half() + + det_bboxes, det_labels = npu_multiclass_nms(boxes, scores) + print(det_bboxes.shape) + print(det_labels.shape) + + + print('test npu_batched_multiclass_nms') + boxes = torch.randint(1, 255, size=(4, 200, 80, 4)) + scores = torch.randn(4, 200, 81) + + torch.npu.set_device(0) + boxes = boxes.npu().half() + scores = scores.npu().half() + + det_bboxes, det_labels = npu_batched_multiclass_nms(boxes, scores) + print(det_bboxes.shape) + print(det_labels.shape) + diff --git a/PyTorch/contrib/cv/detection/SSD/scripts/eval.sh b/PyTorch/contrib/cv/detection/SSD/scripts/eval.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/scripts/eval_1p.sh b/PyTorch/contrib/cv/detection/SSD/scripts/eval_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/scripts/npu_set_env.sh b/PyTorch/contrib/cv/detection/SSD/scripts/npu_set_env.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/scripts/train_1p.sh b/PyTorch/contrib/cv/detection/SSD/scripts/train_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/scripts/train_8p.sh b/PyTorch/contrib/cv/detection/SSD/scripts/train_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/setup.py b/PyTorch/contrib/cv/detection/SSD/setup.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/tools/dist_test.sh b/PyTorch/contrib/cv/detection/SSD/tools/dist_test.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/tools/dist_train.sh b/PyTorch/contrib/cv/detection/SSD/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/tools/slurm_test.sh b/PyTorch/contrib/cv/detection/SSD/tools/slurm_test.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SSD/tools/slurm_train.sh b/PyTorch/contrib/cv/detection/SSD/tools/slurm_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md index e04900eef2309f7d6afb4d59c8f547f7decc294f..51554173ec816781571193ff9685371666e98d87 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/README_raw.md @@ -1,52 +1,52 @@ -# SimCLR_for_Pytorch - -This project enables the SimCLR model could be trained on NPU, and remains the similar precision compared to the results -of the GPU. 
- - -## Requirements - -- NPU配套的run包安装(建议安装 20.2.0.rc1 版本,请用以下脚本确认版本号,不确保其他版本能正常训练/测评) - - ```sh - ll /usr/local/Ascend/ascend-toolkit/latest - ``` - -- Python v3.7.5 -- PyTorch v1.5 (NPU版本) -- Apex (NPU版本) - - -## Training - -To train a model, run `xxx.sh` with the desired model architecture and the path to the CIFAR10 dataset: - -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=real_data_path - -# finetuning 1p -bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path -``` - -Log path: - test/output/devie_id/train_${device_id}.log # training detail log - - -## SimCLR Training Results - -| Acc@1 | FPS | # of NPU/GPU | Epochs | Opt-Level | Loss Scale | -| :------: | :------: | :------: | :------: | :------: | :------: | -| ------ | 1767.030 | 1P GPU | 100 | O2 | 128.0 | -| 60.352 | 2098.001 | 1P NPU | 100 | O2 | 128.0 | -| 55.859 | 5227.504 | 8P GPU | 100 | O2 | 128.0 | -| 58.594 | 9747.414 | 8P NPU | 100 | O2 | 128.0 | +# SimCLR_for_Pytorch + +This project enables the SimCLR model could be trained on NPU, and remains the similar precision compared to the results +of the GPU. + + +## Requirements + +- NPU配套的run包安装(建议安装 20.2.0.rc1 版本,请用以下脚本确认版本号,不确保其他版本能正常训练/测评) + + ```sh + ll /usr/local/Ascend/ascend-toolkit/latest + ``` + +- Python v3.7.5 +- PyTorch v1.5 (NPU版本) +- Apex (NPU版本) + + +## Training + +To train a model, run `xxx.sh` with the desired model architecture and the path to the CIFAR10 dataset: + +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=real_data_path + +# finetuning 1p +bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path +``` + +Log path: + test/output/devie_id/train_${device_id}.log # training detail log + + +## SimCLR Training Results + +| Acc@1 | FPS | # of NPU/GPU | Epochs | Opt-Level | Loss Scale | +| :------: | :------: | :------: | :------: | :------: | :------: | +| ------ | 1767.030 | 1P GPU | 100 | O2 | 128.0 | +| 60.352 | 2098.001 | 1P NPU | 100 | O2 | 128.0 | +| 55.859 | 5227.504 | 8P GPU | 100 | O2 | 128.0 | +| 58.594 | 9747.414 | 8P NPU | 100 | O2 | 128.0 | diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py index 1152709ee94e90167a7b8a6bca1307e33886c034..4f1a016a0c2ebf80dead3933c5346f9878a01906 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_1p.py @@ -1,270 +1,270 @@ -"""MIT License""" -# Copyright (c) 2020 Thalles Silva -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the 
Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ============================================================================ -import time -import argparse -import torch -import torch.npu -import torch.nn.functional as F -import torch.utils.data -import torch.backends.cudnn as cudnn -from torchvision import models -from utils import accuracy -from models.resnet_simclr import ResNetSimCLR - -from apex import amp -from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset -from multi_epochs_dataloader import MultiEpochsDataLoader -import apex -from apex.optimizers import NpuFusedAdam - - -torch.manual_seed(0) - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch SimCLR') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('--dataset_name', default='cifar10', - help='dataset name', choices=['stl10', 'cifar10']) -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet50)') -parser.add_argument('-j', '--workers', default=9, type=int, metavar='N', - help='number of data loading workers (default: 9)') -parser.add_argument('--epochs', default=100, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('-b', '--batch_size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning_rate', default=0.0003, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--wd', '--weight_decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('--out_dim', default=128, type=int, - help='feature dimension (default: 128)') -parser.add_argument('--log_every_n_steps', default=10, type=int, - help='Log every n steps') -parser.add_argument('--temperature', default=0.07, type=float, - help='softmax temperature (default: 0.07)') -parser.add_argument('--n_views', default=2, type=int, metavar='N', - help='Number of views for contrastive learning training.') -parser.add_argument('--rank', default=0, type=int, - help='node rank for distributed training') -parser.add_argument('--npu', default=0, type=int, - help='NPU id to use.') -parser.add_argument('--npus_per_node', default=1, type=int, - help='number of NPUs per node.') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--pth_path', default='', type=str, metavar='PATH', - help='path to pretrained checkpoint (default: 
none)') -parser.add_argument('--device_id', type=int, default=0, help="device id") - - -def main(): - print('Part1 : prepare for parameters <==> Begin') - args = parser.parse_args() - main_worker(args.npu, args.npus_per_node, args) - - -def info_nce_loss(args, features): - - labels = torch.cat([torch.arange(args.batch_size) for i in range(args.n_views)], dim=0).npu() - labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float() - labels = labels.npu() - - features = F.normalize(features, dim=1).npu() - - similarity_matrix = torch.matmul(features, features.T).npu() - - # discard the main diagonal from both: labels and similarities matrix - mask = torch.eye(labels.shape[0], dtype=torch.bool).npu() - labels = labels[~mask].view(labels.shape[0], -1).npu() - similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1).npu() - - # select and combine multiple positives - positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1).npu() - - # select only the negatives the negatives - negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1).npu() - - logits = torch.cat([positives, negatives], dim=1).npu() - labels = torch.zeros(logits.shape[0], dtype=torch.long).npu() - - logits = logits / args.temperature - return logits, labels - - -def train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc): - fps = AverageMeter() - - top1 = [0] - top5 = [0] - end = time.time() - for i, (images, _) in enumerate(train_loader): - images = torch.cat(images, dim=0) - images = images.npu() - - out = model(images).npu() - logits, labels = info_nce_loss(args, out) - loss = criterion(logits, labels) - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - - time_step = time.time() - end - fps.update(args.batch_size / time_step) - end = time.time() - - if i % args.log_every_n_steps == 0 and args.rank % npus_per_node == 0: - top1, top5 = accuracy(logits, labels, topk=(1, 5)) - if top1[0] > best_acc: - best_acc = top1[0] - print('Train Epoch: {0} Step: {1}/{2} Loss {loss:.4f} ' - '[AVG-ACC] * Acc@1 {top1:.3f} Acc@5 {top5:.3f} best_acc {best_acc:.3f} ' - 'LR {lr:.7f} FPS {fps:.7f}'.format( - epoch_counter, i, len(train_loader), loss=loss.item(), - top1=top1[0], top5=top5[0], best_acc=best_acc, - lr=optimizer.param_groups[0]['lr'], fps=fps.avg)) - - if (epoch_counter+1) % 5 == 0: - save_checkpoint({ - 'epoch': epoch_counter, - 'arch': model.state_dict(), - 'state_dict': model.state_dict(), - 'optimizer': optimizer.state_dict(), - }) - - -def save_checkpoint(state, filename='checkpoint.pth.tar'): - torch.save(state, filename) - - -def main_worker(npu, npus_per_node, args): - local_rank = 0 - args.npu = npu - assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2." 
- args.rank = args.rank * npus_per_node + npu - print(args) - args.device_id = args.device_id + local_rank - print("device_id = ", args.device_id) - device = torch.device(f'npu:{args.device_id}') - torch.npu.set_device(device) - - # create model - if args.pretrained: - print("=> using pre-trained model ResNetSimCLR") - model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) - print("loading model of yours...") - if args.pth_path: - print("load pth you give") - pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] - else: - pretrained_dict = torch.load("./checkpoint.pth.tar", map_location="cpu")["state_dict"] - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model ResNetSimCLR") - model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) - - print('Part1 : prepare for parameters <==> Done') - print('Part2 : Load Network <==> Begin') - - cudnn.deterministic = True - cudnn.benchmark = True - model = model.to(device) - - optimizer = NpuFusedAdam( - model.parameters(), - args.lr, - weight_decay=args.weight_decay - ) - - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) - criterion = torch.nn.CrossEntropyLoss().to(device) - - print('Part2 : Load Network <==> Done') - print('Part3 : Load Dataset <==> Begin') - - dataset = ContrastiveLearningDataset(args.data) - train_dataset = dataset.get_dataset(args.dataset_name, args.n_views) - print(f'workers nums:{args.workers}') - print(f'device nums:{npus_per_node}') - - train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(train_dataset, - args.batch_size, - workers=args.workers) - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0, - last_epoch=-1) - if args.rank % npus_per_node == 0: - print('Part3 : Load Dataset <==> Done') - print('Part4 : Train and Test <==> Begin') - - for epoch_counter in range(args.epochs): - best_acc = 0 - train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc) - if epoch_counter >= 10: - scheduler.step() - print('Part4 : Train and Test <==> Done') - - -def get_pytorch_train_loader(train_dataset, batch_size, workers, _worker_init_fn=None): - train_sampler = None - - dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader - train_loader = dataloader_fn( - train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, - drop_last=True) - return train_loader, len(train_loader), train_sampler - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -if __name__ == '__main__': - main() +"""MIT License""" +# Copyright (c) 2020 Thalles Silva +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the 
following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================ +import time +import argparse +import torch +import torch.npu +import torch.nn.functional as F +import torch.utils.data +import torch.backends.cudnn as cudnn +from torchvision import models +from utils import accuracy +from models.resnet_simclr import ResNetSimCLR + +from apex import amp +from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset +from multi_epochs_dataloader import MultiEpochsDataLoader +import apex +from apex.optimizers import NpuFusedAdam + + +torch.manual_seed(0) + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch SimCLR') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--dataset_name', default='cifar10', + help='dataset name', choices=['stl10', 'cifar10']) +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet50)') +parser.add_argument('-j', '--workers', default=9, type=int, metavar='N', + help='number of data loading workers (default: 9)') +parser.add_argument('--epochs', default=100, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('-b', '--batch_size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning_rate', default=0.0003, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--wd', '--weight_decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('--out_dim', default=128, type=int, + help='feature dimension (default: 128)') +parser.add_argument('--log_every_n_steps', default=10, type=int, + help='Log every n steps') +parser.add_argument('--temperature', default=0.07, type=float, + help='softmax temperature (default: 0.07)') +parser.add_argument('--n_views', default=2, type=int, metavar='N', + help='Number of views for contrastive learning training.') +parser.add_argument('--rank', default=0, type=int, + help='node rank for distributed training') +parser.add_argument('--npu', default=0, type=int, + help='NPU id to use.') +parser.add_argument('--npus_per_node', default=1, type=int, + help='number of NPUs per node.') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pth_path', default='', type=str, metavar='PATH', + help='path to pretrained checkpoint (default: none)') +parser.add_argument('--device_id', 
type=int, default=0, help="device id") + + +def main(): + print('Part1 : prepare for parameters <==> Begin') + args = parser.parse_args() + main_worker(args.npu, args.npus_per_node, args) + + +def info_nce_loss(args, features): + + labels = torch.cat([torch.arange(args.batch_size) for i in range(args.n_views)], dim=0).npu() + labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float() + labels = labels.npu() + + features = F.normalize(features, dim=1).npu() + + similarity_matrix = torch.matmul(features, features.T).npu() + + # discard the main diagonal from both: labels and similarities matrix + mask = torch.eye(labels.shape[0], dtype=torch.bool).npu() + labels = labels[~mask].view(labels.shape[0], -1).npu() + similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1).npu() + + # select and combine multiple positives + positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1).npu() + + # select only the negatives the negatives + negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1).npu() + + logits = torch.cat([positives, negatives], dim=1).npu() + labels = torch.zeros(logits.shape[0], dtype=torch.long).npu() + + logits = logits / args.temperature + return logits, labels + + +def train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc): + fps = AverageMeter() + + top1 = [0] + top5 = [0] + end = time.time() + for i, (images, _) in enumerate(train_loader): + images = torch.cat(images, dim=0) + images = images.npu() + + out = model(images).npu() + logits, labels = info_nce_loss(args, out) + loss = criterion(logits, labels) + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + + time_step = time.time() - end + fps.update(args.batch_size / time_step) + end = time.time() + + if i % args.log_every_n_steps == 0 and args.rank % npus_per_node == 0: + top1, top5 = accuracy(logits, labels, topk=(1, 5)) + if top1[0] > best_acc: + best_acc = top1[0] + print('Train Epoch: {0} Step: {1}/{2} Loss {loss:.4f} ' + '[AVG-ACC] * Acc@1 {top1:.3f} Acc@5 {top5:.3f} best_acc {best_acc:.3f} ' + 'LR {lr:.7f} FPS {fps:.7f}'.format( + epoch_counter, i, len(train_loader), loss=loss.item(), + top1=top1[0], top5=top5[0], best_acc=best_acc, + lr=optimizer.param_groups[0]['lr'], fps=fps.avg)) + + if (epoch_counter+1) % 5 == 0: + save_checkpoint({ + 'epoch': epoch_counter, + 'arch': model.state_dict(), + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + }) + + +def save_checkpoint(state, filename='checkpoint.pth.tar'): + torch.save(state, filename) + + +def main_worker(npu, npus_per_node, args): + local_rank = 0 + args.npu = npu + assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2." 
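# --- illustrative sketch (editorial aside, not part of the patch) -----------
# info_nce_loss above builds NT-Xent logits: it stacks the 2*batch_size view
# embeddings, drops self-similarity via the eye mask, and places the single
# positive pair in column 0 so the cross-entropy target is always class 0.
# Minimal CPU-only illustration with toy sizes (batch_size=2, n_views=2,
# dim=8); the .npu() calls and the temperature division are omitted.
import torch
import torch.nn.functional as F

batch_size, n_views, dim = 2, 2, 8
features = torch.randn(batch_size * n_views, dim)                        # 2B stacked views

labels = torch.cat([torch.arange(batch_size) for _ in range(n_views)])   # [0, 1, 0, 1]
labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float()            # (2B, 2B)

features = F.normalize(features, dim=1)
sim = torch.matmul(features, features.T)                                 # (2B, 2B)

mask = torch.eye(labels.shape[0], dtype=torch.bool)                      # drop self-pairs
labels = labels[~mask].view(labels.shape[0], -1)                         # (2B, 2B-1)
sim = sim[~mask].view(sim.shape[0], -1)

positives = sim[labels.bool()].view(labels.shape[0], -1)                 # (2B, 1)
negatives = sim[~labels.bool()].view(sim.shape[0], -1)                   # (2B, 2B-2)

logits = torch.cat([positives, negatives], dim=1)                        # positive is column 0
targets = torch.zeros(logits.shape[0], dtype=torch.long)                 # so CE target is 0
assert logits.shape == (4, 3) and targets.shape == (4,)
# ----------------------------------------------------------------------------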
+ args.rank = args.rank * npus_per_node + npu + print(args) + args.device_id = args.device_id + local_rank + print("device_id = ", args.device_id) + device = torch.device(f'npu:{args.device_id}') + torch.npu.set_device(device) + + # create model + if args.pretrained: + print("=> using pre-trained model ResNetSimCLR") + model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) + print("loading model of yours...") + if args.pth_path: + print("load pth you give") + pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] + else: + pretrained_dict = torch.load("./checkpoint.pth.tar", map_location="cpu")["state_dict"] + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model ResNetSimCLR") + model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) + + print('Part1 : prepare for parameters <==> Done') + print('Part2 : Load Network <==> Begin') + + cudnn.deterministic = True + cudnn.benchmark = True + model = model.to(device) + + optimizer = NpuFusedAdam( + model.parameters(), + args.lr, + weight_decay=args.weight_decay + ) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + criterion = torch.nn.CrossEntropyLoss().to(device) + + print('Part2 : Load Network <==> Done') + print('Part3 : Load Dataset <==> Begin') + + dataset = ContrastiveLearningDataset(args.data) + train_dataset = dataset.get_dataset(args.dataset_name, args.n_views) + print(f'workers nums:{args.workers}') + print(f'device nums:{npus_per_node}') + + train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(train_dataset, + args.batch_size, + workers=args.workers) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0, + last_epoch=-1) + if args.rank % npus_per_node == 0: + print('Part3 : Load Dataset <==> Done') + print('Part4 : Train and Test <==> Begin') + + for epoch_counter in range(args.epochs): + best_acc = 0 + train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc) + if epoch_counter >= 10: + scheduler.step() + print('Part4 : Train and Test <==> Done') + + +def get_pytorch_train_loader(train_dataset, batch_size, workers, _worker_init_fn=None): + train_sampler = None + + dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader + train_loader = dataloader_fn( + train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), + num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, + drop_last=True) + return train_loader, len(train_loader), train_sampler + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py index 3f93c0d9ef883024ea2a0548765c06055915fd04..81952e8a491b536c6d1c322543516ed7d6f02802 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/main_8p.py @@ -1,330 +1,330 @@ -"""MIT License""" -# Copyright (c) 2020 Thalles Silva -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Permission is hereby granted, free of charge, to 
any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ============================================================================ -import os -import time -import argparse -import torch -import torch.npu -import torch.nn.functional as F -import torch.multiprocessing as mp -import torch.backends.cudnn as cudnn -from torchvision import models -from utils import accuracy -from models.resnet_simclr import ResNetSimCLR - -from apex import amp -from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset -from multi_epochs_dataloader import MultiEpochsDataLoader -import apex -from apex.optimizers import NpuFusedAdam -import socket - -torch.manual_seed(0) - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch SimCLR') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('--dataset_name', default='cifar10', - help='dataset name', choices=['stl10', 'cifar10']) -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet50)') -parser.add_argument('-j', '--workers', default=9, type=int, metavar='N', - help='number of data loading workers (default: 9)') -parser.add_argument('--epochs', default=100, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('-b', '--batch_size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning_rate', default=0.0012, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--wd', '--weight_decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('--out_dim', default=128, type=int, - help='feature dimension (default: 128)') -parser.add_argument('--log_every_n_steps', default=10, type=int, - help='Log every n steps') -parser.add_argument('--temperature', default=0.07, type=float, - help='softmax temperature (default: 0.07)') -parser.add_argument('--n_views', default=2, type=int, metavar='N', - help='Number of views for contrastive learning training.') -parser.add_argument('--rank', default=0, type=int, - help='node rank for distributed training') -parser.add_argument('--npu', default=0, 
type=int, - help='NPU id to use.') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--pth_path', default='', type=str, metavar='PATH', - help='path to pretrained checkpoint (default: none)') -parser.add_argument('--distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs.') -parser.add_argument('--nodes', type=int, default=1) -parser.add_argument('--device_id', type=int, default=0, help="device id") -parser.add_argument('--device_list', type=str, default="0,1,2,3,4,5,6,7", help="device id list") - - -def get_host_ip(): - """ - 查询本机ip地址 - :return: ip - """ - try: - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(('8.8.8.8', 80)) - ip = s.getsockname()[0] - finally: - s.close() - - return ip - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def main(): - print('Part1 : prepare for parameters <==> Begin') - args = parser.parse_args() - os.environ["MASTER_ADDR"] = get_host_ip() - os.environ["MASTER_PORT"] = "29688" - args.process_device_map = device_id_to_process_device_map(args.device_list) - if args.device_list != '': - npus_per_node = len(args.device_list.split(',')) - elif args.device_num != -1: - npus_per_node = args.device_num - elif args.device == 'npu': - npus_per_node = torch.npu.device_count() - else: - npus_per_node = torch.cuda.device_count() - - print('npus_per_node:', npus_per_node) - - if args.distributed: - mp.spawn(main_worker, nprocs=npus_per_node, args=(npus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.npu, npus_per_node, args) - - -def main_worker(npu, npus_per_node, args): - local_rank = 0 - args.npu = args.process_device_map[npu] - if args.distributed: - args.rank = args.rank * npus_per_node + npu - torch.distributed.init_process_group(backend="hccl", - world_size=args.nodes * npus_per_node, - rank=args.rank) - local_rank = torch.distributed.get_rank() - args.is_master_node = not args.distributed or local_rank == 0 - if args.is_master_node: - print(args) - args.device_id = args.device_id + local_rank - print("device_id = ", args.device_id) - device = torch.device(f'npu:{args.device_id}') - torch.npu.set_device(device) - - # create model - if args.pretrained: - print("=> using pre-trained model ResNetSimCLR") - model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) - print("loading model of yours...") - if args.pth_path: - print("load pth you give") - pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] - else: - pretrained_dict = torch.load("./checkpoint.pth.tar", map_location="cpu")["state_dict"] - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model ResNetSimCLR") - model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) - - print('rank', args.rank, ' using npu...') - if args.rank % npus_per_node == 0: - print('Part1 : prepare for parameters <==> Done') - print('Part2 : Load Network <==> Begin') - - cudnn.deterministic = True - cudnn.benchmark = True - model = model.to(device) - optimizer = NpuFusedAdam( - model.parameters(), - args.lr, - weight_decay=args.weight_decay - ) - model, optimizer = amp.initialize(model, optimizer, 
opt_level="O2", loss_scale=128.0, combine_grad=True) - if args.distributed: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], - broadcast_buffers=False) - criterion = torch.nn.CrossEntropyLoss().to(device) - - if args.rank % npus_per_node == 0: - print('Part2 : Load Network <==> Done') - print('Part3 : Load Dataset <==> Begin') - - dataset = ContrastiveLearningDataset(args.data) - train_dataset = dataset.get_dataset(args.dataset_name, args.n_views) - print(f'workers nums:{args.workers}') - print(f'device nums:{npus_per_node}') - - train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(train_dataset, - args.batch_size, - workers=args.workers) - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0, - last_epoch=-1) - if args.rank % npus_per_node == 0: - print('Part3 : Load Dataset <==> Done') - print('Part4 : Train and Test <==> Begin') - - for epoch_counter in range(args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch_counter) - best_acc = 0 - train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc) - if epoch_counter >= 10: - scheduler.step() - print('Part4 : Train and Test <==> Done') - - -def info_nce_loss(args, features): - labels = torch.cat([torch.arange(args.batch_size) for i in range(args.n_views)], dim=0) - labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float() - labels = labels.npu() - features = F.normalize(features, dim=1) - similarity_matrix = torch.matmul(features, features.T) - - # discard the main diagonal from both: labels and similarities matrix - mask = torch.eye(labels.shape[0], dtype=torch.bool).npu() - labels = labels[~mask].view(labels.shape[0], -1) - similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1) - - # select and combine multiple positives - positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1) - - # select only the negatives the negatives - negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1) - - logits = torch.cat([positives, negatives], dim=1) - labels = torch.zeros(logits.shape[0], dtype=torch.long).npu() - logits = logits / args.temperature - return logits, labels - - -def train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc): - fps = AverageMeter() - - top1 = [0] - top5 = [0] - - end = time.time() - for i, (images, _) in enumerate(train_loader): - images = torch.cat(images, dim=0) - images = images.npu() - - out = model(images) - logits, labels = info_nce_loss(args, out) - loss = criterion(logits, labels) - optimizer.zero_grad() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer.step() - - time_step = time.time() - end - fps.update(args.batch_size * npus_per_node / time_step) - torch.npu.synchronize() - end = time.time() - - if i % args.log_every_n_steps == 0 and args.is_master_node: - top1, top5 = accuracy(logits, labels, topk=(1, 5)) - if top1[0] > best_acc: - best_acc = top1[0] - - print('Train Epoch: {0} Step: {1}/{2} Loss {loss:.4f} Time {time:.4f}' - '[AVG-ACC] * Acc@1 {top1:.3f} Acc@5 {top5:.3f} best_acc {best_acc:.3f} ' - 'LR {lr:.7f} FPS {fps:.7f} '.format( - epoch_counter, i, len(train_loader), loss=loss.item(), time=time_step, - top1=top1[0], top5=top5[0], best_acc=best_acc, - lr=optimizer.param_groups[0]['lr'], fps=fps.avg)) - - if (epoch_counter+1) % 5 == 0: - save_checkpoint({ - 'epoch': epoch_counter, - 'arch': 
model.state_dict(), - 'state_dict': model.state_dict(), - 'optimizer': optimizer.state_dict(), - }) - - -def save_checkpoint(state, filename='checkpoint.pth.tar'): - torch.save(state, filename) - - -def get_pytorch_train_loader(train_dataset, batch_size, workers, _worker_init_fn=None): - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - - dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader - train_loader = dataloader_fn( - train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=False, sampler=train_sampler, - drop_last=True) - return train_loader, len(train_loader), train_sampler - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -if __name__ == '__main__': - main() +"""MIT License""" +# Copyright (c) 2020 Thalles Silva +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ============================================================================ +import os +import time +import argparse +import torch +import torch.npu +import torch.nn.functional as F +import torch.multiprocessing as mp +import torch.backends.cudnn as cudnn +from torchvision import models +from utils import accuracy +from models.resnet_simclr import ResNetSimCLR + +from apex import amp +from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset +from multi_epochs_dataloader import MultiEpochsDataLoader +import apex +from apex.optimizers import NpuFusedAdam +import socket + +torch.manual_seed(0) + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch SimCLR') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('--dataset_name', default='cifar10', + help='dataset name', choices=['stl10', 'cifar10']) +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet50)') +parser.add_argument('-j', '--workers', default=9, type=int, metavar='N', + help='number of data loading workers (default: 9)') +parser.add_argument('--epochs', default=100, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('-b', '--batch_size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning_rate', default=0.0012, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--wd', '--weight_decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('--out_dim', default=128, type=int, + help='feature dimension (default: 128)') +parser.add_argument('--log_every_n_steps', default=10, type=int, + help='Log every n steps') +parser.add_argument('--temperature', default=0.07, type=float, + help='softmax temperature (default: 0.07)') +parser.add_argument('--n_views', default=2, type=int, metavar='N', + help='Number of views for contrastive learning training.') +parser.add_argument('--rank', default=0, type=int, + help='node rank for distributed training') +parser.add_argument('--npu', default=0, type=int, + help='NPU id to use.') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--pth_path', default='', type=str, metavar='PATH', + help='path to pretrained checkpoint (default: none)') +parser.add_argument('--distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs.') +parser.add_argument('--nodes', type=int, default=1) +parser.add_argument('--device_id', type=int, default=0, help="device id") +parser.add_argument('--device_list', type=str, default="0,1,2,3,4,5,6,7", help="device id list") + + +def get_host_ip(): + """ + 查询本机ip地址 + :return: ip + """ + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(('8.8.8.8', 80)) + ip = s.getsockname()[0] + finally: + s.close() + + return ip + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + 
devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def main(): + print('Part1 : prepare for parameters <==> Begin') + args = parser.parse_args() + os.environ["MASTER_ADDR"] = get_host_ip() + os.environ["MASTER_PORT"] = "29688" + args.process_device_map = device_id_to_process_device_map(args.device_list) + if args.device_list != '': + npus_per_node = len(args.device_list.split(',')) + elif args.device_num != -1: + npus_per_node = args.device_num + elif args.device == 'npu': + npus_per_node = torch.npu.device_count() + else: + npus_per_node = torch.cuda.device_count() + + print('npus_per_node:', npus_per_node) + + if args.distributed: + mp.spawn(main_worker, nprocs=npus_per_node, args=(npus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.npu, npus_per_node, args) + + +def main_worker(npu, npus_per_node, args): + local_rank = 0 + args.npu = args.process_device_map[npu] + if args.distributed: + args.rank = args.rank * npus_per_node + npu + torch.distributed.init_process_group(backend="hccl", + world_size=args.nodes * npus_per_node, + rank=args.rank) + local_rank = torch.distributed.get_rank() + args.is_master_node = not args.distributed or local_rank == 0 + if args.is_master_node: + print(args) + args.device_id = args.device_id + local_rank + print("device_id = ", args.device_id) + device = torch.device(f'npu:{args.device_id}') + torch.npu.set_device(device) + + # create model + if args.pretrained: + print("=> using pre-trained model ResNetSimCLR") + model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) + print("loading model of yours...") + if args.pth_path: + print("load pth you give") + pretrained_dict = torch.load(args.pth_path, map_location="cpu")["state_dict"] + else: + pretrained_dict = torch.load("./checkpoint.pth.tar", map_location="cpu")["state_dict"] + model.load_state_dict(pretrained_dict, strict=False) + else: + print("=> creating model ResNetSimCLR") + model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim) + + print('rank', args.rank, ' using npu...') + if args.rank % npus_per_node == 0: + print('Part1 : prepare for parameters <==> Done') + print('Part2 : Load Network <==> Begin') + + cudnn.deterministic = True + cudnn.benchmark = True + model = model.to(device) + optimizer = NpuFusedAdam( + model.parameters(), + args.lr, + weight_decay=args.weight_decay + ) + model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=128.0, combine_grad=True) + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], + broadcast_buffers=False) + criterion = torch.nn.CrossEntropyLoss().to(device) + + if args.rank % npus_per_node == 0: + print('Part2 : Load Network <==> Done') + print('Part3 : Load Dataset <==> Begin') + + dataset = ContrastiveLearningDataset(args.data) + train_dataset = dataset.get_dataset(args.dataset_name, args.n_views) + print(f'workers nums:{args.workers}') + print(f'device nums:{npus_per_node}') + + train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(train_dataset, + args.batch_size, + workers=args.workers) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0, + last_epoch=-1) + if args.rank % npus_per_node == 0: + print('Part3 : Load Dataset <==> Done') + print('Part4 : Train and Test <==> Begin') + + for epoch_counter in range(args.epochs): + if 
args.distributed: + train_sampler.set_epoch(epoch_counter) + best_acc = 0 + train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc) + if epoch_counter >= 10: + scheduler.step() + print('Part4 : Train and Test <==> Done') + + +def info_nce_loss(args, features): + labels = torch.cat([torch.arange(args.batch_size) for i in range(args.n_views)], dim=0) + labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float() + labels = labels.npu() + features = F.normalize(features, dim=1) + similarity_matrix = torch.matmul(features, features.T) + + # discard the main diagonal from both: labels and similarities matrix + mask = torch.eye(labels.shape[0], dtype=torch.bool).npu() + labels = labels[~mask].view(labels.shape[0], -1) + similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1) + + # select and combine multiple positives + positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1) + + # select only the negatives the negatives + negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1) + + logits = torch.cat([positives, negatives], dim=1) + labels = torch.zeros(logits.shape[0], dtype=torch.long).npu() + logits = logits / args.temperature + return logits, labels + + +def train(args, train_loader, model, criterion, optimizer, epoch_counter, npus_per_node, best_acc): + fps = AverageMeter() + + top1 = [0] + top5 = [0] + + end = time.time() + for i, (images, _) in enumerate(train_loader): + images = torch.cat(images, dim=0) + images = images.npu() + + out = model(images) + logits, labels = info_nce_loss(args, out) + loss = criterion(logits, labels) + optimizer.zero_grad() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + + time_step = time.time() - end + fps.update(args.batch_size * npus_per_node / time_step) + torch.npu.synchronize() + end = time.time() + + if i % args.log_every_n_steps == 0 and args.is_master_node: + top1, top5 = accuracy(logits, labels, topk=(1, 5)) + if top1[0] > best_acc: + best_acc = top1[0] + + print('Train Epoch: {0} Step: {1}/{2} Loss {loss:.4f} Time {time:.4f}' + '[AVG-ACC] * Acc@1 {top1:.3f} Acc@5 {top5:.3f} best_acc {best_acc:.3f} ' + 'LR {lr:.7f} FPS {fps:.7f} '.format( + epoch_counter, i, len(train_loader), loss=loss.item(), time=time_step, + top1=top1[0], top5=top5[0], best_acc=best_acc, + lr=optimizer.param_groups[0]['lr'], fps=fps.avg)) + + if (epoch_counter+1) % 5 == 0: + save_checkpoint({ + 'epoch': epoch_counter, + 'arch': model.state_dict(), + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + }) + + +def save_checkpoint(state, filename='checkpoint.pth.tar'): + torch.save(state, filename) + + +def get_pytorch_train_loader(train_dataset, batch_size, workers, _worker_init_fn=None): + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + + dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader + train_loader = dataloader_fn( + train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), + num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=False, sampler=train_sampler, + drop_last=True) + return train_loader, len(train_loader), train_sampler + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count 
+= n + self.avg = self.sum / self.count + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/modelzoo_level.txt b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/modelzoo_level.txt index 5b3e2194dbccbe34708e1e09ed7cb8b6f0a81796..13f1a4a78a9b6dad37a3a150f11efe1ec094bbee 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/modelzoo_level.txt @@ -1,8 +1,8 @@ -GPUStatus:OK -NPUMigrationStatus:POK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK -PerfStatus:PERFECT -ModelConvert:NOK +GPUStatus:OK +NPUMigrationStatus:POK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK +PerfStatus:PERFECT +ModelConvert:NOK QuantStatus:NOK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py index 49b9ae910a987752af23c44d4235d54e1a74334d..256ccca3c96ab7e44d267217f714a3b8d7228522 100644 --- a/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py +++ b/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch/multi_epochs_dataloader.py @@ -1,46 +1,46 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch - - -class MultiEpochsDataLoader(torch.utils.data.DataLoader): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._DataLoader__initialized = False - self.batch_sampler = _RepeatSampler(self.batch_sampler) - self._DataLoader__initialized = True - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for _ in range(len(self)): - yield next(self.iterator) - - -class _RepeatSampler(object): - """ Sampler that repeats forever. - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import torch + + +class MultiEpochsDataLoader(torch.utils.data.DataLoader): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._DataLoader__initialized = False + self.batch_sampler = _RepeatSampler(self.batch_sampler) + self._DataLoader__initialized = True + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for _ in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler(object): + """ Sampler that repeats forever. + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) diff --git a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/README.md b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/README.md index e6570405aeb0b1afc70e2f81fa99b3afea489b0c..60eeb9406cb55b01595b669dfa51767c550e19c1 100644 --- a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/README.md +++ b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/README.md @@ -1,64 +1,64 @@ -# Stylegan2-ADA-Pytorch模型PyTorch离线推理指导 - -## 1 环境准备 - -1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 - -``` -pip install -r requirements.txt -``` - - -2.获取,修改与安装开源模型代码 - -``` -(torch 1.7.0以上版本)git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git model -(torch 1.5.0)git clone https://github.com/Hypersus/utils-for-stylegan2-ada-pytorch.git model -``` - - -3.获取权重文件 - -将权重文件G_ema_bs8_8p_kimg1000.pkl放到当前工作目录 - -4.数据集 -执行`python stylegan2-ada-pytorch_preprocess.py`,默认生成`batch_size=1`的一条输入,保存在`./input`目录下 - - - -5.获取msame工具 -将msame放到当前工作目录 - - - -## 2 离线推理 - -310上执行,执行时使npu-smi info查看设备状态,确保device空闲 - -``` -bash test/pth2om.sh -bash test/eval_acc_perf.sh -``` - **评测结果:** - -bs1在310上推理的性能 - -``` -Inference average time : 207.61 ms -Inference average time without first time: 207.59 ms -``` - -bs1 310单卡吞吐率:1000/(207.61/4)=19.27fps - -bs1在T4上推理的性能 - -``` -Inference average time : 317.90 ms -``` - - - -| 模型 | T4性能 | 310性能 | -| :-----------------------: | :------: | :------: | -| stylegan2-ada-pytorch bs1 | 12.58fps | 19.27fps | - +# Stylegan2-ADA-Pytorch模型PyTorch离线推理指导 + +## 1 环境准备 + +1.安装必要的依赖,测试环境可能已经安装其中的一些不同版本的库了,故手动测试时不推荐使用该命令安装 + +``` +pip install -r requirements.txt +``` + + +2.获取,修改与安装开源模型代码 + +``` +(torch 1.7.0以上版本)git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git model +(torch 1.5.0)git clone https://github.com/Hypersus/utils-for-stylegan2-ada-pytorch.git model +``` + + +3.获取权重文件 + +将权重文件G_ema_bs8_8p_kimg1000.pkl放到当前工作目录 + +4.数据集 +执行`python stylegan2-ada-pytorch_preprocess.py`,默认生成`batch_size=1`的一条输入,保存在`./input`目录下 + + + +5.获取msame工具 +将msame放到当前工作目录 + + + +## 2 离线推理 + +310上执行,执行时使npu-smi info查看设备状态,确保device空闲 + +``` +bash test/pth2om.sh +bash test/eval_acc_perf.sh +``` + **评测结果:** + +bs1在310上推理的性能 + +``` +Inference average time : 207.61 ms +Inference average time without first time: 207.59 ms +``` + +bs1 310单卡吞吐率:1000/(207.61/4)=19.27fps + +bs1在T4上推理的性能 + +``` +Inference average time : 317.90 ms +``` + + + +| 模型 | T4性能 | 310性能 | +| :-----------------------: | :------: | :------: | +| stylegan2-ada-pytorch bs1 | 12.58fps | 19.27fps | + diff --git a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/perf_gpu.py b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/perf_gpu.py index 58c4ecd8b1dd43d487c06d8ebd25c028f5bb3ad7..cfb3a59f73596b0a3fc5678b2e09fa933987056d 100644 --- a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/perf_gpu.py +++ 
b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/perf_gpu.py @@ -1,122 +1,122 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the License); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -import os -import sys -import time - -sys.path.append('./model') -import numpy as np -import torch -import pickle -import argparse -import PIL.Image -import functools - - -def save_image_grid(img, fname, drange, grid_size): - lo, hi = drange - img = np.asarray(img, dtype=np.float32) - img = (img - lo) * (255 / (hi - lo)) - img = np.rint(img).clip(0, 255).astype(np.uint8) - - gw, gh = grid_size - _N, C, H, W = img.shape - img = img.reshape(gh, gw, C, H, W) - img = img.transpose(0, 3, 1, 4, 2) - img = img.reshape(gh * H, gw * W, C) - - assert C in [1, 3] - if C == 1: - PIL.Image.fromarray(img[:, :, 0], 'L').save(fname) - if C == 3: - PIL.Image.fromarray(img, 'RGB').save(fname) - - -def main(args): - pkl_file = args.pkl_file - bs = args.batch_size - input_path = args.input_path - image_path = args.image_path - device = 'cuda:0' if torch.cuda.is_available() else 'cpu' - - grid_size = (1, 1) - input_path = os.path.join(input_path, 'bs{}'.format(bs)) - input_files = os.listdir(input_path) - input_files.sort() - image_path = os.path.join(image_path, 'bs{}_pkl'.format(bs)) - os.makedirs(image_path, exist_ok=True) - # load model - start = time.time() - with open(pkl_file, 'rb') as f: - G = pickle.load(f)['G_ema'].to(device) - - G.forward = functools.partial(G.forward, force_fp32=True) - for i in range(len(input_files)): - input_file = input_files[i] - input_file = os.path.join(input_path, input_file) - input_file = np.fromfile(input_file, dtype=np.float32) - z = torch.tensor(input_file).reshape(-1, G.z_dim).to(device) - c = torch.empty(bs, 0).to(device) - image = G(z, c) - image = image.reshape(-1, 3, 512, 512) - image = image.cpu() - save_image_grid(image, os.path.join(image_path, f'gen_image_{i:04d}') + '.png', drange=[-1, 1], - grid_size=grid_size) - - end = time.time() - print(f'Inference average time : {((end - start) * 1000 / len(input_files)):.2f} ms') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--pkl_file', type=str, default='./G_ema_bs8_8p_kimg1000.pkl') - parser.add_argument('--input_path', type=str, default='./input') - parser.add_argument('--image_path', type=str, default='./results') - parser.add_argument('--batch_size', type=int, default=1) - args = parser.parse_args() - - main(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import os +import sys +import time + +sys.path.append('./model') +import numpy as np +import torch +import pickle +import argparse +import PIL.Image +import functools + + +def save_image_grid(img, fname, drange, grid_size): + lo, hi = drange + img = np.asarray(img, dtype=np.float32) + img = (img - lo) * (255 / (hi - lo)) + img = np.rint(img).clip(0, 255).astype(np.uint8) + + gw, gh = grid_size + _N, C, H, W = img.shape + img = img.reshape(gh, gw, C, H, W) + img = img.transpose(0, 3, 1, 4, 2) + img = img.reshape(gh * H, gw * W, C) + + assert C in [1, 3] + if C == 1: + PIL.Image.fromarray(img[:, :, 0], 'L').save(fname) + if C == 3: + PIL.Image.fromarray(img, 'RGB').save(fname) + + +def main(args): + pkl_file = args.pkl_file + bs = args.batch_size + input_path = args.input_path + image_path = args.image_path + device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + + grid_size = (1, 1) + input_path = os.path.join(input_path, 'bs{}'.format(bs)) + input_files = os.listdir(input_path) + input_files.sort() + image_path = os.path.join(image_path, 'bs{}_pkl'.format(bs)) + os.makedirs(image_path, exist_ok=True) + # load model + start = time.time() + with open(pkl_file, 'rb') as f: + G = pickle.load(f)['G_ema'].to(device) + + G.forward = functools.partial(G.forward, force_fp32=True) + for i in range(len(input_files)): + input_file = input_files[i] + input_file = os.path.join(input_path, input_file) + input_file = np.fromfile(input_file, dtype=np.float32) + z = torch.tensor(input_file).reshape(-1, G.z_dim).to(device) + c = torch.empty(bs, 0).to(device) + image = G(z, c) + image = image.reshape(-1, 3, 512, 512) + image = image.cpu() + save_image_grid(image, os.path.join(image_path, f'gen_image_{i:04d}') + '.png', drange=[-1, 1], + grid_size=grid_size) + + end = time.time() + print(f'Inference average time : {((end - start) * 1000 / len(input_files)):.2f} ms') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--pkl_file', type=str, default='./G_ema_bs8_8p_kimg1000.pkl') + parser.add_argument('--input_path', type=str, default='./input') + parser.add_argument('--image_path', type=str, default='./results') + parser.add_argument('--batch_size', type=int, default=1) + args = parser.parse_args() + 
+ main(args) diff --git a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/requirements.txt b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/requirements.txt index 3f021068343754dc45f33593e811ed9e93702ed9..017e9b5007a423ec1c830e2caac83953a7716ece 100644 --- a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/requirements.txt +++ b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/requirements.txt @@ -1,5 +1,5 @@ -requests -scipy -numpy -Pillow +requests +scipy +numpy +Pillow torch==1.5.0 \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_postprocess.py b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_postprocess.py index 69bc1a47de649557643d2eca9fd3241c2b06c0db..d301391b97182b6c09e89f46ab1d44143e9925c8 100644 --- a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_postprocess.py +++ b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_postprocess.py @@ -1,78 +1,78 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the License); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ - -import torch -import os -import numpy as np -import argparse -from perf_gpu import save_image_grid - - -def test_om(args): - bin_path = args.bin_path - image_path = args.image_path - bin_list = os.listdir(bin_path) - bin_list.sort() - for i in range(len(bin_list)): - images = np.fromfile(os.path.join(bin_path, bin_list[i]), dtype=np.float32) - images = torch.Tensor(images) - images = images.reshape(-1, 3, 512, 512) - bs = images.shape[0] - grid_size = (4, 4) if bs == 16 else (1, 1) - save_path = os.path.join(image_path, "bs{}_om".format(bs)) - os.makedirs(save_path, exist_ok=True) - save_image_grid(images, os.path.join(save_path, f'gen_image_{i:04d}') + ".png", drange=[-1, 1], - grid_size=grid_size) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--bin_path', type=str, required=True) - parser.add_argument('--image_path', type=str, default='./results') - args = parser.parse_args() - - test_om(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ + +import torch +import os +import numpy as np +import argparse +from perf_gpu import save_image_grid + + +def test_om(args): + bin_path = args.bin_path + image_path = args.image_path + bin_list = os.listdir(bin_path) + bin_list.sort() + for i in range(len(bin_list)): + images = np.fromfile(os.path.join(bin_path, bin_list[i]), dtype=np.float32) + images = torch.Tensor(images) + images = images.reshape(-1, 3, 512, 512) + bs = images.shape[0] + grid_size = (4, 4) if bs == 16 else (1, 1) + save_path = os.path.join(image_path, "bs{}_om".format(bs)) + os.makedirs(save_path, exist_ok=True) + save_image_grid(images, os.path.join(save_path, f'gen_image_{i:04d}') + ".png", drange=[-1, 1], + grid_size=grid_size) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--bin_path', type=str, required=True) + parser.add_argument('--image_path', type=str, default='./results') + args = parser.parse_args() + + test_om(args) diff --git a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_preprocess.py b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_preprocess.py index fee1140fa65b9c8b528f4723886be37d87122382..9c90a9be83e63d837a1686572b9ef94c76fbfc2e 100644 --- a/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_preprocess.py +++ b/PyTorch/contrib/cv/detection/StyleGAN2-ADA/stylegan2-ada-pytorch_preprocess.py @@ -1,81 +1,81 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the License); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -import os -import torch -import argparse - - -def main(args): - # set up option - z_dim = 512 - c_dim = 0 - bs = args.batch_size - num = args.num_input - save_path = args.save_path - - # create save path dir - save_path = os.path.join(save_path, "bs{}".format(bs)) - os.makedirs(save_path, exist_ok=True) - - # generate input - for i in range(num): - z = torch.randn([bs, z_dim]) - c = torch.empty([bs, c_dim]) - input = torch.cat((z, c), 1).numpy() - input.tofile(os.path.join(save_path, f'input_bs{bs}_{i:04d}.bin')) - - -# ---------------------------------------------------------------------------- -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--num_input', type=int, default=1) - parser.add_argument('--batch_size', type=int, default=1) - parser.add_argument('--save_path', type=str, default='./input') - args = parser.parse_args() - main(args) -# ---------------------------------------------------------------------------- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +import os +import torch +import argparse + + +def main(args): + # set up option + z_dim = 512 + c_dim = 0 + bs = args.batch_size + num = args.num_input + save_path = args.save_path + + # create save path dir + save_path = os.path.join(save_path, "bs{}".format(bs)) + os.makedirs(save_path, exist_ok=True) + + # generate input + for i in range(num): + z = torch.randn([bs, z_dim]) + c = torch.empty([bs, c_dim]) + input = torch.cat((z, c), 1).numpy() + input.tofile(os.path.join(save_path, f'input_bs{bs}_{i:04d}.bin')) + + +# ---------------------------------------------------------------------------- +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--num_input', type=int, default=1) + parser.add_argument('--batch_size', type=int, default=1) + parser.add_argument('--save_path', type=str, default='./input') + args = parser.parse_args() + main(args) +# ---------------------------------------------------------------------------- diff --git a/PyTorch/contrib/cv/detection/TextSnake/Dockerfile b/PyTorch/contrib/cv/detection/TextSnake/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/detection/TextSnake/Dockerfile +++ b/PyTorch/contrib/cv/detection/TextSnake/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/TextSnake/dataset/ctw.py b/PyTorch/contrib/cv/detection/TextSnake/dataset/ctw.py index 4e1ad12a819e98036586f198d3873933f1892331..eb1309d6c1e79cfb4dad830ae04bfca945f5568e 100644 --- a/PyTorch/contrib/cv/detection/TextSnake/dataset/ctw.py +++ b/PyTorch/contrib/cv/detection/TextSnake/dataset/ctw.py @@ -1,31 +1,31 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ============================================================================ \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/TextSnake/dataset/total_text/Evaluation_Protocol/ComputePrecisionRecall.m b/PyTorch/contrib/cv/detection/TextSnake/dataset/total_text/Evaluation_Protocol/ComputePrecisionRecall.m index 216a349259cbd1242be2db46f694fc497fbb3733..3df883ab2bd83034b6f957cec4d0f8ed7e138ed5 100644 --- a/PyTorch/contrib/cv/detection/TextSnake/dataset/total_text/Evaluation_Protocol/ComputePrecisionRecall.m +++ b/PyTorch/contrib/cv/detection/TextSnake/dataset/total_text/Evaluation_Protocol/ComputePrecisionRecall.m @@ -1,161 +1,161 @@ -function [ precision, recall ] = ComputePrecisionRecall( tau, sigma, tp,tr,k,fsc_k ) -%COMPUTEPRECISIONRECALL Summary of this function goes here -% Detailed explanation goes here - -if nargin == 2 - tr = 0.8; % recall threshold - tp = 0.4; % precision threshold - k = 2; % min number of matches, used in penalizing split & merge - fsc_k = 0.8;% penalize value of split or merge -end - -tot_gt = 0; -recall_accum = 0; - -tot_detected = 0; -precision_accum = 0; - -num_images = numel(tau); -assert(num_images == numel(sigma)); - -flag_gt = cell(1, num_images); -flag_det = cell(1, num_images); - -for ifile=1:num_images - - [num_gt, num_detected] = size( sigma{ifile} ); - tot_gt = tot_gt + num_gt; - tot_detected = tot_detected + num_detected; - - % ----- mark unprocessed - flag_gt{ifile} = zeros(num_gt, 1); - flag_det{ifile} = zeros(num_detected, 1); - - % --------------------------------------- - % check one-to-one match - % --------------------------------------- - for i_gt=1:num_gt - - num_detected_in_sigma = numel( find( sigma{ifile}(i_gt,:)>tr) ); - num_detected_in_tau = numel( find( tau{ifile}(i_gt,:)>tp) ); - - if num_detected_in_sigma == 1 && num_detected_in_tau == 1 - recall_accum = recall_accum + 1.0; - precision_accum = precision_accum + 1.0; - - % Mark the ground truth and detection, do not process twice - flag_gt{ifile}(i_gt) = 1; - idx_det = sigma{ifile}(i_gt,:)>tr; - flag_det{ifile}(idx_det) = 1; - end - end - - % --------------------------------------- - % check one-to-many match (split) - % one gt with many detected rectangles - % --------------------------------------- - for i_gt=1:num_gt - - if flag_gt{ifile}(i_gt) > 0 - continue; - end - - num_nonzero_in_sigma = sum( sigma{ifile}(i_gt,:)>0 ); - if num_nonzero_in_sigma >= k - - % ------------------------------------------------------------- - % Search the possible "many" partners for this "one" rectangle - % ------------------------------------------------------------- - - % ----- satisfy 1st condition - % only select unprocessed data - idx_detected_in_tau = find( (tau{ifile}(i_gt,:)'>=tp) & (flag_det{ifile}==0) ); - num_detected_in_tau = numel( idx_detected_in_tau ); - - if num_detected_in_tau == 1 - % Only one of the many-rectangles qualified -> - % This match degraded to a one-to-one match - if ( (tau{ifile}(i_gt, idx_detected_in_tau) >= tp) && ... 
- (sigma{ifile}(i_gt, idx_detected_in_tau) >= tr) ) - recall_accum = recall_accum + 1.0; - precision_accum = precision_accum + 1.0; - end - else - % satisfy 2nd condition - if sum( sigma{ifile}(i_gt,idx_detected_in_tau) ) >= tr - - % Mark the "one" rectangle - flag_gt{ifile}(i_gt) = 1; - - % Mark all the "many" rectangles - flag_det{ifile}(idx_detected_in_tau) = 1; - - recall_accum = recall_accum + fsc_k; - precision_accum = precision_accum + num_detected_in_tau * fsc_k; - - end - end - - end - - % No match - recall_accum = recall_accum + 0; - precision_accum = precision_accum + 0; - - end - - % --------------------------------------- - % check many-to-one match (merge) - % one detected rectangle with many gt - % --------------------------------------- - for i_test=1:num_detected - - if flag_det{ifile}(i_test) > 0 - continue; - end - - num_nonzero_in_tau = sum( tau{ifile}(:,i_test)>0 ); - if num_nonzero_in_tau >= k - - % satisfy 1st condition - % only select unprocessed data - idx_detected_in_sigma = find( (sigma{ifile}(:,i_test)>=tr) & (flag_gt{ifile}==0) ); - num_detected_in_sigma = numel( idx_detected_in_sigma ); - - if num_detected_in_sigma == 1 - % Only one of the many-rectangles qualified -> - % This match degraded to a one-to-one match - if ( (tau{ifile}(idx_detected_in_sigma, i_test) >= tp) && ... - (sigma{ifile}(idx_detected_in_sigma, i_test) >= tr) ) - recall_accum = recall_accum + 1.0; - precision_accum = precision_accum + 1.0; - end - else - % satisfy 2nd condition - if sum( tau{ifile}(idx_detected_in_sigma,i_test) ) >= tp - % Mark the "one" rectangle - flag_det{ifile}(i_test) = 1; - - % Mark all the "many" rectangles - flag_gt{ifile}(idx_detected_in_sigma) = 1; - - recall_accum = recall_accum + num_detected_in_sigma*fsc_k; - precision_accum = precision_accum + fsc_k; -% recall_accum = recall_accum + num_detected_in_sigma; -% precision_accum = precision_accum + 1.0; - end - end - - end - - % No match - recall_accum = recall_accum + 0; - precision_accum = precision_accum + 0; - end - -end -recall = recall_accum / tot_gt; -precision = precision_accum / tot_detected; - -end - +function [ precision, recall ] = ComputePrecisionRecall( tau, sigma, tp,tr,k,fsc_k ) +%COMPUTEPRECISIONRECALL Summary of this function goes here +% Detailed explanation goes here + +if nargin == 2 + tr = 0.8; % recall threshold + tp = 0.4; % precision threshold + k = 2; % min number of matches, used in penalizing split & merge + fsc_k = 0.8;% penalize value of split or merge +end + +tot_gt = 0; +recall_accum = 0; + +tot_detected = 0; +precision_accum = 0; + +num_images = numel(tau); +assert(num_images == numel(sigma)); + +flag_gt = cell(1, num_images); +flag_det = cell(1, num_images); + +for ifile=1:num_images + + [num_gt, num_detected] = size( sigma{ifile} ); + tot_gt = tot_gt + num_gt; + tot_detected = tot_detected + num_detected; + + % ----- mark unprocessed + flag_gt{ifile} = zeros(num_gt, 1); + flag_det{ifile} = zeros(num_detected, 1); + + % --------------------------------------- + % check one-to-one match + % --------------------------------------- + for i_gt=1:num_gt + + num_detected_in_sigma = numel( find( sigma{ifile}(i_gt,:)>tr) ); + num_detected_in_tau = numel( find( tau{ifile}(i_gt,:)>tp) ); + + if num_detected_in_sigma == 1 && num_detected_in_tau == 1 + recall_accum = recall_accum + 1.0; + precision_accum = precision_accum + 1.0; + + % Mark the ground truth and detection, do not process twice + flag_gt{ifile}(i_gt) = 1; + idx_det = sigma{ifile}(i_gt,:)>tr; + flag_det{ifile}(idx_det) = 1; + end 
+ end + + % --------------------------------------- + % check one-to-many match (split) + % one gt with many detected rectangles + % --------------------------------------- + for i_gt=1:num_gt + + if flag_gt{ifile}(i_gt) > 0 + continue; + end + + num_nonzero_in_sigma = sum( sigma{ifile}(i_gt,:)>0 ); + if num_nonzero_in_sigma >= k + + % ------------------------------------------------------------- + % Search the possible "many" partners for this "one" rectangle + % ------------------------------------------------------------- + + % ----- satisfy 1st condition + % only select unprocessed data + idx_detected_in_tau = find( (tau{ifile}(i_gt,:)'>=tp) & (flag_det{ifile}==0) ); + num_detected_in_tau = numel( idx_detected_in_tau ); + + if num_detected_in_tau == 1 + % Only one of the many-rectangles qualified -> + % This match degraded to a one-to-one match + if ( (tau{ifile}(i_gt, idx_detected_in_tau) >= tp) && ... + (sigma{ifile}(i_gt, idx_detected_in_tau) >= tr) ) + recall_accum = recall_accum + 1.0; + precision_accum = precision_accum + 1.0; + end + else + % satisfy 2nd condition + if sum( sigma{ifile}(i_gt,idx_detected_in_tau) ) >= tr + + % Mark the "one" rectangle + flag_gt{ifile}(i_gt) = 1; + + % Mark all the "many" rectangles + flag_det{ifile}(idx_detected_in_tau) = 1; + + recall_accum = recall_accum + fsc_k; + precision_accum = precision_accum + num_detected_in_tau * fsc_k; + + end + end + + end + + % No match + recall_accum = recall_accum + 0; + precision_accum = precision_accum + 0; + + end + + % --------------------------------------- + % check many-to-one match (merge) + % one detected rectangle with many gt + % --------------------------------------- + for i_test=1:num_detected + + if flag_det{ifile}(i_test) > 0 + continue; + end + + num_nonzero_in_tau = sum( tau{ifile}(:,i_test)>0 ); + if num_nonzero_in_tau >= k + + % satisfy 1st condition + % only select unprocessed data + idx_detected_in_sigma = find( (sigma{ifile}(:,i_test)>=tr) & (flag_gt{ifile}==0) ); + num_detected_in_sigma = numel( idx_detected_in_sigma ); + + if num_detected_in_sigma == 1 + % Only one of the many-rectangles qualified -> + % This match degraded to a one-to-one match + if ( (tau{ifile}(idx_detected_in_sigma, i_test) >= tp) && ... 
+ (sigma{ifile}(idx_detected_in_sigma, i_test) >= tr) ) + recall_accum = recall_accum + 1.0; + precision_accum = precision_accum + 1.0; + end + else + % satisfy 2nd condition + if sum( tau{ifile}(idx_detected_in_sigma,i_test) ) >= tp + % Mark the "one" rectangle + flag_det{ifile}(i_test) = 1; + + % Mark all the "many" rectangles + flag_gt{ifile}(idx_detected_in_sigma) = 1; + + recall_accum = recall_accum + num_detected_in_sigma*fsc_k; + precision_accum = precision_accum + fsc_k; +% recall_accum = recall_accum + num_detected_in_sigma; +% precision_accum = precision_accum + 1.0; + end + end + + end + + % No match + recall_accum = recall_accum + 0; + precision_accum = precision_accum + 0; + end + +end +recall = recall_accum / tot_gt; +precision = precision_accum / tot_detected; + +end + diff --git a/PyTorch/contrib/cv/detection/TextSnake/modelzoo_level.txt b/PyTorch/contrib/cv/detection/TextSnake/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/detection/TextSnake/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/TextSnake/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT/layers/modules/multibox_loss.py b/PyTorch/contrib/cv/detection/YOLACT/layers/modules/multibox_loss.py index 3f97991c39e9c1a85a1e03044cee923a3a0c2b66..86006e7d4d856f8d4a77379e5e6db153f8f63828 100644 --- a/PyTorch/contrib/cv/detection/YOLACT/layers/modules/multibox_loss.py +++ b/PyTorch/contrib/cv/detection/YOLACT/layers/modules/multibox_loss.py @@ -1,737 +1,737 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -# -*- coding: utf-8 -*- -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -from ..box_utils import match, log_sum_exp, decode, center_size, crop, elemwise_mask_iou, elemwise_box_iou - -from data import cfg, mask_type, activation_func - -class MultiBoxLoss(nn.Module): - """SSD Weighted Loss Function - Compute Targets: - 1) Produce Confidence Target Indices by matching ground truth boxes - with (default) 'priorboxes' that have jaccard index > threshold parameter - (default threshold: 0.5). - - 2) Produce localization target by 'encoding' variance into offsets of ground - truth boxes and their matched 'priorboxes'. - - 3) Hard negative mining to filter the excessive number of negative examples - that comes with using a large number of default bounding boxes. - (default negative:positive ratio 3:1) - - Objective Loss: - L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N - Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss - weighted by α which is set to 1 by cross val. - Args: - c: class confidences, - l: predicted boxes, - g: ground truth boxes - N: number of matched default boxes - See: https://arxiv.org/pdf/1512.02325.pdf for more details. - """ - - def __init__(self, num_classes, pos_threshold, neg_threshold, negpos_ratio): - super(MultiBoxLoss, self).__init__() - self.num_classes = num_classes - - self.pos_threshold = pos_threshold - self.neg_threshold = neg_threshold - self.negpos_ratio = negpos_ratio - - # If you output a proto mask with this area, your l1 loss will be l1_alpha - # Note that the area is relative (so 1 would be the entire image) - self.l1_expected_area = 20*20/70/70 - self.l1_alpha = 0.1 - - if cfg.use_class_balanced_conf: - self.class_instances = None - self.total_instances = 0 - - def forward(self, net, predictions, wrapper, wrapper_mask): - """Multibox Loss - Args: - predictions (tuple): A tuple containing loc preds, conf preds, - mask preds, and prior boxes from SSD net. - loc shape: torch.size(batch_size,num_priors,4) - conf shape: torch.size(batch_size,num_priors,num_classes) - masks shape: torch.size(batch_size,num_priors,mask_dim) - priors shape: torch.size(num_priors,4) - proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim) - - targets (list): Ground truth boxes and labels for a batch, - shape: [batch_size][num_objs,5] (last idx is the label). - - masks (list): Ground truth masks for each object in each image, - shape: [batch_size][num_objs,im_height,im_width] - - num_crowds (list): Number of crowd annotations per batch. The crowd - annotations should be the last num_crowds elements of targets and masks. 
- - * Only if mask_type == lincomb - """ - - targets, masks, num_crowds = wrapper.get_args(wrapper_mask) - targets = targets[0] - masks = masks[0] - num_crowds = num_crowds[0] - loc_data = predictions['loc'] - conf_data = predictions['conf'] - mask_data = predictions['mask'] - priors = predictions['priors'] - - if cfg.mask_type == mask_type.lincomb: - proto_data = predictions['proto'] - - score_data = predictions['score'] if cfg.use_mask_scoring else None - inst_data = predictions['inst'] if cfg.use_instance_coeff else None - - labels = [None] * len(targets) # Used in sem segm loss - - batch_size = loc_data.size(0) - num_priors = priors.size(0) - num_classes = self.num_classes - - # Match priors (default boxes) and ground truth boxes - # These tensors will be created with the same device as loc_data - loc_t = loc_data.new(batch_size, num_priors, 4) - gt_box_t = loc_data.new(batch_size, num_priors, 4) - conf_t = loc_data.new(batch_size, num_priors).long() - idx_t = loc_data.new(batch_size, num_priors).long() - - if cfg.use_class_existence_loss: - class_existence_t = loc_data.new(batch_size, num_classes-1) - - for idx in range(batch_size): - truths = targets[idx][:, :-1].data - labels[idx] = targets[idx][:, -1].data.long() - - if cfg.use_class_existence_loss: - # Construct a one-hot vector for each object and collapse it into an existence vector with max - # Also it's fine to include the crowd annotations here - class_existence_t[idx, :] = torch.eye(num_classes-1, device=conf_t.get_device())[labels[idx]].max(dim=0)[0] - - # Split the crowd annotations because they come bundled in - cur_crowds = num_crowds[idx] - if cur_crowds > 0: - split = lambda x: (x[-cur_crowds:], x[:-cur_crowds]) - crowd_boxes, truths = split(truths) - - # We don't use the crowd labels or masks - _, labels[idx] = split(labels[idx]) - _, masks[idx] = split(masks[idx]) - else: - crowd_boxes = None - - - match(self.pos_threshold, self.neg_threshold, - truths, priors.data, labels[idx], crowd_boxes, - loc_t, conf_t, idx_t, idx, loc_data[idx]) - - gt_box_t[idx, :, :] = truths[idx_t[idx]] - - # wrap targets - loc_t = Variable(loc_t, requires_grad=False) - conf_t = Variable(conf_t, requires_grad=False) - idx_t = Variable(idx_t, requires_grad=False) - - pos = conf_t > 0 - num_pos = pos.sum(dim=1, keepdim=True) - - # Shape: [batch,num_priors,4] - pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) - - losses = {} - - # Localization Loss (Smooth L1) - if cfg.train_boxes: - loc_p = loc_data.view(-1, 4) - loc_t = loc_t.view(-1, 4) - losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='none') * cfg.bbox_alpha - losses['B'][pos_idx.view(-1, 4) == False] = 0 - losses['B'] = losses['B'].sum() - - if cfg.train_masks: - if cfg.mask_type == mask_type.direct: - if cfg.use_gt_bboxes: - pos_masks = [] - for idx in range(batch_size): - pos_masks.append(masks[idx][idx_t[idx, pos[idx]]]) - masks_t = torch.cat(pos_masks, 0) - masks_p = mask_data[pos, :].view(-1, cfg.mask_dim) - losses['M'] = F.binary_cross_entropy(torch.clamp(masks_p, 0, 1), masks_t, reduction='sum') * cfg.mask_alpha - else: - losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks) - elif cfg.mask_type == mask_type.lincomb: - ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels) - if cfg.use_maskiou: - loss, maskiou_targets = ret - else: - loss = ret - losses.update(loss) - - if cfg.mask_proto_loss is not None: - if cfg.mask_proto_loss == 'l1': - losses['P'] = 
torch.mean(torch.abs(proto_data)) / self.l1_expected_area * self.l1_alpha - elif cfg.mask_proto_loss == 'disj': - losses['P'] = -torch.mean(torch.max(F.log_softmax(proto_data, dim=-1), dim=-1)[0]) - - # Confidence loss - if cfg.use_focal_loss: - if cfg.use_sigmoid_focal_loss: - losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t) - elif cfg.use_objectness_score: - losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t) - else: - losses['C'] = self.focal_conf_loss(conf_data, conf_t) - else: - if cfg.use_objectness_score: - losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors) - else: - losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size) - - # Mask IoU Loss - if cfg.use_maskiou and maskiou_targets is not None: - losses['I'] = self.mask_iou_loss(net, maskiou_targets) - - # These losses also don't depend on anchors - if cfg.use_class_existence_loss: - losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t) - if cfg.use_semantic_segmentation_loss: - losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels) - - # Divide all losses by the number of positives. - # Don't do it for loss[P] because that doesn't depend on the anchors. - total_num_pos = num_pos.data.sum().float() - for k in losses: - if k not in ('P', 'E', 'S'): - losses[k] /= total_num_pos - else: - losses[k] /= batch_size - - # Loss Key: - # - B: Box Localization Loss - # - C: Class Confidence Loss - # - M: Mask Loss - # - P: Prototype Loss - # - D: Coefficient Diversity Loss - # - E: Class Existence Loss - # - S: Semantic Segmentation Loss - return losses - - def class_existence_loss(self, class_data, class_existence_t): - return cfg.class_existence_alpha * F.binary_cross_entropy_with_logits(class_data, class_existence_t, reduction='sum') - - def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'): - # Note num_classes here is without the background class so cfg.num_classes-1 - batch_size, num_classes, mask_h, mask_w = segment_data.size() - loss_s = 0 - - for idx in range(batch_size): - cur_segment = segment_data[idx] - cur_class_t = class_t[idx] - - with torch.no_grad(): - downsampled_masks = F.interpolate(mask_t[idx].unsqueeze(0), (mask_h, mask_w), - mode=interpolation_mode, align_corners=False).squeeze(0) - downsampled_masks = downsampled_masks.gt(0.5).float() - - # Construct Semantic Segmentation - segment_t = torch.zeros_like(cur_segment, requires_grad=False) - for obj_idx in range(downsampled_masks.size(0)): - segment_t[cur_class_t[obj_idx]] = torch.max(segment_t[cur_class_t[obj_idx]].float(), downsampled_masks[obj_idx]) - - loss_s += F.binary_cross_entropy_with_logits(cur_segment, segment_t, reduction='sum') - - return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha - - - def ohem_conf_loss(self, conf_data, conf_t, pos, num): - # Compute max conf across batch for hard negative mining - batch_conf = conf_data.view(-1, self.num_classes) - if cfg.ohem_use_most_confident: - # i.e. max(softmax) along classes > 0 - batch_conf = F.softmax(batch_conf, dim=1) - loss_c, _ = batch_conf[:, 1:].max(dim=1) - else: - # i.e. 
-softmax(class 0 confidence) - loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0] - - # Hard Negative Mining - loss_c = loss_c.view(num, -1) - loss_c[pos] = 0 # filter out pos boxes - loss_c[conf_t < 0] = 0 # filter out neutrals (conf_t = -1) - _, loss_idx = loss_c.sort(1, descending=True) - _, idx_rank = loss_idx.sort(1) - num_pos = pos.long().sum(1, keepdim=True) - num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) - neg = idx_rank < num_neg.expand_as(idx_rank) - - # Just in case there aren't enough negatives, don't start using positives as negatives - neg[pos] = 0 - neg[conf_t < 0] = 0 # Filter out neutrals - - # Confidence Loss Including Positive and Negative Examples - pos_idx = pos.unsqueeze(2).expand_as(conf_data) - neg_idx = neg.unsqueeze(2).expand_as(conf_data) - tensor_gt = (pos + neg).gt(0) - conf_p = conf_data.view(-1, self.num_classes) - targets_weighted = conf_t.view(-1) - loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='none') - loss_c = loss_c[tensor_gt.view(-1) == True] - - if cfg.use_class_balanced_conf: - # Lazy initialization - if self.class_instances is None: - self.class_instances = torch.zeros(self.num_classes, device=targets_weighted.device) - - classes, counts = targets_weighted.unique(return_counts=True) - - for _cls, _cnt in zip(classes.cpu().numpy(), counts.cpu().numpy()): - self.class_instances[_cls] += _cnt - - self.total_instances += targets_weighted.size(0) - - weighting = 1 - (self.class_instances[targets_weighted] / self.total_instances) - weighting = torch.clamp(weighting, min=1/self.num_classes) - - # If you do the math, the average weight of self.class_instances is this - avg_weight = (self.num_classes - 1) / self.num_classes - - loss_c = (loss_c * weighting).sum() / avg_weight - else: - loss_c = loss_c.sum() - - return cfg.conf_alpha * loss_c - - def focal_conf_loss(self, conf_data, conf_t): - """ - Focal loss as described in https://arxiv.org/pdf/1708.02002.pdf - Adapted from https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py - Note that this uses softmax and not the original sigmoid from the paper. - """ - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss - - logpt = F.log_softmax(conf_data, dim=-1) - logpt = logpt.gather(1, conf_t.unsqueeze(-1)) - logpt = logpt.view(-1) - pt = logpt.exp() - - # I adapted the alpha_t calculation here from - # https://github.com/pytorch/pytorch/blob/master/modules/detectron/softmax_focal_loss_op.cu - # You'd think you want all the alphas to sum to one, but in the original implementation they - # just give background an alpha of 1-alpha and each forground an alpha of alpha. - background = (conf_t == 0).float() - at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) - - loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - - # See comment above for keep - return cfg.conf_alpha * (loss * keep).sum() - - def focal_conf_sigmoid_loss(self, conf_data, conf_t): - """ - Focal loss but using sigmoid like the original paper. - Note: To make things mesh easier, the network still predicts 81 class confidences in this mode. 
- Because retinanet originally only predicts 80, we simply just don't use conf_data[..., 0] - """ - num_classes = conf_data.size(-1) - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, num_classes) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # can't mask with -1, so filter that out - - # Compute a one-hot embedding of conf_t - # From https://github.com/kuangliu/pytorch-retinanet/blob/master/utils.py - conf_one_t = torch.eye(num_classes, device=conf_t.get_device())[conf_t] - conf_pm_t = conf_one_t * 2 - 1 # -1 if background, +1 if forground for specific class - - logpt = F.logsigmoid(conf_data * conf_pm_t) # note: 1 - sigmoid(x) = sigmoid(-x) - pt = logpt.exp() - - at = cfg.focal_loss_alpha * conf_one_t + (1 - cfg.focal_loss_alpha) * (1 - conf_one_t) - at[..., 0] = 0 # Set alpha for the background class to 0 because sigmoid focal loss doesn't use it - - loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - loss = keep * loss.sum(dim=-1) - - return cfg.conf_alpha * loss.sum() - - def focal_conf_objectness_loss(self, conf_data, conf_t): - """ - Instead of using softmax, use class[0] to be the objectness score and do sigmoid focal loss on that. - Then for the rest of the classes, softmax them and apply CE for only the positive examples. - - If class[0] = 1 implies forground and class[0] = 0 implies background then you achieve something - similar during test-time to softmax by setting class[1:] = softmax(class[1:]) * class[0] and invert class[0]. - """ - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss - - background = (conf_t == 0).float() - at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) - - logpt = F.logsigmoid(conf_data[:, 0]) * (1 - background) + F.logsigmoid(-conf_data[:, 0]) * background - pt = logpt.exp() - - obj_loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - - # All that was the objectiveness loss--now time for the class confidence loss - pos_mask = conf_t > 0 - conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes - conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here - - class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') - - return cfg.conf_alpha * (class_loss + (obj_loss * keep).sum()) - - def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors): - """ - Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO. - Then for the rest of the classes, softmax them and apply CE for only the positive examples. 
- """ - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - pos_mask = (conf_t > 0) - neg_mask = (conf_t == 0) - - obj_data = conf_data[:, 0] - obj_data_pos = obj_data[pos_mask] - obj_data_neg = obj_data[neg_mask] - - # Don't be confused, this is just binary cross entropy similified - obj_neg_loss = - F.logsigmoid(-obj_data_neg).sum() - - with torch.no_grad(): - pos_priors = priors.unsqueeze(0).expand(batch_size, -1, -1).reshape(-1, 4)[pos_mask, :] - - boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors) - boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors) - - iou_targets = elemwise_box_iou(boxes_pred, boxes_targ) - - obj_pos_loss = - iou_targets * F.logsigmoid(obj_data_pos) - (1 - iou_targets) * F.logsigmoid(-obj_data_pos) - obj_pos_loss = obj_pos_loss.sum() - - # All that was the objectiveness loss--now time for the class confidence loss - conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes - conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here - - class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') - - return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss) - - - def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors, masks): - """ Crops the gt masks using the predicted bboxes, scales them down, and outputs the BCE loss. """ - loss_m = 0 - for idx in range(mask_data.size(0)): - with torch.no_grad(): - cur_pos_idx = pos_idx[idx, :, :] - cur_pos_idx_squeezed = cur_pos_idx[:, 1] - - # Shape: [num_priors, 4], decoded predicted bboxes - pos_bboxes = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors) - pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1) - pos_lookup = idx_t[idx, cur_pos_idx_squeezed] - - cur_masks = masks[idx] - pos_masks = cur_masks[pos_lookup, :, :] - - # Convert bboxes to absolute coordinates - num_pos, img_height, img_width = pos_masks.size() - - # Take care of all the bad behavior that can be caused by out of bounds coordinates - x1, x2 = sanitize_coordinates(pos_bboxes[:, 0], pos_bboxes[:, 2], img_width) - y1, y2 = sanitize_coordinates(pos_bboxes[:, 1], pos_bboxes[:, 3], img_height) - - # Crop each gt mask with the predicted bbox and rescale to the predicted mask size - # Note that each bounding box crop is a different size so I don't think we can vectorize this - scaled_masks = [] - for jdx in range(num_pos): - tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]] - - # Restore any dimensions we've left out because our bbox was 1px wide - while tmp_mask.dim() < 2: - tmp_mask = tmp_mask.unsqueeze(0) - - new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0), cfg.mask_size) - scaled_masks.append(new_mask.view(1, -1)) - - mask_t = torch.cat(scaled_masks, 0).gt(0.5).float() # Threshold downsampled mask - - pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :] - loss_m += F.binary_cross_entropy(torch.clamp(pos_mask_data, 0, 1), mask_t, reduction='sum') * cfg.mask_alpha - - return loss_m - - - def coeff_diversity_loss(self, coeffs, instance_t): - """ - coeffs should be size [num_pos, num_coeffs] - instance_t should be size [num_pos] and be values from 0 to num_instances-1 - """ - num_pos = coeffs.size(0) - instance_t = instance_t.view(-1) # juuuust to make sure - - coeffs_norm = F.normalize(coeffs, dim=1) - cos_sim = coeffs_norm @ coeffs_norm.t() - - inst_eq = (instance_t[:, None].expand_as(cos_sim) == instance_t[None, :].expand_as(cos_sim)).float() - 
- # Rescale to be between 0 and 1 - cos_sim = (cos_sim + 1) / 2 - - # If they're the same instance, use cosine distance, else use cosine similarity - loss = (1 - cos_sim) * inst_eq + cos_sim * (1 - inst_eq) - - # Only divide by num_pos once because we're summing over a num_pos x num_pos tensor - # and all the losses will be divided by num_pos at the end, so just one extra time. - return cfg.mask_proto_coeff_diversity_alpha * loss.sum() / num_pos - - - def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels, interpolation_mode='bilinear'): - mask_h = proto_data.size(1) - mask_w = proto_data.size(2) - - process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop - - if cfg.mask_proto_remove_empty_masks: - # Make sure to store a copy of this because we edit it to get rid of all-zero masks - pos = pos.clone() - - loss_m = 0 - loss_d = 0 # Coefficient diversity loss - - maskiou_t_list = [] - maskiou_net_input_list = [] - label_t_list = [] - - for idx in range(mask_data.size(0)): - with torch.no_grad(): - downsampled_masks = F.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w), - mode=interpolation_mode, align_corners=False).squeeze(0) - downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous() - - if cfg.mask_proto_binarize_downsampled_gt: - downsampled_masks = downsampled_masks.gt(0.5).float() - - if cfg.mask_proto_remove_empty_masks: - # Get rid of gt masks that are so small they get downsampled away - very_small_masks = (downsampled_masks.sum(dim=(0,1)) <= 0.0001) - for i in range(very_small_masks.size(0)): - if very_small_masks[i]: - pos[idx, idx_t[idx] == i] = 0 - - if cfg.mask_proto_reweight_mask_loss: - # Ensure that the gt is binary - if not cfg.mask_proto_binarize_downsampled_gt: - bin_gt = downsampled_masks.gt(0.5).float() - else: - bin_gt = downsampled_masks - - gt_foreground_norm = bin_gt / (torch.sum(bin_gt, dim=(0,1), keepdim=True) + 0.0001) - gt_background_norm = (1-bin_gt) / (torch.sum(1-bin_gt, dim=(0,1), keepdim=True) + 0.0001) - - mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm - mask_reweighting *= mask_h * mask_w - - cur_pos = pos[idx] - pos_idx_t = idx_t[idx, cur_pos] - - if process_gt_bboxes: - # Note: this is in point-form - if cfg.mask_proto_crop_with_pred_box: - pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos] - else: - pos_gt_box_t = gt_box_t[idx, cur_pos] - - if pos_idx_t.size(0) == 0: - continue - - proto_masks = proto_data[idx] - proto_coef = mask_data[idx, cur_pos, :] - if cfg.use_mask_scoring: - mask_scores = score_data[idx, cur_pos, :] - - if cfg.mask_proto_coeff_diversity_loss: - if inst_data is not None: - div_coeffs = inst_data[idx, cur_pos, :] - else: - div_coeffs = proto_coef - - loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t) - - # If we have over the allowed number of masks, select a random sample - old_num_pos = proto_coef.size(0) - if old_num_pos > cfg.masks_to_train: - perm = torch.randperm(proto_coef.size(0)) - select = perm[:cfg.masks_to_train] - - proto_coef = proto_coef[select, :] - pos_idx_t = pos_idx_t[select] - - if process_gt_bboxes: - pos_gt_box_t = pos_gt_box_t[select, :] - if cfg.use_mask_scoring: - mask_scores = mask_scores[select, :] - - num_pos = proto_coef.size(0) - mask_t = downsampled_masks[:, :, pos_idx_t] - label_t = labels[idx][pos_idx_t] - - # Size: [mask_h, mask_w, num_pos] - #pred_masks = proto_masks @ proto_coef.t() - 
pred_masks = torch.matmul(proto_masks, proto_coef.T) - pred_masks = cfg.mask_proto_mask_activation(pred_masks) - - if cfg.mask_proto_double_loss: - if cfg.mask_proto_mask_activation == activation_func.sigmoid: - pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='sum') - else: - pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='sum') - - loss_m += cfg.mask_proto_double_loss_alpha * pre_loss - - if cfg.mask_proto_crop: - pred_masks = crop(pred_masks, pos_gt_box_t) - - if cfg.mask_proto_mask_activation == activation_func.sigmoid: - pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='none') - else: - pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='none') - - if cfg.mask_proto_normalize_mask_loss_by_sqrt_area: - gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True) - pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001) - - if cfg.mask_proto_reweight_mask_loss: - pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t] - - if cfg.mask_proto_normalize_emulate_roi_pooling: - weight = mask_h * mask_w if cfg.mask_proto_crop else 1 - pos_gt_csize = center_size(pos_gt_box_t) - gt_box_width = pos_gt_csize[:, 2] * mask_w - gt_box_height = pos_gt_csize[:, 3] * mask_h - pre_loss = pre_loss.sum(dim=(0, 1)) / gt_box_width / gt_box_height * weight - - # If the number of masks were limited scale the loss accordingly - if old_num_pos > num_pos: - pre_loss *= old_num_pos / num_pos - - loss_m += torch.sum(pre_loss) - - if cfg.use_maskiou: - if cfg.discard_mask_area > 0: - gt_mask_area = torch.sum(mask_t, dim=(0, 1)) - select = gt_mask_area > cfg.discard_mask_area - - if torch.sum(select) < 1: - continue - - pos_gt_box_t = pos_gt_box_t[select, :] - pred_masks = pred_masks[:, :, select] - mask_t = mask_t[:, :, select] - label_t = label_t[select] - - maskiou_net_input = pred_masks.permute(2, 0, 1).contiguous().unsqueeze(1) - pred_masks = pred_masks.gt(0.5).float() - maskiou_t = self._mask_iou(pred_masks, mask_t) - - maskiou_net_input_list.append(maskiou_net_input) - maskiou_t_list.append(maskiou_t) - label_t_list.append(label_t) - - losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w} - - if cfg.mask_proto_coeff_diversity_loss: - losses['D'] = loss_d - - if cfg.use_maskiou: - # discard_mask_area discarded every mask in the batch, so nothing to do here - if len(maskiou_t_list) == 0: - return losses, None - - maskiou_t = torch.cat(maskiou_t_list) - label_t = torch.cat(label_t_list) - maskiou_net_input = torch.cat(maskiou_net_input_list) - - num_samples = maskiou_t.size(0) - if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train: - perm = torch.randperm(num_samples) - select = perm[:cfg.masks_to_train] - maskiou_t = maskiou_t[select] - label_t = label_t[select] - maskiou_net_input = maskiou_net_input[select] - - return losses, [maskiou_net_input, maskiou_t, label_t] - - return losses - - def _mask_iou(self, mask1, mask2): - intersection = torch.sum(mask1*mask2, dim=(0, 1)) - area1 = torch.sum(mask1, dim=(0, 1)) - area2 = torch.sum(mask2, dim=(0, 1)) - union = (area1 + area2) - intersection - ret = intersection / union - return ret - - def mask_iou_loss(self, net, maskiou_targets): - maskiou_net_input, maskiou_t, label_t = maskiou_targets - - maskiou_p = net.maskiou_net(maskiou_net_input) - - label_t = label_t[:, None] - maskiou_p = torch.gather(maskiou_p, dim=1, index=label_t).view(-1) - - loss_i = F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='sum') - - return loss_i * cfg.maskiou_alpha +# BSD 3-Clause License +# 
+# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +# -*- coding: utf-8 -*- +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from ..box_utils import match, log_sum_exp, decode, center_size, crop, elemwise_mask_iou, elemwise_box_iou + +from data import cfg, mask_type, activation_func + +class MultiBoxLoss(nn.Module): + """SSD Weighted Loss Function + Compute Targets: + 1) Produce Confidence Target Indices by matching ground truth boxes + with (default) 'priorboxes' that have jaccard index > threshold parameter + (default threshold: 0.5). + + 2) Produce localization target by 'encoding' variance into offsets of ground + truth boxes and their matched 'priorboxes'. + + 3) Hard negative mining to filter the excessive number of negative examples + that comes with using a large number of default bounding boxes. + (default negative:positive ratio 3:1) + + Objective Loss: + L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss + weighted by α which is set to 1 by cross val. + Args: + c: class confidences, + l: predicted boxes, + g: ground truth boxes + N: number of matched default boxes + See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
+ """ + + def __init__(self, num_classes, pos_threshold, neg_threshold, negpos_ratio): + super(MultiBoxLoss, self).__init__() + self.num_classes = num_classes + + self.pos_threshold = pos_threshold + self.neg_threshold = neg_threshold + self.negpos_ratio = negpos_ratio + + # If you output a proto mask with this area, your l1 loss will be l1_alpha + # Note that the area is relative (so 1 would be the entire image) + self.l1_expected_area = 20*20/70/70 + self.l1_alpha = 0.1 + + if cfg.use_class_balanced_conf: + self.class_instances = None + self.total_instances = 0 + + def forward(self, net, predictions, wrapper, wrapper_mask): + """Multibox Loss + Args: + predictions (tuple): A tuple containing loc preds, conf preds, + mask preds, and prior boxes from SSD net. + loc shape: torch.size(batch_size,num_priors,4) + conf shape: torch.size(batch_size,num_priors,num_classes) + masks shape: torch.size(batch_size,num_priors,mask_dim) + priors shape: torch.size(num_priors,4) + proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim) + + targets (list): Ground truth boxes and labels for a batch, + shape: [batch_size][num_objs,5] (last idx is the label). + + masks (list): Ground truth masks for each object in each image, + shape: [batch_size][num_objs,im_height,im_width] + + num_crowds (list): Number of crowd annotations per batch. The crowd + annotations should be the last num_crowds elements of targets and masks. + + * Only if mask_type == lincomb + """ + + targets, masks, num_crowds = wrapper.get_args(wrapper_mask) + targets = targets[0] + masks = masks[0] + num_crowds = num_crowds[0] + loc_data = predictions['loc'] + conf_data = predictions['conf'] + mask_data = predictions['mask'] + priors = predictions['priors'] + + if cfg.mask_type == mask_type.lincomb: + proto_data = predictions['proto'] + + score_data = predictions['score'] if cfg.use_mask_scoring else None + inst_data = predictions['inst'] if cfg.use_instance_coeff else None + + labels = [None] * len(targets) # Used in sem segm loss + + batch_size = loc_data.size(0) + num_priors = priors.size(0) + num_classes = self.num_classes + + # Match priors (default boxes) and ground truth boxes + # These tensors will be created with the same device as loc_data + loc_t = loc_data.new(batch_size, num_priors, 4) + gt_box_t = loc_data.new(batch_size, num_priors, 4) + conf_t = loc_data.new(batch_size, num_priors).long() + idx_t = loc_data.new(batch_size, num_priors).long() + + if cfg.use_class_existence_loss: + class_existence_t = loc_data.new(batch_size, num_classes-1) + + for idx in range(batch_size): + truths = targets[idx][:, :-1].data + labels[idx] = targets[idx][:, -1].data.long() + + if cfg.use_class_existence_loss: + # Construct a one-hot vector for each object and collapse it into an existence vector with max + # Also it's fine to include the crowd annotations here + class_existence_t[idx, :] = torch.eye(num_classes-1, device=conf_t.get_device())[labels[idx]].max(dim=0)[0] + + # Split the crowd annotations because they come bundled in + cur_crowds = num_crowds[idx] + if cur_crowds > 0: + split = lambda x: (x[-cur_crowds:], x[:-cur_crowds]) + crowd_boxes, truths = split(truths) + + # We don't use the crowd labels or masks + _, labels[idx] = split(labels[idx]) + _, masks[idx] = split(masks[idx]) + else: + crowd_boxes = None + + + match(self.pos_threshold, self.neg_threshold, + truths, priors.data, labels[idx], crowd_boxes, + loc_t, conf_t, idx_t, idx, loc_data[idx]) + + gt_box_t[idx, :, :] = truths[idx_t[idx]] + + # wrap targets + loc_t = 
Variable(loc_t, requires_grad=False) + conf_t = Variable(conf_t, requires_grad=False) + idx_t = Variable(idx_t, requires_grad=False) + + pos = conf_t > 0 + num_pos = pos.sum(dim=1, keepdim=True) + + # Shape: [batch,num_priors,4] + pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) + + losses = {} + + # Localization Loss (Smooth L1) + if cfg.train_boxes: + loc_p = loc_data.view(-1, 4) + loc_t = loc_t.view(-1, 4) + losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='none') * cfg.bbox_alpha + losses['B'][pos_idx.view(-1, 4) == False] = 0 + losses['B'] = losses['B'].sum() + + if cfg.train_masks: + if cfg.mask_type == mask_type.direct: + if cfg.use_gt_bboxes: + pos_masks = [] + for idx in range(batch_size): + pos_masks.append(masks[idx][idx_t[idx, pos[idx]]]) + masks_t = torch.cat(pos_masks, 0) + masks_p = mask_data[pos, :].view(-1, cfg.mask_dim) + losses['M'] = F.binary_cross_entropy(torch.clamp(masks_p, 0, 1), masks_t, reduction='sum') * cfg.mask_alpha + else: + losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks) + elif cfg.mask_type == mask_type.lincomb: + ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels) + if cfg.use_maskiou: + loss, maskiou_targets = ret + else: + loss = ret + losses.update(loss) + + if cfg.mask_proto_loss is not None: + if cfg.mask_proto_loss == 'l1': + losses['P'] = torch.mean(torch.abs(proto_data)) / self.l1_expected_area * self.l1_alpha + elif cfg.mask_proto_loss == 'disj': + losses['P'] = -torch.mean(torch.max(F.log_softmax(proto_data, dim=-1), dim=-1)[0]) + + # Confidence loss + if cfg.use_focal_loss: + if cfg.use_sigmoid_focal_loss: + losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t) + elif cfg.use_objectness_score: + losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t) + else: + losses['C'] = self.focal_conf_loss(conf_data, conf_t) + else: + if cfg.use_objectness_score: + losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors) + else: + losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size) + + # Mask IoU Loss + if cfg.use_maskiou and maskiou_targets is not None: + losses['I'] = self.mask_iou_loss(net, maskiou_targets) + + # These losses also don't depend on anchors + if cfg.use_class_existence_loss: + losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t) + if cfg.use_semantic_segmentation_loss: + losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels) + + # Divide all losses by the number of positives. + # Don't do it for loss[P] because that doesn't depend on the anchors. 
+ total_num_pos = num_pos.data.sum().float() + for k in losses: + if k not in ('P', 'E', 'S'): + losses[k] /= total_num_pos + else: + losses[k] /= batch_size + + # Loss Key: + # - B: Box Localization Loss + # - C: Class Confidence Loss + # - M: Mask Loss + # - P: Prototype Loss + # - D: Coefficient Diversity Loss + # - E: Class Existence Loss + # - S: Semantic Segmentation Loss + return losses + + def class_existence_loss(self, class_data, class_existence_t): + return cfg.class_existence_alpha * F.binary_cross_entropy_with_logits(class_data, class_existence_t, reduction='sum') + + def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'): + # Note num_classes here is without the background class so cfg.num_classes-1 + batch_size, num_classes, mask_h, mask_w = segment_data.size() + loss_s = 0 + + for idx in range(batch_size): + cur_segment = segment_data[idx] + cur_class_t = class_t[idx] + + with torch.no_grad(): + downsampled_masks = F.interpolate(mask_t[idx].unsqueeze(0), (mask_h, mask_w), + mode=interpolation_mode, align_corners=False).squeeze(0) + downsampled_masks = downsampled_masks.gt(0.5).float() + + # Construct Semantic Segmentation + segment_t = torch.zeros_like(cur_segment, requires_grad=False) + for obj_idx in range(downsampled_masks.size(0)): + segment_t[cur_class_t[obj_idx]] = torch.max(segment_t[cur_class_t[obj_idx]].float(), downsampled_masks[obj_idx]) + + loss_s += F.binary_cross_entropy_with_logits(cur_segment, segment_t, reduction='sum') + + return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha + + + def ohem_conf_loss(self, conf_data, conf_t, pos, num): + # Compute max conf across batch for hard negative mining + batch_conf = conf_data.view(-1, self.num_classes) + if cfg.ohem_use_most_confident: + # i.e. max(softmax) along classes > 0 + batch_conf = F.softmax(batch_conf, dim=1) + loss_c, _ = batch_conf[:, 1:].max(dim=1) + else: + # i.e. 
-softmax(class 0 confidence) + loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0] + + # Hard Negative Mining + loss_c = loss_c.view(num, -1) + loss_c[pos] = 0 # filter out pos boxes + loss_c[conf_t < 0] = 0 # filter out neutrals (conf_t = -1) + _, loss_idx = loss_c.sort(1, descending=True) + _, idx_rank = loss_idx.sort(1) + num_pos = pos.long().sum(1, keepdim=True) + num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) + neg = idx_rank < num_neg.expand_as(idx_rank) + + # Just in case there aren't enough negatives, don't start using positives as negatives + neg[pos] = 0 + neg[conf_t < 0] = 0 # Filter out neutrals + + # Confidence Loss Including Positive and Negative Examples + pos_idx = pos.unsqueeze(2).expand_as(conf_data) + neg_idx = neg.unsqueeze(2).expand_as(conf_data) + tensor_gt = (pos + neg).gt(0) + conf_p = conf_data.view(-1, self.num_classes) + targets_weighted = conf_t.view(-1) + loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='none') + loss_c = loss_c[tensor_gt.view(-1) == True] + + if cfg.use_class_balanced_conf: + # Lazy initialization + if self.class_instances is None: + self.class_instances = torch.zeros(self.num_classes, device=targets_weighted.device) + + classes, counts = targets_weighted.unique(return_counts=True) + + for _cls, _cnt in zip(classes.cpu().numpy(), counts.cpu().numpy()): + self.class_instances[_cls] += _cnt + + self.total_instances += targets_weighted.size(0) + + weighting = 1 - (self.class_instances[targets_weighted] / self.total_instances) + weighting = torch.clamp(weighting, min=1/self.num_classes) + + # If you do the math, the average weight of self.class_instances is this + avg_weight = (self.num_classes - 1) / self.num_classes + + loss_c = (loss_c * weighting).sum() / avg_weight + else: + loss_c = loss_c.sum() + + return cfg.conf_alpha * loss_c + + def focal_conf_loss(self, conf_data, conf_t): + """ + Focal loss as described in https://arxiv.org/pdf/1708.02002.pdf + Adapted from https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py + Note that this uses softmax and not the original sigmoid from the paper. + """ + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss + + logpt = F.log_softmax(conf_data, dim=-1) + logpt = logpt.gather(1, conf_t.unsqueeze(-1)) + logpt = logpt.view(-1) + pt = logpt.exp() + + # I adapted the alpha_t calculation here from + # https://github.com/pytorch/pytorch/blob/master/modules/detectron/softmax_focal_loss_op.cu + # You'd think you want all the alphas to sum to one, but in the original implementation they + # just give background an alpha of 1-alpha and each forground an alpha of alpha. + background = (conf_t == 0).float() + at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) + + loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + + # See comment above for keep + return cfg.conf_alpha * (loss * keep).sum() + + def focal_conf_sigmoid_loss(self, conf_data, conf_t): + """ + Focal loss but using sigmoid like the original paper. + Note: To make things mesh easier, the network still predicts 81 class confidences in this mode. 
+ Because retinanet originally only predicts 80, we simply just don't use conf_data[..., 0] + """ + num_classes = conf_data.size(-1) + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, num_classes) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # can't mask with -1, so filter that out + + # Compute a one-hot embedding of conf_t + # From https://github.com/kuangliu/pytorch-retinanet/blob/master/utils.py + conf_one_t = torch.eye(num_classes, device=conf_t.get_device())[conf_t] + conf_pm_t = conf_one_t * 2 - 1 # -1 if background, +1 if forground for specific class + + logpt = F.logsigmoid(conf_data * conf_pm_t) # note: 1 - sigmoid(x) = sigmoid(-x) + pt = logpt.exp() + + at = cfg.focal_loss_alpha * conf_one_t + (1 - cfg.focal_loss_alpha) * (1 - conf_one_t) + at[..., 0] = 0 # Set alpha for the background class to 0 because sigmoid focal loss doesn't use it + + loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + loss = keep * loss.sum(dim=-1) + + return cfg.conf_alpha * loss.sum() + + def focal_conf_objectness_loss(self, conf_data, conf_t): + """ + Instead of using softmax, use class[0] to be the objectness score and do sigmoid focal loss on that. + Then for the rest of the classes, softmax them and apply CE for only the positive examples. + + If class[0] = 1 implies forground and class[0] = 0 implies background then you achieve something + similar during test-time to softmax by setting class[1:] = softmax(class[1:]) * class[0] and invert class[0]. + """ + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss + + background = (conf_t == 0).float() + at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) + + logpt = F.logsigmoid(conf_data[:, 0]) * (1 - background) + F.logsigmoid(-conf_data[:, 0]) * background + pt = logpt.exp() + + obj_loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + + # All that was the objectiveness loss--now time for the class confidence loss + pos_mask = conf_t > 0 + conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes + conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here + + class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') + + return cfg.conf_alpha * (class_loss + (obj_loss * keep).sum()) + + def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors): + """ + Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO. + Then for the rest of the classes, softmax them and apply CE for only the positive examples. 
+ """ + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + pos_mask = (conf_t > 0) + neg_mask = (conf_t == 0) + + obj_data = conf_data[:, 0] + obj_data_pos = obj_data[pos_mask] + obj_data_neg = obj_data[neg_mask] + + # Don't be confused, this is just binary cross entropy similified + obj_neg_loss = - F.logsigmoid(-obj_data_neg).sum() + + with torch.no_grad(): + pos_priors = priors.unsqueeze(0).expand(batch_size, -1, -1).reshape(-1, 4)[pos_mask, :] + + boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors) + boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors) + + iou_targets = elemwise_box_iou(boxes_pred, boxes_targ) + + obj_pos_loss = - iou_targets * F.logsigmoid(obj_data_pos) - (1 - iou_targets) * F.logsigmoid(-obj_data_pos) + obj_pos_loss = obj_pos_loss.sum() + + # All that was the objectiveness loss--now time for the class confidence loss + conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes + conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here + + class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') + + return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss) + + + def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors, masks): + """ Crops the gt masks using the predicted bboxes, scales them down, and outputs the BCE loss. """ + loss_m = 0 + for idx in range(mask_data.size(0)): + with torch.no_grad(): + cur_pos_idx = pos_idx[idx, :, :] + cur_pos_idx_squeezed = cur_pos_idx[:, 1] + + # Shape: [num_priors, 4], decoded predicted bboxes + pos_bboxes = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors) + pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1) + pos_lookup = idx_t[idx, cur_pos_idx_squeezed] + + cur_masks = masks[idx] + pos_masks = cur_masks[pos_lookup, :, :] + + # Convert bboxes to absolute coordinates + num_pos, img_height, img_width = pos_masks.size() + + # Take care of all the bad behavior that can be caused by out of bounds coordinates + x1, x2 = sanitize_coordinates(pos_bboxes[:, 0], pos_bboxes[:, 2], img_width) + y1, y2 = sanitize_coordinates(pos_bboxes[:, 1], pos_bboxes[:, 3], img_height) + + # Crop each gt mask with the predicted bbox and rescale to the predicted mask size + # Note that each bounding box crop is a different size so I don't think we can vectorize this + scaled_masks = [] + for jdx in range(num_pos): + tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]] + + # Restore any dimensions we've left out because our bbox was 1px wide + while tmp_mask.dim() < 2: + tmp_mask = tmp_mask.unsqueeze(0) + + new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0), cfg.mask_size) + scaled_masks.append(new_mask.view(1, -1)) + + mask_t = torch.cat(scaled_masks, 0).gt(0.5).float() # Threshold downsampled mask + + pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :] + loss_m += F.binary_cross_entropy(torch.clamp(pos_mask_data, 0, 1), mask_t, reduction='sum') * cfg.mask_alpha + + return loss_m + + + def coeff_diversity_loss(self, coeffs, instance_t): + """ + coeffs should be size [num_pos, num_coeffs] + instance_t should be size [num_pos] and be values from 0 to num_instances-1 + """ + num_pos = coeffs.size(0) + instance_t = instance_t.view(-1) # juuuust to make sure + + coeffs_norm = F.normalize(coeffs, dim=1) + cos_sim = coeffs_norm @ coeffs_norm.t() + + inst_eq = (instance_t[:, None].expand_as(cos_sim) == instance_t[None, :].expand_as(cos_sim)).float() + 
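+        # Descriptive note (editorial annotation, not part of the upstream file):
+        # inst_eq is a [num_pos, num_pos] indicator matrix whose (i, j) entry is 1
+        # when coefficient vectors i and j were matched to the same ground-truth
+        # instance, and 0 otherwise; it selects which pairs use cosine distance.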
+ # Rescale to be between 0 and 1 + cos_sim = (cos_sim + 1) / 2 + + # If they're the same instance, use cosine distance, else use cosine similarity + loss = (1 - cos_sim) * inst_eq + cos_sim * (1 - inst_eq) + + # Only divide by num_pos once because we're summing over a num_pos x num_pos tensor + # and all the losses will be divided by num_pos at the end, so just one extra time. + return cfg.mask_proto_coeff_diversity_alpha * loss.sum() / num_pos + + + def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels, interpolation_mode='bilinear'): + mask_h = proto_data.size(1) + mask_w = proto_data.size(2) + + process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop + + if cfg.mask_proto_remove_empty_masks: + # Make sure to store a copy of this because we edit it to get rid of all-zero masks + pos = pos.clone() + + loss_m = 0 + loss_d = 0 # Coefficient diversity loss + + maskiou_t_list = [] + maskiou_net_input_list = [] + label_t_list = [] + + for idx in range(mask_data.size(0)): + with torch.no_grad(): + downsampled_masks = F.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w), + mode=interpolation_mode, align_corners=False).squeeze(0) + downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous() + + if cfg.mask_proto_binarize_downsampled_gt: + downsampled_masks = downsampled_masks.gt(0.5).float() + + if cfg.mask_proto_remove_empty_masks: + # Get rid of gt masks that are so small they get downsampled away + very_small_masks = (downsampled_masks.sum(dim=(0,1)) <= 0.0001) + for i in range(very_small_masks.size(0)): + if very_small_masks[i]: + pos[idx, idx_t[idx] == i] = 0 + + if cfg.mask_proto_reweight_mask_loss: + # Ensure that the gt is binary + if not cfg.mask_proto_binarize_downsampled_gt: + bin_gt = downsampled_masks.gt(0.5).float() + else: + bin_gt = downsampled_masks + + gt_foreground_norm = bin_gt / (torch.sum(bin_gt, dim=(0,1), keepdim=True) + 0.0001) + gt_background_norm = (1-bin_gt) / (torch.sum(1-bin_gt, dim=(0,1), keepdim=True) + 0.0001) + + mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm + mask_reweighting *= mask_h * mask_w + + cur_pos = pos[idx] + pos_idx_t = idx_t[idx, cur_pos] + + if process_gt_bboxes: + # Note: this is in point-form + if cfg.mask_proto_crop_with_pred_box: + pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos] + else: + pos_gt_box_t = gt_box_t[idx, cur_pos] + + if pos_idx_t.size(0) == 0: + continue + + proto_masks = proto_data[idx] + proto_coef = mask_data[idx, cur_pos, :] + if cfg.use_mask_scoring: + mask_scores = score_data[idx, cur_pos, :] + + if cfg.mask_proto_coeff_diversity_loss: + if inst_data is not None: + div_coeffs = inst_data[idx, cur_pos, :] + else: + div_coeffs = proto_coef + + loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t) + + # If we have over the allowed number of masks, select a random sample + old_num_pos = proto_coef.size(0) + if old_num_pos > cfg.masks_to_train: + perm = torch.randperm(proto_coef.size(0)) + select = perm[:cfg.masks_to_train] + + proto_coef = proto_coef[select, :] + pos_idx_t = pos_idx_t[select] + + if process_gt_bboxes: + pos_gt_box_t = pos_gt_box_t[select, :] + if cfg.use_mask_scoring: + mask_scores = mask_scores[select, :] + + num_pos = proto_coef.size(0) + mask_t = downsampled_masks[:, :, pos_idx_t] + label_t = labels[idx][pos_idx_t] + + # Size: [mask_h, mask_w, num_pos] + #pred_masks = proto_masks @ proto_coef.t() + 
pred_masks = torch.matmul(proto_masks, proto_coef.T) + pred_masks = cfg.mask_proto_mask_activation(pred_masks) + + if cfg.mask_proto_double_loss: + if cfg.mask_proto_mask_activation == activation_func.sigmoid: + pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='sum') + else: + pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='sum') + + loss_m += cfg.mask_proto_double_loss_alpha * pre_loss + + if cfg.mask_proto_crop: + pred_masks = crop(pred_masks, pos_gt_box_t) + + if cfg.mask_proto_mask_activation == activation_func.sigmoid: + pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='none') + else: + pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='none') + + if cfg.mask_proto_normalize_mask_loss_by_sqrt_area: + gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True) + pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001) + + if cfg.mask_proto_reweight_mask_loss: + pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t] + + if cfg.mask_proto_normalize_emulate_roi_pooling: + weight = mask_h * mask_w if cfg.mask_proto_crop else 1 + pos_gt_csize = center_size(pos_gt_box_t) + gt_box_width = pos_gt_csize[:, 2] * mask_w + gt_box_height = pos_gt_csize[:, 3] * mask_h + pre_loss = pre_loss.sum(dim=(0, 1)) / gt_box_width / gt_box_height * weight + + # If the number of masks were limited scale the loss accordingly + if old_num_pos > num_pos: + pre_loss *= old_num_pos / num_pos + + loss_m += torch.sum(pre_loss) + + if cfg.use_maskiou: + if cfg.discard_mask_area > 0: + gt_mask_area = torch.sum(mask_t, dim=(0, 1)) + select = gt_mask_area > cfg.discard_mask_area + + if torch.sum(select) < 1: + continue + + pos_gt_box_t = pos_gt_box_t[select, :] + pred_masks = pred_masks[:, :, select] + mask_t = mask_t[:, :, select] + label_t = label_t[select] + + maskiou_net_input = pred_masks.permute(2, 0, 1).contiguous().unsqueeze(1) + pred_masks = pred_masks.gt(0.5).float() + maskiou_t = self._mask_iou(pred_masks, mask_t) + + maskiou_net_input_list.append(maskiou_net_input) + maskiou_t_list.append(maskiou_t) + label_t_list.append(label_t) + + losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w} + + if cfg.mask_proto_coeff_diversity_loss: + losses['D'] = loss_d + + if cfg.use_maskiou: + # discard_mask_area discarded every mask in the batch, so nothing to do here + if len(maskiou_t_list) == 0: + return losses, None + + maskiou_t = torch.cat(maskiou_t_list) + label_t = torch.cat(label_t_list) + maskiou_net_input = torch.cat(maskiou_net_input_list) + + num_samples = maskiou_t.size(0) + if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train: + perm = torch.randperm(num_samples) + select = perm[:cfg.masks_to_train] + maskiou_t = maskiou_t[select] + label_t = label_t[select] + maskiou_net_input = maskiou_net_input[select] + + return losses, [maskiou_net_input, maskiou_t, label_t] + + return losses + + def _mask_iou(self, mask1, mask2): + intersection = torch.sum(mask1*mask2, dim=(0, 1)) + area1 = torch.sum(mask1, dim=(0, 1)) + area2 = torch.sum(mask2, dim=(0, 1)) + union = (area1 + area2) - intersection + ret = intersection / union + return ret + + def mask_iou_loss(self, net, maskiou_targets): + maskiou_net_input, maskiou_t, label_t = maskiou_targets + + maskiou_p = net.maskiou_net(maskiou_net_input) + + label_t = label_t[:, None] + maskiou_p = torch.gather(maskiou_p, dim=1, index=label_t).view(-1) + + loss_i = F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='sum') + + return loss_i * cfg.maskiou_alpha diff --git 
a/PyTorch/contrib/cv/detection/YOLACT/modelzoo_level.txt b/PyTorch/contrib/cv/detection/YOLACT/modelzoo_level.txt index 282c3ff3b30404101a02cc86c5bfeb6308d198e7..c5c4a9d8001fae97c66831abcfdbe02dd6261c37 100644 --- a/PyTorch/contrib/cv/detection/YOLACT/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/YOLACT/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PrecisionStatus:OK +FuncStatus:OK +PrecisionStatus:OK PerfStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT/yolact.py b/PyTorch/contrib/cv/detection/YOLACT/yolact.py index e65f748d32a040c69ff12a17c19f62cde71fce6f..3174bde53609bcc463b977ab43dc7c3e3ec70e29 100644 --- a/PyTorch/contrib/cv/detection/YOLACT/yolact.py +++ b/PyTorch/contrib/cv/detection/YOLACT/yolact.py @@ -1,754 +1,754 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import torch, torchvision -import torch.nn as nn -import torch.nn.functional as F -from torchvision.models.resnet import Bottleneck -import numpy as np -from itertools import product -from math import sqrt -from typing import List -from collections import defaultdict - -from data.config import cfg, mask_type -from layers import Detect -from layers.interpolate import InterpolateModule -from backbone import construct_backbone - -import torch.backends.cudnn as cudnn -from utils import timer -from utils.functions import MovingAverage, make_net - -# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. -# See the bug report here: https://github.com/pytorch/pytorch/issues/17108 - -# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules -use_jit = False -if not use_jit: - print('Multiple GPUs detected! 
Turning off JIT.') - -ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module -script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn - - - -class Concat(nn.Module): - def __init__(self, nets, extra_params): - super().__init__() - - self.nets = nn.ModuleList(nets) - self.extra_params = extra_params - - def forward(self, x): - # Concat each along the channel dimension - return torch.cat([net(x) for net in self.nets], dim=1, **self.extra_params) - -prior_cache = defaultdict(lambda: None) - -class PredictionModule(nn.Module): - """ - The (c) prediction module adapted from DSSD: - https://arxiv.org/pdf/1701.06659.pdf - - Note that this is slightly different to the module in the paper - because the Bottleneck block actually has a 3x3 convolution in - the middle instead of a 1x1 convolution. Though, I really can't - be arsed to implement it myself, and, who knows, this might be - better. - - Args: - - in_channels: The input feature size. - - out_channels: The output feature size (must be a multiple of 4). - - aspect_ratios: A list of lists of priorbox aspect ratios (one list per scale). - - scales: A list of priorbox scales relative to this layer's convsize. - For instance: If this layer has convouts of size 30x30 for - an image of size 600x600, the 'default' (scale - of 1) for this layer would produce bounding - boxes with an area of 20x20px. If the scale is - .5 on the other hand, this layer would consider - bounding boxes with area 10x10px, etc. - - parent: If parent is a PredictionModule, this module will use all the layers - from parent instead of from this module. - """ - - def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0): - super().__init__() - - self.num_classes = cfg.num_classes - self.mask_dim = cfg.mask_dim # Defined by Yolact - self.num_priors = sum(len(x)*len(scales) for x in aspect_ratios) - self.parent = [parent] # Don't include this in the state dict - self.index = index - self.num_heads = cfg.num_heads # Defined by Yolact - - if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: - self.mask_dim = self.mask_dim // self.num_heads - - if cfg.mask_proto_prototypes_as_features: - in_channels += self.mask_dim - - if parent is None: - if cfg.extra_head_net is None: - out_channels = in_channels - else: - self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net) - - if cfg.use_prediction_module: - self.block = Bottleneck(out_channels, out_channels // 4) - self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True) - self.bn = nn.BatchNorm2d(out_channels) - - self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4, **cfg.head_layer_params) - self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params) - self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, **cfg.head_layer_params) - - if cfg.use_mask_scoring: - self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params) - - if cfg.use_instance_coeff: - self.inst_layer = nn.Conv2d(out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params) - - # What is this ugly lambda doing in the middle of all this clean prediction module code? - def make_extra(num_layers): - if num_layers == 0: - return lambda x: x - else: - # Looks more complicated than it is. 
This just creates an array of num_layers alternating conv-relu - return nn.Sequential(*sum([[ - nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), - nn.ReLU(inplace=True) - ] for _ in range(num_layers)], [])) - - self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers] - - if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate: - self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1) - - self.aspect_ratios = aspect_ratios - self.scales = scales - - self.priors = None - self.last_conv_size = None - self.last_img_size = None - - def forward(self, x): - """ - Args: - - x: The convOut from a layer in the backbone network - Size: [batch_size, in_channels, conv_h, conv_w]) - - Returns a tuple (bbox_coords, class_confs, mask_output, prior_boxes) with sizes - - bbox_coords: [batch_size, conv_h*conv_w*num_priors, 4] - - class_confs: [batch_size, conv_h*conv_w*num_priors, num_classes] - - mask_output: [batch_size, conv_h*conv_w*num_priors, mask_dim] - - prior_boxes: [conv_h*conv_w*num_priors, 4] - """ - # In case we want to use another module's layers - src = self if self.parent[0] is None else self.parent[0] - - conv_h = x.size(2) - conv_w = x.size(3) - - if cfg.extra_head_net is not None: - x = src.upfeature(x) - - if cfg.use_prediction_module: - # The two branches of PM design (c) - a = src.block(x) - - b = src.conv(x) - b = src.bn(b) - b = F.relu(b) - - # TODO: Possibly switch this out for a product - x = a + b - - bbox_x = src.bbox_extra(x) - conf_x = src.conf_extra(x) - mask_x = src.mask_extra(x) - - bbox = src.bbox_layer(bbox_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) - conf = src.conf_layer(conf_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) - - if cfg.eval_mask_branch: - mask = src.mask_layer(mask_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) - else: - mask = torch.zeros(x.size(0), bbox.size(1), self.mask_dim, device=bbox.device) - - if cfg.use_mask_scoring: - score = src.score_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 1) - - if cfg.use_instance_coeff: - inst = src.inst_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, cfg.num_instance_coeffs) - - # See box_utils.decode for an explanation of this - if cfg.use_yolo_regressors: - bbox[:, :, :2] = torch.sigmoid(bbox[:, :, :2]) - 0.5 - bbox[:, :, 0] /= conv_w - bbox[:, :, 1] /= conv_h - - if cfg.eval_mask_branch: - if cfg.mask_type == mask_type.direct: - mask = torch.sigmoid(mask) - elif cfg.mask_type == mask_type.lincomb: - mask = cfg.mask_proto_coeff_activation(mask) - - if cfg.mask_proto_coeff_gate: - gate = src.gate_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) - mask = mask * torch.sigmoid(gate) - - if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: - mask = F.pad(mask, (self.index * self.mask_dim, (self.num_heads - self.index - 1) * self.mask_dim), mode='constant', value=0) - - priors = self.make_priors(conv_h, conv_w, x.device) - - preds = { 'loc': bbox, 'conf': conf, 'mask': mask, 'priors': priors } - - if cfg.use_mask_scoring: - preds['score'] = score - - if cfg.use_instance_coeff: - preds['inst'] = inst - - return preds - - def make_priors(self, conv_h, conv_w, device): - """ Note that priors are [x,y,width,height] where (x,y) is the center of the box. 
""" - global prior_cache - size = (conv_h, conv_w) - - with timer.env('makepriors'): - if self.last_img_size != (cfg._tmp_img_w, cfg._tmp_img_h): - prior_data = [] - - # Iteration order is important (it has to sync up with the convout) - for j, i in product(range(conv_h), range(conv_w)): - # +0.5 because priors are in center-size notation - x = (i + 0.5) / conv_w - y = (j + 0.5) / conv_h - - for ars in self.aspect_ratios: - for scale in self.scales: - for ar in ars: - if not cfg.backbone.preapply_sqrt: - ar = sqrt(ar) - - if cfg.backbone.use_pixel_scales: - w = scale * ar / cfg.max_size - h = scale / ar / cfg.max_size - else: - w = scale * ar / conv_w - h = scale / ar / conv_h - - # This is for backward compatability with a bug where I made everything square by accident - if cfg.backbone.use_square_anchors: - h = w - - prior_data += [x, y, w, h] - - self.priors = torch.Tensor(prior_data).to(device).view(-1, 4).detach() - self.priors.requires_grad = False - self.last_img_size = (cfg._tmp_img_w, cfg._tmp_img_h) - self.last_conv_size = (conv_w, conv_h) - prior_cache[size] = None - elif self.priors.device != device: - # This whole weird situation is so that DataParalell doesn't copy the priors each iteration - if prior_cache[size] is None: - prior_cache[size] = {} - - if device not in prior_cache[size]: - prior_cache[size][device] = self.priors.to(device) - - self.priors = prior_cache[size][device] - - return self.priors - -class FPN(ScriptModuleWrapper): - """ - Implements a general version of the FPN introduced in - https://arxiv.org/pdf/1612.03144.pdf - - Parameters (in cfg.fpn): - - num_features (int): The number of output features in the fpn layers. - - interpolation_mode (str): The mode to pass to F.interpolate. - - num_downsample (int): The number of downsampled layers to add onto the selected layers. - These extra layers are downsampled from the last selected layer. - - Args: - - in_channels (list): For each conv layer you supply in the forward pass, - how many features will it have? - """ - __constants__ = ['interpolation_mode', 'num_downsample', 'use_conv_downsample', 'relu_pred_layers', - 'lat_layers', 'pred_layers', 'downsample_layers', 'relu_downsample_layers'] - - def __init__(self, in_channels): - super().__init__() - - self.lat_layers = nn.ModuleList([ - nn.Conv2d(x, cfg.fpn.num_features, kernel_size=1) - for x in reversed(in_channels) - ]) - - # This is here for backwards compatability - padding = 1 if cfg.fpn.pad else 0 - self.pred_layers = nn.ModuleList([ - nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=padding) - for _ in in_channels - ]) - - if cfg.fpn.use_conv_downsample: - self.downsample_layers = nn.ModuleList([ - nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=1, stride=2) - for _ in range(cfg.fpn.num_downsample) - ]) - - self.interpolation_mode = cfg.fpn.interpolation_mode - self.num_downsample = cfg.fpn.num_downsample - self.use_conv_downsample = cfg.fpn.use_conv_downsample - self.relu_downsample_layers = cfg.fpn.relu_downsample_layers - self.relu_pred_layers = cfg.fpn.relu_pred_layers - - @script_method_wrapper - def forward(self, convouts:List[torch.Tensor]): - """ - Args: - - convouts (list): A list of convouts for the corresponding layers in in_channels. - Returns: - - A list of FPN convouts in the same order as x with extra downsample layers if requested. 
- """ - - out = [] - x = torch.zeros(1, device=convouts[0].device) - for i in range(len(convouts)): - out.append(x) - - # For backward compatability, the conv layers are stored in reverse but the input and output is - # given in the correct order. Thus, use j=-i-1 for the input and output and i for the conv layers. - j = len(convouts) - for lat_layer in self.lat_layers: - j -= 1 - - if j < len(convouts) - 1: - _, _, h, w = convouts[j].size() - x = F.interpolate(x, size=(h, w), mode=self.interpolation_mode, align_corners=False) - - x = x + lat_layer(convouts[j]) - out[j] = x - - # This janky second loop is here because TorchScript. - j = len(convouts) - for pred_layer in self.pred_layers: - j -= 1 - out[j] = pred_layer(out[j]) - - if self.relu_pred_layers: - F.relu(out[j], inplace=True) - - cur_idx = len(out) - - # In the original paper, this takes care of P6 - if self.use_conv_downsample: - for downsample_layer in self.downsample_layers: - out.append(downsample_layer(out[-1])) - else: - for idx in range(self.num_downsample): - # Note: this is an untested alternative to out.append(out[-1][:, :, ::2, ::2]). Thanks TorchScript. - out.append(nn.functional.max_pool2d(out[-1], 1, stride=2)) - - if self.relu_downsample_layers: - for idx in range(len(out) - cur_idx): - out[idx] = F.relu(out[idx + cur_idx], inplace=False) - - return out - -class FastMaskIoUNet(ScriptModuleWrapper): - - def __init__(self): - super().__init__() - input_channels = 1 - last_layer = [(cfg.num_classes-1, 1, {})] - self.maskiou_net, _ = make_net(input_channels, cfg.maskiou_net + last_layer, include_last_relu=True) - - def forward(self, x): - x = self.maskiou_net(x) - maskiou_p = F.max_pool2d(x, kernel_size=x.size()[2:]).squeeze(-1).squeeze(-1) - - return maskiou_p - - - -class Yolact(nn.Module): - """ - - - ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ - ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ - ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ - ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ - ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ - ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ - - - You can set the arguments by changing them in the backbone config object in config.py. - - Parameters (in cfg.backbone): - - selected_layers: The indices of the conv layers to use for prediction. - - pred_scales: A list with len(selected_layers) containing tuples of scales (see PredictionModule) - - pred_aspect_ratios: A list of lists of aspect ratios with len(selected_layers) (see PredictionModule) - """ - - def __init__(self): - super().__init__() - - self.backbone = construct_backbone(cfg.backbone) #backbone: resnetbackbone. backbone_modules:{list:104}. bn1:{BatchNorm2d} - - if cfg.freeze_bn: # it's true - self.freeze_bn() - - # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early! 
- if cfg.mask_type == mask_type.direct: - cfg.mask_dim = cfg.mask_size**2 - elif cfg.mask_type == mask_type.lincomb: # the module will execute this branch - if cfg.mask_proto_use_grid: - self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file)) - self.num_grids = self.grid.size(0) - else: # the module will execute this branch - self.num_grids = 0 - - self.proto_src = cfg.mask_proto_src - - if self.proto_src is None: in_channels = 3 - elif cfg.fpn is not None: in_channels = cfg.fpn.num_features - else: in_channels = self.backbone.channels[self.proto_src] - in_channels += self.num_grids - #in_channels will be 256 - # The include_last_relu=false here is because we might want to change it to another function - self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False) - - if cfg.mask_proto_bias: - cfg.mask_dim += 1 - - - self.selected_layers = cfg.backbone.selected_layers - src_channels = self.backbone.channels - - if cfg.use_maskiou: #false - self.maskiou_net = FastMaskIoUNet() - - if cfg.fpn is not None: #true - # Some hacky rewiring to accomodate the FPN - self.fpn = FPN([src_channels[i] for i in self.selected_layers]) - self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample)) - src_channels = [cfg.fpn.num_features] * len(self.selected_layers) - - - self.prediction_layers = nn.ModuleList() - cfg.num_heads = len(self.selected_layers) - - for idx, layer_idx in enumerate(self.selected_layers): - # If we're sharing prediction module weights, have every module's parent be the first one - parent = None - if cfg.share_prediction_module and idx > 0: #cfg.share_prediction_module is True - parent = self.prediction_layers[0] - - pred = PredictionModule(src_channels[layer_idx], src_channels[layer_idx], - aspect_ratios = cfg.backbone.pred_aspect_ratios[idx], - scales = cfg.backbone.pred_scales[idx], - parent = parent, - index = idx) - self.prediction_layers.append(pred) - - # Extra parameters for the extra losses - if cfg.use_class_existence_loss: #false - # This comes from the smallest layer selected - # Also note that cfg.num_classes includes background - self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1) - - if cfg.use_semantic_segmentation_loss: #true - self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1) - - # For use in evaluation - self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k, - conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh) - - def save_weights(self, path): - """ Saves the model's weights using compression because the file sizes were getting too big. """ - torch.save(self.state_dict(), path) - - def load_weights(self, path, useCuda = True): - """ Loads weights from a compressed save file. """ - state_dict = torch.load(path) if useCuda else torch.load(path, map_location=torch.device('cpu')) - - # For backward compatability, remove these (the new variable is called layers) - for key in list(state_dict.keys()): - if key.startswith('backbone.layer') and not key.startswith('backbone.layers'): - del state_dict[key] - - # Also for backward compatibility with v1.0 weights, do this check - if key.startswith('fpn.downsample_layers.'): - if cfg.fpn is not None and int(key.split('.')[2]) >= cfg.fpn.num_downsample: - del state_dict[key] - self.load_state_dict(state_dict) - - def init_weights(self, backbone_path): - """ Initialize weights for training. """ - # Initialize the backbone with the pretrained weights. 
- self.backbone.init_backbone(backbone_path) - - conv_constants = getattr(nn.Conv2d(1, 1, 1), '__constants__') - - # Quick lambda to test if one list contains the other - def all_in(x, y): - for _x in x: - if _x not in y: - return False - return True - - # Initialize the rest of the conv layers with xavier - for name, module in self.named_modules(): - # See issue #127 for why we need such a complicated condition if the module is a WeakScriptModuleProxy - # Broke in 1.3 (see issue #175), WeakScriptModuleProxy was turned into just ScriptModule. - # Broke in 1.4 (see issue #292), where RecursiveScriptModule is the new star of the show. - # Note that this might break with future pytorch updates, so let me know if it does - is_script_conv = False - if 'Script' in type(module).__name__: - # 1.4 workaround: now there's an original_name member so just use that - if hasattr(module, 'original_name'): - is_script_conv = 'Conv' in module.original_name - # 1.3 workaround: check if this has the same constants as a conv module - else: - is_script_conv = ( - all_in(module.__dict__['_constants_set'], conv_constants) - and all_in(conv_constants, module.__dict__['_constants_set'])) - - is_conv_layer = isinstance(module, nn.Conv2d) or is_script_conv - - if is_conv_layer and module not in self.backbone.backbone_modules: - nn.init.xavier_uniform_(module.weight.data) - - if module.bias is not None: - if cfg.use_focal_loss and 'conf_layer' in name: - if not cfg.use_sigmoid_focal_loss: - # Initialize the last layer as in the focal loss paper. - # Because we use softmax and not sigmoid, I had to derive an alternate expression - # on a notecard. Define pi to be the probability of outputting a foreground detection. - # Then let z = sum(exp(x)) - exp(x_0). Finally let c be the number of foreground classes. - # Chugging through the math, this gives us - # x_0 = log(z * (1 - pi) / pi) where 0 is the background class - # x_i = log(z / c) for all i > 0 - # For simplicity (and because we have a degree of freedom here), set z = 1. 
Then we have - # x_0 = log((1 - pi) / pi) note: don't split up the log for numerical stability - # x_i = -log(c) for all i > 0 - module.bias.data[0] = np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) - module.bias.data[1:] = -np.log(module.bias.size(0) - 1) - else: - module.bias.data[0] = -np.log(cfg.focal_loss_init_pi / (1 - cfg.focal_loss_init_pi)) - module.bias.data[1:] = -np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) - else: - module.bias.data.zero_() - - def train(self, mode=True): - super().train(mode) - - if cfg.freeze_bn: - self.freeze_bn() - - def freeze_bn(self, enable=False): - """ Adapted from https://discuss.pytorch.org/t/how-to-train-with-frozen-batchnorm/12106/8 """ - for module in self.modules(): - if isinstance(module, nn.BatchNorm2d): - module.train() if enable else module.eval() - - module.weight.requires_grad = enable - module.bias.requires_grad = enable - - def forward(self, x): - """ The input should be of size [batch_size, 3, img_h, img_w] """ - _, _, img_h, img_w = x.size() - cfg._tmp_img_h = img_h - cfg._tmp_img_w = img_w - - with timer.env('backbone'): - outs = self.backbone(x) - - if cfg.fpn is not None: - with timer.env('fpn'): - # Use backbone.selected_layers because we overwrote self.selected_layers - outs = [outs[i] for i in cfg.backbone.selected_layers] - outs = self.fpn(outs) - - proto_out = None - if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: - with timer.env('proto'): - proto_x = x if self.proto_src is None else outs[self.proto_src] - - if self.num_grids > 0: - grids = self.grid.repeat(proto_x.size(0), 1, 1, 1) - proto_x = torch.cat([proto_x, grids], dim=1) - - proto_out = self.proto_net(proto_x) - proto_out = cfg.mask_proto_prototype_activation(proto_out) - - if cfg.mask_proto_prototypes_as_features: - # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary - proto_downsampled = proto_out.clone() - - if cfg.mask_proto_prototypes_as_features_no_grad: - proto_downsampled = proto_out.detach() - - # Move the features last so the multiplication is easy - proto_out = proto_out.permute(0, 2, 3, 1).contiguous() - - if cfg.mask_proto_bias: - bias_shape = [x for x in proto_out.size()] - bias_shape[-1] = 1 - proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) - - - with timer.env('pred_heads'): - pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } - - if cfg.use_mask_scoring: - pred_outs['score'] = [] - - if cfg.use_instance_coeff: - pred_outs['inst'] = [] - - for idx, pred_layer in zip(self.selected_layers, self.prediction_layers): - pred_x = outs[idx] - - if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features: - # Scale the prototypes down to the current prediction layer's size and add it as inputs - proto_downsampled = F.interpolate(proto_downsampled, size=outs[idx].size()[2:], mode='bilinear', align_corners=False) - pred_x = torch.cat([pred_x, proto_downsampled], dim=1) - - # A hack for the way dataparallel works - if cfg.share_prediction_module and pred_layer is not self.prediction_layers[0]: - pred_layer.parent = [self.prediction_layers[0]] - - p = pred_layer(pred_x) - - for k, v in p.items(): - pred_outs[k].append(v) - - for k, v in pred_outs.items(): - pred_outs[k] = torch.cat(v, -2) - - if proto_out is not None: - pred_outs['proto'] = proto_out - - if self.training: - # For the extra loss functions - if cfg.use_class_existence_loss: - pred_outs['classes'] = self.class_existence_fc(outs[-1].mean(dim=(2, 3))) - - if 
cfg.use_semantic_segmentation_loss: - pred_outs['segm'] = self.semantic_seg_conv(outs[0]) - - return pred_outs - else: - if cfg.use_mask_scoring: - pred_outs['score'] = torch.sigmoid(pred_outs['score']) - - if cfg.use_focal_loss: - if cfg.use_sigmoid_focal_loss: - # Note: even though conf[0] exists, this mode doesn't train it so don't use it - pred_outs['conf'] = torch.sigmoid(pred_outs['conf']) - if cfg.use_mask_scoring: - pred_outs['conf'] *= pred_outs['score'] - elif cfg.use_objectness_score: - # See focal_loss_sigmoid in multibox_loss.py for details - objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) - pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax(pred_outs['conf'][:, :, 1:], -1) - pred_outs['conf'][:, :, 0 ] = 1 - objectness - else: - pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) - else: - - if cfg.use_objectness_score: - objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) - - pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \ - * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1) - - else: - pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) - - return self.detect(pred_outs) - - - - -# Some testing code -if __name__ == '__main__': - from utils.functions import init_console - init_console() - - # Use the first argument to set the config if you want - import sys - if len(sys.argv) > 1: - from data.config import set_cfg - set_cfg(sys.argv[1]) - - net = Yolact() - net.train() - net.init_weights(backbone_path='weights/' + cfg.backbone.path) - - # GPU - net = net.cuda() - torch.set_default_tensor_type('torch.cuda.FloatTensor') - - x = torch.zeros((1, 3, cfg.max_size, cfg.max_size)) - y = net(x) - - for p in net.prediction_layers: - print(p.last_conv_size) - - print() - for k, a in y.items(): - print(k + ': ', a.size(), torch.sum(a)) - exit() - - net(x) - # timer.disable('pass2') - avg = MovingAverage() - try: - while True: - timer.reset() - with timer.env('everything else'): - net(x) - avg.add(timer.total_time()) - print('\033[2J') # Moves console cursor to 0,0 - timer.print_stats() - print('Avg fps: %.2f\tAvg ms: %.2f ' % (1/avg.get_avg(), avg.get_avg()*1000)) - except KeyboardInterrupt: - pass +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import torch, torchvision +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models.resnet import Bottleneck +import numpy as np +from itertools import product +from math import sqrt +from typing import List +from collections import defaultdict + +from data.config import cfg, mask_type +from layers import Detect +from layers.interpolate import InterpolateModule +from backbone import construct_backbone + +import torch.backends.cudnn as cudnn +from utils import timer +from utils.functions import MovingAverage, make_net + +# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. +# See the bug report here: https://github.com/pytorch/pytorch/issues/17108 + +# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules +use_jit = False +if not use_jit: + print('Multiple GPUs detected! Turning off JIT.') + +ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module +script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn + + + +class Concat(nn.Module): + def __init__(self, nets, extra_params): + super().__init__() + + self.nets = nn.ModuleList(nets) + self.extra_params = extra_params + + def forward(self, x): + # Concat each along the channel dimension + return torch.cat([net(x) for net in self.nets], dim=1, **self.extra_params) + +prior_cache = defaultdict(lambda: None) + +class PredictionModule(nn.Module): + """ + The (c) prediction module adapted from DSSD: + https://arxiv.org/pdf/1701.06659.pdf + + Note that this is slightly different to the module in the paper + because the Bottleneck block actually has a 3x3 convolution in + the middle instead of a 1x1 convolution. Though, I really can't + be arsed to implement it myself, and, who knows, this might be + better. + + Args: + - in_channels: The input feature size. + - out_channels: The output feature size (must be a multiple of 4). + - aspect_ratios: A list of lists of priorbox aspect ratios (one list per scale). + - scales: A list of priorbox scales relative to this layer's convsize. + For instance: If this layer has convouts of size 30x30 for + an image of size 600x600, the 'default' (scale + of 1) for this layer would produce bounding + boxes with an area of 20x20px. If the scale is + .5 on the other hand, this layer would consider + bounding boxes with area 10x10px, etc. + - parent: If parent is a PredictionModule, this module will use all the layers + from parent instead of from this module. 
+ """ + + def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0): + super().__init__() + + self.num_classes = cfg.num_classes + self.mask_dim = cfg.mask_dim # Defined by Yolact + self.num_priors = sum(len(x)*len(scales) for x in aspect_ratios) + self.parent = [parent] # Don't include this in the state dict + self.index = index + self.num_heads = cfg.num_heads # Defined by Yolact + + if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: + self.mask_dim = self.mask_dim // self.num_heads + + if cfg.mask_proto_prototypes_as_features: + in_channels += self.mask_dim + + if parent is None: + if cfg.extra_head_net is None: + out_channels = in_channels + else: + self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net) + + if cfg.use_prediction_module: + self.block = Bottleneck(out_channels, out_channels // 4) + self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True) + self.bn = nn.BatchNorm2d(out_channels) + + self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4, **cfg.head_layer_params) + self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params) + self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, **cfg.head_layer_params) + + if cfg.use_mask_scoring: + self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params) + + if cfg.use_instance_coeff: + self.inst_layer = nn.Conv2d(out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params) + + # What is this ugly lambda doing in the middle of all this clean prediction module code? + def make_extra(num_layers): + if num_layers == 0: + return lambda x: x + else: + # Looks more complicated than it is. 
This just creates an array of num_layers alternating conv-relu + return nn.Sequential(*sum([[ + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ] for _ in range(num_layers)], [])) + + self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers] + + if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate: + self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1) + + self.aspect_ratios = aspect_ratios + self.scales = scales + + self.priors = None + self.last_conv_size = None + self.last_img_size = None + + def forward(self, x): + """ + Args: + - x: The convOut from a layer in the backbone network + Size: [batch_size, in_channels, conv_h, conv_w]) + + Returns a tuple (bbox_coords, class_confs, mask_output, prior_boxes) with sizes + - bbox_coords: [batch_size, conv_h*conv_w*num_priors, 4] + - class_confs: [batch_size, conv_h*conv_w*num_priors, num_classes] + - mask_output: [batch_size, conv_h*conv_w*num_priors, mask_dim] + - prior_boxes: [conv_h*conv_w*num_priors, 4] + """ + # In case we want to use another module's layers + src = self if self.parent[0] is None else self.parent[0] + + conv_h = x.size(2) + conv_w = x.size(3) + + if cfg.extra_head_net is not None: + x = src.upfeature(x) + + if cfg.use_prediction_module: + # The two branches of PM design (c) + a = src.block(x) + + b = src.conv(x) + b = src.bn(b) + b = F.relu(b) + + # TODO: Possibly switch this out for a product + x = a + b + + bbox_x = src.bbox_extra(x) + conf_x = src.conf_extra(x) + mask_x = src.mask_extra(x) + + bbox = src.bbox_layer(bbox_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) + conf = src.conf_layer(conf_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) + + if cfg.eval_mask_branch: + mask = src.mask_layer(mask_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) + else: + mask = torch.zeros(x.size(0), bbox.size(1), self.mask_dim, device=bbox.device) + + if cfg.use_mask_scoring: + score = src.score_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 1) + + if cfg.use_instance_coeff: + inst = src.inst_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, cfg.num_instance_coeffs) + + # See box_utils.decode for an explanation of this + if cfg.use_yolo_regressors: + bbox[:, :, :2] = torch.sigmoid(bbox[:, :, :2]) - 0.5 + bbox[:, :, 0] /= conv_w + bbox[:, :, 1] /= conv_h + + if cfg.eval_mask_branch: + if cfg.mask_type == mask_type.direct: + mask = torch.sigmoid(mask) + elif cfg.mask_type == mask_type.lincomb: + mask = cfg.mask_proto_coeff_activation(mask) + + if cfg.mask_proto_coeff_gate: + gate = src.gate_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) + mask = mask * torch.sigmoid(gate) + + if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: + mask = F.pad(mask, (self.index * self.mask_dim, (self.num_heads - self.index - 1) * self.mask_dim), mode='constant', value=0) + + priors = self.make_priors(conv_h, conv_w, x.device) + + preds = { 'loc': bbox, 'conf': conf, 'mask': mask, 'priors': priors } + + if cfg.use_mask_scoring: + preds['score'] = score + + if cfg.use_instance_coeff: + preds['inst'] = inst + + return preds + + def make_priors(self, conv_h, conv_w, device): + """ Note that priors are [x,y,width,height] where (x,y) is the center of the box. 
""" + global prior_cache + size = (conv_h, conv_w) + + with timer.env('makepriors'): + if self.last_img_size != (cfg._tmp_img_w, cfg._tmp_img_h): + prior_data = [] + + # Iteration order is important (it has to sync up with the convout) + for j, i in product(range(conv_h), range(conv_w)): + # +0.5 because priors are in center-size notation + x = (i + 0.5) / conv_w + y = (j + 0.5) / conv_h + + for ars in self.aspect_ratios: + for scale in self.scales: + for ar in ars: + if not cfg.backbone.preapply_sqrt: + ar = sqrt(ar) + + if cfg.backbone.use_pixel_scales: + w = scale * ar / cfg.max_size + h = scale / ar / cfg.max_size + else: + w = scale * ar / conv_w + h = scale / ar / conv_h + + # This is for backward compatability with a bug where I made everything square by accident + if cfg.backbone.use_square_anchors: + h = w + + prior_data += [x, y, w, h] + + self.priors = torch.Tensor(prior_data).to(device).view(-1, 4).detach() + self.priors.requires_grad = False + self.last_img_size = (cfg._tmp_img_w, cfg._tmp_img_h) + self.last_conv_size = (conv_w, conv_h) + prior_cache[size] = None + elif self.priors.device != device: + # This whole weird situation is so that DataParalell doesn't copy the priors each iteration + if prior_cache[size] is None: + prior_cache[size] = {} + + if device not in prior_cache[size]: + prior_cache[size][device] = self.priors.to(device) + + self.priors = prior_cache[size][device] + + return self.priors + +class FPN(ScriptModuleWrapper): + """ + Implements a general version of the FPN introduced in + https://arxiv.org/pdf/1612.03144.pdf + + Parameters (in cfg.fpn): + - num_features (int): The number of output features in the fpn layers. + - interpolation_mode (str): The mode to pass to F.interpolate. + - num_downsample (int): The number of downsampled layers to add onto the selected layers. + These extra layers are downsampled from the last selected layer. + + Args: + - in_channels (list): For each conv layer you supply in the forward pass, + how many features will it have? + """ + __constants__ = ['interpolation_mode', 'num_downsample', 'use_conv_downsample', 'relu_pred_layers', + 'lat_layers', 'pred_layers', 'downsample_layers', 'relu_downsample_layers'] + + def __init__(self, in_channels): + super().__init__() + + self.lat_layers = nn.ModuleList([ + nn.Conv2d(x, cfg.fpn.num_features, kernel_size=1) + for x in reversed(in_channels) + ]) + + # This is here for backwards compatability + padding = 1 if cfg.fpn.pad else 0 + self.pred_layers = nn.ModuleList([ + nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=padding) + for _ in in_channels + ]) + + if cfg.fpn.use_conv_downsample: + self.downsample_layers = nn.ModuleList([ + nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=1, stride=2) + for _ in range(cfg.fpn.num_downsample) + ]) + + self.interpolation_mode = cfg.fpn.interpolation_mode + self.num_downsample = cfg.fpn.num_downsample + self.use_conv_downsample = cfg.fpn.use_conv_downsample + self.relu_downsample_layers = cfg.fpn.relu_downsample_layers + self.relu_pred_layers = cfg.fpn.relu_pred_layers + + @script_method_wrapper + def forward(self, convouts:List[torch.Tensor]): + """ + Args: + - convouts (list): A list of convouts for the corresponding layers in in_channels. + Returns: + - A list of FPN convouts in the same order as x with extra downsample layers if requested. 
+ """ + + out = [] + x = torch.zeros(1, device=convouts[0].device) + for i in range(len(convouts)): + out.append(x) + + # For backward compatability, the conv layers are stored in reverse but the input and output is + # given in the correct order. Thus, use j=-i-1 for the input and output and i for the conv layers. + j = len(convouts) + for lat_layer in self.lat_layers: + j -= 1 + + if j < len(convouts) - 1: + _, _, h, w = convouts[j].size() + x = F.interpolate(x, size=(h, w), mode=self.interpolation_mode, align_corners=False) + + x = x + lat_layer(convouts[j]) + out[j] = x + + # This janky second loop is here because TorchScript. + j = len(convouts) + for pred_layer in self.pred_layers: + j -= 1 + out[j] = pred_layer(out[j]) + + if self.relu_pred_layers: + F.relu(out[j], inplace=True) + + cur_idx = len(out) + + # In the original paper, this takes care of P6 + if self.use_conv_downsample: + for downsample_layer in self.downsample_layers: + out.append(downsample_layer(out[-1])) + else: + for idx in range(self.num_downsample): + # Note: this is an untested alternative to out.append(out[-1][:, :, ::2, ::2]). Thanks TorchScript. + out.append(nn.functional.max_pool2d(out[-1], 1, stride=2)) + + if self.relu_downsample_layers: + for idx in range(len(out) - cur_idx): + out[idx] = F.relu(out[idx + cur_idx], inplace=False) + + return out + +class FastMaskIoUNet(ScriptModuleWrapper): + + def __init__(self): + super().__init__() + input_channels = 1 + last_layer = [(cfg.num_classes-1, 1, {})] + self.maskiou_net, _ = make_net(input_channels, cfg.maskiou_net + last_layer, include_last_relu=True) + + def forward(self, x): + x = self.maskiou_net(x) + maskiou_p = F.max_pool2d(x, kernel_size=x.size()[2:]).squeeze(-1).squeeze(-1) + + return maskiou_p + + + +class Yolact(nn.Module): + """ + + + ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ + ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ + ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ + ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ + ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ + ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ + + + You can set the arguments by changing them in the backbone config object in config.py. + + Parameters (in cfg.backbone): + - selected_layers: The indices of the conv layers to use for prediction. + - pred_scales: A list with len(selected_layers) containing tuples of scales (see PredictionModule) + - pred_aspect_ratios: A list of lists of aspect ratios with len(selected_layers) (see PredictionModule) + """ + + def __init__(self): + super().__init__() + + self.backbone = construct_backbone(cfg.backbone) #backbone: resnetbackbone. backbone_modules:{list:104}. bn1:{BatchNorm2d} + + if cfg.freeze_bn: # it's true + self.freeze_bn() + + # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early! 
+ if cfg.mask_type == mask_type.direct: + cfg.mask_dim = cfg.mask_size**2 + elif cfg.mask_type == mask_type.lincomb: # the module will execute this branch + if cfg.mask_proto_use_grid: + self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file)) + self.num_grids = self.grid.size(0) + else: # the module will execute this branch + self.num_grids = 0 + + self.proto_src = cfg.mask_proto_src + + if self.proto_src is None: in_channels = 3 + elif cfg.fpn is not None: in_channels = cfg.fpn.num_features + else: in_channels = self.backbone.channels[self.proto_src] + in_channels += self.num_grids + #in_channels will be 256 + # The include_last_relu=false here is because we might want to change it to another function + self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False) + + if cfg.mask_proto_bias: + cfg.mask_dim += 1 + + + self.selected_layers = cfg.backbone.selected_layers + src_channels = self.backbone.channels + + if cfg.use_maskiou: #false + self.maskiou_net = FastMaskIoUNet() + + if cfg.fpn is not None: #true + # Some hacky rewiring to accomodate the FPN + self.fpn = FPN([src_channels[i] for i in self.selected_layers]) + self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample)) + src_channels = [cfg.fpn.num_features] * len(self.selected_layers) + + + self.prediction_layers = nn.ModuleList() + cfg.num_heads = len(self.selected_layers) + + for idx, layer_idx in enumerate(self.selected_layers): + # If we're sharing prediction module weights, have every module's parent be the first one + parent = None + if cfg.share_prediction_module and idx > 0: #cfg.share_prediction_module is True + parent = self.prediction_layers[0] + + pred = PredictionModule(src_channels[layer_idx], src_channels[layer_idx], + aspect_ratios = cfg.backbone.pred_aspect_ratios[idx], + scales = cfg.backbone.pred_scales[idx], + parent = parent, + index = idx) + self.prediction_layers.append(pred) + + # Extra parameters for the extra losses + if cfg.use_class_existence_loss: #false + # This comes from the smallest layer selected + # Also note that cfg.num_classes includes background + self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1) + + if cfg.use_semantic_segmentation_loss: #true + self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1) + + # For use in evaluation + self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k, + conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh) + + def save_weights(self, path): + """ Saves the model's weights using compression because the file sizes were getting too big. """ + torch.save(self.state_dict(), path) + + def load_weights(self, path, useCuda = True): + """ Loads weights from a compressed save file. """ + state_dict = torch.load(path) if useCuda else torch.load(path, map_location=torch.device('cpu')) + + # For backward compatability, remove these (the new variable is called layers) + for key in list(state_dict.keys()): + if key.startswith('backbone.layer') and not key.startswith('backbone.layers'): + del state_dict[key] + + # Also for backward compatibility with v1.0 weights, do this check + if key.startswith('fpn.downsample_layers.'): + if cfg.fpn is not None and int(key.split('.')[2]) >= cfg.fpn.num_downsample: + del state_dict[key] + self.load_state_dict(state_dict) + + def init_weights(self, backbone_path): + """ Initialize weights for training. """ + # Initialize the backbone with the pretrained weights. 
+ self.backbone.init_backbone(backbone_path) + + conv_constants = getattr(nn.Conv2d(1, 1, 1), '__constants__') + + # Quick lambda to test if one list contains the other + def all_in(x, y): + for _x in x: + if _x not in y: + return False + return True + + # Initialize the rest of the conv layers with xavier + for name, module in self.named_modules(): + # See issue #127 for why we need such a complicated condition if the module is a WeakScriptModuleProxy + # Broke in 1.3 (see issue #175), WeakScriptModuleProxy was turned into just ScriptModule. + # Broke in 1.4 (see issue #292), where RecursiveScriptModule is the new star of the show. + # Note that this might break with future pytorch updates, so let me know if it does + is_script_conv = False + if 'Script' in type(module).__name__: + # 1.4 workaround: now there's an original_name member so just use that + if hasattr(module, 'original_name'): + is_script_conv = 'Conv' in module.original_name + # 1.3 workaround: check if this has the same constants as a conv module + else: + is_script_conv = ( + all_in(module.__dict__['_constants_set'], conv_constants) + and all_in(conv_constants, module.__dict__['_constants_set'])) + + is_conv_layer = isinstance(module, nn.Conv2d) or is_script_conv + + if is_conv_layer and module not in self.backbone.backbone_modules: + nn.init.xavier_uniform_(module.weight.data) + + if module.bias is not None: + if cfg.use_focal_loss and 'conf_layer' in name: + if not cfg.use_sigmoid_focal_loss: + # Initialize the last layer as in the focal loss paper. + # Because we use softmax and not sigmoid, I had to derive an alternate expression + # on a notecard. Define pi to be the probability of outputting a foreground detection. + # Then let z = sum(exp(x)) - exp(x_0). Finally let c be the number of foreground classes. + # Chugging through the math, this gives us + # x_0 = log(z * (1 - pi) / pi) where 0 is the background class + # x_i = log(z / c) for all i > 0 + # For simplicity (and because we have a degree of freedom here), set z = 1. 
Then we have + # x_0 = log((1 - pi) / pi) note: don't split up the log for numerical stability + # x_i = -log(c) for all i > 0 + module.bias.data[0] = np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) + module.bias.data[1:] = -np.log(module.bias.size(0) - 1) + else: + module.bias.data[0] = -np.log(cfg.focal_loss_init_pi / (1 - cfg.focal_loss_init_pi)) + module.bias.data[1:] = -np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) + else: + module.bias.data.zero_() + + def train(self, mode=True): + super().train(mode) + + if cfg.freeze_bn: + self.freeze_bn() + + def freeze_bn(self, enable=False): + """ Adapted from https://discuss.pytorch.org/t/how-to-train-with-frozen-batchnorm/12106/8 """ + for module in self.modules(): + if isinstance(module, nn.BatchNorm2d): + module.train() if enable else module.eval() + + module.weight.requires_grad = enable + module.bias.requires_grad = enable + + def forward(self, x): + """ The input should be of size [batch_size, 3, img_h, img_w] """ + _, _, img_h, img_w = x.size() + cfg._tmp_img_h = img_h + cfg._tmp_img_w = img_w + + with timer.env('backbone'): + outs = self.backbone(x) + + if cfg.fpn is not None: + with timer.env('fpn'): + # Use backbone.selected_layers because we overwrote self.selected_layers + outs = [outs[i] for i in cfg.backbone.selected_layers] + outs = self.fpn(outs) + + proto_out = None + if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: + with timer.env('proto'): + proto_x = x if self.proto_src is None else outs[self.proto_src] + + if self.num_grids > 0: + grids = self.grid.repeat(proto_x.size(0), 1, 1, 1) + proto_x = torch.cat([proto_x, grids], dim=1) + + proto_out = self.proto_net(proto_x) + proto_out = cfg.mask_proto_prototype_activation(proto_out) + + if cfg.mask_proto_prototypes_as_features: + # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary + proto_downsampled = proto_out.clone() + + if cfg.mask_proto_prototypes_as_features_no_grad: + proto_downsampled = proto_out.detach() + + # Move the features last so the multiplication is easy + proto_out = proto_out.permute(0, 2, 3, 1).contiguous() + + if cfg.mask_proto_bias: + bias_shape = [x for x in proto_out.size()] + bias_shape[-1] = 1 + proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) + + + with timer.env('pred_heads'): + pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } + + if cfg.use_mask_scoring: + pred_outs['score'] = [] + + if cfg.use_instance_coeff: + pred_outs['inst'] = [] + + for idx, pred_layer in zip(self.selected_layers, self.prediction_layers): + pred_x = outs[idx] + + if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features: + # Scale the prototypes down to the current prediction layer's size and add it as inputs + proto_downsampled = F.interpolate(proto_downsampled, size=outs[idx].size()[2:], mode='bilinear', align_corners=False) + pred_x = torch.cat([pred_x, proto_downsampled], dim=1) + + # A hack for the way dataparallel works + if cfg.share_prediction_module and pred_layer is not self.prediction_layers[0]: + pred_layer.parent = [self.prediction_layers[0]] + + p = pred_layer(pred_x) + + for k, v in p.items(): + pred_outs[k].append(v) + + for k, v in pred_outs.items(): + pred_outs[k] = torch.cat(v, -2) + + if proto_out is not None: + pred_outs['proto'] = proto_out + + if self.training: + # For the extra loss functions + if cfg.use_class_existence_loss: + pred_outs['classes'] = self.class_existence_fc(outs[-1].mean(dim=(2, 3))) + + if 
cfg.use_semantic_segmentation_loss: + pred_outs['segm'] = self.semantic_seg_conv(outs[0]) + + return pred_outs + else: + if cfg.use_mask_scoring: + pred_outs['score'] = torch.sigmoid(pred_outs['score']) + + if cfg.use_focal_loss: + if cfg.use_sigmoid_focal_loss: + # Note: even though conf[0] exists, this mode doesn't train it so don't use it + pred_outs['conf'] = torch.sigmoid(pred_outs['conf']) + if cfg.use_mask_scoring: + pred_outs['conf'] *= pred_outs['score'] + elif cfg.use_objectness_score: + # See focal_loss_sigmoid in multibox_loss.py for details + objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) + pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax(pred_outs['conf'][:, :, 1:], -1) + pred_outs['conf'][:, :, 0 ] = 1 - objectness + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + else: + + if cfg.use_objectness_score: + objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) + + pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \ + * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1) + + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + + return self.detect(pred_outs) + + + + +# Some testing code +if __name__ == '__main__': + from utils.functions import init_console + init_console() + + # Use the first argument to set the config if you want + import sys + if len(sys.argv) > 1: + from data.config import set_cfg + set_cfg(sys.argv[1]) + + net = Yolact() + net.train() + net.init_weights(backbone_path='weights/' + cfg.backbone.path) + + # GPU + net = net.cuda() + torch.set_default_tensor_type('torch.cuda.FloatTensor') + + x = torch.zeros((1, 3, cfg.max_size, cfg.max_size)) + y = net(x) + + for p in net.prediction_layers: + print(p.last_conv_size) + + print() + for k, a in y.items(): + print(k + ': ', a.size(), torch.sum(a)) + exit() + + net(x) + # timer.disable('pass2') + avg = MovingAverage() + try: + while True: + timer.reset() + with timer.env('everything else'): + net(x) + avg.add(timer.total_time()) + print('\033[2J') # Moves console cursor to 0,0 + timer.print_stats() + print('Avg fps: %.2f\tAvg ms: %.2f ' % (1/avg.get_avg(), avg.get_avg()*1000)) + except KeyboardInterrupt: + pass diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/LICENSE b/PyTorch/contrib/cv/detection/YOLACT_plus/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/LICENSE +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/README.md b/PyTorch/contrib/cv/detection/YOLACT_plus/README.md index a6ecf3f43000245a3f07a14ae65388f4fcffe1a8..6bf6a4c468cd5e008e774db6bf399fb4c5e21853 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/README.md +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/README.md @@ -1,49 +1,49 @@ -# YOLACT++ - -This implements training of Yolact++ on the COCO2017 dataset, mainly modified from [yolact++](https://github.com/dbolya/yolact). - -## YOLACT++ Detail - -## Requirements - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- Download the dataset by -- `sh data/scripts/COCO.sh` -- Download an imagenet-pretrained model and put it in ./weights. - - For Resnet101, download resnet101_reducedfc.pth. - - For Resnet50, download resnet50-19c8e357.pth. - - For Darknet53, download darknet53.pth. - -## Training - -To train a model, run `train.py` with the desired model architecture and the path to dataset: - -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=data_path - -#test 8p accuracy -bash test/train_eval_1p.sh --$pth_path=pth_path -``` - -Log path: - ${YOLACT_ROOT}/test/output/0/train_0.log # training detail log - - - -## Yolact++ training result - -| 名称 | 精度 | FPS | AMP_Type | -| :------: | :------: | :------: | :------: | -| NPU-1p | - | 3.153 | O0 | -| NPU-8p | 33.49 | 14.677 | O0 | +# YOLACT++ + +This implements training of Yolact++ on the COCO2017 dataset, mainly modified from [yolact++](https://github.com/dbolya/yolact). + +## YOLACT++ Detail + +## Requirements + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- Download the dataset by +- `sh data/scripts/COCO.sh` +- Download an imagenet-pretrained model and put it in ./weights. + - For Resnet101, download resnet101_reducedfc.pth. + - For Resnet50, download resnet50-19c8e357.pth. + - For Darknet53, download darknet53.pth. 
+ +## Training + +To train a model, run `train.py` with the desired model architecture and the path to dataset: + +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=data_path + +#test 8p accuracy +bash test/train_eval_1p.sh --$pth_path=pth_path +``` + +Log path: + ${YOLACT_ROOT}/test/output/0/train_0.log # training detail log + + + +## Yolact++ training result + +| 名称 | 精度 | FPS | AMP_Type | +| :------: | :------: | :------: | :------: | +| NPU-1p | - | 3.153 | O0 | +| NPU-8p | 33.49 | 14.677 | O0 | diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/backbone.py b/PyTorch/contrib/cv/detection/YOLACT_plus/backbone.py index a3cf652028f1ed08c925fd68dc0fc8fef5bec9d5..15da87a98a63fb68bdeae2a261192a9277059ca4 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/backbone.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/backbone.py @@ -1,474 +1,474 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch -import torch.nn as nn -import pickle - -from collections import OrderedDict - -try: - from deform_conv import DCNv2 as DCN -except ImportError: - def DCN(*args, **kwdargs): - raise Exception('DCN could not be imported. If you want to use YOLACT++ models, compile DCN. 
Check the README for instructions.') - -class Bottleneck(nn.Module): - """ Adapted from torchvision.models.resnet """ - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=nn.BatchNorm2d, dilation=1, use_dcn=False): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, dilation=dilation) - self.bn1 = norm_layer(planes) - if use_dcn: - self.conv2 = DCN(planes, planes, kernel_size=3, stride=stride, - padding=dilation, dilation=dilation, deformable_groups=1) - self.conv2.bias.data.zero_() - self.conv2.conv_offset_mask.weight.data.zero_() - self.conv2.conv_offset_mask.bias.data.zero_() - else: - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=dilation, bias=False, dilation=dilation) - self.bn2 = norm_layer(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False, dilation=dilation) - self.bn3 = norm_layer(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNetBackbone(nn.Module): - """ Adapted from torchvision.models.resnet """ - - def __init__(self, layers, dcn_layers=[0, 0, 0, 0], dcn_interval=1, atrous_layers=[], block=Bottleneck, norm_layer=nn.BatchNorm2d): - super().__init__() - - # These will be populated by _make_layer - self.num_base_layers = len(layers) - self.layers = nn.ModuleList() - self.channels = [] - self.norm_layer = norm_layer - self.dilation = 1 - self.atrous_layers = atrous_layers - - # From torchvision.models.resnet.Resnet - self.inplanes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - self.bn1 = norm_layer(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self._make_layer(block, 64, layers[0], dcn_layers=dcn_layers[0], dcn_interval=dcn_interval) - self._make_layer(block, 128, layers[1], stride=2, dcn_layers=dcn_layers[1], dcn_interval=dcn_interval) - self._make_layer(block, 256, layers[2], stride=2, dcn_layers=dcn_layers[2], dcn_interval=dcn_interval) - self._make_layer(block, 512, layers[3], stride=2, dcn_layers=dcn_layers[3], dcn_interval=dcn_interval) - - # This contains every module that should be initialized by loading in pretrained weights. - # Any extra layers added onto this that won't be initialized by init_backbone will not be - # in this list. That way, Yolact::init_weights knows which backbone weights to initialize - # with xavier, and which ones to leave alone. - self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] - - - def _make_layer(self, block, planes, blocks, stride=1, dcn_layers=0, dcn_interval=1): - """ Here one layer means a string of n Bottleneck blocks. """ - downsample = None - - # This is actually just to create the connection between layers, and not necessarily to - # downsample. 
Even if the second condition is met, it only downsamples when stride != 1 - if stride != 1 or self.inplanes != planes * block.expansion: - if len(self.layers) in self.atrous_layers: - self.dilation += 1 - stride = 1 - - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False, - dilation=self.dilation), - self.norm_layer(planes * block.expansion), - ) - - layers = [] - use_dcn = (dcn_layers >= blocks) - layers.append(block(self.inplanes, planes, stride, downsample, self.norm_layer, self.dilation, use_dcn=use_dcn)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - use_dcn = ((i+dcn_layers) >= blocks) and (i % dcn_interval == 0) - layers.append(block(self.inplanes, planes, norm_layer=self.norm_layer, use_dcn=use_dcn)) - layer = nn.Sequential(*layers) - - self.channels.append(planes * block.expansion) - self.layers.append(layer) - - return layer - - def forward(self, x): - """ Returns a list of convouts for each layer. """ - - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - outs = [] - for layer in self.layers: - x = layer(x) - outs.append(x) - - return tuple(outs) - - def init_backbone(self, path): - """ Initializes the backbone weights for training. """ - state_dict = torch.load(path) - - # Replace layer1 -> layers.0 etc. - keys = list(state_dict) - for key in keys: - if key.startswith('layer'): - idx = int(key[5]) - new_key = 'layers.' + str(idx-1) + key[6:] - state_dict[new_key] = state_dict.pop(key) - - # Note: Using strict=False is berry scary. Triple check this. - self.load_state_dict(state_dict, strict=False) - - def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck): - """ Add a downsample layer to the backbone as per what SSD does. """ - self._make_layer(block, conv_channels // block.expansion, blocks=depth, stride=downsample) - - - - -class ResNetBackboneGN(ResNetBackbone): - - def __init__(self, layers, num_groups=32): - super().__init__(layers, norm_layer=lambda x: nn.GroupNorm(num_groups, x)) - - def init_backbone(self, path): - """ The path here comes from detectron. So we load it differently. 
""" - with open(path, 'rb') as f: - state_dict = pickle.load(f, encoding='latin1') # From the detectron source - state_dict = state_dict['blobs'] - - our_state_dict_keys = list(self.state_dict().keys()) - new_state_dict = {} - - gn_trans = lambda x: ('gn_s' if x == 'weight' else 'gn_b') - layeridx2res = lambda x: 'res' + str(int(x)+2) - block2branch = lambda x: 'branch2' + ('a', 'b', 'c')[int(x[-1:])-1] - - # Transcribe each Detectron weights name to a Yolact weights name - for key in our_state_dict_keys: - parts = key.split('.') - transcribed_key = '' - - if (parts[0] == 'conv1'): - transcribed_key = 'conv1_w' - elif (parts[0] == 'bn1'): - transcribed_key = 'conv1_' + gn_trans(parts[1]) - elif (parts[0] == 'layers'): - if int(parts[1]) >= self.num_base_layers: continue - - transcribed_key = layeridx2res(parts[1]) - transcribed_key += '_' + parts[2] + '_' - - if parts[3] == 'downsample': - transcribed_key += 'branch1_' - - if parts[4] == '0': - transcribed_key += 'w' - else: - transcribed_key += gn_trans(parts[5]) - else: - transcribed_key += block2branch(parts[3]) + '_' - - if 'conv' in parts[3]: - transcribed_key += 'w' - else: - transcribed_key += gn_trans(parts[4]) - - new_state_dict[key] = torch.Tensor(state_dict[transcribed_key]) - - # strict=False because we may have extra unitialized layers at this point - self.load_state_dict(new_state_dict, strict=False) - - - - - - - -def darknetconvlayer(in_channels, out_channels, *args, **kwdargs): - """ - Implements a conv, activation, then batch norm. - Arguments are passed into the conv layer. - """ - return nn.Sequential( - nn.Conv2d(in_channels, out_channels, *args, **kwdargs, bias=False), - nn.BatchNorm2d(out_channels), - # Darknet uses 0.1 here. - # See https://github.com/pjreddie/darknet/blob/680d3bde1924c8ee2d1c1dea54d3e56a05ca9a26/src/activations.h#L39 - nn.LeakyReLU(0.1, inplace=True) - ) - -class DarkNetBlock(nn.Module): - """ Note: channels is the lesser of the two. The output will be expansion * channels. """ - - expansion = 2 - - def __init__(self, in_channels, channels): - super().__init__() - - self.conv1 = darknetconvlayer(in_channels, channels, kernel_size=1) - self.conv2 = darknetconvlayer(channels, channels * self.expansion, kernel_size=3, padding=1) - - def forward(self, x): - return self.conv2(self.conv1(x)) + x - - - - -class DarkNetBackbone(nn.Module): - """ - An implementation of YOLOv3's Darnet53 in - https://pjreddie.com/media/files/papers/YOLOv3.pdf - - This is based off of the implementation of Resnet above. - """ - - def __init__(self, layers=[1, 2, 8, 8, 4], block=DarkNetBlock): - super().__init__() - - # These will be populated by _make_layer - self.num_base_layers = len(layers) - self.layers = nn.ModuleList() - self.channels = [] - - self._preconv = darknetconvlayer(3, 32, kernel_size=3, padding=1) - self.in_channels = 32 - - self._make_layer(block, 32, layers[0]) - self._make_layer(block, 64, layers[1]) - self._make_layer(block, 128, layers[2]) - self._make_layer(block, 256, layers[3]) - self._make_layer(block, 512, layers[4]) - - # This contains every module that should be initialized by loading in pretrained weights. - # Any extra layers added onto this that won't be initialized by init_backbone will not be - # in this list. That way, Yolact::init_weights knows which backbone weights to initialize - # with xavier, and which ones to leave alone. 
- self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] - - def _make_layer(self, block, channels, num_blocks, stride=2): - """ Here one layer means a string of n blocks. """ - layer_list = [] - - # The downsample layer - layer_list.append( - darknetconvlayer(self.in_channels, channels * block.expansion, - kernel_size=3, padding=1, stride=stride)) - - # Each block inputs channels and outputs channels * expansion - self.in_channels = channels * block.expansion - layer_list += [block(self.in_channels, channels) for _ in range(num_blocks)] - - self.channels.append(self.in_channels) - self.layers.append(nn.Sequential(*layer_list)) - - def forward(self, x): - """ Returns a list of convouts for each layer. """ - - x = self._preconv(x) - - outs = [] - for layer in self.layers: - x = layer(x) - outs.append(x) - - return tuple(outs) - - def add_layer(self, conv_channels=1024, stride=2, depth=1, block=DarkNetBlock): - """ Add a downsample layer to the backbone as per what SSD does. """ - self._make_layer(block, conv_channels // block.expansion, num_blocks=depth, stride=stride) - - def init_backbone(self, path): - """ Initializes the backbone weights for training. """ - # Note: Using strict=False is berry scary. Triple check this. - self.load_state_dict(torch.load(path), strict=False) - - - - - -class VGGBackbone(nn.Module): - """ - Args: - - cfg: A list of layers given as lists. Layers can be either 'M' signifying - a max pooling layer, a number signifying that many feature maps in - a conv layer, or a tuple of 'M' or a number and a kwdargs dict to pass - into the function that creates the layer (e.g. nn.MaxPool2d for 'M'). - - extra_args: A list of lists of arguments to pass into add_layer. - - norm_layers: Layers indices that need to pass through an l2norm layer. - """ - - def __init__(self, cfg, extra_args=[], norm_layers=[]): - super().__init__() - - self.channels = [] - self.layers = nn.ModuleList() - self.in_channels = 3 - self.extra_args = list(reversed(extra_args)) # So I can use it as a stack - - # Keeps track of what the corresponding key will be in the state dict of the - # pretrained model. For instance, layers.0.2 for us is 2 for the pretrained - # model but layers.1.1 is 5. - self.total_layer_count = 0 - self.state_dict_lookup = {} - - for idx, layer_cfg in enumerate(cfg): - self._make_layer(layer_cfg) - - self.norms = nn.ModuleList([nn.BatchNorm2d(self.channels[l]) for l in norm_layers]) - self.norm_lookup = {l: idx for idx, l in enumerate(norm_layers)} - - # These modules will be initialized by init_backbone, - # so don't overwrite their initialization later. - self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] - - def _make_layer(self, cfg): - """ - Each layer is a sequence of conv layers usually preceded by a max pooling. - Adapted from torchvision.models.vgg.make_layers. 
- """ - - layers = [] - - for v in cfg: - # VGG in SSD requires some special layers, so allow layers to be tuples of - # (, kwdargs dict) - args = None - if isinstance(v, tuple): - args = v[1] - v = v[0] - - # v should be either M or a number - if v == 'M': - # Set default arguments - if args is None: - args = {'kernel_size': 2, 'stride': 2} - - layers.append(nn.MaxPool2d(**args)) - else: - # See the comment in __init__ for an explanation of this - cur_layer_idx = self.total_layer_count + len(layers) - self.state_dict_lookup[cur_layer_idx] = '%d.%d' % (len(self.layers), len(layers)) - - # Set default arguments - if args is None: - args = {'kernel_size': 3, 'padding': 1} - - # Add the layers - layers.append(nn.Conv2d(self.in_channels, v, **args)) - layers.append(nn.ReLU(inplace=True)) - self.in_channels = v - - self.total_layer_count += len(layers) - self.channels.append(self.in_channels) - self.layers.append(nn.Sequential(*layers)) - - def forward(self, x): - """ Returns a list of convouts for each layer. """ - outs = [] - - for idx, layer in enumerate(self.layers): - x = layer(x) - - # Apply an l2norm module to the selected layers - # Note that this differs from the original implemenetation - if idx in self.norm_lookup: - x = self.norms[self.norm_lookup[idx]](x) - outs.append(x) - - return tuple(outs) - - def transform_key(self, k): - """ Transform e.g. features.24.bias to layers.4.1.bias """ - vals = k.split('.') - layerIdx = self.state_dict_lookup[int(vals[0])] - return 'layers.%s.%s' % (layerIdx, vals[1]) - - def init_backbone(self, path): - """ Initializes the backbone weights for training. """ - state_dict = torch.load(path) - state_dict = OrderedDict([(self.transform_key(k), v) for k,v in state_dict.items()]) - - self.load_state_dict(state_dict, strict=False) - - def add_layer(self, conv_channels=128, downsample=2): - """ Add a downsample layer to the backbone as per what SSD does. """ - if len(self.extra_args) > 0: - conv_channels, downsample = self.extra_args.pop() - - padding = 1 if downsample > 1 else 0 - - layer = nn.Sequential( - nn.Conv2d(self.in_channels, conv_channels, kernel_size=1), - nn.ReLU(inplace=True), - nn.Conv2d(conv_channels, conv_channels*2, kernel_size=3, stride=downsample, padding=padding), - nn.ReLU(inplace=True) - ) - - self.in_channels = conv_channels*2 - self.channels.append(self.in_channels) - self.layers.append(layer) - - - - -def construct_backbone(cfg): - """ Constructs a backbone given a backbone config object (see config.py). """ - backbone = cfg.type(*cfg.args) - - # Add downsampling layers until we reach the number we need - num_layers = max(cfg.selected_layers) + 1 - - while len(backbone.layers) < num_layers: - backbone.add_layer() - - return backbone +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import torch +import torch.nn as nn +import pickle + +from collections import OrderedDict + +try: + from deform_conv import DCNv2 as DCN +except ImportError: + def DCN(*args, **kwdargs): + raise Exception('DCN could not be imported. If you want to use YOLACT++ models, compile DCN. Check the README for instructions.') + +class Bottleneck(nn.Module): + """ Adapted from torchvision.models.resnet """ + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=nn.BatchNorm2d, dilation=1, use_dcn=False): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, dilation=dilation) + self.bn1 = norm_layer(planes) + if use_dcn: + self.conv2 = DCN(planes, planes, kernel_size=3, stride=stride, + padding=dilation, dilation=dilation, deformable_groups=1) + self.conv2.bias.data.zero_() + self.conv2.conv_offset_mask.weight.data.zero_() + self.conv2.conv_offset_mask.bias.data.zero_() + else: + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=dilation, bias=False, dilation=dilation) + self.bn2 = norm_layer(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False, dilation=dilation) + self.bn3 = norm_layer(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNetBackbone(nn.Module): + """ Adapted from torchvision.models.resnet """ + + def __init__(self, layers, dcn_layers=[0, 0, 0, 0], dcn_interval=1, atrous_layers=[], block=Bottleneck, norm_layer=nn.BatchNorm2d): + super().__init__() + + # These will be populated by _make_layer + self.num_base_layers = len(layers) + self.layers = nn.ModuleList() + self.channels = [] + self.norm_layer = norm_layer + self.dilation = 1 + self.atrous_layers = atrous_layers + + # From torchvision.models.resnet.Resnet + self.inplanes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = norm_layer(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self._make_layer(block, 64, layers[0], dcn_layers=dcn_layers[0], dcn_interval=dcn_interval) + self._make_layer(block, 128, layers[1], stride=2, dcn_layers=dcn_layers[1], dcn_interval=dcn_interval) + self._make_layer(block, 256, layers[2], stride=2, dcn_layers=dcn_layers[2], dcn_interval=dcn_interval) + self._make_layer(block, 512, layers[3], stride=2, dcn_layers=dcn_layers[3], dcn_interval=dcn_interval) + + # This contains every module that should be initialized by loading in pretrained weights. + # Any extra layers added onto this that won't be initialized by init_backbone will not be + # in this list. That way, Yolact::init_weights knows which backbone weights to initialize + # with xavier, and which ones to leave alone. + self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] + + + def _make_layer(self, block, planes, blocks, stride=1, dcn_layers=0, dcn_interval=1): + """ Here one layer means a string of n Bottleneck blocks. 
""" + downsample = None + + # This is actually just to create the connection between layers, and not necessarily to + # downsample. Even if the second condition is met, it only downsamples when stride != 1 + if stride != 1 or self.inplanes != planes * block.expansion: + if len(self.layers) in self.atrous_layers: + self.dilation += 1 + stride = 1 + + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False, + dilation=self.dilation), + self.norm_layer(planes * block.expansion), + ) + + layers = [] + use_dcn = (dcn_layers >= blocks) + layers.append(block(self.inplanes, planes, stride, downsample, self.norm_layer, self.dilation, use_dcn=use_dcn)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + use_dcn = ((i+dcn_layers) >= blocks) and (i % dcn_interval == 0) + layers.append(block(self.inplanes, planes, norm_layer=self.norm_layer, use_dcn=use_dcn)) + layer = nn.Sequential(*layers) + + self.channels.append(planes * block.expansion) + self.layers.append(layer) + + return layer + + def forward(self, x): + """ Returns a list of convouts for each layer. """ + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + outs = [] + for layer in self.layers: + x = layer(x) + outs.append(x) + + return tuple(outs) + + def init_backbone(self, path): + """ Initializes the backbone weights for training. """ + state_dict = torch.load(path) + + # Replace layer1 -> layers.0 etc. + keys = list(state_dict) + for key in keys: + if key.startswith('layer'): + idx = int(key[5]) + new_key = 'layers.' + str(idx-1) + key[6:] + state_dict[new_key] = state_dict.pop(key) + + # Note: Using strict=False is berry scary. Triple check this. + self.load_state_dict(state_dict, strict=False) + + def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck): + """ Add a downsample layer to the backbone as per what SSD does. """ + self._make_layer(block, conv_channels // block.expansion, blocks=depth, stride=downsample) + + + + +class ResNetBackboneGN(ResNetBackbone): + + def __init__(self, layers, num_groups=32): + super().__init__(layers, norm_layer=lambda x: nn.GroupNorm(num_groups, x)) + + def init_backbone(self, path): + """ The path here comes from detectron. So we load it differently. 
""" + with open(path, 'rb') as f: + state_dict = pickle.load(f, encoding='latin1') # From the detectron source + state_dict = state_dict['blobs'] + + our_state_dict_keys = list(self.state_dict().keys()) + new_state_dict = {} + + gn_trans = lambda x: ('gn_s' if x == 'weight' else 'gn_b') + layeridx2res = lambda x: 'res' + str(int(x)+2) + block2branch = lambda x: 'branch2' + ('a', 'b', 'c')[int(x[-1:])-1] + + # Transcribe each Detectron weights name to a Yolact weights name + for key in our_state_dict_keys: + parts = key.split('.') + transcribed_key = '' + + if (parts[0] == 'conv1'): + transcribed_key = 'conv1_w' + elif (parts[0] == 'bn1'): + transcribed_key = 'conv1_' + gn_trans(parts[1]) + elif (parts[0] == 'layers'): + if int(parts[1]) >= self.num_base_layers: continue + + transcribed_key = layeridx2res(parts[1]) + transcribed_key += '_' + parts[2] + '_' + + if parts[3] == 'downsample': + transcribed_key += 'branch1_' + + if parts[4] == '0': + transcribed_key += 'w' + else: + transcribed_key += gn_trans(parts[5]) + else: + transcribed_key += block2branch(parts[3]) + '_' + + if 'conv' in parts[3]: + transcribed_key += 'w' + else: + transcribed_key += gn_trans(parts[4]) + + new_state_dict[key] = torch.Tensor(state_dict[transcribed_key]) + + # strict=False because we may have extra unitialized layers at this point + self.load_state_dict(new_state_dict, strict=False) + + + + + + + +def darknetconvlayer(in_channels, out_channels, *args, **kwdargs): + """ + Implements a conv, activation, then batch norm. + Arguments are passed into the conv layer. + """ + return nn.Sequential( + nn.Conv2d(in_channels, out_channels, *args, **kwdargs, bias=False), + nn.BatchNorm2d(out_channels), + # Darknet uses 0.1 here. + # See https://github.com/pjreddie/darknet/blob/680d3bde1924c8ee2d1c1dea54d3e56a05ca9a26/src/activations.h#L39 + nn.LeakyReLU(0.1, inplace=True) + ) + +class DarkNetBlock(nn.Module): + """ Note: channels is the lesser of the two. The output will be expansion * channels. """ + + expansion = 2 + + def __init__(self, in_channels, channels): + super().__init__() + + self.conv1 = darknetconvlayer(in_channels, channels, kernel_size=1) + self.conv2 = darknetconvlayer(channels, channels * self.expansion, kernel_size=3, padding=1) + + def forward(self, x): + return self.conv2(self.conv1(x)) + x + + + + +class DarkNetBackbone(nn.Module): + """ + An implementation of YOLOv3's Darnet53 in + https://pjreddie.com/media/files/papers/YOLOv3.pdf + + This is based off of the implementation of Resnet above. + """ + + def __init__(self, layers=[1, 2, 8, 8, 4], block=DarkNetBlock): + super().__init__() + + # These will be populated by _make_layer + self.num_base_layers = len(layers) + self.layers = nn.ModuleList() + self.channels = [] + + self._preconv = darknetconvlayer(3, 32, kernel_size=3, padding=1) + self.in_channels = 32 + + self._make_layer(block, 32, layers[0]) + self._make_layer(block, 64, layers[1]) + self._make_layer(block, 128, layers[2]) + self._make_layer(block, 256, layers[3]) + self._make_layer(block, 512, layers[4]) + + # This contains every module that should be initialized by loading in pretrained weights. + # Any extra layers added onto this that won't be initialized by init_backbone will not be + # in this list. That way, Yolact::init_weights knows which backbone weights to initialize + # with xavier, and which ones to leave alone. 
+ self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] + + def _make_layer(self, block, channels, num_blocks, stride=2): + """ Here one layer means a string of n blocks. """ + layer_list = [] + + # The downsample layer + layer_list.append( + darknetconvlayer(self.in_channels, channels * block.expansion, + kernel_size=3, padding=1, stride=stride)) + + # Each block inputs channels and outputs channels * expansion + self.in_channels = channels * block.expansion + layer_list += [block(self.in_channels, channels) for _ in range(num_blocks)] + + self.channels.append(self.in_channels) + self.layers.append(nn.Sequential(*layer_list)) + + def forward(self, x): + """ Returns a list of convouts for each layer. """ + + x = self._preconv(x) + + outs = [] + for layer in self.layers: + x = layer(x) + outs.append(x) + + return tuple(outs) + + def add_layer(self, conv_channels=1024, stride=2, depth=1, block=DarkNetBlock): + """ Add a downsample layer to the backbone as per what SSD does. """ + self._make_layer(block, conv_channels // block.expansion, num_blocks=depth, stride=stride) + + def init_backbone(self, path): + """ Initializes the backbone weights for training. """ + # Note: Using strict=False is berry scary. Triple check this. + self.load_state_dict(torch.load(path), strict=False) + + + + + +class VGGBackbone(nn.Module): + """ + Args: + - cfg: A list of layers given as lists. Layers can be either 'M' signifying + a max pooling layer, a number signifying that many feature maps in + a conv layer, or a tuple of 'M' or a number and a kwdargs dict to pass + into the function that creates the layer (e.g. nn.MaxPool2d for 'M'). + - extra_args: A list of lists of arguments to pass into add_layer. + - norm_layers: Layers indices that need to pass through an l2norm layer. + """ + + def __init__(self, cfg, extra_args=[], norm_layers=[]): + super().__init__() + + self.channels = [] + self.layers = nn.ModuleList() + self.in_channels = 3 + self.extra_args = list(reversed(extra_args)) # So I can use it as a stack + + # Keeps track of what the corresponding key will be in the state dict of the + # pretrained model. For instance, layers.0.2 for us is 2 for the pretrained + # model but layers.1.1 is 5. + self.total_layer_count = 0 + self.state_dict_lookup = {} + + for idx, layer_cfg in enumerate(cfg): + self._make_layer(layer_cfg) + + self.norms = nn.ModuleList([nn.BatchNorm2d(self.channels[l]) for l in norm_layers]) + self.norm_lookup = {l: idx for idx, l in enumerate(norm_layers)} + + # These modules will be initialized by init_backbone, + # so don't overwrite their initialization later. + self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)] + + def _make_layer(self, cfg): + """ + Each layer is a sequence of conv layers usually preceded by a max pooling. + Adapted from torchvision.models.vgg.make_layers. 
+ """ + + layers = [] + + for v in cfg: + # VGG in SSD requires some special layers, so allow layers to be tuples of + # (, kwdargs dict) + args = None + if isinstance(v, tuple): + args = v[1] + v = v[0] + + # v should be either M or a number + if v == 'M': + # Set default arguments + if args is None: + args = {'kernel_size': 2, 'stride': 2} + + layers.append(nn.MaxPool2d(**args)) + else: + # See the comment in __init__ for an explanation of this + cur_layer_idx = self.total_layer_count + len(layers) + self.state_dict_lookup[cur_layer_idx] = '%d.%d' % (len(self.layers), len(layers)) + + # Set default arguments + if args is None: + args = {'kernel_size': 3, 'padding': 1} + + # Add the layers + layers.append(nn.Conv2d(self.in_channels, v, **args)) + layers.append(nn.ReLU(inplace=True)) + self.in_channels = v + + self.total_layer_count += len(layers) + self.channels.append(self.in_channels) + self.layers.append(nn.Sequential(*layers)) + + def forward(self, x): + """ Returns a list of convouts for each layer. """ + outs = [] + + for idx, layer in enumerate(self.layers): + x = layer(x) + + # Apply an l2norm module to the selected layers + # Note that this differs from the original implemenetation + if idx in self.norm_lookup: + x = self.norms[self.norm_lookup[idx]](x) + outs.append(x) + + return tuple(outs) + + def transform_key(self, k): + """ Transform e.g. features.24.bias to layers.4.1.bias """ + vals = k.split('.') + layerIdx = self.state_dict_lookup[int(vals[0])] + return 'layers.%s.%s' % (layerIdx, vals[1]) + + def init_backbone(self, path): + """ Initializes the backbone weights for training. """ + state_dict = torch.load(path) + state_dict = OrderedDict([(self.transform_key(k), v) for k,v in state_dict.items()]) + + self.load_state_dict(state_dict, strict=False) + + def add_layer(self, conv_channels=128, downsample=2): + """ Add a downsample layer to the backbone as per what SSD does. """ + if len(self.extra_args) > 0: + conv_channels, downsample = self.extra_args.pop() + + padding = 1 if downsample > 1 else 0 + + layer = nn.Sequential( + nn.Conv2d(self.in_channels, conv_channels, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(conv_channels, conv_channels*2, kernel_size=3, stride=downsample, padding=padding), + nn.ReLU(inplace=True) + ) + + self.in_channels = conv_channels*2 + self.channels.append(self.in_channels) + self.layers.append(layer) + + + + +def construct_backbone(cfg): + """ Constructs a backbone given a backbone config object (see config.py). 
""" + backbone = cfg.type(*cfg.args) + + # Add downsampling layers until we reach the number we need + num_layers = max(cfg.selected_layers) + 1 + + while len(backbone.layers) < num_layers: + backbone.add_layer() + + return backbone diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/__init__.py b/PyTorch/contrib/cv/detection/YOLACT_plus/data/__init__.py index 65b2da8e8896a90e57ab5b2a9f77fb49a91dcc27..67c0f019a0d1207c72eca1ab1026c6a6aedba262 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/__init__.py @@ -1,6 +1,6 @@ -from .config import * -from .coco import * - -import torch -import cv2 -import numpy as np +from .config import * +from .coco import * + +import torch +import cv2 +import numpy as np diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/coco.py b/PyTorch/contrib/cv/detection/YOLACT_plus/data/coco.py index 8dd0f1c27903a42bec8529aefd1f7bbe7e013664..35f42e56240acf6f24e306ad33a8219ea5eb0543 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/coco.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/coco.py @@ -1,311 +1,311 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import os -import os.path as osp -import sys -import torch -import torch.utils.data as data -import torch.nn.functional as F -import cv2 -import numpy as np -from .config import cfg -from pycocotools import mask as maskUtils -import random - -def get_label_map(): - if cfg.dataset.label_map is None: - return {x+1: x+1 for x in range(len(cfg.dataset.class_names))} - else: - return cfg.dataset.label_map - -class COCOAnnotationTransform(object): - """Transforms a COCO annotation into a Tensor of bbox coords and label index - Initilized with a dictionary lookup of classnames to indexes - """ - def __init__(self): - self.label_map = get_label_map() - - def __call__(self, target, width, height): - """ - Args: - target (dict): COCO target json annotation as a python dict - height (int): height - width (int): width - Returns: - a list containing lists of bounding boxes [bbox coords, class idx] - """ - scale = np.array([width, height, width, height]) - res = [] - for obj in target: - if 'bbox' in obj: - bbox = obj['bbox'] - label_idx = obj['category_id'] - if label_idx >= 0: - label_idx = self.label_map[label_idx] - 1 - final_box = list(np.array([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]])/scale) - final_box.append(label_idx) - res += [final_box] # [xmin, ymin, xmax, ymax, label_idx] - else: - print("No bbox found for object ", obj) - - return res - - -class COCODetection(data.Dataset): - """`MS Coco Detection `_ Dataset. - Args: - root (string): Root directory where images are downloaded to. - 这里即为image_path,数据集图像路径 - - set_name (string): Name of the specific set of COCO images. 
- 自定义,用于标记 - - transform (callable, optional): A function/transform that augments the - raw images` - 图像增强方法 - - target_transform (callable, optional): A function/transform that takes - in the target (bbox) and transforms it. - 将数据集中的目标检测框(bounding box)等封装为一个专门的数据结构 - - prep_crowds (bool): Whether or not to prepare crowds for the evaluation step. - ? - """ - - def __init__(self, image_path, info_file, transform=None, - target_transform=None, - dataset_name='MS COCO', has_gt=True): - #has_gt中的gt代表ground truth,在监督学习中,数据是有标注的,以(x, t)的形式出现,其中x是输入数据,t是标注. - # 正确的t标注是ground truth, 错误的标记则不是。(也有人将所有标注数据都叫做ground truth) - - # Do this here because we have too many things named COCO - from pycocotools.coco import COCO - - if target_transform is None: - target_transform = COCOAnnotationTransform() - - self.root = image_path - self.coco = COCO(info_file) #加载标注文件, 其中COCO对象属性有1、anns :所有标注;2、catToImgs:{种类:[图像list]}的映射字典; - # 3、cats:图片中包含物体的种类;4、dataset:内含信息、许可、标注等;5、imgToAnns:{图像:[标注list]}的映射字典;6、imgs:全部图像 - - self.ids = list(self.coco.imgToAnns.keys()) - if len(self.ids) == 0 or not has_gt: #显然这个if块内语句基本不可能执行 - self.ids = list(self.coco.imgs.keys()) - - self.transform = transform #默认情况下,训练数据使用:SSDAugmentation,验证数据使用:baseTransform - self.target_transform = COCOAnnotationTransform() #82行的if块毫无意义 - - self.name = dataset_name - self.has_gt = has_gt - - def __getitem__(self, index): - """ - Args: - index (int): Index - Returns: - tuple: Tuple (image, (target, masks, num_crowds)). - target is the object returned by ``coco.loadAnns``. - """ - im, gt, masks, h, w, num_crowds = self.pull_item(index) - return (index, im), (gt, masks, num_crowds) - - def __len__(self): - return len(self.ids) - - def pull_item(self, index): - """ - Args: - index (int): Index - Returns: - tuple: Tuple (image, target, masks, height, width, crowd). - target is the object returned by ``coco.loadAnns``. - Note that if no crowd annotations exist, crowd will be None - """ - img_id = self.ids[index] - - if self.has_gt: - ann_ids = self.coco.getAnnIds(imgIds=img_id) - - # Target has {'segmentation', 'area', iscrowd', 'image_id', 'bbox', 'category_id'} - target = [x for x in self.coco.loadAnns(ann_ids) if x['image_id'] == img_id] - else: - target = [] - - # Separate out crowd annotations. These are annotations that signify a large crowd of - # objects of said class, where there is no annotation for each individual object. Both - # during testing and training, consider these crowds as neutral. - crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])] - target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])] - num_crowds = len(crowd) - - for x in crowd: - x['category_id'] = -1 - - # This is so we ensure that all crowd annotations are at the end of the array - target += crowd - - # The split here is to have compatibility with both COCO2014 and 2017 annotations. - # In 2014, images have the pattern COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg. - # Our script downloads the images as %012d.jpg so convert accordingly. 
- file_name = self.coco.loadImgs(img_id)[0]['file_name'] - - if file_name.startswith('COCO'): - file_name = file_name.split('_')[-1] - - path = osp.join(self.root, file_name) - assert osp.exists(path), 'Image path does not exist: {}'.format(path) - - img = cv2.imread(path) - height, width, _ = img.shape - - if len(target) > 0: - # Pool all the masks for this image into one [num_objects,height,width] matrix - masks = [self.coco.annToMask(obj).reshape(-1) for obj in target] - masks = np.vstack(masks) - masks = masks.reshape(-1, height, width) - - if self.target_transform is not None and len(target) > 0: - target = self.target_transform(target, width, height) - - if self.transform is not None: - if len(target) > 0: - target = np.array(target) - img, masks, boxes, labels = self.transform(img, masks, target[:, :4], - {'num_crowds': num_crowds, 'labels': target[:, 4]}) - - # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations - num_crowds = labels['num_crowds'] - labels = labels['labels'] - - target = np.hstack((boxes, np.expand_dims(labels, axis=1))) - else: - img, _, _, _ = self.transform(img, np.zeros((1, height, width), dtype=np.float), np.array([[0, 0, 1, 1]]), - {'num_crowds': 0, 'labels': np.array([0])}) - masks = None - target = None - - if target.shape[0] == 0: - print('Warning: Augmentation output an example with no ground truth. Resampling...') - return self.pull_item(random.randint(0, len(self.ids)-1)) - - return torch.from_numpy(img).permute(2, 0, 1), target, masks, height, width, num_crowds - - def pull_image(self, index): - '''Returns the original image object at index in PIL form - - Note: not using self.__getitem__(), as any transformations passed in - could mess up this functionality. - - Argument: - index (int): index of img to show - Return: - cv2 img - ''' - img_id = self.ids[index] - path = self.coco.loadImgs(img_id)[0]['file_name'] - return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR) - - def pull_anno(self, index): - '''Returns the original annotation of image at index - - Note: not using self.__getitem__(), as any transformations passed in - could mess up this functionality. - - Argument: - index (int): index of img to get annotation of - Return: - list: [img_id, [(label, bbox coords),...]] - eg: ('001718', [('dog', (96, 13, 438, 332))]) - ''' - img_id = self.ids[index] - ann_ids = self.coco.getAnnIds(imgIds=img_id) - return self.coco.loadAnns(ann_ids) - - def __repr__(self): - fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' - fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) - fmt_str += ' Root Location: {}\n'.format(self.root) - tmp = ' Transforms (if any): ' - fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) - tmp = ' Target Transforms (if any): ' - fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) - return fmt_str - -def enforce_size(img, targets, masks, num_crowds, new_w, new_h): - """ Ensures that the image is the given size without distorting aspect ratio. 
""" - with torch.no_grad(): - _, h, w = img.size() - - if h == new_h and w == new_w: - return img, targets, masks, num_crowds - - # Resize the image so that it fits within new_w, new_h - w_prime = new_w - h_prime = h * new_w / w - - if h_prime > new_h: - w_prime *= new_h / h_prime - h_prime = new_h - - w_prime = int(w_prime) - h_prime = int(h_prime) - - # Do all the resizing - img = F.interpolate(img.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False) - img.squeeze_(0) - - # Act like each object is a color channel - masks = F.interpolate(masks.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False) - masks.squeeze_(0) - - # Scale bounding boxes (this will put them in the top left corner in the case of padding) - targets[:, [0, 2]] *= (w_prime / new_w) - targets[:, [1, 3]] *= (h_prime / new_h) - - # Finally, pad everything to be the new_w, new_h - pad_dims = (0, new_w - w_prime, 0, new_h - h_prime) - img = F.pad( img, pad_dims, mode='constant', value=0) - masks = F.pad(masks, pad_dims, mode='constant', value=0) - - return img, targets, masks, num_crowds - - - - -def detection_collate(batch): - """Custom collate fn for dealing with batches of images that have a different - number of associated object annotations (bounding boxes). - - Arguments: - batch: (tuple) A tuple of tensor images and (lists of annotations, masks) - - Return: - A tuple containing: - 1) (tensor) batch of images stacked on their 0 dim - 2) (list, list, list) annotations for a given image are stacked - on 0 dim. The output gt is a tuple of annotations and masks. - """ - targets = [] - imgs = [] - masks = [] - num_crowds = [] - - for sample in batch: - imgs.append(sample[0]) - targets.append(torch.FloatTensor(sample[1][0])) - masks.append(torch.FloatTensor(sample[1][1])) - num_crowds.append(sample[1][2]) - - return imgs, (targets, masks, num_crowds) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import os +import os.path as osp +import sys +import torch +import torch.utils.data as data +import torch.nn.functional as F +import cv2 +import numpy as np +from .config import cfg +from pycocotools import mask as maskUtils +import random + +def get_label_map(): + if cfg.dataset.label_map is None: + return {x+1: x+1 for x in range(len(cfg.dataset.class_names))} + else: + return cfg.dataset.label_map + +class COCOAnnotationTransform(object): + """Transforms a COCO annotation into a Tensor of bbox coords and label index + Initilized with a dictionary lookup of classnames to indexes + """ + def __init__(self): + self.label_map = get_label_map() + + def __call__(self, target, width, height): + """ + Args: + target (dict): COCO target json annotation as a python dict + height (int): height + width (int): width + Returns: + a list containing lists of bounding boxes [bbox coords, class idx] + """ + scale = np.array([width, height, width, height]) + res = [] + for obj in target: + if 'bbox' in obj: + bbox = obj['bbox'] + label_idx = obj['category_id'] + if label_idx >= 0: + label_idx = self.label_map[label_idx] - 1 + final_box = list(np.array([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]])/scale) + final_box.append(label_idx) + res += [final_box] # [xmin, ymin, xmax, ymax, label_idx] + else: + print("No bbox found for object ", obj) + + return res + + +class COCODetection(data.Dataset): + """`MS Coco Detection `_ Dataset. + Args: + root (string): Root directory where images are downloaded to. + 这里即为image_path,数据集图像路径 + + set_name (string): Name of the specific set of COCO images. + 自定义,用于标记 + + transform (callable, optional): A function/transform that augments the + raw images` + 图像增强方法 + + target_transform (callable, optional): A function/transform that takes + in the target (bbox) and transforms it. + 将数据集中的目标检测框(bounding box)等封装为一个专门的数据结构 + + prep_crowds (bool): Whether or not to prepare crowds for the evaluation step. + ? + """ + + def __init__(self, image_path, info_file, transform=None, + target_transform=None, + dataset_name='MS COCO', has_gt=True): + #has_gt中的gt代表ground truth,在监督学习中,数据是有标注的,以(x, t)的形式出现,其中x是输入数据,t是标注. + # 正确的t标注是ground truth, 错误的标记则不是。(也有人将所有标注数据都叫做ground truth) + + # Do this here because we have too many things named COCO + from pycocotools.coco import COCO + + if target_transform is None: + target_transform = COCOAnnotationTransform() + + self.root = image_path + self.coco = COCO(info_file) #加载标注文件, 其中COCO对象属性有1、anns :所有标注;2、catToImgs:{种类:[图像list]}的映射字典; + # 3、cats:图片中包含物体的种类;4、dataset:内含信息、许可、标注等;5、imgToAnns:{图像:[标注list]}的映射字典;6、imgs:全部图像 + + self.ids = list(self.coco.imgToAnns.keys()) + if len(self.ids) == 0 or not has_gt: #显然这个if块内语句基本不可能执行 + self.ids = list(self.coco.imgs.keys()) + + self.transform = transform #默认情况下,训练数据使用:SSDAugmentation,验证数据使用:baseTransform + self.target_transform = COCOAnnotationTransform() #82行的if块毫无意义 + + self.name = dataset_name + self.has_gt = has_gt + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: Tuple (image, (target, masks, num_crowds)). + target is the object returned by ``coco.loadAnns``. + """ + im, gt, masks, h, w, num_crowds = self.pull_item(index) + return (index, im), (gt, masks, num_crowds) + + def __len__(self): + return len(self.ids) + + def pull_item(self, index): + """ + Args: + index (int): Index + Returns: + tuple: Tuple (image, target, masks, height, width, crowd). 
+ target is the object returned by ``coco.loadAnns``. + Note that if no crowd annotations exist, crowd will be None + """ + img_id = self.ids[index] + + if self.has_gt: + ann_ids = self.coco.getAnnIds(imgIds=img_id) + + # Target has {'segmentation', 'area', iscrowd', 'image_id', 'bbox', 'category_id'} + target = [x for x in self.coco.loadAnns(ann_ids) if x['image_id'] == img_id] + else: + target = [] + + # Separate out crowd annotations. These are annotations that signify a large crowd of + # objects of said class, where there is no annotation for each individual object. Both + # during testing and training, consider these crowds as neutral. + crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])] + target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])] + num_crowds = len(crowd) + + for x in crowd: + x['category_id'] = -1 + + # This is so we ensure that all crowd annotations are at the end of the array + target += crowd + + # The split here is to have compatibility with both COCO2014 and 2017 annotations. + # In 2014, images have the pattern COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg. + # Our script downloads the images as %012d.jpg so convert accordingly. + file_name = self.coco.loadImgs(img_id)[0]['file_name'] + + if file_name.startswith('COCO'): + file_name = file_name.split('_')[-1] + + path = osp.join(self.root, file_name) + assert osp.exists(path), 'Image path does not exist: {}'.format(path) + + img = cv2.imread(path) + height, width, _ = img.shape + + if len(target) > 0: + # Pool all the masks for this image into one [num_objects,height,width] matrix + masks = [self.coco.annToMask(obj).reshape(-1) for obj in target] + masks = np.vstack(masks) + masks = masks.reshape(-1, height, width) + + if self.target_transform is not None and len(target) > 0: + target = self.target_transform(target, width, height) + + if self.transform is not None: + if len(target) > 0: + target = np.array(target) + img, masks, boxes, labels = self.transform(img, masks, target[:, :4], + {'num_crowds': num_crowds, 'labels': target[:, 4]}) + + # I stored num_crowds in labels so I didn't have to modify the entirety of augmentations + num_crowds = labels['num_crowds'] + labels = labels['labels'] + + target = np.hstack((boxes, np.expand_dims(labels, axis=1))) + else: + img, _, _, _ = self.transform(img, np.zeros((1, height, width), dtype=np.float), np.array([[0, 0, 1, 1]]), + {'num_crowds': 0, 'labels': np.array([0])}) + masks = None + target = None + + if target.shape[0] == 0: + print('Warning: Augmentation output an example with no ground truth. Resampling...') + return self.pull_item(random.randint(0, len(self.ids)-1)) + + return torch.from_numpy(img).permute(2, 0, 1), target, masks, height, width, num_crowds + + def pull_image(self, index): + '''Returns the original image object at index in PIL form + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. + + Argument: + index (int): index of img to show + Return: + cv2 img + ''' + img_id = self.ids[index] + path = self.coco.loadImgs(img_id)[0]['file_name'] + return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR) + + def pull_anno(self, index): + '''Returns the original annotation of image at index + + Note: not using self.__getitem__(), as any transformations passed in + could mess up this functionality. 
+ + Argument: + index (int): index of img to get annotation of + Return: + list: [img_id, [(label, bbox coords),...]] + eg: ('001718', [('dog', (96, 13, 438, 332))]) + ''' + img_id = self.ids[index] + ann_ids = self.coco.getAnnIds(imgIds=img_id) + return self.coco.loadAnns(ann_ids) + + def __repr__(self): + fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' + fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) + fmt_str += ' Root Location: {}\n'.format(self.root) + tmp = ' Transforms (if any): ' + fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + tmp = ' Target Transforms (if any): ' + fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + return fmt_str + +def enforce_size(img, targets, masks, num_crowds, new_w, new_h): + """ Ensures that the image is the given size without distorting aspect ratio. """ + with torch.no_grad(): + _, h, w = img.size() + + if h == new_h and w == new_w: + return img, targets, masks, num_crowds + + # Resize the image so that it fits within new_w, new_h + w_prime = new_w + h_prime = h * new_w / w + + if h_prime > new_h: + w_prime *= new_h / h_prime + h_prime = new_h + + w_prime = int(w_prime) + h_prime = int(h_prime) + + # Do all the resizing + img = F.interpolate(img.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False) + img.squeeze_(0) + + # Act like each object is a color channel + masks = F.interpolate(masks.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False) + masks.squeeze_(0) + + # Scale bounding boxes (this will put them in the top left corner in the case of padding) + targets[:, [0, 2]] *= (w_prime / new_w) + targets[:, [1, 3]] *= (h_prime / new_h) + + # Finally, pad everything to be the new_w, new_h + pad_dims = (0, new_w - w_prime, 0, new_h - h_prime) + img = F.pad( img, pad_dims, mode='constant', value=0) + masks = F.pad(masks, pad_dims, mode='constant', value=0) + + return img, targets, masks, num_crowds + + + + +def detection_collate(batch): + """Custom collate fn for dealing with batches of images that have a different + number of associated object annotations (bounding boxes). + + Arguments: + batch: (tuple) A tuple of tensor images and (lists of annotations, masks) + + Return: + A tuple containing: + 1) (tensor) batch of images stacked on their 0 dim + 2) (list, list, list) annotations for a given image are stacked + on 0 dim. The output gt is a tuple of annotations and masks. + """ + targets = [] + imgs = [] + masks = [] + num_crowds = [] + + for sample in batch: + imgs.append(sample[0]) + targets.append(torch.FloatTensor(sample[1][0])) + masks.append(torch.FloatTensor(sample[1][1])) + num_crowds.append(sample[1][2]) + + return imgs, (targets, masks, num_crowds) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/config.py b/PyTorch/contrib/cv/detection/YOLACT_plus/data/config.py index 2d66aec919716495e34e25612a0dbb4772be68e4..ee30bae3ba05640a1fbc21f2eb4a180bb937b28f 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/config.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/config.py @@ -1,842 +1,842 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from backbone import ResNetBackbone, VGGBackbone, ResNetBackboneGN, DarkNetBackbone -from math import sqrt -import torch - -# for making bounding boxes pretty -COLORS = ((244, 67, 54), - (233, 30, 99), - (156, 39, 176), - (103, 58, 183), - ( 63, 81, 181), - ( 33, 150, 243), - ( 3, 169, 244), - ( 0, 188, 212), - ( 0, 150, 136), - ( 76, 175, 80), - (139, 195, 74), - (205, 220, 57), - (255, 235, 59), - (255, 193, 7), - (255, 152, 0), - (255, 87, 34), - (121, 85, 72), - (158, 158, 158), - ( 96, 125, 139)) - - -# These are in BGR and are for ImageNet -MEANS = (103.94, 116.78, 123.68) -STD = (57.38, 57.12, 58.40) - -COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', - 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', - 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', - 'scissors', 'teddy bear', 'hair drier', 'toothbrush') - -COCO_LABEL_MAP = { 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, - 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, - 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, - 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32, - 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, - 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, - 54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, - 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, - 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72, - 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80} - - - -# ----------------------- CONFIG CLASS ----------------------- # - -class Config(object): - """ - Holds the configuration for anything you want it to. - To get the currently active config, call get_cfg(). - - To use, just do cfg.x instead of cfg['x']. - I made this because doing cfg['x'] all the time is dumb. - """ - - def __init__(self, config_dict): - for key, val in config_dict.items(): - self.__setattr__(key, val) - - def copy(self, new_config_dict={}): - """ - Copies this config into a new config object, making - the changes given by new_config_dict. - """ - - ret = Config(vars(self)) - - for key, val in new_config_dict.items(): - ret.__setattr__(key, val) - - return ret - - def replace(self, new_config_dict): - """ - Copies new_config_dict into this config object. - Note: new_config_dict can also be a config object. 
- """ - if isinstance(new_config_dict, Config): - new_config_dict = vars(new_config_dict) - - for key, val in new_config_dict.items(): - self.__setattr__(key, val) - - def print(self): - for k, v in vars(self).items(): - print(k, ' = ', v) - - - - - -# ----------------------- DATASETS ----------------------- # - -dataset_base = Config({ - 'name': 'Base Dataset', - - # Training images and annotations - 'train_images': './data/coco/images/', - 'train_info': 'path_to_annotation_file', - - # Validation images and annotations. - 'valid_images': './data/coco/images/', - 'valid_info': 'path_to_annotation_file', - - # Whether or not to load GT. If this is False, eval.py quantitative evaluation won't work. - 'has_gt': True, - - # A list of names for each of you classes. - 'class_names': COCO_CLASSES, - - # COCO class ids aren't sequential, so this is a bandage fix. If your ids aren't sequential, - # provide a map from category_id -> index in class_names + 1 (the +1 is there because it's 1-indexed). - # If not specified, this just assumes category ids start at 1 and increase sequentially. - 'label_map': None -}) - -coco2014_dataset = dataset_base.copy({ - 'name': 'COCO 2014', - - 'train_info': './data/coco/annotations/instances_train2014.json', - 'valid_info': './data/coco/annotations/instances_val2014.json', - - 'label_map': COCO_LABEL_MAP -}) - -coco2017_dataset = dataset_base.copy({ - 'name': 'COCO 2017', - - 'train_info': './data/coco/annotations/instances_train2017.json', - 'valid_info': './data/coco/annotations/instances_val2017.json', - - 'label_map': COCO_LABEL_MAP -}) - -coco2017_testdev_dataset = dataset_base.copy({ - 'name': 'COCO 2017 Test-Dev', - - 'valid_info': './data/coco/annotations/image_info_test-dev2017.json', - 'has_gt': False, - - 'label_map': COCO_LABEL_MAP -}) - -PASCAL_CLASSES = ("aeroplane", "bicycle", "bird", "boat", "bottle", - "bus", "car", "cat", "chair", "cow", "diningtable", - "dog", "horse", "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor") - -pascal_sbd_dataset = dataset_base.copy({ - 'name': 'Pascal SBD 2012', - - 'train_images': './data/sbd/img', - 'valid_images': './data/sbd/img', - - 'train_info': './data/sbd/pascal_sbd_train.json', - 'valid_info': './data/sbd/pascal_sbd_val.json', - - 'class_names': PASCAL_CLASSES, -}) - - - - - -# ----------------------- TRANSFORMS ----------------------- # - -resnet_transform = Config({ - 'channel_order': 'RGB', - 'normalize': True, - 'subtract_means': False, - 'to_float': False, -}) - -vgg_transform = Config({ - # Note that though vgg is traditionally BGR, - # the channel order of vgg_reducedfc.pth is RGB. 
- 'channel_order': 'RGB', - 'normalize': False, - 'subtract_means': True, - 'to_float': False, -}) - -darknet_transform = Config({ - 'channel_order': 'RGB', - 'normalize': False, - 'subtract_means': False, - 'to_float': True, -}) - - - - - -# ----------------------- BACKBONES ----------------------- # - -backbone_base = Config({ - 'name': 'Base Backbone', - 'path': 'path/to/pretrained/weights', - 'type': object, - 'args': tuple(), - 'transform': resnet_transform, - - 'selected_layers': list(), - 'pred_scales': list(), - 'pred_aspect_ratios': list(), - - 'use_pixel_scales': False, - 'preapply_sqrt': True, - 'use_square_anchors': False, -}) - -resnet101_backbone = backbone_base.copy({ - 'name': 'ResNet101', - 'path': 'resnet101_reducedfc.pth', - 'type': ResNetBackbone, - 'args': ([3, 4, 23, 3],), - 'transform': resnet_transform, - - 'selected_layers': list(range(2, 8)), - 'pred_scales': [[1]]*6, - 'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6, -}) - -resnet101_gn_backbone = backbone_base.copy({ - 'name': 'ResNet101_GN', - 'path': 'R-101-GN.pkl', - 'type': ResNetBackboneGN, - 'args': ([3, 4, 23, 3],), - 'transform': resnet_transform, - - 'selected_layers': list(range(2, 8)), - 'pred_scales': [[1]]*6, - 'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6, -}) - -resnet101_dcn_inter3_backbone = resnet101_backbone.copy({ - 'name': 'ResNet101_DCN_Interval3', - 'args': ([3, 4, 23, 3], [0, 4, 23, 3], 3), -}) - -resnet50_backbone = resnet101_backbone.copy({ - 'name': 'ResNet50', - 'path': 'resnet50-19c8e357.pth', - 'type': ResNetBackbone, - 'args': ([3, 4, 6, 3],), - 'transform': resnet_transform, -}) - -resnet50_dcnv2_backbone = resnet50_backbone.copy({ - 'name': 'ResNet50_DCNv2', - 'args': ([3, 4, 6, 3], [0, 4, 6, 3]), -}) - -darknet53_backbone = backbone_base.copy({ - 'name': 'DarkNet53', - 'path': 'darknet53.pth', - 'type': DarkNetBackbone, - 'args': ([1, 2, 8, 8, 4],), - 'transform': darknet_transform, - - 'selected_layers': list(range(3, 9)), - 'pred_scales': [[3.5, 4.95], [3.6, 4.90], [3.3, 4.02], [2.7, 3.10], [2.1, 2.37], [1.8, 1.92]], - 'pred_aspect_ratios': [ [[1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n], [1]] for n in [3, 5, 5, 5, 3, 3] ], -}) - -vgg16_arch = [[64, 64], - [ 'M', 128, 128], - [ 'M', 256, 256, 256], - [('M', {'kernel_size': 2, 'stride': 2, 'ceil_mode': True}), 512, 512, 512], - [ 'M', 512, 512, 512], - [('M', {'kernel_size': 3, 'stride': 1, 'padding': 1}), - (1024, {'kernel_size': 3, 'padding': 6, 'dilation': 6}), - (1024, {'kernel_size': 1})]] - -vgg16_backbone = backbone_base.copy({ - 'name': 'VGG16', - 'path': 'vgg16_reducedfc.pth', - 'type': VGGBackbone, - 'args': (vgg16_arch, [(256, 2), (128, 2), (128, 1), (128, 1)], [3]), - 'transform': vgg_transform, - - 'selected_layers': [3] + list(range(5, 10)), - 'pred_scales': [[5, 4]]*6, - 'pred_aspect_ratios': [ [[1], [1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n]] for n in [3, 5, 5, 5, 3, 3] ], -}) - - - - - -# ----------------------- MASK BRANCH TYPES ----------------------- # - -mask_type = Config({ - # Direct produces masks directly as the output of each pred module. - # This is denoted as fc-mask in the paper. - # Parameters: mask_size, use_gt_bboxes - 'direct': 0, - - # Lincomb produces coefficients as the output of each pred module then uses those coefficients - # to linearly combine features from a prototype network to create image-sized masks. 
- # Parameters: - # - masks_to_train (int): Since we're producing (near) full image masks, it'd take too much - # vram to backprop on every single mask. Thus we select only a subset. - # - mask_proto_src (int): The input layer to the mask prototype generation network. This is an - # index in backbone.layers. Use to use the image itself instead. - # - mask_proto_net (list): A list of layers in the mask proto network with the last one - # being where the masks are taken from. Each conv layer is in - # the form (num_features, kernel_size, **kwdargs). An empty - # list means to use the source for prototype masks. If the - # kernel_size is negative, this creates a deconv layer instead. - # If the kernel_size is negative and the num_features is None, - # this creates a simple bilinear interpolation layer instead. - # - mask_proto_bias (bool): Whether to include an extra coefficient that corresponds to a proto - # mask of all ones. - # - mask_proto_prototype_activation (func): The activation to apply to each prototype mask. - # - mask_proto_mask_activation (func): After summing the prototype masks with the predicted - # coeffs, what activation to apply to the final mask. - # - mask_proto_coeff_activation (func): The activation to apply to the mask coefficients. - # - mask_proto_crop (bool): If True, crop the mask with the predicted bbox during training. - # - mask_proto_crop_expand (float): If cropping, the percent to expand the cropping bbox by - # in each direction. This is to make the model less reliant - # on perfect bbox predictions. - # - mask_proto_loss (str [l1|disj]): If not None, apply an l1 or disjunctive regularization - # loss directly to the prototype masks. - # - mask_proto_binarize_downsampled_gt (bool): Binarize GT after dowsnampling during training? - # - mask_proto_normalize_mask_loss_by_sqrt_area (bool): Whether to normalize mask loss by sqrt(sum(gt)) - # - mask_proto_reweight_mask_loss (bool): Reweight mask loss such that background is divided by - # #background and foreground is divided by #foreground. - # - mask_proto_grid_file (str): The path to the grid file to use with the next option. - # This should be a numpy.dump file with shape [numgrids, h, w] - # where h and w are w.r.t. the mask_proto_src convout. - # - mask_proto_use_grid (bool): Whether to add extra grid features to the proto_net input. - # - mask_proto_coeff_gate (bool): Add an extra set of sigmoided coefficients that is multiplied - # into the predicted coefficients in order to "gate" them. - # - mask_proto_prototypes_as_features (bool): For each prediction module, downsample the prototypes - # to the convout size of that module and supply the prototypes as input - # in addition to the already supplied backbone features. - # - mask_proto_prototypes_as_features_no_grad (bool): If the above is set, don't backprop gradients to - # to the prototypes from the network head. - # - mask_proto_remove_empty_masks (bool): Remove masks that are downsampled to 0 during loss calculations. - # - mask_proto_reweight_coeff (float): The coefficient to multiple the forground pixels with if reweighting. - # - mask_proto_coeff_diversity_loss (bool): Apply coefficient diversity loss on the coefficients so that the same - # instance has similar coefficients. - # - mask_proto_coeff_diversity_alpha (float): The weight to use for the coefficient diversity loss. - # - mask_proto_normalize_emulate_roi_pooling (bool): Normalize the mask loss to emulate roi pooling's affect on loss. 
- # - mask_proto_double_loss (bool): Whether to use the old loss in addition to any special new losses. - # - mask_proto_double_loss_alpha (float): The alpha to weight the above loss. - # - mask_proto_split_prototypes_by_head (bool): If true, this will give each prediction head its own prototypes. - # - mask_proto_crop_with_pred_box (bool): Whether to crop with the predicted box or the gt box. - 'lincomb': 1, -}) - - - - - -# ----------------------- ACTIVATION FUNCTIONS ----------------------- # - -activation_func = Config({ - 'tanh': torch.tanh, - 'sigmoid': torch.sigmoid, - 'softmax': lambda x: torch.nn.functional.softmax(x, dim=-1), - 'relu': lambda x: torch.nn.functional.relu(x, inplace=True), - 'none': lambda x: x, -}) - - - - - -# ----------------------- FPN DEFAULTS ----------------------- # - -fpn_base = Config({ - # The number of features to have in each FPN layer - 'num_features': 256, - - # The upsampling mode used - 'interpolation_mode': 'bilinear', - - # The number of extra layers to be produced by downsampling starting at P5 - 'num_downsample': 1, - - # Whether to down sample with a 3x3 stride 2 conv layer instead of just a stride 2 selection - 'use_conv_downsample': False, - - # Whether to pad the pred layers with 1 on each side (I forgot to add this at the start) - # This is just here for backwards compatibility - 'pad': True, - - # Whether to add relu to the downsampled layers. - 'relu_downsample_layers': False, - - # Whether to add relu to the regular layers - 'relu_pred_layers': True, -}) - - - - - -# ----------------------- CONFIG DEFAULTS ----------------------- # - -coco_base_config = Config({ - 'dataset': coco2014_dataset, - 'num_classes': 81, # This should include the background class - - 'max_iter': 400000, - - # The maximum number of detections for evaluation - 'max_num_detections': 100, - - # dw' = momentum * dw - lr * (grad + decay * w) - 'lr': 1e-3, - 'momentum': 0.9, - 'decay': 5e-4, - - # For each lr step, what to multiply the lr with - 'gamma': 0.1, - 'lr_steps': (280000, 360000, 400000), - - # Initial learning rate to linearly warmup from (if until > 0) - 'lr_warmup_init': 1e-4, - - # If > 0 then increase the lr linearly from warmup_init to lr each iter for until iters - 'lr_warmup_until': 500, - - # The terms to scale the respective loss by - 'conf_alpha': 1, - 'bbox_alpha': 1.5, - 'mask_alpha': 0.4 / 256 * 140 * 140, # Some funky equation. Don't worry about it. - - # Eval.py sets this if you just want to run YOLACT as a detector - 'eval_mask_branch': True, - - # Top_k examples to consider for NMS - 'nms_top_k': 200, - # Examples with confidence less than this are not considered by NMS - 'nms_conf_thresh': 0.05, - # Boxes with IoU overlap greater than this threshold will be culled during NMS - 'nms_thresh': 0.5, - - # See mask_type for details. 
- 'mask_type': mask_type.direct, - 'mask_size': 16, - 'masks_to_train': 100, - 'mask_proto_src': None, - 'mask_proto_net': [(256, 3, {}), (256, 3, {})], - 'mask_proto_bias': False, - 'mask_proto_prototype_activation': activation_func.relu, - 'mask_proto_mask_activation': activation_func.sigmoid, - 'mask_proto_coeff_activation': activation_func.tanh, - 'mask_proto_crop': True, - 'mask_proto_crop_expand': 0, - 'mask_proto_loss': None, - 'mask_proto_binarize_downsampled_gt': True, - 'mask_proto_normalize_mask_loss_by_sqrt_area': False, - 'mask_proto_reweight_mask_loss': False, - 'mask_proto_grid_file': 'data/grid.npy', - 'mask_proto_use_grid': False, - 'mask_proto_coeff_gate': False, - 'mask_proto_prototypes_as_features': False, - 'mask_proto_prototypes_as_features_no_grad': False, - 'mask_proto_remove_empty_masks': False, - 'mask_proto_reweight_coeff': 1, - 'mask_proto_coeff_diversity_loss': False, - 'mask_proto_coeff_diversity_alpha': 1, - 'mask_proto_normalize_emulate_roi_pooling': False, - 'mask_proto_double_loss': False, - 'mask_proto_double_loss_alpha': 1, - 'mask_proto_split_prototypes_by_head': False, - 'mask_proto_crop_with_pred_box': False, - - # SSD data augmentation parameters - # Randomize hue, vibrance, etc. - 'augment_photometric_distort': True, - # Have a chance to scale down the image and pad (to emulate smaller detections) - 'augment_expand': True, - # Potentialy sample a random crop from the image and put it in a random place - 'augment_random_sample_crop': True, - # Mirror the image with a probability of 1/2 - 'augment_random_mirror': True, - # Flip the image vertically with a probability of 1/2 - 'augment_random_flip': False, - # With uniform probability, rotate the image [0,90,180,270] degrees - 'augment_random_rot90': False, - - # Discard detections with width and height smaller than this (in absolute width and height) - 'discard_box_width': 4 / 550, - 'discard_box_height': 4 / 550, - - # If using batchnorm anywhere in the backbone, freeze the batchnorm layer during training. - # Note: any additional batch norm layers after the backbone will not be frozen. - 'freeze_bn': False, - - # Set this to a config object if you want an FPN (inherit from fpn_base). See fpn_base for details. - 'fpn': None, - - # Use the same weights for each network head - 'share_prediction_module': False, - - # For hard negative mining, instead of using the negatives that are leastl confidently background, - # use negatives that are most confidently not background. - 'ohem_use_most_confident': False, - - # Use focal loss as described in https://arxiv.org/pdf/1708.02002.pdf instead of OHEM - 'use_focal_loss': False, - 'focal_loss_alpha': 0.25, - 'focal_loss_gamma': 2, - - # The initial bias toward forground objects, as specified in the focal loss paper - 'focal_loss_init_pi': 0.01, - - # Keeps track of the average number of examples for each class, and weights the loss for that class accordingly. - 'use_class_balanced_conf': False, - - # Whether to use sigmoid focal loss instead of softmax, all else being the same. - 'use_sigmoid_focal_loss': False, - - # Use class[0] to be the objectness score and class[1:] to be the softmax predicted class. - # Note: at the moment this is only implemented if use_focal_loss is on. - 'use_objectness_score': False, - - # Adds a global pool + fc layer to the smallest selected layer that predicts the existence of each of the 80 classes. - # This branch is only evaluated during training time and is just there for multitask learning. 
- 'use_class_existence_loss': False, - 'class_existence_alpha': 1, - - # Adds a 1x1 convolution directly to the biggest selected layer that predicts a semantic segmentations for each of the 80 classes. - # This branch is only evaluated during training time and is just there for multitask learning. - 'use_semantic_segmentation_loss': False, - 'semantic_segmentation_alpha': 1, - - # Adds another branch to the netwok to predict Mask IoU. - 'use_mask_scoring': False, - 'mask_scoring_alpha': 1, - - # Match gt boxes using the Box2Pix change metric instead of the standard IoU metric. - # Note that the threshold you set for iou_threshold should be negative with this setting on. - 'use_change_matching': False, - - # Uses the same network format as mask_proto_net, except this time it's for adding extra head layers before the final - # prediction in prediction modules. If this is none, no extra layers will be added. - 'extra_head_net': None, - - # What params should the final head layers have (the ones that predict box, confidence, and mask coeffs) - 'head_layer_params': {'kernel_size': 3, 'padding': 1}, - - # Add extra layers between the backbone and the network heads - # The order is (bbox, conf, mask) - 'extra_layers': (0, 0, 0), - - # During training, to match detections with gt, first compute the maximum gt IoU for each prior. - # Then, any of those priors whose maximum overlap is over the positive threshold, mark as positive. - # For any priors whose maximum is less than the negative iou threshold, mark them as negative. - # The rest are neutral and not used in calculating the loss. - 'positive_iou_threshold': 0.5, - 'negative_iou_threshold': 0.5, - - # When using ohem, the ratio between positives and negatives (3 means 3 negatives to 1 positive) - 'ohem_negpos_ratio': 3, - - # If less than 1, anchors treated as a negative that have a crowd iou over this threshold with - # the crowd boxes will be treated as a neutral. - 'crowd_iou_threshold': 1, - - # This is filled in at runtime by Yolact's __init__, so don't touch it - 'mask_dim': None, - - # Input image size. - 'max_size': 300, - - # Whether or not to do post processing on the cpu at test time - 'force_cpu_nms': True, - - # Whether to use mask coefficient cosine similarity nms instead of bbox iou nms - 'use_coeff_nms': False, - - # Whether or not to have a separate branch whose sole purpose is to act as the coefficients for coeff_diversity_loss - # Remember to turn on coeff_diversity_loss, or these extra coefficients won't do anything! - # To see their effect, also remember to turn on use_coeff_nms. - 'use_instance_coeff': False, - 'num_instance_coeffs': 64, - - # Whether or not to tie the mask loss / box loss to 0 - 'train_masks': True, - 'train_boxes': True, - # If enabled, the gt masks will be cropped using the gt bboxes instead of the predicted ones. - # This speeds up training time considerably but results in much worse mAP at test time. - 'use_gt_bboxes': False, - - # Whether or not to preserve aspect ratio when resizing the image. - # If True, this will resize all images to be max_size^2 pixels in area while keeping aspect ratio. 
- # If False, all images are resized to max_size x max_size - 'preserve_aspect_ratio': False, - - # Whether or not to use the prediction module (c) from DSSD - 'use_prediction_module': False, - - # Whether or not to use the predicted coordinate scheme from Yolo v2 - 'use_yolo_regressors': False, - - # For training, bboxes are considered "positive" if their anchors have a 0.5 IoU overlap - # or greater with a ground truth box. If this is true, instead of using the anchor boxes - # for this IoU computation, the matching function will use the predicted bbox coordinates. - # Don't turn this on if you're not using yolo regressors! - 'use_prediction_matching': False, - - # A list of settings to apply after the specified iteration. Each element of the list should look like - # (iteration, config_dict) where config_dict is a dictionary you'd pass into a config object's init. - 'delayed_settings': [], - - # Use command-line arguments to set this. - 'no_jit': False, - - 'backbone': None, - 'name': 'base_config', - - # Fast Mask Re-scoring Network - # Inspried by Mask Scoring R-CNN (https://arxiv.org/abs/1903.00241) - # Do not crop out the mask with bbox but slide a convnet on the image-size mask, - # then use global pooling to get the final mask score - 'use_maskiou': False, - - # Archecture for the mask iou network. A (num_classes-1, 1, {}) layer is appended to the end. - 'maskiou_net': [], - - # Discard predicted masks whose area is less than this - 'discard_mask_area': -1, - - 'maskiou_alpha': 1.0, - 'rescore_mask': False, - 'rescore_bbox': False, - 'maskious_to_train': -1, -}) - - - - - -# ----------------------- YOLACT v1.0 CONFIGS ----------------------- # - -yolact_base_config = coco_base_config.copy({ - 'name': 'yolact_base', - - # Dataset stuff - 'dataset': coco2017_dataset, - 'num_classes': len(coco2017_dataset.class_names) + 1, - - # Image Size - 'max_size': 550, - - # Training params - 'lr_steps': (280000, 600000, 700000, 750000), - 'max_iter': 1200000, - - # Backbone Settings - 'backbone': resnet101_backbone.copy({ - 'selected_layers': list(range(1, 4)), - 'use_pixel_scales': True, - 'preapply_sqrt': False, - 'use_square_anchors': True, # This is for backward compatability with a bug - - 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, - 'pred_scales': [[24], [48], [96], [192], [384]], - }), - - # FPN Settings - 'fpn': fpn_base.copy({ - 'use_conv_downsample': True, - 'num_downsample': 2, - }), - - # Mask Settings - 'mask_type': mask_type.lincomb, - 'mask_alpha': 6.125, - 'mask_proto_src': 0, - 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})], - 'mask_proto_normalize_emulate_roi_pooling': True, - - # Other stuff - 'share_prediction_module': True, - 'extra_head_net': [(256, 3, {'padding': 1})], - - 'positive_iou_threshold': 0.5, - 'negative_iou_threshold': 0.4, - - 'crowd_iou_threshold': 0.7, - - 'use_semantic_segmentation_loss': True, -}) - -yolact_im400_config = yolact_base_config.copy({ - 'name': 'yolact_im400', - - 'max_size': 400, - 'backbone': yolact_base_config.backbone.copy({ - 'pred_scales': [[int(x[0] / yolact_base_config.max_size * 400)] for x in yolact_base_config.backbone.pred_scales], - }), -}) - -yolact_im700_config = yolact_base_config.copy({ - 'name': 'yolact_im700', - - 'masks_to_train': 300, - 'max_size': 700, - 'backbone': yolact_base_config.backbone.copy({ - 'pred_scales': [[int(x[0] / yolact_base_config.max_size * 700)] for x in yolact_base_config.backbone.pred_scales], - }), -}) - -yolact_darknet53_config = 
yolact_base_config.copy({ - 'name': 'yolact_darknet53', - - 'backbone': darknet53_backbone.copy({ - 'selected_layers': list(range(2, 5)), - - 'pred_scales': yolact_base_config.backbone.pred_scales, - 'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios, - 'use_pixel_scales': True, - 'preapply_sqrt': False, - 'use_square_anchors': True, # This is for backward compatability with a bug - }), -}) - -yolact_resnet50_config = yolact_base_config.copy({ - 'name': 'yolact_resnet50', - - 'backbone': resnet50_backbone.copy({ - 'selected_layers': list(range(1, 4)), - - 'pred_scales': yolact_base_config.backbone.pred_scales, - 'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios, - 'use_pixel_scales': True, - 'preapply_sqrt': False, - 'use_square_anchors': True, # This is for backward compatability with a bug - }), -}) - - -yolact_resnet50_pascal_config = yolact_resnet50_config.copy({ - 'name': None, # Will default to yolact_resnet50_pascal - - # Dataset stuff - 'dataset': pascal_sbd_dataset, - 'num_classes': len(pascal_sbd_dataset.class_names) + 1, - - 'max_iter': 120000, - 'lr_steps': (60000, 100000), - - 'backbone': yolact_resnet50_config.backbone.copy({ - 'pred_scales': [[32], [64], [128], [256], [512]], - 'use_square_anchors': False, - }) -}) - -# ----------------------- YOLACT++ CONFIGS ----------------------- # - -yolact_plus_base_config = yolact_base_config.copy({ - 'name': 'yolact_plus_base', - - 'backbone': resnet101_dcn_inter3_backbone.copy({ - 'selected_layers': list(range(1, 4)), - - 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, - 'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]], - 'use_pixel_scales': True, - 'preapply_sqrt': False, - 'use_square_anchors': False, - }), - - 'use_maskiou': False, - 'maskiou_net': [(8, 3, {'stride': 2}), (16, 3, {'stride': 2}), (32, 3, {'stride': 2}), (64, 3, {'stride': 2}), (128, 3, {'stride': 2})], - 'maskiou_alpha': 25, - 'rescore_bbox': False, - 'rescore_mask': True, - - 'discard_mask_area': 5*5, -}) - -yolact_plus_resnet50_config = yolact_plus_base_config.copy({ - 'name': 'yolact_plus_resnet50', - - 'backbone': resnet50_dcnv2_backbone.copy({ - 'selected_layers': list(range(1, 4)), - - 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, - 'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]], - 'use_pixel_scales': True, - 'preapply_sqrt': False, - 'use_square_anchors': False, - }), -}) -yolact_plus_resnet50_inf_config = yolact_plus_resnet50_config.copy({ - 'use_maskiou': True -}) - -# Default config -cfg = yolact_base_config.copy() - -def set_cfg(config_name:str): - """ Sets the active config. Works even if cfg is already imported! """ - global cfg - - # Note this is not just an eval because I'm lazy, but also because it can - # be used like ssd300_config.copy({'max_size': 400}) for extreme fine-tuning - cfg.replace(eval(config_name)) - - if cfg.name is None: - cfg.name = config_name.split('_config')[0] - -def set_dataset(dataset_name:str): - """ Sets the dataset of the current config. """ - cfg.dataset = eval(dataset_name) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from backbone import ResNetBackbone, VGGBackbone, ResNetBackboneGN, DarkNetBackbone +from math import sqrt +import torch + +# for making bounding boxes pretty +COLORS = ((244, 67, 54), + (233, 30, 99), + (156, 39, 176), + (103, 58, 183), + ( 63, 81, 181), + ( 33, 150, 243), + ( 3, 169, 244), + ( 0, 188, 212), + ( 0, 150, 136), + ( 76, 175, 80), + (139, 195, 74), + (205, 220, 57), + (255, 235, 59), + (255, 193, 7), + (255, 152, 0), + (255, 87, 34), + (121, 85, 72), + (158, 158, 158), + ( 96, 125, 139)) + + +# These are in BGR and are for ImageNet +MEANS = (103.94, 116.78, 123.68) +STD = (57.38, 57.12, 58.40) + +COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', + 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', + 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', + 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', + 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', + 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', + 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush') + +COCO_LABEL_MAP = { 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, + 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, + 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, + 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32, + 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, + 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, + 54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, + 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, + 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72, + 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80} + + + +# ----------------------- CONFIG CLASS ----------------------- # + +class Config(object): + """ + Holds the configuration for anything you want it to. + To get the currently active config, call get_cfg(). + + To use, just do cfg.x instead of cfg['x']. + I made this because doing cfg['x'] all the time is dumb. + """ + + def __init__(self, config_dict): + for key, val in config_dict.items(): + self.__setattr__(key, val) + + def copy(self, new_config_dict={}): + """ + Copies this config into a new config object, making + the changes given by new_config_dict. + """ + + ret = Config(vars(self)) + + for key, val in new_config_dict.items(): + ret.__setattr__(key, val) + + return ret + + def replace(self, new_config_dict): + """ + Copies new_config_dict into this config object. + Note: new_config_dict can also be a config object. 
+ """ + if isinstance(new_config_dict, Config): + new_config_dict = vars(new_config_dict) + + for key, val in new_config_dict.items(): + self.__setattr__(key, val) + + def print(self): + for k, v in vars(self).items(): + print(k, ' = ', v) + + + + + +# ----------------------- DATASETS ----------------------- # + +dataset_base = Config({ + 'name': 'Base Dataset', + + # Training images and annotations + 'train_images': './data/coco/images/', + 'train_info': 'path_to_annotation_file', + + # Validation images and annotations. + 'valid_images': './data/coco/images/', + 'valid_info': 'path_to_annotation_file', + + # Whether or not to load GT. If this is False, eval.py quantitative evaluation won't work. + 'has_gt': True, + + # A list of names for each of you classes. + 'class_names': COCO_CLASSES, + + # COCO class ids aren't sequential, so this is a bandage fix. If your ids aren't sequential, + # provide a map from category_id -> index in class_names + 1 (the +1 is there because it's 1-indexed). + # If not specified, this just assumes category ids start at 1 and increase sequentially. + 'label_map': None +}) + +coco2014_dataset = dataset_base.copy({ + 'name': 'COCO 2014', + + 'train_info': './data/coco/annotations/instances_train2014.json', + 'valid_info': './data/coco/annotations/instances_val2014.json', + + 'label_map': COCO_LABEL_MAP +}) + +coco2017_dataset = dataset_base.copy({ + 'name': 'COCO 2017', + + 'train_info': './data/coco/annotations/instances_train2017.json', + 'valid_info': './data/coco/annotations/instances_val2017.json', + + 'label_map': COCO_LABEL_MAP +}) + +coco2017_testdev_dataset = dataset_base.copy({ + 'name': 'COCO 2017 Test-Dev', + + 'valid_info': './data/coco/annotations/image_info_test-dev2017.json', + 'has_gt': False, + + 'label_map': COCO_LABEL_MAP +}) + +PASCAL_CLASSES = ("aeroplane", "bicycle", "bird", "boat", "bottle", + "bus", "car", "cat", "chair", "cow", "diningtable", + "dog", "horse", "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor") + +pascal_sbd_dataset = dataset_base.copy({ + 'name': 'Pascal SBD 2012', + + 'train_images': './data/sbd/img', + 'valid_images': './data/sbd/img', + + 'train_info': './data/sbd/pascal_sbd_train.json', + 'valid_info': './data/sbd/pascal_sbd_val.json', + + 'class_names': PASCAL_CLASSES, +}) + + + + + +# ----------------------- TRANSFORMS ----------------------- # + +resnet_transform = Config({ + 'channel_order': 'RGB', + 'normalize': True, + 'subtract_means': False, + 'to_float': False, +}) + +vgg_transform = Config({ + # Note that though vgg is traditionally BGR, + # the channel order of vgg_reducedfc.pth is RGB. 
+ 'channel_order': 'RGB', + 'normalize': False, + 'subtract_means': True, + 'to_float': False, +}) + +darknet_transform = Config({ + 'channel_order': 'RGB', + 'normalize': False, + 'subtract_means': False, + 'to_float': True, +}) + + + + + +# ----------------------- BACKBONES ----------------------- # + +backbone_base = Config({ + 'name': 'Base Backbone', + 'path': 'path/to/pretrained/weights', + 'type': object, + 'args': tuple(), + 'transform': resnet_transform, + + 'selected_layers': list(), + 'pred_scales': list(), + 'pred_aspect_ratios': list(), + + 'use_pixel_scales': False, + 'preapply_sqrt': True, + 'use_square_anchors': False, +}) + +resnet101_backbone = backbone_base.copy({ + 'name': 'ResNet101', + 'path': 'resnet101_reducedfc.pth', + 'type': ResNetBackbone, + 'args': ([3, 4, 23, 3],), + 'transform': resnet_transform, + + 'selected_layers': list(range(2, 8)), + 'pred_scales': [[1]]*6, + 'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6, +}) + +resnet101_gn_backbone = backbone_base.copy({ + 'name': 'ResNet101_GN', + 'path': 'R-101-GN.pkl', + 'type': ResNetBackboneGN, + 'args': ([3, 4, 23, 3],), + 'transform': resnet_transform, + + 'selected_layers': list(range(2, 8)), + 'pred_scales': [[1]]*6, + 'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6, +}) + +resnet101_dcn_inter3_backbone = resnet101_backbone.copy({ + 'name': 'ResNet101_DCN_Interval3', + 'args': ([3, 4, 23, 3], [0, 4, 23, 3], 3), +}) + +resnet50_backbone = resnet101_backbone.copy({ + 'name': 'ResNet50', + 'path': 'resnet50-19c8e357.pth', + 'type': ResNetBackbone, + 'args': ([3, 4, 6, 3],), + 'transform': resnet_transform, +}) + +resnet50_dcnv2_backbone = resnet50_backbone.copy({ + 'name': 'ResNet50_DCNv2', + 'args': ([3, 4, 6, 3], [0, 4, 6, 3]), +}) + +darknet53_backbone = backbone_base.copy({ + 'name': 'DarkNet53', + 'path': 'darknet53.pth', + 'type': DarkNetBackbone, + 'args': ([1, 2, 8, 8, 4],), + 'transform': darknet_transform, + + 'selected_layers': list(range(3, 9)), + 'pred_scales': [[3.5, 4.95], [3.6, 4.90], [3.3, 4.02], [2.7, 3.10], [2.1, 2.37], [1.8, 1.92]], + 'pred_aspect_ratios': [ [[1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n], [1]] for n in [3, 5, 5, 5, 3, 3] ], +}) + +vgg16_arch = [[64, 64], + [ 'M', 128, 128], + [ 'M', 256, 256, 256], + [('M', {'kernel_size': 2, 'stride': 2, 'ceil_mode': True}), 512, 512, 512], + [ 'M', 512, 512, 512], + [('M', {'kernel_size': 3, 'stride': 1, 'padding': 1}), + (1024, {'kernel_size': 3, 'padding': 6, 'dilation': 6}), + (1024, {'kernel_size': 1})]] + +vgg16_backbone = backbone_base.copy({ + 'name': 'VGG16', + 'path': 'vgg16_reducedfc.pth', + 'type': VGGBackbone, + 'args': (vgg16_arch, [(256, 2), (128, 2), (128, 1), (128, 1)], [3]), + 'transform': vgg_transform, + + 'selected_layers': [3] + list(range(5, 10)), + 'pred_scales': [[5, 4]]*6, + 'pred_aspect_ratios': [ [[1], [1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n]] for n in [3, 5, 5, 5, 3, 3] ], +}) + + + + + +# ----------------------- MASK BRANCH TYPES ----------------------- # + +mask_type = Config({ + # Direct produces masks directly as the output of each pred module. + # This is denoted as fc-mask in the paper. + # Parameters: mask_size, use_gt_bboxes + 'direct': 0, + + # Lincomb produces coefficients as the output of each pred module then uses those coefficients + # to linearly combine features from a prototype network to create image-sized masks. 
+ # Parameters: + # - masks_to_train (int): Since we're producing (near) full image masks, it'd take too much + # vram to backprop on every single mask. Thus we select only a subset. + # - mask_proto_src (int): The input layer to the mask prototype generation network. This is an + # index in backbone.layers. Use to use the image itself instead. + # - mask_proto_net (list): A list of layers in the mask proto network with the last one + # being where the masks are taken from. Each conv layer is in + # the form (num_features, kernel_size, **kwdargs). An empty + # list means to use the source for prototype masks. If the + # kernel_size is negative, this creates a deconv layer instead. + # If the kernel_size is negative and the num_features is None, + # this creates a simple bilinear interpolation layer instead. + # - mask_proto_bias (bool): Whether to include an extra coefficient that corresponds to a proto + # mask of all ones. + # - mask_proto_prototype_activation (func): The activation to apply to each prototype mask. + # - mask_proto_mask_activation (func): After summing the prototype masks with the predicted + # coeffs, what activation to apply to the final mask. + # - mask_proto_coeff_activation (func): The activation to apply to the mask coefficients. + # - mask_proto_crop (bool): If True, crop the mask with the predicted bbox during training. + # - mask_proto_crop_expand (float): If cropping, the percent to expand the cropping bbox by + # in each direction. This is to make the model less reliant + # on perfect bbox predictions. + # - mask_proto_loss (str [l1|disj]): If not None, apply an l1 or disjunctive regularization + # loss directly to the prototype masks. + # - mask_proto_binarize_downsampled_gt (bool): Binarize GT after dowsnampling during training? + # - mask_proto_normalize_mask_loss_by_sqrt_area (bool): Whether to normalize mask loss by sqrt(sum(gt)) + # - mask_proto_reweight_mask_loss (bool): Reweight mask loss such that background is divided by + # #background and foreground is divided by #foreground. + # - mask_proto_grid_file (str): The path to the grid file to use with the next option. + # This should be a numpy.dump file with shape [numgrids, h, w] + # where h and w are w.r.t. the mask_proto_src convout. + # - mask_proto_use_grid (bool): Whether to add extra grid features to the proto_net input. + # - mask_proto_coeff_gate (bool): Add an extra set of sigmoided coefficients that is multiplied + # into the predicted coefficients in order to "gate" them. + # - mask_proto_prototypes_as_features (bool): For each prediction module, downsample the prototypes + # to the convout size of that module and supply the prototypes as input + # in addition to the already supplied backbone features. + # - mask_proto_prototypes_as_features_no_grad (bool): If the above is set, don't backprop gradients to + # to the prototypes from the network head. + # - mask_proto_remove_empty_masks (bool): Remove masks that are downsampled to 0 during loss calculations. + # - mask_proto_reweight_coeff (float): The coefficient to multiple the forground pixels with if reweighting. + # - mask_proto_coeff_diversity_loss (bool): Apply coefficient diversity loss on the coefficients so that the same + # instance has similar coefficients. + # - mask_proto_coeff_diversity_alpha (float): The weight to use for the coefficient diversity loss. + # - mask_proto_normalize_emulate_roi_pooling (bool): Normalize the mask loss to emulate roi pooling's affect on loss. 
+ # - mask_proto_double_loss (bool): Whether to use the old loss in addition to any special new losses. + # - mask_proto_double_loss_alpha (float): The alpha to weight the above loss. + # - mask_proto_split_prototypes_by_head (bool): If true, this will give each prediction head its own prototypes. + # - mask_proto_crop_with_pred_box (bool): Whether to crop with the predicted box or the gt box. + 'lincomb': 1, +}) + + + + + +# ----------------------- ACTIVATION FUNCTIONS ----------------------- # + +activation_func = Config({ + 'tanh': torch.tanh, + 'sigmoid': torch.sigmoid, + 'softmax': lambda x: torch.nn.functional.softmax(x, dim=-1), + 'relu': lambda x: torch.nn.functional.relu(x, inplace=True), + 'none': lambda x: x, +}) + + + + + +# ----------------------- FPN DEFAULTS ----------------------- # + +fpn_base = Config({ + # The number of features to have in each FPN layer + 'num_features': 256, + + # The upsampling mode used + 'interpolation_mode': 'bilinear', + + # The number of extra layers to be produced by downsampling starting at P5 + 'num_downsample': 1, + + # Whether to down sample with a 3x3 stride 2 conv layer instead of just a stride 2 selection + 'use_conv_downsample': False, + + # Whether to pad the pred layers with 1 on each side (I forgot to add this at the start) + # This is just here for backwards compatibility + 'pad': True, + + # Whether to add relu to the downsampled layers. + 'relu_downsample_layers': False, + + # Whether to add relu to the regular layers + 'relu_pred_layers': True, +}) + + + + + +# ----------------------- CONFIG DEFAULTS ----------------------- # + +coco_base_config = Config({ + 'dataset': coco2014_dataset, + 'num_classes': 81, # This should include the background class + + 'max_iter': 400000, + + # The maximum number of detections for evaluation + 'max_num_detections': 100, + + # dw' = momentum * dw - lr * (grad + decay * w) + 'lr': 1e-3, + 'momentum': 0.9, + 'decay': 5e-4, + + # For each lr step, what to multiply the lr with + 'gamma': 0.1, + 'lr_steps': (280000, 360000, 400000), + + # Initial learning rate to linearly warmup from (if until > 0) + 'lr_warmup_init': 1e-4, + + # If > 0 then increase the lr linearly from warmup_init to lr each iter for until iters + 'lr_warmup_until': 500, + + # The terms to scale the respective loss by + 'conf_alpha': 1, + 'bbox_alpha': 1.5, + 'mask_alpha': 0.4 / 256 * 140 * 140, # Some funky equation. Don't worry about it. + + # Eval.py sets this if you just want to run YOLACT as a detector + 'eval_mask_branch': True, + + # Top_k examples to consider for NMS + 'nms_top_k': 200, + # Examples with confidence less than this are not considered by NMS + 'nms_conf_thresh': 0.05, + # Boxes with IoU overlap greater than this threshold will be culled during NMS + 'nms_thresh': 0.5, + + # See mask_type for details. 
+ 'mask_type': mask_type.direct, + 'mask_size': 16, + 'masks_to_train': 100, + 'mask_proto_src': None, + 'mask_proto_net': [(256, 3, {}), (256, 3, {})], + 'mask_proto_bias': False, + 'mask_proto_prototype_activation': activation_func.relu, + 'mask_proto_mask_activation': activation_func.sigmoid, + 'mask_proto_coeff_activation': activation_func.tanh, + 'mask_proto_crop': True, + 'mask_proto_crop_expand': 0, + 'mask_proto_loss': None, + 'mask_proto_binarize_downsampled_gt': True, + 'mask_proto_normalize_mask_loss_by_sqrt_area': False, + 'mask_proto_reweight_mask_loss': False, + 'mask_proto_grid_file': 'data/grid.npy', + 'mask_proto_use_grid': False, + 'mask_proto_coeff_gate': False, + 'mask_proto_prototypes_as_features': False, + 'mask_proto_prototypes_as_features_no_grad': False, + 'mask_proto_remove_empty_masks': False, + 'mask_proto_reweight_coeff': 1, + 'mask_proto_coeff_diversity_loss': False, + 'mask_proto_coeff_diversity_alpha': 1, + 'mask_proto_normalize_emulate_roi_pooling': False, + 'mask_proto_double_loss': False, + 'mask_proto_double_loss_alpha': 1, + 'mask_proto_split_prototypes_by_head': False, + 'mask_proto_crop_with_pred_box': False, + + # SSD data augmentation parameters + # Randomize hue, vibrance, etc. + 'augment_photometric_distort': True, + # Have a chance to scale down the image and pad (to emulate smaller detections) + 'augment_expand': True, + # Potentialy sample a random crop from the image and put it in a random place + 'augment_random_sample_crop': True, + # Mirror the image with a probability of 1/2 + 'augment_random_mirror': True, + # Flip the image vertically with a probability of 1/2 + 'augment_random_flip': False, + # With uniform probability, rotate the image [0,90,180,270] degrees + 'augment_random_rot90': False, + + # Discard detections with width and height smaller than this (in absolute width and height) + 'discard_box_width': 4 / 550, + 'discard_box_height': 4 / 550, + + # If using batchnorm anywhere in the backbone, freeze the batchnorm layer during training. + # Note: any additional batch norm layers after the backbone will not be frozen. + 'freeze_bn': False, + + # Set this to a config object if you want an FPN (inherit from fpn_base). See fpn_base for details. + 'fpn': None, + + # Use the same weights for each network head + 'share_prediction_module': False, + + # For hard negative mining, instead of using the negatives that are leastl confidently background, + # use negatives that are most confidently not background. + 'ohem_use_most_confident': False, + + # Use focal loss as described in https://arxiv.org/pdf/1708.02002.pdf instead of OHEM + 'use_focal_loss': False, + 'focal_loss_alpha': 0.25, + 'focal_loss_gamma': 2, + + # The initial bias toward forground objects, as specified in the focal loss paper + 'focal_loss_init_pi': 0.01, + + # Keeps track of the average number of examples for each class, and weights the loss for that class accordingly. + 'use_class_balanced_conf': False, + + # Whether to use sigmoid focal loss instead of softmax, all else being the same. + 'use_sigmoid_focal_loss': False, + + # Use class[0] to be the objectness score and class[1:] to be the softmax predicted class. + # Note: at the moment this is only implemented if use_focal_loss is on. + 'use_objectness_score': False, + + # Adds a global pool + fc layer to the smallest selected layer that predicts the existence of each of the 80 classes. + # This branch is only evaluated during training time and is just there for multitask learning. 
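+    # The loss from this auxiliary branch is weighted by class_existence_alpha below.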
+ 'use_class_existence_loss': False, + 'class_existence_alpha': 1, + + # Adds a 1x1 convolution directly to the biggest selected layer that predicts a semantic segmentations for each of the 80 classes. + # This branch is only evaluated during training time and is just there for multitask learning. + 'use_semantic_segmentation_loss': False, + 'semantic_segmentation_alpha': 1, + + # Adds another branch to the netwok to predict Mask IoU. + 'use_mask_scoring': False, + 'mask_scoring_alpha': 1, + + # Match gt boxes using the Box2Pix change metric instead of the standard IoU metric. + # Note that the threshold you set for iou_threshold should be negative with this setting on. + 'use_change_matching': False, + + # Uses the same network format as mask_proto_net, except this time it's for adding extra head layers before the final + # prediction in prediction modules. If this is none, no extra layers will be added. + 'extra_head_net': None, + + # What params should the final head layers have (the ones that predict box, confidence, and mask coeffs) + 'head_layer_params': {'kernel_size': 3, 'padding': 1}, + + # Add extra layers between the backbone and the network heads + # The order is (bbox, conf, mask) + 'extra_layers': (0, 0, 0), + + # During training, to match detections with gt, first compute the maximum gt IoU for each prior. + # Then, any of those priors whose maximum overlap is over the positive threshold, mark as positive. + # For any priors whose maximum is less than the negative iou threshold, mark them as negative. + # The rest are neutral and not used in calculating the loss. + 'positive_iou_threshold': 0.5, + 'negative_iou_threshold': 0.5, + + # When using ohem, the ratio between positives and negatives (3 means 3 negatives to 1 positive) + 'ohem_negpos_ratio': 3, + + # If less than 1, anchors treated as a negative that have a crowd iou over this threshold with + # the crowd boxes will be treated as a neutral. + 'crowd_iou_threshold': 1, + + # This is filled in at runtime by Yolact's __init__, so don't touch it + 'mask_dim': None, + + # Input image size. + 'max_size': 300, + + # Whether or not to do post processing on the cpu at test time + 'force_cpu_nms': True, + + # Whether to use mask coefficient cosine similarity nms instead of bbox iou nms + 'use_coeff_nms': False, + + # Whether or not to have a separate branch whose sole purpose is to act as the coefficients for coeff_diversity_loss + # Remember to turn on coeff_diversity_loss, or these extra coefficients won't do anything! + # To see their effect, also remember to turn on use_coeff_nms. + 'use_instance_coeff': False, + 'num_instance_coeffs': 64, + + # Whether or not to tie the mask loss / box loss to 0 + 'train_masks': True, + 'train_boxes': True, + # If enabled, the gt masks will be cropped using the gt bboxes instead of the predicted ones. + # This speeds up training time considerably but results in much worse mAP at test time. + 'use_gt_bboxes': False, + + # Whether or not to preserve aspect ratio when resizing the image. + # If True, this will resize all images to be max_size^2 pixels in area while keeping aspect ratio. 
+ # If False, all images are resized to max_size x max_size + 'preserve_aspect_ratio': False, + + # Whether or not to use the prediction module (c) from DSSD + 'use_prediction_module': False, + + # Whether or not to use the predicted coordinate scheme from Yolo v2 + 'use_yolo_regressors': False, + + # For training, bboxes are considered "positive" if their anchors have a 0.5 IoU overlap + # or greater with a ground truth box. If this is true, instead of using the anchor boxes + # for this IoU computation, the matching function will use the predicted bbox coordinates. + # Don't turn this on if you're not using yolo regressors! + 'use_prediction_matching': False, + + # A list of settings to apply after the specified iteration. Each element of the list should look like + # (iteration, config_dict) where config_dict is a dictionary you'd pass into a config object's init. + 'delayed_settings': [], + + # Use command-line arguments to set this. + 'no_jit': False, + + 'backbone': None, + 'name': 'base_config', + + # Fast Mask Re-scoring Network + # Inspried by Mask Scoring R-CNN (https://arxiv.org/abs/1903.00241) + # Do not crop out the mask with bbox but slide a convnet on the image-size mask, + # then use global pooling to get the final mask score + 'use_maskiou': False, + + # Archecture for the mask iou network. A (num_classes-1, 1, {}) layer is appended to the end. + 'maskiou_net': [], + + # Discard predicted masks whose area is less than this + 'discard_mask_area': -1, + + 'maskiou_alpha': 1.0, + 'rescore_mask': False, + 'rescore_bbox': False, + 'maskious_to_train': -1, +}) + + + + + +# ----------------------- YOLACT v1.0 CONFIGS ----------------------- # + +yolact_base_config = coco_base_config.copy({ + 'name': 'yolact_base', + + # Dataset stuff + 'dataset': coco2017_dataset, + 'num_classes': len(coco2017_dataset.class_names) + 1, + + # Image Size + 'max_size': 550, + + # Training params + 'lr_steps': (280000, 600000, 700000, 750000), + 'max_iter': 1200000, + + # Backbone Settings + 'backbone': resnet101_backbone.copy({ + 'selected_layers': list(range(1, 4)), + 'use_pixel_scales': True, + 'preapply_sqrt': False, + 'use_square_anchors': True, # This is for backward compatability with a bug + + 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, + 'pred_scales': [[24], [48], [96], [192], [384]], + }), + + # FPN Settings + 'fpn': fpn_base.copy({ + 'use_conv_downsample': True, + 'num_downsample': 2, + }), + + # Mask Settings + 'mask_type': mask_type.lincomb, + 'mask_alpha': 6.125, + 'mask_proto_src': 0, + 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})], + 'mask_proto_normalize_emulate_roi_pooling': True, + + # Other stuff + 'share_prediction_module': True, + 'extra_head_net': [(256, 3, {'padding': 1})], + + 'positive_iou_threshold': 0.5, + 'negative_iou_threshold': 0.4, + + 'crowd_iou_threshold': 0.7, + + 'use_semantic_segmentation_loss': True, +}) + +yolact_im400_config = yolact_base_config.copy({ + 'name': 'yolact_im400', + + 'max_size': 400, + 'backbone': yolact_base_config.backbone.copy({ + 'pred_scales': [[int(x[0] / yolact_base_config.max_size * 400)] for x in yolact_base_config.backbone.pred_scales], + }), +}) + +yolact_im700_config = yolact_base_config.copy({ + 'name': 'yolact_im700', + + 'masks_to_train': 300, + 'max_size': 700, + 'backbone': yolact_base_config.backbone.copy({ + 'pred_scales': [[int(x[0] / yolact_base_config.max_size * 700)] for x in yolact_base_config.backbone.pred_scales], + }), +}) + +yolact_darknet53_config = 
yolact_base_config.copy({ + 'name': 'yolact_darknet53', + + 'backbone': darknet53_backbone.copy({ + 'selected_layers': list(range(2, 5)), + + 'pred_scales': yolact_base_config.backbone.pred_scales, + 'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios, + 'use_pixel_scales': True, + 'preapply_sqrt': False, + 'use_square_anchors': True, # This is for backward compatability with a bug + }), +}) + +yolact_resnet50_config = yolact_base_config.copy({ + 'name': 'yolact_resnet50', + + 'backbone': resnet50_backbone.copy({ + 'selected_layers': list(range(1, 4)), + + 'pred_scales': yolact_base_config.backbone.pred_scales, + 'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios, + 'use_pixel_scales': True, + 'preapply_sqrt': False, + 'use_square_anchors': True, # This is for backward compatability with a bug + }), +}) + + +yolact_resnet50_pascal_config = yolact_resnet50_config.copy({ + 'name': None, # Will default to yolact_resnet50_pascal + + # Dataset stuff + 'dataset': pascal_sbd_dataset, + 'num_classes': len(pascal_sbd_dataset.class_names) + 1, + + 'max_iter': 120000, + 'lr_steps': (60000, 100000), + + 'backbone': yolact_resnet50_config.backbone.copy({ + 'pred_scales': [[32], [64], [128], [256], [512]], + 'use_square_anchors': False, + }) +}) + +# ----------------------- YOLACT++ CONFIGS ----------------------- # + +yolact_plus_base_config = yolact_base_config.copy({ + 'name': 'yolact_plus_base', + + 'backbone': resnet101_dcn_inter3_backbone.copy({ + 'selected_layers': list(range(1, 4)), + + 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, + 'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]], + 'use_pixel_scales': True, + 'preapply_sqrt': False, + 'use_square_anchors': False, + }), + + 'use_maskiou': False, + 'maskiou_net': [(8, 3, {'stride': 2}), (16, 3, {'stride': 2}), (32, 3, {'stride': 2}), (64, 3, {'stride': 2}), (128, 3, {'stride': 2})], + 'maskiou_alpha': 25, + 'rescore_bbox': False, + 'rescore_mask': True, + + 'discard_mask_area': 5*5, +}) + +yolact_plus_resnet50_config = yolact_plus_base_config.copy({ + 'name': 'yolact_plus_resnet50', + + 'backbone': resnet50_dcnv2_backbone.copy({ + 'selected_layers': list(range(1, 4)), + + 'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5, + 'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]], + 'use_pixel_scales': True, + 'preapply_sqrt': False, + 'use_square_anchors': False, + }), +}) +yolact_plus_resnet50_inf_config = yolact_plus_resnet50_config.copy({ + 'use_maskiou': True +}) + +# Default config +cfg = yolact_base_config.copy() + +def set_cfg(config_name:str): + """ Sets the active config. Works even if cfg is already imported! """ + global cfg + + # Note this is not just an eval because I'm lazy, but also because it can + # be used like ssd300_config.copy({'max_size': 400}) for extreme fine-tuning + cfg.replace(eval(config_name)) + + if cfg.name is None: + cfg.name = config_name.split('_config')[0] + +def set_dataset(dataset_name:str): + """ Sets the dataset of the current config. 
""" + cfg.dataset = eval(dataset_name) + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO.sh index da03b184e476f515c5a2c75954e25de98181f997..799cb135fbdf5c0546601af8556837d8e2faea60 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO.sh @@ -1,70 +1,70 @@ -#!/bin/bash - -start=`date +%s` - -# handle optional download dir -if [ -z "$1" ] - then - # navigate to ./data - echo "navigating to ./data/ ..." - mkdir -p ./data - cd ./data/ - mkdir -p ./coco - cd ./coco - mkdir -p ./images - mkdir -p ./annotations - else - # check if specified dir is valid - if [ ! -d $1 ]; then - echo $1 " is not a valid directory" - exit 0 - fi - echo "navigating to " $1 " ..." - cd $1 -fi - -if [ ! -d images ] - then - mkdir -p ./images -fi - -# Download the image data. -cd ./images -echo "Downloading MSCOCO train images ..." -curl -LO http://images.cocodataset.org/zips/train2017.zip -echo "Downloading MSCOCO val images ..." -curl -LO http://images.cocodataset.org/zips/val2017.zip - -cd ../ -if [ ! -d annotations ] - then - mkdir -p ./annotations -fi - -# Download the annotation data. -cd ./annotations -echo "Downloading MSCOCO train/val annotations ..." -curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip -curl -LO http://images.cocodataset.org/annotations/annotations_trainval2017.zip -echo "Finished downloading. Now extracting ..." - -# Unzip data -echo "Extracting train images ..." -unzip -qqjd ../images ../images/train2017.zip -echo "Extracting val images ..." -unzip -qqjd ../images ../images/val2017.zip -echo "Extracting annotations ..." -unzip -qqd .. ./annotations_trainval2014.zip -unzip -qqd .. ./annotations_trainval2017.zip - -echo "Removing zip files ..." -rm ../images/train2017.zip -rm ../images/val2017.zip -rm ./annotations_trainval2014.zip -rm ./annotations_trainval2017.zip - - -end=`date +%s` -runtime=$((end-start)) - -echo "Completed in " $runtime " seconds" +#!/bin/bash + +start=`date +%s` + +# handle optional download dir +if [ -z "$1" ] + then + # navigate to ./data + echo "navigating to ./data/ ..." + mkdir -p ./data + cd ./data/ + mkdir -p ./coco + cd ./coco + mkdir -p ./images + mkdir -p ./annotations + else + # check if specified dir is valid + if [ ! -d $1 ]; then + echo $1 " is not a valid directory" + exit 0 + fi + echo "navigating to " $1 " ..." + cd $1 +fi + +if [ ! -d images ] + then + mkdir -p ./images +fi + +# Download the image data. +cd ./images +echo "Downloading MSCOCO train images ..." +curl -LO http://images.cocodataset.org/zips/train2017.zip +echo "Downloading MSCOCO val images ..." +curl -LO http://images.cocodataset.org/zips/val2017.zip + +cd ../ +if [ ! -d annotations ] + then + mkdir -p ./annotations +fi + +# Download the annotation data. +cd ./annotations +echo "Downloading MSCOCO train/val annotations ..." +curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip +curl -LO http://images.cocodataset.org/annotations/annotations_trainval2017.zip +echo "Finished downloading. Now extracting ..." + +# Unzip data +echo "Extracting train images ..." +unzip -qqjd ../images ../images/train2017.zip +echo "Extracting val images ..." +unzip -qqjd ../images ../images/val2017.zip +echo "Extracting annotations ..." +unzip -qqd .. ./annotations_trainval2014.zip +unzip -qqd .. ./annotations_trainval2017.zip + +echo "Removing zip files ..." 
+rm ../images/train2017.zip +rm ../images/val2017.zip +rm ./annotations_trainval2014.zip +rm ./annotations_trainval2017.zip + + +end=`date +%s` +runtime=$((end-start)) + +echo "Completed in " $runtime " seconds" diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO_test.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO_test.sh index 56d337b4d9cf4c90c89b99d429ebda742a9e570d..c876e57edd3010a17e3bddfbb66a0f9fd2c06c06 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO_test.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/COCO_test.sh @@ -1,62 +1,62 @@ -#!/bin/bash - -start=`date +%s` - -# handle optional download dir -if [ -z "$1" ] - then - # navigate to ./data - echo "navigating to ./data/ ..." - mkdir -p ./data - cd ./data/ - mkdir -p ./coco - cd ./coco - mkdir -p ./images - mkdir -p ./annotations - else - # check if specified dir is valid - if [ ! -d $1 ]; then - echo $1 " is not a valid directory" - exit 0 - fi - echo "navigating to " $1 " ..." - cd $1 -fi - -if [ ! -d images ] - then - mkdir -p ./images -fi - -# Download the image data. -cd ./images -echo "Downloading MSCOCO test images ..." -curl -LO http://images.cocodataset.org/zips/test2017.zip - -cd ../ -if [ ! -d annotations ] - then - mkdir -p ./annotations -fi - -# Download the annotation data. -cd ./annotations -echo "Downloading MSCOCO test info ..." -curl -LO http://images.cocodataset.org/annotations/image_info_test2017.zip -echo "Finished downloading. Now extracting ..." - -# Unzip data -echo "Extracting train images ..." -unzip -qqjd ../images ../images/test2017.zip -echo "Extracting info ..." -unzip -qqd .. ./image_info_test2017.zip - -echo "Removing zip files ..." -rm ../images/test2017.zip -rm ./image_info_test2017.zip - - -end=`date +%s` -runtime=$((end-start)) - -echo "Completed in " $runtime " seconds" +#!/bin/bash + +start=`date +%s` + +# handle optional download dir +if [ -z "$1" ] + then + # navigate to ./data + echo "navigating to ./data/ ..." + mkdir -p ./data + cd ./data/ + mkdir -p ./coco + cd ./coco + mkdir -p ./images + mkdir -p ./annotations + else + # check if specified dir is valid + if [ ! -d $1 ]; then + echo $1 " is not a valid directory" + exit 0 + fi + echo "navigating to " $1 " ..." + cd $1 +fi + +if [ ! -d images ] + then + mkdir -p ./images +fi + +# Download the image data. +cd ./images +echo "Downloading MSCOCO test images ..." +curl -LO http://images.cocodataset.org/zips/test2017.zip + +cd ../ +if [ ! -d annotations ] + then + mkdir -p ./annotations +fi + +# Download the annotation data. +cd ./annotations +echo "Downloading MSCOCO test info ..." +curl -LO http://images.cocodataset.org/annotations/image_info_test2017.zip +echo "Finished downloading. Now extracting ..." + +# Unzip data +echo "Extracting train images ..." +unzip -qqjd ../images ../images/test2017.zip +echo "Extracting info ..." +unzip -qqd .. ./image_info_test2017.zip + +echo "Removing zip files ..." 
+rm ../images/test2017.zip +rm ./image_info_test2017.zip + + +end=`date +%s` +runtime=$((end-start)) + +echo "Completed in " $runtime " seconds" diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/mix_sets.py b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/mix_sets.py index 96e1a875890cdf265d0f4411b73e917093d43c5d..5f70eacb53ccace83f917e483e456fc8731594b0 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/mix_sets.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/data/scripts/mix_sets.py @@ -1,74 +1,74 @@ -import json -import os -import sys -from collections import defaultdict - -usage_text = """ -This script creates a coco annotation file by mixing one or more existing annotation files. - -Usage: python data/scripts/mix_sets.py output_name [set1 range1 [set2 range2 [...]]] - -To use, specify the output annotation name and any number of set + range pairs, where the sets -are in the form instances_.json and ranges are python-evalable ranges. The resulting -json will be spit out as instances_.json in the same folder as the input sets. - -For instance, - python data/scripts/mix_sets.py trainval35k train2014 : val2014 :-5000 - -This will create an instance_trainval35k.json file with all images and corresponding annotations -from train2014 and the first 35000 images from val2014. - -You can also specify only one set: - python data/scripts/mix_sets.py minival5k val2014 -5000: - -This will take the last 5k images from val2014 and put it in instances_minival5k.json. -""" - -annotations_path = 'data/coco/annotations/instances_%s.json' -fields_to_combine = ('images', 'annotations') -fields_to_steal = ('info', 'categories', 'licenses') - -if __name__ == '__main__': - if len(sys.argv) < 4 or len(sys.argv) % 2 != 0: - print(usage_text) - exit() - - out_name = sys.argv[1] - sets = sys.argv[2:] - sets = [(sets[2*i], sets[2*i+1]) for i in range(len(sets)//2)] - - out = {x: [] for x in fields_to_combine} - - for idx, (set_name, range_str) in enumerate(sets): - print('Loading set %s...' % set_name) - with open(annotations_path % set_name, 'r') as f: - set_json = json.load(f) - - # "Steal" some fields that don't need to be combined from the first set - if idx == 0: - for field in fields_to_steal: - out[field] = set_json[field] - - print('Building image index...') - image_idx = {x['id']: x for x in set_json['images']} - - print('Collecting annotations...') - anns_idx = defaultdict(lambda: []) - - for ann in set_json['annotations']: - anns_idx[ann['image_id']].append(ann) - - export_ids = list(image_idx.keys()) - export_ids.sort() - export_ids = eval('export_ids[%s]' % range_str, {}, {'export_ids': export_ids}) - - print('Adding %d images...' % len(export_ids)) - for _id in export_ids: - out['images'].append(image_idx[_id]) - out['annotations'] += anns_idx[_id] - - print('Done.\n') - - print('Saving result...') - with open(annotations_path % (out_name), 'w') as out_file: - json.dump(out, out_file) +import json +import os +import sys +from collections import defaultdict + +usage_text = """ +This script creates a coco annotation file by mixing one or more existing annotation files. + +Usage: python data/scripts/mix_sets.py output_name [set1 range1 [set2 range2 [...]]] + +To use, specify the output annotation name and any number of set + range pairs, where the sets +are in the form instances_.json and ranges are python-evalable ranges. The resulting +json will be spit out as instances_.json in the same folder as the input sets. 
+ +For instance, + python data/scripts/mix_sets.py trainval35k train2014 : val2014 :-5000 + +This will create an instance_trainval35k.json file with all images and corresponding annotations +from train2014 and the first 35000 images from val2014. + +You can also specify only one set: + python data/scripts/mix_sets.py minival5k val2014 -5000: + +This will take the last 5k images from val2014 and put it in instances_minival5k.json. +""" + +annotations_path = 'data/coco/annotations/instances_%s.json' +fields_to_combine = ('images', 'annotations') +fields_to_steal = ('info', 'categories', 'licenses') + +if __name__ == '__main__': + if len(sys.argv) < 4 or len(sys.argv) % 2 != 0: + print(usage_text) + exit() + + out_name = sys.argv[1] + sets = sys.argv[2:] + sets = [(sets[2*i], sets[2*i+1]) for i in range(len(sets)//2)] + + out = {x: [] for x in fields_to_combine} + + for idx, (set_name, range_str) in enumerate(sets): + print('Loading set %s...' % set_name) + with open(annotations_path % set_name, 'r') as f: + set_json = json.load(f) + + # "Steal" some fields that don't need to be combined from the first set + if idx == 0: + for field in fields_to_steal: + out[field] = set_json[field] + + print('Building image index...') + image_idx = {x['id']: x for x in set_json['images']} + + print('Collecting annotations...') + anns_idx = defaultdict(lambda: []) + + for ann in set_json['annotations']: + anns_idx[ann['image_id']].append(ann) + + export_ids = list(image_idx.keys()) + export_ids.sort() + export_ids = eval('export_ids[%s]' % range_str, {}, {'export_ids': export_ids}) + + print('Adding %d images...' % len(export_ids)) + for _id in export_ids: + out['images'].append(image_idx[_id]) + out['annotations'] += anns_idx[_id] + + print('Done.\n') + + print('Saving result...') + with open(annotations_path % (out_name), 'w') as out_file: + json.dump(out, out_file) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/deform_conv.py b/PyTorch/contrib/cv/detection/YOLACT_plus/deform_conv.py index bcf59fae04c35c6fc615afdf11077bcb50e52f4e..b1ed2c6203317008d838bcbab6e39650d7bb3849 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/deform_conv.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/deform_conv.py @@ -1,235 +1,235 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.nn.modules.utils import _pair, _single -import math - - -class ModulatedDeformConv2dFunction(Function): - - @staticmethod - def forward(ctx, - input_tensor, - offset_ori, - mask, - weight, - bias=None, - with_bias=False, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - sort_index_for_npu_fp=None, - sort_index_for_npu_bp=None, - ): - - input_tensor = input_tensor.float() - offset_ori = offset_ori.float() - mask = mask.float() - - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.sort_index_for_npu_bp = sort_index_for_npu_bp - ctx.with_bias = with_bias - - offset = offset_ori.index_select(1, sort_index_for_npu_fp) - offset_all = torch.cat([offset, mask], dim=1) - output, offset_out = torch.npu_deformable_conv2d( - input_tensor, weight, offset_all, bias, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, - modulated=True) - if weight.requires_grad or mask.requires_grad or offset.requires_grad \ - or input_tensor.requires_grad: - ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) - return output - - @staticmethod - def backward(ctx, grad_output): - input_tensor, weight, offset_out, offset_all = ctx.saved_tensors - grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( - input_tensor, grad_output, offset_out, weight, offset_all, - kernel_size=[weight.shape[3], weight.shape[2]], - stride=[1, 1, ctx.stride, ctx.stride], - padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], - dilation=[1, 1, ctx.dilation, ctx.dilation], - groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) - grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) - grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] - if not ctx.with_bias: - grad_bias = None - - return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, - None, None, None, None, None, None, None, None) - - -class ModulatedDeformConv(nn.Module): - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - pack=True, - ): - - r"""Applies an NPU based Modulated Deformable 2D convolution operation. - - Paper link: - [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) - - Reference implementation link: - https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py - - The implementation of this ModulatedDeformConv is mainly based - on the implementation of mmcv for design and reconstruction. - - In ModulatedDeformConvFunction, the forward and backward are customized, - and the input tensor is reconstructed ito match the NPU based function. - - It is worth mentioning that DeformConv(DCNv1) is also implemented - by setting modulated==False. Due to the difference between input - and initialization, there is no additional implementation here. - - .. note:: - ModulatedDeformConv only implements operations under fp32 data types. - Notice, weight and bias in conv_offset must be initialized to 0. 
- - Args: - in_channels (int): Number of channels in the input image. - out_channels (int): Number of channels produced by the convolution. - kernel_size(int, tuple): Size of the convolving kernel. - stride(int, tuple): Stride of the convolution. Default: 1. - padding (int or tuple): Zero-padding added to both sides of the input. - Default: 0. - dilation (int or tuple): Spacing between kernel elements. Default: 1. - groups (int): Number of blocked connections from input. - channels to output channels. Default: 1. - deform_groups (int): Number of deformable group partitions. - bias (bool): If True, adds a learnable bias to the output. Default: False. - pack (bool): If True, conv_offset and mask will be included in this module. Default: True. - - Examples:: - >>> m = ModulatedDeformConv(32, 32, 1) - >>> input_tensor = torch.randn(2, 32, 5, 5) - >>> output = m(input_tensor) - """ - - super(ModulatedDeformConv, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.pack = pack - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = torch.zeros(self.weight.shape[0]) - - if self.pack: - self.conv_offset_mask = nn.Conv2d( - self.in_channels, - self.deformable_groups * 3 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=_pair(self.stride), - padding=_pair(self.padding), - bias=True) - - self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] - sort_index_for_npu = list(range(self.split_num)) - sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] - sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} - sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] - self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) - self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) - self.sort_index_for_npu_todevice = False - - self.init_param() - - def init_param(self): - n = self.in_channels - for k in self.kernel_size: - n *= k - stdv = 1. / math.sqrt(n) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.zero_() - - if self.pack: - self.conv_offset_mask.weight.data.zero_() - self.conv_offset_mask.bias.data.zero_() - - def forward(self, x): - if self.pack: - out = self.conv_offset_mask(x) - offset = out[:, :self.split_num, ...] 
- mask = torch.sigmoid(out[:, self.split_num:, ...]) - else: - x, offset, mask = x - - if not self.sort_index_for_npu_todevice: - self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) - self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) - self.bias = self.bias.to(x.device) - self.sort_index_for_npu_todevice = True - - return ModulatedDeformConv2dFunction.apply( - x, offset, mask, self.weight, self.bias, self.with_bias, - self.stride, self.padding, self.dilation, - self.groups, self.deformable_groups, - self.sort_index_for_npu_fp, - self.sort_index_for_npu_bp, - ) - - -DCNv2 = ModulatedDeformConv - -if __name__ == "__main__": - x = torch.randn(2, 32, 7, 7) - model = DCNv2(32, 32, 3, 2, 1) - - torch.npu.set_device(0) - x = x.npu() - model = model.npu() - - o = model(x) - l = o.sum() - l.backward() - print(l) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair, _single +import math + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def forward(ctx, + input_tensor, + offset_ori, + mask, + weight, + bias=None, + with_bias=False, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + sort_index_for_npu_fp=None, + sort_index_for_npu_bp=None, + ): + + input_tensor = input_tensor.float() + offset_ori = offset_ori.float() + mask = mask.float() + + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.sort_index_for_npu_bp = sort_index_for_npu_bp + ctx.with_bias = with_bias + + offset = offset_ori.index_select(1, sort_index_for_npu_fp) + offset_all = torch.cat([offset, mask], dim=1) + output, offset_out = torch.npu_deformable_conv2d( + input_tensor, weight, offset_all, bias, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, + modulated=True) + if weight.requires_grad or mask.requires_grad or offset.requires_grad \ + or input_tensor.requires_grad: + ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) + return output + + @staticmethod + def backward(ctx, grad_output): + input_tensor, weight, offset_out, offset_all = ctx.saved_tensors + grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( + input_tensor, grad_output, offset_out, weight, offset_all, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) + grad_offset = 
grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) + grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None, None, None, None) + + +class ModulatedDeformConv(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + pack=True, + ): + + r"""Applies an NPU based Modulated Deformable 2D convolution operation. + + Paper link: + [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168) + + Reference implementation link: + https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py + + The implementation of this ModulatedDeformConv is mainly based + on the implementation of mmcv for design and reconstruction. + + In ModulatedDeformConvFunction, the forward and backward are customized, + and the input tensor is reconstructed ito match the NPU based function. + + It is worth mentioning that DeformConv(DCNv1) is also implemented + by setting modulated==False. Due to the difference between input + and initialization, there is no additional implementation here. + + .. note:: + ModulatedDeformConv only implements operations under fp32 data types. + Notice, weight and bias in conv_offset must be initialized to 0. + + Args: + in_channels (int): Number of channels in the input image. + out_channels (int): Number of channels produced by the convolution. + kernel_size(int, tuple): Size of the convolving kernel. + stride(int, tuple): Stride of the convolution. Default: 1. + padding (int or tuple): Zero-padding added to both sides of the input. + Default: 0. + dilation (int or tuple): Spacing between kernel elements. Default: 1. + groups (int): Number of blocked connections from input. + channels to output channels. Default: 1. + deform_groups (int): Number of deformable group partitions. + bias (bool): If True, adds a learnable bias to the output. Default: False. + pack (bool): If True, conv_offset and mask will be included in this module. Default: True. 
+ + Examples:: + >>> m = ModulatedDeformConv(32, 32, 1) + >>> input_tensor = torch.randn(2, 32, 5, 5) + >>> output = m(input_tensor) + """ + + super(ModulatedDeformConv, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.pack = pack + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = torch.zeros(self.weight.shape[0]) + + if self.pack: + self.conv_offset_mask = nn.Conv2d( + self.in_channels, + self.deformable_groups * 3 * self.kernel_size[0] * + self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + bias=True) + + self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1] + sort_index_for_npu = list(range(self.split_num)) + sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2] + sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)} + sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu] + self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp) + self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp) + self.sort_index_for_npu_todevice = False + + self.init_param() + + def init_param(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + if self.pack: + self.conv_offset_mask.weight.data.zero_() + self.conv_offset_mask.bias.data.zero_() + + def forward(self, x): + if self.pack: + out = self.conv_offset_mask(x) + offset = out[:, :self.split_num, ...] + mask = torch.sigmoid(out[:, self.split_num:, ...]) + else: + x, offset, mask = x + + if not self.sort_index_for_npu_todevice: + self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device) + self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device) + self.bias = self.bias.to(x.device) + self.sort_index_for_npu_todevice = True + + return ModulatedDeformConv2dFunction.apply( + x, offset, mask, self.weight, self.bias, self.with_bias, + self.stride, self.padding, self.dilation, + self.groups, self.deformable_groups, + self.sort_index_for_npu_fp, + self.sort_index_for_npu_bp, + ) + + +DCNv2 = ModulatedDeformConv + +if __name__ == "__main__": + x = torch.randn(2, 32, 7, 7) + model = DCNv2(32, 32, 3, 2, 1) + + torch.npu.set_device(0) + x = x.npu() + model = model.npu() + + o = model(x) + l = o.sum() + l.backward() + print(l) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/eval.py b/PyTorch/contrib/cv/detection/YOLACT_plus/eval.py index 72264fd183a8743cae7b6c773509fa5fc4222be1..b57372aacaf16eb51559224fec25b00f8b7cf061 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/eval.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/eval.py @@ -1,1158 +1,1158 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from data import COCODetection, get_label_map, MEANS, COLORS -from yolact import Yolact -from utils.augmentations import BaseTransform, FastBaseTransform, Resize -from utils.functions import MovingAverage, ProgressBar -from layers.box_utils import jaccard, center_size, mask_iou -from utils import timer -from utils.functions import SavePath -from layers.output_utils import postprocess, undo_image_transformation -import pycocotools - -from data import cfg, set_cfg, set_dataset -from tqdm import tqdm -import numpy as np -import torch -import torch.backends.cudnn as cudnn -from torch.autograd import Variable -import argparse -import time -import random -import cProfile -import pickle -import json -import os -from collections import defaultdict -from pathlib import Path -from collections import OrderedDict -from PIL import Image - -import matplotlib.pyplot as plt -import cv2 - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - -def parse_args(argv=None): - parser = argparse.ArgumentParser( - description='YOLACT COCO Evaluation') - parser.add_argument('--trained_model', - default='weights/ssd300_mAP_77.43_v2.pth', type=str, - help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.') - parser.add_argument('--data_path', default=None, type=str, - help='data path') - parser.add_argument('--top_k', default=5, type=int, - help='Further restrict the number of predictions to parse') - parser.add_argument('--cuda', default=True, type=str2bool, - help='Use cuda to evaulate model') - parser.add_argument('--fast_nms', default=True, type=str2bool, - help='Whether to use a faster, but not entirely correct version of NMS.') - parser.add_argument('--cross_class_nms', default=False, type=str2bool, - help='Whether compute NMS cross-class or per-class.') - parser.add_argument('--display_masks', default=True, type=str2bool, - help='Whether or not to display masks over bounding boxes') - parser.add_argument('--display_bboxes', default=True, type=str2bool, - help='Whether or not to display bboxes around masks') - parser.add_argument('--display_text', default=True, type=str2bool, - help='Whether or not to display text (class [score])') - parser.add_argument('--display_scores', default=True, type=str2bool, - help='Whether or not to display scores in addition to classes') - parser.add_argument('--display', dest='display', action='store_true', - help='Display qualitative results instead of quantitative ones.') - parser.add_argument('--shuffle', dest='shuffle', action='store_true', - help='Shuffles the images when displaying them. 
Doesn\'t have much of an effect when display is off though.') - parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, - help='In quantitative mode, the file to save detections before calculating mAP.') - parser.add_argument('--resume', dest='resume', action='store_true', - help='If display not set, this resumes mAP calculations from the ap_data_file.') - parser.add_argument('--max_images', default=-1, type=int, - help='The maximum number of images from the dataset to consider. Use -1 for all.') - parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', - help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') - parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str, - help='The output file for coco bbox results if --coco_results is set.') - parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str, - help='The output file for coco mask results if --coco_results is set.') - parser.add_argument('--config', default='yolact_plus_resnet50_config', - help='The config object to use.') - parser.add_argument('--output_web_json', dest='output_web_json', action='store_true', - help='If display is not set, instead of processing IoU values, this dumps detections for usage with the detections viewer web thingy.') - parser.add_argument('--web_det_path', default='web/dets/', type=str, - help='If output_web_json is set, this is the path to dump detections into.') - parser.add_argument('--no_bar', dest='no_bar', action='store_true', - help='Do not output the status bar. This is useful for when piping to a file.') - parser.add_argument('--display_lincomb', default=False, type=str2bool, - help='If the config uses lincomb masks, output a visualization of how those masks are created.') - parser.add_argument('--benchmark', default=False, dest='benchmark', action='store_true', - help='Equivalent to running display mode but without displaying an image.') - parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', - help='Do not sort images by hashed image ID.') - parser.add_argument('--seed', default=None, type=int, - help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') - parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', - help='Outputs stuff for scripts/compute_mask.py.') - parser.add_argument('--no_crop', default=False, dest='crop', action='store_false', - help='Do not crop output masks with the predicted bounding box.') - parser.add_argument('--image', default=None, type=str, - help='A path to an image to use for display.') - parser.add_argument('--images', default=None, type=str, - help='An input folder of images and output folder to save detected images. Should be in the format input->output.') - parser.add_argument('--video', default=None, type=str, - help='A path to a video to evaluate on. Passing in a number will use that index webcam.') - parser.add_argument('--video_multiframe', default=1, type=int, - help='The number of frames to evaluate in parallel to make videos play at higher fps.') - parser.add_argument('--score_threshold', default=0, type=float, - help='Detections with a score under this threshold will not be considered. 
This currently only works in display mode.') - parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') - parser.add_argument('--detect', default=False, dest='detect', action='store_true', - help='Don\'t evauluate the mask branch at all and only do object detection. This only works for --display and --benchmark.') - parser.add_argument('--display_fps', default=False, dest='display_fps', action='store_true', - help='When displaying / saving video, draw the FPS on the frame') - parser.add_argument('--emulate_playback', default=False, dest='emulate_playback', action='store_true', - help='When saving a video, emulate the framerate that you\'d get running in real-time mode.') - parser.add_argument('--rank_id', default=0, type=int) - parser.set_defaults(no_bar=False, display=False, resume=False, output_coco_json=False, output_web_json=False, shuffle=False, - benchmark=False, no_sort=False, no_hash=False, mask_proto_debug=False, crop=True, detect=False, display_fps=False, - emulate_playback=False) - - global args - args = parser.parse_args(argv) - - if args.output_web_json: - args.output_coco_json = True - - if args.seed is not None: - random.seed(args.seed) - -iou_thresholds = [x / 100 for x in range(50, 100, 5)] -coco_cats = {} # Call prep_coco_cats to fill this -coco_cats_inv = {} -color_cache = defaultdict(lambda: {}) - -def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''): - """ - Note: If undo_transform=False then im_h and im_w are allowed to be None. - """ - if undo_transform: - img_numpy = undo_image_transformation(img, w, h) - if args.cuda: - img_gpu = torch.Tensor(img_numpy).npu() - else: - img_gpu = torch.Tensor(img_numpy) - print('img_shape:', img_gpu) - else: - img_gpu = img / 255.0 - h, w, _ = img.shape - print('h, w, _ :', img.shape) - - with timer.env('Postprocess'): - save = cfg.rescore_bbox - cfg.rescore_bbox = True - t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb, - crop_masks = args.crop, - score_threshold = args.score_threshold) - cfg.rescore_bbox = save - - with timer.env('Copy'): - idx = t[1].argsort(0, descending=True)[:args.top_k] - - if cfg.eval_mask_branch: - # Masks are drawn on the GPU, so don't copy - masks = t[3][idx] - classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]] - - num_dets_to_consider = min(args.top_k, classes.shape[0]) - for j in range(num_dets_to_consider): - if scores[j] < args.score_threshold: - num_dets_to_consider = j - break - - # Quick and dirty lambda for selecting the color for a particular index - # Also keeps track of a per-gpu color cache for maximum speed - def get_color(j, on_gpu=None): - global color_cache - color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS) - - if on_gpu is not None and color_idx in color_cache[on_gpu]: - return color_cache[on_gpu][color_idx] - else: - color = COLORS[color_idx] - if not undo_transform: - # The image might come in as RGB or BRG, depending - color = (color[2], color[1], color[0]) - if on_gpu is not None: - color = torch.Tensor(color).to(on_gpu).float() / 255. 
- color_cache[on_gpu][color_idx] = color - return color - - # First, draw the masks on the GPU where we can do it really fast - # Beware: very fast but possibly unintelligible mask-drawing code ahead - # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice - if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0: - # After this, mask is of size [num_dets, h, w, 1] - masks = masks[:num_dets_to_consider, :, :, None] - - # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1]) - colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0) - masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha - - # This is 1 everywhere except for 1-mask_alpha where the mask is - inv_alph_masks = masks * (-mask_alpha) + 1 - - # I did the math for this on pen and paper. This whole block should be equivalent to: - # for j in range(num_dets_to_consider): - # img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j] - masks_color_summand = masks_color[0] - if num_dets_to_consider > 1: - inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0) - masks_color_cumul = masks_color[1:] * inv_alph_cumul - masks_color_summand += masks_color_cumul.sum(dim=0) - - img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand - - if args.display_fps: - # Draw the box for the fps on the GPU - font_face = cv2.FONT_HERSHEY_DUPLEX - font_scale = 0.6 - font_thickness = 1 - - text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0] - - img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha - - - # Then draw the stuff that needs to be done on the cpu - # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason - img_numpy = (img_gpu * 255).byte().cpu().numpy() - - if args.display_fps: - # Draw the text on the CPU - text_pt = (4, text_h + 2) - text_color = [255, 255, 255] - - cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) - - if num_dets_to_consider == 0: - return img_numpy - - if args.display_text or args.display_bboxes: - for j in reversed(range(num_dets_to_consider)): - x1, y1, x2, y2 = boxes[j, :] - color = get_color(j) - score = scores[j] - - if args.display_bboxes: - cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1) - - if args.display_text: - _class = cfg.dataset.class_names[classes[j]] - text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class - - font_face = cv2.FONT_HERSHEY_DUPLEX - font_scale = 0.6 - font_thickness = 1 - - text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] - - text_pt = (x1, y1 - 3) - text_color = [255, 255, 255] - - cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1) - cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) - - - return img_numpy - -def prep_benchmark(dets_out, h, w): - with timer.env('Postprocess'): - t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) - - with timer.env('Copy'): - classes, scores, boxes, masks = [x[:args.top_k] for x in t] - if isinstance(scores, list): - box_scores = scores[0].cpu().numpy() - mask_scores = scores[1].cpu().numpy() - else: - scores = scores.cpu().numpy() - classes = classes.cpu().numpy() - boxes = boxes.cpu().numpy() - masks = masks.cpu().numpy() - - with timer.env('Sync'): - 
# Just in case - torch.npu.synchronize() - -def prep_coco_cats(): - """ Prepare inverted table for category id lookup given a coco cats object. """ - for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): - transformed_cat_id = transformed_cat_id_p1 - 1 - coco_cats[transformed_cat_id] = coco_cat_id - coco_cats_inv[coco_cat_id] = transformed_cat_id - - -def get_coco_cat(transformed_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats[transformed_cat_id] - -def get_transformed_cat(coco_cat_id): - """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ - return coco_cats_inv[coco_cat_id] - - -class Detections: - - def __init__(self): - self.bbox_data = [] - self.mask_data = [] - - def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): - """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ - bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] - - # Round to the nearest 10th to avoid huge file sizes, as COCO suggests - bbox = [round(float(x)*10)/10 for x in bbox] - - self.bbox_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'bbox': bbox, - 'score': float(score) - }) - - def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): - """ The segmentation should be the full mask, the size of the image and with size [h, w]. """ - rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) - rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings - - self.mask_data.append({ - 'image_id': int(image_id), - 'category_id': get_coco_cat(int(category_id)), - 'segmentation': rle, - 'score': float(score) - }) - - def dump(self): - dump_arguments = [ - (self.bbox_data, args.bbox_det_file), - (self.mask_data, args.mask_det_file) - ] - - for data, path in dump_arguments: - with open(path, 'w') as f: - json.dump(data, f) - - def dump_web(self): - """ Dumps it in the format for my web app. Warning: bad code ahead! """ - config_outs = ['preserve_aspect_ratio', 'use_prediction_module', - 'use_yolo_regressors', 'use_prediction_matching', - 'train_masks'] - - output = { - 'info' : { - 'Config': {key: getattr(cfg, key) for key in config_outs}, - } - } - - image_ids = list(set([x['image_id'] for x in self.bbox_data])) - image_ids.sort() - image_lookup = {_id: idx for idx, _id in enumerate(image_ids)} - - output['images'] = [{'image_id': image_id, 'dets': []} for image_id in image_ids] - - # These should already be sorted by score with the way prep_metrics works. 
- for bbox, mask in zip(self.bbox_data, self.mask_data): - image_obj = output['images'][image_lookup[bbox['image_id']]] - image_obj['dets'].append({ - 'score': bbox['score'], - 'bbox': bbox['bbox'], - 'category': cfg.dataset.class_names[get_transformed_cat(bbox['category_id'])], - 'mask': mask['segmentation'], - }) - - with open(os.path.join(args.web_det_path, '%s.json' % cfg.name), 'w') as f: - json.dump(output, f) - - - - -def _mask_iou(mask1, mask2, iscrowd=False): - with timer.env('Mask IoU'): - ret = mask_iou(mask1, mask2, iscrowd) - return ret.cpu() - -def _bbox_iou(bbox1, bbox2, iscrowd=False): - with timer.env('BBox IoU'): - ret = jaccard(bbox1, bbox2, iscrowd) - return ret.cpu() - -def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): - """ Returns a list of APs for this image, with each element being for a class """ - if not args.output_coco_json: - with timer.env('Prepare gt'): - gt_boxes = torch.Tensor(gt[:, :4]) if args.cuda else torch.Tensor(gt[:, :4]) - gt_boxes[:, [0, 2]] *= w - gt_boxes[:, [1, 3]] *= h - gt_classes = list(gt[:, 4].astype(int)) - gt_masks = torch.Tensor(gt_masks).view(-1, h*w) if args.cuda else torch.Tensor(gt_masks).view(-1, h*w) - - if num_crowd > 0: - split = lambda x: (x[-num_crowd:], x[:-num_crowd]) - crowd_boxes , gt_boxes = split(gt_boxes) - crowd_masks , gt_masks = split(gt_masks) - crowd_classes, gt_classes = split(gt_classes) - - with timer.env('Postprocess'): - classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) - - if classes.size(0) == 0: - return - - classes = list(classes.cpu().numpy().astype(int)) - if isinstance(scores, list): - box_scores = list(scores[0].cpu().numpy().astype(float)) - mask_scores = list(scores[1].cpu().numpy().astype(float)) - else: - scores = list(scores.cpu().numpy().astype(float)) - box_scores = scores - mask_scores = scores - # if args.cuda: - # masks = masks.view(-1, h*w).npu() - # boxes = boxes.npu() - # else: - masks = masks.view(-1, h*w) - - - if args.output_coco_json: - with timer.env('JSON Output'): - boxes = boxes.cpu().numpy() - masks = masks.view(-1, h, w).cpu().numpy() - for i in range(masks.shape[0]): - # Make sure that the bounding box actually makes sense and a mask was produced - if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: - detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) - detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) - return - - with timer.env('Eval Setup'): - num_pred = len(classes) - num_gt = len(gt_classes) - - mask_iou_cache = _mask_iou(masks, gt_masks) - bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) - - if num_crowd > 0: - crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) - crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) - else: - crowd_mask_iou_cache = None - crowd_bbox_iou_cache = None - - box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) - mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) - - iou_types = [ - ('box', lambda i,j: bbox_iou_cache[i, j].item(), - lambda i,j: crowd_bbox_iou_cache[i,j].item(), - lambda i: box_scores[i], box_indices), - ('mask', lambda i,j: mask_iou_cache[i, j].item(), - lambda i,j: crowd_mask_iou_cache[i,j].item(), - lambda i: mask_scores[i], mask_indices) - ] - - timer.start('Main loop') - for _class in set(classes + gt_classes): - ap_per_iou = [] - num_gt_for_class = sum([1 for x in 
gt_classes if x == _class]) - - for iouIdx in range(len(iou_thresholds)): - iou_threshold = iou_thresholds[iouIdx] - - for iou_type, iou_func, crowd_func, score_func, indices in iou_types: - gt_used = [False] * len(gt_classes) - - ap_obj = ap_data[iou_type][iouIdx][_class] - ap_obj.add_gt_positives(num_gt_for_class) - - for i in indices: - if classes[i] != _class: - continue - - max_iou_found = iou_threshold - max_match_idx = -1 - for j in range(num_gt): - if gt_used[j] or gt_classes[j] != _class: - continue - - iou = iou_func(i, j) - - if iou > max_iou_found: - max_iou_found = iou - max_match_idx = j - - if max_match_idx >= 0: - gt_used[max_match_idx] = True - ap_obj.push(score_func(i), True) - else: - # If the detection matches a crowd, we can just ignore it - matched_crowd = False - - if num_crowd > 0: - for j in range(len(crowd_classes)): - if crowd_classes[j] != _class: - continue - - iou = crowd_func(i, j) - - if iou > iou_threshold: - matched_crowd = True - break - - # All this crowd code so that we can make sure that our eval code gives the - # same result as COCOEval. There aren't even that many crowd annotations to - # begin with, but accuracy is of the utmost importance. - if not matched_crowd: - ap_obj.push(score_func(i), False) - timer.stop('Main loop') - - -class APDataObject: - """ - Stores all the information necessary to calculate the AP for one IoU and one class. - Note: I type annotated this because why not. - """ - - def __init__(self): - self.data_points = [] - self.num_gt_positives = 0 - - def push(self, score:float, is_true:bool): - self.data_points.append((score, is_true)) - - def add_gt_positives(self, num_positives:int): - """ Call this once per image. """ - self.num_gt_positives += num_positives - - def is_empty(self) -> bool: - return len(self.data_points) == 0 and self.num_gt_positives == 0 - - def get_ap(self) -> float: - """ Warning: result not cached. """ - - if self.num_gt_positives == 0: - return 0 - - # Sort descending by score - self.data_points.sort(key=lambda x: -x[0]) - - precisions = [] - recalls = [] - num_true = 0 - num_false = 0 - - # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. - for datum in self.data_points: - # datum[1] is whether the detection a true or false positive - if datum[1]: num_true += 1 - else: num_false += 1 - - precision = num_true / (num_true + num_false) - recall = num_true / self.num_gt_positives - - precisions.append(precision) - recalls.append(recall) - - # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] - # Basically, remove any temporary dips from the curve. - # At least that's what I think, idk. COCOEval did it so I do too. - for i in range(len(precisions)-1, 0, -1): - if precisions[i] > precisions[i-1]: - precisions[i-1] = precisions[i] - - # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. - y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 - x_range = np.array([x / 100 for x in range(101)]) - recalls = np.array(recalls) - - # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. - # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). - # I approximate the integral this way, because that's how COCOEval does it. 
- indices = np.searchsorted(recalls, x_range, side='left') - for bar_idx, precision_idx in enumerate(indices): - if precision_idx < len(precisions): - y_range[bar_idx] = precisions[precision_idx] - - # Finally compute the riemann sum to get our integral. - # avg([precision(x) for x in 0:0.01:1]) - return sum(y_range) / len(y_range) - -def badhash(x): - """ - Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. - - Source: - https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - """ - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF - x = ((x >> 16) ^ x) & 0xFFFFFFFF - return x - -def evalimage(net:Yolact, path:str, save_path:str=None): - if args.cuda: - frame = torch.from_numpy(cv2.imread(path)).npu().float() - else: - frame = torch.from_numpy(cv2.imread(path)).float() - print('frame: ', frame) - batch = FastBaseTransform()(frame.unsqueeze(0)) - print('batch_pred:', batch) - print('batch_shape:', batch.shape) - preds = net(batch) - - img_numpy = prep_display(preds, frame, None, None, undo_transform=False) - - if save_path is None: - img_numpy = img_numpy[:, :, (2, 1, 0)] - - if save_path is None: - plt.imshow(img_numpy) - plt.title(path) - plt.show() - else: - cv2.imwrite(save_path, img_numpy) - -def evalimages(net:Yolact, input_folder:str, output_folder:str): - if not os.path.exists(output_folder): - os.mkdir(output_folder) - - print() - for p in Path(input_folder).glob('*'): - path = str(p) - name = os.path.basename(path) - name = '.'.join(name.split('.')[:-1]) + '.png' - out_path = os.path.join(output_folder, name) - - evalimage(net, path, out_path) - print(path + ' -> ' + out_path) - print('Done.') - -from multiprocessing.pool import ThreadPool -from queue import Queue - -class CustomDataParallel(torch.nn.DataParallel): - """ A Custom Data Parallel class that properly gathers lists of dictionaries. """ - def gather(self, outputs, output_device): - # Note that I don't actually want to convert everything to the output_device - return sum(outputs, []) - -def evalvideo(net:Yolact, path:str, out_path:str=None): - # If the path is a digit, parse it as a webcam index - is_webcam = path.isdigit() - - # If the input image size is constant, this make things faster (hence why we can use it in a video setting). 
- cudnn.benchmark = True - - if is_webcam: - vid = cv2.VideoCapture(int(path)) - else: - vid = cv2.VideoCapture(path) - - if not vid.isOpened(): - print('Could not open video "%s"' % path) - exit(-1) - - target_fps = round(vid.get(cv2.CAP_PROP_FPS)) - frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) - frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) - - if is_webcam: - num_frames = float('inf') - else: - num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT)) - - net = CustomDataParallel(net).npu() - transform = torch.nn.DataParallel(FastBaseTransform()).npu() - frame_times = MovingAverage(100) - fps = 0 - frame_time_target = 1 / target_fps - running = True - fps_str = '' - vid_done = False - frames_displayed = 0 - - if out_path is not None: - out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (frame_width, frame_height)) - - def cleanup_and_exit(): - print() - pool.terminate() - vid.release() - if out_path is not None: - out.release() - cv2.destroyAllWindows() - exit() - - def get_next_frame(vid): - frames = [] - for idx in range(args.video_multiframe): - frame = vid.read()[1] - if frame is None: - return frames - frames.append(frame) - return frames - - def transform_frame(frames): - with torch.no_grad(): - frames = [torch.from_numpy(frame).npu().float() for frame in frames] - return frames, transform(torch.stack(frames, 0)) - - def eval_network(inp): - with torch.no_grad(): - frames, imgs = inp - num_extra = 0 - while imgs.size(0) < args.video_multiframe: - imgs = torch.cat([imgs, imgs[0].unsqueeze(0)], dim=0) - num_extra += 1 - out = net(imgs) - if num_extra > 0: - out = out[:-num_extra] - return frames, out - - def prep_frame(inp, fps_str): - with torch.no_grad(): - frame, preds = inp - return prep_display(preds, frame, None, None, undo_transform=False, class_color=True, fps_str=fps_str) - - frame_buffer = Queue() - video_fps = 0 - - # All this timing code to make sure that - def play_video(): - try: - nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done - - video_frame_times = MovingAverage(100) - frame_time_stabilizer = frame_time_target - last_time = None - stabilizer_step = 0.0005 - progress_bar = ProgressBar(30, num_frames) - - while running: - frame_time_start = time.time() - - if not frame_buffer.empty(): - next_time = time.time() - if last_time is not None: - video_frame_times.add(next_time - last_time) - video_fps = 1 / video_frame_times.get_avg() - if out_path is None: - cv2.imshow(path, frame_buffer.get()) - else: - out.write(frame_buffer.get()) - frames_displayed += 1 - last_time = next_time - - if out_path is not None: - if video_frame_times.get_avg() == 0: - fps = 0 - else: - fps = 1 / video_frame_times.get_avg() - progress = frames_displayed / num_frames * 100 - progress_bar.set_val(frames_displayed) - - print('\rProcessing Frames %s %6d / %6d (%5.2f%%) %5.2f fps ' - % (repr(progress_bar), frames_displayed, num_frames, progress, fps), end='') - - - # This is split because you don't want savevideo to require cv2 display functionality (see #197) - if out_path is None and cv2.waitKey(1) == 27: - # Press Escape to close - running = False - if not (frames_displayed < num_frames): - running = False - - if not vid_done: - buffer_size = frame_buffer.qsize() - if buffer_size < args.video_multiframe: - frame_time_stabilizer += stabilizer_step - elif buffer_size > args.video_multiframe: - frame_time_stabilizer -= stabilizer_step - if frame_time_stabilizer < 0: - frame_time_stabilizer = 0 - - new_target = 
frame_time_stabilizer if is_webcam else max(frame_time_stabilizer, frame_time_target) - else: - new_target = frame_time_target - - next_frame_target = max(2 * new_target - video_frame_times.get_avg(), 0) - target_time = frame_time_start + next_frame_target - 0.001 # Let's just subtract a millisecond to be safe - - if out_path is None or args.emulate_playback: - # This gives more accurate timing than if sleeping the whole amount at once - while time.time() < target_time: - time.sleep(0.001) - else: - # Let's not starve the main thread, now - time.sleep(0.001) - except: - # See issue #197 for why this is necessary - import traceback - traceback.print_exc() - - - extract_frame = lambda x, i: (x[0][i] if x[1][i]['detection'] is None else x[0][i].to(x[1][i]['detection']['box'].device), [x[1][i]]) - - # Prime the network on the first frame because I do some thread unsafe things otherwise - print('Initializing model... ', end='') - first_batch = eval_network(transform_frame(get_next_frame(vid))) - print('Done.') - - # For each frame the sequence of functions it needs to go through to be processed (in reversed order) - sequence = [prep_frame, eval_network, transform_frame] - pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2) - pool.apply_async(play_video) - active_frames = [{'value': extract_frame(first_batch, i), 'idx': 0} for i in range(len(first_batch[0]))] - - print() - if out_path is None: print('Press Escape to close.') - try: - while vid.isOpened() and running: - # Hard limit on frames in buffer so we don't run out of memory >.> - while frame_buffer.qsize() > 100: - time.sleep(0.001) - - start_time = time.time() - - # Start loading the next frames from the disk - if not vid_done: - next_frames = pool.apply_async(get_next_frame, args=(vid,)) - else: - next_frames = None - - if not (vid_done and len(active_frames) == 0): - # For each frame in our active processing queue, dispatch a job - # for that frame using the current function in the sequence - for frame in active_frames: - _args = [frame['value']] - if frame['idx'] == 0: - _args.append(fps_str) - frame['value'] = pool.apply_async(sequence[frame['idx']], args=_args) - - # For each frame whose job was the last in the sequence (i.e. 
for all final outputs) - for frame in active_frames: - if frame['idx'] == 0: - frame_buffer.put(frame['value'].get()) - - # Remove the finished frames from the processing queue - active_frames = [x for x in active_frames if x['idx'] > 0] - - # Finish evaluating every frame in the processing queue and advanced their position in the sequence - for frame in list(reversed(active_frames)): - frame['value'] = frame['value'].get() - frame['idx'] -= 1 - - if frame['idx'] == 0: - # Split this up into individual threads for prep_frame since it doesn't support batch size - active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, len(frame['value'][0]))] - frame['value'] = extract_frame(frame['value'], 0) - - # Finish loading in the next frames and add them to the processing queue - if next_frames is not None: - frames = next_frames.get() - if len(frames) == 0: - vid_done = True - else: - active_frames.append({'value': frames, 'idx': len(sequence)-1}) - - # Compute FPS - frame_times.add(time.time() - start_time) - fps = args.video_multiframe / frame_times.get_avg() - else: - fps = 0 - - fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (fps, video_fps, frame_buffer.qsize()) - if not args.display_fps: - print('\r' + fps_str + ' ', end='') - - except KeyboardInterrupt: - print('\nStopping...') - - cleanup_and_exit() - -def evaluate(net:Yolact, dataset, train_mode=False, trainCuda = True): - net.detect.use_fast_nms = args.fast_nms - net.detect.use_cross_class_nms = args.cross_class_nms - cfg.mask_proto_debug = args.mask_proto_debug - - # TODO Currently we do not support Fast Mask Re-scroing in evalimage, evalimages, and evalvideo - if args.image is not None: - if ':' in args.image: - inp, out = args.image.split(':') - evalimage(net, inp, out) - else: - evalimage(net, args.image) - return - elif args.images is not None: - inp, out = args.images.split(':') - evalimages(net, inp, out) - return - elif args.video is not None: - if ':' in args.video: - inp, out = args.video.split(':') - evalvideo(net, inp, out) - else: - evalvideo(net, args.video) - return - - frame_times = MovingAverage() - dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) - progress_bar = ProgressBar(30, dataset_size) - - print() - - if not args.display and not args.benchmark: - # For each class and iou, stores tuples (score, isPositive) - # Index ap_data[type][iouIdx][classIdx] - ap_data = { - 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], - 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] - } - detections = Detections() - else: - timer.disable('Load Data') - - dataset_indices = list(range(len(dataset))) - - if args.shuffle: - random.shuffle(dataset_indices) - elif not args.no_sort: - # Do a deterministic shuffle based on the image ids - # - # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's - # the order of insertion. That means on python 3.6, the images come in the order they are in - # in the annotations file. For some reason, the first images in the annotations file are - # the hardest. To combat this, I use a hard-coded hash function based on the image ids - # to shuffle the indices we use. That way, no matter what python version or how pycocotools - # handles the data, we get the same result every time. 
- hashed = [badhash(x) for x in dataset.ids] - dataset_indices.sort(key=lambda x: hashed[x]) - - dataset_indices = dataset_indices[:dataset_size] - - try: - # Main eval loop - for it, image_idx in enumerate(tqdm(dataset_indices)): - timer.reset() - - with timer.env('Load Data'): - img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) - # Test flag, do not upvote - if cfg.mask_proto_debug: - with open('scripts/info.txt', 'w') as f: - f.write(str(dataset.ids[image_idx])) - np.save('scripts/gt.npy', gt_masks) - - batch = Variable(img.unsqueeze(0)) - if args.cuda: - if train_mode: - batch = batch.npu() if trainCuda else batch - else: - batch = batch.npu() - - with timer.env('Network Extra'): - preds = net(batch) - if preds is not None and preds[0] is not None: - if preds[0]['box'] is not None: - preds[0]['box'] = preds[0]['box'].cpu() - if preds[0]['mask'] is not None: - preds[0]['mask'] = preds[0]['mask'].cpu() - if preds[0]['class'] is not None: - preds[0]['class'] = preds[0]['class'].cpu() - if preds[0]['score'] is not None: - preds[0]['score'] = preds[0]['score'].cpu() - if preds[0]['proto'] is not None: - preds[0]['proto'] = preds[0]['proto'].cpu() - - # Perform the meat of the operation here depending on our mode. - if args.display: - img_numpy = prep_display(preds, img, h, w) - elif args.benchmark: - prep_benchmark(preds, h, w) - else: - prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) - - # First couple of images take longer because we're constructing the graph. - # Since that's technically initialization, don't include those in the FPS calculations. - if it > 1: - frame_times.add(timer.total_time()) - - if args.display: - if it > 1: - print('Avg FPS: %.4f' % (1 / frame_times.get_avg())) - plt.imshow(img_numpy) - plt.title(str(dataset.ids[image_idx])) - plt.show() - elif not args.no_bar: - if it > 1: fps = 1 / frame_times.get_avg() - else: fps = 0 - progress = (it+1) / dataset_size * 100 - progress_bar.set_val(it+1) - print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' - % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') - - - - if not args.display and not args.benchmark: - print() - if args.output_coco_json: - print('Dumping detections...') - if args.output_web_json: - detections.dump_web() - else: - detections.dump() - else: - if not train_mode: - print('Saving data...') - with open(args.ap_data_file, 'wb') as f: - pickle.dump(ap_data, f) - - return calc_map(ap_data) - elif args.benchmark: - print() - print() - print('Stats for the last frame:') - timer.print_stats() - avg_seconds = frame_times.get_avg() - print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000*avg_seconds)) - - except KeyboardInterrupt: - print('Stopping...') - - -def calc_map(ap_data): - print('Calculating mAP...') - aps = [{'box': [], 'mask': []} for _ in iou_thresholds] - - for _class in range(len(cfg.dataset.class_names)): - for iou_idx in range(len(iou_thresholds)): - for iou_type in ('box', 'mask'): - ap_obj = ap_data[iou_type][iou_idx][_class] - - if not ap_obj.is_empty(): - aps[iou_idx][iou_type].append(ap_obj.get_ap()) - - all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} - - # Looking back at it, this code is really hard to read :/ - for iou_type in ('box', 'mask'): - all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict - for i, threshold in enumerate(iou_thresholds): - mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 - 
all_maps[iou_type][int(threshold*100)] = mAP - all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) - - print_maps(all_maps) - - # Put in a prettier format so we can serialize it to json during training - all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} - return all_maps - -def print_maps(all_maps): - # Warning: hacky - make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) - make_sep = lambda n: ('-------+' * n) - - print() - print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) - print(make_sep(len(all_maps['box']) + 1)) - for iou_type in ('box', 'mask'): - print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values()])) - print(make_sep(len(all_maps['box']) + 1)) - print() - - - -if __name__ == '__main__': - parse_args() - - if args.config is not None: - set_cfg(args.config) - - if args.trained_model == 'interrupt': - - args.trained_model = SavePath.get_interrupt('weights/') - elif args.trained_model == 'latest': - args.trained_model = SavePath.get_latest('weights/', cfg.name) - - if args.config is None: - model_path = SavePath.from_str(args.trained_model) - # TODO: Bad practice? Probably want to do a name lookup instead. - args.config = model_path.model_name + '_config' - print('Config not specified. Parsed %s from the file name.\n' % args.config) - set_cfg(args.config) - - if args.detect: - cfg.eval_mask_branch = False - - if args.dataset is not None: - set_dataset(args.dataset) - - if args.data_path: - cfg.dataset.valid_images = args.data_path + '/images/' - cfg.dataset.valid_info = args.data_path + '/annotations/instances_val2017.json' - - with torch.no_grad(): - if not os.path.exists('results'): - os.makedirs('results') - - if args.cuda: - cudnn.fastest = True - args.device = torch.device(f'npu:{args.rank_id}') - torch.npu.set_device(args.device) - #torch.set_default_tensor_type('torch.npu.FloatTensor') - else: - torch.set_default_tensor_type('torch.FloatTensor') - - if args.resume and not args.display: - with open(args.ap_data_file, 'rb') as f: - ap_data = pickle.load(f) - calc_map(ap_data) - exit() - - if args.image is None and args.video is None and args.images is None: - dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info, - transform=BaseTransform(), has_gt=cfg.dataset.has_gt) - prep_coco_cats() - else: - dataset = None - - print('Loading model...', end='') - net = Yolact() - net.load_weights(args.trained_model) - net.eval() - print(' Done.') - - if args.cuda: - net = net.npu() - - evaluate(net, dataset) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from data import COCODetection, get_label_map, MEANS, COLORS +from yolact import Yolact +from utils.augmentations import BaseTransform, FastBaseTransform, Resize +from utils.functions import MovingAverage, ProgressBar +from layers.box_utils import jaccard, center_size, mask_iou +from utils import timer +from utils.functions import SavePath +from layers.output_utils import postprocess, undo_image_transformation +import pycocotools + +from data import cfg, set_cfg, set_dataset +from tqdm import tqdm +import numpy as np +import torch +import torch.backends.cudnn as cudnn +from torch.autograd import Variable +import argparse +import time +import random +import cProfile +import pickle +import json +import os +from collections import defaultdict +from pathlib import Path +from collections import OrderedDict +from PIL import Image + +import matplotlib.pyplot as plt +import cv2 + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description='YOLACT COCO Evaluation') + parser.add_argument('--trained_model', + default='weights/ssd300_mAP_77.43_v2.pth', type=str, + help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.') + parser.add_argument('--data_path', default=None, type=str, + help='data path') + parser.add_argument('--top_k', default=5, type=int, + help='Further restrict the number of predictions to parse') + parser.add_argument('--cuda', default=True, type=str2bool, + help='Use cuda to evaulate model') + parser.add_argument('--fast_nms', default=True, type=str2bool, + help='Whether to use a faster, but not entirely correct version of NMS.') + parser.add_argument('--cross_class_nms', default=False, type=str2bool, + help='Whether compute NMS cross-class or per-class.') + parser.add_argument('--display_masks', default=True, type=str2bool, + help='Whether or not to display masks over bounding boxes') + parser.add_argument('--display_bboxes', default=True, type=str2bool, + help='Whether or not to display bboxes around masks') + parser.add_argument('--display_text', default=True, type=str2bool, + help='Whether or not to display text (class [score])') + parser.add_argument('--display_scores', default=True, type=str2bool, + help='Whether or not to display scores in addition to classes') + parser.add_argument('--display', dest='display', action='store_true', + help='Display qualitative results instead of quantitative ones.') + parser.add_argument('--shuffle', dest='shuffle', action='store_true', + help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.') + parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str, + help='In quantitative mode, the file to save detections before calculating mAP.') + parser.add_argument('--resume', dest='resume', action='store_true', + help='If display not set, this resumes mAP calculations from the ap_data_file.') + parser.add_argument('--max_images', default=-1, type=int, + help='The maximum number of images from the dataset to consider. 
Use -1 for all.') + parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true', + help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.') + parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str, + help='The output file for coco bbox results if --coco_results is set.') + parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str, + help='The output file for coco mask results if --coco_results is set.') + parser.add_argument('--config', default='yolact_plus_resnet50_config', + help='The config object to use.') + parser.add_argument('--output_web_json', dest='output_web_json', action='store_true', + help='If display is not set, instead of processing IoU values, this dumps detections for usage with the detections viewer web thingy.') + parser.add_argument('--web_det_path', default='web/dets/', type=str, + help='If output_web_json is set, this is the path to dump detections into.') + parser.add_argument('--no_bar', dest='no_bar', action='store_true', + help='Do not output the status bar. This is useful for when piping to a file.') + parser.add_argument('--display_lincomb', default=False, type=str2bool, + help='If the config uses lincomb masks, output a visualization of how those masks are created.') + parser.add_argument('--benchmark', default=False, dest='benchmark', action='store_true', + help='Equivalent to running display mode but without displaying an image.') + parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true', + help='Do not sort images by hashed image ID.') + parser.add_argument('--seed', default=None, type=int, + help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.') + parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true', + help='Outputs stuff for scripts/compute_mask.py.') + parser.add_argument('--no_crop', default=False, dest='crop', action='store_false', + help='Do not crop output masks with the predicted bounding box.') + parser.add_argument('--image', default=None, type=str, + help='A path to an image to use for display.') + parser.add_argument('--images', default=None, type=str, + help='An input folder of images and output folder to save detected images. Should be in the format input->output.') + parser.add_argument('--video', default=None, type=str, + help='A path to a video to evaluate on. Passing in a number will use that index webcam.') + parser.add_argument('--video_multiframe', default=1, type=int, + help='The number of frames to evaluate in parallel to make videos play at higher fps.') + parser.add_argument('--score_threshold', default=0, type=float, + help='Detections with a score under this threshold will not be considered. This currently only works in display mode.') + parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') + parser.add_argument('--detect', default=False, dest='detect', action='store_true', + help='Don\'t evauluate the mask branch at all and only do object detection. 
This only works for --display and --benchmark.') + parser.add_argument('--display_fps', default=False, dest='display_fps', action='store_true', + help='When displaying / saving video, draw the FPS on the frame') + parser.add_argument('--emulate_playback', default=False, dest='emulate_playback', action='store_true', + help='When saving a video, emulate the framerate that you\'d get running in real-time mode.') + parser.add_argument('--rank_id', default=0, type=int) + parser.set_defaults(no_bar=False, display=False, resume=False, output_coco_json=False, output_web_json=False, shuffle=False, + benchmark=False, no_sort=False, no_hash=False, mask_proto_debug=False, crop=True, detect=False, display_fps=False, + emulate_playback=False) + + global args + args = parser.parse_args(argv) + + if args.output_web_json: + args.output_coco_json = True + + if args.seed is not None: + random.seed(args.seed) + +iou_thresholds = [x / 100 for x in range(50, 100, 5)] +coco_cats = {} # Call prep_coco_cats to fill this +coco_cats_inv = {} +color_cache = defaultdict(lambda: {}) + +def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''): + """ + Note: If undo_transform=False then im_h and im_w are allowed to be None. + """ + if undo_transform: + img_numpy = undo_image_transformation(img, w, h) + if args.cuda: + img_gpu = torch.Tensor(img_numpy).npu() + else: + img_gpu = torch.Tensor(img_numpy) + print('img_shape:', img_gpu) + else: + img_gpu = img / 255.0 + h, w, _ = img.shape + print('h, w, _ :', img.shape) + + with timer.env('Postprocess'): + save = cfg.rescore_bbox + cfg.rescore_bbox = True + t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb, + crop_masks = args.crop, + score_threshold = args.score_threshold) + cfg.rescore_bbox = save + + with timer.env('Copy'): + idx = t[1].argsort(0, descending=True)[:args.top_k] + + if cfg.eval_mask_branch: + # Masks are drawn on the GPU, so don't copy + masks = t[3][idx] + classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]] + + num_dets_to_consider = min(args.top_k, classes.shape[0]) + for j in range(num_dets_to_consider): + if scores[j] < args.score_threshold: + num_dets_to_consider = j + break + + # Quick and dirty lambda for selecting the color for a particular index + # Also keeps track of a per-gpu color cache for maximum speed + def get_color(j, on_gpu=None): + global color_cache + color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS) + + if on_gpu is not None and color_idx in color_cache[on_gpu]: + return color_cache[on_gpu][color_idx] + else: + color = COLORS[color_idx] + if not undo_transform: + # The image might come in as RGB or BRG, depending + color = (color[2], color[1], color[0]) + if on_gpu is not None: + color = torch.Tensor(color).to(on_gpu).float() / 255. 
+ color_cache[on_gpu][color_idx] = color + return color + + # First, draw the masks on the GPU where we can do it really fast + # Beware: very fast but possibly unintelligible mask-drawing code ahead + # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice + if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0: + # After this, mask is of size [num_dets, h, w, 1] + masks = masks[:num_dets_to_consider, :, :, None] + + # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1]) + colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0) + masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha + + # This is 1 everywhere except for 1-mask_alpha where the mask is + inv_alph_masks = masks * (-mask_alpha) + 1 + + # I did the math for this on pen and paper. This whole block should be equivalent to: + # for j in range(num_dets_to_consider): + # img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j] + masks_color_summand = masks_color[0] + if num_dets_to_consider > 1: + inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0) + masks_color_cumul = masks_color[1:] * inv_alph_cumul + masks_color_summand += masks_color_cumul.sum(dim=0) + + img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand + + if args.display_fps: + # Draw the box for the fps on the GPU + font_face = cv2.FONT_HERSHEY_DUPLEX + font_scale = 0.6 + font_thickness = 1 + + text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0] + + img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha + + + # Then draw the stuff that needs to be done on the cpu + # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason + img_numpy = (img_gpu * 255).byte().cpu().numpy() + + if args.display_fps: + # Draw the text on the CPU + text_pt = (4, text_h + 2) + text_color = [255, 255, 255] + + cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) + + if num_dets_to_consider == 0: + return img_numpy + + if args.display_text or args.display_bboxes: + for j in reversed(range(num_dets_to_consider)): + x1, y1, x2, y2 = boxes[j, :] + color = get_color(j) + score = scores[j] + + if args.display_bboxes: + cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1) + + if args.display_text: + _class = cfg.dataset.class_names[classes[j]] + text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class + + font_face = cv2.FONT_HERSHEY_DUPLEX + font_scale = 0.6 + font_thickness = 1 + + text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0] + + text_pt = (x1, y1 - 3) + text_color = [255, 255, 255] + + cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1) + cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA) + + + return img_numpy + +def prep_benchmark(dets_out, h, w): + with timer.env('Postprocess'): + t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) + + with timer.env('Copy'): + classes, scores, boxes, masks = [x[:args.top_k] for x in t] + if isinstance(scores, list): + box_scores = scores[0].cpu().numpy() + mask_scores = scores[1].cpu().numpy() + else: + scores = scores.cpu().numpy() + classes = classes.cpu().numpy() + boxes = boxes.cpu().numpy() + masks = masks.cpu().numpy() + + with timer.env('Sync'): + 
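# A minimal NumPy sketch of the vectorized mask alpha-compositing used in
# prep_display above (illustrative shapes and values, not taken from the patch):
# compositing the masks one at a time is collapsed into a single cumprod/sum
# expression. The loop below applies masks in reverse index order, which is
# exactly what the cumprod form reproduces; ordering only matters where masks overlap.
import numpy as np

rng = np.random.default_rng(0)
n, h, w, mask_alpha = 3, 4, 5, 0.45
img = rng.random((h, w, 3))
masks = (rng.random((n, h, w, 1)) > 0.5).astype(np.float64)
colors = rng.random((n, 1, 1, 3))

masks_color = masks * colors * mask_alpha      # (n, h, w, 3) coloured masks
inv_alph_masks = masks * (-mask_alpha) + 1     # 1 outside a mask, 1 - alpha inside

# Reference: composite one mask at a time.
ref = img.copy()
for j in reversed(range(n)):
    ref = ref * inv_alph_masks[j] + masks_color[j]

# Vectorized form, mirroring the cumprod trick.
summand = masks_color[0].copy()
if n > 1:
    inv_cumul = np.cumprod(inv_alph_masks[:n - 1], axis=0)
    summand += (masks_color[1:] * inv_cumul).sum(axis=0)
out = img * inv_alph_masks.prod(axis=0) + summand
assert np.allclose(ref, out)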
# Just in case + torch.npu.synchronize() + +def prep_coco_cats(): + """ Prepare inverted table for category id lookup given a coco cats object. """ + for coco_cat_id, transformed_cat_id_p1 in get_label_map().items(): + transformed_cat_id = transformed_cat_id_p1 - 1 + coco_cats[transformed_cat_id] = coco_cat_id + coco_cats_inv[coco_cat_id] = transformed_cat_id + + +def get_coco_cat(transformed_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats[transformed_cat_id] + +def get_transformed_cat(coco_cat_id): + """ transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """ + return coco_cats_inv[coco_cat_id] + + +class Detections: + + def __init__(self): + self.bbox_data = [] + self.mask_data = [] + + def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float): + """ Note that bbox should be a list or tuple of (x1, y1, x2, y2) """ + bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] + + # Round to the nearest 10th to avoid huge file sizes, as COCO suggests + bbox = [round(float(x)*10)/10 for x in bbox] + + self.bbox_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'bbox': bbox, + 'score': float(score) + }) + + def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float): + """ The segmentation should be the full mask, the size of the image and with size [h, w]. """ + rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8))) + rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings + + self.mask_data.append({ + 'image_id': int(image_id), + 'category_id': get_coco_cat(int(category_id)), + 'segmentation': rle, + 'score': float(score) + }) + + def dump(self): + dump_arguments = [ + (self.bbox_data, args.bbox_det_file), + (self.mask_data, args.mask_det_file) + ] + + for data, path in dump_arguments: + with open(path, 'w') as f: + json.dump(data, f) + + def dump_web(self): + """ Dumps it in the format for my web app. Warning: bad code ahead! """ + config_outs = ['preserve_aspect_ratio', 'use_prediction_module', + 'use_yolo_regressors', 'use_prediction_matching', + 'train_masks'] + + output = { + 'info' : { + 'Config': {key: getattr(cfg, key) for key in config_outs}, + } + } + + image_ids = list(set([x['image_id'] for x in self.bbox_data])) + image_ids.sort() + image_lookup = {_id: idx for idx, _id in enumerate(image_ids)} + + output['images'] = [{'image_id': image_id, 'dets': []} for image_id in image_ids] + + # These should already be sorted by score with the way prep_metrics works. 
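# A minimal sketch of the bookkeeping Detections.add_bbox does above
# (made-up numbers, not from the patch): corner boxes (x1, y1, x2, y2) become
# COCO-style (x, y, w, h) and are rounded to one decimal place to keep the JSON small.
def to_coco_bbox(x1, y1, x2, y2):
    bbox = [x1, y1, x2 - x1, y2 - y1]
    return [round(float(v) * 10) / 10 for v in bbox]

print(to_coco_bbox(12.34, 56.78, 100.0, 200.0))   # [12.3, 56.8, 87.7, 143.2]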
+ for bbox, mask in zip(self.bbox_data, self.mask_data): + image_obj = output['images'][image_lookup[bbox['image_id']]] + image_obj['dets'].append({ + 'score': bbox['score'], + 'bbox': bbox['bbox'], + 'category': cfg.dataset.class_names[get_transformed_cat(bbox['category_id'])], + 'mask': mask['segmentation'], + }) + + with open(os.path.join(args.web_det_path, '%s.json' % cfg.name), 'w') as f: + json.dump(output, f) + + + + +def _mask_iou(mask1, mask2, iscrowd=False): + with timer.env('Mask IoU'): + ret = mask_iou(mask1, mask2, iscrowd) + return ret.cpu() + +def _bbox_iou(bbox1, bbox2, iscrowd=False): + with timer.env('BBox IoU'): + ret = jaccard(bbox1, bbox2, iscrowd) + return ret.cpu() + +def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None): + """ Returns a list of APs for this image, with each element being for a class """ + if not args.output_coco_json: + with timer.env('Prepare gt'): + gt_boxes = torch.Tensor(gt[:, :4]) if args.cuda else torch.Tensor(gt[:, :4]) + gt_boxes[:, [0, 2]] *= w + gt_boxes[:, [1, 3]] *= h + gt_classes = list(gt[:, 4].astype(int)) + gt_masks = torch.Tensor(gt_masks).view(-1, h*w) if args.cuda else torch.Tensor(gt_masks).view(-1, h*w) + + if num_crowd > 0: + split = lambda x: (x[-num_crowd:], x[:-num_crowd]) + crowd_boxes , gt_boxes = split(gt_boxes) + crowd_masks , gt_masks = split(gt_masks) + crowd_classes, gt_classes = split(gt_classes) + + with timer.env('Postprocess'): + classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold) + + if classes.size(0) == 0: + return + + classes = list(classes.cpu().numpy().astype(int)) + if isinstance(scores, list): + box_scores = list(scores[0].cpu().numpy().astype(float)) + mask_scores = list(scores[1].cpu().numpy().astype(float)) + else: + scores = list(scores.cpu().numpy().astype(float)) + box_scores = scores + mask_scores = scores + # if args.cuda: + # masks = masks.view(-1, h*w).npu() + # boxes = boxes.npu() + # else: + masks = masks.view(-1, h*w) + + + if args.output_coco_json: + with timer.env('JSON Output'): + boxes = boxes.cpu().numpy() + masks = masks.view(-1, h, w).cpu().numpy() + for i in range(masks.shape[0]): + # Make sure that the bounding box actually makes sense and a mask was produced + if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0: + detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i]) + detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i]) + return + + with timer.env('Eval Setup'): + num_pred = len(classes) + num_gt = len(gt_classes) + + mask_iou_cache = _mask_iou(masks, gt_masks) + bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()) + + if num_crowd > 0: + crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True) + crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True) + else: + crowd_mask_iou_cache = None + crowd_bbox_iou_cache = None + + box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i]) + mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i]) + + iou_types = [ + ('box', lambda i,j: bbox_iou_cache[i, j].item(), + lambda i,j: crowd_bbox_iou_cache[i,j].item(), + lambda i: box_scores[i], box_indices), + ('mask', lambda i,j: mask_iou_cache[i, j].item(), + lambda i,j: crowd_mask_iou_cache[i,j].item(), + lambda i: mask_scores[i], mask_indices) + ] + + timer.start('Main loop') + for _class in set(classes + gt_classes): + ap_per_iou = [] + num_gt_for_class = sum([1 for x in 
gt_classes if x == _class]) + + for iouIdx in range(len(iou_thresholds)): + iou_threshold = iou_thresholds[iouIdx] + + for iou_type, iou_func, crowd_func, score_func, indices in iou_types: + gt_used = [False] * len(gt_classes) + + ap_obj = ap_data[iou_type][iouIdx][_class] + ap_obj.add_gt_positives(num_gt_for_class) + + for i in indices: + if classes[i] != _class: + continue + + max_iou_found = iou_threshold + max_match_idx = -1 + for j in range(num_gt): + if gt_used[j] or gt_classes[j] != _class: + continue + + iou = iou_func(i, j) + + if iou > max_iou_found: + max_iou_found = iou + max_match_idx = j + + if max_match_idx >= 0: + gt_used[max_match_idx] = True + ap_obj.push(score_func(i), True) + else: + # If the detection matches a crowd, we can just ignore it + matched_crowd = False + + if num_crowd > 0: + for j in range(len(crowd_classes)): + if crowd_classes[j] != _class: + continue + + iou = crowd_func(i, j) + + if iou > iou_threshold: + matched_crowd = True + break + + # All this crowd code so that we can make sure that our eval code gives the + # same result as COCOEval. There aren't even that many crowd annotations to + # begin with, but accuracy is of the utmost importance. + if not matched_crowd: + ap_obj.push(score_func(i), False) + timer.stop('Main loop') + + +class APDataObject: + """ + Stores all the information necessary to calculate the AP for one IoU and one class. + Note: I type annotated this because why not. + """ + + def __init__(self): + self.data_points = [] + self.num_gt_positives = 0 + + def push(self, score:float, is_true:bool): + self.data_points.append((score, is_true)) + + def add_gt_positives(self, num_positives:int): + """ Call this once per image. """ + self.num_gt_positives += num_positives + + def is_empty(self) -> bool: + return len(self.data_points) == 0 and self.num_gt_positives == 0 + + def get_ap(self) -> float: + """ Warning: result not cached. """ + + if self.num_gt_positives == 0: + return 0 + + # Sort descending by score + self.data_points.sort(key=lambda x: -x[0]) + + precisions = [] + recalls = [] + num_true = 0 + num_false = 0 + + # Compute the precision-recall curve. The x axis is recalls and the y axis precisions. + for datum in self.data_points: + # datum[1] is whether the detection a true or false positive + if datum[1]: num_true += 1 + else: num_false += 1 + + precision = num_true / (num_true + num_false) + recall = num_true / self.num_gt_positives + + precisions.append(precision) + recalls.append(recall) + + # Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))] + # Basically, remove any temporary dips from the curve. + # At least that's what I think, idk. COCOEval did it so I do too. + for i in range(len(precisions)-1, 0, -1): + if precisions[i] > precisions[i-1]: + precisions[i-1] = precisions[i] + + # Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars. + y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00 + x_range = np.array([x / 100 for x in range(101)]) + recalls = np.array(recalls) + + # I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range. + # Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009). + # I approximate the integral this way, because that's how COCOEval does it. 
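# A minimal, self-contained sketch of the 101-point interpolated AP that
# get_ap computes (hand-made detections, not from the patch): build the
# precision/recall curve, make precision monotonically non-increasing, then
# average precision sampled at recall 0.00, 0.01, ..., 1.00.
import numpy as np

data_points = [(0.9, True), (0.8, False), (0.7, True)]   # (score, is_true_positive)
num_gt_positives = 2

data_points.sort(key=lambda d: -d[0])
precisions, recalls, tp, fp = [], [], 0, 0
for _, is_true in data_points:
    tp, fp = tp + is_true, fp + (not is_true)
    precisions.append(tp / (tp + fp))
    recalls.append(tp / num_gt_positives)

for i in range(len(precisions) - 1, 0, -1):              # remove temporary dips
    precisions[i - 1] = max(precisions[i - 1], precisions[i])

x_range = np.array([x / 100 for x in range(101)])
indices = np.searchsorted(np.array(recalls), x_range, side='left')
y_range = [precisions[i] if i < len(precisions) else 0 for i in indices]
print(sum(y_range) / len(y_range))                       # ~0.835 for this toy case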
+ indices = np.searchsorted(recalls, x_range, side='left') + for bar_idx, precision_idx in enumerate(indices): + if precision_idx < len(precisions): + y_range[bar_idx] = precisions[precision_idx] + + # Finally compute the riemann sum to get our integral. + # avg([precision(x) for x in 0:0.01:1]) + return sum(y_range) / len(y_range) + +def badhash(x): + """ + Just a quick and dirty hash function for doing a deterministic shuffle based on image_id. + + Source: + https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + """ + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF + x = ((x >> 16) ^ x) & 0xFFFFFFFF + return x + +def evalimage(net:Yolact, path:str, save_path:str=None): + if args.cuda: + frame = torch.from_numpy(cv2.imread(path)).npu().float() + else: + frame = torch.from_numpy(cv2.imread(path)).float() + print('frame: ', frame) + batch = FastBaseTransform()(frame.unsqueeze(0)) + print('batch_pred:', batch) + print('batch_shape:', batch.shape) + preds = net(batch) + + img_numpy = prep_display(preds, frame, None, None, undo_transform=False) + + if save_path is None: + img_numpy = img_numpy[:, :, (2, 1, 0)] + + if save_path is None: + plt.imshow(img_numpy) + plt.title(path) + plt.show() + else: + cv2.imwrite(save_path, img_numpy) + +def evalimages(net:Yolact, input_folder:str, output_folder:str): + if not os.path.exists(output_folder): + os.mkdir(output_folder) + + print() + for p in Path(input_folder).glob('*'): + path = str(p) + name = os.path.basename(path) + name = '.'.join(name.split('.')[:-1]) + '.png' + out_path = os.path.join(output_folder, name) + + evalimage(net, path, out_path) + print(path + ' -> ' + out_path) + print('Done.') + +from multiprocessing.pool import ThreadPool +from queue import Queue + +class CustomDataParallel(torch.nn.DataParallel): + """ A Custom Data Parallel class that properly gathers lists of dictionaries. """ + def gather(self, outputs, output_device): + # Note that I don't actually want to convert everything to the output_device + return sum(outputs, []) + +def evalvideo(net:Yolact, path:str, out_path:str=None): + # If the path is a digit, parse it as a webcam index + is_webcam = path.isdigit() + + # If the input image size is constant, this make things faster (hence why we can use it in a video setting). 
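# A minimal sketch of the gather() override in CustomDataParallel above
# (placeholder dicts, not from the patch): each replica returns a Python list
# of detection dicts, so "gathering" is just flattening the per-replica lists,
# with no tensor movement to output_device.
replica_outputs = [
    [{'detection': {'score': 0.9}}],   # replica 0
    [{'detection': {'score': 0.7}}],   # replica 1
]
gathered = sum(replica_outputs, [])    # -> one flat list, length 2
print(len(gathered))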
+ cudnn.benchmark = True + + if is_webcam: + vid = cv2.VideoCapture(int(path)) + else: + vid = cv2.VideoCapture(path) + + if not vid.isOpened(): + print('Could not open video "%s"' % path) + exit(-1) + + target_fps = round(vid.get(cv2.CAP_PROP_FPS)) + frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) + frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + if is_webcam: + num_frames = float('inf') + else: + num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT)) + + net = CustomDataParallel(net).npu() + transform = torch.nn.DataParallel(FastBaseTransform()).npu() + frame_times = MovingAverage(100) + fps = 0 + frame_time_target = 1 / target_fps + running = True + fps_str = '' + vid_done = False + frames_displayed = 0 + + if out_path is not None: + out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (frame_width, frame_height)) + + def cleanup_and_exit(): + print() + pool.terminate() + vid.release() + if out_path is not None: + out.release() + cv2.destroyAllWindows() + exit() + + def get_next_frame(vid): + frames = [] + for idx in range(args.video_multiframe): + frame = vid.read()[1] + if frame is None: + return frames + frames.append(frame) + return frames + + def transform_frame(frames): + with torch.no_grad(): + frames = [torch.from_numpy(frame).npu().float() for frame in frames] + return frames, transform(torch.stack(frames, 0)) + + def eval_network(inp): + with torch.no_grad(): + frames, imgs = inp + num_extra = 0 + while imgs.size(0) < args.video_multiframe: + imgs = torch.cat([imgs, imgs[0].unsqueeze(0)], dim=0) + num_extra += 1 + out = net(imgs) + if num_extra > 0: + out = out[:-num_extra] + return frames, out + + def prep_frame(inp, fps_str): + with torch.no_grad(): + frame, preds = inp + return prep_display(preds, frame, None, None, undo_transform=False, class_color=True, fps_str=fps_str) + + frame_buffer = Queue() + video_fps = 0 + + # All this timing code to make sure that + def play_video(): + try: + nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done + + video_frame_times = MovingAverage(100) + frame_time_stabilizer = frame_time_target + last_time = None + stabilizer_step = 0.0005 + progress_bar = ProgressBar(30, num_frames) + + while running: + frame_time_start = time.time() + + if not frame_buffer.empty(): + next_time = time.time() + if last_time is not None: + video_frame_times.add(next_time - last_time) + video_fps = 1 / video_frame_times.get_avg() + if out_path is None: + cv2.imshow(path, frame_buffer.get()) + else: + out.write(frame_buffer.get()) + frames_displayed += 1 + last_time = next_time + + if out_path is not None: + if video_frame_times.get_avg() == 0: + fps = 0 + else: + fps = 1 / video_frame_times.get_avg() + progress = frames_displayed / num_frames * 100 + progress_bar.set_val(frames_displayed) + + print('\rProcessing Frames %s %6d / %6d (%5.2f%%) %5.2f fps ' + % (repr(progress_bar), frames_displayed, num_frames, progress, fps), end='') + + + # This is split because you don't want savevideo to require cv2 display functionality (see #197) + if out_path is None and cv2.waitKey(1) == 27: + # Press Escape to close + running = False + if not (frames_displayed < num_frames): + running = False + + if not vid_done: + buffer_size = frame_buffer.qsize() + if buffer_size < args.video_multiframe: + frame_time_stabilizer += stabilizer_step + elif buffer_size > args.video_multiframe: + frame_time_stabilizer -= stabilizer_step + if frame_time_stabilizer < 0: + frame_time_stabilizer = 0 + + new_target = 
frame_time_stabilizer if is_webcam else max(frame_time_stabilizer, frame_time_target) + else: + new_target = frame_time_target + + next_frame_target = max(2 * new_target - video_frame_times.get_avg(), 0) + target_time = frame_time_start + next_frame_target - 0.001 # Let's just subtract a millisecond to be safe + + if out_path is None or args.emulate_playback: + # This gives more accurate timing than if sleeping the whole amount at once + while time.time() < target_time: + time.sleep(0.001) + else: + # Let's not starve the main thread, now + time.sleep(0.001) + except: + # See issue #197 for why this is necessary + import traceback + traceback.print_exc() + + + extract_frame = lambda x, i: (x[0][i] if x[1][i]['detection'] is None else x[0][i].to(x[1][i]['detection']['box'].device), [x[1][i]]) + + # Prime the network on the first frame because I do some thread unsafe things otherwise + print('Initializing model... ', end='') + first_batch = eval_network(transform_frame(get_next_frame(vid))) + print('Done.') + + # For each frame the sequence of functions it needs to go through to be processed (in reversed order) + sequence = [prep_frame, eval_network, transform_frame] + pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2) + pool.apply_async(play_video) + active_frames = [{'value': extract_frame(first_batch, i), 'idx': 0} for i in range(len(first_batch[0]))] + + print() + if out_path is None: print('Press Escape to close.') + try: + while vid.isOpened() and running: + # Hard limit on frames in buffer so we don't run out of memory >.> + while frame_buffer.qsize() > 100: + time.sleep(0.001) + + start_time = time.time() + + # Start loading the next frames from the disk + if not vid_done: + next_frames = pool.apply_async(get_next_frame, args=(vid,)) + else: + next_frames = None + + if not (vid_done and len(active_frames) == 0): + # For each frame in our active processing queue, dispatch a job + # for that frame using the current function in the sequence + for frame in active_frames: + _args = [frame['value']] + if frame['idx'] == 0: + _args.append(fps_str) + frame['value'] = pool.apply_async(sequence[frame['idx']], args=_args) + + # For each frame whose job was the last in the sequence (i.e. 
for all final outputs) + for frame in active_frames: + if frame['idx'] == 0: + frame_buffer.put(frame['value'].get()) + + # Remove the finished frames from the processing queue + active_frames = [x for x in active_frames if x['idx'] > 0] + + # Finish evaluating every frame in the processing queue and advanced their position in the sequence + for frame in list(reversed(active_frames)): + frame['value'] = frame['value'].get() + frame['idx'] -= 1 + + if frame['idx'] == 0: + # Split this up into individual threads for prep_frame since it doesn't support batch size + active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, len(frame['value'][0]))] + frame['value'] = extract_frame(frame['value'], 0) + + # Finish loading in the next frames and add them to the processing queue + if next_frames is not None: + frames = next_frames.get() + if len(frames) == 0: + vid_done = True + else: + active_frames.append({'value': frames, 'idx': len(sequence)-1}) + + # Compute FPS + frame_times.add(time.time() - start_time) + fps = args.video_multiframe / frame_times.get_avg() + else: + fps = 0 + + fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (fps, video_fps, frame_buffer.qsize()) + if not args.display_fps: + print('\r' + fps_str + ' ', end='') + + except KeyboardInterrupt: + print('\nStopping...') + + cleanup_and_exit() + +def evaluate(net:Yolact, dataset, train_mode=False, trainCuda = True): + net.detect.use_fast_nms = args.fast_nms + net.detect.use_cross_class_nms = args.cross_class_nms + cfg.mask_proto_debug = args.mask_proto_debug + + # TODO Currently we do not support Fast Mask Re-scroing in evalimage, evalimages, and evalvideo + if args.image is not None: + if ':' in args.image: + inp, out = args.image.split(':') + evalimage(net, inp, out) + else: + evalimage(net, args.image) + return + elif args.images is not None: + inp, out = args.images.split(':') + evalimages(net, inp, out) + return + elif args.video is not None: + if ':' in args.video: + inp, out = args.video.split(':') + evalvideo(net, inp, out) + else: + evalvideo(net, args.video) + return + + frame_times = MovingAverage() + dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset)) + progress_bar = ProgressBar(30, dataset_size) + + print() + + if not args.display and not args.benchmark: + # For each class and iou, stores tuples (score, isPositive) + # Index ap_data[type][iouIdx][classIdx] + ap_data = { + 'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds], + 'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds] + } + detections = Detections() + else: + timer.disable('Load Data') + + dataset_indices = list(range(len(dataset))) + + if args.shuffle: + random.shuffle(dataset_indices) + elif not args.no_sort: + # Do a deterministic shuffle based on the image ids + # + # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's + # the order of insertion. That means on python 3.6, the images come in the order they are in + # in the annotations file. For some reason, the first images in the annotations file are + # the hardest. To combat this, I use a hard-coded hash function based on the image ids + # to shuffle the indices we use. That way, no matter what python version or how pycocotools + # handles the data, we get the same result every time. 
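The comment block above motivates sorting the evaluation indices by a deterministic hash of the image ids, so the order is identical across Python versions and pycocotools behaviours. A minimal sketch of that idea, assuming integer image ids and using an illustrative stand-in for the repository's badhash() helper:

# Illustrative only: toy_hash stands in for eval.py's own badhash().
def toy_hash(x: int) -> int:
    x = ((x >> 16) ^ x) * 0x045d9f3b
    x = ((x >> 16) ^ x) * 0x045d9f3b
    return (x >> 16) ^ x

image_ids = [139, 285, 632, 724]                  # hypothetical COCO image ids
dataset_indices = list(range(len(image_ids)))
hashed = [toy_hash(i) for i in image_ids]
dataset_indices.sort(key=lambda i: hashed[i])     # same order on every run
print(dataset_indices)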
+ hashed = [badhash(x) for x in dataset.ids] + dataset_indices.sort(key=lambda x: hashed[x]) + + dataset_indices = dataset_indices[:dataset_size] + + try: + # Main eval loop + for it, image_idx in enumerate(tqdm(dataset_indices)): + timer.reset() + + with timer.env('Load Data'): + img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx) + # Test flag, do not upvote + if cfg.mask_proto_debug: + with open('scripts/info.txt', 'w') as f: + f.write(str(dataset.ids[image_idx])) + np.save('scripts/gt.npy', gt_masks) + + batch = Variable(img.unsqueeze(0)) + if args.cuda: + if train_mode: + batch = batch.npu() if trainCuda else batch + else: + batch = batch.npu() + + with timer.env('Network Extra'): + preds = net(batch) + if preds is not None and preds[0] is not None: + if preds[0]['box'] is not None: + preds[0]['box'] = preds[0]['box'].cpu() + if preds[0]['mask'] is not None: + preds[0]['mask'] = preds[0]['mask'].cpu() + if preds[0]['class'] is not None: + preds[0]['class'] = preds[0]['class'].cpu() + if preds[0]['score'] is not None: + preds[0]['score'] = preds[0]['score'].cpu() + if preds[0]['proto'] is not None: + preds[0]['proto'] = preds[0]['proto'].cpu() + + # Perform the meat of the operation here depending on our mode. + if args.display: + img_numpy = prep_display(preds, img, h, w) + elif args.benchmark: + prep_benchmark(preds, h, w) + else: + prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections) + + # First couple of images take longer because we're constructing the graph. + # Since that's technically initialization, don't include those in the FPS calculations. + if it > 1: + frame_times.add(timer.total_time()) + + if args.display: + if it > 1: + print('Avg FPS: %.4f' % (1 / frame_times.get_avg())) + plt.imshow(img_numpy) + plt.title(str(dataset.ids[image_idx])) + plt.show() + elif not args.no_bar: + if it > 1: fps = 1 / frame_times.get_avg() + else: fps = 0 + progress = (it+1) / dataset_size * 100 + progress_bar.set_val(it+1) + print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps ' + % (repr(progress_bar), it+1, dataset_size, progress, fps), end='') + + + + if not args.display and not args.benchmark: + print() + if args.output_coco_json: + print('Dumping detections...') + if args.output_web_json: + detections.dump_web() + else: + detections.dump() + else: + if not train_mode: + print('Saving data...') + with open(args.ap_data_file, 'wb') as f: + pickle.dump(ap_data, f) + + return calc_map(ap_data) + elif args.benchmark: + print() + print() + print('Stats for the last frame:') + timer.print_stats() + avg_seconds = frame_times.get_avg() + print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000*avg_seconds)) + + except KeyboardInterrupt: + print('Stopping...') + + +def calc_map(ap_data): + print('Calculating mAP...') + aps = [{'box': [], 'mask': []} for _ in iou_thresholds] + + for _class in range(len(cfg.dataset.class_names)): + for iou_idx in range(len(iou_thresholds)): + for iou_type in ('box', 'mask'): + ap_obj = ap_data[iou_type][iou_idx][_class] + + if not ap_obj.is_empty(): + aps[iou_idx][iou_type].append(ap_obj.get_ap()) + + all_maps = {'box': OrderedDict(), 'mask': OrderedDict()} + + # Looking back at it, this code is really hard to read :/ + for iou_type in ('box', 'mask'): + all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict + for i, threshold in enumerate(iou_thresholds): + mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0 + 
all_maps[iou_type][int(threshold*100)] = mAP + all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1)) + + print_maps(all_maps) + + # Put in a prettier format so we can serialize it to json during training + all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()} + return all_maps + +def print_maps(all_maps): + # Warning: hacky + make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals) + make_sep = lambda n: ('-------+' * n) + + print() + print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()])) + print(make_sep(len(all_maps['box']) + 1)) + for iou_type in ('box', 'mask'): + print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values()])) + print(make_sep(len(all_maps['box']) + 1)) + print() + + + +if __name__ == '__main__': + parse_args() + + if args.config is not None: + set_cfg(args.config) + + if args.trained_model == 'interrupt': + + args.trained_model = SavePath.get_interrupt('weights/') + elif args.trained_model == 'latest': + args.trained_model = SavePath.get_latest('weights/', cfg.name) + + if args.config is None: + model_path = SavePath.from_str(args.trained_model) + # TODO: Bad practice? Probably want to do a name lookup instead. + args.config = model_path.model_name + '_config' + print('Config not specified. Parsed %s from the file name.\n' % args.config) + set_cfg(args.config) + + if args.detect: + cfg.eval_mask_branch = False + + if args.dataset is not None: + set_dataset(args.dataset) + + if args.data_path: + cfg.dataset.valid_images = args.data_path + '/images/' + cfg.dataset.valid_info = args.data_path + '/annotations/instances_val2017.json' + + with torch.no_grad(): + if not os.path.exists('results'): + os.makedirs('results') + + if args.cuda: + cudnn.fastest = True + args.device = torch.device(f'npu:{args.rank_id}') + torch.npu.set_device(args.device) + #torch.set_default_tensor_type('torch.npu.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + if args.resume and not args.display: + with open(args.ap_data_file, 'rb') as f: + ap_data = pickle.load(f) + calc_map(ap_data) + exit() + + if args.image is None and args.video is None and args.images is None: + dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info, + transform=BaseTransform(), has_gt=cfg.dataset.has_gt) + prep_coco_cats() + else: + dataset = None + + print('Loading model...', end='') + net = Yolact() + net.load_weights(args.trained_model) + net.eval() + print(' Done.') + + if args.cuda: + net = net.npu() + + evaluate(net, dataset) + + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/__init__.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/__init__.py index 59dfcbd639b692434275a446f1081315497fa99e..53a3f4b5160995d93bc7911e808b3045d74362c9 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/__init__.py @@ -1,2 +1,2 @@ -from .functions import * -from .modules import * +from .functions import * +from .modules import * diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/box_utils.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/box_utils.py index 6f84f55ab056f2d402bb4dc6eb2bb16975e80a35..7114b7b120806d342148939e2e6b8ecd6bd65740 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/box_utils.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/box_utils.py @@ -1,403 +1,403 @@ -# Copyright 2021 
Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# -*- coding: utf-8 -*- -import torch -from utils import timer - -from data import cfg - -@torch.jit.script -def point_form(boxes): - """ Convert prior_boxes to (xmin, ymin, xmax, ymax) - representation for comparison to point form ground truth data. - Args: - boxes: (tensor) center-size default boxes from priorbox layers. - Return: - boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. - """ - return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin - boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax - - -@torch.jit.script -def center_size(boxes): - """ Convert prior_boxes to (cx, cy, w, h) - representation for comparison to center-size form ground truth data. - Args: - boxes: (tensor) point_form boxes - Return: - boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. - """ - return torch.cat(( (boxes[:, 2:] + boxes[:, :2])/2, # cx, cy - boxes[:, 2:] - boxes[:, :2] ), 1) # w, h - -@torch.jit.script -def intersect(box_a, box_b): - """ We resize both tensors to [A,B,2] without new malloc: - [A,2] -> [A,1,2] -> [A,B,2] - [B,2] -> [1,B,2] -> [A,B,2] - Then we compute the area of intersect between box_a and box_b. - Args: - box_a: (tensor) bounding boxes, Shape: [n,A,4]. - box_b: (tensor) bounding boxes, Shape: [n,B,4]. - Return: - (tensor) intersection area, Shape: [n,A,B]. - """ - n = box_a.size(0) - A = box_a.size(1) - B = box_b.size(1) - max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), - box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) - min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), - box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) - return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter - - -def jaccard(box_a, box_b, iscrowd:bool=False): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. Here we operate on - ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. - E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] - box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] - Return: - jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] - """ - use_batch = True - if box_a.dim() == 2: - use_batch = False - box_a = box_a[None, ...] - box_b = box_b[None, ...] 
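The jaccard() docstring above states the overlap as A ∩ B / (area(A) + area(B) − A ∩ B). A small self-contained check of that formula on two point-form boxes, with made-up coordinates and independent of the intersect()/jaccard() helpers in this patch:

import torch

box_a = torch.tensor([0.0, 0.0, 2.0, 2.0])    # (xmin, ymin, xmax, ymax)
box_b = torch.tensor([1.0, 1.0, 3.0, 3.0])

lt = torch.max(box_a[:2], box_b[:2])          # intersection top-left     -> [1., 1.]
rb = torch.min(box_a[2:], box_b[2:])          # intersection bottom-right -> [2., 2.]
wh = (rb - lt).clamp(min=0)
inter = wh[0] * wh[1]                         # 1.0
area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])   # 4.0
area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])   # 4.0
iou = inter / (area_a + area_b - inter)       # 1 / 7 ≈ 0.143
print(iou.item())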
- - inter = intersect(box_a, box_b) - area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) * - (box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] - area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) * - (box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] - union = area_a + area_b - inter - - out = inter / area_a if iscrowd else inter / union - return out if use_batch else out.squeeze(0) - -def elemwise_box_iou(box_a, box_b): - """ Does the same as above but instead of pairwise, elementwise along the inner dimension. """ - max_xy = torch.min(box_a[:, 2:], box_b[:, 2:]) - min_xy = torch.max(box_a[:, :2], box_b[:, :2]) - inter = torch.clamp((max_xy - min_xy), min=0) - inter = inter[:, 0] * inter[:, 1] - - area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]) - area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]) - - union = area_a + area_b - inter - union = torch.clamp(union, min=0.1) - - # Return value is [n] for inputs [n, 4] - return torch.clamp(inter / union, max=1) - -def mask_iou(masks_a, masks_b, iscrowd=False): - """ - Computes the pariwise mask IoU between two sets of masks of size [a, h, w] and [b, h, w]. - The output is of size [a, b]. - - Wait I thought this was "box_utils", why am I putting this in here? - """ - - masks_a = masks_a.view(masks_a.size(0), -1) - masks_b = masks_b.view(masks_b.size(0), -1) - - intersection = masks_a @ masks_b.t() - area_a = masks_a.sum(dim=1).unsqueeze(1) - area_b = masks_b.sum(dim=1).unsqueeze(0) - - return intersection / (area_a + area_b - intersection) if not iscrowd else intersection / area_a - -def elemwise_mask_iou(masks_a, masks_b): - """ Does the same as above but instead of pairwise, elementwise along the outer dimension. """ - masks_a = masks_a.view(-1, masks_a.size(-1)) - masks_b = masks_b.view(-1, masks_b.size(-1)) - - intersection = (masks_a * masks_b).sum(dim=0) - area_a = masks_a.sum(dim=0) - area_b = masks_b.sum(dim=0) - - # Return value is [n] for inputs [h, w, n] - return torch.clamp(intersection / torch.clamp(area_a + area_b - intersection, min=0.1), max=1) - - - -def change(gt, priors): - """ - Compute the d_change metric proposed in Box2Pix: - https://lmb.informatik.uni-freiburg.de/Publications/2018/UB18/paper-box2pix.pdf - - Input should be in point form (xmin, ymin, xmax, ymax). - - Output is of shape [num_gt, num_priors] - Note this returns -change so it can be a drop in replacement for - """ - num_priors = priors.size(0) - num_gt = gt.size(0) - - gt_w = (gt[:, 2] - gt[:, 0])[:, None].expand(num_gt, num_priors) - gt_h = (gt[:, 3] - gt[:, 1])[:, None].expand(num_gt, num_priors) - - gt_mat = gt[:, None, :].expand(num_gt, num_priors, 4) - pr_mat = priors[None, :, :].expand(num_gt, num_priors, 4) - - diff = gt_mat - pr_mat - diff[:, :, 0] /= gt_w - diff[:, :, 2] /= gt_w - diff[:, :, 1] /= gt_h - diff[:, :, 3] /= gt_h - - return -torch.sqrt( (diff ** 2).sum(dim=2) ) - - - - -def match(pos_thresh, neg_thresh, truths, priors, labels, crowd_boxes, loc_t, conf_t, idx_t, idx, loc_data): - """Match each prior box with the ground truth box of the highest jaccard - overlap, encode the bounding boxes, then return the matched indices - corresponding to both confidence and location preds. - Args: - pos_thresh: (float) IoU > pos_thresh ==> positive. - neg_thresh: (float) IoU < neg_thresh ==> negative. - truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. - priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 
- labels: (tensor) All the class labels for the image, Shape: [num_obj]. - crowd_boxes: (tensor) All the crowd box annotations or None if there are none. - loc_t: (tensor) Tensor to be filled w/ endcoded location targets. - conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. Note: -1 means neutral. - idx_t: (tensor) Tensor to be filled w/ the index of the matched gt box for each prior. - idx: (int) current batch index. - loc_data: (tensor) The predicted bbox regression coordinates for this batch. - Return: - The matched indices corresponding to 1)location and 2)confidence preds. - """ - decoded_priors = decode(loc_data, priors, cfg.use_yolo_regressors) if cfg.use_prediction_matching else point_form(priors) - - # Size [num_objects, num_priors] - overlaps = jaccard(truths, decoded_priors) if not cfg.use_change_matching else change(truths, decoded_priors) - - # Size [num_priors] best ground truth for each prior - best_truth_overlap, best_truth_idx = overlaps.max(0) - - # We want to ensure that each gt gets used at least once so that we don't - # waste any training data. In order to do that, find the max overlap anchor - # with each gt, and force that anchor to use that gt. - for _ in range(overlaps.size(0)): - # Find j, the gt with the highest overlap with a prior - # In effect, this will loop through overlaps.size(0) in a "smart" order, - # always choosing the highest overlap first. - best_prior_overlap, best_prior_idx = overlaps.max(1) - j = best_prior_overlap.max(0)[1] - - # Find i, the highest overlap anchor with this gt - i = best_prior_idx[j] - - # Set all other overlaps with i to be -1 so that no other gt uses it - overlaps[:, i] = -1 - # Set all other overlaps with j to be -1 so that this loop never uses j again - overlaps[j, :] = -1 - - # Overwrite i's score to be 2 so it doesn't get thresholded ever - best_truth_overlap[i] = 2 - # Set the gt to be used for i to be j, overwriting whatever was there - best_truth_idx[i] = j - - matches = truths[best_truth_idx] # Shape: [num_priors,4] - conf = labels[best_truth_idx] + 1 # Shape: [num_priors] - - conf[best_truth_overlap < pos_thresh] = -1 # label as neutral - conf[best_truth_overlap < neg_thresh] = 0 # label as background - - # Deal with crowd annotations for COCO - if crowd_boxes is not None and cfg.crowd_iou_threshold < 1: - # Size [num_priors, num_crowds] - crowd_overlaps = jaccard(decoded_priors, crowd_boxes, iscrowd=True) - # Size [num_priors] - best_crowd_overlap, best_crowd_idx = crowd_overlaps.max(1) - # Set non-positives with crowd iou of over the threshold to be neutral. - conf[(conf <= 0) & (best_crowd_overlap > cfg.crowd_iou_threshold)] = -1 - - loc = encode(matches, priors, cfg.use_yolo_regressors) - loc_t[idx] = loc # [num_priors,4] encoded offsets to learn - conf_t[idx] = conf # [num_priors] top class label for each prior - idx_t[idx] = best_truth_idx # [num_priors] indices for lookup - -@torch.jit.script -def encode(matched, priors, use_yolo_regressors:bool=False): - """ - Encode bboxes matched with each prior into the format - produced by the network. See decode for more details on - this format. Note that encode(decode(x, p), p) = x. - - Args: - - matched: A tensor of bboxes in point form with shape [num_priors, 4] - - priors: The tensor of all priors with shape [num_priors, 4] - Return: A tensor with encoded relative coordinates in the format - outputted by the network (see decode). 
Size: [num_priors, 4] - """ - - if use_yolo_regressors: - # Exactly the reverse of what we did in decode - # In fact encode(decode(x, p), p) should be x - boxes = center_size(matched) - - loc = torch.cat(( - boxes[:, :2] - priors[:, :2], - torch.log(boxes[:, 2:] / priors[:, 2:]) - ), 1) - else: - variances = [0.1, 0.2] - - # dist b/t match center and prior's center - g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] - # encode variance - g_cxcy /= (variances[0] * priors[:, 2:]) - # match wh / prior wh - g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] - g_wh = torch.log(g_wh) / variances[1] - # return target for smooth_l1_loss - loc = torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] - - return loc - -@torch.jit.script -def decode(loc, priors, use_yolo_regressors:bool=False): - """ - Decode predicted bbox coordinates using the same scheme - employed by Yolov2: https://arxiv.org/pdf/1612.08242.pdf - - b_x = (sigmoid(pred_x) - .5) / conv_w + prior_x - b_y = (sigmoid(pred_y) - .5) / conv_h + prior_y - b_w = prior_w * exp(loc_w) - b_h = prior_h * exp(loc_h) - - Note that loc is inputed as [(s(x)-.5)/conv_w, (s(y)-.5)/conv_h, w, h] - while priors are inputed as [x, y, w, h] where each coordinate - is relative to size of the image (even sigmoid(x)). We do this - in the network by dividing by the 'cell size', which is just - the size of the convouts. - - Also note that prior_x and prior_y are center coordinates which - is why we have to subtract .5 from sigmoid(pred_x and pred_y). - - Args: - - loc: The predicted bounding boxes of size [num_priors, 4] - - priors: The priorbox coords with size [num_priors, 4] - - Returns: A tensor of decoded relative coordinates in point form - form with size [num_priors, 4] - """ - - if use_yolo_regressors: - # Decoded boxes in center-size notation - boxes = torch.cat(( - loc[:, :2] + priors[:, :2], - priors[:, 2:] * torch.exp(loc[:, 2:]) - ), 1) - - boxes = point_form(boxes) - else: - variances = [0.1, 0.2] - - boxes = torch.cat(( - priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], - priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) - boxes[:, :2] -= boxes[:, 2:] / 2 - boxes[:, 2:] += boxes[:, :2] - - return boxes - - - -def log_sum_exp(x): - """Utility function for computing log_sum_exp while determining - This will be used to determine unaveraged confidence loss across - all examples in a batch. - Args: - x (Variable(tensor)): conf_preds from conf layers - """ - x_max = x.data.max() - return torch.log(torch.sum(torch.exp(x-x_max), 1)) + x_max - - -@torch.jit.script -def sanitize_coordinates(_x1, _x2, img_size:int, padding:int=0, cast:bool=True): - """ - Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. - Also converts from relative to absolute coordinates and casts the results to long tensors. - - If cast is false, the result won't be cast to longs. - Warning: this does things in-place behind the scenes so copy if necessary. - """ - _x1 = _x1 * img_size - _x2 = _x2 * img_size - if cast: - _x1 = _x1.long() - _x2 = _x2.long() - x1 = torch.min(_x1, _x2) - x2 = torch.max(_x1, _x2) - x1 = torch.clamp(x1-padding, min=0) - x2 = torch.clamp(x2+padding, max=img_size) - - return x1, x2 - - -@torch.jit.script -def crop(masks, boxes, padding:int=1): - """ - "Crop" predicted masks by zeroing out everything not in the predicted bbox. - Vectorized by Chong (thanks Chong). 
- - Args: - - masks should be a size [h, w, n] tensor of masks - - boxes should be a size [n, 4] tensor of bbox coords in relative point form - """ - h, w, n = masks.size() - x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) - y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) - - rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) - cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) - - masks_left = rows >= x1.view(1, 1, -1) - masks_right = rows < x2.view(1, 1, -1) - masks_up = cols >= y1.view(1, 1, -1) - masks_down = cols < y2.view(1, 1, -1) - - crop_mask = masks_left * masks_right * masks_up * masks_down - - return masks * crop_mask.float() - - -def index2d(src, idx): - """ - Indexes a tensor by a 2d index. - - In effect, this does - out[i, j] = src[i, idx[i, j]] - - Both src and idx should have the same size. - """ - - offs = torch.arange(idx.size(0), device=idx.device)[:, None].expand_as(idx) - idx = idx + offs * idx.size(1) - - return src.view(-1)[idx.view(-1)].view(idx.size()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# -*- coding: utf-8 -*- +import torch +from utils import timer + +from data import cfg + +@torch.jit.script +def point_form(boxes): + """ Convert prior_boxes to (xmin, ymin, xmax, ymax) + representation for comparison to point form ground truth data. + Args: + boxes: (tensor) center-size default boxes from priorbox layers. + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin + boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax + + +@torch.jit.script +def center_size(boxes): + """ Convert prior_boxes to (cx, cy, w, h) + representation for comparison to center-size form ground truth data. + Args: + boxes: (tensor) point_form boxes + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat(( (boxes[:, 2:] + boxes[:, :2])/2, # cx, cy + boxes[:, 2:] - boxes[:, :2] ), 1) # w, h + +@torch.jit.script +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [n,A,4]. + box_b: (tensor) bounding boxes, Shape: [n,B,4]. + Return: + (tensor) intersection area, Shape: [n,A,B]. 
+ """ + n = box_a.size(0) + A = box_a.size(1) + B = box_b.size(1) + max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) + min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) + return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter + + +def jaccard(box_a, box_b, iscrowd:bool=False): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] + + inter = intersect(box_a, box_b) + area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) * + (box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] + area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) * + (box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] + union = area_a + area_b - inter + + out = inter / area_a if iscrowd else inter / union + return out if use_batch else out.squeeze(0) + +def elemwise_box_iou(box_a, box_b): + """ Does the same as above but instead of pairwise, elementwise along the inner dimension. """ + max_xy = torch.min(box_a[:, 2:], box_b[:, 2:]) + min_xy = torch.max(box_a[:, :2], box_b[:, :2]) + inter = torch.clamp((max_xy - min_xy), min=0) + inter = inter[:, 0] * inter[:, 1] + + area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]) + area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]) + + union = area_a + area_b - inter + union = torch.clamp(union, min=0.1) + + # Return value is [n] for inputs [n, 4] + return torch.clamp(inter / union, max=1) + +def mask_iou(masks_a, masks_b, iscrowd=False): + """ + Computes the pariwise mask IoU between two sets of masks of size [a, h, w] and [b, h, w]. + The output is of size [a, b]. + + Wait I thought this was "box_utils", why am I putting this in here? + """ + + masks_a = masks_a.view(masks_a.size(0), -1) + masks_b = masks_b.view(masks_b.size(0), -1) + + intersection = masks_a @ masks_b.t() + area_a = masks_a.sum(dim=1).unsqueeze(1) + area_b = masks_b.sum(dim=1).unsqueeze(0) + + return intersection / (area_a + area_b - intersection) if not iscrowd else intersection / area_a + +def elemwise_mask_iou(masks_a, masks_b): + """ Does the same as above but instead of pairwise, elementwise along the outer dimension. """ + masks_a = masks_a.view(-1, masks_a.size(-1)) + masks_b = masks_b.view(-1, masks_b.size(-1)) + + intersection = (masks_a * masks_b).sum(dim=0) + area_a = masks_a.sum(dim=0) + area_b = masks_b.sum(dim=0) + + # Return value is [n] for inputs [h, w, n] + return torch.clamp(intersection / torch.clamp(area_a + area_b - intersection, min=0.1), max=1) + + + +def change(gt, priors): + """ + Compute the d_change metric proposed in Box2Pix: + https://lmb.informatik.uni-freiburg.de/Publications/2018/UB18/paper-box2pix.pdf + + Input should be in point form (xmin, ymin, xmax, ymax). 
+ + Output is of shape [num_gt, num_priors] + Note this returns -change so it can be a drop in replacement for + """ + num_priors = priors.size(0) + num_gt = gt.size(0) + + gt_w = (gt[:, 2] - gt[:, 0])[:, None].expand(num_gt, num_priors) + gt_h = (gt[:, 3] - gt[:, 1])[:, None].expand(num_gt, num_priors) + + gt_mat = gt[:, None, :].expand(num_gt, num_priors, 4) + pr_mat = priors[None, :, :].expand(num_gt, num_priors, 4) + + diff = gt_mat - pr_mat + diff[:, :, 0] /= gt_w + diff[:, :, 2] /= gt_w + diff[:, :, 1] /= gt_h + diff[:, :, 3] /= gt_h + + return -torch.sqrt( (diff ** 2).sum(dim=2) ) + + + + +def match(pos_thresh, neg_thresh, truths, priors, labels, crowd_boxes, loc_t, conf_t, idx_t, idx, loc_data): + """Match each prior box with the ground truth box of the highest jaccard + overlap, encode the bounding boxes, then return the matched indices + corresponding to both confidence and location preds. + Args: + pos_thresh: (float) IoU > pos_thresh ==> positive. + neg_thresh: (float) IoU < neg_thresh ==> negative. + truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. + priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. + labels: (tensor) All the class labels for the image, Shape: [num_obj]. + crowd_boxes: (tensor) All the crowd box annotations or None if there are none. + loc_t: (tensor) Tensor to be filled w/ endcoded location targets. + conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. Note: -1 means neutral. + idx_t: (tensor) Tensor to be filled w/ the index of the matched gt box for each prior. + idx: (int) current batch index. + loc_data: (tensor) The predicted bbox regression coordinates for this batch. + Return: + The matched indices corresponding to 1)location and 2)confidence preds. + """ + decoded_priors = decode(loc_data, priors, cfg.use_yolo_regressors) if cfg.use_prediction_matching else point_form(priors) + + # Size [num_objects, num_priors] + overlaps = jaccard(truths, decoded_priors) if not cfg.use_change_matching else change(truths, decoded_priors) + + # Size [num_priors] best ground truth for each prior + best_truth_overlap, best_truth_idx = overlaps.max(0) + + # We want to ensure that each gt gets used at least once so that we don't + # waste any training data. In order to do that, find the max overlap anchor + # with each gt, and force that anchor to use that gt. + for _ in range(overlaps.size(0)): + # Find j, the gt with the highest overlap with a prior + # In effect, this will loop through overlaps.size(0) in a "smart" order, + # always choosing the highest overlap first. 
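The loop described above repeatedly takes the (ground truth, prior) pair with the highest remaining overlap and pins that prior to that ground truth, so every gt is used at least once. A compact sketch of the same greedy idea on a toy overlaps matrix (illustrative values, not the patched match() itself):

import torch

# Rows = ground-truth boxes, columns = priors; made-up IoU values.
overlaps = torch.tensor([[0.10, 0.80, 0.30],
                         [0.70, 0.75, 0.20]])
best_truth_overlap, best_truth_idx = overlaps.max(0)

work = overlaps.clone()
for _ in range(work.size(0)):
    best_prior_overlap, best_prior_idx = work.max(1)
    j = best_prior_overlap.max(0)[1]   # gt with the highest remaining overlap
    i = best_prior_idx[j]              # its best prior
    work[:, i] = -1                    # no other gt may claim prior i
    work[j, :] = -1                    # gt j is done
    best_truth_overlap[i] = 2          # never thresholded away later
    best_truth_idx[i] = j

print(best_truth_idx)                  # tensor([1, 0, 0]): both gts own a prior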
+ best_prior_overlap, best_prior_idx = overlaps.max(1) + j = best_prior_overlap.max(0)[1] + + # Find i, the highest overlap anchor with this gt + i = best_prior_idx[j] + + # Set all other overlaps with i to be -1 so that no other gt uses it + overlaps[:, i] = -1 + # Set all other overlaps with j to be -1 so that this loop never uses j again + overlaps[j, :] = -1 + + # Overwrite i's score to be 2 so it doesn't get thresholded ever + best_truth_overlap[i] = 2 + # Set the gt to be used for i to be j, overwriting whatever was there + best_truth_idx[i] = j + + matches = truths[best_truth_idx] # Shape: [num_priors,4] + conf = labels[best_truth_idx] + 1 # Shape: [num_priors] + + conf[best_truth_overlap < pos_thresh] = -1 # label as neutral + conf[best_truth_overlap < neg_thresh] = 0 # label as background + + # Deal with crowd annotations for COCO + if crowd_boxes is not None and cfg.crowd_iou_threshold < 1: + # Size [num_priors, num_crowds] + crowd_overlaps = jaccard(decoded_priors, crowd_boxes, iscrowd=True) + # Size [num_priors] + best_crowd_overlap, best_crowd_idx = crowd_overlaps.max(1) + # Set non-positives with crowd iou of over the threshold to be neutral. + conf[(conf <= 0) & (best_crowd_overlap > cfg.crowd_iou_threshold)] = -1 + + loc = encode(matches, priors, cfg.use_yolo_regressors) + loc_t[idx] = loc # [num_priors,4] encoded offsets to learn + conf_t[idx] = conf # [num_priors] top class label for each prior + idx_t[idx] = best_truth_idx # [num_priors] indices for lookup + +@torch.jit.script +def encode(matched, priors, use_yolo_regressors:bool=False): + """ + Encode bboxes matched with each prior into the format + produced by the network. See decode for more details on + this format. Note that encode(decode(x, p), p) = x. + + Args: + - matched: A tensor of bboxes in point form with shape [num_priors, 4] + - priors: The tensor of all priors with shape [num_priors, 4] + Return: A tensor with encoded relative coordinates in the format + outputted by the network (see decode). Size: [num_priors, 4] + """ + + if use_yolo_regressors: + # Exactly the reverse of what we did in decode + # In fact encode(decode(x, p), p) should be x + boxes = center_size(matched) + + loc = torch.cat(( + boxes[:, :2] - priors[:, :2], + torch.log(boxes[:, 2:] / priors[:, 2:]) + ), 1) + else: + variances = [0.1, 0.2] + + # dist b/t match center and prior's center + g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, 2:]) + # match wh / prior wh + g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] + g_wh = torch.log(g_wh) / variances[1] + # return target for smooth_l1_loss + loc = torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] + + return loc + +@torch.jit.script +def decode(loc, priors, use_yolo_regressors:bool=False): + """ + Decode predicted bbox coordinates using the same scheme + employed by Yolov2: https://arxiv.org/pdf/1612.08242.pdf + + b_x = (sigmoid(pred_x) - .5) / conv_w + prior_x + b_y = (sigmoid(pred_y) - .5) / conv_h + prior_y + b_w = prior_w * exp(loc_w) + b_h = prior_h * exp(loc_h) + + Note that loc is inputed as [(s(x)-.5)/conv_w, (s(y)-.5)/conv_h, w, h] + while priors are inputed as [x, y, w, h] where each coordinate + is relative to size of the image (even sigmoid(x)). We do this + in the network by dividing by the 'cell size', which is just + the size of the convouts. + + Also note that prior_x and prior_y are center coordinates which + is why we have to subtract .5 from sigmoid(pred_x and pred_y). 
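A small numeric check of the Yolo-style decode described above, assuming loc already holds [(sigmoid(x)-0.5)/conv_w, (sigmoid(y)-0.5)/conv_h, w, h] and the prior holds relative center-size coordinates, as the docstring states (illustrative values only):

import torch

prior = torch.tensor([0.50, 0.50, 0.20, 0.20])    # (cx, cy, w, h)
loc   = torch.tensor([0.05, -0.02, 0.10, -0.10])  # network output for this prior

cx = prior[0] + loc[0]                  # 0.55
cy = prior[1] + loc[1]                  # 0.48
w  = prior[2] * torch.exp(loc[2])       # 0.2 * e^0.1  ≈ 0.221
h  = prior[3] * torch.exp(loc[3])       # 0.2 * e^-0.1 ≈ 0.181
box = torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])
print(box)                              # point form (xmin, ymin, xmax, ymax)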
+ + Args: + - loc: The predicted bounding boxes of size [num_priors, 4] + - priors: The priorbox coords with size [num_priors, 4] + + Returns: A tensor of decoded relative coordinates in point form + form with size [num_priors, 4] + """ + + if use_yolo_regressors: + # Decoded boxes in center-size notation + boxes = torch.cat(( + loc[:, :2] + priors[:, :2], + priors[:, 2:] * torch.exp(loc[:, 2:]) + ), 1) + + boxes = point_form(boxes) + else: + variances = [0.1, 0.2] + + boxes = torch.cat(( + priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + + return boxes + + + +def log_sum_exp(x): + """Utility function for computing log_sum_exp while determining + This will be used to determine unaveraged confidence loss across + all examples in a batch. + Args: + x (Variable(tensor)): conf_preds from conf layers + """ + x_max = x.data.max() + return torch.log(torch.sum(torch.exp(x-x_max), 1)) + x_max + + +@torch.jit.script +def sanitize_coordinates(_x1, _x2, img_size:int, padding:int=0, cast:bool=True): + """ + Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0, and x2 <= image_size. + Also converts from relative to absolute coordinates and casts the results to long tensors. + + If cast is false, the result won't be cast to longs. + Warning: this does things in-place behind the scenes so copy if necessary. + """ + _x1 = _x1 * img_size + _x2 = _x2 * img_size + if cast: + _x1 = _x1.long() + _x2 = _x2.long() + x1 = torch.min(_x1, _x2) + x2 = torch.max(_x1, _x2) + x1 = torch.clamp(x1-padding, min=0) + x2 = torch.clamp(x2+padding, max=img_size) + + return x1, x2 + + +@torch.jit.script +def crop(masks, boxes, padding:int=1): + """ + "Crop" predicted masks by zeroing out everything not in the predicted bbox. + Vectorized by Chong (thanks Chong). + + Args: + - masks should be a size [h, w, n] tensor of masks + - boxes should be a size [n, 4] tensor of bbox coords in relative point form + """ + h, w, n = masks.size() + x1, x2 = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, padding, cast=False) + y1, y2 = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, padding, cast=False) + + rows = torch.arange(w, device=masks.device, dtype=x1.dtype).view(1, -1, 1).expand(h, w, n) + cols = torch.arange(h, device=masks.device, dtype=x1.dtype).view(-1, 1, 1).expand(h, w, n) + + masks_left = rows >= x1.view(1, 1, -1) + masks_right = rows < x2.view(1, 1, -1) + masks_up = cols >= y1.view(1, 1, -1) + masks_down = cols < y2.view(1, 1, -1) + + crop_mask = masks_left * masks_right * masks_up * masks_down + + return masks * crop_mask.float() + + +def index2d(src, idx): + """ + Indexes a tensor by a 2d index. + + In effect, this does + out[i, j] = src[i, idx[i, j]] + + Both src and idx should have the same size. 
+ """ + + offs = torch.arange(idx.size(0), device=idx.device)[:, None].expand_as(idx) + idx = idx + offs * idx.size(1) + + return src.view(-1)[idx.view(-1)].view(idx.size()) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/__init__.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/__init__.py index d370fa2fea2b086c018856f60040a8457f6b1b97..56ef07f464ffa8a96de17e3a4f6bf512a5018865 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/__init__.py @@ -1,4 +1,4 @@ -from .detection import Detect - - -__all__ = ['Detect'] +from .detection import Detect + + +__all__ = ['Detect'] diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/detection.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/detection.py index c1fbea9366897a8d0b7ef71d63b1ad1ecf688f33..277684228e94293cf27a4e127008a07827a2dc9c 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/detection.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/functions/detection.py @@ -1,242 +1,242 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -import torch.nn.functional as F -from ..box_utils import decode, jaccard, index2d -from utils import timer - -from data import cfg, mask_type - -import numpy as np - - -class Detect(object): - """At test time, Detect is the final layer of SSD. Decode location preds, - apply non-maximum suppression to location predictions based on conf - scores and threshold to a top_k number of output predictions for both - confidence score and locations, as the predicted masks. - """ - # TODO: Refactor this whole class away. It needs to go. - - def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): - self.num_classes = num_classes - self.background_label = bkg_label - self.top_k = top_k - # Parameters used in nms. - self.nms_thresh = nms_thresh - if nms_thresh <= 0: - raise ValueError('nms_threshold must be non negative.') - self.conf_thresh = conf_thresh - - self.use_cross_class_nms = False - self.use_fast_nms = False - - def __call__(self, predictions): - """ - Args: - loc_data: (tensor) Loc preds from loc layers - Shape: [batch, num_priors, 4] - conf_data: (tensor) Shape: Conf preds from conf layers - Shape: [batch, num_priors, num_classes] - mask_data: (tensor) Mask preds from mask layers - Shape: [batch, num_priors, mask_dim] - prior_data: (tensor) Prior boxes and variances from priorbox layers - Shape: [num_priors, 4] - proto_data: (tensor) If using mask_type.lincomb, the prototype masks - Shape: [batch, mask_h, mask_w, mask_dim] - - Returns: - output of shape (batch_size, top_k, 1 + 1 + 4 + mask_dim) - These outputs are in the order: class idx, confidence, bbox coords, and mask. 
- - Note that the outputs are sorted only if cross_class_nms is False - """ - - loc_data = predictions['loc'] - conf_data = predictions['conf'] - mask_data = predictions['mask'] - prior_data = predictions['priors'] - - proto_data = predictions['proto'] if 'proto' in predictions else None - inst_data = predictions['inst'] if 'inst' in predictions else None - - out = [] - - with timer.env('Detect'): - batch_size = loc_data.size(0) - num_priors = prior_data.size(0) - - conf_preds = conf_data.view(batch_size, num_priors, self.num_classes).transpose(2, 1).contiguous() - - for batch_idx in range(batch_size): - decoded_boxes = decode(loc_data[batch_idx], prior_data) - result = self.detect(batch_idx, conf_preds, decoded_boxes, mask_data, inst_data) - - if result is not None and proto_data is not None: - result['proto'] = proto_data[batch_idx] - - out.append(result) - - return out - - - def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data, inst_data): - """ Perform nms for only the max scoring class that isn't background (class 0) """ - cur_scores = conf_preds[batch_idx, 1:, :] - conf_scores, _ = torch.max(cur_scores, dim=0) - - keep = (conf_scores > self.conf_thresh) - scores = cur_scores[:, keep] - boxes = decoded_boxes[keep, :] - masks = mask_data[batch_idx, keep, :] - - if inst_data is not None: - inst = inst_data[batch_idx, keep, :] - - if scores.size(1) == 0: - return None - - if self.use_fast_nms: - if self.use_cross_class_nms: - boxes, masks, classes, scores = self.cc_fast_nms(boxes, masks, scores, self.nms_thresh, self.top_k) - else: - boxes, masks, classes, scores = self.fast_nms(boxes, masks, scores, self.nms_thresh, self.top_k) - else: - boxes, masks, classes, scores = self.traditional_nms(boxes, masks, scores, self.nms_thresh, self.conf_thresh) - - if self.use_cross_class_nms: - print('Warning: Cross Class Traditional NMS is not implemented.') - - return {'box': boxes, 'mask': masks, 'class': classes, 'score': scores} - - - def cc_fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200): - # Collapse all the classes into 1 - scores, classes = scores.max(dim=0) - - _, idx = scores.sort(0, descending=True) - idx = idx[:top_k] - - boxes_idx = boxes[idx] - - # Compute the pairwise IoU between the boxes - iou = jaccard(boxes_idx, boxes_idx) - - # Zero out the lower triangle of the cosine similarity matrix and diagonal - iou.triu_(diagonal=1) - - # Now that everything in the diagonal and below is zeroed out, if we take the max - # of the IoU matrix along the columns, each column will represent the maximum IoU - # between this element and every element with a higher score than this element. - iou_max, _ = torch.max(iou, dim=0) - - # Now just filter out the ones greater than the threshold, i.e., only keep boxes that - # don't have a higher scoring box that would supress it in normal NMS. 
- idx_out = idx[iou_max <= iou_threshold] - - return boxes[idx_out], masks[idx_out], classes[idx_out], scores[idx_out] - - def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200, second_threshold:bool=False): - scores, idx = scores.sort(1, descending=True) - - idx = idx[:, :top_k].contiguous() - scores = scores[:, :top_k] - - num_classes, num_dets = idx.size() - - boxes = boxes[idx.view(-1), :].view(num_classes, num_dets, 4) - masks = masks[idx.view(-1), :].view(num_classes, num_dets, -1) - - iou = jaccard(boxes, boxes) - iou.triu_(diagonal=1) - iou_max, _ = iou.max(dim=1) - - # Now just filter out the ones higher than the threshold - keep = (iou_max <= iou_threshold) - - # We should also only keep detections over the confidence threshold, but at the cost of - # maxing out your detection count for every image, you can just not do that. Because we - # have such a minimal amount of computation per detection (matrix mulitplication only), - # this increase doesn't affect us much (+0.2 mAP for 34 -> 33 fps), so we leave it out. - # However, when you implement this in your method, you should do this second threshold. - if second_threshold: - keep *= (scores > self.conf_thresh) - - # Assign each kept detection to its corresponding class - classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) - classes = classes[keep] - - boxes = boxes[keep] - masks = masks[keep] - scores = scores[keep] - - # Only keep the top cfg.max_num_detections highest scores across all classes - scores, idx = scores.sort(0, descending=True) - idx = idx[:cfg.max_num_detections] - scores = scores[:cfg.max_num_detections] - - classes = classes[idx] - boxes = boxes[idx] - masks = masks[idx] - - return boxes, masks, classes, scores - - def traditional_nms(self, boxes, masks, scores, iou_threshold=0.5, conf_thresh=0.05): - import pyximport - pyximport.install(setup_args={"include_dirs":np.get_include()}, reload_support=True) - - from utils.cython_nms import nms as cnms - - num_classes = scores.size(0) - - idx_lst = [] - cls_lst = [] - scr_lst = [] - - # Multiplying by max_size is necessary because of how cnms computes its area and intersections - boxes = boxes * cfg.max_size - - for _cls in range(num_classes): - cls_scores = scores[_cls, :] - conf_mask = cls_scores > conf_thresh - idx = torch.arange(cls_scores.size(0), device=boxes.device) - - cls_scores = cls_scores[conf_mask] - idx = idx[conf_mask] - - if cls_scores.size(0) == 0: - continue - - preds = torch.cat([boxes[conf_mask], cls_scores[:, None]], dim=1).detach().numpy() - keep = cnms(preds, iou_threshold) - keep = torch.Tensor(keep, device=boxes.device).long() - - idx_lst.append(idx[keep]) - cls_lst.append(keep * 0 + _cls) - scr_lst.append(cls_scores[keep]) - - idx = torch.cat(idx_lst, dim=0) - classes = torch.cat(cls_lst, dim=0) - scores = torch.cat(scr_lst, dim=0) - - scores, idx2 = scores.sort(0, descending=True) - idx2 = idx2[:cfg.max_num_detections] - scores = scores[:cfg.max_num_detections] - - idx = idx[idx2] - classes = classes[idx2] - - # Undo the multiplication above - return boxes[idx] / cfg.max_size, masks[idx], classes, scores +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn.functional as F +from ..box_utils import decode, jaccard, index2d +from utils import timer + +from data import cfg, mask_type + +import numpy as np + + +class Detect(object): + """At test time, Detect is the final layer of SSD. Decode location preds, + apply non-maximum suppression to location predictions based on conf + scores and threshold to a top_k number of output predictions for both + confidence score and locations, as the predicted masks. + """ + # TODO: Refactor this whole class away. It needs to go. + + def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh): + self.num_classes = num_classes + self.background_label = bkg_label + self.top_k = top_k + # Parameters used in nms. + self.nms_thresh = nms_thresh + if nms_thresh <= 0: + raise ValueError('nms_threshold must be non negative.') + self.conf_thresh = conf_thresh + + self.use_cross_class_nms = False + self.use_fast_nms = False + + def __call__(self, predictions): + """ + Args: + loc_data: (tensor) Loc preds from loc layers + Shape: [batch, num_priors, 4] + conf_data: (tensor) Shape: Conf preds from conf layers + Shape: [batch, num_priors, num_classes] + mask_data: (tensor) Mask preds from mask layers + Shape: [batch, num_priors, mask_dim] + prior_data: (tensor) Prior boxes and variances from priorbox layers + Shape: [num_priors, 4] + proto_data: (tensor) If using mask_type.lincomb, the prototype masks + Shape: [batch, mask_h, mask_w, mask_dim] + + Returns: + output of shape (batch_size, top_k, 1 + 1 + 4 + mask_dim) + These outputs are in the order: class idx, confidence, bbox coords, and mask. 
+ + Note that the outputs are sorted only if cross_class_nms is False + """ + + loc_data = predictions['loc'] + conf_data = predictions['conf'] + mask_data = predictions['mask'] + prior_data = predictions['priors'] + + proto_data = predictions['proto'] if 'proto' in predictions else None + inst_data = predictions['inst'] if 'inst' in predictions else None + + out = [] + + with timer.env('Detect'): + batch_size = loc_data.size(0) + num_priors = prior_data.size(0) + + conf_preds = conf_data.view(batch_size, num_priors, self.num_classes).transpose(2, 1).contiguous() + + for batch_idx in range(batch_size): + decoded_boxes = decode(loc_data[batch_idx], prior_data) + result = self.detect(batch_idx, conf_preds, decoded_boxes, mask_data, inst_data) + + if result is not None and proto_data is not None: + result['proto'] = proto_data[batch_idx] + + out.append(result) + + return out + + + def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data, inst_data): + """ Perform nms for only the max scoring class that isn't background (class 0) """ + cur_scores = conf_preds[batch_idx, 1:, :] + conf_scores, _ = torch.max(cur_scores, dim=0) + + keep = (conf_scores > self.conf_thresh) + scores = cur_scores[:, keep] + boxes = decoded_boxes[keep, :] + masks = mask_data[batch_idx, keep, :] + + if inst_data is not None: + inst = inst_data[batch_idx, keep, :] + + if scores.size(1) == 0: + return None + + if self.use_fast_nms: + if self.use_cross_class_nms: + boxes, masks, classes, scores = self.cc_fast_nms(boxes, masks, scores, self.nms_thresh, self.top_k) + else: + boxes, masks, classes, scores = self.fast_nms(boxes, masks, scores, self.nms_thresh, self.top_k) + else: + boxes, masks, classes, scores = self.traditional_nms(boxes, masks, scores, self.nms_thresh, self.conf_thresh) + + if self.use_cross_class_nms: + print('Warning: Cross Class Traditional NMS is not implemented.') + + return {'box': boxes, 'mask': masks, 'class': classes, 'score': scores} + + + def cc_fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200): + # Collapse all the classes into 1 + scores, classes = scores.max(dim=0) + + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + + boxes_idx = boxes[idx] + + # Compute the pairwise IoU between the boxes + iou = jaccard(boxes_idx, boxes_idx) + + # Zero out the lower triangle of the cosine similarity matrix and diagonal + iou.triu_(diagonal=1) + + # Now that everything in the diagonal and below is zeroed out, if we take the max + # of the IoU matrix along the columns, each column will represent the maximum IoU + # between this element and every element with a higher score than this element. + iou_max, _ = torch.max(iou, dim=0) + + # Now just filter out the ones greater than the threshold, i.e., only keep boxes that + # don't have a higher scoring box that would supress it in normal NMS. 
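The comment above is the heart of cross-class Fast NMS: zero the diagonal and lower triangle of the pairwise IoU matrix, take the column-wise maximum, and keep only boxes whose maximum IoU with any higher-scoring box does not exceed the threshold. A self-contained sketch of that trick with toy boxes and a plain IoU helper (not the patched jaccard()):

import torch

def pairwise_iou(b):                            # b: [n, 4] in point form
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(b[:, None, :2], b[None, :, :2])
    rb = torch.min(b[:, None, 2:], b[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area[:, None] + area[None, :] - inter)

boxes  = torch.tensor([[ 0.,  0., 10., 10.],
                       [ 1.,  1., 11., 11.],
                       [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])

idx = scores.argsort(descending=True)           # sort by score (already sorted here)
iou = pairwise_iou(boxes[idx]).triu_(diagonal=1)
iou_max, _ = iou.max(dim=0)                     # best IoU with any higher-scoring box
keep = idx[iou_max <= 0.5]
print(keep)                                     # tensor([0, 2]); box 1 is suppressed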
+ idx_out = idx[iou_max <= iou_threshold] + + return boxes[idx_out], masks[idx_out], classes[idx_out], scores[idx_out] + + def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200, second_threshold:bool=False): + scores, idx = scores.sort(1, descending=True) + + idx = idx[:, :top_k].contiguous() + scores = scores[:, :top_k] + + num_classes, num_dets = idx.size() + + boxes = boxes[idx.view(-1), :].view(num_classes, num_dets, 4) + masks = masks[idx.view(-1), :].view(num_classes, num_dets, -1) + + iou = jaccard(boxes, boxes) + iou.triu_(diagonal=1) + iou_max, _ = iou.max(dim=1) + + # Now just filter out the ones higher than the threshold + keep = (iou_max <= iou_threshold) + + # We should also only keep detections over the confidence threshold, but at the cost of + # maxing out your detection count for every image, you can just not do that. Because we + # have such a minimal amount of computation per detection (matrix mulitplication only), + # this increase doesn't affect us much (+0.2 mAP for 34 -> 33 fps), so we leave it out. + # However, when you implement this in your method, you should do this second threshold. + if second_threshold: + keep *= (scores > self.conf_thresh) + + # Assign each kept detection to its corresponding class + classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) + classes = classes[keep] + + boxes = boxes[keep] + masks = masks[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:cfg.max_num_detections] + scores = scores[:cfg.max_num_detections] + + classes = classes[idx] + boxes = boxes[idx] + masks = masks[idx] + + return boxes, masks, classes, scores + + def traditional_nms(self, boxes, masks, scores, iou_threshold=0.5, conf_thresh=0.05): + import pyximport + pyximport.install(setup_args={"include_dirs":np.get_include()}, reload_support=True) + + from utils.cython_nms import nms as cnms + + num_classes = scores.size(0) + + idx_lst = [] + cls_lst = [] + scr_lst = [] + + # Multiplying by max_size is necessary because of how cnms computes its area and intersections + boxes = boxes * cfg.max_size + + for _cls in range(num_classes): + cls_scores = scores[_cls, :] + conf_mask = cls_scores > conf_thresh + idx = torch.arange(cls_scores.size(0), device=boxes.device) + + cls_scores = cls_scores[conf_mask] + idx = idx[conf_mask] + + if cls_scores.size(0) == 0: + continue + + preds = torch.cat([boxes[conf_mask], cls_scores[:, None]], dim=1).detach().numpy() + keep = cnms(preds, iou_threshold) + keep = torch.Tensor(keep, device=boxes.device).long() + + idx_lst.append(idx[keep]) + cls_lst.append(keep * 0 + _cls) + scr_lst.append(cls_scores[keep]) + + idx = torch.cat(idx_lst, dim=0) + classes = torch.cat(cls_lst, dim=0) + scores = torch.cat(scr_lst, dim=0) + + scores, idx2 = scores.sort(0, descending=True) + idx2 = idx2[:cfg.max_num_detections] + scores = scores[:cfg.max_num_detections] + + idx = idx[idx2] + classes = classes[idx2] + + # Undo the multiplication above + return boxes[idx] / cfg.max_size, masks[idx], classes, scores diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/interpolate.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/interpolate.py index 4ee3f067bc4a44d83f9296c4cb9b7f9b521b0db5..19cacf8d15f7d2768240cdeeb0ad80b633ee639d 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/interpolate.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/interpolate.py @@ -1,31 
+1,31 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch.nn as nn -import torch.nn.functional as F - -class InterpolateModule(nn.Module): - """ - This is a module version of F.interpolate (rip nn.Upsampling). - Any arguments you give it just get passed along for the ride. - """ - - def __init__(self, *args, **kwdargs): - super().__init__() - - self.args = args - self.kwdargs = kwdargs - - def forward(self, x): - return F.interpolate(x, *self.args, **self.kwdargs) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch.nn as nn +import torch.nn.functional as F + +class InterpolateModule(nn.Module): + """ + This is a module version of F.interpolate (rip nn.Upsampling). + Any arguments you give it just get passed along for the ride. + """ + + def __init__(self, *args, **kwdargs): + super().__init__() + + self.args = args + self.kwdargs = kwdargs + + def forward(self, x): + return F.interpolate(x, *self.args, **self.kwdargs) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/__init__.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/__init__.py index 028f5306fb0fa0ca29fe51782841da6fc1658dc7..cf24bddbf283f233d0b93fc074a2bac2f5c044a9 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/__init__.py @@ -1,3 +1,3 @@ -from .multibox_loss import MultiBoxLoss - -__all__ = ['MultiBoxLoss'] +from .multibox_loss import MultiBoxLoss + +__all__ = ['MultiBoxLoss'] diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/multibox_loss.py b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/multibox_loss.py index 3abe7e0b07226210dc0abdb86c2192d105996bf9..12a98e8242181b67a5af5515c1bf76d342c10e78 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/multibox_loss.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/modules/multibox_loss.py @@ -1,721 +1,721 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# -*- coding: utf-8 -*- -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -from ..box_utils import match, log_sum_exp, decode, center_size, crop, elemwise_mask_iou, elemwise_box_iou - -from data import cfg, mask_type, activation_func - - -class MultiBoxLoss(nn.Module): - """SSD Weighted Loss Function - Compute Targets: - 1) Produce Confidence Target Indices by matching ground truth boxes - with (default) 'priorboxes' that have jaccard index > threshold parameter - (default threshold: 0.5). - - 2) Produce localization target by 'encoding' variance into offsets of ground - truth boxes and their matched 'priorboxes'. - - 3) Hard negative mining to filter the excessive number of negative examples - that comes with using a large number of default bounding boxes. - (default negative:positive ratio 3:1) - - Objective Loss: - L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N - Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss - weighted by α which is set to 1 by cross val. - Args: - c: class confidences, - l: predicted boxes, - g: ground truth boxes - N: number of matched default boxes - See: https://arxiv.org/pdf/1512.02325.pdf for more details. - """ - - def __init__(self, num_classes, pos_threshold, neg_threshold, negpos_ratio): - super(MultiBoxLoss, self).__init__() - self.num_classes = num_classes - - self.pos_threshold = pos_threshold - self.neg_threshold = neg_threshold - self.negpos_ratio = negpos_ratio - - # If you output a proto mask with this area, your l1 loss will be l1_alpha - # Note that the area is relative (so 1 would be the entire image) - self.l1_expected_area = 20 * 20 / 70 / 70 - self.l1_alpha = 0.1 - - if cfg.use_class_balanced_conf: - self.class_instances = None - self.total_instances = 0 - - def forward(self, net, predictions, wrapper, wrapper_mask): - """Multibox Loss - Args: - predictions (tuple): A tuple containing loc preds, conf preds, - mask preds, and prior boxes from SSD net. - loc shape: torch.size(batch_size,num_priors,4) - conf shape: torch.size(batch_size,num_priors,num_classes) - masks shape: torch.size(batch_size,num_priors,mask_dim) - priors shape: torch.size(num_priors,4) - proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim) - - targets (list): Ground truth boxes and labels for a batch, - shape: [batch_size][num_objs,5] (last idx is the label). - - masks (list): Ground truth masks for each object in each image, - shape: [batch_size][num_objs,im_height,im_width] - - num_crowds (list): Number of crowd annotations per batch. The crowd - annotations should be the last num_crowds elements of targets and masks. 
- - * Only if mask_type == lincomb - """ - - targets, masks, num_crowds = wrapper.get_args(wrapper_mask) - targets = targets[0] - masks = masks[0] - num_crowds = num_crowds[0] - loc_data = predictions['loc'] - conf_data = predictions['conf'] - mask_data = predictions['mask'] - priors = predictions['priors'] - - if cfg.mask_type == mask_type.lincomb: - proto_data = predictions['proto'] - - score_data = predictions['score'] if cfg.use_mask_scoring else None - inst_data = predictions['inst'] if cfg.use_instance_coeff else None - - labels = [None] * len(targets) # Used in sem segm loss - - batch_size = loc_data.size(0) - num_priors = priors.size(0) - num_classes = self.num_classes - - # Match priors (default boxes) and ground truth boxes - # These tensors will be created with the same device as loc_data - loc_t = loc_data.new(batch_size, num_priors, 4) - gt_box_t = loc_data.new(batch_size, num_priors, 4) - conf_t = loc_data.new(batch_size, num_priors).long() - idx_t = loc_data.new(batch_size, num_priors).long() - - if cfg.use_class_existence_loss: - class_existence_t = loc_data.new(batch_size, num_classes - 1) - - for idx in range(batch_size): - truths = targets[idx][:, :-1].data - labels[idx] = targets[idx][:, -1].data.long() - - if cfg.use_class_existence_loss: - # Construct a one-hot vector for each object and collapse it into an existence vector with max - # Also it's fine to include the crowd annotations here - class_existence_t[idx, :] = \ - torch.eye(num_classes - 1, device=conf_t.get_device())[labels[idx]].max(dim=0)[0] - - # Split the crowd annotations because they come bundled in - cur_crowds = num_crowds[idx] - if cur_crowds > 0: - split = lambda x: (x[-cur_crowds:], x[:-cur_crowds]) - crowd_boxes, truths = split(truths) - - # We don't use the crowd labels or masks - _, labels[idx] = split(labels[idx]) - _, masks[idx] = split(masks[idx]) - else: - crowd_boxes = None - - match(self.pos_threshold, self.neg_threshold, - truths, priors.data, labels[idx], crowd_boxes, - loc_t, conf_t, idx_t, idx, loc_data[idx]) - - gt_box_t[idx, :, :] = truths[idx_t[idx]] - - # wrap targets - loc_t = Variable(loc_t, requires_grad=False) - conf_t = Variable(conf_t, requires_grad=False) - idx_t = Variable(idx_t, requires_grad=False) - - pos = conf_t > 0 - num_pos = pos.sum(dim=1, keepdim=True) - - # Shape: [batch,num_priors,4] - pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) - - losses = {} - - # Localization Loss (Smooth L1) - if cfg.train_boxes: - # loc_p = loc_data[pos_idx].view(-1, 4) - # loc_t = loc_t[pos_idx].view(-1, 4) - # losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') * cfg.bbox_alpha - loc_p = loc_data.view(-1, 4) - loc_t = loc_t.view(-1, 4) - losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='none') * cfg.bbox_alpha - losses['B'][pos_idx.view(-1, 4) == False] = 0 - losses['B'] = losses['B'].sum() - - if cfg.train_masks: - if cfg.mask_type == mask_type.direct: - if cfg.use_gt_bboxes: - pos_masks = [] - for idx in range(batch_size): - pos_masks.append(masks[idx][idx_t[idx, pos[idx]]]) - masks_t = torch.cat(pos_masks, 0) - masks_p = mask_data[pos, :].view(-1, cfg.mask_dim) - losses['M'] = F.binary_cross_entropy(torch.clamp(masks_p, 0, 1), masks_t, - reduction='sum') * cfg.mask_alpha - else: - losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks) - elif cfg.mask_type == mask_type.lincomb: - ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, - score_data, inst_data, labels) - if 
cfg.use_maskiou: - loss, maskiou_targets = ret - else: - loss = ret - losses.update(loss) - - if cfg.mask_proto_loss is not None: - if cfg.mask_proto_loss == 'l1': - losses['P'] = torch.mean(torch.abs(proto_data)) / self.l1_expected_area * self.l1_alpha - elif cfg.mask_proto_loss == 'disj': - losses['P'] = -torch.mean(torch.max(F.log_softmax(proto_data, dim=-1), dim=-1)[0]) - - # Confidence loss - if cfg.use_focal_loss: - if cfg.use_sigmoid_focal_loss: - losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t) - elif cfg.use_objectness_score: - losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t) - else: - losses['C'] = self.focal_conf_loss(conf_data, conf_t) - else: - if cfg.use_objectness_score: - losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors) - else: - losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size) - - # Mask IoU Loss - if cfg.use_maskiou and maskiou_targets is not None: - losses['I'] = self.mask_iou_loss(net, maskiou_targets) - - # These losses also don't depend on anchors - if cfg.use_class_existence_loss: - losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t) - if cfg.use_semantic_segmentation_loss: - losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels) - - # Divide all losses by the number of positives. - # Don't do it for loss[P] because that doesn't depend on the anchors. - total_num_pos = num_pos.data.sum().float() - for k in losses: - if k not in ('P', 'E', 'S'): - losses[k] /= total_num_pos - else: - losses[k] /= batch_size - - # Loss Key: - # - B: Box Localization Loss - # - C: Class Confidence Loss - # - M: Mask Loss - # - P: Prototype Loss - # - D: Coefficient Diversity Loss - # - E: Class Existence Loss - # - S: Semantic Segmentation Loss - return losses - - def class_existence_loss(self, class_data, class_existence_t): - return cfg.class_existence_alpha * F.binary_cross_entropy_with_logits(class_data, class_existence_t, - reduction='sum') - - def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'): - # Note num_classes here is without the background class so cfg.num_classes-1 - batch_size, num_classes, mask_h, mask_w = segment_data.size() - loss_s = 0 - - for idx in range(batch_size): - cur_segment = segment_data[idx] - cur_class_t = class_t[idx] - - with torch.no_grad(): - downsampled_masks = F.interpolate(mask_t[idx].unsqueeze(0), (mask_h, mask_w), - mode=interpolation_mode, align_corners=False).squeeze(0) - downsampled_masks = downsampled_masks.gt(0.5).float() - - # Construct Semantic Segmentation - segment_t = torch.zeros_like(cur_segment, requires_grad=False) - for obj_idx in range(downsampled_masks.size(0)): - segment_t[cur_class_t[obj_idx]] = torch.max(segment_t[cur_class_t[obj_idx]].float(), - downsampled_masks[obj_idx]) - - loss_s += F.binary_cross_entropy_with_logits(cur_segment, segment_t, reduction='sum') - - return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha - - def ohem_conf_loss(self, conf_data, conf_t, pos, num): - # Compute max conf across batch for hard negative mining - batch_conf = conf_data.view(-1, self.num_classes) - if cfg.ohem_use_most_confident: - # i.e. max(softmax) along classes > 0 - batch_conf = F.softmax(batch_conf, dim=1) - loss_c, _ = batch_conf[:, 1:].max(dim=1) - else: - # i.e. 
-softmax(class 0 confidence) - loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0] - - # Hard Negative Mining - loss_c = loss_c.view(num, -1) - loss_c[pos] = 0 # filter out pos boxes - loss_c[conf_t < 0] = 0 # filter out neutrals (conf_t = -1) - _, loss_idx = loss_c.sort(1, descending=True) - _, idx_rank = loss_idx.sort(1) - num_pos = pos.long().sum(1, keepdim=True) - num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) - neg = idx_rank < num_neg.expand_as(idx_rank) - - # Just in case there aren't enough negatives, don't start using positives as negatives - neg[pos] = 0 - neg[conf_t < 0] = 0 # Filter out neutrals - - # Confidence Loss Including Positive and Negative Examples - # pos_idx = pos.unsqueeze(2).expand_as(conf_data) - # neg_idx = neg.unsqueeze(2).expand_as(conf_data) - - # conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes) - # targets_weighted = conf_t[(pos+neg).gt(0)] - ## loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='none') - loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes), conf_t.view(-1), reduction='none').view(-1, - 57744) - loss_c[(pos + neg).gt(0) == False] = 0 - - if cfg.use_class_balanced_conf: - # Lazy initialization - if self.class_instances is None: - self.class_instances = torch.zeros(self.num_classes, device=targets_weighted.device) - - classes, counts = targets_weighted.unique(return_counts=True) - - for _cls, _cnt in zip(classes.cpu().numpy(), counts.cpu().numpy()): - self.class_instances[_cls] += _cnt - - self.total_instances += targets_weighted.size(0) - - weighting = 1 - (self.class_instances[targets_weighted] / self.total_instances) - weighting = torch.clamp(weighting, min=1 / self.num_classes) - - # If you do the math, the average weight of self.class_instances is this - avg_weight = (self.num_classes - 1) / self.num_classes - - loss_c = (loss_c * weighting).sum() / avg_weight - else: - loss_c = loss_c.sum() - - return cfg.conf_alpha * loss_c - - def focal_conf_loss(self, conf_data, conf_t): - """ - Focal loss as described in https://arxiv.org/pdf/1708.02002.pdf - Adapted from https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py - Note that this uses softmax and not the original sigmoid from the paper. - """ - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss - - logpt = F.log_softmax(conf_data, dim=-1) - logpt = logpt.gather(1, conf_t.unsqueeze(-1)) - logpt = logpt.view(-1) - pt = logpt.exp() - - # I adapted the alpha_t calculation here from - # https://github.com/pytorch/pytorch/blob/master/modules/detectron/softmax_focal_loss_op.cu - # You'd think you want all the alphas to sum to one, but in the original implementation they - # just give background an alpha of 1-alpha and each forground an alpha of alpha. - background = (conf_t == 0).float() - at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) - - loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - - # See comment above for keep - return cfg.conf_alpha * (loss * keep).sum() - - def focal_conf_sigmoid_loss(self, conf_data, conf_t): - """ - Focal loss but using sigmoid like the original paper. - Note: To make things mesh easier, the network still predicts 81 class confidences in this mode. 
- Because retinanet originally only predicts 80, we simply just don't use conf_data[..., 0] - """ - num_classes = conf_data.size(-1) - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, num_classes) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # can't mask with -1, so filter that out - - # Compute a one-hot embedding of conf_t - # From https://github.com/kuangliu/pytorch-retinanet/blob/master/utils.py - conf_one_t = torch.eye(num_classes, device=conf_t.get_device())[conf_t] - conf_pm_t = conf_one_t * 2 - 1 # -1 if background, +1 if forground for specific class - - logpt = F.logsigmoid(conf_data * conf_pm_t) # note: 1 - sigmoid(x) = sigmoid(-x) - pt = logpt.exp() - - at = cfg.focal_loss_alpha * conf_one_t + (1 - cfg.focal_loss_alpha) * (1 - conf_one_t) - at[..., 0] = 0 # Set alpha for the background class to 0 because sigmoid focal loss doesn't use it - - loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - loss = keep * loss.sum(dim=-1) - - return cfg.conf_alpha * loss.sum() - - def focal_conf_objectness_loss(self, conf_data, conf_t): - """ - Instead of using softmax, use class[0] to be the objectness score and do sigmoid focal loss on that. - Then for the rest of the classes, softmax them and apply CE for only the positive examples. - - If class[0] = 1 implies forground and class[0] = 0 implies background then you achieve something - similar during test-time to softmax by setting class[1:] = softmax(class[1:]) * class[0] and invert class[0]. - """ - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - # Ignore neutral samples (class < 0) - keep = (conf_t >= 0).float() - conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss - - background = (conf_t == 0).float() - at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) - - logpt = F.logsigmoid(conf_data[:, 0]) * (1 - background) + F.logsigmoid(-conf_data[:, 0]) * background - pt = logpt.exp() - - obj_loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt - - # All that was the objectiveness loss--now time for the class confidence loss - pos_mask = conf_t > 0 - conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes - conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here - - class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') - - return cfg.conf_alpha * (class_loss + (obj_loss * keep).sum()) - - def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors): - """ - Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO. - Then for the rest of the classes, softmax them and apply CE for only the positive examples. 
- """ - - conf_t = conf_t.view(-1) # [batch_size*num_priors] - conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] - - pos_mask = (conf_t > 0) - neg_mask = (conf_t == 0) - - obj_data = conf_data[:, 0] - obj_data_pos = obj_data[pos_mask] - obj_data_neg = obj_data[neg_mask] - - # Don't be confused, this is just binary cross entropy similified - obj_neg_loss = - F.logsigmoid(-obj_data_neg).sum() - - with torch.no_grad(): - pos_priors = priors.unsqueeze(0).expand(batch_size, -1, -1).reshape(-1, 4)[pos_mask, :] - - boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors) - boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors) - - iou_targets = elemwise_box_iou(boxes_pred, boxes_targ) - - obj_pos_loss = - iou_targets * F.logsigmoid(obj_data_pos) - (1 - iou_targets) * F.logsigmoid(-obj_data_pos) - obj_pos_loss = obj_pos_loss.sum() - - # All that was the objectiveness loss--now time for the class confidence loss - conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes - conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here - - class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') - - return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss) - - def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors, masks): - """ Crops the gt masks using the predicted bboxes, scales them down, and outputs the BCE loss. """ - loss_m = 0 - for idx in range(mask_data.size(0)): - with torch.no_grad(): - cur_pos_idx = pos_idx[idx, :, :] - cur_pos_idx_squeezed = cur_pos_idx[:, 1] - - # Shape: [num_priors, 4], decoded predicted bboxes - pos_bboxes = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors) - pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1) - pos_lookup = idx_t[idx, cur_pos_idx_squeezed] - - cur_masks = masks[idx] - pos_masks = cur_masks[pos_lookup, :, :] - - # Convert bboxes to absolute coordinates - num_pos, img_height, img_width = pos_masks.size() - - # Take care of all the bad behavior that can be caused by out of bounds coordinates - x1, x2 = sanitize_coordinates(pos_bboxes[:, 0], pos_bboxes[:, 2], img_width) - y1, y2 = sanitize_coordinates(pos_bboxes[:, 1], pos_bboxes[:, 3], img_height) - - # Crop each gt mask with the predicted bbox and rescale to the predicted mask size - # Note that each bounding box crop is a different size so I don't think we can vectorize this - scaled_masks = [] - for jdx in range(num_pos): - tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]] - - # Restore any dimensions we've left out because our bbox was 1px wide - while tmp_mask.dim() < 2: - tmp_mask = tmp_mask.unsqueeze(0) - - new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0), cfg.mask_size) - scaled_masks.append(new_mask.view(1, -1)) - - mask_t = torch.cat(scaled_masks, 0).gt(0.5).float() # Threshold downsampled mask - - pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :] - loss_m += F.binary_cross_entropy(torch.clamp(pos_mask_data, 0, 1), mask_t, reduction='sum') * cfg.mask_alpha - - return loss_m - - def coeff_diversity_loss(self, coeffs, instance_t): - """ - coeffs should be size [num_pos, num_coeffs] - instance_t should be size [num_pos] and be values from 0 to num_instances-1 - """ - num_pos = coeffs.size(0) - instance_t = instance_t.view(-1) # juuuust to make sure - - coeffs_norm = F.normalize(coeffs, dim=1) - cos_sim = coeffs_norm @ coeffs_norm.t() - - inst_eq = (instance_t[:, None].expand_as(cos_sim) == instance_t[None, :].expand_as(cos_sim)).float() - - # 
Rescale to be between 0 and 1 - cos_sim = (cos_sim + 1) / 2 - - # If they're the same instance, use cosine distance, else use cosine similarity - loss = (1 - cos_sim) * inst_eq + cos_sim * (1 - inst_eq) - - # Only divide by num_pos once because we're summing over a num_pos x num_pos tensor - # and all the losses will be divided by num_pos at the end, so just one extra time. - return cfg.mask_proto_coeff_diversity_alpha * loss.sum() / num_pos - - def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, - inst_data, labels, interpolation_mode='bilinear'): - mask_h = proto_data.size(1) - mask_w = proto_data.size(2) - - process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop - - if cfg.mask_proto_remove_empty_masks: - # Make sure to store a copy of this because we edit it to get rid of all-zero masks - pos = pos.clone() - - loss_m = 0 - loss_d = 0 # Coefficient diversity loss - - maskiou_t_list = [] - maskiou_net_input_list = [] - label_t_list = [] - - for idx in range(mask_data.size(0)): - with torch.no_grad(): - downsampled_masks = F.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w), - mode=interpolation_mode, align_corners=False).squeeze(0) - downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous() - if cfg.mask_proto_binarize_downsampled_gt: - downsampled_masks = downsampled_masks.gt(0.5).float() - - if cfg.mask_proto_remove_empty_masks: - # Get rid of gt masks that are so small they get downsampled away - very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <= 0.0001) - for i in range(very_small_masks.size(0)): - if very_small_masks[i]: - pos[idx, idx_t[idx] == i] = 0 - if cfg.mask_proto_reweight_mask_loss: - # Ensure that the gt is binary - if not cfg.mask_proto_binarize_downsampled_gt: - bin_gt = downsampled_masks.gt(0.5).float() - else: - bin_gt = downsampled_masks - - gt_foreground_norm = bin_gt / (torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001) - gt_background_norm = (1 - bin_gt) / (torch.sum(1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001) - - mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm - mask_reweighting *= mask_h * mask_w - - cur_pos = pos[idx] - pos_idx_t = idx_t[idx, cur_pos] - if process_gt_bboxes: - # Note: this is in point-form - if cfg.mask_proto_crop_with_pred_box: - pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos] - else: - pos_gt_box_t = gt_box_t[idx, cur_pos] - if pos_idx_t.size(0) == 0: - continue - - proto_masks = proto_data[idx] - proto_coef = mask_data[idx, cur_pos, :] - if cfg.use_mask_scoring: - mask_scores = score_data[idx, cur_pos, :] - if cfg.mask_proto_coeff_diversity_loss: - if inst_data is not None: - div_coeffs = inst_data[idx, cur_pos, :] - else: - div_coeffs = proto_coef - - loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t) - - # If we have over the allowed number of masks, select a random sample - old_num_pos = proto_coef.size(0) - if old_num_pos > cfg.masks_to_train: - perm = torch.randperm(proto_coef.size(0)) - select = perm[:cfg.masks_to_train] - - proto_coef = proto_coef[select, :] - pos_idx_t = pos_idx_t[select] - - if process_gt_bboxes: - pos_gt_box_t = pos_gt_box_t[select, :] - if cfg.use_mask_scoring: - mask_scores = mask_scores[select, :] - - num_pos = proto_coef.size(0) - mask_t = downsampled_masks[:, :, pos_idx_t] - label_t = labels[idx][pos_idx_t] - # Size: [mask_h, mask_w, num_pos] - pred_masks = proto_masks @ proto_coef.t() - pred_masks = 
cfg.mask_proto_mask_activation(pred_masks) - if cfg.mask_proto_double_loss: - if cfg.mask_proto_mask_activation == activation_func.sigmoid: - pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='sum') - else: - pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='sum') - - loss_m += cfg.mask_proto_double_loss_alpha * pre_loss - - if cfg.mask_proto_crop: - pred_masks = crop(pred_masks, pos_gt_box_t) - - if cfg.mask_proto_mask_activation == activation_func.sigmoid: - pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='none') - else: - pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='none') - - if cfg.mask_proto_normalize_mask_loss_by_sqrt_area: - gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True) - pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001) - - if cfg.mask_proto_reweight_mask_loss: - pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t] - - if cfg.mask_proto_normalize_emulate_roi_pooling: - weight = mask_h * mask_w if cfg.mask_proto_crop else 1 - pos_gt_csize = center_size(pos_gt_box_t) - gt_box_width = pos_gt_csize[:, 2] * mask_w - gt_box_height = pos_gt_csize[:, 3] * mask_h - pre_loss = pre_loss.sum(dim=(0, 1)) / gt_box_width / gt_box_height * weight - - # If the number of masks were limited scale the loss accordingly - if old_num_pos > num_pos: - pre_loss *= old_num_pos / num_pos - - loss_m += torch.sum(pre_loss) - - if cfg.use_maskiou: - if cfg.discard_mask_area > 0: - gt_mask_area = torch.sum(mask_t, dim=(0, 1)) - - select = gt_mask_area > cfg.discard_mask_area - - if torch.sum(select) < 1: - continue - - - pos_gt_box_t = pos_gt_box_t[select, :] - pred_masks = pred_masks[:, :, select] - mask_t = mask_t[:, :, select] - label_t = label_t[select] - - maskiou_net_input = pred_masks.permute(2, 0, 1).contiguous().unsqueeze(1) - pred_masks = pred_masks.gt(0.5).float() - maskiou_t = self._mask_iou(pred_masks, mask_t) - - maskiou_net_input_list.append(maskiou_net_input) - maskiou_t_list.append(maskiou_t) - label_t_list.append(label_t) - - losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w} - - if cfg.mask_proto_coeff_diversity_loss: - losses['D'] = loss_d - - if cfg.use_maskiou: - # discard_mask_area discarded every mask in the batch, so nothing to do here - if len(maskiou_t_list) == 0: - return losses, None - - maskiou_t = torch.cat(maskiou_t_list) - label_t = torch.cat(label_t_list) - maskiou_net_input = torch.cat(maskiou_net_input_list) - - num_samples = maskiou_t.size(0) - if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train: - perm = torch.randperm(num_samples) - select = perm[:cfg.masks_to_train] - maskiou_t = maskiou_t[select] - label_t = label_t[select] - maskiou_net_input = maskiou_net_input[select] - - return losses, [maskiou_net_input, maskiou_t, label_t] - - return losses - - def _mask_iou(self, mask1, mask2): - intersection = torch.sum(mask1 * mask2, dim=(0, 1)) - area1 = torch.sum(mask1, dim=(0, 1)) - area2 = torch.sum(mask2, dim=(0, 1)) - union = (area1 + area2) - intersection - ret = intersection / union - return ret - - def mask_iou_loss(self, net, maskiou_targets): - maskiou_net_input, maskiou_t, label_t = maskiou_targets - - maskiou_p = net.maskiou_net(maskiou_net_input) - - label_t = label_t[:, None] - maskiou_p = torch.gather(maskiou_p, dim=1, index=label_t).view(-1) - - loss_i = F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='sum') - - return loss_i * cfg.maskiou_alpha +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# -*- coding: utf-8 -*- +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from ..box_utils import match, log_sum_exp, decode, center_size, crop, elemwise_mask_iou, elemwise_box_iou + +from data import cfg, mask_type, activation_func + + +class MultiBoxLoss(nn.Module): + """SSD Weighted Loss Function + Compute Targets: + 1) Produce Confidence Target Indices by matching ground truth boxes + with (default) 'priorboxes' that have jaccard index > threshold parameter + (default threshold: 0.5). + + 2) Produce localization target by 'encoding' variance into offsets of ground + truth boxes and their matched 'priorboxes'. + + 3) Hard negative mining to filter the excessive number of negative examples + that comes with using a large number of default bounding boxes. + (default negative:positive ratio 3:1) + + Objective Loss: + L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N + Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss + weighted by α which is set to 1 by cross val. + Args: + c: class confidences, + l: predicted boxes, + g: ground truth boxes + N: number of matched default boxes + See: https://arxiv.org/pdf/1512.02325.pdf for more details. + """ + + def __init__(self, num_classes, pos_threshold, neg_threshold, negpos_ratio): + super(MultiBoxLoss, self).__init__() + self.num_classes = num_classes + + self.pos_threshold = pos_threshold + self.neg_threshold = neg_threshold + self.negpos_ratio = negpos_ratio + + # If you output a proto mask with this area, your l1 loss will be l1_alpha + # Note that the area is relative (so 1 would be the entire image) + self.l1_expected_area = 20 * 20 / 70 / 70 + self.l1_alpha = 0.1 + + if cfg.use_class_balanced_conf: + self.class_instances = None + self.total_instances = 0 + + def forward(self, net, predictions, wrapper, wrapper_mask): + """Multibox Loss + Args: + predictions (tuple): A tuple containing loc preds, conf preds, + mask preds, and prior boxes from SSD net. + loc shape: torch.size(batch_size,num_priors,4) + conf shape: torch.size(batch_size,num_priors,num_classes) + masks shape: torch.size(batch_size,num_priors,mask_dim) + priors shape: torch.size(num_priors,4) + proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim) + + targets (list): Ground truth boxes and labels for a batch, + shape: [batch_size][num_objs,5] (last idx is the label). + + masks (list): Ground truth masks for each object in each image, + shape: [batch_size][num_objs,im_height,im_width] + + num_crowds (list): Number of crowd annotations per batch. The crowd + annotations should be the last num_crowds elements of targets and masks. 
+ + * Only if mask_type == lincomb + """ + + targets, masks, num_crowds = wrapper.get_args(wrapper_mask) + targets = targets[0] + masks = masks[0] + num_crowds = num_crowds[0] + loc_data = predictions['loc'] + conf_data = predictions['conf'] + mask_data = predictions['mask'] + priors = predictions['priors'] + + if cfg.mask_type == mask_type.lincomb: + proto_data = predictions['proto'] + + score_data = predictions['score'] if cfg.use_mask_scoring else None + inst_data = predictions['inst'] if cfg.use_instance_coeff else None + + labels = [None] * len(targets) # Used in sem segm loss + + batch_size = loc_data.size(0) + num_priors = priors.size(0) + num_classes = self.num_classes + + # Match priors (default boxes) and ground truth boxes + # These tensors will be created with the same device as loc_data + loc_t = loc_data.new(batch_size, num_priors, 4) + gt_box_t = loc_data.new(batch_size, num_priors, 4) + conf_t = loc_data.new(batch_size, num_priors).long() + idx_t = loc_data.new(batch_size, num_priors).long() + + if cfg.use_class_existence_loss: + class_existence_t = loc_data.new(batch_size, num_classes - 1) + + for idx in range(batch_size): + truths = targets[idx][:, :-1].data + labels[idx] = targets[idx][:, -1].data.long() + + if cfg.use_class_existence_loss: + # Construct a one-hot vector for each object and collapse it into an existence vector with max + # Also it's fine to include the crowd annotations here + class_existence_t[idx, :] = \ + torch.eye(num_classes - 1, device=conf_t.get_device())[labels[idx]].max(dim=0)[0] + + # Split the crowd annotations because they come bundled in + cur_crowds = num_crowds[idx] + if cur_crowds > 0: + split = lambda x: (x[-cur_crowds:], x[:-cur_crowds]) + crowd_boxes, truths = split(truths) + + # We don't use the crowd labels or masks + _, labels[idx] = split(labels[idx]) + _, masks[idx] = split(masks[idx]) + else: + crowd_boxes = None + + match(self.pos_threshold, self.neg_threshold, + truths, priors.data, labels[idx], crowd_boxes, + loc_t, conf_t, idx_t, idx, loc_data[idx]) + + gt_box_t[idx, :, :] = truths[idx_t[idx]] + + # wrap targets + loc_t = Variable(loc_t, requires_grad=False) + conf_t = Variable(conf_t, requires_grad=False) + idx_t = Variable(idx_t, requires_grad=False) + + pos = conf_t > 0 + num_pos = pos.sum(dim=1, keepdim=True) + + # Shape: [batch,num_priors,4] + pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) + + losses = {} + + # Localization Loss (Smooth L1) + if cfg.train_boxes: + # loc_p = loc_data[pos_idx].view(-1, 4) + # loc_t = loc_t[pos_idx].view(-1, 4) + # losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') * cfg.bbox_alpha + loc_p = loc_data.view(-1, 4) + loc_t = loc_t.view(-1, 4) + losses['B'] = F.smooth_l1_loss(loc_p, loc_t, reduction='none') * cfg.bbox_alpha + losses['B'][pos_idx.view(-1, 4) == False] = 0 + losses['B'] = losses['B'].sum() + + if cfg.train_masks: + if cfg.mask_type == mask_type.direct: + if cfg.use_gt_bboxes: + pos_masks = [] + for idx in range(batch_size): + pos_masks.append(masks[idx][idx_t[idx, pos[idx]]]) + masks_t = torch.cat(pos_masks, 0) + masks_p = mask_data[pos, :].view(-1, cfg.mask_dim) + losses['M'] = F.binary_cross_entropy(torch.clamp(masks_p, 0, 1), masks_t, + reduction='sum') * cfg.mask_alpha + else: + losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks) + elif cfg.mask_type == mask_type.lincomb: + ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, + score_data, inst_data, labels) + if 
cfg.use_maskiou: + loss, maskiou_targets = ret + else: + loss = ret + losses.update(loss) + + if cfg.mask_proto_loss is not None: + if cfg.mask_proto_loss == 'l1': + losses['P'] = torch.mean(torch.abs(proto_data)) / self.l1_expected_area * self.l1_alpha + elif cfg.mask_proto_loss == 'disj': + losses['P'] = -torch.mean(torch.max(F.log_softmax(proto_data, dim=-1), dim=-1)[0]) + + # Confidence loss + if cfg.use_focal_loss: + if cfg.use_sigmoid_focal_loss: + losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t) + elif cfg.use_objectness_score: + losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t) + else: + losses['C'] = self.focal_conf_loss(conf_data, conf_t) + else: + if cfg.use_objectness_score: + losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors) + else: + losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size) + + # Mask IoU Loss + if cfg.use_maskiou and maskiou_targets is not None: + losses['I'] = self.mask_iou_loss(net, maskiou_targets) + + # These losses also don't depend on anchors + if cfg.use_class_existence_loss: + losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t) + if cfg.use_semantic_segmentation_loss: + losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels) + + # Divide all losses by the number of positives. + # Don't do it for loss[P] because that doesn't depend on the anchors. + total_num_pos = num_pos.data.sum().float() + for k in losses: + if k not in ('P', 'E', 'S'): + losses[k] /= total_num_pos + else: + losses[k] /= batch_size + + # Loss Key: + # - B: Box Localization Loss + # - C: Class Confidence Loss + # - M: Mask Loss + # - P: Prototype Loss + # - D: Coefficient Diversity Loss + # - E: Class Existence Loss + # - S: Semantic Segmentation Loss + return losses + + def class_existence_loss(self, class_data, class_existence_t): + return cfg.class_existence_alpha * F.binary_cross_entropy_with_logits(class_data, class_existence_t, + reduction='sum') + + def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'): + # Note num_classes here is without the background class so cfg.num_classes-1 + batch_size, num_classes, mask_h, mask_w = segment_data.size() + loss_s = 0 + + for idx in range(batch_size): + cur_segment = segment_data[idx] + cur_class_t = class_t[idx] + + with torch.no_grad(): + downsampled_masks = F.interpolate(mask_t[idx].unsqueeze(0), (mask_h, mask_w), + mode=interpolation_mode, align_corners=False).squeeze(0) + downsampled_masks = downsampled_masks.gt(0.5).float() + + # Construct Semantic Segmentation + segment_t = torch.zeros_like(cur_segment, requires_grad=False) + for obj_idx in range(downsampled_masks.size(0)): + segment_t[cur_class_t[obj_idx]] = torch.max(segment_t[cur_class_t[obj_idx]].float(), + downsampled_masks[obj_idx]) + + loss_s += F.binary_cross_entropy_with_logits(cur_segment, segment_t, reduction='sum') + + return loss_s / mask_h / mask_w * cfg.semantic_segmentation_alpha + + def ohem_conf_loss(self, conf_data, conf_t, pos, num): + # Compute max conf across batch for hard negative mining + batch_conf = conf_data.view(-1, self.num_classes) + if cfg.ohem_use_most_confident: + # i.e. max(softmax) along classes > 0 + batch_conf = F.softmax(batch_conf, dim=1) + loss_c, _ = batch_conf[:, 1:].max(dim=1) + else: + # i.e. 
-softmax(class 0 confidence) + loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0] + + # Hard Negative Mining + loss_c = loss_c.view(num, -1) + loss_c[pos] = 0 # filter out pos boxes + loss_c[conf_t < 0] = 0 # filter out neutrals (conf_t = -1) + _, loss_idx = loss_c.sort(1, descending=True) + _, idx_rank = loss_idx.sort(1) + num_pos = pos.long().sum(1, keepdim=True) + num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) + neg = idx_rank < num_neg.expand_as(idx_rank) + + # Just in case there aren't enough negatives, don't start using positives as negatives + neg[pos] = 0 + neg[conf_t < 0] = 0 # Filter out neutrals + + # Confidence Loss Including Positive and Negative Examples + # pos_idx = pos.unsqueeze(2).expand_as(conf_data) + # neg_idx = neg.unsqueeze(2).expand_as(conf_data) + + # conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes) + # targets_weighted = conf_t[(pos+neg).gt(0)] + ## loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='none') + loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes), conf_t.view(-1), reduction='none').view(-1, + 57744) + loss_c[(pos + neg).gt(0) == False] = 0 + + if cfg.use_class_balanced_conf: + # Lazy initialization + if self.class_instances is None: + self.class_instances = torch.zeros(self.num_classes, device=targets_weighted.device) + + classes, counts = targets_weighted.unique(return_counts=True) + + for _cls, _cnt in zip(classes.cpu().numpy(), counts.cpu().numpy()): + self.class_instances[_cls] += _cnt + + self.total_instances += targets_weighted.size(0) + + weighting = 1 - (self.class_instances[targets_weighted] / self.total_instances) + weighting = torch.clamp(weighting, min=1 / self.num_classes) + + # If you do the math, the average weight of self.class_instances is this + avg_weight = (self.num_classes - 1) / self.num_classes + + loss_c = (loss_c * weighting).sum() / avg_weight + else: + loss_c = loss_c.sum() + + return cfg.conf_alpha * loss_c + + def focal_conf_loss(self, conf_data, conf_t): + """ + Focal loss as described in https://arxiv.org/pdf/1708.02002.pdf + Adapted from https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py + Note that this uses softmax and not the original sigmoid from the paper. + """ + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss + + logpt = F.log_softmax(conf_data, dim=-1) + logpt = logpt.gather(1, conf_t.unsqueeze(-1)) + logpt = logpt.view(-1) + pt = logpt.exp() + + # I adapted the alpha_t calculation here from + # https://github.com/pytorch/pytorch/blob/master/modules/detectron/softmax_focal_loss_op.cu + # You'd think you want all the alphas to sum to one, but in the original implementation they + # just give background an alpha of 1-alpha and each forground an alpha of alpha. + background = (conf_t == 0).float() + at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) + + loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + + # See comment above for keep + return cfg.conf_alpha * (loss * keep).sum() + + def focal_conf_sigmoid_loss(self, conf_data, conf_t): + """ + Focal loss but using sigmoid like the original paper. + Note: To make things mesh easier, the network still predicts 81 class confidences in this mode. 
+ Because retinanet originally only predicts 80, we simply just don't use conf_data[..., 0] + """ + num_classes = conf_data.size(-1) + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, num_classes) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # can't mask with -1, so filter that out + + # Compute a one-hot embedding of conf_t + # From https://github.com/kuangliu/pytorch-retinanet/blob/master/utils.py + conf_one_t = torch.eye(num_classes, device=conf_t.get_device())[conf_t] + conf_pm_t = conf_one_t * 2 - 1 # -1 if background, +1 if forground for specific class + + logpt = F.logsigmoid(conf_data * conf_pm_t) # note: 1 - sigmoid(x) = sigmoid(-x) + pt = logpt.exp() + + at = cfg.focal_loss_alpha * conf_one_t + (1 - cfg.focal_loss_alpha) * (1 - conf_one_t) + at[..., 0] = 0 # Set alpha for the background class to 0 because sigmoid focal loss doesn't use it + + loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + loss = keep * loss.sum(dim=-1) + + return cfg.conf_alpha * loss.sum() + + def focal_conf_objectness_loss(self, conf_data, conf_t): + """ + Instead of using softmax, use class[0] to be the objectness score and do sigmoid focal loss on that. + Then for the rest of the classes, softmax them and apply CE for only the positive examples. + + If class[0] = 1 implies forground and class[0] = 0 implies background then you achieve something + similar during test-time to softmax by setting class[1:] = softmax(class[1:]) * class[0] and invert class[0]. + """ + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + # Ignore neutral samples (class < 0) + keep = (conf_t >= 0).float() + conf_t[conf_t < 0] = 0 # so that gather doesn't drum up a fuss + + background = (conf_t == 0).float() + at = (1 - cfg.focal_loss_alpha) * background + cfg.focal_loss_alpha * (1 - background) + + logpt = F.logsigmoid(conf_data[:, 0]) * (1 - background) + F.logsigmoid(-conf_data[:, 0]) * background + pt = logpt.exp() + + obj_loss = -at * (1 - pt) ** cfg.focal_loss_gamma * logpt + + # All that was the objectiveness loss--now time for the class confidence loss + pos_mask = conf_t > 0 + conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes + conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here + + class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') + + return cfg.conf_alpha * (class_loss + (obj_loss * keep).sum()) + + def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors): + """ + Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO. + Then for the rest of the classes, softmax them and apply CE for only the positive examples. 
+ """ + + conf_t = conf_t.view(-1) # [batch_size*num_priors] + conf_data = conf_data.view(-1, conf_data.size(-1)) # [batch_size*num_priors, num_classes] + + pos_mask = (conf_t > 0) + neg_mask = (conf_t == 0) + + obj_data = conf_data[:, 0] + obj_data_pos = obj_data[pos_mask] + obj_data_neg = obj_data[neg_mask] + + # Don't be confused, this is just binary cross entropy similified + obj_neg_loss = - F.logsigmoid(-obj_data_neg).sum() + + with torch.no_grad(): + pos_priors = priors.unsqueeze(0).expand(batch_size, -1, -1).reshape(-1, 4)[pos_mask, :] + + boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors) + boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors) + + iou_targets = elemwise_box_iou(boxes_pred, boxes_targ) + + obj_pos_loss = - iou_targets * F.logsigmoid(obj_data_pos) - (1 - iou_targets) * F.logsigmoid(-obj_data_pos) + obj_pos_loss = obj_pos_loss.sum() + + # All that was the objectiveness loss--now time for the class confidence loss + conf_data_pos = (conf_data[:, 1:])[pos_mask] # Now this has just 80 classes + conf_t_pos = conf_t[pos_mask] - 1 # So subtract 1 here + + class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction='sum') + + return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss) + + def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors, masks): + """ Crops the gt masks using the predicted bboxes, scales them down, and outputs the BCE loss. """ + loss_m = 0 + for idx in range(mask_data.size(0)): + with torch.no_grad(): + cur_pos_idx = pos_idx[idx, :, :] + cur_pos_idx_squeezed = cur_pos_idx[:, 1] + + # Shape: [num_priors, 4], decoded predicted bboxes + pos_bboxes = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors) + pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1) + pos_lookup = idx_t[idx, cur_pos_idx_squeezed] + + cur_masks = masks[idx] + pos_masks = cur_masks[pos_lookup, :, :] + + # Convert bboxes to absolute coordinates + num_pos, img_height, img_width = pos_masks.size() + + # Take care of all the bad behavior that can be caused by out of bounds coordinates + x1, x2 = sanitize_coordinates(pos_bboxes[:, 0], pos_bboxes[:, 2], img_width) + y1, y2 = sanitize_coordinates(pos_bboxes[:, 1], pos_bboxes[:, 3], img_height) + + # Crop each gt mask with the predicted bbox and rescale to the predicted mask size + # Note that each bounding box crop is a different size so I don't think we can vectorize this + scaled_masks = [] + for jdx in range(num_pos): + tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]] + + # Restore any dimensions we've left out because our bbox was 1px wide + while tmp_mask.dim() < 2: + tmp_mask = tmp_mask.unsqueeze(0) + + new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0), cfg.mask_size) + scaled_masks.append(new_mask.view(1, -1)) + + mask_t = torch.cat(scaled_masks, 0).gt(0.5).float() # Threshold downsampled mask + + pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :] + loss_m += F.binary_cross_entropy(torch.clamp(pos_mask_data, 0, 1), mask_t, reduction='sum') * cfg.mask_alpha + + return loss_m + + def coeff_diversity_loss(self, coeffs, instance_t): + """ + coeffs should be size [num_pos, num_coeffs] + instance_t should be size [num_pos] and be values from 0 to num_instances-1 + """ + num_pos = coeffs.size(0) + instance_t = instance_t.view(-1) # juuuust to make sure + + coeffs_norm = F.normalize(coeffs, dim=1) + cos_sim = coeffs_norm @ coeffs_norm.t() + + inst_eq = (instance_t[:, None].expand_as(cos_sim) == instance_t[None, :].expand_as(cos_sim)).float() + + # 
Rescale to be between 0 and 1 + cos_sim = (cos_sim + 1) / 2 + + # If they're the same instance, use cosine distance, else use cosine similarity + loss = (1 - cos_sim) * inst_eq + cos_sim * (1 - inst_eq) + + # Only divide by num_pos once because we're summing over a num_pos x num_pos tensor + # and all the losses will be divided by num_pos at the end, so just one extra time. + return cfg.mask_proto_coeff_diversity_alpha * loss.sum() / num_pos + + def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, + inst_data, labels, interpolation_mode='bilinear'): + mask_h = proto_data.size(1) + mask_w = proto_data.size(2) + + process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop + + if cfg.mask_proto_remove_empty_masks: + # Make sure to store a copy of this because we edit it to get rid of all-zero masks + pos = pos.clone() + + loss_m = 0 + loss_d = 0 # Coefficient diversity loss + + maskiou_t_list = [] + maskiou_net_input_list = [] + label_t_list = [] + + for idx in range(mask_data.size(0)): + with torch.no_grad(): + downsampled_masks = F.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w), + mode=interpolation_mode, align_corners=False).squeeze(0) + downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous() + if cfg.mask_proto_binarize_downsampled_gt: + downsampled_masks = downsampled_masks.gt(0.5).float() + + if cfg.mask_proto_remove_empty_masks: + # Get rid of gt masks that are so small they get downsampled away + very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <= 0.0001) + for i in range(very_small_masks.size(0)): + if very_small_masks[i]: + pos[idx, idx_t[idx] == i] = 0 + if cfg.mask_proto_reweight_mask_loss: + # Ensure that the gt is binary + if not cfg.mask_proto_binarize_downsampled_gt: + bin_gt = downsampled_masks.gt(0.5).float() + else: + bin_gt = downsampled_masks + + gt_foreground_norm = bin_gt / (torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001) + gt_background_norm = (1 - bin_gt) / (torch.sum(1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001) + + mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm + mask_reweighting *= mask_h * mask_w + + cur_pos = pos[idx] + pos_idx_t = idx_t[idx, cur_pos] + if process_gt_bboxes: + # Note: this is in point-form + if cfg.mask_proto_crop_with_pred_box: + pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos] + else: + pos_gt_box_t = gt_box_t[idx, cur_pos] + if pos_idx_t.size(0) == 0: + continue + + proto_masks = proto_data[idx] + proto_coef = mask_data[idx, cur_pos, :] + if cfg.use_mask_scoring: + mask_scores = score_data[idx, cur_pos, :] + if cfg.mask_proto_coeff_diversity_loss: + if inst_data is not None: + div_coeffs = inst_data[idx, cur_pos, :] + else: + div_coeffs = proto_coef + + loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t) + + # If we have over the allowed number of masks, select a random sample + old_num_pos = proto_coef.size(0) + if old_num_pos > cfg.masks_to_train: + perm = torch.randperm(proto_coef.size(0)) + select = perm[:cfg.masks_to_train] + + proto_coef = proto_coef[select, :] + pos_idx_t = pos_idx_t[select] + + if process_gt_bboxes: + pos_gt_box_t = pos_gt_box_t[select, :] + if cfg.use_mask_scoring: + mask_scores = mask_scores[select, :] + + num_pos = proto_coef.size(0) + mask_t = downsampled_masks[:, :, pos_idx_t] + label_t = labels[idx][pos_idx_t] + # Size: [mask_h, mask_w, num_pos] + pred_masks = proto_masks @ proto_coef.t() + pred_masks = 
cfg.mask_proto_mask_activation(pred_masks) + if cfg.mask_proto_double_loss: + if cfg.mask_proto_mask_activation == activation_func.sigmoid: + pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='sum') + else: + pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='sum') + + loss_m += cfg.mask_proto_double_loss_alpha * pre_loss + + if cfg.mask_proto_crop: + pred_masks = crop(pred_masks, pos_gt_box_t) + + if cfg.mask_proto_mask_activation == activation_func.sigmoid: + pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1), mask_t, reduction='none') + else: + pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction='none') + + if cfg.mask_proto_normalize_mask_loss_by_sqrt_area: + gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True) + pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001) + + if cfg.mask_proto_reweight_mask_loss: + pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t] + + if cfg.mask_proto_normalize_emulate_roi_pooling: + weight = mask_h * mask_w if cfg.mask_proto_crop else 1 + pos_gt_csize = center_size(pos_gt_box_t) + gt_box_width = pos_gt_csize[:, 2] * mask_w + gt_box_height = pos_gt_csize[:, 3] * mask_h + pre_loss = pre_loss.sum(dim=(0, 1)) / gt_box_width / gt_box_height * weight + + # If the number of masks were limited scale the loss accordingly + if old_num_pos > num_pos: + pre_loss *= old_num_pos / num_pos + + loss_m += torch.sum(pre_loss) + + if cfg.use_maskiou: + if cfg.discard_mask_area > 0: + gt_mask_area = torch.sum(mask_t, dim=(0, 1)) + + select = gt_mask_area > cfg.discard_mask_area + + if torch.sum(select) < 1: + continue + + + pos_gt_box_t = pos_gt_box_t[select, :] + pred_masks = pred_masks[:, :, select] + mask_t = mask_t[:, :, select] + label_t = label_t[select] + + maskiou_net_input = pred_masks.permute(2, 0, 1).contiguous().unsqueeze(1) + pred_masks = pred_masks.gt(0.5).float() + maskiou_t = self._mask_iou(pred_masks, mask_t) + + maskiou_net_input_list.append(maskiou_net_input) + maskiou_t_list.append(maskiou_t) + label_t_list.append(label_t) + + losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w} + + if cfg.mask_proto_coeff_diversity_loss: + losses['D'] = loss_d + + if cfg.use_maskiou: + # discard_mask_area discarded every mask in the batch, so nothing to do here + if len(maskiou_t_list) == 0: + return losses, None + + maskiou_t = torch.cat(maskiou_t_list) + label_t = torch.cat(label_t_list) + maskiou_net_input = torch.cat(maskiou_net_input_list) + + num_samples = maskiou_t.size(0) + if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train: + perm = torch.randperm(num_samples) + select = perm[:cfg.masks_to_train] + maskiou_t = maskiou_t[select] + label_t = label_t[select] + maskiou_net_input = maskiou_net_input[select] + + return losses, [maskiou_net_input, maskiou_t, label_t] + + return losses + + def _mask_iou(self, mask1, mask2): + intersection = torch.sum(mask1 * mask2, dim=(0, 1)) + area1 = torch.sum(mask1, dim=(0, 1)) + area2 = torch.sum(mask2, dim=(0, 1)) + union = (area1 + area2) - intersection + ret = intersection / union + return ret + + def mask_iou_loss(self, net, maskiou_targets): + maskiou_net_input, maskiou_t, label_t = maskiou_targets + + maskiou_p = net.maskiou_net(maskiou_net_input) + + label_t = label_t[:, None] + maskiou_p = torch.gather(maskiou_p, dim=1, index=label_t).view(-1) + + loss_i = F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='sum') + + return loss_i * cfg.maskiou_alpha diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/output_utils.py 
index f84c02899d578889003174a39ee69ca0201d45cd..dbda93adba3a5800e6c6d0a09fde22af4bfa208e 100644
--- a/PyTorch/contrib/cv/detection/YOLACT_plus/layers/output_utils.py
+++ b/PyTorch/contrib/cv/detection/YOLACT_plus/layers/output_utils.py
@@ -1,200 +1,200 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-""" Contains functions used to sanitize and prepare the output of Yolact. """
-
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import numpy as np
-import cv2
-
-from data import cfg, mask_type, MEANS, STD, activation_func
-from utils.augmentations import Resize
-from utils import timer
-from .box_utils import crop, sanitize_coordinates
-
-def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
-                visualize_lincomb=False, crop_masks=True, score_threshold=0):
-    """
-    Postprocesses the output of Yolact on testing mode into a format that makes sense,
-    accounting for all the possible configuration settings.
-
-    Args:
-        - det_output: The lost of dicts that Detect outputs.
-        - w: The real with of the image.
-        - h: The real height of the image.
-        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
-        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate)
-
-    Returns 4 torch Tensors (in the following order):
-        - classes [num_det]: The class idx for each detection.
-        - scores [num_det]: The confidence score for each detection.
-        - boxes [num_det, 4]: The bounding box for each detection in absolute point form.
-        - masks [num_det, h, w]: Full image masks for each detection.
- """ - - dets = det_output[batch_idx] - - if dets is None: - return [torch.Tensor()] * 4 # Warning, this is 4 copies of the same thing - - if score_threshold > 0: - keep = dets['score'] > score_threshold - - for k in dets: - if k != 'proto': - dets[k] = dets[k][keep] - - if dets['score'].size(0) == 0: - return [torch.Tensor()] * 4 - - # Actually extract everything from dets now - classes = dets['class'] - boxes = dets['box'] - scores = dets['score'] - masks = dets['mask'] - - if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: - # At this points masks is only the coefficients - proto_data = dets['proto'] - - # Test flag, do not upvote - if cfg.mask_proto_debug: - np.save('scripts/proto.npy', proto_data.cpu().numpy()) - - if visualize_lincomb: - display_lincomb(proto_data, masks) - - masks = proto_data @ masks.t() - masks = cfg.mask_proto_mask_activation(masks) - - # Crop masks before upsampling because you know why - if crop_masks: - masks = crop(masks, boxes) - - # Permute into the correct output shape [num_dets, proto_h, proto_w] - masks = masks.permute(2, 0, 1).contiguous() - - if cfg.use_maskiou: - with timer.env('maskiou_net'): - with torch.no_grad(): - maskiou_p = net.maskiou_net(masks.unsqueeze(1)) - maskiou_p = torch.gather(maskiou_p, dim=1, index=classes.unsqueeze(1)).squeeze(1) - if cfg.rescore_mask: - if cfg.rescore_bbox: - scores = scores * maskiou_p - else: - scores = [scores, scores * maskiou_p] - - # Scale masks up to the full image - masks = F.interpolate(masks.unsqueeze(0), (h, w), mode=interpolation_mode, align_corners=False).squeeze(0) - - # Binarize the masks - masks.gt_(0.5) - - - boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, cast=False) - boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, cast=False) - boxes = boxes.long() - - if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch: - # Upscale masks - full_masks = torch.zeros(masks.size(0), h, w) - - for jdx in range(masks.size(0)): - x1, y1, x2, y2 = boxes[jdx, :] - - mask_w = x2 - x1 - mask_h = y2 - y1 - - # Just in case - if mask_w * mask_h <= 0 or mask_w < 0: - continue - - mask = masks[jdx, :].view(1, 1, cfg.mask_size, cfg.mask_size) - mask = F.interpolate(mask, (mask_h, mask_w), mode=interpolation_mode, align_corners=False) - mask = mask.gt(0.5).float() - full_masks[jdx, y1:y2, x1:x2] = mask - - masks = full_masks - - return classes, scores, boxes, masks - - - - - -def undo_image_transformation(img, w, h): - """ - Takes a transformed image tensor and returns a numpy ndarray that is untransformed. - Arguments w and h are the original height and width of the image. 
- """ - img_numpy = img.permute(1, 2, 0).cpu().numpy() - img_numpy = img_numpy[:, :, (2, 1, 0)] # To BRG - - if cfg.backbone.transform.normalize: - img_numpy = (img_numpy * np.array(STD) + np.array(MEANS)) / 255.0 - elif cfg.backbone.transform.subtract_means: - img_numpy = (img_numpy / 255.0 + np.array(MEANS) / 255.0).astype(np.float32) - - img_numpy = img_numpy[:, :, (2, 1, 0)] # To RGB - img_numpy = np.clip(img_numpy, 0, 1) - - return cv2.resize(img_numpy, (w,h)) - - -def display_lincomb(proto_data, masks): - out_masks = torch.matmul(proto_data, masks.t()) - # out_masks = cfg.mask_proto_mask_activation(out_masks) - - for kdx in range(1): - jdx = kdx + 0 - import matplotlib.pyplot as plt - coeffs = masks[jdx, :].cpu().numpy() - idx = np.argsort(-np.abs(coeffs)) - # plt.bar(list(range(idx.shape[0])), coeffs[idx]) - # plt.show() - - coeffs_sort = coeffs[idx] - arr_h, arr_w = (4,8) - proto_h, proto_w, _ = proto_data.size() - arr_img = np.zeros([proto_h*arr_h, proto_w*arr_w]) - arr_run = np.zeros([proto_h*arr_h, proto_w*arr_w]) - test = torch.sum(proto_data, -1).cpu().numpy() - - for y in range(arr_h): - for x in range(arr_w): - i = arr_w * y + x - - if i == 0: - running_total = proto_data[:, :, idx[i]].cpu().numpy() * coeffs_sort[i] - else: - running_total += proto_data[:, :, idx[i]].cpu().numpy() * coeffs_sort[i] - - running_total_nonlin = running_total - if cfg.mask_proto_mask_activation == activation_func.sigmoid: - running_total_nonlin = (1/(1+np.exp(-running_total_nonlin))) - - arr_img[y*proto_h:(y+1)*proto_h, x*proto_w:(x+1)*proto_w] = (proto_data[:, :, idx[i]] / torch.max(proto_data[:, :, idx[i]])).cpu().numpy() * coeffs_sort[i] - arr_run[y*proto_h:(y+1)*proto_h, x*proto_w:(x+1)*proto_w] = (running_total_nonlin > 0.5).astype(np.float) - plt.imshow(arr_img) - plt.show() - # plt.imshow(arr_run) - # plt.show() - # plt.imshow(test) - # plt.show() - plt.imshow(out_masks[:, :, jdx].cpu().numpy()) - plt.show() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" Contains functions used to sanitize and prepare the output of Yolact. """ + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import cv2 + +from data import cfg, mask_type, MEANS, STD, activation_func +from utils.augmentations import Resize +from utils import timer +from .box_utils import crop, sanitize_coordinates + +def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilinear', + visualize_lincomb=False, crop_masks=True, score_threshold=0): + """ + Postprocesses the output of Yolact on testing mode into a format that makes sense, + accounting for all the possible configuration settings. + + Args: + - det_output: The lost of dicts that Detect outputs. + - w: The real with of the image. + - h: The real height of the image. + - batch_idx: If you have multiple images for this batch, the image's index in the batch. 
+ - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate) + + Returns 4 torch Tensors (in the following order): + - classes [num_det]: The class idx for each detection. + - scores [num_det]: The confidence score for each detection. + - boxes [num_det, 4]: The bounding box for each detection in absolute point form. + - masks [num_det, h, w]: Full image masks for each detection. + """ + + dets = det_output[batch_idx] + + if dets is None: + return [torch.Tensor()] * 4 # Warning, this is 4 copies of the same thing + + if score_threshold > 0: + keep = dets['score'] > score_threshold + + for k in dets: + if k != 'proto': + dets[k] = dets[k][keep] + + if dets['score'].size(0) == 0: + return [torch.Tensor()] * 4 + + # Actually extract everything from dets now + classes = dets['class'] + boxes = dets['box'] + scores = dets['score'] + masks = dets['mask'] + + if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: + # At this points masks is only the coefficients + proto_data = dets['proto'] + + # Test flag, do not upvote + if cfg.mask_proto_debug: + np.save('scripts/proto.npy', proto_data.cpu().numpy()) + + if visualize_lincomb: + display_lincomb(proto_data, masks) + + masks = proto_data @ masks.t() + masks = cfg.mask_proto_mask_activation(masks) + + # Crop masks before upsampling because you know why + if crop_masks: + masks = crop(masks, boxes) + + # Permute into the correct output shape [num_dets, proto_h, proto_w] + masks = masks.permute(2, 0, 1).contiguous() + + if cfg.use_maskiou: + with timer.env('maskiou_net'): + with torch.no_grad(): + maskiou_p = net.maskiou_net(masks.unsqueeze(1)) + maskiou_p = torch.gather(maskiou_p, dim=1, index=classes.unsqueeze(1)).squeeze(1) + if cfg.rescore_mask: + if cfg.rescore_bbox: + scores = scores * maskiou_p + else: + scores = [scores, scores * maskiou_p] + + # Scale masks up to the full image + masks = F.interpolate(masks.unsqueeze(0), (h, w), mode=interpolation_mode, align_corners=False).squeeze(0) + + # Binarize the masks + masks.gt_(0.5) + + + boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, cast=False) + boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, cast=False) + boxes = boxes.long() + + if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch: + # Upscale masks + full_masks = torch.zeros(masks.size(0), h, w) + + for jdx in range(masks.size(0)): + x1, y1, x2, y2 = boxes[jdx, :] + + mask_w = x2 - x1 + mask_h = y2 - y1 + + # Just in case + if mask_w * mask_h <= 0 or mask_w < 0: + continue + + mask = masks[jdx, :].view(1, 1, cfg.mask_size, cfg.mask_size) + mask = F.interpolate(mask, (mask_h, mask_w), mode=interpolation_mode, align_corners=False) + mask = mask.gt(0.5).float() + full_masks[jdx, y1:y2, x1:x2] = mask + + masks = full_masks + + return classes, scores, boxes, masks + + + + + +def undo_image_transformation(img, w, h): + """ + Takes a transformed image tensor and returns a numpy ndarray that is untransformed. + Arguments w and h are the original height and width of the image. 
+ """ + img_numpy = img.permute(1, 2, 0).cpu().numpy() + img_numpy = img_numpy[:, :, (2, 1, 0)] # To BRG + + if cfg.backbone.transform.normalize: + img_numpy = (img_numpy * np.array(STD) + np.array(MEANS)) / 255.0 + elif cfg.backbone.transform.subtract_means: + img_numpy = (img_numpy / 255.0 + np.array(MEANS) / 255.0).astype(np.float32) + + img_numpy = img_numpy[:, :, (2, 1, 0)] # To RGB + img_numpy = np.clip(img_numpy, 0, 1) + + return cv2.resize(img_numpy, (w,h)) + + +def display_lincomb(proto_data, masks): + out_masks = torch.matmul(proto_data, masks.t()) + # out_masks = cfg.mask_proto_mask_activation(out_masks) + + for kdx in range(1): + jdx = kdx + 0 + import matplotlib.pyplot as plt + coeffs = masks[jdx, :].cpu().numpy() + idx = np.argsort(-np.abs(coeffs)) + # plt.bar(list(range(idx.shape[0])), coeffs[idx]) + # plt.show() + + coeffs_sort = coeffs[idx] + arr_h, arr_w = (4,8) + proto_h, proto_w, _ = proto_data.size() + arr_img = np.zeros([proto_h*arr_h, proto_w*arr_w]) + arr_run = np.zeros([proto_h*arr_h, proto_w*arr_w]) + test = torch.sum(proto_data, -1).cpu().numpy() + + for y in range(arr_h): + for x in range(arr_w): + i = arr_w * y + x + + if i == 0: + running_total = proto_data[:, :, idx[i]].cpu().numpy() * coeffs_sort[i] + else: + running_total += proto_data[:, :, idx[i]].cpu().numpy() * coeffs_sort[i] + + running_total_nonlin = running_total + if cfg.mask_proto_mask_activation == activation_func.sigmoid: + running_total_nonlin = (1/(1+np.exp(-running_total_nonlin))) + + arr_img[y*proto_h:(y+1)*proto_h, x*proto_w:(x+1)*proto_w] = (proto_data[:, :, idx[i]] / torch.max(proto_data[:, :, idx[i]])).cpu().numpy() * coeffs_sort[i] + arr_run[y*proto_h:(y+1)*proto_h, x*proto_w:(x+1)*proto_w] = (running_total_nonlin > 0.5).astype(np.float) + plt.imshow(arr_img) + plt.show() + # plt.imshow(arr_run) + # plt.show() + # plt.imshow(test) + # plt.show() + plt.imshow(out_masks[:, :, jdx].cpu().numpy()) + plt.show() diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/modelzoo_level.txt b/PyTorch/contrib/cv/detection/YOLACT_plus/modelzoo_level.txt index 0bfa93c4ef64e409485139f9c4153f9d3f660a34..59d0003ba877b641b70b96d465943c6acb16ef7e 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/modelzoo_level.txt +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/modelzoo_level.txt @@ -1,6 +1,6 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:POK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:POK PerfStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/augment_bbox.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/augment_bbox.py index 8688f4f2806fc99dc5c2c36c19dc2a2c8b35e12f..d823a92822d0589d7341ac2f0079222f9fcdc8cb 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/augment_bbox.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/augment_bbox.py @@ -1,184 +1,184 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import os.path as osp -import json, pickle -import sys -from math import sqrt -from itertools import product -import torch -from numpy import random - -import numpy as np - - -max_image_size = 550 -augment_idx = 0 -dump_file = 'weights/bboxes_aug.pkl' -box_file = 'weights/bboxes.pkl' - -def augment_boxes(bboxes): - bboxes_rel = [] - for box in bboxes: - bboxes_rel.append(prep_box(box)) - bboxes_rel = np.concatenate(bboxes_rel, axis=0) - - with open(dump_file, 'wb') as f: - pickle.dump(bboxes_rel, f) - -def prep_box(box_list): - global augment_idx - boxes = np.array([box_list[2:]], dtype=np.float32) - - # Image width and height - width, height = box_list[:2] - - # To point form - boxes[:, 2:] += boxes[:, :2] - - - # Expand - ratio = random.uniform(1, 4) - left = random.uniform(0, width*ratio - width) - top = random.uniform(0, height*ratio - height) - - height *= ratio - width *= ratio - - boxes[:, :2] += (int(left), int(top)) - boxes[:, 2:] += (int(left), int(top)) - - - # RandomSampleCrop - height, width, boxes = random_sample_crop(height, width, boxes) - - - # RandomMirror - if random.randint(0, 2): - boxes[:, 0::2] = width - boxes[:, 2::-2] - - - # Resize - boxes[:, [0, 2]] *= (max_image_size / width) - boxes[:, [1, 3]] *= (max_image_size / height) - width = height = max_image_size - - - # ToPercentCoords - boxes[:, [0, 2]] /= width - boxes[:, [1, 3]] /= height - - if augment_idx % 50000 == 0: - print('Current idx: %d' % augment_idx) - - augment_idx += 1 - - return boxes - - - - -sample_options = ( - # using entire original input image - None, - # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9 - (0.1, None), - (0.3, None), - (0.7, None), - (0.9, None), - # randomly sample a patch - (None, None), -) - -def intersect(box_a, box_b): - max_xy = np.minimum(box_a[:, 2:], box_b[2:]) - min_xy = np.maximum(box_a[:, :2], box_b[:2]) - inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) - return inter[:, 0] * inter[:, 1] - - -def jaccard_numpy(box_a, box_b): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. 
- E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: Multiple bounding boxes, Shape: [num_boxes,4] - box_b: Single bounding box, Shape: [4] - Return: - jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] - """ - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])) # [A,B] - area_b = ((box_b[2]-box_b[0]) * - (box_b[3]-box_b[1])) # [A,B] - union = area_a + area_b - inter - return inter / union # [A,B] - - -def random_sample_crop(height, width, boxes=None): - global sample_options - - while True: - # randomly choose a mode - mode = random.choice(sample_options) - if mode is None: - return height, width, boxes - - min_iou, max_iou = mode - if min_iou is None: - min_iou = float('-inf') - if max_iou is None: - max_iou = float('inf') - - for _ in range(50): - w = random.uniform(0.3 * width, width) - h = random.uniform(0.3 * height, height) - - if h / w < 0.5 or h / w > 2: - continue - - left = random.uniform(0, width - w) - top = random.uniform(0, height - h) - - rect = np.array([int(left), int(top), int(left+w), int(top+h)]) - overlap = jaccard_numpy(boxes, rect) - if overlap.min() < min_iou and max_iou < overlap.max(): - continue - - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - - m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) - m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) - mask = m1 * m2 - - if not mask.any(): - continue - - current_boxes = boxes[mask, :].copy() - current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2]) - current_boxes[:, :2] -= rect[:2] - current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:]) - current_boxes[:, 2:] -= rect[:2] - - return h, w, current_boxes - - -if __name__ == '__main__': - - with open(box_file, 'rb') as f: - bboxes = pickle.load(f) - - augment_boxes(bboxes) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import os.path as osp +import json, pickle +import sys +from math import sqrt +from itertools import product +import torch +from numpy import random + +import numpy as np + + +max_image_size = 550 +augment_idx = 0 +dump_file = 'weights/bboxes_aug.pkl' +box_file = 'weights/bboxes.pkl' + +def augment_boxes(bboxes): + bboxes_rel = [] + for box in bboxes: + bboxes_rel.append(prep_box(box)) + bboxes_rel = np.concatenate(bboxes_rel, axis=0) + + with open(dump_file, 'wb') as f: + pickle.dump(bboxes_rel, f) + +def prep_box(box_list): + global augment_idx + boxes = np.array([box_list[2:]], dtype=np.float32) + + # Image width and height + width, height = box_list[:2] + + # To point form + boxes[:, 2:] += boxes[:, :2] + + + # Expand + ratio = random.uniform(1, 4) + left = random.uniform(0, width*ratio - width) + top = random.uniform(0, height*ratio - height) + + height *= ratio + width *= ratio + + boxes[:, :2] += (int(left), int(top)) + boxes[:, 2:] += (int(left), int(top)) + + + # RandomSampleCrop + height, width, boxes = random_sample_crop(height, width, boxes) + + + # RandomMirror + if random.randint(0, 2): + boxes[:, 0::2] = width - boxes[:, 2::-2] + + + # Resize + boxes[:, [0, 2]] *= (max_image_size / width) + boxes[:, [1, 3]] *= (max_image_size / height) + width = height = max_image_size + + + # ToPercentCoords + boxes[:, [0, 2]] /= width + boxes[:, [1, 3]] /= height + + if augment_idx % 50000 == 0: + print('Current idx: %d' % augment_idx) + + augment_idx += 1 + + return boxes + + + + +sample_options = ( + # using entire original input image + None, + # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9 + (0.1, None), + (0.3, None), + (0.7, None), + (0.9, None), + # randomly sample a patch + (None, None), +) + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. 
+ E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])) # [A,B] + area_b = ((box_b[2]-box_b[0]) * + (box_b[3]-box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +def random_sample_crop(height, width, boxes=None): + global sample_options + + while True: + # randomly choose a mode + mode = random.choice(sample_options) + if mode is None: + return height, width, boxes + + min_iou, max_iou = mode + if min_iou is None: + min_iou = float('-inf') + if max_iou is None: + max_iou = float('inf') + + for _ in range(50): + w = random.uniform(0.3 * width, width) + h = random.uniform(0.3 * height, height) + + if h / w < 0.5 or h / w > 2: + continue + + left = random.uniform(0, width - w) + top = random.uniform(0, height - h) + + rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + overlap = jaccard_numpy(boxes, rect) + if overlap.min() < min_iou and max_iou < overlap.max(): + continue + + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + mask = m1 * m2 + + if not mask.any(): + continue + + current_boxes = boxes[mask, :].copy() + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2]) + current_boxes[:, :2] -= rect[:2] + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:]) + current_boxes[:, 2:] -= rect[:2] + + return h, w, current_boxes + + +if __name__ == '__main__': + + with open(box_file, 'rb') as f: + bboxes = pickle.load(f) + + augment_boxes(bboxes) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/bbox_recall.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/bbox_recall.py index b9d2565ff51ce110e73f842cd4308e427f23f287..3a2745760cbea5fae74858f27d90ce0db75106cb 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/bbox_recall.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/bbox_recall.py @@ -1,195 +1,195 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -This script compiles all the bounding boxes in the training data and -clusters them for each convout resolution on which they're used. - -Run this script from the Yolact root directory. 
-""" - -import os.path as osp -import json, pickle -import sys -from math import sqrt -from itertools import product -import torch -import random - -import numpy as np - -dump_file = 'weights/bboxes.pkl' -aug_file = 'weights/bboxes_aug.pkl' - -use_augmented_boxes = True - - -def intersect(box_a, box_b): - """ We resize both tensors to [A,B,2] without new malloc: - [A,2] -> [A,1,2] -> [A,B,2] - [B,2] -> [1,B,2] -> [A,B,2] - Then we compute the area of intersect between box_a and box_b. - Args: - box_a: (tensor) bounding boxes, Shape: [A,4]. - box_b: (tensor) bounding boxes, Shape: [B,4]. - Return: - (tensor) intersection area, Shape: [A,B]. - """ - A = box_a.size(0) - B = box_b.size(0) - max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), - box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) - min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), - box_b[:, :2].unsqueeze(0).expand(A, B, 2)) - inter = torch.clamp((max_xy - min_xy), min=0) - return inter[:, :, 0] * inter[:, :, 1] - - -def jaccard(box_a, box_b, iscrowd=False): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. Here we operate on - ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. - E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] - box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] - Return: - jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] - """ - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] - area_b = ((box_b[:, 2]-box_b[:, 0]) * - (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] - union = area_a + area_b - inter - - if iscrowd: - return inter / area_a - else: - return inter / union # [A,B] - -# Also convert to point form -def to_relative(bboxes): - return np.concatenate((bboxes[:, 2:4] / bboxes[:, :2], (bboxes[:, 2:4] + bboxes[:, 4:]) / bboxes[:, :2]), axis=1) - - -def make_priors(conv_size, scales, aspect_ratios): - prior_data = [] - conv_h = conv_size[0] - conv_w = conv_size[1] - - # Iteration order is important (it has to sync up with the convout) - for j, i in product(range(conv_h), range(conv_w)): - x = (i + 0.5) / conv_w - y = (j + 0.5) / conv_h - - for scale, ars in zip(scales, aspect_ratios): - for ar in ars: - w = scale * ar / conv_w - h = scale / ar / conv_h - - # Point form - prior_data += [x - w/2, y - h/2, x + w/2, y + h/2] - - return np.array(prior_data).reshape(-1, 4) - -# fixed_ssd_config -# scales = [[3.5, 4.95], [3.6, 4.90], [3.3, 4.02], [2.7, 3.10], [2.1, 2.37], [2.1, 2.37], [1.8, 1.92]] -# aspect_ratios = [ [[1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n], [1]] for n in [3, 5, 5, 5, 3, 3, 3] ] -# conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] - -scales = [[1.68, 2.91], - [2.95, 2.22, 0.84], - [2.23, 2.17, 3.12], - [0.76, 1.94, 2.72], - [2.10, 2.65], - [1.80, 1.92]] -aspect_ratios = [[[0.72, 0.96], [0.68, 1.17]], - [[1.28, 0.66], [0.63, 1.23], [0.89, 1.40]], - [[2.05, 1.24], [0.57, 0.83], [0.61, 1.15]], - [[1.00, 2.21], [0.47, 1.60], [1.44, 0.79]], - [[1.00, 1.41, 0.71, 1.73, 0.58], [1.08]], - [[1.00, 1.41, 0.71, 1.73, 0.58], [1.00]]] -conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] - -# yrm33_config -# scales = [ [5.3] ] * 5 -# aspect_ratios = [ [[1, 1/sqrt(2), sqrt(2)]] ]*5 -# conv_sizes = [(136, 136), (67, 67), (33, 33), 
(16, 16), (8, 8)] - - -SMALL = 0 -MEDIUM = 1 -LARGE = 2 - -if __name__ == '__main__': - - with open(dump_file, 'rb') as f: - bboxes = pickle.load(f) - - sizes = [] - smalls = [] - for i in range(len(bboxes)): - area = bboxes[i][4] * bboxes[i][5] - if area < 32 ** 2: - sizes.append(SMALL) - smalls.append(area) - elif area < 96 ** 2: - sizes.append(MEDIUM) - else: - sizes.append(LARGE) - - # Each box is in the form [im_w, im_h, pos_x, pos_y, size_x, size_y] - - if use_augmented_boxes: - with open(aug_file, 'rb') as f: - bboxes_rel = pickle.load(f) - else: - bboxes_rel = to_relative(np.array(bboxes)) - - - with torch.no_grad(): - sizes = torch.Tensor(sizes) - - anchors = [make_priors(cs, s, ar) for cs, s, ar in zip(conv_sizes, scales, aspect_ratios)] - anchors = np.concatenate(anchors, axis=0) - anchors = torch.Tensor(anchors).cuda() - - bboxes_rel = torch.Tensor(bboxes_rel).cuda() - perGTAnchorMax = torch.zeros(bboxes_rel.shape[0]).cuda() - - chunk_size = 1000 - for i in range((bboxes_rel.size(0) // chunk_size) + 1): - start = i * chunk_size - end = min((i + 1) * chunk_size, bboxes_rel.size(0)) - - ious = jaccard(bboxes_rel[start:end, :], anchors) - maxes, maxidx = torch.max(ious, dim=1) - - perGTAnchorMax[start:end] = maxes - - - hits = (perGTAnchorMax > 0.5).float() - - print('Total recall: %.2f' % (torch.sum(hits) / hits.size(0) * 100)) - print() - - for i, metric in zip(range(3), ('small', 'medium', 'large')): - _hits = hits[sizes == i] - _size = (1 if _hits.size(0) == 0 else _hits.size(0)) - print(metric + ' recall: %.2f' % ((torch.sum(_hits) / _size) * 100)) - - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +This script compiles all the bounding boxes in the training data and +clusters them for each convout resolution on which they're used. + +Run this script from the Yolact root directory. +""" + +import os.path as osp +import json, pickle +import sys +from math import sqrt +from itertools import product +import torch +import random + +import numpy as np + +dump_file = 'weights/bboxes.pkl' +aug_file = 'weights/bboxes_aug.pkl' + +use_augmented_boxes = True + + +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [A,4]. + box_b: (tensor) bounding boxes, Shape: [B,4]. + Return: + (tensor) intersection area, Shape: [A,B]. 
+ """ + A = box_a.size(0) + B = box_b.size(0) + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), + box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), + box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + return inter[:, :, 0] * inter[:, :, 1] + + +def jaccard(box_a, box_b, iscrowd=False): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2]-box_b[:, 0]) * + (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + union = area_a + area_b - inter + + if iscrowd: + return inter / area_a + else: + return inter / union # [A,B] + +# Also convert to point form +def to_relative(bboxes): + return np.concatenate((bboxes[:, 2:4] / bboxes[:, :2], (bboxes[:, 2:4] + bboxes[:, 4:]) / bboxes[:, :2]), axis=1) + + +def make_priors(conv_size, scales, aspect_ratios): + prior_data = [] + conv_h = conv_size[0] + conv_w = conv_size[1] + + # Iteration order is important (it has to sync up with the convout) + for j, i in product(range(conv_h), range(conv_w)): + x = (i + 0.5) / conv_w + y = (j + 0.5) / conv_h + + for scale, ars in zip(scales, aspect_ratios): + for ar in ars: + w = scale * ar / conv_w + h = scale / ar / conv_h + + # Point form + prior_data += [x - w/2, y - h/2, x + w/2, y + h/2] + + return np.array(prior_data).reshape(-1, 4) + +# fixed_ssd_config +# scales = [[3.5, 4.95], [3.6, 4.90], [3.3, 4.02], [2.7, 3.10], [2.1, 2.37], [2.1, 2.37], [1.8, 1.92]] +# aspect_ratios = [ [[1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n], [1]] for n in [3, 5, 5, 5, 3, 3, 3] ] +# conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] + +scales = [[1.68, 2.91], + [2.95, 2.22, 0.84], + [2.23, 2.17, 3.12], + [0.76, 1.94, 2.72], + [2.10, 2.65], + [1.80, 1.92]] +aspect_ratios = [[[0.72, 0.96], [0.68, 1.17]], + [[1.28, 0.66], [0.63, 1.23], [0.89, 1.40]], + [[2.05, 1.24], [0.57, 0.83], [0.61, 1.15]], + [[1.00, 2.21], [0.47, 1.60], [1.44, 0.79]], + [[1.00, 1.41, 0.71, 1.73, 0.58], [1.08]], + [[1.00, 1.41, 0.71, 1.73, 0.58], [1.00]]] +conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] + +# yrm33_config +# scales = [ [5.3] ] * 5 +# aspect_ratios = [ [[1, 1/sqrt(2), sqrt(2)]] ]*5 +# conv_sizes = [(136, 136), (67, 67), (33, 33), (16, 16), (8, 8)] + + +SMALL = 0 +MEDIUM = 1 +LARGE = 2 + +if __name__ == '__main__': + + with open(dump_file, 'rb') as f: + bboxes = pickle.load(f) + + sizes = [] + smalls = [] + for i in range(len(bboxes)): + area = bboxes[i][4] * bboxes[i][5] + if area < 32 ** 2: + sizes.append(SMALL) + smalls.append(area) + elif area < 96 ** 2: + sizes.append(MEDIUM) + else: + sizes.append(LARGE) + + # Each box is in the form [im_w, im_h, pos_x, pos_y, size_x, size_y] + + if use_augmented_boxes: + with open(aug_file, 'rb') as f: + bboxes_rel = pickle.load(f) + else: + bboxes_rel = to_relative(np.array(bboxes)) + + + with torch.no_grad(): + sizes = 
torch.Tensor(sizes) + + anchors = [make_priors(cs, s, ar) for cs, s, ar in zip(conv_sizes, scales, aspect_ratios)] + anchors = np.concatenate(anchors, axis=0) + anchors = torch.Tensor(anchors).cuda() + + bboxes_rel = torch.Tensor(bboxes_rel).cuda() + perGTAnchorMax = torch.zeros(bboxes_rel.shape[0]).cuda() + + chunk_size = 1000 + for i in range((bboxes_rel.size(0) // chunk_size) + 1): + start = i * chunk_size + end = min((i + 1) * chunk_size, bboxes_rel.size(0)) + + ious = jaccard(bboxes_rel[start:end, :], anchors) + maxes, maxidx = torch.max(ious, dim=1) + + perGTAnchorMax[start:end] = maxes + + + hits = (perGTAnchorMax > 0.5).float() + + print('Total recall: %.2f' % (torch.sum(hits) / hits.size(0) * 100)) + print() + + for i, metric in zip(range(3), ('small', 'medium', 'large')): + _hits = hits[sizes == i] + _size = (1 if _hits.size(0) == 0 else _hits.size(0)) + print(metric + ' recall: %.2f' % ((torch.sum(_hits) / _size) * 100)) + + + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/cluster_bbox_sizes.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/cluster_bbox_sizes.py index 95ba75de62454930d7e0c7aa7c40d03481a8ceeb..e1ad776af225bdff87fc39443b9509f3c3933dc0 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/cluster_bbox_sizes.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/cluster_bbox_sizes.py @@ -1,83 +1,83 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -This script compiles all the bounding boxes in the training data and -clusters them for each convout resolution on which they're used. - -Run this script from the Yolact root directory. 
-""" - -import os.path as osp -import json, pickle -import sys - -import numpy as np -import sklearn.cluster as cluster - -dump_file = 'weights/bboxes.pkl' -max_size = 550 - -num_scale_clusters = 5 -num_aspect_ratio_clusters = 3 - -def to_relative(bboxes): - return bboxes[:, 2:4] / bboxes[:, :2] - -def process(bboxes): - return to_relative(bboxes) * max_size - -if __name__ == '__main__': - - with open(dump_file, 'rb') as f: - bboxes = pickle.load(f) - - bboxes = np.array(bboxes) - bboxes = process(bboxes) - bboxes = bboxes[(bboxes[:, 0] > 1) * (bboxes[:, 1] > 1)] - - scale = np.sqrt(bboxes[:, 0] * bboxes[:, 1]).reshape(-1, 1) - - clusterer = cluster.KMeans(num_scale_clusters, random_state=99, n_jobs=4) - assignments = clusterer.fit_predict(scale) - counts = np.bincount(assignments) - - cluster_centers = clusterer.cluster_centers_ - - center_indices = list(range(num_scale_clusters)) - center_indices.sort(key=lambda x: cluster_centers[x, 0]) - - for idx in center_indices: - center = cluster_centers[idx, 0] - boxes_for_center = bboxes[assignments == idx] - aspect_ratios = (boxes_for_center[:,0] / boxes_for_center[:,1]).reshape(-1, 1) - - c = cluster.KMeans(num_aspect_ratio_clusters, random_state=idx, n_jobs=4) - ca = c.fit_predict(aspect_ratios) - cc = np.bincount(ca) - - c = list(c.cluster_centers_.reshape(-1)) - cidx = list(range(num_aspect_ratio_clusters)) - cidx.sort(key=lambda x: -cc[x]) - - # import code - # code.interact(local=locals()) - - print('%.3f (%d) aspect ratios:' % (center, counts[idx])) - for idx in cidx: - print('\t%.2f (%d)' % (c[idx], cc[idx])) - print() - # exit() - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +This script compiles all the bounding boxes in the training data and +clusters them for each convout resolution on which they're used. + +Run this script from the Yolact root directory. 
+""" + +import os.path as osp +import json, pickle +import sys + +import numpy as np +import sklearn.cluster as cluster + +dump_file = 'weights/bboxes.pkl' +max_size = 550 + +num_scale_clusters = 5 +num_aspect_ratio_clusters = 3 + +def to_relative(bboxes): + return bboxes[:, 2:4] / bboxes[:, :2] + +def process(bboxes): + return to_relative(bboxes) * max_size + +if __name__ == '__main__': + + with open(dump_file, 'rb') as f: + bboxes = pickle.load(f) + + bboxes = np.array(bboxes) + bboxes = process(bboxes) + bboxes = bboxes[(bboxes[:, 0] > 1) * (bboxes[:, 1] > 1)] + + scale = np.sqrt(bboxes[:, 0] * bboxes[:, 1]).reshape(-1, 1) + + clusterer = cluster.KMeans(num_scale_clusters, random_state=99, n_jobs=4) + assignments = clusterer.fit_predict(scale) + counts = np.bincount(assignments) + + cluster_centers = clusterer.cluster_centers_ + + center_indices = list(range(num_scale_clusters)) + center_indices.sort(key=lambda x: cluster_centers[x, 0]) + + for idx in center_indices: + center = cluster_centers[idx, 0] + boxes_for_center = bboxes[assignments == idx] + aspect_ratios = (boxes_for_center[:,0] / boxes_for_center[:,1]).reshape(-1, 1) + + c = cluster.KMeans(num_aspect_ratio_clusters, random_state=idx, n_jobs=4) + ca = c.fit_predict(aspect_ratios) + cc = np.bincount(ca) + + c = list(c.cluster_centers_.reshape(-1)) + cidx = list(range(num_aspect_ratio_clusters)) + cidx.sort(key=lambda x: -cc[x]) + + # import code + # code.interact(local=locals()) + + print('%.3f (%d) aspect ratios:' % (center, counts[idx])) + for idx in cidx: + print('\t%.2f (%d)' % (c[idx], cc[idx])) + print() + # exit() + + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/compute_masks.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/compute_masks.py index b48358395892af9dd0e813280b18512ff8653288..4153ccd48e90922b1523f3198ccc897379b8de9e 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/compute_masks.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/compute_masks.py @@ -1,108 +1,108 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import numpy as np -import matplotlib.pyplot as plt -import cv2 -import torch -import torch.nn.functional as F - -COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), - (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) - -def mask_iou(mask1, mask2): - """ - Inputs inputs are matricies of size _ x N. Output is size _1 x _2. - Note: if iscrowd is True, then mask2 should be the crowd. 
- """ - intersection = torch.matmul(mask1, mask2.t()) - area1 = torch.sum(mask1, dim=1).view(1, -1) - area2 = torch.sum(mask2, dim=1).view(1, -1) - union = (area1.t() + area2) - intersection - - return intersection / union - -def paint_mask(img_numpy, mask, color): - h, w, _ = img_numpy.shape - img_numpy = img_numpy.copy() - - mask = np.tile(mask.reshape(h, w, 1), (1, 1, 3)) - color_np = np.array(color[:3]).reshape(1, 1, 3) - color_np = np.tile(color_np, (h, w, 1)) - mask_color = mask * color_np - - mask_alpha = 0.3 - - # Blend image and mask - image_crop = img_numpy * mask - img_numpy *= (1-mask) - img_numpy += image_crop * (1-mask_alpha) + mask_color * mask_alpha - - return img_numpy - -# Inverse sigmoid -def logit(x): - return np.log(x / (1-x + 0.0001) + 0.0001) - -def sigmoid(x): - return 1 / (1 + np.exp(-x)) - -img_fmt = '../data/coco/images/%012d.jpg' -with open('info.txt', 'r') as f: - img_id = int(f.read()) - -img = plt.imread(img_fmt % img_id).astype(np.float32) -h, w, _ = img.shape - -gt_masks = np.load('gt.npy').astype(np.float32).transpose(1, 2, 0) -proto_masks = np.load('proto.npy').astype(np.float32) - -proto_masks = torch.Tensor(proto_masks).permute(2, 0, 1).contiguous().unsqueeze(0) -proto_masks = F.interpolate(proto_masks, (h, w), mode='bilinear', align_corners=False).squeeze(0) -proto_masks = proto_masks.permute(1, 2, 0).numpy() - -# # A x = b -ls_A = proto_masks.reshape(-1, proto_masks.shape[-1]) -ls_b = gt_masks.reshape(-1, gt_masks.shape[-1]) - -# x is size [256, num_gt] -x = np.linalg.lstsq(ls_A, ls_b, rcond=None)[0] - -approximated_masks = (np.matmul(proto_masks, x) > 0.5).astype(np.float32) - -num_gt = approximated_masks.shape[2] -ious = mask_iou(torch.Tensor(approximated_masks.reshape(-1, num_gt).T), - torch.Tensor(gt_masks.reshape(-1, num_gt).T)) - -ious = [int(ious[i, i].item() * 100) for i in range(num_gt)] -ious.sort(key=lambda x: -x) - -print(ious) - -gt_img = img.copy() - -for i in range(num_gt): - gt_img = paint_mask(gt_img, gt_masks[:, :, i], COLORS[i % len(COLORS)]) - -plt.imshow(gt_img / 255) -plt.title('GT') -plt.show() - -for i in range(num_gt): - img = paint_mask(img, approximated_masks[:, :, i], COLORS[i % len(COLORS)]) - -plt.imshow(img / 255) -plt.title('Approximated') -plt.show() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import matplotlib.pyplot as plt +import cv2 +import torch +import torch.nn.functional as F + +COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), + (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) + +def mask_iou(mask1, mask2): + """ + Inputs inputs are matricies of size _ x N. Output is size _1 x _2. + Note: if iscrowd is True, then mask2 should be the crowd. 
+ """ + intersection = torch.matmul(mask1, mask2.t()) + area1 = torch.sum(mask1, dim=1).view(1, -1) + area2 = torch.sum(mask2, dim=1).view(1, -1) + union = (area1.t() + area2) - intersection + + return intersection / union + +def paint_mask(img_numpy, mask, color): + h, w, _ = img_numpy.shape + img_numpy = img_numpy.copy() + + mask = np.tile(mask.reshape(h, w, 1), (1, 1, 3)) + color_np = np.array(color[:3]).reshape(1, 1, 3) + color_np = np.tile(color_np, (h, w, 1)) + mask_color = mask * color_np + + mask_alpha = 0.3 + + # Blend image and mask + image_crop = img_numpy * mask + img_numpy *= (1-mask) + img_numpy += image_crop * (1-mask_alpha) + mask_color * mask_alpha + + return img_numpy + +# Inverse sigmoid +def logit(x): + return np.log(x / (1-x + 0.0001) + 0.0001) + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + +img_fmt = '../data/coco/images/%012d.jpg' +with open('info.txt', 'r') as f: + img_id = int(f.read()) + +img = plt.imread(img_fmt % img_id).astype(np.float32) +h, w, _ = img.shape + +gt_masks = np.load('gt.npy').astype(np.float32).transpose(1, 2, 0) +proto_masks = np.load('proto.npy').astype(np.float32) + +proto_masks = torch.Tensor(proto_masks).permute(2, 0, 1).contiguous().unsqueeze(0) +proto_masks = F.interpolate(proto_masks, (h, w), mode='bilinear', align_corners=False).squeeze(0) +proto_masks = proto_masks.permute(1, 2, 0).numpy() + +# # A x = b +ls_A = proto_masks.reshape(-1, proto_masks.shape[-1]) +ls_b = gt_masks.reshape(-1, gt_masks.shape[-1]) + +# x is size [256, num_gt] +x = np.linalg.lstsq(ls_A, ls_b, rcond=None)[0] + +approximated_masks = (np.matmul(proto_masks, x) > 0.5).astype(np.float32) + +num_gt = approximated_masks.shape[2] +ious = mask_iou(torch.Tensor(approximated_masks.reshape(-1, num_gt).T), + torch.Tensor(gt_masks.reshape(-1, num_gt).T)) + +ious = [int(ious[i, i].item() * 100) for i in range(num_gt)] +ious.sort(key=lambda x: -x) + +print(ious) + +gt_img = img.copy() + +for i in range(num_gt): + gt_img = paint_mask(gt_img, gt_masks[:, :, i], COLORS[i % len(COLORS)]) + +plt.imshow(gt_img / 255) +plt.title('GT') +plt.show() + +for i in range(num_gt): + img = paint_mask(img, approximated_masks[:, :, i], COLORS[i % len(COLORS)]) + +plt.imshow(img / 255) +plt.title('Approximated') +plt.show() diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_darknet.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_darknet.py index 1c2c40f12522e8297d7a1a2997e89d74cc78a169..d68d59f96a973cfc8893d60b16e1f7ec9bd4ae33 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_darknet.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_darknet.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -from backbone import DarkNetBackbone -import h5py -import torch - -f = h5py.File('darknet53.h5', 'r') -m = f['model_weights'] - -yolo_keys = list(m.keys()) -yolo_keys = [x for x in yolo_keys if len(m[x].keys()) > 0] -yolo_keys.sort() - -sd = DarkNetBackbone().state_dict() - -sd_keys = list(sd.keys()) -sd_keys.sort() - -# Note this won't work if there are 10 elements in some list but whatever that doesn't happen -layer_keys = list(set(['.'.join(x.split('.')[:-2]) for x in sd_keys])) -layer_keys.sort() - -# print([x for x in sd_keys if x.startswith(layer_keys[0])]) - -mapping = { - '.0.weight' : ('conv2d_%d', 'kernel:0'), - '.1.bias' : ('batch_normalization_%d', 'beta:0'), - '.1.weight' : ('batch_normalization_%d', 'gamma:0'), - '.1.running_var' : ('batch_normalization_%d', 'moving_variance:0'), - '.1.running_mean': ('batch_normalization_%d', 'moving_mean:0'), - '.1.num_batches_tracked': None, -} - -for i, layer_key in zip(range(1, len(layer_keys) + 1), layer_keys): - # This is pretty inefficient but I don't care - for weight_key in [x for x in sd_keys if x.startswith(layer_key)]: - diff = weight_key[len(layer_key):] - - if mapping[diff] is not None: - yolo_key = mapping[diff][0] % i - sub_key = mapping[diff][1] - - yolo_weight = torch.Tensor(m[yolo_key][yolo_key][sub_key].value) - if (len(yolo_weight.size()) == 4): - yolo_weight = yolo_weight.permute(3, 2, 0, 1).contiguous() - - sd[weight_key] = yolo_weight - -torch.save(sd, 'weights/darknet53.pth') - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from backbone import DarkNetBackbone +import h5py +import torch + +f = h5py.File('darknet53.h5', 'r') +m = f['model_weights'] + +yolo_keys = list(m.keys()) +yolo_keys = [x for x in yolo_keys if len(m[x].keys()) > 0] +yolo_keys.sort() + +sd = DarkNetBackbone().state_dict() + +sd_keys = list(sd.keys()) +sd_keys.sort() + +# Note this won't work if there are 10 elements in some list but whatever that doesn't happen +layer_keys = list(set(['.'.join(x.split('.')[:-2]) for x in sd_keys])) +layer_keys.sort() + +# print([x for x in sd_keys if x.startswith(layer_keys[0])]) + +mapping = { + '.0.weight' : ('conv2d_%d', 'kernel:0'), + '.1.bias' : ('batch_normalization_%d', 'beta:0'), + '.1.weight' : ('batch_normalization_%d', 'gamma:0'), + '.1.running_var' : ('batch_normalization_%d', 'moving_variance:0'), + '.1.running_mean': ('batch_normalization_%d', 'moving_mean:0'), + '.1.num_batches_tracked': None, +} + +for i, layer_key in zip(range(1, len(layer_keys) + 1), layer_keys): + # This is pretty inefficient but I don't care + for weight_key in [x for x in sd_keys if x.startswith(layer_key)]: + diff = weight_key[len(layer_key):] + + if mapping[diff] is not None: + yolo_key = mapping[diff][0] % i + sub_key = mapping[diff][1] + + yolo_weight = torch.Tensor(m[yolo_key][yolo_key][sub_key].value) + if (len(yolo_weight.size()) == 4): + yolo_weight = yolo_weight.permute(3, 2, 0, 1).contiguous() + + sd[weight_key] = yolo_weight + +torch.save(sd, 'weights/darknet53.pth') + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_sbd.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_sbd.py index a1c940061a328d9bbebf506a99cbcd08acbdbbfc..61f049a3662cbb9beb1874019ff8384194202361 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_sbd.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/convert_sbd.py @@ -1,88 +1,88 @@ -import scipy.io, scipy.ndimage -import os.path, json -import pycocotools.mask -import numpy as np - -def mask2bbox(mask): - rows = np.any(mask, axis=1) - cols = np.any(mask, axis=0) - rmin, rmax = np.where(rows)[0][[0, -1]] - cmin, cmax = np.where(cols)[0][[0, -1]] - - return cmin, rmin, cmax - cmin, rmax - rmin - - - -inst_path = './inst/' -img_path = './img/' -img_name_fmt = '%s.jpg' -ann_name_fmt = '%s.mat' - -image_id = 1 -ann_id = 1 - -types = ['train', 'val'] - -for t in types: - with open('%s.txt' % t, 'r') as f: - names = f.read().strip().split('\n') - - images = [] - annotations = [] - - for name in names: - img_name = img_name_fmt % name - - ann_path = os.path.join(inst_path, ann_name_fmt % name) - ann = scipy.io.loadmat(ann_path)['GTinst'][0][0] - - classes = [int(x[0]) for x in ann[2]] - seg = ann[0] - - for idx in range(len(classes)): - mask = (seg == (idx + 1)).astype(np.float) - - rle = pycocotools.mask.encode(np.asfortranarray(mask.astype(np.uint8))) - rle['counts'] = rle['counts'].decode('ascii') - - annotations.append({ - 'id': ann_id, - 'image_id': image_id, - 'category_id': classes[idx], - 'segmentation': rle, - 'area': float(mask.sum()), - 'bbox': [int(x) for x in mask2bbox(mask)], - 'iscrowd': 0 - }) - - ann_id += 1 - - img_name = img_name_fmt % name - img = scipy.ndimage.imread(os.path.join(img_path, img_name)) - - images.append({ - 'id': image_id, - 'width': img.shape[1], - 'height': img.shape[0], - 'file_name': img_name - }) - - image_id += 1 - - info = { - 'year': 2012, - 'version': 1, - 'description': 'Pascal SBD', - } - - categories = [{'id': 
x+1} for x in range(20)] - - with open('pascal_sbd_%s.json' % t, 'w') as f: - json.dump({ - 'info': info, - 'images': images, - 'annotations': annotations, - 'licenses': {}, - 'categories': categories - }, f) - +import scipy.io, scipy.ndimage +import os.path, json +import pycocotools.mask +import numpy as np + +def mask2bbox(mask): + rows = np.any(mask, axis=1) + cols = np.any(mask, axis=0) + rmin, rmax = np.where(rows)[0][[0, -1]] + cmin, cmax = np.where(cols)[0][[0, -1]] + + return cmin, rmin, cmax - cmin, rmax - rmin + + + +inst_path = './inst/' +img_path = './img/' +img_name_fmt = '%s.jpg' +ann_name_fmt = '%s.mat' + +image_id = 1 +ann_id = 1 + +types = ['train', 'val'] + +for t in types: + with open('%s.txt' % t, 'r') as f: + names = f.read().strip().split('\n') + + images = [] + annotations = [] + + for name in names: + img_name = img_name_fmt % name + + ann_path = os.path.join(inst_path, ann_name_fmt % name) + ann = scipy.io.loadmat(ann_path)['GTinst'][0][0] + + classes = [int(x[0]) for x in ann[2]] + seg = ann[0] + + for idx in range(len(classes)): + mask = (seg == (idx + 1)).astype(np.float) + + rle = pycocotools.mask.encode(np.asfortranarray(mask.astype(np.uint8))) + rle['counts'] = rle['counts'].decode('ascii') + + annotations.append({ + 'id': ann_id, + 'image_id': image_id, + 'category_id': classes[idx], + 'segmentation': rle, + 'area': float(mask.sum()), + 'bbox': [int(x) for x in mask2bbox(mask)], + 'iscrowd': 0 + }) + + ann_id += 1 + + img_name = img_name_fmt % name + img = scipy.ndimage.imread(os.path.join(img_path, img_name)) + + images.append({ + 'id': image_id, + 'width': img.shape[1], + 'height': img.shape[0], + 'file_name': img_name + }) + + image_id += 1 + + info = { + 'year': 2012, + 'version': 1, + 'description': 'Pascal SBD', + } + + categories = [{'id': x+1} for x in range(20)] + + with open('pascal_sbd_%s.json' % t, 'w') as f: + json.dump({ + 'info': info, + 'images': images, + 'annotations': annotations, + 'licenses': {}, + 'categories': categories + }, f) + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/eval.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/eval.sh index c038b910949ac851b17518894b334f4882a88be4..fbd5a429dd3d69c3b35a5f6331e590aa9081d1f3 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/eval.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/eval.sh @@ -1,14 +1,14 @@ -#!/bin/bash -#SBATCH -p GPU-small -#SBATCH -t 2:00:00 -#SBATCH --gres=gpu:p100:1 -#SBATCH --no-requeue - -# Usage: ./eval.sh weights extra_args - -module load python/3.6.4_gcc5_np1.14.5 -module load cuda/9.0 - -cd $SCRATCH/yolact - -python3 eval.py --trained_model=$1 --no_bar $2 > logs/eval/$(basename -- $1).log 2>&1 +#!/bin/bash +#SBATCH -p GPU-small +#SBATCH -t 2:00:00 +#SBATCH --gres=gpu:p100:1 +#SBATCH --no-requeue + +# Usage: ./eval.sh weights extra_args + +module load python/3.6.4_gcc5_np1.14.5 +module load cuda/9.0 + +cd $SCRATCH/yolact + +python3 eval.py --trained_model=$1 --no_bar $2 > logs/eval/$(basename -- $1).log 2>&1 diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/make_grid.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/make_grid.py index fd24ca8120c6bdc0a5f1ea9c1b68e700d9c1804a..4039e106c3b1bf191ca90f9ef5e9ee29c7b72672 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/make_grid.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/make_grid.py @@ -1,218 +1,218 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not 
use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import numpy as np -import math, random - -import matplotlib.pyplot as plt -from matplotlib.widgets import Slider, Button - - -fig, ax = plt.subplots() -plt.subplots_adjust(bottom=0.24) -im_handle = None - -save_path = 'grid.npy' - -center_x, center_y = (0.5, 0.5) -grid_w, grid_h = (35, 35) -spacing = 0 -scale = 4 -angle = 0 -grid = None - -all_grids = [] -unique = False - -# A hack -disable_render = False - -def render(): - if disable_render: - return - - x = np.tile(np.array(list(range(grid_w)), dtype=np.float).reshape(1, grid_w), [grid_h, 1]) - grid_w * center_x - y = np.tile(np.array(list(range(grid_h)), dtype=np.float).reshape(grid_h, 1), [1, grid_w]) - grid_h * center_y - - x /= scale - y /= scale - - a1 = angle + math.pi / 3 - a2 = -angle + math.pi / 3 - a3 = angle - - z1 = x * math.sin(a1) + y * math.cos(a1) - z2 = x * math.sin(a2) - y * math.cos(a2) - z3 = x * math.sin(a3) + y * math.cos(a3) - - s1 = np.square(np.sin(z1)) - s2 = np.square(np.sin(z2)) - s3 = np.square(np.sin(z3)) - - line_1 = np.exp(s1 * spacing) * s1 - line_2 = np.exp(s2 * spacing) * s2 - line_3 = np.exp(s3 * spacing) * s3 - - global grid - grid = np.clip(1 - (line_1 + line_2 + line_3) / 3, 0, 1) - - global im_handle - if im_handle is None: - im_handle = plt.imshow(grid) - else: - im_handle.set_data(grid) - fig.canvas.draw_idle() - -def update_scale(val): - global scale - scale = val - - render() - -def update_angle(val): - global angle - angle = val - - render() - -def update_centerx(val): - global center_x - center_x = val - - render() - -def update_centery(val): - global center_y - center_y = val - - render() - -def update_spacing(val): - global spacing - spacing = val - - render() - -def randomize(val): - global center_x, center_y, spacing, scale, angle, disable_render - - center_x, center_y = (random.uniform(0, 1), random.uniform(0, 1)) - spacing = random.uniform(-0.2, 2) - scale = 4 * math.exp(random.uniform(-1, 1)) - angle = random.uniform(-math.pi, math.pi) - - disable_render = True - - scale_slider.set_val(scale) - angle_slider.set_val(angle) - centx_slider.set_val(center_x) - centy_slider.set_val(center_y) - spaci_slider.set_val(spacing) - - disable_render = False - - render() - -def add(val): - all_grids.append(grid) - - global unique - if not unique: - unique = test_uniqueness(np.stack(all_grids)) - - export_len_text.set_text('Num Grids: ' + str(len(all_grids))) - fig.canvas.draw_idle() - -def add_randomize(val): - add(val) - randomize(val) - -def export(val): - np.save(save_path, np.stack(all_grids)) - print('Saved %d grids to "%s"' % (len(all_grids), save_path)) - - global unique - unique = False - all_grids.clear() - - export_len_text.set_text('Num Grids: ' + str(len(all_grids))) - fig.canvas.draw_idle() - -def test_uniqueness(grids): - # Grids shape [ngrids, h, w] - grids = grids.reshape((-1, grid_h, grid_w)) - - for y in range(grid_h): - for x in range(grid_h): - pixel_features = grids[:, y, x] - - # l1 distance for this pixel with every other - l1_dist = 
np.sum(np.abs(grids - np.tile(pixel_features, grid_h*grid_w).reshape((-1, grid_h, grid_w))), axis=0) - - # Equal if l1 distance is really small. Note that this will include this pixel - num_equal = np.sum((l1_dist < 0.0001).astype(np.int32)) - - if num_equal > 1: - print('Pixel at (%d, %d) has %d other pixel%s with the same representation.' % (x, y, num_equal-1, '' if num_equal==2 else 's')) - return False - - print('Each pixel has a distinct representation.') - return True - - - -render() - -axis = plt.axes([0.22, 0.19, 0.59, 0.03], facecolor='lightgoldenrodyellow') -scale_slider = Slider(axis, 'Scale', 0.1, 20, valinit=scale, valstep=0.1) -scale_slider.on_changed(update_scale) - -axis = plt.axes([0.22, 0.15, 0.59, 0.03], facecolor='lightgoldenrodyellow') -angle_slider = Slider(axis, 'Angle', -math.pi, math.pi, valinit=angle, valstep=0.1) -angle_slider.on_changed(update_angle) - -axis = plt.axes([0.22, 0.11, 0.59, 0.03], facecolor='lightgoldenrodyellow') -centx_slider = Slider(axis, 'Center X', 0, 1, valinit=center_x, valstep=0.05) -centx_slider.on_changed(update_centerx) - -axis = plt.axes([0.22, 0.07, 0.59, 0.03], facecolor='lightgoldenrodyellow') -centy_slider = Slider(axis, 'Center Y', 0, 1, valinit=center_y, valstep=0.05) -centy_slider.on_changed(update_centery) - -axis = plt.axes([0.22, 0.03, 0.59, 0.03], facecolor='lightgoldenrodyellow') -spaci_slider = Slider(axis, 'Spacing', -1, 2, valinit=spacing, valstep=0.05) -spaci_slider.on_changed(update_spacing) - -axis = plt.axes([0.8, 0.54, 0.15, 0.05], facecolor='lightgoldenrodyellow') -rando_button = Button(axis, 'Randomize') -rando_button.on_clicked(randomize) - -axis = plt.axes([0.8, 0.48, 0.15, 0.05], facecolor='lightgoldenrodyellow') -addgr_button = Button(axis, 'Add') -addgr_button.on_clicked(add) - -# Likely not a good way to do this but whatever -export_len_text = plt.text(0, 3, 'Num Grids: 0') - -axis = plt.axes([0.8, 0.42, 0.15, 0.05], facecolor='lightgoldenrodyellow') -addra_button = Button(axis, 'Add / Rand') -addra_button.on_clicked(add_randomize) - -axis = plt.axes([0.8, 0.36, 0.15, 0.05], facecolor='lightgoldenrodyellow') -saveg_button = Button(axis, 'Save') -saveg_button.on_clicked(export) - - - -plt.show() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import math, random + +import matplotlib.pyplot as plt +from matplotlib.widgets import Slider, Button + + +fig, ax = plt.subplots() +plt.subplots_adjust(bottom=0.24) +im_handle = None + +save_path = 'grid.npy' + +center_x, center_y = (0.5, 0.5) +grid_w, grid_h = (35, 35) +spacing = 0 +scale = 4 +angle = 0 +grid = None + +all_grids = [] +unique = False + +# A hack +disable_render = False + +def render(): + if disable_render: + return + + x = np.tile(np.array(list(range(grid_w)), dtype=np.float).reshape(1, grid_w), [grid_h, 1]) - grid_w * center_x + y = np.tile(np.array(list(range(grid_h)), dtype=np.float).reshape(grid_h, 1), [1, grid_w]) - grid_h * center_y + + x /= scale + y /= scale + + a1 = angle + math.pi / 3 + a2 = -angle + math.pi / 3 + a3 = angle + + z1 = x * math.sin(a1) + y * math.cos(a1) + z2 = x * math.sin(a2) - y * math.cos(a2) + z3 = x * math.sin(a3) + y * math.cos(a3) + + s1 = np.square(np.sin(z1)) + s2 = np.square(np.sin(z2)) + s3 = np.square(np.sin(z3)) + + line_1 = np.exp(s1 * spacing) * s1 + line_2 = np.exp(s2 * spacing) * s2 + line_3 = np.exp(s3 * spacing) * s3 + + global grid + grid = np.clip(1 - (line_1 + line_2 + line_3) / 3, 0, 1) + + global im_handle + if im_handle is None: + im_handle = plt.imshow(grid) + else: + im_handle.set_data(grid) + fig.canvas.draw_idle() + +def update_scale(val): + global scale + scale = val + + render() + +def update_angle(val): + global angle + angle = val + + render() + +def update_centerx(val): + global center_x + center_x = val + + render() + +def update_centery(val): + global center_y + center_y = val + + render() + +def update_spacing(val): + global spacing + spacing = val + + render() + +def randomize(val): + global center_x, center_y, spacing, scale, angle, disable_render + + center_x, center_y = (random.uniform(0, 1), random.uniform(0, 1)) + spacing = random.uniform(-0.2, 2) + scale = 4 * math.exp(random.uniform(-1, 1)) + angle = random.uniform(-math.pi, math.pi) + + disable_render = True + + scale_slider.set_val(scale) + angle_slider.set_val(angle) + centx_slider.set_val(center_x) + centy_slider.set_val(center_y) + spaci_slider.set_val(spacing) + + disable_render = False + + render() + +def add(val): + all_grids.append(grid) + + global unique + if not unique: + unique = test_uniqueness(np.stack(all_grids)) + + export_len_text.set_text('Num Grids: ' + str(len(all_grids))) + fig.canvas.draw_idle() + +def add_randomize(val): + add(val) + randomize(val) + +def export(val): + np.save(save_path, np.stack(all_grids)) + print('Saved %d grids to "%s"' % (len(all_grids), save_path)) + + global unique + unique = False + all_grids.clear() + + export_len_text.set_text('Num Grids: ' + str(len(all_grids))) + fig.canvas.draw_idle() + +def test_uniqueness(grids): + # Grids shape [ngrids, h, w] + grids = grids.reshape((-1, grid_h, grid_w)) + + for y in range(grid_h): + for x in range(grid_h): + pixel_features = grids[:, y, x] + + # l1 distance for this pixel with every other + l1_dist = np.sum(np.abs(grids - np.tile(pixel_features, grid_h*grid_w).reshape((-1, grid_h, grid_w))), axis=0) + + # Equal if l1 distance is really small. Note that this will include this pixel + num_equal = np.sum((l1_dist < 0.0001).astype(np.int32)) + + if num_equal > 1: + print('Pixel at (%d, %d) has %d other pixel%s with the same representation.' 
% (x, y, num_equal-1, '' if num_equal==2 else 's')) + return False + + print('Each pixel has a distinct representation.') + return True + + + +render() + +axis = plt.axes([0.22, 0.19, 0.59, 0.03], facecolor='lightgoldenrodyellow') +scale_slider = Slider(axis, 'Scale', 0.1, 20, valinit=scale, valstep=0.1) +scale_slider.on_changed(update_scale) + +axis = plt.axes([0.22, 0.15, 0.59, 0.03], facecolor='lightgoldenrodyellow') +angle_slider = Slider(axis, 'Angle', -math.pi, math.pi, valinit=angle, valstep=0.1) +angle_slider.on_changed(update_angle) + +axis = plt.axes([0.22, 0.11, 0.59, 0.03], facecolor='lightgoldenrodyellow') +centx_slider = Slider(axis, 'Center X', 0, 1, valinit=center_x, valstep=0.05) +centx_slider.on_changed(update_centerx) + +axis = plt.axes([0.22, 0.07, 0.59, 0.03], facecolor='lightgoldenrodyellow') +centy_slider = Slider(axis, 'Center Y', 0, 1, valinit=center_y, valstep=0.05) +centy_slider.on_changed(update_centery) + +axis = plt.axes([0.22, 0.03, 0.59, 0.03], facecolor='lightgoldenrodyellow') +spaci_slider = Slider(axis, 'Spacing', -1, 2, valinit=spacing, valstep=0.05) +spaci_slider.on_changed(update_spacing) + +axis = plt.axes([0.8, 0.54, 0.15, 0.05], facecolor='lightgoldenrodyellow') +rando_button = Button(axis, 'Randomize') +rando_button.on_clicked(randomize) + +axis = plt.axes([0.8, 0.48, 0.15, 0.05], facecolor='lightgoldenrodyellow') +addgr_button = Button(axis, 'Add') +addgr_button.on_clicked(add) + +# Likely not a good way to do this but whatever +export_len_text = plt.text(0, 3, 'Num Grids: 0') + +axis = plt.axes([0.8, 0.42, 0.15, 0.05], facecolor='lightgoldenrodyellow') +addra_button = Button(axis, 'Add / Rand') +addra_button.on_clicked(add_randomize) + +axis = plt.axes([0.8, 0.36, 0.15, 0.05], facecolor='lightgoldenrodyellow') +saveg_button = Button(axis, 'Save') +saveg_button.on_clicked(export) + + + +plt.show() diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/optimize_bboxes.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/optimize_bboxes.py index 4e7916ee1a271f6603e055cb850ca97300ed4305..5db9ef67e988e00d321e235fc8aaca06be6f244e 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/optimize_bboxes.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/optimize_bboxes.py @@ -1,218 +1,218 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Instead of clustering bbox widths and heights, this script -directly optimizes average IoU across the training set given -the specified number of anchor boxes. - -Run this script from the Yolact root directory. 
-""" - -import pickle -import random -from itertools import product -from math import sqrt - -import numpy as np -import torch -from scipy.optimize import minimize - -dump_file = 'weights/bboxes.pkl' -aug_file = 'weights/bboxes_aug.pkl' - -use_augmented_boxes = True - - -def intersect(box_a, box_b): - """ We resize both tensors to [A,B,2] without new malloc: - [A,2] -> [A,1,2] -> [A,B,2] - [B,2] -> [1,B,2] -> [A,B,2] - Then we compute the area of intersect between box_a and box_b. - Args: - box_a: (tensor) bounding boxes, Shape: [A,4]. - box_b: (tensor) bounding boxes, Shape: [B,4]. - Return: - (tensor) intersection area, Shape: [A,B]. - """ - A = box_a.size(0) - B = box_b.size(0) - max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), - box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) - min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), - box_b[:, :2].unsqueeze(0).expand(A, B, 2)) - inter = torch.clamp((max_xy - min_xy), min=0) - return inter[:, :, 0] * inter[:, :, 1] - - -def jaccard(box_a, box_b, iscrowd=False): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. Here we operate on - ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. - E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] - box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] - Return: - jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] - """ - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] - area_b = ((box_b[:, 2]-box_b[:, 0]) * - (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] - union = area_a + area_b - inter - - if iscrowd: - return inter / area_a - else: - return inter / union # [A,B] - -# Also convert to point form -def to_relative(bboxes): - return np.concatenate((bboxes[:, 2:4] / bboxes[:, :2], (bboxes[:, 2:4] + bboxes[:, 4:]) / bboxes[:, :2]), axis=1) - - -def make_priors(conv_size, scales, aspect_ratios): - prior_data = [] - conv_h = conv_size[0] - conv_w = conv_size[1] - - # Iteration order is important (it has to sync up with the convout) - for j, i in product(range(conv_h), range(conv_w)): - x = (i + 0.5) / conv_w - y = (j + 0.5) / conv_h - - for scale, ars in zip(scales, aspect_ratios): - for ar in ars: - w = scale * ar / conv_w - h = scale / ar / conv_h - - # Point form - prior_data += [x - w/2, y - h/2, x + w/2, y + h/2] - return torch.Tensor(prior_data).view(-1, 4).cuda() - - - -scales = [[1.68, 2.91], [2.95, 2.22, 0.84], [2.17, 2.22, 3.22], [0.76, 2.06, 2.81], [5.33, 2.79], [13.69]] -aspect_ratios = [[[0.72, 0.96], [0.68, 1.17]], [[1.30, 0.66], [0.63, 1.23], [0.87, 1.41]], [[1.96, 1.23], [0.58, 0.84], [0.61, 1.15]], [[19.79, 2.21], [0.47, 1.76], [1.38, 0.79]], [[4.79, 17.96], [1.04]], [[14.82]]] -conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] - -optimize_scales = False - -batch_idx = 0 - - -def compute_hits(bboxes, anchors, iou_threshold=0.5): - ious = jaccard(bboxes, anchors) - perGTAnchorMax, _ = torch.max(ious, dim=1) - - return (perGTAnchorMax > iou_threshold) - -def compute_recall(hits, base_hits): - hits = (hits | base_hits).float() - return torch.sum(hits) / hits.size(0) - - -def step(x, x_func, bboxes, base_hits, optim_idx): - # This should set the scale and aspect ratio - x_func(x, scales[optim_idx], aspect_ratios[optim_idx]) - - anchors 
= make_priors(conv_sizes[optim_idx], scales[optim_idx], aspect_ratios[optim_idx]) - - return -float(compute_recall(compute_hits(bboxes, anchors), base_hits).cpu()) - - -def optimize(full_bboxes, optim_idx, batch_size=5000): - global batch_idx, scales, aspect_ratios, conv_sizes - - start = batch_idx * batch_size - end = min((batch_idx + 1) * batch_size, full_bboxes.size(0)) - - if batch_idx > (full_bboxes.size(0) // batch_size): - batch_idx = 0 - - bboxes = full_bboxes[start:end, :] - - anchor_base = [ - make_priors(conv_sizes[idx], scales[idx], aspect_ratios[idx]) - for idx in range(len(conv_sizes)) if idx != optim_idx] - base_hits = compute_hits(bboxes, torch.cat(anchor_base, dim=0)) - - - def set_x(x, scales, aspect_ratios): - if optimize_scales: - for i in range(len(scales)): - scales[i] = max(x[i], 0) - else: - k = 0 - for i in range(len(aspect_ratios)): - for j in range(len(aspect_ratios[i])): - aspect_ratios[i][j] = x[k] - k += 1 - - - res = minimize(step, x0=scales[optim_idx] if optimize_scales else sum(aspect_ratios[optim_idx], []), method='Powell', - args = (set_x, bboxes, base_hits, optim_idx),) - - -def pretty_str(x:list): - if isinstance(x, list): - return '[' + ', '.join([pretty_str(y) for y in x]) + ']' - elif isinstance(x, np.ndarray): - return pretty_str(list(x)) - else: - return '%.2f' % x - -if __name__ == '__main__': - - if use_augmented_boxes: - with open(aug_file, 'rb') as f: - bboxes = pickle.load(f) - else: - # Load widths and heights from a dump file. Obtain this with - # python3 scripts/save_bboxes.py - with open(dump_file, 'rb') as f: - bboxes = pickle.load(f) - - bboxes = np.array(bboxes) - bboxes = to_relative(bboxes) - - with torch.no_grad(): - bboxes = torch.Tensor(bboxes).cuda() - - def print_out(): - if optimize_scales: - print('Scales: ' + pretty_str(scales)) - else: - print('Aspect Ratios: ' + pretty_str(aspect_ratios)) - - for p in range(10): - print('(Sub Iteration) ', end='') - for i in range(len(conv_sizes)): - print('%d ' % i, end='', flush=True) - optimize(bboxes, i) - print('Done', end='\r') - - print('(Iteration %d) ' % p, end='') - print_out() - print() - - optimize_scales = not optimize_scales - - print('scales = ' + pretty_str(scales)) - print('aspect_ratios = ' + pretty_str(aspect_ratios)) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Instead of clustering bbox widths and heights, this script +directly optimizes average IoU across the training set given +the specified number of anchor boxes. + +Run this script from the Yolact root directory. 
+""" + +import pickle +import random +from itertools import product +from math import sqrt + +import numpy as np +import torch +from scipy.optimize import minimize + +dump_file = 'weights/bboxes.pkl' +aug_file = 'weights/bboxes_aug.pkl' + +use_augmented_boxes = True + + +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [A,4]. + box_b: (tensor) bounding boxes, Shape: [B,4]. + Return: + (tensor) intersection area, Shape: [A,B]. + """ + A = box_a.size(0) + B = box_b.size(0) + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), + box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), + box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + return inter[:, :, 0] * inter[:, :, 1] + + +def jaccard(box_a, box_b, iscrowd=False): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2]-box_b[:, 0]) * + (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + union = area_a + area_b - inter + + if iscrowd: + return inter / area_a + else: + return inter / union # [A,B] + +# Also convert to point form +def to_relative(bboxes): + return np.concatenate((bboxes[:, 2:4] / bboxes[:, :2], (bboxes[:, 2:4] + bboxes[:, 4:]) / bboxes[:, :2]), axis=1) + + +def make_priors(conv_size, scales, aspect_ratios): + prior_data = [] + conv_h = conv_size[0] + conv_w = conv_size[1] + + # Iteration order is important (it has to sync up with the convout) + for j, i in product(range(conv_h), range(conv_w)): + x = (i + 0.5) / conv_w + y = (j + 0.5) / conv_h + + for scale, ars in zip(scales, aspect_ratios): + for ar in ars: + w = scale * ar / conv_w + h = scale / ar / conv_h + + # Point form + prior_data += [x - w/2, y - h/2, x + w/2, y + h/2] + return torch.Tensor(prior_data).view(-1, 4).cuda() + + + +scales = [[1.68, 2.91], [2.95, 2.22, 0.84], [2.17, 2.22, 3.22], [0.76, 2.06, 2.81], [5.33, 2.79], [13.69]] +aspect_ratios = [[[0.72, 0.96], [0.68, 1.17]], [[1.30, 0.66], [0.63, 1.23], [0.87, 1.41]], [[1.96, 1.23], [0.58, 0.84], [0.61, 1.15]], [[19.79, 2.21], [0.47, 1.76], [1.38, 0.79]], [[4.79, 17.96], [1.04]], [[14.82]]] +conv_sizes = [(35, 35), (18, 18), (9, 9), (5, 5), (3, 3), (2, 2)] + +optimize_scales = False + +batch_idx = 0 + + +def compute_hits(bboxes, anchors, iou_threshold=0.5): + ious = jaccard(bboxes, anchors) + perGTAnchorMax, _ = torch.max(ious, dim=1) + + return (perGTAnchorMax > iou_threshold) + +def compute_recall(hits, base_hits): + hits = (hits | base_hits).float() + return torch.sum(hits) / hits.size(0) + + +def step(x, x_func, bboxes, base_hits, optim_idx): + # This should set the scale and aspect ratio + x_func(x, scales[optim_idx], aspect_ratios[optim_idx]) + + anchors 
= make_priors(conv_sizes[optim_idx], scales[optim_idx], aspect_ratios[optim_idx]) + + return -float(compute_recall(compute_hits(bboxes, anchors), base_hits).cpu()) + + +def optimize(full_bboxes, optim_idx, batch_size=5000): + global batch_idx, scales, aspect_ratios, conv_sizes + + start = batch_idx * batch_size + end = min((batch_idx + 1) * batch_size, full_bboxes.size(0)) + + if batch_idx > (full_bboxes.size(0) // batch_size): + batch_idx = 0 + + bboxes = full_bboxes[start:end, :] + + anchor_base = [ + make_priors(conv_sizes[idx], scales[idx], aspect_ratios[idx]) + for idx in range(len(conv_sizes)) if idx != optim_idx] + base_hits = compute_hits(bboxes, torch.cat(anchor_base, dim=0)) + + + def set_x(x, scales, aspect_ratios): + if optimize_scales: + for i in range(len(scales)): + scales[i] = max(x[i], 0) + else: + k = 0 + for i in range(len(aspect_ratios)): + for j in range(len(aspect_ratios[i])): + aspect_ratios[i][j] = x[k] + k += 1 + + + res = minimize(step, x0=scales[optim_idx] if optimize_scales else sum(aspect_ratios[optim_idx], []), method='Powell', + args = (set_x, bboxes, base_hits, optim_idx),) + + +def pretty_str(x:list): + if isinstance(x, list): + return '[' + ', '.join([pretty_str(y) for y in x]) + ']' + elif isinstance(x, np.ndarray): + return pretty_str(list(x)) + else: + return '%.2f' % x + +if __name__ == '__main__': + + if use_augmented_boxes: + with open(aug_file, 'rb') as f: + bboxes = pickle.load(f) + else: + # Load widths and heights from a dump file. Obtain this with + # python3 scripts/save_bboxes.py + with open(dump_file, 'rb') as f: + bboxes = pickle.load(f) + + bboxes = np.array(bboxes) + bboxes = to_relative(bboxes) + + with torch.no_grad(): + bboxes = torch.Tensor(bboxes).cuda() + + def print_out(): + if optimize_scales: + print('Scales: ' + pretty_str(scales)) + else: + print('Aspect Ratios: ' + pretty_str(aspect_ratios)) + + for p in range(10): + print('(Sub Iteration) ', end='') + for i in range(len(conv_sizes)): + print('%d ' % i, end='', flush=True) + optimize(bboxes, i) + print('Done', end='\r') + + print('(Iteration %d) ' % p, end='') + print_out() + print() + + optimize_scales = not optimize_scales + + print('scales = ' + pretty_str(scales)) + print('aspect_ratios = ' + pretty_str(aspect_ratios)) + + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/parse_eval.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/parse_eval.py index c6788ba07f6e77a751a34953489ee1d5b51d5d46..481ae586f89c3896414944b29a2bacfae364b6f0 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/parse_eval.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/parse_eval.py @@ -1,63 +1,63 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import re, sys, os -import matplotlib.pyplot as plt -from matplotlib._color_data import XKCD_COLORS - -with open(sys.argv[1], 'r') as f: - txt = f.read() - -txt, overall = txt.split('overall performance') - -class_names = [] -mAP_overall = [] -mAP_small = [] -mAP_medium = [] -mAP_large = [] - -for class_result in txt.split('evaluate category: ')[1:]: - lines = class_result.split('\n') - class_names.append(lines[0]) - - def grabMAP(string): - return float(string.split('] = ')[1]) * 100 - - mAP_overall.append(grabMAP(lines[ 7])) - mAP_small .append(grabMAP(lines[10])) - mAP_medium .append(grabMAP(lines[11])) - mAP_large .append(grabMAP(lines[12])) - -mAP_map = { - 'small': mAP_small, - 'medium': mAP_medium, - 'large': mAP_large, -} - -if len(sys.argv) > 2: - bars = plt.bar(class_names, mAP_map[sys.argv[2]]) - plt.title(sys.argv[2] + ' mAP per class') -else: - bars = plt.bar(class_names, mAP_overall) - plt.title('overall mAP per class') - -colors = list(XKCD_COLORS.values()) - -for idx, bar in enumerate(bars): - # Mmm pseudorandom colors - char_sum = sum([ord(char) for char in class_names[idx]]) - bar.set_color(colors[char_sum % len(colors)]) - -plt.xticks(rotation='vertical') -plt.show() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import re, sys, os +import matplotlib.pyplot as plt +from matplotlib._color_data import XKCD_COLORS + +with open(sys.argv[1], 'r') as f: + txt = f.read() + +txt, overall = txt.split('overall performance') + +class_names = [] +mAP_overall = [] +mAP_small = [] +mAP_medium = [] +mAP_large = [] + +for class_result in txt.split('evaluate category: ')[1:]: + lines = class_result.split('\n') + class_names.append(lines[0]) + + def grabMAP(string): + return float(string.split('] = ')[1]) * 100 + + mAP_overall.append(grabMAP(lines[ 7])) + mAP_small .append(grabMAP(lines[10])) + mAP_medium .append(grabMAP(lines[11])) + mAP_large .append(grabMAP(lines[12])) + +mAP_map = { + 'small': mAP_small, + 'medium': mAP_medium, + 'large': mAP_large, +} + +if len(sys.argv) > 2: + bars = plt.bar(class_names, mAP_map[sys.argv[2]]) + plt.title(sys.argv[2] + ' mAP per class') +else: + bars = plt.bar(class_names, mAP_overall) + plt.title('overall mAP per class') + +colors = list(XKCD_COLORS.values()) + +for idx, bar in enumerate(bars): + # Mmm pseudorandom colors + char_sum = sum([ord(char) for char in class_names[idx]]) + bar.set_color(colors[char_sum % len(colors)]) + +plt.xticks(rotation='vertical') +plt.show() diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/plot_loss.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/plot_loss.py index 415d1041d6f28035c5d590270d6ddfccd3f342c4..0442bc633b43cfe85aa295b65cd19ebde7855e97 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/plot_loss.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/plot_loss.py @@ -1,92 +1,92 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import re, sys, os -import matplotlib.pyplot as plt - -from utils.functions import MovingAverage - -with open(sys.argv[1], 'r') as f: - inp = f.read() - -patterns = { - 'train': re.compile(r'\[\s*(?P\d+)\]\s*(?P\d+) \|\| B: (?P\S+) \| C: (?P\S+) \| M: (?P\S+) \|( S: (?P\S+) \|)? 
T: (?P\S+)'), - 'val': re.compile(r'\s*(?P[a-z]+) \|\s*(?P\S+)') -} -data = {key: [] for key in patterns} - -for line in inp.split('\n'): - for key, pattern in patterns.items(): - f = pattern.search(line) - - if f is not None: - datum = f.groupdict() - for k, v in datum.items(): - if v is not None: - try: - v = float(v) - except ValueError: - pass - datum[k] = v - - if key == 'val': - datum = (datum, data['train'][-1]) - data[key].append(datum) - break - - -def smoother(y, interval=100): - avg = MovingAverage(interval) - - for i in range(len(y)): - avg.append(y[i]) - y[i] = avg.get_avg() - - return y - -def plot_train(data): - plt.title(os.path.basename(sys.argv[1]) + ' Training Loss') - plt.xlabel('Iteration') - plt.ylabel('Loss') - - loss_names = ['BBox Loss', 'Conf Loss', 'Mask Loss'] - - x = [x['iteration'] for x in data] - plt.plot(x, smoother([y['b'] for y in data])) - plt.plot(x, smoother([y['c'] for y in data])) - plt.plot(x, smoother([y['m'] for y in data])) - - if data[0]['s'] is not None: - plt.plot(x, smoother([y['s'] for y in data])) - loss_names.append('Segmentation Loss') - - plt.legend(loss_names) - plt.show() - -def plot_val(data): - plt.title(os.path.basename(sys.argv[1]) + ' Validation mAP') - plt.xlabel('Epoch') - plt.ylabel('mAP') - - x = [x[1]['epoch'] for x in data if x[0]['type'] == 'box'] - plt.plot(x, [x[0]['all'] for x in data if x[0]['type'] == 'box']) - plt.plot(x, [x[0]['all'] for x in data if x[0]['type'] == 'mask']) - - plt.legend(['BBox mAP', 'Mask mAP']) - plt.show() - -if len(sys.argv) > 2 and sys.argv[2] == 'val': - plot_val(data['val']) -else: - plot_train(data['train']) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import re, sys, os +import matplotlib.pyplot as plt + +from utils.functions import MovingAverage + +with open(sys.argv[1], 'r') as f: + inp = f.read() + +patterns = { + 'train': re.compile(r'\[\s*(?P\d+)\]\s*(?P\d+) \|\| B: (?P\S+) \| C: (?P\S+) \| M: (?P\S+) \|( S: (?P\S+) \|)? 
T: (?P\S+)'), + 'val': re.compile(r'\s*(?P[a-z]+) \|\s*(?P\S+)') +} +data = {key: [] for key in patterns} + +for line in inp.split('\n'): + for key, pattern in patterns.items(): + f = pattern.search(line) + + if f is not None: + datum = f.groupdict() + for k, v in datum.items(): + if v is not None: + try: + v = float(v) + except ValueError: + pass + datum[k] = v + + if key == 'val': + datum = (datum, data['train'][-1]) + data[key].append(datum) + break + + +def smoother(y, interval=100): + avg = MovingAverage(interval) + + for i in range(len(y)): + avg.append(y[i]) + y[i] = avg.get_avg() + + return y + +def plot_train(data): + plt.title(os.path.basename(sys.argv[1]) + ' Training Loss') + plt.xlabel('Iteration') + plt.ylabel('Loss') + + loss_names = ['BBox Loss', 'Conf Loss', 'Mask Loss'] + + x = [x['iteration'] for x in data] + plt.plot(x, smoother([y['b'] for y in data])) + plt.plot(x, smoother([y['c'] for y in data])) + plt.plot(x, smoother([y['m'] for y in data])) + + if data[0]['s'] is not None: + plt.plot(x, smoother([y['s'] for y in data])) + loss_names.append('Segmentation Loss') + + plt.legend(loss_names) + plt.show() + +def plot_val(data): + plt.title(os.path.basename(sys.argv[1]) + ' Validation mAP') + plt.xlabel('Epoch') + plt.ylabel('mAP') + + x = [x[1]['epoch'] for x in data if x[0]['type'] == 'box'] + plt.plot(x, [x[0]['all'] for x in data if x[0]['type'] == 'box']) + plt.plot(x, [x[0]['all'] for x in data if x[0]['type'] == 'mask']) + + plt.legend(['BBox mAP', 'Mask mAP']) + plt.show() + +if len(sys.argv) > 2 and sys.argv[2] == 'val': + plot_val(data['val']) +else: + plot_train(data['train']) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/resume.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/resume.sh index 336af0552f9b6efb02b33020607a379bd4fdf89e..d4857876bdf69b0a1d7b0e0e08697d1552b623dc 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/resume.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/resume.sh @@ -1,14 +1,14 @@ -#!/bin/bash -#SBATCH -p GPU-shared -#SBATCH -t 48:00:00 -#SBATCH --gres=gpu:p100:1 -#SBATCH --no-requeue - -# Usage: ./resume.sh config batch_size resume_file - -module load python/3.6.4_gcc5_np1.14.5 -module load cuda/9.0 - -cd $SCRATCH/yolact - -python3 train.py --config $1 --batch_size $2 --resume=$3 --save_interval 5000 --start_iter=-1 >>logs/$1_log 2>&1 +#!/bin/bash +#SBATCH -p GPU-shared +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:p100:1 +#SBATCH --no-requeue + +# Usage: ./resume.sh config batch_size resume_file + +module load python/3.6.4_gcc5_np1.14.5 +module load cuda/9.0 + +cd $SCRATCH/yolact + +python3 train.py --config $1 --batch_size $2 --resume=$3 --save_interval 5000 --start_iter=-1 >>logs/$1_log 2>&1 diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/save_bboxes.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/save_bboxes.py index 0218314152fe17b9cb6a357d802284c7921eb949..64aee2eb36dabfc0ae760f445cf24ce9b9236f45 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/save_bboxes.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/save_bboxes.py @@ -1,47 +1,47 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" This script transforms and saves bbox coordinates into a pickle object for easy loading. """ - - -import os.path as osp -import json, pickle -import sys - -import numpy as np - -COCO_ROOT = osp.join('.', 'data/coco/') - -annotation_file = 'instances_train2017.json' -annotation_path = osp.join(COCO_ROOT, 'annotations/', annotation_file) - -dump_file = 'weights/bboxes.pkl' - -with open(annotation_path, 'r') as f: - annotations_json = json.load(f) - -annotations = annotations_json['annotations'] -images = annotations_json['images'] -images = {image['id']: image for image in images} -bboxes = [] - -for ann in annotations: - image = images[ann['image_id']] - w,h = (image['width'], image['height']) - - if 'bbox' in ann: - bboxes.append([w, h] + ann['bbox']) - -with open(dump_file, 'wb') as f: - pickle.dump(bboxes, f) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" This script transforms and saves bbox coordinates into a pickle object for easy loading. 
""" + + +import os.path as osp +import json, pickle +import sys + +import numpy as np + +COCO_ROOT = osp.join('.', 'data/coco/') + +annotation_file = 'instances_train2017.json' +annotation_path = osp.join(COCO_ROOT, 'annotations/', annotation_file) + +dump_file = 'weights/bboxes.pkl' + +with open(annotation_path, 'r') as f: + annotations_json = json.load(f) + +annotations = annotations_json['annotations'] +images = annotations_json['images'] +images = {image['id']: image for image in images} +bboxes = [] + +for ann in annotations: + image = images[ann['image_id']] + w,h = (image['width'], image['height']) + + if 'bbox' in ann: + bboxes.append([w, h] + ann['bbox']) + +with open(dump_file, 'wb') as f: + pickle.dump(bboxes, f) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/train.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/train.sh index 36dff27c5069ea7e9224d953f1aad5ad99124393..b526d93c92bc6b4c262d622be05abf342fece9d0 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/train.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/train.sh @@ -1,14 +1,14 @@ -#!/bin/bash -#SBATCH -p GPU-shared -#SBATCH -t 48:00:00 -#SBATCH --gres=gpu:p100:1 -#SBATCH --no-requeue - -# Usage: ./train.sh config batch_size - -module load python/3.6.4_gcc5_np1.14.5 -module load cuda/9.0 - -cd $SCRATCH/yolact - -python3 train.py --config $1 --batch_size $2 --save_interval 5000 &>logs/$1_log +#!/bin/bash +#SBATCH -p GPU-shared +#SBATCH -t 48:00:00 +#SBATCH --gres=gpu:p100:1 +#SBATCH --no-requeue + +# Usage: ./train.sh config batch_size + +module load python/3.6.4_gcc5_np1.14.5 +module load cuda/9.0 + +cd $SCRATCH/yolact + +python3 train.py --config $1 --batch_size $2 --save_interval 5000 &>logs/$1_log diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/unpack_statedict.py b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/unpack_statedict.py index 7f1fa5f47b7bceddab7695e15005286d0b2530e4..41b7314c35cc74bb61035b804a683ae9b8c3559b 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/unpack_statedict.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/scripts/unpack_statedict.py @@ -1,30 +1,30 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -import sys, os - -# Usage python scripts/unpack_statedict.py path_to_pth out_folder/ -# Make sure to include that slash after your out folder, since I can't -# be arsed to do path concatenation so I'd rather type out this comment - -print('Loading state dict...') -state = torch.load(sys.argv[1]) - -if not os.path.exists(sys.argv[2]): - os.mkdir(sys.argv[2]) - -print('Saving stuff...') -for key, val in state.items(): - torch.save(val, sys.argv[2] + key) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import sys, os + +# Usage python scripts/unpack_statedict.py path_to_pth out_folder/ +# Make sure to include that slash after your out folder, since I can't +# be arsed to do path concatenation so I'd rather type out this comment + +print('Loading state dict...') +state = torch.load(sys.argv[1]) + +if not os.path.exists(sys.argv[2]): + os.mkdir(sys.argv[2]) + +print('Saving stuff...') +for key, val in state.items(): + torch.save(val, sys.argv[2] + key) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/test/env_npu.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/test/env_npu.sh index 3d4584311c99e1d5945db268a6e50b25cf844d39..d7213fd06e5ba36e8ecb005b9ad1b64da6357227 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/test/env_npu.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/test/env_npu.sh @@ -1,75 +1,75 @@ -#!/bin/bash -export install_path=/usr/local/Ascend - -if [ -d ${install_path}/toolkit ]; then - export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp -else - if [ -d ${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - 
export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi -fi - -${install_path}/driver/tools/msnpureport -g error -d 0 -${install_path}/driver/tools/msnpureport -g error -d 1 -${install_path}/driver/tools/msnpureport -g error -d 2 -${install_path}/driver/tools/msnpureport -g error -d 3 -${install_path}/driver/tools/msnpureport -g error -d 4 -${install_path}/driver/tools/msnpureport -g error -d 5 -${install_path}/driver/tools/msnpureport -g error -d 6 -${install_path}/driver/tools/msnpureport -g error -d 7 - -#将Host日志输出到串口,0-关闭/1-开启 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 -export TASK_QUEUE_ENABLE=1 -#设置是否开启PTCopy,0-关闭/1-开启 -export PTCOPY_ENABLE=1 -#设置是否开启combined标志,0-关闭/1-开启 -export COMBINED_ENABLE=0 -#设置特殊场景是否需要重新编译,不需要修改 -export DYNAMIC_OP="ADD#MUL" -#HCCL白名单开关,1-关闭/0-开启 -export HCCL_WHITELIST_DISABLE=1 - -ulimit -SHn 512000 - -path_lib=$(python3.7 -c """ -import sys -import re -result='' -for index in range(len(sys.path)): - match_sit = re.search('-packages', sys.path[index]) - if match_sit is not None: - match_lib = re.search('lib', sys.path[index]) - - if match_lib is not None: - end=match_lib.span()[1] - result += sys.path[index][0:end] + ':' - - result+=sys.path[index] + '/torch/lib:' -print(result)""" -) - -echo ${path_lib} - -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export 
PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=0 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_eval_1p.sh 
b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_eval_1p.sh index 50a1c105abefe2e5ed5a329ba57827bfe5f176b6..7ec0becd650635034a96579173c81625c1babf6d 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_eval_1p.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_eval_1p.sh @@ -1,141 +1,141 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="YOLACT-plus" -# 训练batch_size -batch_size=16 -# 推理使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset -data_path="" - -# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar -pth_path='' - -# 指定推理所使用的npu device卡id -device_id=0 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --pth_path* ]];then - pth_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否传入 pth_path , 验证脚本需要传入此参数 -if [[ $pth_path == "" ]];then - echo "[Error] para \"pth_path\" must be confing" - exit 1 -fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - -#################创建日志输出目录,不需要修改################# - -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - #rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -#################启动训练(train)脚本################# -#训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -export RANK_ID=${ASCEND_DEVICE_ID} -# 获取训练生成的log文件 -TRAIN_LOG_FILE_0=${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log - -##################计算并获取精度################ -LOG_FILE=${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log - -python3.7 eval.py --trained_model=${pth_path} \ - --data_path=${data_path} > $LOG_FILE 2>&1 & - -wait - -##################计算并获取精度################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${TRAIN_LOG_FILE_0} | awk -F " " '{print $2}'|awk 'END {print}'` -#打印,不需要修改 -echo "Final Performance images/sec : ${FPS}" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'mask |' ${LOG_FILE} |awk -F " " '{print $3}'|awk 'END {print}'` -# #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="YOLACT-plus" +# 训练batch_size +batch_size=16 +# 推理使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset +data_path="" + +# checkpoint文件路径,以实际路径为准 
--pth_path=./checkopint.pth.tar +pth_path='' + +# 指定推理所使用的npu device卡id +device_id=0 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + #rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练(train)脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +export RANK_ID=${ASCEND_DEVICE_ID} +# 获取训练生成的log文件 +TRAIN_LOG_FILE_0=${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log + +##################计算并获取精度################ +LOG_FILE=${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log + +python3.7 eval.py --trained_model=${pth_path} \ + --data_path=${data_path} > $LOG_FILE 2>&1 & + +wait + +##################计算并获取精度################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${TRAIN_LOG_FILE_0} | awk -F " " '{print $2}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : ${FPS}" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'mask |' ${LOG_FILE} |awk -F " " '{print $3}'|awk 'END {print}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = 
${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_1p.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_1p.sh index 09ada4f9ca70d8f80b1ae564439e392fa231a37f..852823749dbdead5e14d50cf460181245de64e61 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_1p.sh @@ -1,152 +1,152 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="YOLACT-plus" -# 训练batch_size -batch_size=16 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset -data_path="" - -# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar -pth_path='' - -# 训练step -max_iter=1250000 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=8 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --pth_path* ]];then - pth_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否传入 pth_path , 验证脚本需要传入此参数 -#if [[ $pth_path == "" ]];then -# echo "[Error] para \"pth_path\" must be confing" -# exit 1 -#fi - -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# - if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - fi - - -#################启动训练脚本################# -#训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -export RANK_ID=${ASCEND_DEVICE_ID} -python3.7 ./train.py \ - --data_path=${data_path} \ - --num_workers=${workers} \ - --lr=2e-3 \ - --config=yolact_plus_resnet50_config \ - --max_iter=${max_iter} \ - --resume=${pth_path} \ - --save_interval=62500 \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -python3.7 eval.py --trained_model=weights/yolact_plus.pth > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 -wait - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` -# #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): 
Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="YOLACT-plus" +# 训练batch_size +batch_size=16 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset +data_path="" + +# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar +pth_path='' + +# 训练step +max_iter=1250000 +# 指定训练所使用的npu device卡id +device_id=0 +# 加载数据进程数 +workers=8 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +#if [[ $pth_path == "" ]];then +# echo "[Error] para \"pth_path\" must be confing" +# exit 1 +#fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# + if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +export RANK_ID=${ASCEND_DEVICE_ID} +python3.7 ./train.py \ + --data_path=${data_path} \ + --num_workers=${workers} \ + --lr=2e-3 \ + --config=yolact_plus_resnet50_config \ + --max_iter=${max_iter} \ + --resume=${pth_path} \ + --save_interval=62500 \ + --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + +python3.7 eval.py --trained_model=weights/yolact_plus.pth > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + 
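
The grep/awk pipelines above always keep the last matching line: FPS is the second whitespace-separated field of the final line containing "FPS" in the training log, and the mask accuracy is the third field of the final "mask |" row of the eval log. TrainingTime then converts throughput back into an average step time in milliseconds, batch_size * 1000 / FPS. A rough Python equivalent is shown below; the exact log layout is inferred from the awk field positions, so treat it as an assumption, and the file names assume ASCEND_DEVICE_ID=0:

    # Illustrative re-implementation of the FPS / accuracy / TrainingTime extraction.
    def last_field(log_path, keyword, field_index):
        value = None
        with open(log_path, encoding="utf-8", errors="ignore") as f:
            for line in f:
                if keyword in line:
                    fields = line.split()
                    if len(fields) > field_index:
                        value = fields[field_index]  # awk fields are 1-based, this index is 0-based
        return value

    fps = float(last_field("train_0.log", "FPS", 1))  # grep 'FPS'    | awk '{print $2}'
    mask_map = last_field("eval_0.log", "mask |", 2)  # grep 'mask |' | awk '{print $3}'
    training_time_ms = 16 * 1000 / fps                # batch_size * 1000 / FPS
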
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_8p.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_8p.sh index 60c34bd048e2658b36c10d5233669831273778ee..2bfef08bce800ad6aef21b3c941a7821650cd48e 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_full_8p.sh @@ -1,173 +1,173 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="YOLACT-plus" -# 训练batch_size -batch_size=16 -# 训练使用的npu卡数 -export RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset -data_path="" - -# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar -pth_path='' - -# 训练step -max_iter=1250000 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=8 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --pth_path* ]];then - pth_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否传入 pth_path , 验证脚本需要传入此参数 -#if [[ $pth_path == "" ]];then -# echo "[Error] para \"pth_path\" must be confing" -# exit 1 -#fi - -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. 
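
The loss bookkeeping is the same in each of these scripts: grep "||" selects the per-iteration lines that train.py prints (its format string appears later in this patch), and awk -F '[|][|]' '{print $2}' keeps the text between the first and second "||", i.e. the individual loss terms plus the running total T. ActualLoss is simply the last such entry. A sketch of the same step in Python; the sample log line in the comment is an approximation of the real output:

    # Illustrative version of the loss extraction shared by the test scripts.
    # A training-log line looks roughly like:
    #   [  1]    1234 || B: 1.234 | C: 2.345 | M: 3.456 | T: 7.035 || ETA: 0:12:34 || timer: 0.456 || NPU: 0
    def extract_losses(train_log, loss_txt):
        last = None
        with open(train_log, encoding="utf-8", errors="ignore") as src, \
             open(loss_txt, "a", encoding="utf-8") as dst:
            for line in src:
                if "||" in line:
                    last = line.split("||")[1].strip()  # same as awk -F '[|][|]' '{print $2}'
                    dst.write(last + "\n")
        return last  # corresponds to ActualLoss, the final iteration's entry
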
- cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# - if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - fi - - -#################启动训练脚本################# -#训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi - -for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - else - python3.7 -u train.py \ - --data_path=${data_path} \ - --num_workers=${workers} \ - --lr=2e-3 \ - --config=yolact_plus_resnet50_config \ - --max_iter=${max_iter} \ - --resume=${pth_path} \ - --save_interval=62500 \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - fi -done -wait -python3.7 eval.py --trained_model=weights/yolact_plus.pth > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 -wait - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` -# #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="YOLACT-plus" +# 训练batch_size +batch_size=16 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset +data_path="" + +# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar +pth_path='' + +# 训练step +max_iter=1250000 +# 指定训练所使用的npu device卡id +device_id=0 +# 加载数据进程数 +workers=8 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +#if [[ $pth_path == "" ]];then +# echo "[Error] para \"pth_path\" must be confing" +# exit 1 +#fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# + if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +for((RANK_ID=0;RANK_ID ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + else + python3.7 -u train.py \ + --data_path=${data_path} \ + --num_workers=${workers} \ + --lr=2e-3 \ + --config=yolact_plus_resnet50_config \ + --max_iter=${max_iter} \ + --resume=${pth_path} \ + --save_interval=62500 \ + --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + fi +done +wait +python3.7 eval.py --trained_model=weights/yolact_plus.pth > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log 2>&1 +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 
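
For the 8p case the FPS value taken from the training log is already an aggregate figure: train.py (later in this patch) reports world_size * batch_size / mean step time, and the first fps_start_iter iterations are excluded from that mean as warm-up. In terms of the quantities involved (the step time below is a made-up example value):

    # Throughput as reported by train.py and consumed by this script (sketch only).
    world_size = 8          # RANK_SIZE: one training process per NPU
    batch_size = 16         # per-process batch size passed via --batch_size
    avg_step_time_s = 0.85  # hypothetical mean seconds per iteration after warm-up
    fps = world_size * batch_size / avg_step_time_s  # images/sec across all NPUs
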
+BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_performance_1p.sh b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_performance_1p.sh index d2d8a4923b5183c89e3a16d645ce0a2e68b91312..62aacb3977d3f38f265b72a834a70593279655fb 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/test/train_performance_1p.sh @@ -1,149 +1,149 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="YOLACT-plus" -# 训练batch_size -batch_size=16 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset -data_path="" - -# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar -pth_path='' - -# 训练step -max_iter=1000 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=8 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --pth_path* ]];then - pth_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否传入 pth_path , 验证脚本需要传入此参数 -#if [[ $pth_path == "" ]];then -# echo "[Error] para \"pth_path\" must be confing" -# exit 1 -#fi - -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# 
cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# - if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID - fi - - -#################启动训练脚本################# -#训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -export RANK_ID=${ASCEND_DEVICE_ID} -python3.7 ./train.py \ - --data_path=${data_path} \ - --num_workers=${workers} \ - --lr=2e-3 \ - --config=yolact_plus_resnet50_config \ - --max_iter=${max_iter} \ - --resume=${pth_path} \ - --save_interval=1000 \ - --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -wait - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` -# #打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "||" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` -echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + 
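
This performance wrapper (train_performance_1p.sh) differs from the full run mainly in scale: it trains for max_iter=1000 iterations instead of 1,250,000 and does not launch eval.py, so it is intended purely for throughput measurement. The per-iteration timing it relies on is the same moving average that train.py uses for its ETA read-out every 10 iterations; a small sketch of that calculation (the 0.85 s step time is a made-up value):

    import datetime

    # Sketch of the ETA figure printed by train.py (shown later in this patch).
    def eta_string(max_iter, iteration, avg_step_time_s):
        remaining_s = (max_iter - iteration) * avg_step_time_s
        return str(datetime.timedelta(seconds=remaining_s)).split(".")[0]

    print(eta_string(1000, 250, 0.85))  # -> '0:10:37' for the 1000-iteration performance run
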
+################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="YOLACT-plus" +# 训练batch_size +batch_size=16 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 --data_path=./data/TrainDataset +data_path="" + +# checkpoint文件路径,以实际路径为准 --pth_path=./checkopint.pth.tar +pth_path='' + +# 训练step +max_iter=1000 +# 指定训练所使用的npu device卡id +device_id=0 +# 加载数据进程数 +workers=8 + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +#if [[ $pth_path == "" ]];then +# echo "[Error] para \"pth_path\" must be confing" +# exit 1 +#fi + +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + + + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# + if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID + fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +export RANK_ID=${ASCEND_DEVICE_ID} +python3.7 ./train.py \ + --data_path=${data_path} \ + --num_workers=${workers} \ + --lr=2e-3 \ + --config=yolact_plus_resnet50_config \ + --max_iter=${max_iter} \ + --resume=${pth_path} \ + --save_interval=1000 \ + --batch_size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F " " '{print $2}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a 'mask |' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/eval_${ASCEND_DEVICE_ID}.log |awk -F " " '{print $3}'|awk 'END {print}'` +# #打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "||" 
${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |awk -F '[|][|]' '{print $2}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +echo "train_accuracy: ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/train.py b/PyTorch/contrib/cv/detection/YOLACT_plus/train.py index 6d1e302204ac7fd3c7dc2c9eecbc6c9099842153..63d430a3d31ea430aca4ebe79d9ae3354d715f39 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/train.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/train.py @@ -1,660 +1,660 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -from data import * -from utils.augmentations import SSDAugmentation, BaseTransform -from utils.functions import MovingAverage, SavePath -from utils.logger import Log -from utils import timer -from layers.modules import MultiBoxLoss -from yolact import Yolact -import os -import sys -import time -import math, random -from pathlib import Path -import torch -from torch.autograd import Variable -import torch.nn as nn -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torch.nn.init as init -import torch.utils.data as data -import numpy as np -import argparse -import datetime -from torch.nn.parallel import DistributedDataParallel as DDP -import torch.multiprocessing as mp -import torch.distributed as dist -# Oof -import eval as eval_script -from apex import amp - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - - -parser = argparse.ArgumentParser( - description='Yolact Training Script') -parser.add_argument('--batch_size', default=8, type=int, - help='Batch size for training') -parser.add_argument('--data_path', default=None, type=str, - help='data path') -parser.add_argument('--resume', default=None, type=str, - help='Checkpoint state_dict file to resume training from. If this is "interrupt"' \ - ', the model will resume training from the interrupt file.') -parser.add_argument('--start_iter', default=-1, type=int, - help='Resume training at this iter. If this is -1, the iteration will be' \ - 'determined from the file name.') -parser.add_argument('--fps_start_iter', default=100, type=int, - help='calculate fps at this iter. If this is -1, the iteration will be' \ - 'determined from the file name.') -parser.add_argument('--num_workers', default=0, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--cuda', default=True, type=str2bool, - help='Use CUDA to train model') -parser.add_argument('--lr', '--learning_rate', default=None, type=float, - help='Initial learning rate. Leave as None to read this from the config.') -parser.add_argument('--momentum', default=None, type=float, - help='Momentum for SGD. Leave as None to read this from the config.') -parser.add_argument('--decay', '--weight_decay', default=None, type=float, - help='Weight decay for SGD. Leave as None to read this from the config.') -parser.add_argument('--gamma', default=None, type=float, - help='For each lr step, what to multiply the lr by. Leave as None to read this from the config.') -parser.add_argument('--save_folder', default='weights/', - help='Directory for saving checkpoint models.') -parser.add_argument('--log_folder', default='logs/', - help='Directory for saving logs.') -parser.add_argument('--config', default='yolact_base_config', - help='The config object to use.') -parser.add_argument('--max_iter', default=-1, type=int, - help='num of iter will train') -parser.add_argument('--save_interval', default=2000, type=int, - help='The number of iterations between saving the model.') -parser.add_argument('--validation_size', default=5000, type=int, - help='The number of images to use for validation.') -parser.add_argument('--validation_epoch', default=-1, type=int, - help='Output validation information every n iterations. 
If -1, do no validation.') -parser.add_argument('--keep_latest', dest='keep_latest', action='store_true', - help='Only keep the latest checkpoint instead of each one.') -parser.add_argument('--keep_latest_interval', default=100000, type=int, - help='When --keep_latest is on, don\'t delete the latest file at these intervals. This should be a multiple of save_interval or 0.') -parser.add_argument('--dataset', default=None, type=str, - help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') -parser.add_argument('--no_log', dest='log', action='store_false', - help='Don\'t log per iteration information into log_folder.') -parser.add_argument('--log_gpu', dest='log_gpu', action='store_true', - help='Include GPU information in the logs. Nvidia-smi tends to be slow, so set this with caution.') -parser.add_argument('--no_interrupt', dest='interrupt', action='store_false', - help='Don\'t save an interrupt when KeyboardInterrupt is caught.') -parser.add_argument('--batch_alloc', default=None, type=str, - help='If using multiple GPUS, you can set this to be a comma separated list detailing which GPUs should get what local batch size (It should add up to your total batch size).') -parser.add_argument('--no_autoscale', dest='autoscale', action='store_false', - help='YOLACT will automatically scale the lr and the number of iterations depending on the batch size. Set this if you want to disable that.') -parser.add_argument('--useDDP', default=True, type=bool, help='use DistributedDataParallel or not') -parser.add_argument('--seed', default=None, type=int, help='set PyTorch seed') - -parser.set_defaults(keep_latest=False, log=True, log_gpu=False, interrupt=True, autoscale=True) -args = parser.parse_args() - -# 设置配置文件,无用 -if args.config is not None: - set_cfg(args.config) - -# 设置项目数据集,无用 -if args.dataset is not None: - set_dataset(args.dataset) - -if args.data_path: - cfg.dataset.train_images = args.data_path + '/train2017/' - cfg.dataset.train_info = args.data_path + '/annotations/instances_train2017.json' - -if args.autoscale and args.batch_size != 8: - factor = args.batch_size / 8 - if __name__ == '__main__': - print('Scaling parameters by %.2f to account for a batch size of %d.' % (factor, args.batch_size)) - - cfg.lr *= factor - cfg.max_iter //= factor - cfg.lr_steps = [x // factor for x in cfg.lr_steps] - -if args.max_iter>0: - cfg.max_iter = args.max_iter - -# Update training parameters from the config if necessary -def replace(name): - if getattr(args, name) == None: setattr(args, name, getattr(cfg, name)) - - -# 将args中参数替换为config中预设的值,便于后续调用 -replace('lr') -replace('decay') -replace('gamma') -replace('momentum') - -# This is managed by set_lr -# 两个学习率都有用,在后续自动更新学习率中,可以使用 -cur_lr = args.lr - -# 检查环境 -if torch.npu.device_count() == 0: - print('No GPUs detected. Exiting...') - exit(-1) - -# 当一块显卡中的图像个数大于等于6时,才启用batch normalization -if args.batch_size // torch.npu.device_count() < 6 and (not args.useDDP): - if __name__ == '__main__': - print('Per-NPU batch size is less than the recommended limit for batch norm. 
Disabling batch norm.') - cfg.freeze_bn = True - -if args.seed is not None: - seed = args.seed - os.environ['PYTHONHASHSEED'] = str(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.npu.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.backends.cudnn.deterministic = True - print('Finish set seed, seed is :', seed) - -loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S', 'I'] - -if torch.npu.is_available(): - print("npu environment is okay!, and current device count is", torch.npu.device_count()) - - -# if args.cuda: -# torch.set_default_tensor_type('torch.cuda.FloatTensor') -# if not args.cuda: -# print("WARNING: It looks like you have a CUDA device, but aren't " + -# "using CUDA.\nRun with --cuda for optimal training speed.") -# torch.set_default_tensor_type('torch.FloatTensor') -# else: -# torch.set_default_tensor_type('torch.FloatTensor') - -class NetLoss(nn.Module): - """ - A wrapper for running the network and computing the loss - This is so we can more efficiently use DataParallel. - - 损失函数模块,YOLACT只使用Multibox Loss,但单独封装NetLoss模块的目的是多卡训练 - """ - - def __init__(self, net: Yolact, criterion: MultiBoxLoss): - super().__init__() - - self.net = net - self.criterion = criterion - - def forward(self, images, targets, masks, num_crowds): - preds = self.net(images) - losses = self.criterion(self.net, preds, targets, masks, num_crowds) - return losses - - -class CustomDataParallel(nn.DataParallel): - """ - This is a custom version of DataParallel that works better with our training data. - It should also be faster than the general case. - """ - - def scatter(self, inputs, kwargs, device_ids): - # More like scatter and data prep at the same time. The point is we prep the data in such a way - # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs. - devices = ['cuda:' + str(x) for x in device_ids] - splits = prepare_data(inputs[0], devices, allocation=args.batch_alloc) - - return [[split[device_idx] for split in splits] for device_idx in range(len(devices))], \ - [kwargs] * len(devices) - - def gather(self, outputs, output_device): - out = {} - - for k in outputs[0]: - out[k] = torch.stack([output[k].to(output_device) for output in outputs]) - - return out - - -class ScatterWrapper: - """ Input is any number of lists. This will preserve them through a dataparallel scatter. 
""" - - def __init__(self, *args): - for arg in args: - if not isinstance(arg, list): - print('Warning: ScatterWrapper got input of non-list type.') - self.args = args - self.batch_size = len(args[0]) - - def make_mask(self): - out = torch.Tensor(list(range(self.batch_size))).long() - if args.cuda: - return out.npu() - else: - return out - - def get_args(self, mask): - device = mask.device - mask = [int(x) for x in mask] - out_args = [[] for _ in self.args] - - for out, arg in zip(out_args, self.args): - for idx in mask: - x = arg[idx] - if isinstance(x, torch.Tensor): - x = x.to(device) - out.append(x) - - return out_args - - -def train(args): - # 创建模型权重文件存储目录 - if not os.path.exists(args.save_folder): - os.mkdir(args.save_folder) - - args.rank_id = int(os.environ['RANK_ID']) - args.world_size = int(os.environ['RANK_SIZE']) - args.device = torch.device(f'npu:{args.rank_id}') - torch.npu.set_device(args.device) - - args.is_master_node = args.world_size == 1 or args.rank_id == 0 - - if args.is_master_node: - print(args) - - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '83215' - dist.init_process_group(backend='hccl', world_size=args.world_size, rank=args.rank_id) - - # 创建数据集,dataset为训练数据集 - dataset = COCODetection(image_path=cfg.dataset.train_images, - info_file=cfg.dataset.train_info, - transform=SSDAugmentation(MEANS)) - if args.world_size>1: - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - else: - train_sampler = None - if args.validation_epoch > 0: - # 调整eval.py脚本对应参数 - setup_eval() - - # 创建数据集,val_dataset为验证数据集,5000张图像 - val_dataset = COCODetection(image_path=cfg.dataset.valid_images, - info_file=cfg.dataset.valid_info, - transform=BaseTransform(MEANS)) - - # Parallel wraps the underlying module, but when saving and loading we don't want that - yolact_net = Yolact() - net = yolact_net - net.train() - - if args.log: - log = Log(cfg.name+'_time_'+time.strftime('%Y-%m-%d-%H-%M'), args.log_folder, - overwrite=(args.resume is None), log_gpu_stats=args.log_gpu) # 构造日志类 - - # I don't use the timer during training (I use a different timing method). - # Apparently there's a race condition with multiple GPUs, so disable it just to be safe. - timer.disable_all() - - # Both of these can set args.resume to None, so do them before the check - if args.resume == 'interrupt': - args.resume = SavePath.get_interrupt(args.save_folder) - elif args.resume == 'latest': - args.resume = SavePath.get_latest(args.save_folder, cfg.name) - - if args.resume is not None and args.resume != '': - print('Resuming training, loading {}...'.format(args.resume)) - yolact_net.load_weights(args.resume) - - if args.start_iter == -1: - args.start_iter = SavePath.from_str(args.resume).iteration - else: - print('Initializing weights...') - yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path) - - # 损失函数,multibox loss - # threshold : 门限阈值 - # pos_threshold 即为 : 高于这个值,那么就说明预测正确足够confident,即可以认为识别正确 - # pos_threshold 即为: 低于这个值,那么就可以自信认为识别错误 - - # ohem_negpos_ratio - criterion = MultiBoxLoss(num_classes=cfg.num_classes, - pos_threshold=cfg.positive_iou_threshold, - neg_threshold=cfg.negative_iou_threshold, - negpos_ratio=cfg.ohem_negpos_ratio) - - if args.batch_alloc is not None: - args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')] - if sum(args.batch_alloc) != args.batch_size: - print('Error: Batch allocation (%s) does not sum to batch size (%s).' 
% (args.batch_alloc, args.batch_size)) - exit(-1) - - if args.cuda: - net = net.to(args.device) - criterion = criterion.to(args.device) - # 优化器SGD,随机梯度下降法 - optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) - net, optimizer = amp.initialize(net, optimizer, opt_level="O0", loss_scale=16) - net = nn.parallel.DistributedDataParallel(net, device_ids=[args.rank_id]) - else: - net = net.to('cpu') - net.src_device_obj = torch.device('cpu') - - # 优化器SGD,随机梯度下降法 - optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) - - # Initialize everything - if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means - if args.cuda: - yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).npu()) - else: - yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size)) - if not cfg.freeze_bn: yolact_net.freeze_bn(True) - - # loss counters - loc_loss = 0 - conf_loss = 0 - iteration = max(args.start_iter, 0) - last_time = time.time() - - epoch_size = len(dataset) // (args.batch_size * args.world_size) - num_epochs = math.ceil(cfg.max_iter / epoch_size) - - # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index - step_index = 0 - if args.world_size>1: - data_loader = data.DataLoader(dataset, args.batch_size, - num_workers=args.num_workers, - shuffle=False, - collate_fn=detection_collate, - pin_memory=True, sampler=train_sampler) - else: - data_loader = data.DataLoader(dataset, args.batch_size, - num_workers=args.num_workers, - shuffle=True, - collate_fn=detection_collate, - pin_memory=True) - - save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder) - time_avg = MovingAverage() - - global loss_types # Forms the print order - loss_avgs = {k: MovingAverage(100) for k in loss_types} - - print('Begin training! 
NPU :', args.rank_id, '[', time.time(), ']') - print() - # try-except so you can use ctrl+c to save early and stop training - try: - for epoch in range(num_epochs): - # Resume from start_iter - if (epoch + 1) * epoch_size < iteration: - continue - if train_sampler: - train_sampler.set_epoch(epoch) - for idx, datum in enumerate(data_loader): - # Stop if we've reached an epoch if we're resuming from start_iter - if iteration == (epoch + 1) * epoch_size: - break - - # Stop at the configured number of iterations even if mid-epoch - if iteration == cfg.max_iter / args.world_size: - break - - # Change a config setting if we've reached the specified iteration - changed = False - for change in cfg.delayed_settings: - if iteration >= change[0]: - changed = True - cfg.replace(change[1]) - - # Reset the loss averages because things might have changed - for avg in loss_avgs: - avg.reset() - - # If a config setting was changed, remove it from the list so we don't keep checking - if changed: - cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration] - - # Warm up by linearly interpolating the learning rate from some smaller value - if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until: - set_lr(optimizer, - (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init) - - # Adjust the learning rate at the given iterations, but also if we resume from past that iteration - while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]: - step_index += 1 - set_lr(optimizer, args.lr * (args.gamma ** step_index)) - - prep_data_device = ['npu:' + str(args.rank_id)] - datum[0] = [item[1] for item in datum[0]] - images, targets, masks, num_crowds = prepare_data(datum, prep_data_device) - - out = net(images[0]) - optimizer.zero_grad() - wrapper = ScatterWrapper(targets, masks, num_crowds) - losses = criterion(net.module, out, wrapper, wrapper.make_mask()) - - losses = {k: (v).mean() for k, v in losses.items()} # Mean here because Dataparallel - loss = sum([losses[k] for k in losses]) - - # no_inf_mean removes some components from the loss, so make sure to backward through all of it - # all_loss = sum([v.mean() for v in losses.values()]) - - # Backprop - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - if torch.isfinite(loss).item(): - optimizer.step() - print('\t finish one step! 
NPU :', args.rank_id, '[', time.time(), ']') - - # Add the loss to the moving average for bookkeeping - for k in losses: - loss_avgs[k].add(losses[k].item()) - - cur_time = time.time() - elapsed = cur_time - last_time - last_time = cur_time - - # Exclude graph setup from the timing information - if iteration > args.fps_start_iter: - time_avg.add(elapsed) - - if iteration % 10 == 0: - eta_str = \ - str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0] - - total = sum([loss_avgs[k].get_avg() for k in losses]) - loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], []) - - print(('[%3d] %7d ||' + ( - ' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f' + ' || NPU: ' + str( - args.rank_id)) - % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True) - if args.log: - precision = 5 - loss_info = {k: round(losses[k].item(), precision) for k in losses} - loss_info['T'] = round(loss.item(), precision) - - if args.log_gpu: - log.log_gpu_stats = (iteration % 10 == 0) # nvidia-smi is sloooow - - log.log('train', loss=loss_info, epoch=epoch, iter=iteration, - lr=round(cur_lr, 10), elapsed=elapsed) - - log.log_gpu_stats = args.log_gpu - - iteration += 1 - - if iteration % args.save_interval == 0 and iteration != args.start_iter: - if args.keep_latest: - latest = SavePath.get_latest(args.save_folder, cfg.name) - - print('Saving state, iter:', iteration) - if args.is_master_node: - yolact_net.save_weights(save_path(epoch, iteration)) - - if args.keep_latest and latest is not None: - if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval: - print('Deleting old save...') - os.remove(latest) - - # This is done per epoch - if args.validation_epoch > 0: - if epoch % args.validation_epoch == 0 and epoch > 0: - compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) - - log.log('FPS', fps=args.world_size * args.batch_size / time_avg.get_avg()) - print('FPS', args.world_size * args.batch_size / time_avg.get_avg()) - - # Compute validation mAP after training is finished - # compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) - except KeyboardInterrupt: - if args.interrupt: - print('Stopping early. 
Saving network...') - - # Delete previous copy of the interrupted network so we don't spam the weights folder - SavePath.remove_interrupt(args.save_folder) - - if args.is_master_node: - yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt')) - exit() - - if args.is_master_node: - yolact_net.save_weights('./weights/yolact_plus.pth') - - -def set_lr(optimizer, new_lr): - for param_group in optimizer.param_groups: - param_group['lr'] = new_lr - - global cur_lr - cur_lr = new_lr - - -def gradinator(x): - x.requires_grad = False - return x - - -def prepare_data(datum, devices: list = None, allocation: list = None): - with torch.no_grad(): - if devices is None: - devices = ['npu:0'] if args.cuda else ['cpu'] - if allocation is None: - allocation = [args.batch_size // len(devices)] * (len(devices) - 1) - allocation.append(args.batch_size - sum(allocation)) # The rest might need more/less - - images, (targets, masks, num_crowds) = datum - - cur_idx = 0 - for device, alloc in zip(devices, allocation): - for _ in range(alloc): - images[cur_idx] = gradinator(images[cur_idx].to(device)) - targets[cur_idx] = gradinator(targets[cur_idx].to(device)) - masks[cur_idx] = gradinator(masks[cur_idx].to(device)) - cur_idx += 1 - - if cfg.preserve_aspect_ratio: - # Choose a random size from the batch - _, h, w = images[random.randint(0, len(images) - 1)].size() - - for idx, (image, target, mask, num_crowd) in enumerate(zip(images, targets, masks, num_crowds)): - images[idx], targets[idx], masks[idx], num_crowds[idx] \ - = enforce_size(image, target, mask, num_crowd, w, h) - - cur_idx = 0 - split_images, split_targets, split_masks, split_numcrowds \ - = [[None for alloc in allocation] for _ in range(4)] - - for device_idx, alloc in enumerate(allocation): - split_images[device_idx] = torch.stack(images[cur_idx:cur_idx + alloc], dim=0) - split_targets[device_idx] = targets[cur_idx:cur_idx + alloc] - split_masks[device_idx] = masks[cur_idx:cur_idx + alloc] - split_numcrowds[device_idx] = num_crowds[cur_idx:cur_idx + alloc] - - cur_idx += alloc - - return split_images, split_targets, split_masks, split_numcrowds - - -def no_inf_mean(x: torch.Tensor): - """ - Computes the mean of a vector, throwing out all inf values. - If there are no non-inf values, this will return inf (i.e., just the normal mean). 
- """ - - no_inf = [a for a in x if torch.isfinite(a)] - - if len(no_inf) > 0: - return sum(no_inf) / len(no_inf) - else: - return x.mean() - - -def compute_validation_loss(net, data_loader, criterion): - global loss_types - - with torch.no_grad(): - losses = {} - - # Don't switch to eval mode because we want to get losses - iterations = 0 - for datum in data_loader: - images, targets, masks, num_crowds = prepare_data(datum) - out = net(images) - - wrapper = ScatterWrapper(targets, masks, num_crowds) - _losses = criterion(out, wrapper, wrapper.make_mask()) - - for k, v in _losses.items(): - v = v.mean().item() - if k in losses: - losses[k] += v - else: - losses[k] = v - - iterations += 1 - if args.validation_size <= iterations * args.batch_size: - break - - for k in losses: - losses[k] /= iterations - - loss_labels = sum([[k, losses[k]] for k in loss_types if k in losses], []) - print(('Validation ||' + (' %s: %.3f |' * len(losses)) + ')') % tuple(loss_labels), flush=True) - - -def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None): - with torch.no_grad(): - yolact_net.eval() - - start = time.time() - print() - print("Computing validation mAP (this may take a while)...", flush=True) - val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True, trainCuda=args.cuda) - end = time.time() - - if log is not None: - log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration) - - yolact_net.train() - - -def setup_eval(): - eval_script.parse_args(['--no_bar', '--max_images=' + str(args.validation_size)]) - - -if __name__ == '__main__': - train(args) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from data import * +from utils.augmentations import SSDAugmentation, BaseTransform +from utils.functions import MovingAverage, SavePath +from utils.logger import Log +from utils import timer +from layers.modules import MultiBoxLoss +from yolact import Yolact +import os +import sys +import time +import math, random +from pathlib import Path +import torch +from torch.autograd import Variable +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torch.nn.init as init +import torch.utils.data as data +import numpy as np +import argparse +import datetime +from torch.nn.parallel import DistributedDataParallel as DDP +import torch.multiprocessing as mp +import torch.distributed as dist +# Oof +import eval as eval_script +from apex import amp + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + + +parser = argparse.ArgumentParser( + description='Yolact Training Script') +parser.add_argument('--batch_size', default=8, type=int, + help='Batch size for training') +parser.add_argument('--data_path', default=None, type=str, + help='data path') +parser.add_argument('--resume', default=None, type=str, + help='Checkpoint state_dict file to resume training from. 
If this is "interrupt"' \ + ', the model will resume training from the interrupt file.') +parser.add_argument('--start_iter', default=-1, type=int, + help='Resume training at this iter. If this is -1, the iteration will be' \ + 'determined from the file name.') +parser.add_argument('--fps_start_iter', default=100, type=int, + help='calculate fps at this iter. If this is -1, the iteration will be' \ + 'determined from the file name.') +parser.add_argument('--num_workers', default=0, type=int, + help='Number of workers used in dataloading') +parser.add_argument('--cuda', default=True, type=str2bool, + help='Use CUDA to train model') +parser.add_argument('--lr', '--learning_rate', default=None, type=float, + help='Initial learning rate. Leave as None to read this from the config.') +parser.add_argument('--momentum', default=None, type=float, + help='Momentum for SGD. Leave as None to read this from the config.') +parser.add_argument('--decay', '--weight_decay', default=None, type=float, + help='Weight decay for SGD. Leave as None to read this from the config.') +parser.add_argument('--gamma', default=None, type=float, + help='For each lr step, what to multiply the lr by. Leave as None to read this from the config.') +parser.add_argument('--save_folder', default='weights/', + help='Directory for saving checkpoint models.') +parser.add_argument('--log_folder', default='logs/', + help='Directory for saving logs.') +parser.add_argument('--config', default='yolact_base_config', + help='The config object to use.') +parser.add_argument('--max_iter', default=-1, type=int, + help='num of iter will train') +parser.add_argument('--save_interval', default=2000, type=int, + help='The number of iterations between saving the model.') +parser.add_argument('--validation_size', default=5000, type=int, + help='The number of images to use for validation.') +parser.add_argument('--validation_epoch', default=-1, type=int, + help='Output validation information every n iterations. If -1, do no validation.') +parser.add_argument('--keep_latest', dest='keep_latest', action='store_true', + help='Only keep the latest checkpoint instead of each one.') +parser.add_argument('--keep_latest_interval', default=100000, type=int, + help='When --keep_latest is on, don\'t delete the latest file at these intervals. This should be a multiple of save_interval or 0.') +parser.add_argument('--dataset', default=None, type=str, + help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).') +parser.add_argument('--no_log', dest='log', action='store_false', + help='Don\'t log per iteration information into log_folder.') +parser.add_argument('--log_gpu', dest='log_gpu', action='store_true', + help='Include GPU information in the logs. Nvidia-smi tends to be slow, so set this with caution.') +parser.add_argument('--no_interrupt', dest='interrupt', action='store_false', + help='Don\'t save an interrupt when KeyboardInterrupt is caught.') +parser.add_argument('--batch_alloc', default=None, type=str, + help='If using multiple GPUS, you can set this to be a comma separated list detailing which GPUs should get what local batch size (It should add up to your total batch size).') +parser.add_argument('--no_autoscale', dest='autoscale', action='store_false', + help='YOLACT will automatically scale the lr and the number of iterations depending on the batch size. 
Set this if you want to disable that.') +parser.add_argument('--useDDP', default=True, type=bool, help='use DistributedDataParallel or not') +parser.add_argument('--seed', default=None, type=int, help='set PyTorch seed') + +parser.set_defaults(keep_latest=False, log=True, log_gpu=False, interrupt=True, autoscale=True) +args = parser.parse_args() + +# 设置配置文件,无用 +if args.config is not None: + set_cfg(args.config) + +# 设置项目数据集,无用 +if args.dataset is not None: + set_dataset(args.dataset) + +if args.data_path: + cfg.dataset.train_images = args.data_path + '/train2017/' + cfg.dataset.train_info = args.data_path + '/annotations/instances_train2017.json' + +if args.autoscale and args.batch_size != 8: + factor = args.batch_size / 8 + if __name__ == '__main__': + print('Scaling parameters by %.2f to account for a batch size of %d.' % (factor, args.batch_size)) + + cfg.lr *= factor + cfg.max_iter //= factor + cfg.lr_steps = [x // factor for x in cfg.lr_steps] + +if args.max_iter>0: + cfg.max_iter = args.max_iter + +# Update training parameters from the config if necessary +def replace(name): + if getattr(args, name) == None: setattr(args, name, getattr(cfg, name)) + + +# 将args中参数替换为config中预设的值,便于后续调用 +replace('lr') +replace('decay') +replace('gamma') +replace('momentum') + +# This is managed by set_lr +# 两个学习率都有用,在后续自动更新学习率中,可以使用 +cur_lr = args.lr + +# 检查环境 +if torch.npu.device_count() == 0: + print('No GPUs detected. Exiting...') + exit(-1) + +# 当一块显卡中的图像个数大于等于6时,才启用batch normalization +if args.batch_size // torch.npu.device_count() < 6 and (not args.useDDP): + if __name__ == '__main__': + print('Per-NPU batch size is less than the recommended limit for batch norm. Disabling batch norm.') + cfg.freeze_bn = True + +if args.seed is not None: + seed = args.seed + os.environ['PYTHONHASHSEED'] = str(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.npu.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + print('Finish set seed, seed is :', seed) + +loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S', 'I'] + +if torch.npu.is_available(): + print("npu environment is okay!, and current device count is", torch.npu.device_count()) + + +# if args.cuda: +# torch.set_default_tensor_type('torch.cuda.FloatTensor') +# if not args.cuda: +# print("WARNING: It looks like you have a CUDA device, but aren't " + +# "using CUDA.\nRun with --cuda for optimal training speed.") +# torch.set_default_tensor_type('torch.FloatTensor') +# else: +# torch.set_default_tensor_type('torch.FloatTensor') + +class NetLoss(nn.Module): + """ + A wrapper for running the network and computing the loss + This is so we can more efficiently use DataParallel. + + 损失函数模块,YOLACT只使用Multibox Loss,但单独封装NetLoss模块的目的是多卡训练 + """ + + def __init__(self, net: Yolact, criterion: MultiBoxLoss): + super().__init__() + + self.net = net + self.criterion = criterion + + def forward(self, images, targets, masks, num_crowds): + preds = self.net(images) + losses = self.criterion(self.net, preds, targets, masks, num_crowds) + return losses + + +class CustomDataParallel(nn.DataParallel): + """ + This is a custom version of DataParallel that works better with our training data. + It should also be faster than the general case. + """ + + def scatter(self, inputs, kwargs, device_ids): + # More like scatter and data prep at the same time. The point is we prep the data in such a way + # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs. 
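+        # For reference, nn.DataParallel expects scatter() to return one inputs tuple per
+        # device together with a matching list of per-device kwargs dicts. prepare_data()
+        # below has already split the batch and moved each chunk to its device, so this
+        # override only regroups its outputs by device index.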
+ devices = ['cuda:' + str(x) for x in device_ids] + splits = prepare_data(inputs[0], devices, allocation=args.batch_alloc) + + return [[split[device_idx] for split in splits] for device_idx in range(len(devices))], \ + [kwargs] * len(devices) + + def gather(self, outputs, output_device): + out = {} + + for k in outputs[0]: + out[k] = torch.stack([output[k].to(output_device) for output in outputs]) + + return out + + +class ScatterWrapper: + """ Input is any number of lists. This will preserve them through a dataparallel scatter. """ + + def __init__(self, *args): + for arg in args: + if not isinstance(arg, list): + print('Warning: ScatterWrapper got input of non-list type.') + self.args = args + self.batch_size = len(args[0]) + + def make_mask(self): + out = torch.Tensor(list(range(self.batch_size))).long() + if args.cuda: + return out.npu() + else: + return out + + def get_args(self, mask): + device = mask.device + mask = [int(x) for x in mask] + out_args = [[] for _ in self.args] + + for out, arg in zip(out_args, self.args): + for idx in mask: + x = arg[idx] + if isinstance(x, torch.Tensor): + x = x.to(device) + out.append(x) + + return out_args + + +def train(args): + # 创建模型权重文件存储目录 + if not os.path.exists(args.save_folder): + os.mkdir(args.save_folder) + + args.rank_id = int(os.environ['RANK_ID']) + args.world_size = int(os.environ['RANK_SIZE']) + args.device = torch.device(f'npu:{args.rank_id}') + torch.npu.set_device(args.device) + + args.is_master_node = args.world_size == 1 or args.rank_id == 0 + + if args.is_master_node: + print(args) + + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '83215' + dist.init_process_group(backend='hccl', world_size=args.world_size, rank=args.rank_id) + + # 创建数据集,dataset为训练数据集 + dataset = COCODetection(image_path=cfg.dataset.train_images, + info_file=cfg.dataset.train_info, + transform=SSDAugmentation(MEANS)) + if args.world_size>1: + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + else: + train_sampler = None + if args.validation_epoch > 0: + # 调整eval.py脚本对应参数 + setup_eval() + + # 创建数据集,val_dataset为验证数据集,5000张图像 + val_dataset = COCODetection(image_path=cfg.dataset.valid_images, + info_file=cfg.dataset.valid_info, + transform=BaseTransform(MEANS)) + + # Parallel wraps the underlying module, but when saving and loading we don't want that + yolact_net = Yolact() + net = yolact_net + net.train() + + if args.log: + log = Log(cfg.name+'_time_'+time.strftime('%Y-%m-%d-%H-%M'), args.log_folder, + overwrite=(args.resume is None), log_gpu_stats=args.log_gpu) # 构造日志类 + + # I don't use the timer during training (I use a different timing method). + # Apparently there's a race condition with multiple GPUs, so disable it just to be safe. 
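+    # Iteration timing below is instead done with time.time() deltas fed into a
+    # MovingAverage (time_avg), which also drives the printed ETA and the final FPS figure.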
+ timer.disable_all() + + # Both of these can set args.resume to None, so do them before the check + if args.resume == 'interrupt': + args.resume = SavePath.get_interrupt(args.save_folder) + elif args.resume == 'latest': + args.resume = SavePath.get_latest(args.save_folder, cfg.name) + + if args.resume is not None and args.resume != '': + print('Resuming training, loading {}...'.format(args.resume)) + yolact_net.load_weights(args.resume) + + if args.start_iter == -1: + args.start_iter = SavePath.from_str(args.resume).iteration + else: + print('Initializing weights...') + yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path) + + # 损失函数,multibox loss + # threshold : 门限阈值 + # pos_threshold 即为 : 高于这个值,那么就说明预测正确足够confident,即可以认为识别正确 + # pos_threshold 即为: 低于这个值,那么就可以自信认为识别错误 + + # ohem_negpos_ratio + criterion = MultiBoxLoss(num_classes=cfg.num_classes, + pos_threshold=cfg.positive_iou_threshold, + neg_threshold=cfg.negative_iou_threshold, + negpos_ratio=cfg.ohem_negpos_ratio) + + if args.batch_alloc is not None: + args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')] + if sum(args.batch_alloc) != args.batch_size: + print('Error: Batch allocation (%s) does not sum to batch size (%s).' % (args.batch_alloc, args.batch_size)) + exit(-1) + + if args.cuda: + net = net.to(args.device) + criterion = criterion.to(args.device) + # 优化器SGD,随机梯度下降法 + optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) + net, optimizer = amp.initialize(net, optimizer, opt_level="O0", loss_scale=16) + net = nn.parallel.DistributedDataParallel(net, device_ids=[args.rank_id]) + else: + net = net.to('cpu') + net.src_device_obj = torch.device('cpu') + + # 优化器SGD,随机梯度下降法 + optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) + + # Initialize everything + if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means + if args.cuda: + yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).npu()) + else: + yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size)) + if not cfg.freeze_bn: yolact_net.freeze_bn(True) + + # loss counters + loc_loss = 0 + conf_loss = 0 + iteration = max(args.start_iter, 0) + last_time = time.time() + + epoch_size = len(dataset) // (args.batch_size * args.world_size) + num_epochs = math.ceil(cfg.max_iter / epoch_size) + + # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index + step_index = 0 + if args.world_size>1: + data_loader = data.DataLoader(dataset, args.batch_size, + num_workers=args.num_workers, + shuffle=False, + collate_fn=detection_collate, + pin_memory=True, sampler=train_sampler) + else: + data_loader = data.DataLoader(dataset, args.batch_size, + num_workers=args.num_workers, + shuffle=True, + collate_fn=detection_collate, + pin_memory=True) + + save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder) + time_avg = MovingAverage() + + global loss_types # Forms the print order + loss_avgs = {k: MovingAverage(100) for k in loss_types} + + print('Begin training! 
NPU :', args.rank_id, '[', time.time(), ']') + print() + # try-except so you can use ctrl+c to save early and stop training + try: + for epoch in range(num_epochs): + # Resume from start_iter + if (epoch + 1) * epoch_size < iteration: + continue + if train_sampler: + train_sampler.set_epoch(epoch) + for idx, datum in enumerate(data_loader): + # Stop if we've reached an epoch if we're resuming from start_iter + if iteration == (epoch + 1) * epoch_size: + break + + # Stop at the configured number of iterations even if mid-epoch + if iteration == cfg.max_iter / args.world_size: + break + + # Change a config setting if we've reached the specified iteration + changed = False + for change in cfg.delayed_settings: + if iteration >= change[0]: + changed = True + cfg.replace(change[1]) + + # Reset the loss averages because things might have changed + for avg in loss_avgs: + avg.reset() + + # If a config setting was changed, remove it from the list so we don't keep checking + if changed: + cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration] + + # Warm up by linearly interpolating the learning rate from some smaller value + if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until: + set_lr(optimizer, + (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init) + + # Adjust the learning rate at the given iterations, but also if we resume from past that iteration + while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]: + step_index += 1 + set_lr(optimizer, args.lr * (args.gamma ** step_index)) + + prep_data_device = ['npu:' + str(args.rank_id)] + datum[0] = [item[1] for item in datum[0]] + images, targets, masks, num_crowds = prepare_data(datum, prep_data_device) + + out = net(images[0]) + optimizer.zero_grad() + wrapper = ScatterWrapper(targets, masks, num_crowds) + losses = criterion(net.module, out, wrapper, wrapper.make_mask()) + + losses = {k: (v).mean() for k, v in losses.items()} # Mean here because Dataparallel + loss = sum([losses[k] for k in losses]) + + # no_inf_mean removes some components from the loss, so make sure to backward through all of it + # all_loss = sum([v.mean() for v in losses.values()]) + + # Backprop + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + if torch.isfinite(loss).item(): + optimizer.step() + print('\t finish one step! 
NPU :', args.rank_id, '[', time.time(), ']') + + # Add the loss to the moving average for bookkeeping + for k in losses: + loss_avgs[k].add(losses[k].item()) + + cur_time = time.time() + elapsed = cur_time - last_time + last_time = cur_time + + # Exclude graph setup from the timing information + if iteration > args.fps_start_iter: + time_avg.add(elapsed) + + if iteration % 10 == 0: + eta_str = \ + str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0] + + total = sum([loss_avgs[k].get_avg() for k in losses]) + loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], []) + + print(('[%3d] %7d ||' + ( + ' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f' + ' || NPU: ' + str( + args.rank_id)) + % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True) + if args.log: + precision = 5 + loss_info = {k: round(losses[k].item(), precision) for k in losses} + loss_info['T'] = round(loss.item(), precision) + + if args.log_gpu: + log.log_gpu_stats = (iteration % 10 == 0) # nvidia-smi is sloooow + + log.log('train', loss=loss_info, epoch=epoch, iter=iteration, + lr=round(cur_lr, 10), elapsed=elapsed) + + log.log_gpu_stats = args.log_gpu + + iteration += 1 + + if iteration % args.save_interval == 0 and iteration != args.start_iter: + if args.keep_latest: + latest = SavePath.get_latest(args.save_folder, cfg.name) + + print('Saving state, iter:', iteration) + if args.is_master_node: + yolact_net.save_weights(save_path(epoch, iteration)) + + if args.keep_latest and latest is not None: + if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval: + print('Deleting old save...') + os.remove(latest) + + # This is done per epoch + if args.validation_epoch > 0: + if epoch % args.validation_epoch == 0 and epoch > 0: + compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) + + log.log('FPS', fps=args.world_size * args.batch_size / time_avg.get_avg()) + print('FPS', args.world_size * args.batch_size / time_avg.get_avg()) + + # Compute validation mAP after training is finished + # compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) + except KeyboardInterrupt: + if args.interrupt: + print('Stopping early. 
Saving network...') + + # Delete previous copy of the interrupted network so we don't spam the weights folder + SavePath.remove_interrupt(args.save_folder) + + if args.is_master_node: + yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt')) + exit() + + if args.is_master_node: + yolact_net.save_weights('./weights/yolact_plus.pth') + + +def set_lr(optimizer, new_lr): + for param_group in optimizer.param_groups: + param_group['lr'] = new_lr + + global cur_lr + cur_lr = new_lr + + +def gradinator(x): + x.requires_grad = False + return x + + +def prepare_data(datum, devices: list = None, allocation: list = None): + with torch.no_grad(): + if devices is None: + devices = ['npu:0'] if args.cuda else ['cpu'] + if allocation is None: + allocation = [args.batch_size // len(devices)] * (len(devices) - 1) + allocation.append(args.batch_size - sum(allocation)) # The rest might need more/less + + images, (targets, masks, num_crowds) = datum + + cur_idx = 0 + for device, alloc in zip(devices, allocation): + for _ in range(alloc): + images[cur_idx] = gradinator(images[cur_idx].to(device)) + targets[cur_idx] = gradinator(targets[cur_idx].to(device)) + masks[cur_idx] = gradinator(masks[cur_idx].to(device)) + cur_idx += 1 + + if cfg.preserve_aspect_ratio: + # Choose a random size from the batch + _, h, w = images[random.randint(0, len(images) - 1)].size() + + for idx, (image, target, mask, num_crowd) in enumerate(zip(images, targets, masks, num_crowds)): + images[idx], targets[idx], masks[idx], num_crowds[idx] \ + = enforce_size(image, target, mask, num_crowd, w, h) + + cur_idx = 0 + split_images, split_targets, split_masks, split_numcrowds \ + = [[None for alloc in allocation] for _ in range(4)] + + for device_idx, alloc in enumerate(allocation): + split_images[device_idx] = torch.stack(images[cur_idx:cur_idx + alloc], dim=0) + split_targets[device_idx] = targets[cur_idx:cur_idx + alloc] + split_masks[device_idx] = masks[cur_idx:cur_idx + alloc] + split_numcrowds[device_idx] = num_crowds[cur_idx:cur_idx + alloc] + + cur_idx += alloc + + return split_images, split_targets, split_masks, split_numcrowds + + +def no_inf_mean(x: torch.Tensor): + """ + Computes the mean of a vector, throwing out all inf values. + If there are no non-inf values, this will return inf (i.e., just the normal mean). 
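+    Illustrative example: no_inf_mean(torch.tensor([1., float('inf'), 3.])) -> tensor(2.)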
+ """ + + no_inf = [a for a in x if torch.isfinite(a)] + + if len(no_inf) > 0: + return sum(no_inf) / len(no_inf) + else: + return x.mean() + + +def compute_validation_loss(net, data_loader, criterion): + global loss_types + + with torch.no_grad(): + losses = {} + + # Don't switch to eval mode because we want to get losses + iterations = 0 + for datum in data_loader: + images, targets, masks, num_crowds = prepare_data(datum) + out = net(images) + + wrapper = ScatterWrapper(targets, masks, num_crowds) + _losses = criterion(out, wrapper, wrapper.make_mask()) + + for k, v in _losses.items(): + v = v.mean().item() + if k in losses: + losses[k] += v + else: + losses[k] = v + + iterations += 1 + if args.validation_size <= iterations * args.batch_size: + break + + for k in losses: + losses[k] /= iterations + + loss_labels = sum([[k, losses[k]] for k in loss_types if k in losses], []) + print(('Validation ||' + (' %s: %.3f |' * len(losses)) + ')') % tuple(loss_labels), flush=True) + + +def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None): + with torch.no_grad(): + yolact_net.eval() + + start = time.time() + print() + print("Computing validation mAP (this may take a while)...", flush=True) + val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True, trainCuda=args.cuda) + end = time.time() + + if log is not None: + log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration) + + yolact_net.train() + + +def setup_eval(): + eval_script.parse_args(['--no_bar', '--max_images=' + str(args.validation_size)]) + + +if __name__ == '__main__': + train(args) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/augmentations.py b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/augmentations.py index 5f32d182af0fae1b331e98d649c7b5bd05226e83..f18fd70932c38750b1f29247ea7f7aa8300be91b 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/augmentations.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/augmentations.py @@ -1,702 +1,702 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -from torchvision import transforms -import cv2 -import numpy as np -import types -from numpy import random -from math import sqrt - -from data import cfg, MEANS, STD - - -def intersect(box_a, box_b): - max_xy = np.minimum(box_a[:, 2:], box_b[2:]) - min_xy = np.maximum(box_a[:, :2], box_b[:2]) - inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) - return inter[:, 0] * inter[:, 1] - - -def jaccard_numpy(box_a, box_b): - """Compute the jaccard overlap of two sets of boxes. The jaccard overlap - is simply the intersection over union of two boxes. 
- E.g.: - A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) - Args: - box_a: Multiple bounding boxes, Shape: [num_boxes,4] - box_b: Single bounding box, Shape: [4] - Return: - jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] - """ - inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])) # [A,B] - area_b = ((box_b[2]-box_b[0]) * - (box_b[3]-box_b[1])) # [A,B] - union = area_a + area_b - inter - return inter / union # [A,B] - - -class Compose(object): - """Composes several augmentations together. - Args: - transforms (List[Transform]): list of transforms to compose. - Example: - >>> augmentations.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.ToTensor(), - >>> ]) - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, masks=None, boxes=None, labels=None): - for t in self.transforms: - img, masks, boxes, labels = t(img, masks, boxes, labels) - return img, masks, boxes, labels - - -class Lambda(object): - """Applies a lambda as a transform.""" - - def __init__(self, lambd): - assert isinstance(lambd, types.LambdaType) - self.lambd = lambd - - def __call__(self, img, masks=None, boxes=None, labels=None): - return self.lambd(img, masks, boxes, labels) - - -class ConvertFromInts(object): - def __call__(self, image, masks=None, boxes=None, labels=None): - return image.astype(np.float32), masks, boxes, labels - - - -class ToAbsoluteCoords(object): - def __call__(self, image, masks=None, boxes=None, labels=None): - height, width, channels = image.shape - boxes[:, 0] *= width - boxes[:, 2] *= width - boxes[:, 1] *= height - boxes[:, 3] *= height - - return image, masks, boxes, labels - - -class ToPercentCoords(object): - def __call__(self, image, masks=None, boxes=None, labels=None): - height, width, channels = image.shape - boxes[:, 0] /= width - boxes[:, 2] /= width - boxes[:, 1] /= height - boxes[:, 3] /= height - - return image, masks, boxes, labels - - -class Pad(object): - """ - Pads the image to the input width and height, filling the - background with mean and putting the image in the top-left. - - Note: this expects im_w <= width and im_h <= height - """ - def __init__(self, width, height, mean=MEANS, pad_gt=True): - self.mean = mean - self.width = width - self.height = height - self.pad_gt = pad_gt - - def __call__(self, image, masks, boxes=None, labels=None): - im_h, im_w, depth = image.shape - - expand_image = np.zeros( - (self.height, self.width, depth), - dtype=image.dtype) - expand_image[:, :, :] = self.mean - expand_image[:im_h, :im_w] = image - - if self.pad_gt: - expand_masks = np.zeros( - (masks.shape[0], self.height, self.width), - dtype=masks.dtype) - expand_masks[:,:im_h,:im_w] = masks - masks = expand_masks - - return expand_image, masks, boxes, labels - -class Resize(object): - """ If preserve_aspect_ratio is true, this resizes to an approximate area of max_size * max_size """ - - @staticmethod - def calc_size_preserve_ar(img_w, img_h, max_size): - """ I mathed this one out on the piece of paper. 
Resulting width*height = approx max_size^2 """ - ratio = sqrt(img_w / img_h) - w = max_size * ratio - h = max_size / ratio - return int(w), int(h) - - def __init__(self, resize_gt=True): - self.resize_gt = resize_gt - self.max_size = cfg.max_size - self.preserve_aspect_ratio = cfg.preserve_aspect_ratio - - def __call__(self, image, masks, boxes, labels=None): - img_h, img_w, _ = image.shape - - if self.preserve_aspect_ratio: - width, height = Resize.calc_size_preserve_ar(img_w, img_h, self.max_size) - else: - width, height = self.max_size, self.max_size - - image = cv2.resize(image, (width, height)) - - if self.resize_gt: - # Act like each object is a color channel - masks = masks.transpose((1, 2, 0)) - masks = cv2.resize(masks, (width, height)) - - # OpenCV resizes a (w,h,1) array to (s,s), so fix that - if len(masks.shape) == 2: - masks = np.expand_dims(masks, 0) - else: - masks = masks.transpose((2, 0, 1)) - - # Scale bounding boxes (which are currently absolute coordinates) - boxes[:, [0, 2]] *= (width / img_w) - boxes[:, [1, 3]] *= (height / img_h) - - # Discard boxes that are smaller than we'd like - w = boxes[:, 2] - boxes[:, 0] - h = boxes[:, 3] - boxes[:, 1] - - keep = (w > cfg.discard_box_width) * (h > cfg.discard_box_height) - masks = masks[keep] - boxes = boxes[keep] - labels['labels'] = labels['labels'][keep] - labels['num_crowds'] = (labels['labels'] < 0).sum() - - return image, masks, boxes, labels - - -class RandomSaturation(object): - def __init__(self, lower=0.5, upper=1.5): - self.lower = lower - self.upper = upper - assert self.upper >= self.lower, "contrast upper must be >= lower." - assert self.lower >= 0, "contrast lower must be non-negative." - - def __call__(self, image, masks=None, boxes=None, labels=None): - if random.randint(2): - image[:, :, 1] *= random.uniform(self.lower, self.upper) - - return image, masks, boxes, labels - - -class RandomHue(object): - def __init__(self, delta=18.0): - assert delta >= 0.0 and delta <= 360.0 - self.delta = delta - - def __call__(self, image, masks=None, boxes=None, labels=None): - if random.randint(2): - image[:, :, 0] += random.uniform(-self.delta, self.delta) - image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 - image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 - return image, masks, boxes, labels - - -class RandomLightingNoise(object): - def __init__(self): - self.perms = ((0, 1, 2), (0, 2, 1), - (1, 0, 2), (1, 2, 0), - (2, 0, 1), (2, 1, 0)) - - def __call__(self, image, masks=None, boxes=None, labels=None): - # Don't shuffle the channels please, why would you do this - - # if random.randint(2): - # swap = self.perms[random.randint(len(self.perms))] - # shuffle = SwapChannels(swap) # shuffle channels - # image = shuffle(image) - return image, masks, boxes, labels - - -class ConvertColor(object): - def __init__(self, current='BGR', transform='HSV'): - self.transform = transform - self.current = current - - def __call__(self, image, masks=None, boxes=None, labels=None): - if self.current == 'BGR' and self.transform == 'HSV': - image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - elif self.current == 'HSV' and self.transform == 'BGR': - image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) - else: - raise NotImplementedError - return image, masks, boxes, labels - - -class RandomContrast(object): - def __init__(self, lower=0.5, upper=1.5): - self.lower = lower - self.upper = upper - assert self.upper >= self.lower, "contrast upper must be >= lower." - assert self.lower >= 0, "contrast lower must be non-negative." 
- - # expects float image - def __call__(self, image, masks=None, boxes=None, labels=None): - if random.randint(2): - alpha = random.uniform(self.lower, self.upper) - image *= alpha - return image, masks, boxes, labels - - -class RandomBrightness(object): - def __init__(self, delta=32): - assert delta >= 0.0 - assert delta <= 255.0 - self.delta = delta - - def __call__(self, image, masks=None, boxes=None, labels=None): - if random.randint(2): - delta = random.uniform(-self.delta, self.delta) - image += delta - return image, masks, boxes, labels - - -class ToCV2Image(object): - def __call__(self, tensor, masks=None, boxes=None, labels=None): - return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), masks, boxes, labels - - -class ToTensor(object): - def __call__(self, cvimage, masks=None, boxes=None, labels=None): - return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), masks, boxes, labels - - -class RandomSampleCrop(object): - """Crop - Arguments: - img (Image): the image being input during training - boxes (Tensor): the original bounding boxes in pt form - labels (Tensor): the class labels for each bbox - mode (float tuple): the min and max jaccard overlaps - Return: - (img, boxes, classes) - img (Image): the cropped image - boxes (Tensor): the adjusted bounding boxes in pt form - labels (Tensor): the class labels for each bbox - """ - def __init__(self): - self.sample_options = ( - # using entire original input image - None, - # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9 - (0.1, None), - (0.3, None), - (0.7, None), - (0.9, None), - # randomly sample a patch - (None, None), - ) - - def __call__(self, image, masks, boxes=None, labels=None): - height, width, _ = image.shape - while True: - # randomly choose a mode - mode = random.choice(self.sample_options) - if mode is None: - return image, masks, boxes, labels - - min_iou, max_iou = mode - if min_iou is None: - min_iou = float('-inf') - if max_iou is None: - max_iou = float('inf') - - # max trails (50) - for _ in range(50): - current_image = image - - w = random.uniform(0.3 * width, width) - h = random.uniform(0.3 * height, height) - - # aspect ratio constraint b/t .5 & 2 - if h / w < 0.5 or h / w > 2: - continue - - left = random.uniform(width - w) - top = random.uniform(height - h) - - # convert to integer rect x1,y1,x2,y2 - rect = np.array([int(left), int(top), int(left+w), int(top+h)]) - - # calculate IoU (jaccard overlap) b/t the cropped and gt boxes - overlap = jaccard_numpy(boxes, rect) - - # This piece of code is bugged and does nothing: - # https://github.com/amdegroot/ssd.pytorch/issues/68 - # - # However, when I fixed it with overlap.max() < min_iou, - # it cut the mAP in half (after 8k iterations). So it stays. - # - # is min and max overlap constraint satisfied? if not try again - if overlap.min() < min_iou and max_iou < overlap.max(): - continue - - # cut the crop from the image - current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], - :] - - # keep overlap with gt box IF center in sampled patch - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - - # mask in all gt boxes that above and to the left of centers - m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) - - # mask in all gt boxes that under and to the right of centers - m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) - - # mask in that both m1 and m2 are true - mask = m1 * m2 - - # [0 ... 0 for num_gt and then 1 ... 
1 for num_crowds] - num_crowds = labels['num_crowds'] - crowd_mask = np.zeros(mask.shape, dtype=np.int32) - - if num_crowds > 0: - crowd_mask[-num_crowds:] = 1 - - # have any valid boxes? try again if not - # Also make sure you have at least one regular gt - if not mask.any() or np.sum(1-crowd_mask[mask]) == 0: - continue - - # take only the matching gt masks - current_masks = masks[mask, :, :].copy() - - # take only matching gt boxes - current_boxes = boxes[mask, :].copy() - - # take only matching gt labels - labels['labels'] = labels['labels'][mask] - current_labels = labels - - # We now might have fewer crowd annotations - if num_crowds > 0: - labels['num_crowds'] = np.sum(crowd_mask[mask]) - - # should we use the box left and top corner or the crop's - current_boxes[:, :2] = np.maximum(current_boxes[:, :2], - rect[:2]) - # adjust to crop (by substracting crop's left,top) - current_boxes[:, :2] -= rect[:2] - - current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], - rect[2:]) - # adjust to crop (by substracting crop's left,top) - current_boxes[:, 2:] -= rect[:2] - - # crop the current masks to the same dimensions as the image - current_masks = current_masks[:, rect[1]:rect[3], rect[0]:rect[2]] - - return current_image, current_masks, current_boxes, current_labels - - -class Expand(object): - def __init__(self, mean): - self.mean = mean - - def __call__(self, image, masks, boxes, labels): - if random.randint(2): - return image, masks, boxes, labels - - height, width, depth = image.shape - ratio = random.uniform(1, 4) - left = random.uniform(0, width*ratio - width) - top = random.uniform(0, height*ratio - height) - - expand_image = np.zeros( - (int(height*ratio), int(width*ratio), depth), - dtype=image.dtype) - expand_image[:, :, :] = self.mean - expand_image[int(top):int(top + height), - int(left):int(left + width)] = image - image = expand_image - - expand_masks = np.zeros( - (masks.shape[0], int(height*ratio), int(width*ratio)), - dtype=masks.dtype) - expand_masks[:,int(top):int(top + height), - int(left):int(left + width)] = masks - masks = expand_masks - - boxes = boxes.copy() - boxes[:, :2] += (int(left), int(top)) - boxes[:, 2:] += (int(left), int(top)) - - return image, masks, boxes, labels - - -class RandomMirror(object): - def __call__(self, image, masks, boxes, labels): - _, width, _ = image.shape - if random.randint(2): - image = image[:, ::-1] - masks = masks[:, :, ::-1] - boxes = boxes.copy() - boxes[:, 0::2] = width - boxes[:, 2::-2] - return image, masks, boxes, labels - - -class RandomFlip(object): - def __call__(self, image, masks, boxes, labels): - height , _ , _ = image.shape - if random.randint(2): - image = image[::-1, :] - masks = masks[:, ::-1, :] - boxes = boxes.copy() - boxes[:, 1::2] = height - boxes[:, 3::-2] - return image, masks, boxes, labels - - -class RandomRot90(object): - def __call__(self, image, masks, boxes, labels): - old_height , old_width , _ = image.shape - k = random.randint(4) - image = np.rot90(image,k) - masks = np.array([np.rot90(mask,k) for mask in masks]) - boxes = boxes.copy() - for _ in range(k): - boxes = np.array([[box[1], old_width - 1 - box[2], box[3], old_width - 1 - box[0]] for box in boxes]) - old_width, old_height = old_height, old_width - return image, masks, boxes, labels - - -class SwapChannels(object): - """Transforms a tensorized image by swapping the channels in the order - specified in the swap tuple. 
- Args: - swaps (int triple): final order of channels - eg: (2, 1, 0) - """ - - def __init__(self, swaps): - self.swaps = swaps - - def __call__(self, image): - """ - Args: - image (Tensor): image tensor to be transformed - Return: - a tensor with channels swapped according to swap - """ - # if torch.is_tensor(image): - # image = image.data.cpu().numpy() - # else: - # image = np.array(image) - image = image[:, :, self.swaps] - return image - - -class PhotometricDistort(object): - def __init__(self): - self.pd = [ - RandomContrast(), - ConvertColor(transform='HSV'), - RandomSaturation(), - RandomHue(), - ConvertColor(current='HSV', transform='BGR'), - RandomContrast() - ] - self.rand_brightness = RandomBrightness() - self.rand_light_noise = RandomLightingNoise() - - def __call__(self, image, masks, boxes, labels): - im = image.copy() - im, masks, boxes, labels = self.rand_brightness(im, masks, boxes, labels) - if random.randint(2): - distort = Compose(self.pd[:-1]) - else: - distort = Compose(self.pd[1:]) - im, masks, boxes, labels = distort(im, masks, boxes, labels) - return self.rand_light_noise(im, masks, boxes, labels) - -class PrepareMasks(object): - """ - Prepares the gt masks for use_gt_bboxes by cropping with the gt box - and downsampling the resulting mask to mask_size, mask_size. This - function doesn't do anything if cfg.use_gt_bboxes is False. - """ - - def __init__(self, mask_size, use_gt_bboxes): - self.mask_size = mask_size - self.use_gt_bboxes = use_gt_bboxes - - def __call__(self, image, masks, boxes, labels=None): - if not self.use_gt_bboxes: - return image, masks, boxes, labels - - height, width, _ = image.shape - - new_masks = np.zeros((masks.shape[0], self.mask_size ** 2)) - - for i in range(len(masks)): - x1, y1, x2, y2 = boxes[i, :] - x1 *= width - x2 *= width - y1 *= height - y2 *= height - x1, y1, x2, y2 = (int(x1), int(y1), int(x2), int(y2)) - - # +1 So that if y1=10.6 and y2=10.9 we still have a bounding box - cropped_mask = masks[i, y1:(y2+1), x1:(x2+1)] - scaled_mask = cv2.resize(cropped_mask, (self.mask_size, self.mask_size)) - - new_masks[i, :] = scaled_mask.reshape(1, -1) - - # Binarize - new_masks[new_masks > 0.5] = 1 - new_masks[new_masks <= 0.5] = 0 - - return image, new_masks, boxes, labels - -class BackboneTransform(object): - """ - Transforms a BRG image made of floats in the range [0, 255] to whatever - input the current backbone network needs. - - transform is a transform config object (see config.py). - in_channel_order is probably 'BGR' but you do you, kid. - """ - def __init__(self, transform, mean, std, in_channel_order): - self.mean = np.array(mean, dtype=np.float32) - self.std = np.array(std, dtype=np.float32) - self.transform = transform - - # Here I use "Algorithms and Coding" to convert string permutations to numbers - self.channel_map = {c: idx for idx, c in enumerate(in_channel_order)} - self.channel_permutation = [self.channel_map[c] for c in transform.channel_order] - - def __call__(self, img, masks=None, boxes=None, labels=None): - - img = img.astype(np.float32) - - if self.transform.normalize: - img = (img - self.mean) / self.std - elif self.transform.subtract_means: - img = (img - self.mean) - elif self.transform.to_float: - img = img / 255 - - img = img[:, :, self.channel_permutation] - - return img.astype(np.float32), masks, boxes, labels - - - - -class BaseTransform(object): - """ Transorm to be used when evaluating. 
""" - - def __init__(self, mean=MEANS, std=STD): - self.augment = Compose([ - ConvertFromInts(), - Resize(resize_gt=False), - BackboneTransform(cfg.backbone.transform, mean, std, 'BGR') - ]) - - def __call__(self, img, masks=None, boxes=None, labels=None): - return self.augment(img, masks, boxes, labels) - -import torch.nn.functional as F - -class FastBaseTransform(torch.nn.Module): - """ - Transform that does all operations on the GPU for super speed. - This doesn't suppport a lot of config settings and should only be used for production. - Maintain this as necessary. - """ - - def __init__(self): - super().__init__() - - self.mean = torch.Tensor(MEANS).float().cuda()[None, :, None, None] - self.std = torch.Tensor( STD ).float().cuda()[None, :, None, None] - self.transform = cfg.backbone.transform - - def forward(self, img): - self.mean = self.mean.to(img.device) - self.std = self.std.to(img.device) - - # img assumed to be a pytorch BGR image with channel order [n, h, w, c] - if cfg.preserve_aspect_ratio: - _, h, w, _ = img.size() - img_size = Resize.calc_size_preserve_ar(w, h, cfg.max_size) - img_size = (img_size[1], img_size[0]) # Pytorch needs h, w - else: - img_size = (cfg.max_size, cfg.max_size) - - img = img.permute(0, 3, 1, 2).contiguous() - img = F.interpolate(img, img_size, mode='bilinear', align_corners=False) - - if self.transform.normalize: - img = (img - self.mean) / self.std - elif self.transform.subtract_means: - img = (img - self.mean) - elif self.transform.to_float: - img = img / 255 - - if self.transform.channel_order != 'RGB': - raise NotImplementedError - - img = img[:, (2, 1, 0), :, :].contiguous() - - # Return value is in channel order [n, c, h, w] and RGB - return img - -def do_nothing(img=None, masks=None, boxes=None, labels=None): - return img, masks, boxes, labels - - -def enable_if(condition, obj): - return obj if condition else do_nothing - -class SSDAugmentation(object): - """ Transform to be used when training. """ - - def __init__(self, mean=MEANS, std=STD): - self.augment = Compose([ - ConvertFromInts(), - ToAbsoluteCoords(), - enable_if(cfg.augment_photometric_distort, PhotometricDistort()), - enable_if(cfg.augment_expand, Expand(mean)), - enable_if(cfg.augment_random_sample_crop, RandomSampleCrop()), - enable_if(cfg.augment_random_mirror, RandomMirror()), - enable_if(cfg.augment_random_flip, RandomFlip()), - enable_if(cfg.augment_random_flip, RandomRot90()), - Resize(), - enable_if(not cfg.preserve_aspect_ratio, Pad(cfg.max_size, cfg.max_size, mean)), - ToPercentCoords(), - PrepareMasks(cfg.mask_size, cfg.use_gt_bboxes), - BackboneTransform(cfg.backbone.transform, mean, std, 'BGR') - ]) - - def __call__(self, img, masks, boxes, labels): - return self.augment(img, masks, boxes, labels) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import torch +from torchvision import transforms +import cv2 +import numpy as np +import types +from numpy import random +from math import sqrt + +from data import cfg, MEANS, STD + + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])) # [A,B] + area_b = ((box_b[2]-box_b[0]) * + (box_b[3]-box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +class Compose(object): + """Composes several augmentations together. + Args: + transforms (List[Transform]): list of transforms to compose. + Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, masks=None, boxes=None, labels=None): + for t in self.transforms: + img, masks, boxes, labels = t(img, masks, boxes, labels) + return img, masks, boxes, labels + + +class Lambda(object): + """Applies a lambda as a transform.""" + + def __init__(self, lambd): + assert isinstance(lambd, types.LambdaType) + self.lambd = lambd + + def __call__(self, img, masks=None, boxes=None, labels=None): + return self.lambd(img, masks, boxes, labels) + + +class ConvertFromInts(object): + def __call__(self, image, masks=None, boxes=None, labels=None): + return image.astype(np.float32), masks, boxes, labels + + + +class ToAbsoluteCoords(object): + def __call__(self, image, masks=None, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] *= width + boxes[:, 2] *= width + boxes[:, 1] *= height + boxes[:, 3] *= height + + return image, masks, boxes, labels + + +class ToPercentCoords(object): + def __call__(self, image, masks=None, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] /= width + boxes[:, 2] /= width + boxes[:, 1] /= height + boxes[:, 3] /= height + + return image, masks, boxes, labels + + +class Pad(object): + """ + Pads the image to the input width and height, filling the + background with mean and putting the image in the top-left. 
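+    Boxes and labels are passed through unchanged; only the image (and, when
+    pad_gt is True, the masks) are copied into a mean-filled canvas of the
+    requested width and height.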
+ + Note: this expects im_w <= width and im_h <= height + """ + def __init__(self, width, height, mean=MEANS, pad_gt=True): + self.mean = mean + self.width = width + self.height = height + self.pad_gt = pad_gt + + def __call__(self, image, masks, boxes=None, labels=None): + im_h, im_w, depth = image.shape + + expand_image = np.zeros( + (self.height, self.width, depth), + dtype=image.dtype) + expand_image[:, :, :] = self.mean + expand_image[:im_h, :im_w] = image + + if self.pad_gt: + expand_masks = np.zeros( + (masks.shape[0], self.height, self.width), + dtype=masks.dtype) + expand_masks[:,:im_h,:im_w] = masks + masks = expand_masks + + return expand_image, masks, boxes, labels + +class Resize(object): + """ If preserve_aspect_ratio is true, this resizes to an approximate area of max_size * max_size """ + + @staticmethod + def calc_size_preserve_ar(img_w, img_h, max_size): + """ I mathed this one out on the piece of paper. Resulting width*height = approx max_size^2 """ + ratio = sqrt(img_w / img_h) + w = max_size * ratio + h = max_size / ratio + return int(w), int(h) + + def __init__(self, resize_gt=True): + self.resize_gt = resize_gt + self.max_size = cfg.max_size + self.preserve_aspect_ratio = cfg.preserve_aspect_ratio + + def __call__(self, image, masks, boxes, labels=None): + img_h, img_w, _ = image.shape + + if self.preserve_aspect_ratio: + width, height = Resize.calc_size_preserve_ar(img_w, img_h, self.max_size) + else: + width, height = self.max_size, self.max_size + + image = cv2.resize(image, (width, height)) + + if self.resize_gt: + # Act like each object is a color channel + masks = masks.transpose((1, 2, 0)) + masks = cv2.resize(masks, (width, height)) + + # OpenCV resizes a (w,h,1) array to (s,s), so fix that + if len(masks.shape) == 2: + masks = np.expand_dims(masks, 0) + else: + masks = masks.transpose((2, 0, 1)) + + # Scale bounding boxes (which are currently absolute coordinates) + boxes[:, [0, 2]] *= (width / img_w) + boxes[:, [1, 3]] *= (height / img_h) + + # Discard boxes that are smaller than we'd like + w = boxes[:, 2] - boxes[:, 0] + h = boxes[:, 3] - boxes[:, 1] + + keep = (w > cfg.discard_box_width) * (h > cfg.discard_box_height) + masks = masks[keep] + boxes = boxes[keep] + labels['labels'] = labels['labels'][keep] + labels['num_crowds'] = (labels['labels'] < 0).sum() + + return image, masks, boxes, labels + + +class RandomSaturation(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
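+        # Note: the assertion messages above still read "contrast" (they mirror the ones
+        # in RandomContrast); here lower/upper actually bound the saturation scale factor.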
+ + def __call__(self, image, masks=None, boxes=None, labels=None): + if random.randint(2): + image[:, :, 1] *= random.uniform(self.lower, self.upper) + + return image, masks, boxes, labels + + +class RandomHue(object): + def __init__(self, delta=18.0): + assert delta >= 0.0 and delta <= 360.0 + self.delta = delta + + def __call__(self, image, masks=None, boxes=None, labels=None): + if random.randint(2): + image[:, :, 0] += random.uniform(-self.delta, self.delta) + image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 + image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 + return image, masks, boxes, labels + + +class RandomLightingNoise(object): + def __init__(self): + self.perms = ((0, 1, 2), (0, 2, 1), + (1, 0, 2), (1, 2, 0), + (2, 0, 1), (2, 1, 0)) + + def __call__(self, image, masks=None, boxes=None, labels=None): + # Don't shuffle the channels please, why would you do this + + # if random.randint(2): + # swap = self.perms[random.randint(len(self.perms))] + # shuffle = SwapChannels(swap) # shuffle channels + # image = shuffle(image) + return image, masks, boxes, labels + + +class ConvertColor(object): + def __init__(self, current='BGR', transform='HSV'): + self.transform = transform + self.current = current + + def __call__(self, image, masks=None, boxes=None, labels=None): + if self.current == 'BGR' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + elif self.current == 'HSV' and self.transform == 'BGR': + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + else: + raise NotImplementedError + return image, masks, boxes, labels + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + # expects float image + def __call__(self, image, masks=None, boxes=None, labels=None): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image *= alpha + return image, masks, boxes, labels + + +class RandomBrightness(object): + def __init__(self, delta=32): + assert delta >= 0.0 + assert delta <= 255.0 + self.delta = delta + + def __call__(self, image, masks=None, boxes=None, labels=None): + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image += delta + return image, masks, boxes, labels + + +class ToCV2Image(object): + def __call__(self, tensor, masks=None, boxes=None, labels=None): + return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), masks, boxes, labels + + +class ToTensor(object): + def __call__(self, cvimage, masks=None, boxes=None, labels=None): + return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), masks, boxes, labels + + +class RandomSampleCrop(object): + """Crop + Arguments: + img (Image): the image being input during training + boxes (Tensor): the original bounding boxes in pt form + labels (Tensor): the class labels for each bbox + mode (float tuple): the min and max jaccard overlaps + Return: + (img, boxes, classes) + img (Image): the cropped image + boxes (Tensor): the adjusted bounding boxes in pt form + labels (Tensor): the class labels for each bbox + """ + def __init__(self): + self.sample_options = ( + # using entire original input image + None, + # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.4,.7,.9 + (0.1, None), + (0.3, None), + (0.7, None), + (0.9, None), + # randomly sample a patch + (None, None), + ) + + def __call__(self, image, masks, boxes=None, labels=None): + height, width, _ = image.shape + while True: + # randomly choose a mode + mode = random.choice(self.sample_options) + if mode is None: + return image, masks, boxes, labels + + min_iou, max_iou = mode + if min_iou is None: + min_iou = float('-inf') + if max_iou is None: + max_iou = float('inf') + + # max trails (50) + for _ in range(50): + current_image = image + + w = random.uniform(0.3 * width, width) + h = random.uniform(0.3 * height, height) + + # aspect ratio constraint b/t .5 & 2 + if h / w < 0.5 or h / w > 2: + continue + + left = random.uniform(width - w) + top = random.uniform(height - h) + + # convert to integer rect x1,y1,x2,y2 + rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + + # calculate IoU (jaccard overlap) b/t the cropped and gt boxes + overlap = jaccard_numpy(boxes, rect) + + # This piece of code is bugged and does nothing: + # https://github.com/amdegroot/ssd.pytorch/issues/68 + # + # However, when I fixed it with overlap.max() < min_iou, + # it cut the mAP in half (after 8k iterations). So it stays. + # + # is min and max overlap constraint satisfied? if not try again + if overlap.min() < min_iou and max_iou < overlap.max(): + continue + + # cut the crop from the image + current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], + :] + + # keep overlap with gt box IF center in sampled patch + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + + # mask in all gt boxes that above and to the left of centers + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + + # mask in all gt boxes that under and to the right of centers + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + + # mask in that both m1 and m2 are true + mask = m1 * m2 + + # [0 ... 0 for num_gt and then 1 ... 1 for num_crowds] + num_crowds = labels['num_crowds'] + crowd_mask = np.zeros(mask.shape, dtype=np.int32) + + if num_crowds > 0: + crowd_mask[-num_crowds:] = 1 + + # have any valid boxes? 
try again if not + # Also make sure you have at least one regular gt + if not mask.any() or np.sum(1-crowd_mask[mask]) == 0: + continue + + # take only the matching gt masks + current_masks = masks[mask, :, :].copy() + + # take only matching gt boxes + current_boxes = boxes[mask, :].copy() + + # take only matching gt labels + labels['labels'] = labels['labels'][mask] + current_labels = labels + + # We now might have fewer crowd annotations + if num_crowds > 0: + labels['num_crowds'] = np.sum(crowd_mask[mask]) + + # should we use the box left and top corner or the crop's + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], + rect[:2]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, :2] -= rect[:2] + + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], + rect[2:]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, 2:] -= rect[:2] + + # crop the current masks to the same dimensions as the image + current_masks = current_masks[:, rect[1]:rect[3], rect[0]:rect[2]] + + return current_image, current_masks, current_boxes, current_labels + + +class Expand(object): + def __init__(self, mean): + self.mean = mean + + def __call__(self, image, masks, boxes, labels): + if random.randint(2): + return image, masks, boxes, labels + + height, width, depth = image.shape + ratio = random.uniform(1, 4) + left = random.uniform(0, width*ratio - width) + top = random.uniform(0, height*ratio - height) + + expand_image = np.zeros( + (int(height*ratio), int(width*ratio), depth), + dtype=image.dtype) + expand_image[:, :, :] = self.mean + expand_image[int(top):int(top + height), + int(left):int(left + width)] = image + image = expand_image + + expand_masks = np.zeros( + (masks.shape[0], int(height*ratio), int(width*ratio)), + dtype=masks.dtype) + expand_masks[:,int(top):int(top + height), + int(left):int(left + width)] = masks + masks = expand_masks + + boxes = boxes.copy() + boxes[:, :2] += (int(left), int(top)) + boxes[:, 2:] += (int(left), int(top)) + + return image, masks, boxes, labels + + +class RandomMirror(object): + def __call__(self, image, masks, boxes, labels): + _, width, _ = image.shape + if random.randint(2): + image = image[:, ::-1] + masks = masks[:, :, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, masks, boxes, labels + + +class RandomFlip(object): + def __call__(self, image, masks, boxes, labels): + height , _ , _ = image.shape + if random.randint(2): + image = image[::-1, :] + masks = masks[:, ::-1, :] + boxes = boxes.copy() + boxes[:, 1::2] = height - boxes[:, 3::-2] + return image, masks, boxes, labels + + +class RandomRot90(object): + def __call__(self, image, masks, boxes, labels): + old_height , old_width , _ = image.shape + k = random.randint(4) + image = np.rot90(image,k) + masks = np.array([np.rot90(mask,k) for mask in masks]) + boxes = boxes.copy() + for _ in range(k): + boxes = np.array([[box[1], old_width - 1 - box[2], box[3], old_width - 1 - box[0]] for box in boxes]) + old_width, old_height = old_height, old_width + return image, masks, boxes, labels + + +class SwapChannels(object): + """Transforms a tensorized image by swapping the channels in the order + specified in the swap tuple. 
+ Args: + swaps (int triple): final order of channels + eg: (2, 1, 0) + """ + + def __init__(self, swaps): + self.swaps = swaps + + def __call__(self, image): + """ + Args: + image (Tensor): image tensor to be transformed + Return: + a tensor with channels swapped according to swap + """ + # if torch.is_tensor(image): + # image = image.data.cpu().numpy() + # else: + # image = np.array(image) + image = image[:, :, self.swaps] + return image + + +class PhotometricDistort(object): + def __init__(self): + self.pd = [ + RandomContrast(), + ConvertColor(transform='HSV'), + RandomSaturation(), + RandomHue(), + ConvertColor(current='HSV', transform='BGR'), + RandomContrast() + ] + self.rand_brightness = RandomBrightness() + self.rand_light_noise = RandomLightingNoise() + + def __call__(self, image, masks, boxes, labels): + im = image.copy() + im, masks, boxes, labels = self.rand_brightness(im, masks, boxes, labels) + if random.randint(2): + distort = Compose(self.pd[:-1]) + else: + distort = Compose(self.pd[1:]) + im, masks, boxes, labels = distort(im, masks, boxes, labels) + return self.rand_light_noise(im, masks, boxes, labels) + +class PrepareMasks(object): + """ + Prepares the gt masks for use_gt_bboxes by cropping with the gt box + and downsampling the resulting mask to mask_size, mask_size. This + function doesn't do anything if cfg.use_gt_bboxes is False. + """ + + def __init__(self, mask_size, use_gt_bboxes): + self.mask_size = mask_size + self.use_gt_bboxes = use_gt_bboxes + + def __call__(self, image, masks, boxes, labels=None): + if not self.use_gt_bboxes: + return image, masks, boxes, labels + + height, width, _ = image.shape + + new_masks = np.zeros((masks.shape[0], self.mask_size ** 2)) + + for i in range(len(masks)): + x1, y1, x2, y2 = boxes[i, :] + x1 *= width + x2 *= width + y1 *= height + y2 *= height + x1, y1, x2, y2 = (int(x1), int(y1), int(x2), int(y2)) + + # +1 So that if y1=10.6 and y2=10.9 we still have a bounding box + cropped_mask = masks[i, y1:(y2+1), x1:(x2+1)] + scaled_mask = cv2.resize(cropped_mask, (self.mask_size, self.mask_size)) + + new_masks[i, :] = scaled_mask.reshape(1, -1) + + # Binarize + new_masks[new_masks > 0.5] = 1 + new_masks[new_masks <= 0.5] = 0 + + return image, new_masks, boxes, labels + +class BackboneTransform(object): + """ + Transforms a BRG image made of floats in the range [0, 255] to whatever + input the current backbone network needs. + + transform is a transform config object (see config.py). + in_channel_order is probably 'BGR' but you do you, kid. + """ + def __init__(self, transform, mean, std, in_channel_order): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.transform = transform + + # Here I use "Algorithms and Coding" to convert string permutations to numbers + self.channel_map = {c: idx for idx, c in enumerate(in_channel_order)} + self.channel_permutation = [self.channel_map[c] for c in transform.channel_order] + + def __call__(self, img, masks=None, boxes=None, labels=None): + + img = img.astype(np.float32) + + if self.transform.normalize: + img = (img - self.mean) / self.std + elif self.transform.subtract_means: + img = (img - self.mean) + elif self.transform.to_float: + img = img / 255 + + img = img[:, :, self.channel_permutation] + + return img.astype(np.float32), masks, boxes, labels + + + + +class BaseTransform(object): + """ Transorm to be used when evaluating. 
""" + + def __init__(self, mean=MEANS, std=STD): + self.augment = Compose([ + ConvertFromInts(), + Resize(resize_gt=False), + BackboneTransform(cfg.backbone.transform, mean, std, 'BGR') + ]) + + def __call__(self, img, masks=None, boxes=None, labels=None): + return self.augment(img, masks, boxes, labels) + +import torch.nn.functional as F + +class FastBaseTransform(torch.nn.Module): + """ + Transform that does all operations on the GPU for super speed. + This doesn't suppport a lot of config settings and should only be used for production. + Maintain this as necessary. + """ + + def __init__(self): + super().__init__() + + self.mean = torch.Tensor(MEANS).float().cuda()[None, :, None, None] + self.std = torch.Tensor( STD ).float().cuda()[None, :, None, None] + self.transform = cfg.backbone.transform + + def forward(self, img): + self.mean = self.mean.to(img.device) + self.std = self.std.to(img.device) + + # img assumed to be a pytorch BGR image with channel order [n, h, w, c] + if cfg.preserve_aspect_ratio: + _, h, w, _ = img.size() + img_size = Resize.calc_size_preserve_ar(w, h, cfg.max_size) + img_size = (img_size[1], img_size[0]) # Pytorch needs h, w + else: + img_size = (cfg.max_size, cfg.max_size) + + img = img.permute(0, 3, 1, 2).contiguous() + img = F.interpolate(img, img_size, mode='bilinear', align_corners=False) + + if self.transform.normalize: + img = (img - self.mean) / self.std + elif self.transform.subtract_means: + img = (img - self.mean) + elif self.transform.to_float: + img = img / 255 + + if self.transform.channel_order != 'RGB': + raise NotImplementedError + + img = img[:, (2, 1, 0), :, :].contiguous() + + # Return value is in channel order [n, c, h, w] and RGB + return img + +def do_nothing(img=None, masks=None, boxes=None, labels=None): + return img, masks, boxes, labels + + +def enable_if(condition, obj): + return obj if condition else do_nothing + +class SSDAugmentation(object): + """ Transform to be used when training. """ + + def __init__(self, mean=MEANS, std=STD): + self.augment = Compose([ + ConvertFromInts(), + ToAbsoluteCoords(), + enable_if(cfg.augment_photometric_distort, PhotometricDistort()), + enable_if(cfg.augment_expand, Expand(mean)), + enable_if(cfg.augment_random_sample_crop, RandomSampleCrop()), + enable_if(cfg.augment_random_mirror, RandomMirror()), + enable_if(cfg.augment_random_flip, RandomFlip()), + enable_if(cfg.augment_random_flip, RandomRot90()), + Resize(), + enable_if(not cfg.preserve_aspect_ratio, Pad(cfg.max_size, cfg.max_size, mean)), + ToPercentCoords(), + PrepareMasks(cfg.mask_size, cfg.use_gt_bboxes), + BackboneTransform(cfg.backbone.transform, mean, std, 'BGR') + ]) + + def __call__(self, img, masks, boxes, labels): + return self.augment(img, masks, boxes, labels) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/cython_nms.pyx b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/cython_nms.pyx index 4a4a5bda8030ec1c2f9ce9e4a2df9dc76ef437a0..892be872edc6b560c1a5b7bff0c6fca18a929b53 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/cython_nms.pyx +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/cython_nms.pyx @@ -1,74 +1,74 @@ -## Note: Figure out the license details later. 
-# -# Based on: -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -cimport cython -import numpy as np -cimport numpy as np - -cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil: - return a if a >= b else b - -cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil: - return a if a <= b else b - -@cython.boundscheck(False) -@cython.cdivision(True) -@cython.wraparound(False) -def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh): - cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] - cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] - cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] - cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] - cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] - - cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) - cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1] - - cdef int ndets = dets.shape[0] - cdef np.ndarray[np.int_t, ndim=1] suppressed = \ - np.zeros((ndets), dtype=np.int) - - # nominal indices - cdef int _i, _j - # sorted indices - cdef int i, j - # temp variables for box i's (the box currently under consideration) - cdef np.float32_t ix1, iy1, ix2, iy2, iarea - # variables for computing overlap with box j (lower scoring box) - cdef np.float32_t xx1, yy1, xx2, yy2 - cdef np.float32_t w, h - cdef np.float32_t inter, ovr - - with nogil: - for _i in range(ndets): - i = order[_i] - if suppressed[i] == 1: - continue - ix1 = x1[i] - iy1 = y1[i] - ix2 = x2[i] - iy2 = y2[i] - iarea = areas[i] - for _j in range(_i + 1, ndets): - j = order[_j] - if suppressed[j] == 1: - continue - xx1 = max(ix1, x1[j]) - yy1 = max(iy1, y1[j]) - xx2 = min(ix2, x2[j]) - yy2 = min(iy2, y2[j]) - w = max(0.0, xx2 - xx1 + 1) - h = max(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (iarea + areas[j] - inter) - if ovr >= thresh: - suppressed[j] = 1 - - return np.where(suppressed == 0)[0] +## Note: Figure out the license details later. 
+# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +cimport cython +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil: + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil: + return a if a <= b else b + +@cython.boundscheck(False) +@cython.cdivision(True) +@cython.wraparound(False) +def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + with nogil: + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return np.where(suppressed == 0)[0] diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/functions.py b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/functions.py index d9ce98c4f98deed917ef3885611792f2783dce80..6dfeff2d4add5508cae91b2f6442099c514f3eb7 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/functions.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/functions.py @@ -1,227 +1,227 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -import torch.nn as nn -import os -import math -from collections import deque -from pathlib import Path -from layers.interpolate import InterpolateModule - -class MovingAverage(): - """ Keeps an average window of the specified number of items. 
""" - - def __init__(self, max_window_size=1000): - self.max_window_size = max_window_size - self.reset() - - def add(self, elem): - """ Adds an element to the window, removing the earliest element if necessary. """ - if not math.isfinite(elem): - print('Warning: Moving average ignored a value of %f' % elem) - return - - self.window.append(elem) - self.sum += elem - - if len(self.window) > self.max_window_size: - self.sum -= self.window.popleft() - - def append(self, elem): - """ Same as add just more pythonic. """ - self.add(elem) - - def reset(self): - """ Resets the MovingAverage to its initial state. """ - self.window = deque() - self.sum = 0 - - def get_avg(self): - """ Returns the average of the elements in the window. """ - return self.sum / max(len(self.window), 1) - - def __str__(self): - return str(self.get_avg()) - - def __repr__(self): - return repr(self.get_avg()) - - def __len__(self): - return len(self.window) - - -class ProgressBar(): - """ A simple progress bar that just outputs a string. """ - - def __init__(self, length, max_val): - self.max_val = max_val - self.length = length - self.cur_val = 0 - - self.cur_num_bars = -1 - self._update_str() - - def set_val(self, new_val): - self.cur_val = new_val - - if self.cur_val > self.max_val: - self.cur_val = self.max_val - if self.cur_val < 0: - self.cur_val = 0 - - self._update_str() - - def is_finished(self): - return self.cur_val == self.max_val - - def _update_str(self): - num_bars = int(self.length * (self.cur_val / self.max_val)) - - if num_bars != self.cur_num_bars: - self.cur_num_bars = num_bars - self.string = '█' * num_bars + '░' * (self.length - num_bars) - - def __repr__(self): - return self.string - - def __str__(self): - return self.string - - -def init_console(): - """ - Initialize the console to be able to use ANSI escape characters on Windows. - """ - if os.name == 'nt': - from colorama import init - init() - - -class SavePath: - """ - Why is this a class? - Why do I have a class for creating and parsing save paths? - What am I doing with my life? - """ - - def __init__(self, model_name:str, epoch:int, iteration:int): - self.model_name = model_name - self.epoch = epoch - self.iteration = iteration - - def get_path(self, root:str=''): - file_name = self.model_name + '_' + str(self.epoch) + '_' + str(self.iteration) + '.pth' - return os.path.join(root, file_name) - - @staticmethod - def from_str(path:str): - file_name = os.path.basename(path) - - if file_name.endswith('.pth'): - file_name = file_name[:-4] - - params = file_name.split('_') - - if file_name.endswith('interrupt'): - params = params[:-1] - - model_name = '_'.join(params[:-2]) - epoch = params[-2] - iteration = params[-1] - - return SavePath(model_name, int(epoch), int(iteration)) - - @staticmethod - def remove_interrupt(save_folder): - for p in Path(save_folder).glob('*_interrupt.pth'): - p.unlink() - - @staticmethod - def get_interrupt(save_folder): - for p in Path(save_folder).glob('*_interrupt.pth'): - return str(p) - return None - - @staticmethod - def get_latest(save_folder, config): - """ Note: config should be config.name. 
""" - max_iter = -1 - max_name = None - - for p in Path(save_folder).glob(config + '_*'): - path_name = str(p) - - try: - save = SavePath.from_str(path_name) - except: - continue - - if save.model_name == config and save.iteration > max_iter: - max_iter = save.iteration - max_name = path_name - - return max_name - -def make_net(in_channels, conf, include_last_relu=True): - """ - A helper function to take a config setting and turn it into a network. - Used by protonet and extrahead. Returns (network, out_channels) - """ - def make_layer(layer_cfg): - nonlocal in_channels - - # Possible patterns: - # ( 256, 3, {}) -> conv - # ( 256,-2, {}) -> deconv - # (None,-2, {}) -> bilinear interpolate - # ('cat',[],{}) -> concat the subnetworks in the list - # - # You know it would have probably been simpler just to adopt a 'c' 'd' 'u' naming scheme. - # Whatever, it's too late now. - if isinstance(layer_cfg[0], str): - layer_name = layer_cfg[0] - - if layer_name == 'cat': - nets = [make_net(in_channels, x) for x in layer_cfg[1]] - layer = Concat([net[0] for net in nets], layer_cfg[2]) - num_channels = sum([net[1] for net in nets]) - else: - num_channels = layer_cfg[0] - kernel_size = layer_cfg[1] - - if kernel_size > 0: - layer = nn.Conv2d(in_channels, num_channels, kernel_size, **layer_cfg[2]) - else: - if num_channels is None: - layer = InterpolateModule(scale_factor=-kernel_size, mode='bilinear', align_corners=False, **layer_cfg[2]) - else: - layer = nn.ConvTranspose2d(in_channels, num_channels, -kernel_size, **layer_cfg[2]) - - in_channels = num_channels if num_channels is not None else in_channels - - # Don't return a ReLU layer if we're doing an upsample. This probably doesn't affect anything - # output-wise, but there's no need to go through a ReLU here. - # Commented out for backwards compatibility with previous models - # if num_channels is None: - # return [layer] - # else: - return [layer, nn.ReLU(inplace=True)] - - # Use sum to concat together all the component layer lists - net = sum([make_layer(x) for x in conf], []) - if not include_last_relu: - net = net[:-1] - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn as nn +import os +import math +from collections import deque +from pathlib import Path +from layers.interpolate import InterpolateModule + +class MovingAverage(): + """ Keeps an average window of the specified number of items. """ + + def __init__(self, max_window_size=1000): + self.max_window_size = max_window_size + self.reset() + + def add(self, elem): + """ Adds an element to the window, removing the earliest element if necessary. 
""" + if not math.isfinite(elem): + print('Warning: Moving average ignored a value of %f' % elem) + return + + self.window.append(elem) + self.sum += elem + + if len(self.window) > self.max_window_size: + self.sum -= self.window.popleft() + + def append(self, elem): + """ Same as add just more pythonic. """ + self.add(elem) + + def reset(self): + """ Resets the MovingAverage to its initial state. """ + self.window = deque() + self.sum = 0 + + def get_avg(self): + """ Returns the average of the elements in the window. """ + return self.sum / max(len(self.window), 1) + + def __str__(self): + return str(self.get_avg()) + + def __repr__(self): + return repr(self.get_avg()) + + def __len__(self): + return len(self.window) + + +class ProgressBar(): + """ A simple progress bar that just outputs a string. """ + + def __init__(self, length, max_val): + self.max_val = max_val + self.length = length + self.cur_val = 0 + + self.cur_num_bars = -1 + self._update_str() + + def set_val(self, new_val): + self.cur_val = new_val + + if self.cur_val > self.max_val: + self.cur_val = self.max_val + if self.cur_val < 0: + self.cur_val = 0 + + self._update_str() + + def is_finished(self): + return self.cur_val == self.max_val + + def _update_str(self): + num_bars = int(self.length * (self.cur_val / self.max_val)) + + if num_bars != self.cur_num_bars: + self.cur_num_bars = num_bars + self.string = '█' * num_bars + '░' * (self.length - num_bars) + + def __repr__(self): + return self.string + + def __str__(self): + return self.string + + +def init_console(): + """ + Initialize the console to be able to use ANSI escape characters on Windows. + """ + if os.name == 'nt': + from colorama import init + init() + + +class SavePath: + """ + Why is this a class? + Why do I have a class for creating and parsing save paths? + What am I doing with my life? + """ + + def __init__(self, model_name:str, epoch:int, iteration:int): + self.model_name = model_name + self.epoch = epoch + self.iteration = iteration + + def get_path(self, root:str=''): + file_name = self.model_name + '_' + str(self.epoch) + '_' + str(self.iteration) + '.pth' + return os.path.join(root, file_name) + + @staticmethod + def from_str(path:str): + file_name = os.path.basename(path) + + if file_name.endswith('.pth'): + file_name = file_name[:-4] + + params = file_name.split('_') + + if file_name.endswith('interrupt'): + params = params[:-1] + + model_name = '_'.join(params[:-2]) + epoch = params[-2] + iteration = params[-1] + + return SavePath(model_name, int(epoch), int(iteration)) + + @staticmethod + def remove_interrupt(save_folder): + for p in Path(save_folder).glob('*_interrupt.pth'): + p.unlink() + + @staticmethod + def get_interrupt(save_folder): + for p in Path(save_folder).glob('*_interrupt.pth'): + return str(p) + return None + + @staticmethod + def get_latest(save_folder, config): + """ Note: config should be config.name. """ + max_iter = -1 + max_name = None + + for p in Path(save_folder).glob(config + '_*'): + path_name = str(p) + + try: + save = SavePath.from_str(path_name) + except: + continue + + if save.model_name == config and save.iteration > max_iter: + max_iter = save.iteration + max_name = path_name + + return max_name + +def make_net(in_channels, conf, include_last_relu=True): + """ + A helper function to take a config setting and turn it into a network. + Used by protonet and extrahead. 
Returns (network, out_channels) + """ + def make_layer(layer_cfg): + nonlocal in_channels + + # Possible patterns: + # ( 256, 3, {}) -> conv + # ( 256,-2, {}) -> deconv + # (None,-2, {}) -> bilinear interpolate + # ('cat',[],{}) -> concat the subnetworks in the list + # + # You know it would have probably been simpler just to adopt a 'c' 'd' 'u' naming scheme. + # Whatever, it's too late now. + if isinstance(layer_cfg[0], str): + layer_name = layer_cfg[0] + + if layer_name == 'cat': + nets = [make_net(in_channels, x) for x in layer_cfg[1]] + layer = Concat([net[0] for net in nets], layer_cfg[2]) + num_channels = sum([net[1] for net in nets]) + else: + num_channels = layer_cfg[0] + kernel_size = layer_cfg[1] + + if kernel_size > 0: + layer = nn.Conv2d(in_channels, num_channels, kernel_size, **layer_cfg[2]) + else: + if num_channels is None: + layer = InterpolateModule(scale_factor=-kernel_size, mode='bilinear', align_corners=False, **layer_cfg[2]) + else: + layer = nn.ConvTranspose2d(in_channels, num_channels, -kernel_size, **layer_cfg[2]) + + in_channels = num_channels if num_channels is not None else in_channels + + # Don't return a ReLU layer if we're doing an upsample. This probably doesn't affect anything + # output-wise, but there's no need to go through a ReLU here. + # Commented out for backwards compatibility with previous models + # if num_channels is None: + # return [layer] + # else: + return [layer, nn.ReLU(inplace=True)] + + # Use sum to concat together all the component layer lists + net = sum([make_layer(x) for x in conf], []) + if not include_last_relu: + net = net[:-1] + return nn.Sequential(*(net)), in_channels \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/logger.py b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/logger.py index cfc407bad59e34b006d6f2b94af7691f1dbb22eb..7f84f57816ad4166b2f1f9a8dfbdc3c6bf92dc6e 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/logger.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/logger.py @@ -1,503 +1,503 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import os -import json -import time -import sys - -from typing import Union -import datetime - -from collections import defaultdict -import matplotlib.pyplot as plt -import numpy as np - -# Because Python's package heierarchy system sucks -if __name__ == '__main__': - from utils.nvinfo import gpu_info, visible_gpus, nvsmi_available - from utils.functions import MovingAverage -else: - from .nvinfo import gpu_info, visible_gpus, nvsmi_available - from .functions import MovingAverage - -class Log: - """ - A class to log information during training per information and save it out. - It also can include extra debug information like GPU usage / temp automatically. - - Extra args: - - session_data: If you have any data unique to this session, put it here. 
- - overwrite: Whether or not to overwrite a pre-existing log with this name. - - log_gpu_stats: Whether or not to log gpu information like temp, usage, memory. - Note that this requires nvidia-smi to be present in your PATH. - - log_time: Also log the time in each iteration. - """ - - def __init__(self, log_name:str, log_dir:str='logs/', session_data:dict={}, - overwrite:bool=False, log_gpu_stats:bool=True, log_time:bool=True): - - if log_gpu_stats and not nvsmi_available(): - print('Warning: Log created with log_gpu_stats=True, but nvidia-smi ' \ - 'was not found. Setting log_gpu_stats to False.') - log_gpu_stats = False - - if not os.path.exists(log_dir): - os.makedirs(log_dir) - self.log_path = os.path.join(log_dir, log_name + '.log') - - # if os.path.exists(self.log_path) and overwrite: - # os.unlink(self.log_path) - - if os.path.exists(self.log_path): - # Log already exists, so we're going to add to it. Increment the session counter. - with open(self.log_path, 'r') as f: - for last in f: pass - - if len(last) > 1: - self.session = json.loads(last)['session'] + 1 - else: - self.session = 0 - else: - self.session = 0 - - - self.log_gpu_stats = log_gpu_stats - self.log_time = log_time - - if self.log_gpu_stats: - self.visible_gpus = visible_gpus() - - - self._log_session_header(session_data) - - - def _log_session_header(self, session_data:dict): - """ - Log information that does not change between iterations here. - This is to cut down on the file size so you're not outputing this every iteration. - """ - info = {} - info['type'] = 'session' - info['session'] = self.session - - info['data'] = session_data - - if self.log_gpu_stats: - keys = ['idx', 'name', 'uuid', 'pwr_cap', 'mem_total'] - - gpus = gpu_info() - info['gpus'] = [{k: gpus[i][k] for k in keys} for i in self.visible_gpus] - - if self.log_time: - info['time'] = time.time() - - out = json.dumps(info) + '\n' - - with open(self.log_path, 'a') as f: - f.write(out) - - - def log(self, type:str, data:dict={}, **kwdargs): - """ - Add an iteration to the log with the specified data points. - Type should be the type of information this is (e.g., train, valid, etc.) - - You can either pass data points as kwdargs, or as a dictionary (or both!). - Values should be json-serializable. - """ - info = {} - - info['type'] = type - info['session'] = self.session - - kwdargs.update(data) - info['data'] = kwdargs - - if self.log_gpu_stats: - keys = ['fan_spd', 'temp', 'pwr_used', 'mem_used', 'util'] - - gpus = gpu_info() - info['gpus'] = [{k: gpus[i][k] for k in keys} for i in self.visible_gpus] - - if self.log_time: - info['time'] = time.time() - - - out = json.dumps(info) + '\n' - - with open(self.log_path, 'a') as f: - f.write(out) - - -class LogEntry(): - """ A class that allows you to navigate a dictonary using x.a.b[2].c, etc. 
""" - - def __init__(self, entry:Union[dict, list]): - self._ = entry - - def __getattr__(self, name): - if name == '_': - return self.__dict__['_'] - - res = self.__dict__['_'][name] - - if type(res) == dict or type(res) == list: - return LogEntry(res) - else: - return res - - def __getitem__(self, name): - return self.__getattr__(name) - - def __len__(self): - return len(self.__dict__['_']) - -class LogVisualizer(): - - COLORS = [ - 'xkcd:azure', - 'xkcd:coral', - 'xkcd:turquoise', - 'xkcd:orchid', - 'xkcd:orange', - - 'xkcd:blue', - 'xkcd:red', - 'xkcd:teal', - 'xkcd:magenta', - 'xkcd:orangered' - ] - - def __init__(self): - self.logs = [] - self.total_logs = [] - self.log_names = [] - - def _decode(self, query:str) -> list: - path, select = (query.split(';') + [''])[:2] - - if select.strip() == '': - select = lambda x, s: True - else: - select = eval('lambda x, s: ' + select) - - if path.strip() == '': - path = lambda x, s: x - else: - path = eval('lambda x, s: ' + path) - - return path, select - - def _follow(self, entry:LogEntry, query:list): - path, select = query - - try: - if select(entry, entry._s): - res = path(entry, entry._s) - - if type(res) == LogEntry: - return res.__dict__['_'] - else: - return res - else: - return None - except (KeyError, IndexError): - return None - - def _color(self, idx:int): - return self.COLORS[idx % len(self.COLORS)] - - def sessions(self, path:str): - """ Prints statistics about the sessions in the file. """ - - if not os.path.exists(path): - print(path + ' doesn\'t exist!') - return - - cur_session = None - cur_time = 0 - last_time = 0 - num_entries = 0 - - def pop_session(): - delta = last_time - cur_time - time_str = str(datetime.timedelta(seconds=delta)).split('.')[0] - print('Session % 3d: % 8d entries | %s elapsed' % (cur_session, num_entries, time_str)) - - with open(path, 'r') as f: - for line in f: - line = line.strip() - if len(line) > 0: - js = json.loads(line) - if js['type'] == 'session': - if cur_session is not None: - pop_session() - cur_time = js['time'] - cur_session = js['session'] - num_entries = 0 - last_time = js['time'] - num_entries += 1 - - pop_session() - - def add(self, path:str, session:Union[int,list]=None): - """ Add a log file to the list of logs being considered. """ - - log = defaultdict(lambda: []) - total_log = [] - - if not os.path.exists(path): - print(path + ' doesn\'t exist!') - return - - session_idx = 0 - ignoring = True - - def valid(idx): - if session is None: - return True - elif type(session) == int: - return (idx == session) - else: - return idx in session - - with open(path, 'r') as f: - for line in f: - line = line.strip() - if len(line) > 0: - js = json.loads(line) - - _type = js['type'] - if _type == 'session': - session_idx = js['session'] - ignoring = not valid(session_idx) - - if not ignoring: - ljs = LogEntry(js) - if _type == 'session': - js['_s'] = ljs - else: - js['_s'] =log['session'][-1] - log[_type].append(ljs) - total_log.append(ljs) - - name = os.path.basename(path) - if session is not None: - name += ' (Session %s)' % session - - self.logs.append(log) - self.total_logs.append(total_log) - self.log_names.append(name) - - def query(self, x:Union[str, list], entry_type:str=None, x_idx:int=None, log_idx:int=None) -> list: - """ - Given a query string (can be already decoded for faster computation), query the entire log - and return all values found by that query. If both log_idx and x_idx is None, this will be - a list of lists in the form [log_idx][result_idx]. 
If x_idx is not None, then the result - will be a list of [log_idx]. If both are not none, the return value will be a single query - return value. With entry_type=None, this will search the entire log. - """ - - if type(x) is not list: - x = self._decode(x) - - res = [] - - for idx in (range(len(self.logs)) if log_idx is None else [log_idx]): - candidates = [] - log = self.total_logs[idx] if entry_type is None else self.logs[idx][entry_type] - - for entry in log: - candidate = self._follow(entry, x) - if candidate is not None: - candidates.append(candidate) - - if x_idx is not None: - candidates = candidates[x_idx] - res.append(candidates) - - if log_idx is not None: - res = res[0] - return res - - def check(self, entry_type:str, x:str): - """ Checks the log for the valid keys for this input. """ - keys = set() - x = self._decode(x) - - for log in self.logs: - for datum in log[entry_type]: - res = self._follow(datum, x) - - if type(res) == dict: - for key in res.keys(): - keys.add(key) - elif type(res) == list: - keys.add('< %d' % len(res)) - - return list(keys) - - def plot(self, entry_type:str, x:str, y:str, smoothness:int=0): - """ Plot sequential log data. """ - - query_x = self._decode(x) - query_y = self._decode(y) - - for idx, (log, name) in enumerate(zip(self.logs, self.log_names)): - log = log[entry_type] - - if smoothness > 1: - avg = MovingAverage(smoothness) - - _x = [] - _y = [] - - for datum in log: - val_x = self._follow(datum, query_x) - val_y = self._follow(datum, query_y) - - if val_x is not None and val_y is not None: - if smoothness > 1: - avg.append(val_y) - val_y = avg.get_avg() - - if len(avg) < smoothness // 10: - continue - - _x.append(val_x) - _y.append(val_y) - - plt.plot(_x, _y, color=self._color(idx), label=name) - - plt.title(y.replace('x.', entry_type + '.')) - plt.legend() - plt.grid(linestyle=':', linewidth=0.5) - plt.show() - - def bar(self, entry_type:str, x:str, labels:list=None, diff:bool=False, x_idx:int=-1): - """ Plot a bar chart. The result of x should be list or dictionary. 
""" - - query = self._decode(x) - - data_points = [] - - for idx, (log, name) in enumerate(zip(self.logs, self.log_names)): - log = log[entry_type] - - candidates = [] - - for entry in log: - test = self._follow(entry, query) - - if type(test) == dict: - candidates.append(test) - elif type(test) == list: - candidates.append({idx: v for idx, v in enumerate(test)}) - - if len(candidates) > 0: - data_points.append((name, candidates[x_idx])) - - if len(data_points) == 0: - print('Warning: Nothing to show in bar chart!') - return - - names = [x[0] for x in data_points] - data_points = [x[1] for x in data_points] - - # Construct the labels for the data - if labels is not None: - data_labels = labels - else: - data_labels = set() - for datum in data_points: - for k in datum: - data_labels.add(k) - - data_labels = list(data_labels) - data_labels.sort() - - - data_values = [[(datum[k] if k in datum else None) for k in data_labels] for datum in data_points] - - if diff: - for idx in reversed(range(len(data_values))): - for jdx in range(len(data_labels)): - if data_values[0][jdx] is None or data_values[idx][jdx] is None: - data_values[idx][jdx] = None - else: - data_values[idx][jdx] -= data_values[0][jdx] - - - series_labels = names - - # Plot the graph now - num_bars = len(series_labels) - bar_width = 1 / (num_bars + 1) - - # Set position of bar on X axis - positions = [np.arange(len(data_labels))] - for _ in range(1, num_bars): - positions.append([x + bar_width for x in positions[-1]]) - - # Make the plot - for idx, (series, data, pos) in enumerate(zip(series_labels, data_values, positions)): - plt.bar(pos, data, color=self._color(idx), width=bar_width, edgecolor='white', label=series) - - # Add xticks on the middle of the group bars - plt.title(x.replace('x.', entry_type + '.') + (' diff' if diff else '')) - plt.xticks([r + bar_width for r in range(len(data_labels))], data_labels) - - # Create legend & Show graphic - plt.legend() - plt.show() - - - - def elapsed_time(self, cond1:str='', cond2:str='', legible:bool=True) -> list: - """ - Returns the elapsed time between two entries based on the given conditionals. - If a query isn't specified, the first / last entry will be used. The first query - uses the first value and the second query uses the last value in the results. - - Setting legible to true returns human-readable results, while false returns seconds. - """ - q1 = 'x.time; ' + cond1 - q2 = 'x.time; ' + cond2 - - x1 = self.query(q1, x_idx=0) - x2 = self.query(q2, x_idx=-1) - - diff = (lambda x: str(datetime.timedelta(seconds=x)).split('.')[0]) if legible else lambda x: x - - return [diff(b - a) for a, b in zip(x1, x2)] - - - - - - - - - - - -if __name__ == '__main__': - if len(sys.argv) < 4+1: - print('Usage: python utils/logger.py ') - exit() - - vis = LogVisualizer() - vis.add(sys.argv[1]) - vis.plot(sys.argv[2], sys.argv[3], sys.argv[4]) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import os +import json +import time +import sys + +from typing import Union +import datetime + +from collections import defaultdict +import matplotlib.pyplot as plt +import numpy as np + +# Because Python's package heierarchy system sucks +if __name__ == '__main__': + from utils.nvinfo import gpu_info, visible_gpus, nvsmi_available + from utils.functions import MovingAverage +else: + from .nvinfo import gpu_info, visible_gpus, nvsmi_available + from .functions import MovingAverage + +class Log: + """ + A class to log information during training per information and save it out. + It also can include extra debug information like GPU usage / temp automatically. + + Extra args: + - session_data: If you have any data unique to this session, put it here. + - overwrite: Whether or not to overwrite a pre-existing log with this name. + - log_gpu_stats: Whether or not to log gpu information like temp, usage, memory. + Note that this requires nvidia-smi to be present in your PATH. + - log_time: Also log the time in each iteration. + """ + + def __init__(self, log_name:str, log_dir:str='logs/', session_data:dict={}, + overwrite:bool=False, log_gpu_stats:bool=True, log_time:bool=True): + + if log_gpu_stats and not nvsmi_available(): + print('Warning: Log created with log_gpu_stats=True, but nvidia-smi ' \ + 'was not found. Setting log_gpu_stats to False.') + log_gpu_stats = False + + if not os.path.exists(log_dir): + os.makedirs(log_dir) + self.log_path = os.path.join(log_dir, log_name + '.log') + + # if os.path.exists(self.log_path) and overwrite: + # os.unlink(self.log_path) + + if os.path.exists(self.log_path): + # Log already exists, so we're going to add to it. Increment the session counter. + with open(self.log_path, 'r') as f: + for last in f: pass + + if len(last) > 1: + self.session = json.loads(last)['session'] + 1 + else: + self.session = 0 + else: + self.session = 0 + + + self.log_gpu_stats = log_gpu_stats + self.log_time = log_time + + if self.log_gpu_stats: + self.visible_gpus = visible_gpus() + + + self._log_session_header(session_data) + + + def _log_session_header(self, session_data:dict): + """ + Log information that does not change between iterations here. + This is to cut down on the file size so you're not outputing this every iteration. + """ + info = {} + info['type'] = 'session' + info['session'] = self.session + + info['data'] = session_data + + if self.log_gpu_stats: + keys = ['idx', 'name', 'uuid', 'pwr_cap', 'mem_total'] + + gpus = gpu_info() + info['gpus'] = [{k: gpus[i][k] for k in keys} for i in self.visible_gpus] + + if self.log_time: + info['time'] = time.time() + + out = json.dumps(info) + '\n' + + with open(self.log_path, 'a') as f: + f.write(out) + + + def log(self, type:str, data:dict={}, **kwdargs): + """ + Add an iteration to the log with the specified data points. + Type should be the type of information this is (e.g., train, valid, etc.) + + You can either pass data points as kwdargs, or as a dictionary (or both!). + Values should be json-serializable. 
+ """ + info = {} + + info['type'] = type + info['session'] = self.session + + kwdargs.update(data) + info['data'] = kwdargs + + if self.log_gpu_stats: + keys = ['fan_spd', 'temp', 'pwr_used', 'mem_used', 'util'] + + gpus = gpu_info() + info['gpus'] = [{k: gpus[i][k] for k in keys} for i in self.visible_gpus] + + if self.log_time: + info['time'] = time.time() + + + out = json.dumps(info) + '\n' + + with open(self.log_path, 'a') as f: + f.write(out) + + +class LogEntry(): + """ A class that allows you to navigate a dictonary using x.a.b[2].c, etc. """ + + def __init__(self, entry:Union[dict, list]): + self._ = entry + + def __getattr__(self, name): + if name == '_': + return self.__dict__['_'] + + res = self.__dict__['_'][name] + + if type(res) == dict or type(res) == list: + return LogEntry(res) + else: + return res + + def __getitem__(self, name): + return self.__getattr__(name) + + def __len__(self): + return len(self.__dict__['_']) + +class LogVisualizer(): + + COLORS = [ + 'xkcd:azure', + 'xkcd:coral', + 'xkcd:turquoise', + 'xkcd:orchid', + 'xkcd:orange', + + 'xkcd:blue', + 'xkcd:red', + 'xkcd:teal', + 'xkcd:magenta', + 'xkcd:orangered' + ] + + def __init__(self): + self.logs = [] + self.total_logs = [] + self.log_names = [] + + def _decode(self, query:str) -> list: + path, select = (query.split(';') + [''])[:2] + + if select.strip() == '': + select = lambda x, s: True + else: + select = eval('lambda x, s: ' + select) + + if path.strip() == '': + path = lambda x, s: x + else: + path = eval('lambda x, s: ' + path) + + return path, select + + def _follow(self, entry:LogEntry, query:list): + path, select = query + + try: + if select(entry, entry._s): + res = path(entry, entry._s) + + if type(res) == LogEntry: + return res.__dict__['_'] + else: + return res + else: + return None + except (KeyError, IndexError): + return None + + def _color(self, idx:int): + return self.COLORS[idx % len(self.COLORS)] + + def sessions(self, path:str): + """ Prints statistics about the sessions in the file. """ + + if not os.path.exists(path): + print(path + ' doesn\'t exist!') + return + + cur_session = None + cur_time = 0 + last_time = 0 + num_entries = 0 + + def pop_session(): + delta = last_time - cur_time + time_str = str(datetime.timedelta(seconds=delta)).split('.')[0] + print('Session % 3d: % 8d entries | %s elapsed' % (cur_session, num_entries, time_str)) + + with open(path, 'r') as f: + for line in f: + line = line.strip() + if len(line) > 0: + js = json.loads(line) + if js['type'] == 'session': + if cur_session is not None: + pop_session() + cur_time = js['time'] + cur_session = js['session'] + num_entries = 0 + last_time = js['time'] + num_entries += 1 + + pop_session() + + def add(self, path:str, session:Union[int,list]=None): + """ Add a log file to the list of logs being considered. 
""" + + log = defaultdict(lambda: []) + total_log = [] + + if not os.path.exists(path): + print(path + ' doesn\'t exist!') + return + + session_idx = 0 + ignoring = True + + def valid(idx): + if session is None: + return True + elif type(session) == int: + return (idx == session) + else: + return idx in session + + with open(path, 'r') as f: + for line in f: + line = line.strip() + if len(line) > 0: + js = json.loads(line) + + _type = js['type'] + if _type == 'session': + session_idx = js['session'] + ignoring = not valid(session_idx) + + if not ignoring: + ljs = LogEntry(js) + if _type == 'session': + js['_s'] = ljs + else: + js['_s'] =log['session'][-1] + log[_type].append(ljs) + total_log.append(ljs) + + name = os.path.basename(path) + if session is not None: + name += ' (Session %s)' % session + + self.logs.append(log) + self.total_logs.append(total_log) + self.log_names.append(name) + + def query(self, x:Union[str, list], entry_type:str=None, x_idx:int=None, log_idx:int=None) -> list: + """ + Given a query string (can be already decoded for faster computation), query the entire log + and return all values found by that query. If both log_idx and x_idx is None, this will be + a list of lists in the form [log_idx][result_idx]. If x_idx is not None, then the result + will be a list of [log_idx]. If both are not none, the return value will be a single query + return value. With entry_type=None, this will search the entire log. + """ + + if type(x) is not list: + x = self._decode(x) + + res = [] + + for idx in (range(len(self.logs)) if log_idx is None else [log_idx]): + candidates = [] + log = self.total_logs[idx] if entry_type is None else self.logs[idx][entry_type] + + for entry in log: + candidate = self._follow(entry, x) + if candidate is not None: + candidates.append(candidate) + + if x_idx is not None: + candidates = candidates[x_idx] + res.append(candidates) + + if log_idx is not None: + res = res[0] + return res + + def check(self, entry_type:str, x:str): + """ Checks the log for the valid keys for this input. """ + keys = set() + x = self._decode(x) + + for log in self.logs: + for datum in log[entry_type]: + res = self._follow(datum, x) + + if type(res) == dict: + for key in res.keys(): + keys.add(key) + elif type(res) == list: + keys.add('< %d' % len(res)) + + return list(keys) + + def plot(self, entry_type:str, x:str, y:str, smoothness:int=0): + """ Plot sequential log data. """ + + query_x = self._decode(x) + query_y = self._decode(y) + + for idx, (log, name) in enumerate(zip(self.logs, self.log_names)): + log = log[entry_type] + + if smoothness > 1: + avg = MovingAverage(smoothness) + + _x = [] + _y = [] + + for datum in log: + val_x = self._follow(datum, query_x) + val_y = self._follow(datum, query_y) + + if val_x is not None and val_y is not None: + if smoothness > 1: + avg.append(val_y) + val_y = avg.get_avg() + + if len(avg) < smoothness // 10: + continue + + _x.append(val_x) + _y.append(val_y) + + plt.plot(_x, _y, color=self._color(idx), label=name) + + plt.title(y.replace('x.', entry_type + '.')) + plt.legend() + plt.grid(linestyle=':', linewidth=0.5) + plt.show() + + def bar(self, entry_type:str, x:str, labels:list=None, diff:bool=False, x_idx:int=-1): + """ Plot a bar chart. The result of x should be list or dictionary. 
""" + + query = self._decode(x) + + data_points = [] + + for idx, (log, name) in enumerate(zip(self.logs, self.log_names)): + log = log[entry_type] + + candidates = [] + + for entry in log: + test = self._follow(entry, query) + + if type(test) == dict: + candidates.append(test) + elif type(test) == list: + candidates.append({idx: v for idx, v in enumerate(test)}) + + if len(candidates) > 0: + data_points.append((name, candidates[x_idx])) + + if len(data_points) == 0: + print('Warning: Nothing to show in bar chart!') + return + + names = [x[0] for x in data_points] + data_points = [x[1] for x in data_points] + + # Construct the labels for the data + if labels is not None: + data_labels = labels + else: + data_labels = set() + for datum in data_points: + for k in datum: + data_labels.add(k) + + data_labels = list(data_labels) + data_labels.sort() + + + data_values = [[(datum[k] if k in datum else None) for k in data_labels] for datum in data_points] + + if diff: + for idx in reversed(range(len(data_values))): + for jdx in range(len(data_labels)): + if data_values[0][jdx] is None or data_values[idx][jdx] is None: + data_values[idx][jdx] = None + else: + data_values[idx][jdx] -= data_values[0][jdx] + + + series_labels = names + + # Plot the graph now + num_bars = len(series_labels) + bar_width = 1 / (num_bars + 1) + + # Set position of bar on X axis + positions = [np.arange(len(data_labels))] + for _ in range(1, num_bars): + positions.append([x + bar_width for x in positions[-1]]) + + # Make the plot + for idx, (series, data, pos) in enumerate(zip(series_labels, data_values, positions)): + plt.bar(pos, data, color=self._color(idx), width=bar_width, edgecolor='white', label=series) + + # Add xticks on the middle of the group bars + plt.title(x.replace('x.', entry_type + '.') + (' diff' if diff else '')) + plt.xticks([r + bar_width for r in range(len(data_labels))], data_labels) + + # Create legend & Show graphic + plt.legend() + plt.show() + + + + def elapsed_time(self, cond1:str='', cond2:str='', legible:bool=True) -> list: + """ + Returns the elapsed time between two entries based on the given conditionals. + If a query isn't specified, the first / last entry will be used. The first query + uses the first value and the second query uses the last value in the results. + + Setting legible to true returns human-readable results, while false returns seconds. + """ + q1 = 'x.time; ' + cond1 + q2 = 'x.time; ' + cond2 + + x1 = self.query(q1, x_idx=0) + x2 = self.query(q2, x_idx=-1) + + diff = (lambda x: str(datetime.timedelta(seconds=x)).split('.')[0]) if legible else lambda x: x + + return [diff(b - a) for a, b in zip(x1, x2)] + + + + + + + + + + + +if __name__ == '__main__': + if len(sys.argv) < 4+1: + print('Usage: python utils/logger.py ') + exit() + + vis = LogVisualizer() + vis.add(sys.argv[1]) + vis.plot(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/nvinfo.py b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/nvinfo.py index 689abc140c4be56e23c044208bcfb7cb1a5e9099..62052d26838ded8ec18b82e484fa7ec05b9a2743 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/nvinfo.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/nvinfo.py @@ -1,77 +1,77 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# My version of nvgpu because nvgpu didn't have all the information I was looking for. -import re -import subprocess -import shutil -import os - -def gpu_info() -> list: - """ - Returns a dictionary of stats mined from nvidia-smi for each gpu in a list. - Adapted from nvgpu: https://pypi.org/project/nvgpu/, but mine has more info. - """ - gpus = [line for line in _run_cmd(['nvidia-smi', '-L']) if line] - gpu_infos = [re.match('GPU ([0-9]+): ([^(]+) \(UUID: ([^)]+)\)', gpu).groups() for gpu in gpus] - gpu_infos = [dict(zip(['idx', 'name', 'uuid'], info)) for info in gpu_infos] - gpu_count = len(gpus) - - lines = _run_cmd(['nvidia-smi']) - selected_lines = lines[7:7 + 3 * gpu_count] - for i in range(gpu_count): - mem_used, mem_total = [int(m.strip().replace('MiB', '')) for m in - selected_lines[3 * i + 1].split('|')[2].strip().split('/')] - - pw_tmp_info, mem_info, util_info = [x.strip() for x in selected_lines[3 * i + 1].split('|')[1:-1]] - - pw_tmp_info = [x[:-1] for x in pw_tmp_info.split(' ') if len(x) > 0] - fan_speed, temperature, pwr_used, pwr_cap = [int(pw_tmp_info[i]) for i in (0, 1, 3, 5)] - gpu_infos[i]['fan_spd' ] = fan_speed - gpu_infos[i]['temp' ] = temperature - gpu_infos[i]['pwr_used'] = pwr_used - gpu_infos[i]['pwr_cap' ] = pwr_cap - - mem_used, mem_total = [int(x) for x in mem_info.replace('MiB', '').split(' / ')] - gpu_infos[i]['mem_used' ] = mem_used - gpu_infos[i]['mem_total'] = mem_total - - utilization = int(util_info.split(' ')[0][:-1]) - gpu_infos[i]['util'] = utilization - - gpu_infos[i]['idx'] = int(gpu_infos[i]['idx']) - - return gpu_infos - -def nvsmi_available() -> bool: - """ Returns whether or not nvidia-smi is present in this system's PATH. """ - return shutil.which('nvidia-smi') is not None - - -def visible_gpus() -> list: - """ Returns a list of the indexes of all the gpus visible to pytorch. """ - - if 'CUDA_VISIBLE_DEVICES' not in os.environ: - return list(range(len(gpu_info()))) - else: - return [int(x.strip()) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')] - - - - -def _run_cmd(cmd:list) -> list: - """ Runs a command and returns a list of output lines. """ - output = subprocess.check_output(cmd) - output = output.decode('UTF-8') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# My version of nvgpu because nvgpu didn't have all the information I was looking for. 
+import re +import subprocess +import shutil +import os + +def gpu_info() -> list: + """ + Returns a dictionary of stats mined from nvidia-smi for each gpu in a list. + Adapted from nvgpu: https://pypi.org/project/nvgpu/, but mine has more info. + """ + gpus = [line for line in _run_cmd(['nvidia-smi', '-L']) if line] + gpu_infos = [re.match('GPU ([0-9]+): ([^(]+) \(UUID: ([^)]+)\)', gpu).groups() for gpu in gpus] + gpu_infos = [dict(zip(['idx', 'name', 'uuid'], info)) for info in gpu_infos] + gpu_count = len(gpus) + + lines = _run_cmd(['nvidia-smi']) + selected_lines = lines[7:7 + 3 * gpu_count] + for i in range(gpu_count): + mem_used, mem_total = [int(m.strip().replace('MiB', '')) for m in + selected_lines[3 * i + 1].split('|')[2].strip().split('/')] + + pw_tmp_info, mem_info, util_info = [x.strip() for x in selected_lines[3 * i + 1].split('|')[1:-1]] + + pw_tmp_info = [x[:-1] for x in pw_tmp_info.split(' ') if len(x) > 0] + fan_speed, temperature, pwr_used, pwr_cap = [int(pw_tmp_info[i]) for i in (0, 1, 3, 5)] + gpu_infos[i]['fan_spd' ] = fan_speed + gpu_infos[i]['temp' ] = temperature + gpu_infos[i]['pwr_used'] = pwr_used + gpu_infos[i]['pwr_cap' ] = pwr_cap + + mem_used, mem_total = [int(x) for x in mem_info.replace('MiB', '').split(' / ')] + gpu_infos[i]['mem_used' ] = mem_used + gpu_infos[i]['mem_total'] = mem_total + + utilization = int(util_info.split(' ')[0][:-1]) + gpu_infos[i]['util'] = utilization + + gpu_infos[i]['idx'] = int(gpu_infos[i]['idx']) + + return gpu_infos + +def nvsmi_available() -> bool: + """ Returns whether or not nvidia-smi is present in this system's PATH. """ + return shutil.which('nvidia-smi') is not None + + +def visible_gpus() -> list: + """ Returns a list of the indexes of all the gpus visible to pytorch. """ + + if 'CUDA_VISIBLE_DEVICES' not in os.environ: + return list(range(len(gpu_info()))) + else: + return [int(x.strip()) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')] + + + + +def _run_cmd(cmd:list) -> list: + """ Runs a command and returns a list of output lines. """ + output = subprocess.check_output(cmd) + output = output.decode('UTF-8') return output.split('\n') \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/timer.py b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/timer.py index f5f4404a81b5b364c4fb19ccc7a7951a6cc8b56c..2323e97e471c9b3a1fdfe3b90a578bbe35469d09 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/utils/timer.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/utils/timer.py @@ -1,145 +1,145 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import time -from collections import defaultdict - -_total_times = defaultdict(lambda: 0) -_start_times = defaultdict(lambda: -1) -_disabled_names = set() -_timer_stack = [] -_running_timer = None -_disable_all = False - -def disable_all(): - global _disable_all - _disable_all = True - -def enable_all(): - global _disable_all - _disable_all = False - -def disable(fn_name): - """ Disables the given function name fom being considered for the average or outputted in print_stats. """ - _disabled_names.add(fn_name) - -def enable(fn_name): - """ Enables function names disabled by disable. """ - _disabled_names.remove(fn_name) - -def reset(): - """ Resets the current timer. Call this at the start of an iteration. """ - global _running_timer - _total_times.clear() - _start_times.clear() - _timer_stack.clear() - _running_timer = None - -def start(fn_name, use_stack=True): - """ - Start timing the specific function. - Note: If use_stack is True, only one timer can be active at a time. - Once you stop this timer, the previous one will start again. - """ - global _running_timer, _disable_all - - if _disable_all: - return - - if use_stack: - if _running_timer is not None: - stop(_running_timer, use_stack=False) - _timer_stack.append(_running_timer) - start(fn_name, use_stack=False) - _running_timer = fn_name - else: - _start_times[fn_name] = time.perf_counter() - -def stop(fn_name=None, use_stack=True): - """ - If use_stack is True, this will stop the currently running timer and restore - the previous timer on the stack if that exists. Note if use_stack is True, - fn_name will be ignored. - - If use_stack is False, this will just stop timing the timer fn_name. - """ - global _running_timer, _disable_all - - if _disable_all: - return - - if use_stack: - if _running_timer is not None: - stop(_running_timer, use_stack=False) - if len(_timer_stack) > 0: - _running_timer = _timer_stack.pop() - start(_running_timer, use_stack=False) - else: - _running_timer = None - else: - print('Warning: timer stopped with no timer running!') - else: - if _start_times[fn_name] > -1: - _total_times[fn_name] += time.perf_counter() - _start_times[fn_name] - else: - print('Warning: timer for %s stopped before starting!' % fn_name) - - -def print_stats(): - """ Prints the current timing information into a table. """ - print() - - all_fn_names = [k for k in _total_times.keys() if k not in _disabled_names] - - max_name_width = max([len(k) for k in all_fn_names] + [4]) - if max_name_width % 2 == 1: max_name_width += 1 - format_str = ' {:>%d} | {:>10.4f} ' % max_name_width - - header = (' {:^%d} | {:^10} ' % max_name_width).format('Name', 'Time (ms)') - print(header) - - sep_idx = header.find('|') - sep_text = ('-' * sep_idx) + '+' + '-' * (len(header)-sep_idx-1) - print(sep_text) - - for name in all_fn_names: - print(format_str.format(name, _total_times[name]*1000)) - - print(sep_text) - print(format_str.format('Total', total_time()*1000)) - print() - -def total_time(): - """ Returns the total amount accumulated across all functions in seconds. """ - return sum([elapsed_time for name, elapsed_time in _total_times.items() if name not in _disabled_names]) - - -class env(): - """ - A class that lets you go: - with timer.env(fn_name): - # (...) - That automatically manages a timer start and stop for you. 
- """ - - def __init__(self, fn_name, use_stack=True): - self.fn_name = fn_name - self.use_stack = use_stack - - def __enter__(self): - start(self.fn_name, use_stack=self.use_stack) - - def __exit__(self, e, ev, t): - stop(self.fn_name, use_stack=self.use_stack) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import time +from collections import defaultdict + +_total_times = defaultdict(lambda: 0) +_start_times = defaultdict(lambda: -1) +_disabled_names = set() +_timer_stack = [] +_running_timer = None +_disable_all = False + +def disable_all(): + global _disable_all + _disable_all = True + +def enable_all(): + global _disable_all + _disable_all = False + +def disable(fn_name): + """ Disables the given function name fom being considered for the average or outputted in print_stats. """ + _disabled_names.add(fn_name) + +def enable(fn_name): + """ Enables function names disabled by disable. """ + _disabled_names.remove(fn_name) + +def reset(): + """ Resets the current timer. Call this at the start of an iteration. """ + global _running_timer + _total_times.clear() + _start_times.clear() + _timer_stack.clear() + _running_timer = None + +def start(fn_name, use_stack=True): + """ + Start timing the specific function. + Note: If use_stack is True, only one timer can be active at a time. + Once you stop this timer, the previous one will start again. + """ + global _running_timer, _disable_all + + if _disable_all: + return + + if use_stack: + if _running_timer is not None: + stop(_running_timer, use_stack=False) + _timer_stack.append(_running_timer) + start(fn_name, use_stack=False) + _running_timer = fn_name + else: + _start_times[fn_name] = time.perf_counter() + +def stop(fn_name=None, use_stack=True): + """ + If use_stack is True, this will stop the currently running timer and restore + the previous timer on the stack if that exists. Note if use_stack is True, + fn_name will be ignored. + + If use_stack is False, this will just stop timing the timer fn_name. + """ + global _running_timer, _disable_all + + if _disable_all: + return + + if use_stack: + if _running_timer is not None: + stop(_running_timer, use_stack=False) + if len(_timer_stack) > 0: + _running_timer = _timer_stack.pop() + start(_running_timer, use_stack=False) + else: + _running_timer = None + else: + print('Warning: timer stopped with no timer running!') + else: + if _start_times[fn_name] > -1: + _total_times[fn_name] += time.perf_counter() - _start_times[fn_name] + else: + print('Warning: timer for %s stopped before starting!' % fn_name) + + +def print_stats(): + """ Prints the current timing information into a table. 
""" + print() + + all_fn_names = [k for k in _total_times.keys() if k not in _disabled_names] + + max_name_width = max([len(k) for k in all_fn_names] + [4]) + if max_name_width % 2 == 1: max_name_width += 1 + format_str = ' {:>%d} | {:>10.4f} ' % max_name_width + + header = (' {:^%d} | {:^10} ' % max_name_width).format('Name', 'Time (ms)') + print(header) + + sep_idx = header.find('|') + sep_text = ('-' * sep_idx) + '+' + '-' * (len(header)-sep_idx-1) + print(sep_text) + + for name in all_fn_names: + print(format_str.format(name, _total_times[name]*1000)) + + print(sep_text) + print(format_str.format('Total', total_time()*1000)) + print() + +def total_time(): + """ Returns the total amount accumulated across all functions in seconds. """ + return sum([elapsed_time for name, elapsed_time in _total_times.items() if name not in _disabled_names]) + + +class env(): + """ + A class that lets you go: + with timer.env(fn_name): + # (...) + That automatically manages a timer start and stop for you. + """ + + def __init__(self, fn_name, use_stack=True): + self.fn_name = fn_name + self.use_stack = use_stack + + def __enter__(self): + start(self.fn_name, use_stack=self.use_stack) + + def __exit__(self, e, ev, t): + stop(self.fn_name, use_stack=self.use_stack) + diff --git a/PyTorch/contrib/cv/detection/YOLACT_plus/yolact.py b/PyTorch/contrib/cv/detection/YOLACT_plus/yolact.py index a59a15bdf4889c6e0841b243bf1ad7a6a7bf1ccd..59490549ed6f4eefee04cf947706bc1bc3eaf26c 100644 --- a/PyTorch/contrib/cv/detection/YOLACT_plus/yolact.py +++ b/PyTorch/contrib/cv/detection/YOLACT_plus/yolact.py @@ -1,739 +1,739 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch, torchvision -import torch.nn as nn -import torch.nn.functional as F -from torchvision.models.resnet import Bottleneck -import numpy as np -from itertools import product -from math import sqrt -from typing import List -from collections import defaultdict - -from data.config import cfg, mask_type -from layers import Detect -from layers.interpolate import InterpolateModule -from backbone import construct_backbone - -import torch.backends.cudnn as cudnn -from utils import timer -from utils.functions import MovingAverage, make_net - -# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. -# See the bug report here: https://github.com/pytorch/pytorch/issues/17108 -#torch.cuda.current_device() - -# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules -use_jit = False -if not use_jit: - print('Multiple GPUs detected! 
Turning off JIT.') - -ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module -script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn - - - -class Concat(nn.Module): - def __init__(self, nets, extra_params): - super().__init__() - - self.nets = nn.ModuleList(nets) - self.extra_params = extra_params - - def forward(self, x): - # Concat each along the channel dimension - return torch.cat([net(x) for net in self.nets], dim=1, **self.extra_params) - -prior_cache = defaultdict(lambda: None) - -class PredictionModule(nn.Module): - """ - The (c) prediction module adapted from DSSD: - https://arxiv.org/pdf/1701.06659.pdf - - Note that this is slightly different to the module in the paper - because the Bottleneck block actually has a 3x3 convolution in - the middle instead of a 1x1 convolution. Though, I really can't - be arsed to implement it myself, and, who knows, this might be - better. - - Args: - - in_channels: The input feature size. - - out_channels: The output feature size (must be a multiple of 4). - - aspect_ratios: A list of lists of priorbox aspect ratios (one list per scale). - - scales: A list of priorbox scales relative to this layer's convsize. - For instance: If this layer has convouts of size 30x30 for - an image of size 600x600, the 'default' (scale - of 1) for this layer would produce bounding - boxes with an area of 20x20px. If the scale is - .5 on the other hand, this layer would consider - bounding boxes with area 10x10px, etc. - - parent: If parent is a PredictionModule, this module will use all the layers - from parent instead of from this module. - """ - - def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0): - super().__init__() - - self.num_classes = cfg.num_classes - self.mask_dim = cfg.mask_dim # Defined by Yolact - self.num_priors = sum(len(x)*len(scales) for x in aspect_ratios) - self.parent = [parent] # Don't include this in the state dict - self.index = index - self.num_heads = cfg.num_heads # Defined by Yolact - - if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: - self.mask_dim = self.mask_dim // self.num_heads - - if cfg.mask_proto_prototypes_as_features: - in_channels += self.mask_dim - - if parent is None: - if cfg.extra_head_net is None: - out_channels = in_channels - else: - self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net) - - if cfg.use_prediction_module: - self.block = Bottleneck(out_channels, out_channels // 4) - self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True) - self.bn = nn.BatchNorm2d(out_channels) - - self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4, **cfg.head_layer_params) - self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params) - self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, **cfg.head_layer_params) - - if cfg.use_mask_scoring: - self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params) - - if cfg.use_instance_coeff: - self.inst_layer = nn.Conv2d(out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params) - - # What is this ugly lambda doing in the middle of all this clean prediction module code? - def make_extra(num_layers): - if num_layers == 0: - return lambda x: x - else: - # Looks more complicated than it is. 
This just creates an array of num_layers alternating conv-relu - return nn.Sequential(*sum([[ - nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), - nn.ReLU(inplace=True) - ] for _ in range(num_layers)], [])) - - self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers] - - if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate: - self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1) - - self.aspect_ratios = aspect_ratios - self.scales = scales - - self.priors = None - self.last_conv_size = None - self.last_img_size = None - - def forward(self, x): - """ - Args: - - x: The convOut from a layer in the backbone network - Size: [batch_size, in_channels, conv_h, conv_w]) - - Returns a tuple (bbox_coords, class_confs, mask_output, prior_boxes) with sizes - - bbox_coords: [batch_size, conv_h*conv_w*num_priors, 4] - - class_confs: [batch_size, conv_h*conv_w*num_priors, num_classes] - - mask_output: [batch_size, conv_h*conv_w*num_priors, mask_dim] - - prior_boxes: [conv_h*conv_w*num_priors, 4] - """ - # In case we want to use another module's layers - src = self if self.parent[0] is None else self.parent[0] - - conv_h = x.size(2) - conv_w = x.size(3) - - if cfg.extra_head_net is not None: - x = src.upfeature(x) - - if cfg.use_prediction_module: - # The two branches of PM design (c) - a = src.block(x) - - b = src.conv(x) - b = src.bn(b) - b = F.relu(b) - - # TODO: Possibly switch this out for a product - x = a + b - - bbox_x = src.bbox_extra(x) - conf_x = src.conf_extra(x) - mask_x = src.mask_extra(x) - - bbox = src.bbox_layer(bbox_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) - conf = src.conf_layer(conf_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) - - if cfg.eval_mask_branch: - mask = src.mask_layer(mask_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) - else: - mask = torch.zeros(x.size(0), bbox.size(1), self.mask_dim, device=bbox.device) - - if cfg.use_mask_scoring: - score = src.score_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 1) - - if cfg.use_instance_coeff: - inst = src.inst_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, cfg.num_instance_coeffs) - - # See box_utils.decode for an explanation of this - if cfg.use_yolo_regressors: - bbox[:, :, :2] = torch.sigmoid(bbox[:, :, :2]) - 0.5 - bbox[:, :, 0] /= conv_w - bbox[:, :, 1] /= conv_h - - if cfg.eval_mask_branch: - if cfg.mask_type == mask_type.direct: - mask = torch.sigmoid(mask) - elif cfg.mask_type == mask_type.lincomb: - mask = cfg.mask_proto_coeff_activation(mask) - - if cfg.mask_proto_coeff_gate: - gate = src.gate_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) - mask = mask * torch.sigmoid(gate) - - if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: - mask = F.pad(mask, (self.index * self.mask_dim, (self.num_heads - self.index - 1) * self.mask_dim), mode='constant', value=0) - - priors = self.make_priors(conv_h, conv_w, x.device) - - preds = { 'loc': bbox, 'conf': conf, 'mask': mask, 'priors': priors } - - if cfg.use_mask_scoring: - preds['score'] = score - - if cfg.use_instance_coeff: - preds['inst'] = inst - - return preds - - def make_priors(self, conv_h, conv_w, device): - """ Note that priors are [x,y,width,height] where (x,y) is the center of the box. 
""" - global prior_cache - size = (conv_h, conv_w) - - with timer.env('makepriors'): - if self.last_img_size != (cfg._tmp_img_w, cfg._tmp_img_h): - prior_data = [] - - # Iteration order is important (it has to sync up with the convout) - for j, i in product(range(conv_h), range(conv_w)): - # +0.5 because priors are in center-size notation - x = (i + 0.5) / conv_w - y = (j + 0.5) / conv_h - - for ars in self.aspect_ratios: - for scale in self.scales: - for ar in ars: - if not cfg.backbone.preapply_sqrt: - ar = sqrt(ar) - - if cfg.backbone.use_pixel_scales: - w = scale * ar / cfg.max_size - h = scale / ar / cfg.max_size - else: - w = scale * ar / conv_w - h = scale / ar / conv_h - - # This is for backward compatability with a bug where I made everything square by accident - if cfg.backbone.use_square_anchors: - h = w - - prior_data += [x, y, w, h] - - self.priors = torch.Tensor(prior_data).to(device).view(-1, 4).detach() - self.priors.requires_grad = False - self.last_img_size = (cfg._tmp_img_w, cfg._tmp_img_h) - self.last_conv_size = (conv_w, conv_h) - prior_cache[size] = None - elif self.priors.device != device: - # This whole weird situation is so that DataParalell doesn't copy the priors each iteration - if prior_cache[size] is None: - prior_cache[size] = {} - - if device not in prior_cache[size]: - prior_cache[size][device] = self.priors.to(device) - - self.priors = prior_cache[size][device] - - return self.priors - -class FPN(ScriptModuleWrapper): - """ - Implements a general version of the FPN introduced in - https://arxiv.org/pdf/1612.03144.pdf - - Parameters (in cfg.fpn): - - num_features (int): The number of output features in the fpn layers. - - interpolation_mode (str): The mode to pass to F.interpolate. - - num_downsample (int): The number of downsampled layers to add onto the selected layers. - These extra layers are downsampled from the last selected layer. - - Args: - - in_channels (list): For each conv layer you supply in the forward pass, - how many features will it have? - """ - __constants__ = ['interpolation_mode', 'num_downsample', 'use_conv_downsample', 'relu_pred_layers', - 'lat_layers', 'pred_layers', 'downsample_layers', 'relu_downsample_layers'] - - def __init__(self, in_channels): - super().__init__() - - self.lat_layers = nn.ModuleList([ - nn.Conv2d(x, cfg.fpn.num_features, kernel_size=1) - for x in reversed(in_channels) - ]) - - # This is here for backwards compatability - padding = 1 if cfg.fpn.pad else 0 - self.pred_layers = nn.ModuleList([ - nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=padding) - for _ in in_channels - ]) - - if cfg.fpn.use_conv_downsample: - self.downsample_layers = nn.ModuleList([ - nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=1, stride=2) - for _ in range(cfg.fpn.num_downsample) - ]) - - self.interpolation_mode = cfg.fpn.interpolation_mode - self.num_downsample = cfg.fpn.num_downsample - self.use_conv_downsample = cfg.fpn.use_conv_downsample - self.relu_downsample_layers = cfg.fpn.relu_downsample_layers - self.relu_pred_layers = cfg.fpn.relu_pred_layers - - @script_method_wrapper - def forward(self, convouts:List[torch.Tensor]): - """ - Args: - - convouts (list): A list of convouts for the corresponding layers in in_channels. - Returns: - - A list of FPN convouts in the same order as x with extra downsample layers if requested. 
- """ - - out = [] - x = torch.zeros(1, device=convouts[0].device) - for i in range(len(convouts)): - out.append(x) - - # For backward compatability, the conv layers are stored in reverse but the input and output is - # given in the correct order. Thus, use j=-i-1 for the input and output and i for the conv layers. - j = len(convouts) - for lat_layer in self.lat_layers: - j -= 1 - - if j < len(convouts) - 1: - _, _, h, w = convouts[j].size() - x = F.interpolate(x, size=(h, w), mode=self.interpolation_mode, align_corners=False) - - x = x + lat_layer(convouts[j]) - out[j] = x - - # This janky second loop is here because TorchScript. - j = len(convouts) - for pred_layer in self.pred_layers: - j -= 1 - out[j] = pred_layer(out[j]) - - if self.relu_pred_layers: - F.relu(out[j], inplace=True) - - cur_idx = len(out) - - # In the original paper, this takes care of P6 - if self.use_conv_downsample: - for downsample_layer in self.downsample_layers: - out.append(downsample_layer(out[-1])) - else: - for idx in range(self.num_downsample): - # Note: this is an untested alternative to out.append(out[-1][:, :, ::2, ::2]). Thanks TorchScript. - out.append(nn.functional.max_pool2d(out[-1], 1, stride=2)) - - if self.relu_downsample_layers: - for idx in range(len(out) - cur_idx): - out[idx] = F.relu(out[idx + cur_idx], inplace=False) - - return out - -class FastMaskIoUNet(ScriptModuleWrapper): - - def __init__(self): - super().__init__() - input_channels = 1 - last_layer = [(cfg.num_classes-1, 1, {})] - self.maskiou_net, _ = make_net(input_channels, cfg.maskiou_net + last_layer, include_last_relu=True) - - def forward(self, x): - x = self.maskiou_net(x) - maskiou_p = F.max_pool2d(x, kernel_size=x.size()[2:]).squeeze(-1).squeeze(-1) - - return maskiou_p - - - -class Yolact(nn.Module): - """ - - - ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ - ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ - ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ - ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ - ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ - ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ - - - You can set the arguments by changing them in the backbone config object in config.py. - - Parameters (in cfg.backbone): - - selected_layers: The indices of the conv layers to use for prediction. - - pred_scales: A list with len(selected_layers) containing tuples of scales (see PredictionModule) - - pred_aspect_ratios: A list of lists of aspect ratios with len(selected_layers) (see PredictionModule) - """ - - def __init__(self): - super().__init__() - - self.backbone = construct_backbone(cfg.backbone) #backbone: resnetbackbone. backbone_modules:{list:104}. bn1:{BatchNorm2d} - - if cfg.freeze_bn: # it's true - self.freeze_bn() - - # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early! 
- if cfg.mask_type == mask_type.direct: - cfg.mask_dim = cfg.mask_size**2 - elif cfg.mask_type == mask_type.lincomb: # the module will execute this branch - if cfg.mask_proto_use_grid: - self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file)) - self.num_grids = self.grid.size(0) - else: # the module will execute this branch - self.num_grids = 0 - - self.proto_src = cfg.mask_proto_src - - if self.proto_src is None: in_channels = 3 - elif cfg.fpn is not None: in_channels = cfg.fpn.num_features - else: in_channels = self.backbone.channels[self.proto_src] - in_channels += self.num_grids - #in_channels will be 256 - # The include_last_relu=false here is because we might want to change it to another function - self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False) - - if cfg.mask_proto_bias: - cfg.mask_dim += 1 - - - self.selected_layers = cfg.backbone.selected_layers - src_channels = self.backbone.channels - - if cfg.use_maskiou: #false - self.maskiou_net = FastMaskIoUNet() - - if cfg.fpn is not None: #true - # Some hacky rewiring to accomodate the FPN - self.fpn = FPN([src_channels[i] for i in self.selected_layers]) - self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample)) - src_channels = [cfg.fpn.num_features] * len(self.selected_layers) - - - self.prediction_layers = nn.ModuleList() - cfg.num_heads = len(self.selected_layers) - - for idx, layer_idx in enumerate(self.selected_layers): - # If we're sharing prediction module weights, have every module's parent be the first one - parent = None - if cfg.share_prediction_module and idx > 0: #cfg.share_prediction_module is True - parent = self.prediction_layers[0] - - pred = PredictionModule(src_channels[layer_idx], src_channels[layer_idx], - aspect_ratios = cfg.backbone.pred_aspect_ratios[idx], - scales = cfg.backbone.pred_scales[idx], - parent = parent, - index = idx) - self.prediction_layers.append(pred) - - # Extra parameters for the extra losses - if cfg.use_class_existence_loss: #false - # This comes from the smallest layer selected - # Also note that cfg.num_classes includes background - self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1) - - if cfg.use_semantic_segmentation_loss: #true - self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1) - - # For use in evaluation - self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k, - conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh) - - def save_weights(self, path): - """ Saves the model's weights using compression because the file sizes were getting too big. """ - torch.save(self.state_dict(), path) - - def load_weights(self, path): - """ Loads weights from a compressed save file. """ - state_dict = torch.load(path, map_location=torch.device('cpu')) - - # For backward compatability, remove these (the new variable is called layers) - for key in list(state_dict.keys()): - if key.startswith('backbone.layer') and not key.startswith('backbone.layers'): - del state_dict[key] - - # Also for backward compatibility with v1.0 weights, do this check - if key.startswith('fpn.downsample_layers.'): - if cfg.fpn is not None and int(key.split('.')[2]) >= cfg.fpn.num_downsample: - del state_dict[key] - self.load_state_dict(state_dict) - - def init_weights(self, backbone_path): - """ Initialize weights for training. """ - # Initialize the backbone with the pretrained weights. 
- self.backbone.init_backbone(backbone_path) - - conv_constants = getattr(nn.Conv2d(1, 1, 1), '__constants__') - - # Quick lambda to test if one list contains the other - def all_in(x, y): - for _x in x: - if _x not in y: - return False - return True - - # Initialize the rest of the conv layers with xavier - for name, module in self.named_modules(): - # See issue #127 for why we need such a complicated condition if the module is a WeakScriptModuleProxy - # Broke in 1.3 (see issue #175), WeakScriptModuleProxy was turned into just ScriptModule. - # Broke in 1.4 (see issue #292), where RecursiveScriptModule is the new star of the show. - # Note that this might break with future pytorch updates, so let me know if it does - is_script_conv = False - if 'Script' in type(module).__name__: - # 1.4 workaround: now there's an original_name member so just use that - if hasattr(module, 'original_name'): - is_script_conv = 'Conv' in module.original_name - # 1.3 workaround: check if this has the same constants as a conv module - else: - is_script_conv = ( - all_in(module.__dict__['_constants_set'], conv_constants) - and all_in(conv_constants, module.__dict__['_constants_set'])) - - is_conv_layer = isinstance(module, nn.Conv2d) or is_script_conv - - if is_conv_layer and module not in self.backbone.backbone_modules: - nn.init.xavier_uniform_(module.weight.data) - - if module.bias is not None: - if cfg.use_focal_loss and 'conf_layer' in name: - if not cfg.use_sigmoid_focal_loss: - # Initialize the last layer as in the focal loss paper. - # Because we use softmax and not sigmoid, I had to derive an alternate expression - # on a notecard. Define pi to be the probability of outputting a foreground detection. - # Then let z = sum(exp(x)) - exp(x_0). Finally let c be the number of foreground classes. - # Chugging through the math, this gives us - # x_0 = log(z * (1 - pi) / pi) where 0 is the background class - # x_i = log(z / c) for all i > 0 - # For simplicity (and because we have a degree of freedom here), set z = 1. 
Then we have - # x_0 = log((1 - pi) / pi) note: don't split up the log for numerical stability - # x_i = -log(c) for all i > 0 - module.bias.data[0] = np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) - module.bias.data[1:] = -np.log(module.bias.size(0) - 1) - else: - module.bias.data[0] = -np.log(cfg.focal_loss_init_pi / (1 - cfg.focal_loss_init_pi)) - module.bias.data[1:] = -np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) - else: - module.bias.data.zero_() - - def train(self, mode=True): - super().train(mode) - - if cfg.freeze_bn: - self.freeze_bn() - - def freeze_bn(self, enable=False): - """ Adapted from https://discuss.pytorch.org/t/how-to-train-with-frozen-batchnorm/12106/8 """ - for module in self.modules(): - if isinstance(module, nn.BatchNorm2d): - module.train() if enable else module.eval() - - module.weight.requires_grad = enable - module.bias.requires_grad = enable - - def forward(self, x): - """ The input should be of size [batch_size, 3, img_h, img_w] """ - _, _, img_h, img_w = x.size() - cfg._tmp_img_h = img_h - cfg._tmp_img_w = img_w - - with timer.env('backbone'): - outs = self.backbone(x) - - if cfg.fpn is not None: - with timer.env('fpn'): - # Use backbone.selected_layers because we overwrote self.selected_layers - outs = [outs[i] for i in cfg.backbone.selected_layers] - outs = self.fpn(outs) - - proto_out = None - if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: - with timer.env('proto'): - proto_x = x if self.proto_src is None else outs[self.proto_src] - - if self.num_grids > 0: - grids = self.grid.repeat(proto_x.size(0), 1, 1, 1) - proto_x = torch.cat([proto_x, grids], dim=1) - - proto_out = self.proto_net(proto_x) - proto_out = cfg.mask_proto_prototype_activation(proto_out) - - if cfg.mask_proto_prototypes_as_features: - # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary - proto_downsampled = proto_out.clone() - - if cfg.mask_proto_prototypes_as_features_no_grad: - proto_downsampled = proto_out.detach() - - # Move the features last so the multiplication is easy - proto_out = proto_out.permute(0, 2, 3, 1).contiguous() - - if cfg.mask_proto_bias: - bias_shape = [x for x in proto_out.size()] - bias_shape[-1] = 1 - proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) - - - with timer.env('pred_heads'): - pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } - - if cfg.use_mask_scoring: - pred_outs['score'] = [] - - if cfg.use_instance_coeff: - pred_outs['inst'] = [] - - for idx, pred_layer in zip(self.selected_layers, self.prediction_layers): - pred_x = outs[idx] - - if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features: - # Scale the prototypes down to the current prediction layer's size and add it as inputs - proto_downsampled = F.interpolate(proto_downsampled, size=outs[idx].size()[2:], mode='bilinear', align_corners=False) - pred_x = torch.cat([pred_x, proto_downsampled], dim=1) - - # A hack for the way dataparallel works - if cfg.share_prediction_module and pred_layer is not self.prediction_layers[0]: - pred_layer.parent = [self.prediction_layers[0]] - - p = pred_layer(pred_x) - - for k, v in p.items(): - pred_outs[k].append(v) - - for k, v in pred_outs.items(): - pred_outs[k] = torch.cat(v, -2) - - if proto_out is not None: - pred_outs['proto'] = proto_out - - if self.training: - # For the extra loss functions - if cfg.use_class_existence_loss: - pred_outs['classes'] = self.class_existence_fc(outs[-1].mean(dim=(2, 3))) - - if 
cfg.use_semantic_segmentation_loss: - pred_outs['segm'] = self.semantic_seg_conv(outs[0]) - - return pred_outs - else: - if cfg.use_mask_scoring: - pred_outs['score'] = torch.sigmoid(pred_outs['score']) - - if cfg.use_focal_loss: - if cfg.use_sigmoid_focal_loss: - # Note: even though conf[0] exists, this mode doesn't train it so don't use it - pred_outs['conf'] = torch.sigmoid(pred_outs['conf']) - if cfg.use_mask_scoring: - pred_outs['conf'] *= pred_outs['score'] - elif cfg.use_objectness_score: - # See focal_loss_sigmoid in multibox_loss.py for details - objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) - pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax(pred_outs['conf'][:, :, 1:], -1) - pred_outs['conf'][:, :, 0 ] = 1 - objectness - else: - pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) - else: - - if cfg.use_objectness_score: - objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) - - pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \ - * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1) - - else: - pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) - for i in pred_outs.keys(): - pred_outs[i] = pred_outs[i].cpu() - return self.detect(pred_outs) - - - - -# Some testing code -if __name__ == '__main__': - from utils.functions import init_console - init_console() - - # Use the first argument to set the config if you want - import sys - if len(sys.argv) > 1: - from data.config import set_cfg - set_cfg(sys.argv[1]) - - net = Yolact() - net.train() - net.init_weights(backbone_path='weights/' + cfg.backbone.path) - - # GPU - net = net.cuda() - torch.set_default_tensor_type('torch.cuda.FloatTensor') - - x = torch.zeros((1, 3, cfg.max_size, cfg.max_size)) - y = net(x) - - for p in net.prediction_layers: - print(p.last_conv_size) - - print() - for k, a in y.items(): - print(k + ': ', a.size(), torch.sum(a)) - exit() - - net(x) - # timer.disable('pass2') - avg = MovingAverage() - try: - while True: - timer.reset() - with timer.env('everything else'): - net(x) - avg.add(timer.total_time()) - print('\033[2J') # Moves console cursor to 0,0 - timer.print_stats() - print('Avg fps: %.2f\tAvg ms: %.2f ' % (1/avg.get_avg(), avg.get_avg()*1000)) - except KeyboardInterrupt: - pass +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import torch, torchvision +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models.resnet import Bottleneck +import numpy as np +from itertools import product +from math import sqrt +from typing import List +from collections import defaultdict + +from data.config import cfg, mask_type +from layers import Detect +from layers.interpolate import InterpolateModule +from backbone import construct_backbone + +import torch.backends.cudnn as cudnn +from utils import timer +from utils.functions import MovingAverage, make_net + +# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions. +# See the bug report here: https://github.com/pytorch/pytorch/issues/17108 +#torch.cuda.current_device() + +# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules +use_jit = False +if not use_jit: + print('Multiple GPUs detected! Turning off JIT.') + +ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module +script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn + + + +class Concat(nn.Module): + def __init__(self, nets, extra_params): + super().__init__() + + self.nets = nn.ModuleList(nets) + self.extra_params = extra_params + + def forward(self, x): + # Concat each along the channel dimension + return torch.cat([net(x) for net in self.nets], dim=1, **self.extra_params) + +prior_cache = defaultdict(lambda: None) + +class PredictionModule(nn.Module): + """ + The (c) prediction module adapted from DSSD: + https://arxiv.org/pdf/1701.06659.pdf + + Note that this is slightly different to the module in the paper + because the Bottleneck block actually has a 3x3 convolution in + the middle instead of a 1x1 convolution. Though, I really can't + be arsed to implement it myself, and, who knows, this might be + better. + + Args: + - in_channels: The input feature size. + - out_channels: The output feature size (must be a multiple of 4). + - aspect_ratios: A list of lists of priorbox aspect ratios (one list per scale). + - scales: A list of priorbox scales relative to this layer's convsize. + For instance: If this layer has convouts of size 30x30 for + an image of size 600x600, the 'default' (scale + of 1) for this layer would produce bounding + boxes with an area of 20x20px. If the scale is + .5 on the other hand, this layer would consider + bounding boxes with area 10x10px, etc. + - parent: If parent is a PredictionModule, this module will use all the layers + from parent instead of from this module. 
+ """ + + def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0): + super().__init__() + + self.num_classes = cfg.num_classes + self.mask_dim = cfg.mask_dim # Defined by Yolact + self.num_priors = sum(len(x)*len(scales) for x in aspect_ratios) + self.parent = [parent] # Don't include this in the state dict + self.index = index + self.num_heads = cfg.num_heads # Defined by Yolact + + if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: + self.mask_dim = self.mask_dim // self.num_heads + + if cfg.mask_proto_prototypes_as_features: + in_channels += self.mask_dim + + if parent is None: + if cfg.extra_head_net is None: + out_channels = in_channels + else: + self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net) + + if cfg.use_prediction_module: + self.block = Bottleneck(out_channels, out_channels // 4) + self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True) + self.bn = nn.BatchNorm2d(out_channels) + + self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4, **cfg.head_layer_params) + self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params) + self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, **cfg.head_layer_params) + + if cfg.use_mask_scoring: + self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params) + + if cfg.use_instance_coeff: + self.inst_layer = nn.Conv2d(out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params) + + # What is this ugly lambda doing in the middle of all this clean prediction module code? + def make_extra(num_layers): + if num_layers == 0: + return lambda x: x + else: + # Looks more complicated than it is. 
This just creates an array of num_layers alternating conv-relu + return nn.Sequential(*sum([[ + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ] for _ in range(num_layers)], [])) + + self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers] + + if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate: + self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1) + + self.aspect_ratios = aspect_ratios + self.scales = scales + + self.priors = None + self.last_conv_size = None + self.last_img_size = None + + def forward(self, x): + """ + Args: + - x: The convOut from a layer in the backbone network + Size: [batch_size, in_channels, conv_h, conv_w]) + + Returns a tuple (bbox_coords, class_confs, mask_output, prior_boxes) with sizes + - bbox_coords: [batch_size, conv_h*conv_w*num_priors, 4] + - class_confs: [batch_size, conv_h*conv_w*num_priors, num_classes] + - mask_output: [batch_size, conv_h*conv_w*num_priors, mask_dim] + - prior_boxes: [conv_h*conv_w*num_priors, 4] + """ + # In case we want to use another module's layers + src = self if self.parent[0] is None else self.parent[0] + + conv_h = x.size(2) + conv_w = x.size(3) + + if cfg.extra_head_net is not None: + x = src.upfeature(x) + + if cfg.use_prediction_module: + # The two branches of PM design (c) + a = src.block(x) + + b = src.conv(x) + b = src.bn(b) + b = F.relu(b) + + # TODO: Possibly switch this out for a product + x = a + b + + bbox_x = src.bbox_extra(x) + conf_x = src.conf_extra(x) + mask_x = src.mask_extra(x) + + bbox = src.bbox_layer(bbox_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 4) + conf = src.conf_layer(conf_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_classes) + + if cfg.eval_mask_branch: + mask = src.mask_layer(mask_x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) + else: + mask = torch.zeros(x.size(0), bbox.size(1), self.mask_dim, device=bbox.device) + + if cfg.use_mask_scoring: + score = src.score_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, 1) + + if cfg.use_instance_coeff: + inst = src.inst_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, cfg.num_instance_coeffs) + + # See box_utils.decode for an explanation of this + if cfg.use_yolo_regressors: + bbox[:, :, :2] = torch.sigmoid(bbox[:, :, :2]) - 0.5 + bbox[:, :, 0] /= conv_w + bbox[:, :, 1] /= conv_h + + if cfg.eval_mask_branch: + if cfg.mask_type == mask_type.direct: + mask = torch.sigmoid(mask) + elif cfg.mask_type == mask_type.lincomb: + mask = cfg.mask_proto_coeff_activation(mask) + + if cfg.mask_proto_coeff_gate: + gate = src.gate_layer(x).permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.mask_dim) + mask = mask * torch.sigmoid(gate) + + if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb: + mask = F.pad(mask, (self.index * self.mask_dim, (self.num_heads - self.index - 1) * self.mask_dim), mode='constant', value=0) + + priors = self.make_priors(conv_h, conv_w, x.device) + + preds = { 'loc': bbox, 'conf': conf, 'mask': mask, 'priors': priors } + + if cfg.use_mask_scoring: + preds['score'] = score + + if cfg.use_instance_coeff: + preds['inst'] = inst + + return preds + + def make_priors(self, conv_h, conv_w, device): + """ Note that priors are [x,y,width,height] where (x,y) is the center of the box. 
""" + global prior_cache + size = (conv_h, conv_w) + + with timer.env('makepriors'): + if self.last_img_size != (cfg._tmp_img_w, cfg._tmp_img_h): + prior_data = [] + + # Iteration order is important (it has to sync up with the convout) + for j, i in product(range(conv_h), range(conv_w)): + # +0.5 because priors are in center-size notation + x = (i + 0.5) / conv_w + y = (j + 0.5) / conv_h + + for ars in self.aspect_ratios: + for scale in self.scales: + for ar in ars: + if not cfg.backbone.preapply_sqrt: + ar = sqrt(ar) + + if cfg.backbone.use_pixel_scales: + w = scale * ar / cfg.max_size + h = scale / ar / cfg.max_size + else: + w = scale * ar / conv_w + h = scale / ar / conv_h + + # This is for backward compatability with a bug where I made everything square by accident + if cfg.backbone.use_square_anchors: + h = w + + prior_data += [x, y, w, h] + + self.priors = torch.Tensor(prior_data).to(device).view(-1, 4).detach() + self.priors.requires_grad = False + self.last_img_size = (cfg._tmp_img_w, cfg._tmp_img_h) + self.last_conv_size = (conv_w, conv_h) + prior_cache[size] = None + elif self.priors.device != device: + # This whole weird situation is so that DataParalell doesn't copy the priors each iteration + if prior_cache[size] is None: + prior_cache[size] = {} + + if device not in prior_cache[size]: + prior_cache[size][device] = self.priors.to(device) + + self.priors = prior_cache[size][device] + + return self.priors + +class FPN(ScriptModuleWrapper): + """ + Implements a general version of the FPN introduced in + https://arxiv.org/pdf/1612.03144.pdf + + Parameters (in cfg.fpn): + - num_features (int): The number of output features in the fpn layers. + - interpolation_mode (str): The mode to pass to F.interpolate. + - num_downsample (int): The number of downsampled layers to add onto the selected layers. + These extra layers are downsampled from the last selected layer. + + Args: + - in_channels (list): For each conv layer you supply in the forward pass, + how many features will it have? + """ + __constants__ = ['interpolation_mode', 'num_downsample', 'use_conv_downsample', 'relu_pred_layers', + 'lat_layers', 'pred_layers', 'downsample_layers', 'relu_downsample_layers'] + + def __init__(self, in_channels): + super().__init__() + + self.lat_layers = nn.ModuleList([ + nn.Conv2d(x, cfg.fpn.num_features, kernel_size=1) + for x in reversed(in_channels) + ]) + + # This is here for backwards compatability + padding = 1 if cfg.fpn.pad else 0 + self.pred_layers = nn.ModuleList([ + nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=padding) + for _ in in_channels + ]) + + if cfg.fpn.use_conv_downsample: + self.downsample_layers = nn.ModuleList([ + nn.Conv2d(cfg.fpn.num_features, cfg.fpn.num_features, kernel_size=3, padding=1, stride=2) + for _ in range(cfg.fpn.num_downsample) + ]) + + self.interpolation_mode = cfg.fpn.interpolation_mode + self.num_downsample = cfg.fpn.num_downsample + self.use_conv_downsample = cfg.fpn.use_conv_downsample + self.relu_downsample_layers = cfg.fpn.relu_downsample_layers + self.relu_pred_layers = cfg.fpn.relu_pred_layers + + @script_method_wrapper + def forward(self, convouts:List[torch.Tensor]): + """ + Args: + - convouts (list): A list of convouts for the corresponding layers in in_channels. + Returns: + - A list of FPN convouts in the same order as x with extra downsample layers if requested. 
+ """ + + out = [] + x = torch.zeros(1, device=convouts[0].device) + for i in range(len(convouts)): + out.append(x) + + # For backward compatability, the conv layers are stored in reverse but the input and output is + # given in the correct order. Thus, use j=-i-1 for the input and output and i for the conv layers. + j = len(convouts) + for lat_layer in self.lat_layers: + j -= 1 + + if j < len(convouts) - 1: + _, _, h, w = convouts[j].size() + x = F.interpolate(x, size=(h, w), mode=self.interpolation_mode, align_corners=False) + + x = x + lat_layer(convouts[j]) + out[j] = x + + # This janky second loop is here because TorchScript. + j = len(convouts) + for pred_layer in self.pred_layers: + j -= 1 + out[j] = pred_layer(out[j]) + + if self.relu_pred_layers: + F.relu(out[j], inplace=True) + + cur_idx = len(out) + + # In the original paper, this takes care of P6 + if self.use_conv_downsample: + for downsample_layer in self.downsample_layers: + out.append(downsample_layer(out[-1])) + else: + for idx in range(self.num_downsample): + # Note: this is an untested alternative to out.append(out[-1][:, :, ::2, ::2]). Thanks TorchScript. + out.append(nn.functional.max_pool2d(out[-1], 1, stride=2)) + + if self.relu_downsample_layers: + for idx in range(len(out) - cur_idx): + out[idx] = F.relu(out[idx + cur_idx], inplace=False) + + return out + +class FastMaskIoUNet(ScriptModuleWrapper): + + def __init__(self): + super().__init__() + input_channels = 1 + last_layer = [(cfg.num_classes-1, 1, {})] + self.maskiou_net, _ = make_net(input_channels, cfg.maskiou_net + last_layer, include_last_relu=True) + + def forward(self, x): + x = self.maskiou_net(x) + maskiou_p = F.max_pool2d(x, kernel_size=x.size()[2:]).squeeze(-1).squeeze(-1) + + return maskiou_p + + + +class Yolact(nn.Module): + """ + + + ██╗ ██╗ ██████╗ ██╗ █████╗ ██████╗████████╗ + ╚██╗ ██╔╝██╔═══██╗██║ ██╔══██╗██╔════╝╚══██╔══╝ + ╚████╔╝ ██║ ██║██║ ███████║██║ ██║ + ╚██╔╝ ██║ ██║██║ ██╔══██║██║ ██║ + ██║ ╚██████╔╝███████╗██║ ██║╚██████╗ ██║ + ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ + + + You can set the arguments by changing them in the backbone config object in config.py. + + Parameters (in cfg.backbone): + - selected_layers: The indices of the conv layers to use for prediction. + - pred_scales: A list with len(selected_layers) containing tuples of scales (see PredictionModule) + - pred_aspect_ratios: A list of lists of aspect ratios with len(selected_layers) (see PredictionModule) + """ + + def __init__(self): + super().__init__() + + self.backbone = construct_backbone(cfg.backbone) #backbone: resnetbackbone. backbone_modules:{list:104}. bn1:{BatchNorm2d} + + if cfg.freeze_bn: # it's true + self.freeze_bn() + + # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early! 
+ if cfg.mask_type == mask_type.direct: + cfg.mask_dim = cfg.mask_size**2 + elif cfg.mask_type == mask_type.lincomb: # the module will execute this branch + if cfg.mask_proto_use_grid: + self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file)) + self.num_grids = self.grid.size(0) + else: # the module will execute this branch + self.num_grids = 0 + + self.proto_src = cfg.mask_proto_src + + if self.proto_src is None: in_channels = 3 + elif cfg.fpn is not None: in_channels = cfg.fpn.num_features + else: in_channels = self.backbone.channels[self.proto_src] + in_channels += self.num_grids + #in_channels will be 256 + # The include_last_relu=false here is because we might want to change it to another function + self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False) + + if cfg.mask_proto_bias: + cfg.mask_dim += 1 + + + self.selected_layers = cfg.backbone.selected_layers + src_channels = self.backbone.channels + + if cfg.use_maskiou: #false + self.maskiou_net = FastMaskIoUNet() + + if cfg.fpn is not None: #true + # Some hacky rewiring to accomodate the FPN + self.fpn = FPN([src_channels[i] for i in self.selected_layers]) + self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample)) + src_channels = [cfg.fpn.num_features] * len(self.selected_layers) + + + self.prediction_layers = nn.ModuleList() + cfg.num_heads = len(self.selected_layers) + + for idx, layer_idx in enumerate(self.selected_layers): + # If we're sharing prediction module weights, have every module's parent be the first one + parent = None + if cfg.share_prediction_module and idx > 0: #cfg.share_prediction_module is True + parent = self.prediction_layers[0] + + pred = PredictionModule(src_channels[layer_idx], src_channels[layer_idx], + aspect_ratios = cfg.backbone.pred_aspect_ratios[idx], + scales = cfg.backbone.pred_scales[idx], + parent = parent, + index = idx) + self.prediction_layers.append(pred) + + # Extra parameters for the extra losses + if cfg.use_class_existence_loss: #false + # This comes from the smallest layer selected + # Also note that cfg.num_classes includes background + self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1) + + if cfg.use_semantic_segmentation_loss: #true + self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1) + + # For use in evaluation + self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=cfg.nms_top_k, + conf_thresh=cfg.nms_conf_thresh, nms_thresh=cfg.nms_thresh) + + def save_weights(self, path): + """ Saves the model's weights using compression because the file sizes were getting too big. """ + torch.save(self.state_dict(), path) + + def load_weights(self, path): + """ Loads weights from a compressed save file. """ + state_dict = torch.load(path, map_location=torch.device('cpu')) + + # For backward compatability, remove these (the new variable is called layers) + for key in list(state_dict.keys()): + if key.startswith('backbone.layer') and not key.startswith('backbone.layers'): + del state_dict[key] + + # Also for backward compatibility with v1.0 weights, do this check + if key.startswith('fpn.downsample_layers.'): + if cfg.fpn is not None and int(key.split('.')[2]) >= cfg.fpn.num_downsample: + del state_dict[key] + self.load_state_dict(state_dict) + + def init_weights(self, backbone_path): + """ Initialize weights for training. """ + # Initialize the backbone with the pretrained weights. 
+ self.backbone.init_backbone(backbone_path) + + conv_constants = getattr(nn.Conv2d(1, 1, 1), '__constants__') + + # Quick lambda to test if one list contains the other + def all_in(x, y): + for _x in x: + if _x not in y: + return False + return True + + # Initialize the rest of the conv layers with xavier + for name, module in self.named_modules(): + # See issue #127 for why we need such a complicated condition if the module is a WeakScriptModuleProxy + # Broke in 1.3 (see issue #175), WeakScriptModuleProxy was turned into just ScriptModule. + # Broke in 1.4 (see issue #292), where RecursiveScriptModule is the new star of the show. + # Note that this might break with future pytorch updates, so let me know if it does + is_script_conv = False + if 'Script' in type(module).__name__: + # 1.4 workaround: now there's an original_name member so just use that + if hasattr(module, 'original_name'): + is_script_conv = 'Conv' in module.original_name + # 1.3 workaround: check if this has the same constants as a conv module + else: + is_script_conv = ( + all_in(module.__dict__['_constants_set'], conv_constants) + and all_in(conv_constants, module.__dict__['_constants_set'])) + + is_conv_layer = isinstance(module, nn.Conv2d) or is_script_conv + + if is_conv_layer and module not in self.backbone.backbone_modules: + nn.init.xavier_uniform_(module.weight.data) + + if module.bias is not None: + if cfg.use_focal_loss and 'conf_layer' in name: + if not cfg.use_sigmoid_focal_loss: + # Initialize the last layer as in the focal loss paper. + # Because we use softmax and not sigmoid, I had to derive an alternate expression + # on a notecard. Define pi to be the probability of outputting a foreground detection. + # Then let z = sum(exp(x)) - exp(x_0). Finally let c be the number of foreground classes. + # Chugging through the math, this gives us + # x_0 = log(z * (1 - pi) / pi) where 0 is the background class + # x_i = log(z / c) for all i > 0 + # For simplicity (and because we have a degree of freedom here), set z = 1. 
Then we have + # x_0 = log((1 - pi) / pi) note: don't split up the log for numerical stability + # x_i = -log(c) for all i > 0 + module.bias.data[0] = np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) + module.bias.data[1:] = -np.log(module.bias.size(0) - 1) + else: + module.bias.data[0] = -np.log(cfg.focal_loss_init_pi / (1 - cfg.focal_loss_init_pi)) + module.bias.data[1:] = -np.log((1 - cfg.focal_loss_init_pi) / cfg.focal_loss_init_pi) + else: + module.bias.data.zero_() + + def train(self, mode=True): + super().train(mode) + + if cfg.freeze_bn: + self.freeze_bn() + + def freeze_bn(self, enable=False): + """ Adapted from https://discuss.pytorch.org/t/how-to-train-with-frozen-batchnorm/12106/8 """ + for module in self.modules(): + if isinstance(module, nn.BatchNorm2d): + module.train() if enable else module.eval() + + module.weight.requires_grad = enable + module.bias.requires_grad = enable + + def forward(self, x): + """ The input should be of size [batch_size, 3, img_h, img_w] """ + _, _, img_h, img_w = x.size() + cfg._tmp_img_h = img_h + cfg._tmp_img_w = img_w + + with timer.env('backbone'): + outs = self.backbone(x) + + if cfg.fpn is not None: + with timer.env('fpn'): + # Use backbone.selected_layers because we overwrote self.selected_layers + outs = [outs[i] for i in cfg.backbone.selected_layers] + outs = self.fpn(outs) + + proto_out = None + if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch: + with timer.env('proto'): + proto_x = x if self.proto_src is None else outs[self.proto_src] + + if self.num_grids > 0: + grids = self.grid.repeat(proto_x.size(0), 1, 1, 1) + proto_x = torch.cat([proto_x, grids], dim=1) + + proto_out = self.proto_net(proto_x) + proto_out = cfg.mask_proto_prototype_activation(proto_out) + + if cfg.mask_proto_prototypes_as_features: + # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary + proto_downsampled = proto_out.clone() + + if cfg.mask_proto_prototypes_as_features_no_grad: + proto_downsampled = proto_out.detach() + + # Move the features last so the multiplication is easy + proto_out = proto_out.permute(0, 2, 3, 1).contiguous() + + if cfg.mask_proto_bias: + bias_shape = [x for x in proto_out.size()] + bias_shape[-1] = 1 + proto_out = torch.cat([proto_out, torch.ones(*bias_shape)], -1) + + + with timer.env('pred_heads'): + pred_outs = { 'loc': [], 'conf': [], 'mask': [], 'priors': [] } + + if cfg.use_mask_scoring: + pred_outs['score'] = [] + + if cfg.use_instance_coeff: + pred_outs['inst'] = [] + + for idx, pred_layer in zip(self.selected_layers, self.prediction_layers): + pred_x = outs[idx] + + if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features: + # Scale the prototypes down to the current prediction layer's size and add it as inputs + proto_downsampled = F.interpolate(proto_downsampled, size=outs[idx].size()[2:], mode='bilinear', align_corners=False) + pred_x = torch.cat([pred_x, proto_downsampled], dim=1) + + # A hack for the way dataparallel works + if cfg.share_prediction_module and pred_layer is not self.prediction_layers[0]: + pred_layer.parent = [self.prediction_layers[0]] + + p = pred_layer(pred_x) + + for k, v in p.items(): + pred_outs[k].append(v) + + for k, v in pred_outs.items(): + pred_outs[k] = torch.cat(v, -2) + + if proto_out is not None: + pred_outs['proto'] = proto_out + + if self.training: + # For the extra loss functions + if cfg.use_class_existence_loss: + pred_outs['classes'] = self.class_existence_fc(outs[-1].mean(dim=(2, 3))) + + if 
cfg.use_semantic_segmentation_loss: + pred_outs['segm'] = self.semantic_seg_conv(outs[0]) + + return pred_outs + else: + if cfg.use_mask_scoring: + pred_outs['score'] = torch.sigmoid(pred_outs['score']) + + if cfg.use_focal_loss: + if cfg.use_sigmoid_focal_loss: + # Note: even though conf[0] exists, this mode doesn't train it so don't use it + pred_outs['conf'] = torch.sigmoid(pred_outs['conf']) + if cfg.use_mask_scoring: + pred_outs['conf'] *= pred_outs['score'] + elif cfg.use_objectness_score: + # See focal_loss_sigmoid in multibox_loss.py for details + objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) + pred_outs['conf'][:, :, 1:] = objectness[:, :, None] * F.softmax(pred_outs['conf'][:, :, 1:], -1) + pred_outs['conf'][:, :, 0 ] = 1 - objectness + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + else: + + if cfg.use_objectness_score: + objectness = torch.sigmoid(pred_outs['conf'][:, :, 0]) + + pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \ + * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1) + + else: + pred_outs['conf'] = F.softmax(pred_outs['conf'], -1) + for i in pred_outs.keys(): + pred_outs[i] = pred_outs[i].cpu() + return self.detect(pred_outs) + + + + +# Some testing code +if __name__ == '__main__': + from utils.functions import init_console + init_console() + + # Use the first argument to set the config if you want + import sys + if len(sys.argv) > 1: + from data.config import set_cfg + set_cfg(sys.argv[1]) + + net = Yolact() + net.train() + net.init_weights(backbone_path='weights/' + cfg.backbone.path) + + # GPU + net = net.cuda() + torch.set_default_tensor_type('torch.cuda.FloatTensor') + + x = torch.zeros((1, 3, cfg.max_size, cfg.max_size)) + y = net(x) + + for p in net.prediction_layers: + print(p.last_conv_size) + + print() + for k, a in y.items(): + print(k + ': ', a.size(), torch.sum(a)) + exit() + + net(x) + # timer.disable('pass2') + avg = MovingAverage() + try: + while True: + timer.reset() + with timer.env('everything else'): + net(x) + avg.add(timer.total_time()) + print('\033[2J') # Moves console cursor to 0,0 + timer.print_stats() + print('Avg fps: %.2f\tAvg ms: %.2f ' % (1/avg.get_avg(), avg.get_avg()*1000)) + except KeyboardInterrupt: + pass diff --git a/PyTorch/contrib/cv/detection/YOLOR/README.md b/PyTorch/contrib/cv/detection/YOLOR/README.md index 5e19e9243b7e8180f11fd502f65905764ea5fafa..cb96fdada6b9bfbdbaa8e397b164bdcec40753a9 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/README.md +++ b/PyTorch/contrib/cv/detection/YOLOR/README.md @@ -1,129 +1,129 @@ -# YOLOR 模型使用说明 - -## Requirements -* NPU配套的run包安装 -* Python 3.7.5 -* PyTorch(NPU版本) -* apex(NPU版本) -* (可选)参考《Pytorch 网络模型移植&训练指南》6.4.2章节,配置cpu为性能模式,以达到模型最佳性能;不开启不影响功能。 - -安装其他依赖(先安装NPU版本的pytorch和apex,再安装其他依赖): -``` -pip install -r requirements.txt -``` - -## Dataset -1. 下载coco数据集,包含图片、annotations、labels图片、annotations: - ``` - cd yolor - bash scripts/get_coco.sh - ``` -2. 
将coco数据集放于工程根目录下 - coco目录结构如下: - ``` - coco - |-- LICENSE - |-- README.txt - |-- annotations - | `-- instances_val2017.json - |-- images - | |-- test2017 - | |-- train2017 - | `-- val2017 - |-- labels - | |-- train2017 - | |-- train2017.cache3 - | |-- val2017 - | `-- val2017.cache3 - |-- test-dev2017.txt - |-- train2017.cache - |-- train2017.txt - |-- val2017.cache - `-- val2017.txt - ``` - -注:数据集路径在./yolor/data/coco.yaml中设定,训练前请确认路径无误,如果路径不一致需要修改。 -原coco.yaml文件如图: -![coco_yaml](./figure/coco_yaml.png) - -## Train Model -### GPU 1P:在目录yolor下,运行 ./test/train_gpu_1p.sh -``` -chmod +x ./test/train_gpu_1p.sh -bash ./test/train_gpu_1p.sh -``` -若需要指定训练使用的卡号, 可修改train_gpu_1p.sh文件 "--device 0"配置项,其中卡号为0-7 - -### GPU 8P:在目录yolor下,运行 ./test/train_gpu_8p.sh -``` -chmod +x ./test/train_gpu_8p.sh -bash ./test/train_gpu_8p.sh -``` - -### NPU 1P:在目录yolor下,运行 train_performance_1p.sh -``` -chmod +x ./test/train_performance_1p.sh -bash ./test/train_performance_1p.sh -``` -若需要指定训练使用的卡号, 可修改train_performance_1p.sh文件 "--npu 0"配置项,其中卡号为0-7 - -### NPU 8P:在目录yolor下,运行 train_performance_8p.sh -``` -chmod +x ./test/train_performance_8p.sh -bash ./test/train_performance_8p.sh -``` - -注:在模型训练结束时,程序默认会将save的最后一个模型去除optimizer以减少模型大小,这会load模型,而在8P训练中,save和load的进程可能会不同步导致先load后save而报错,所以train_performance_8p.sh默认训练3个epoch(只要大于1个epoch即可)。 -如果用户只想要训练一个epoch并且保留模型的optimizer,在train_mp.py中注释掉如下部分代码即可: -![strip_optimizer](./figure/strip_optimizer.jpg) - - -### NPU 8P Full:在目录yolor下,运行 train_full_8p.sh -``` -chmod +x ./test/train_full_8p.sh -bash ./test/train_full_8p.sh -``` - -## Evaluation -复制训练好的last.pt到pretrained文件夹下,运行evaluation_npu.sh (npu) / evaluation_gpu.sh (gpu) -``` -chmod +x ./test/evaluation_xxx.sh -bash ./test/evaluation_xxx.sh -``` - -## 迁移学习 -参考https://github.com/WongKinYiu/yolor/issues/103,更改./cfg/yolo_p6.cfg中**对应行**的classes和filters: - -以coco为例,原80类别现在改为81:classes = 81, filters = anchor * (5 + classes) = 3 * (5 + 81) = 258,更改后的.cfg命名为yolor_p6_finetune.cfg,复制训练好的last.pt到pretrained文件夹下,运行train_finetune_1p.sh -``` -chmod +x ./test/train_finetune_1p.sh -bash ./test/train_finetune_1p.sh -``` - -## 白名单 -### Transpose whilte list - -路径:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py -#120行左右 -``` -[8,3,160,160,85], [8,3,80,80,85], [8,3,40,40,85], [8,3,20,20,85], [8,3,85,160,160], [8,3,85,80,80] -``` -### Slice_d whilte list -路径:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/impl/slice_d.py -#7500行左右 -``` -["float16", [32,3,96,168,85], [32,3,96,168,2]], -["float16", [32,3,96,168,85], [32,3,96,168,4]], -["float16", [32,3,80,168,85], [32,3,80,168,2]], -["float16", [32,3,80,168,85], [32,3,80,168,4]], -["float16", [32,3,48,84,85], [32,3,48,84,2]], -["float16", [32,3,48,84,85], [32,3,48,84,4]], -["float16", [32,3,40,84,85], [32,3,40,84,2]], -["float16", [32,3,40,84,85], [32,3,40,84,4]], -["float16", [32,3,24,42,85], [32,3,24,42,2]], -["float16", [32,3,24,42,85], [32,3,24,42,4]], -["float16", [32,3,20,42,85], [32,3,20,42,2]], -["float16", [32,3,20,42,85], [32,3,20,42,4]], -["float32", [8, 3, 160, 160, 85], [8, 3, 160, 160, 1]], -["float32", [8, 3, 80, 80, 85], [8, 3, 80, 80, 1]], +# YOLOR 模型使用说明 + +## Requirements +* NPU配套的run包安装 +* Python 3.7.5 +* PyTorch(NPU版本) +* apex(NPU版本) +* (可选)参考《Pytorch 网络模型移植&训练指南》6.4.2章节,配置cpu为性能模式,以达到模型最佳性能;不开启不影响功能。 + +安装其他依赖(先安装NPU版本的pytorch和apex,再安装其他依赖): +``` +pip install -r requirements.txt +``` + +## Dataset +1. 下载coco数据集,包含图片、annotations、labels图片、annotations: + ``` + cd yolor + bash scripts/get_coco.sh + ``` +2. 
将coco数据集放于工程根目录下 + coco目录结构如下: + ``` + coco + |-- LICENSE + |-- README.txt + |-- annotations + | `-- instances_val2017.json + |-- images + | |-- test2017 + | |-- train2017 + | `-- val2017 + |-- labels + | |-- train2017 + | |-- train2017.cache3 + | |-- val2017 + | `-- val2017.cache3 + |-- test-dev2017.txt + |-- train2017.cache + |-- train2017.txt + |-- val2017.cache + `-- val2017.txt + ``` + +注:数据集路径在./yolor/data/coco.yaml中设定,训练前请确认路径无误,如果路径不一致需要修改。 +原coco.yaml文件如图: +![coco_yaml](./figure/coco_yaml.png) + +## Train Model +### GPU 1P:在目录yolor下,运行 ./test/train_gpu_1p.sh +``` +chmod +x ./test/train_gpu_1p.sh +bash ./test/train_gpu_1p.sh +``` +若需要指定训练使用的卡号, 可修改train_gpu_1p.sh文件 "--device 0"配置项,其中卡号为0-7 + +### GPU 8P:在目录yolor下,运行 ./test/train_gpu_8p.sh +``` +chmod +x ./test/train_gpu_8p.sh +bash ./test/train_gpu_8p.sh +``` + +### NPU 1P:在目录yolor下,运行 train_performance_1p.sh +``` +chmod +x ./test/train_performance_1p.sh +bash ./test/train_performance_1p.sh +``` +若需要指定训练使用的卡号, 可修改train_performance_1p.sh文件 "--npu 0"配置项,其中卡号为0-7 + +### NPU 8P:在目录yolor下,运行 train_performance_8p.sh +``` +chmod +x ./test/train_performance_8p.sh +bash ./test/train_performance_8p.sh +``` + +注:在模型训练结束时,程序默认会将save的最后一个模型去除optimizer以减少模型大小,这会load模型,而在8P训练中,save和load的进程可能会不同步导致先load后save而报错,所以train_performance_8p.sh默认训练3个epoch(只要大于1个epoch即可)。 +如果用户只想要训练一个epoch并且保留模型的optimizer,在train_mp.py中注释掉如下部分代码即可: +![strip_optimizer](./figure/strip_optimizer.jpg) + + +### NPU 8P Full:在目录yolor下,运行 train_full_8p.sh +``` +chmod +x ./test/train_full_8p.sh +bash ./test/train_full_8p.sh +``` + +## Evaluation +复制训练好的last.pt到pretrained文件夹下,运行evaluation_npu.sh (npu) / evaluation_gpu.sh (gpu) +``` +chmod +x ./test/evaluation_xxx.sh +bash ./test/evaluation_xxx.sh +``` + +## 迁移学习 +参考https://github.com/WongKinYiu/yolor/issues/103,更改./cfg/yolo_p6.cfg中**对应行**的classes和filters: + +以coco为例,原80类别现在改为81:classes = 81, filters = anchor * (5 + classes) = 3 * (5 + 81) = 258,更改后的.cfg命名为yolor_p6_finetune.cfg,复制训练好的last.pt到pretrained文件夹下,运行train_finetune_1p.sh +``` +chmod +x ./test/train_finetune_1p.sh +bash ./test/train_finetune_1p.sh +``` + +## 白名单 +### Transpose whilte list + +路径:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py +#120行左右 +``` +[8,3,160,160,85], [8,3,80,80,85], [8,3,40,40,85], [8,3,20,20,85], [8,3,85,160,160], [8,3,85,80,80] +``` +### Slice_d whilte list +路径:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/impl/slice_d.py +#7500行左右 +``` +["float16", [32,3,96,168,85], [32,3,96,168,2]], +["float16", [32,3,96,168,85], [32,3,96,168,4]], +["float16", [32,3,80,168,85], [32,3,80,168,2]], +["float16", [32,3,80,168,85], [32,3,80,168,4]], +["float16", [32,3,48,84,85], [32,3,48,84,2]], +["float16", [32,3,48,84,85], [32,3,48,84,4]], +["float16", [32,3,40,84,85], [32,3,40,84,2]], +["float16", [32,3,40,84,85], [32,3,40,84,4]], +["float16", [32,3,24,42,85], [32,3,24,42,2]], +["float16", [32,3,24,42,85], [32,3,24,42,4]], +["float16", [32,3,20,42,85], [32,3,20,42,2]], +["float16", [32,3,20,42,85], [32,3,20,42,4]], +["float32", [8, 3, 160, 160, 85], [8, 3, 160, 160, 1]], +["float32", [8, 3, 80, 80, 85], [8, 3, 80, 80, 1]], ``` \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp.cfg index 810273b197f2a6f0c099ec54cb993e9ab0ed5d36..9f5f3ab421200036bf5d5c58f9be964f6d2e47a2 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp.cfg @@ -1,1376 
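(Editorial sketch, not part of the patch: a quick check of the `filters = anchors × (5 + classes)` rule quoted in the README's 迁移学习 (transfer-learning) section above; the helper name is made up for illustration.)

```python
def yolo_head_filters(classes, anchors_per_head=3):
    # 5 = 4 box coordinates + 1 objectness score per anchor
    return anchors_per_head * (5 + classes)

print(yolo_head_filters(80))  # 255 -> matches filters=255 in the stock cfg heads below
print(yolo_head_filters(81))  # 258 -> the value the README says to use for 80 + 1 classes
```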
+1,1376 @@ -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=512 -height=512 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -#cutmix=1 -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=silu - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-10 - -# Transition last - -# 17 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 
-pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-28 - -# Transition last - -# 48 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-28 - -# Transition last - -# 79 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] 
-batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-16 - -# Transition last - -# 98 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, -13 - -# 113 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 79 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 127 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - 
-# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 48 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 141 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=silu - -[route] -layers = -1, 127 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[route] -layers = -1,-6 - -# Transition last - -# 152 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, 113 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1,-6 - -# Transition last - -# 163 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Neck ============ # - -# 164 -[implicit_add] -filters=256 - -# 165 -[implicit_add] -filters=512 - -# 166 -[implicit_add] -filters=1024 - -# 167 -[implicit_mul] -filters=255 - -# 168 -[implicit_mul] -filters=255 - -# 169 -[implicit_mul] -filters=255 - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 141 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[shift_channels] -from=164 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=167 - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 
146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 152 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[shift_channels] -from=165 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=168 - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=silu - -[shift_channels] -from=166 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=169 - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 
+activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 141 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, 113 + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 164 +[implicit_add] +filters=256 + +# 165 +[implicit_add] +filters=512 + +# 166 +[implicit_add] +filters=1024 + +# 167 +[implicit_mul] +filters=255 + +# 168 +[implicit_mul] +filters=255 + +# 169 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[shift_channels] +from=164 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=167 + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[shift_channels] +from=165 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=168 + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=silu + +[shift_channels] +from=166 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=169 + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp_x.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp_x.cfg index 995aefea61010c1762a681939b4d6d057ab55131..55a54109bf4882055ebc02b5a8688bfd3d618e4d 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp_x.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_csp_x.cfg @@ -1,1576 +1,1576 @@ -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=512 -height=512 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -#cutmix=1 -mosaic=1 - - -# 
============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=silu - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=2 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-13 - -# Transition last - -# 20 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - 
-[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-34 - -# Transition last - -# 57 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] 
-batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-34 - -# Transition last - -# 94 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-19 - -# Transition last - -# 116 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1, -15 - -# 133 
(previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 94 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 149 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 57 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 165 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=silu - -[route] -layers = -1, 149 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 
-filters=320 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 178 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1, 133 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 191 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Neck ============ # - -# 192 -[implicit_add] -filters=320 - -# 193 -[implicit_add] -filters=640 - -# 194 -[implicit_add] -filters=1280 - -# 195 -[implicit_mul] -filters=255 - -# 196 -[implicit_mul] -filters=255 - -# 197 -[implicit_mul] -filters=255 - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 165 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[shift_channels] -from=192 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=195 - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 178 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[shift_channels] -from=193 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=196 - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 191 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=silu - -[shift_channels] -from=194 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=197 - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 
1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 
+pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu 
+ +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 192 +[implicit_add] +filters=320 + +# 193 +[implicit_add] +filters=640 + +# 194 +[implicit_add] +filters=1280 + +# 195 +[implicit_mul] +filters=255 + +# 196 +[implicit_mul] +filters=255 + +# 197 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[shift_channels] +from=192 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=195 + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[shift_channels] +from=193 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=196 + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=silu + +[shift_channels] +from=194 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=197 + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git 
a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6.cfg index 62b19e87734c2c45040623f2d0170ef2ae17f871..88ddd686d757636c8e3cc069b195f200600562c4 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6.cfg @@ -1,1762 +1,1762 @@ -[net] -batch=64 -subdivisions=8 -width=1280 -height=1280 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - - - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# P1 - -# Downsample - -# 0 -[reorg] - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=64 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 16 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - 
-[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=128 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 43 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=384 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=192 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 70 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=swish - - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual 
Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=256 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 85 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - - -# P6 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=320 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 100 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1, -13 - -# 115 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# End of CSPSPP - - -# FPN-5 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] 
-stride=2 - -[route] -layers = 85 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 131 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 70 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 147 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 43 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = 
-1, -8 - -# Transition last - -# 163 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=192 -activation=swish - -[route] -layers = -1, 147 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 176 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1, 131 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 189 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-6 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1, 115 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 202 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Neck ============ # - -# 203 -[implicit_add] -filters=256 - 
-# 204 -[implicit_add] -filters=384 - -# 205 -[implicit_add] -filters=512 - -# 206 -[implicit_add] -filters=640 - -# 207 -[implicit_mul] -filters=255 - -# 208 -[implicit_mul] -filters=255 - -# 209 -[implicit_mul] -filters=255 - -# 210 -[implicit_mul] -filters=255 - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[shift_channels] -from=203 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=207 - -[yolo] -mask = 0,1,2 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 176 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=swish - -[shift_channels] -from=204 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=208 - -[yolo] -mask = 3,4,5 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 189 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[shift_channels] -from=205 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=209 - -[yolo] -mask = 6,7,8 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-6 - -[route] -layers = 202 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[shift_channels] -from=206 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=210 - -[yolo] -mask = 9,10,11 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - +[net] +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + + + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# P1 + +# Downsample + +# 0 +[reorg] + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# 
Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 16 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=128 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 43 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + 
+[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=192 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 70 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=swish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=256 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 85 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] 
+batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=320 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 100 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, -13 + +# 115 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 131 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# 
FPN-4 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 70 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 147 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 43 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 163 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=192 +activation=swish + +[route] +layers = -1, 147 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 176 
(previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1, 131 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 189 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 115 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 202 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Neck ============ # + +# 203 +[implicit_add] +filters=256 + +# 204 +[implicit_add] +filters=384 + +# 205 +[implicit_add] +filters=512 + +# 206 +[implicit_add] +filters=640 + +# 207 +[implicit_mul] +filters=255 + +# 208 +[implicit_mul] +filters=255 + +# 209 +[implicit_mul] +filters=255 + +# 210 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[shift_channels] +from=203 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=207 + +[yolo] +mask = 0,1,2 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 176 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=swish + +[shift_channels] +from=204 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 
+activation=linear + +[control_channels] +from=208 + +[yolo] +mask = 3,4,5 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 189 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[shift_channels] +from=205 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=209 + +[yolo] +mask = 6,7,8 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 202 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[shift_channels] +from=206 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=210 + +[yolo] +mask = 9,10,11 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + # ============ End of Head ============ # \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6_finetune.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6_finetune.cfg index 0869eac0dca717534b73a9078aec113f94c89c07..ab74810f6bb49917e0e5923dc4458bac22e027b4 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6_finetune.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_p6_finetune.cfg @@ -1,1760 +1,1760 @@ -[net] -batch=64 -subdivisions=8 -width=1280 -height=1280 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# P1 - -# Downsample - -# 0 -[reorg] - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] 
-from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=64 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 16 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=128 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 43 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=384 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - 
-[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=192 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 70 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=swish - - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=256 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 85 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - - -# P6 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 
-activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=320 -#size=1 -#stride=1 -#pad=1 -#activation=swish - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 100 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1, -13 - -# 115 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# End of CSPSPP - - -# FPN-5 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 85 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 131 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 70 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 
-stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 147 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 43 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 163 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=192 -activation=swish - -[route] -layers = -1, 147 - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=192 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 176 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=192 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1, 131 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 
-activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 189 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-6 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1, 115 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 202 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Neck ============ # - -# 203 -[implicit_add] -filters=256 - -# 204 -[implicit_add] -filters=384 - -# 205 -[implicit_add] -filters=512 - -# 206 -[implicit_add] -filters=640 - -# 207 -[implicit_mul] -filters=258 - -# 208 -[implicit_mul] -filters=258 - -# 209 -[implicit_mul] -filters=258 - -# 210 -[implicit_mul] -filters=258 - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[shift_channels] -from=203 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=258 -activation=linear - -[control_channels] -from=207 - -[yolo] -mask = 0,1,2 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=81 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 176 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=swish - -[shift_channels] -from=204 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=258 -activation=linear - -[control_channels] -from=208 - -[yolo] -mask = 3,4,5 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=81 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 189 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[shift_channels] -from=205 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=258 -activation=linear - -[control_channels] -from=209 - -[yolo] 
-mask = 6,7,8 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=81 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-6 - -[route] -layers = 202 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[shift_channels] -from=206 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=258 -activation=linear - -[control_channels] -from=210 - -[yolo] -mask = 9,10,11 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=81 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - +[net] +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# P1 + +# Downsample + +# 0 +[reorg] + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 16 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=128 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 43 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=192 +#size=1 
+#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 70 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=swish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=256 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 85 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=320 +#size=1 +#stride=1 +#pad=1 +#activation=swish + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 100 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 
+size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, -13 + +# 115 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 131 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 70 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 147 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 43 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] 
+batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 163 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=192 +activation=swish + +[route] +layers = -1, 147 + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=192 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 176 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=192 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1, 131 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 189 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 115 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 
+filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 202 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Neck ============ # + +# 203 +[implicit_add] +filters=256 + +# 204 +[implicit_add] +filters=384 + +# 205 +[implicit_add] +filters=512 + +# 206 +[implicit_add] +filters=640 + +# 207 +[implicit_mul] +filters=258 + +# 208 +[implicit_mul] +filters=258 + +# 209 +[implicit_mul] +filters=258 + +# 210 +[implicit_mul] +filters=258 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[shift_channels] +from=203 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=258 +activation=linear + +[control_channels] +from=207 + +[yolo] +mask = 0,1,2 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=81 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 176 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=swish + +[shift_channels] +from=204 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=258 +activation=linear + +[control_channels] +from=208 + +[yolo] +mask = 3,4,5 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=81 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 189 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[shift_channels] +from=205 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=258 +activation=linear + +[control_channels] +from=209 + +[yolo] +mask = 6,7,8 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=81 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 202 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[shift_channels] +from=206 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=258 +activation=linear + +[control_channels] +from=210 + +[yolo] +mask = 9,10,11 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=81 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + # ============ End of Head ============ # \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_w6.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_w6.cfg index 
4052603dd2b1e2d2085b78939a84e1b2e3e3ddbe..b91167a2e06a1f0f15d9c88234d9c5971c3ad29c 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_w6.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolor_w6.cfg @@ -1,1760 +1,1760 @@ -[net] -batch=64 -subdivisions=8 -width=1280 -height=1280 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# P1 - -# Downsample - -# 0 -[reorg] - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=64 -#size=1 -#stride=1 -#pad=1 -#activation=silu - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 16 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] 
-batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=128 -#size=1 -#stride=1 -#pad=1 -#activation=silu - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 43 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=256 -#size=1 -#stride=1 -#pad=1 -#activation=silu - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-24 - -# Transition last - -# 70 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=768 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=3 -stride=1 -pad=1 
-activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=384 -#size=1 -#stride=1 -#pad=1 -#activation=silu - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 85 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=768 -size=1 -stride=1 -pad=1 -activation=silu - - -# P6 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first -# -#[convolutional] -#batch_normalize=1 -#filters=512 -#size=1 -#stride=1 -#pad=1 -#activation=silu - -# Merge [-1, -(3k+3)] - -[route] -layers = -1,-12 - -# Transition last - -# 100 (previous+6+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, -13 - -# 115 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# End of CSPSPP - - -# FPN-5 - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 85 - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 
-filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 131 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 70 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 147 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 43 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 163 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 
-filters=256 -activation=silu - -[route] -layers = -1, 147 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 176 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=384 -activation=silu - -[route] -layers = -1, 131 - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=384 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 189 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=384 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-6 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, 115 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 202 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Neck ============ # - -# 203 -[implicit_add] -filters=256 - -# 204 -[implicit_add] -filters=512 - -# 205 -[implicit_add] -filters=768 - -# 206 -[implicit_add] -filters=1024 - -# 207 -[implicit_mul] -filters=255 - -# 208 -[implicit_mul] -filters=255 - -# 209 -[implicit_mul] -filters=255 - -# 210 -[implicit_mul] 
-filters=255 - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[shift_channels] -from=203 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=207 - -[yolo] -mask = 0,1,2 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 176 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[shift_channels] -from=204 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=208 - -[yolo] -mask = 3,4,5 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 189 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=768 -activation=silu - -[shift_channels] -from=205 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=209 - -[yolo] -mask = 6,7,8 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-6 - -[route] -layers = 202 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=silu - -[shift_channels] -from=206 - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[control_channels] -from=210 - -[yolo] -mask = 9,10,11 -anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 -classes=80 -num=12 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - -# ============ End of Head ============ # +[net] +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# P1 + +# Downsample + +# 0 +[reorg] + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=64 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 16 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=128 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 43 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 
+size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=256 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-24 + +# Transition last + +# 70 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=768 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=384 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 85 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=768 +size=1 +stride=1 +pad=1 +activation=silu + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 
+pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first +# +#[convolutional] +#batch_normalize=1 +#filters=512 +#size=1 +#stride=1 +#pad=1 +#activation=silu + +# Merge [-1, -(3k+3)] + +[route] +layers = -1,-12 + +# Transition last + +# 100 (previous+6+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, -13 + +# 115 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 85 + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 131 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 70 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 147 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 43 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 163 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1, 147 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 176 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=384 +activation=silu + +[route] +layers = -1, 131 + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + 
+[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=384 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 189 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=384 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, 115 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 202 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# 203 +[implicit_add] +filters=256 + +# 204 +[implicit_add] +filters=512 + +# 205 +[implicit_add] +filters=768 + +# 206 +[implicit_add] +filters=1024 + +# 207 +[implicit_mul] +filters=255 + +# 208 +[implicit_mul] +filters=255 + +# 209 +[implicit_mul] +filters=255 + +# 210 +[implicit_mul] +filters=255 + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[shift_channels] +from=203 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=207 + +[yolo] +mask = 0,1,2 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 176 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[shift_channels] +from=204 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=208 + +[yolo] +mask = 3,4,5 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 
+ + +# YOLO-5 + +[route] +layers = 189 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=768 +activation=silu + +[shift_channels] +from=205 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=209 + +[yolo] +mask = 6,7,8 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 202 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=silu + +[shift_channels] +from=206 + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[control_channels] +from=210 + +[yolo] +mask = 9,10,11 +anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +classes=80 +num=12 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + +# ============ End of Head ============ # diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp.cfg index 487dc48ad6b2bcd52f4cbc32049d964d40bcc076..c387ce968e193fa396e920c40914fb3fa5640df1 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp.cfg @@ -1,1334 +1,1334 @@ -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=512 -height=512 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -#cutmix=1 -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=silu - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-10 - -# Transition last - -# 17 (previous+7+3k) -[convolutional] -batch_normalize=1 
-filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-28 - -# Transition last - -# 48 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] 
-from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-28 - -# Transition last - -# 79 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-16 - -# Transition last - -# 98 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -### SPP 
### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, -13 - -# 113 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 79 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 127 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 48 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 141 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=silu - -[route] -layers = -1, 127 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[route] -layers = -1,-6 - -# Transition last - -# 152 (previous+3+4+2k) -[convolutional] -batch_normalize=1 
-filters=256 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1, 113 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[route] -layers = -1,-6 - -# Transition last - -# 163 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 141 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 152 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# 
Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 
+ +# Transition last + +# 141 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1, 113 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 
+iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp_x.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp_x.cfg index 2816f2a0168322dab6ad5c79d189c94da0f3a5b6..285abc4d871f4313e631fcf6c1d090f2e389d636 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp_x.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_csp_x.cfg @@ -1,1534 +1,1534 @@ -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=512 -height=512 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -#cutmix=1 -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=silu - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=2 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-13 - -# Transition last - -# 20 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - 
-[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-34 - -# Transition last - -# 57 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] 
-batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-34 - -# Transition last - -# 94 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=silu - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-19 - -# Transition last - -# 116 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - 
-[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1, -15 - -# 133 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 94 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 149 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[upsample] -stride=2 - -[route] -layers = 57 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=silu - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 165 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=silu - 
-[route] -layers = -1, 149 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 178 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=silu - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1, 133 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[route] -layers = -1,-8 - -# Transition last - -# 191 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=silu - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 165 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 178 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 191 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=silu - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 
-num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +#cutmix=1 +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=silu + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] 
+batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=silu + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] 
+stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=silu + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain 
Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=silu + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=silu + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=silu + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p6.cfg 
b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p6.cfg index 3a891329249e9530986be3ad5b0675917d14ae58..1a4088414ba37efa2aca4cf9d20c45aa21458d3d 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p6.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p6.cfg @@ -1,2260 +1,2260 @@ -[net] -batch=64 -subdivisions=8 -width=1280 -height=1280 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=mish - - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-7 - -# Transition last - -# 10 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-13 - -# Transition last - -# 26 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] 
-batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-49 - -# Transition last - -# 78 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 
-size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 
-activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-49 - -# Transition last - -# 130 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-25 - -# Transition last - -# 158 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=mish - - -# P6 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - 
-[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-25 - -# Transition last - -# 186 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=mish - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[route] -layers = -1, -13 - -# 201 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# End of CSPSPP - - -# FPN-5 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 158 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 
-activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 217 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 130 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 233 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 78 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 249 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=mish - -[route] -layers = -1, 233 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 
-pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 262 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=mish - -[route] -layers = -1, 217 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 275 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-6 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=mish - -[route] -layers = -1, 201 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 288 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 249 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 0,1,2,3 -anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 -classes=80 -num=16 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 262 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 4,5,6,7 -anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 -classes=80 -num=16 -jitter=.3 
-ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 275 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 8,9,10,11 -anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 -classes=80 -num=16 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-6 - -[route] -layers = 288 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 12,13,14,15 -anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 -classes=80 -num=16 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - -# ============ End of Head ============ # +[net] +batch=64 +subdivisions=8 +width=1280 +height=1280 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + 
+[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +# 201 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 
+activation=mish + +# End of CSPSPP + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 158 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 217 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 233 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 249 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 233 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 262 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 217 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 275 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 201 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 288 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 
+pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 249 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 262 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 275 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 288 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 12,13,14,15 +anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024 +classes=80 +num=16 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + +# ============ End of Head ============ # diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p7.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p7.cfg index 18cea793b05235297561dbefa2eba74677d37e6e..10379a0e759265d9cc39c8926e2606f46fa1083b 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p7.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov4_p7.cfg @@ -1,2714 +1,2714 @@ -[net] -batch=64 -subdivisions=8 -width=1536 -height=1536 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=40 -size=3 -stride=1 -pad=1 -activation=mish - - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 
-filters=40 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=40 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-7 - -# Transition last - -# 10 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-13 - -# Transition last - -# 26 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] 
-batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-49 - -# Transition last - -# 78 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 
-filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-49 - -# Transition last - -# 130 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - 
-[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-25 - -# Transition last - -# 158 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=mish - - -# P6 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] 
-batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-25 - -# Transition last - -# 186 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=mish - - -# P7 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-25 - -# Transition last - -# 214 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=mish - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 
-activation=mish - -[route] -layers = -1, -13 - -# 229 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# End of CSPSPP - - -# FPN-6 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 186 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 245 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - - -# FPN-5 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 158 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 261 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 130 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 
-stride=1 -pad=1 -filters=320 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 277 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[upsample] -stride=2 - -[route] -layers = 78 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=mish - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=mish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 293 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=mish - -[route] -layers = -1, 277 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 306 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1, 261 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] 
-batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 319 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-6 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1, 245 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 332 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - - -# PAN-7 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1, 229 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[route] -layers = -1,-8 - -# Transition last - -# 345 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=mish - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 293 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 0,1,2,3 -anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 -classes=80 -num=20 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-4 - -[route] -layers = 306 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 4,5,6,7 -anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 
134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 -classes=80 -num=20 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-5 - -[route] -layers = 319 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 8,9,10,11 -anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 -classes=80 -num=20 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-6 - -[route] -layers = 332 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 12,13,14,15 -anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 -classes=80 -num=20 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -# YOLO-7 - -[route] -layers = 345 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=mish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=340 -activation=linear - -[yolo] -mask = 16,17,18,19 -anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 -classes=80 -num=20 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - -# ============ End of Head ============ # +[net] +batch=64 +subdivisions=8 +width=1536 +height=1536 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=40 +size=3 +stride=1 +pad=1 +activation=mish + + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=40 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-7 + +# Transition last + +# 10 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + + +# P2 + +# Downsample + 
+[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 26 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 
+stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 78 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-49 + +# Transition last + +# 130 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 
+filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 158 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=mish + + +# P6 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 186 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=mish + + +# P7 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 
+pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-25 + +# Transition last + +# 214 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1, -13 + +# 229 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# End of CSPSPP + + +# FPN-6 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 186 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 
+stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 245 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-5 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 158 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 261 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 130 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 277 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 78 + +[convolutional] +batch_normalize=1 
+filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=mish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=mish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 293 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=mish + +[route] +layers = -1, 277 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 306 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1, 261 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 319 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-6 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1, 245 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 
+size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 332 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + + +# PAN-7 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1, 229 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[route] +layers = -1,-8 + +# Transition last + +# 345 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=mish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 293 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 0,1,2,3 +anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 +classes=80 +num=20 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-4 + +[route] +layers = 306 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 4,5,6,7 +anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 +classes=80 +num=20 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-5 + +[route] +layers = 319 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 8,9,10,11 +anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 
134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 +classes=80 +num=20 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-6 + +[route] +layers = 332 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 12,13,14,15 +anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 +classes=80 +num=20 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + + +# YOLO-7 + +[route] +layers = 345 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=340 +activation=linear + +[yolo] +mask = 16,17,18,19 +anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408 +classes=80 +num=20 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 + +# ============ End of Head ============ # diff --git a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov5x.cfg b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov5x.cfg index 1c62dc4b192ee7bbc74fb039a0022403218b0f38..c06c663f2cde20418cc1c47d9cbdbf871ccbf66c 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov5x.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/cfg/yolov5x.cfg @@ -1,43 +1,43 @@ -nc: 80 -depth_multiple: 1.33 -width_multiple: 1.25 - -anchors: - - [10,13, 16,30 33,23] - - [30,61, 62,45, 59,119] - - [116,90, 156,198, 373,326] - -backbone: - [[-1, 1, Focus, [64,3]], - [-1, 1, Conv, [128, 3, 2]], - [-1, 3, BottleneckCSP, [128]], - [-1, 1, Conv, [256, 3, 2]], - [-1, 3, BottleneckCSP, [256]], - [-1, 1, Conv, [512, 3, 2]], - [-1, 3, BottleneckCSP, [512]], - [-1, 1, Conv, [1024, 3, 2]], - [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 3, BottleneckCSP, [1024, False]], - ] - -head: - [[-1, 1, Conv, [512, 1, 1]], - [-1, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 6], 1, Concat, [1]], - [-1, 3, BottleneckCSP, [512, False]], - - [-1, 1, Conv, [256, 1, 1]], - [-1, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 4], 1, Concat, [1]], - [-1, 3, BottleneckCSP, [256, False]], - - [-1, 1, Conv, [256, 3, 2]], - [[-1, 14], 1, Concat, [1]], - [-1, 3, BottleneckCSP, [512, False]], - - [-1, 1, Conv, [512, 3, 2]], - [[-1, 10], 1, Concat, [1]], - [-1, 3, BottleneckCSP, [1024, False]], - - [[17, 20, 23], 1, Detect, [nc, anchors]], - ] +nc: 80 +depth_multiple: 1.33 +width_multiple: 1.25 + +anchors: + - [10,13, 16,30 33,23] + - [30,61, 62,45, 59,119] + - [116,90, 156,198, 373,326] + +backbone: + [[-1, 1, Focus, [64,3]], + [-1, 1, Conv, [128, 3, 2]], + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], + [-1, 3, BottleneckCSP, [256]], + [-1, 1, Conv, [512, 3, 2]], + [-1, 3, BottleneckCSP, [512]], + [-1, 1, Conv, [1024, 3, 2]], + [-1, 1, SPP, [1024, [5, 9, 13]]], + [-1, 3, BottleneckCSP, [1024, False]], + ] + +head: + [[-1, 1, Conv, [512, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 
'nearest']], + [[-1, 6], 1, Concat, [1]], + [-1, 3, BottleneckCSP, [512, False]], + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], + [-1, 3, BottleneckCSP, [256, False]], + + [-1, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], + [-1, 3, BottleneckCSP, [512, False]], + + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], + [-1, 3, BottleneckCSP, [1024, False]], + + [[17, 20, 23], 1, Detect, [nc, anchors]], + ] diff --git a/PyTorch/contrib/cv/detection/YOLOR/darknet/README.md b/PyTorch/contrib/cv/detection/YOLOR/darknet/README.md index 1d3d11e054d320a1b11ae0ed2a92cbaf10ed302a..d2fc579741572cb0eaa03ca74598eee6da50985f 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/darknet/README.md +++ b/PyTorch/contrib/cv/detection/YOLOR/darknet/README.md @@ -1,63 +1,63 @@ -## Model Zoo - -| Model | Test Size | APval | AP50val | AP75val | APSval | APMval | APLval | batch1 throughput | -| :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | -| **YOLOv4-CSP** | 640 | **49.1%** | **67.7%** | **53.8%** | **32.1%** | **54.4%** | **63.2%** | 76 *fps* | -| **YOLOR-CSP** | 640 | **49.2%** | **67.6%** | **53.7%** | **32.9%** | **54.4%** | **63.0%** | - | -| | | | | | | | -| **YOLOv4-CSP-X** | 640 | **50.9%** | **69.3%** | **55.4%** | **35.3%** | **55.8%** | **64.8%** | 53 *fps* | -| **YOLOR-CSP-X** | 640 | **51.1%** | **69.6%** | **55.7%** | **35.7%** | **56.0%** | **65.2%** | - | -| | | | | | | | - -## Installation - -https://github.com/AlexeyAB/darknet - -Docker environment (recommended) -
Expand - -``` -# get code -git clone https://github.com/AlexeyAB/darknet - -# create the docker container, you can change the share memory size if you have more. -nvidia-docker run --name yolor -it -v your_coco_path/:/coco/ -v your_code_path/:/yolor --shm-size=64g nvcr.io/nvidia/pytorch:21.02-py3 - -# apt install required packages -apt update -apt install -y libopencv-dev - -# edit Makefile -#GPU=1 -#CUDNN=1 -#CUDNN_HALF=1 -#OPENCV=1 -#AVX=1 -#OPENMP=1 -#LIBSO=1 -#ZED_CAMERA=0 -#ZED_CAMERA_v2_8=0 -# -#USE_CPP=0 -#DEBUG=0 -# -#ARCH= -gencode arch=compute_52,code=[sm_70,compute_70] \ -# -gencode arch=compute_61,code=[sm_75,compute_75] \ -# -gencode arch=compute_61,code=[sm_80,compute_80] \ -# -gencode arch=compute_61,code=[sm_86,compute_86] -# -#... - -# build -make -j8 -``` - -
- -## Testing - -To reproduce inference speed, using: - -``` -CUDA_VISIBLE_DEVICES=0 ./darknet detector demo cfg/coco.data cfg/yolov4-csp.cfg weights/yolov4-csp.weights source/test.mp4 -dont_show -benchmark -``` +## Model Zoo + +| Model | Test Size | APval | AP50val | AP75val | APSval | APMval | APLval | batch1 throughput | +| :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | +| **YOLOv4-CSP** | 640 | **49.1%** | **67.7%** | **53.8%** | **32.1%** | **54.4%** | **63.2%** | 76 *fps* | +| **YOLOR-CSP** | 640 | **49.2%** | **67.6%** | **53.7%** | **32.9%** | **54.4%** | **63.0%** | - | +| | | | | | | | +| **YOLOv4-CSP-X** | 640 | **50.9%** | **69.3%** | **55.4%** | **35.3%** | **55.8%** | **64.8%** | 53 *fps* | +| **YOLOR-CSP-X** | 640 | **51.1%** | **69.6%** | **55.7%** | **35.7%** | **56.0%** | **65.2%** | - | +| | | | | | | | + +## Installation + +https://github.com/AlexeyAB/darknet + +Docker environment (recommended) +
Expand + +``` +# get code +git clone https://github.com/AlexeyAB/darknet + +# create the docker container, you can change the share memory size if you have more. +nvidia-docker run --name yolor -it -v your_coco_path/:/coco/ -v your_code_path/:/yolor --shm-size=64g nvcr.io/nvidia/pytorch:21.02-py3 + +# apt install required packages +apt update +apt install -y libopencv-dev + +# edit Makefile +#GPU=1 +#CUDNN=1 +#CUDNN_HALF=1 +#OPENCV=1 +#AVX=1 +#OPENMP=1 +#LIBSO=1 +#ZED_CAMERA=0 +#ZED_CAMERA_v2_8=0 +# +#USE_CPP=0 +#DEBUG=0 +# +#ARCH= -gencode arch=compute_52,code=[sm_70,compute_70] \ +# -gencode arch=compute_61,code=[sm_75,compute_75] \ +# -gencode arch=compute_61,code=[sm_80,compute_80] \ +# -gencode arch=compute_61,code=[sm_86,compute_86] +# +#... + +# build +make -j8 +``` + +
+ +## Testing + +To reproduce inference speed, using: + +``` +CUDA_VISIBLE_DEVICES=0 ./darknet detector demo cfg/coco.data cfg/yolov4-csp.cfg weights/yolov4-csp.weights source/test.mp4 -dont_show -benchmark +``` diff --git a/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp-x.cfg b/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp-x.cfg index cdb71565ac1cd97d79c0c334da9b3d193a624f09..e7acf9ef4e49aa9d97ddfe41f72447ac033804cf 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp-x.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp-x.cfg @@ -1,1555 +1,1555 @@ - -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=640 -height=640 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - -letter_box=1 - -ema_alpha=0.9998 - -#optimized_memory=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=swish - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=2 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=40 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=swish - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=80 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=80 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-13 - -# Transition last - -# 20 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 
-activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-34 - -# Transition last - -# 57 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] 
-from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-34 - -# Transition last - -# 94 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1280 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-19 - -# Transition last - -# 116 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1280 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End 
of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[route] -layers = -1, -15 - -# 133 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 94 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 149 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 57 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=160 -activation=swish - -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 
-filters=160 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -8 - -# Transition last - -# 165 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=160 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1, 149 - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 178 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=320 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=640 -activation=swish - -[route] -layers = -1, 133 - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[route] -layers = -1,-8 - -# Transition last - -# 191 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=640 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 165 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=320 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 -#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 - - -# YOLO-4 - -[route] -layers = 178 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=640 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 
-#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 - - -# YOLO-5 - -[route] -layers = 191 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1280 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 -#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 + +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 
+size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = 
-1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish 
+ +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 
75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp.cfg b/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp.cfg index fd1bdc6bdc692106e1cbf810d7be97902cb2129a..a47c9f7160e77e1aea809f5840c93498b2443978 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp.cfg +++ b/PyTorch/contrib/cv/detection/YOLOR/darknet/cfg/yolov4-csp.cfg @@ -1,1354 +1,1354 @@ -[net] -# Testing -#batch=1 -#subdivisions=1 -# Training -batch=64 -subdivisions=8 -width=640 -height=640 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -mosaic=1 - -letter_box=1 - -ema_alpha=0.9998 - -#optimized_memory=1 - - -# ============ Backbone ============ # - -# Stem - -# 0 -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=swish - -# P1 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -# 4 (previous+1+3k) -[shortcut] -from=-3 -activation=linear - -# P2 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1, -(3k+4)] - -[route] -layers = -1,-10 - -# Transition last - -# 17 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# P3 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 
-stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(4+3k)] - -[route] -layers = -1,-28 - -# Transition last - -# 48 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# P4 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 
-activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-28 - -# Transition last - -# 79 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# P5 - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# Residual Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=swish - -[shortcut] -from=-3 -activation=linear - -# Transition first - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# Merge [-1 -(3k+4)] - -[route] -layers = -1,-16 - -# Transition last - -# 98 (previous+7+3k) -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Backbone ============ # - -# ============ Neck ============ # - -# CSPSPP - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 
-### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[route] -layers = -1, -13 - -# 113 (previous+6+5+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# End of CSPSPP - - -# FPN-4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 79 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 127 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# FPN-3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[upsample] -stride=2 - -[route] -layers = 48 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=128 -activation=swish - -# Merge [-1, -(2k+2)] - -[route] -layers = -1, -6 - -# Transition last - -# 141 (previous+6+4+2k) -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1, 127 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[route] -layers = -1,-6 - -# Transition last - -# 152 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=swish - - -# PAN-5 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 
-activation=swish - -[route] -layers = -1, 113 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# Split - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[route] -layers = -2 - -# Plain Block - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[route] -layers = -1,-6 - -# Transition last - -# 163 (previous+3+4+2k) -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=swish - -# ============ End of Neck ============ # - -# ============ Head ============ # - -# YOLO-3 - -[route] -layers = 141 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 -#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 - - -# YOLO-4 - -[route] -layers = 152 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 -#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 - - -# YOLO-5 - -[route] -layers = 163 - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=swish - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=logistic - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.1 -scale_x_y = 2.0 -objectness_smooth=1 -ignore_thresh = .7 -truth_thresh = 1 -#random=1 -resize=1.5 -iou_thresh=0.2 -iou_normalizer=0.05 -cls_normalizer=0.5 -obj_normalizer=0.4 -iou_loss=ciou -nms_kind=diounms -beta_nms=0.6 -new_coords=1 -max_delta=2 +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 
+pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + 
+[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 
+stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 141 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, 113 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 163 + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/PyTorch/contrib/cv/detection/YOLOR/darknet/new_layers.md b/PyTorch/contrib/cv/detection/YOLOR/darknet/new_layers.md index b9b5fb84a8cb747d608d637fae09ba7a201257e8..9f7a35c02ae6564f35688aab29b3aed0f3ad3a2b 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/darknet/new_layers.md +++ b/PyTorch/contrib/cv/detection/YOLOR/darknet/new_layers.md @@ -1,329 +1,329 @@ -![Implicit Modeling](https://github.com/WongKinYiu/yolor/blob/main/figure/implicit_modeling.png) - -### 1. silence layer - -Usage: - -``` -[silence] -``` - -PyTorch code: - -``` python -class Silence(nn.Module): - def __init__(self): - super(Silence, self).__init__() - def forward(self, x): - return x -``` - - -### 2. implicit_add layer - -Usage: - -``` -[implicit_add] -filters=128 -``` - -PyTorch code: - -``` python -class ImplicitA(nn.Module): - def __init__(self, channel): - super(ImplicitA, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit -``` - - -### 3. shift_channels layer - -Usage: - -``` -[shift_channels] -from=101 -``` - -PyTorch code: - -``` python -class ShiftChannel(nn.Module): - def __init__(self, layers): - super(ShiftChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return a.expand_as(x) + x -``` - - -### 4. implicit_mul layer - -Usage: - -``` -[implicit_mul] -filters=128 -``` - -PyTorch code: - -``` python -class ImplicitM(nn.Module): - def __init__(self, channel): - super(ImplicitM, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) - nn.init.normal_(self.implicit, mean=1., std=.02) - - def forward(self): - return self.implicit -``` - - -### 5. control_channels layer - -Usage: - -``` -[control_channels] -from=101 -``` - -PyTorch code: - -``` python -class ControlChannel(nn.Module): - def __init__(self, layers): - super(ControlChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return a.expand_as(x) * x -``` - - -### 6. implicit_cat layer - -Usage: - -``` -[implicit_cat] -filters=128 -``` - -PyTorch code: (same as ImplicitA) - -``` python -class ImplicitC(nn.Module): - def __init__(self, channel): - super(ImplicitC, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit -``` - - -### 7. alternate_channels layer - -Usage: - -``` -[alternate_channels] -from=101 -``` - -PyTorch code: - -``` python -class AlternateChannel(nn.Module): - def __init__(self, layers): - super(AlternateChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return torch.cat([a.expand_as(x), x], dim=1) -``` - - -### 8. 
implicit_add_2d layer - -Usage: - -``` -[implicit_add_2d] -filters=128 -atoms=128 -``` - -PyTorch code: - -``` python -class Implicit2DA(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DA, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit -``` - - -### 9. shift_channels_2d layer - -Usage: - -``` -[shift_channels_2d] -from=101 -``` - -PyTorch code: - -``` python -class ShiftChannel2D(nn.Module): - def __init__(self, layers): - super(ShiftChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return a.expand_as(x) + x -``` - - -### 10. implicit_mul_2d layer - -Usage: - -``` -[implicit_mul_2d] -filters=128 -atoms=128 -``` - -PyTorch code: - -``` python -class Implicit2DM(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DM, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1)) - nn.init.normal_(self.implicit, mean=1., std=.02) - - def forward(self): - return self.implicit -``` - - -### 11. control_channels_2d layer - -Usage: - -``` -[control_channels_2d] -from=101 -``` - -PyTorch code: - -``` python -class ControlChannel2D(nn.Module): - def __init__(self, layers): - super(ControlChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return a.expand_as(x) * x -``` - - -### 12. implicit_cat_2d layer - -Usage: - -``` -[implicit_cat_2d] -filters=128 -atoms=128 -``` - -PyTorch code: (same as Implicit2DA) - -``` python -class Implicit2DC(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DC, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit -``` - - -### 13. alternate_channels_2d layer - -Usage: - -``` -[alternate_channels_2d] -from=101 -``` - -PyTorch code: - -``` python -class AlternateChannel2D(nn.Module): - def __init__(self, layers): - super(AlternateChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return torch.cat([a.expand_as(x), x], dim=1) -``` - - -### 14. dwt layer - -Usage: - -``` -[dwt] -``` - -PyTorch code: - -``` python -# https://github.com/fbcotter/pytorch_wavelets -from pytorch_wavelets import DWTForward, DWTInverse -class DWT(nn.Module): - def __init__(self): - super(DWT, self).__init__() - self.xfm = DWTForward(J=1, wave='db1', mode='zero') - - def forward(self, x): - b,c,w,h = x.shape - yl, yh = self.xfm(x) - return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) -``` +![Implicit Modeling](https://github.com/WongKinYiu/yolor/blob/main/figure/implicit_modeling.png) + +### 1. silence layer + +Usage: + +``` +[silence] +``` + +PyTorch code: + +``` python +class Silence(nn.Module): + def __init__(self): + super(Silence, self).__init__() + def forward(self, x): + return x +``` + + +### 2. 
implicit_add layer + +Usage: + +``` +[implicit_add] +filters=128 +``` + +PyTorch code: + +``` python +class ImplicitA(nn.Module): + def __init__(self, channel): + super(ImplicitA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit +``` + + +### 3. shift_channels layer + +Usage: + +``` +[shift_channels] +from=101 +``` + +PyTorch code: + +``` python +class ShiftChannel(nn.Module): + def __init__(self, layers): + super(ShiftChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) + x +``` + + +### 4. implicit_mul layer + +Usage: + +``` +[implicit_mul] +filters=128 +``` + +PyTorch code: + +``` python +class ImplicitM(nn.Module): + def __init__(self, channel): + super(ImplicitM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit +``` + + +### 5. control_channels layer + +Usage: + +``` +[control_channels] +from=101 +``` + +PyTorch code: + +``` python +class ControlChannel(nn.Module): + def __init__(self, layers): + super(ControlChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) * x +``` + + +### 6. implicit_cat layer + +Usage: + +``` +[implicit_cat] +filters=128 +``` + +PyTorch code: (same as ImplicitA) + +``` python +class ImplicitC(nn.Module): + def __init__(self, channel): + super(ImplicitC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit +``` + + +### 7. alternate_channels layer + +Usage: + +``` +[alternate_channels] +from=101 +``` + +PyTorch code: + +``` python +class AlternateChannel(nn.Module): + def __init__(self, layers): + super(AlternateChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return torch.cat([a.expand_as(x), x], dim=1) +``` + + +### 8. implicit_add_2d layer + +Usage: + +``` +[implicit_add_2d] +filters=128 +atoms=128 +``` + +PyTorch code: + +``` python +class Implicit2DA(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit +``` + + +### 9. shift_channels_2d layer + +Usage: + +``` +[shift_channels_2d] +from=101 +``` + +PyTorch code: + +``` python +class ShiftChannel2D(nn.Module): + def __init__(self, layers): + super(ShiftChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) + x +``` + + +### 10. implicit_mul_2d layer + +Usage: + +``` +[implicit_mul_2d] +filters=128 +atoms=128 +``` + +PyTorch code: + +``` python +class Implicit2DM(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit +``` + + +### 11. 
control_channels_2d layer + +Usage: + +``` +[control_channels_2d] +from=101 +``` + +PyTorch code: + +``` python +class ControlChannel2D(nn.Module): + def __init__(self, layers): + super(ControlChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) * x +``` + + +### 12. implicit_cat_2d layer + +Usage: + +``` +[implicit_cat_2d] +filters=128 +atoms=128 +``` + +PyTorch code: (same as Implicit2DA) + +``` python +class Implicit2DC(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit +``` + + +### 13. alternate_channels_2d layer + +Usage: + +``` +[alternate_channels_2d] +from=101 +``` + +PyTorch code: + +``` python +class AlternateChannel2D(nn.Module): + def __init__(self, layers): + super(AlternateChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return torch.cat([a.expand_as(x), x], dim=1) +``` + + +### 14. dwt layer + +Usage: + +``` +[dwt] +``` + +PyTorch code: + +``` python +# https://github.com/fbcotter/pytorch_wavelets +from pytorch_wavelets import DWTForward, DWTInverse +class DWT(nn.Module): + def __init__(self): + super(DWT, self).__init__() + self.xfm = DWTForward(J=1, wave='db1', mode='zero') + + def forward(self, x): + b,c,w,h = x.shape + yl, yh = self.xfm(x) + return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) +``` diff --git a/PyTorch/contrib/cv/detection/YOLOR/data/coco.names b/PyTorch/contrib/cv/detection/YOLOR/data/coco.names index a1a11c4e116132d374a6e399b4716f1ec6da5bf2..941cb4e1392266f6a6c09b1fdc5f79503b2e5df6 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/data/coco.names +++ b/PyTorch/contrib/cv/detection/YOLOR/data/coco.names @@ -1,80 +1,80 @@ -person -bicycle -car -motorcycle -airplane -bus -train -truck -boat -traffic light -fire hydrant -stop sign -parking meter -bench -bird -cat -dog -horse -sheep -cow -elephant -bear -zebra -giraffe -backpack -umbrella -handbag -tie -suitcase -frisbee -skis -snowboard -sports ball -kite -baseball bat -baseball glove -skateboard -surfboard -tennis racket -bottle -wine glass -cup -fork -knife -spoon -bowl -banana -apple -sandwich -orange -broccoli -carrot -hot dog -pizza -donut -cake -chair -couch -potted plant -bed -dining table -toilet -tv -laptop -mouse -remote -keyboard -cell phone -microwave -oven -toaster -sink -refrigerator -book -clock -vase -scissors -teddy bear -hair drier -toothbrush +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/PyTorch/contrib/cv/detection/YOLOR/data/coco.yaml b/PyTorch/contrib/cv/detection/YOLOR/data/coco.yaml index 
6b34a4e99a4b45d9e97439aeabfc7ded409605e7..b0e6bd4baec07854ed2b69dd240e4e74bc0ac8ca 100644
--- a/PyTorch/contrib/cv/detection/YOLOR/data/coco.yaml
+++ b/PyTorch/contrib/cv/detection/YOLOR/data/coco.yaml
@@ -1,18 +1,18 @@
-# train and val datasets (image directory or *.txt file with image paths)
-train: /npu/traindata/yolov5_data/train2017.txt # 118k images
-val: /npu/traindata/yolov5_data/val2017.txt # 5k images
-test: /npu/traindata/yolov5_data/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
-
-# number of classes
-nc: 80
-
-# class names
-names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
- 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
- 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
- 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
- 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
- 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
- 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
- 'hair drier', 'toothbrush']
+# train and val datasets (image directory or *.txt file with image paths)
+train: /npu/traindata/yolov5_data/train2017.txt # 118k images
+val: /npu/traindata/yolov5_data/val2017.txt # 5k images
+test: /npu/traindata/yolov5_data/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# number of classes
+nc: 80
+
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+ 'hair drier', 'toothbrush']
diff --git a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.finetune.1280.yaml b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.finetune.1280.yaml
index 4cef10c4f53aaa5c0c83e09f6c4d458ab5384ae4..d3ebbe10f1e6c70007eed0d38d31afba3c0348aa 100644
--- a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.finetune.1280.yaml
+++ b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.finetune.1280.yaml
@@ -1,28 +1,28 @@
-lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
-lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
-momentum: 0.937 # SGD momentum/Adam beta1
-weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
-warmup_momentum: 0.8 # warmup initial momentum
-warmup_bias_lr: 0.1 # warmup initial bias lr
-box: 0.05 # box loss gain
-cls: 0.5 # cls loss gain
-cls_pw: 1.0 # cls BCELoss positive_weight
-obj: 1.0 # obj loss gain (scale with pixels)
-obj_pw: 1.0 # obj BCELoss positive_weight
-iou_t: 0.20 # IoU training threshold
-anchor_t: 4.0 # anchor-multiple threshold
-# anchors: 3 # anchors per output layer (0 to ignore)
-fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
-hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
-hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4 # image HSV-Value augmentation (fraction)
-degrees: 0.0 # image rotation (+/- deg)
-translate: 0.5 # image translation (+/- fraction)
-scale: 0.8 # image scale (+/- gain)
-shear: 0.0 # image shear (+/- deg)
-perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
-flipud: 0.0 # image flip up-down (probability)
-fliplr: 0.5 # image flip left-right (probability)
-mosaic: 1.0 # image mosaic (probability)
-mixup: 0.2 # image mixup (probability)
+lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.0005 # optimizer weight decay 5e-4
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.1 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+# anchors: 3 # anchors per output layer (0 to ignore)
+fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.5 # image translation (+/- fraction)
+scale: 0.8 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 1.0 # image mosaic (probability)
+mixup: 0.2 # image mixup (probability)
diff --git a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.1280.yaml b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.1280.yaml
index ece2c96e5dee39d5dc8d53b31f130914b3c442b7..3b0f84b96ade2d97bb91ab6e6e4765cb5a64606b 100644
--- a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.1280.yaml
+++ b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.1280.yaml
@@ -1,28 +1,28 @@
-lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
-lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
-momentum: 0.937 # SGD momentum/Adam beta1
-weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
-warmup_momentum: 0.8 # warmup initial momentum
-warmup_bias_lr: 0.1 # warmup initial bias lr
-box: 0.05 # box loss gain
-cls: 0.5 # cls loss gain
-cls_pw: 1.0 # cls BCELoss positive_weight
-obj: 1.0 # obj loss gain (scale with pixels)
-obj_pw: 1.0 # obj BCELoss positive_weight
-iou_t: 0.20 # IoU training threshold
-anchor_t: 4.0 # anchor-multiple threshold
-# anchors: 3 # anchors per output layer (0 to ignore)
-fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
-hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
-hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4 # image HSV-Value augmentation (fraction)
-degrees: 0.0 # image rotation (+/- deg)
-translate: 0.5 # image translation (+/- fraction)
-scale: 0.5 # image scale (+/- gain)
-shear: 0.0 # image shear (+/- deg)
-perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
-flipud: 0.0 # image flip up-down (probability)
-fliplr: 0.5 # image flip left-right (probability)
-mosaic: 1.0 # image mosaic (probability)
-mixup: 0.0 # image mixup (probability)
+lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD momentum/Adam beta1
+weight_decay: 0.0005 # optimizer weight decay 5e-4
+warmup_epochs: 3.0 # warmup epochs (fractions ok)
+warmup_momentum: 0.8 # warmup initial momentum
+warmup_bias_lr: 0.1 # warmup initial bias lr
+box: 0.05 # box loss gain
+cls: 0.5 # cls loss gain
+cls_pw: 1.0 # cls BCELoss positive_weight
+obj: 1.0 # obj loss gain (scale with pixels)
+obj_pw: 1.0 # obj BCELoss positive_weight
+iou_t: 0.20 # IoU training threshold
+anchor_t: 4.0 # anchor-multiple threshold
+# anchors: 3 # anchors per output layer (0 to ignore)
+fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4 # image HSV-Value augmentation (fraction)
+degrees: 0.0 # image rotation (+/- deg)
+translate: 0.5 # image translation (+/- fraction)
+scale: 0.5 # image scale (+/- gain)
+shear: 0.0 # image shear (+/- deg)
+perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0 # image flip up-down (probability)
+fliplr: 0.5 # image flip left-right (probability)
+mosaic: 1.0 # image mosaic (probability)
+mixup: 0.0 # image mixup (probability)
diff --git a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.640.yaml b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.640.yaml
index 336fa526e4420cb02bfc30f869c062546013d43c..00e458ae3a562bb1b03e832ca8d31703c7c24c4f 100644
--- a/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.640.yaml
+++ b/PyTorch/contrib/cv/detection/YOLOR/data/hyp.scratch.640.yaml
@@ -1,28 +1,28 @@
-lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
-lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
-momentum: 0.937 # SGD momentum/Adam beta1
-weight_decay: 0.0005 # optimizer weight decay 5e-4
-warmup_epochs: 3.0 # warmup epochs (fractions ok)
-warmup_momentum: 0.8 # warmup initial momentum
-warmup_bias_lr: 0.1 # warmup initial bias lr
-box: 0.05 # box loss gain
-cls: 0.3 # cls loss gain
-cls_pw: 1.0 # cls BCELoss positive_weight
-obj: 0.7 # obj loss gain (scale with pixels)
-obj_pw: 1.0 # obj BCELoss positive_weight
-iou_t: 0.20 # IoU training threshold
-anchor_t: 4.0 # anchor-multiple threshold
-# anchors: 3 # anchors per output layer (0 to ignore)
-fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
-hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
-hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4 # image HSV-Value augmentation (fraction)
-degrees: 0.0 # image rotation (+/- deg)
-translate: 0.1 # image translation (+/- fraction)
-scale: 0.9 # image scale (+/- gain)
-shear: 0.0 # image shear (+/- deg)
-perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
-flipud: 0.0 # image flip up-down (probability)
-fliplr: 0.5 # image flip left-right (probability)
-mosaic: 1.0 # image mosaic (probability)
-mixup: 0.0 # image mixup (probability)
+lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937 # SGD
momentum/Adam beta1 +weight_decay: 0.0005 # optimizer weight decay 5e-4 +warmup_epochs: 3.0 # warmup epochs (fractions ok) +warmup_momentum: 0.8 # warmup initial momentum +warmup_bias_lr: 0.1 # warmup initial bias lr +box: 0.05 # box loss gain +cls: 0.3 # cls loss gain +cls_pw: 1.0 # cls BCELoss positive_weight +obj: 0.7 # obj loss gain (scale with pixels) +obj_pw: 1.0 # obj BCELoss positive_weight +iou_t: 0.20 # IoU training threshold +anchor_t: 4.0 # anchor-multiple threshold +# anchors: 3 # anchors per output layer (0 to ignore) +fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) +hsv_h: 0.015 # image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # image HSV-Value augmentation (fraction) +degrees: 0.0 # image rotation (+/- deg) +translate: 0.1 # image translation (+/- fraction) +scale: 0.9 # image scale (+/- gain) +shear: 0.0 # image shear (+/- deg) +perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # image flip up-down (probability) +fliplr: 0.5 # image flip left-right (probability) +mosaic: 1.0 # image mosaic (probability) +mixup: 0.0 # image mixup (probability) diff --git a/PyTorch/contrib/cv/detection/YOLOR/models/__init__.py b/PyTorch/contrib/cv/detection/YOLOR/models/__init__.py index d3f5a12faa99758192ecc4ed3fc22c9249232e86..8b137891791fe96927ad78e64b0aad7bded08bdc 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/models/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLOR/models/__init__.py @@ -1 +1 @@ - + diff --git a/PyTorch/contrib/cv/detection/YOLOR/models/export.py b/PyTorch/contrib/cv/detection/YOLOR/models/export.py index 43651c4e8a2a88d555351ef05c552f3b4edfc057..0816faccf2c80eea96072d95ccc0163ed35a0b45 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/models/export.py +++ b/PyTorch/contrib/cv/detection/YOLOR/models/export.py @@ -1,82 +1,82 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse - -import torch - -from utils.google_utils import attempt_download - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') - parser.add_argument('--batch-size', type=int, default=1, help='batch size') - opt = parser.parse_args() - opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand - print(opt) - - # Input - img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection - - # Load PyTorch model - attempt_download(opt.weights) - model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float() - model.eval() - model.model[-1].export = True # set Detect() layer export=True - y = model(img) # dry run - - # TorchScript export - try: - print('\nStarting TorchScript export with torch %s...' 
% torch.__version__) - f = opt.weights.replace('.pt', '.torchscript.pt') # filename - ts = torch.jit.trace(model, img) - ts.save(f) - print('TorchScript export success, saved as %s' % f) - except Exception as e: - print('TorchScript export failure: %s' % e) - - # ONNX export - try: - import onnx - - print('\nStarting ONNX export with onnx %s...' % onnx.__version__) - f = opt.weights.replace('.pt', '.onnx') # filename - model.fuse() # only for ONNX - torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], - output_names=['classes', 'boxes'] if y is None else ['output']) - - # Checks - onnx_model = onnx.load(f) # load onnx model - onnx.checker.check_model(onnx_model) # check onnx model - print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model - print('ONNX export success, saved as %s' % f) - except Exception as e: - print('ONNX export failure: %s' % e) - - # CoreML export - try: - import coremltools as ct - - print('\nStarting CoreML export with coremltools %s...' % ct.__version__) - # convert model from torchscript and apply pixel scaling as per detect.py - model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) - f = opt.weights.replace('.pt', '.mlmodel') # filename - model.save(f) - print('CoreML export success, saved as %s' % f) - except Exception as e: - print('CoreML export failure: %s' % e) - - # Finish - print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.') +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +import torch + +from utils.google_utils import attempt_download + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path') + parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') + opt = parser.parse_args() + opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand + print(opt) + + # Input + img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection + + # Load PyTorch model + attempt_download(opt.weights) + model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float() + model.eval() + model.model[-1].export = True # set Detect() layer export=True + y = model(img) # dry run + + # TorchScript export + try: + print('\nStarting TorchScript export with torch %s...' % torch.__version__) + f = opt.weights.replace('.pt', '.torchscript.pt') # filename + ts = torch.jit.trace(model, img) + ts.save(f) + print('TorchScript export success, saved as %s' % f) + except Exception as e: + print('TorchScript export failure: %s' % e) + + # ONNX export + try: + import onnx + + print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) + f = opt.weights.replace('.pt', '.onnx') # filename + model.fuse() # only for ONNX + torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], + output_names=['classes', 'boxes'] if y is None else ['output']) + + # Checks + onnx_model = onnx.load(f) # load onnx model + onnx.checker.check_model(onnx_model) # check onnx model + print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model + print('ONNX export success, saved as %s' % f) + except Exception as e: + print('ONNX export failure: %s' % e) + + # CoreML export + try: + import coremltools as ct + + print('\nStarting CoreML export with coremltools %s...' % ct.__version__) + # convert model from torchscript and apply pixel scaling as per detect.py + model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) + f = opt.weights.replace('.pt', '.mlmodel') # filename + model.save(f) + print('CoreML export success, saved as %s' % f) + except Exception as e: + print('CoreML export failure: %s' % e) + + # Finish + print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.') diff --git a/PyTorch/contrib/cv/detection/YOLOR/models/models.py b/PyTorch/contrib/cv/detection/YOLOR/models/models.py index 7a532e283fed103eafe66fe9d727f2ebc1d87b3f..47b0dce49576c002192aa19275ef3d2461e06bbb 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/models/models.py +++ b/PyTorch/contrib/cv/detection/YOLOR/models/models.py @@ -1,775 +1,775 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from utils.google_utils import * -from utils.layers import * -from utils.parse_config import * -from utils import torch_utils - -ONNX_EXPORT = False - - -def create_modules(module_defs, img_size, cfg): - # Constructs module list of layer blocks from module configuration in module_defs - - img_size = [img_size] * 2 if isinstance(img_size, int) else img_size # expand if necessary - _ = module_defs.pop(0) # cfg training hyperparams (unused) - output_filters = [3] # input channels - module_list = nn.ModuleList() - routs = [] # list of layers which rout to deeper layers - yolo_index = -1 - - for i, mdef in enumerate(module_defs): - modules = nn.Sequential() - - if mdef['type'] == 'convolutional': - bn = mdef['batch_normalize'] - filters = mdef['filters'] - k = mdef['size'] # kernel size - stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) - if isinstance(k, int): # single-size conv - modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], - out_channels=filters, - kernel_size=k, - stride=stride, - padding=k // 2 if mdef['pad'] else 0, - groups=mdef['groups'] if 'groups' in mdef else 1, - bias=not bn)) - else: # multiple-size conv - modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], - out_ch=filters, - k=k, - stride=stride, - bias=not bn)) - - if bn: - modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) - else: - routs.append(i) # detection output (goes into yolo layer) - - if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 - modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) - elif mdef['activation'] == 'swish': - modules.add_module('activation', Swish()) - elif mdef['activation'] == 'mish': - modules.add_module('activation', Mish()) - elif mdef['activation'] == 'emb': - modules.add_module('activation', F.normalize()) - elif mdef['activation'] == 'logistic': - modules.add_module('activation', nn.Sigmoid()) - elif mdef['activation'] == 'silu': - modules.add_module('activation', nn.SiLU()) - - elif mdef['type'] == 'deformableconvolutional': - bn = mdef['batch_normalize'] - filters = mdef['filters'] - k = mdef['size'] # kernel size - stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) - if isinstance(k, int): # single-size conv - modules.add_module('DeformConv2d', DeformConv2d(output_filters[-1], - filters, - kernel_size=k, - padding=k // 2 if mdef['pad'] else 0, - stride=stride, - bias=not bn, - modulation=True)) - else: # multiple-size conv - modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], - out_ch=filters, - k=k, - stride=stride, - bias=not bn)) - - if bn: - modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) - else: - routs.append(i) # detection output (goes into yolo layer) - - if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 - modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) - elif mdef['activation'] == 'swish': - modules.add_module('activation', Swish()) - elif mdef['activation'] == 'mish': - modules.add_module('activation', Mish()) - elif mdef['activation'] == 'silu': - modules.add_module('activation', nn.SiLU()) - - elif mdef['type'] == 'dropout': - p = mdef['probability'] - modules = nn.Dropout(p) - - elif mdef['type'] == 'avgpool': - modules = GAP() - - elif mdef['type'] == 'silence': - filters = output_filters[-1] - modules = Silence() - - elif mdef['type'] == 
'scale_channels': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ScaleChannel(layers=layers) - - elif mdef['type'] == 'shift_channels': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ShiftChannel(layers=layers) - - elif mdef['type'] == 'shift_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ShiftChannel2D(layers=layers) - - elif mdef['type'] == 'control_channels': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ControlChannel(layers=layers) - - elif mdef['type'] == 'control_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ControlChannel2D(layers=layers) - - elif mdef['type'] == 'alternate_channels': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] * 2 - routs.extend([i + l if l < 0 else l for l in layers]) - modules = AlternateChannel(layers=layers) - - elif mdef['type'] == 'alternate_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] * 2 - routs.extend([i + l if l < 0 else l for l in layers]) - modules = AlternateChannel2D(layers=layers) - - elif mdef['type'] == 'select_channels': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = SelectChannel(layers=layers) - - elif mdef['type'] == 'select_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = SelectChannel2D(layers=layers) - - elif mdef['type'] == 'sam': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = ScaleSpatial(layers=layers) - - elif mdef['type'] == 'BatchNorm2d': - filters = output_filters[-1] - modules = nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4) - if i == 0 and filters == 3: # normalize RGB image - # imagenet mean and var https://pytorch.org/docs/stable/torchvision/models.html#classification - modules.running_mean = torch.tensor([0.485, 0.456, 0.406]) - modules.running_var = torch.tensor([0.0524, 0.0502, 0.0506]) - - elif mdef['type'] == 'maxpool': - k = mdef['size'] # kernel size - stride = mdef['stride'] - maxpool = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) - if k == 2 and stride == 1: # yolov3-tiny - modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) - modules.add_module('MaxPool2d', maxpool) - else: - modules = maxpool - - elif mdef['type'] == 'local_avgpool': - k = mdef['size'] # kernel size - stride = mdef['stride'] - avgpool = nn.AvgPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) - if k == 2 and stride == 1: # yolov3-tiny - modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) - modules.add_module('AvgPool2d', avgpool) - 
else: - modules = avgpool - - elif mdef['type'] == 'upsample': - if ONNX_EXPORT: # explicitly state size, avoid scale_factor - g = (yolo_index + 1) * 2 / 32 # gain - modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) # img_size = (320, 192) - else: - modules = nn.Upsample(scale_factor=mdef['stride']) - - elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer - layers = mdef['layers'] - filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) - routs.extend([i + l if l < 0 else l for l in layers]) - modules = FeatureConcat(layers=layers) - - elif mdef['type'] == 'route2': # nn.Sequential() placeholder for 'route' layer - layers = mdef['layers'] - filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) - routs.extend([i + l if l < 0 else l for l in layers]) - modules = FeatureConcat2(layers=layers) - - elif mdef['type'] == 'route3': # nn.Sequential() placeholder for 'route' layer - layers = mdef['layers'] - filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) - routs.extend([i + l if l < 0 else l for l in layers]) - modules = FeatureConcat3(layers=layers) - - elif mdef['type'] == 'route_lhalf': # nn.Sequential() placeholder for 'route' layer - layers = mdef['layers'] - filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])//2 - routs.extend([i + l if l < 0 else l for l in layers]) - modules = FeatureConcat_l(layers=layers) - - elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer - layers = mdef['from'] - filters = output_filters[-1] - routs.extend([i + l if l < 0 else l for l in layers]) - modules = WeightedFeatureFusion(layers=layers, weight='weights_type' in mdef) - - elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale - pass - - elif mdef['type'] == 'reorg': # yolov3-spp-pan-scale - filters = 4 * output_filters[-1] - modules.add_module('Reorg', Reorg()) - - elif mdef['type'] == 'dwt': # yolov3-spp-pan-scale - filters = 4 * output_filters[-1] - modules.add_module('DWT', DWT()) - - elif mdef['type'] == 'implicit_add': # yolov3-spp-pan-scale - filters = mdef['filters'] - modules = ImplicitA(channel=filters) - - elif mdef['type'] == 'implicit_mul': # yolov3-spp-pan-scale - filters = mdef['filters'] - modules = ImplicitM(channel=filters) - - elif mdef['type'] == 'implicit_cat': # yolov3-spp-pan-scale - filters = mdef['filters'] - modules = ImplicitC(channel=filters) - - elif mdef['type'] == 'implicit_add_2d': # yolov3-spp-pan-scale - channels = mdef['filters'] - filters = mdef['atoms'] - modules = Implicit2DA(atom=filters, channel=channels) - - elif mdef['type'] == 'implicit_mul_2d': # yolov3-spp-pan-scale - channels = mdef['filters'] - filters = mdef['atoms'] - modules = Implicit2DM(atom=filters, channel=channels) - - elif mdef['type'] == 'implicit_cat_2d': # yolov3-spp-pan-scale - channels = mdef['filters'] - filters = mdef['atoms'] - modules = Implicit2DC(atom=filters, channel=channels) - - elif mdef['type'] == 'yolo': - yolo_index += 1 - stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides - if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides - stride = [32, 16, 8] - layers = mdef['from'] if 'from' in mdef else [] - modules = YOLOLayer(anchors=mdef['anchors'][mdef['mask']], # anchor list - nc=mdef['classes'], # number of classes - img_size=img_size, # (416, 416) - yolo_index=yolo_index, # 0, 1, 2... 
- layers=layers, # output layers - stride=stride[yolo_index]) - - # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) - try: - j = layers[yolo_index] if 'from' in mdef else -2 - bias_ = module_list[j][0].bias # shape(255,) - bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) - #bias[:, 4] += -4.5 # obj - bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) - bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) - module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) - - #j = [-2, -5, -8] - #for sj in j: - # bias_ = module_list[sj][0].bias - # bias = bias_[:modules.no * 1].view(1, -1) - # bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) - # bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) - # module_list[sj][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) - except: - print('WARNING: smart bias initialization failure.') - - elif mdef['type'] == 'jde': - yolo_index += 1 - stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides - if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides - stride = [32, 16, 8] - layers = mdef['from'] if 'from' in mdef else [] - modules = JDELayer(anchors=mdef['anchors'][mdef['mask']], # anchor list - nc=mdef['classes'], # number of classes - img_size=img_size, # (416, 416) - yolo_index=yolo_index, # 0, 1, 2... - layers=layers, # output layers - stride=stride[yolo_index]) - - # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) - try: - j = layers[yolo_index] if 'from' in mdef else -1 - bias_ = module_list[j][0].bias # shape(255,) - bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) - #bias[:, 4] += -4.5 # obj - bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) - bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) - module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) - except: - print('WARNING: smart bias initialization failure.') - - else: - print('Warning: Unrecognized Layer Type: ' + mdef['type']) - - # Register module list and number of output filters - module_list.append(modules) - output_filters.append(filters) - - routs_binary = [False] * (i + 1) - for i in routs: - routs_binary[i] = True - return module_list, routs_binary - - -class YOLOLayer(nn.Module): - def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): - super(YOLOLayer, self).__init__() - self.anchors = torch.Tensor(anchors) - self.index = yolo_index # index of this layer in layers - self.layers = layers # model output layer indices - self.stride = stride # layer stride - self.nl = len(layers) # number of output layers (3) - self.na = len(anchors) # number of anchors (3) - self.nc = nc # number of classes (80) - self.no = nc + 5 # number of outputs (85) - self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints - self.anchor_vec = self.anchors / self.stride - self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) - - if ONNX_EXPORT: - self.training = False - self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points - - def create_grids(self, ng=(13, 13), device='cpu'): - self.nx, self.ny = ng # x and y grid size - self.ng = torch.tensor(ng, dtype=torch.float) - - # build xy offsets - if not self.training: - yv, xv = 
torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) - self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() - - if self.anchor_vec.device != device: - self.anchor_vec = self.anchor_vec.to(device) - self.anchor_wh = self.anchor_wh.to(device) - - def forward(self, p, out): - ASFF = False # https://arxiv.org/abs/1911.09516 - if ASFF: - i, n = self.index, self.nl # index in layers, number of layers - p = out[self.layers[i]] - bs, _, ny, nx = p.shape # bs, 255, 13, 13 - if (self.nx, self.ny) != (nx, ny): - self.create_grids((nx, ny), p.device) - - # outputs and weights - # w = F.softmax(p[:, -n:], 1) # normalized weights - w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) - # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension - - # weighted ASFF sum - p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] - for j in range(n): - if j != i: - p += w[:, j:j + 1] * \ - F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) - - elif ONNX_EXPORT: - bs = 1 # batch size - else: - bs, _, ny, nx = p.shape # bs, 255, 13, 13 - if (self.nx, self.ny) != (nx, ny): - self.create_grids((nx, ny), p.device) - - # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) - p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction - - if self.training: - return p - - elif ONNX_EXPORT: - # Avoid broadcasting for ANE operations - m = self.na * self.nx * self.ny - ng = 1. / self.ng.repeat(m, 1) - grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) - anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng - - p = p.view(m, self.no) - xy = torch.sigmoid(p[:, 0:2]) + grid # x, y - wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height - p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ - torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf - return p_cls, xy * ng, wh - - else: # inference - io = p.sigmoid() - io[..., :2] = (io[..., :2] * 2. 
- 0.5 + self.grid) - io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh - io[..., :4] *= self.stride - #io = p.clone() # inference output - #io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid # xy - #io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method - #io[..., :4] *= self.stride - #torch.sigmoid_(io[..., 4:]) - return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] - - -class JDELayer(nn.Module): - def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): - super(JDELayer, self).__init__() - self.anchors = torch.Tensor(anchors) - self.index = yolo_index # index of this layer in layers - self.layers = layers # model output layer indices - self.stride = stride # layer stride - self.nl = len(layers) # number of output layers (3) - self.na = len(anchors) # number of anchors (3) - self.nc = nc # number of classes (80) - self.no = nc + 5 # number of outputs (85) - self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints - self.anchor_vec = self.anchors / self.stride - self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) - - if ONNX_EXPORT: - self.training = False - self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points - - def create_grids(self, ng=(13, 13), device='cpu'): - self.nx, self.ny = ng # x and y grid size - self.ng = torch.tensor(ng, dtype=torch.float) - - # build xy offsets - if not self.training: - yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) - self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() - - if self.anchor_vec.device != device: - self.anchor_vec = self.anchor_vec.to(device) - self.anchor_wh = self.anchor_wh.to(device) - - def forward(self, p, out): - ASFF = False # https://arxiv.org/abs/1911.09516 - if ASFF: - i, n = self.index, self.nl # index in layers, number of layers - p = out[self.layers[i]] - bs, _, ny, nx = p.shape # bs, 255, 13, 13 - if (self.nx, self.ny) != (nx, ny): - self.create_grids((nx, ny), p.device) - - # outputs and weights - # w = F.softmax(p[:, -n:], 1) # normalized weights - w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) - # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension - - # weighted ASFF sum - p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] - for j in range(n): - if j != i: - p += w[:, j:j + 1] * \ - F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) - - elif ONNX_EXPORT: - bs = 1 # batch size - else: - bs, _, ny, nx = p.shape # bs, 255, 13, 13 - if (self.nx, self.ny) != (nx, ny): - self.create_grids((nx, ny), p.device) - - # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) - p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction - - if self.training: - return p - - elif ONNX_EXPORT: - # Avoid broadcasting for ANE operations - m = self.na * self.nx * self.ny - ng = 1. / self.ng.repeat(m, 1) - grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) - anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng - - p = p.view(m, self.no) - xy = torch.sigmoid(p[:, 0:2]) + grid # x, y - wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height - p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ - torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf - return p_cls, xy * ng, wh - - else: # inference - #io = p.sigmoid() - #io[..., :2] = (io[..., :2] * 2. 
- 0.5 + self.grid) - #io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh - #io[..., :4] *= self.stride - io = p.clone() # inference output - io[..., :2] = torch.sigmoid(io[..., :2]) * 2. - 0.5 + self.grid # xy - io[..., 2:4] = (torch.sigmoid(io[..., 2:4]) * 2) ** 2 * self.anchor_wh # wh yolo method - io[..., :4] *= self.stride - io[..., 4:] = F.softmax(io[..., 4:]) - return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] - -class Darknet(nn.Module): - # YOLOv3 object detection model - - def __init__(self, cfg, img_size=(416, 416), verbose=False): - super(Darknet, self).__init__() - - self.module_defs = parse_model_cfg(cfg) - self.module_list, self.routs = create_modules(self.module_defs, img_size, cfg) - self.yolo_layers = get_yolo_layers(self) - # torch_utils.initialize_weights(self) - - # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 - self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision - self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training - self.info(verbose) if not ONNX_EXPORT else None # print model description - - def forward(self, x, augment=False, verbose=False): - - if not augment: - return self.forward_once(x) - else: # Augment images (inference and test only) https://github.com/ultralytics/yolov3/issues/931 - img_size = x.shape[-2:] # height, width - s = [0.83, 0.67] # scales - y = [] - for i, xi in enumerate((x, - torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale - torch_utils.scale_img(x, s[1], same_shape=False), # scale - )): - # cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) - y.append(self.forward_once(xi)[0]) - - y[1][..., :4] /= s[0] # scale - y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr - y[2][..., :4] /= s[1] # scale - - # for i, yi in enumerate(y): # coco small, medium, large = < 32**2 < 96**2 < - # area = yi[..., 2:4].prod(2)[:, :, None] - # if i == 1: - # yi *= (area < 96. ** 2).float() - # elif i == 2: - # yi *= (area > 32. 
** 2).float() - # y[i] = yi - - y = torch.cat(y, 1) - return y, None - - def forward_once(self, x, augment=False, verbose=False): - img_size = x.shape[-2:] # height, width - yolo_out, out = [], [] - if verbose: - print('0', x.shape) - str = '' - - # Augment images (inference and test only) - if augment: # https://github.com/ultralytics/yolov3/issues/931 - nb = x.shape[0] # batch size - s = [0.83, 0.67] # scales - x = torch.cat((x, - torch_utils.scale_img(x.flip(3), s[0]), # flip-lr and scale - torch_utils.scale_img(x, s[1]), # scale - ), 0) - - for i, module in enumerate(self.module_list): - name = module.__class__.__name__ - #print(name) - if name in ['WeightedFeatureFusion', 'FeatureConcat', 'FeatureConcat2', 'FeatureConcat3', 'FeatureConcat_l', 'ScaleChannel', 'ShiftChannel', 'ShiftChannel2D', 'ControlChannel', 'ControlChannel2D', 'AlternateChannel', 'AlternateChannel2D', 'SelectChannel', 'SelectChannel2D', 'ScaleSpatial']: # sum, concat - if verbose: - l = [i - 1] + module.layers # layers - sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes - str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)]) - x = module(x, out) # WeightedFeatureFusion(), FeatureConcat() - elif name in ['ImplicitA', 'ImplicitM', 'ImplicitC', 'Implicit2DA', 'Implicit2DM', 'Implicit2DC']: - x = module() - elif name == 'YOLOLayer': - yolo_out.append(module(x, out)) - elif name == 'JDELayer': - yolo_out.append(module(x, out)) - else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc. - #print(module) - #print(x.shape) - x = module(x) - - out.append(x if self.routs[i] else []) - if verbose: - print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str) - str = '' - - if self.training: # train - return yolo_out - elif ONNX_EXPORT: # export - x = [torch.cat(x, 0) for x in zip(*yolo_out)] - return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 - else: # inference or test - x, p = zip(*yolo_out) # inference output, training output - x = torch.cat(x, 1) # cat yolo outputs - if augment: # de-augment results - x = torch.split(x, nb, dim=0) - x[1][..., :4] /= s[0] # scale - x[1][..., 0] = img_size[1] - x[1][..., 0] # flip lr - x[2][..., :4] /= s[1] # scale - x = torch.cat(x, 1) - return x, p - - def fuse(self): - # Fuse Conv2d + BatchNorm2d layers throughout model - print('Fusing layers...') - fused_list = nn.ModuleList() - for a in list(self.children())[0]: - if isinstance(a, nn.Sequential): - for i, b in enumerate(a): - if isinstance(b, nn.modules.batchnorm.BatchNorm2d): - # fuse this bn layer with the previous conv2d layer - conv = a[i - 1] - fused = torch_utils.fuse_conv_and_bn(conv, b) - a = nn.Sequential(fused, *list(a.children())[i + 1:]) - break - fused_list.append(a) - self.module_list = fused_list - self.info() if not ONNX_EXPORT else None # yolov3-spp reduced from 225 to 152 layers - - def info(self, verbose=False): - torch_utils.model_info(self, verbose) - - -def get_yolo_layers(model): - return [i for i, m in enumerate(model.module_list) if m.__class__.__name__ in ['YOLOLayer', 'JDELayer']] # [89, 101, 113] - - -def load_darknet_weights(self, weights, cutoff=-1): - # Parses and loads the weights stored in 'weights' - - # Establish cutoffs (load layers between 0 and cutoff. 
if cutoff = -1 all are loaded) - file = Path(weights).name - if file == 'darknet53.conv.74': - cutoff = 75 - elif file == 'yolov3-tiny.conv.15': - cutoff = 15 - - # Read weights file - with open(weights, 'rb') as f: - # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 - self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision - self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training - - weights = np.fromfile(f, dtype=np.float32) # the rest are weights - - ptr = 0 - for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): - if mdef['type'] == 'convolutional': - conv = module[0] - if mdef['batch_normalize']: - # Load BN bias, weights, running mean and running variance - bn = module[1] - nb = bn.bias.numel() # number of biases - # Bias - bn.bias.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.bias)) - ptr += nb - # Weight - bn.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.weight)) - ptr += nb - # Running Mean - bn.running_mean.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_mean)) - ptr += nb - # Running Var - bn.running_var.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_var)) - ptr += nb - else: - # Load conv. bias - nb = conv.bias.numel() - conv_b = torch.from_numpy(weights[ptr:ptr + nb]).view_as(conv.bias) - conv.bias.data.copy_(conv_b) - ptr += nb - # Load conv. weights - nw = conv.weight.numel() # number of weights - conv.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nw]).view_as(conv.weight)) - ptr += nw - - -def save_weights(self, path='model.weights', cutoff=-1): - # Converts a PyTorch model to Darket format (*.pt to *.weights) - # Note: Does not work if model.fuse() is applied - with open(path, 'wb') as f: - # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 - self.version.tofile(f) # (int32) version info: major, minor, revision - self.seen.tofile(f) # (int64) number of images seen during training - - # Iterate through layers - for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): - if mdef['type'] == 'convolutional': - conv_layer = module[0] - # If batch norm, load bn first - if mdef['batch_normalize']: - bn_layer = module[1] - bn_layer.bias.data.cpu().numpy().tofile(f) - bn_layer.weight.data.cpu().numpy().tofile(f) - bn_layer.running_mean.data.cpu().numpy().tofile(f) - bn_layer.running_var.data.cpu().numpy().tofile(f) - # Load conv bias - else: - conv_layer.bias.data.cpu().numpy().tofile(f) - # Load conv weights - conv_layer.weight.data.cpu().numpy().tofile(f) - - -def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights', saveto='converted.weights'): - # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) - # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') - - # Initialize model - model = Darknet(cfg) - ckpt = torch.load(weights) # load checkpoint - try: - ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} - model.load_state_dict(ckpt['model'], strict=False) - save_weights(model, path=saveto, cutoff=-1) - except KeyError as e: - print(e) - -def attempt_download(weights): - # Attempt to download pretrained weights if not found locally - weights = weights.strip() - msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0' - - if len(weights) > 0 and not os.path.isfile(weights): - d = {''} - - file = Path(weights).name - if file in d: - r = gdrive_download(id=d[file], name=weights) - else: # download from pjreddie.com - url = 'https://pjreddie.com/media/files/' + file - print('Downloading ' + url) - r = os.system('curl -f ' + url + ' -o ' + weights) - - # Error check - if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB - os.system('rm ' + weights) # remove partial downloads - raise Exception(msg) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from utils.google_utils import * +from utils.layers import * +from utils.parse_config import * +from utils import torch_utils + +ONNX_EXPORT = False + + +def create_modules(module_defs, img_size, cfg): + # Constructs module list of layer blocks from module configuration in module_defs + + img_size = [img_size] * 2 if isinstance(img_size, int) else img_size # expand if necessary + _ = module_defs.pop(0) # cfg training hyperparams (unused) + output_filters = [3] # input channels + module_list = nn.ModuleList() + routs = [] # list of layers which rout to deeper layers + yolo_index = -1 + + for i, mdef in enumerate(module_defs): + modules = nn.Sequential() + + if mdef['type'] == 'convolutional': + bn = mdef['batch_normalize'] + filters = mdef['filters'] + k = mdef['size'] # kernel size + stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) + if isinstance(k, int): # single-size conv + modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], + out_channels=filters, + kernel_size=k, + stride=stride, + padding=k // 2 if mdef['pad'] else 0, + groups=mdef['groups'] if 'groups' in mdef else 1, + bias=not bn)) + else: # multiple-size conv + modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], + out_ch=filters, + k=k, + stride=stride, + bias=not bn)) + + if bn: + modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) + else: + routs.append(i) # detection output (goes into yolo layer) + + if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 + modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) + elif mdef['activation'] == 'swish': + modules.add_module('activation', Swish()) + elif mdef['activation'] == 'mish': + modules.add_module('activation', Mish()) + elif mdef['activation'] == 'emb': + modules.add_module('activation', F.normalize()) + elif mdef['activation'] == 'logistic': + modules.add_module('activation', nn.Sigmoid()) + elif mdef['activation'] == 'silu': + modules.add_module('activation', nn.SiLU()) + + elif mdef['type'] == 'deformableconvolutional': + bn = mdef['batch_normalize'] + filters = mdef['filters'] + k = mdef['size'] # kernel size + stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x']) + if isinstance(k, int): # single-size conv + modules.add_module('DeformConv2d', DeformConv2d(output_filters[-1], + filters, + kernel_size=k, + padding=k // 2 if mdef['pad'] else 0, + stride=stride, + bias=not bn, + modulation=True)) + else: # multiple-size conv + modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1], + out_ch=filters, + k=k, + stride=stride, + bias=not bn)) + + if bn: + modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)) + else: + routs.append(i) # detection output (goes into yolo layer) + + if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441 + modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) + elif mdef['activation'] == 'swish': + modules.add_module('activation', Swish()) + elif mdef['activation'] == 'mish': + modules.add_module('activation', Mish()) + elif mdef['activation'] == 'silu': + modules.add_module('activation', nn.SiLU()) + + elif mdef['type'] == 'dropout': + p = mdef['probability'] + modules = nn.Dropout(p) + + elif mdef['type'] == 'avgpool': + modules = GAP() + + elif mdef['type'] == 'silence': + filters = output_filters[-1] + modules = Silence() + + elif mdef['type'] == 
'scale_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ScaleChannel(layers=layers) + + elif mdef['type'] == 'shift_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ShiftChannel(layers=layers) + + elif mdef['type'] == 'shift_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ShiftChannel2D(layers=layers) + + elif mdef['type'] == 'control_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ControlChannel(layers=layers) + + elif mdef['type'] == 'control_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ControlChannel2D(layers=layers) + + elif mdef['type'] == 'alternate_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] * 2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = AlternateChannel(layers=layers) + + elif mdef['type'] == 'alternate_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] * 2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = AlternateChannel2D(layers=layers) + + elif mdef['type'] == 'select_channels': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = SelectChannel(layers=layers) + + elif mdef['type'] == 'select_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = SelectChannel2D(layers=layers) + + elif mdef['type'] == 'sam': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = ScaleSpatial(layers=layers) + + elif mdef['type'] == 'BatchNorm2d': + filters = output_filters[-1] + modules = nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4) + if i == 0 and filters == 3: # normalize RGB image + # imagenet mean and var https://pytorch.org/docs/stable/torchvision/models.html#classification + modules.running_mean = torch.tensor([0.485, 0.456, 0.406]) + modules.running_var = torch.tensor([0.0524, 0.0502, 0.0506]) + + elif mdef['type'] == 'maxpool': + k = mdef['size'] # kernel size + stride = mdef['stride'] + maxpool = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) + if k == 2 and stride == 1: # yolov3-tiny + modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) + modules.add_module('MaxPool2d', maxpool) + else: + modules = maxpool + + elif mdef['type'] == 'local_avgpool': + k = mdef['size'] # kernel size + stride = mdef['stride'] + avgpool = nn.AvgPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2) + if k == 2 and stride == 1: # yolov3-tiny + modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) + modules.add_module('AvgPool2d', avgpool) + 
else: + modules = avgpool + + elif mdef['type'] == 'upsample': + if ONNX_EXPORT: # explicitly state size, avoid scale_factor + g = (yolo_index + 1) * 2 / 32 # gain + modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) # img_size = (320, 192) + else: + modules = nn.Upsample(scale_factor=mdef['stride']) + + elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat(layers=layers) + + elif mdef['type'] == 'route2': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat2(layers=layers) + + elif mdef['type'] == 'route3': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers]) + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat3(layers=layers) + + elif mdef['type'] == 'route_lhalf': # nn.Sequential() placeholder for 'route' layer + layers = mdef['layers'] + filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])//2 + routs.extend([i + l if l < 0 else l for l in layers]) + modules = FeatureConcat_l(layers=layers) + + elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer + layers = mdef['from'] + filters = output_filters[-1] + routs.extend([i + l if l < 0 else l for l in layers]) + modules = WeightedFeatureFusion(layers=layers, weight='weights_type' in mdef) + + elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale + pass + + elif mdef['type'] == 'reorg': # yolov3-spp-pan-scale + filters = 4 * output_filters[-1] + modules.add_module('Reorg', Reorg()) + + elif mdef['type'] == 'dwt': # yolov3-spp-pan-scale + filters = 4 * output_filters[-1] + modules.add_module('DWT', DWT()) + + elif mdef['type'] == 'implicit_add': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitA(channel=filters) + + elif mdef['type'] == 'implicit_mul': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitM(channel=filters) + + elif mdef['type'] == 'implicit_cat': # yolov3-spp-pan-scale + filters = mdef['filters'] + modules = ImplicitC(channel=filters) + + elif mdef['type'] == 'implicit_add_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DA(atom=filters, channel=channels) + + elif mdef['type'] == 'implicit_mul_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DM(atom=filters, channel=channels) + + elif mdef['type'] == 'implicit_cat_2d': # yolov3-spp-pan-scale + channels = mdef['filters'] + filters = mdef['atoms'] + modules = Implicit2DC(atom=filters, channel=channels) + + elif mdef['type'] == 'yolo': + yolo_index += 1 + stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides + if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides + stride = [32, 16, 8] + layers = mdef['from'] if 'from' in mdef else [] + modules = YOLOLayer(anchors=mdef['anchors'][mdef['mask']], # anchor list + nc=mdef['classes'], # number of classes + img_size=img_size, # (416, 416) + yolo_index=yolo_index, # 0, 1, 2... 
+ layers=layers, # output layers + stride=stride[yolo_index]) + + # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) + try: + j = layers[yolo_index] if 'from' in mdef else -2 + bias_ = module_list[j][0].bias # shape(255,) + bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) + #bias[:, 4] += -4.5 # obj + bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) + bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) + module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + + #j = [-2, -5, -8] + #for sj in j: + # bias_ = module_list[sj][0].bias + # bias = bias_[:modules.no * 1].view(1, -1) + # bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) + # bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) + # module_list[sj][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + except: + print('WARNING: smart bias initialization failure.') + + elif mdef['type'] == 'jde': + yolo_index += 1 + stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides + if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides + stride = [32, 16, 8] + layers = mdef['from'] if 'from' in mdef else [] + modules = JDELayer(anchors=mdef['anchors'][mdef['mask']], # anchor list + nc=mdef['classes'], # number of classes + img_size=img_size, # (416, 416) + yolo_index=yolo_index, # 0, 1, 2... + layers=layers, # output layers + stride=stride[yolo_index]) + + # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) + try: + j = layers[yolo_index] if 'from' in mdef else -1 + bias_ = module_list[j][0].bias # shape(255,) + bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85) + #bias[:, 4] += -4.5 # obj + bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image) + bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc) + module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad) + except: + print('WARNING: smart bias initialization failure.') + + else: + print('Warning: Unrecognized Layer Type: ' + mdef['type']) + + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + routs_binary = [False] * (i + 1) + for i in routs: + routs_binary[i] = True + return module_list, routs_binary + + +class YOLOLayer(nn.Module): + def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): + super(YOLOLayer, self).__init__() + self.anchors = torch.Tensor(anchors) + self.index = yolo_index # index of this layer in layers + self.layers = layers # model output layer indices + self.stride = stride # layer stride + self.nl = len(layers) # number of output layers (3) + self.na = len(anchors) # number of anchors (3) + self.nc = nc # number of classes (80) + self.no = nc + 5 # number of outputs (85) + self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints + self.anchor_vec = self.anchors / self.stride + self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) + + if ONNX_EXPORT: + self.training = False + self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points + + def create_grids(self, ng=(13, 13), device='cpu'): + self.nx, self.ny = ng # x and y grid size + self.ng = torch.tensor(ng, dtype=torch.float) + + # build xy offsets + if not self.training: + yv, xv = 
torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) + self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() + + if self.anchor_vec.device != device: + self.anchor_vec = self.anchor_vec.to(device) + self.anchor_wh = self.anchor_wh.to(device) + + def forward(self, p, out): + ASFF = False # https://arxiv.org/abs/1911.09516 + if ASFF: + i, n = self.index, self.nl # index in layers, number of layers + p = out[self.layers[i]] + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # outputs and weights + # w = F.softmax(p[:, -n:], 1) # normalized weights + w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) + # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension + + # weighted ASFF sum + p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] + for j in range(n): + if j != i: + p += w[:, j:j + 1] * \ + F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) + + elif ONNX_EXPORT: + bs = 1 # batch size + else: + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) + p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction + + if self.training: + return p + + elif ONNX_EXPORT: + # Avoid broadcasting for ANE operations + m = self.na * self.nx * self.ny + ng = 1. / self.ng.repeat(m, 1) + grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) + anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng + + p = p.view(m, self.no) + xy = torch.sigmoid(p[:, 0:2]) + grid # x, y + wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height + p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ + torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf + return p_cls, xy * ng, wh + + else: # inference + io = p.sigmoid() + io[..., :2] = (io[..., :2] * 2. 
- 0.5 + self.grid) + io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh + io[..., :4] *= self.stride + #io = p.clone() # inference output + #io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid # xy + #io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method + #io[..., :4] *= self.stride + #torch.sigmoid_(io[..., 4:]) + return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] + + +class JDELayer(nn.Module): + def __init__(self, anchors, nc, img_size, yolo_index, layers, stride): + super(JDELayer, self).__init__() + self.anchors = torch.Tensor(anchors) + self.index = yolo_index # index of this layer in layers + self.layers = layers # model output layer indices + self.stride = stride # layer stride + self.nl = len(layers) # number of output layers (3) + self.na = len(anchors) # number of anchors (3) + self.nc = nc # number of classes (80) + self.no = nc + 5 # number of outputs (85) + self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints + self.anchor_vec = self.anchors / self.stride + self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2) + + if ONNX_EXPORT: + self.training = False + self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points + + def create_grids(self, ng=(13, 13), device='cpu'): + self.nx, self.ny = ng # x and y grid size + self.ng = torch.tensor(ng, dtype=torch.float) + + # build xy offsets + if not self.training: + yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)]) + self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float() + + if self.anchor_vec.device != device: + self.anchor_vec = self.anchor_vec.to(device) + self.anchor_wh = self.anchor_wh.to(device) + + def forward(self, p, out): + ASFF = False # https://arxiv.org/abs/1911.09516 + if ASFF: + i, n = self.index, self.nl # index in layers, number of layers + p = out[self.layers[i]] + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # outputs and weights + # w = F.softmax(p[:, -n:], 1) # normalized weights + w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster) + # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension + + # weighted ASFF sum + p = out[self.layers[i]][:, :-n] * w[:, i:i + 1] + for j in range(n): + if j != i: + p += w[:, j:j + 1] * \ + F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False) + + elif ONNX_EXPORT: + bs = 1 # batch size + else: + bs, _, ny, nx = p.shape # bs, 255, 13, 13 + if (self.nx, self.ny) != (nx, ny): + self.create_grids((nx, ny), p.device) + + # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) + p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction + + if self.training: + return p + + elif ONNX_EXPORT: + # Avoid broadcasting for ANE operations + m = self.na * self.nx * self.ny + ng = 1. / self.ng.repeat(m, 1) + grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2) + anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng + + p = p.view(m, self.no) + xy = torch.sigmoid(p[:, 0:2]) + grid # x, y + wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height + p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \ + torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf + return p_cls, xy * ng, wh + + else: # inference + #io = p.sigmoid() + #io[..., :2] = (io[..., :2] * 2. 
- 0.5 + self.grid) + #io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh + #io[..., :4] *= self.stride + io = p.clone() # inference output + io[..., :2] = torch.sigmoid(io[..., :2]) * 2. - 0.5 + self.grid # xy + io[..., 2:4] = (torch.sigmoid(io[..., 2:4]) * 2) ** 2 * self.anchor_wh # wh yolo method + io[..., :4] *= self.stride + io[..., 4:] = F.softmax(io[..., 4:]) + return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85] + +class Darknet(nn.Module): + # YOLOv3 object detection model + + def __init__(self, cfg, img_size=(416, 416), verbose=False): + super(Darknet, self).__init__() + + self.module_defs = parse_model_cfg(cfg) + self.module_list, self.routs = create_modules(self.module_defs, img_size, cfg) + self.yolo_layers = get_yolo_layers(self) + # torch_utils.initialize_weights(self) + + # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision + self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training + self.info(verbose) if not ONNX_EXPORT else None # print model description + + def forward(self, x, augment=False, verbose=False): + + if not augment: + return self.forward_once(x) + else: # Augment images (inference and test only) https://github.com/ultralytics/yolov3/issues/931 + img_size = x.shape[-2:] # height, width + s = [0.83, 0.67] # scales + y = [] + for i, xi in enumerate((x, + torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale + torch_utils.scale_img(x, s[1], same_shape=False), # scale + )): + # cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) + y.append(self.forward_once(xi)[0]) + + y[1][..., :4] /= s[0] # scale + y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr + y[2][..., :4] /= s[1] # scale + + # for i, yi in enumerate(y): # coco small, medium, large = < 32**2 < 96**2 < + # area = yi[..., 2:4].prod(2)[:, :, None] + # if i == 1: + # yi *= (area < 96. ** 2).float() + # elif i == 2: + # yi *= (area > 32. 
** 2).float() + # y[i] = yi + + y = torch.cat(y, 1) + return y, None + + def forward_once(self, x, augment=False, verbose=False): + img_size = x.shape[-2:] # height, width + yolo_out, out = [], [] + if verbose: + print('0', x.shape) + str = '' + + # Augment images (inference and test only) + if augment: # https://github.com/ultralytics/yolov3/issues/931 + nb = x.shape[0] # batch size + s = [0.83, 0.67] # scales + x = torch.cat((x, + torch_utils.scale_img(x.flip(3), s[0]), # flip-lr and scale + torch_utils.scale_img(x, s[1]), # scale + ), 0) + + for i, module in enumerate(self.module_list): + name = module.__class__.__name__ + #print(name) + if name in ['WeightedFeatureFusion', 'FeatureConcat', 'FeatureConcat2', 'FeatureConcat3', 'FeatureConcat_l', 'ScaleChannel', 'ShiftChannel', 'ShiftChannel2D', 'ControlChannel', 'ControlChannel2D', 'AlternateChannel', 'AlternateChannel2D', 'SelectChannel', 'SelectChannel2D', 'ScaleSpatial']: # sum, concat + if verbose: + l = [i - 1] + module.layers # layers + sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes + str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)]) + x = module(x, out) # WeightedFeatureFusion(), FeatureConcat() + elif name in ['ImplicitA', 'ImplicitM', 'ImplicitC', 'Implicit2DA', 'Implicit2DM', 'Implicit2DC']: + x = module() + elif name == 'YOLOLayer': + yolo_out.append(module(x, out)) + elif name == 'JDELayer': + yolo_out.append(module(x, out)) + else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc. + #print(module) + #print(x.shape) + x = module(x) + + out.append(x if self.routs[i] else []) + if verbose: + print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str) + str = '' + + if self.training: # train + return yolo_out + elif ONNX_EXPORT: # export + x = [torch.cat(x, 0) for x in zip(*yolo_out)] + return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4 + else: # inference or test + x, p = zip(*yolo_out) # inference output, training output + x = torch.cat(x, 1) # cat yolo outputs + if augment: # de-augment results + x = torch.split(x, nb, dim=0) + x[1][..., :4] /= s[0] # scale + x[1][..., 0] = img_size[1] - x[1][..., 0] # flip lr + x[2][..., :4] /= s[1] # scale + x = torch.cat(x, 1) + return x, p + + def fuse(self): + # Fuse Conv2d + BatchNorm2d layers throughout model + print('Fusing layers...') + fused_list = nn.ModuleList() + for a in list(self.children())[0]: + if isinstance(a, nn.Sequential): + for i, b in enumerate(a): + if isinstance(b, nn.modules.batchnorm.BatchNorm2d): + # fuse this bn layer with the previous conv2d layer + conv = a[i - 1] + fused = torch_utils.fuse_conv_and_bn(conv, b) + a = nn.Sequential(fused, *list(a.children())[i + 1:]) + break + fused_list.append(a) + self.module_list = fused_list + self.info() if not ONNX_EXPORT else None # yolov3-spp reduced from 225 to 152 layers + + def info(self, verbose=False): + torch_utils.model_info(self, verbose) + + +def get_yolo_layers(model): + return [i for i, m in enumerate(model.module_list) if m.__class__.__name__ in ['YOLOLayer', 'JDELayer']] # [89, 101, 113] + + +def load_darknet_weights(self, weights, cutoff=-1): + # Parses and loads the weights stored in 'weights' + + # Establish cutoffs (load layers between 0 and cutoff. 
if cutoff = -1 all are loaded) + file = Path(weights).name + if file == 'darknet53.conv.74': + cutoff = 75 + elif file == 'yolov3-tiny.conv.15': + cutoff = 15 + + # Read weights file + with open(weights, 'rb') as f: + # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision + self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training + + weights = np.fromfile(f, dtype=np.float32) # the rest are weights + + ptr = 0 + for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if mdef['type'] == 'convolutional': + conv = module[0] + if mdef['batch_normalize']: + # Load BN bias, weights, running mean and running variance + bn = module[1] + nb = bn.bias.numel() # number of biases + # Bias + bn.bias.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.bias)) + ptr += nb + # Weight + bn.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.weight)) + ptr += nb + # Running Mean + bn.running_mean.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_mean)) + ptr += nb + # Running Var + bn.running_var.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_var)) + ptr += nb + else: + # Load conv. bias + nb = conv.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + nb]).view_as(conv.bias) + conv.bias.data.copy_(conv_b) + ptr += nb + # Load conv. weights + nw = conv.weight.numel() # number of weights + conv.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nw]).view_as(conv.weight)) + ptr += nw + + +def save_weights(self, path='model.weights', cutoff=-1): + # Converts a PyTorch model to Darket format (*.pt to *.weights) + # Note: Does not work if model.fuse() is applied + with open(path, 'wb') as f: + # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 + self.version.tofile(f) # (int32) version info: major, minor, revision + self.seen.tofile(f) # (int64) number of images seen during training + + # Iterate through layers + for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if mdef['type'] == 'convolutional': + conv_layer = module[0] + # If batch norm, load bn first + if mdef['batch_normalize']: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(f) + bn_layer.weight.data.cpu().numpy().tofile(f) + bn_layer.running_mean.data.cpu().numpy().tofile(f) + bn_layer.running_var.data.cpu().numpy().tofile(f) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(f) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(f) + + +def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights', saveto='converted.weights'): + # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) + # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') + + # Initialize model + model = Darknet(cfg) + ckpt = torch.load(weights) # load checkpoint + try: + ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} + model.load_state_dict(ckpt['model'], strict=False) + save_weights(model, path=saveto, cutoff=-1) + except KeyError as e: + print(e) + +def attempt_download(weights): + # Attempt to download pretrained weights if not found locally + weights = weights.strip() + msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0' + + if len(weights) > 0 and not os.path.isfile(weights): + d = {''} + + file = Path(weights).name + if file in d: + r = gdrive_download(id=d[file], name=weights) + else: # download from pjreddie.com + url = 'https://pjreddie.com/media/files/' + file + print('Downloading ' + url) + r = os.system('curl -f ' + url + ' -o ' + weights) + + # Error check + if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB + os.system('rm ' + weights) # remove partial downloads + raise Exception(msg) diff --git a/PyTorch/contrib/cv/detection/YOLOR/requirements-GPU.txt b/PyTorch/contrib/cv/detection/YOLOR/requirements-GPU.txt index d62e7d20ea2daacbcfa8dbc4c075570ff3d8d57c..c98049d8777db49aabe488b79aab92b6d0ba6a4a 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/requirements-GPU.txt +++ b/PyTorch/contrib/cv/detection/YOLOR/requirements-GPU.txt @@ -1,43 +1,43 @@ -# pip install -U -r requirements-GPU.txt - -# base ---------------------------------------- -Cython -numpy -opencv-python -torch>=1.5.0 -matplotlib -pillow -tensorboard -PyYAML>=5.3 -torchvision>=0.6 -scipy -tqdm -pycocotools>=2.0 - -# extras -------------------------------------- -# thop # FLOPS computation - -# logging ------------------------------------- -# wandb - -# plotting ------------------------------------ -# seaborn>=0.11.0 -# pandas - -# export -------------------------------------- -# coremltools>=4.1 -# onnx>=1.8.1 -# scikit-learn==0.19.2 # for coreml quantization - -# Nvidia Apex for mixed precision training -------------------------- -# git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. 
&& rm -rf apex - -# Conda commands (in place of pip) --------------------------------------------- -# conda update -yn base -c defaults conda -# conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython -# conda install -yc conda-forge scikit-image pycocotools tensorboard -# conda install -yc spyder-ide spyder-line-profiler -# conda install -yc pytorch pytorch torchvision -# conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0 # https://github.com/onnx/onnx#linux-and-macos - - +# pip install -U -r requirements-GPU.txt + +# base ---------------------------------------- +Cython +numpy +opencv-python +torch>=1.5.0 +matplotlib +pillow +tensorboard +PyYAML>=5.3 +torchvision>=0.6 +scipy +tqdm +pycocotools>=2.0 + +# extras -------------------------------------- +# thop # FLOPS computation + +# logging ------------------------------------- +# wandb + +# plotting ------------------------------------ +# seaborn>=0.11.0 +# pandas + +# export -------------------------------------- +# coremltools>=4.1 +# onnx>=1.8.1 +# scikit-learn==0.19.2 # for coreml quantization + +# Nvidia Apex for mixed precision training -------------------------- +# git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex + +# Conda commands (in place of pip) --------------------------------------------- +# conda update -yn base -c defaults conda +# conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython +# conda install -yc conda-forge scikit-image pycocotools tensorboard +# conda install -yc spyder-ide spyder-line-profiler +# conda install -yc pytorch pytorch torchvision +# conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0 # https://github.com/onnx/onnx#linux-and-macos + + diff --git a/PyTorch/contrib/cv/detection/YOLOR/requirements.txt b/PyTorch/contrib/cv/detection/YOLOR/requirements.txt index 3e444758361d798ad0024955f6f3e2744fa4c6c5..da9cc83a5ab95b9de1ff0da6c210f0e702ff84a2 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/requirements.txt +++ b/PyTorch/contrib/cv/detection/YOLOR/requirements.txt @@ -1,30 +1,30 @@ -# pip install -qr requirements.txt - -# base ---------------------------------------- -Cython -matplotlib -numpy -opencv-python -Pillow -PyYAML -scipy -tensorboard -tqdm -sympy>=1.8 -decorator>=5.0.9 -pycocotools>=2.0 # COCO mAP - -# extras -------------------------------------- -# thop # FLOPS computation - -# logging ------------------------------------- -# wandb - -# plotting ------------------------------------ -# seaborn>=0.11.0 -# pandas - -# export -------------------------------------- -# coremltools>=4.1 -# onnx>=1.8.1 -# scikit-learn==0.19.2 # for coreml quantization +# pip install -qr requirements.txt + +# base ---------------------------------------- +Cython +matplotlib +numpy +opencv-python +Pillow +PyYAML +scipy +tensorboard +tqdm +sympy>=1.8 +decorator>=5.0.9 +pycocotools>=2.0 # COCO mAP + +# extras -------------------------------------- +# thop # FLOPS computation + +# logging ------------------------------------- +# wandb + +# plotting ------------------------------------ +# seaborn>=0.11.0 +# pandas + +# export -------------------------------------- +# coremltools>=4.1 +# onnx>=1.8.1 +# scikit-learn==0.19.2 # for coreml quantization diff --git a/PyTorch/contrib/cv/detection/YOLOR/test.py b/PyTorch/contrib/cv/detection/YOLOR/test.py index 
614f167330029a45e8b01901d3cc77931f974cc5..4fe2c17ef7ebf8534e09e264fe42dc6f5f6d4e32 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/test.py +++ b/PyTorch/contrib/cv/detection/YOLOR/test.py @@ -1,378 +1,378 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import glob -import json -import os -from pathlib import Path - -import numpy as np -import torch -import yaml -from tqdm import tqdm - -from utils.datasets import create_dataloader -from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \ - non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, clip_coords, set_logging, increment_path -from utils.loss import compute_loss -from utils.metrics import ap_per_class -from utils.plots import plot_images, output_to_target -from utils.torch_utils import select_device, time_synchronized - -from models.models import * -from apex import amp - -def load_classes(path): - # Loads *.names file at 'path' - with open(path, 'r') as f: - names = f.read().split('\n') - return list(filter(None, names)) # filter removes empty strings (such as last line) - - -def set_seed_everything(seed): - random.seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - -def test(data, - weights=None, - batch_size=16, - imgsz=640, - conf_thres=0.001, - iou_thres=0.6, # for NMS - save_json=False, - single_cls=False, - augment=False, - verbose=False, - model=None, - dataloader=None, - save_dir=Path(''), # for saving images - save_txt=False, # for auto-labelling - save_conf=False, - plots=True, - log_imgs=0): # number of logged images - - # Initialize/load model and set device - # set_seed_everything(1234) - training = model is not None - if training: # called by train.py - device = next(model.parameters()).device # get model device - else: # called directly - set_logging() - device = select_device(opt.device, opt.npu, batch_size=batch_size) - save_txt = opt.save_txt # save *.txt labels - - # Directories - save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - - # Load model - model = Darknet(opt.cfg).to(device) - - # load model - try: - ckpt = torch.load(weights[0], map_location=device) # load checkpoint - ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} - model.load_state_dict(ckpt['model'], strict=False) - except: - load_darknet_weights(model, weights[0]) - - imgsz = check_img_size(imgsz, s=64) # check img_size - - model = amp.initialize(model, opt_level='O1', verbosity=0, loss_scale=64) - - # Half - half = device.type != 'cpu' # half precision only supported on CUDA - if half: - model.half() - - # Configure - model.eval() - 
is_coco = data.endswith('coco.yaml') # is COCO dataset - with open(data) as f: - data = yaml.load(f, Loader=yaml.FullLoader) # model dict - check_dataset(data) # check - nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 - niou = iouv.numel() - - # ---------------------------------not using wandb--------------------------------- - # Logging - log_imgs, wandb = min(log_imgs, 100), None # ceil - # try: - # import wandb # Weights & Biases - # except ImportError: - log_imgs = 0 - # ---------------------------------not using wandb--------------------------------- - - # Dataloader - if not training: - img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img - _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once - path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images - dataloader = create_dataloader(path, imgsz, batch_size, 64, opt, - hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] - - seen = 0 - try: - names = model.names if hasattr(model, 'names') else model.module.names - except: - names = load_classes(opt.names) - coco91class = coco80_to_coco91_class() - s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') - p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. - loss = torch.zeros(3, device=device) - jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] - - pbar = tqdm(dataloader) - for batch_i, (img, targets, paths, shapes) in enumerate(pbar): - img = img.to(device, non_blocking=True) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255.0 # 0 - 255 to 0.0 - 1.0 - targets = targets.to(device) - nb, _, height, width = img.shape # batch size, channels, height, width - whwh = torch.Tensor([width, height, width, height]) - - # Disable gradients - with torch.no_grad(): - # Run model - t = time_synchronized() - inf_out, _ = model(img, augment=augment) # inference and training outputs - t0 += time_synchronized() - t - - # Compute loss / no test during training - # if training: # if model has loss hyperparameters - # loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls - - # Run NMS - t = time_synchronized() - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) - t1 += time_synchronized() - t - targets = targets.cpu() - - # Statistics per image - for si, pred in enumerate(output): - labels = targets[targets[:, 0] == si, 1:] - nl = len(labels) - tcls = labels[:, 0].tolist() if nl else [] # target class - seen += 1 - - if len(pred) == 0: - if nl: - stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) - continue - - # Append to text file - path = Path(paths[si]) - if save_txt: - gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh - x = pred.clone() - x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original - for *xyxy, conf, cls in x: - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: - f.write(('%g ' * len(line)).rstrip() % line + '\n') - - # W&B logging - if plots and len(wandb_images) < log_imgs: - box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - 
"class_id": int(cls), - "box_caption": "%s %.3f" % (names[cls], conf), - "scores": {"class_score": conf}, - "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] - boxes = {"predictions": {"box_data": box_data, "class_labels": names}} - wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name)) - - # Clip boxes to image bounds - clip_coords(pred, (height, width)) - - # Append to pycocotools JSON dictionary - if save_json: - # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... - image_id = int(path.stem) if path.stem.isnumeric() else path.stem - box = pred[:, :4].clone() # xyxy - scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape - box = xyxy2xywh(box) # xywh - box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - for p, b in zip(pred.tolist(), box.tolist()): - jdict.append({'image_id': image_id, - 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)}) - - # Assign all predictions as incorrect - correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) - if nl: - detected = [] # target indices - tcls_tensor = labels[:, 0] - - # target boxes - tbox = xywh2xyxy(labels[:, 1:5]) * whwh - - # Per target class - for cls in torch.unique(tcls_tensor): - ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices - pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices - - # Search for detections - if pi.shape[0]: - # Prediction to target ious - ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices - - # Append detections - temp_nonzero_idx = (ious > iouv[0]).nonzero(as_tuple=False) - for j in temp_nonzero_idx: - d = ti[i[j]] # detected target - if d not in detected: - detected.append(d) - correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn - if len(detected) == nl: # all targets already located in image - break - - # Append statistics (correct, conf, pcls, tcls) - stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) - - # Plot images - if plots and batch_i < 3: - f = save_dir / f'{batch_i}_labels.jpg' # filename - plot_images(img, targets, paths, f, names) # labels - f = save_dir / f'{batch_i}_pred.jpg' - plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions - - # Compute statistics - stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy - if len(stats) and stats[0].any(): - p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png') - p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] - mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() - nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class - else: - nt = torch.zeros(1) - - # W&B logging - if plots and wandb: - wandb.log({"Images": wandb_images}) - wandb.log({"Validation": [wandb.Image(str(x), caption=x.name) for x in sorted(save_dir.glob('test*.jpg'))]}) - - # Print results - pf = '%20s' + '%12.3g' * 6 # print format - print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) - - # Print results per class - if verbose and nc > 1 and len(stats): - for i, c in enumerate(ap_class): - print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) - - # Print speeds - t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple - if not training: - print('Speed: %.1f/%.1f/%.1f ms 
inference/NMS/total per %gx%g image at batch-size %g' % t) - - # Save JSON - if save_json and len(jdict): - w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights - anno_json = glob.glob('../data/coco/annotations/instances_val*.json')[0] # annotations json - pred_json = str(save_dir / f"{w}_predictions.json") # predictions json - print('\nEvaluating pycocotools mAP... saving %s...' % pred_json) - with open(pred_json, 'w') as f: - json.dump(jdict, f) - - try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - - anno = COCO(anno_json) # init annotations api - pred = anno.loadRes(pred_json) # init predictions api - eval = COCOeval(anno, pred, 'bbox') - if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate - eval.evaluate() - eval.accumulate() - eval.summarize() - map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) - except Exception as e: - print('ERROR: pycocotools unable to run: %s' % e) - - # Return results - if not training: - print('Results saved to %s' % save_dir) - model.float() # for training - maps = np.zeros(nc) + map - for i, c in enumerate(ap_class): - maps[c] = ap[i] - return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(prog='test.py') - parser.add_argument('--weights', nargs='+', type=str, default='yolor_p6.pt', help='model.pt path(s)') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') - parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') - parser.add_argument('--img-size', type=int, default=1280, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS') - parser.add_argument('--task', default='val', help="'val', 'test', 'study'") - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--verbose', action='store_true', help='report mAP by class') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') - parser.add_argument('--project', default='runs/test', help='save to project/name') - parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--cfg', type=str, default='cfg/yolor_p6.cfg', help='*.cfg path') - parser.add_argument('--names', type=str, default='data/coco.names', help='*.cfg path') - parser.add_argument('--npu', default=None, type=int, help='NPU id to use.') - opt = parser.parse_args() - opt.save_json |= opt.data.endswith('coco.yaml') - opt.data = check_file(opt.data) # check file - print(opt) - - if opt.task in ['val', 'test']: # run normally - test(opt.data, - opt.weights, - opt.batch_size, - opt.img_size, - opt.conf_thres, - opt.iou_thres, - opt.save_json, - opt.single_cls, - opt.augment, - opt.verbose, - save_txt=opt.save_txt, - save_conf=opt.save_conf, - ) - - elif opt.task == 'study': # run over a range of settings and save/plot - for weights in ['yolor_p6.pt', 'yolor_w6.pt']: - f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to - x = list(range(320, 800, 64)) # x axis - y = [] # y axis - for i in x: # img-size - print('\nRunning %s point %s...' % (f, i)) - r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json) - y.append(r + t) # results and times - np.savetxt(f, y, fmt='%10.4g') # save - os.system('zip -r study.zip study_*.txt') - # utils.general.plot_study_txt(f, x) # plot +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import glob +import json +import os +from pathlib import Path + +import numpy as np +import torch +import yaml +from tqdm import tqdm + +from utils.datasets import create_dataloader +from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \ + non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, clip_coords, set_logging, increment_path +from utils.loss import compute_loss +from utils.metrics import ap_per_class +from utils.plots import plot_images, output_to_target +from utils.torch_utils import select_device, time_synchronized + +from models.models import * +from apex import amp + +def load_classes(path): + # Loads *.names file at 'path' + with open(path, 'r') as f: + names = f.read().split('\n') + return list(filter(None, names)) # filter removes empty strings (such as last line) + + +def set_seed_everything(seed): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def test(data, + weights=None, + batch_size=16, + imgsz=640, + conf_thres=0.001, + iou_thres=0.6, # for NMS + save_json=False, + single_cls=False, + augment=False, + verbose=False, + model=None, + dataloader=None, + save_dir=Path(''), # for saving images + save_txt=False, # for auto-labelling + save_conf=False, + plots=True, + log_imgs=0): # number of logged images + + # Initialize/load model and set device + # set_seed_everything(1234) + training = model is not None + if training: # called by train.py + device = next(model.parameters()).device # get model device + else: # called directly + set_logging() + device = select_device(opt.device, opt.npu, batch_size=batch_size) + save_txt = opt.save_txt # save *.txt labels + + # Directories + save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = Darknet(opt.cfg).to(device) + + # load model + try: + ckpt = torch.load(weights[0], map_location=device) # load checkpoint + ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} + model.load_state_dict(ckpt['model'], strict=False) + except: + load_darknet_weights(model, weights[0]) + + imgsz = check_img_size(imgsz, s=64) # check img_size + + model = amp.initialize(model, opt_level='O1', verbosity=0, loss_scale=64) + + # Half + half = device.type != 'cpu' # half precision only supported on CUDA + if half: + model.half() + + # Configure + model.eval() + is_coco = data.endswith('coco.yaml') # is COCO dataset + with open(data) as f: + data = yaml.load(f, Loader=yaml.FullLoader) # model dict + check_dataset(data) # check + nc = 1 if single_cls else int(data['nc']) # number of classes + iouv = torch.linspace(0.5, 0.95, 10) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # ---------------------------------not using wandb--------------------------------- + # Logging + log_imgs, wandb = min(log_imgs, 100), None # ceil + # try: + # import wandb # Weights & Biases + # except ImportError: + log_imgs = 0 + # ---------------------------------not using wandb--------------------------------- + + # Dataloader + if not training: + img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img + _ = model(img.half() if half else img) if device.type != 
'cpu' else None # run once + path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images + dataloader = create_dataloader(path, imgsz, batch_size, 64, opt, + hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] + + seen = 0 + try: + names = model.names if hasattr(model, 'names') else model.module.names + except: + names = load_classes(opt.names) + coco91class = coco80_to_coco91_class() + s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. + loss = torch.zeros(3, device=device) + jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] + + pbar = tqdm(dataloader) + for batch_i, (img, targets, paths, shapes) in enumerate(pbar): + img = img.to(device, non_blocking=True) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + targets = targets.to(device) + nb, _, height, width = img.shape # batch size, channels, height, width + whwh = torch.Tensor([width, height, width, height]) + + # Disable gradients + with torch.no_grad(): + # Run model + t = time_synchronized() + inf_out, _ = model(img, augment=augment) # inference and training outputs + t0 += time_synchronized() - t + + # Compute loss / no test during training + # if training: # if model has loss hyperparameters + # loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls + + # Run NMS + t = time_synchronized() + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) + t1 += time_synchronized() - t + targets = targets.cpu() + + # Statistics per image + for si, pred in enumerate(output): + labels = targets[targets[:, 0] == si, 1:] + nl = len(labels) + tcls = labels[:, 0].tolist() if nl else [] # target class + seen += 1 + + if len(pred) == 0: + if nl: + stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) + continue + + # Append to text file + path = Path(paths[si]) + if save_txt: + gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh + x = pred.clone() + x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original + for *xyxy, conf, cls in x: + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: + f.write(('%g ' * len(line)).rstrip() % line + '\n') + + # W&B logging + if plots and len(wandb_images) < log_imgs: + box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, + "class_id": int(cls), + "box_caption": "%s %.3f" % (names[cls], conf), + "scores": {"class_score": conf}, + "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] + boxes = {"predictions": {"box_data": box_data, "class_labels": names}} + wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name)) + + # Clip boxes to image bounds + clip_coords(pred, (height, width)) + + # Append to pycocotools JSON dictionary + if save_json: + # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
+ image_id = int(path.stem) if path.stem.isnumeric() else path.stem + box = pred[:, :4].clone() # xyxy + scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape + box = xyxy2xywh(box) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + for p, b in zip(pred.tolist(), box.tolist()): + jdict.append({'image_id': image_id, + 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5)}) + + # Assign all predictions as incorrect + correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool) + if nl: + detected = [] # target indices + tcls_tensor = labels[:, 0] + + # target boxes + tbox = xywh2xyxy(labels[:, 1:5]) * whwh + + # Per target class + for cls in torch.unique(tcls_tensor): + ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices + pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices + + # Search for detections + if pi.shape[0]: + # Prediction to target ious + ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices + + # Append detections + temp_nonzero_idx = (ious > iouv[0]).nonzero(as_tuple=False) + for j in temp_nonzero_idx: + d = ti[i[j]] # detected target + if d not in detected: + detected.append(d) + correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn + if len(detected) == nl: # all targets already located in image + break + + # Append statistics (correct, conf, pcls, tcls) + stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) + + # Plot images + if plots and batch_i < 3: + f = save_dir / f'{batch_i}_labels.jpg' # filename + plot_images(img, targets, paths, f, names) # labels + f = save_dir / f'{batch_i}_pred.jpg' + plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions + + # Compute statistics + stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy + if len(stats) and stats[0].any(): + p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png') + p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] + mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() + nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class + else: + nt = torch.zeros(1) + + # W&B logging + if plots and wandb: + wandb.log({"Images": wandb_images}) + wandb.log({"Validation": [wandb.Image(str(x), caption=x.name) for x in sorted(save_dir.glob('test*.jpg'))]}) + + # Print results + pf = '%20s' + '%12.3g' * 6 # print format + print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) + + # Print results per class + if verbose and nc > 1 and len(stats): + for i, c in enumerate(ap_class): + print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) + + # Print speeds + t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple + if not training: + print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) + + # Save JSON + if save_json and len(jdict): + w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights + anno_json = glob.glob('../data/coco/annotations/instances_val*.json')[0] # annotations json + pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + print('\nEvaluating pycocotools mAP... saving %s...' 
% pred_json) + with open(pred_json, 'w') as f: + json.dump(jdict, f) + + try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, 'bbox') + if is_coco: + eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate + eval.evaluate() + eval.accumulate() + eval.summarize() + map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + except Exception as e: + print('ERROR: pycocotools unable to run: %s' % e) + + # Return results + if not training: + print('Results saved to %s' % save_dir) + model.float() # for training + maps = np.zeros(nc) + map + for i, c in enumerate(ap_class): + maps[c] = ap[i] + return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(prog='test.py') + parser.add_argument('--weights', nargs='+', type=str, default='yolor_p6.pt', help='model.pt path(s)') + parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') + parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') + parser.add_argument('--img-size', type=int, default=1280, help='inference size (pixels)') + parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') + parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS') + parser.add_argument('--task', default='val', help="'val', 'test', 'study'") + parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') + parser.add_argument('--augment', action='store_true', help='augmented inference') + parser.add_argument('--verbose', action='store_true', help='report mAP by class') + parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') + parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') + parser.add_argument('--project', default='runs/test', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--cfg', type=str, default='cfg/yolor_p6.cfg', help='*.cfg path') + parser.add_argument('--names', type=str, default='data/coco.names', help='*.cfg path') + parser.add_argument('--npu', default=None, type=int, help='NPU id to use.') + opt = parser.parse_args() + opt.save_json |= opt.data.endswith('coco.yaml') + opt.data = check_file(opt.data) # check file + print(opt) + + if opt.task in ['val', 'test']: # run normally + test(opt.data, + opt.weights, + opt.batch_size, + opt.img_size, + opt.conf_thres, + opt.iou_thres, + opt.save_json, + opt.single_cls, + opt.augment, + opt.verbose, + save_txt=opt.save_txt, + save_conf=opt.save_conf, + ) + + elif opt.task == 'study': # run over a range of settings and save/plot + for weights in ['yolor_p6.pt', 'yolor_w6.pt']: + f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to + x = list(range(320, 800, 64)) # x axis + y = [] # y axis + for i in x: # img-size + print('\nRunning %s point %s...' % (f, i)) + r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json) + y.append(r + t) # results and times + np.savetxt(f, y, fmt='%10.4g') # save + os.system('zip -r study.zip study_*.txt') + # utils.general.plot_study_txt(f, x) # plot diff --git a/PyTorch/contrib/cv/detection/YOLOR/train.py b/PyTorch/contrib/cv/detection/YOLOR/train.py index 8f59aeea094002d9fafddd53f52bedc4daf32276..75329751e2daa8cae9c54af80b3fdfe5d7b7ebf4 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/train.py +++ b/PyTorch/contrib/cv/detection/YOLOR/train.py @@ -1,739 +1,739 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import logging -import math -import os -import random -import time -from pathlib import Path -from warnings import warn - -import numpy as np -import torch.distributed as dist -import torch.nn.functional as F -import torch.optim as optim -import torch.optim.lr_scheduler as lr_scheduler -import torch.utils.data -import yaml -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.utils.tensorboard import SummaryWriter -import torch.multiprocessing as mp - -import test # import test.py to get mAP after each epoch -#from models.yolo import Model -from models.models import * -from utils.autoanchor import check_anchors -from utils.datasets import create_dataloader -from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - fitness, fitness_p, fitness_r, fitness_ap50, fitness_ap, fitness_f, strip_optimizer, get_latest_run,\ - check_dataset, check_file, check_git_status, check_img_size, print_mutation, set_logging -from utils.google_utils import attempt_download -from utils.loss import compute_loss -from utils.plots import plot_images, plot_labels, plot_results, plot_evolution, output_to_target -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first - -logger = logging.getLogger(__name__) - -mixed_precision = True -try: - import apex - from apex import amp -except: - print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex') - mixed_precision = False # not installed - -use_wandb = False -if use_wandb: - try: - import wandb - except: - print("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)") -else: - wandb = None - - -def train(hyp, opt, device, tb_writer=None, wandb=None): - print(f'Hyperparameters {hyp}') - save_dir, epochs, batch_size, total_batch_size, weights, rank = \ - Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.local_rank - - # Directories - wdir = save_dir / 'weights' - wdir.mkdir(parents=True, exist_ok=True) # make dir - last = wdir / 'last.pt' - best = wdir / 'best.pt' - results_file = save_dir / 'results.txt' - - # Save run settings - with open(save_dir / 'hyp.yaml', 'w') as f: - yaml.dump(hyp, f, sort_keys=False) - with open(save_dir / 'opt.yaml', 'w') as f: - yaml.dump(vars(opt), f, sort_keys=False) - - # Configure - plots = not opt.evolve # create plots - cuda = device.type != 'cpu' - init_seeds(2 + rank) - with open(opt.data) as f: - data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict - with torch_distributed_zero_first(rank): - check_dataset(data_dict) # check - train_path = data_dict['train'] - test_path = data_dict['val'] - nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check - - # Model - pretrained = weights.endswith('.pt') - if pretrained: - with torch_distributed_zero_first(rank): - attempt_download(weights) # download if not found locally - ckpt = torch.load(weights, map_location=device) # load checkpoint - model = Darknet(opt.cfg).to(device) # create - state_dict = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} - model.load_state_dict(state_dict, strict=False) - print('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report - else: - model = Darknet(opt.cfg).to(device) # 
create - - # Image sizes - gs = 64 #int(max(model.stride)) # grid size (max stride) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples - - # Optimizer - nbs = 64 # nominal batch size - accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing - hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay - - pg0, pg1, pg2 = [], [], [] # optimizer parameter groups - for k, v in dict(model.named_parameters()).items(): - if '.bias' in k: - pg2.append(v) # biases - elif 'Conv2d.weight' in k: - pg1.append(v) # apply weight_decay - elif 'm.weight' in k: - pg1.append(v) # apply weight_decay - elif 'w.weight' in k: - pg1.append(v) # apply weight_decay - else: - pg0.append(v) # all else - - if opt.adam: - optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum - else: - if device.type == 'npu': - optimizer = apex.optimizers.NpuFusedSGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - else: - optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay - optimizer.add_param_group({'params': pg2}) # add pg2 (biases) - print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) - del pg0, pg1, pg2 - - # Logging - if wandb and wandb.run is None: - opt.hyp = hyp # add hyperparameters - wandb_run = wandb.init(config=opt, resume="allow", - project='YOLOR' if opt.project == 'runs/train' else Path(opt.project).stem, - name=save_dir.stem, - id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) - - # Resume - start_epoch = 0 - # best_fitness, best_fitness_p, best_fitness_r, best_fitness_ap50, best_fitness_ap, best_fitness_f = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 - if pretrained: - # Optimizer - if ckpt['optimizer'] is not None: - optimizer.load_state_dict(ckpt['optimizer']) - # best_fitness = ckpt['best_fitness'] - # best_fitness_p = ckpt['best_fitness_p'] - # best_fitness_r = ckpt['best_fitness_r'] - # best_fitness_ap50 = ckpt['best_fitness_ap50'] - # best_fitness_ap = ckpt['best_fitness_ap'] - # best_fitness_f = ckpt['best_fitness_f'] - - # Results - if ckpt.get('training_results') is not None: - with open(results_file, 'w') as file: - file.write(ckpt['training_results']) # write results.txt - - # Epochs - start_epoch = ckpt['epoch'] + 1 - if opt.resume: - assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) - if epochs < start_epoch: - print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% - (weights, ckpt['epoch'], epochs)) - epochs += ckpt['epoch'] # finetune additional epochs - - del ckpt, state_dict - - # Mixed precision training https://github.com/NVIDIA/apex - if mixed_precision: - if device.type == 'npu': - model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0, loss_scale=64, combine_grad=True) - else: - model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0, loss_scale=64) - - # Scheduler https://arxiv.org/pdf/1812.01187.pdf - # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine - scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) - # plot_lr_scheduler(optimizer, scheduler, epochs) - - - # SyncBatchNorm - if opt.sync_bn and cuda and rank != -1: - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) - print('Using SyncBatchNorm()') - - # EMA - ema = ModelEMA(model) if rank in [-1, 0] else None - - # DDP mode - if device.type == 'cuda' and rank != -1: - model = DDP(model, device_ids=[rank], output_device=rank) - elif device.type == 'npu' and rank != -1: - model = DDP(model, device_ids=[rank], broadcast_buffers=False) - - # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, - hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, - rank=rank, world_size=opt.world_size, workers=opt.workers) - mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class - nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) - - # Model parameters - hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset - model.nc = nc # attach number of classes to model - model.hyp = hyp # attach hyperparameters to model - model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights - model.names = names - - # Process 0 - if rank in [-1, 0]: - ema.updates = start_epoch * nb // accumulate # set EMA updates - testloader = create_dataloader(test_path, imgsz_test, batch_size*2, gs, opt, - hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, - rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader - - if not opt.resume: - labels = np.concatenate(dataset.labels, 0) - c = torch.tensor(labels[:, 0]) # classes - # cf = torch.bincount(c.long(), minlength=nc) + 1. 
# frequency - # model._initialize_biases(cf.to(device)) - if plots: - plot_labels(labels, save_dir=save_dir) - if tb_writer: - tb_writer.add_histogram('classes', c, 0) - if wandb: - wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.png')]}) - - # Anchors - # if not opt.noautoanchor: - # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) - - - # Start training - t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training - maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) - scheduler.last_epoch = start_epoch - 1 # do not move - if rank in [0, -1]: - print('Image sizes %g train, %g test\n' - 'Using %g dataloader workers\nLogging results to %s\n' - 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) - - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ - model.train() - - # Update image weights (optional) - # When in DDP mode, the generated indices will be broadcasted to synchronize dataset. - if opt.image_weights: - # Generate indices - if rank in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx - # Broadcast if DDP - if rank != -1: - indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() - dist.broadcast(indices, 0) - if rank != 0: - dataset.indices = indices.cpu().numpy() - - # Update mosaic border - # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) - # dataset.mosaic_border = [b - imgsz, -b] # height, width borders - - mloss = torch.zeros(4, device=device) # mean losses - if rank != -1: - dataloader.sampler.set_epoch(epoch) - optimizer.zero_grad() - start_time = time.time() - d_1 = time.time() - - for i, (imgs, targets, paths, _) in enumerate(dataloader): # batch ------------------------------------------------------------- - t_time = time.time() - d_time = t_time - d_1 - ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 - - # Warmup - if ni <= nw: - xi = [0, nw] # x interp - # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) - for j, x in enumerate(optimizer.param_groups): - # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) - if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) - - # Multi-scale - if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size - sf = sz / max(imgs.shape[2:]) # scale factor - if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) - - # Forward - pred = model(imgs) - - # Loss - loss, loss_items = compute_loss(pred, targets.to(device), model) # 
scaled by batch_size - if rank != -1: - loss *= opt.world_size # gradient averaged between devices in DDP mode - if not torch.isfinite(loss): - print('WARNING: non-finite loss, ending training ', loss_items) - return results - - # Backward - if mixed_precision: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - # Optimize - if ni % accumulate == 0: - optimizer.step() - optimizer.zero_grad() - if ema is not None: - x = torch.tensor([1.]).to(device) - if device.type == 'npu': - params_fp32_fused = optimizer.get_model_combined_params() - ema.update(model, x, params_fp32_fused[0]) - else: - ema.update(model, x) - - if i <= 10: - sum_time = (time.time() - start_time) / (i + 1) - if i == 10: - start_time = time.time() - else: - sum_time = (time.time() - start_time) / (i - 10) - ptime = time.time() - d_1 - # Print - if rank in [-1, 0]: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) - print( - 'Epoch:[%2g][%4g/%4g][%s][FPS:%3.1f][mTime:%3.3f][pTime:%3.3f][dTime:%3.3f] GIoU:%.3f objectness:%.3f classfication:%.3f totalLoss:%.3f' % ( - epoch, i, nb, device, opt.total_batch_size / sum_time, sum_time, ptime, d_time, *mloss)) - - # Plot - if plots and ni < 3: - f = save_dir / f'train_batch{ni}.jpg' # filename - plot_images(images=imgs, targets=targets, paths=paths, fname=f) - # if tb_writer: - # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) - # tb_writer.add_graph(model, imgs) # add model to tensorboard - elif plots and ni == 3 and wandb: - wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]}) - if i > 170: - break - d_1 = time.time() - # end batch ------------------------------------------------------------------------------------------------ - # end epoch ---------------------------------------------------------------------------------------------------- - - # Scheduler - lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard - scheduler.step() - - # DDP process 0 or single-GPU - if rank in [-1, 0]: - # mAP - if ema: - ema.update_attr(model) - final_epoch = epoch + 1 == epochs - if False: # No test during training - results, maps, _ = test.test(opt.data, - batch_size=batch_size*2, - imgsz=imgsz_test, - model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema, - single_cls=opt.single_cls, - dataloader=testloader, - save_dir=save_dir, - plots=True) - - # Write - with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) - if len(opt.name) and opt.bucket: - os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) - - # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params - for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): - if tb_writer: - tb_writer.add_scalar(tag, x, epoch) # tensorboard - if wandb: - wandb.log({tag: x}) # W&B - - # # Update best mAP - # fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # fi_p = fitness_p(np.array(results).reshape(1, 
-1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # fi_r = fitness_r(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # fi_ap50 = fitness_ap50(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # fi_ap = fitness_ap(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # if (fi_p > 0.0) or (fi_r > 0.0): - # fi_f = fitness_f(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] - # else: - # fi_f = 0.0 - # if fi > best_fitness: - # best_fitness = fi - # if fi_p > best_fitness_p: - # best_fitness_p = fi_p - # if fi_r > best_fitness_r: - # best_fitness_r = fi_r - # if fi_ap50 > best_fitness_ap50: - # best_fitness_ap50 = fi_ap50 - # if fi_ap > best_fitness_ap: - # best_fitness_ap = fi_ap - # if fi_f > best_fitness_f: - # best_fitness_f = fi_f - - # Save model - save = (not opt.nosave) or (final_epoch and not opt.evolve) - if save: - with open(results_file, 'r') as f: # create checkpoint - ckpt = {'epoch': epoch, - # 'best_fitness': best_fitness, - # 'best_fitness_p': best_fitness_p, - # 'best_fitness_r': best_fitness_r, - # 'best_fitness_ap50': best_fitness_ap50, - # 'best_fitness_ap': best_fitness_ap, - # 'best_fitness_f': best_fitness_f, - 'training_results': f.read(), - 'model': ema.ema.module.state_dict() if hasattr(ema, 'module') else ema.ema.state_dict(), - 'optimizer': None if final_epoch else optimizer.state_dict(), - 'wandb_id': wandb_run.id if wandb else None} - - # Save last, best and delete - torch.save(ckpt, last) - # if best_fitness == fi: - # torch.save(ckpt, best) - # if (best_fitness == fi) and (epoch >= 200): - # torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) - # if best_fitness == fi: - # torch.save(ckpt, wdir / 'best_overall.pt') - # if best_fitness_p == fi_p: - # torch.save(ckpt, wdir / 'best_p.pt') - # if best_fitness_r == fi_r: - # torch.save(ckpt, wdir / 'best_r.pt') - # if best_fitness_ap50 == fi_ap50: - # torch.save(ckpt, wdir / 'best_ap50.pt') - # if best_fitness_ap == fi_ap: - # torch.save(ckpt, wdir / 'best_ap.pt') - # if best_fitness_f == fi_f: - # torch.save(ckpt, wdir / 'best_f.pt') - if epoch == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - if ((epoch+1) % 25) == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - if epoch >= (epochs-5): - torch.save(ckpt, wdir / 'last_{:03d}.pt'.format(epoch)) - elif epoch >= 420: - torch.save(ckpt, wdir / 'last_{:03d}.pt'.format(epoch)) - del ckpt - # end epoch ---------------------------------------------------------------------------------------------------- - # end training - - if rank in [-1, 0]: - # Strip optimizers - n = opt.name if opt.name.isnumeric() else '' - fresults, flast, fbest = save_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' - for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): - if f1.exists(): - os.rename(f1, f2) # rename - if str(f2).endswith('.pt'): # is *.pt - strip_optimizer(f2) # strip optimizer - os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload - # Finish - if plots: - plot_results(save_dir=save_dir) # save as results.png - if wandb: - wandb.log({"Results": [wandb.Image(str(save_dir / x), caption=x) for x in - ['results.png', 'precision-recall_curve.png']]}) - print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) - - else: - dist.destroy_process_group() 
-
-    wandb.run.finish() if wandb and wandb.run else None
-    torch.cuda.empty_cache()
-
-    return results
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default='yolor_p6.pt', help='initial weights path')
-    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
-    parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path')
-    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.1280.yaml', help='hyperparameters path')
-    parser.add_argument('--epochs', type=int, default=300)
-    parser.add_argument('--batch-size', type=int, default=8, help='total batch size for all GPUs')
-    parser.add_argument('--img-size', nargs='+', type=int, default=[1280, 1280], help='[train, test] image sizes')
-    parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
-    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
-    parser.add_argument('--notest', action='store_true', help='only test final epoch')
-    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
-    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
-    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
-    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
-    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu or npu')
-    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
-    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
-    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
-    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
-    parser.add_argument('--full', action='store_true', help='full mode')
-    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
-    parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
-    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
-    parser.add_argument('--project', default='runs/train', help='save to project/name')
-    parser.add_argument('--name', default='exp', help='save to project/name')
-    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
-    parser.add_argument('--npu', default=-1, type=int, help='NPU id to use.')
-
-    # NPU DDP mode
-    parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
-    parser.add_argument('--device-num', default=1, type=int, help='multi NPU parameter, GPU or CPU do not modify')
-    parser.add_argument('--addr', default='127.0.0.1', type=str, help='DDP master node IP')
-    parser.add_argument('--dist-url', default='tcp://127.0.0.1:29501', type=str,
-                        help='url used to set up distributed training')
-    opt = parser.parse_args()
-
-    if opt.dist_url == "env://":
-        opt.world_size = int(os.environ["WORLD_SIZE"])
-    ngpus_per_node = opt.device_num
-    opt.npu_ddp = (opt.device_num > 1 or opt.world_size > 1)
-    if opt.npu_ddp:
-        print('multi npu 
training') - os.environ['MASTER_ADDR'] = opt.addr # master ip - os.environ['MASTER_PORT'] = '29501' - os.environ['KERNEL_NAME_ID'] = str(0) - opt.world_size = ngpus_per_node * opt.world_size # the sum of GPU or NPU in all the nodes - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, opt)) - else: - print('1p training') - main_worker(opt.npu, ngpus_per_node, opt) - - # # Set DDP variables - # opt.total_batch_size = opt.batch_size - # opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 - # opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 - # set_logging(opt.global_rank) - # if opt.global_rank in [-1, 0]: - # check_git_status() - - # # DDP mode - # device = select_device(opt.device, batch_size=opt.batch_size) - # if opt.local_rank != -1: - # assert torch.cuda.device_count() > opt.local_rank - # torch.cuda.set_device(opt.local_rank) - # device = torch.device('cuda', opt.local_rank) - # dist.init_process_group(backend='nccl', init_method='env://') # distributed backend - # assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' - # opt.batch_size = opt.total_batch_size // opt.world_size - - -def main_worker(npu, ngpus_per_node, opt): - # Resume - if opt.resume: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml') as f: - opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace - opt.cfg, opt.weights, opt.resume = '', ckpt, True - print('Resuming training from %s' % ckpt) - else: - # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files - assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.name = 'evolve' if opt.evolve else opt.name - opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve) # increment run - - # Hyperparameters - with open(opt.hyp) as f: - hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps - if 'box' not in hyp: - warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' % - (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120')) - hyp['box'] = hyp.pop('giou') - - # npu DDP - if opt.npu_ddp: - opt.npu = npu - os.environ['KERNEL_NAME_ID'] = str(npu) - print("[npu id:", opt.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) - opt.local_rank = opt.local_rank * ngpus_per_node + npu - global mixed_precision - device = torch_utils.select_device(opt.device, opt.npu, apex=mixed_precision, batch_size=opt.batch_size) - opt.total_batch_size = opt.batch_size - if device.type == 'cpu': - mixed_precision = False - elif opt.local_rank != -1 and device.type == 'cuda': - # DDP mode - assert torch.cuda.device_count() > opt.local_rank - torch.cuda.set_device(opt.local_rank) - device = torch.device("cuda", opt.local_rank) - dist.init_process_group(backend='nccl', init_method='env://') # distributed backend - - opt.world_size = dist.get_world_size() - assert opt.batch_size % opt.world_size == 0, "Batch size is not a multiple of the number of devices given!" 
- opt.batch_size = opt.total_batch_size // opt.world_size - elif opt.local_rank != -1 and device.type == 'npu': - dist.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.local_rank) - assert opt.batch_size % opt.world_size == 0, "Batch size is not a multiple of the number of devices given!" - opt.batch_size = opt.total_batch_size // opt.world_size - - # Train - print(opt) - if not opt.evolve: - tb_writer = None # init loggers - if opt.local_rank in [-1, 0]: - print(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/') - tb_writer = SummaryWriter(opt.save_dir) # Tensorboard - train(hyp, opt, device, tb_writer, wandb) - - # Evolve hyperparameters (optional) - else: - # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) - meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) - 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) - 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 - 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay - 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) - 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum - 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr - 'box': (1, 0.02, 0.2), # box loss gain - 'cls': (1, 0.2, 4.0), # cls loss gain - 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight - 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) - 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight - 'iou_t': (0, 0.1, 0.7), # IoU training threshold - 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold - 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) - 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) - 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) - 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) - 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) - 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) - 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) - 'scale': (1, 0.0, 0.9), # image scale (+/- gain) - 'shear': (1, 0.0, 10.0), # image shear (+/- deg) - 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 - 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) - 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) - 'mosaic': (1, 0.0, 1.0), # image mixup (probability) - 'mixup': (1, 0.0, 1.0)} # image mixup (probability) - - assert opt.local_rank == -1, 'DDP mode not implemented for --evolve' - opt.notest, opt.nosave = True, True # only test/save final epoch - # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices - yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here - if opt.bucket: - os.system('gsutil cp gs://%s/evolve.txt .' 
% opt.bucket) # download evolve.txt if exists - - for _ in range(300): # generations to evolve - if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate - # Select parent(s) - parent = 'single' # parent selection method: 'single' or 'weighted' - x = np.loadtxt('evolve.txt', ndmin=2) - n = min(5, len(x)) # number of previous results to consider - x = x[np.argsort(-fitness(x))][:n] # top n mutations - w = fitness(x) - fitness(x).min() # weights - if parent == 'single' or len(x) == 1: - # x = x[random.randint(0, n - 1)] # random selection - x = x[random.choices(range(n), weights=w)[0]] # weighted selection - elif parent == 'weighted': - x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination - - # Mutate - mp, s = 0.8, 0.2 # mutation probability, sigma - npr = np.random - npr.seed(int(time.time())) - g = np.array([x[0] for x in meta.values()]) # gains 0-1 - ng = len(meta) - v = np.ones(ng) - while all(v == 1): # mutate until a change occurs (prevent duplicates) - v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) - for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) - hyp[k] = float(x[i + 7] * v[i]) # mutate - - # Constrain to limits - for k, v in meta.items(): - hyp[k] = max(hyp[k], v[1]) # lower limit - hyp[k] = min(hyp[k], v[2]) # upper limit - hyp[k] = round(hyp[k], 5) # significant digits - - # Train mutation - results = train(hyp.copy(), opt, device, wandb=wandb) - - # Write mutation results - print_mutation(hyp.copy(), results, yaml_file, opt.bucket) - - # Plot results - plot_evolution(yaml_file) - print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' - f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') - - -if __name__ == '__main__': - # option = {} - # option["ACL_OP_DEBUG_LEVEL"] = 3 # 算子debug功能,暂不开启 - # option["ACL_DEBUG_DIR"] = "debug_file" # 算子debug功能对应文件夹,暂不开启 - # option["ACL_OP_COMPILER_CACHE_MODE"] = "enable" # cache功能启用 - # option["ACL_OP_COMPILER_CACHE_DIR"] = "./kernel_meta" # cache所在文件夹 - # print("option:",option) - # torch.npu.set_option(option) - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import argparse
+import logging
+import math
+import os
+import random
+import time
+from pathlib import Path
+from warnings import warn
+
+import numpy as np
+import torch.distributed as dist
+import torch.nn.functional as F
+import torch.optim as optim
+import torch.optim.lr_scheduler as lr_scheduler
+import torch.utils.data
+import yaml
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.tensorboard import SummaryWriter
+import torch.multiprocessing as mp
+
+import test  # import test.py to get mAP after each epoch
+#from models.yolo import Model
+from models.models import *
+from utils.autoanchor import check_anchors
+from utils.datasets import create_dataloader
+from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
+    fitness, fitness_p, fitness_r, fitness_ap50, fitness_ap, fitness_f, strip_optimizer, get_latest_run,\
+    check_dataset, check_file, check_git_status, check_img_size, print_mutation, set_logging
+from utils.google_utils import attempt_download
+from utils.loss import compute_loss
+from utils.plots import plot_images, plot_labels, plot_results, plot_evolution, output_to_target
+from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first
+
+logger = logging.getLogger(__name__)
+
+mixed_precision = True
+try:
+    import apex
+    from apex import amp
+except:
+    print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
+    mixed_precision = False  # not installed
+
+use_wandb = False
+if use_wandb:
+    try:
+        import wandb
+    except:
+        print("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")
+else:
+    wandb = None
+
+
+def train(hyp, opt, device, tb_writer=None, wandb=None):
+    print(f'Hyperparameters {hyp}')
+    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
+        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.local_rank
+
+    # Directories
+    wdir = save_dir / 'weights'
+    wdir.mkdir(parents=True, exist_ok=True)  # make dir
+    last = wdir / 'last.pt'
+    best = wdir / 'best.pt'
+    results_file = save_dir / 'results.txt'
+
+    # Save run settings
+    with open(save_dir / 'hyp.yaml', 'w') as f:
+        yaml.dump(hyp, f, sort_keys=False)
+    with open(save_dir / 'opt.yaml', 'w') as f:
+        yaml.dump(vars(opt), f, sort_keys=False)
+
+    # Configure
+    plots = not opt.evolve  # create plots
+    cuda = device.type != 'cpu'
+    init_seeds(2 + rank)
+    with open(opt.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+    with torch_distributed_zero_first(rank):
+        check_dataset(data_dict)  # check
+    train_path = data_dict['train']
+    test_path = data_dict['val']
+    nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names'])  # number classes, names
+    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check
+
+    # Model
+    pretrained = weights.endswith('.pt')
+    if pretrained:
+        with torch_distributed_zero_first(rank):
+            attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location=device)  # load checkpoint
+        model = Darknet(opt.cfg).to(device)  # create
+        state_dict = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
+        model.load_state_dict(state_dict, strict=False)
+        print('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
+    else:
+        model = Darknet(opt.cfg).to(device)  # 
create + + # Image sizes + gs = 64 #int(max(model.stride)) # grid size (max stride) + imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + + # Optimizer + nbs = 64 # nominal batch size + accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing + hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + for k, v in dict(model.named_parameters()).items(): + if '.bias' in k: + pg2.append(v) # biases + elif 'Conv2d.weight' in k: + pg1.append(v) # apply weight_decay + elif 'm.weight' in k: + pg1.append(v) # apply weight_decay + elif 'w.weight' in k: + pg1.append(v) # apply weight_decay + else: + pg0.append(v) # all else + + if opt.adam: + optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + else: + if device.type == 'npu': + optimizer = apex.optimizers.NpuFusedSGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + else: + optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) + + optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay + optimizer.add_param_group({'params': pg2}) # add pg2 (biases) + print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) + del pg0, pg1, pg2 + + # Logging + if wandb and wandb.run is None: + opt.hyp = hyp # add hyperparameters + wandb_run = wandb.init(config=opt, resume="allow", + project='YOLOR' if opt.project == 'runs/train' else Path(opt.project).stem, + name=save_dir.stem, + id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) + + # Resume + start_epoch = 0 + # best_fitness, best_fitness_p, best_fitness_r, best_fitness_ap50, best_fitness_ap, best_fitness_f = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + if pretrained: + # Optimizer + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + # best_fitness = ckpt['best_fitness'] + # best_fitness_p = ckpt['best_fitness_p'] + # best_fitness_r = ckpt['best_fitness_r'] + # best_fitness_ap50 = ckpt['best_fitness_ap50'] + # best_fitness_ap = ckpt['best_fitness_ap'] + # best_fitness_f = ckpt['best_fitness_f'] + + # Results + if ckpt.get('training_results') is not None: + with open(results_file, 'w') as file: + file.write(ckpt['training_results']) # write results.txt + + # Epochs + start_epoch = ckpt['epoch'] + 1 + if opt.resume: + assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) + if epochs < start_epoch: + print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% + (weights, ckpt['epoch'], epochs)) + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt, state_dict + + # Mixed precision training https://github.com/NVIDIA/apex + if mixed_precision: + if device.type == 'npu': + model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0, loss_scale=64, combine_grad=True) + else: + model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0, loss_scale=64) + + # Scheduler https://arxiv.org/pdf/1812.01187.pdf + # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR + lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) + # plot_lr_scheduler(optimizer, scheduler, epochs) + + + # SyncBatchNorm + if opt.sync_bn and cuda and rank != -1: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) + print('Using SyncBatchNorm()') + + # EMA + ema = ModelEMA(model) if rank in [-1, 0] else None + + # DDP mode + if device.type == 'cuda' and rank != -1: + model = DDP(model, device_ids=[rank], output_device=rank) + elif device.type == 'npu' and rank != -1: + model = DDP(model, device_ids=[rank], broadcast_buffers=False) + + # Trainloader + dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, + hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, + rank=rank, world_size=opt.world_size, workers=opt.workers) + mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class + nb = len(dataloader) # number of batches + assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) + + # Model parameters + hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset + model.nc = nc # attach number of classes to model + model.hyp = hyp # attach hyperparameters to model + model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) + model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights + model.names = names + + # Process 0 + if rank in [-1, 0]: + ema.updates = start_epoch * nb // accumulate # set EMA updates + testloader = create_dataloader(test_path, imgsz_test, batch_size*2, gs, opt, + hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, + rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader + + if not opt.resume: + labels = np.concatenate(dataset.labels, 0) + c = torch.tensor(labels[:, 0]) # classes + # cf = torch.bincount(c.long(), minlength=nc) + 1. 
# frequency + # model._initialize_biases(cf.to(device)) + if plots: + plot_labels(labels, save_dir=save_dir) + if tb_writer: + tb_writer.add_histogram('classes', c, 0) + if wandb: + wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.png')]}) + + # Anchors + # if not opt.noautoanchor: + # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + + + # Start training + t0 = time.time() + nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + maps = np.zeros(nc) # mAP per class + results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + scheduler.last_epoch = start_epoch - 1 # do not move + if rank in [0, -1]: + print('Image sizes %g train, %g test\n' + 'Using %g dataloader workers\nLogging results to %s\n' + 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) + + for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + model.train() + + # Update image weights (optional) + # When in DDP mode, the generated indices will be broadcasted to synchronize dataset. + if opt.image_weights: + # Generate indices + if rank in [-1, 0]: + cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights + iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights + dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + # Broadcast if DDP + if rank != -1: + indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() + dist.broadcast(indices, 0) + if rank != 0: + dataset.indices = indices.cpu().numpy() + + # Update mosaic border + # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) + # dataset.mosaic_border = [b - imgsz, -b] # height, width borders + + mloss = torch.zeros(4, device=device) # mean losses + if rank != -1: + dataloader.sampler.set_epoch(epoch) + optimizer.zero_grad() + start_time = time.time() + d_1 = time.time() + + for i, (imgs, targets, paths, _) in enumerate(dataloader): # batch ------------------------------------------------------------- + t_time = time.time() + d_time = t_time - d_1 + ni = i + nb * epoch # number integrated batches (since train start) + imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 + + # Warmup + if ni <= nw: + xi = [0, nw] # x interp + # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) + accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) + for j, x in enumerate(optimizer.param_groups): + # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 + x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) + if 'momentum' in x: + x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + + # Multi-scale + if opt.multi_scale: + sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sf = sz / max(imgs.shape[2:]) # scale factor + if sf != 1: + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) + imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + + # Forward + pred = model(imgs) + + # Loss + loss, loss_items = compute_loss(pred, targets.to(device), model) # 
scaled by batch_size + if rank != -1: + loss *= opt.world_size # gradient averaged between devices in DDP mode + if not torch.isfinite(loss): + print('WARNING: non-finite loss, ending training ', loss_items) + return results + + # Backward + if mixed_precision: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + # Optimize + if ni % accumulate == 0: + optimizer.step() + optimizer.zero_grad() + if ema is not None: + x = torch.tensor([1.]).to(device) + if device.type == 'npu': + params_fp32_fused = optimizer.get_model_combined_params() + ema.update(model, x, params_fp32_fused[0]) + else: + ema.update(model, x) + + if i <= 10: + sum_time = (time.time() - start_time) / (i + 1) + if i == 10: + start_time = time.time() + else: + sum_time = (time.time() - start_time) / (i - 10) + ptime = time.time() - d_1 + # Print + if rank in [-1, 0]: + mloss = (mloss * i + loss_items) / (i + 1) # update mean losses + mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0) # (GB) + s = ('%10s' * 2 + '%10.4g' * 6) % ( + '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) + print( + 'Epoch:[%2g][%4g/%4g][%s][FPS:%3.1f][mTime:%3.3f][pTime:%3.3f][dTime:%3.3f] GIoU:%.3f objectness:%.3f classfication:%.3f totalLoss:%.3f' % ( + epoch, i, nb, device, opt.total_batch_size / sum_time, sum_time, ptime, d_time, *mloss)) + + # Plot + if plots and ni < 3: + f = save_dir / f'train_batch{ni}.jpg' # filename + plot_images(images=imgs, targets=targets, paths=paths, fname=f) + # if tb_writer: + # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) + # tb_writer.add_graph(model, imgs) # add model to tensorboard + elif plots and ni == 3 and wandb: + wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]}) + if i > 170: + break + d_1 = time.time() + # end batch ------------------------------------------------------------------------------------------------ + # end epoch ---------------------------------------------------------------------------------------------------- + + # Scheduler + lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard + scheduler.step() + + # DDP process 0 or single-GPU + if rank in [-1, 0]: + # mAP + if ema: + ema.update_attr(model) + final_epoch = epoch + 1 == epochs + if False: # No test during training + results, maps, _ = test.test(opt.data, + batch_size=batch_size*2, + imgsz=imgsz_test, + model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema, + single_cls=opt.single_cls, + dataloader=testloader, + save_dir=save_dir, + plots=True) + + # Write + with open(results_file, 'a') as f: + f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + if len(opt.name) and opt.bucket: + os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) + + # Log + tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss + 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', + 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss + 'x/lr0', 'x/lr1', 'x/lr2'] # params + for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): + if tb_writer: + tb_writer.add_scalar(tag, x, epoch) # tensorboard + if wandb: + wandb.log({tag: x}) # W&B + + # # Update best mAP + # fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # fi_p = fitness_p(np.array(results).reshape(1, 
-1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # fi_r = fitness_r(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # fi_ap50 = fitness_ap50(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # fi_ap = fitness_ap(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # if (fi_p > 0.0) or (fi_r > 0.0): + # fi_f = fitness_f(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + # else: + # fi_f = 0.0 + # if fi > best_fitness: + # best_fitness = fi + # if fi_p > best_fitness_p: + # best_fitness_p = fi_p + # if fi_r > best_fitness_r: + # best_fitness_r = fi_r + # if fi_ap50 > best_fitness_ap50: + # best_fitness_ap50 = fi_ap50 + # if fi_ap > best_fitness_ap: + # best_fitness_ap = fi_ap + # if fi_f > best_fitness_f: + # best_fitness_f = fi_f + + # Save model + save = (not opt.nosave) or (final_epoch and not opt.evolve) + if save: + with open(results_file, 'r') as f: # create checkpoint + ckpt = {'epoch': epoch, + # 'best_fitness': best_fitness, + # 'best_fitness_p': best_fitness_p, + # 'best_fitness_r': best_fitness_r, + # 'best_fitness_ap50': best_fitness_ap50, + # 'best_fitness_ap': best_fitness_ap, + # 'best_fitness_f': best_fitness_f, + 'training_results': f.read(), + 'model': ema.ema.module.state_dict() if hasattr(ema, 'module') else ema.ema.state_dict(), + 'optimizer': None if final_epoch else optimizer.state_dict(), + 'wandb_id': wandb_run.id if wandb else None} + + # Save last, best and delete + torch.save(ckpt, last) + # if best_fitness == fi: + # torch.save(ckpt, best) + # if (best_fitness == fi) and (epoch >= 200): + # torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) + # if best_fitness == fi: + # torch.save(ckpt, wdir / 'best_overall.pt') + # if best_fitness_p == fi_p: + # torch.save(ckpt, wdir / 'best_p.pt') + # if best_fitness_r == fi_r: + # torch.save(ckpt, wdir / 'best_r.pt') + # if best_fitness_ap50 == fi_ap50: + # torch.save(ckpt, wdir / 'best_ap50.pt') + # if best_fitness_ap == fi_ap: + # torch.save(ckpt, wdir / 'best_ap.pt') + # if best_fitness_f == fi_f: + # torch.save(ckpt, wdir / 'best_f.pt') + if epoch == 0: + torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + if ((epoch+1) % 25) == 0: + torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + if epoch >= (epochs-5): + torch.save(ckpt, wdir / 'last_{:03d}.pt'.format(epoch)) + elif epoch >= 420: + torch.save(ckpt, wdir / 'last_{:03d}.pt'.format(epoch)) + del ckpt + # end epoch ---------------------------------------------------------------------------------------------------- + # end training + + if rank in [-1, 0]: + # Strip optimizers + n = opt.name if opt.name.isnumeric() else '' + fresults, flast, fbest = save_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' + for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): + if f1.exists(): + os.rename(f1, f2) # rename + if str(f2).endswith('.pt'): # is *.pt + strip_optimizer(f2) # strip optimizer + os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload + # Finish + if plots: + plot_results(save_dir=save_dir) # save as results.png + if wandb: + wandb.log({"Results": [wandb.Image(str(save_dir / x), caption=x) for x in + ['results.png', 'precision-recall_curve.png']]}) + print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) + + else: + dist.destroy_process_group() 
+ + wandb.run.finish() if wandb and wandb.run else None + torch.cuda.empty_cache() + + return results + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default='yolor_p6.pt', help='initial weights path') + parser.add_argument('--cfg', type=str, default='', help='model.yaml path') + parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') + parser.add_argument('--hyp', type=str, default='data/hyp.scratch.1280.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--batch-size', type=int, default=8, help='total batch size for all GPUs') + parser.add_argument('--img-size', nargs='+', type=int, default=[1280, 1280], help='[train, test] image sizes') + parser.add_argument('--rect', action='store_true', help='rectangular training') + parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') + parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') + parser.add_argument('--notest', action='store_true', help='only test final epoch') + parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') + parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') + parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') + parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') + parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu or npu') + parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') + parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') + parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--full', action='store_true', help='full mode') + parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') + parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100') + parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') + parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--npu', default=-1, type=int, help='NPU id to use.') + + # NPU DDP mode + parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training') + parser.add_argument('--device-num', default=1, type=int, help='multi NPU parameter, GPU or CPU do not modify') + parser.add_argument('--addr', default='127.0.0.1', type=str, help='DDP master node IP') + parser.add_argument('--dist-url', default='tcp://127.0.0.1:29501', type=str, + help='url used to set up distributed training') + opt = parser.parse_args() + + if opt.dist_url == "env://": + opt.world_size = int(os.environ["WORLD_SIZE"]) + ngpus_per_node = opt.device_num + opt.npu_ddp = (opt.device_num > 1 or opt.world_size > 1) + if opt.npu_ddp: + print('multi npu 
training') + os.environ['MASTER_ADDR'] = opt.addr # master ip + os.environ['MASTER_PORT'] = '29501' + os.environ['KERNEL_NAME_ID'] = str(0) + opt.world_size = ngpus_per_node * opt.world_size # the sum of GPU or NPU in all the nodes + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, opt)) + else: + print('1p training') + main_worker(opt.npu, ngpus_per_node, opt) + + # # Set DDP variables + # opt.total_batch_size = opt.batch_size + # opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 + # opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 + # set_logging(opt.global_rank) + # if opt.global_rank in [-1, 0]: + # check_git_status() + + # # DDP mode + # device = select_device(opt.device, batch_size=opt.batch_size) + # if opt.local_rank != -1: + # assert torch.cuda.device_count() > opt.local_rank + # torch.cuda.set_device(opt.local_rank) + # device = torch.device('cuda', opt.local_rank) + # dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + # assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' + # opt.batch_size = opt.total_batch_size // opt.world_size + + +def main_worker(npu, ngpus_per_node, opt): + # Resume + if opt.resume: # resume an interrupted run + ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path + assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' + with open(Path(ckpt).parent.parent / 'opt.yaml') as f: + opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace + opt.cfg, opt.weights, opt.resume = '', ckpt, True + print('Resuming training from %s' % ckpt) + else: + # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') + opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' + opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) + opt.name = 'evolve' if opt.evolve else opt.name + opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve) # increment run + + # Hyperparameters + with open(opt.hyp) as f: + hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps + if 'box' not in hyp: + warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' % + (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120')) + hyp['box'] = hyp.pop('giou') + + # npu DDP + if opt.npu_ddp: + opt.npu = npu + os.environ['KERNEL_NAME_ID'] = str(npu) + print("[npu id:", opt.npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID']) + opt.local_rank = opt.local_rank * ngpus_per_node + npu + global mixed_precision + device = torch_utils.select_device(opt.device, opt.npu, apex=mixed_precision, batch_size=opt.batch_size) + opt.total_batch_size = opt.batch_size + if device.type == 'cpu': + mixed_precision = False + elif opt.local_rank != -1 and device.type == 'cuda': + # DDP mode + assert torch.cuda.device_count() > opt.local_rank + torch.cuda.set_device(opt.local_rank) + device = torch.device("cuda", opt.local_rank) + dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + + opt.world_size = dist.get_world_size() + assert opt.batch_size % opt.world_size == 0, "Batch size is not a multiple of the number of devices given!" 
+ opt.batch_size = opt.total_batch_size // opt.world_size + elif opt.local_rank != -1 and device.type == 'npu': + dist.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.local_rank) + assert opt.batch_size % opt.world_size == 0, "Batch size is not a multiple of the number of devices given!" + opt.batch_size = opt.total_batch_size // opt.world_size + + # Train + print(opt) + if not opt.evolve: + tb_writer = None # init loggers + if opt.local_rank in [-1, 0]: + print(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/') + tb_writer = SummaryWriter(opt.save_dir) # Tensorboard + train(hyp, opt, device, tb_writer, wandb) + + # Evolve hyperparameters (optional) + else: + # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) + meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) + 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) + 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 + 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay + 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) + 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum + 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr + 'box': (1, 0.02, 0.2), # box loss gain + 'cls': (1, 0.2, 4.0), # cls loss gain + 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight + 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) + 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight + 'iou_t': (0, 0.1, 0.7), # IoU training threshold + 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold + 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) + 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) + 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) + 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) + 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) + 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) + 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) + 'scale': (1, 0.0, 0.9), # image scale (+/- gain) + 'shear': (1, 0.0, 10.0), # image shear (+/- deg) + 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 + 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) + 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) + 'mosaic': (1, 0.0, 1.0), # image mixup (probability) + 'mixup': (1, 0.0, 1.0)} # image mixup (probability) + + assert opt.local_rank == -1, 'DDP mode not implemented for --evolve' + opt.notest, opt.nosave = True, True # only test/save final epoch + # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices + yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here + if opt.bucket: + os.system('gsutil cp gs://%s/evolve.txt .' 
% opt.bucket) # download evolve.txt if exists + + for _ in range(300): # generations to evolve + if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate + # Select parent(s) + parent = 'single' # parent selection method: 'single' or 'weighted' + x = np.loadtxt('evolve.txt', ndmin=2) + n = min(5, len(x)) # number of previous results to consider + x = x[np.argsort(-fitness(x))][:n] # top n mutations + w = fitness(x) - fitness(x).min() # weights + if parent == 'single' or len(x) == 1: + # x = x[random.randint(0, n - 1)] # random selection + x = x[random.choices(range(n), weights=w)[0]] # weighted selection + elif parent == 'weighted': + x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination + + # Mutate + mp, s = 0.8, 0.2 # mutation probability, sigma + npr = np.random + npr.seed(int(time.time())) + g = np.array([x[0] for x in meta.values()]) # gains 0-1 + ng = len(meta) + v = np.ones(ng) + while all(v == 1): # mutate until a change occurs (prevent duplicates) + v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) + for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) + hyp[k] = float(x[i + 7] * v[i]) # mutate + + # Constrain to limits + for k, v in meta.items(): + hyp[k] = max(hyp[k], v[1]) # lower limit + hyp[k] = min(hyp[k], v[2]) # upper limit + hyp[k] = round(hyp[k], 5) # significant digits + + # Train mutation + results = train(hyp.copy(), opt, device, wandb=wandb) + + # Write mutation results + print_mutation(hyp.copy(), results, yaml_file, opt.bucket) + + # Plot results + plot_evolution(yaml_file) + print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' + f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') + + +if __name__ == '__main__': + # option = {} + # option["ACL_OP_DEBUG_LEVEL"] = 3 # 算子debug功能,暂不开启 + # option["ACL_DEBUG_DIR"] = "debug_file" # 算子debug功能对应文件夹,暂不开启 + # option["ACL_OP_COMPILER_CACHE_MODE"] = "enable" # cache功能启用 + # option["ACL_OP_COMPILER_CACHE_DIR"] = "./kernel_meta" # cache所在文件夹 + # print("option:",option) + # torch.npu.set_option(option) + main() diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/__init__.py b/PyTorch/contrib/cv/detection/YOLOR/utils/__init__.py index d3f5a12faa99758192ecc4ed3fc22c9249232e86..8b137891791fe96927ad78e64b0aad7bded08bdc 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/__init__.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/__init__.py @@ -1 +1 @@ - + diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/activations.py b/PyTorch/contrib/cv/detection/YOLOR/utils/activations.py index 8b64eae41c5ac8763562785deabf7947d6248457..8bf1e62efbd623a784f99191cb9577fbe2541f5a 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/activations.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/activations.py @@ -1,85 +1,85 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-# Activation functions
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
-class Swish(nn.Module):  #
-    @staticmethod
-    def forward(x):
-        return x * torch.sigmoid(x)
-
-
-class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
-    @staticmethod
-    def forward(x):
-        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
-        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
-
-
-class MemoryEfficientSwish(nn.Module):
-    class F(torch.autograd.Function):
-        @staticmethod
-        def forward(ctx, x):
-            ctx.save_for_backward(x)
-            return x * torch.sigmoid(x)
-
-        @staticmethod
-        def backward(ctx, grad_output):
-            x = ctx.saved_tensors[0]
-            sx = torch.sigmoid(x)
-            return grad_output * (sx * (1 + x * (1 - sx)))
-
-    def forward(self, x):
-        return self.F.apply(x)
-
-
-# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
-class Mish(nn.Module):
-    @staticmethod
-    def forward(x):
-        return x * F.softplus(x).tanh()
-
-
-class MemoryEfficientMish(nn.Module):
-    class F(torch.autograd.Function):
-        @staticmethod
-        def forward(ctx, x):
-            ctx.save_for_backward(x)
-            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
-
-        @staticmethod
-        def backward(ctx, grad_output):
-            x = ctx.saved_tensors[0]
-            sx = torch.sigmoid(x)
-            fx = F.softplus(x).tanh()
-            return grad_output * (fx + x * sx * (1 - fx * fx))
-
-    def forward(self, x):
-        return self.F.apply(x)
-
-
-# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
-class FReLU(nn.Module):
-    def __init__(self, c1, k=3):  # ch_in, kernel
-        super().__init__()
-        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
-        self.bn = nn.BatchNorm2d(c1)
-
-    def forward(self, x):
-        return torch.max(x, self.bn(self.conv(x)))
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Activation functions
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
+class Swish(nn.Module):  #
+    @staticmethod
+    def forward(x):
+        return x * torch.sigmoid(x)
+
+
+class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
+    @staticmethod
+    def forward(x):
+        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
+        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+
+
+class MemoryEfficientSwish(nn.Module):
+    class F(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return x * torch.sigmoid(x)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            sx = torch.sigmoid(x)
+            return grad_output * (sx * (1 + x * (1 - sx)))
+
+    def forward(self, x):
+        return self.F.apply(x)
+
+
+# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
+class Mish(nn.Module):
+    @staticmethod
+    def forward(x):
+        return x * F.softplus(x).tanh()
+
+
+class MemoryEfficientMish(nn.Module):
+    class F(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            sx = torch.sigmoid(x)
+            fx = F.softplus(x).tanh()
+            return grad_output * (fx + x * sx * (1 - fx * fx))
+
+    def forward(self, x):
+        return self.F.apply(x)
+
+
+# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
+class FReLU(nn.Module):
+    def __init__(self, c1, k=3):  # ch_in, kernel
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
+        self.bn = nn.BatchNorm2d(c1)
+
+    def forward(self, x):
+        return torch.max(x, self.bn(self.conv(x)))
diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/autoanchor.py b/PyTorch/contrib/cv/detection/YOLOR/utils/autoanchor.py
index 2fd6d56da54e6fc7072b337a75236ce7105043e8..709b00f4b1e9e6b3bb9dfc93962fa60ec68ad56f 100644
--- a/PyTorch/contrib/cv/detection/YOLOR/utils/autoanchor.py
+++ b/PyTorch/contrib/cv/detection/YOLOR/utils/autoanchor.py
@@ -1,166 +1,166 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Auto-anchor utils
-
-import numpy as np
-import torch
-import yaml
-from scipy.cluster.vq import kmeans
-from tqdm import tqdm
-
-
-def check_anchor_order(m):
-    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
-    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
-    da = a[-1] - a[0]  # delta a
-    ds = m.stride[-1] - m.stride[0]  # delta s
-    if da.sign() != ds.sign():  # same order
-        print('Reversing anchor order')
-        m.anchors[:] = m.anchors.flip(0)
-        m.anchor_grid[:] = m.anchor_grid.flip(0)
-
-
-def check_anchors(dataset, model, thr=4.0, imgsz=640):
-    # Check anchor fit to data, recompute if necessary
-    print('\nAnalyzing anchors... 
', end='') - m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() - shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) - scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale - wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh - - def metric(k): # compute metric - r = wh[:, None] / k[None] - x = torch.min(r, 1. / r).min(2)[0] # ratio metric - best = x.max(1)[0] # best_x - aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold - bpr = (best > 1. / thr).float().mean() # best possible recall - return bpr, aat - - bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) - print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') - if bpr < 0.98: # threshold to recompute - print('. Attempting to improve anchors, please wait...') - na = m.anchor_grid.numel() // 2 # number of anchors - new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) - new_bpr = metric(new_anchors.reshape(-1, 2))[0] - if new_bpr > bpr: # replace anchors - new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) - m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference - m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss - check_anchor_order(m) - print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') - else: - print('Original anchors better than new anchors. Proceeding with original anchors.') - print('') # newline - - -def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): - """ Creates kmeans-evolved anchors from training dataset - - Arguments: - path: path to dataset *.yaml, or a loaded dataset - n: number of anchors - img_size: image size used for training - thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 - gen: generations to evolve anchors using genetic algorithm - verbose: print all results - - Return: - k: kmeans evolved anchors - - Usage: - from utils.general import *; _ = kmean_anchors() - """ - thr = 1. / thr - - def metric(k, wh): # compute metrics - r = wh[:, None] / k[None] - x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric - # x = wh_iou(wh, torch.tensor(k)) # iou metric - return x, x.max(1)[0] # x, best_x - - def anchor_fitness(k): # mutation fitness - _, best = metric(torch.tensor(k, dtype=torch.float32), wh) - return (best * (best > thr).float()).mean() # fitness - - def print_results(k): - k = k[np.argsort(k.prod(1))] # sort small to large - x, best = metric(k, wh0) - bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr - print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) - print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % - (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') - for i, x in enumerate(k): - print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg - return k - - if isinstance(path, str): # *.yaml file - with open(path) as f: - data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict - from utils.datasets import LoadImagesAndLabels - dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) - else: - dataset = path # dataset - - # Get label wh - shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) - wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh - - # Filter - i = (wh0 < 3.0).any(1).sum() - if i: - print('WARNING: Extremely small objects found. ' - '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) - wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels - - # Kmeans calculation - print('Running kmeans for %g anchors on %g points...' % (n, len(wh))) - s = wh.std(0) # sigmas for whitening - k, dist = kmeans(wh / s, n, iter=30) # points, mean distance - k *= s - wh = torch.tensor(wh, dtype=torch.float32) # filtered - wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered - k = print_results(k) - - # Plot - # k, d = [None] * 20, [None] * 20 - # for i in tqdm(range(1, 21)): - # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance - # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) - # ax = ax.ravel() - # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') - # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh - # ax[0].hist(wh[wh[:, 0]<100, 0],400) - # ax[1].hist(wh[wh[:, 1]<100, 1],400) - # fig.tight_layout() - # fig.savefig('wh.png', dpi=200) - - # Evolve - npr = np.random - f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma - pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar - for _ in pbar: - v = np.ones(sh) - while (v == 1).all(): # mutate until a change occurs (prevent duplicates) - v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) - kg = (k.copy() * v).clip(min=2.0) - fg = anchor_fitness(kg) - if fg > f: - f, k = fg, kg.copy() - pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f - if verbose: - print_results(k) - - return print_results(k) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Auto-anchor utils + +import numpy as np +import torch +import yaml +from scipy.cluster.vq import kmeans +from tqdm import tqdm + + +def check_anchor_order(m): + # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary + a = m.anchor_grid.prod(-1).view(-1) # anchor area + da = a[-1] - a[0] # delta a + ds = m.stride[-1] - m.stride[0] # delta s + if da.sign() != ds.sign(): # same order + print('Reversing anchor order') + m.anchors[:] = m.anchors.flip(0) + m.anchor_grid[:] = m.anchor_grid.flip(0) + + +def check_anchors(dataset, model, thr=4.0, imgsz=640): + # Check anchor fit to data, recompute if necessary + print('\nAnalyzing anchors... ', end='') + m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() + shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) + scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale + wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh + + def metric(k): # compute metric + r = wh[:, None] / k[None] + x = torch.min(r, 1. / r).min(2)[0] # ratio metric + best = x.max(1)[0] # best_x + aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold + bpr = (best > 1. / thr).float().mean() # best possible recall + return bpr, aat + + bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) + print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') + if bpr < 0.98: # threshold to recompute + print('. Attempting to improve anchors, please wait...') + na = m.anchor_grid.numel() // 2 # number of anchors + new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) + new_bpr = metric(new_anchors.reshape(-1, 2))[0] + if new_bpr > bpr: # replace anchors + new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) + m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference + m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss + check_anchor_order(m) + print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') + else: + print('Original anchors better than new anchors. Proceeding with original anchors.') + print('') # newline + + +def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): + """ Creates kmeans-evolved anchors from training dataset + + Arguments: + path: path to dataset *.yaml, or a loaded dataset + n: number of anchors + img_size: image size used for training + thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 + gen: generations to evolve anchors using genetic algorithm + verbose: print all results + + Return: + k: kmeans evolved anchors + + Usage: + from utils.general import *; _ = kmean_anchors() + """ + thr = 1. / thr + + def metric(k, wh): # compute metrics + r = wh[:, None] / k[None] + x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric + # x = wh_iou(wh, torch.tensor(k)) # iou metric + return x, x.max(1)[0] # x, best_x + + def anchor_fitness(k): # mutation fitness + _, best = metric(torch.tensor(k, dtype=torch.float32), wh) + return (best * (best > thr).float()).mean() # fitness + + def print_results(k): + k = k[np.argsort(k.prod(1))] # sort small to large + x, best = metric(k, wh0) + bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr + print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) + print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % + (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') + for i, x in enumerate(k): + print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg + return k + + if isinstance(path, str): # *.yaml file + with open(path) as f: + data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict + from utils.datasets import LoadImagesAndLabels + dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) + else: + dataset = path # dataset + + # Get label wh + shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) + wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh + + # Filter + i = (wh0 < 3.0).any(1).sum() + if i: + print('WARNING: Extremely small objects found. ' + '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) + wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels + + # Kmeans calculation + print('Running kmeans for %g anchors on %g points...' % (n, len(wh))) + s = wh.std(0) # sigmas for whitening + k, dist = kmeans(wh / s, n, iter=30) # points, mean distance + k *= s + wh = torch.tensor(wh, dtype=torch.float32) # filtered + wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered + k = print_results(k) + + # Plot + # k, d = [None] * 20, [None] * 20 + # for i in tqdm(range(1, 21)): + # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance + # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) + # ax = ax.ravel() + # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') + # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh + # ax[0].hist(wh[wh[:, 0]<100, 0],400) + # ax[1].hist(wh[wh[:, 1]<100, 1],400) + # fig.tight_layout() + # fig.savefig('wh.png', dpi=200) + + # Evolve + npr = np.random + f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma + pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar + for _ in pbar: + v = np.ones(sh) + while (v == 1).all(): # mutate until a change occurs (prevent duplicates) + v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + kg = (k.copy() * v).clip(min=2.0) + fg = anchor_fitness(kg) + if fg > f: + f, k = fg, kg.copy() + pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f + if verbose: + print_results(k) + + return print_results(k) diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/datasets.py b/PyTorch/contrib/cv/detection/YOLOR/utils/datasets.py index 8b9be1318b11a9df6d296db65f848da03d5ab3f2..35b509d37e4c88f7e707a69459fffea3e5b5033d 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/datasets.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/datasets.py @@ -1,1311 +1,1311 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Dataset utils and dataloaders - -import glob -import math -import os -import random -import shutil -import time -from itertools import repeat -from multiprocessing.pool import ThreadPool -from pathlib import Path -from threading import Thread - -import cv2 -import numpy as np -import torch -from PIL import Image, ExifTags -from torch.utils.data import Dataset -from tqdm import tqdm - -import pickle -from copy import deepcopy -from pycocotools import mask as maskUtils -from torchvision.utils import save_image - -from utils.general import xyxy2xywh, xywh2xyxy -from utils.torch_utils import torch_distributed_zero_first - -# Parameters -help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' -img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes -vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes - -# Get orientation exif tag -for orientation in ExifTags.TAGS.keys(): - if ExifTags.TAGS[orientation] == 'Orientation': - break - - -def get_hash(files): - # Returns a single hash value of a list of files - return sum(os.path.getsize(f) for f in files if os.path.isfile(f)) - - -def exif_size(img): - # Returns exif-corrected PIL size - s = img.size # (width, height) - try: - rotation = dict(img._getexif().items())[orientation] - if rotation == 6: # rotation 270 - s = (s[1], s[0]) - elif rotation == 8: # rotation 90 - s = (s[1], s[0]) - except: - pass - - return s - - -def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, - rank=-1, world_size=1, workers=8): - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - with torch_distributed_zero_first(rank): - dataset = LoadImagesAndLabels(path, imgsz, batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=opt.single_cls, - stride=int(stride), - pad=pad, - rank=rank) - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None - dataloader = InfiniteDataLoader(dataset, - batch_size=batch_size, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader() - return dataloader, dataset - - -def create_dataloader9(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, - rank=-1, world_size=1, workers=8): - # Make sure only the first process in DDP process the dataset first, and the following others can use the cache - with torch_distributed_zero_first(rank): - dataset = LoadImagesAndLabels9(path, imgsz, batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=opt.single_cls, - 
stride=int(stride), - pad=pad, - rank=rank) - - batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None - dataloader = InfiniteDataLoader(dataset, - batch_size=batch_size, - num_workers=nw, - sampler=sampler, - pin_memory=True, - collate_fn=LoadImagesAndLabels9.collate_fn) # torch.utils.data.DataLoader() - return dataloader, dataset - - -class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): - """ Dataloader that reuses workers - - Uses same syntax as vanilla DataLoader - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for i in range(len(self)): - yield next(self.iterator) - - -class _RepeatSampler(object): - """ Sampler that repeats forever - - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) - - -class LoadImages: # for inference - def __init__(self, path, img_size=640, auto_size=32): - p = str(Path(path)) # os-agnostic - p = os.path.abspath(p) # absolute path - if '*' in p: - files = sorted(glob.glob(p, recursive=True)) # glob - elif os.path.isdir(p): - files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir - elif os.path.isfile(p): - files = [p] # files - else: - raise Exception('ERROR: %s does not exist' % p) - - images = [x for x in files if x.split('.')[-1].lower() in img_formats] - videos = [x for x in files if x.split('.')[-1].lower() in vid_formats] - ni, nv = len(images), len(videos) - - self.img_size = img_size - self.auto_size = auto_size - self.files = images + videos - self.nf = ni + nv # number of files - self.video_flag = [False] * ni + [True] * nv - self.mode = 'images' - if any(videos): - self.new_video(videos[0]) # new video - else: - self.cap = None - assert self.nf > 0, 'No images or videos found in %s. 
Supported formats are:\nimages: %s\nvideos: %s' % \ - (p, img_formats, vid_formats) - - def __iter__(self): - self.count = 0 - return self - - def __next__(self): - if self.count == self.nf: - raise StopIteration - path = self.files[self.count] - - if self.video_flag[self.count]: - # Read video - self.mode = 'video' - ret_val, img0 = self.cap.read() - if not ret_val: - self.count += 1 - self.cap.release() - if self.count == self.nf: # last video - raise StopIteration - else: - path = self.files[self.count] - self.new_video(path) - ret_val, img0 = self.cap.read() - - self.frame += 1 - print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='') - - else: - # Read image - self.count += 1 - img0 = cv2.imread(path) # BGR - assert img0 is not None, 'Image Not Found ' + path - print('image %g/%g %s: ' % (self.count, self.nf, path), end='') - - # Padded resize - img = letterbox(img0, new_shape=self.img_size, auto_size=self.auto_size)[0] - - # Convert - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - img = np.ascontiguousarray(img) - - return path, img, img0, self.cap - - def new_video(self, path): - self.frame = 0 - self.cap = cv2.VideoCapture(path) - self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - def __len__(self): - return self.nf # number of files - - -class LoadWebcam: # for inference - def __init__(self, pipe='0', img_size=640): - self.img_size = img_size - - if pipe.isnumeric(): - pipe = eval(pipe) # local camera - # pipe = 'rtsp://192.168.1.64/1' # IP camera - # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login - # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera - - self.pipe = pipe - self.cap = cv2.VideoCapture(pipe) # video capture object - self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size - - def __iter__(self): - self.count = -1 - return self - - def __next__(self): - self.count += 1 - if cv2.waitKey(1) == ord('q'): # q to quit - self.cap.release() - cv2.destroyAllWindows() - raise StopIteration - - # Read frame - if self.pipe == 0: # local camera - ret_val, img0 = self.cap.read() - img0 = cv2.flip(img0, 1) # flip left-right - else: # IP camera - n = 0 - while True: - n += 1 - self.cap.grab() - if n % 30 == 0: # skip frames - ret_val, img0 = self.cap.retrieve() - if ret_val: - break - - # Print - assert ret_val, 'Camera Error %s' % self.pipe - img_path = 'webcam.jpg' - print('webcam %g: ' % self.count, end='') - - # Padded resize - img = letterbox(img0, new_shape=self.img_size)[0] - - # Convert - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - img = np.ascontiguousarray(img) - - return img_path, img, img0, None - - def __len__(self): - return 0 - - -class LoadStreams: # multiple IP or RTSP cameras - def __init__(self, sources='streams.txt', img_size=640): - self.mode = 'images' - self.img_size = img_size - - if os.path.isfile(sources): - with open(sources, 'r') as f: - sources = [x.strip() for x in f.read().splitlines() if len(x.strip())] - else: - sources = [sources] - - n = len(sources) - self.imgs = [None] * n - self.sources = sources - for i, s in enumerate(sources): - # Start the thread to read frames from the video stream - print('%g/%g: %s... 
' % (i + 1, n, s), end='') - cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s) - assert cap.isOpened(), 'Failed to open %s' % s - w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = cap.get(cv2.CAP_PROP_FPS) % 100 - _, self.imgs[i] = cap.read() # guarantee first frame - thread = Thread(target=self.update, args=([i, cap]), daemon=True) - print(' success (%gx%g at %.2f FPS).' % (w, h, fps)) - thread.start() - print('') # newline - - # check for common shapes - s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes - self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal - if not self.rect: - print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') - - def update(self, index, cap): - # Read next stream frame in a daemon thread - n = 0 - while cap.isOpened(): - n += 1 - # _, self.imgs[index] = cap.read() - cap.grab() - if n == 4: # read every 4th frame - _, self.imgs[index] = cap.retrieve() - n = 0 - time.sleep(0.01) # wait time - - def __iter__(self): - self.count = -1 - return self - - def __next__(self): - self.count += 1 - img0 = self.imgs.copy() - if cv2.waitKey(1) == ord('q'): # q to quit - cv2.destroyAllWindows() - raise StopIteration - - # Letterbox - img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0] - - # Stack - img = np.stack(img, 0) - - # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 - img = np.ascontiguousarray(img) - - return self.sources, img, img0, None - - def __len__(self): - return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years - - -class LoadImagesAndLabels(Dataset): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): - self.img_size = img_size - self.augment = augment - self.hyp = hyp - self.image_weights = image_weights - self.rect = False if image_weights else rect - self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) - self.mosaic_border = [-img_size // 2, -img_size // 2] - self.stride = stride - - def img2label_paths(img_paths): - # Define label paths as a function of image paths - sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] - - try: - f = [] # image files - for p in path if isinstance(path, list) else [path]: - p = Path(p) # os-agnostic - if p.is_dir(): # dir - f += glob.glob(str(p / '**' / '*.*'), recursive=True) - elif p.is_file(): # file - with open(p, 'r') as t: - t = t.read().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path - else: - raise Exception('%s does not exist' % p) - self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) - assert self.img_files, 'No images found' - except Exception as e: - raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) - - # Check cache - self.label_files = img2label_paths(self.img_files) # labels - cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels - if os.path.isfile(cache_path): - cache = torch.load(cache_path) # load - if cache['hash'] != 
get_hash(self.label_files + self.img_files): # dataset changed - cache = self.cache_labels(cache_path) # re-cache - else: - cache = self.cache_labels(cache_path) # cache - - # Read cache - cache.pop('hash') # remove hash - labels, shapes = zip(*cache.values()) - self.labels = list(labels) - self.shapes = np.array(shapes, dtype=np.float64) - self.img_files = list(cache.keys()) # update - self.label_files = img2label_paths(cache.keys()) # update - - n = len(shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index - nb = bi[-1] + 1 # number of batches - self.batch = bi # batch index of image - self.n = n - - # Rectangular Training - if self.rect: - # Sort by aspect ratio - s = self.shapes # wh - ar = s[:, 1] / s[:, 0] # aspect ratio - irect = ar.argsort() - self.img_files = [self.img_files[i] for i in irect] - self.label_files = [self.label_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - self.shapes = s[irect] # wh - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * nb - for i in range(nb): - ari = ar[bi == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride - - # Check labels - create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False - nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate - pbar = enumerate(self.label_files) - if rank in [-1, 0]: - pbar = tqdm(pbar) - for i, file in pbar: - l = self.labels[i] # label - if l is not None and l.shape[0]: - assert l.shape[1] == 5, '> 5 label columns: %s' % file - assert (l >= 0).all(), 'negative labels: %s' % file - assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file - if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows - nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows - if single_cls: - l[:, 0] = 0 # force dataset into single-class mode - self.labels[i] = l - nf += 1 # file found - - # Create subdataset (a smaller dataset) - if create_datasubset and ns < 1E4: - if ns == 0: - create_folder(path='./datasubset') - os.makedirs('./datasubset/images') - exclude_classes = 43 - if exclude_classes not in l[:, 0]: - ns += 1 - # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image - with open('./datasubset/images.txt', 'a') as f: - f.write(self.img_files[i] + '\n') - - # Extract object detection boxes for a second stage classifier - if extract_bounding_boxes: - p = Path(self.img_files[i]) - img = cv2.imread(str(p)) - h, w = img.shape[:2] - for j, x in enumerate(l): - f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) - if not os.path.exists(Path(f).parent): - os.makedirs(Path(f).parent) # make new output folder - - b = x[1:] * [w, h, w, h] # box - b[2:] = b[2:].max() # rectangle to square - b[2:] = b[2:] * 1.3 + 30 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image - b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' - else: - ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty - # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove - - if rank in [-1, 0]: - pbar.desc = 'Scanning labels %s 
(%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( - cache_path, nf, nm, ne, nd, n) - if nf == 0: - s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) - print(s) - assert not augment, '%s. Can not train without labels.' % s - - # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) - self.imgs = [None] * n - if cache_images: - gb = 0 # Gigabytes of cached images - self.img_hw0, self.img_hw = [None] * n, [None] * n - results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads - pbar = tqdm(enumerate(results), total=n) - for i, x in pbar: - self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) - gb += self.imgs[i].nbytes - pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) - - def cache_labels(self, path='labels.cache3'): - # Cache dataset labels, check images and read shapes - x = {} # dict - pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) - for (img, label) in pbar: - try: - l = [] - im = Image.open(img) - im.verify() # PIL verify - shape = exif_size(im) # image size - assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' - if os.path.isfile(label): - with open(label, 'r') as f: - l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels - if len(l) == 0: - l = np.zeros((0, 5), dtype=np.float32) - x[img] = [l, shape] - except Exception as e: - print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) - - x['hash'] = get_hash(self.label_files + self.img_files) - torch.save(x, path) # save for next time - return x - - def __len__(self): - return len(self.img_files) - - # def __iter__(self): - # self.count = -1 - # print('ran dataset iter') - # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) - # return self - - def __getitem__(self, index): - if self.image_weights: - index = self.indices[index] - - hyp = self.hyp - mosaic = self.mosaic and random.random() < hyp['mosaic'] - if mosaic: - # Load mosaic - img, labels = load_mosaic(self, index) - #img, labels = load_mosaic9(self, index) - shapes = None - - # MixUp https://arxiv.org/pdf/1710.09412.pdf - if random.random() < hyp['mixup']: - img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) - #img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) - r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 - img = (img * r + img2 * (1 - r)).astype(np.uint8) - labels = np.concatenate((labels, labels2), 0) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - # Load labels - labels = [] - x = self.labels[index] - if x.size > 0: - # Normalized xywh to pixel xyxy format - labels = x.copy() - labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width - labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height - labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] - labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] - - if self.augment: - # Augment imagespace - if not mosaic: - img, labels = random_perspective(img, labels, - 
degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - - # Augment colorspace - augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) - - # Apply cutouts - # if random.random() < 0.9: - # labels = cutout(img, labels) - - nL = len(labels) # number of labels - if nL: - labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh - labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 - labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 - - if self.augment: - # flip up-down - if random.random() < hyp['flipud']: - img = np.flipud(img) - if nL: - labels[:, 2] = 1 - labels[:, 2] - - # flip left-right - if random.random() < hyp['fliplr']: - img = np.fliplr(img) - if nL: - labels[:, 1] = 1 - labels[:, 1] - - labels_out = torch.zeros((nL, 6)) - if nL: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.img_files[index], shapes - - @staticmethod - def collate_fn(batch): - img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes - - -class LoadImagesAndLabels9(Dataset): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): - self.img_size = img_size - self.augment = augment - self.hyp = hyp - self.image_weights = image_weights - self.rect = False if image_weights else rect - self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) - self.mosaic_border = [-img_size // 2, -img_size // 2] - self.stride = stride - - def img2label_paths(img_paths): - # Define label paths as a function of image paths - sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] - - try: - f = [] # image files - for p in path if isinstance(path, list) else [path]: - p = Path(p) # os-agnostic - if p.is_dir(): # dir - f += glob.glob(str(p / '**' / '*.*'), recursive=True) - elif p.is_file(): # file - with open(p, 'r') as t: - t = t.read().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path - else: - raise Exception('%s does not exist' % p) - self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) - assert self.img_files, 'No images found' - except Exception as e: - raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) - - # Check cache - self.label_files = img2label_paths(self.img_files) # labels - cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels - if os.path.isfile(cache_path): - cache = torch.load(cache_path) # load - if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed - cache = self.cache_labels(cache_path) # re-cache - else: - cache = self.cache_labels(cache_path) # cache - - # Read cache - cache.pop('hash') # remove hash - labels, shapes = zip(*cache.values()) - self.labels = list(labels) - self.shapes = np.array(shapes, dtype=np.float64) - self.img_files = 
list(cache.keys()) # update - self.label_files = img2label_paths(cache.keys()) # update - - n = len(shapes) # number of images - bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index - nb = bi[-1] + 1 # number of batches - self.batch = bi # batch index of image - self.n = n - - # Rectangular Training - if self.rect: - # Sort by aspect ratio - s = self.shapes # wh - ar = s[:, 1] / s[:, 0] # aspect ratio - irect = ar.argsort() - self.img_files = [self.img_files[i] for i in irect] - self.label_files = [self.label_files[i] for i in irect] - self.labels = [self.labels[i] for i in irect] - self.shapes = s[irect] # wh - ar = ar[irect] - - # Set training image shapes - shapes = [[1, 1]] * nb - for i in range(nb): - ari = ar[bi == i] - mini, maxi = ari.min(), ari.max() - if maxi < 1: - shapes[i] = [maxi, 1] - elif mini > 1: - shapes[i] = [1, 1 / mini] - - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride - - # Check labels - create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False - nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate - pbar = enumerate(self.label_files) - if rank in [-1, 0]: - pbar = tqdm(pbar) - for i, file in pbar: - l = self.labels[i] # label - if l is not None and l.shape[0]: - assert l.shape[1] == 5, '> 5 label columns: %s' % file - assert (l >= 0).all(), 'negative labels: %s' % file - assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file - if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows - nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows - if single_cls: - l[:, 0] = 0 # force dataset into single-class mode - self.labels[i] = l - nf += 1 # file found - - # Create subdataset (a smaller dataset) - if create_datasubset and ns < 1E4: - if ns == 0: - create_folder(path='./datasubset') - os.makedirs('./datasubset/images') - exclude_classes = 43 - if exclude_classes not in l[:, 0]: - ns += 1 - # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image - with open('./datasubset/images.txt', 'a') as f: - f.write(self.img_files[i] + '\n') - - # Extract object detection boxes for a second stage classifier - if extract_bounding_boxes: - p = Path(self.img_files[i]) - img = cv2.imread(str(p)) - h, w = img.shape[:2] - for j, x in enumerate(l): - f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) - if not os.path.exists(Path(f).parent): - os.makedirs(Path(f).parent) # make new output folder - - b = x[1:] * [w, h, w, h] # box - b[2:] = b[2:].max() # rectangle to square - b[2:] = b[2:] * 1.3 + 30 # pad - b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image - b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' - else: - ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty - # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove - - if rank in [-1, 0]: - pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( - cache_path, nf, nm, ne, nd, n) - if nf == 0: - s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) - print(s) - assert not augment, '%s. Can not train without labels.' 
% s - - # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) - self.imgs = [None] * n - if cache_images: - gb = 0 # Gigabytes of cached images - self.img_hw0, self.img_hw = [None] * n, [None] * n - results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads - pbar = tqdm(enumerate(results), total=n) - for i, x in pbar: - self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) - gb += self.imgs[i].nbytes - pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) - - def cache_labels(self, path='labels.cache3'): - # Cache dataset labels, check images and read shapes - x = {} # dict - pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) - for (img, label) in pbar: - try: - l = [] - im = Image.open(img) - im.verify() # PIL verify - shape = exif_size(im) # image size - assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' - if os.path.isfile(label): - with open(label, 'r') as f: - l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels - if len(l) == 0: - l = np.zeros((0, 5), dtype=np.float32) - x[img] = [l, shape] - except Exception as e: - print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) - - x['hash'] = get_hash(self.label_files + self.img_files) - torch.save(x, path) # save for next time - return x - - def __len__(self): - return len(self.img_files) - - # def __iter__(self): - # self.count = -1 - # print('ran dataset iter') - # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) - # return self - - def __getitem__(self, index): - if self.image_weights: - index = self.indices[index] - - hyp = self.hyp - mosaic = self.mosaic and random.random() < hyp['mosaic'] - if mosaic: - # Load mosaic - #img, labels = load_mosaic(self, index) - img, labels = load_mosaic9(self, index) - shapes = None - - # MixUp https://arxiv.org/pdf/1710.09412.pdf - if random.random() < hyp['mixup']: - #img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) - img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) - r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 - img = (img * r + img2 * (1 - r)).astype(np.uint8) - labels = np.concatenate((labels, labels2), 0) - - else: - # Load image - img, (h0, w0), (h, w) = load_image(self, index) - - # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling - - # Load labels - labels = [] - x = self.labels[index] - if x.size > 0: - # Normalized xywh to pixel xyxy format - labels = x.copy() - labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width - labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height - labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] - labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] - - if self.augment: - # Augment imagespace - if not mosaic: - img, labels = random_perspective(img, labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - - # Augment colorspace - augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) - - # Apply cutouts - # if random.random() < 0.9: - # 
labels = cutout(img, labels) - - nL = len(labels) # number of labels - if nL: - labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh - labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 - labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 - - if self.augment: - # flip up-down - if random.random() < hyp['flipud']: - img = np.flipud(img) - if nL: - labels[:, 2] = 1 - labels[:, 2] - - # flip left-right - if random.random() < hyp['fliplr']: - img = np.fliplr(img) - if nL: - labels[:, 1] = 1 - labels[:, 1] - - labels_out = torch.zeros((nL, 6)) - if nL: - labels_out[:, 1:] = torch.from_numpy(labels) - - # Convert - img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 - img = np.ascontiguousarray(img) - - return torch.from_numpy(img), labels_out, self.img_files[index], shapes - - @staticmethod - def collate_fn(batch): - img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() - return torch.stack(img, 0), torch.cat(label, 0), path, shapes - - -# Ancillary functions -------------------------------------------------------------------------------------------------- -def load_image(self, index): - # loads 1 image from dataset, returns img, original hw, resized hw - img = self.imgs[index] - if img is None: # not cached - path = self.img_files[index] - img = cv2.imread(path) # BGR - assert img is not None, 'Image Not Found ' + path - h0, w0 = img.shape[:2] # orig hw - r = self.img_size / max(h0, w0) # resize image to img_size - if r != 1: # always resize down, only resize up if training with augmentation - interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) - return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized - else: - return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized - - -def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): - r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains - hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) - dtype = img.dtype # uint8 - - x = np.arange(0, 256, dtype=np.int16) - lut_hue = ((x * r[0]) % 180).astype(dtype) - lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) - lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - - img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) - cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed - - # Histogram equalization - # if random.random() < 0.2: - # for i in range(3): - # img[:, :, i] = cv2.equalizeHist(img[:, :, i]) - - -def load_mosaic(self, index): - # loads images in a mosaic - - labels4 = [] - s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img4 - if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) - elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc - x1b, 
y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h - elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) - x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) - elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) - x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - padw = x1a - x1b - padh = y1a - y1b - - # Labels - x = self.labels[index] - labels = x.copy() - if x.size > 0: # Normalized xywh to pixel xyxy format - labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw - labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh - labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw - labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh - labels4.append(labels) - - # Concat/clip labels - if len(labels4): - labels4 = np.concatenate(labels4, 0) - np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective - # img4, labels4 = replicate(img4, labels4) # replicate - - # Augment - img4, labels4 = random_perspective(img4, labels4, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove - - return img4, labels4 - - -def load_mosaic9(self, index): - # loads images in a 9-mosaic - - labels9 = [] - s = self.img_size - indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(8)] # 8 additional image indices - for i, index in enumerate(indices): - # Load image - img, _, (h, w) = load_image(self, index) - - # place img in img9 - if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - h0, w0 = h, w - c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates - elif i == 1: # top - c = s, s - h, s + w, s - elif i == 2: # top right - c = s + wp, s - h, s + wp + w, s - elif i == 3: # right - c = s + w0, s, s + w0 + w, s + h - elif i == 4: # bottom right - c = s + w0, s + hp, s + w0 + w, s + hp + h - elif i == 5: # bottom - c = s + w0 - w, s + h0, s + w0, s + h0 + h - elif i == 6: # bottom left - c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h - elif i == 7: # left - c = s - w, s + h0 - h, s, s + h0 - elif i == 8: # top left - c = s - w, s + h0 - hp - h, s, s + h0 - hp - - padx, pady = c[:2] - x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords - - # Labels - x = self.labels[index] - labels = x.copy() - if x.size > 0: # Normalized xywh to pixel xyxy format - labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padx - labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + pady - labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padx - labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + pady - labels9.append(labels) - - # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] - hp, wp = h, w # height, width previous - - # Offset - yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y - img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] - - # Concat/clip labels - if len(labels9): - labels9 = np.concatenate(labels9, 0) - labels9[:, [1, 3]] -= xc - labels9[:, [2, 4]] -= yc - - np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective - # img9, labels9 = replicate(img9, labels9) # replicate - - # Augment - img9, labels9 = random_perspective(img9, labels9, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - 
scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove - - return img9, labels9 - - -def replicate(img, labels): - # Replicate labels - h, w = img.shape[:2] - boxes = labels[:, 1:].astype(int) - x1, y1, x2, y2 = boxes.T - s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) - for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices - x1b, y1b, x2b, y2b = boxes[i] - bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y - x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) - - return img, labels - - -def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, auto_size=32): - # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 - shape = img.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better test mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if auto: # minimum rectangle - dw, dh = np.mod(dw, auto_size), np.mod(dh, auto_size) # wh padding - elif scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return img, ratio, (dw, dh) - - -def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): - # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) - # targets = [cls, xyxy] - - height = img.shape[0] + border[0] * 2 # shape(h,w,c) - width = img.shape[1] + border[1] * 2 - - # Center - C = np.eye(3) - C[0, 2] = -img.shape[1] / 2 # x translation (pixels) - C[1, 2] = -img.shape[0] / 2 # y translation (pixels) - - # Perspective - P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) - - # Rotation and Scale - R = np.eye(3) - a = random.uniform(-degrees, degrees) - # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations - s = random.uniform(1 - scale, 1 + scale) - # s = 2 ** random.uniform(-scale, scale) - R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) - - # Shear - S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) - - # Translation - T = np.eye(3) - T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation 
(pixels) - T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) - - # Combined rotation matrix - M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed - if perspective: - img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) - else: # affine - img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) - - # Visualize - # import matplotlib.pyplot as plt - # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() - # ax[0].imshow(img[:, :, ::-1]) # base - # ax[1].imshow(img2[:, :, ::-1]) # warped - - # Transform label coordinates - n = len(targets) - if n: - # warp points - xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @ M.T # transform - if perspective: - xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale - else: # affine - xy = xy[:, :2].reshape(n, 8) - - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - - # # apply angle-based reduction of bounding boxes - # radians = a * math.pi / 180 - # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 - # x = (xy[:, 2] + xy[:, 0]) / 2 - # y = (xy[:, 3] + xy[:, 1]) / 2 - # w = (xy[:, 2] - xy[:, 0]) * reduction - # h = (xy[:, 3] - xy[:, 1]) * reduction - # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T - - # clip boxes - xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) - xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) - - # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T) - targets = targets[i] - targets[:, 1:5] = xy[i] - - return img, targets - - -def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n) - # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio - w1, h1 = box1[2] - box1[0], box1[3] - box1[1] - w2, h2 = box2[2] - box2[0], box2[3] - box2[1] - ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio - return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates - - -def cutout(image, labels): - # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 - h, w = image.shape[:2] - - def bbox_ioa(box1, box2): - # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. 
boxes are x1y1x2y2 - box2 = box2.transpose() - - # Get the coordinates of bounding boxes - b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] - - # Intersection area - inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ - (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) - - # box2 area - box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 - - # Intersection over box2 area - return inter_area / box2_area - - # create random masks - scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction - for s in scales: - mask_h = random.randint(1, int(h * s)) - mask_w = random.randint(1, int(w * s)) - - # box - xmin = max(0, random.randint(0, w) - mask_w // 2) - ymin = max(0, random.randint(0, h) - mask_h // 2) - xmax = min(w, xmin + mask_w) - ymax = min(h, ymin + mask_h) - - # apply random color mask - image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] - - # return unobscured labels - if len(labels) and s > 0.03: - box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) - ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area - labels = labels[ioa < 0.60] # remove >60% obscured labels - - return labels - - -def create_folder(path='./new'): - # Create folder - if os.path.exists(path): - shutil.rmtree(path) # delete output folder - os.makedirs(path) # make new output folder - - -def flatten_recursive(path='../coco128'): - # Flatten a recursive directory by bringing all files to top level - new_path = Path(path + '_flat') - create_folder(new_path) - for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)): - shutil.copyfile(file, new_path / Path(file).name) - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Dataset utils and dataloaders + +import glob +import math +import os +import random +import shutil +import time +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path +from threading import Thread + +import cv2 +import numpy as np +import torch +from PIL import Image, ExifTags +from torch.utils.data import Dataset +from tqdm import tqdm + +import pickle +from copy import deepcopy +from pycocotools import mask as maskUtils +from torchvision.utils import save_image + +from utils.general import xyxy2xywh, xywh2xyxy +from utils.torch_utils import torch_distributed_zero_first + +# Parameters +help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' +img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes +vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == 'Orientation': + break + + +def get_hash(files): + # Returns a single hash value of a list of files + return sum(os.path.getsize(f) for f in files if os.path.isfile(f)) + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + try: + rotation = dict(img._getexif().items())[orientation] + if rotation == 6: # rotation 270 + s = (s[1], s[0]) + elif rotation == 8: # rotation 90 + s = (s[1], s[0]) + except: + pass + + return s + + +def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, + rank=-1, world_size=1, workers=8): + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = LoadImagesAndLabels(path, imgsz, batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + rank=rank) + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + dataloader = InfiniteDataLoader(dataset, + batch_size=batch_size, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader() + return dataloader, dataset + + +def create_dataloader9(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, + rank=-1, world_size=1, workers=8): + # Make sure only the first process in DDP process the dataset first, and the following others can use the cache + with torch_distributed_zero_first(rank): + dataset = LoadImagesAndLabels9(path, imgsz, batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + rank=rank) + + batch_size = min(batch_size, len(dataset)) + nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + dataloader = InfiniteDataLoader(dataset, + batch_size=batch_size, + num_workers=nw, + sampler=sampler, + pin_memory=True, + 
collate_fn=LoadImagesAndLabels9.collate_fn) # torch.utils.data.DataLoader() + return dataloader, dataset + + +class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): + """ Dataloader that reuses workers + + Uses same syntax as vanilla DataLoader + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for i in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler(object): + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) + + +class LoadImages: # for inference + def __init__(self, path, img_size=640, auto_size=32): + p = str(Path(path)) # os-agnostic + p = os.path.abspath(p) # absolute path + if '*' in p: + files = sorted(glob.glob(p, recursive=True)) # glob + elif os.path.isdir(p): + files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir + elif os.path.isfile(p): + files = [p] # files + else: + raise Exception('ERROR: %s does not exist' % p) + + images = [x for x in files if x.split('.')[-1].lower() in img_formats] + videos = [x for x in files if x.split('.')[-1].lower() in vid_formats] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.auto_size = auto_size + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'images' + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, 'No images or videos found in %s. 
Supported formats are:\nimages: %s\nvideos: %s' % \ + (p, img_formats, vid_formats) + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + ret_val, img0 = self.cap.read() + if not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + else: + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='') + + else: + # Read image + self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, 'Image Not Found ' + path + print('image %g/%g %s: ' % (self.count, self.nf, path), end='') + + # Padded resize + img = letterbox(img0, new_shape=self.img_size, auto_size=self.auto_size)[0] + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return path, img, img0, self.cap + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + def __len__(self): + return self.nf # number of files + + +class LoadWebcam: # for inference + def __init__(self, pipe='0', img_size=640): + self.img_size = img_size + + if pipe.isnumeric(): + pipe = eval(pipe) # local camera + # pipe = 'rtsp://192.168.1.64/1' # IP camera + # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login + # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera + + self.pipe = pipe + self.cap = cv2.VideoCapture(pipe) # video capture object + self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + if cv2.waitKey(1) == ord('q'): # q to quit + self.cap.release() + cv2.destroyAllWindows() + raise StopIteration + + # Read frame + if self.pipe == 0: # local camera + ret_val, img0 = self.cap.read() + img0 = cv2.flip(img0, 1) # flip left-right + else: # IP camera + n = 0 + while True: + n += 1 + self.cap.grab() + if n % 30 == 0: # skip frames + ret_val, img0 = self.cap.retrieve() + if ret_val: + break + + # Print + assert ret_val, 'Camera Error %s' % self.pipe + img_path = 'webcam.jpg' + print('webcam %g: ' % self.count, end='') + + # Padded resize + img = letterbox(img0, new_shape=self.img_size)[0] + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return img_path, img, img0, None + + def __len__(self): + return 0 + + +class LoadStreams: # multiple IP or RTSP cameras + def __init__(self, sources='streams.txt', img_size=640): + self.mode = 'images' + self.img_size = img_size + + if os.path.isfile(sources): + with open(sources, 'r') as f: + sources = [x.strip() for x in f.read().splitlines() if len(x.strip())] + else: + sources = [sources] + + n = len(sources) + self.imgs = [None] * n + self.sources = sources + for i, s in enumerate(sources): + # Start the thread to read frames from the video stream + print('%g/%g: %s... 
' % (i + 1, n, s), end='') + cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s) + assert cap.isOpened(), 'Failed to open %s' % s + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) % 100 + _, self.imgs[i] = cap.read() # guarantee first frame + thread = Thread(target=self.update, args=([i, cap]), daemon=True) + print(' success (%gx%g at %.2f FPS).' % (w, h, fps)) + thread.start() + print('') # newline + + # check for common shapes + s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes + self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + if not self.rect: + print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') + + def update(self, index, cap): + # Read next stream frame in a daemon thread + n = 0 + while cap.isOpened(): + n += 1 + # _, self.imgs[index] = cap.read() + cap.grab() + if n == 4: # read every 4th frame + _, self.imgs[index] = cap.retrieve() + n = 0 + time.sleep(0.01) # wait time + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + img0 = self.imgs.copy() + if cv2.waitKey(1) == ord('q'): # q to quit + cv2.destroyAllWindows() + raise StopIteration + + # Letterbox + img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0] + + # Stack + img = np.stack(img, 0) + + # Convert + img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = np.ascontiguousarray(img) + + return self.sources, img, img0, None + + def __len__(self): + return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years + + +class LoadImagesAndLabels(Dataset): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + + def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings + return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] + + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + elif p.is_file(): # file + with open(p, 'r') as t: + t = t.read().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + else: + raise Exception('%s does not exist' % p) + self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + assert self.img_files, 'No images found' + except Exception as e: + raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) + + # Check cache + self.label_files = img2label_paths(self.img_files) # labels + cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels + if os.path.isfile(cache_path): + cache = torch.load(cache_path) # load + if cache['hash'] != 
get_hash(self.label_files + self.img_files): # dataset changed + cache = self.cache_labels(cache_path) # re-cache + else: + cache = self.cache_labels(cache_path) # cache + + # Read cache + cache.pop('hash') # remove hash + labels, shapes = zip(*cache.values()) + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + n = len(shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + + # Check labels + create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False + nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate + pbar = enumerate(self.label_files) + if rank in [-1, 0]: + pbar = tqdm(pbar) + for i, file in pbar: + l = self.labels[i] # label + if l is not None and l.shape[0]: + assert l.shape[1] == 5, '> 5 label columns: %s' % file + assert (l >= 0).all(), 'negative labels: %s' % file + assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file + if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows + nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows + if single_cls: + l[:, 0] = 0 # force dataset into single-class mode + self.labels[i] = l + nf += 1 # file found + + # Create subdataset (a smaller dataset) + if create_datasubset and ns < 1E4: + if ns == 0: + create_folder(path='./datasubset') + os.makedirs('./datasubset/images') + exclude_classes = 43 + if exclude_classes not in l[:, 0]: + ns += 1 + # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image + with open('./datasubset/images.txt', 'a') as f: + f.write(self.img_files[i] + '\n') + + # Extract object detection boxes for a second stage classifier + if extract_bounding_boxes: + p = Path(self.img_files[i]) + img = cv2.imread(str(p)) + h, w = img.shape[:2] + for j, x in enumerate(l): + f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) + if not os.path.exists(Path(f).parent): + os.makedirs(Path(f).parent) # make new output folder + + b = x[1:] * [w, h, w, h] # box + b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.3 + 30 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' + else: + ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty + # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove + + if rank in [-1, 0]: + pbar.desc = 'Scanning labels %s 
(%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( + cache_path, nf, nm, ne, nd, n) + if nf == 0: + s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) + print(s) + assert not augment, '%s. Can not train without labels.' % s + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs = [None] * n + if cache_images: + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * n, [None] * n + results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads + pbar = tqdm(enumerate(results), total=n) + for i, x in pbar: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) + + def cache_labels(self, path='labels.cache3'): + # Cache dataset labels, check images and read shapes + x = {} # dict + pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + for (img, label) in pbar: + try: + l = [] + im = Image.open(img) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' + if os.path.isfile(label): + with open(label, 'r') as f: + l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels + if len(l) == 0: + l = np.zeros((0, 5), dtype=np.float32) + x[img] = [l, shape] + except Exception as e: + print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) + + x['hash'] = get_hash(self.label_files + self.img_files) + torch.save(x, path) # save for next time + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + def __getitem__(self, index): + if self.image_weights: + index = self.indices[index] + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + if mosaic: + # Load mosaic + img, labels = load_mosaic(self, index) + #img, labels = load_mosaic9(self, index) + shapes = None + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if random.random() < hyp['mixup']: + img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + #img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + # Load labels + labels = [] + x = self.labels[index] + if x.size > 0: + # Normalized xywh to pixel xyxy format + labels = x.copy() + labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width + labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height + labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] + labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] + + if self.augment: + # Augment imagespace + if not mosaic: + img, labels = random_perspective(img, labels, + 
degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) + + # Augment colorspace + augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + + # Apply cutouts + # if random.random() < 0.9: + # labels = cutout(img, labels) + + nL = len(labels) # number of labels + if nL: + labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh + labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 + labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 + + if self.augment: + # flip up-down + if random.random() < hyp['flipud']: + img = np.flipud(img) + if nL: + labels[:, 2] = 1 - labels[:, 2] + + # flip left-right + if random.random() < hyp['fliplr']: + img = np.fliplr(img) + if nL: + labels[:, 1] = 1 - labels[:, 1] + + labels_out = torch.zeros((nL, 6)) + if nL: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes + + +class LoadImagesAndLabels9(Dataset): # for training/testing + def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, + cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + + def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings + return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths] + + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + elif p.is_file(): # file + with open(p, 'r') as t: + t = t.read().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + else: + raise Exception('%s does not exist' % p) + self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + assert self.img_files, 'No images found' + except Exception as e: + raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) + + # Check cache + self.label_files = img2label_paths(self.img_files) # labels + cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels + if os.path.isfile(cache_path): + cache = torch.load(cache_path) # load + if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed + cache = self.cache_labels(cache_path) # re-cache + else: + cache = self.cache_labels(cache_path) # cache + + # Read cache + cache.pop('hash') # remove hash + labels, shapes = zip(*cache.values()) + self.labels = list(labels) + self.shapes = np.array(shapes, dtype=np.float64) + self.img_files = 
list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + n = len(shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.img_files = [self.img_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + + # Check labels + create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False + nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate + pbar = enumerate(self.label_files) + if rank in [-1, 0]: + pbar = tqdm(pbar) + for i, file in pbar: + l = self.labels[i] # label + if l is not None and l.shape[0]: + assert l.shape[1] == 5, '> 5 label columns: %s' % file + assert (l >= 0).all(), 'negative labels: %s' % file + assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file + if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows + nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows + if single_cls: + l[:, 0] = 0 # force dataset into single-class mode + self.labels[i] = l + nf += 1 # file found + + # Create subdataset (a smaller dataset) + if create_datasubset and ns < 1E4: + if ns == 0: + create_folder(path='./datasubset') + os.makedirs('./datasubset/images') + exclude_classes = 43 + if exclude_classes not in l[:, 0]: + ns += 1 + # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image + with open('./datasubset/images.txt', 'a') as f: + f.write(self.img_files[i] + '\n') + + # Extract object detection boxes for a second stage classifier + if extract_bounding_boxes: + p = Path(self.img_files[i]) + img = cv2.imread(str(p)) + h, w = img.shape[:2] + for j, x in enumerate(l): + f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) + if not os.path.exists(Path(f).parent): + os.makedirs(Path(f).parent) # make new output folder + + b = x[1:] * [w, h, w, h] # box + b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.3 + 30 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' + else: + ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty + # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove + + if rank in [-1, 0]: + pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % ( + cache_path, nf, nm, ne, nd, n) + if nf == 0: + s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url) + print(s) + assert not augment, '%s. Can not train without labels.' 
% s + + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) + self.imgs = [None] * n + if cache_images: + gb = 0 # Gigabytes of cached images + self.img_hw0, self.img_hw = [None] * n, [None] * n + results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads + pbar = tqdm(enumerate(results), total=n) + for i, x in pbar: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) + gb += self.imgs[i].nbytes + pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) + + def cache_labels(self, path='labels.cache3'): + # Cache dataset labels, check images and read shapes + x = {} # dict + pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + for (img, label) in pbar: + try: + l = [] + im = Image.open(img) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' + if os.path.isfile(label): + with open(label, 'r') as f: + l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels + if len(l) == 0: + l = np.zeros((0, 5), dtype=np.float32) + x[img] = [l, shape] + except Exception as e: + print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e)) + + x['hash'] = get_hash(self.label_files + self.img_files) + torch.save(x, path) # save for next time + return x + + def __len__(self): + return len(self.img_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + def __getitem__(self, index): + if self.image_weights: + index = self.indices[index] + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + if mosaic: + # Load mosaic + #img, labels = load_mosaic(self, index) + img, labels = load_mosaic9(self, index) + shapes = None + + # MixUp https://arxiv.org/pdf/1710.09412.pdf + if random.random() < hyp['mixup']: + #img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 + img = (img * r + img2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + + else: + # Load image + img, (h0, w0), (h, w) = load_image(self, index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + # Load labels + labels = [] + x = self.labels[index] + if x.size > 0: + # Normalized xywh to pixel xyxy format + labels = x.copy() + labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width + labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height + labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] + labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] + + if self.augment: + # Augment imagespace + if not mosaic: + img, labels = random_perspective(img, labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) + + # Augment colorspace + augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + + # Apply cutouts + # if random.random() < 0.9: + # 
labels = cutout(img, labels) + + nL = len(labels) # number of labels + if nL: + labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh + labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 + labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 + + if self.augment: + # flip up-down + if random.random() < hyp['flipud']: + img = np.flipud(img) + if nL: + labels[:, 2] = 1 - labels[:, 2] + + # flip left-right + if random.random() < hyp['fliplr']: + img = np.fliplr(img) + if nL: + labels[:, 1] = 1 - labels[:, 1] + + labels_out = torch.zeros((nL, 6)) + if nL: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.img_files[index], shapes + + @staticmethod + def collate_fn(batch): + img, label, path, shapes = zip(*batch) # transposed + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def load_image(self, index): + # loads 1 image from dataset, returns img, original hw, resized hw + img = self.imgs[index] + if img is None: # not cached + path = self.img_files[index] + img = cv2.imread(path) # BGR + assert img is not None, 'Image Not Found ' + path + h0, w0 = img.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized + else: + return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized + + +def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=np.int16) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + + # Histogram equalization + # if random.random() < 0.2: + # for i in range(3): + # img[:, :, i] = cv2.equalizeHist(img[:, :, i]) + + +def load_mosaic(self, index): + # loads images in a mosaic + + labels4 = [] + s = self.img_size + yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y + indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, 
y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + x = self.labels[index] + labels = x.copy() + if x.size > 0: # Normalized xywh to pixel xyxy format + labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw + labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh + labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw + labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh + labels4.append(labels) + + # Concat/clip labels + if len(labels4): + labels4 = np.concatenate(labels4, 0) + np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4 = random_perspective(img4, labels4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img4, labels4 + + +def load_mosaic9(self, index): + # loads images in a 9-mosaic + + labels9 = [] + s = self.img_size + indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(8)] # 8 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = load_image(self, index) + + # place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + + # Labels + x = self.labels[index] + labels = x.copy() + if x.size > 0: # Normalized xywh to pixel xyxy format + labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padx + labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + pady + labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padx + labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + pady + labels9.append(labels) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y + img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] + + # Concat/clip labels + if len(labels9): + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + + np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9 = random_perspective(img9, labels9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + 
scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img9, labels9 + + +def replicate(img, labels): + # Replicate labels + h, w = img.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return img, labels + + +def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, auto_size=32): + # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, auto_size), np.mod(dh, auto_size) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + + +def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = img.shape[0] + border[0] * 2 # shape(h,w,c) + width = img.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation 
(pixels) + T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(img[:, :, ::-1]) # base + # ax[1].imshow(img2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + if n: + # warp points + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + if perspective: + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + else: # affine + xy = xy[:, :2].reshape(n, 8) + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # # apply angle-based reduction of bounding boxes + # radians = a * math.pi / 180 + # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 + # x = (xy[:, 2] + xy[:, 0]) / 2 + # y = (xy[:, 3] + xy[:, 1]) / 2 + # w = (xy[:, 2] - xy[:, 0]) * reduction + # h = (xy[:, 3] - xy[:, 1]) * reduction + # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T + + # clip boxes + xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T) + targets = targets[i] + targets[:, 1:5] = xy[i] + + return img, targets + + +def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates + + +def cutout(image, labels): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + h, w = image.shape[:2] + + def bbox_ioa(box1, box2): + # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. 
boxes are x1y1x2y2 + box2 = box2.transpose() + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + + # Intersection area + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ + (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 + + # Intersection over box2 area + return inter_area / box2_area + + # create random masks + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels + + return labels + + +def create_folder(path='./new'): + # Create folder + if os.path.exists(path): + shutil.rmtree(path) # delete output folder + os.makedirs(path) # make new output folder + + +def flatten_recursive(path='../coco128'): + # Flatten a recursive directory by bringing all files to top level + new_path = Path(path + '_flat') + create_folder(new_path) + for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)): + shutil.copyfile(file, new_path / Path(file).name) + + diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/google_utils.py b/PyTorch/contrib/cv/detection/YOLOR/utils/google_utils.py index 7cd3e8a2f4167f3db779b6e487be83575b4dd3fd..b69abe710b472e748fa3766a7ee702299806ea3e 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/google_utils.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/google_utils.py @@ -1,134 +1,134 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Google utils: https://cloud.google.com/storage/docs/reference/libraries - -import os -import platform -import subprocess -import time -from pathlib import Path - -import torch - - -def gsutil_getsize(url=''): - # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du - s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') - return eval(s.split(' ')[0]) if len(s) else 0 # bytes - - -def attempt_download(weights): - # Attempt to download pretrained weights if not found locally - weights = weights.strip().replace("'", '') - file = Path(weights).name - - msg = weights + ' missing, try downloading from https://github.com/WongKinYiu/yolor/releases/' - models = ['yolor_p6.pt', 'yolor_w6.pt'] # available models - - if file in models and not os.path.isfile(weights): - - try: # GitHub - url = 'https://github.com/WongKinYiu/yolor/releases/download/v1.0/' + file - print('Downloading %s to %s...' % (url, weights)) - torch.hub.download_url_to_file(url, weights) - assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check - except Exception as e: # GCP - print('ERROR: Download failure.') - print('') - - -def attempt_load(weights, map_location=None): - # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a - model = Ensemble() - for w in weights if isinstance(weights, list) else [weights]: - attempt_download(w) - model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model - - if len(model) == 1: - return model[-1] # return model - else: - print('Ensemble created with %s\n' % weights) - for k in ['names', 'stride']: - setattr(model, k, getattr(model[-1], k)) - return model # return ensemble - - -def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'): - # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download() - t = time.time() - - print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') - os.remove(name) if os.path.exists(name) else None # remove existing - os.remove('cookie') if os.path.exists('cookie') else None - - # Attempt file download - out = "NUL" if platform.system() == "Windows" else "/dev/null" - os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) - if os.path.exists('cookie'): # large file - s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) - else: # small file - s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) - r = os.system(s) # execute, capture return - os.remove('cookie') if os.path.exists('cookie') else None - - # Error check - if r != 0: - os.remove(name) if os.path.exists(name) else None # remove partial - print('Download error ') # raise Exception('Download error') - return r - - # Unzip if archive - if name.endswith('.zip'): - print('unzipping... 
', end='') - os.system('unzip -q %s' % name) # unzip - os.remove(name) # remove zip to free space - - print('Done (%.1fs)' % (time.time() - t)) - return r - - -def get_token(cookie="./cookie"): - with open(cookie) as f: - for line in f: - if "download" in line: - return line.split()[-1] - return "" - -# def upload_blob(bucket_name, source_file_name, destination_blob_name): -# # Uploads a file to a bucket -# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python -# -# storage_client = storage.Client() -# bucket = storage_client.get_bucket(bucket_name) -# blob = bucket.blob(destination_blob_name) -# -# blob.upload_from_filename(source_file_name) -# -# print('File {} uploaded to {}.'.format( -# source_file_name, -# destination_blob_name)) -# -# -# def download_blob(bucket_name, source_blob_name, destination_file_name): -# # Uploads a blob from a bucket -# storage_client = storage.Client() -# bucket = storage_client.get_bucket(bucket_name) -# blob = bucket.blob(source_blob_name) -# -# blob.download_to_filename(destination_file_name) -# -# print('Blob {} downloaded to {}.'.format( -# source_blob_name, -# destination_file_name)) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Google utils: https://cloud.google.com/storage/docs/reference/libraries + +import os +import platform +import subprocess +import time +from pathlib import Path + +import torch + + +def gsutil_getsize(url=''): + # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du + s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') + return eval(s.split(' ')[0]) if len(s) else 0 # bytes + + +def attempt_download(weights): + # Attempt to download pretrained weights if not found locally + weights = weights.strip().replace("'", '') + file = Path(weights).name + + msg = weights + ' missing, try downloading from https://github.com/WongKinYiu/yolor/releases/' + models = ['yolor_p6.pt', 'yolor_w6.pt'] # available models + + if file in models and not os.path.isfile(weights): + + try: # GitHub + url = 'https://github.com/WongKinYiu/yolor/releases/download/v1.0/' + file + print('Downloading %s to %s...' 
% (url, weights)) + torch.hub.download_url_to_file(url, weights) + assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check + except Exception as e: # GCP + print('ERROR: Download failure.') + print('') + + +def attempt_load(weights, map_location=None): + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + attempt_download(w) + model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model + + if len(model) == 1: + return model[-1] # return model + else: + print('Ensemble created with %s\n' % weights) + for k in ['names', 'stride']: + setattr(model, k, getattr(model[-1], k)) + return model # return ensemble + + +def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'): + # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download() + t = time.time() + + print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') + os.remove(name) if os.path.exists(name) else None # remove existing + os.remove('cookie') if os.path.exists('cookie') else None + + # Attempt file download + out = "NUL" if platform.system() == "Windows" else "/dev/null" + os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) + if os.path.exists('cookie'): # large file + s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) + else: # small file + s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) + r = os.system(s) # execute, capture return + os.remove('cookie') if os.path.exists('cookie') else None + + # Error check + if r != 0: + os.remove(name) if os.path.exists(name) else None # remove partial + print('Download error ') # raise Exception('Download error') + return r + + # Unzip if archive + if name.endswith('.zip'): + print('unzipping... 
', end='') + os.system('unzip -q %s' % name) # unzip + os.remove(name) # remove zip to free space + + print('Done (%.1fs)' % (time.time() - t)) + return r + + +def get_token(cookie="./cookie"): + with open(cookie) as f: + for line in f: + if "download" in line: + return line.split()[-1] + return "" + +# def upload_blob(bucket_name, source_file_name, destination_blob_name): +# # Uploads a file to a bucket +# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python +# +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(destination_blob_name) +# +# blob.upload_from_filename(source_file_name) +# +# print('File {} uploaded to {}.'.format( +# source_file_name, +# destination_blob_name)) +# +# +# def download_blob(bucket_name, source_blob_name, destination_file_name): +# # Uploads a blob from a bucket +# storage_client = storage.Client() +# bucket = storage_client.get_bucket(bucket_name) +# blob = bucket.blob(source_blob_name) +# +# blob.download_to_filename(destination_file_name) +# +# print('Blob {} downloaded to {}.'.format( +# source_blob_name, +# destination_file_name)) diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/layers.py b/PyTorch/contrib/cv/detection/YOLOR/utils/layers.py index a0dfadae460ca7134e07c0f6aceb3828fad0d8ac..de842fc4aa349c56fec29adcbf4d29cb0d451266 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/layers.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/layers.py @@ -1,548 +1,548 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch.nn.functional as F - -from utils.general import * - -import torch -from torch import nn - -try: - from mish_cuda import MishCuda as Mish - -except: - class Mish(nn.Module): # https://github.com/digantamisra98/Mish - def forward(self, x): - return x * F.softplus(x).tanh() - -try: - from pytorch_wavelets import DWTForward, DWTInverse - - class DWT(nn.Module): - def __init__(self): - super(DWT, self).__init__() - self.xfm = DWTForward(J=1, wave='db1', mode='zero') - - def forward(self, x): - b,c,w,h = x.shape - yl, yh = self.xfm(x) - return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) - -except: # using Reorg instead - class DWT(nn.Module): - def forward(self, x): - return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) - - -class Reorg(nn.Module): - def forward(self, x): - return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) - - -def make_divisible(v, divisor): - # Function ensures all layers have a channel number that is divisible by 8 - # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - return math.ceil(v / divisor) * divisor - - -class Flatten(nn.Module): - # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions - def forward(self, x): - return x.view(x.size(0), -1) - - -class Concat(nn.Module): - # Concatenate a list of tensors along dimension - def __init__(self, dimension=1): - super(Concat, self).__init__() - self.d = dimension - - def forward(self, x): - return torch.cat(x, self.d) - - -class FeatureConcat(nn.Module): - def __init__(self, layers): - super(FeatureConcat, self).__init__() - self.layers = layers # layer indices - self.multiple = len(layers) > 1 # multiple layers flag - - def forward(self, x, outputs): - return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] - - -class FeatureConcat2(nn.Module): - def __init__(self, layers): - super(FeatureConcat2, self).__init__() - self.layers = layers # layer indices - self.multiple = len(layers) > 1 # multiple layers flag - - def forward(self, x, outputs): - return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach()], 1) - - -class FeatureConcat3(nn.Module): - def __init__(self, layers): - super(FeatureConcat3, self).__init__() - self.layers = layers # layer indices - self.multiple = len(layers) > 1 # multiple layers flag - - def forward(self, x, outputs): - return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach(), outputs[self.layers[2]].detach()], 1) - - -class FeatureConcat_l(nn.Module): - def __init__(self, layers): - super(FeatureConcat_l, self).__init__() - self.layers = layers # layer indices - self.multiple = len(layers) > 1 # multiple layers flag - - def forward(self, x, outputs): - return torch.cat([outputs[i][:,:outputs[i].shape[1]//2,:,:] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]][:,:outputs[self.layers[0]].shape[1]//2,:,:] - - -class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers, weight=False): - super(WeightedFeatureFusion, self).__init__() - self.layers = layers # layer indices - self.weight = weight # apply weights boolean - self.n = len(layers) + 1 # number of layers - if weight: - self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights - - def forward(self, x, outputs): - # Weights - if self.weight: - w = torch.sigmoid(self.w) * (2 / 
self.n) # sigmoid weights (0-1) - x = x * w[0] - - # Fusion - nx = x.shape[1] # input channels - for i in range(self.n - 1): - a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add - na = a.shape[1] # feature channels - - # Adjust channels - if nx == na: # same shape - x = x + a - elif nx > na: # slice input - x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a - else: # slice feature - x = x + a[:, :nx] - - return x - - -class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 - def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'): - super(MixConv2d, self).__init__() - - groups = len(k) - if method == 'equal_ch': # equal channels per group - i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices - ch = [(i == g).sum() for g in range(groups)] - else: # 'equal_params': equal parameter count per group - b = [out_ch] + [0] * groups - a = np.eye(groups + 1, groups, k=-1) - a -= np.roll(a, 1, axis=1) - a *= np.array(k) ** 2 - a[0] = 1 - ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b - - self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, - out_channels=ch[g], - kernel_size=k[g], - stride=stride, - padding=k[g] // 2, # 'same' pad - dilation=dilation, - bias=bias) for g in range(groups)]) - - def forward(self, x): - return torch.cat([m(x) for m in self.m], 1) - - -# Activation functions below ------------------------------------------------------------------------------------------- -class SwishImplementation(torch.autograd.Function): - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return x * torch.sigmoid(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - sx = torch.sigmoid(x) # sigmoid(ctx) - return grad_output * (sx * (1 + x * (1 - sx))) - - -class MishImplementation(torch.autograd.Function): - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - sx = torch.sigmoid(x) - fx = F.softplus(x).tanh() - return grad_output * (fx + x * sx * (1 - fx * fx)) - - -class MemoryEfficientSwish(nn.Module): - def forward(self, x): - return SwishImplementation.apply(x) - - -class MemoryEfficientMish(nn.Module): - def forward(self, x): - return MishImplementation.apply(x) - - -class Swish(nn.Module): - def forward(self, x): - return x * torch.sigmoid(x) - - -class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf - def forward(self, x): - return x * F.hardtanh(x + 3, 0., 6., True) / 6. - - -class DeformConv2d(nn.Module): - def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False): - """ - Args: - modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2). 
- """ - super(DeformConv2d, self).__init__() - self.kernel_size = kernel_size - self.padding = padding - self.stride = stride - self.zero_padding = nn.ZeroPad2d(padding) - self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) - - self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) - nn.init.constant_(self.p_conv.weight, 0) - self.p_conv.register_backward_hook(self._set_lr) - - self.modulation = modulation - if modulation: - self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) - nn.init.constant_(self.m_conv.weight, 0) - self.m_conv.register_backward_hook(self._set_lr) - - @staticmethod - def _set_lr(module, grad_input, grad_output): - grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input))) - grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output))) - - def forward(self, x): - offset = self.p_conv(x) - if self.modulation: - m = torch.sigmoid(self.m_conv(x)) - - dtype = offset.data.type() - ks = self.kernel_size - N = offset.size(1) // 2 - - if self.padding: - x = self.zero_padding(x) - - # (b, 2N, h, w) - p = self._get_p(offset, dtype) - - # (b, h, w, 2N) - p = p.contiguous().permute(0, 2, 3, 1) - q_lt = p.detach().floor() - q_rb = q_lt + 1 - - q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() - q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() - q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1) - q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1) - - # clip p - p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) - - # bilinear kernel (b, h, w, N) - g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) - g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:])) - g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) - g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) - - # (b, c, h, w, N) - x_q_lt = self._get_x_q(x, q_lt, N) - x_q_rb = self._get_x_q(x, q_rb, N) - x_q_lb = self._get_x_q(x, q_lb, N) - x_q_rt = self._get_x_q(x, q_rt, N) - - # (b, c, h, w, N) - x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ - g_rb.unsqueeze(dim=1) * x_q_rb + \ - g_lb.unsqueeze(dim=1) * x_q_lb + \ - g_rt.unsqueeze(dim=1) * x_q_rt - - # modulation - if self.modulation: - m = m.contiguous().permute(0, 2, 3, 1) - m = m.unsqueeze(dim=1) - m = torch.cat([m for _ in range(x_offset.size(1))], dim=1) - x_offset *= m - - x_offset = self._reshape_x_offset(x_offset, ks) - out = self.conv(x_offset) - - return out - - def _get_p_n(self, N, dtype): - p_n_x, p_n_y = torch.meshgrid( - torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), - torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1)) - # (2N, 1) - p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0) - p_n = p_n.view(1, 2*N, 1, 1).type(dtype) - - return p_n - - def _get_p_0(self, h, w, N, dtype): - p_0_x, p_0_y = torch.meshgrid( - torch.arange(1, h*self.stride+1, self.stride), - torch.arange(1, w*self.stride+1, self.stride)) - p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1) - p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1) - p_0 = torch.cat([p_0_x, p_0_y], 
1).type(dtype) - - return p_0 - - def _get_p(self, offset, dtype): - N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) - - # (1, 2N, 1, 1) - p_n = self._get_p_n(N, dtype) - # (1, 2N, h, w) - p_0 = self._get_p_0(h, w, N, dtype) - p = p_0 + p_n + offset - return p - - def _get_x_q(self, x, q, N): - b, h, w, _ = q.size() - padded_w = x.size(3) - c = x.size(1) - # (b, c, h*w) - x = x.contiguous().view(b, c, -1) - - # (b, h, w, N) - index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y - # (b, c, h*w*N) - index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) - - x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) - - return x_offset - - @staticmethod - def _reshape_x_offset(x_offset, ks): - b, c, h, w, N = x_offset.size() - x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) - x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) - - return x_offset - - -class GAP(nn.Module): - def __init__(self): - super(GAP, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - def forward(self, x): - #b, c, _, _ = x.size() - return self.avg_pool(x)#.view(b, c) - - -class Silence(nn.Module): - def __init__(self): - super(Silence, self).__init__() - def forward(self, x): - return x - - -class ScaleChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ScaleChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return x.expand_as(a) * a - - -class ShiftChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ShiftChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return a.expand_as(x) + x - - -class ShiftChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ShiftChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return a.expand_as(x) + x - - -class ControlChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ControlChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return a.expand_as(x) * x - - -class ControlChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ControlChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return a.expand_as(x) * x - - -class AlternateChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(AlternateChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return torch.cat([a.expand_as(x), x], dim=1) - - -class AlternateChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(AlternateChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return 
torch.cat([a.expand_as(x), x], dim=1) - - -class SelectChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(SelectChannel, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return a.sigmoid().expand_as(x) * x - - -class SelectChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(SelectChannel2D, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]].view(1,-1,1,1) - return a.sigmoid().expand_as(x) * x - - -class ScaleSpatial(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 - def __init__(self, layers): - super(ScaleSpatial, self).__init__() - self.layers = layers # layer indices - - def forward(self, x, outputs): - a = outputs[self.layers[0]] - return x * a - - -class ImplicitA(nn.Module): - def __init__(self, channel): - super(ImplicitA, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit - - -class ImplicitC(nn.Module): - def __init__(self, channel): - super(ImplicitC, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit - - -class ImplicitM(nn.Module): - def __init__(self, channel): - super(ImplicitM, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) - nn.init.normal_(self.implicit, mean=1., std=.02) - - def forward(self): - return self.implicit - - - -class Implicit2DA(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DA, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit - - -class Implicit2DC(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DC, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) - nn.init.normal_(self.implicit, std=.02) - - def forward(self): - return self.implicit - - -class Implicit2DM(nn.Module): - def __init__(self, atom, channel): - super(Implicit2DM, self).__init__() - self.channel = channel - self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1)) - nn.init.normal_(self.implicit, mean=1., std=.02) - - def forward(self): - return self.implicit - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch.nn.functional as F + +from utils.general import * + +import torch +from torch import nn + +try: + from mish_cuda import MishCuda as Mish + +except: + class Mish(nn.Module): # https://github.com/digantamisra98/Mish + def forward(self, x): + return x * F.softplus(x).tanh() + +try: + from pytorch_wavelets import DWTForward, DWTInverse + + class DWT(nn.Module): + def __init__(self): + super(DWT, self).__init__() + self.xfm = DWTForward(J=1, wave='db1', mode='zero') + + def forward(self, x): + b,c,w,h = x.shape + yl, yh = self.xfm(x) + return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1) + +except: # using Reorg instead + class DWT(nn.Module): + def forward(self, x): + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +class Reorg(nn.Module): + def forward(self, x): + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +def make_divisible(v, divisor): + # Function ensures all layers have a channel number that is divisible by 8 + # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + return math.ceil(v / divisor) * divisor + + +class Flatten(nn.Module): + # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions + def forward(self, x): + return x.view(x.size(0), -1) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class FeatureConcat(nn.Module): + def __init__(self, layers): + super(FeatureConcat, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] + + +class FeatureConcat2(nn.Module): + def __init__(self, layers): + super(FeatureConcat2, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach()], 1) + + +class FeatureConcat3(nn.Module): + def __init__(self, layers): + super(FeatureConcat3, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach(), outputs[self.layers[2]].detach()], 1) + + +class FeatureConcat_l(nn.Module): + def __init__(self, layers): + super(FeatureConcat_l, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[i][:,:outputs[i].shape[1]//2,:,:] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]][:,:outputs[self.layers[0]].shape[1]//2,:,:] + + +class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers, weight=False): + super(WeightedFeatureFusion, self).__init__() + self.layers = layers # layer indices + self.weight = weight # apply weights boolean + self.n = len(layers) + 1 # number of layers + if weight: + self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights + + def forward(self, x, outputs): + # Weights + if self.weight: + w = torch.sigmoid(self.w) * (2 / 
self.n) # sigmoid weights (0-1) + x = x * w[0] + + # Fusion + nx = x.shape[1] # input channels + for i in range(self.n - 1): + a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add + na = a.shape[1] # feature channels + + # Adjust channels + if nx == na: # same shape + x = x + a + elif nx > na: # slice input + x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a + else: # slice feature + x = x + a[:, :nx] + + return x + + +class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 + def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'): + super(MixConv2d, self).__init__() + + groups = len(k) + if method == 'equal_ch': # equal channels per group + i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices + ch = [(i == g).sum() for g in range(groups)] + else: # 'equal_params': equal parameter count per group + b = [out_ch] + [0] * groups + a = np.eye(groups + 1, groups, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, + out_channels=ch[g], + kernel_size=k[g], + stride=stride, + padding=k[g] // 2, # 'same' pad + dilation=dilation, + bias=bias) for g in range(groups)]) + + def forward(self, x): + return torch.cat([m(x) for m in self.m], 1) + + +# Activation functions below ------------------------------------------------------------------------------------------- +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x * torch.sigmoid(x) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) # sigmoid(ctx) + return grad_output * (sx * (1 + x * (1 - sx))) + + +class MishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + fx = F.softplus(x).tanh() + return grad_output * (fx + x * sx * (1 - fx * fx)) + + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +class MemoryEfficientMish(nn.Module): + def forward(self, x): + return MishImplementation.apply(x) + + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf + def forward(self, x): + return x * F.hardtanh(x + 3, 0., 6., True) / 6. + + +class DeformConv2d(nn.Module): + def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False): + """ + Args: + modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2). 
+ """ + super(DeformConv2d, self).__init__() + self.kernel_size = kernel_size + self.padding = padding + self.stride = stride + self.zero_padding = nn.ZeroPad2d(padding) + self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) + + self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.p_conv.weight, 0) + self.p_conv.register_backward_hook(self._set_lr) + + self.modulation = modulation + if modulation: + self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.m_conv.weight, 0) + self.m_conv.register_backward_hook(self._set_lr) + + @staticmethod + def _set_lr(module, grad_input, grad_output): + grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input))) + grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output))) + + def forward(self, x): + offset = self.p_conv(x) + if self.modulation: + m = torch.sigmoid(self.m_conv(x)) + + dtype = offset.data.type() + ks = self.kernel_size + N = offset.size(1) // 2 + + if self.padding: + x = self.zero_padding(x) + + # (b, 2N, h, w) + p = self._get_p(offset, dtype) + + # (b, h, w, 2N) + p = p.contiguous().permute(0, 2, 3, 1) + q_lt = p.detach().floor() + q_rb = q_lt + 1 + + q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1) + q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1) + + # clip p + p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) + + # bilinear kernel (b, h, w, N) + g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) + g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:])) + g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) + g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) + + # (b, c, h, w, N) + x_q_lt = self._get_x_q(x, q_lt, N) + x_q_rb = self._get_x_q(x, q_rb, N) + x_q_lb = self._get_x_q(x, q_lb, N) + x_q_rt = self._get_x_q(x, q_rt, N) + + # (b, c, h, w, N) + x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ + g_rb.unsqueeze(dim=1) * x_q_rb + \ + g_lb.unsqueeze(dim=1) * x_q_lb + \ + g_rt.unsqueeze(dim=1) * x_q_rt + + # modulation + if self.modulation: + m = m.contiguous().permute(0, 2, 3, 1) + m = m.unsqueeze(dim=1) + m = torch.cat([m for _ in range(x_offset.size(1))], dim=1) + x_offset *= m + + x_offset = self._reshape_x_offset(x_offset, ks) + out = self.conv(x_offset) + + return out + + def _get_p_n(self, N, dtype): + p_n_x, p_n_y = torch.meshgrid( + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1)) + # (2N, 1) + p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0) + p_n = p_n.view(1, 2*N, 1, 1).type(dtype) + + return p_n + + def _get_p_0(self, h, w, N, dtype): + p_0_x, p_0_y = torch.meshgrid( + torch.arange(1, h*self.stride+1, self.stride), + torch.arange(1, w*self.stride+1, self.stride)) + p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0 = torch.cat([p_0_x, p_0_y], 
1).type(dtype) + + return p_0 + + def _get_p(self, offset, dtype): + N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) + + # (1, 2N, 1, 1) + p_n = self._get_p_n(N, dtype) + # (1, 2N, h, w) + p_0 = self._get_p_0(h, w, N, dtype) + p = p_0 + p_n + offset + return p + + def _get_x_q(self, x, q, N): + b, h, w, _ = q.size() + padded_w = x.size(3) + c = x.size(1) + # (b, c, h*w) + x = x.contiguous().view(b, c, -1) + + # (b, h, w, N) + index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y + # (b, c, h*w*N) + index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) + + x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) + + return x_offset + + @staticmethod + def _reshape_x_offset(x_offset, ks): + b, c, h, w, N = x_offset.size() + x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) + x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) + + return x_offset + + +class GAP(nn.Module): + def __init__(self): + super(GAP, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + def forward(self, x): + #b, c, _, _ = x.size() + return self.avg_pool(x)#.view(b, c) + + +class Silence(nn.Module): + def __init__(self): + super(Silence, self).__init__() + def forward(self, x): + return x + + +class ScaleChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ScaleChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return x.expand_as(a) * a + + +class ShiftChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ShiftChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) + x + + +class ShiftChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ShiftChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) + x + + +class ControlChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ControlChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.expand_as(x) * x + + +class ControlChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ControlChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.expand_as(x) * x + + +class AlternateChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(AlternateChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return torch.cat([a.expand_as(x), x], dim=1) + + +class AlternateChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(AlternateChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return 
torch.cat([a.expand_as(x), x], dim=1) + + +class SelectChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(SelectChannel, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return a.sigmoid().expand_as(x) * x + + +class SelectChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(SelectChannel2D, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]].view(1,-1,1,1) + return a.sigmoid().expand_as(x) * x + + +class ScaleSpatial(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers): + super(ScaleSpatial, self).__init__() + self.layers = layers # layer indices + + def forward(self, x, outputs): + a = outputs[self.layers[0]] + return x * a + + +class ImplicitA(nn.Module): + def __init__(self, channel): + super(ImplicitA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class ImplicitC(nn.Module): + def __init__(self, channel): + super(ImplicitC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class ImplicitM(nn.Module): + def __init__(self, channel): + super(ImplicitM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit + + + +class Implicit2DA(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class Implicit2DC(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DC, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self): + return self.implicit + + +class Implicit2DM(nn.Module): + def __init__(self, atom, channel): + super(Implicit2DM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self): + return self.implicit + + \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/loss.py b/PyTorch/contrib/cv/detection/YOLOR/utils/loss.py index 9d2d3db08723ecf26ccc3f80675018a3ae487a3f..b3f75fb94de682168b1b728f398470860f7be04a 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/loss.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/loss.py @@ -1,356 +1,356 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Loss functions - -import torch -import torch.nn as nn - -from utils.general import bbox_iou -from utils.torch_utils import is_parallel - - -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 - # return positive, negative label smoothing BCE targets - return 1.0 - 0.5 * eps, 0.5 * eps - - -class BCEBlurWithLogitsLoss(nn.Module): - # BCEwithLogitLoss() with reduced missing label effects. - def __init__(self, alpha=0.05): - super(BCEBlurWithLogitsLoss, self).__init__() - self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() - self.alpha = alpha - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - pred = torch.sigmoid(pred) # prob from logits - dx = pred - true # reduce only missing label effects - # dx = (pred - true).abs() # reduce missing label and false label effects - alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) - loss *= alpha_factor - return loss.mean() - -class FocalLoss(nn.Module): - # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) - def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): - super(FocalLoss, self).__init__() - self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() - self.gamma = gamma - self.alpha = alpha - self.reduction = loss_fcn.reduction - self.loss_fcn.reduction = 'none' # required to apply FL to each element - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - # p_t = torch.exp(-loss) - # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability - - # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py - pred_prob = torch.sigmoid(pred) # prob from logits - p_t = true * pred_prob + (1 - true) * (1 - pred_prob) - alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) - modulating_factor = (1.0 - p_t) ** self.gamma - loss *= alpha_factor * modulating_factor - - if self.reduction == 'mean': - return loss.mean() - elif self.reduction == 'sum': - return loss.sum() - else: # 'none' - return loss - - -class DeterministicIndex(torch.autograd.Function): - @staticmethod - def forward(ctx, x, indices_list): - ctx.x = x - ctx.indices_list = indices_list - return x[indices_list[0], indices_list[1], :, indices_list[2], indices_list[3]] - - @staticmethod - def backward(ctx, grad_output): - tmp = torch.zeros_like(ctx.x) - ind0, ind1, ind2, ind3 = ctx.indices_list - tmp[ind0, ind1, :, ind2, ind3] = grad_output - return tmp, None - - -# @torchsnooper.snoop(output='/data/wyh/yolor/yolorDebug_1P640.txt') -def compute_loss(p, targets, model): # predictions, targets, model - device = targets.device - - targets = targets.T - for i in range(len(p)): - p[i] = p[i].permute(0, 1, 4, 2, 3) #(6, 3, 80, 80, 85)->(6, 3, 85, 80, 80) - - ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor - lcls, lbox, lobj = ft([0]).to(device), ft([0]).to(device), ft([0]).to(device) - tcls, tbox, indices, anchors, targets_mask, targets_sum_mask = build_targets(p, targets, model) # targets - h = 
model.hyp # hyperparameters - - # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]), reduction='sum').to(device) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]), reduction='mean').to(device) - - # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - cp, cn = smooth_BCE(eps=0.0) - - # Focal loss - g = h['fl_gamma'] # focal loss gamma - if g > 0: - BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - - # per output - nt = 0 # number of targets - np = len(p) # number of outputs - balance = [4.0, 1.0, 0.4] if np == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 - balance = [4.0, 1.0, 0.5, 0.4, 0.1] if np == 5 else balance - for i, pi in enumerate(p): # layer index, layer predictions - b, a, gj, gi = indices[i] # image, anchor, gridy, gridx - allmask = targets_mask[i] - sum_mask = targets_sum_mask[i] - # tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - tobj = torch.zeros_like(pi[:, :, 0, :, :]).to(device) # target obj - - nb = b.shape[0] # number of targets - if sum_mask.item() > 0: - nt += nb # cumulative targets - # ps = pi[b, a,:, gj, gi] # prediction subset corresponding to targets - ps = DeterministicIndex.apply(pi, (b, a, gj, gi)).permute(1, 0).contiguous() - # GIoU - pxy = ps.index_select(0, torch.tensor([0, 1], device=device)) - pwh = ps.index_select(0, torch.tensor([2, 3], device=device)) - - pxy = pxy.sigmoid() * 2. - 0.5 - pwh = (pwh.sigmoid() * 2) ** 2 * (anchors[i].T) - pbox = torch.cat((pxy, pwh), 0) # predicted box - giou = bbox_iou(pbox, tbox[i], x1y1x2y2=False, GIoU=True) - giou = giou * (allmask) + (1. - allmask) - lbox += (1.0 - giou).sum() / (sum_mask) # giou loss - # Obj - giou = giou * (allmask) - tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio - - # Class - if model.nc > 1: # cls loss (only if multiple classes) - tmp = ps[5:, :] - tmp = tmp * (allmask) - (1.- allmask) * 50. - t = torch.full_like(tmp, cn).to(device) # targets - range_nb = torch.arange(nb, device=device).long() - t[tcls[i], range_nb] = cp - - t = t * (allmask) - lcls += (BCEcls(tmp, t) / (sum_mask * t.shape[0]).float()) # BCE - - # Append targets to text file - # with open('targets.txt', 'a') as file: - # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - - # lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss - lobj += BCEobj(pi[:, :, 4, :, :], tobj) * balance[i] # obj loss - - s = 3 / np # output count scaling - lbox *= h['box'] * s - lobj *= h['obj'] * s * (1.4 if np >= 4 else 1.) 
- lcls *= h['cls'] * s - bs = tobj.shape[0] # batch size - - loss = lbox + lobj + lcls - return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() - - -def build_targets(p, targets, model): - # Build targets for compute_loss(), input targets(image,class,x,y,w,h) - na, nt, device, batch_size = 3, targets.shape[1], targets.device, p[0].shape[0] - - # align targets in batch size - nt_max = 32 * batch_size - while nt > nt_max: - nt_max *= 2 - print('**************** nt max=', nt_max) - max_target = torch.zeros(6, nt_max, device=device) # (6, nt) - for i in range(6): - try: - max_target[i, :nt] = targets[i, :] - # print('Check------', max_target.shape, max_target.device, device) - # print('Check------', targets.shape, targets.device, device) - except Exception as e: - print(e) - # print('Check------', max_target.shape, max_target.device, device) - # print('Check------', targets.shape, targets.device, device) - - tcls, tbox, indices, anch, targets_mask, targets_sum_mask = [], [], [], [], [], [] - gain = torch.ones(6, device=device) # normalized to gridspace gain - off_list = [ - torch.tensor([[1.], [0.]], device=device), - torch.tensor([[0.], [1.]], device=device), - torch.tensor([[-1.], [0.]], device=device), - torch.tensor([[0.], [-1.]], device=device) - ] - # # create indices with anchor and max_target - # # anchor tensor, same as .repeat_interleave(nt) (x, 3) - at = torch.arange(na).view(na, 1).repeat(1, nt_max) - a = at.view(-1) - a = torch.cat((a, a, a, a, a), 0) - - g = 0.5 # offset - # multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) - multi_gpu = is_parallel(model) - for i, j in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers): - # get number of grid points and anchor vec for this yolo layer - anchors = model.module.module_list[j].anchor_vec if multi_gpu else model.module_list[j].anchor_vec - # iou of targets-anchors b,a,c,y,x-> b,a,y,x,c - # gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]].float() # xyxy gain - gain[2:] = torch.tensor(p[i].shape)[[4, 3, 4, 3]].float() # xyxy gain - - # Match targets to anchors - t, offsets = max_target * gain[:, None], 0 - allmask = torch.zeros((na * nt_max)).to(device) - sum_mask = torch.zeros((1)).to(device) - if nt: - r = t[None, 4:6, :] / anchors[..., None] # wh ratio - fmask = torch.max(r, 1. / r).max(1)[0] < model.hyp['anchor_t'] # compare - fmask = fmask.view(1, -1) - # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) - t = t.repeat(1, 1, na).view(6, -1) # filter - - # overlaps - gxy = t.index_select(0, torch.tensor([2, 3], device=device)) - z = torch.zeros_like(gxy) - - jk = (gxy % 1. < g) & (gxy > 1.) - lm = (gxy % 1. 
> (1 - g)) & (gxy < (gain[[2, 3]][:, None] - 1.)) - jk, lm = jk&fmask, lm&fmask - allmask = torch.cat((fmask, jk, lm), 0).view(1, -1).float() - t = torch.cat((t, t, t, t, t), 1) - offsets = torch.cat((z, z + off_list[0], z + off_list[1], z + off_list[2], z + off_list[3]), 1) * g - - # print('----------------------------------------------------------------------------------') - # print('a.shape, t.shape:') - # print(a.shape, t.shape) - # print('gxy.shape, offsets.shape') - # print(gxy.shape, offsets.shape) - # print('fmask.shape, allmask.shape, jk, lm:') - # print(fmask.shape, allmask.shape, jk.shape, lm.shape) - # print('----------------------------------------------------------------------------------') - - sum_mask = allmask.sum() - t = t * allmask - - # Define - b = t.index_select(0, torch.tensor([0], device=device)).long().view(-1) #(3072 * 5) - c = t.index_select(0, torch.tensor([1], device=device)).long().view(-1) #(3072 * 5) - gxy = t.index_select(0, torch.tensor([2, 3], device=device)) #(2, 3072 * 5) - gwh = t.index_select(0, torch.tensor([4, 5], device=device)) #(2, 3072 * 5) - gij = gxy - offsets - gij2 = gij.long() - gi = gij2.index_select(0, torch.tensor([0], device=device)).view(-1) #(2, 3072 * 5) - gj = gij2.index_select(0, torch.tensor([1], device=device)).view(-1) #(2, 3072 * 5) - - # Append - # indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices - indices.append((b, a, gj, gi)) # image, anchor, grid indices - tbox.append(torch.cat((gxy - gij2.float(), gwh), 0)) # box - anch.append(anchors[a]) # anchors - tcls.append(c) # class - targets_mask.append(allmask) - targets_sum_mask.append(sum_mask) - - return tcls, tbox, indices, anch, targets_mask, targets_sum_mask - -# def build_targets(p, targets, model): -# nt = targets.shape[0] # number of anchors, targets -# tcls, tbox, indices, anch, targets_mask, targets_sum_mask = [], [], [], [], [], [] -# gain = torch.ones(6, device=targets.device) # normalized to gridspace gain -# off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() # overlap offsets - -# # align targets in batch size -# batch_size = p[0].shape[0] -# nt_max = 32 * batch_size -# while nt > nt_max: -# nt_max *= 2 -# print('**************** nt max=', nt_max) -# max_target = torch.zeros(nt_max, 6, device=targets.device) # (nt,6) -# for i in range(6): -# max_target[:nt, i] = targets[:, i] - -# g = 0.5 # offset -# multi_gpu = is_parallel(model) -# for i, jj in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers): -# # get number of grid points and anchor vec for this yolo layer -# anchors = model.module.module_list[jj].anchor_vec if multi_gpu else model.module_list[jj].anchor_vec -# gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain - -# # Match targets to anchors -# a, t, offsets = [], max_target * gain, 0 - -# if nt: -# na = anchors.shape[0] # number of anchors -# allmask = torch.zeros((na * nt_max)).to(targets.device) -# sum_mask = torch.zeros((1)).to(targets.device) -# at = torch.arange(na).view(na, 1).repeat(1, nt_max) # anchor tensor, same as .repeat_interleave(nt) -# r = t[None, :, 4:6] / anchors[:, None] # wh ratio -# fmask = torch.max(r, 1. 
/ r).max(2)[0] < model.hyp['anchor_t'] # compare -# # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) -# a, t = at[fmask], t.repeat(na, 1, 1)[fmask] # filter - -# print('----------------------------------------------------------------------------------') -# print('a.shape, at.shape, t.shape:') -# print(a.shape, at.shape, t.shape) -# print('----------------------------------------------------------------------------------') - -# # overlaps -# gxy = t[:, 2:4] # grid xy -# z = torch.zeros_like(gxy) -# j, k = ((gxy % 1. < g) & (gxy > 1.)).T -# l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T - -# print(a.shape, a[j].shape, a[k].shape, a[l].shape, a[m].shape) -# print(t.shape, t[j].shape, t[k].shape, t[l].shape, t[m].shape) - -# a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0) -# offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g - -# allmask = torch.cat((j, k, l, m), 1).float() -# sum_mask = allmask.sum() - -# print('----------------------------------------------------------------------------------') -# print('a.shape, t.shape:') -# print(a.shape, t.shape) -# print('gxy.shape, offsets.shape') -# print(gxy.shape, offsets.shape) -# print('fmask.shape, allmask.shape, j, k, l, m:') -# print(fmask.shape, allmask.shape, j.shape, k.shape, l.shape, m.shape) -# print('----------------------------------------------------------------------------------') - -# t = t * allmask - -# # Define -# b, c = t[:, :2].long().T # image, class -# gxy = t[:, 2:4] # grid xy -# gwh = t[:, 4:6] # grid wh -# gij = (gxy - offsets).long() -# gi, gj = gij.T # grid xy indices - -# # Append -# #indices.append((b, a, gj, gi)) # image, anchor, grid indices -# indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices -# tbox.append(torch.cat((gxy - gij, gwh), 1)) # box -# anch.append(anchors[a]) # anchors -# tcls.append(c) # class -# targets_mask.append(allmask) -# targets_sum_mask.append(sum_mask) - -# return tcls, tbox, indices, anch, targets_mask, targets_sum_mask +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Loss functions + +import torch +import torch.nn as nn + +from utils.general import bbox_iou +from utils.torch_utils import is_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class BCEBlurWithLogitsLoss(nn.Module): + # BCEwithLogitLoss() with reduced missing label effects. 
+ def __init__(self, alpha=0.05): + super(BCEBlurWithLogitsLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() + self.alpha = alpha + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + pred = torch.sigmoid(pred) # prob from logits + dx = pred - true # reduce only missing label effects + # dx = (pred - true).abs() # reduce missing label and false label effects + alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) + loss *= alpha_factor + return loss.mean() + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super(FocalLoss, self).__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = 'none' # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == 'mean': + return loss.mean() + elif self.reduction == 'sum': + return loss.sum() + else: # 'none' + return loss + + +class DeterministicIndex(torch.autograd.Function): + @staticmethod + def forward(ctx, x, indices_list): + ctx.x = x + ctx.indices_list = indices_list + return x[indices_list[0], indices_list[1], :, indices_list[2], indices_list[3]] + + @staticmethod + def backward(ctx, grad_output): + tmp = torch.zeros_like(ctx.x) + ind0, ind1, ind2, ind3 = ctx.indices_list + tmp[ind0, ind1, :, ind2, ind3] = grad_output + return tmp, None + + +# @torchsnooper.snoop(output='/data/wyh/yolor/yolorDebug_1P640.txt') +def compute_loss(p, targets, model): # predictions, targets, model + device = targets.device + + targets = targets.T + for i in range(len(p)): + p[i] = p[i].permute(0, 1, 4, 2, 3) #(6, 3, 80, 80, 85)->(6, 3, 85, 80, 80) + + ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor + lcls, lbox, lobj = ft([0]).to(device), ft([0]).to(device), ft([0]).to(device) + tcls, tbox, indices, anchors, targets_mask, targets_sum_mask = build_targets(p, targets, model) # targets + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]), reduction='sum').to(device) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]), reduction='mean').to(device) + + # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + cp, cn = smooth_BCE(eps=0.0) + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + # per output + nt = 0 # number of targets + np = len(p) # number of outputs + balance = [4.0, 1.0, 0.4] if np == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 + balance = [4.0, 1.0, 0.5, 0.4, 0.1] if np == 5 else balance + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + allmask = targets_mask[i] + sum_mask = 
targets_sum_mask[i] + # tobj = torch.zeros_like(pi[..., 0], device=device) # target obj + tobj = torch.zeros_like(pi[:, :, 0, :, :]).to(device) # target obj + + nb = b.shape[0] # number of targets + if sum_mask.item() > 0: + nt += nb # cumulative targets + # ps = pi[b, a,:, gj, gi] # prediction subset corresponding to targets + ps = DeterministicIndex.apply(pi, (b, a, gj, gi)).permute(1, 0).contiguous() + # GIoU + pxy = ps.index_select(0, torch.tensor([0, 1], device=device)) + pwh = ps.index_select(0, torch.tensor([2, 3], device=device)) + + pxy = pxy.sigmoid() * 2. - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * (anchors[i].T) + pbox = torch.cat((pxy, pwh), 0) # predicted box + giou = bbox_iou(pbox, tbox[i], x1y1x2y2=False, GIoU=True) + giou = giou * (allmask) + (1. - allmask) + lbox += (1.0 - giou).sum() / (sum_mask) # giou loss + # Obj + giou = giou * (allmask) + tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio + + # Class + if model.nc > 1: # cls loss (only if multiple classes) + tmp = ps[5:, :] + tmp = tmp * (allmask) - (1.- allmask) * 50. + t = torch.full_like(tmp, cn).to(device) # targets + range_nb = torch.arange(nb, device=device).long() + t[tcls[i], range_nb] = cp + + t = t * (allmask) + lcls += (BCEcls(tmp, t) / (sum_mask * t.shape[0]).float()) # BCE + + # Append targets to text file + # with open('targets.txt', 'a') as file: + # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] + + # lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss + lobj += BCEobj(pi[:, :, 4, :, :], tobj) * balance[i] # obj loss + + s = 3 / np # output count scaling + lbox *= h['box'] * s + lobj *= h['obj'] * s * (1.4 if np >= 4 else 1.) + lcls *= h['cls'] * s + bs = tobj.shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() + + +def build_targets(p, targets, model): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + na, nt, device, batch_size = 3, targets.shape[1], targets.device, p[0].shape[0] + + # align targets in batch size + nt_max = 32 * batch_size + while nt > nt_max: + nt_max *= 2 + print('**************** nt max=', nt_max) + max_target = torch.zeros(6, nt_max, device=device) # (6, nt) + for i in range(6): + try: + max_target[i, :nt] = targets[i, :] + # print('Check------', max_target.shape, max_target.device, device) + # print('Check------', targets.shape, targets.device, device) + except Exception as e: + print(e) + # print('Check------', max_target.shape, max_target.device, device) + # print('Check------', targets.shape, targets.device, device) + + tcls, tbox, indices, anch, targets_mask, targets_sum_mask = [], [], [], [], [], [] + gain = torch.ones(6, device=device) # normalized to gridspace gain + off_list = [ + torch.tensor([[1.], [0.]], device=device), + torch.tensor([[0.], [1.]], device=device), + torch.tensor([[-1.], [0.]], device=device), + torch.tensor([[0.], [-1.]], device=device) + ] + # # create indices with anchor and max_target + # # anchor tensor, same as .repeat_interleave(nt) (x, 3) + at = torch.arange(na).view(na, 1).repeat(1, nt_max) + a = at.view(-1) + a = torch.cat((a, a, a, a, a), 0) + + g = 0.5 # offset + # multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + multi_gpu = is_parallel(model) + for i, j in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers): + # get number of grid points and anchor vec for this yolo layer + anchors = 
model.module.module_list[j].anchor_vec if multi_gpu else model.module_list[j].anchor_vec + # iou of targets-anchors b,a,c,y,x-> b,a,y,x,c + # gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]].float() # xyxy gain + gain[2:] = torch.tensor(p[i].shape)[[4, 3, 4, 3]].float() # xyxy gain + + # Match targets to anchors + t, offsets = max_target * gain[:, None], 0 + allmask = torch.zeros((na * nt_max)).to(device) + sum_mask = torch.zeros((1)).to(device) + if nt: + r = t[None, 4:6, :] / anchors[..., None] # wh ratio + fmask = torch.max(r, 1. / r).max(1)[0] < model.hyp['anchor_t'] # compare + fmask = fmask.view(1, -1) + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) + t = t.repeat(1, 1, na).view(6, -1) # filter + + # overlaps + gxy = t.index_select(0, torch.tensor([2, 3], device=device)) + z = torch.zeros_like(gxy) + + jk = (gxy % 1. < g) & (gxy > 1.) + lm = (gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]][:, None] - 1.)) + jk, lm = jk&fmask, lm&fmask + allmask = torch.cat((fmask, jk, lm), 0).view(1, -1).float() + t = torch.cat((t, t, t, t, t), 1) + offsets = torch.cat((z, z + off_list[0], z + off_list[1], z + off_list[2], z + off_list[3]), 1) * g + + # print('----------------------------------------------------------------------------------') + # print('a.shape, t.shape:') + # print(a.shape, t.shape) + # print('gxy.shape, offsets.shape') + # print(gxy.shape, offsets.shape) + # print('fmask.shape, allmask.shape, jk, lm:') + # print(fmask.shape, allmask.shape, jk.shape, lm.shape) + # print('----------------------------------------------------------------------------------') + + sum_mask = allmask.sum() + t = t * allmask + + # Define + b = t.index_select(0, torch.tensor([0], device=device)).long().view(-1) #(3072 * 5) + c = t.index_select(0, torch.tensor([1], device=device)).long().view(-1) #(3072 * 5) + gxy = t.index_select(0, torch.tensor([2, 3], device=device)) #(2, 3072 * 5) + gwh = t.index_select(0, torch.tensor([4, 5], device=device)) #(2, 3072 * 5) + gij = gxy - offsets + gij2 = gij.long() + gi = gij2.index_select(0, torch.tensor([0], device=device)).view(-1) #(2, 3072 * 5) + gj = gij2.index_select(0, torch.tensor([1], device=device)).view(-1) #(2, 3072 * 5) + + # Append + # indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append((b, a, gj, gi)) # image, anchor, grid indices + tbox.append(torch.cat((gxy - gij2.float(), gwh), 0)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + targets_mask.append(allmask) + targets_sum_mask.append(sum_mask) + + return tcls, tbox, indices, anch, targets_mask, targets_sum_mask + +# def build_targets(p, targets, model): +# nt = targets.shape[0] # number of anchors, targets +# tcls, tbox, indices, anch, targets_mask, targets_sum_mask = [], [], [], [], [], [] +# gain = torch.ones(6, device=targets.device) # normalized to gridspace gain +# off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() # overlap offsets + +# # align targets in batch size +# batch_size = p[0].shape[0] +# nt_max = 32 * batch_size +# while nt > nt_max: +# nt_max *= 2 +# print('**************** nt max=', nt_max) +# max_target = torch.zeros(nt_max, 6, device=targets.device) # (nt,6) +# for i in range(6): +# max_target[:nt, i] = targets[:, i] + +# g = 0.5 # offset +# multi_gpu = is_parallel(model) +# for i, jj in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers): +# # get number of grid points and anchor vec for 
this yolo layer +# anchors = model.module.module_list[jj].anchor_vec if multi_gpu else model.module_list[jj].anchor_vec +# gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain + +# # Match targets to anchors +# a, t, offsets = [], max_target * gain, 0 + +# if nt: +# na = anchors.shape[0] # number of anchors +# allmask = torch.zeros((na * nt_max)).to(targets.device) +# sum_mask = torch.zeros((1)).to(targets.device) +# at = torch.arange(na).view(na, 1).repeat(1, nt_max) # anchor tensor, same as .repeat_interleave(nt) +# r = t[None, :, 4:6] / anchors[:, None] # wh ratio +# fmask = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare +# # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) +# a, t = at[fmask], t.repeat(na, 1, 1)[fmask] # filter + +# print('----------------------------------------------------------------------------------') +# print('a.shape, at.shape, t.shape:') +# print(a.shape, at.shape, t.shape) +# print('----------------------------------------------------------------------------------') + +# # overlaps +# gxy = t[:, 2:4] # grid xy +# z = torch.zeros_like(gxy) +# j, k = ((gxy % 1. < g) & (gxy > 1.)).T +# l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T + +# print(a.shape, a[j].shape, a[k].shape, a[l].shape, a[m].shape) +# print(t.shape, t[j].shape, t[k].shape, t[l].shape, t[m].shape) + +# a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0) +# offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g + +# allmask = torch.cat((j, k, l, m), 1).float() +# sum_mask = allmask.sum() + +# print('----------------------------------------------------------------------------------') +# print('a.shape, t.shape:') +# print(a.shape, t.shape) +# print('gxy.shape, offsets.shape') +# print(gxy.shape, offsets.shape) +# print('fmask.shape, allmask.shape, j, k, l, m:') +# print(fmask.shape, allmask.shape, j.shape, k.shape, l.shape, m.shape) +# print('----------------------------------------------------------------------------------') + +# t = t * allmask + +# # Define +# b, c = t[:, :2].long().T # image, class +# gxy = t[:, 2:4] # grid xy +# gwh = t[:, 4:6] # grid wh +# gij = (gxy - offsets).long() +# gi, gj = gij.T # grid xy indices + +# # Append +# #indices.append((b, a, gj, gi)) # image, anchor, grid indices +# indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices +# tbox.append(torch.cat((gxy - gij, gwh), 1)) # box +# anch.append(anchors[a]) # anchors +# tcls.append(c) # class +# targets_mask.append(allmask) +# targets_sum_mask.append(sum_mask) + +# return tcls, tbox, indices, anch, targets_mask, targets_sum_mask diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/metrics.py b/PyTorch/contrib/cv/detection/YOLOR/utils/metrics.py index fc4fa4f77d897f0c93debcad4cb26eb253c632eb..51622cb73d8ea438157f73bce771bfe25c00452f 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/metrics.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/metrics.py @@ -1,154 +1,154 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Model validation metrics - -import matplotlib.pyplot as plt -import numpy as np - - -def fitness(x): - # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def fitness_p(x): - # Model fitness as a weighted combination of metrics - w = [1.0, 0.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def fitness_r(x): - # Model fitness as a weighted combination of metrics - w = [0.0, 1.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def fitness_ap50(x): - # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 1.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def fitness_ap(x): - # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return (x[:, :4] * w).sum(1) - - -def fitness_f(x): - # Model fitness as a weighted combination of metrics - #w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] - return ((x[:, 0]*x[:, 1])/(x[:, 0]+x[:, 1])) - - -def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. - # Arguments - tp: True positives (nparray, nx1 or nx10). - conf: Objectness value from 0-1 (nparray). - pred_cls: Predicted object classes (nparray). - target_cls: True object classes (nparray). - plot: Plot precision-recall curve at mAP@0.5 - fname: Plot filename - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Sort by objectness - i = np.argsort(-conf) - tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] - - # Find unique classes - unique_classes = np.unique(target_cls) - - # Create Precision-Recall curve and compute AP for each class - px, py = np.linspace(0, 1, 1000), [] # for plotting - pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 - s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 
10 for mAP0.5...0.95) - ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) - for ci, c in enumerate(unique_classes): - i = pred_cls == c - n_l = (target_cls == c).sum() # number of labels - n_p = i.sum() # number of predictions - - if n_p == 0 or n_l == 0: - continue - else: - # Accumulate FPs and TPs - fpc = (1 - tp[i]).cumsum(0) - tpc = tp[i].cumsum(0) - - # Recall - recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases - - # Precision - precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score - - # AP from recall-precision curve - for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 - - # Compute F1 score (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + 1e-16) - - if plot: - py = np.stack(py, axis=1) - fig, ax = plt.subplots(1, 1, figsize=(5, 5)) - ax.plot(px, py, linewidth=0.5, color='grey') # plot(recall, precision) - ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) - ax.set_xlabel('Recall') - ax.set_ylabel('Precision') - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - plt.legend() - fig.tight_layout() - fig.savefig(fname, dpi=200) - - return p, r, ap, f1, unique_classes.astype('int32') - - -def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves. - Source: https://github.com/rbgirshick/py-faster-rcnn. - # Arguments - recall: The recall curve (list). - precision: The precision curve (list). - # Returns - The average precision as computed in py-faster-rcnn. - """ - - # Append sentinel values to beginning and end - mrec = np.concatenate(([0.0], recall, [1.0])) - mpre = np.concatenate(([1.0], precision, [0.0])) - - # Compute the precision envelope - mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) - - # Integrate area under curve - method = 'interp' # methods: 'continuous', 'interp' - if method == 'interp': - x = np.linspace(0, 1, 101) # 101-point interp (COCO) - ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate - else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve - - return ap, mpre, mrec +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Model validation metrics + +import matplotlib.pyplot as plt +import numpy as np + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_p(x): + # Model fitness as a weighted combination of metrics + w = [1.0, 0.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_r(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 1.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_ap50(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 1.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_ap(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def fitness_f(x): + # Model fitness as a weighted combination of metrics + #w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return ((x[:, 0]*x[:, 1])/(x[:, 0]+x[:, 1])) + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + fname: Plot filename + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 + s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 
10 for mAP0.5...0.95) + ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = (target_cls == c).sum() # number of labels + n_p = i.sum() # number of predictions + + if n_p == 0 or n_l == 0: + continue + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + 1e-16) # recall curve + r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 score (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + 1e-16) + + if plot: + py = np.stack(py, axis=1) + fig, ax = plt.subplots(1, 1, figsize=(5, 5)) + ax.plot(px, py, linewidth=0.5, color='grey') # plot(recall, precision) + ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + plt.legend() + fig.tight_layout() + fig.savefig(fname, dpi=200) + + return p, r, ap, f1, unique_classes.astype('int32') + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rbgirshick/py-faster-rcnn. + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/parse_config.py b/PyTorch/contrib/cv/detection/YOLOR/utils/parse_config.py index 8315f86a19f752c906fad8791e408f582b5fb1af..f8bfcb5f3abb182c66aa4de042372ac3ab68fee1 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/parse_config.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/parse_config.py @@ -1,85 +1,85 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import numpy as np - - -def parse_model_cfg(path): - # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' - if not path.endswith('.cfg'): # add .cfg suffix if omitted - path += '.cfg' - if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted - path = 'cfg' + os.sep + path - - with open(path, 'r') as f: - lines = f.read().split('\n') - lines = [x for x in lines if x and not x.startswith('#')] - lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces - mdefs = [] # module definitions - for line in lines: - if line.startswith('['): # This marks the start of a new block - mdefs.append({}) - mdefs[-1]['type'] = line[1:-1].rstrip() - if mdefs[-1]['type'] == 'convolutional': - mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) - - else: - key, val = line.split("=") - key = key.rstrip() - - if key == 'anchors': # return nparray - mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors - elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val): # return array - mdefs[-1][key] = [int(x) for x in val.split(',')] - else: - val = val.strip() - if val.isnumeric(): # return int or float - mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) - else: - mdefs[-1][key] = val # return string - - # Check all fields are supported - supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', - 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', - 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', - 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'atoms', 'na', 'nc'] - - f = [] # fields - for x in mdefs[1:]: - [f.append(k) for k in x if k not in f] - u = [x for x in f if x not in supported] # unsupported fields - assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path) - - return mdefs - - -def parse_data_cfg(path): - # Parses the data configuration file - if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted - path = 'data' + os.sep + path - - with open(path, 'r') as f: - lines = f.readlines() - - options = dict() - for line in lines: - line = line.strip() - if line == '' or line.startswith('#'): - continue - key, val = line.split('=') - options[key.strip()] = val.strip() - - return options +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np + + +def parse_model_cfg(path): + # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' + if not path.endswith('.cfg'): # add .cfg suffix if omitted + path += '.cfg' + if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted + path = 'cfg' + os.sep + path + + with open(path, 'r') as f: + lines = f.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + mdefs = [] # module definitions + for line in lines: + if line.startswith('['): # This marks the start of a new block + mdefs.append({}) + mdefs[-1]['type'] = line[1:-1].rstrip() + if mdefs[-1]['type'] == 'convolutional': + mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) + + else: + key, val = line.split("=") + key = key.rstrip() + + if key == 'anchors': # return nparray + mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors + elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val): # return array + mdefs[-1][key] = [int(x) for x in val.split(',')] + else: + val = val.strip() + if val.isnumeric(): # return int or float + mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) + else: + mdefs[-1][key] = val # return string + + # Check all fields are supported + supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', + 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', + 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', + 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'atoms', 'na', 'nc'] + + f = [] # fields + for x in mdefs[1:]: + [f.append(k) for k in x if k not in f] + u = [x for x in f if x not in supported] # unsupported fields + assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path) + + return mdefs + + +def parse_data_cfg(path): + # Parses the data configuration file + if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted + path = 'data' + os.sep + path + + with open(path, 'r') as f: + lines = f.readlines() + + options = dict() + for line in lines: + line = line.strip() + if line == '' or line.startswith('#'): + continue + key, val = line.split('=') + options[key.strip()] = val.strip() + + return options diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/plots.py b/PyTorch/contrib/cv/detection/YOLOR/utils/plots.py index af98b973bbd214fc48d62277d608467ba20afc71..dff7b2c63f9c9063e12c56cb895072e007747167 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/plots.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/plots.py @@ -1,394 +1,394 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Plotting utils - -import glob -import math -import os -import random -from copy import copy -from pathlib import Path - -import cv2 -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import torch -import yaml -from PIL import Image -from scipy.signal import butter, filtfilt - -from utils.general import xywh2xyxy, xyxy2xywh -from utils.metrics import fitness - -# Settings -matplotlib.use('Agg') # for writing to files only - - -def color_list(): - # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb - def hex2rgb(h): - return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) - - return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']] - - -def hist2d(x, y, n=100): - # 2d histogram used in labels.png and evolve.png - xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) - hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) - xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) - yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) - return np.log(hist[xidx, yidx]) - - -def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): - # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy - def butter_lowpass(cutoff, fs, order): - nyq = 0.5 * fs - normal_cutoff = cutoff / nyq - return butter(order, normal_cutoff, btype='low', analog=False) - - b, a = butter_lowpass(cutoff, fs, order=order) - return filtfilt(b, a, data) # forward-backward filter - - -def plot_one_box(x, img, color=None, label=None, line_thickness=None): - # Plots one bounding box on image img - tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness - color = color or [random.randint(0, 255) for _ in range(3)] - c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) - cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) - if label: - tf = max(tl - 1, 1) # font thickness - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 - cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) - - -def plot_wh_methods(): # from utils.general import *; plot_wh_methods() - # Compares the two methods for width-height anchor multiplication - # https://github.com/ultralytics/yolov3/issues/168 - x = np.arange(-4.0, 4.0, .1) - ya = np.exp(x) - yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 - - fig = plt.figure(figsize=(6, 3), dpi=150) - plt.plot(x, ya, '.-', label='YOLO') - plt.plot(x, yb ** 2, '.-', label='YOLO ^2') - plt.plot(x, yb ** 1.6, '.-', label='YOLO ^1.6') - plt.xlim(left=-4, right=4) - plt.ylim(bottom=0, top=6) - plt.xlabel('input') - plt.ylabel('output') - plt.grid() - plt.legend() - fig.tight_layout() - fig.savefig('comparison.png', dpi=200) - - -def output_to_target(output, width, height): - # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] - if isinstance(output, torch.Tensor): - output = output.cpu().numpy() - - targets = [] - for i, o in enumerate(output): - if o is not None: - for pred in o: - box = pred[:4] - w = (box[2] - box[0]) / width - h = (box[3] - box[1]) / height - x = box[0] / width + w / 2 - y = box[1] / height + h / 2 - conf = pred[4] - 
cls = int(pred[5]) - - targets.append([item.cpu() if isinstance(item, torch.Tensor) else item for item in [i, cls, x, y, w, h, conf]]) - - return np.array(targets) - - -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): - # Plot image grid with labels - - if isinstance(images, torch.Tensor): - images = images.cpu().float().numpy() - if isinstance(targets, torch.Tensor): - targets = targets.cpu().numpy() - - # un-normalise - if np.max(images[0]) <= 1: - images *= 255 - - tl = 3 # line thickness - tf = max(tl - 1, 1) # font thickness - bs, _, h, w = images.shape # batch size, _, height, width - bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) - - # Check if we should resize - scale_factor = max_size / max(h, w) - if scale_factor < 1: - h = math.ceil(scale_factor * h) - w = math.ceil(scale_factor * w) - - colors = color_list() # list of colors - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init - for i, img in enumerate(images): - if i == max_subplots: # if last batch has fewer images than we expect - break - - block_x = int(w * (i // ns)) - block_y = int(h * (i % ns)) - - img = img.transpose(1, 2, 0) - if scale_factor < 1: - img = cv2.resize(img, (w, h)) - - mosaic[block_y:block_y + h, block_x:block_x + w, :] = img - if len(targets) > 0: - image_targets = targets[targets[:, 0] == i] - boxes = xywh2xyxy(image_targets[:, 2:6]).T - classes = image_targets[:, 1].astype('int') - labels = image_targets.shape[1] == 6 # labels if no conf column - conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) - - boxes[[0, 2]] *= w - boxes[[0, 2]] += block_x - boxes[[1, 3]] *= h - boxes[[1, 3]] += block_y - for j, box in enumerate(boxes.T): - cls = int(classes[j]) - color = colors[cls % len(colors)] - cls = names[cls] if names else cls - if labels or conf[j] > 0.25: # 0.25 conf thresh - label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) - - # Draw image filename labels - if paths: - label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, - lineType=cv2.LINE_AA) - - # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) - - if fname: - r = min(1280. 
/ max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) - # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save - Image.fromarray(mosaic).save(fname) # PIL save - return mosaic - - -def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): - # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals - y = [] - for _ in range(epochs): - scheduler.step() - y.append(optimizer.param_groups[0]['lr']) - plt.plot(y, '.-', label='LR') - plt.xlabel('epoch') - plt.ylabel('LR') - plt.grid() - plt.xlim(0, epochs) - plt.ylim(0) - plt.tight_layout() - plt.savefig(Path(save_dir) / 'LR.png', dpi=200) - - -def plot_test_txt(): # from utils.general import *; plot_test() - # Plot test.txt histograms - x = np.loadtxt('test.txt', dtype=np.float32) - box = xyxy2xywh(x[:, :4]) - cx, cy = box[:, 0], box[:, 1] - - fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) - ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) - ax.set_aspect('equal') - plt.savefig('hist2d.png', dpi=300) - - fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) - ax[0].hist(cx, bins=600) - ax[1].hist(cy, bins=600) - plt.savefig('hist1d.png', dpi=200) - - -def plot_targets_txt(): # from utils.general import *; plot_targets_txt() - # Plot targets.txt histograms - x = np.loadtxt('targets.txt', dtype=np.float32).T - s = ['x targets', 'y targets', 'width targets', 'height targets'] - fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - for i in range(4): - ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std())) - ax[i].legend() - ax[i].set_title(s[i]) - plt.savefig('targets.jpg', dpi=200) - - -def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt() - # Plot study.txt generated by test.py - fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) - ax = ax.ravel() - - fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) - for f in ['study/study_coco_yolo%s.txt' % x for x in ['s', 'm', 'l', 'x']]: - y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T - x = np.arange(y.shape[1]) if x is None else np.array(x) - s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] - for i in range(7): - ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) - ax[i].set_title(s[i]) - - j = y[3].argmax() + 1 - ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, - label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO')) - - ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], - 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') - - ax2.grid() - ax2.set_xlim(0, 30) - ax2.set_ylim(28, 50) - ax2.set_yticks(np.arange(30, 55, 5)) - ax2.set_xlabel('GPU Speed (ms/img)') - ax2.set_ylabel('COCO AP val') - ax2.legend(loc='lower right') - plt.savefig('study_mAP_latency.png', dpi=300) - plt.savefig(f.replace('.txt', '.png'), dpi=300) - - -def plot_labels(labels, save_dir=''): - # plot dataset labels - c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes - nc = int(c.max() + 1) # number of classes - - fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) - ax = ax.ravel() - ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) - ax[0].set_xlabel('classes') - ax[1].scatter(b[0], b[1], 
c=hist2d(b[0], b[1], 90), cmap='jet') - ax[1].set_xlabel('x') - ax[1].set_ylabel('y') - ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet') - ax[2].set_xlabel('width') - ax[2].set_ylabel('height') - plt.savefig(Path(save_dir) / 'labels.png', dpi=200) - plt.close() - - # seaborn correlogram - try: - import seaborn as sns - import pandas as pd - x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) - sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o', - plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02), - diag_kws=dict(bins=50)) - plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200) - plt.close() - except Exception as e: - pass - - -def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution() - # Plot hyperparameter evolution results in evolve.txt - with open(yaml_file) as f: - hyp = yaml.load(f, Loader=yaml.FullLoader) - x = np.loadtxt('evolve.txt', ndmin=2) - f = fitness(x) - # weights = (f - f.min()) ** 2 # for weighted results - plt.figure(figsize=(10, 12), tight_layout=True) - matplotlib.rc('font', **{'size': 8}) - for i, (k, v) in enumerate(hyp.items()): - y = x[:, i + 7] - # mu = (y * weights).sum() / weights.sum() # best weighted result - mu = y[f.argmax()] # best single result - plt.subplot(6, 5, i + 1) - plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') - plt.plot(mu, f.max(), 'k+', markersize=15) - plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters - if i % 5 != 0: - plt.yticks([]) - print('%15s: %.3g' % (k, mu)) - plt.savefig('evolve.png', dpi=200) - print('\nPlot saved as evolve.png') - - -def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay() - # Plot training 'results*.txt', overlaying train and val losses - s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends - t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles - for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')): - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T - n = results.shape[1] # number of rows - x = range(start, min(stop, n) if stop else n) - fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True) - ax = ax.ravel() - for i in range(5): - for j in [i, i + 5]: - y = results[j, x] - ax[i].plot(x, y, marker='.', label=s[j]) - # y_smooth = butter_lowpass_filtfilt(y) - # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j]) - - ax[i].set_title(t[i]) - ax[i].legend() - ax[i].set_ylabel(f) if i == 0 else None # add filename - fig.savefig(f.replace('.txt', '.png'), dpi=200) - - -def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): - # from utils.general import *; plot_results(save_dir='runs/train/exp0') - # Plot training 'results*.txt' - fig, ax = plt.subplots(2, 5, figsize=(12, 6)) - ax = ax.ravel() - s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', - 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] - if bucket: - # os.system('rm -rf storage.googleapis.com') - # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] - files = ['%g.txt' % x for x in id] - c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/%g.txt' % (bucket, x) for x in id) - os.system(c) - else: - files = glob.glob(str(Path(save_dir) / '*.txt')) + 
glob.glob('../../Downloads/results*.txt') - assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) - for fi, f in enumerate(files): - try: - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T - n = results.shape[1] # number of rows - x = range(start, min(stop, n) if stop else n) - for i in range(10): - y = results[i, x] - if i in [0, 1, 2, 5, 6, 7]: - y[y == 0] = np.nan # don't show zero loss values - # y /= y[0] # normalize - label = labels[fi] if len(labels) else Path(f).stem - ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6) - ax[i].set_title(s[i]) - # if i in [5, 6, 7]: # share train and val loss y axes - # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) - except Exception as e: - print('Warning: Plotting error for %s; %s' % (f, e)) - - fig.tight_layout() - ax[1].legend() - fig.savefig(Path(save_dir) / 'results.png', dpi=200) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Plotting utils + +import glob +import math +import os +import random +from copy import copy +from pathlib import Path + +import cv2 +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import torch +import yaml +from PIL import Image +from scipy.signal import butter, filtfilt + +from utils.general import xywh2xyxy, xyxy2xywh +from utils.metrics import fitness + +# Settings +matplotlib.use('Agg') # for writing to files only + + +def color_list(): + # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb + def hex2rgb(h): + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) + + return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']] + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype='low', analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def plot_one_box(x, img, color=None, label=None, line_thickness=None): + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) 
# font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + + +def plot_wh_methods(): # from utils.general import *; plot_wh_methods() + # Compares the two methods for width-height anchor multiplication + # https://github.com/ultralytics/yolov3/issues/168 + x = np.arange(-4.0, 4.0, .1) + ya = np.exp(x) + yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 + + fig = plt.figure(figsize=(6, 3), dpi=150) + plt.plot(x, ya, '.-', label='YOLO') + plt.plot(x, yb ** 2, '.-', label='YOLO ^2') + plt.plot(x, yb ** 1.6, '.-', label='YOLO ^1.6') + plt.xlim(left=-4, right=4) + plt.ylim(bottom=0, top=6) + plt.xlabel('input') + plt.ylabel('output') + plt.grid() + plt.legend() + fig.tight_layout() + fig.savefig('comparison.png', dpi=200) + + +def output_to_target(output, width, height): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] + if isinstance(output, torch.Tensor): + output = output.cpu().numpy() + + targets = [] + for i, o in enumerate(output): + if o is not None: + for pred in o: + box = pred[:4] + w = (box[2] - box[0]) / width + h = (box[3] - box[1]) / height + x = box[0] / width + w / 2 + y = box[1] / height + h / 2 + conf = pred[4] + cls = int(pred[5]) + + targets.append([item.cpu() if isinstance(item, torch.Tensor) else item for item in [i, cls, x, y, w, h, conf]]) + + return np.array(targets) + + +def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): + # Plot image grid with labels + + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + + # un-normalise + if np.max(images[0]) <= 1: + images *= 255 + + tl = 3 # line thickness + tf = max(tl - 1, 1) # font thickness + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + + # Check if we should resize + scale_factor = max_size / max(h, w) + if scale_factor < 1: + h = math.ceil(scale_factor * h) + w = math.ceil(scale_factor * w) + + colors = color_list() # list of colors + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, img in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + + block_x = int(w * (i // ns)) + block_y = int(h * (i % ns)) + + img = img.transpose(1, 2, 0) + if scale_factor < 1: + img = cv2.resize(img, (w, h)) + + mosaic[block_y:block_y + h, block_x:block_x + w, :] = img + if len(targets) > 0: + image_targets = targets[targets[:, 0] == i] + boxes = xywh2xyxy(image_targets[:, 2:6]).T + classes = image_targets[:, 1].astype('int') + labels = image_targets.shape[1] == 6 # labels if no conf column + conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + + boxes[[0, 2]] *= w + boxes[[0, 2]] += block_x + boxes[[1, 3]] *= h + boxes[[1, 3]] += block_y + for j, box in enumerate(boxes.T): + cls = int(classes[j]) + color = colors[cls % len(colors)] + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) + plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + + # Draw 
image filename labels + if paths: + label = Path(paths[i]).name[:40] # trim to 40 char + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + lineType=cv2.LINE_AA) + + # Image border + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) + + if fname: + r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save + Image.fromarray(mosaic).save(fname) # PIL save + return mosaic + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]['lr']) + plt.plot(y, '.-', label='LR') + plt.xlabel('epoch') + plt.ylabel('LR') + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.tight_layout() + plt.savefig(Path(save_dir) / 'LR.png', dpi=200) + + +def plot_test_txt(): # from utils.general import *; plot_test() + # Plot test.txt histograms + x = np.loadtxt('test.txt', dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect('equal') + plt.savefig('hist2d.png', dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig('hist1d.png', dpi=200) + + +def plot_targets_txt(): # from utils.general import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt('targets.txt', dtype=np.float32).T + s = ['x targets', 'y targets', 'width targets', 'height targets'] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std())) + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig('targets.jpg', dpi=200) + + +def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt() + # Plot study.txt generated by test.py + fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) + ax = ax.ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + for f in ['study/study_coco_yolo%s.txt' % x for x in ['s', 'm', 'l', 'x']]: + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None else np.array(x) + s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] + for i in range(7): + ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, + label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO')) + + ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], + 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') + + ax2.grid() + ax2.set_xlim(0, 30) + ax2.set_ylim(28, 50) + ax2.set_yticks(np.arange(30, 55, 5)) + ax2.set_xlabel('GPU Speed (ms/img)') + ax2.set_ylabel('COCO AP val') + ax2.legend(loc='lower right') + plt.savefig('study_mAP_latency.png', 
dpi=300) + plt.savefig(f.replace('.txt', '.png'), dpi=300) + + +def plot_labels(labels, save_dir=''): + # plot dataset labels + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + ax[0].set_xlabel('classes') + ax[1].scatter(b[0], b[1], c=hist2d(b[0], b[1], 90), cmap='jet') + ax[1].set_xlabel('x') + ax[1].set_ylabel('y') + ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet') + ax[2].set_xlabel('width') + ax[2].set_ylabel('height') + plt.savefig(Path(save_dir) / 'labels.png', dpi=200) + plt.close() + + # seaborn correlogram + try: + import seaborn as sns + import pandas as pd + x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) + sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o', + plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02), + diag_kws=dict(bins=50)) + plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200) + plt.close() + except Exception as e: + pass + + +def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution() + # Plot hyperparameter evolution results in evolve.txt + with open(yaml_file) as f: + hyp = yaml.load(f, Loader=yaml.FullLoader) + x = np.loadtxt('evolve.txt', ndmin=2) + f = fitness(x) + # weights = (f - f.min()) ** 2 # for weighted results + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc('font', **{'size': 8}) + for i, (k, v) in enumerate(hyp.items()): + y = x[:, i + 7] + # mu = (y * weights).sum() / weights.sum() # best weighted result + mu = y[f.argmax()] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') + plt.plot(mu, f.max(), 'k+', markersize=15) + plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print('%15s: %.3g' % (k, mu)) + plt.savefig('evolve.png', dpi=200) + print('\nPlot saved as evolve.png') + + +def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay() + # Plot training 'results*.txt', overlaying train and val losses + s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends + t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles + for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')): + results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + n = results.shape[1] # number of rows + x = range(start, min(stop, n) if stop else n) + fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True) + ax = ax.ravel() + for i in range(5): + for j in [i, i + 5]: + y = results[j, x] + ax[i].plot(x, y, marker='.', label=s[j]) + # y_smooth = butter_lowpass_filtfilt(y) + # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j]) + + ax[i].set_title(t[i]) + ax[i].legend() + ax[i].set_ylabel(f) if i == 0 else None # add filename + fig.savefig(f.replace('.txt', '.png'), dpi=200) + + +def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): + # from utils.general import *; plot_results(save_dir='runs/train/exp0') + # Plot training 'results*.txt' + fig, ax = plt.subplots(2, 5, figsize=(12, 6)) + ax = ax.ravel() + s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', + 'val Box', 'val 
Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] + if bucket: + # os.system('rm -rf storage.googleapis.com') + # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] + files = ['%g.txt' % x for x in id] + c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/%g.txt' % (bucket, x) for x in id) + os.system(c) + else: + files = glob.glob(str(Path(save_dir) / '*.txt')) + glob.glob('../../Downloads/results*.txt') + assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + n = results.shape[1] # number of rows + x = range(start, min(stop, n) if stop else n) + for i in range(10): + y = results[i, x] + if i in [0, 1, 2, 5, 6, 7]: + y[y == 0] = np.nan # don't show zero loss values + # y /= y[0] # normalize + label = labels[fi] if len(labels) else Path(f).stem + ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6) + ax[i].set_title(s[i]) + # if i in [5, 6, 7]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print('Warning: Plotting error for %s; %s' % (f, e)) + + fig.tight_layout() + ax[1].legend() + fig.savefig(Path(save_dir) / 'results.png', dpi=200) diff --git a/PyTorch/contrib/cv/detection/YOLOR/utils/torch_utils.py b/PyTorch/contrib/cv/detection/YOLOR/utils/torch_utils.py index 229bb26bc200dd7f4bc57794be7d246529bf66f8..707a4f5496c660785d475383797736029c238665 100644 --- a/PyTorch/contrib/cv/detection/YOLOR/utils/torch_utils.py +++ b/PyTorch/contrib/cv/detection/YOLOR/utils/torch_utils.py @@ -1,313 +1,313 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# PyTorch utils - -import logging -import math -import os -import time -from contextlib import contextmanager -from copy import deepcopy - -import torch -import torch.backends.cudnn as cudnn -import torch.nn as nn -import torch.nn.functional as F -import torchvision - -logger = logging.getLogger(__name__) - - -@contextmanager -def torch_distributed_zero_first(local_rank: int): - """ - Decorator to make all processes in distributed training wait for each local_master to do something. 
- """ - if local_rank not in [-1, 0]: - torch.distributed.barrier() - yield - if local_rank == 0: - torch.distributed.barrier() - - -def init_torch_seeds(seed=0): - # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html - torch.manual_seed(seed) - if seed == 0: # slower, more reproducible - cudnn.deterministic = True - cudnn.benchmark = False - else: # faster, less reproducible - cudnn.deterministic = False - cudnn.benchmark = True - - -def select_device(device='', npu='', apex=False, batch_size=None): - npu_request = device.lower() == 'npu' - if npu_request and npu != -1: - torch.npu.set_device("npu:%d" % npu) - print('Using NPU %d to train' % npu) - return torch.device("npu:%d" % npu) - # device = 'cpu' or '0' or '0,1,2,3' - cpu_request = device.lower() == 'cpu' - if device and not cpu_request: # if device requested other than 'cpu' - os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity - - cuda = False if cpu_request else torch.cuda.is_available() - if cuda: - c = 1024 ** 2 # bytes to MB - ng = torch.cuda.device_count() - if ng > 1 and batch_size: # check that batch_size is compatible with device_count - assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) - x = [torch.cuda.get_device_properties(i) for i in range(ng)] - s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex - for i in range(0, ng): - if i == 1: - s = ' ' * len(s) - print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % - (s, i, x[i].name, x[i].total_memory / c)) - else: - print('Using CPU') - - print('') # skip a line - return torch.device('cuda:0' if cuda else 'cpu') - - -def time_synchronized(): - torch.cuda.synchronize() if torch.cuda.is_available() else None - return time.time() - - -def is_parallel(model): - return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) - - -def intersect_dicts(da, db, exclude=()): - # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values - return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} - - -def initialize_weights(model): - for m in model.modules(): - t = type(m) - if t is nn.Conv2d: - pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif t is nn.BatchNorm2d: - m.eps = 1e-3 - m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: - m.inplace = True - - -def find_modules(model, mclass=nn.Conv2d): - # Finds layer indices matching module class 'mclass' - return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] - - -def sparsity(model): - # Return global model sparsity - a, b = 0., 0. - for p in model.parameters(): - a += p.numel() - b += (p == 0).sum() - return b / a - - -def prune(model, amount=0.3): - # Prune model to requested global sparsity - import torch.nn.utils.prune as prune - print('Pruning model... 
', end='') - for name, m in model.named_modules(): - if isinstance(m, nn.Conv2d): - prune.l1_unstructured(m, name='weight', amount=amount) # prune - prune.remove(m, 'weight') # make permanent - print(' %.3g global sparsity' % sparsity(model)) - - -def fuse_conv_and_bn(conv, bn): - # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ - fusedconv = nn.Conv2d(conv.in_channels, - conv.out_channels, - kernel_size=conv.kernel_size, - stride=conv.stride, - padding=conv.padding, - groups=conv.groups, - bias=True).requires_grad_(False).to(conv.weight.device) - - # prepare filters - w_conv = conv.weight.clone().view(conv.out_channels, -1) - w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) - fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) - - # prepare spatial bias - b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias - b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) - fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) - - return fusedconv - - -def model_info(model, verbose=False, img_size=640): - # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] - n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients - if verbose: - print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) - for i, (name, p) in enumerate(model.named_parameters()): - name = name.replace('module_list.', '') - print('%5g %40s %9s %12g %20s %10.3g %10.3g' % - (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) - - try: # FLOPS - from thop import profile - flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, img_size, img_size),), verbose=False)[0] / 1E9 * 2 - img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float - fs = ', %.9f GFLOPS' % (flops) # 640x640 FLOPS - except (ImportError, Exception): - fs = '' - - logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") - - -def load_classifier(name='resnet101', n=2): - # Loads a pretrained model reshaped to n-class output - model = torchvision.models.__dict__[name](pretrained=True) - - # ResNet model properties - # input_size = [3, 224, 224] - # input_space = 'RGB' - # input_range = [0, 1] - # mean = [0.485, 0.456, 0.406] - # std = [0.229, 0.224, 0.225] - - # Reshape output to n classes - filters = model.fc.weight.shape[1] - model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) - model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) - model.fc.out_features = n - return model - - -def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio - # scales img(bs,3,y,x) by ratio - if ratio == 1.0: - return img - else: - h, w = img.shape[2:] - s = (int(h * ratio), int(w * ratio)) # new size - img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize - if not same_shape: # pad/crop img - gs = 64 # (pixels) grid size - h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] - return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean - - -def copy_attr(a, b, include=(), exclude=()): - # Copy attributes from b to a, options to only include [...] and to exclude [...] 
- for k, v in b.__dict__.items(): - if (len(include) and k not in include) or k.startswith('_') or k in exclude: - continue - else: - setattr(a, k, v) - - -class ModelEMA: - """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models - Keep a moving average of everything in the model state_dict (parameters and buffers). - This is intended to allow functionality like - https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage - A smoothed version of the weights is necessary for some training schemes to perform well. - This class is sensitive where it is initialized in the sequence of model init, - GPU assignment and distributed training wrappers. - """ - - def __init__(self, model, decay=0.9999, updates=0): - # Create EMA - self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA - # if next(model.parameters()).device.type != 'cpu': - # self.ema.half() # FP16 EMA - self.updates = updates # number of EMA updates - self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) - for p in self.ema.parameters(): - p.requires_grad_(False) - - self.is_fused = False - - - def update(self, model, x, model_params_fused=None): - # Update EMA parameters - with torch.no_grad(): - self.updates += 1 - d = self.decay(self.updates) - - if x.device.type == 'npu': - # if False: - from apex.contrib.combine_tensors import combine_npu - d_inv = 1. - d - d = torch.tensor([d], device=x.device) - - if not self.is_fused: - pg0, pg1, pg2 = [], [], [] # optimizer parameters groups - - # this process needs special attention, the order of params should be identical to model - for name, p in self.ema.named_parameters(): - if p.dtype.is_floating_point: - if '.bias' in name: - pg2.append(p) # biases - elif 'Conv2d.weight' in name: - pg1.append(p) # apply weight_decay - elif 'm.weight' in name: - pg1.append(p) # apply weight_decay - elif 'w.weight' in name: - pg1.append(p) # apply weight_decay - else: - pg0.append(p) # all else - ema_all_params = pg0 + pg1 + pg2 - self.ema_params_fused = combine_npu(ema_all_params) - - ema_all_buffers = [] - for name, b in self.ema.named_buffers(): - if b.dtype.is_floating_point: - ema_all_buffers.append(b) - else: - continue - self.ema_buffers_fused = combine_npu(ema_all_buffers) - - model_all_buffers = [] - for name, b in model.named_buffers(): - if b.dtype.is_floating_point: - model_all_buffers.append(b) - else: - continue - self.model_buffers_fused = combine_npu(model_all_buffers) - - self.is_fused = True - - self.ema_params_fused *= d - self.ema_params_fused.add_(model_params_fused, alpha=d_inv) - - self.ema_buffers_fused *= d - self.ema_buffers_fused.add_(self.model_buffers_fused, alpha=d_inv) - - else: - msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict - for k, v in self.ema.state_dict().items(): - if v.dtype.is_floating_point: - v *= d - v += (1. - d) * msd[k].detach() - - def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): - # Update EMA attributes - copy_attr(self.ema, model, include, exclude) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# PyTorch utils + +import logging +import math +import os +import time +from contextlib import contextmanager +from copy import deepcopy + +import torch +import torch.backends.cudnn as cudnn +import torch.nn as nn +import torch.nn.functional as F +import torchvision + +logger = logging.getLogger(__name__) + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + """ + Decorator to make all processes in distributed training wait for each local_master to do something. + """ + if local_rank not in [-1, 0]: + torch.distributed.barrier() + yield + if local_rank == 0: + torch.distributed.barrier() + + +def init_torch_seeds(seed=0): + # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html + torch.manual_seed(seed) + if seed == 0: # slower, more reproducible + cudnn.deterministic = True + cudnn.benchmark = False + else: # faster, less reproducible + cudnn.deterministic = False + cudnn.benchmark = True + + +def select_device(device='', npu='', apex=False, batch_size=None): + npu_request = device.lower() == 'npu' + if npu_request and npu != -1: + torch.npu.set_device("npu:%d" % npu) + print('Using NPU %d to train' % npu) + return torch.device("npu:%d" % npu) + # device = 'cpu' or '0' or '0,1,2,3' + cpu_request = device.lower() == 'cpu' + if device and not cpu_request: # if device requested other than 'cpu' + os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable + assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity + + cuda = False if cpu_request else torch.cuda.is_available() + if cuda: + c = 1024 ** 2 # bytes to MB + ng = torch.cuda.device_count() + if ng > 1 and batch_size: # check that batch_size is compatible with device_count + assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) + x = [torch.cuda.get_device_properties(i) for i in range(ng)] + s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex + for i in range(0, ng): + if i == 1: + s = ' ' * len(s) + print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % + (s, i, x[i].name, x[i].total_memory / c)) + else: + print('Using CPU') + + print('') # skip a line + return torch.device('cuda:0' if cuda else 'cpu') + + +def time_synchronized(): + torch.cuda.synchronize() if torch.cuda.is_available() else None + return time.time() + + +def is_parallel(model): + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def intersect_dicts(da, db, exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: + m.inplace = True 
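The `intersect_dicts` helper defined above is typically used to transfer a pretrained checkpoint into a model whose head or anchor layers differ, keeping only keys with matching names and shapes. A minimal usage sketch, assuming `model` is already constructed, that the checkpoint file name is illustrative, and that the checkpoint stores either a plain state_dict or one under a 'state_dict' key:

```python
import torch

# Hypothetical usage of intersect_dicts(): keep only keys whose names and
# shapes match the current model, skipping anything whose name contains an
# excluded substring (e.g. a detection head).
ckpt = torch.load('pretrained.pt', map_location='cpu')   # assumed file name
state = ckpt.get('state_dict', ckpt)                     # assumed checkpoint layout
state = intersect_dicts(state, model.state_dict(), exclude=('head',))
model.load_state_dict(state, strict=False)
print('transferred %d/%d items' % (len(state), len(model.state_dict())))
```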
+ + +def find_modules(model, mclass=nn.Conv2d): + # Finds layer indices matching module class 'mclass' + return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + + +def sparsity(model): + # Return global model sparsity + a, b = 0., 0. + for p in model.parameters(): + a += p.numel() + b += (p == 0).sum() + return b / a + + +def prune(model, amount=0.3): + # Prune model to requested global sparsity + import torch.nn.utils.prune as prune + print('Pruning model... ', end='') + for name, m in model.named_modules(): + if isinstance(m, nn.Conv2d): + prune.l1_unstructured(m, name='weight', amount=amount) # prune + prune.remove(m, 'weight') # make permanent + print(' %.3g global sparsity' % sparsity(model)) + + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True).requires_grad_(False).to(conv.weight.device) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) + + # prepare spatial bias + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def model_info(model, verbose=False, img_size=640): + # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + if verbose: + print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + for i, (name, p) in enumerate(model.named_parameters()): + name = name.replace('module_list.', '') + print('%5g %40s %9s %12g %20s %10.3g %10.3g' % + (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + + try: # FLOPS + from thop import profile + flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, img_size, img_size),), verbose=False)[0] / 1E9 * 2 + img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.9f GFLOPS' % (flops) # 640x640 FLOPS + except (ImportError, Exception): + fs = '' + + logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + + +def load_classifier(name='resnet101', n=2): + # Loads a pretrained model reshaped to n-class output + model = torchvision.models.__dict__[name](pretrained=True) + + # ResNet model properties + # input_size = [3, 224, 224] + # input_space = 'RGB' + # input_range = [0, 1] + # mean = [0.485, 0.456, 0.406] + # std = [0.229, 0.224, 0.225] + + # Reshape output to n classes + filters = model.fc.weight.shape[1] + model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) + model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) + model.fc.out_features = n + return model + + +def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio + # scales img(bs,3,y,x) by ratio + if ratio == 1.0: + return img + else: + h, w = img.shape[2:] + s = 
(int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + gs = 64 # (pixels) grid size + h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] + for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +class ModelEMA: + """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weights is necessary for some training schemes to perform well. + This class is sensitive where it is initialized in the sequence of model init, + GPU assignment and distributed training wrappers. + """ + + def __init__(self, model, decay=0.9999, updates=0): + # Create EMA + self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + # if next(model.parameters()).device.type != 'cpu': + # self.ema.half() # FP16 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + + self.is_fused = False + + + def update(self, model, x, model_params_fused=None): + # Update EMA parameters + with torch.no_grad(): + self.updates += 1 + d = self.decay(self.updates) + + if x.device.type == 'npu': + # if False: + from apex.contrib.combine_tensors import combine_npu + d_inv = 1. - d + d = torch.tensor([d], device=x.device) + + if not self.is_fused: + pg0, pg1, pg2 = [], [], [] # optimizer parameters groups + + # this process needs special attention, the order of params should be identical to model + for name, p in self.ema.named_parameters(): + if p.dtype.is_floating_point: + if '.bias' in name: + pg2.append(p) # biases + elif 'Conv2d.weight' in name: + pg1.append(p) # apply weight_decay + elif 'm.weight' in name: + pg1.append(p) # apply weight_decay + elif 'w.weight' in name: + pg1.append(p) # apply weight_decay + else: + pg0.append(p) # all else + ema_all_params = pg0 + pg1 + pg2 + self.ema_params_fused = combine_npu(ema_all_params) + + ema_all_buffers = [] + for name, b in self.ema.named_buffers(): + if b.dtype.is_floating_point: + ema_all_buffers.append(b) + else: + continue + self.ema_buffers_fused = combine_npu(ema_all_buffers) + + model_all_buffers = [] + for name, b in model.named_buffers(): + if b.dtype.is_floating_point: + model_all_buffers.append(b) + else: + continue + self.model_buffers_fused = combine_npu(model_all_buffers) + + self.is_fused = True + + self.ema_params_fused *= d + self.ema_params_fused.add_(model_params_fused, alpha=d_inv) + + self.ema_buffers_fused *= d + self.ema_buffers_fused.add_(self.model_buffers_fused, alpha=d_inv) + + else: + msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: + v *= d + v += (1. 
- d) * msd[k].detach() + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + # Update EMA attributes + copy_attr(self.ema, model, include, exclude) diff --git a/PyTorch/contrib/cv/detection/centernet2/README.CN.md b/PyTorch/contrib/cv/detection/centernet2/README.CN.md index 0737bf97bf23e4285ff96e505a0e245e2ea2015f..45a8eb89e79d1a7bc4ceff35acf3d0333dd4fff1 100644 --- a/PyTorch/contrib/cv/detection/centernet2/README.CN.md +++ b/PyTorch/contrib/cv/detection/centernet2/README.CN.md @@ -1,120 +1,120 @@ -# CenterNet2 - -本项目实现了 CenterNet2 在 NPU 上的训练. -[CenterNet2 github链接](https://github.com/xingyizhou/CenterNet2) - -## 1.CenterNet2 Detail - -本项目对 CenterNet2 做了如下更改: -1. 迁移到 NPU 上 -2. 使用混合精度训练、测试 -3. 对于一些操作,固定动态 shape 、使用 NPU 算子优化性能、同时将一些操作转移到 CPU 上进行 - - -## 2.Requirements -### 2.1 安装NPU软件 - -* NPU配套的run包安装 -* PyTorch(NPU版本) -* apex(NPU版本) - -### 2.2 安装第三方软件 - -(1) 通过pip 安装部分第三方软件: - -``` -pip3 install -r requirements.txt -``` - -(2) 安装opencv - -``` -pip install opencv-python -``` - -(3) 编译安装torchvision - -``` -git clone https://github.com/pytorch/vision -cd vision -git checkout v0.6.0 -python3 setup.py install -``` - -**注:您必须编译安装torchvision,直接使用pip安装训练将无法启动** - -(4) 编译detectron2 - -进入模型脚本根目录,编译detectron2 - -``` -python3 setup.py build develop -``` - - -(5) 下载预训练模型 R-50.pkl ,projects/CenterNet2/configs/Base-CenterNet2.yaml配置文件中MODEL.WEIGHTS 设置为R-101.pkl的绝对路径 -## 3.Dataset - -(1) 下载coco2017数据集; - -(2) 解压数据集,解压后的目录结构如下 -``` -│ ├── coco -│ ├── annotations -│ ├── train2017 -│ ├── val2017 -``` -(3) 通过设置环境变量DETECTRON2_DATASETS=“coco 所在数据集路径”进行设置,如 export DETECTRON2_DATASETS=/home/,则 coco 数据集放在 /home/ 目录中 - -## 4.Training - -### 4.1 NPU 1P - -在模型根目录下,运行 train_full_1p.sh,同时传入参数--data_path,指定为coco数据集的路径父路径(例如数据集路径为/home/coco,则--data_path=/home) - -``` -bash ./test/train_full_1p.sh --data_path=/home -``` -模型训练结束后,会在result/CenterNet2/CenterNet2_R50_1x目录下保存模型文件model_final.pth,训练结束后若要评估精度需运行eval脚本,参考第6节 - -### 4.2 NPU 8P - -在模型根目录下,运行 train_full_8p.sh,同时传入参数--data_path,指定为coco数据集的路径父路径(例如数据集路径为/home/coco,则--data_path=/home) - -``` -bash ./test/train_full_8p.sh --data_path=/home -``` - -模型训练结束后,会在result/CenterNet2/CenterNet2_R50_1x目录下保存模型文件model_final.pth,训练结束后若要评估精度需运行eval脚本,参考第6节 - -## 5.Finetune - -请将projects/CenterNet2/configs/Base-CenterNet2.yaml中的字段WEIGHTS修改为第3节中model_final.pth的绝对路径,如修改为 -``` -WEIGHTS: "/home/CenterNet2/result/CenterNet2/CenterNet2_R50_1x/model_final.pth" -``` -然后启动评估脚本 - -``` -bash ./test/train_finetune_1p.sh --data_path=/home -``` - -## 6.评估 -训练结束后,需要手动启动评估程序。请将projects/CenterNet2/configs/Base-CenterNet2.yaml中的字段WEIGHTS修改为第3节中model_final.pth的绝对路径,如修改为 -``` -WEIGHTS: "/home/CenterNet2/result/CenterNet2/CenterNet2_R50_1x/model_final.pth" -``` - -然后启动评估脚本 - -``` -bash ./test/train_eval_1p.sh --data_path=/home -``` -## CenterNet2 Result - -| 名称 | 精度 | 性能 | -| ------ | ----- | -------- | -| GPU-1p | - | 12.3fps | -| NPU-1p | - | 2.86 fps | -| GPU-8p | 43.68 | 90.5fps | +# CenterNet2 + +本项目实现了 CenterNet2 在 NPU 上的训练. +[CenterNet2 github链接](https://github.com/xingyizhou/CenterNet2) + +## 1.CenterNet2 Detail + +本项目对 CenterNet2 做了如下更改: +1. 迁移到 NPU 上 +2. 使用混合精度训练、测试 +3. 
对于一些操作,固定动态 shape 、使用 NPU 算子优化性能、同时将一些操作转移到 CPU 上进行 + + +## 2.Requirements +### 2.1 安装NPU软件 + +* NPU配套的run包安装 +* PyTorch(NPU版本) +* apex(NPU版本) + +### 2.2 安装第三方软件 + +(1) 通过pip 安装部分第三方软件: + +``` +pip3 install -r requirements.txt +``` + +(2) 安装opencv + +``` +pip install opencv-python +``` + +(3) 编译安装torchvision + +``` +git clone https://github.com/pytorch/vision +cd vision +git checkout v0.6.0 +python3 setup.py install +``` + +**注:您必须编译安装torchvision,直接使用pip安装训练将无法启动** + +(4) 编译detectron2 + +进入模型脚本根目录,编译detectron2 + +``` +python3 setup.py build develop +``` + + +(5) 下载预训练模型 R-50.pkl ,projects/CenterNet2/configs/Base-CenterNet2.yaml配置文件中MODEL.WEIGHTS 设置为R-101.pkl的绝对路径 +## 3.Dataset + +(1) 下载coco2017数据集; + +(2) 解压数据集,解压后的目录结构如下 +``` +│ ├── coco +│ ├── annotations +│ ├── train2017 +│ ├── val2017 +``` +(3) 通过设置环境变量DETECTRON2_DATASETS=“coco 所在数据集路径”进行设置,如 export DETECTRON2_DATASETS=/home/,则 coco 数据集放在 /home/ 目录中 + +## 4.Training + +### 4.1 NPU 1P + +在模型根目录下,运行 train_full_1p.sh,同时传入参数--data_path,指定为coco数据集的路径父路径(例如数据集路径为/home/coco,则--data_path=/home) + +``` +bash ./test/train_full_1p.sh --data_path=/home +``` +模型训练结束后,会在result/CenterNet2/CenterNet2_R50_1x目录下保存模型文件model_final.pth,训练结束后若要评估精度需运行eval脚本,参考第6节 + +### 4.2 NPU 8P + +在模型根目录下,运行 train_full_8p.sh,同时传入参数--data_path,指定为coco数据集的路径父路径(例如数据集路径为/home/coco,则--data_path=/home) + +``` +bash ./test/train_full_8p.sh --data_path=/home +``` + +模型训练结束后,会在result/CenterNet2/CenterNet2_R50_1x目录下保存模型文件model_final.pth,训练结束后若要评估精度需运行eval脚本,参考第6节 + +## 5.Finetune + +请将projects/CenterNet2/configs/Base-CenterNet2.yaml中的字段WEIGHTS修改为第3节中model_final.pth的绝对路径,如修改为 +``` +WEIGHTS: "/home/CenterNet2/result/CenterNet2/CenterNet2_R50_1x/model_final.pth" +``` +然后启动评估脚本 + +``` +bash ./test/train_finetune_1p.sh --data_path=/home +``` + +## 6.评估 +训练结束后,需要手动启动评估程序。请将projects/CenterNet2/configs/Base-CenterNet2.yaml中的字段WEIGHTS修改为第3节中model_final.pth的绝对路径,如修改为 +``` +WEIGHTS: "/home/CenterNet2/result/CenterNet2/CenterNet2_R50_1x/model_final.pth" +``` + +然后启动评估脚本 + +``` +bash ./test/train_eval_1p.sh --data_path=/home +``` +## CenterNet2 Result + +| 名称 | 精度 | 性能 | +| ------ | ----- | -------- | +| GPU-1p | - | 12.3fps | +| NPU-1p | - | 2.86 fps | +| GPU-8p | 43.68 | 90.5fps | | NPU-8p | 43.5 | 18.7fps | \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/centernet2/requirements.txt b/PyTorch/contrib/cv/detection/centernet2/requirements.txt index f2e8ae6b481a2272fa00156d4acb1f88367ee105..5642e4abe9102eb7e666f5dcffe1a23f5165d466 100644 --- a/PyTorch/contrib/cv/detection/centernet2/requirements.txt +++ b/PyTorch/contrib/cv/detection/centernet2/requirements.txt @@ -1,17 +1,17 @@ -numpy -decorator==5.1.0 -sympy==1.9 -termcolor>=1.1 -Pillow>=8.4.0 -yacs>=0.1.6 -tabulate -cloudpickle -matplotlib -tqdm>4.29.0 -tensorboard -fvcore -iopath>=0.1.2 -pycocotools>=2.0.2 -future -pydot -omegaconf +numpy +decorator==5.1.0 +sympy==1.9 +termcolor>=1.1 +Pillow>=8.4.0 +yacs>=0.1.6 +tabulate +cloudpickle +matplotlib +tqdm>4.29.0 +tensorboard +fvcore +iopath>=0.1.2 +pycocotools>=2.0.2 +future +pydot +omegaconf diff --git a/PyTorch/contrib/cv/others/ADNet/LICENSE b/PyTorch/contrib/cv/others/ADNet/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/PyTorch/contrib/cv/others/ADNet/LICENSE +++ b/PyTorch/contrib/cv/others/ADNet/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
+ Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/PyTorch/contrib/cv/others/ADNet/README.md b/PyTorch/contrib/cv/others/ADNet/README.md index a2bc84b397231468d55dc83b15379465445b78b5..9fd16c6d627786829d148b6899f3bbb68ac90b90 100644 --- a/PyTorch/contrib/cv/others/ADNet/README.md +++ b/PyTorch/contrib/cv/others/ADNet/README.md @@ -1,167 +1,167 @@ -# ADNet训练 - -``` -## Atention-guided CNN for image denoising(ADNet)by Chunwei Tian, Yong Xu, Zuoyong Li, Wangmeng Zuo, Lunke Fei and Hong Liu is publised by Neural Networks (IF:8.05), 2020 (https://www.sciencedirect.com/science/article/pii/S0893608019304241) and it is implemented by Pytorch. - - -## This paper is pushed on home page of the Nueral Networks. Also, it is reported by wechat public accounts at https://mp.weixin.qq.com/s/Debh7PZSFTBtOVxpFh9yfQ and https://wx.zsxq.com/mweb/views/topicdetail/topicdetail.html?topic_id=548112815452544&group_id=142181451122&user_id=28514284588581&from=timeline. - -## This paper is the first paper via deep network properties for addressing image denoising with complex background. - -## Absract -#### Deep convolutional neural networks (CNNs) have attracted considerable interest in low-level computer vision. Researches are usually devoted to improving the performance via very deep CNNs. However, as the depth increases, influences of the shallow layers on deep layers are weakened. Inspired by the fact, we propose an attention-guided denoising convolutional neural network (ADNet), mainly including a sparse block (SB), a feature enhancement block (FEB), an attention block (AB) and a reconstruction block (RB) for image denoising. Specifically, the SB makes a tradeoff between performance and efficiency by using dilated and common convolutions to remove the noise. The FEB integrates global and local features information via a long path to enhance the expressive ability of the denoising model. The AB is used to finely extract the noise information hidden in the complex background, which is very effective for complex noisy images, especially real noisy images and bind denoising. Also, the FEB is integrated with the AB to improve the efficiency and reduce the complexity for training a denoising model. Finally, a RB aims to construct the clean image through the obtained noise mapping and the given noisy image. Additionally, comprehensive experiments show that the proposed ADNet performs very well in three tasks (i.e., synthetic and real noisy images, and blind denoising) in terms of both quantitative and qualitative evaluations. The code of ADNet is accessible at https://github.com/hellloxiaotian/ADNet. 
-``` - -For more detail:https://www.sciencedirect.com/science/article/abs/pii/S0893608019304241 - - - -## Requirements - -use pytorch, you can use pip or conda to install the requirements - -``` -# for pip -cd $project -pip3.7 install -r requirements.txt -CANN 5.0.3.alpha001 -torchvision==0.6.0 -``` - - - -## 数据集准备 - -1.从以下网址获取pristine_images_gray.tar.gz作为训练集 - -https://pan.baidu.com/s/1nkY-b5_mdzliL7Y7N9JQRQ - -2.从以下网址获取BSD68作为标签 - -暂时没有在网上找到免费资源,可以从以下网址付费下载。 - -https://download.csdn.net/download/iteapoy/10902860 - -文件结构如下: - - -``` -ADNET -|-- data -| |-- BSD68 -| |-- pristine_images_gray -| |-- demo_img -| |--result -|-- test -|-- dataset.py -|-- demo.py -|-- models.py -|-- preprocess.py -|-- test.py -|-- train.py -|-- utils.py - -``` - -将数据集按照以上结构放在代码目录下 - -## 处理数据 - -source环境变量 - -``` -source ./test/env.sh -``` - -执行数据预处理脚本,将训练集图片裁剪成50*50的图片用与训练,运行成功会生成train.h5和val.h5文件,预处理需要h5py环境,请自行安装。 - -``` -python3.7.5 preprocess.py --preprocess True --mode S -``` - - - -## TRAIN - -### 单p训练 - -source 环境变量 - -``` -source ./test/env.sh -``` - -性能脚本: - -``` -bash ./test/train_performance_1p.sh -``` - -精度脚本: - -``` -bash ./test/train_full_1p.sh -``` - - - -### 多p训练 - -source 环境变量 - -``` -source ./test/env.sh -``` - -性能脚本: - -``` -bash ./test/train_performance_8p.sh -``` - -精度脚本: - -``` -bash ./test/train_full_8p.sh -``` - -模型保存在 - -运行日志保存至./logssigma25.0_2021-09-05-19-23-13目录下(2021-09-05-19-23-13是运行train.py的时间,会根据当前时间自动更新),其中best_model.pth是在验证集上精度最高的模型。 - -## TEST - -测试精度 - -使用sh文件 - -``` -bash test/eval_1p.sh -``` - -测试之前请指定测试的模型路径。打开./test/eval.sh文件,如下所示: - -``` -python3.7.5 test.py --is_distributed 0 --DeviceID 0 --num_gpus 1 --num_of_layers 17 --logdir logssigma25.0_2021-08-31-19-13-09 --test_data BSD68 --test_noiseL 25 | tee -a eval_1p.log -``` - -请指定需要测试的模型路径,将--logdir参数设置为需要测试的模型目录。 - -## Demo -将一张图片放在./data/demo_img中,将--demo_pth_path设置为训练好的pth文件目录,执行以下程序。模型的运行结果保存在./data/demo_img/result文件夹里。 -``` -python3.7.5 demo.py --DeviceID 0 --num_of_layers 17 --test_noiseL 25 --demo_pth_path logssigma25.0_2021-09-03-10-39-34 -``` - -### 精度对比 - -由于NPU上使用torch.optim.Adam出现loss极大的情况,在使用apex.optimizers.NpuFusedSGD优化器后,loss正常,但是精度会有所损失。 - -| | opt_level | loss_scale | optimizer | PSNR | -| ------ | --------- | ---------- | -------------------------------------------------------- | ----- | -| GPU-8p | o2 | 128 | optim.Adam | 28.98 | -| GPU-8p | o2 | 128 | optim.SGD | 27.83 | -| NPU-8p | 02 | 64 | apex.optimizers.NpuFusedAdam(不稳定,不能稳定复现精度) | 28.92 | -| NPU-8p | o2 | 8 | apex.optimizers.NpuFusedSGD | 28.49 | - +# ADNet训练 + +``` +## Atention-guided CNN for image denoising(ADNet)by Chunwei Tian, Yong Xu, Zuoyong Li, Wangmeng Zuo, Lunke Fei and Hong Liu is publised by Neural Networks (IF:8.05), 2020 (https://www.sciencedirect.com/science/article/pii/S0893608019304241) and it is implemented by Pytorch. + + +## This paper is pushed on home page of the Nueral Networks. Also, it is reported by wechat public accounts at https://mp.weixin.qq.com/s/Debh7PZSFTBtOVxpFh9yfQ and https://wx.zsxq.com/mweb/views/topicdetail/topicdetail.html?topic_id=548112815452544&group_id=142181451122&user_id=28514284588581&from=timeline. + +## This paper is the first paper via deep network properties for addressing image denoising with complex background. + +## Absract +#### Deep convolutional neural networks (CNNs) have attracted considerable interest in low-level computer vision. Researches are usually devoted to improving the performance via very deep CNNs. 
However, as the depth increases, influences of the shallow layers on deep layers are weakened. Inspired by the fact, we propose an attention-guided denoising convolutional neural network (ADNet), mainly including a sparse block (SB), a feature enhancement block (FEB), an attention block (AB) and a reconstruction block (RB) for image denoising. Specifically, the SB makes a tradeoff between performance and efficiency by using dilated and common convolutions to remove the noise. The FEB integrates global and local features information via a long path to enhance the expressive ability of the denoising model. The AB is used to finely extract the noise information hidden in the complex background, which is very effective for complex noisy images, especially real noisy images and bind denoising. Also, the FEB is integrated with the AB to improve the efficiency and reduce the complexity for training a denoising model. Finally, a RB aims to construct the clean image through the obtained noise mapping and the given noisy image. Additionally, comprehensive experiments show that the proposed ADNet performs very well in three tasks (i.e., synthetic and real noisy images, and blind denoising) in terms of both quantitative and qualitative evaluations. The code of ADNet is accessible at https://github.com/hellloxiaotian/ADNet. +``` + +For more detail:https://www.sciencedirect.com/science/article/abs/pii/S0893608019304241 + + + +## Requirements + +use pytorch, you can use pip or conda to install the requirements + +``` +# for pip +cd $project +pip3.7 install -r requirements.txt +CANN 5.0.3.alpha001 +torchvision==0.6.0 +``` + + + +## 数据集准备 + +1.从以下网址获取pristine_images_gray.tar.gz作为训练集 + +https://pan.baidu.com/s/1nkY-b5_mdzliL7Y7N9JQRQ + +2.从以下网址获取BSD68作为标签 + +暂时没有在网上找到免费资源,可以从以下网址付费下载。 + +https://download.csdn.net/download/iteapoy/10902860 + +文件结构如下: + + +``` +ADNET +|-- data +| |-- BSD68 +| |-- pristine_images_gray +| |-- demo_img +| |--result +|-- test +|-- dataset.py +|-- demo.py +|-- models.py +|-- preprocess.py +|-- test.py +|-- train.py +|-- utils.py + +``` + +将数据集按照以上结构放在代码目录下 + +## 处理数据 + +source环境变量 + +``` +source ./test/env.sh +``` + +执行数据预处理脚本,将训练集图片裁剪成50*50的图片用与训练,运行成功会生成train.h5和val.h5文件,预处理需要h5py环境,请自行安装。 + +``` +python3.7.5 preprocess.py --preprocess True --mode S +``` + + + +## TRAIN + +### 单p训练 + +source 环境变量 + +``` +source ./test/env.sh +``` + +性能脚本: + +``` +bash ./test/train_performance_1p.sh +``` + +精度脚本: + +``` +bash ./test/train_full_1p.sh +``` + + + +### 多p训练 + +source 环境变量 + +``` +source ./test/env.sh +``` + +性能脚本: + +``` +bash ./test/train_performance_8p.sh +``` + +精度脚本: + +``` +bash ./test/train_full_8p.sh +``` + +模型保存在 + +运行日志保存至./logssigma25.0_2021-09-05-19-23-13目录下(2021-09-05-19-23-13是运行train.py的时间,会根据当前时间自动更新),其中best_model.pth是在验证集上精度最高的模型。 + +## TEST + +测试精度 + +使用sh文件 + +``` +bash test/eval_1p.sh +``` + +测试之前请指定测试的模型路径。打开./test/eval.sh文件,如下所示: + +``` +python3.7.5 test.py --is_distributed 0 --DeviceID 0 --num_gpus 1 --num_of_layers 17 --logdir logssigma25.0_2021-08-31-19-13-09 --test_data BSD68 --test_noiseL 25 | tee -a eval_1p.log +``` + +请指定需要测试的模型路径,将--logdir参数设置为需要测试的模型目录。 + +## Demo +将一张图片放在./data/demo_img中,将--demo_pth_path设置为训练好的pth文件目录,执行以下程序。模型的运行结果保存在./data/demo_img/result文件夹里。 +``` +python3.7.5 demo.py --DeviceID 0 --num_of_layers 17 --test_noiseL 25 --demo_pth_path logssigma25.0_2021-09-03-10-39-34 +``` + +### 精度对比 + +由于NPU上使用torch.optim.Adam出现loss极大的情况,在使用apex.optimizers.NpuFusedSGD优化器后,loss正常,但是精度会有所损失。 + +| | opt_level | loss_scale | optimizer | PSNR | +| ------ | 
--------- | ---------- | -------------------------------------------------------- | ----- | +| GPU-8p | o2 | 128 | optim.Adam | 28.98 | +| GPU-8p | o2 | 128 | optim.SGD | 27.83 | +| NPU-8p | 02 | 64 | apex.optimizers.NpuFusedAdam(不稳定,不能稳定复现精度) | 28.92 | +| NPU-8p | o2 | 8 | apex.optimizers.NpuFusedSGD | 28.49 | + 在NPU上使用apex.optimizers.NpuFusedAdam不能稳定复现精度,有时候会出现loss极大的情况,导致训练失败。 \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/ADNet/demo.py b/PyTorch/contrib/cv/others/ADNet/demo.py index 41907621c32437fee82e418c689a09d451d69cdc..3403781dcc2cc0e67c2e54ecb07cb43dbf1f3ee9 100644 --- a/PyTorch/contrib/cv/others/ADNet/demo.py +++ b/PyTorch/contrib/cv/others/ADNet/demo.py @@ -1,111 +1,111 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import cv2 -import os -import argparse -import glob -import numpy as np -from PIL import Image -import torch -import torch.nn as nn -from torch.autograd import Variable -from models import ADNet -from utils import * -from collections import OrderedDict -import torch.distributed as dist - -parser = argparse.ArgumentParser(description="ADNet_Test") -parser.add_argument("--num_of_layers", type=int, default=17, help="Number of total layers") -parser.add_argument("--logdir", type=str, default="logs", help='path of log files') -parser.add_argument("--test_data", type=str, default='BSD68', help='test on Set12 or Set68') -parser.add_argument("--test_noiseL", type=float, default=25, help='noise level used on test set') -parser.add_argument("--DeviceID", type=int, default=0, help='choose a device id to use') -parser.add_argument("--demo_img_path", type=str, default='demo_img') -parser.add_argument("--demo_pth_path", type=str, default='data') -opt = parser.parse_args() - - -def normalize(data): - return data / 255. -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." 
in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def main(): - # Build model - local_device = torch.device(f'npu:{opt.DeviceID}') - torch.npu.set_device(local_device) - print("using npu :{}".format(opt.DeviceID)) - print('Loading model ...\n') - net = ADNet(channels=1, num_of_layers=17) - model = net #model = nn.DataParallel(net, device_ids=device_ids).cuda() - checkpoint = torch.load(os.path.join(opt.demo_pth_path, 'best_model.pth'), map_location=local_device) - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - model = model.npu() - model.eval() - # load data info - print('Loading data info ...\n') - files_source = glob.glob(os.path.join('data', opt.demo_img_path, '*.png')) - files_source.sort() - # process data - psnr_test = 0 - for f in files_source: - # image - Img = cv2.imread(f) - Img = normalize(np.float32(Img[:, :, 0])) - Img = np.expand_dims(Img, 0) - Img = np.expand_dims(Img, 1) - ISource = torch.Tensor(Img) - # noise - torch.manual_seed(0) # set the seed - noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=opt.test_noiseL / 255.) - # noisy image - INoisy = ISource + noise - ISource = Variable(ISource) - INoisy = Variable(INoisy) - ISource = ISource.npu() - INoisy = INoisy.npu() - with torch.no_grad(): # this can save much memory - Out = torch.clamp(model(INoisy), 0., 1.) - psnr = batch_PSNR(Out, ISource, 1.) - psnr_test += psnr - print("%s PSNR %f" % (f, psnr)) - INoisy = INoisy*255 - INoisy = INoisy.data.cpu().numpy() - INoisy = np.squeeze(INoisy) - Imag_noise = Image.fromarray(INoisy.astype('uint8')) - if not os.path.exists('./data/demo_img/result'): - os.mkdir('./data/demo_img/result') - Imag_noise.save(os.path.join('data', opt.demo_img_path, 'result', 'image_add_noise.png')) - print('original image stored in:', os.path.join('data', opt.demo_img_path)) - print('image added noise stored in:', os.path.join('data', opt.demo_img_path, 'result', 'image_add_noise.png')) - result = Out*255 - result = result.data.cpu().numpy() - result = np.squeeze(result) - result = Image.fromarray(result.astype('uint8')) - result.save(os.path.join('data', opt.demo_img_path, 'result', 'image_after_processing.png')) - print('image denoised stored in:', os.path.join('data', opt.demo_img_path, 'result', 'image_after_processing.png')) - psnr_test /= len(files_source) - print("\nPSNR on demo image %f" % psnr_test) - - -if __name__ == "__main__": - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
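The demo script below measures reconstruction quality with `batch_PSNR` from utils.py, which this patch does not modify or include. For reference, a minimal stand-alone PSNR computation under the same convention used here (float image tensors scaled to [0, 1], data range passed as 1.0); this is an illustrative sketch, not the repository's implementation:

```python
import torch

def psnr(denoised, clean, data_range=1.0):
    # Peak signal-to-noise ratio in dB between two image tensors in [0, data_range].
    mse = torch.mean((denoised - clean) ** 2)
    return 10.0 * torch.log10(data_range ** 2 / mse)
```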
+import cv2 +import os +import argparse +import glob +import numpy as np +from PIL import Image +import torch +import torch.nn as nn +from torch.autograd import Variable +from models import ADNet +from utils import * +from collections import OrderedDict +import torch.distributed as dist + +parser = argparse.ArgumentParser(description="ADNet_Test") +parser.add_argument("--num_of_layers", type=int, default=17, help="Number of total layers") +parser.add_argument("--logdir", type=str, default="logs", help='path of log files') +parser.add_argument("--test_data", type=str, default='BSD68', help='test on Set12 or Set68') +parser.add_argument("--test_noiseL", type=float, default=25, help='noise level used on test set') +parser.add_argument("--DeviceID", type=int, default=0, help='choose a device id to use') +parser.add_argument("--demo_img_path", type=str, default='demo_img') +parser.add_argument("--demo_pth_path", type=str, default='data') +opt = parser.parse_args() + + +def normalize(data): + return data / 255. +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def main(): + # Build model + local_device = torch.device(f'npu:{opt.DeviceID}') + torch.npu.set_device(local_device) + print("using npu :{}".format(opt.DeviceID)) + print('Loading model ...\n') + net = ADNet(channels=1, num_of_layers=17) + model = net #model = nn.DataParallel(net, device_ids=device_ids).cuda() + checkpoint = torch.load(os.path.join(opt.demo_pth_path, 'best_model.pth'), map_location=local_device) + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + model = model.npu() + model.eval() + # load data info + print('Loading data info ...\n') + files_source = glob.glob(os.path.join('data', opt.demo_img_path, '*.png')) + files_source.sort() + # process data + psnr_test = 0 + for f in files_source: + # image + Img = cv2.imread(f) + Img = normalize(np.float32(Img[:, :, 0])) + Img = np.expand_dims(Img, 0) + Img = np.expand_dims(Img, 1) + ISource = torch.Tensor(Img) + # noise + torch.manual_seed(0) # set the seed + noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=opt.test_noiseL / 255.) + # noisy image + INoisy = ISource + noise + ISource = Variable(ISource) + INoisy = Variable(INoisy) + ISource = ISource.npu() + INoisy = INoisy.npu() + with torch.no_grad(): # this can save much memory + Out = torch.clamp(model(INoisy), 0., 1.) + psnr = batch_PSNR(Out, ISource, 1.) 
+ psnr_test += psnr + print("%s PSNR %f" % (f, psnr)) + INoisy = INoisy*255 + INoisy = INoisy.data.cpu().numpy() + INoisy = np.squeeze(INoisy) + Imag_noise = Image.fromarray(INoisy.astype('uint8')) + if not os.path.exists('./data/demo_img/result'): + os.mkdir('./data/demo_img/result') + Imag_noise.save(os.path.join('data', opt.demo_img_path, 'result', 'image_add_noise.png')) + print('original image stored in:', os.path.join('data', opt.demo_img_path)) + print('image added noise stored in:', os.path.join('data', opt.demo_img_path, 'result', 'image_add_noise.png')) + result = Out*255 + result = result.data.cpu().numpy() + result = np.squeeze(result) + result = Image.fromarray(result.astype('uint8')) + result.save(os.path.join('data', opt.demo_img_path, 'result', 'image_after_processing.png')) + print('image denoised stored in:', os.path.join('data', opt.demo_img_path, 'result', 'image_after_processing.png')) + psnr_test /= len(files_source) + print("\nPSNR on demo image %f" % psnr_test) + + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/others/ADNet/modelzoo_level.txt b/PyTorch/contrib/cv/others/ADNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/ADNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/ADNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/ADNet/preprocess.py b/PyTorch/contrib/cv/others/ADNet/preprocess.py index 906b8a58a2c593e7110bc612746876558454f6da..f6328b1aa6c093a94566123d4f64190ff148739f 100644 --- a/PyTorch/contrib/cv/others/ADNet/preprocess.py +++ b/PyTorch/contrib/cv/others/ADNet/preprocess.py @@ -1,64 +1,64 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import argparse - -import torch -import sys -import torch.nn as nn -import torch.optim as optim -import apex.amp as amp -import apex -import logging -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.utils.data.distributed import DistributedSampler -import torch.distributed as dist -import time -from torch.autograd import Variable -from torch.utils.data import DataLoader - -from torch.nn.modules.loss import _Loss -from models import ADNet -from dataset import prepare_data, Dataset -from utils import * - - -parser = argparse.ArgumentParser(description="DnCNN") -parser.add_argument("--preprocess", type=bool, default=False, help='run prepare_data or not') -parser.add_argument("--batchSize", type=int, default=128, help="Training batch size") -parser.add_argument("--num_of_layers", type=int, default=17, help="Number of total layers") -parser.add_argument("--epochs", type=int, default=70, help="Number of training epochs") -parser.add_argument("--milestone", type=int, default=30, help="When to decay learning rate; should be less than epochs") -parser.add_argument("--lr", type=float, default=1e-3, help="Initial learning rate") -parser.add_argument("--outf", type=str, default="logs", help='path of log files') -parser.add_argument("--mode", type=str, default="S", help='with known noise level (S) or blind training (B)') -parser.add_argument("--noiseL", type=float, default=15, help='noise level; ignored when mode=B') -parser.add_argument("--val_noiseL", type=float, default=15, help='noise level used on validation set') -parser.add_argument("--is_distributed", type=int, default=0, help='choose ddp or not') -parser.add_argument('--world_size', default=-1, type=int, help='number of nodes for distributed training') -parser.add_argument('--local_rank', type=int, default=0) -parser.add_argument('--DeviceID', type=str, default="0") -parser.add_argument("--num_gpus", default=1, type=int) -''' -parser.add_argument("--clip",type=float,default=0.005,help='Clipping Gradients. Default=0.4') #tcw201809131446tcw -parser.add_argument("--momentum",default=0.9,type='float',help = 'Momentum, Default:0.9') #tcw201809131447tcw -parser.add_argument("--weight-decay","-wd",default=1e-3,type=float,help='Weight decay, Default:1e-4') #tcw20180913347tcw -''' -opt = parser.parse_args() -if __name__ == "__main__": - if opt.preprocess: - if opt.mode == 'S': - prepare_data(data_path='data', patch_size=50, stride=40, aug_times=1) - if opt.mode == 'B': - prepare_data(data_path='data', patch_size=50, stride=10, aug_times=2) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import argparse + +import torch +import sys +import torch.nn as nn +import torch.optim as optim +import apex.amp as amp +import apex +import logging +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data.distributed import DistributedSampler +import torch.distributed as dist +import time +from torch.autograd import Variable +from torch.utils.data import DataLoader + +from torch.nn.modules.loss import _Loss +from models import ADNet +from dataset import prepare_data, Dataset +from utils import * + + +parser = argparse.ArgumentParser(description="DnCNN") +parser.add_argument("--preprocess", type=bool, default=False, help='run prepare_data or not') +parser.add_argument("--batchSize", type=int, default=128, help="Training batch size") +parser.add_argument("--num_of_layers", type=int, default=17, help="Number of total layers") +parser.add_argument("--epochs", type=int, default=70, help="Number of training epochs") +parser.add_argument("--milestone", type=int, default=30, help="When to decay learning rate; should be less than epochs") +parser.add_argument("--lr", type=float, default=1e-3, help="Initial learning rate") +parser.add_argument("--outf", type=str, default="logs", help='path of log files') +parser.add_argument("--mode", type=str, default="S", help='with known noise level (S) or blind training (B)') +parser.add_argument("--noiseL", type=float, default=15, help='noise level; ignored when mode=B') +parser.add_argument("--val_noiseL", type=float, default=15, help='noise level used on validation set') +parser.add_argument("--is_distributed", type=int, default=0, help='choose ddp or not') +parser.add_argument('--world_size', default=-1, type=int, help='number of nodes for distributed training') +parser.add_argument('--local_rank', type=int, default=0) +parser.add_argument('--DeviceID', type=str, default="0") +parser.add_argument("--num_gpus", default=1, type=int) +''' +parser.add_argument("--clip",type=float,default=0.005,help='Clipping Gradients. Default=0.4') #tcw201809131446tcw +parser.add_argument("--momentum",default=0.9,type='float',help = 'Momentum, Default:0.9') #tcw201809131447tcw +parser.add_argument("--weight-decay","-wd",default=1e-3,type=float,help='Weight decay, Default:1e-4') #tcw20180913347tcw +''' +opt = parser.parse_args() +if __name__ == "__main__": + if opt.preprocess: + if opt.mode == 'S': + prepare_data(data_path='data', patch_size=50, stride=40, aug_times=1) + if opt.mode == 'B': + prepare_data(data_path='data', patch_size=50, stride=10, aug_times=2) diff --git a/PyTorch/contrib/cv/others/BigGAN/LICENSE b/PyTorch/contrib/cv/others/BigGAN/LICENSE index 753842b6720f7980d411ecf2c78eb4ef220b9df8..f49a4e16e68b128803cc2dcea614603632b04eac 100644 --- a/PyTorch/contrib/cv/others/BigGAN/LICENSE +++ b/PyTorch/contrib/cv/others/BigGAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/BigGAN/datasets.py b/PyTorch/contrib/cv/others/BigGAN/datasets.py index 378f1b336ffbb413c298153b51a0d7989b98a065..751bd97941571e391a664123e65ecf334812e94c 100644 --- a/PyTorch/contrib/cv/others/BigGAN/datasets.py +++ b/PyTorch/contrib/cv/others/BigGAN/datasets.py @@ -1,378 +1,378 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" Datasets - This file contains definitions for our CIFAR, ImageFolder, and HDF5 datasets -""" -import os -import os.path -import sys -import numpy as np -import torch.utils.data as data -import torchvision.datasets as dset -from PIL import Image -from tqdm import tqdm - -IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] - - -def is_image_file(filename): - """Checks if a file is an image. 
- - Args: - filename (string): path to a file - - Returns: - bool: True if the filename ends with a known image extension - """ - filename_lower = filename.lower() - return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) - - -def find_classes(dir): - classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] - classes.sort() - class_to_idx = {classes[i]: i for i in range(len(classes))} - return classes, class_to_idx - - -def make_dataset(dir, class_to_idx): - images = [] - dir = os.path.expanduser(dir) - for target in tqdm(sorted(os.listdir(dir))): - d = os.path.join(dir, target) - if not os.path.isdir(d): - continue - - for root, _, fnames in sorted(os.walk(d)): - for fname in sorted(fnames): - if is_image_file(fname): - path = os.path.join(root, fname) - item = (path, class_to_idx[target]) - images.append(item) - - return images - - -def pil_loader(path): - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - -def accimage_loader(path): - import accimage - try: - return accimage.Image(path) - except IOError: - # Potentially a decoding problem, fall back to PIL.Image - return pil_loader(path) - - -def default_loader(path): - from torchvision import get_image_backend - if get_image_backend() == 'accimage': - return accimage_loader(path) - else: - return pil_loader(path) - - -class ImageFolder(data.Dataset): - """A generic data loader where the images are arranged in this way: :: - - root/dogball/xxx.png - root/dogball/xxy.png - root/dogball/xxz.png - - root/cat/123.png - root/cat/nsdf3.png - root/cat/asd932_.png - - Args: - root (string): Root directory path. - transform (callable, optional): A function/transform that takes in an PIL image - and returns a transformed version. E.g, ``transforms.RandomCrop`` - target_transform (callable, optional): A function/transform that takes in the - target and transforms it. - loader (callable, optional): A function to load an image given its path. - - Attributes: - classes (list): List of the class names. - class_to_idx (dict): Dict with items (class_name, class_index). - imgs (list): List of (image path, class_index) tuples - """ - - def __init__(self, root, transform=None, target_transform=None, - loader=default_loader, load_in_mem=False, - index_filename='imagenet_imgs.npz', **kwargs): - classes, class_to_idx = find_classes(root) - # Load pre-computed image directory walk - if os.path.exists(index_filename): - print('Loading pre-saved Index file %s...' % index_filename) - imgs = np.load(index_filename)['imgs'] - # If first time, walk the folder directory and save the - # results to a pre-computed file. - else: - print('Generating Index file %s...' 
% index_filename) - imgs = make_dataset(root, class_to_idx) - np.savez_compressed(index_filename, **{'imgs': imgs}) - if len(imgs) == 0: - raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" - "Supported image extensions are: " + ",".join( - IMG_EXTENSIONS))) - - self.root = root - self.imgs = imgs - self.classes = classes - self.class_to_idx = class_to_idx - self.transform = transform - self.target_transform = target_transform - self.loader = loader - self.load_in_mem = load_in_mem - - if self.load_in_mem: - print('Loading all images into memory...') - self.data, self.labels = [], [] - for index in tqdm(range(len(self.imgs))): - path, target = imgs[index][0], imgs[index][1] - self.data.append(self.transform(self.loader(path))) - self.labels.append(target) - - def __getitem__(self, index): - """ - Args: - index (int): Index - - Returns: - tuple: (image, target) where target is class_index of the target class. - """ - if self.load_in_mem: - img = self.data[index] - target = self.labels[index] - else: - path, target = self.imgs[index] - img = self.loader(str(path)) - if self.transform is not None: - img = self.transform(img) - - if self.target_transform is not None: - target = self.target_transform(target) - - # print(img.size(), target) - return img, int(target) - - def __len__(self): - return len(self.imgs) - - def __repr__(self): - fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' - fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) - fmt_str += ' Root Location: {}\n'.format(self.root) - tmp = ' Transforms (if any): ' - fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) - tmp = ' Target Transforms (if any): ' - fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) - return fmt_str - - -''' ILSVRC_HDF5: A dataset to support I/O from an HDF5 to avoid - having to load individual images all the time. ''' -import h5py as h5 -import torch - - -class ILSVRC_HDF5(data.Dataset): - def __init__(self, root, transform=None, target_transform=None, - load_in_mem=False, train=True, download=False, validate_seed=0, - val_split=0, **kwargs): # last four are dummies - - self.root = root - self.num_imgs = len(h5.File(root, 'r')['labels']) - - # self.transform = transform - self.target_transform = target_transform - - # Set the transform here - self.transform = transform - - # load the entire dataset into memory? - self.load_in_mem = load_in_mem - - # If loading into memory, do so now - if self.load_in_mem: - print('Loading %s into memory...' % root) - with h5.File(root, 'r') as f: - self.data = f['imgs'][:] - self.labels = f['labels'][:] - - def __getitem__(self, index): - """ - Args: - index (int): Index - - Returns: - tuple: (image, target) where target is class_index of the target class. 
- """ - # If loaded the entire dataset in RAM, get image from memory - if self.load_in_mem: - img = self.data[index] - target = self.labels[index] - - # Else load it from disk - else: - with h5.File(self.root, 'r') as f: - img = f['imgs'][index] - target = f['labels'][index] - - # if self.transform is not None: - # img = self.transform(img) - # Apply my own transform - img = ((torch.from_numpy(img).float() / 255) - 0.5) * 2 - - if self.target_transform is not None: - target = self.target_transform(target) - - return img, int(target) - - def __len__(self): - return self.num_imgs - # return len(self.f['imgs']) - - -import pickle - - -class CIFAR10(dset.CIFAR10): - - def __init__(self, root, train=True, - transform=None, target_transform=None, - download=True, validate_seed=0, - val_split=0, load_in_mem=True, **kwargs): - self.root = os.path.expanduser(root) - self.transform = transform - self.target_transform = target_transform - self.train = train # training set or test set - self.val_split = val_split - - if download: - self.download() - - if not self._check_integrity(): - raise RuntimeError('Dataset not found or corrupted.' + - ' You can use download=True to download it') - - # now load the picked numpy arrays - self.data = [] - self.labels = [] - for fentry in self.train_list: - f = fentry[0] - file = os.path.join(self.root, self.base_folder, f) - fo = open(file, 'rb') - if sys.version_info[0] == 2: - entry = pickle.load(fo) - else: - entry = pickle.load(fo, encoding='latin1') - self.data.append(entry['data']) - if 'labels' in entry: - self.labels += entry['labels'] - else: - self.labels += entry['fine_labels'] - fo.close() - - self.data = np.concatenate(self.data) - # Randomly select indices for validation - if self.val_split > 0: - label_indices = [[] for _ in range(max(self.labels) + 1)] - for i, l in enumerate(self.labels): - label_indices[l] += [i] - label_indices = np.asarray(label_indices) - - # randomly grab 500 elements of each class - np.random.seed(validate_seed) - self.val_indices = [] - for l_i in label_indices: - self.val_indices += list(l_i[np.random.choice(len(l_i), - int(len(self.data) * val_split) // (max(self.labels) + 1), - replace=False)]) - - if self.train == 'validate': - self.data = self.data[self.val_indices] - self.labels = list(np.asarray(self.labels)[self.val_indices]) - - self.data = self.data.reshape((int(50e3 * self.val_split), 3, 32, 32)) - self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC - - elif self.train: - print(np.shape(self.data)) - if self.val_split > 0: - self.data = np.delete(self.data, self.val_indices, axis=0) - self.labels = list(np.delete(np.asarray(self.labels), self.val_indices, axis=0)) - - self.data = self.data.reshape((int(50e3 * (1. - self.val_split)), 3, 32, 32)) - self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC - else: - f = self.test_list[0][0] - file = os.path.join(self.root, self.base_folder, f) - fo = open(file, 'rb') - if sys.version_info[0] == 2: - entry = pickle.load(fo) - else: - entry = pickle.load(fo, encoding='latin1') - self.data = entry['data'] - if 'labels' in entry: - self.labels = entry['labels'] - else: - self.labels = entry['fine_labels'] - fo.close() - self.data = self.data.reshape((10000, 3, 32, 32)) - self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC - - def __getitem__(self, index): - """ - Args: - index (int): Index - Returns: - tuple: (image, target) where target is index of the target class. 
- """ - img, target = self.data[index], self.labels[index] - - # doing this so that it is consistent with all other datasets - # to return a PIL Image - img = Image.fromarray(img) - - if self.transform is not None: - img = self.transform(img) - - if self.target_transform is not None: - target = self.target_transform(target) - - return img, target - - def __len__(self): - return len(self.data) - - -class CIFAR100(CIFAR10): - base_folder = 'cifar-100-python' - url = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" - filename = "cifar-100-python.tar.gz" - tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' - train_list = [ - ['train', '16019d7e3df5f24257cddd939b257f8d'], - ] - - test_list = [ - ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], - ] +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" Datasets + This file contains definitions for our CIFAR, ImageFolder, and HDF5 datasets +""" +import os +import os.path +import sys +import numpy as np +import torch.utils.data as data +import torchvision.datasets as dset +from PIL import Image +from tqdm import tqdm + +IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] + + +def is_image_file(filename): + """Checks if a file is an image. 
+ + Args: + filename (string): path to a file + + Returns: + bool: True if the filename ends with a known image extension + """ + filename_lower = filename.lower() + return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) + + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + +def make_dataset(dir, class_to_idx): + images = [] + dir = os.path.expanduser(dir) + for target in tqdm(sorted(os.listdir(dir))): + d = os.path.join(dir, target) + if not os.path.isdir(d): + continue + + for root, _, fnames in sorted(os.walk(d)): + for fname in sorted(fnames): + if is_image_file(fname): + path = os.path.join(root, fname) + item = (path, class_to_idx[target]) + images.append(item) + + return images + + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +def accimage_loader(path): + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path): + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class ImageFolder(data.Dataset): + """A generic data loader where the images are arranged in this way: :: + + root/dogball/xxx.png + root/dogball/xxy.png + root/dogball/xxz.png + + root/cat/123.png + root/cat/nsdf3.png + root/cat/asd932_.png + + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + + Attributes: + classes (list): List of the class names. + class_to_idx (dict): Dict with items (class_name, class_index). + imgs (list): List of (image path, class_index) tuples + """ + + def __init__(self, root, transform=None, target_transform=None, + loader=default_loader, load_in_mem=False, + index_filename='imagenet_imgs.npz', **kwargs): + classes, class_to_idx = find_classes(root) + # Load pre-computed image directory walk + if os.path.exists(index_filename): + print('Loading pre-saved Index file %s...' % index_filename) + imgs = np.load(index_filename)['imgs'] + # If first time, walk the folder directory and save the + # results to a pre-computed file. + else: + print('Generating Index file %s...' 
% index_filename) + imgs = make_dataset(root, class_to_idx) + np.savez_compressed(index_filename, **{'imgs': imgs}) + if len(imgs) == 0: + raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" + "Supported image extensions are: " + ",".join( + IMG_EXTENSIONS))) + + self.root = root + self.imgs = imgs + self.classes = classes + self.class_to_idx = class_to_idx + self.transform = transform + self.target_transform = target_transform + self.loader = loader + self.load_in_mem = load_in_mem + + if self.load_in_mem: + print('Loading all images into memory...') + self.data, self.labels = [], [] + for index in tqdm(range(len(self.imgs))): + path, target = imgs[index][0], imgs[index][1] + self.data.append(self.transform(self.loader(path))) + self.labels.append(target) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is class_index of the target class. + """ + if self.load_in_mem: + img = self.data[index] + target = self.labels[index] + else: + path, target = self.imgs[index] + img = self.loader(str(path)) + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + # print(img.size(), target) + return img, int(target) + + def __len__(self): + return len(self.imgs) + + def __repr__(self): + fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' + fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) + fmt_str += ' Root Location: {}\n'.format(self.root) + tmp = ' Transforms (if any): ' + fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + tmp = ' Target Transforms (if any): ' + fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + return fmt_str + + +''' ILSVRC_HDF5: A dataset to support I/O from an HDF5 to avoid + having to load individual images all the time. ''' +import h5py as h5 +import torch + + +class ILSVRC_HDF5(data.Dataset): + def __init__(self, root, transform=None, target_transform=None, + load_in_mem=False, train=True, download=False, validate_seed=0, + val_split=0, **kwargs): # last four are dummies + + self.root = root + self.num_imgs = len(h5.File(root, 'r')['labels']) + + # self.transform = transform + self.target_transform = target_transform + + # Set the transform here + self.transform = transform + + # load the entire dataset into memory? + self.load_in_mem = load_in_mem + + # If loading into memory, do so now + if self.load_in_mem: + print('Loading %s into memory...' % root) + with h5.File(root, 'r') as f: + self.data = f['imgs'][:] + self.labels = f['labels'][:] + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is class_index of the target class. 
+ """ + # If loaded the entire dataset in RAM, get image from memory + if self.load_in_mem: + img = self.data[index] + target = self.labels[index] + + # Else load it from disk + else: + with h5.File(self.root, 'r') as f: + img = f['imgs'][index] + target = f['labels'][index] + + # if self.transform is not None: + # img = self.transform(img) + # Apply my own transform + img = ((torch.from_numpy(img).float() / 255) - 0.5) * 2 + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, int(target) + + def __len__(self): + return self.num_imgs + # return len(self.f['imgs']) + + +import pickle + + +class CIFAR10(dset.CIFAR10): + + def __init__(self, root, train=True, + transform=None, target_transform=None, + download=True, validate_seed=0, + val_split=0, load_in_mem=True, **kwargs): + self.root = os.path.expanduser(root) + self.transform = transform + self.target_transform = target_transform + self.train = train # training set or test set + self.val_split = val_split + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + # now load the picked numpy arrays + self.data = [] + self.labels = [] + for fentry in self.train_list: + f = fentry[0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.data.append(entry['data']) + if 'labels' in entry: + self.labels += entry['labels'] + else: + self.labels += entry['fine_labels'] + fo.close() + + self.data = np.concatenate(self.data) + # Randomly select indices for validation + if self.val_split > 0: + label_indices = [[] for _ in range(max(self.labels) + 1)] + for i, l in enumerate(self.labels): + label_indices[l] += [i] + label_indices = np.asarray(label_indices) + + # randomly grab 500 elements of each class + np.random.seed(validate_seed) + self.val_indices = [] + for l_i in label_indices: + self.val_indices += list(l_i[np.random.choice(len(l_i), + int(len(self.data) * val_split) // (max(self.labels) + 1), + replace=False)]) + + if self.train == 'validate': + self.data = self.data[self.val_indices] + self.labels = list(np.asarray(self.labels)[self.val_indices]) + + self.data = self.data.reshape((int(50e3 * self.val_split), 3, 32, 32)) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + + elif self.train: + print(np.shape(self.data)) + if self.val_split > 0: + self.data = np.delete(self.data, self.val_indices, axis=0) + self.labels = list(np.delete(np.asarray(self.labels), self.val_indices, axis=0)) + + self.data = self.data.reshape((int(50e3 * (1. - self.val_split)), 3, 32, 32)) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + else: + f = self.test_list[0][0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.data = entry['data'] + if 'labels' in entry: + self.labels = entry['labels'] + else: + self.labels = entry['fine_labels'] + fo.close() + self.data = self.data.reshape((10000, 3, 32, 32)) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is index of the target class. 
+ """ + img, target = self.data[index], self.labels[index] + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(img) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + +class CIFAR100(CIFAR10): + base_folder = 'cifar-100-python' + url = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" + filename = "cifar-100-python.tar.gz" + tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' + train_list = [ + ['train', '16019d7e3df5f24257cddd939b257f8d'], + ] + + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] diff --git a/PyTorch/contrib/cv/others/BigGAN/evaluation.py b/PyTorch/contrib/cv/others/BigGAN/evaluation.py index f08855891eabeab6713acdc00306f2cfcc8dcd72..4c6032cc362b831f6b77f2292d5b82ec71af0d4b 100644 --- a/PyTorch/contrib/cv/others/BigGAN/evaluation.py +++ b/PyTorch/contrib/cv/others/BigGAN/evaluation.py @@ -1,86 +1,86 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import functools -import torch -import inception_utils -import utils -from train import get_device_name - - -def evaluation(config): - config['resolution'] = utils.imsize_dict[config['dataset']] - config['n_classes'] = utils.nclass_dict[config['dataset']] - config['G_activation'] = utils.activation_dict[config['G_nl']] - config['D_activation'] = utils.activation_dict[config['D_nl']] - config = utils.update_config_roots(config) - # Seed RNG - utils.seed_rng(config['seed']) - - # By default, skip init if resuming training. - if config['resume']: - print('Skipping initialization for training resumption...') - config['skip_init'] = True - - # init device - device_loc = get_device_name(config['device'], config['gpu']) - config['loc'] = device_loc - # set device - print('set_device ', device_loc) - if config['device'] == 'npu': - torch.npu.set_device(device_loc) - else: - torch.cuda.set_device(config['gpu']) - - # model - # Import the model--this line allows us to dynamically select different files. 
- model = __import__(config['model']) - # Next, build the model - G = model.Generator(**config).to(device_loc) - - state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, - 'best_IS': 0, 'best_FID': 999999, 'config': config} - - # If loading from a pre-trained model, load weights - if config['resume']: - print('Loading weights...gpu id : ', config['gpu']) - utils.load_weights(G, None, state_dict, - config['weights_root'], config['experiment_name'], - config['load_weights'] if config['load_weights'] else None, - None, root=config['weights_path'], load_optim=False) - print("load weights ok") - - # prepare input - G_batch_size = max(config['G_batch_size'], config['batch_size']) - z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device_loc) - # Prepare Sample function for use with inception metrics - sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config) - # Prepare inception metrics: FID and IS - get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], - config['no_fid'], config['loc'], - config['use_fp16'], config['opt_level']) - if config['G_eval_mode']: - G.eval() - IS_mean, IS_std, FID = get_inception_metrics(sample, config['num_inception_images'], num_splits=10) - log_string = "IS_mean: {:.5f}, IS_std: {:.5f}, FID: {:.5f}".format(IS_mean, IS_std, FID) - print(log_string) - with open("evaluation_log.log", "a+") as f: - f.write("itr: {} , {:s}\n".format(state_dict['itr'], log_string)) - - -if __name__ == "__main__": - # parse command line and run - parser = utils.prepare_parser() - config = vars(parser.parse_args()) - evaluation(config) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import functools +import torch +import inception_utils +import utils +from train import get_device_name + + +def evaluation(config): + config['resolution'] = utils.imsize_dict[config['dataset']] + config['n_classes'] = utils.nclass_dict[config['dataset']] + config['G_activation'] = utils.activation_dict[config['G_nl']] + config['D_activation'] = utils.activation_dict[config['D_nl']] + config = utils.update_config_roots(config) + # Seed RNG + utils.seed_rng(config['seed']) + + # By default, skip init if resuming training. + if config['resume']: + print('Skipping initialization for training resumption...') + config['skip_init'] = True + + # init device + device_loc = get_device_name(config['device'], config['gpu']) + config['loc'] = device_loc + # set device + print('set_device ', device_loc) + if config['device'] == 'npu': + torch.npu.set_device(device_loc) + else: + torch.cuda.set_device(config['gpu']) + + # model + # Import the model--this line allows us to dynamically select different files. 
+ model = __import__(config['model']) + # Next, build the model + G = model.Generator(**config).to(device_loc) + + state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, + 'best_IS': 0, 'best_FID': 999999, 'config': config} + + # If loading from a pre-trained model, load weights + if config['resume']: + print('Loading weights...gpu id : ', config['gpu']) + utils.load_weights(G, None, state_dict, + config['weights_root'], config['experiment_name'], + config['load_weights'] if config['load_weights'] else None, + None, root=config['weights_path'], load_optim=False) + print("load weights ok") + + # prepare input + G_batch_size = max(config['G_batch_size'], config['batch_size']) + z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device_loc) + # Prepare Sample function for use with inception metrics + sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config) + # Prepare inception metrics: FID and IS + get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], + config['no_fid'], config['loc'], + config['use_fp16'], config['opt_level']) + if config['G_eval_mode']: + G.eval() + IS_mean, IS_std, FID = get_inception_metrics(sample, config['num_inception_images'], num_splits=10) + log_string = "IS_mean: {:.5f}, IS_std: {:.5f}, FID: {:.5f}".format(IS_mean, IS_std, FID) + print(log_string) + with open("evaluation_log.log", "a+") as f: + f.write("itr: {} , {:s}\n".format(state_dict['itr'], log_string)) + + +if __name__ == "__main__": + # parse command line and run + parser = utils.prepare_parser() + config = vars(parser.parse_args()) + evaluation(config) diff --git a/PyTorch/contrib/cv/others/BigGAN/modelzoo_level.txt b/PyTorch/contrib/cv/others/BigGAN/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/contrib/cv/others/BigGAN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/BigGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/BigGAN/requirements.txt b/PyTorch/contrib/cv/others/BigGAN/requirements.txt index 5648e61754231c103062dc569331e32788ecad6e..3b0b6a7bb2cb26830dd313a999fed8b0eb02ec71 100644 --- a/PyTorch/contrib/cv/others/BigGAN/requirements.txt +++ b/PyTorch/contrib/cv/others/BigGAN/requirements.txt @@ -1,8 +1,8 @@ -numpy -torchvision -tqdm -h5py -pillow -six -scipy +numpy +torchvision +tqdm +h5py +pillow +six +scipy torch \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CGAN/LICENSE b/PyTorch/contrib/cv/others/CGAN/LICENSE index 29f81d812f3e768fa89638d1f72920dbfd1413a8..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 100644 --- a/PyTorch/contrib/cv/others/CGAN/LICENSE +++ b/PyTorch/contrib/cv/others/CGAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
+ Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/PyTorch/contrib/cv/others/CGAN/README.md b/PyTorch/contrib/cv/others/CGAN/README.md index aa146c4a2cb09c088c2174855be3211995c7a8e1..2a14c1b90f5be0fcf9c212bf536faf867d7cfd30 100644 --- a/PyTorch/contrib/cv/others/CGAN/README.md +++ b/PyTorch/contrib/cv/others/CGAN/README.md @@ -1,130 +1,130 @@ -CGAN训练 - -``` -Generative adversarial nets can be extended to a conditional model if both the generator and discriminator are conditioned on some extra information y. y could be any kind of auxiliary information,such as class labels or data from other modalities. The author perform the conditioning by feeding y into the both the discriminator and generator as additional input layer.In the generator the prior input noise pz(z), and y are combined in joint hidden representation, and the adversarial training framework allows for considerable flexibility in how this hidden representation is composed. In the discriminator x and y are presented as inputs and to a discriminative function. -``` - -For more detail:https://arxiv.org/abs/1411.1784 - -The original gpu code:https://github.com/znxlwm/pytorch-generative-model-collections/ - -## Requirements - -use pytorch, you can use pip or conda to install the requirements - -``` -# for pip -cd $project -pip3.7 install -r requirements.txt -CANN 5.0.3 -torchvision==0.6.0 -``` - - - -## 数据集准备 - -1.下载mnist数据集作为训练集,dataloader.py中有自动下载mnist数据集的代码,执行训练命令会自动调用dataloader.py下载数据集,并保存在“./data/mnist“目录下。(请保持网络通畅) - -文件结构如下: - - -``` -CGAN -|-- data /数据集文件夹 -| |-- mnist /验证集,测试集 -|-- demo /demo.py的输出 -|--models /生成器和判别器模型保存目录 -|-- test /脚本文件夹 -| |--env.sh /环境配置文件 -| |--eval_1p.sh /单卡测试脚本 -| |--train_full_1p.sh /单卡精度测试脚本 -| |--train_full_8p.sh /8卡精度测试脚本 -| |--train_performance_1p.sh /单卡性能测试脚本 -| |--train_performance_8p.sh /8卡性能测试脚本 -|--results /生成器生成图片保存路径 -|-- CGAN.py /模型定义脚本 -|-- demo.py /例子脚本 -|-- dataloaderpy /数据预处理文件 -|-- main.py /主函数,训练启动脚本 -|-- utils.py /其它需要调用的函数脚本 -``` - - - -## TRAIN - -### 单p训练 - -source 环境变量 - -``` -source ./test/env_npu.sh -``` - -性能脚本: - -``` -bash ./test/train_performance_1p.sh -``` - -精度脚本: - -``` -bash ./test/train_full_1p.sh -``` - - - -### 多p训练 - -source 环境变量 - -``` -source ./test/env_npu.sh -``` - -性能脚本: - -``` -bash ./test/train_performance_8p.sh -``` - -精度脚本: - -``` -bash ./test/train_full_8p.sh -``` - -模型保存在”./models“目录下,模型生成的图片保存在”./result“目录下 - -模型训练的loss曲线保存在”./models"目录下。 - -## TEST - -对比GPU和NPU模型生成的图片和训练loss曲线,两者大致一致。 - -| name | Epoch 50 | GIF | Loss | -| :---------- | --------------------------------------------------- | ------------------------------------------------------------ | ----------------------------------------------- | -| CGAN on GPU | ![](README.assets/CGAN_epoch050-16371345386081.png) | ![](README.assets/CGAN_generate_animation-16371345738152.gif) | ![](README.assets/CGAN_loss-16371346002224.png) | -| CGAN on NPU | ![](README.assets/CGAN_epoch050-16371346136555.png) | ![](README.assets/CGAN_generate_animation-16371346226546.gif) | ![](README.assets/CGAN_loss-16371346305157.png) | - -## Pth2onnx - -执行以下命令,完成pth到onnx模型的转换 - -``` -python3.7 pth2onnx.py --pth_path ./models/mnist/CGAN/CGAN_G.pth --onnx_path 
./CGAN.onnx -``` - -## Demo - -执行以下命令,程序会自动生成输入并经过网络产生输出,将输出保存在"demo/demo_result.png"中 -``` -python3.7 demo.py --pth_path ./models/mnist/CGAN/CGAN_G.pth --save_path ./demo -``` - -### 精度对比 - -对比GPU和NPU生成的图片和loss曲线,两者差异不大,精度达标。 - +CGAN训练 + +``` +Generative adversarial nets can be extended to a conditional model if both the generator and discriminator are conditioned on some extra information y. y could be any kind of auxiliary information,such as class labels or data from other modalities. The author perform the conditioning by feeding y into the both the discriminator and generator as additional input layer.In the generator the prior input noise pz(z), and y are combined in joint hidden representation, and the adversarial training framework allows for considerable flexibility in how this hidden representation is composed. In the discriminator x and y are presented as inputs and to a discriminative function. +``` + +For more detail:https://arxiv.org/abs/1411.1784 + +The original gpu code:https://github.com/znxlwm/pytorch-generative-model-collections/ + +## Requirements + +use pytorch, you can use pip or conda to install the requirements + +``` +# for pip +cd $project +pip3.7 install -r requirements.txt +CANN 5.0.3 +torchvision==0.6.0 +``` + + + +## 数据集准备 + +1.下载mnist数据集作为训练集,dataloader.py中有自动下载mnist数据集的代码,执行训练命令会自动调用dataloader.py下载数据集,并保存在“./data/mnist“目录下。(请保持网络通畅) + +文件结构如下: + + +``` +CGAN +|-- data /数据集文件夹 +| |-- mnist /验证集,测试集 +|-- demo /demo.py的输出 +|--models /生成器和判别器模型保存目录 +|-- test /脚本文件夹 +| |--env.sh /环境配置文件 +| |--eval_1p.sh /单卡测试脚本 +| |--train_full_1p.sh /单卡精度测试脚本 +| |--train_full_8p.sh /8卡精度测试脚本 +| |--train_performance_1p.sh /单卡性能测试脚本 +| |--train_performance_8p.sh /8卡性能测试脚本 +|--results /生成器生成图片保存路径 +|-- CGAN.py /模型定义脚本 +|-- demo.py /例子脚本 +|-- dataloaderpy /数据预处理文件 +|-- main.py /主函数,训练启动脚本 +|-- utils.py /其它需要调用的函数脚本 +``` + + + +## TRAIN + +### 单p训练 + +source 环境变量 + +``` +source ./test/env_npu.sh +``` + +性能脚本: + +``` +bash ./test/train_performance_1p.sh +``` + +精度脚本: + +``` +bash ./test/train_full_1p.sh +``` + + + +### 多p训练 + +source 环境变量 + +``` +source ./test/env_npu.sh +``` + +性能脚本: + +``` +bash ./test/train_performance_8p.sh +``` + +精度脚本: + +``` +bash ./test/train_full_8p.sh +``` + +模型保存在”./models“目录下,模型生成的图片保存在”./result“目录下 + +模型训练的loss曲线保存在”./models"目录下。 + +## TEST + +对比GPU和NPU模型生成的图片和训练loss曲线,两者大致一致。 + +| name | Epoch 50 | GIF | Loss | +| :---------- | --------------------------------------------------- | ------------------------------------------------------------ | ----------------------------------------------- | +| CGAN on GPU | ![](README.assets/CGAN_epoch050-16371345386081.png) | ![](README.assets/CGAN_generate_animation-16371345738152.gif) | ![](README.assets/CGAN_loss-16371346002224.png) | +| CGAN on NPU | ![](README.assets/CGAN_epoch050-16371346136555.png) | ![](README.assets/CGAN_generate_animation-16371346226546.gif) | ![](README.assets/CGAN_loss-16371346305157.png) | + +## Pth2onnx + +执行以下命令,完成pth到onnx模型的转换 + +``` +python3.7 pth2onnx.py --pth_path ./models/mnist/CGAN/CGAN_G.pth --onnx_path ./CGAN.onnx +``` + +## Demo + +执行以下命令,程序会自动生成输入并经过网络产生输出,将输出保存在"demo/demo_result.png"中 +``` +python3.7 demo.py --pth_path ./models/mnist/CGAN/CGAN_G.pth --save_path ./demo +``` + +### 精度对比 + +对比GPU和NPU生成的图片和loss曲线,两者差异不大,精度达标。 + diff --git a/PyTorch/contrib/cv/others/CGAN/demo.py b/PyTorch/contrib/cv/others/CGAN/demo.py index ee8acb635e61a7096aa5aefecd67cb479c80680d..e2f24d669021e1bc5d6cc4ccbec41651a6bf221e 100644 --- a/PyTorch/contrib/cv/others/CGAN/demo.py +++ 
b/PyTorch/contrib/cv/others/CGAN/demo.py @@ -1,103 +1,103 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -from collections import OrderedDict -import torch -import torch.nn as nn -from CGAN import generator -import argparse -import os -import numpy as np -import utils - - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") - parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") - parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") - parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") - parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') - parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') - parser.add_argument('--save_path', type=str, default='demo', help="the generated image path") - return parser.parse_args() - - -def prep_preocess(args): - sample_num = args.class_num**2 - z_dim = args.input_dim - sample_z_ = torch.zeros((sample_num, z_dim)) - for i in range(args.class_num): - sample_z_[i * args.class_num] = torch.rand(1,z_dim) - for j in range(1, args.class_num): - sample_z_[i * args.class_num + j] = sample_z_[i * args.class_num] - - if not os.path.exists(os.path.join(args.save_path)): - os.makedirs(os.path.join(args.save_path)) - - temp = torch.zeros((args.class_num, 1)) - for i in range(args.class_num): - temp[i, 0] = i - - temp_y = torch.zeros((sample_num, 1)) - for i in range(args.class_num): - temp_y[i * args.class_num: (i + 1) * args.class_num] = temp - - sample_y_ = torch.zeros((sample_num, args.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) - - return sample_z_, sample_y_ - - -def main(): - args = parse_args() - # Build model - local_device = torch.device("npu:0") - torch.npu.set_device(local_device) - print("using npu :{}".format(local_device)) - print('Loading model ...\n') - net = generator(input_dim=args.input_dim, output_dim=args.output_dim, - input_size=args.input_size, class_num=args.class_num) - model = net - checkpoint = torch.load(args.pth_path, map_location='cpu') - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - model.eval() - z,y=prep_preocess(args) - result = model(z,y) - result = result.cpu().data.numpy().transpose(0, 2, 3, 1) - result = (result + 1)/2 - sample_num = args.class_num**2 - image_frame_dim = int(np.floor(np.sqrt(sample_num))) - if not os.path.exists(os.path.join(args.save_path)): - os.makedirs(os.path.join(args.save_path)) - 
utils.save_images(result[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], - os.path.join(args.save_path,'demo_result.png')) - print("demo image stored in:", os.path.join(args.save_path,'demo_result.png')) - - -if __name__ == "__main__": +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from collections import OrderedDict +import torch +import torch.nn as nn +from CGAN import generator +import argparse +import os +import numpy as np +import utils + + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") + parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") + parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") + parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") + parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') + parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') + parser.add_argument('--save_path', type=str, default='demo', help="the generated image path") + return parser.parse_args() + + +def prep_preocess(args): + sample_num = args.class_num**2 + z_dim = args.input_dim + sample_z_ = torch.zeros((sample_num, z_dim)) + for i in range(args.class_num): + sample_z_[i * args.class_num] = torch.rand(1,z_dim) + for j in range(1, args.class_num): + sample_z_[i * args.class_num + j] = sample_z_[i * args.class_num] + + if not os.path.exists(os.path.join(args.save_path)): + os.makedirs(os.path.join(args.save_path)) + + temp = torch.zeros((args.class_num, 1)) + for i in range(args.class_num): + temp[i, 0] = i + + temp_y = torch.zeros((sample_num, 1)) + for i in range(args.class_num): + temp_y[i * args.class_num: (i + 1) * args.class_num] = temp + + sample_y_ = torch.zeros((sample_num, args.class_num)).scatter_(1, temp_y.type(torch.LongTensor), 1) + + return sample_z_, sample_y_ + + +def main(): + args = parse_args() + # Build model + local_device = torch.device("npu:0") + torch.npu.set_device(local_device) + print("using npu :{}".format(local_device)) + print('Loading model ...\n') + net = generator(input_dim=args.input_dim, output_dim=args.output_dim, + input_size=args.input_size, class_num=args.class_num) + model = net + checkpoint = torch.load(args.pth_path, map_location='cpu') + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + model.eval() + z,y=prep_preocess(args) + result = model(z,y) + result = result.cpu().data.numpy().transpose(0, 2, 3, 1) + 
result = (result + 1)/2 + sample_num = args.class_num**2 + image_frame_dim = int(np.floor(np.sqrt(sample_num))) + if not os.path.exists(os.path.join(args.save_path)): + os.makedirs(os.path.join(args.save_path)) + utils.save_images(result[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], + os.path.join(args.save_path,'demo_result.png')) + print("demo image stored in:", os.path.join(args.save_path,'demo_result.png')) + + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CGAN/modelzoo_level.txt b/PyTorch/contrib/cv/others/CGAN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/CGAN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/CGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CGAN/pth2onnx.py b/PyTorch/contrib/cv/others/CGAN/pth2onnx.py index 43cad179c3a2ce08168afdb97b885d1f4384788a..cf793675995efd5667fa458cde704fd6e4a81bc5 100644 --- a/PyTorch/contrib/cv/others/CGAN/pth2onnx.py +++ b/PyTorch/contrib/cv/others/CGAN/pth2onnx.py @@ -1,67 +1,67 @@ - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from CGAN import generator -import torch -import torch.onnx -import sys -from collections import OrderedDict -import argparse - - -def parse_args(): - desc = "Pytorch implementation of CGAN collections" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") - parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") - parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") - parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") - parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') - parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') - return parser.parse_args() - - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." 
in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -def pth2onnx(): - args = parse_args() - net = generator(input_dim=args.input_dim, output_dim=args.output_dim, - input_size=args.input_size, class_num=args.class_num) - model = net - checkpoint = torch.load(args.pth_path, map_location='cpu') - checkpoint = proc_nodes_module(checkpoint) - model.load_state_dict(checkpoint) - model.eval() - input_names = ["image"] - output_names = ["output1"] - #dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} - dummy_input1 = torch.randn(100, 62) - dummy_input2 = torch.randn(100, 10) - torch.onnx.export(model, (dummy_input1,dummy_input2), args.onnx_path, input_names=input_names, - output_names=output_names, opset_version=11, verbose=True) - print("this model could generete pictures, specifically digits") - print('onnx export done.') - - -if __name__ == "__main__": + +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from CGAN import generator +import torch +import torch.onnx +import sys +from collections import OrderedDict +import argparse + + +def parse_args(): + desc = "Pytorch implementation of CGAN collections" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--input_dim', type=int, default=62, help="The input_dim") + parser.add_argument('--output_dim', type=int, default=3, help="The output_dim") + parser.add_argument('--input_size', type=int, default=28, help="The image size of MNIST") + parser.add_argument('--class_num', type=int, default=10, help="The num of classes of MNIST") + parser.add_argument('--pth_path', type=str, default='CGAN_G.pth', help='pth model path') + parser.add_argument('--onnx_path', type=str, default="CGAN.onnx", help='onnx model path') + return parser.parse_args() + + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." 
in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +def pth2onnx(): + args = parse_args() + net = generator(input_dim=args.input_dim, output_dim=args.output_dim, + input_size=args.input_size, class_num=args.class_num) + model = net + checkpoint = torch.load(args.pth_path, map_location='cpu') + checkpoint = proc_nodes_module(checkpoint) + model.load_state_dict(checkpoint) + model.eval() + input_names = ["image"] + output_names = ["output1"] + #dynamic_axes = {'image': {0: '-1'}, 'output1': {0: '-1'}} + dummy_input1 = torch.randn(100, 62) + dummy_input2 = torch.randn(100, 10) + torch.onnx.export(model, (dummy_input1,dummy_input2), args.onnx_path, input_names=input_names, + output_names=output_names, opset_version=11, verbose=True) + print("this model could generete pictures, specifically digits") + print('onnx export done.') + + +if __name__ == "__main__": pth2onnx() \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/LICENSE.txt b/PyTorch/contrib/cv/others/CenterMask2/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/README.md b/PyTorch/contrib/cv/others/CenterMask2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..39bd0e215c7434861e9e8352501b4b5e7d16636d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/README.md @@ -0,0 +1,61 @@ +# Centermask2 + +This implements training of CenterMask2 on the COCO2017 dataset, mainly modified from [Centermask2](https://link.zhihu.com/?target=https%3A//github.com/youngwanLEE/CenterMask) + + +## Requirements + +```shell +// 环境配置 +//1. 使用pip安装必须依赖的包 +pip install -r requirements.txt +//2. 安装axcend适配的的torch和apex +pip install torch-1.5.0+ascend.post3.20210930 +pip install apex-0.1+ascend.20210930 +//3. 安装torchvision==v0.5.0 +git clone --branch v0.5.0 https://github.com/pytorch/vision.git +cd vision +python setup.py build develop +pip install -e . +cd .. +//4. 安装修改过后的detectron2==v0.3 +cd detectron2 +python setup.py build develop +pip install -e . +cd .. 
+``` + +## Training + +To train a model, run `train_net.py` with the desired model architecture and the path to the COCO2017 dataset: + +```bash +#training 1p performance +bash test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash test/train_performance_8p.sh --data_path=real_data_path + +#test 8p accuracy +bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + +# finetuning 1p +bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path +``` + +Log path: + test/output/devie_id/train_${device_id}.log # training detail log + test/output/devie_id/centermask2.log # 8p training performance result log + test/output/devie_id/centermask2.log # 8p training accuracy result log + + + +## Centermask2 training result + +| Acc | FPS | Npu_nums | Iters | AMP_Type | +| :-----: | :----: | :------: | :---: | :------: | +| - | 0.4743 | 1 | 1 | O2 | +| 13.6968 | 2.2928 | 8 | 3699 | O1 | \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e44b35449969359cf80170076a1aa7d0b487992e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/__init__.py @@ -0,0 +1,3 @@ +from centermask import modeling + +__version__ = "0.1" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be9446346b7e569cc4630108009564a0ed21b3d6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/__init__.py @@ -0,0 +1,5 @@ +from .config import get_cfg + +__all__ = [ + "get_cfg", +] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/config.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..3501d32d97baec344c58e0d699d6f8684aba24b3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/config.py @@ -0,0 +1,13 @@ +from detectron2.config import CfgNode + + +def get_cfg() -> CfgNode: + """ + Get a copy of the default config. + + Returns: + a detectron2 CfgNode instance. + """ + from .defaults import _C + + return _C.clone() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/defaults.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..f9a053191ee9ede8ee1a7223e23867b69f91cc03 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/config/defaults.py @@ -0,0 +1,86 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. 
+from detectron2.config.defaults import _C +from detectron2.config import CfgNode as CN + + +# ---------------------------------------------------------------------------- # +# Additional Configs +# ---------------------------------------------------------------------------- # +_C.MODEL.MOBILENET = False + +# ---------------------------------------------------------------------------- # +# FCOS Head +# ---------------------------------------------------------------------------- # +_C.MODEL.FCOS = CN() + +# This is the number of foreground classes. +_C.MODEL.FCOS.NUM_CLASSES = 80 +_C.MODEL.FCOS.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] +_C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128] +_C.MODEL.FCOS.PRIOR_PROB = 0.01 +_C.MODEL.FCOS.INFERENCE_TH_TRAIN = 0.05 +_C.MODEL.FCOS.INFERENCE_TH_TEST = 0.05 +_C.MODEL.FCOS.NMS_TH = 0.6 +_C.MODEL.FCOS.PRE_NMS_TOPK_TRAIN = 1000 +_C.MODEL.FCOS.PRE_NMS_TOPK_TEST = 1000 +_C.MODEL.FCOS.POST_NMS_TOPK_TRAIN = 100 +_C.MODEL.FCOS.POST_NMS_TOPK_TEST = 100 +_C.MODEL.FCOS.TOP_LEVELS = 2 +_C.MODEL.FCOS.NORM = "GN" # Support GN or none +_C.MODEL.FCOS.USE_SCALE = True + +# Multiply centerness before threshold +# This will affect the final performance by about 0.05 AP but save some time +_C.MODEL.FCOS.THRESH_WITH_CTR = False + +# Focal loss parameters +_C.MODEL.FCOS.LOSS_ALPHA = 0.25 +_C.MODEL.FCOS.LOSS_GAMMA = 2.0 +_C.MODEL.FCOS.SIZES_OF_INTEREST = [64, 128, 256, 512] +_C.MODEL.FCOS.USE_RELU = True +_C.MODEL.FCOS.USE_DEFORMABLE = False + +# the number of convolutions used in the cls and bbox tower +_C.MODEL.FCOS.NUM_CLS_CONVS = 4 +_C.MODEL.FCOS.NUM_BOX_CONVS = 4 +_C.MODEL.FCOS.NUM_SHARE_CONVS = 0 +_C.MODEL.FCOS.CENTER_SAMPLE = True +_C.MODEL.FCOS.POS_RADIUS = 1.5 +_C.MODEL.FCOS.LOC_LOSS_TYPE = 'giou' + + +# ---------------------------------------------------------------------------- # +# VoVNet backbone +# ---------------------------------------------------------------------------- # + +_C.MODEL.VOVNET = CN() + +_C.MODEL.VOVNET.CONV_BODY = "V-39-eSE" +_C.MODEL.VOVNET.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] +# Options: FrozenBN, GN, "SyncBN", "BN" +_C.MODEL.VOVNET.NORM = "FrozenBN" +_C.MODEL.VOVNET.OUT_CHANNELS = 256 +_C.MODEL.VOVNET.BACKBONE_OUT_CHANNELS = 256 +_C.MODEL.VOVNET.STAGE_WITH_DCN = (False, False, False, False) +_C.MODEL.VOVNET.WITH_MODULATED_DCN = False +_C.MODEL.VOVNET.DEFORMABLE_GROUPS = 1 + + +# ---------------------------------------------------------------------------- # +# CenterMask +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_MASK_HEAD.ASSIGN_CRITERION = "area" +_C.MODEL.MASKIOU_ON = False +_C.MODEL.MASKIOU_LOSS_WEIGHT = 1.0 + +_C.MODEL.ROI_MASKIOU_HEAD = CN() +_C.MODEL.ROI_MASKIOU_HEAD.NAME = "MaskIoUHead" +_C.MODEL.ROI_MASKIOU_HEAD.CONV_DIM = 256 +_C.MODEL.ROI_MASKIOU_HEAD.NUM_CONV = 4 + + +# ---------------------------------------------------------------------------- # +# Keypoint Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_KEYPOINT_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] +_C.MODEL.ROI_KEYPOINT_HEAD.ASSIGN_CRITERION = "ratio" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..002814f8d479fcf71db2815d4e5ac8d579ef1176 --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/__init__.py @@ -0,0 +1,2 @@ +from .coco_evaluation import COCOEvaluator +from .cityscapes_evaluation import CityscapesInstanceEvaluator,CityscapesSemSegEvaluator diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/cityscapes_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..bcf821837f5012402d5c528d89cb19dadfa4fef9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/cityscapes_evaluation.py @@ -0,0 +1,198 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm +from detectron2.utils.file_io import PathManager + +from detectron2.evaluation.evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. 
+ """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + if "instances" in output: + output = output["instances"].to(self._cpu_device) + has_mask_scores = output.has("mask_scores") + if has_mask_scores: + self._logger.info("having mask scores...") + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.thing_classes[pred_class] + class_id = name2label[classes].id + # score = output.scores[i] + score = output.mask_scores[i] if has_mask_scores else output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write( + "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) + ) + else: + # Cityscapes requires a prediction file for every ground truth image. + with open(pred_txt, "w") as fout: + pass + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. 
+ """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/coco_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..d6f40c050996f319d651a1098d1a1deeb58de6b4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/evaluation/coco_evaluation.py @@ -0,0 +1,591 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by Sangrok Lee and Youngwan Lee (ETRI), 2020. +# We modify COCOEvaluator for adopting mask_score in mask evalaution. 
+ +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.fast_eval_api import COCOeval_opt +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + +from detectron2.evaluation.evaluator import DatasetEvaluator + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__( + self, + dataset_name, + tasks=None, + distributed=True, + output_dir=None, + *, + use_fast_impl=True, + kpt_oks_sigmas=(), + ): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + tasks (tuple[str]): tasks that can be evaluated under the given + configuration. A task is one of "bbox", "segm", "keypoints". + By default, will infer this automatically from predictions. + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will only evaluate the results in the current process. + output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instances_predictions.pth" a file in torch serialization + format that contains all the raw original predictions. + 2. "coco_instances_results.json" a json file in COCO's result + format. + use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. + Although the results should be very close to the official implementation in COCO + API, it is still recommended to compute results with the official API for use in + papers. The faster implementation also uses more RAM. + kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. + See http://cocodataset.org/#keypoints-eval + When empty, it will use the defaults in COCO. + Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. + """ + self._logger = logging.getLogger(__name__) + self._distributed = distributed + self._output_dir = output_dir + self._use_fast_impl = use_fast_impl + + if tasks is not None and isinstance(tasks, CfgNode): + kpt_oks_sigmas = ( + tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas + ) + self._logger.warn( + "COCO Evaluator instantiated using config, this is deprecated behavior." + " Please pass in explicit arguments instead." 
+ ) + self._tasks = None # Infering it from predictions should be better + else: + self._tasks = tasks + + self._cpu_device = torch.device("cpu") + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + self._logger.info( + f"'{dataset_name}' is not registered by `register_coco_instances`." + " Therefore trying to convert it to COCO format ..." + ) + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + if self._do_evaluation: + self._kpt_oks_sigmas = kpt_oks_sigmas + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + if len(prediction) > 1: + self._predictions.append(prediction) + + def evaluate(self, img_ids=None): + """ + Args: + img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset + """ + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(predictions, img_ids=img_ids) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _tasks_from_predictions(self, predictions): + """ + Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. + """ + tasks = {"bbox"} + for pred in predictions: + if "segmentation" in pred: + tasks.add("segm") + if "keypoints" in pred: + tasks.add("keypoints") + return sorted(tasks) + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. 
+ """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + num_classes = len(all_contiguous_ids) + assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + assert category_id < num_classes, ( + f"A prediction has class={category_id}, " + f"but the dataset only has {num_classes} classes and " + f"predicted class id should be in [0, {num_classes - 1}]." + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. + bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. 
+ + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + has_mask_scores = instances.has("mask_scores") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the pycocotools/_mask.pyx does). 
+ rle["counts"] = rle["counts"].decode("utf-8") + + if has_mask_scores: + mask_scores = instances.mask_scores.tolist() + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_mask_scores: + result["mask_score"] = mask_scores[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. + """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 
'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco( + coco_gt, coco_results, iou_type, kpt_oks_sigmas=None, use_fast_impl=True, img_ids=None +): + """ + Evaluate the coco results using COCOEval API. + """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + has_mask_scores = "mask_score" in coco_results[0] + + for c in coco_results: + c.pop("bbox", None) + if has_mask_scores: + c["score"] = c["mask_score"] + del c["mask_score"] + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) + if img_ids is not None: + coco_eval.params.imgIds = img_ids + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." 
+ ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..af658556ea6e5c1b0b18a52b5f5f52a3ca586177 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/__init__.py @@ -0,0 +1,7 @@ +from .deform_conv import DFConv2d +from .ml_nms import ml_nms +from .iou_loss import IOULoss +from .conv_with_kaiming_uniform import conv_with_kaiming_uniform +from .wrappers import MaxPool2d, Linear, Max + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/conv_with_kaiming_uniform.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/conv_with_kaiming_uniform.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bd00122a6409fd737cbf52c316dedb4ae00e29 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/conv_with_kaiming_uniform.py @@ -0,0 +1,52 @@ +from torch import nn + +from detectron2.layers import Conv2d +from .deform_conv import DFConv2d +from detectron2.layers.batch_norm import get_norm + + +def conv_with_kaiming_uniform( + norm=None, activation=None, + use_deformable=False, use_sep=False): + def make_conv( + in_channels, out_channels, kernel_size, stride=1, dilation=1 + ): + if use_deformable: + conv_func = DFConv2d + else: + conv_func = Conv2d + if use_sep: + assert in_channels == out_channels + groups = in_channels + else: + groups = 1 + conv = conv_func( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=dilation * (kernel_size - 1) // 2, + dilation=dilation, + groups=groups, + bias=(norm is None) + ) + if not use_deformable: + # Caffe2 implementation uses XavierFill, which in fact + # corresponds to kaiming_uniform_ in PyTorch + nn.init.kaiming_uniform_(conv.weight, a=1) + if norm is None: + nn.init.constant_(conv.bias, 0) + module = [conv,] + if norm is not None: + if norm == "GN": + norm_module = nn.GroupNorm(32, out_channels) + else: + norm_module = get_norm(norm, out_channels) + module.append(norm_module) + if activation is not None: + module.append(nn.ReLU(inplace=True)) + if len(module) > 1: + return nn.Sequential(*module) + return conv + + return make_conv diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/deform_conv.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..be79ebecd2fc5f7538b05bf4592fafe33cdc91e0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/deform_conv.py @@ -0,0 +1,112 @@ +import torch +from torch import nn + +from detectron2.layers import Conv2d + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class DFConv2d(nn.Module): + """Deformable convolutional layer""" + def __init__( + self, + in_channels, + out_channels, + with_modulated_dcn=True, + kernel_size=3, + stride=1, + 
groups=1, + dilation=1, + deformable_groups=1, + bias=False, + padding=None + ): + super(DFConv2d, self).__init__() + if isinstance(kernel_size, (list, tuple)): + assert isinstance(stride, (list, tuple)) + assert isinstance(dilation, (list, tuple)) + assert len(kernel_size) == 2 + assert len(stride) == 2 + assert len(dilation) == 2 + padding = ( + dilation[0] * (kernel_size[0] - 1) // 2, + dilation[1] * (kernel_size[1] - 1) // 2 + ) + offset_base_channels = kernel_size[0] * kernel_size[1] + else: + padding = dilation * (kernel_size - 1) // 2 + offset_base_channels = kernel_size * kernel_size + if with_modulated_dcn: + from .deform_conv import ModulatedDeformConv + offset_channels = offset_base_channels * 3 # default: 27 + conv_block = ModulatedDeformConv + else: + from .deform_conv import DeformConv + offset_channels = offset_base_channels * 2 # default: 18 + conv_block = DeformConv + self.offset = Conv2d( + in_channels, + deformable_groups * offset_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=1, + dilation=dilation + ) + for l in [self.offset, ]: + nn.init.kaiming_uniform_(l.weight, a=1) + torch.nn.init.constant_(l.bias, 0.) + self.conv = conv_block( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + deformable_groups=deformable_groups, + bias=bias + ) + self.with_modulated_dcn = with_modulated_dcn + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.offset_split = offset_base_channels * deformable_groups * 2 + + def forward(self, x, return_offset=False): + if x.numel() > 0: + if not self.with_modulated_dcn: + offset_mask = self.offset(x) + x = self.conv(x, offset_mask) + else: + offset_mask = self.offset(x) + offset = offset_mask[:, :self.offset_split, :, :] + mask = offset_mask[:, self.offset_split:, :, :].sigmoid() + x = self.conv(x, offset, mask) + if return_offset: + return x, offset_mask + return x + # get output shape + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // d + 1 + for i, p, di, k, d in zip( + x.shape[-2:], + self.padding, + self.dilation, + self.kernel_size, + self.stride + ) + ] + output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/iou_loss.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/iou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..efcd961f9751d2e64b619a558c2ed15cd3a501a7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/iou_loss.py @@ -0,0 +1,57 @@ +import torch +from torch import nn + + +class IOULoss(nn.Module): + def __init__(self, loc_loss_type='iou'): + super(IOULoss, self).__init__() + self.loc_loss_type = loc_loss_type + + def forward(self, pred, target, weight=None,pos_mask=None): + pred_left = pred[:, 0] + pred_top = pred[:, 1] + pred_right = pred[:, 2] + pred_bottom = pred[:, 3] + + target_left = target[:, 0] + target_top = target[:, 1] + target_right = target[:, 2] + target_bottom = target[:, 3] + + target_aera = (target_left + target_right) * \ + (target_top + target_bottom) + pred_aera = (pred_left + pred_right) * \ + (pred_top + pred_bottom) + + w_intersect = torch.min(pred_left, target_left) + \ + torch.min(pred_right, target_right) + h_intersect = torch.min(pred_bottom, target_bottom) + \ + 
torch.min(pred_top, target_top) + + g_w_intersect = torch.max(pred_left, target_left) + \ + torch.max(pred_right, target_right) + g_h_intersect = torch.max(pred_bottom, target_bottom) + \ + torch.max(pred_top, target_top) + ac_uion = g_w_intersect * g_h_intersect + # add + if pos_mask is not None: + ac_uion = ac_uion + 1 + ac_uion = ac_uion - pos_mask + area_intersect = w_intersect * h_intersect + area_union = target_aera + pred_aera - area_intersect + + ious = (area_intersect + 1.0) / (area_union + 1.0) + gious = ious - (ac_uion - area_union) / ac_uion + if self.loc_loss_type == 'iou': + losses = -torch.log(ious) + elif self.loc_loss_type == 'linear_iou': + losses = 1 - ious + elif self.loc_loss_type == 'giou': + losses = 1 - gious + else: + raise NotImplementedError + + if weight is not None: + return (losses * weight).sum() + else: + return losses.sum() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/ml_nms.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/ml_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..13fd37e9ebbd82d3dde0a3fe0a1b06797acdd79f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/ml_nms.py @@ -0,0 +1,39 @@ +from detectron2.layers import batched_nms +import pdb +import torch +from detectron2.layers import batched_nms_npu + +# lzy 12.21 add nF +# from torch.contrib.npu.optimized_lib import function as nF +# end +def ml_nms(boxlist, nms_thresh, max_proposals=-1, + score_field="scores", label_field="labels"): + """ + Performs non-maximum suppression on a boxlist, with scores specified + in a boxlist field via score_field. + Arguments: + boxlist(BoxList) + nms_thresh (float) + max_proposals (int): if > 0, then only the top max_proposals are kept + after non-maximum suppression + score_field (str) + """ + if nms_thresh <= 0: + return boxlist + # lzy 12.20 turn tensor into cpu + boxes = boxlist.pred_boxes.tensor.cpu() + boxes_l = boxlist.pred_boxes.tensor + + scores = boxlist.scores.cpu() + scores_l = boxlist.scores + + # end + labels = boxlist.pred_classes + + keep = batched_nms(boxes, scores, labels, nms_thresh) + + + if max_proposals > 0: + keep = keep[: max_proposals].long() + boxlist = boxlist[keep] + return boxlist diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/wrappers.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..df62fe030c8a87f41c3673a7e71ec20d2e5bb97e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/layers/wrappers.py @@ -0,0 +1,98 @@ +# Author, Sangrok Lee, github.com/lsrock1 + +import torch + + +# from facebook detectron2 +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class MaxPool2d(torch.nn.MaxPool2d): + """ + A wrapper around :class:`torch.nn.MaxPool2d` to support empty inputs and more features. 
+ """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._make_iteratable() + + def forward(self, x): + if x.numel() == 0: + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. + # This computes the height and width of the output tensor + + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], x.shape[1]] + output_shape + empty = _NewEmptyTensorOp.apply(x, output_shape) + + return empty + + x = super().forward(x) + + return x + + def _make_iteratable(self): + if not isinstance(self.padding, list): + self.padding = [self.padding, self.padding] + + if not isinstance(self.dilation, list): + self.dilation = [self.dilation, self.dilation] + + if not isinstance(self.kernel_size, list): + self.kernel_size = [self.kernel_size, self.kernel_size] + + if not isinstance(self.stride, list): + self.stride = [self.stride, self.stride] + + +class Linear(torch.nn.Linear): + """ + A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. + Because of https://github.com/pytorch/pytorch/issues/34202 + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, x): + if x.numel() == 0: + output_shape = [x.shape[0], self.weight.shape[0]] + + empty = _NewEmptyTensorOp.apply(x, output_shape) + if self.training: + # This is to make DDP happy. + # DDP expects all workers to have gradient w.r.t the same set of parameters. + _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + _dummy + else: + return empty + + x = super().forward(x) + return x + + +def Max(x): + """ + A wrapper around torch.max in Spatial Attention Module (SAM) to support empty inputs and more features. + """ + if x.numel() == 0: + output_shape = [x.shape[0], 1, x.shape[2], x.shape[3]] + empty = _NewEmptyTensorOp.apply(x, output_shape) + return empty + return torch.max(x, dim=1, keepdim=True)[0] \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c595912e6cdf133e5e5dd89a1ded6bbaddd114cb --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/__init__.py @@ -0,0 +1,3 @@ +from .fcos import FCOS +from .backbone import build_fcos_resnet_fpn_backbone +from .centermask import CenterROIHeads diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b777f5a361f886bccc20209643e8359a2e9439b7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. 
+from .fpn import build_fcos_resnet_fpn_backbone, LastLevelP6P7, LastLevelP6 +from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone, build_fcos_vovnet_fpn_backbone +from .mobilenet import build_mnv2_backbone, build_mobilenetv2_fpn_backbone, build_fcos_mobilenetv2_fpn_backbone diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/fpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..b457efb12c7fcfd583c32054c57aa897fef0b9be --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/fpn.py @@ -0,0 +1,87 @@ +from torch import nn +import torch.nn.functional as F +import fvcore.nn.weight_init as weight_init + +from detectron2.modeling.backbone import FPN, build_resnet_backbone +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone.build import BACKBONE_REGISTRY + + +__all__ = [ + "FPN", + "LastLevelP6P7", + "LastLevelP6", + "build_fcos_resnet_fpn_backbone" +] + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from + C5 or P5 feature. + """ + + def __init__(self, in_channels, out_channels, in_features="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_features + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, x): + p6 = self.p6(x) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +class LastLevelP6(nn.Module): + """ + This module is used in FCOS to generate extra layers + """ + + def __init__(self, in_channels, out_channels, in_features="res5"): + super().__init__() + self.num_levels = 1 + self.in_feature = in_features + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + for module in [self.p6]: + weight_init.c2_xavier_fill(module) + + def forward(self, x): + p6 = self.p6(x) + return [p6] + + +@BACKBONE_REGISTRY.register() +def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + if cfg.MODEL.MOBILENET: + bottom_up = build_mnv2_backbone(cfg, input_shape) + else: + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + top_levels = cfg.MODEL.FCOS.TOP_LEVELS + in_channels_top = out_channels + if top_levels == 2: + top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") + if top_levels == 1: + top_block = LastLevelP6(in_channels_top, out_channels, "p5") + elif top_levels == 0: + top_block = None + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=top_block, + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/mobilenet.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..6872e0d19957ab3fc5c6f2d830ce496b375a6a63 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/mobilenet.py @@ -0,0 +1,215 @@ +# taken from https://github.com/tonylins/pytorch-mobilenet-v2/ +# Published by Ji Lin, tonylins +# licensed under the Apache License, Version 2.0, January 2004 +# Modified by Youngwan Lee, Feburary 2020 + +from torch import nn +from torch.nn import BatchNorm2d +from detectron2.layers import Conv2d, FrozenBatchNorm2d, ShapeSpec +from detectron2.modeling.backbone.build import BACKBONE_REGISTRY +from detectron2.modeling.backbone import Backbone +from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool + +from .fpn import LastLevelP6, LastLevelP6P7 + +__all__ = [ + "MobileNetV2", + "build_mnv2_backbone", + "build_mobilenetv2_fpn_backbone", + "build_fcos_mobilenetv2_fpn_backbone" +] + +def conv_bn(inp, oup, stride): + return nn.Sequential( + Conv2d(inp, oup, 3, stride, 1, bias=False), + FrozenBatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + Conv2d(inp, oup, 1, 1, 0, bias=False), + FrozenBatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw + Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + FrozenBatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + FrozenBatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + FrozenBatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # dw + Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + FrozenBatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + FrozenBatchNorm2d(oup), + ) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(Backbone): + """ + Should freeze bn + """ + def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.): + super(MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + interverted_residual_setting = [ + # t, c, n, s + [1, 
16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + assert input_size % 32 == 0 + input_channel = int(input_channel * width_mult) + self.return_features_indices = [3, 6, 13, 17] + self.return_features_num_channels = [] + self.features = nn.ModuleList([conv_bn(3, input_channel, 2)]) + # building inverted residual blocks + for t, c, n, s in interverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if i == 0: + self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) + else: + self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) + input_channel = output_channel + if len(self.features) - 1 in self.return_features_indices: + self.return_features_num_channels.append(output_channel) + + self._initialize_weights() + self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) + + def _freeze_backbone(self, freeze_at): + for layer_index in range(freeze_at): + for p in self.features[layer_index].parameters(): + p.requires_grad = False + + def forward(self, x): + res = [] + for i, m in enumerate(self.features): + x = m(x) + if i in self.return_features_indices: + res.append(x) + return {'res{}'.format(i + 2): r for i, r in enumerate(res)} + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, (2. / n) ** 0.5) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + +@BACKBONE_REGISTRY.register() +def build_mnv2_backbone(cfg, input_shape): + """ + Create a MobileNetV2 instance from config. + Returns: + MobileNetV2: a :class:`MobileNetV2` instance. + """ + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + + out_feature_channels = {"res2": 24, "res3": 32, + "res4": 96, "res5": 320} + out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32} + model = MobileNetV2(cfg) + model._out_features = out_features + model._out_feature_channels = out_feature_channels + model._out_feature_strides = out_feature_strides + return model + + +@BACKBONE_REGISTRY.register() +def build_mobilenetv2_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_mnv2_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_fcos_mobilenetv2_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
+ """ + bottom_up = build_mnv2_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + top_levels = cfg.MODEL.FCOS.TOP_LEVELS + in_channels_top = out_channels + if top_levels == 2: + top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") + if top_levels == 1: + top_block = LastLevelP6(in_channels_top, out_channels, "p5") + elif top_levels == 0: + top_block = None + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=top_block, + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/vovnet.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/vovnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0161afe5d5613d650e33423e853cdb3e0864a8dd --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/backbone/vovnet.py @@ -0,0 +1,573 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. +from collections import OrderedDict +import torch +import torch.nn as nn +import torch.nn.functional as F + +import fvcore.nn.weight_init as weight_init +from detectron2.modeling.backbone import Backbone +from detectron2.modeling.backbone.build import BACKBONE_REGISTRY +from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool +from detectron2.layers import ( + Conv2d, + DeformConv, + ModulatedDeformConv, + FrozenBatchNorm2d, + ShapeSpec, + get_norm, +) +from .fpn import LastLevelP6, LastLevelP6P7 +import pdb + +__all__ = [ + "VoVNet", + "build_vovnet_backbone", + "build_vovnet_fpn_backbone", + "build_fcos_vovnet_fpn_backbone" +] + +_NORM = False + +VoVNet19_slim_dw_eSE = { + 'stem': [64, 64, 64], + 'stage_conv_ch': [64, 80, 96, 112], + 'stage_out_ch': [112, 256, 384, 512], + "layer_per_block": 3, + "block_per_stage": [1, 1, 1, 1], + "eSE": True, + "dw" : True +} + +VoVNet19_dw_eSE = { + 'stem': [64, 64, 64], + "stage_conv_ch": [128, 160, 192, 224], + "stage_out_ch": [256, 512, 768, 1024], + "layer_per_block": 3, + "block_per_stage": [1, 1, 1, 1], + "eSE": True, + "dw" : True +} + +VoVNet19_slim_eSE = { + 'stem': [64, 64, 128], + 'stage_conv_ch': [64, 80, 96, 112], + 'stage_out_ch': [112, 256, 384, 512], + 'layer_per_block': 3, + 'block_per_stage': [1, 1, 1, 1], + 'eSE' : True, + "dw" : False +} + +VoVNet19_eSE = { + 'stem': [64, 64, 128], + "stage_conv_ch": [128, 160, 192, 224], + "stage_out_ch": [256, 512, 768, 1024], + "layer_per_block": 3, + "block_per_stage": [1, 1, 1, 1], + "eSE": True, + "dw" : False +} + +VoVNet39_eSE = { + 'stem': [64, 64, 128], + "stage_conv_ch": [128, 160, 192, 224], + "stage_out_ch": [256, 512, 768, 1024], + "layer_per_block": 5, + "block_per_stage": [1, 1, 2, 2], + "eSE": True, + "dw" : False +} + +VoVNet57_eSE = { + 'stem': [64, 64, 128], + "stage_conv_ch": [128, 160, 192, 224], + "stage_out_ch": [256, 512, 768, 1024], + "layer_per_block": 5, + "block_per_stage": [1, 1, 4, 3], + "eSE": True, + "dw" : False +} + +VoVNet99_eSE = { + 'stem': [64, 64, 128], + "stage_conv_ch": [128, 160, 192, 224], + "stage_out_ch": [256, 512, 768, 1024], + "layer_per_block": 5, + "block_per_stage": [1, 3, 9, 3], + "eSE": True, + "dw" : False +} + +_STAGE_SPECS = { + "V-19-slim-dw-eSE": VoVNet19_slim_dw_eSE, + "V-19-dw-eSE": VoVNet19_dw_eSE, + "V-19-slim-eSE": VoVNet19_slim_eSE, + "V-19-eSE": VoVNet19_eSE, + 
"V-39-eSE": VoVNet39_eSE, + "V-57-eSE": VoVNet57_eSE, + "V-99-eSE": VoVNet99_eSE, +} + +def dw_conv3x3(in_channels, out_channels, module_name, postfix, + stride=1, kernel_size=3, padding=1): + """3x3 convolution with padding""" + return [ + ('{}_{}/dw_conv3x3'.format(module_name, postfix), + nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=out_channels, + bias=False)), + ('{}_{}/pw_conv1x1'.format(module_name, postfix), + nn.Conv2d(in_channels, out_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + bias=False)), + ('{}_{}/pw_norm'.format(module_name, postfix), get_norm(_NORM, out_channels)), + ('{}_{}/pw_relu'.format(module_name, postfix), nn.ReLU(inplace=True)), + ] + +class DFConv3x3(nn.Module): + def __init__( + self, + in_channels, + out_channels, + module_name, + postfix, + dilation=1, + groups=1, + with_modulated_dcn=None, + deformable_groups=1 + ): + super(DFConv3x3, self).__init__() + self.module_names = [] + self.with_modulated_dcn = with_modulated_dcn + if self.with_modulated_dcn: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + unit_name = f"{module_name}_{postfix}/conv_offset" + self.module_names.append(unit_name) + self.add_module(unit_name, Conv2d( + in_channels, + offset_channels * deformable_groups, + kernel_size=3, + stride=1, + padding=1 * dilation, + dilation=dilation, + )) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.constant_(m.weight, 0) + nn.init.constant_(m.bias, 0) + + unit_name = f"{module_name}_{postfix}/conv" + self.module_names.append(unit_name) + self.add_module(f"{module_name}_{postfix}/conv", deform_conv_op( + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1 * dilation, + bias=False, + groups=groups, + dilation=1, + deformable_groups=deformable_groups, + )) + unit_name = f"{module_name}_{postfix}/norm" + self.module_names.append(unit_name) + self.add_module(unit_name, get_norm(_NORM, out_channels)) + + + def forward(self, x): + if self.with_modulated_dcn: + #offset conv + offset_mask = getattr(self, self.module_names[0])(x) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + #conv + out = getattr(self, self.module_names[1])(x, offset, mask) + else: + offset = getattr(self, self.module_names[0])(x) + out = getattr(self, self.module_names[1])(x, offset) + + return F.relu_(getattr(self, self.module_names[2])(out)) + + + +def conv3x3(in_channels, out_channels, module_name, postfix, + stride=1, groups=1, kernel_size=3, padding=1): + """3x3 convolution with padding""" + return [ + (f'{module_name}_{postfix}/conv', + nn.Conv2d(in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=False)), + (f'{module_name}_{postfix}/norm', get_norm(_NORM, out_channels)), + (f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)) + ] + + +def conv1x1(in_channels, out_channels, module_name, postfix, + stride=1, groups=1, kernel_size=1, padding=0): + """1x1 convolution with padding""" + return [ + (f'{module_name}_{postfix}/conv', + nn.Conv2d(in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias=False)), + (f'{module_name}_{postfix}/norm', get_norm(_NORM, out_channels)), + 
(f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)) + ] + +class Hsigmoid(nn.Module): + def __init__(self, inplace=True): + super(Hsigmoid, self).__init__() + self.inplace = inplace + + def forward(self, x): + return F.relu6(x + 3., inplace=self.inplace) / 6. + + +class eSEModule(nn.Module): + def __init__(self, channel, reduction=4): + super(eSEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Conv2d(channel,channel, kernel_size=1, + padding=0) + self.hsigmoid = Hsigmoid() + + def forward(self, x): + input = x + x = self.avg_pool(x) + x = self.fc(x) + x = self.hsigmoid(x) + return input * x + + +class _OSA_module(nn.Module): + + def __init__(self, + in_ch, + stage_ch, + concat_ch, + layer_per_block, + module_name, + SE=False, + identity=False, + depthwise=False, + dcn_config={}, + ): + + super(_OSA_module, self).__init__() + + self.identity = identity + self.depthwise = depthwise + self.isReduced = False + self.layers = nn.ModuleList() + in_channel = in_ch + if self.depthwise and in_channel != stage_ch: + self.isReduced = True + self.conv_reduction = nn.Sequential( + OrderedDict(conv1x1(in_channel, stage_ch, + "{}_reduction".format(module_name), "0"))) + with_dcn = dcn_config.get("stage_with_dcn", False) + for i in range(layer_per_block): + if self.depthwise: + self.layers.append( + nn.Sequential(OrderedDict(dw_conv3x3(stage_ch, stage_ch, module_name, i)))) + elif with_dcn: + deformable_groups = dcn_config.get("deformable_groups", 1) + with_modulated_dcn = dcn_config.get("with_modulated_dcn", False) + self.layers.append(DFConv3x3(in_channel, stage_ch, module_name, i, + with_modulated_dcn=with_modulated_dcn, deformable_groups=deformable_groups)) + else: + self.layers.append(nn.Sequential(OrderedDict(conv3x3(in_channel, stage_ch, module_name, i)))) + in_channel = stage_ch + + # feature aggregation + in_channel = in_ch + layer_per_block * stage_ch + self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat"))) + print("concat_ch:",concat_ch) + self.ese = eSEModule(concat_ch) + + + def forward(self, x): + + identity_feat = x + + output = [] + output.append(x) + + if self.depthwise and self.isReduced: + x = self.conv_reduction(x) + + for layer in self.layers: + x = layer(x) + output.append(x) + + x = torch.cat(output, dim=1) + xt = self.concat(x) + + # pdb.set_trace() + xt = self.ese(xt) + + if self.identity: + + #pdb.set_trace() + # print(xt.shape) + # x.retain_grad() + # xt.retain_grad() + # xt.backward(torch.ones(xt.size()).npu(),retain_graph=True) + xt = (xt+ identity_feat) + # xt.backward(torch.ones(xt.size()).npu(),retain_graph=True) + + + return xt + + +class _OSA_stage(nn.Sequential): + + def __init__(self, + in_ch, + stage_ch, + concat_ch, + block_per_stage, + layer_per_block, + stage_num, + SE=False, + depthwise=False, + dcn_config={}): + super(_OSA_stage, self).__init__() + + if not stage_num == 2: + self.add_module("Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)) + + if block_per_stage != 1: + SE = False + module_name = f"OSA{stage_num}_1" + self.add_module(module_name, _OSA_module(in_ch, + stage_ch, + concat_ch, + layer_per_block, + module_name, + SE, + depthwise=depthwise, + dcn_config=dcn_config)) + for i in range(block_per_stage - 1): + if i != block_per_stage - 2: #last block + SE = False + module_name = f"OSA{stage_num}_{i + 2}" + self.add_module(module_name, + _OSA_module(concat_ch, + stage_ch, + concat_ch, + layer_per_block, + module_name, + SE, + identity=True, + depthwise=depthwise, + 
dcn_config=dcn_config)) + + + +class VoVNet(Backbone): + + def __init__(self, cfg, input_ch, out_features=None): + """ + Args: + input_ch(int) : the number of input channel + out_features (list[str]): name of the layers whose outputs should + be returned in forward. Can be anything in "stem", "stage2" ... + """ + super(VoVNet, self).__init__() + + global _NORM + _NORM = cfg.MODEL.VOVNET.NORM + + stage_specs = _STAGE_SPECS[cfg.MODEL.VOVNET.CONV_BODY] + + stem_ch = stage_specs["stem"] + config_stage_ch = stage_specs["stage_conv_ch"] + config_concat_ch = stage_specs["stage_out_ch"] + block_per_stage = stage_specs["block_per_stage"] + layer_per_block = stage_specs["layer_per_block"] + SE = stage_specs["eSE"] + depthwise = stage_specs["dw"] + + self._out_features = out_features + + + # Stem module + conv_type = dw_conv3x3 if depthwise else conv3x3 + stem = conv3x3(input_ch, stem_ch[0], "stem", "1", 2) + stem += conv_type(stem_ch[0], stem_ch[1], "stem", "2", 1) + stem += conv_type(stem_ch[1], stem_ch[2], "stem", "3", 2) + self.add_module("stem", nn.Sequential((OrderedDict(stem)))) + current_stirde = 4 + self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde} + self._out_feature_channels = {"stem": stem_ch[2]} + + stem_out_ch = [stem_ch[2]] + in_ch_list = stem_out_ch + config_concat_ch[:-1] + # OSA stages + self.stage_names = [] + for i in range(4): # num_stages + name = "stage%d" % (i + 2) # stage 2 ... stage 5 + self.stage_names.append(name) + self.add_module(name, _OSA_stage(in_ch_list[i], + config_stage_ch[i], + config_concat_ch[i], + block_per_stage[i], + layer_per_block, + i + 2, + SE, + depthwise, + dcn_config = { + "stage_with_dcn": cfg.MODEL.VOVNET.STAGE_WITH_DCN[i], + "with_modulated_dcn": cfg.MODEL.VOVNET.WITH_MODULATED_DCN, + "deformable_groups": cfg.MODEL.VOVNET.DEFORMABLE_GROUPS, + } + )) + + self._out_feature_channels[name] = config_concat_ch[i] + if not i == 0: + self._out_feature_strides[name] = current_stirde = int( + current_stirde * 2) + + # initialize weights + # self._initialize_weights() + # Optionally freeze (requires_grad=False) parts of the backbone + self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) + + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + + def _freeze_backbone(self, freeze_at): + if freeze_at < 0: + return + # freeze BN layers + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + freeze_bn_params(m) + for stage_index in range(freeze_at): + if stage_index == 0: + m = self.stem # stage 0 is the stem + else: + m = getattr(self, "stage" + str(stage_index+1)) + for p in m.parameters(): + p.requires_grad = False + FrozenBatchNorm2d.convert_frozen_batchnorm(self) + + def forward(self, x): + outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for name in self.stage_names: + x = getattr(self, name)(x) + if name in self._out_features: + outputs[name] = x + # pdb.set_trace() + # for name in self.stage_names: + # if name in self._out_features: + # x.retain_grad() + + # outputs["stage5"].backward(torch.ones(outputs["stage5"].size()).npu(),retain_graph=True) + # outputs["stage4"].backward(torch.ones(outputs["stage4"].size()).npu(),retain_graph=True) + # outputs["stage3"].backward(torch.ones(outputs["stage3"].size()).npu(),retain_graph=True) + # outputs["stem"].backward(torch.ones(outputs["stem"].size()).npu(),retain_graph=True) + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + 
channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +@BACKBONE_REGISTRY.register() +def build_vovnet_backbone(cfg, input_shape): + """ + Create a VoVNet instance from config. + + Returns: + VoVNet: a :class:`VoVNet` instance. + """ + out_features = cfg.MODEL.VOVNET.OUT_FEATURES + return VoVNet(cfg, input_shape.channels, out_features=out_features) + + +@BACKBONE_REGISTRY.register() +def build_vovnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_vovnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_fcos_vovnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_vovnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + top_levels = cfg.MODEL.FCOS.TOP_LEVELS + in_channels_top = out_channels + if top_levels == 2: + top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") + if top_levels == 1: + top_block = LastLevelP6(in_channels_top, out_channels, "p5") + elif top_levels == 0: + top_block = None + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=top_block, + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/.proposal_utils.py.swp b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/.proposal_utils.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..206a16d6ea5741a1ec0bf59de998e0e3ef2fff6f Binary files /dev/null and b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/.proposal_utils.py.swp differ diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5e17734cc6aa759edb3dc9051194e8f3859d1e86 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. +from .center_heads import CenterROIHeads +from .proposal_utils import ( + add_ground_truth_to_proposals, + add_ground_truth_to_proposals_single_image +) +from .sam import SpatialAttentionMaskHead +from .pooler import ROIPooler +from. 
mask_head import build_mask_head, mask_rcnn_loss, mask_rcnn_inference +from .maskiou_head import build_maskiou_head, mask_iou_loss, mask_iou_inference +from .keypoint_head import build_keypoint_head, keypoint_rcnn_loss, keypoint_rcnn_inference \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/center_heads.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/center_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..3c767eda961030e3eb840c79a45b29469b1c861e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/center_heads.py @@ -0,0 +1,605 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. +import torch +from torch import nn +from typing import Dict, List, Optional, Tuple, Union +import numpy as np + +from detectron2.modeling.roi_heads import ( + ROI_HEADS_REGISTRY, +) +from detectron2.structures import Boxes, Instances, pairwise_iou, ImageList +from detectron2.utils.events import get_event_storage +from detectron2.modeling.matcher import Matcher +from detectron2.modeling.sampling import subsample_labels +from detectron2.layers import ShapeSpec +# from detectron2.modeling.roi_heads.keypoint_head import build_keypoint_head +from .keypoint_head import build_keypoint_head + + +from .mask_head import build_mask_head, mask_rcnn_loss, mask_rcnn_inference +from .maskiou_head import build_maskiou_head, mask_iou_loss, mask_iou_inference +from .proposal_utils import add_ground_truth_to_proposals +from .pooler import ROIPooler + +import datetime + +import pdb +__all__ = ["CenterROIHeads"] + + +def select_foreground_proposals(proposals, bg_label): + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fg_proposals.append(proposals_per_image[fg_idxs]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + +##edit by zsc: add code to provide the dynastic shape +def select_foreground_proposals_fix_shape( proposals, bg_label): + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain instances with `gt_classes != -1 && + gt_classes != bg_label` . + In order to solve the task of dynasmic shape, we set the max instances per image + is 128 and fix the shape problem. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. 
+ + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = torch.empty(0).bool() + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_masks = fg_selection_masks.to(gt_classes.device) + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fix_num = 128 #128 is the max instances + pos_idxs = torch.zeros(fix_num, device = fg_selection_mask.device).long() + weight_mask = pos_idxs.bool().scatter(0,fg_idxs,(fg_idxs+1).bool()) + pos_idxs = pos_idxs.scatter(0,fg_idxs,fg_idxs) + + fg_proposals.append(proposals_per_image[pos_idxs]) + fg_selection_masks = torch.cat((fg_selection_masks,weight_mask),0) + return fg_proposals, fg_selection_masks +###end + + +def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: + """ + Args: + proposals (list[Instances]): a list of N Instances, where N is the + number of images. + + Returns: + proposals: only contains proposals with at least one visible keypoint. + + Note that this is still slightly different from Detectron. + In Detectron, proposals for training keypoint head are re-sampled from + all the proposals with IOU>threshold & >=1 visible keypoint. + + Here, the proposals are first sampled from all proposals with + IOU>threshold, then proposals with no visible keypoint are filtered out. + This strategy seems to make no difference on Detectron and is easier to implement. + """ + ret = [] + all_num_fg = [] + for proposals_per_image in proposals: + # If empty/unannotated image (hard negatives), skip filtering for train + if len(proposals_per_image) == 0: + ret.append(proposals_per_image) + continue + gt_keypoints = proposals_per_image.gt_keypoints.tensor + # #fg x K x 3 + vis_mask = gt_keypoints[:, :, 2] >= 1 + xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] + proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 + kp_in_box = ( + (xs >= proposal_boxes[:, :, 0]) + & (xs <= proposal_boxes[:, :, 2]) + & (ys >= proposal_boxes[:, :, 1]) + & (ys <= proposal_boxes[:, :, 3]) + ) + selection = (kp_in_box & vis_mask).any(dim=1) + selection_idxs = torch.nonzero(selection).squeeze(1) + all_num_fg.append(selection_idxs.numel()) + ret.append(proposals_per_image[selection_idxs]) + + # storage = get_event_storage() + # storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) + return ret + + +class ROIHeads(nn.Module): + """ + ROIHeads perform all per-region computation in an R-CNN. + + It contains logic of cropping the regions, extract per-region features, + and make per-region predictions. + + It can have many variants, implemented as subclasses of this class. 
+ """ + + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): + super(ROIHeads, self).__init__() + + # fmt: off + self.batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + self.positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + self.test_score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST + self.test_nms_thresh = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST + self.test_detections_per_img = cfg.TEST.DETECTIONS_PER_IMAGE + self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES + self.proposal_append_gt = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT + self.feature_strides = {k: v.stride for k, v in input_shape.items()} + self.feature_channels = {k: v.channels for k, v in input_shape.items()} + # fmt: on + + # Matcher to assign box proposals to gt boxes + self.proposal_matcher = Matcher( + cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, + cfg.MODEL.ROI_HEADS.IOU_LABELS, + allow_low_quality_matches=False, + ) + + def _sample_proposals(self, matched_idxs, matched_labels, gt_classes): + """ + Based on the matching between N proposals and M groundtruth, + sample the proposals and set their classification labels. + + Args: + matched_idxs (Tensor): a vector of length N, each is the best-matched + gt index in [0, M) for each proposal. + matched_labels (Tensor): a vector of length N, the matcher's label + (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. + gt_classes (Tensor): a vector of length M. + + Returns: + Tensor: a vector of indices of sampled proposals. Each is in [0, N). + Tensor: a vector of the same length, the classification label for + each sampled proposal. Each sample is labeled as either a category in + [0, num_classes) or the background (num_classes). + """ + device = gt_classes.device + has_gt = gt_classes.numel() > 0 + # Get the corresponding GT for each proposal + + if has_gt: + gt_classes = gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[matched_labels == 0] = self.num_classes + # Label ignore proposals (-1 label) + gt_classes[matched_labels == -1] = -1 + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + + sampled_fg_idxs, sampled_bg_idxs = subsample_labels( + gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes + ) + + sampled_idxs = torch.cat([sampled_fg_idxs.cpu(), sampled_bg_idxs.cpu()], dim=0).to(device).long() + + return sampled_idxs, gt_classes[sampled_idxs] + + @torch.no_grad() + def label_and_sample_proposals(self, proposals, targets): + """ + Prepare some proposals to be used to train the ROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth + boxes, with a fraction of positives that is no larger than + ``self.positive_sample_fraction``. + + Args: + See :meth:`ROIHeads.forward` + + Returns: + list[Instances]: + length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + + - proposal_boxes: the proposal boxes + - gt_boxes: the ground-truth box that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + + Other fields such as "gt_classes", "gt_masks", that's included in `targets`. 
+ """ + # ywlee for using targets.gt_classes + # in add_ground_truth_to_proposal() + # gt_boxes = [x.gt_boxes for x in targets] + + # Augment proposals with ground-truth boxes. + # In the case of learned proposals (e.g., RPN), when training starts + # the proposals will be low quality due to random initialization. + # It's possible that none of these initial + # proposals have high enough overlap with the gt objects to be used + # as positive examples for the second stage components (box head, + # cls head, mask head). Adding the gt boxes to the set of proposals + # ensures that the second stage components will have some positive + # examples from the start of training. For RPN, this augmentation improves + # convergence and empirically improves box AP on COCO by about 0.5 + # points (under one tested configuration). + # import pdb;pdb.set_trace() + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(targets, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + # import pdb;pdb.set_trace() + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + # martix = torch.load("/home/zsclzy/centermask_npu2/martix.pt") + # matched_idxs_new, matched_labels_new = self.proposal_matcher(martix) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + # import pdb;pdb.set_trace() + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + # Set target attributes of the sampled proposals: + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + # We index all the attributes of targets that start with "gt_" + # and have not been added to proposals yet (="gt_classes"). + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + # NOTE: here the indexing waste some compute, because heads + # like masks, keypoints, etc, will filter the proposals again, + # (by foreground/background, or number of keypoints in the image, etc) + # so we essentially index the data twice. + for (trg_name, trg_value) in targets_per_image.get_fields().items(): + if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): + proposals_per_image.set(trg_name, trg_value[sampled_targets]) + else: + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)) + ) + proposals_per_image.gt_boxes = gt_boxes + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + # storage = get_event_storage() + # storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + # storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt + + def forward(self, images, features, proposals, targets=None): + """ + Args: + images (ImageList): + features (dict[str: Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + proposals (list[Instances]): length `N` list of `Instances`s. 
The i-th + `Instances` contains object proposals for the i-th input image, + with fields "proposal_boxes" and "objectness_logits". + targets (list[Instances], optional): length `N` list of `Instances`s. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. Specify `targets` during training only. + It may have the following fields: + + - gt_boxes: the bounding box of each instance. + - gt_classes: the label for each instance with a category ranging in [0, #class]. + - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. + - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. + + Returns: + results (list[Instances]): length `N` list of `Instances`s containing the + detected instances. Returned during inference only; may be [] during training. + + losses (dict[str->Tensor]): + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + raise NotImplementedError() + + + + +@ROI_HEADS_REGISTRY.register() +class CenterROIHeads(ROIHeads): + """ + It's "standard" in a sense that there is no ROI transform sharing + or feature sharing between tasks. + The cropped rois go to separate branches masks directly. + This way, it is easier to make separate abstractions for different branches. + + This class is used by most models, such as FPN and C5. + To implement more models, you can subclass it and implement a different + :meth:`forward()` or a head. + """ + + def __init__(self, cfg, input_shape): + super(CenterROIHeads, self).__init__(cfg, input_shape) + self._init_mask_head(cfg) + self._init_mask_iou_head(cfg) + self._init_keypoint_head(cfg, input_shape) + + + def _init_mask_head(self, cfg): + # fmt: off + self.mask_on = cfg.MODEL.MASK_ON + if not self.mask_on: + return + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / self.feature_strides[k] for k in self.in_features) + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE + assign_crit = cfg.MODEL.ROI_MASK_HEAD.ASSIGN_CRITERION + + # fmt: on + + in_channels = [self.feature_channels[f] for f in self.in_features][0] + + self.mask_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + assign_crit=assign_crit, + ) + self.mask_head = build_mask_head( + cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) + ) + + + def _init_mask_iou_head(self, cfg): + # fmt: off + self.maskiou_on = cfg.MODEL.MASKIOU_ON + if not self.maskiou_on: + return + in_channels = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + self.maskiou_weight = cfg.MODEL.MASKIOU_LOSS_WEIGHT + + # fmt : on + + self.maskiou_head = build_maskiou_head( + cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) + ) + + + def _init_keypoint_head(self, cfg, input_shape): + # fmt: off + self.keypoint_on = cfg.MODEL.KEYPOINT_ON + if not self.keypoint_on: + return + self.kp_in_features = cfg.MODEL.ROI_KEYPOINT_HEAD.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.kp_in_features) # noqa + sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE + assign_crit = cfg.MODEL.ROI_KEYPOINT_HEAD.ASSIGN_CRITERION + # fmt: on + + in_channels 
= [input_shape[f].channels for f in self.kp_in_features][0] + + self.keypoint_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + assign_crit=assign_crit, + ) + self.keypoint_head = build_keypoint_head( + cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) + ) + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + See :class:`ROIHeads.forward`. + """ + del images + if self.training: + # print("before label_and_sample_proposals", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')) + proposals = self.label_and_sample_proposals(proposals, targets) + # print("after label_and_sample_proposals", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')) + del targets + + if self.training: + if self.maskiou_on: + # print("before forward_mask", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')) + #import pdb;pdb.set_trace() + losses, mask_features, selected_mask, labels, maskiou_targets, weight = self._forward_mask(features, proposals) + losses.update(self._forward_maskiou(mask_features, proposals, selected_mask, labels, maskiou_targets, weight)) + else: + losses = self._forward_mask(features, proposals) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + # During inference cascaded prediction is used: the mask and keypoints heads are only + # applied to the top scoring box detections. + pred_instances = self.forward_with_given_boxes(features, proposals) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (list[Instances]): + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + if self.maskiou_on: + instances, mask_features = self._forward_mask(features, instances) + instances = self._forward_maskiou(mask_features, instances) + else: + instances = self._forward_mask(features, instances) + + instances = self._forward_keypoint(features, instances) + + return instances + + + def _forward_mask( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> Union[Dict[str, torch.Tensor], List[Instances]]: + """ + Forward logic of the mask prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_masks" and return it. 
+ """ + if not self.mask_on: + return {} if self.training else instances + + features = [features[f] for f in self.in_features] + + if self.training: + # The loss is only defined on positive proposals. + proposals, weight = select_foreground_proposals_fix_shape(instances, self.num_classes) + # proposal_boxes = [x.proposal_boxes for x in proposals] + mask_features = self.mask_pooler(features, proposals, self.training) + mask_logits = self.mask_head(mask_features) + if self.maskiou_on: + loss, selected_mask, labels, maskiou_targets = mask_rcnn_loss(mask_logits, proposals, self.maskiou_on, weight) + return {"loss_mask": loss}, mask_features, selected_mask, labels, maskiou_targets, weight + else: + return {"loss_mask": mask_rcnn_loss(mask_logits, proposals, self.maskiou_on)} + else: + # pred_boxes = [x.pred_boxes for x in instances] + mask_features = self.mask_pooler(features, instances) + mask_logits = self.mask_head(mask_features) + mask_rcnn_inference(mask_logits, instances) + + if self.maskiou_on: + return instances, mask_features + else: + return instances + + + def _forward_maskiou(self, mask_features, instances, selected_mask=None, labels=None, maskiou_targets=None, weight = None): + """ + Forward logic of the mask iou prediction branch. + Args: + features (list[Tensor]): #level input features for mask prediction + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + Returns: + In training, a dict of losses. + In inference, calibrate instances' scores. + """ + if not self.maskiou_on: + return {} if self.training else instances + + if self.training: + pred_maskiou = self.maskiou_head(mask_features, selected_mask) + return {"loss_maskiou": mask_iou_loss(labels, pred_maskiou, maskiou_targets, self.maskiou_weight, weight)} + + else: + selected_mask = torch.cat([i.pred_masks for i in instances], 0) + if selected_mask.shape[0] == 0: + return instances + pred_maskiou = self.maskiou_head(mask_features, selected_mask) + mask_iou_inference(instances, pred_maskiou) + return instances + + + def _forward_keypoint( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> Union[Dict[str, torch.Tensor], List[Instances]]: + """ + Forward logic of the keypoint prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict keypoints. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_keypoints" and return it. + """ + if not self.keypoint_on: + return {} if self.training else instances + + features = [features[f] for f in self.kp_in_features] + + if self.training: + # The loss is defined on positive proposals with at >=1 visible keypoints. 
+ proposals, _ = select_foreground_proposals(instances, self.num_classes) + proposals = select_proposals_with_visible_keypoints(proposals) + # proposal_boxes = [x.proposal_boxes for x in proposals] + + keypoint_features = self.keypoint_pooler(features, proposals, self.training) + return self.keypoint_head(keypoint_features, proposals) + else: + # pred_boxes = [x.pred_boxes for x in instances] + keypoint_features = self.keypoint_pooler(features, instances) + return self.keypoint_head(keypoint_features, instances) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/keypoint_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/keypoint_head.py new file mode 100644 index 0000000000000000000000000000000000000000..bd31aebbed60c2a2b9da0b54df72f2b0f31e3a28 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/keypoint_head.py @@ -0,0 +1,224 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +from typing import List +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, interpolate +from detectron2.structures import Instances, heatmaps_to_keypoints +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +_TOTAL_SKIPPED = 0 + +ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") +ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ +Registry for keypoint heads, which make keypoint predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def build_keypoint_head(cfg, input_shape): + """ + Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME + return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): + """ + Arguments: + pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number + of instances in the batch, K is the number of keypoints, and S is the side length + of the keypoint heatmap. The values are spatial logits. + instances (list[Instances]): A list of M Instances, where M is the batch size. + These instances are predictions from the model + that are in 1:1 correspondence with pred_keypoint_logits. + Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` + instance. + normalizer (float): Normalize the loss by this amount. + If not specified, we normalize by the number of visible keypoints in the minibatch. + + Returns a scalar tensor containing the loss. 
+ """ + heatmaps = [] + valid = [] + + keypoint_side_len = pred_keypoint_logits.shape[2] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + keypoints = instances_per_image.gt_keypoints + heatmaps_per_image, valid_per_image = keypoints.to_heatmap( + instances_per_image.proposal_boxes.tensor, keypoint_side_len + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + if len(heatmaps): + keypoint_targets = cat(heatmaps, dim=0) + valid = cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if len(heatmaps) == 0 or valid.numel() == 0: + global _TOTAL_SKIPPED + _TOTAL_SKIPPED += 1 + storage = get_event_storage() + storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) + return pred_keypoint_logits.sum() * 0 + + N, K, H, W = pred_keypoint_logits.shape + pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy( + pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" + ) + + # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch + if normalizer is None: + normalizer = valid.numel() + keypoint_loss /= normalizer + + return keypoint_loss + + +def keypoint_rcnn_inference(pred_keypoint_logits, pred_instances): + """ + Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) + and add it to the `pred_instances` as a `pred_keypoints` field. + + Args: + pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number + of instances in the batch, K is the number of keypoints, and S is the side length of + the keypoint heatmap. The values are spatial logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images. + + Returns: + None. Each element in pred_instances will contain an extra "pred_keypoints" field. + The field is a tensor of shape (#instance, K, 3) where the last + dimension corresponds to (x, y, score). + The scores are larger than 0. + """ + # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) + bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) + + keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits.detach(), bboxes_flat.detach()) + num_instances_per_image = [len(i) for i in pred_instances] + keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) + + for keypoint_results_per_image, instances_per_image in zip(keypoint_results, pred_instances): + # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) + instances_per_image.pred_keypoints = keypoint_results_per_image + + +class BaseKeypointRCNNHead(nn.Module): + """ + Implement the basic Keypoint R-CNN losses and inference logic. 
+ """ + + def __init__(self, cfg, input_shape): + super().__init__() + # fmt: off + self.loss_weight = cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT + self.normalize_by_visible_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS # noqa + self.num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS + batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + # fmt: on + self.normalizer_per_img = ( + self.num_keypoints * batch_size_per_image * positive_sample_fraction + ) + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses if in training. The predicted "instances" if in inference. + """ + x = self.layers(x) + if self.training: + num_images = len(instances) + normalizer = ( + None + if self.normalize_by_visible_keypoints + else num_images * self.normalizer_per_img + ) + return { + "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) + * self.loss_weight + } + else: + keypoint_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from regional input features. + """ + raise NotImplementedError + + +@ROI_KEYPOINT_HEAD_REGISTRY.register() +class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead): + """ + A standard keypoint head containing a series of 3x3 convs, followed by + a transpose convolution and bilinear interpolation for upsampling. + """ + + def __init__(self, cfg, input_shape: ShapeSpec): + """ + The following attributes are parsed from config: + conv_dims: an iterable of output channel counts for each conv in the head + e.g. (512, 512, 512) for three convs outputting 512 channels. + num_keypoints: number of keypoint heatmaps to predicts, determines the number of + channels in the final output. 
+ """ + super().__init__(cfg, input_shape) + + # fmt: off + # default up_scale to 2 (this can eventually be moved to config) + up_scale = 2 + conv_dims = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS + num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS + in_channels = input_shape.channels + # fmt: on + + self.blocks = [] + for idx, layer_channels in enumerate(conv_dims, 1): + module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) + self.add_module("conv_fcn{}".format(idx), module) + self.blocks.append(module) + in_channels = layer_channels + + deconv_kernel = 4 + self.score_lowres = ConvTranspose2d( + in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 + ) + self.up_scale = up_scale + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + def layers(self, x): + for layer in self.blocks: + x = F.relu(layer(x)) + x = self.score_lowres(x) + x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) + return x diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/mask_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0be57f8116ef59418aaf9b5dd97b58626c11d1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/mask_head.py @@ -0,0 +1,299 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Sangrok Lee and Youngwan Lee (ETRI), 2020. All Rights Reserved. +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F +import numpy as np +from typing import List +import copy +import pycocotools.mask as mask_utils + +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry +from detectron2.structures.masks import PolygonMasks + +ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") +ROI_MASK_HEAD_REGISTRY.__doc__ = """ +Registry for mask heads, which predicts instance masks given +per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def crop(polygons: List[List[np.ndarray]], boxes: torch.Tensor) -> "PolygonMasks": + boxes = boxes.to(torch.device("cpu")).numpy() + results = [ + _crop(polygon, box) for polygon, box in zip(polygons, boxes) + ] + + return PolygonMasks(results) + + +def _crop(polygons: np.ndarray, box: np.ndarray) -> List[np.ndarray]: + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] # .clamp(min=0, max=w) + p[1::2] = p[1::2] - box[1] # .clamp(min=0, max=h) + + return polygons + + +def mask_rcnn_loss(pred_mask_logits, instances, maskiou_on, weight = None): + """ + Compute the mask prediction loss defined in the Mask R-CNN paper. 
+ + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. These instances are in 1:1 + correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, + ...) associated with each instance are stored in fields. + + Returns: + mask_loss (Tensor): A scalar tensor containing the loss. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + total_num_masks = pred_mask_logits.size(0) + mask_side_len = pred_mask_logits.size(2) + assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" + + gt_classes = [] + gt_masks = [] + mask_ratios = [] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + + if not cls_agnostic_mask: + gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) + gt_classes.append(gt_classes_per_image) + + if maskiou_on: + cropped_mask = crop(instances_per_image.gt_masks.polygons, instances_per_image.proposal_boxes.tensor) + cropped_mask = torch.tensor( + [mask_utils.area(mask_utils.frPyObjects([p for p in obj], box[3]-box[1], box[2]-box[0])).sum().astype(float) + for obj, box in zip(cropped_mask.polygons, instances_per_image.proposal_boxes.tensor)] + ) + + # lzy 12.20 when use func clamp,it need the cpu tensor + # mask_ratios.append( + # (cropped_mask / instances_per_image.gt_masks.area()) + # .to(device=pred_mask_logits.device).clamp(min=0., max=1.) + # ) + unNamedComputeCputensor = (cropped_mask/ instances_per_image.gt_masks.area()).to('cpu').clamp(min=0.,max=1.) 
+ mask_ratios.append(torch.tensor(unNamedComputeCputensor,dtype=torch.float32).to(device=pred_mask_logits.device)) + # end transform + gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( + instances_per_image.proposal_boxes.tensor, mask_side_len + ).to(device=pred_mask_logits.device) + # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len + gt_masks.append(gt_masks_per_image) + + #gt_classes = cat(gt_classes, dim=0) + + if len(gt_masks) == 0: + gt_classes = torch.LongTensor(gt_classes) + if maskiou_on: + selected_index = torch.arange(pred_mask_logits.shape[0], device=pred_mask_logits.device) + if cls_agnostic_mask: + selected_mask = pred_mask_logits[:, 0] + else: + # gt_classes = torch.LongTensor(gt_classes) + selected_mask = pred_mask_logits[selected_index, gt_classes] + mask_num, mask_h, mask_w = selected_mask.shape + selected_mask = selected_mask.reshape(mask_num, 1, mask_h, mask_w) + return pred_mask_logits.sum() * 0, selected_mask, gt_classes, None + + else: + return pred_mask_logits.sum() * 0 + + gt_masks = cat(gt_masks, dim=0) + + if cls_agnostic_mask: + pred_mask_logits = pred_mask_logits[:, 0] + gt_classes = torch.zeros(total_num_masks, dtype=torch.int64) + else: + indices = torch.arange(total_num_masks) + gt_classes = cat(gt_classes, dim=0) #ywlee + pred_mask_logits = pred_mask_logits[indices, gt_classes] # (num_mask, Hmask, Wmask) + + if gt_masks.dtype == torch.bool: + gt_masks_bool = gt_masks + else: + # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) + gt_masks_bool = gt_masks > 0.5 + + # Log the training accuracy (using gt classes and 0.5 threshold) + mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool + mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) + num_positive = gt_masks_bool.sum().item() + false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( + gt_masks_bool.numel() - num_positive, 1.0 + ) + false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) + + # storage = get_event_storage() + # storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) + # storage.put_scalar("mask_rcnn/false_positive", false_positive) + # storage.put_scalar("mask_rcnn/false_negative", false_negative) + #import pdb; pdb.set_trace() + mask_loss = F.binary_cross_entropy_with_logits( + pred_mask_logits, gt_masks.to(dtype=torch.float32), reduction="none" + ) + if weight != None: + mask_loss = mask_loss.mean(dim=(1,2)) * weight.float() + + mask_loss = mask_loss.sum()/weight.sum() + + #mask_loss = mask_loss.mean() + + if maskiou_on: + #import pdb;pdb.set_trace() + mask_ratios = cat(mask_ratios, dim=0) + + data_type = mask_ratios[0].dtype + value_eps = 1e-10 * torch.ones(gt_masks.shape[0], device=gt_masks.device).to(data_type) + mask_ratios = torch.max(mask_ratios, value_eps) + + pred_masks = pred_mask_logits > 0 + + mask_targets_full_area = gt_masks.sum(dim=[1,2]) / mask_ratios + + mask_ovr_area = (pred_masks * gt_masks).sum(dim=[1,2]).float() + mask_union_area = pred_masks.sum(dim=[1,2]) + mask_targets_full_area - mask_ovr_area + data_type_2 = mask_union_area[0].dtype + value_1 = torch.ones(pred_masks.shape[0], device=gt_masks.device).to(data_type_2) + value_0 = torch.zeros(pred_masks.shape[0], device=gt_masks.device) + + mask_union_area = torch.max(mask_union_area, value_1) + mask_ovr_area = torch.max(mask_ovr_area, value_0) + maskiou_targets = mask_ovr_area / mask_union_area + mask_num, mask_h, mask_w = pred_mask_logits.shape + 
selected_mask = pred_mask_logits.reshape(mask_num, 1, mask_h, mask_w) + selected_mask = selected_mask.sigmoid() + return mask_loss, selected_mask, gt_classes, maskiou_targets.detach() + else: + return mask_loss + + +def mask_rcnn_inference(pred_mask_logits, pred_instances): + """ + Convert pred_mask_logits to estimated foreground probability masks while also + extracting only the masks for the predicted classes in pred_instances. For each + predicted box, the mask of the same class is attached to the instance by adding a + new "pred_masks" field to pred_instances. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Each Instances must have field "pred_classes". + + Returns: + None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, + Wmask) for predicted class. Note that the masks are returned as a soft (non-quantized) + masks the resolution predicted by the network; post-processing steps, such as resizing + the predicted masks to the original image resolution and/or binarizing them, is left + to the caller. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + + if cls_agnostic_mask: + mask_probs_pred = pred_mask_logits.sigmoid() + else: + # Select masks corresponding to the predicted classes + num_masks = pred_mask_logits.shape[0] + class_pred = cat([i.pred_classes for i in pred_instances]) + # import pdb;pdb.set_trace() + indices = torch.arange(num_masks, device=class_pred.device).long() + mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() + # mask_probs_pred.shape: (B, 1, Hmask, Wmask) + + num_boxes_per_image = [len(i) for i in pred_instances] + mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) + + for prob, instances in zip(mask_probs_pred, pred_instances): + instances.pred_masks = prob # (1, Hmask, Wmask) + + +@ROI_MASK_HEAD_REGISTRY.register() +class MaskRCNNConvUpsampleHead(nn.Module): + """ + A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). 
+ """ + + def __init__(self, cfg, input_shape: ShapeSpec): + """ + The following attributes are parsed from config: + num_conv: the number of conv layers + conv_dim: the dimension of the conv layers + norm: normalization for the conv layers + """ + super(MaskRCNNConvUpsampleHead, self).__init__() + + # fmt: off + num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES + conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + input_channels = input_shape.channels + cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK + # fmt: on + + self.conv_norm_relus = [] + + for k in range(num_conv): + conv = Conv2d( + input_channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not self.norm, + norm=get_norm(self.norm, conv_dims), + activation=F.relu, + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + + self.deconv = ConvTranspose2d( + conv_dims if num_conv > 0 else input_channels, + conv_dims, + kernel_size=2, + stride=2, + padding=0, + ) + + num_mask_classes = 1 if cls_agnostic_mask else num_classes + self.predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + def forward(self, x): + for layer in self.conv_norm_relus: + x = layer(x) + x = F.relu(self.deconv(x)) + return self.predictor(x) + + +def build_mask_head(cfg, input_shape): + """ + Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_MASK_HEAD.NAME + return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/maskiou_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/maskiou_head.py new file mode 100644 index 0000000000000000000000000000000000000000..01ba265ce1d5bf9dcd4e7518f001c585aa8eacb0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/maskiou_head.py @@ -0,0 +1,123 @@ +# Copyright (c) Sangrok Lee and Youngwan Lee (ETRI) All Rights Reserved. +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.layers import Conv2d, ShapeSpec, cat +from detectron2.utils.registry import Registry +from centermask.layers import MaxPool2d, Linear + +ROI_MASKIOU_HEAD_REGISTRY = Registry("ROI_MASKIOU_HEAD") +ROI_MASKIOU_HEAD_REGISTRY.__doc__ = """ +Registry for maskiou heads, which predicts predicted mask iou. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def mask_iou_loss(labels, pred_maskiou, gt_maskiou, loss_weight, weight): + """ + Compute the maskiou loss. 
+ + Args: + labels (Tensor): Given mask labels (num of instance,) + pred_maskiou (Tensor): A tensor of shape (num of instance, C) + gt_maskiou (Tensor): Ground Truth IOU generated in mask head (num of instance,) + """ + def l2_loss(input, target): + """ + very similar to the smooth_l1_loss from pytorch, but with + the extra beta parameter + """ + pos_inds = target > 0.0 + if pos_inds.sum() > 0: + cond = torch.abs(input - target) + loss = 0.5 * cond**2 / pos_inds.sum() + else: + loss = input * 0.0 + return (loss*weight.float()*pos_inds.float()).sum() + + if labels.numel() == 0: + return pred_maskiou.sum() * 0 + + index = torch.arange(pred_maskiou.shape[0]).to(device=pred_maskiou.device) + maskiou_loss = l2_loss(pred_maskiou[index, labels], gt_maskiou) + maskiou_loss = loss_weight * maskiou_loss + + return maskiou_loss + + +def mask_iou_inference(pred_instances, pred_maskiou): + labels = cat([i.pred_classes for i in pred_instances]) + num_masks = pred_maskiou.shape[0] + index = torch.arange(num_masks, device=labels.device).long() + num_boxes_per_image = [len(i) for i in pred_instances] + maskious = pred_maskiou[index, labels].split(num_boxes_per_image, dim=0) + for maskiou, box in zip(maskious, pred_instances): + box.mask_scores = box.scores * maskiou + + +@ROI_MASKIOU_HEAD_REGISTRY.register() +class MaskIoUHead(nn.Module): + def __init__(self, cfg, input_shape: ShapeSpec): + super(MaskIoUHead, self).__init__() + + # fmt: off + num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES + conv_dims = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_DIM + num_conv = cfg.MODEL.ROI_MASKIOU_HEAD.NUM_CONV + input_channels = input_shape.channels + 1 + resolution = input_shape.width // 2 + # fmt: on + + self.conv_relus = [] + stride = 1 + for k in range(num_conv): + if (k+1) == num_conv: + stride = 2 + conv = Conv2d( + input_channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=stride, + padding=1, + activation=F.relu + ) + self.add_module("maskiou_fcn{}".format(k+1), conv) + self.conv_relus.append(conv) + self.maskiou_fc1 = Linear(conv_dims*resolution**2, 1024) + self.maskiou_fc2 = Linear(1024, 1024) + self.maskiou = Linear(1024, num_classes) + self.pooling = MaxPool2d(kernel_size=2, stride=2) + + + for l in self.conv_relus: + nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu") + nn.init.constant_(l.bias, 0) + for l in [self.maskiou_fc1, self.maskiou_fc2]: + nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu") + nn.init.constant_(l.bias, 0) + + + nn.init.normal_(self.maskiou.weight, mean=0, std=0.01) + nn.init.constant_(self.maskiou.bias, 0) + + def forward(self, x, mask): + mask_pool = self.pooling(mask) + x = torch.cat((x, mask_pool), 1) + + for layer in self.conv_relus: + x = layer(x) + x = torch.flatten(x, 1) + x = F.relu(self.maskiou_fc1(x)) + x = F.relu(self.maskiou_fc2(x)) + x = self.maskiou(x) + return x + + +def build_maskiou_head(cfg, input_shape): + """ + Build a mask iou head defined by `cfg.MODEL.ROI_MASKIOU_HEAD.NAME`. 
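+
+    Example (illustrative; assumes ``cfg.MODEL.ROI_MASKIOU_HEAD.NAME`` resolves
+    to the ``MaskIoUHead`` above, ``CONV_DIM=256`` and a 14x14 input; the
+    tensors are placeholders)::
+
+        head = build_maskiou_head(cfg, ShapeSpec(channels=256, height=14, width=14))
+        iou_pred = head(torch.rand(8, 256, 14, 14), torch.rand(8, 1, 28, 28))
+        # iou_pred has shape (8, NUM_CLASSES); the mask is max-pooled to 14x14
+        # and concatenated with the features before the conv/fc layers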
+ """ + name = cfg.MODEL.ROI_MASKIOU_HEAD.NAME + return ROI_MASKIOU_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/pooler.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/pooler.py new file mode 100644 index 0000000000000000000000000000000000000000..ed3c59ec25a7c6b2fb274a77e5272ddba920c339 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/pooler.py @@ -0,0 +1,310 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Youngwan Lee (ETRI), 2020. All Rights Reserved. +import math +import sys +import torch +from torch import nn +from torchvision.ops import RoIPool + +from detectron2.layers import ROIAlign, ROIAlignRotated, cat +from detectron2.modeling.poolers import ( + convert_boxes_to_pooler_format, assign_boxes_to_levels +) + +def _img_area(instance): + + device = instance.pred_classes.device + image_size = instance.image_size + area = torch.as_tensor(image_size[0] * image_size[1], dtype=torch.float, device=device) + tmp = torch.zeros((len(instance.pred_classes), 1), dtype=torch.float, device=device) + + return (area + tmp).squeeze(1) + + +def assign_boxes_to_levels_by_ratio(instances, min_level, max_level, is_train=False): + """ + Map each box in `instances` to a feature map level index by adaptive ROI mapping function + in CenterMask paper and return the assignment + vector. + + Args: + instances (list[Instances]): the per-image instances to train/predict masks. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. + max_level (int): Largest feature map level index. + + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + eps = sys.float_info.epsilon + if is_train: + box_lists = [x.proposal_boxes for x in instances] + else: + box_lists = [x.pred_boxes for x in instances] + box_areas = cat([boxes.area() for boxes in box_lists]) + img_areas = cat([_img_area(instance_i) for instance_i in instances]) + + # Eqn.(2) in the CenterMask paper + level_assignments = torch.ceil( + max_level - torch.log2(img_areas / box_areas + eps) + ) + + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int64) - min_level + + +def assign_boxes_to_levels(box_lists, min_level, max_level, canonical_box_size, canonical_level): + """ + Map each box in `box_lists` to a feature map level index and return the assignment + vector. + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, + where N is the number of images in the batch. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. + max_level (int): Largest feature map level index. + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). + canonical_level (int): The feature map level index on which a canonically-sized box + should be placed. 
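+
+    For example (a hand-worked instance of the Eqn.(1) assignment used below,
+    not output of this code): with canonical_box_size=224 and canonical_level=4,
+    a box of size 448x448 gives floor(4 + log2(448 / 224)) = 5, i.e. one level
+    higher than the canonical one, before clamping to [min_level, max_level].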
+ + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + eps = sys.float_info.epsilon + box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) + # Eqn.(1) in FPN paper + level_assignments = torch.floor( + canonical_level + torch.log2(box_sizes / canonical_box_size + eps) + ) + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int64) - min_level + + +def convert_boxes_to_pooler_format(box_lists): + """ + Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops + (see description under Returns). + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + + Returns: + When input is list[Boxes]: + A tensor of shape (M, 5), where M is the total number of boxes aggregated over all + N batch images. + The 5 columns are (batch index, x0, y0, x1, y1), where batch index + is the index in [0, N) identifying which batch image the box with corners at + (x0, y0, x1, y1) comes from. + When input is list[RotatedBoxes]: + A tensor of shape (M, 6), where M is the total number of boxes aggregated over all + N batch images. + The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), + where batch index is the index in [0, N) identifying which batch image the + rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. + """ + + def fmt_box_list(box_tensor, batch_index): + repeated_index = torch.full( + (len(box_tensor), 1), batch_index, dtype=box_tensor.dtype, device=box_tensor.device + ) + return cat((repeated_index, box_tensor), dim=1) + + pooler_fmt_boxes = cat( + [fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 + ) + + return pooler_fmt_boxes + + +class ROIPooler(nn.Module): + """ + Region of interest feature map pooler that supports pooling from one or more + feature maps. + """ + + def __init__( + self, + output_size, + scales, + sampling_ratio, + pooler_type, + canonical_box_size=224, + canonical_level=4, + assign_crit="area", + ): + """ + Args: + output_size (int, tuple[int] or list[int]): output size of the pooled region, + e.g., 14 x 14. If tuple or list is given, the length must be 2. + scales (list[float]): The scale for each low-level pooling op relative to + the input image. For a feature map with stride s relative to the input + image, scale is defined as a 1 / s. The stride must be power of 2. + When there are multiple scales, they must form a pyramid, i.e. they must be + a monotically decreasing geometric sequence with a factor of 1/2. + sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. + pooler_type (string): Name of the type of pooling operation that should be applied. + For instance, "ROIPool" or "ROIAlignV2". + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default + is heuristically defined as 224 pixels in the FPN paper (based on ImageNet + pre-training). + canonical_level (int): The feature map level index from which a canonically-sized box + should be placed. 
The default is defined as level 4 (stride=16) in the FPN paper, + i.e., a box of size 224x224 will be placed on the feature with stride=16. + The box placement for all boxes will be determined from their sizes w.r.t + canonical_box_size. For example, a box whose area is 4x that of a canonical box + should be used to pool features from feature level ``canonical_level+1``. + + Note that the actual input feature maps given to this module may not have + sufficiently many levels for the input boxes. If the boxes are too large or too + small for the input feature maps, the closest level will be used. + """ + super().__init__() + + if isinstance(output_size, int): + output_size = (output_size, output_size) + assert len(output_size) == 2 + assert isinstance(output_size[0], int) and isinstance(output_size[1], int) + self.output_size = output_size + + if pooler_type == "ROIAlign": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False + ) + for scale in scales + ) + elif pooler_type == "ROIAlignV2": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True + ) + for scale in scales + ) + elif pooler_type == "ROIPool": + self.level_poolers = nn.ModuleList( + RoIPool(output_size, spatial_scale=scale) for scale in scales + ) + elif pooler_type == "ROIAlignRotated": + self.level_poolers = nn.ModuleList( + ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) + for scale in scales + ) + else: + raise ValueError("Unknown pooler type: {}".format(pooler_type)) + + # Map scale (defined as 1 / stride) to its feature map level under the + # assumption that stride is a power of 2. + min_level = -math.log2(scales[0]) + max_level = -math.log2(scales[-1]) + assert math.isclose(min_level, int(min_level)) and math.isclose( + max_level, int(max_level) + ), "Featuremap stride is not power of 2!" + self.min_level = int(min_level) + self.max_level = int(max_level) + assert ( + len(scales) == self.max_level - self.min_level + 1 + ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" + assert 0 < self.min_level and self.min_level <= self.max_level + if len(scales) > 1: + # When there is only one feature map, canonical_level is redundant and we should not + # require it to be a sensible value. Therefore we skip this assertion + assert self.min_level <= canonical_level and canonical_level <= self.max_level + self.canonical_level = canonical_level + assert canonical_box_size > 0 + self.canonical_box_size = canonical_box_size + self.assign_crit = assign_crit #ywlee + + def forward(self, x, instances, is_train=False): + """ + Args: + x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those + used to construct this module. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + is_train (True/False) + + Returns: + Tensor: + A tensor of shape (M, C, output_size, output_size) where M is the total number of + boxes aggregated over all N batch images and C is the number of channels in `x`. 
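+
+        Example (a sketch only; assumes two FPN levels with strides 8 and 16
+        and a single image whose ``Instances`` already carry ``pred_boxes``)::
+
+            pooler = ROIPooler(output_size=14, scales=(1.0 / 8, 1.0 / 16),
+                               sampling_ratio=0, pooler_type="ROIAlignV2")
+            feats = [torch.rand(1, 256, 100, 152), torch.rand(1, 256, 50, 76)]
+            roi_feats = pooler(feats, [instances], is_train=False)
+            # roi_feats: (M, 256, 14, 14), M = number of predicted boxes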
+ """ + if is_train: + box_lists = [x.proposal_boxes for x in instances] + else: + box_lists = [x.pred_boxes for x in instances] + + num_level_assignments = len(self.level_poolers) + + assert isinstance(x, list) and isinstance( + box_lists, list + ), "Arguments to pooler must be lists" + assert ( + len(x) == num_level_assignments + ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( + num_level_assignments, len(x) + ) + + assert len(box_lists) == x[0].size( + 0 + ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( + x[0].size(0), len(box_lists) + ) + + pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) + + if num_level_assignments == 1: + return self.level_poolers[0](x[0], pooler_fmt_boxes) + + if self.assign_crit == "ratio": + level_assignments = assign_boxes_to_levels_by_ratio( + instances, self.min_level, self.max_level, is_train + ) + else: #default + level_assignments = assign_boxes_to_levels( + box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level + ) + + num_boxes = len(pooler_fmt_boxes) + num_channels = x[0].shape[1] + output_size = self.output_size[0] + + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device + ) + + for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): + # import pdb;pdb.set_trace() + inds = torch.nonzero(level_assignments == level).squeeze(1) + inds_mask = (level_assignments == level).float().reshape(len(level_assignments),1) + #for i in : + # inter_mask = torch.full((1,num_channels,output_size,output_size),inds_mask[i], device=device) + # if i == 0: + # inter_mask_sum = inter_mask + # else: + # inter_mask_sum = torch.cat((inter_mask_sum,inter_mask), dim=0) + # import pdb;pdb.set_trace() + # print("inds.shape",inds.shape) + # pooler_fmt_boxes_level = pooler_fmt_boxes[inds] + #import pdb; pdb.set_trace() + #import pdb;pdb.set_trace() + output_1 = (pooler(x_level.cpu(), torch.mul((pooler_fmt_boxes),inds_mask).cpu())).to(dtype).to(device) + # output[inds] = pooler(x_level, pooler_fmt_boxes).half()[inds] + output = output + output_1 + + return output diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/proposal_utils.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/proposal_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fcbb46af09241d38feb03ca86d37458b0ffc8409 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/proposal_utils.py @@ -0,0 +1,79 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Youngwan Lee (ETRI) in 28/01/2020. +import math +import torch + +from detectron2.structures import Instances +from detectron2.structures import Boxes + +def add_ground_truth_to_proposals(targets, proposals): + """ + Call `add_ground_truth_to_proposals_single_image` for all images. + + Args: + targets(list[Instances]): list of N elements. Element i is a Boxes + representing the gound-truth for image i. + proposals (list[Instances]): list of N elements. Element i is a Instances + representing the proposals for image i. + + Returns: + list[Instances]: list of N Instances. Each is the proposals for the image, + with field "proposal_boxes" and "objectness_logits". 
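+
+    Example (illustrative; ``targets`` and ``proposals`` are the per-image
+    ``Instances`` produced earlier in the pipeline)::
+
+        proposals = add_ground_truth_to_proposals(targets, proposals)
+        # in this modified version each returned Instances is padded to a fixed
+        # number of proposals before the ground-truth boxes are appended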
+ """ + assert targets is not None + + assert len(proposals) == len(targets) + if len(proposals) == 0: + return proposals + + return [ + add_ground_truth_to_proposals_single_image(tagets_i, proposals_i) + for tagets_i, proposals_i in zip(targets, proposals) + ] + + +def add_ground_truth_to_proposals_single_image(targets_i, proposals): + """ + Augment `proposals` with ground-truth boxes from `gt_boxes`. + + Args: + Same as `add_ground_truth_to_proposals`, but with targets and proposals + per image. + + Returns: + Same as `add_ground_truth_to_proposals`, but for only one image. + """ + device = proposals.scores.device + proposals.proposal_boxes = proposals.pred_boxes + proposals.remove("pred_boxes") + # Concatenating gt_boxes with proposals requires them to have the same fields + # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. + gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) + gt_logits = gt_logit_value * torch.ones(len(targets_i), device=device) + gt_proposal = Instances(proposals.image_size) + gt_proposal.proposal_boxes = targets_i.gt_boxes + # to have the same fields with proposals + gt_proposal.scores = gt_logits + gt_proposal.pred_classes = targets_i.gt_classes + gt_proposal.locations = torch.ones((len(targets_i), 2), device=device) + + '''try to fix dynamic shape problem''' + proposals_new=Instances(proposals.image_size) + + fix_num=108 + proposals_num = len(proposals) + #import pdb; pdb.set_trace() + proposals_new.proposal_boxes = (torch.zeros((fix_num,4), device=device)).float() + proposals_boxes_indx = torch.tensor([[i,i,i,i] for i in range(0,proposals_num)],device=device) + proposals_new.proposal_boxes = Boxes(proposals_new.proposal_boxes.scatter(0,proposals_boxes_indx,proposals.proposal_boxes.tensor)) + proposals_new.scores = (gt_logit_value * torch.ones(fix_num, dtype = proposals.scores.dtype, device = device)) + proposals_scores_indx = torch.tensor(list(range(0,proposals_num)),device=device) + proposals_new.scores = proposals_new.scores.scatter(0,proposals_scores_indx,proposals.scores) + proposals_new.pred_classes = (80 * torch.ones(fix_num, dtype = torch.int32, device = device)) + proposals_new.pred_classes = proposals_new.pred_classes.scatter(0, proposals_scores_indx, proposals.pred_classes) + proposals_new.locations = torch.ones((fix_num, 2), device=device, dtype= torch.float) + proposals_locations_indx = torch.tensor([[i, i] for i in range(0, proposals_num)],device=device) + proposals_new.locations = proposals_new.locations.scatter(0,proposals_locations_indx,proposals.locations) + new_proposals = Instances.cat([proposals_new, gt_proposal]) + + return new_proposals diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/sam.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/sam.py new file mode 100644 index 0000000000000000000000000000000000000000..bf0eb13aea5f03c96c51491564417b6da9e56fbe --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/centermask/sam.py @@ -0,0 +1,97 @@ +# Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. 
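+#
+# The SpatialAttention block defined below implements the spatial attention
+# module (SAM) of CenterMask: channel-wise average and max maps are
+# concatenated, passed through a single conv, and the sigmoid of the result
+# rescales the input. A minimal usage sketch (shapes are illustrative only):
+#
+#     sam = SpatialAttention(kernel_size=3)
+#     y = sam(torch.rand(8, 256, 14, 14))   # same shape as the input
+#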
+import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, get_norm +from .mask_head import ROI_MASK_HEAD_REGISTRY +from centermask.layers import Max + + +class SpatialAttention(nn.Module): + def __init__(self, kernel_size=3): + super(SpatialAttention, self).__init__() + + assert kernel_size in (3, 7), 'kernel size must be 3 or 7' + padding = 3 if kernel_size == 7 else 1 + + self.conv = Conv2d(2, 1, kernel_size, padding=padding, bias=False) + weight_init.c2_msra_fill(self.conv) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out = Max(x) + scale = torch.cat([avg_out, max_out], dim=1) + scale = self.conv(scale) + return x * self.sigmoid(scale) + + +@ROI_MASK_HEAD_REGISTRY.register() +class SpatialAttentionMaskHead(nn.Module): + """ + A mask head with several conv layers and spatial attention module + in CenterMask paper, plus an upsample layer (with `ConvTranspose2d`). + """ + + def __init__(self, cfg, input_shape: ShapeSpec): + """ + The following attributes are parsed from config: + num_conv: the number of conv layers + conv_dim: the dimension of the conv layers + norm: normalization for the conv layers + """ + super(SpatialAttentionMaskHead, self).__init__() + + # fmt: off + num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES + conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + input_channels = input_shape.channels + cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK + # fmt: on + + self.conv_norm_relus = [] + + for k in range(num_conv): + conv = Conv2d( + input_channels if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not self.norm, + norm=get_norm(self.norm, conv_dims), + activation=F.relu, + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + + self.spatialAtt = SpatialAttention() + + self.deconv = ConvTranspose2d( + conv_dims if num_conv > 0 else input_channels, + conv_dims, + kernel_size=2, + stride=2, + padding=0, + ) + + num_mask_classes = 1 if cls_agnostic_mask else num_classes + self.predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + def forward(self, x): + for layer in self.conv_norm_relus: + x = layer(x) + x = self.spatialAtt(x) + x = F.relu(self.deconv(x)) + return self.predictor(x) \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6571ba106b6195340cff57ba6c1d77cb423d3163 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/__init__.py @@ -0,0 +1 @@ +from .fcos import FCOS diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos.py new file mode 100644 index 
0000000000000000000000000000000000000000..f8324fdce9ba0f70f317ad4226bca35d25ec00b0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos.py @@ -0,0 +1,244 @@ +import math +from typing import List, Dict +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.layers import ShapeSpec +from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY + +from centermask.layers import DFConv2d, IOULoss +from .fcos_outputs import FCOSOutputs + + +__all__ = ["FCOS"] + +INF = 100000000 + + +class Scale(nn.Module): + def __init__(self, init_value=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.FloatTensor([init_value])) + + def forward(self, input): + return input * self.scale + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class FCOS(nn.Module): + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): + super().__init__() + # fmt: off + self.in_features = cfg.MODEL.FCOS.IN_FEATURES + self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES + self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA + self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA + self.center_sample = cfg.MODEL.FCOS.CENTER_SAMPLE + self.strides = cfg.MODEL.FCOS.FPN_STRIDES + self.radius = cfg.MODEL.FCOS.POS_RADIUS + self.pre_nms_thresh_train = cfg.MODEL.FCOS.INFERENCE_TH_TRAIN + self.pre_nms_thresh_test = cfg.MODEL.FCOS.INFERENCE_TH_TEST + self.pre_nms_topk_train = cfg.MODEL.FCOS.PRE_NMS_TOPK_TRAIN + self.pre_nms_topk_test = cfg.MODEL.FCOS.PRE_NMS_TOPK_TEST + self.nms_thresh = cfg.MODEL.FCOS.NMS_TH + self.post_nms_topk_train = cfg.MODEL.FCOS.POST_NMS_TOPK_TRAIN + self.post_nms_topk_test = cfg.MODEL.FCOS.POST_NMS_TOPK_TEST + self.thresh_with_ctr = cfg.MODEL.FCOS.THRESH_WITH_CTR + self.mask_on = cfg.MODEL.MASK_ON #ywlee + # fmt: on + self.iou_loss = IOULoss(cfg.MODEL.FCOS.LOC_LOSS_TYPE) + # generate sizes of interest + soi = [] + prev_size = -1 + for s in cfg.MODEL.FCOS.SIZES_OF_INTEREST: + soi.append([prev_size, s]) + prev_size = s + soi.append([prev_size, INF]) + self.sizes_of_interest = soi + self.fcos_head = FCOSHead(cfg, [input_shape[f] for f in self.in_features]) + + def forward(self, images, features, gt_instances): + """ + Arguments: + images (list[Tensor] or ImageList): images to be processed + targets (list[BoxList]): ground-truth boxes present in the image (optional) + + Returns: + result (list[BoxList] or dict[Tensor]): the output from the model. + During training, it returns a dict[Tensor] which contains the losses. + During testing, it returns list[BoxList] contains additional fields + like `scores`, `labels` and `mask` (for Mask R-CNN models). 
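+
+        A usage sketch (illustrative; ``images`` is an ``ImageList``,
+        ``features`` a dict keyed by ``cfg.MODEL.FCOS.IN_FEATURES`` and
+        ``gt_instances`` the per-image ground truth, or None at test time)::
+
+            proposals, losses = fcos(images, features, gt_instances)
+            # eval mode: ``losses`` is an empty dict and ``proposals`` holds the
+            # per-image detections produced by FCOSOutputs.predict_proposals()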
+ + """ + features = [features[f] for f in self.in_features] + locations = self.compute_locations(features) + logits_pred, reg_pred, ctrness_pred, bbox_towers = self.fcos_head(features) + + + if self.training: + pre_nms_thresh = self.pre_nms_thresh_train + pre_nms_topk = self.pre_nms_topk_train + post_nms_topk = self.post_nms_topk_train + else: + pre_nms_thresh = self.pre_nms_thresh_test + pre_nms_topk = self.pre_nms_topk_test + post_nms_topk = self.post_nms_topk_test + + outputs = FCOSOutputs( + images, + locations, + logits_pred, + reg_pred, + ctrness_pred, + self.focal_loss_alpha, + self.focal_loss_gamma, + self.iou_loss, + self.center_sample, + self.sizes_of_interest, + self.strides, + self.radius, + self.fcos_head.num_classes, + pre_nms_thresh, + pre_nms_topk, + self.nms_thresh, + post_nms_topk, + self.thresh_with_ctr, + gt_instances, + ) + + if self.training: + losses, _ = outputs.losses() + if self.mask_on: + proposals = outputs.predict_proposals() + return proposals, losses + else: + return None, losses + else: + proposals = outputs.predict_proposals() + return proposals, {} + + def compute_locations(self, features): + locations = [] + for level, feature in enumerate(features): + h, w = feature.size()[-2:] + locations_per_level = self.compute_locations_per_level( + h, w, self.fpn_strides[level], + feature.device + ) + locations.append(locations_per_level) + return locations + + def compute_locations_per_level(self, h, w, stride, device): + shifts_x = torch.arange( + 0, w * stride, step=stride, + dtype=torch.float32, device=device + ) + shifts_y = torch.arange( + 0, h * stride, step=stride, + dtype=torch.float32, device=device + ) + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 + return locations + + +class FCOSHead(nn.Module): + def __init__(self, cfg, input_shape: List[ShapeSpec]): + """ + Arguments: + in_channels (int): number of channels of the input feature + """ + super().__init__() + # TODO: Implement the sigmoid version first. + self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES + self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES + head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, + False), + "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, + cfg.MODEL.FCOS.USE_DEFORMABLE), + "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, + cfg.MODEL.FCOS.USE_DEFORMABLE)} + norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM + + in_channels = [s.channels for s in input_shape] + assert len(set(in_channels)) == 1, "Each level must have the same channel!" 
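+        # all FPN levels share one channel count (checked above); head_configs
+        # maps each tower name to (num convs, use deformable conv) and the loop
+        # below registers the resulting cls/bbox/share towers as submodules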
+ in_channels = in_channels[0] + + for head in head_configs: + tower = [] + num_convs, use_deformable = head_configs[head] + if use_deformable: + conv_func = DFConv2d + else: + conv_func = nn.Conv2d + for i in range(num_convs): + tower.append(conv_func( + in_channels, in_channels, + kernel_size=3, stride=1, + padding=1, bias=True + )) + if norm == "GN": + tower.append(nn.GroupNorm(32, in_channels)) + tower.append(nn.ReLU()) + self.add_module('{}_tower'.format(head), + nn.Sequential(*tower)) + + self.cls_logits = nn.Conv2d( + in_channels, self.num_classes, + kernel_size=3, stride=1, + padding=1 + ) + self.bbox_pred = nn.Conv2d( + in_channels, 4, kernel_size=3, + stride=1, padding=1 + ) + self.ctrness = nn.Conv2d( + in_channels, 1, kernel_size=3, + stride=1, padding=1 + ) + + if cfg.MODEL.FCOS.USE_SCALE: + self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in self.fpn_strides]) + else: + self.scales = None + + for modules in [ + self.cls_tower, self.bbox_tower, + self.share_tower, self.cls_logits, + self.bbox_pred, self.ctrness + ]: + for l in modules.modules(): + if isinstance(l, nn.Conv2d): + torch.nn.init.normal_(l.weight, std=0.01) + torch.nn.init.constant_(l.bias, 0) + + # initialize the bias for focal loss + prior_prob = cfg.MODEL.FCOS.PRIOR_PROB + bias_value = -math.log((1 - prior_prob) / prior_prob) + torch.nn.init.constant_(self.cls_logits.bias, bias_value) + + def forward(self, x): + logits = [] + bbox_reg = [] + ctrness = [] + bbox_towers = [] + + for l, feature in enumerate(x): + dtype = feature.dtype + feature = self.share_tower(feature) + + cls_tower = self.cls_tower(feature) + bbox_tower = self.bbox_tower(feature) + + logits.append(self.cls_logits(cls_tower)) + ctrness.append(self.ctrness(bbox_tower)) + reg = self.bbox_pred(bbox_tower).float() + if self.scales is not None: + reg = self.scales[l](reg) + # Note that we use relu, as in the improved FCOS, instead of exp. + bbox_reg.append(F.relu(reg).to(dtype)) + + return logits, bbox_reg, ctrness, bbox_towers diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos_outputs.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos_outputs.py new file mode 100644 index 0000000000000000000000000000000000000000..cf4deef2c3fda91718d9491efcd46eaf47949e54 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/modeling/fcos/fcos_outputs.py @@ -0,0 +1,490 @@ +import logging +import torch +import torch.nn.functional as F + +from detectron2.layers import cat +from detectron2.structures import Instances, Boxes +from centermask.utils.comm import get_world_size +from fvcore.nn import sigmoid_focal_loss_jit + +from centermask.utils.comm import reduce_sum +from centermask.layers import ml_nms + + + +logger = logging.getLogger(__name__) + +INF = 100000000 + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + L: number of feature maps per image on which RPN is run + Hi, Wi: height and width of the i-th feature map + 4: size of the box parameterization + +Naming convention: + + labels: refers to the ground-truth class of an position. + + reg_targets: refers to the 4-d (left, top, right, bottom) distances that parameterize the ground-truth box. 
+ + logits_pred: predicted classification scores in [-inf, +inf]; + + reg_pred: the predicted (left, top, right, bottom), corresponding to reg_targets + + ctrness_pred: predicted centerness scores + +""" + + +def compute_ctrness_targets(reg_targets): + + + if len(reg_targets) == 0: + return reg_targets.new_zeros(len(reg_targets)) + left_right = reg_targets[:, [0, 2]] + top_bottom = reg_targets[:, [1, 3]] + ctrness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \ + (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]) + # add sqrt + ctrness= ctrness * ctrness + ctrness = torch.sqrt(ctrness) + # end + return torch.sqrt(ctrness) + + +def fcos_losses( + labels, + reg_targets, + logits_pred, + reg_pred, + ctrness_pred, + focal_loss_alpha, + focal_loss_gamma, + iou_loss, +): + device = labels.device + + num_classes = logits_pred.size(1) + num_pixels = logits_pred.size(0) + + labels = labels.flatten() + pos_inds = (labels != num_classes) + pos_mask = pos_inds.float() + num_pos_local = pos_mask.sum() + num_gpus = get_world_size() + total_num_pos = reduce_sum(pos_mask.new_tensor([num_pos_local])).item() + num_pos_avg = max(total_num_pos / num_gpus, 1.0) + class_target = torch.tensor([list(range(0,80))]).to(device).float() + class_target = torch.repeat_interleave(class_target,num_pixels,dim=0) + + labels = labels.float() + labels_new = torch.repeat_interleave(labels.reshape(num_pixels,1),num_classes,dim=1).float() + class_target = class_target.eq(labels_new).float() + + class_loss = sigmoid_focal_loss_jit( + logits_pred, + class_target.to(logits_pred.dtype), + alpha=focal_loss_alpha, + gamma=focal_loss_gamma, + reduction="sum", + ) / num_pos_avg + + reg_pred = torch.mul(reg_pred,pos_mask.reshape(num_pixels,1)) + ctrness_pred = ctrness_pred * pos_mask + ctrness_targets = compute_ctrness_targets(reg_targets)*pos_mask + reg_targets = torch.mul(reg_targets,pos_mask.reshape(num_pixels,1)) + + ctrness_targets_sum = ctrness_targets.sum() + ctrness_norm = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6) + reg_loss = iou_loss( + reg_pred, + reg_targets, + ctrness_targets, + pos_mask + ) / ctrness_norm + ctrness_loss = F.binary_cross_entropy_with_logits( + ctrness_pred, + ctrness_targets, + reduction="sum", + weight= pos_mask + ) / num_pos_avg + + + losses = { + "loss_fcos_cls": class_loss, + "loss_fcos_loc": reg_loss, + "loss_fcos_ctr": ctrness_loss + } + return losses, {} + + +class FCOSOutputs(object): + def __init__( + self, + images, + locations, + logits_pred, + reg_pred, + ctrness_pred, + focal_loss_alpha, + focal_loss_gamma, + iou_loss, + center_sample, + sizes_of_interest, + strides, + radius, + num_classes, + pre_nms_thresh, + pre_nms_top_n, + nms_thresh, + fpn_post_nms_top_n, + thresh_with_ctr, + gt_instances=None, + ): + self.logits_pred = logits_pred + self.reg_pred = reg_pred + self.ctrness_pred = ctrness_pred + self.locations = locations + + self.gt_instances = gt_instances + self.num_feature_maps = len(logits_pred) + self.num_images = len(images) + self.image_sizes = images.image_sizes + self.focal_loss_alpha = focal_loss_alpha + self.focal_loss_gamma = focal_loss_gamma + self.iou_loss = iou_loss + self.center_sample = center_sample + self.sizes_of_interest = sizes_of_interest + self.strides = strides + self.radius = radius + self.num_classes = num_classes + self.pre_nms_thresh = pre_nms_thresh + self.pre_nms_top_n = pre_nms_top_n + self.nms_thresh = nms_thresh + self.fpn_post_nms_top_n = fpn_post_nms_top_n + self.thresh_with_ctr = thresh_with_ctr + + def 
_transpose(self, training_targets, num_loc_list): + ''' + This function is used to transpose image first training targets to level first ones + :return: level first training targets + ''' + for im_i in range(len(training_targets)): + training_targets[im_i] = torch.split( + training_targets[im_i], num_loc_list, dim=0 + ) + + targets_level_first = [] + for targets_per_level in zip(*training_targets): + targets_level_first.append( + torch.cat(targets_per_level, dim=0) + ) + return targets_level_first + + def _get_ground_truth(self): + num_loc_list = [len(loc) for loc in self.locations] + self.num_loc_list = num_loc_list + + # compute locations to size ranges + loc_to_size_range = [] + for l, loc_per_level in enumerate(self.locations): + loc_to_size_range_per_level = loc_per_level.new_tensor(self.sizes_of_interest[l]) + loc_to_size_range.append( + loc_to_size_range_per_level[None].expand(num_loc_list[l], -1) + ) + + loc_to_size_range = torch.cat(loc_to_size_range, dim=0) + locations = torch.cat(self.locations, dim=0) + + training_targets = self.compute_targets_for_locations( + locations, self.gt_instances, loc_to_size_range + ) + + # transpose im first training_targets to level first ones + training_targets = { + k: self._transpose(v, num_loc_list) for k, v in training_targets.items() + } + + # we normalize reg_targets by FPN's strides here + reg_targets = training_targets["reg_targets"] + for l in range(len(reg_targets)): + reg_targets[l] = reg_targets[l] / float(self.strides[l]) + + return training_targets + + def get_sample_region(self, gt, strides, num_loc_list, loc_xs, loc_ys, radius=1): + num_gts = gt.shape[0] + K = len(loc_xs) + gt = gt[None].expand(K, num_gts, 4) + center_x = (gt[..., 0] + gt[..., 2]) / 2 + center_y = (gt[..., 1] + gt[..., 3]) / 2 + center_gt = gt.new_zeros(gt.shape) + # no gt + if center_x.numel() == 0 or center_x[..., 0].sum() == 0: + return loc_xs.new_zeros(loc_xs.shape, dtype=torch.uint8) + beg = 0 + for level, num_loc in enumerate(num_loc_list): + end = beg + num_loc + stride = strides[level] * radius + xmin = center_x[beg:end] - stride + ymin = center_y[beg:end] - stride + xmax = center_x[beg:end] + stride + ymax = center_y[beg:end] + stride + # limit sample region in gt + center_gt[beg:end, :, 0] = torch.where(xmin > gt[beg:end, :, 0], xmin, gt[beg:end, :, 0]) + center_gt[beg:end, :, 1] = torch.where(ymin > gt[beg:end, :, 1], ymin, gt[beg:end, :, 1]) + center_gt[beg:end, :, 2] = torch.where(xmax > gt[beg:end, :, 2], gt[beg:end, :, 2], xmax) + center_gt[beg:end, :, 3] = torch.where(ymax > gt[beg:end, :, 3], gt[beg:end, :, 3], ymax) + beg = end + left = loc_xs[:, None] - center_gt[..., 0] + right = center_gt[..., 2] - loc_xs[:, None] + top = loc_ys[:, None] - center_gt[..., 1] + bottom = center_gt[..., 3] - loc_ys[:, None] + center_bbox = torch.stack((left, top, right, bottom), -1) + inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0 + return inside_gt_bbox_mask + + def compute_targets_for_locations(self, locations, targets, size_ranges): + labels = [] + reg_targets = [] + xs, ys = locations[:, 0], locations[:, 1] + + for im_i in range(len(targets)): + targets_per_im = targets[im_i] + bboxes = targets_per_im.gt_boxes.tensor + labels_per_im = targets_per_im.gt_classes + + # no gt + if bboxes.numel() == 0: + labels.append(labels_per_im.new_zeros(locations.size(0)) + self.num_classes) + reg_targets.append(locations.new_zeros((locations.size(0), 4))) + continue + + area = targets_per_im.gt_boxes.area() + + l = xs[:, None] - bboxes[:, 0][None] + t = ys[:, None] - 
bboxes[:, 1][None] + r = bboxes[:, 2][None] - xs[:, None] + b = bboxes[:, 3][None] - ys[:, None] + reg_targets_per_im = torch.stack([l, t, r, b], dim=2) + + if self.center_sample: + is_in_boxes = self.get_sample_region( + bboxes, self.strides, self.num_loc_list, + xs, ys, radius=self.radius + ) + else: + is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0 + + max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0] + # limit the regression range for each location + is_cared_in_the_level = \ + (max_reg_targets_per_im >= size_ranges[:, [0]]) & \ + (max_reg_targets_per_im <= size_ranges[:, [1]]) + + locations_to_gt_area = area[None].repeat(len(locations), 1) + locations_to_gt_area[is_in_boxes == 0] = INF + locations_to_gt_area[is_cared_in_the_level == 0] = INF + + # if there are still more than one objects for a location, + # we choose the one with minimal area + locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1) + + reg_targets_per_im = reg_targets_per_im[range(len(locations)), locations_to_gt_inds] + + labels_per_im = labels_per_im[locations_to_gt_inds] + labels_per_im[locations_to_min_area == INF] = self.num_classes + + labels.append(labels_per_im) + reg_targets.append(reg_targets_per_im) + + return {"labels": labels, "reg_targets": reg_targets} + + def losses(self): + """ + Return the losses from a set of FCOS predictions and their associated ground-truth. + + Returns: + dict[loss name -> loss value]: A dict mapping from loss name to loss value. + """ + + training_targets = self._get_ground_truth() + labels, reg_targets = training_targets["labels"], training_targets["reg_targets"] + + # Collect all logits and regression predictions over feature maps + # and images to arrive at the same shape as the labels and targets + # The final ordering is L, N, H, W from slowest to fastest axis. + logits_pred = cat( + [ + # Reshape: (N, C, Hi, Wi) -> (N, Hi, Wi, C) -> (N*Hi*Wi, C) + x.permute(0, 2, 3, 1).reshape(-1, self.num_classes) + for x in self.logits_pred + ], dim=0,) + reg_pred = cat( + [ + # Reshape: (N, B, Hi, Wi) -> (N, Hi, Wi, B) -> (N*Hi*Wi, B) + x.permute(0, 2, 3, 1).reshape(-1, 4) + for x in self.reg_pred + ], dim=0,) + ctrness_pred = cat( + [ + # Reshape: (N, 1, Hi, Wi) -> (N*Hi*Wi,) + x.reshape(-1) for x in self.ctrness_pred + ], dim=0,) + + labels = cat( + [ + # Reshape: (N, 1, Hi, Wi) -> (N*Hi*Wi,) + x.reshape(-1) for x in labels + ], dim=0,) + + reg_targets = cat( + [ + # Reshape: (N, Hi, Wi, 4) -> (N*Hi*Wi, 4) + x.reshape(-1, 4) for x in reg_targets + ], dim=0,) + + return fcos_losses( + labels, + reg_targets, + logits_pred, + reg_pred, + ctrness_pred, + self.focal_loss_alpha, + self.focal_loss_gamma, + self.iou_loss + ) + + def predict_proposals(self): + sampled_boxes = [] + + bundle = ( + self.locations, self.logits_pred, + self.reg_pred, self.ctrness_pred, + self.strides + ) + + for i, (l, o, r, c, s) in enumerate(zip(*bundle)): + # recall that during training, we normalize regression targets with FPN's stride. + # we denormalize them here. 
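+            # e.g. a predicted distance of 2.5 on the stride-8 level corresponds
+            # to 2.5 * 8 = 20 pixels in the input image (illustration only)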
+ r = r * s + sampled_boxes.append( + self.forward_for_single_feature_map( + l, o, r, c, self.image_sizes + ) + ) + boxlists = list(zip(*sampled_boxes)) + boxlists = [Instances.cat(boxlist) for boxlist in boxlists] + boxlists = self.select_over_all_levels(boxlists) + return boxlists + + def forward_for_single_feature_map( + self, locations, box_cls, + reg_pred, ctrness, image_sizes + ): + N, C, H, W = box_cls.shape + + device = box_cls.device + + # put in the same format as locations + box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1) + box_cls = box_cls.reshape(N, -1, C).sigmoid() + box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1) + box_regression = box_regression.reshape(N, -1, 4) + ctrness = ctrness.view(N, 1, H, W).permute(0, 2, 3, 1) + ctrness = ctrness.reshape(N, -1).sigmoid() + + # if self.thresh_with_ctr is True, we multiply the classification + # scores with centerness scores before applying the threshold. + if self.thresh_with_ctr: + box_cls = box_cls * ctrness[:, :, None] + candidate_inds = box_cls > self.pre_nms_thresh + # pre_nms_top_n = candidate_inds.view(N, -1).sum(1) + pre_nms_top_n = candidate_inds.reshape(N, -1).sum(1) + pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) + + if not self.thresh_with_ctr: + box_cls = box_cls * ctrness[:, :, None] + + results = [] + for i in range(N): + per_box_cls = box_cls[i] + per_candidate_inds = candidate_inds[i] + #edit by zsc + per_box_cls = torch.sqrt(per_box_cls.float()) + ### + per_box_cls = per_box_cls[per_candidate_inds] + + # per_candidate_nonzeros = per_candidate_inds.nonzero() + per_candidate_nonzeros = torch.nonzero(per_candidate_inds, as_tuple=False) + per_box_loc = per_candidate_nonzeros[:, 0] + per_class = per_candidate_nonzeros[:, 1] + + per_box_regression = box_regression[i] + per_box_regression = per_box_regression[per_box_loc] + per_locations = locations[per_box_loc] + + per_pre_nms_top_n = pre_nms_top_n[i] + + if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): + per_box_cls, top_k_indices = \ + per_box_cls.topk(per_pre_nms_top_n, sorted=False) + per_class = per_class[top_k_indices] + per_box_regression = per_box_regression[top_k_indices] + per_locations = per_locations[top_k_indices] + + per_locations = per_locations.cpu() + per_box_regression = per_box_regression.cpu() + + detections = torch.stack([ + per_locations[:, 0] - per_box_regression[:, 0], + per_locations[:, 1] - per_box_regression[:, 1], + per_locations[:, 0] + per_box_regression[:, 2], + per_locations[:, 1] + per_box_regression[:, 3], + ], dim=1) + detections = detections.to(device) + per_locations = per_locations.to(device) + + boxlist = Instances(image_sizes[i]) + boxlist.pred_boxes = Boxes(detections) + #edit zsc + #boxlist.scores = torch.sqrt(per_box_cls) + boxlist.scores = per_box_cls + ### + boxlist.pred_classes = per_class + boxlist.locations = per_locations + + results.append(boxlist) + + return results + + def select_over_all_levels(self, boxlists): + num_images = len(boxlists) + results = [] + for i in range(num_images): + # multiclass nms + result = ml_nms(boxlists[i], self.nms_thresh) + number_of_detections = len(result) + + # Limit to max_per_image detections **over all classes** + if number_of_detections > self.fpn_post_nms_top_n > 0: + cls_scores = result.scores + # add cpu() + + cls_scores = cls_scores.cpu() + image_thresh, _ = torch.kthvalue( + cls_scores, + number_of_detections - self.fpn_post_nms_top_n + 1 + ) + # image_thresh = image_thresh + # end + + keep = cls_scores >= image_thresh.item() + + 
keep = torch.nonzero(keep).squeeze(1) + result = result[keep] + results.append(result) + return results diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/train_net.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..d4921b0901fccb5668ce4984a0b07556e1a0186f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/train_net.py @@ -0,0 +1,162 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Youngwan Lee (ETRI), 2020. All Rights Reserved. +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + # CityscapesInstanceEvaluator, + # CityscapesSemSegEvaluator, + # COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from centermask.evaluation import ( + COCOEvaluator, + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator +) +from detectron2.modeling import GeneralizedRCNNWithTTA +from detectron2.checkpoint import DetectionCheckpointer +from centermask.config import get_cfg + +import flower.flower_data +class Trainer(DefaultTrainer): + """ + This is the same Trainer except that we rewrite the + `build_train_loader` method. + """ + + + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + # print("evaluator_type:",evaluator_type) + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." 
+ return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, output_dir=output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. + logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + # lzy 12.20 add npu device + # torch.npu.set_device() + # finish + + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop or subclassing the trainer. 
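+
+    For example (a sketch under assumed names, not part of this script), a
+    Trainer subclass could override ``build_train_loader``::
+
+        class MyTrainer(Trainer):
+            @classmethod
+            def build_train_loader(cls, cfg):
+                # build_detection_train_loader comes from detectron2.data;
+                # ``my_mapper`` is a hypothetical DatasetMapper-style callable
+                return build_detection_train_loader(cfg, mapper=my_mapper)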
+ """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/comm.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..3632d6bf4179e5086dc2e411b4c3ec48733f35fc --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/comm.py @@ -0,0 +1,11 @@ +import torch.distributed as dist +from detectron2.utils.comm import get_world_size + + +def reduce_sum(tensor): + world_size = get_world_size() + if world_size < 2: + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor, op=dist.ReduceOp.SUM) + return tensor diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/measures.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/measures.py new file mode 100644 index 0000000000000000000000000000000000000000..b99a4beb1eac7958d4f7c89e0c56ffb3d674b3d9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/centermask/utils/measures.py @@ -0,0 +1,192 @@ +# coding: utf-8 +# Adapted from https://github.com/ShichenLiu/CondenseNet/blob/master/utils.py +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division + +import operator + +from functools import reduce + + +def get_num_gen(gen): + return sum(1 for x in gen) + + +def is_pruned(layer): + try: + layer.mask + return True + except AttributeError: + return False + + +def is_leaf(model): + return get_num_gen(model.children()) == 0 + + +def get_layer_info(layer): + layer_str = str(layer) + type_name = layer_str[:layer_str.find('(')].strip() + return type_name + + +def get_layer_param(model): + return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()]) + + +### The input batch size should be 1 to call this function +def measure_layer(layer, *args): + global count_ops, count_params + + for x in args: + delta_ops = 0 + delta_params = 0 + multi_add = 1 + type_name = get_layer_info(layer) + + ### ops_conv + if type_name in ['Conv2d']: + out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] - layer.kernel_size[0]) / + layer.stride[0] + 1) + out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] - layer.kernel_size[1]) / + layer.stride[1] + 1) + delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add + delta_params = get_layer_param(layer) + + elif type_name in ['ConvTranspose2d']: + _, _, in_h, in_w = x.size() + out_h = int((in_h-1)*layer.stride[0] - 2 * layer.padding[0] + layer.kernel_size[0] + layer.output_padding[0]) + out_w = int((in_w-1)*layer.stride[1] - 2 * layer.padding[1] + layer.kernel_size[1] + layer.output_padding[1]) + delta_ops = layer.in_channels * layer.out_channels * layer.kernel_size[0] * \ + layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add + delta_params = get_layer_param(layer) + + 
### ops_learned_conv + elif type_name in ['LearnedGroupConv']: + measure_layer(layer.relu, x) + measure_layer(layer.norm, x) + conv = layer.conv + out_h = int((x.size()[2] + 2 * conv.padding[0] - conv.kernel_size[0]) / + conv.stride[0] + 1) + out_w = int((x.size()[3] + 2 * conv.padding[1] - conv.kernel_size[1]) / + conv.stride[1] + 1) + delta_ops = conv.in_channels * conv.out_channels * conv.kernel_size[0] * conv.kernel_size[1] * out_h * out_w / layer.condense_factor * multi_add + delta_params = get_layer_param(conv) / layer.condense_factor + + ### ops_nonlinearity + elif type_name in ['ReLU', 'ReLU6']: + delta_ops = x.numel() + delta_params = get_layer_param(layer) + + ### ops_pooling + elif type_name in ['AvgPool2d', 'MaxPool2d']: + in_w = x.size()[2] + kernel_ops = layer.kernel_size * layer.kernel_size + out_w = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1) + out_h = int((in_w + 2 * layer.padding - layer.kernel_size) / layer.stride + 1) + delta_ops = x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops + delta_params = get_layer_param(layer) + + elif type_name in ['LastLevelMaxPool']: + pass + + elif type_name in ['AdaptiveAvgPool2d']: + delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] + delta_params = get_layer_param(layer) + + elif type_name in ['ZeroPad2d', 'RetinaNetPostProcessor']: + pass + #delta_ops = x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] + #delta_params = get_layer_param(layer) + + ### ops_linear + elif type_name in ['Linear']: + weight_ops = layer.weight.numel() * multi_add + bias_ops = layer.bias.numel() + delta_ops = x.size()[0] * (weight_ops + bias_ops) + delta_params = get_layer_param(layer) + + ### ops_nothing + elif type_name in ['BatchNorm2d', 'Dropout2d', 'DropChannel', 'Dropout', 'FrozenBatchNorm2d', 'GroupNorm']: + delta_params = get_layer_param(layer) + + elif type_name in ['SumTwo']: + delta_ops = x.numel() + + elif type_name in ['AggregateCell']: + if not layer.pre_transform: + delta_ops = 2 * x.numel() # twice for each input + else: + measure_layer(layer.branch_1, x) + measure_layer(layer.branch_2, x) + delta_params = get_layer_param(layer) + + elif type_name in ['Identity', 'Zero']: + pass + + elif type_name in ['Scale']: + delta_params = get_layer_param(layer) + delta_ops = x.numel() + + elif type_name in ['FCOSPostProcessor', 'RPNPostProcessor', 'KeypointPostProcessor', + 'ROIAlign', 'PostProcessor', 'KeypointRCNNPredictor', + 'NaiveSyncBatchNorm', 'Upsample', 'Sequential']: + pass + + elif type_name in ['DeformConv']: + # don't count bilinear + offset_conv = list(layer.parameters())[0] + delta_ops = reduce(operator.mul, offset_conv.size(), x.size()[2] * x.size()[3]) + out_h = int((x.size()[2] + 2 * layer.padding[0] / layer.dilation[0] + - layer.kernel_size[0]) / layer.stride[0] + 1) + out_w = int((x.size()[3] + 2 * layer.padding[1] / layer.dilation[1] + - layer.kernel_size[1]) / layer.stride[1] + 1) + delta_ops += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * out_h * out_w / layer.groups * multi_add + delta_params = get_layer_param(layer) + + ### unknown layer type + else: + raise TypeError('unknown layer type: %s' % type_name) + + count_ops += delta_ops + count_params += delta_params + return + + +def measure_model(model, x): + global count_ops, count_params + count_ops = 0 + count_params = 0 + + def should_measure(x): + return is_leaf(x) or is_pruned(x) + + def modify_forward(model): + for child in model.children(): + if should_measure(child): + def 
new_forward(m): + def lambda_forward(*args): + measure_layer(m, *args) + return m.old_forward(*args) + return lambda_forward + child.old_forward = child.forward + child.forward = new_forward(child) + else: + modify_forward(child) + + def restore_forward(model): + for child in model.children(): + # leaf node + if is_leaf(child) and hasattr(child, 'old_forward'): + child.forward = child.old_forward + child.old_forward = None + else: + restore_forward(child) + + modify_forward(model) + out = model.forward(x) + restore_forward(model) + + return out, count_ops, count_params diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-Lite-VoVNet.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-Lite-VoVNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b332f21e828234ade368305f34c97f90024cc06 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-Lite-VoVNet.yaml @@ -0,0 +1,43 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_fcos_vovnet_fpn_backbone" + FREEZE_AT: 0 + VOVNET: + OUT_FEATURES: ["stage3", "stage4", "stage5"] + FPN: + IN_FEATURES: ["stage3", "stage4", "stage5"] + OUT_CHANNELS: 128 + PROPOSAL_GENERATOR: + NAME: "FCOS" + FCOS: + POST_NMS_TOPK_TEST: 50 + NUM_CLS_CONVS: 2 + NUM_BOX_CONVS: 2 + MASK_ON: True + MASKIOU_ON: True + ROI_HEADS: + NAME: "CenterROIHeads" + IN_FEATURES: ["p3", "p4", "p5"] + ROI_MASK_HEAD: + NAME: "SpatialAttentionMaskHead" + ASSIGN_CRITERION: "ratio" + NUM_CONV: 2 + CONV_DIM: 128 + ROI_MASKIOU_HEAD: + NUM_CONV: 2 + CONV_DIM: 128 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + CHECKPOINT_PERIOD: 10000 + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (580, 600) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-ResNet.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-ResNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4b03de5dc106c247b4dfa9ea36a22eecd8ea6e7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-ResNet.yaml @@ -0,0 +1,33 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_fcos_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + PROPOSAL_GENERATOR: + NAME: "FCOS" + FCOS: + POST_NMS_TOPK_TEST: 50 + # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] + MASK_ON: True + MASKIOU_ON: True + ROI_HEADS: + NAME: "CenterROIHeads" + IN_FEATURES: ["p3", "p4", "p5"] + ROI_MASK_HEAD: + NAME: "SpatialAttentionMaskHead" + ASSIGN_CRITERION: "ratio" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet.yaml 
b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..421c55f746b65d6a00baa33c77c0e300030319df --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet.yaml @@ -0,0 +1,35 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_fcos_vovnet_fpn_backbone" + FREEZE_AT: 0 + VOVNET: + OUT_FEATURES: ["stage3", "stage4", "stage5"] + FPN: + IN_FEATURES: ["stage3", "stage4", "stage5"] + PROPOSAL_GENERATOR: + NAME: "FCOS" + FCOS: + POST_NMS_TOPK_TEST: 50 + # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] + MASK_ON: True + MASKIOU_ON: True + ROI_HEADS: + NAME: "CenterROIHeads" + IN_FEATURES: ["p3", "p4", "p5"] + ROI_MASK_HEAD: + NAME: "SpatialAttentionMaskHead" + ASSIGN_CRITERION: "ratio" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + CHECKPOINT_PERIOD: 10000 + IMS_PER_BATCH: 2 + BASE_LR: 0.0001 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet_AMP.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet_AMP.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bb565f21db1d5d5114a34e43aee7b6bd2d9fd71 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-CenterMask-VoVNet_AMP.yaml @@ -0,0 +1,41 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_fcos_vovnet_fpn_backbone" + FREEZE_AT: 0 + VOVNET: + OUT_FEATURES: ["stage3", "stage4", "stage5"] + FPN: + IN_FEATURES: ["stage3", "stage4", "stage5"] + PROPOSAL_GENERATOR: + NAME: "FCOS" + FCOS: + POST_NMS_TOPK_TEST: 50 + # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] + MASK_ON: True + MASKIOU_ON: True + ROI_HEADS: + NAME: "CenterROIHeads" + IN_FEATURES: ["p3", "p4", "p5"] + ROI_MASK_HEAD: + NAME: "SpatialAttentionMaskHead" + ASSIGN_CRITERION: "ratio" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) + #TRAIN: ("my_train",) + #TEST: ("my_val",) +SOLVER: + CHECKPOINT_PERIOD: 100 + IMS_PER_BATCH: 32 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + # MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) + MIN_SIZE_TRAIN: (800,) +AMP: 1 +OPT_LEVEL: O1 +LOSS_SCALE_VALUE: None diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-ResNet-FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-ResNet-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12810093b67c0b8c6f94f1aa65e2ec293157b152 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-ResNet-FPN.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-CenterMask-ResNet.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) diff --git 
a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-VoVNet-FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-VoVNet-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f9a2b8a825c9230fc623acc3d3eff2be6f81752 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/Base-Panoptic-VoVNet-FPN.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + VOVNET: + OUT_FEATURES: ["stage2", "stage3", "stage4", "stage5"] + FPN: + IN_FEATURES: ["stage2", "stage3", "stage4", "stage5"] + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_101_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_101_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..965027499909f932112c6f9ffc4e62e605b7338d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_101_FPN_ms_3x.yaml @@ -0,0 +1,10 @@ +_BASE_: "Base-CenterMask-ResNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/CenterMask-R-101-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_50_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_50_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5fc59fb346e7f644f9539e2a0c792890dab8e28 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_R_50_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-ResNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/CenterMask-R-50-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d239a95a858f154b4a041dbc7fb60f7480081da9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-39-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-39-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_dcn_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_dcn_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50e8787416da5b871a257bf0a1ec83cc28e180fe --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_39_eSE_dcn_FPN_ms_3x.yaml @@ -0,0 +1,12 @@ 
+_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-39-eSE" + STAGE_WITH_DCN: (False, True, True, True) + WITH_MODULATED_DCN: True + DEFORMABLE_GROUPS: 1 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-39-dcn-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3407ed082351c91a9fc1cec5108708257f1d970c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-57-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-57-ms-3x" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_dcn_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_dcn_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20336699ea8d36f1fae1c5c610bf0225478a7c14 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_57_eSE_dcn_FPN_ms_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-57-eSE" + STAGE_WITH_DCN: (False, True, True, True) + WITH_MODULATED_DCN: True + DEFORMABLE_GROUPS: 1 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-57-dcn-ms-3x" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0208f95459afd8ef1be98427672efa59973ec4af --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-99-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-99-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_dcn_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_dcn_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0097a53f62b8a995203fee9476186cf436397a15 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_V_99_eSE_dcn_FPN_ms_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-99-eSE" + 
STAGE_WITH_DCN: (False, True, True, True) + WITH_MODULATED_DCN: True + DEFORMABLE_GROUPS: 1 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/CenterMask-V-99-dcn-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_X_101_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_X_101_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8108ed1ae8e5113f7ef175aca03f7f7078c16631 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_X_101_FPN_ms_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "Base-CenterMask-ResNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/CenterMask-X-101-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_Mv2_FPN_ms_4x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_Mv2_FPN_ms_4x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f22e0b503d95d2ee3e33d18f00470ebb7f781ec5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_Mv2_FPN_ms_4x.yaml @@ -0,0 +1,45 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + WEIGHTS: "https://www.dropbox.com/s/yduxbc13s3ip6qn/mobilenet_v2_detectron2.pth?dl=1" + BACKBONE: + NAME: "build_fcos_mobilenetv2_fpn_backbone" + FREEZE_AT: 0 + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + OUT_CHANNELS: 128 + PROPOSAL_GENERATOR: + NAME: "FCOS" + FCOS: + POST_NMS_TOPK_TEST: 50 + NUM_CLS_CONVS: 2 + NUM_BOX_CONVS: 2 + MASK_ON: True + MASKIOU_ON: True + ROI_HEADS: + NAME: "CenterROIHeads" + IN_FEATURES: ["p3", "p4", "p5"] + ROI_MASK_HEAD: + NAME: "SpatialAttentionMaskHead" + ASSIGN_CRITERION: "ratio" + NUM_CONV: 2 + CONV_DIM: 128 + ROI_MASKIOU_HEAD: + NUM_CONV: 2 + CONV_DIM: 128 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + CHECKPOINT_PERIOD: 10000 + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (300000, 340000) + MAX_ITER: 360000 +INPUT: + MIN_SIZE_TRAIN: (580, 600) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 +OUTPUT_DIR: "output/centermask/CenterMask-Lite-MNv2-ms-4x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_eSE_FPN_ms_4x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_eSE_FPN_ms_4x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0812c425760584ec763eaea8858c6a649da1eb60 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_eSE_FPN_ms_4x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-Lite-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/rptgw6stppbiw1u/vovnet19_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-19-eSE" +SOLVER: + STEPS: (300000, 340000) + MAX_ITER: 360000 +OUTPUT_DIR: "output/centermask/CenterMask-Lite-V-19-ms-4x" diff --git 
a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_dw_eSE_FPN_ms_4x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_dw_eSE_FPN_ms_4x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fc85648159b7646b63a73829f137c58c9d81312 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_dw_eSE_FPN_ms_4x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-Lite-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/f3s7ospitqoals1/vovnet19_ese_slim_dw_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-19-slim-dw-eSE" +SOLVER: + STEPS: (300000, 340000) + MAX_ITER: 360000 +OUTPUT_DIR: "output/centermask/CenterMask-Lite-V-19-slim-dw-ms-4x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_eSE_FPN_ms_4x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_eSE_FPN_ms_4x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..317e682d592b65b66da0067b47ea6cced21cd6c4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_19_slim_eSE_FPN_ms_4x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-Lite-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/8h5ybmi4ftbcom0/vovnet19_ese_slim_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-19-slim-eSE" +SOLVER: + STEPS: (300000, 340000) + MAX_ITER: 360000 +OUTPUT_DIR: "output/centermask/CenterMask-Lite-V-19-slim-ms-4x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_39_eSE_FPN_ms_4x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_39_eSE_FPN_ms_4x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c290891322d445d3e85e60de07c14e894344e13 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/centermask_lite_V_39_eSE_FPN_ms_4x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-Lite-VoVNet.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-39-eSE" +SOLVER: + STEPS: (300000, 340000) + MAX_ITER: 360000 +OUTPUT_DIR: "output/centermask/CenterMask-Lite-V-39-ms-4x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_101_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_101_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19bb02079da1775389718ffc9318a96ec60254cb --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_101_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-Panoptic-ResNet-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-R-101-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_50_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_50_FPN_ms_3x.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..527453b72927aa5239fc36fc31395c617c11c5e8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_R_50_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-Panoptic-ResNet-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-R-50-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_39_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_39_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58a61bf3ffde2530cf531913bd6b9f4272d29797 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_39_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-Panoptic-VoVNet-FPN.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-39-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-V-39-eSE-FPN-ms-3x" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_57_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_57_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd1d24a15ed5cdebed387b6ecc2f768bfe6304f2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_57_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-Panoptic-VoVNet-FPN.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-57-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-V-57-eSE-FPN-ms-3x" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_99_eSE_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_99_eSE_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d52490ccf1b4e0375ac1a3a8b462ca4728ae0a8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_V_99_eSE_FPN_ms_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-Panoptic-VoVNet-FPN.yaml" +MODEL: + WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" + VOVNET: + CONV_BODY : "V-99-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-V-99-eSE-FPN-ms-3x" \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_X_101_FPN_ms_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_X_101_FPN_ms_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8056446786698846043ffdeef600b3f9842dcb8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/panoptic_centermask_X_101_FPN_ms_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "Base-Panoptic-ResNet-FPN.yaml" +MODEL: + 
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/Panoptic-CenterMask-X-101-32x8d-FPN-ms-3x" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1da8df687da3703e3d31a499e42ca52630615e29 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config.yaml @@ -0,0 +1,9 @@ +_BASE_: "Base-CenterMask-VoVNet.yaml" +MODEL: + WEIGHTS: "/home/zsclzy/centermask_npu/vovnet39_ese_detectron2.pth" + VOVNET: + CONV_BODY : "V-39-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +OUTPUT_DIR: "output/centermask/zsclzy_model_output" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c8e7055ba58ad3dd23d82ea5379e80e5b4ec0b3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml @@ -0,0 +1,25 @@ +_BASE_: "Base-CenterMask-VoVNet_AMP.yaml" +MODEL: + WEIGHTS: "vovnet39_ese_detectron2.pth" + VOVNET: + CONV_BODY : "V-39-eSE" +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 250000 +OUTPUT_DIR: "output/centermask/zsclzy_model_output_0329" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + CHECKPOINT_PERIOD: 100 + IMS_PER_BATCH: 32 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + # MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) + MIN_SIZE_TRAIN: (800,) +AMP: 1 +OPT_LEVEL: O1 +LOSS_SCALE_VALUE: None + diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dad1837512117f4f9ba02cf90a34c3e810cca782 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/README.md @@ -0,0 +1,85 @@ + +For a few datasets that detectron2 natively supports, +the datasets are assumed to exist in a directory called +"datasets/", under the directory where you launch the program. +They need to have the following directory structure: + +## Expected dataset structure for COCO instance/keypoint detection: + +``` +coco/ + annotations/ + instances_{train,val}2017.json + person_keypoints_{train,val}2017.json + {train,val}2017/ + # image files that are mentioned in the corresponding json +``` + +You can use the 2014 version of the dataset as well. + +Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset, +which you can download with `./prepare_for_tests.sh`. 
+ +## Expected dataset structure for PanopticFPN: + +``` +coco/ + annotations/ + panoptic_{train,val}2017.json + panoptic_{train,val}2017/ + # png annotations + panoptic_stuff_{train,val}2017/ # generated by the script mentioned below +``` + +Install panopticapi by: +``` +pip install git+https://github.com/cocodataset/panopticapi.git +``` +Then, run `python prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations. + +## Expected dataset structure for LVIS instance segmentation: +``` +coco/ + {train,val,test}2017/ +lvis/ + lvis_v0.5_{train,val}.json + lvis_v0.5_image_info_test.json +``` + +Install lvis-api by: +``` +pip install git+https://github.com/lvis-dataset/lvis-api.git +``` + +## Expected dataset structure for cityscapes: +``` +cityscapes/ + gtFine/ + train/ + aachen/ + color.png, instanceIds.png, labelIds.png, polygons.json, + labelTrainIds.png + ... + val/ + test/ + leftImg8bit/ + train/ + val/ + test/ +``` +Install cityscapes scripts by: +``` +pip install git+https://github.com/mcordts/cityscapesScripts.git +``` + +Note: +labelTrainIds.png are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. +They are not needed for instance segmentation. + +## Expected dataset structure for Pascal VOC: +``` +VOC20{07,12}/ + Annotations/ + ImageSets/ + JPEGImages/ +``` diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_for_tests.sh b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_for_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..d59b5643c95095921863dddd2f1e4d9be28e06ee --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_for_tests.sh @@ -0,0 +1,22 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +# Download some files needed for running tests. + +cd "${0%/*}" + +BASE=https://dl.fbaipublicfiles.com/detectron2 +mkdir -p coco/annotations + +for anno in instances_val2017_100 \ + person_keypoints_val2017_100 \ + instances_minival2014_100 \ + person_keypoints_minival2014_100; do + + dest=coco/annotations/$anno.json + [[ -s $dest ]] && { + echo "$dest exists. Skipping ..." + } || { + wget $BASE/annotations/coco/$anno.json -O $dest + } +done diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_panoptic_fpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..c2c383df8a306fcf9ab0abc54f22226888960add --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/datasets/prepare_panoptic_fpn.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +import time +import functools +import json +import multiprocessing as mp +import numpy as np +import os +from PIL import Image + +from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES +from fvcore.common.download import download + +from panopticapi.utils import rgb2id + + +def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): + panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) + panoptic = rgb2id(panoptic) + output = np.zeros_like(panoptic, dtype=np.uint8) + 255 + for seg in segments: + cat_id = seg["category_id"] + new_cat_id = id_map[cat_id] + output[panoptic == seg["id"]] = new_cat_id + Image.fromarray(output).save(output_semantic) + + +def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): + """ + Create semantic segmentation annotations from panoptic segmentation + annotations, to be used by PanopticFPN. + + It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. + It maps all stuff categories to contiguous ids starting from 1. + + Args: + panoptic_json (str): path to the panoptic json file, in COCO's format. + panoptic_root (str): a directory with panoptic annotation files, in COCO's format. + sem_seg_root (str): a directory to output semantic annotation files + categories (list[dict]): category metadata. Each dict needs to have: + "id": corresponds to the "category_id" in the json annotations + "isthing": 0 or 1 + """ + os.makedirs(sem_seg_root, exist_ok=True) + + stuff_ids = [k["id"] for k in categories if k["isthing"] == 0] + thing_ids = [k["id"] for k in categories if k["isthing"] == 1] + id_map = {} # map from category id to id in the output semantic annotation + assert len(stuff_ids) <= 254 + for i, stuff_id in enumerate(stuff_ids): + id_map[stuff_id] = i + 1 + for thing_id in thing_ids: + id_map[thing_id] = 0 + id_map[0] = 255 + + with open(panoptic_json) as f: + obj = json.load(f) + + pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) + + def iter_annotations(): + for anno in obj["annotations"]: + file_name = anno["file_name"] + segments = anno["segments_info"] + input = os.path.join(panoptic_root, file_name) + output = os.path.join(sem_seg_root, file_name) + yield input, output, segments + + print("Start writing to {} ...".format(sem_seg_root)) + start = time.time() + pool.starmap( + functools.partial(_process_panoptic_to_semantic, id_map=id_map), + iter_annotations(), + chunksize=100, + ) + print("Finished. 
time: {:.2f}s".format(time.time() - start)) + + +if __name__ == "__main__": + dataset_dir = os.path.join(os.path.dirname(__file__), "coco") + for s in ["val2017", "train2017"]: + separate_coco_semantic_from_panoptic( + os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), + os.path.join(dataset_dir, "panoptic_{}".format(s)), + os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)), + COCO_CATEGORIES, + ) + + # Prepare val2017_100 for quick testing: + + dest_dir = os.path.join(dataset_dir, "annotations/") + URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir) + with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f: + obj = json.load(f) + + def link_val100(dir_full, dir_100): + print("Creating " + dir_100 + " ...") + os.makedirs(dir_100, exist_ok=True) + for img in obj["images"]: + basename = os.path.splitext(img["file_name"])[0] + src = os.path.join(dir_full, basename + ".png") + dst = os.path.join(dir_100, basename + ".png") + src = os.path.relpath(src, start=dir_100) + os.symlink(src, dst) + + link_val100( + os.path.join(dataset_dir, "panoptic_val2017"), + os.path.join(dataset_dir, "panoptic_val2017_100"), + ) + + link_val100( + os.path.join(dataset_dir, "panoptic_stuff_val2017"), + os.path.join(dataset_dir, "panoptic_stuff_val2017_100"), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/demo.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..9c9460ffa1878cfe16c33a0f08e3b7ad78cf093e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/demo.py @@ -0,0 +1,156 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import argparse +import glob +import multiprocessing as mp +import os +import time +import cv2 +import tqdm +import sys + +#TODO : this is a temporary expedient +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from detectron2.data.detection_utils import read_image +from detectron2.utils.logger import setup_logger + +from predictor import VisualizationDemo +from centermask.config import get_cfg + +# constants +WINDOW_NAME = "COCO detections" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.FCOS.INFERENCE_TH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="Detectron2 Demo") + parser.add_argument( + "--config-file", + default="configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_inference_acc_test.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument("--input", nargs="+", help="A list of space separated input images") + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. 
" + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.4, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + + if args.input: + if os.path.isdir(args.input[0]): + args.input = [os.path.join(args.input[0], fname) for fname in os.listdir(args.input[0])] + elif len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img) + logger.info( + "{}: detected {} instances in {:.2f}s".format( + path, len(predictions["instances"]), time.time() - start_time + ) + ) + + if args.output: + if os.path.isdir(args.output): + assert os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + assert len(args.input) == 1, "Please specify a directory with args.output" + out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + ".mkv" + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. 
MPEG) + # fourcc=cv2.VideoWriter_fourcc(*"x264"), + fourcc = cv2.VideoWriter_fourcc(*"mp4v"), + fps=float(frames_per_second), + frameSize=(width, height), + isColor=True, + ) + assert os.path.isfile(args.video_input) + for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): + if args.output: + output_file.write(vis_frame) + else: + cv2.namedWindow(basename, cv2.WINDOW_NORMAL) + cv2.imshow(basename, vis_frame) + if cv2.waitKey(1) == 27: + break # esc to quit + video.release() + if args.output: + output_file.release() + else: + cv2.destroyAllWindows() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/predictor.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..1a3844234395d22bfe6fbd54e03e49e33c38eef2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/predictor.py @@ -0,0 +1,244 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import numpy as np +import atexit +import bisect +import multiprocessing as mp +from collections import deque +import cv2 +import torch +import matplotlib.pyplot as plt + +from detectron2.data import MetadataCatalog +from detectron2.engine.defaults import DefaultPredictor +from detectron2.utils.visualizer import ColorMode, Visualizer + +from video_visualizer import VideoVisualizer + + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. + """ + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + + def run_on_image(self, image): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + predictions = self.predictor(image) + # Convert image from OpenCV BGR format to Matplotlib RGB format. 
+ image = image[:, :, ::-1] + visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) + if "inst" in predictions: + visualizer.vis_inst(predictions["inst"]) + if "bases" in predictions: + self.vis_bases(predictions["bases"]) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg_predictions( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + + vis_output = visualizer.draw_instance_predictions(predictions=instances) + + return predictions, vis_output + + def _frame_from_video(self, video): + while video.isOpened(): + success, frame = video.read() + if success: + yield frame + else: + break + + def vis_bases(self, bases): + basis_colors = [[2, 200, 255], [107, 220, 255], [30, 200, 255], [60, 220, 255]] + bases = bases[0].squeeze() + bases = (bases / 8).tanh().cpu().numpy() + num_bases = len(bases) + fig, axes = plt.subplots(nrows=num_bases // 2, ncols=2) + for i, basis in enumerate(bases): + basis = (basis + 1) / 2 + basis = basis / basis.max() + basis_viz = np.zeros((basis.shape[0], basis.shape[1], 3), dtype=np.uint8) + basis_viz[:, :, 0] = basis_colors[i][0] + basis_viz[:, :, 1] = basis_colors[i][1] + basis_viz[:, :, 2] = np.uint8(basis * 255) + basis_viz = cv2.cvtColor(basis_viz, cv2.COLOR_HSV2RGB) + axes[i // 2][i % 2].imshow(basis_viz) + plt.show() + + def run_on_video(self, video): + """ + Visualizes predictions on frames of the input video. + + Args: + video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be + either a webcam or a video file. + + Yields: + ndarray: BGR visualizations of each video frame. + """ + video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) + + def process_predictions(frame, predictions): + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_frame = video_visualizer.draw_panoptic_seg_predictions( + frame, panoptic_seg.to(self.cpu_device), segments_info + ) + elif "instances" in predictions: + predictions = predictions["instances"].to(self.cpu_device) + vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) + elif "sem_seg" in predictions: + vis_frame = video_visualizer.draw_sem_seg( + frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + + # Converts Matplotlib RGB format to OpenCV BGR format + vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) + return vis_frame + + frame_gen = self._frame_from_video(video) + if self.parallel: + buffer_size = self.predictor.default_buffer_size + + frame_data = deque() + + for cnt, frame in enumerate(frame_gen): + frame_data.append(frame) + self.predictor.put(frame) + + if cnt >= buffer_size: + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + + while len(frame_data): + frame = frame_data.popleft() + predictions = self.predictor.get() + yield process_predictions(frame, predictions) + else: + for frame in frame_gen: + yield process_predictions(frame, self.predictor(frame)) + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. 
+ Because rendering the visualization takes considerably amount of time, + this helps improve throughput when rendering videos. + """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + + @property + def default_buffer_size(self): + return len(self.procs) * 5 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/video_visualizer.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/video_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..fad8e70dd1bc6102f21131a36fe5c63dcb129afb --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/demo/video_visualizer.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import numpy as np +import pycocotools.mask as mask_util + +from detectron2.utils.visualizer import ( + ColorMode, + Visualizer, + _create_text_labels, + _PanopticPrediction, +) + +from detectron2.utils.colormap import random_color + + +class _DetectedInstance: + """ + Used to store data about detected objects in video frame, + in order to transfer color to objects in the future frames. + + Attributes: + label (int): + bbox (tuple[float]): + mask_rle (dict): + color (tuple[float]): RGB colors in range (0, 1) + ttl (int): time-to-live for the instance. For example, if ttl=2, + the instance color can be transferred to objects in the next two frames. 
+ """ + + __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] + + def __init__(self, label, bbox, mask_rle, color, ttl): + self.label = label + self.bbox = bbox + self.mask_rle = mask_rle + self.color = color + self.ttl = ttl + + +class VideoVisualizer: + def __init__(self, metadata, instance_mode=ColorMode.IMAGE): + """ + Args: + metadata (MetadataCatalog): image metadata. + """ + self.metadata = metadata + self._old_instances = [] + assert instance_mode in [ + ColorMode.IMAGE, + ColorMode.IMAGE_BW, + ], "Other mode not supported yet." + self._instance_mode = instance_mode + + def draw_instance_predictions(self, frame, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + frame_visualizer = Visualizer(frame, self.metadata) + num_instances = len(predictions) + if num_instances == 0: + return frame_visualizer.output + + boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = predictions.pred_masks + # mask IOU is not yet enabled + # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) + # assert len(masks_rles) == num_instances + else: + masks = None + + detected = [ + _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + + if self._instance_mode == ColorMode.IMAGE_BW: + # any() returns uint8 tensor + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + (masks.any(dim=0) > 0).numpy() if masks is not None else None + ) + alpha = 0.3 + else: + alpha = 0.5 + + frame_visualizer.overlay_instances( + # boxes=None if masks is not None else boxes, # boxes are a bit distracting + boxes=boxes, + masks=masks, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + + return frame_visualizer.output + + def draw_sem_seg(self, frame, sem_seg, area_threshold=None): + """ + Args: + sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), + each value is the integer label. + area_threshold (Optional[int]): only draw segmentations larger than the threshold + """ + # don't need to do anything special + frame_visualizer = Visualizer(frame, self.metadata) + frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) + return frame_visualizer.output + + def draw_panoptic_seg_predictions( + self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 + ): + frame_visualizer = Visualizer(frame, self.metadata) + pred = _PanopticPrediction(panoptic_seg, segments_info) + + if self._instance_mode == ColorMode.IMAGE_BW: + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + pred.non_empty_mask() + ) + + # draw mask for all semantic segments first i.e. 
"stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + frame_visualizer.draw_binary_mask( + mask, + color=mask_color, + text=self.metadata.stuff_classes[category_idx], + alpha=alpha, + area_threshold=area_threshold, + ) + + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return frame_visualizer.output + # draw mask for all instances second + masks, sinfo = list(zip(*all_instances)) + num_instances = len(masks) + masks_rles = mask_util.encode( + np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") + ) + assert len(masks_rles) == num_instances + + category_ids = [x["category_id"] for x in sinfo] + detected = [ + _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + labels = [self.metadata.thing_classes[k] for k in category_ids] + + frame_visualizer.overlay_instances( + boxes=None, + masks=masks, + labels=labels, + keypoints=None, + assigned_colors=colors, + alpha=alpha, + ) + return frame_visualizer.output + + def _assign_colors(self, instances): + """ + Naive tracking heuristics to assign same color to the same instance, + will update the internal state of tracked instances. + + Returns: + list[tuple[float]]: list of colors. + """ + + # Compute iou with either boxes or masks: + is_crowd = np.zeros((len(instances),), dtype=np.bool) + if instances[0].bbox is None: + assert instances[0].mask_rle is not None + # use mask iou only when box iou is None + # because box seems good enough + rles_old = [x.mask_rle for x in self._old_instances] + rles_new = [x.mask_rle for x in instances] + ious = mask_util.iou(rles_old, rles_new, is_crowd) + threshold = 0.5 + else: + boxes_old = [x.bbox for x in self._old_instances] + boxes_new = [x.bbox for x in instances] + ious = mask_util.iou(boxes_old, boxes_new, is_crowd) + threshold = 0.6 + if len(ious) == 0: + ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") + + # Only allow matching instances of the same label: + for old_idx, old in enumerate(self._old_instances): + for new_idx, new in enumerate(instances): + if old.label != new.label: + ious[old_idx, new_idx] = 0 + + matched_new_per_old = np.asarray(ious).argmax(axis=1) + max_iou_per_old = np.asarray(ious).max(axis=1) + + # Try to find match for each old instance: + extra_instances = [] + for idx, inst in enumerate(self._old_instances): + if max_iou_per_old[idx] > threshold: + newidx = matched_new_per_old[idx] + if instances[newidx].color is None: + instances[newidx].color = inst.color + continue + # If an old instance does not match any new instances, + # keep it for the next frame in case it is just missed by the detector + inst.ttl -= 1 + if inst.ttl > 0: + extra_instances.append(inst) + + # Assign random color to newly-detected instances: + for inst in instances: + if inst.color is None: + inst.color = random_color(rgb=True, maximum=1) + self._old_instances = instances[:] + extra_instances + return [d.color for d in instances] \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/train_net.py b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..8d651b85f456de0d7ae4a2d2c04b734793d36b02 --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/models/centermask2/train_net.py @@ -0,0 +1,163 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Modified by Youngwan Lee (ETRI), 2020. All Rights Reserved. +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + # CityscapesInstanceEvaluator, + # CityscapesSemSegEvaluator, + # COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from centermask.evaluation import ( + COCOEvaluator, + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator +) +from detectron2.modeling import GeneralizedRCNNWithTTA +from detectron2.checkpoint import DetectionCheckpointer +from centermask.config import get_cfg + +#import flower.flower_data +class Trainer(DefaultTrainer): + """ + This is the same Trainer except that we rewrite the + `build_train_loader` method. + """ + + + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + # print("evaluator_type:",evaluator_type) + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, output_dir=output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. 
+ logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + # lzy 12.20 add npu device + # torch.npu.set_device() + # finish + + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop or subclassing the trainer. + """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + local_rank=args.local_rank, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-C4.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-C4.yaml @@ -0,0 +1,18 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RPN: + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "Res5ROIHeads" +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-DilatedC5.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-DilatedC5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-DilatedC5.yaml @@ -0,0 +1,31 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + RESNETS: + OUT_FEATURES: ["res5"] + RES5_DILATION: 2 + RPN: + IN_FEATURES: ["res5"] + PRE_NMS_TOPK_TEST: 6000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["res5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 
672, 704, 736, 768, 800) +VERSION: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbeb7e16c4edd24af959489355cf51de19b31774 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RCNN-FPN.yaml @@ -0,0 +1,46 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) + MASK_FORMAT: "polygon" # alternative: "bitmask" +VERSION: 2 +AMP: 0 +OPT_LEVEL: O0 +LOSS_SCALE_VALUE: 64 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RetinaNet.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RetinaNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Base-RetinaNet.yaml @@ -0,0 +1,25 @@ +MODEL: + META_ARCHITECTURE: "RetinaNet" + BACKBONE: + NAME: "build_retinanet_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] + FPN: + IN_FEATURES: ["res3", "res4", "res5"] + RETINANET: + IOU_THRESHOLDS: [0.4, 0.5] + IOU_LABELS: [0, -1, 1] + SMOOTH_L1_LOSS_BETA: 0.0 +DATASETS: + TRAIN: ("coco_2017_train",) + TEST: ("coco_2017_val",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + 
LOAD_PROPOSALS: True + RESNETS: + DEPTH: 50 + PROPOSAL_GENERATOR: + NAME: "PrecomputedProposals" +DATASETS: + TRAIN: ("coco_2017_train",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) + TEST: ("coco_2017_val",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +DATALOADER: + # proposals are part of the dataset_dicts, and take a lot of RAM + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46409dbd43afdab25695f124e134c849e952ce28 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +INPUT: + FIX_SHAPE: (1344, 1344) +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: False + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +DATALOADER: + ASPECT_RATIO_GROUPING: False diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: 
"../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: False + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RetinaNet.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + RPN: + PRE_NMS_TOPK_TEST: 12000 + POST_NMS_TOPK_TEST: 2000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "ProposalNetwork" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + 
RESNETS: + DEPTH: 50 + RPN: + POST_NMS_TOPK_TEST: 2000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..76e2113b06d5f140dda57ba6450e51d998f7c939 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +INPUT: + FIX_SHAPE: (1344, 1344) +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 +DATALOADER: + ASPECT_RATIO_GROUPING: False diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + 
DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml @@ -0,0 +1,6 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-DilatedC5.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88fdca046d9bc17fffc72084fc3031c22aa79c90 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +INPUT: + FIX_SHAPE: (1344, 1344) +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + RPN: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 2.0 + ROI_BOX_HEAD: + BBOX_REG_LOSS_TYPE: "giou" + BBOX_REG_LOSS_WEIGHT: 10.0 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + 
RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,13 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + KEYPOINT_ON: True + ROI_HEADS: + NUM_CLASSES: 1 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss + RPN: + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. 
+ POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml @@ -0,0 +1,12 @@ +_BASE_: "Base-Keypoint-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + MASK_ON: True + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + 
TRAIN: ("coco_2017_train_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: False diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml @@ -0,0 +1,5 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + # For better, more stable performance initialize from COCO + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" + MASK_ON: True + ROI_HEADS: + NUM_CLASSES: 8 +# This is similar to the setting used in Mask R-CNN paper, Appendix A +# But there are some differences, e.g., we did not initialize the output +# layer using the corresponding classes from COCO +INPUT: + MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) + MIN_SIZE_TRAIN_SAMPLING: "choice" + MIN_SIZE_TEST: 1024 + MAX_SIZE_TRAIN: 2048 + MAX_SIZE_TEST: 2048 +DATASETS: + TRAIN: ("cityscapes_fine_instance_seg_train",) + TEST: ("cityscapes_fine_instance_seg_val",) +SOLVER: + BASE_LR: 0.01 + STEPS: (18000,) + MAX_ITER: 24000 + IMS_PER_BATCH: 8 +TEST: + EVAL_PERIOD: 8000 diff --git 
a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a90ed9e433a00b8b9f43961d7a2696d5b9013127 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/README.md @@ -0,0 +1,83 @@ + +Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. + +The differences in implementation details are shared in +[Compatibility with Other Libraries](../../docs/notes/compatibility.md). + +The differences in model zoo's experimental settings include: +* Use scale augmentation during training. This improves AP with lower training cost. +* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may + affect other AP. +* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. +* Use `ROIAlignV2`. This does not significantly affect AP. + +In this directory, we provide a few configs that __do not__ have the above changes. +They mimic Detectron's behavior as close as possible, +and provide a fair comparison of accuracy and speed against Detectron. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
+| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
+| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
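+
+As a quick way to see what these Detectron1-mode configs actually override, the sketch below
+loads one of them and prints the relevant keys. This is an illustrative addition rather than part
+of the upstream README: it assumes the patched `centermask.config.get_cfg` (the same config entry
+point imported by `train_net.py` in this repo, which must also know the extra keys such as `AMP`
+added by this port) is importable, and that it is run from the directory containing `configs/`.
+
+```python
+from centermask.config import get_cfg  # same config entry point as train_net.py
+
+cfg = get_cfg()
+# _BASE_ inheritance is resolved relative to the config file itself.
+cfg.merge_from_file("configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml")
+cfg.freeze()
+
+print(cfg.MODEL.RPN.SMOOTH_L1_BETA)        # 0.1111 -> Detectron1's smooth L1 beta
+print(cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE)  # "ROIAlign" instead of the default "ROIAlignV2"
+print(cfg.INPUT.MIN_SIZE_TRAIN)            # (800,)  -> no scale augmentation
+```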
+ +## Comparisons: + +* Faster R-CNN: Detectron's AP is 36.7, similar to ours. +* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's + [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be + compensated back by some parameter tuning. +* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation. + +For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,17 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,27 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + RPN: + SMOOTH_L1_BETA: 0.1111 + # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 + # 1000 proposals per-image is found to hurt box AP. + # Therefore we increase it to 1500 per-image. 
+ POST_NMS_TOPK_TRAIN: 1500 +DATASETS: + TRAIN: ("keypoints_coco_2017_train",) + TEST: ("keypoints_coco_2017_val",) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + # Detectron1 uses smooth L1 loss with some magic beta values. + # The defaults are changed to L1 loss in Detectron2. + RPN: + SMOOTH_L1_BETA: 0.1111 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_MASK_HEAD: + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64b4caa4ef2b284782367ea702e1ae6653472630 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1230 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v0.5_train",) + TEST: ("lvis_v0.5_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + MASK_ON: True + RESNETS: + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml @@ -0,0 +1,26 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" + 
PIXEL_STD: [57.375, 57.120, 58.395] + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 101 + ROI_HEADS: + NUM_CLASSES: 1203 + SCORE_THRESH_TEST: 0.0001 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +DATASETS: + TRAIN: ("lvis_v1_train",) + TEST: ("lvis_v1_val",) +SOLVER: + STEPS: (120000, 160000) + MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs +TEST: + DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 +DATALOADER: + SAMPLER_TRAIN: "RepeatFactorTrainingSampler" + REPEAT_THRESHOLD: 0.001 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml @@ -0,0 +1,12 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml @@ -0,0 +1,15 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml @@ -0,0 +1,36 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + MASK_ON: True + WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" + RESNETS: + STRIDE_IN_1X1: False # this is a C2 model + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 + DEPTH: 152 + DEFORM_ON_PER_STAGE: [False, True, True, True] + ROI_HEADS: + NAME: "CascadeROIHeads" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NUM_CONV: 8 + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + IMS_PER_BATCH: 128 + STEPS: (35000, 45000) + MAX_ITER: 50000 + BASE_LR: 0.16 +INPUT: + MIN_SIZE_TRAIN: (640, 864) + MIN_SIZE_TRAIN_SAMPLING: "range" + MAX_SIZE_TRAIN: 1440 + CROP: + ENABLED: True +TEST: + EVAL_PERIOD: 2500 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + CLS_AGNOSTIC_MASK: True diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml @@ -0,0 +1,8 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 + DEFORM_MODULATED: False +SOLVER: + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "GN" + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "GN" + ROI_MASK_HEAD: + NORM: "GN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml @@ -0,0 +1,24 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + NORM: "SyncBN" + 
STRIDE_IN_1X1: True + FPN: + NORM: "SyncBN" + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_CONV: 4 + NUM_FC: 1 + NORM: "SyncBN" + ROI_MASK_HEAD: + NORM: "SyncBN" +SOLVER: + # 3x schedule + STEPS: (210000, 250000) + MAX_ITER: 270000 +TEST: + PRECISE_BN: + ENABLED: True diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml @@ -0,0 +1,26 @@ +# A large PanopticFPN for demo purposes. +# Use GN on backbone to support semantic seg. +# Use Cascade + Deform Conv to improve localization. +_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" +MODEL: + WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" + RESNETS: + DEPTH: 101 + NORM: "GN" + DEFORM_ON_PER_STAGE: [False, True, True, True] + STRIDE_IN_1X1: False + FPN: + NORM: "GN" + ROI_HEADS: + NAME: CascadeROIHeads + ROI_BOX_HEAD: + CLS_AGNOSTIC_BBOX_REG: True + ROI_MASK_HEAD: + NORM: "GN" + RPN: + POST_NMS_TOPK_TRAIN: 2000 +SOLVER: + STEPS: (105000, 125000) + MAX_ITER: 135000 + IMS_PER_BATCH: 32 + BASE_LR: 0.04 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml @@ -0,0 +1,13 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + # Train from random initialization. + WEIGHTS: "" + # It makes sense to divide by STD when training from scratch + # But it seems to make no difference on the results and C2's models didn't do this. + # So we keep things consistent with C2. + # PIXEL_STD: [57.375, 57.12, 58.395] + MASK_ON: True + BACKBONE: + FREEZE_AT: 0 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. 
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml @@ -0,0 +1,19 @@ +_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" +MODEL: + PIXEL_STD: [57.375, 57.12, 58.395] + WEIGHTS: "" + MASK_ON: True + RESNETS: + STRIDE_IN_1X1: False + BACKBONE: + FREEZE_AT: 0 +SOLVER: + # 9x schedule + IMS_PER_BATCH: 64 # 4x the standard + STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k + MAX_ITER: 202500 # 90k * 9 / 4 + BASE_LR: 0.08 +TEST: + EVAL_PERIOD: 2500 +# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 +# to learn what you need for training from scratch. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_train_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + RESNETS: + DEPTH: 50 + ROI_HEADS: + NUM_CLASSES: 20 +INPUT: + MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) + MIN_SIZE_TEST: 800 +DATASETS: + TRAIN: ('voc_2007_trainval', 
'voc_2012_trainval') + TEST: ('voc_2007_test',) +SOLVER: + STEPS: (12000, 16000) + MAX_ITER: 18000 # 17.4 epochs + WARMUP_ITERS: 100 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a278199b8557a1e2fb341fe6757786a6cecb82b3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/README.md @@ -0,0 +1 @@ +These are quick configs for performance or accuracy regression tracking purposes. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,15 @@ 
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) + TEST: ("coco_2017_val_100",) + PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" +DATASETS: + TEST: ("keypoints_coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc09034bdd3db9d3e0dc62a017a3883dbe79c649 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True +DATASETS: + TRAIN: ("keypoints_coco_2017_val_100",) + TEST: ("keypoints_coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml @@ -0,0 +1,30 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False + LOSS_WEIGHT: 4.0 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 
6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,28 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + KEYPOINT_ON: True + RESNETS: + DEPTH: 50 + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + NUM_CLASSES: 1 + ROI_KEYPOINT_HEAD: + POOLER_RESOLUTION: 14 + POOLER_SAMPLING_RATIO: 2 + ROI_BOX_HEAD: + SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss + RPN: + SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss +DATASETS: + TRAIN: ("keypoints_coco_2017_val",) + TEST: ("keypoints_coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + WARMUP_FACTOR: 0.33333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "value" + CLIP_VALUE: 1.0 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23 --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.001 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml @@ -0,0 +1,22 @@ +_BASE_: "../Base-RCNN-C4.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + IMS_PER_BATCH: 8 # base uses 16 + WARMUP_FACTOR: 0.33333 + WARMUP_ITERS: 100 + STEPS: (11000, 11600) + MAX_ITER: 12000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] + AUG: + ENABLED: True + MIN_SIZES: (700, 800) # to save some time diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml 
new file mode 100644 index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,14 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml @@ -0,0 +1,6 @@ +_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" +MODEL: + ROI_BOX_HEAD: + TRAIN_ON_PRED_BOXES: True +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,21 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + MASK_ON: True +DATASETS: + TRAIN: ("coco_2017_val",) + TEST: ("coco_2017_val",) +INPUT: + MIN_SIZE_TRAIN: (600,) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 800 + MAX_SIZE_TEST: 1000 +SOLVER: + WARMUP_FACTOR: 0.3333333 + WARMUP_ITERS: 100 + STEPS: (5500, 5800) + MAX_ITER: 6000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" +DATASETS: + TEST: ("coco_2017_val_100_panoptic_separated",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_separated",) + TEST: ("coco_2017_val_100_panoptic_separated",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 1 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05816316f851690e60ee54b852b6f49ede73c886 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "PanopticFPN" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: True + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + LOSS_WEIGHT: 0.5 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_separated",) + TEST: ("coco_2017_val_panoptic_separated",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 500 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 38.73, 0.7], ["sem_seg", "mIoU", 64.73, 1.2], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" +DATASETS: + TEST: ("coco_2017_val_100",) +TEST: + EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("coco_2017_val_100",) + TEST: ("coco_2017_val_100",) +SOLVER: + STEPS: (30,) + MAX_ITER: 40 + BASE_LR: 0.005 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,10 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) + TEST: ("coco_2017_val_100_panoptic_stuffonly",) +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +SOLVER: + BASE_LR: 0.005 + STEPS: (30,) + MAX_ITER: 40 + IMS_PER_BATCH: 4 +DATALOADER: + NUM_WORKERS: 2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,20 @@ +_BASE_: "../Base-RCNN-FPN.yaml" +MODEL: + META_ARCHITECTURE: "SemanticSegmentor" + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +DATASETS: + TRAIN: ("coco_2017_val_panoptic_stuffonly",) + TEST: ("coco_2017_val_panoptic_stuffonly",) +SOLVER: + BASE_LR: 0.01 + WARMUP_FACTOR: 0.001 + WARMUP_ITERS: 300 + STEPS: (5500,) + MAX_ITER: 7000 +TEST: + EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] +INPUT: + # no scale augmentation + MIN_SIZE_TRAIN: (800, ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b888ee9714697fcbc03aa6b08f920edd8924fec3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/README.md @@ -0,0 +1,115 @@ +# Use Builtin Datasets + +A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog) +for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc). +This document explains how to setup the builtin datasets so they can be used by the above APIs. +[Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`, +and how to add new datasets to them. + +Detectron2 has builtin support for a few datasets. +The datasets are assumed to exist in a directory specified by the environment variable +`DETECTRON2_DATASETS`. +Under this directory, detectron2 will look for datasets in the structure described below, if needed. +``` +$DETECTRON2_DATASETS/ + coco/ + lvis/ + cityscapes/ + VOC20{07,12}/ +``` + +You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. +If left unset, the default is `./datasets` relative to your current working directory. + +The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) +contains configs and models that use these builtin datasets. + +## Expected dataset structure for COCO instance/keypoint detection: + +``` +coco/ + annotations/ + instances_{train,val}2017.json + person_keypoints_{train,val}2017.json + {train,val}2017/ + # image files that are mentioned in the corresponding json +``` + +You can use the 2014 version of the dataset as well. + +Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset, +which you can download with `./prepare_for_tests.sh`. + +## Expected dataset structure for PanopticFPN: + +``` +coco/ + annotations/ + panoptic_{train,val}2017.json + panoptic_{train,val}2017/ # png annotations + panoptic_stuff_{train,val}2017/ # generated by the script mentioned below +``` + +Install panopticapi by: +``` +pip install git+https://github.com/cocodataset/panopticapi.git +``` +Then, run `python prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations. 
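Editor's note (not part of the patch): once `DETECTRON2_DATASETS` points at a directory with the COCO layout described above, the builtin splits can be sanity-checked through the `DatasetCatalog` / `MetadataCatalog` APIs mentioned at the top of this README. A minimal sketch, assuming detectron2 and pycocotools are installed and `/path/to/datasets` follows the structure shown here:

```python
# Verify that a builtin COCO split is visible to detectron2.
# Set the dataset root before importing detectron2.data, since the builtin
# datasets are registered (using DETECTRON2_DATASETS) at import time.
import os
os.environ["DETECTRON2_DATASETS"] = "/path/to/datasets"  # or export it in the shell

from detectron2.data import DatasetCatalog, MetadataCatalog

dicts = DatasetCatalog.get("coco_2017_val")   # loads annotations; fails if the json is missing
meta = MetadataCatalog.get("coco_2017_val")   # class names and other metadata
print(len(dicts), "images;", len(meta.thing_classes), "thing classes")
```

If this raises a file-not-found error, the directory layout or the environment variable is the first thing to check.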
+ +## Expected dataset structure for LVIS instance segmentation: +``` +coco/ + {train,val,test}2017/ +lvis/ + lvis_v0.5_{train,val}.json + lvis_v0.5_image_info_test.json + lvis_v1_{train,val}.json + lvis_v1_image_info_test{,_challenge}.json +``` + +Install lvis-api by: +``` +pip install git+https://github.com/lvis-dataset/lvis-api.git +``` + +To evaluate models trained on the COCO dataset using LVIS annotations, +run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations. + +## Expected dataset structure for cityscapes: +``` +cityscapes/ + gtFine/ + train/ + aachen/ + color.png, instanceIds.png, labelIds.png, polygons.json, + labelTrainIds.png + ... + val/ + test/ + leftImg8bit/ + train/ + val/ + test/ +``` +Install cityscapes scripts by: +``` +pip install git+https://github.com/mcordts/cityscapesScripts.git +``` + +Note: to create labelTrainIds.png, first prepare the above structure, then run cityscapesescript with: +``` +CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py +``` +These files are not needed for instance segmentation. + +## Expected dataset structure for Pascal VOC: +``` +VOC20{07,12}/ + Annotations/ + ImageSets/ + Main/ + trainval.txt + test.txt + # train.txt or val.txt, if you use these splits + JPEGImages/ +``` diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_cocofied_lvis.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_cocofied_lvis.py new file mode 100644 index 0000000000000000000000000000000000000000..d66279fc80541909a44b274485bfe2efeb988376 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_cocofied_lvis.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import json +import os +from collections import defaultdict + +# This mapping is extracted from the official LVIS mapping: +# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json +COCO_SYNSET_CATEGORIES = [ + {"synset": "person.n.01", "coco_cat_id": 1}, + {"synset": "bicycle.n.01", "coco_cat_id": 2}, + {"synset": "car.n.01", "coco_cat_id": 3}, + {"synset": "motorcycle.n.01", "coco_cat_id": 4}, + {"synset": "airplane.n.01", "coco_cat_id": 5}, + {"synset": "bus.n.01", "coco_cat_id": 6}, + {"synset": "train.n.01", "coco_cat_id": 7}, + {"synset": "truck.n.01", "coco_cat_id": 8}, + {"synset": "boat.n.01", "coco_cat_id": 9}, + {"synset": "traffic_light.n.01", "coco_cat_id": 10}, + {"synset": "fireplug.n.01", "coco_cat_id": 11}, + {"synset": "stop_sign.n.01", "coco_cat_id": 13}, + {"synset": "parking_meter.n.01", "coco_cat_id": 14}, + {"synset": "bench.n.01", "coco_cat_id": 15}, + {"synset": "bird.n.01", "coco_cat_id": 16}, + {"synset": "cat.n.01", "coco_cat_id": 17}, + {"synset": "dog.n.01", "coco_cat_id": 18}, + {"synset": "horse.n.01", "coco_cat_id": 19}, + {"synset": "sheep.n.01", "coco_cat_id": 20}, + {"synset": "beef.n.01", "coco_cat_id": 21}, + {"synset": "elephant.n.01", "coco_cat_id": 22}, + {"synset": "bear.n.01", "coco_cat_id": 23}, + {"synset": "zebra.n.01", "coco_cat_id": 24}, + {"synset": "giraffe.n.01", "coco_cat_id": 25}, + {"synset": "backpack.n.01", "coco_cat_id": 27}, + {"synset": "umbrella.n.01", "coco_cat_id": 28}, + {"synset": "bag.n.04", "coco_cat_id": 31}, + {"synset": "necktie.n.01", "coco_cat_id": 32}, + {"synset": "bag.n.06", "coco_cat_id": 33}, + {"synset": "frisbee.n.01", "coco_cat_id": 34}, + {"synset": "ski.n.01", "coco_cat_id": 35}, + {"synset": "snowboard.n.01", "coco_cat_id": 36}, + {"synset": "ball.n.06", "coco_cat_id": 37}, + {"synset": "kite.n.03", "coco_cat_id": 38}, + {"synset": "baseball_bat.n.01", "coco_cat_id": 39}, + {"synset": "baseball_glove.n.01", "coco_cat_id": 40}, + {"synset": "skateboard.n.01", "coco_cat_id": 41}, + {"synset": "surfboard.n.01", "coco_cat_id": 42}, + {"synset": "tennis_racket.n.01", "coco_cat_id": 43}, + {"synset": "bottle.n.01", "coco_cat_id": 44}, + {"synset": "wineglass.n.01", "coco_cat_id": 46}, + {"synset": "cup.n.01", "coco_cat_id": 47}, + {"synset": "fork.n.01", "coco_cat_id": 48}, + {"synset": "knife.n.01", "coco_cat_id": 49}, + {"synset": "spoon.n.01", "coco_cat_id": 50}, + {"synset": "bowl.n.03", "coco_cat_id": 51}, + {"synset": "banana.n.02", "coco_cat_id": 52}, + {"synset": "apple.n.01", "coco_cat_id": 53}, + {"synset": "sandwich.n.01", "coco_cat_id": 54}, + {"synset": "orange.n.01", "coco_cat_id": 55}, + {"synset": "broccoli.n.01", "coco_cat_id": 56}, + {"synset": "carrot.n.01", "coco_cat_id": 57}, + {"synset": "frank.n.02", "coco_cat_id": 58}, + {"synset": "pizza.n.01", "coco_cat_id": 59}, + {"synset": "doughnut.n.02", "coco_cat_id": 60}, + {"synset": "cake.n.03", "coco_cat_id": 61}, + {"synset": "chair.n.01", "coco_cat_id": 62}, + {"synset": "sofa.n.01", "coco_cat_id": 63}, + {"synset": "pot.n.04", "coco_cat_id": 64}, + {"synset": "bed.n.01", "coco_cat_id": 65}, + {"synset": "dining_table.n.01", "coco_cat_id": 67}, + {"synset": "toilet.n.02", "coco_cat_id": 70}, + {"synset": "television_receiver.n.01", "coco_cat_id": 72}, + {"synset": "laptop.n.01", "coco_cat_id": 73}, + {"synset": "mouse.n.04", "coco_cat_id": 74}, + {"synset": "remote_control.n.01", "coco_cat_id": 75}, + {"synset": "computer_keyboard.n.01", "coco_cat_id": 76}, + {"synset": "cellular_telephone.n.01", 
"coco_cat_id": 77}, + {"synset": "microwave.n.02", "coco_cat_id": 78}, + {"synset": "oven.n.01", "coco_cat_id": 79}, + {"synset": "toaster.n.02", "coco_cat_id": 80}, + {"synset": "sink.n.01", "coco_cat_id": 81}, + {"synset": "electric_refrigerator.n.01", "coco_cat_id": 82}, + {"synset": "book.n.01", "coco_cat_id": 84}, + {"synset": "clock.n.01", "coco_cat_id": 85}, + {"synset": "vase.n.01", "coco_cat_id": 86}, + {"synset": "scissors.n.01", "coco_cat_id": 87}, + {"synset": "teddy.n.01", "coco_cat_id": 88}, + {"synset": "hand_blower.n.01", "coco_cat_id": 89}, + {"synset": "toothbrush.n.01", "coco_cat_id": 90}, +] + + +def cocofy_lvis(input_filename, output_filename): + """ + Filter LVIS instance segmentation annotations to remove all categories that are not included in + COCO. The new json files can be used to evaluate COCO AP using `lvis-api`. The category ids in + the output json are the incontiguous COCO dataset ids. + + Args: + input_filename (str): path to the LVIS json file. + output_filename (str): path to the COCOfied json file. + """ + + with open(input_filename, "r") as f: + lvis_json = json.load(f) + + lvis_annos = lvis_json.pop("annotations") + cocofied_lvis = copy.deepcopy(lvis_json) + lvis_json["annotations"] = lvis_annos + + # Mapping from lvis cat id to coco cat id via synset + lvis_cat_id_to_synset = {cat["id"]: cat["synset"] for cat in lvis_json["categories"]} + synset_to_coco_cat_id = {x["synset"]: x["coco_cat_id"] for x in COCO_SYNSET_CATEGORIES} + # Synsets that we will keep in the dataset + synsets_to_keep = set(synset_to_coco_cat_id.keys()) + coco_cat_id_with_instances = defaultdict(int) + + new_annos = [] + ann_id = 1 + for ann in lvis_annos: + lvis_cat_id = ann["category_id"] + synset = lvis_cat_id_to_synset[lvis_cat_id] + if synset not in synsets_to_keep: + continue + coco_cat_id = synset_to_coco_cat_id[synset] + new_ann = copy.deepcopy(ann) + new_ann["category_id"] = coco_cat_id + new_ann["id"] = ann_id + ann_id += 1 + new_annos.append(new_ann) + coco_cat_id_with_instances[coco_cat_id] += 1 + cocofied_lvis["annotations"] = new_annos + + for image in cocofied_lvis["images"]: + for key in ["not_exhaustive_category_ids", "neg_category_ids"]: + new_category_list = [] + for lvis_cat_id in image[key]: + synset = lvis_cat_id_to_synset[lvis_cat_id] + if synset not in synsets_to_keep: + continue + coco_cat_id = synset_to_coco_cat_id[synset] + new_category_list.append(coco_cat_id) + coco_cat_id_with_instances[coco_cat_id] += 1 + image[key] = new_category_list + + coco_cat_id_with_instances = set(coco_cat_id_with_instances.keys()) + + new_categories = [] + for cat in lvis_json["categories"]: + synset = cat["synset"] + if synset not in synsets_to_keep: + continue + coco_cat_id = synset_to_coco_cat_id[synset] + if coco_cat_id not in coco_cat_id_with_instances: + continue + new_cat = copy.deepcopy(cat) + new_cat["id"] = coco_cat_id + new_categories.append(new_cat) + cocofied_lvis["categories"] = new_categories + + with open(output_filename, "w") as f: + json.dump(cocofied_lvis, f) + print("{} is COCOfied and stored in {}.".format(input_filename, output_filename)) + + +if __name__ == "__main__": + dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "lvis") + for s in ["lvis_v0.5_train", "lvis_v0.5_val"]: + print("Start COCOfing {}.".format(s)) + cocofy_lvis( + os.path.join(dataset_dir, "{}.json".format(s)), + os.path.join(dataset_dir, "{}_cocofied.json".format(s)), + ) diff --git 
a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_for_tests.sh b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_for_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..d59b5643c95095921863dddd2f1e4d9be28e06ee --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_for_tests.sh @@ -0,0 +1,22 @@ +#!/bin/bash -e +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +# Download some files needed for running tests. + +cd "${0%/*}" + +BASE=https://dl.fbaipublicfiles.com/detectron2 +mkdir -p coco/annotations + +for anno in instances_val2017_100 \ + person_keypoints_val2017_100 \ + instances_minival2014_100 \ + person_keypoints_minival2014_100; do + + dest=coco/annotations/$anno.json + [[ -s $dest ]] && { + echo "$dest exists. Skipping ..." + } || { + wget $BASE/annotations/coco/$anno.json -O $dest + } +done diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_panoptic_fpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..7cf1b2d436c9902d3ef4ce85ac29f337ebb8ab90 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/datasets/prepare_panoptic_fpn.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import json +import multiprocessing as mp +import numpy as np +import os +import time +from fvcore.common.download import download +from PIL import Image + +from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES + +from panopticapi.utils import rgb2id + + +def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): + panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) + panoptic = rgb2id(panoptic) + output = np.zeros_like(panoptic, dtype=np.uint8) + 255 + for seg in segments: + cat_id = seg["category_id"] + new_cat_id = id_map[cat_id] + output[panoptic == seg["id"]] = new_cat_id + Image.fromarray(output).save(output_semantic) + + +def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): + """ + Create semantic segmentation annotations from panoptic segmentation + annotations, to be used by PanopticFPN. + + It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. + It maps all stuff categories to contiguous ids starting from 1. + + Args: + panoptic_json (str): path to the panoptic json file, in COCO's format. + panoptic_root (str): a directory with panoptic annotation files, in COCO's format. + sem_seg_root (str): a directory to output semantic annotation files + categories (list[dict]): category metadata. 
Each dict needs to have: + "id": corresponds to the "category_id" in the json annotations + "isthing": 0 or 1 + """ + os.makedirs(sem_seg_root, exist_ok=True) + + stuff_ids = [k["id"] for k in categories if k["isthing"] == 0] + thing_ids = [k["id"] for k in categories if k["isthing"] == 1] + id_map = {} # map from category id to id in the output semantic annotation + assert len(stuff_ids) <= 254 + for i, stuff_id in enumerate(stuff_ids): + id_map[stuff_id] = i + 1 + for thing_id in thing_ids: + id_map[thing_id] = 0 + id_map[0] = 255 + + with open(panoptic_json) as f: + obj = json.load(f) + + pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) + + def iter_annotations(): + for anno in obj["annotations"]: + file_name = anno["file_name"] + segments = anno["segments_info"] + input = os.path.join(panoptic_root, file_name) + output = os.path.join(sem_seg_root, file_name) + yield input, output, segments + + print("Start writing to {} ...".format(sem_seg_root)) + start = time.time() + pool.starmap( + functools.partial(_process_panoptic_to_semantic, id_map=id_map), + iter_annotations(), + chunksize=100, + ) + print("Finished. time: {:.2f}s".format(time.time() - start)) + + +if __name__ == "__main__": + dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco") + for s in ["val2017", "train2017"]: + separate_coco_semantic_from_panoptic( + os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), + os.path.join(dataset_dir, "panoptic_{}".format(s)), + os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)), + COCO_CATEGORIES, + ) + + # Prepare val2017_100 for quick testing: + + dest_dir = os.path.join(dataset_dir, "annotations/") + URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir) + with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f: + obj = json.load(f) + + def link_val100(dir_full, dir_100): + print("Creating " + dir_100 + " ...") + os.makedirs(dir_100, exist_ok=True) + for img in obj["images"]: + basename = os.path.splitext(img["file_name"])[0] + src = os.path.join(dir_full, basename + ".png") + dst = os.path.join(dir_100, basename + ".png") + src = os.path.relpath(src, start=dir_100) + os.symlink(src, dst) + + link_val100( + os.path.join(dataset_dir, "panoptic_val2017"), + os.path.join(dataset_dir, "panoptic_val2017_100"), + ) + + link_val100( + os.path.join(dataset_dir, "panoptic_stuff_val2017"), + os.path.join(dataset_dir, "panoptic_stuff_val2017_100"), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c1a03e6e7d277f3a1106cb8a7731e3be338eeb33 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .utils.env import setup_environment + +setup_environment() + + +# This line will be programatically read/write by setup.py. +# Leave them at the bottom of this file and don't touch them. +__version__ = "0.2" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5dca135b8b25a0e8412baf9c84c19ac9b49f30 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from . import catalog as _UNUSED # register the handler +from .detection_checkpoint import DetectionCheckpointer +from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer + +__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/c2_model_loading.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/c2_model_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..7da2312feabe0f5d403d7e5e9b8aca00ceb624be --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/c2_model_loading.py @@ -0,0 +1,326 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import logging +import re +import torch +from fvcore.common.checkpoint import ( + get_missing_parameters_message, + get_unexpected_parameters_message, +) + + +def convert_basic_c2_names(original_keys): + """ + Apply some basic name conversion to names in C2 weights. + It only deals with typical backbone models. + + Args: + original_keys (list[str]): + Returns: + list[str]: The same number of strings matching those in original_keys. 
+ """ + layer_keys = copy.deepcopy(original_keys) + layer_keys = [ + {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys + ] # some hard-coded mappings + + layer_keys = [k.replace("_", ".") for k in layer_keys] + layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] + layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] + # Uniform both bn and gn names to "norm" + layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] + layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] + + # stem + layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] + # to avoid mis-matching with "conv1" in other components (e.g. detection head) + layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] + + # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) + # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] + # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] + # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] + # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] + + # blocks + layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] + layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] + layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] + layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] + + # DensePose substitutions + layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] + layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] + layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] + layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] + layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] + return layer_keys + + +def convert_c2_detectron_names(weights): + """ + Map Caffe2 Detectron weight names to Detectron2 names. 
+ + Args: + weights (dict): name -> tensor + + Returns: + dict: detectron2 names -> tensor + dict: detectron2 names -> C2 names + """ + logger = logging.getLogger(__name__) + logger.info("Remapping C2 weights ......") + original_keys = sorted(weights.keys()) + layer_keys = copy.deepcopy(original_keys) + + layer_keys = convert_basic_c2_names(layer_keys) + + # -------------------------------------------------------------------------- + # RPN hidden representation conv + # -------------------------------------------------------------------------- + # FPN case + # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then + # shared for all other levels, hence the appearance of "fpn2" + layer_keys = [ + k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys + ] + # Non-FPN case + layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # RPN box transformation conv + # -------------------------------------------------------------------------- + # FPN case (see note above about "fpn2") + layer_keys = [ + k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") + for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + # Non-FPN case + layer_keys = [ + k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys + ] + layer_keys = [ + k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") + for k in layer_keys + ] + + # -------------------------------------------------------------------------- + # Fast R-CNN box head + # -------------------------------------------------------------------------- + layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] + layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] + layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] + layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] + # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s + layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # FPN lateral and output convolutions + # -------------------------------------------------------------------------- + def fpn_map(name): + """ + Look for keys with the following patterns: + 1) Starts with "fpn.inner." 
+ Example: "fpn.inner.res2.2.sum.lateral.weight" + Meaning: These are lateral pathway convolutions + 2) Starts with "fpn.res" + Example: "fpn.res2.2.sum.weight" + Meaning: These are FPN output convolutions + """ + splits = name.split(".") + norm = ".norm" if "norm" in splits else "" + if name.startswith("fpn.inner."): + # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] + stage = int(splits[2][len("res") :]) + return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) + elif name.startswith("fpn.res"): + # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] + stage = int(splits[1][len("res") :]) + return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) + return name + + layer_keys = [fpn_map(k) for k in layer_keys] + + # -------------------------------------------------------------------------- + # Mask R-CNN mask head + # -------------------------------------------------------------------------- + # roi_heads.StandardROIHeads case + layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] + layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] + layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] + # roi_heads.Res5ROIHeads case + layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Keypoint R-CNN head + # -------------------------------------------------------------------------- + # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" + layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] + layer_keys = [ + k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys + ] + layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] + + # -------------------------------------------------------------------------- + # Done with replacements + # -------------------------------------------------------------------------- + assert len(set(layer_keys)) == len(layer_keys) + assert len(original_keys) == len(layer_keys) + + new_weights = {} + new_keys_to_original_keys = {} + for orig, renamed in zip(original_keys, layer_keys): + new_keys_to_original_keys[renamed] = orig + if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): + # remove the meaningless prediction weight for background class + new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 + new_weights[renamed] = weights[orig][new_start_idx:] + logger.info( + "Remove prediction weight for background class in {}. The shape changes from " + "{} to {}.".format( + renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) + ) + ) + elif renamed.startswith("cls_score."): + # move weights of bg class from original index 0 to last index + logger.info( + "Move classification weights for background class in {} from index 0 to " + "index {}.".format(renamed, weights[orig].shape[0] - 1) + ) + new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) + else: + new_weights[renamed] = weights[orig] + + return new_weights, new_keys_to_original_keys + + +# Note the current matching is not symmetric. +# it assumes model_state_dict will have longer names. 
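Editor's note (not part of the patch): the checkpoint/model key alignment defined below relies on a dot-delimited suffix rule, with ties broken by the longest matching checkpoint key. A small standalone illustration of that rule, using hypothetical key names in the spirit of the docstring's example:

```python
# Sketch of the suffix rule used by align_and_update_state_dicts: a checkpoint
# key matches a model key when it equals it or is a "."-delimited suffix of it;
# among multiple matches, the longest checkpoint key wins.
def match(model_key: str, ckpt_key: str) -> bool:
    return model_key == ckpt_key or model_key.endswith("." + ckpt_key)

model_key = "backbone.bottom_up.res2.conv1.weight"      # illustrative model key
candidates = ["conv1.weight", "res2.conv1.weight", "stem.conv1.weight"]

matches = [c for c in candidates if match(model_key, c)]
best = max(matches, key=len)   # longest match is preferred
print(matches)                 # ['conv1.weight', 'res2.conv1.weight']
print(best)                    # 'res2.conv1.weight'
```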
+def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): + """ + Match names between the two state-dict, and update the values of model_state_dict in-place with + copies of the matched tensor in ckpt_state_dict. + If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 + model and will be renamed at first. + + Strategy: suppose that the models that we will create will have prefixes appended + to each of its keys, for example due to an extra level of nesting that the original + pre-trained weights from ImageNet won't contain. For example, model.state_dict() + might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains + res2.conv1.weight. We thus want to match both parameters together. + For that, we look for each model weight, look among all loaded keys if there is one + that is a suffix of the current weight name, and use it if that's the case. + If multiple matches exist, take the one with longest size + of the corresponding name. For example, for the same model as before, the pretrained + weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, + we want to match backbone[0].body.conv1.weight to conv1.weight, and + backbone[0].body.res2.conv1.weight to res2.conv1.weight. + """ + model_keys = sorted(model_state_dict.keys()) + if c2_conversion: + ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) + # original_keys: the name in the original dict (before renaming) + else: + original_keys = {x: x for x in ckpt_state_dict.keys()} + ckpt_keys = sorted(ckpt_state_dict.keys()) + + def match(a, b): + # Matched ckpt_key should be a complete (starts with '.') suffix. + # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, + # but matches whatever_conv1 or mesh_head.whatever_conv1. + return a == b or a.endswith("." + b) + + # get a matrix of string matches, where each (i, j) entry correspond to the size of the + # ckpt_key string, if it matches + match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] + match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) + # use the matched one with longest size in case of multiple matches + max_match_size, idxs = match_matrix.max(1) + # remove indices that correspond to no-match + idxs[max_match_size == 0] = -1 + + # used for logging + max_len_model = max(len(key) for key in model_keys) if model_keys else 1 + max_len_ckpt = max(len(key) for key in ckpt_keys) if ckpt_keys else 1 + log_str_template = "{: <{}} loaded from {: <{}} of shape {}" + logger = logging.getLogger(__name__) + # matched_pairs (matched checkpoint key --> matched model key) + matched_keys = {} + for idx_model, idx_ckpt in enumerate(idxs.tolist()): + if idx_ckpt == -1: + continue + key_model = model_keys[idx_model] + key_ckpt = ckpt_keys[idx_ckpt] + value_ckpt = ckpt_state_dict[key_ckpt] + shape_in_model = model_state_dict[key_model].shape + + if shape_in_model != value_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_ckpt, value_ckpt.shape, key_model, shape_in_model + ) + ) + logger.warning( + "{} will not be loaded. Please double check and see if this is desired.".format( + key_ckpt + ) + ) + continue + + model_state_dict[key_model] = value_ckpt.clone() + if key_ckpt in matched_keys: # already added to matched_keys + logger.error( + "Ambiguity found for {} in checkpoint!" 
+ "It matches at least two keys in the model ({} and {}).".format( + key_ckpt, key_model, matched_keys[key_ckpt] + ) + ) + raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") + + matched_keys[key_ckpt] = key_model + logger.info( + log_str_template.format( + key_model, + max_len_model, + original_keys[key_ckpt], + max_len_ckpt, + tuple(shape_in_model), + ) + ) + matched_model_keys = matched_keys.values() + matched_ckpt_keys = matched_keys.keys() + # print warnings about unmatched keys on both side + unmatched_model_keys = [k for k in model_keys if k not in matched_model_keys] + if len(unmatched_model_keys): + logger.info(get_missing_parameters_message(unmatched_model_keys)) + + unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_ckpt_keys] + if len(unmatched_ckpt_keys): + logger.info( + get_unexpected_parameters_message(original_keys[x] for x in unmatched_ckpt_keys) + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/catalog.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..77c4244294cd0861a6d0c41f33a82ae4c291d015 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/catalog.py @@ -0,0 +1,147 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from fvcore.common.file_io import PathHandler, PathManager + + +class ModelCatalog(object): + """ + Store mappings from names to third-party models. + """ + + S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" + + # MSRA models have STRIDE_IN_1X1=True. False otherwise. + # NOTE: all BN models here have fused BN into an affine layer. + # As a result, you should only load them to a model with "FrozenBN". + # Loading them to a model with regular BN or SyncBN is wrong. + # Even when loaded to FrozenBN, it is still different from affine by an epsilon, + # which should be negligible for training. + # NOTE: all models here uses PIXEL_STD=[1,1,1] + # NOTE: Most of the BN models here are no longer used. We use the + # re-converted pre-trained models under detectron2 model zoo instead. 
+ C2_IMAGENET_MODELS = { + "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", + "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", + "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", + "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", + "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", + "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", + "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", + } + + C2_DETECTRON_PATH_FORMAT = ( + "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 + ) + + C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" + C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" + + # format: {model_name} -> part of the url + C2_DETECTRON_MODELS = { + "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 + "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 + "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 + "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 + "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 + "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 + "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 + "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 + "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 + "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 + "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 + "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 + "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 + } + + @staticmethod + def get(name): + if name.startswith("Caffe2Detectron/COCO"): + return ModelCatalog._get_c2_detectron_baseline(name) + if name.startswith("ImageNetPretrained/"): + return ModelCatalog._get_c2_imagenet_pretrained(name) + raise RuntimeError("model not present in the catalog: {}".format(name)) + + @staticmethod + def _get_c2_imagenet_pretrained(name): + prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX + name = name[len("ImageNetPretrained/") :] + name = ModelCatalog.C2_IMAGENET_MODELS[name] + url = "/".join([prefix, name]) + return url + + @staticmethod + def _get_c2_detectron_baseline(name): + name = name[len("Caffe2Detectron/COCO/") :] + url = ModelCatalog.C2_DETECTRON_MODELS[name] + if "keypoint_rcnn" in name: + dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS + else: + dataset = ModelCatalog.C2_DATASET_COCO + + if "35998355/rpn_R-50-C4_1x" in name: + # this one model is somehow different from others .. 
+ type = "rpn" + else: + type = "generalized_rcnn" + + # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. + url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( + prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset + ) + return url + + +class ModelCatalogHandler(PathHandler): + """ + Resolve URL like catalog://. + """ + + PREFIX = "catalog://" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path): + logger = logging.getLogger(__name__) + catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) + logger.info("Catalog entry {} points to {}".format(path, catalog_path)) + return PathManager.get_local_path(catalog_path) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +class Detectron2Handler(PathHandler): + """ + Resolve anything that's in Detectron2 model zoo. + """ + + PREFIX = "detectron2://" + S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path): + name = path[len(self.PREFIX) :] + return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(ModelCatalogHandler()) +PathManager.register_handler(Detectron2Handler()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/detection_checkpoint.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/detection_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..2d8be4697343cb2665aeeef7ff55d1efb9dc0991 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/checkpoint/detection_checkpoint.py @@ -0,0 +1,86 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pickle +from fvcore.common.checkpoint import Checkpointer +from fvcore.common.file_io import PathManager + +import detectron2.utils.comm as comm + +from .c2_model_loading import align_and_update_state_dicts + + +class DetectionCheckpointer(Checkpointer): + """ + Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 + model zoo, and apply conversions for legacy models. 
+ """ + + def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): + is_main_process = comm.is_main_process() + super().__init__( + model, + save_dir, + save_to_disk=is_main_process if save_to_disk is None else save_to_disk, + **checkpointables, + ) + + def _load_file(self, filename): + if filename.endswith(".pkl"): + with PathManager.open(filename, "rb") as f: + data = pickle.load(f, encoding="latin1") + if "model" in data and "__author__" in data: + # file is in Detectron2 model zoo format + self.logger.info("Reading a file from '{}'".format(data["__author__"])) + return data + else: + # assume file is from Caffe2 / Detectron1 model zoo + if "blobs" in data: + # Detection models have "blobs", but ImageNet models don't + data = data["blobs"] + data = {k: v for k, v in data.items() if not k.endswith("_momentum")} + return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} + + loaded = super()._load_file(filename) # load native pth checkpoint + if "model" not in loaded: + loaded = {"model": loaded} + return loaded + + def _load_model(self, checkpoint): + if checkpoint.get("matching_heuristics", False): + self._convert_ndarray_to_tensor(checkpoint["model"]) + # convert weights by name-matching heuristics + model_state_dict = self.model.state_dict() + align_and_update_state_dicts( + model_state_dict, + checkpoint["model"], + c2_conversion=checkpoint.get("__author__", None) == "Caffe2", + ) + checkpoint["model"] = model_state_dict + # for non-caffe2 models, use standard ways to load it + incompatible = super()._load_model(checkpoint) + if incompatible is None: # support older versions of fvcore + return None + + model_buffers = dict(self.model.named_buffers(recurse=False)) + for k in ["pixel_mean", "pixel_std"]: + # Ignore missing key message about pixel_mean/std. + # Though they may be missing in old checkpoints, they will be correctly + # initialized from config anyway. + if k in model_buffers: + try: + incompatible.missing_keys.remove(k) + except ValueError: + pass + return incompatible diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b355feb2879505add1b3ae31881ceb08be1f2d4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from .compat import downgrade_config, upgrade_config +from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable + +__all__ = [ + "CfgNode", + "get_cfg", + "global_cfg", + "set_global_cfg", + "downgrade_config", + "upgrade_config", + "configurable", +] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/compat.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..1a53b5a7ee6646c9224c9b974b21204250c626f4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/compat.py @@ -0,0 +1,242 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Backward compatibility of configs. + +Instructions to bump version: ++ It's not needed to bump version if new keys are added. + It's only needed when backward-incompatible changes happen + (i.e., some existing keys disappear, or the meaning of a key changes) ++ To bump version, do the following: + 1. Increment _C.VERSION in defaults.py + 2. Add a converter in this file. + + Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, + and a function "downgrade" which in-place downgrades config from X to X-1 + + In each function, VERSION is left unchanged. + + Each converter assumes that its input has the relevant keys + (i.e., the input is not a partial config). + 3. Run the tests (test_config.py) to make sure the upgrade & downgrade + functions are consistent. +""" + +import logging +from typing import List, Optional, Tuple + +from .config import CfgNode as CN +from .defaults import _C + +__all__ = ["upgrade_config", "downgrade_config"] + + +def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: + """ + Upgrade a config from its current version to a newer version. + + Args: + cfg (CfgNode): + to_version (int): defaults to the latest version. + """ + cfg = cfg.clone() + if to_version is None: + to_version = _C.VERSION + + assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version): + converter = globals()["ConverterV" + str(k + 1)] + converter.upgrade(cfg) + cfg.VERSION = k + 1 + return cfg + + +def downgrade_config(cfg: CN, to_version: int) -> CN: + """ + Downgrade a config from its current version to an older version. + + Args: + cfg (CfgNode): + to_version (int): + + Note: + A general downgrade of arbitrary configs is not always possible due to the + different functionalities in different versions. + The purpose of downgrade is only to recover the defaults in old versions, + allowing it to load an old partial yaml config. + Therefore, the implementation only needs to fill in the default values + in the old version when a general downgrade is not possible. 
+ """ + cfg = cfg.clone() + assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( + cfg.VERSION, to_version + ) + for k in range(cfg.VERSION, to_version, -1): + converter = globals()["ConverterV" + str(k)] + converter.downgrade(cfg) + cfg.VERSION = k - 1 + return cfg + + +def guess_version(cfg: CN, filename: str) -> int: + """ + Guess the version of a partial config where the VERSION field is not specified. + Returns the version, or the latest if cannot make a guess. + + This makes it easier for users to migrate. + """ + logger = logging.getLogger(__name__) + + def _has(name: str) -> bool: + cur = cfg + for n in name.split("."): + if n not in cur: + return False + cur = cur[n] + return True + + # Most users' partial configs have "MODEL.WEIGHT", so guess on it + ret = None + if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): + ret = 1 + + if ret is not None: + logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) + else: + ret = _C.VERSION + logger.warning( + "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format( + filename, ret + ) + ) + return ret + + +def _rename(cfg: CN, old: str, new: str) -> None: + old_keys = old.split(".") + new_keys = new.split(".") + + def _set(key_seq: List[str], val: str) -> None: + cur = cfg + for k in key_seq[:-1]: + if k not in cur: + cur[k] = CN() + cur = cur[k] + cur[key_seq[-1]] = val + + def _get(key_seq: List[str]) -> CN: + cur = cfg + for k in key_seq: + cur = cur[k] + return cur + + def _del(key_seq: List[str]) -> None: + cur = cfg + for k in key_seq[:-1]: + cur = cur[k] + del cur[key_seq[-1]] + if len(cur) == 0 and len(key_seq) > 1: + _del(key_seq[:-1]) + + _set(new_keys, _get(old_keys)) + _del(old_keys) + + +class _RenameConverter: + """ + A converter that handles simple rename. + """ + + RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) + + @classmethod + def upgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME: + _rename(cfg, old, new) + + @classmethod + def downgrade(cls, cfg: CN) -> None: + for old, new in cls.RENAME[::-1]: + _rename(cfg, new, old) + + +class ConverterV1(_RenameConverter): + RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] + + +class ConverterV2(_RenameConverter): + """ + A large bulk of rename, before public release. 
+ """ + + RENAME = [ + ("MODEL.WEIGHT", "MODEL.WEIGHTS"), + ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), + ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), + ( + "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", + "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", + ), + ( + "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", + "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", + ), + ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), + ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), + ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), + ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), + ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), + ("TEST.AUG_ON", "TEST.AUG.ENABLED"), + ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), + ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), + ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), + ] + + @classmethod + def upgrade(cls, cfg: CN) -> None: + super().upgrade(cfg) + + if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": + _rename( + cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" + ) + _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] + else: + _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") + _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") + del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] + del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] + + @classmethod + def downgrade(cls, cfg: CN) -> None: + super().downgrade(cfg) + + _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") + _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") + cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS + cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES + cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/config.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..0875acaed54eaa0aaf761ecea4d6f96430240fd4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/config.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import inspect +import logging +from fvcore.common.config import CfgNode as _CfgNode +from fvcore.common.file_io import PathManager + + +class CfgNode(_CfgNode): + """ + The same as `fvcore.common.config.CfgNode`, but different in: + + 1. Use unsafe yaml loading by default. + Note that this may lead to arbitrary code execution: you must not + load a config file from untrusted sources before manually inspecting + the content of the file. + 2. Support config versioning. + When attempting to merge an old config, it will convert the old config automatically. + """ + + # Note that the default value of allow_unsafe is changed to True + def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: + assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" + loaded_cfg = _CfgNode.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) + loaded_cfg = type(self)(loaded_cfg) + + # defaults.py needs to import CfgNode + from .defaults import _C + + latest_ver = _C.VERSION + assert ( + latest_ver == self.VERSION + ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" + + logger = logging.getLogger(__name__) + + loaded_ver = loaded_cfg.get("VERSION", None) + if loaded_ver is None: + from .compat import guess_version + + loaded_ver = guess_version(loaded_cfg, cfg_filename) + assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( + loaded_ver, self.VERSION + ) + + if loaded_ver == self.VERSION: + self.merge_from_other_cfg(loaded_cfg) + else: + # compat.py needs to import CfgNode + from .compat import upgrade_config, downgrade_config + + logger.warning( + "Loading an old v{} config file '{}' by automatically upgrading to v{}. " + "See docs/CHANGELOG.md for instructions to update your files.".format( + loaded_ver, cfg_filename, self.VERSION + ) + ) + # To convert, first obtain a full config at an old version + old_self = downgrade_config(self, to_version=loaded_ver) + old_self.merge_from_other_cfg(loaded_cfg) + new_config = upgrade_config(old_self) + self.clear() + self.update(new_config) + + def dump(self, *args, **kwargs): + """ + Returns: + str: a yaml string representation of the config + """ + # to make it show up in docs + return super().dump(*args, **kwargs) + + +global_cfg = CfgNode() + + +def get_cfg() -> CfgNode: + """ + Get a copy of the default config. + + Returns: + a detectron2 CfgNode instance. + """ + from .defaults import _C + + return _C.clone() + + +def set_global_cfg(cfg: CfgNode) -> None: + """ + Let the global config point to the given cfg. + + Assume that the given "cfg" has the key "KEY", after calling + `set_global_cfg(cfg)`, the key can be accessed by: + :: + from detectron2.config import global_cfg + print(global_cfg.KEY) + + By using a hacky global config, you can access these configs anywhere, + without having to pass the config object or the values deep into the code. + This is a hacky feature introduced for quick prototyping / research exploration. + """ + global global_cfg + global_cfg.clear() + global_cfg.update(cfg) + + +def configurable(init_func): + """ + Decorate a class's __init__ method so that it can be called with a CfgNode + object using the class's from_config classmethod. 
+ + Examples: + :: + class A: + @configurable + def __init__(self, a, b=2, c=3): + pass + + @classmethod + def from_config(cls, cfg): + # Returns kwargs to be passed to __init__ + return {"a": cfg.A, "b": cfg.B} + + a1 = A(a=1, b=2) # regular construction + a2 = A(cfg) # construct with a cfg + a3 = A(cfg, b=3, c=4) # construct with extra overwrite + """ + assert init_func.__name__ == "__init__", "@configurable should only be used for __init__!" + if init_func.__module__.startswith("detectron2."): + assert ( + init_func.__doc__ is not None and "experimental" in init_func.__doc__ + ), f"configurable {init_func} should be marked experimental" + + @functools.wraps(init_func) + def wrapped(self, *args, **kwargs): + try: + from_config_func = type(self).from_config + except AttributeError: + raise AttributeError("Class with @configurable must have a 'from_config' classmethod.") + if not inspect.ismethod(from_config_func): + raise TypeError("Class with @configurable must have a 'from_config' classmethod.") + + if _called_with_cfg(*args, **kwargs): + explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) + init_func(self, **explicit_args) + else: + init_func(self, *args, **kwargs) + + return wrapped + + +def _get_args_from_config(from_config_func, *args, **kwargs): + """ + Use `from_config` to obtain explicit arguments. + + Returns: + dict: arguments to be used for cls.__init__ + """ + signature = inspect.signature(from_config_func) + if list(signature.parameters.keys())[0] != "cfg": + raise TypeError( + f"{from_config_func.__self__}.from_config must take 'cfg' as the first argument!" + ) + support_var_arg = any( + param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] + for param in signature.parameters.values() + ) + if support_var_arg: # forward all arguments to from_config, if from_config accepts them + ret = from_config_func(*args, **kwargs) + else: + # forward supported arguments to from_config + supported_arg_names = set(signature.parameters.keys()) + extra_kwargs = {} + for name in list(kwargs.keys()): + if name not in supported_arg_names: + extra_kwargs[name] = kwargs.pop(name) + ret = from_config_func(*args, **kwargs) + # forward the other arguments to __init__ + ret.update(extra_kwargs) + return ret + + +def _called_with_cfg(*args, **kwargs): + """ + Returns: + bool: whether the arguments contain CfgNode and should be considered + forwarded to from_config. + """ + if len(args) and isinstance(args[0], _CfgNode): + return True + if isinstance(kwargs.pop("cfg", None), _CfgNode): + return True + # `from_config`'s first argument is forced to be "cfg". + # So the above check covers all cases. + return False diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/defaults.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..c57ac4134b928559d677eaadf6fb69a7d47a5559 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/config/defaults.py @@ -0,0 +1,641 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .config import CfgNode as CN + +# ----------------------------------------------------------------------------- +# Convention about Training / Test specific parameters +# ----------------------------------------------------------------------------- +# Whenever an argument can be either used for training or for testing, the +# corresponding name will be post-fixed by a _TRAIN for a training parameter, +# or _TEST for a test-specific parameter. +# For example, the number of images during training will be +# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be +# IMAGES_PER_BATCH_TEST + +# ----------------------------------------------------------------------------- +# Config definition +# ----------------------------------------------------------------------------- + +_C = CN() + +# The version number, to upgrade from old configs to new ones if any +# changes happen. It's recommended to keep a VERSION in your config file. +_C.VERSION = 2 + +_C.MODEL = CN() +_C.MODEL.LOAD_PROPOSALS = False +_C.MODEL.MASK_ON = False +_C.MODEL.KEYPOINT_ON = False +_C.MODEL.DEVICE = "npu" +_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" + +# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file +# to be loaded to the model. You can find available models in the model zoo. +_C.MODEL.WEIGHTS = "" + +# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). +# To train on images of different number of channels, just set different mean & std. +# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] +_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] +# When using pre-trained models in Detectron1 or any MSRA models, +# std has been absorbed into its conv1 weights, so the std needs to be set 1. +# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) +_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] + + +# ----------------------------------------------------------------------------- +# INPUT +# ----------------------------------------------------------------------------- +_C.INPUT = CN() +#Size of the fixed shape +_C.INPUT.FIX_SHAPE = (1344, 1344) + +# Size of the smallest side of the image during training +_C.INPUT.MIN_SIZE_TRAIN = (800,) +# Sample size of smallest side by choice or random selection from range give by +# INPUT.MIN_SIZE_TRAIN +_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" +# Maximum size of the side of the image during training +_C.INPUT.MAX_SIZE_TRAIN = 1333 +# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. +_C.INPUT.MIN_SIZE_TEST = 800 +# Maximum size of the side of the image during testing +_C.INPUT.MAX_SIZE_TEST = 1333 + +# `True` if cropping is used for data augmentation during training +_C.INPUT.CROP = CN({"ENABLED": False}) +# Cropping type: +# - "relative" crop (H * CROP.SIZE[0], W * CROP.SIZE[1]) part of an input of size (H, W) +# - "relative_range" uniformly sample relative crop size from between [CROP.SIZE[0], [CROP.SIZE[1]]. +# and [1, 1] and use it as in "relative" scenario. 
+# - "absolute" crop part of an input with absolute size: (CROP.SIZE[0], CROP.SIZE[1]). +# - "absolute_range", for an input of size (H, W), uniformly sample H_crop in +# [CROP.SIZE[0], min(H, CROP.SIZE[1])] and W_crop in [CROP.SIZE[0], min(W, CROP.SIZE[1])] +_C.INPUT.CROP.TYPE = "relative_range" +# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of +# pixels if CROP.TYPE is "absolute" +_C.INPUT.CROP.SIZE = [0.9, 0.9] + + +# Whether the model needs RGB, YUV, HSV etc. +# Should be one of the modes defined here, as we use PIL to read the image: +# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes +# with BGR being the one exception. One can set image format to BGR, we will +# internally use RGB for conversion and flip the channels over +_C.INPUT.FORMAT = "BGR" +# The ground truth mask format that the model will use. +# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. +_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" + + +# ----------------------------------------------------------------------------- +# Dataset +# ----------------------------------------------------------------------------- +_C.DATASETS = CN() +# List of the dataset names for training. Must be registered in DatasetCatalog +_C.DATASETS.TRAIN = () +# List of the pre-computed proposal files for training, which must be consistent +# with datasets listed in DATASETS.TRAIN. +_C.DATASETS.PROPOSAL_FILES_TRAIN = () +# Number of top scoring precomputed proposals to keep for training +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 +# List of the dataset names for testing. Must be registered in DatasetCatalog +_C.DATASETS.TEST = () +# List of the pre-computed proposal files for test, which must be consistent +# with datasets listed in DATASETS.TEST. +_C.DATASETS.PROPOSAL_FILES_TEST = () +# Number of top scoring precomputed proposals to keep for test +_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 + +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATALOADER = CN() +# Number of data loading threads +_C.DATALOADER.NUM_WORKERS = 4 +# If True, each batch should contain only images for which the aspect ratio +# is compatible. This groups portrait images together, and landscape images +# are not batched with portrait images. +_C.DATALOADER.ASPECT_RATIO_GROUPING = True +# Options: TrainingSampler, RepeatFactorTrainingSampler +_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" +# Repeat threshold for RepeatFactorTrainingSampler +_C.DATALOADER.REPEAT_THRESHOLD = 0.0 +# Tf True, when working on datasets that have instance annotations, the +# training dataloader will filter out images without associated annotations +_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True + +# ---------------------------------------------------------------------------- # +# Backbone options +# ---------------------------------------------------------------------------- # +_C.MODEL.BACKBONE = CN() + +_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" +# Freeze the first several stages so they are not trained. +# There are 5 stages in ResNet. The first is a convolution, and the following +# stages are each group of residual blocks. 
+_C.MODEL.BACKBONE.FREEZE_AT = 2 + + +# ---------------------------------------------------------------------------- # +# FPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.FPN = CN() +# Names of the input feature maps to be used by FPN +# They must have contiguous power of 2 strides +# e.g., ["res2", "res3", "res4", "res5"] +_C.MODEL.FPN.IN_FEATURES = [] +_C.MODEL.FPN.OUT_CHANNELS = 256 + +# Options: "" (no norm), "GN" +_C.MODEL.FPN.NORM = "" + +# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" +_C.MODEL.FPN.FUSE_TYPE = "sum" + + +# ---------------------------------------------------------------------------- # +# Proposal generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.PROPOSAL_GENERATOR = CN() +# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" +_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" +# Proposal height and width both need to be greater than MIN_SIZE +# (a the scale used during training or inference) +_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 + + +# ---------------------------------------------------------------------------- # +# Anchor generator options +# ---------------------------------------------------------------------------- # +_C.MODEL.ANCHOR_GENERATOR = CN() +# The generator can be any name in the ANCHOR_GENERATOR registry +_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" +# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. +# Format: list[list[float]]. SIZES[i] specifies the list of sizes +# to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true, +# or len(SIZES) == 1 is true and size list SIZES[0] is used for all +# IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] +# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect +# ratios are generated by an anchor generator. +# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) +# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, +# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used +# for all IN_FEATURES. +_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] +# Anchor angles. +# list[list[float]], the angle in degrees, for each input feature map. +# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. +_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] +# Relative offset between the center of the first anchor and the top-left corner of the image +# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. +# The value is not expected to affect model accuracy. +_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +_C.MODEL.RPN = CN() +_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY + +# Names of the input feature maps to be used by RPN +# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN +_C.MODEL.RPN.IN_FEATURES = ["res4"] +# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels +# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors +_C.MODEL.RPN.BOUNDARY_THRESH = -1 +# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD +# ==> positive RPN example: 1) +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD +# ==> negative RPN example: 0) +# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) +# are ignored (-1) +_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] +_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] +# Total number of RPN examples per image +_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 +# Target fraction of foreground (positive) examples per RPN minibatch +_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 +# Options are: "smooth_l1", "giou" +_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1" +_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0 +# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets +_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 +_C.MODEL.RPN.LOSS_WEIGHT = 1.0 +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 +_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 +# Number of top scoring RPN proposals to keep after applying NMS +# When FPN is used, this limit is applied per level and then again to the union +# of proposals from all levels +# NOTE: When FPN is used, the meaning of this config is different from Detectron1. +# It means per-batch topk in Detectron1, but per-image topk here. +# See the "find_top_rpn_proposals" function for details. +_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 +_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 +# NMS threshold used on RPN proposals +_C.MODEL.RPN.NMS_THRESH = 0.7 + +# ---------------------------------------------------------------------------- # +# ROI HEADS options +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_HEADS = CN() +_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" +# Number of foreground classes +_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 +# Names of the input feature maps to be used by ROI heads +# Currently all heads (box, mask, ...) use the same input feature map list +# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN +_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] +# IOU overlap ratios [IOU_THRESHOLD] +# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) +# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) +_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] +_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] +# RoI minibatch size *per image* (number of regions of interest [ROIs]) +# Total number of RoIs per training minibatch = +# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH +# E.g., a common configuration is: 512 * 16 = 8192 +_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 +# Target fraction of RoI minibatch that is labeled foreground (i.e. 
class > 0) +_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 + +# Only used on test mode + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down +# inference. +_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 +# If True, augment proposals with ground-truth boxes before sampling proposals to +# train ROI heads. +_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True + +# ---------------------------------------------------------------------------- # +# Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_HEAD = CN() +# C4 don't use head name option +# Options for non-C4 models: FastRCNNConvFCHead, +_C.MODEL.ROI_BOX_HEAD.NAME = "" +# Options are: "smooth_l1", "giou" +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1" +# The final scaling coefficient on the box regression loss, used to balance the magnitude of its +# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`. +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0 +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) +# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. +_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 +_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + +_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 +# Hidden layer dimension for FC layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 +_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 +# Channel dimension for Conv layers in the RoI box head +_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_BOX_HEAD.NORM = "" +# Whether to use class agnostic for bbox regression +_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False +# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. +_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False + +# ---------------------------------------------------------------------------- # +# Cascaded Box Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() +# The number of cascade stages is implicitly defined by the length of the following two configs. 
+_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( + (10.0, 10.0, 5.0, 5.0), + (20.0, 20.0, 10.0, 10.0), + (30.0, 30.0, 15.0, 15.0), +) +_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) + + +# ---------------------------------------------------------------------------- # +# Mask Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_MASK_HEAD = CN() +_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" +_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head +_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 +# Normalization method for the convolution layers. +# Options: "" (no norm), "GN", "SyncBN". +_C.MODEL.ROI_MASK_HEAD.NORM = "" +# Whether to use class agnostic for mask prediction +_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" + + +# ---------------------------------------------------------------------------- # +# Keypoint Head +# ---------------------------------------------------------------------------- # +_C.MODEL.ROI_KEYPOINT_HEAD = CN() +_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 +_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) +_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. + +# Images with too few (or no) keypoints are excluded from training. +_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 +# Normalize by the total number of visible keypoints in the minibatch if True. +# Otherwise, normalize by the total number of keypoints that could ever exist +# in the minibatch. +# The keypoint softmax loss is only calculated on visible keypoints. +# Since the number of visible keypoints can vary significantly between +# minibatches, this has the effect of up-weighting the importance of +# minibatches with few visible keypoints. (Imagine the extreme case of +# only one visible keypoint versus N: in the case of N, each one +# contributes 1/N to the gradient compared to the single keypoint +# determining the gradient direction). Instead, we can normalize the +# loss by the total number of keypoints, if it were the case that all +# keypoints were visible in a full minibatch. (Returning to the example, +# this means that the one visible keypoint contributes as much as each +# of the N keypoints.) +_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True +# Multi-task loss weight to use for keypoints +# Recommended values: +# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True +# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False +_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 +# Type of pooling operation applied to the incoming feature map for each RoI +_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" + +# ---------------------------------------------------------------------------- # +# Semantic Segmentation Head +# ---------------------------------------------------------------------------- # +_C.MODEL.SEM_SEG_HEAD = CN() +_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" +_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] +# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for +# the correposnding pixel. 
+_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 +# Number of classes in the semantic segmentation head +_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 +# Number of channels in the 3x3 convs inside semantic-FPN heads. +_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 +# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. +_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 +# Normalization method for the convolution layers. Options: "" (no norm), "GN". +_C.MODEL.SEM_SEG_HEAD.NORM = "GN" +_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 + +_C.MODEL.PANOPTIC_FPN = CN() +# Scaling of all losses from instance detection / segmentation head. +_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 + +# options when combining instance & semantic segmentation outputs +_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) +_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 +_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 +_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 + + +# ---------------------------------------------------------------------------- # +# RetinaNet Head +# ---------------------------------------------------------------------------- # +_C.MODEL.RETINANET = CN() + +# This is the number of foreground classes. +_C.MODEL.RETINANET.NUM_CLASSES = 80 + +_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +_C.MODEL.RETINANET.NUM_CONVS = 4 + +# IoU overlap ratio [bg, fg] for labeling anchors. +# Anchors with < bg are labeled negative (0) +# Anchors with >= bg and < fg are ignored (-1) +# Anchors with >= fg are labeled positive (1) +_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] +_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] + +# Prior prob for rare case (i.e. foreground) at the beginning of training. +# This is used to set the bias for the logits layer of the classifier subnet. +# This improves training stability in the case of heavy class imbalance. +_C.MODEL.RETINANET.PRIOR_PROB = 0.01 + +# Inference cls score threshold, only anchors with score > INFERENCE_TH are +# considered for inference (to improve speed) +_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 +_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 +_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 + +# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets +_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) + +# Loss parameters +_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 +_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 +_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 + + +# ---------------------------------------------------------------------------- # +# ResNe[X]t options (ResNets = {ResNet, ResNeXt} +# Note that parts of a resnet may be used for both the backbone and the head +# These options apply to both +# ---------------------------------------------------------------------------- # +_C.MODEL.RESNETS = CN() + +_C.MODEL.RESNETS.DEPTH = 50 +_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +_C.MODEL.RESNETS.NUM_GROUPS = 1 + +# Options: FrozenBN, GN, "SyncBN", "BN" +_C.MODEL.RESNETS.NORM = "FrozenBN" + +# Baseline width of each group. +# Scaling this parameters will scale the width of all bottleneck layers. 
+_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +_C.MODEL.RESNETS.STRIDE_IN_1X1 = True + +# Apply dilation in stage "res5" +_C.MODEL.RESNETS.RES5_DILATION = 1 + +# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet +# For R18 and R34, this needs to be set to 64 +_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 +_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 + +# Apply Deformable Convolution in stages +# Specify if apply deform_conv on Res2, Res3, Res4, Res5 +_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] +# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); +# Use False for DeformableV1. +_C.MODEL.RESNETS.DEFORM_MODULATED = False +# Number of groups in deformable conv. +_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 + + +# ---------------------------------------------------------------------------- # +# Solver +# ---------------------------------------------------------------------------- # +_C.SOLVER = CN() + +# See detectron2/solver/build.py for LR scheduler options +_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" + +_C.SOLVER.MAX_ITER = 40000 + +_C.SOLVER.BASE_LR = 0.001 + +_C.SOLVER.MOMENTUM = 0.9 + +_C.SOLVER.NESTEROV = False + +_C.SOLVER.WEIGHT_DECAY = 0.0001 +# The weight decay that's applied to parameters of normalization layers +# (typically the affine transformation) +_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 + +_C.SOLVER.GAMMA = 0.1 +# The iteration number to decrease learning rate by GAMMA. +_C.SOLVER.STEPS = (30000,) + +_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 +_C.SOLVER.WARMUP_ITERS = 1000 +_C.SOLVER.WARMUP_METHOD = "linear" + +# Save a checkpoint after every this number of iterations +_C.SOLVER.CHECKPOINT_PERIOD = 5000 + +# Number of images per batch across all machines. +# If we have 16 GPUs and IMS_PER_BATCH = 32, +# each GPU will see 2 images per batch. +# May be adjusted automatically if REFERENCE_WORLD_SIZE is set. +_C.SOLVER.IMS_PER_BATCH = 16 + +# The reference number of workers (GPUs) this config is meant to train with. +# With a non-zero value, it will be used by DefaultTrainer to compute a desired +# per-worker batch size, and then scale the other related configs (total batch size, +# learning rate, etc) to match the per-worker batch size if the actual number +# of workers during training is different from this reference. +_C.SOLVER.REFERENCE_WORLD_SIZE = 0 + +# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for +# biases. This is not useful (at least for recent models). You should avoid +# changing these and they exist only to reproduce Detectron v1 training if +# desired. 
+_C.SOLVER.BIAS_LR_FACTOR = 1.0 +_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY + +# Gradient clipping +_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) +# Type of gradient clipping, currently 2 values are supported: +# - "value": the absolute values of elements of each gradients are clipped +# - "norm": the norm of the gradient for each parameter is clipped thus +# affecting all elements in the parameter +_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" +# Maximum absolute value used for clipping gradients +_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 +# Floating point number p for L-p norm to be used with the "norm" +# gradient clipping type; for L-inf, please specify .inf +_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 + +# ---------------------------------------------------------------------------- # +# Specific test options +# ---------------------------------------------------------------------------- # +_C.TEST = CN() +# For end-to-end tests to verify the expected accuracy. +# Each item is [task, metric, value, tolerance] +# e.g.: [['bbox', 'AP', 38.5, 0.2]] +_C.TEST.EXPECTED_RESULTS = [] +# The period (in terms of steps) to evaluate the model during training. +# Set to 0 to disable. +_C.TEST.EVAL_PERIOD = 0 +# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval +# When empty, it will use the defaults in COCO. +# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. +_C.TEST.KEYPOINT_OKS_SIGMAS = [] +# Maximum number of detections to return per image during inference (100 is +# based on the limit established for the COCO dataset). +_C.TEST.DETECTIONS_PER_IMAGE = 100 + +_C.TEST.AUG = CN({"ENABLED": False}) +_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) +_C.TEST.AUG.MAX_SIZE = 4000 +_C.TEST.AUG.FLIP = True + +_C.TEST.PRECISE_BN = CN({"ENABLED": False}) +_C.TEST.PRECISE_BN.NUM_ITER = 200 + +# ---------------------------------------------------------------------------- # +# Misc options +# ---------------------------------------------------------------------------- # +# Directory where output files are written +_C.OUTPUT_DIR = "./output" +# Set seed to negative to fully randomize everything. +# Set seed to positive to use a fixed seed. Note that a fixed seed increases +# reproducibility but does not guarantee fully deterministic behavior. +# Disabling all parallelism further increases reproducibility. +_C.SEED = -1 +# Benchmark different cudnn algorithms. +# If input images have very different sizes, this option will have large overhead +# for about 10k iterations. It usually hurts total time, but can benefit for certain models. +# If input images have the same or similar sizes, benchmark is often helpful. +_C.CUDNN_BENCHMARK = False +# The period (in terms of steps) for minibatch visualization at train time. +# Set to 0 to disable. +_C.VIS_PERIOD = 0 +# Set to 0 to disable. +_C.AMP = 0 +# Optimize level, you can set O2 to enable fp16 training +_C.OPT_LEVEL = "O0" +# Adjust the loss during training +_C.LOSS_SCALE_VALUE = 64 + +# Set to 0 to disable +_C.DEBUG_MODE = 0 + +# global config is for quick hack purposes. +# You can set them in command line or config files, +# and access it with: +# +# from detectron2.config import global_cfg +# print(global_cfg.HACK) +# +# Do not commit any configs into it. 
+_C.GLOBAL = CN() +_C.GLOBAL.HACK = 1.0 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a92eb98d13544038e6a75a7ae4499cce6a939935 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from . import transforms # isort:skip + +from .build import ( + build_batch_data_loader, + build_detection_test_loader, + build_detection_train_loader, + get_detection_dataset_dicts, + load_proposals_into_dataset, + print_instances_class_histogram, +) +from .catalog import DatasetCatalog, MetadataCatalog, Metadata +from .common import DatasetFromList, MapDataset +from .dataset_mapper import DatasetMapper + +# ensure the builtin datasets are registered +from . import datasets, samplers # isort:skip + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/build.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/build.py new file mode 100644 index 0000000000000000000000000000000000000000..d82c4f8c4c4fe02bd377112785f4d9174c9600d2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/build.py @@ -0,0 +1,426 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
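The __init__.py above re-exports the dataset and dataloader entry points implemented in the modules that follow. Before the build.py implementation, a short usage sketch of those entry points; it is illustrative only (not part of the patch) and assumes this port's default config, an NPU device as cfg.MODEL.DEVICE, a fixed input shape in cfg.INPUT.FIX_SHAPE, and the COCO 2017 splits available under $DETECTRON2_DATASETS.

# Illustrative usage of the data API re-exported above (not part of the patch).
from detectron2.config import get_cfg
from detectron2.data import (
    MetadataCatalog,
    build_detection_test_loader,
    build_detection_train_loader,
)

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("coco_2017_train",)
cfg.DATASETS.TEST = ("coco_2017_val",)

# Infinite iterator yielding list[dict]; in this port the loader also moves
# each batch to cfg.MODEL.DEVICE and pads images to cfg.INPUT.FIX_SHAPE.
train_loader = build_detection_train_loader(cfg)

# One image per batch, as is standard for inference-time evaluation.
val_loader = build_detection_test_loader(cfg, "coco_2017_val")

# Builtin metadata is registered on import of detectron2.data.
print(MetadataCatalog.get("coco_2017_val").thing_classes[:5])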
+import itertools +import logging +import numpy as np +import operator +import pickle +import torch.utils.data +from fvcore.common.file_io import PathManager +from tabulate import tabulate +from termcolor import colored + +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.env import seed_all_rng +from detectron2.utils.logger import log_first_n + +from .catalog import DatasetCatalog, MetadataCatalog +from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, PreloadLoader +from .dataset_mapper import DatasetMapper +from .detection_utils import check_metadata_consistency +from .samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler + +""" +This file contains the default logic to build a dataloader for training or testing. +""" + +__all__ = [ + "build_batch_data_loader", + "build_detection_train_loader", + "build_detection_test_loader", + "get_detection_dataset_dicts", + "load_proposals_into_dataset", + "print_instances_class_histogram", +] + + +def filter_images_with_only_crowd_annotations(dataset_dicts): + """ + Filter out images with none annotations or only crowd annotations + (i.e., images without non-crowd annotations). + A common training-time preprocessing on COCO dataset. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format, but filtered. + """ + num_before = len(dataset_dicts) + + def valid(anns): + for ann in anns: + if ann.get("iscrowd", 0) == 0: + return True + return False + + dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with no usable annotations. {} images left.".format( + num_before - num_after, num_after + ) + ) + return dataset_dicts + + +def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): + """ + Filter out images with too few number of keypoints. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + + Returns: + list[dict]: the same format as dataset_dicts, but filtered. + """ + num_before = len(dataset_dicts) + + def visible_keypoints_in_image(dic): + # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility + annotations = dic["annotations"] + return sum( + (np.array(ann["keypoints"][2::3]) > 0).sum() + for ann in annotations + if "keypoints" in ann + ) + + dataset_dicts = [ + x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image + ] + num_after = len(dataset_dicts) + logger = logging.getLogger(__name__) + logger.info( + "Removed {} images with fewer than {} keypoints.".format( + num_before - num_after, min_keypoints_per_image + ) + ) + return dataset_dicts + + +def load_proposals_into_dataset(dataset_dicts, proposal_file): + """ + Load precomputed object proposals into the dataset. + + The proposal file should be a pickled dict with the following keys: + + - "ids": list[int] or list[str], the image ids + - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id + - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores + corresponding to the boxes. + - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. + proposal_file (str): file path of pre-computed proposals, in pkl format. 
+ + Returns: + list[dict]: the same format as dataset_dicts, but added proposal field. + """ + logger = logging.getLogger(__name__) + logger.info("Loading proposals from: {}".format(proposal_file)) + + with PathManager.open(proposal_file, "rb") as f: + proposals = pickle.load(f, encoding="latin1") + + # Rename the key names in D1 proposal files + rename_keys = {"indexes": "ids", "scores": "objectness_logits"} + for key in rename_keys: + if key in proposals: + proposals[rename_keys[key]] = proposals.pop(key) + + # Fetch the indexes of all proposals that are in the dataset + # Convert image_id to str since they could be int. + img_ids = set({str(record["image_id"]) for record in dataset_dicts}) + id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} + + # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' + bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS + + for record in dataset_dicts: + # Get the index of the proposal + i = id_to_index[str(record["image_id"])] + + boxes = proposals["boxes"][i] + objectness_logits = proposals["objectness_logits"][i] + # Sort the proposals in descending order of the scores + inds = objectness_logits.argsort()[::-1] + record["proposal_boxes"] = boxes[inds] + record["proposal_objectness_logits"] = objectness_logits[inds] + record["proposal_bbox_mode"] = bbox_mode + + return dataset_dicts + + +def print_instances_class_histogram(dataset_dicts, class_names): + """ + Args: + dataset_dicts (list[dict]): list of dataset dicts. + class_names (list[str]): list of class names (zero-indexed). + """ + num_classes = len(class_names) + hist_bins = np.arange(num_classes + 1) + histogram = np.zeros((num_classes,), dtype=np.int) + for entry in dataset_dicts: + annos = entry["annotations"] + classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)] + histogram += np.histogram(classes, bins=hist_bins)[0] + + N_COLS = min(6, len(class_names) * 2) + + def short_name(x): + # make long class names shorter. useful for lvis + if len(x) > 13: + return x[:11] + ".." + return x + + data = list( + itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) + ) + total_num_instances = sum(data[1::2]) + data.extend([None] * (N_COLS - (len(data) % N_COLS))) + if num_classes > 1: + data.extend(["total", total_num_instances]) + data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + data, + headers=["category", "#instances"] * (N_COLS // 2), + tablefmt="pipe", + numalign="left", + stralign="center", + ) + log_first_n( + logging.INFO, + "Distribution of instances among all {} categories:\n".format(num_classes) + + colored(table, "cyan"), + key="message", + ) + + +def get_detection_dataset_dicts( + dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None +): + """ + Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. + + Args: + dataset_names (list[str]): a list of dataset names + filter_empty (bool): whether to filter out images without instance annotations + min_keypoints (int): filter out images with fewer keypoints than + `min_keypoints`. Set to 0 to do nothing. + proposal_files (list[str]): if given, a list of object proposal files + that match each dataset in `dataset_names`. + + Returns: + list[dict]: a list of dicts following the standard dataset dict format. 
+ """ + assert len(dataset_names) + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names] + for dataset_name, dicts in zip(dataset_names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + if proposal_files is not None: + assert len(dataset_names) == len(proposal_files) + # load precomputed proposals from proposal files + dataset_dicts = [ + load_proposals_into_dataset(dataset_i_dicts, proposal_file) + for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) + ] + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + if has_instances: + try: + class_names = MetadataCatalog.get(dataset_names[0]).thing_classes + check_metadata_consistency("thing_classes", dataset_names) + print_instances_class_histogram(dataset_dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + return dataset_dicts + + +def build_batch_data_loader( + dataset, sampler, total_batch_size, device, *, + aspect_ratio_grouping=False, num_workers=0 +): + """ + Build a batched dataloader for training. + + Args: + dataset (torch.utils.data.Dataset): map-style PyTorch dataset. Can be indexed. + sampler (torch.utils.data.sampler.Sampler): a sampler that produces indices + total_batch_size (int): total batch size across GPUs. + aspect_ratio_grouping (bool): whether to group images with similar + aspect ratio for efficiency. When enabled, it requires each + element in dataset be a dict with keys "width" and "height". + num_workers (int): number of parallel data loading workers + + Returns: + iterable[list]. Length of each list is the batch size of the current + GPU. Each element in the list comes from the dataset. + """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + + batch_size = total_batch_size // world_size + if aspect_ratio_grouping: + data_loader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + num_workers=num_workers, + batch_sampler=None, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + pin_memory=True + ) # yield individual mapped dict + return AspectRatioGroupedDataset(data_loader, batch_size, device) + else: + batch_sampler = torch.utils.data.sampler.BatchSampler( + sampler, batch_size, drop_last=True + ) # drop_last so the batch always have the same size + data_loader = torch.utils.data.DataLoader( + dataset, + num_workers=num_workers, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + worker_init_fn=worker_init_reset_seed, + pin_memory=True + ) + return PreloadLoader(data_loader, device) + + +def build_detection_train_loader(cfg, mapper=None): + """ + A data loader is created by the following steps: + + 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. + 2. Coordinate a random shuffle order shared among all processes (all GPUs) + 3. Each process spawn another few workers to process the dicts. 
Each worker will: + * Map each metadata dict into another format to be consumed by the model. + * Batch them by simply putting dicts into a list. + + The batched ``list[mapped_dict]`` is what this dataloader will yield. + + Args: + cfg (CfgNode): the config + mapper (callable): a callable which takes a sample (dict) from dataset and + returns the format to be consumed by the model. + By default it will be ``DatasetMapper(cfg, True)``. + + Returns: + an infinite iterator of training data + """ + dataset_dicts = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON + else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + dataset = DatasetFromList(dataset_dicts, copy=False) + + if mapper is None: + mapper = DatasetMapper(cfg, True, fix_shape=cfg.INPUT.FIX_SHAPE) + dataset = MapDataset(dataset, mapper) + + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN + logger = logging.getLogger(__name__) + logger.info("Using training sampler {}".format(sampler_name)) + # TODO avoid if-else? + if sampler_name == "TrainingSampler": + sampler = TrainingSampler(len(dataset),shuffle = False) + elif sampler_name == "RepeatFactorTrainingSampler": + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + return build_batch_data_loader( + dataset, + sampler, + cfg.SOLVER.IMS_PER_BATCH, + cfg.MODEL.DEVICE, + aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING, + num_workers=cfg.DATALOADER.NUM_WORKERS, + ) + +def build_detection_test_loader(cfg, dataset_name, mapper=None): + """ + Similar to `build_detection_train_loader`. + But this function uses the given `dataset_name` argument (instead of the names in cfg), + and uses batch size 1. + + Args: + cfg: a detectron2 CfgNode + dataset_name (str): a name of the dataset that's available in the DatasetCatalog + mapper (callable): a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + By default it will be `DatasetMapper(cfg, False)`. + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. + """ + dataset_dicts = get_detection_dataset_dicts( + [dataset_name], + filter_empty=False, + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + + dataset = DatasetFromList(dataset_dicts) + if mapper is None: + mapper = DatasetMapper(cfg, False, fix_shape=cfg.INPUT.FIX_SHAPE) + dataset = MapDataset(dataset, mapper) + + sampler = InferenceSampler(len(dataset)) + # Always use 1 image per worker during inference since this is the + # standard when reporting inference time in papers. + batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) + + return torch.utils.data.DataLoader( + dataset, + num_workers=cfg.DATALOADER.NUM_WORKERS, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator + ) + + +def trivial_batch_collator(batch): + """ + A batch collator that does nothing. 
+ """ + return batch + + +def worker_init_reset_seed(worker_id): + seed_all_rng(np.random.randint(2 ** 31) + worker_id) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/catalog.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..30066c714c611a0a8bc7f0ce00e9935476b67002 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/catalog.py @@ -0,0 +1,246 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import logging +import types +from typing import List + +from detectron2.utils.logger import log_first_n + +__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"] + + +class DatasetCatalog(object): + """ + A catalog that stores information about the datasets and how to obtain them. + + It contains a mapping from strings + (which are names that identify a dataset, e.g. "coco_2014_train") + to a function which parses the dataset and returns the samples in the + format of `list[dict]`. + + The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) + if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. + + The purpose of having this catalog is to make it easy to choose + different datasets, by just using the strings in the config. + """ + + _REGISTERED = {} + + @staticmethod + def register(name, func): + """ + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + func (callable): a callable which takes no arguments and returns a list of dicts. + It must return the same results if called multiple times. + """ + assert callable(func), "You must register a function with `DatasetCatalog.register`!" + assert name not in DatasetCatalog._REGISTERED, "Dataset '{}' is already registered!".format( + name + ) + DatasetCatalog._REGISTERED[name] = func + + @staticmethod + def get(name): + """ + Call the registered function and return its results. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + + Returns: + list[dict]: dataset annotations.0 + """ + try: + f = DatasetCatalog._REGISTERED[name] + except KeyError: + raise KeyError( + "Dataset '{}' is not registered! Available datasets are: {}".format( + name, ", ".join(DatasetCatalog._REGISTERED.keys()) + ) + ) + return f() + + @staticmethod + def list() -> List[str]: + """ + List all registered datasets. + + Returns: + list[str] + """ + return list(DatasetCatalog._REGISTERED.keys()) + + @staticmethod + def clear(): + """ + Remove all registered dataset. + """ + DatasetCatalog._REGISTERED.clear() + + @staticmethod + def remove(name): + """ + Remove the dataset registered by ``name``. 
+ """ + DatasetCatalog._REGISTERED.pop(name) + + +class Metadata(types.SimpleNamespace): + """ + A class that supports simple attribute setter/getter. + It is intended for storing metadata of a dataset and make it accessible globally. + + Examples: + :: + # somewhere when you load the data: + MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] + + # somewhere when you print statistics or visualize: + classes = MetadataCatalog.get("mydataset").thing_classes + """ + + # the name of the dataset + # set default to N/A so that `self.name` in the errors will not trigger getattr again + name: str = "N/A" + + _RENAMED = { + "class_names": "thing_classes", + "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", + "stuff_class_names": "stuff_classes", + } + + def __getattr__(self, key): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + return getattr(self, self._RENAMED[key]) + + # "name" exists in every metadata + if len(self.__dict__) > 1: + raise AttributeError( + "Attribute '{}' does not exist in the metadata of dataset '{}'. Available " + "keys are {}.".format(key, self.name, str(self.__dict__.keys())) + ) + else: + raise AttributeError( + f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': " + "metadata is empty." + ) + + def __setattr__(self, key, val): + if key in self._RENAMED: + log_first_n( + logging.WARNING, + "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), + n=10, + ) + setattr(self, self._RENAMED[key], val) + + # Ensure that metadata of the same name stays consistent + try: + oldval = getattr(self, key) + assert oldval == val, ( + "Attribute '{}' in the metadata of '{}' cannot be set " + "to a different value!\n{} != {}".format(key, self.name, oldval, val) + ) + except AttributeError: + super().__setattr__(key, val) + + def as_dict(self): + """ + Returns all the metadata as a dict. + Note that modifications to the returned dict will not reflect on the Metadata object. + """ + return copy.copy(self.__dict__) + + def set(self, **kwargs): + """ + Set multiple metadata with kwargs. + """ + for k, v in kwargs.items(): + setattr(self, k, v) + return self + + def get(self, key, default=None): + """ + Access an attribute and return its value if exists. + Otherwise return default. + """ + try: + return getattr(self, key) + except AttributeError: + return default + + +class MetadataCatalog: + """ + MetadataCatalog provides access to "Metadata" of a given dataset. + + The metadata associated with a certain name is a singleton: once created, the + metadata will stay alive and will be returned by future calls to ``get(name)``. + + It's like global variables, so don't abuse it. + It's meant for storing knowledge that's constant and shared across the execution + of the program, e.g.: the class names in COCO. + """ + + _NAME_TO_META = {} + + @staticmethod + def get(name): + """ + Args: + name (str): name of a dataset (e.g. coco_2014_train). + + Returns: + Metadata: The :class:`Metadata` instance associated with this name, + or create an empty one if none is available. + """ + assert len(name) + if name in MetadataCatalog._NAME_TO_META: + return MetadataCatalog._NAME_TO_META[name] + else: + m = MetadataCatalog._NAME_TO_META[name] = Metadata(name=name) + return m + + @staticmethod + def list(): + """ + List all registered metadata. 
+ + Returns: + list[str]: keys (names of datasets) of all registered metadata + """ + return list(MetadataCatalog._NAME_TO_META.keys()) + + @staticmethod + def clear(): + """ + Remove all registered metadata. + """ + MetadataCatalog._NAME_TO_META.clear() + + @staticmethod + def remove(name): + """ + Remove the metadata registered by ``name``. + """ + MetadataCatalog._NAME_TO_META.pop(name) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/common.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/common.py new file mode 100644 index 0000000000000000000000000000000000000000..7eee0cc4222ff79b41a685f96b3bc65e3f4c4648 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/common.py @@ -0,0 +1,197 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import logging +import numpy as np +import pickle +import random +import torch +import torch.utils.data as data + +from detectron2.utils.serialize import PicklableWrapper + +__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset"] + + +class MapDataset(data.Dataset): + """ + Map a function over the elements in a dataset. + + Args: + dataset: a dataset where map function is applied. + map_func: a callable which maps the element in dataset. map_func is + responsible for error handling, when error happens, it needs to + return None so the MapDataset will randomly use other + elements from the dataset. + """ + + def __init__(self, dataset, map_func): + self._dataset = dataset + self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work + + self._rng = random.Random(42) + self._fallback_candidates = set(range(len(dataset))) + + def __len__(self): + return len(self._dataset) + + def __getitem__(self, idx): + retry_count = 0 + cur_idx = int(idx) + + while True: + data = self._map_func(self._dataset[cur_idx]) + if data is not None: + self._fallback_candidates.add(cur_idx) + return data + + # _map_func fails for this idx, use a random new index from the pool + retry_count += 1 + self._fallback_candidates.discard(cur_idx) + cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] + + if retry_count >= 3: + logger = logging.getLogger(__name__) + logger.warning( + "Failed to apply `_map_func` for idx: {}, retry count: {}".format( + idx, retry_count + ) + ) + + +class DatasetFromList(data.Dataset): + """ + Wrap a list to a torch Dataset. It produces elements of the list as data. + """ + + def __init__(self, lst: list, copy: bool = True, serialize: bool = True): + """ + Args: + lst (list): a list which contains elements to produce. + copy (bool): whether to deepcopy the element when producing it, + so that the result can be modified in place without affecting the + source in the list. 
+ serialize (bool): whether to hold memory using serialized objects, when + enabled, data loader workers can use shared RAM from master + process instead of making a copy. + """ + self._lst = lst + self._copy = copy + self._serialize = serialize + + def _serialize(data): + buffer = pickle.dumps(data, protocol=-1) + return np.frombuffer(buffer, dtype=np.uint8) + + if self._serialize: + logger = logging.getLogger(__name__) + logger.info( + "Serializing {} elements to byte tensors and concatenating them all ...".format( + len(self._lst) + ) + ) + self._lst = [_serialize(x) for x in self._lst] + self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) + self._addr = np.cumsum(self._addr) + self._lst = np.concatenate(self._lst) + logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2)) + + def __len__(self): + if self._serialize: + return len(self._addr) + else: + return len(self._lst) + + def __getitem__(self, idx): + if self._serialize: + start_addr = 0 if idx == 0 else self._addr[idx - 1].item() + end_addr = self._addr[idx].item() + bytes = memoryview(self._lst[start_addr:end_addr]) + return pickle.loads(bytes) + elif self._copy: + return copy.deepcopy(self._lst[idx]) + else: + return self._lst[idx] + + +class PreloadLoader(object): + def __init__(self, + loader, + device + ): + self.device = device + self.loader = iter(loader) + self.stream = torch.npu.Stream() + self.preload() + def __len__(self): + return len(self.loader) + + def preload(self): + try: + self.next_data = next(self.loader) + except StopIteration: + self.next_data = None + return + with torch.npu.stream(self.stream): + for d in self.next_data: + d['image_preprocess'] = d['image_preprocess'].to(self.device, non_blocking=True) + if "instances" in d: + d['instances'] = d['instances'].to(self.device, non_blocking=True) + def next(self): + torch.npu.current_stream().wait_stream(self.stream) + data=self.next_data + self.preload() + return data + + +class AspectRatioGroupedDataset(data.IterableDataset): + """ + Batch data that have similar aspect ratio together. + In this implementation, images whose aspect ratio < (or >) 1 will + be batched together. + This improves training speed because the images then need less padding + to form a batch. + + It assumes the underlying dataset produces dicts with "width" and "height" keys. + It will then produce a list of original dicts with length = batch_size, + all with similar aspect ratios. + """ + + def __init__(self, dataset, batch_size,device): + """ + Args: + dataset: an iterable. Each element must be a dict with keys + "width" and "height", which will be used to batch data. + batch_size (int): + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2)] + self.device = device + # Hard-coded two aspect ratio groups: w > h and w < h. 
+ # Can add support for more aspect ratio groups, but doesn't seem useful + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + d["image_preprocess"] = d["image_preprocess"].to(self.device) + if "instances" in d: + d['instances'] = d['instances'].to(self.device) + bucket_id = 0 if w > h else 1 + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/dataset_mapper.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..5a3000f8740e5c52a66dd6c6679c503e728bd084 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/dataset_mapper.py @@ -0,0 +1,282 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch +from torch.nn import functional as F + +from detectron2.config import configurable + +from . import detection_utils as utils +from . import transforms as T +from detectron2.structures import BitMasks +from detectron2.structures import Boxes +from detectron2.structures import BoxMode +from detectron2.structures import Instances +from detectron2.structures import Keypoints +from detectron2.structures import PolygonMasks +from detectron2.structures import RotatedBoxes +from detectron2.structures import polygons_to_bitmask + + +""" +This file contains the default mapping that's applied to "dataset dicts". +""" + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. + See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + fix_shape: tuple = None, + amp: int = 0, + opt_level: str = "O0" + ): + """ + NOTE: this interface is experimental. 
+ + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = augmentations + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fix shape + self.fix_shape = fix_shape + self.amp = amp + self.opt_level = opt_level + # fmt: on + logger = logging.getLogger(__name__) + logger.info("Augmentations used in training: " + str(augmentations)) + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + "fix_shape": cfg.INPUT.FIX_SHAPE, + "amp": cfg.AMP, + "opt_level": cfg.OPT_LEVEL + } + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + # USER: Write your own image loading if it's not from a file + image = utils.read_image(dataset_dict["file_name"], format=self.image_format) + utils.check_image_size(dataset_dict, image) + + # USER: Remove if you don't do semantic/panoptic segmentation. 
+ if "sem_seg_file_name" in dataset_dict: + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) + else: + sem_seg_gt = None + + aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt) + transforms = aug_input.apply_augmentations(self.augmentations) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + # put preprocess in dataset + logger = logging.getLogger(__name__) + size_divisibility = 32 + pad_value = 0 + pixel_mean = torch.Tensor([103.53, 116.28, 123.675]).view(-1, 1, 1) + pixel_std = torch.Tensor([1.0, 1.0, 1.0]).view(-1, 1, 1) + images = (dataset_dict["image"] - pixel_mean) / pixel_std + + dataset_dict["image_size"] = tuple(images.shape[-2:]) + + batch_shape = (3, self.fix_shape[1], self.fix_shape[0]) + padding_size = [0, batch_shape[-1] - images.shape[-1], + 0, batch_shape[-2] - images.shape[-2]] + padded = F.pad(images, padding_size, value=pad_value) + batched_imgs = padded.unsqueeze_(0) + + if self.amp and (self.opt_level == "O1" or self.opt_level == "O2"): + batched_imgs = batched_imgs.to(torch.float16) + dataset_dict["image_preprocess"] = batched_imgs.contiguous() + + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. 
+ if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + + if not self.is_train: + dataset_dict["instances"] = \ + utils.filter_empty_instances(instances) + else: + i = utils.filter_empty_instances(instances) + classes = 80 + max_len = 20 + boxes_num = len(i.gt_boxes) + if boxes_num < max_len: + diff_num = max_len - boxes_num + i.gt_boxes.tensor = torch.cat( + [i.gt_boxes.tensor,torch.zeros([diff_num, 4])],dim=0) + padding_array = np.zeros([diff_num]) + classes + i.gt_classes = torch.cat( + [i.gt_classes, torch.from_numpy( + padding_array).long()], + dim=0) + if self.use_instance_mask: + if isinstance(i.gt_masks, PolygonMasks): + i.gt_masks.polygons += [torch.from_numpy( + np.zeros([1] ))]*diff_num + elif isinstance(i.gt_masks, BitMasks): + padding_mask = torch.zeros( + [diff_num, i.gt_masks.tensor.shape[1], + i.gt_masks.tensor.shape[2]]).bool() + i.gt_masks.tensor = torch.cat( + [i.gt_masks.tensor, padding_mask], dim=0) + i.gt_masks.tensor = F.pad( + i.gt_masks.tensor, padding_size, value=False) + else: + select_idx = torch.randperm(boxes_num)[:max_len] + i.gt_boxes.tensor = i.gt_boxes.tensor[select_idx] + i.gt_classes = i.gt_classes[select_idx] + if self.use_instance_mask: + if isinstance(i.gt_masks, PolygonMasks): + i.gt_masks.polygons = [ + i.gt_masks.polygons[idx] + for idx in select_idx.numpy().tolist()] + elif isinstance(i.gt_masks, BitMasks): + i.gt_masks.tensor = i.gt_masks.tensor[select_idx] + i.gt_masks.tensor = F.pad( + i.gt_masks.tensor, padding_size, value=False) + dataset_dict["instances"] = i + return dataset_dict diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fb3e4f7afec17137c95c78be6ef06d520ec8032 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/README.md @@ -0,0 +1,9 @@ + + +### Common Datasets + +The dataset implemented here do not need to load the data into the final format. +It should provide the minimal data structure needed to use the dataset, so it can be very efficient. + +For example, for an image dataset, just provide the file names and labels, but don't read the images. +Let the downstream decide how to read. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a46957b660041838aefd644eed3345fc857b139c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
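The DatasetMapper above keeps tensor shapes static for the NPU: every image is padded to cfg.INPUT.FIX_SHAPE, and the ground-truth instances of each image are padded or truncated to a fixed count of 20, with class index 80 filling the unused slots. A standalone sketch of that instance-padding idea follows; pad_instances is a hypothetical helper written for illustration, not a function from this repository.

# Standalone sketch of the fixed-count instance padding performed by the
# mapper above; names here are hypothetical, not detectron2 APIs.
import torch

def pad_instances(gt_boxes, gt_classes, max_len=20, pad_class=80):
    """Pad (or truncate) per-image ground truth to a fixed number of
    instances so that downstream ops always see static shapes."""
    n = gt_boxes.shape[0]
    if n < max_len:
        pad = max_len - n
        gt_boxes = torch.cat([gt_boxes, torch.zeros(pad, 4)], dim=0)
        gt_classes = torch.cat(
            [gt_classes, torch.full((pad,), pad_class, dtype=torch.long)])
    else:
        keep = torch.randperm(n)[:max_len]
        gt_boxes, gt_classes = gt_boxes[keep], gt_classes[keep]
    return gt_boxes, gt_classes

boxes = torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]])
classes = torch.tensor([3, 7])
b, c = pad_instances(boxes, classes)
print(b.shape, c.shape)   # torch.Size([20, 4]) torch.Size([20])
print(c[-1].item())       # 80 marks a padded (empty) slot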
+from .cityscapes import load_cityscapes_instances +from .coco import load_coco_json, load_sem_seg +from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta +from .pascal_voc import load_voc_instances, register_pascal_voc +from .register_coco import register_coco_instances, register_coco_panoptic_separated +from . import builtin # ensure the builtin datasets are registered + + +__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin.py new file mode 100644 index 0000000000000000000000000000000000000000..791739f94feb40a90f56d71c9d09a25f271873a9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file registers pre-defined datasets at hard-coded paths, and their metadata. + +We hard-code metadata for common datasets. This will enable: +1. Consistency check when loading the datasets +2. Use models on these standard datasets directly and run demos, + without having to download the dataset annotations + +We hard-code some paths to the dataset that's assumed to +exist in "./datasets/". + +Users SHOULD NOT use this file to create new dataset / metadata for new dataset. +To add new dataset, refer to the tutorial "docs/DATASETS.md". 
+""" + +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog + +from .builtin_meta import _get_builtin_metadata +from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic +from .lvis import get_lvis_instances_meta, register_lvis_instances +from .pascal_voc import register_pascal_voc +from .register_coco import register_coco_instances, register_coco_panoptic_separated + +# ==== Predefined datasets and splits for COCO ========== + +_PREDEFINED_SPLITS_COCO = {} +_PREDEFINED_SPLITS_COCO["coco"] = { + "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), + "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), + "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), + "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), + "coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/instances_valminusminival2014.json", + ), + "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), + "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), + "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), + "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), + "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), +} + +_PREDEFINED_SPLITS_COCO["coco_person"] = { + "keypoints_coco_2014_train": ( + "coco/train2014", + "coco/annotations/person_keypoints_train2014.json", + ), + "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), + "keypoints_coco_2014_minival": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014.json", + ), + "keypoints_coco_2014_valminusminival": ( + "coco/val2014", + "coco/annotations/person_keypoints_valminusminival2014.json", + ), + "keypoints_coco_2014_minival_100": ( + "coco/val2014", + "coco/annotations/person_keypoints_minival2014_100.json", + ), + "keypoints_coco_2017_train": ( + "coco/train2017", + "coco/annotations/person_keypoints_train2017.json", + ), + "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), + "keypoints_coco_2017_val_100": ( + "coco/val2017", + "coco/annotations/person_keypoints_val2017_100.json", + ), +} + + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_stuff_train2017", + ), + "coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_stuff_val2017", + ), + "coco_2017_val_100_panoptic": ( + "coco/panoptic_val2017_100", + "coco/annotations/panoptic_val2017_100.json", + "coco/panoptic_stuff_val2017_100", + ), +} + + +def register_all_coco(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + # Assume pre-defined datasets live in `./datasets`. 
+ register_coco_instances( + key, + _get_builtin_metadata(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")] + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + register_coco_panoptic_separated( + prefix, + _get_builtin_metadata("coco_panoptic_separated"), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + + +# ==== Predefined datasets and splits for LVIS ========== + + +_PREDEFINED_SPLITS_LVIS = { + "lvis_v1": { + "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"), + "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"), + "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"), + "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"), + }, + "lvis_v0.5": { + "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"), + "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"), + "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"), + "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"), + }, + "lvis_v0.5_cocofied": { + "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"), + "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"), + }, +} + + +def register_all_lvis(root): + for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): + for key, (image_root, json_file) in splits_per_dataset.items(): + # Assume pre-defined datasets live in `./datasets`. 
+ register_lvis_instances( + key, + get_lvis_instances_meta(dataset_name), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +# ==== Predefined splits for raw cityscapes images =========== + + +_RAW_CITYSCAPES_SPLITS = { + "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"), + "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"), + "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"), +} + + +def register_all_cityscapes(root): + for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): + meta = _get_builtin_metadata("cityscapes") + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + + inst_key = key.format(task="instance_seg") + DatasetCatalog.register( + inst_key, + lambda x=image_dir, y=gt_dir: load_cityscapes_instances( + x, y, from_json=True, to_polygons=True + ), + ) + MetadataCatalog.get(inst_key).set( + image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta + ) + + sem_key = key.format(task="sem_seg") + DatasetCatalog.register( + sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) + ) + MetadataCatalog.get(sem_key).set( + image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_sem_seg", **meta + ) + + +# ==== Predefined splits for PASCAL VOC =========== +def register_all_pascal_voc(root): + SPLITS = [ + ("voc_2007_trainval", "VOC2007", "trainval"), + ("voc_2007_train", "VOC2007", "train"), + ("voc_2007_val", "VOC2007", "val"), + ("voc_2007_test", "VOC2007", "test"), + ("voc_2012_trainval", "VOC2012", "trainval"), + ("voc_2012_train", "VOC2012", "train"), + ("voc_2012_val", "VOC2012", "val"), + ] + for name, dirname, split in SPLITS: + year = 2007 if "2007" in name else 2012 + register_pascal_voc(name, os.path.join(root, dirname), split, year) + MetadataCatalog.get(name).evaluator_type = "pascal_voc" + + +# Register them all under "./datasets" +_root = os.getenv("DETECTRON2_DATASETS", "/home/data/") +register_all_coco(_root) +register_all_lvis(_root) +register_all_cityscapes(_root) +register_all_pascal_voc(_root) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin_meta.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin_meta.py new file mode 100644 index 0000000000000000000000000000000000000000..b60123fe8a3b7d9780d5144be8cdb8e47d9c4ff6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/builtin_meta.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
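builtin.py above only registers the predefined splits, rooted at $DETECTRON2_DATASETS (defaulting to "/home/data/" in this port); its docstring directs users to the registration API for new datasets. A minimal sketch of registering a custom COCO-format dataset with register_coco_instances follows; the dataset name and paths are hypothetical placeholders.

# Sketch: registering a custom COCO-format dataset instead of editing builtin.py.
# The dataset name and paths below are hypothetical placeholders.
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

os.environ.setdefault("DETECTRON2_DATASETS", "/home/data/")

register_coco_instances(
    "my_coco_train",                                   # name used in cfg.DATASETS.TRAIN
    {},                                                # extra metadata (classes come from the json)
    "/home/data/my_dataset/annotations/train.json",    # COCO-format annotation file
    "/home/data/my_dataset/images/train",              # image root
)

# DatasetCatalog.get lazily parses the json the first time it is called.
dicts = DatasetCatalog.get("my_coco_train")
print(len(dicts), MetadataCatalog.get("my_coco_train").evaluator_type)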
+ + +# All coco categories, together with their nice-looking visualization colors +# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], 
"isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": 
"platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +# fmt: off +COCO_PERSON_KEYPOINT_NAMES = ( + "nose", + "left_eye", "right_eye", + "left_ear", "right_ear", + "left_shoulder", "right_shoulder", + "left_elbow", "right_elbow", + "left_wrist", "right_wrist", + "left_hip", "right_hip", + "left_knee", "right_knee", + "left_ankle", "right_ankle", +) +# fmt: on + +# Pairs of keypoints that should be exchanged under horizontal flipping +COCO_PERSON_KEYPOINT_FLIP_MAP = ( + ("left_eye", "right_eye"), + ("left_ear", "right_ear"), + ("left_shoulder", "right_shoulder"), + ("left_elbow", "right_elbow"), + ("left_wrist", "right_wrist"), + ("left_hip", "right_hip"), + ("left_knee", "right_knee"), + ("left_ankle", "right_ankle"), +) + +# rules for pairs of keypoints to draw a line between, and the line color to use. 
+KEYPOINT_CONNECTION_RULES = [ + # face + ("left_ear", "left_eye", (102, 204, 255)), + ("right_ear", "right_eye", (51, 153, 255)), + ("left_eye", "nose", (102, 0, 204)), + ("nose", "right_eye", (51, 102, 255)), + # upper-body + ("left_shoulder", "right_shoulder", (255, 128, 0)), + ("left_shoulder", "left_elbow", (153, 255, 204)), + ("right_shoulder", "right_elbow", (128, 229, 255)), + ("left_elbow", "left_wrist", (153, 255, 153)), + ("right_elbow", "right_wrist", (102, 255, 224)), + # lower-body + ("left_hip", "right_hip", (255, 102, 0)), + ("left_hip", "left_knee", (255, 255, 77)), + ("right_hip", "right_knee", (153, 255, 204)), + ("left_knee", "left_ankle", (191, 255, 128)), + ("right_knee", "right_ankle", (255, 195, 77)), +] + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous COCO category id to an id in [0, 79] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + "thing_colors": thing_colors, + } + return ret + + +def _get_coco_panoptic_separated_meta(): + """ + Returns metadata for "separated" version of the panoptic segmentation dataset. + """ + stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] + assert len(stuff_ids) == 53, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 53], used in models) to ids in the dataset (used for processing results) + # The id 0 is mapped to an extra category "thing". 
+ stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} + # When converting COCO panoptic annotations to semantic annotations + # We label the "thing" category to 0 + stuff_dataset_id_to_contiguous_id[0] = 0 + + # 54 names for COCO stuff categories (including "things") + stuff_classes = ["things"] + [ + k["name"].replace("-other", "").replace("-merged", "") + for k in COCO_CATEGORIES + if k["isthing"] == 0 + ] + + # NOTE: I randomly picked a color for things + stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + "stuff_colors": stuff_colors, + } + ret.update(_get_coco_instances_meta()) + return ret + + +def _get_builtin_metadata(dataset_name): + if dataset_name == "coco": + return _get_coco_instances_meta() + if dataset_name == "coco_panoptic_separated": + return _get_coco_panoptic_separated_meta() + elif dataset_name == "coco_person": + return { + "thing_classes": ["person"], + "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, + "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, + "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, + } + elif dataset_name == "cityscapes": + # fmt: off + CITYSCAPES_THING_CLASSES = [ + "person", "rider", "car", "truck", + "bus", "train", "motorcycle", "bicycle", + ] + CITYSCAPES_STUFF_CLASSES = [ + "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", + "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", + "truck", "bus", "train", "motorcycle", "bicycle", "license plate", + ] + # fmt: on + return { + "thing_classes": CITYSCAPES_THING_CLASSES, + "stuff_classes": CITYSCAPES_STUFF_CLASSES, + } + raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/cityscapes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..917ce6a1d6e7689002bc0114f12c62f0e3ba88f5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/cityscapes.py @@ -0,0 +1,342 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
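+# Usage sketch: the loaders in this module are normally wired into detectron2's dataset
+# registry rather than called directly. Assuming local Cityscapes paths and a placeholder
+# dataset name, a registration could look like:
+#
+#     from detectron2.data import DatasetCatalog
+#     DatasetCatalog.register(
+#         "my_cityscapes_train",  # placeholder key; any unused name works
+#         lambda: load_cityscapes_instances(
+#             "cityscapes/leftImg8bit/train", "cityscapes/gtFine/train",
+#             from_json=True, to_polygons=True,
+#         ),
+#     )
+#
+# after which DatasetCatalog.get("my_cityscapes_train") returns the list[dict] in the
+# standard format documented on load_cityscapes_instances below.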
+import functools +import json +import logging +import multiprocessing as mp +import numpy as np +import os +from itertools import chain +import pycocotools.mask as mask_util +from fvcore.common.file_io import PathManager +from PIL import Image + +from detectron2.structures import BoxMode +from detectron2.utils.comm import get_world_size +from detectron2.utils.logger import setup_logger + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_files(image_dir, gt_dir): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + for city in cities: + city_img_dir = os.path.join(image_dir, city) + city_gt_dir = os.path.join(gt_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = basename[: -len(suffix)] + + instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") + label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") + json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") + + files.append((image_file, instance_file, label_file, json_file)) + assert len(files), "No images found in {}".format(image_dir) + for f in files[0]: + assert PathManager.isfile(f), f + return files + + +def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + if from_json: + assert to_polygons, ( + "Cityscapes's json annotations are in polygon format. " + "Converting to mask format is not supported now." + ) + files = get_cityscapes_files(image_dir, gt_dir) + + logger.info("Preprocessing cityscapes annotations ...") + # This is still not fast: all workers will execute duplicate works and will + # take up to 10m on a 8GPU server. + pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) + + ret = pool.map( + functools.partial(cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), + files, + ) + logger.info("Loaded {} images from {}".format(len(ret), image_dir)) + + # Map cityscape ids to contiguous ids + from cityscapesscripts.helpers.labels import labels + + labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] + dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} + for dict_per_image in ret: + for anno in dict_per_image["annotations"]: + anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] + return ret + + +def load_cityscapes_semantic(image_dir, gt_dir): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". + + Returns: + list[dict]: a list of dict, each has "file_name" and + "sem_seg_file_name". + """ + ret = [] + # gt_dir is small and contain many small files. 
make sense to fetch to local first + gt_dir = PathManager.get_local_path(gt_dir) + for image_file, _, label_file, json_file in get_cityscapes_files(image_dir, gt_dir): + label_file = label_file.replace("labelIds", "labelTrainIds") + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret.append( + { + "file_name": image_file, + "sem_seg_file_name": label_file, + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + return ret + + +def cityscapes_files_to_dict(files, from_json, to_polygons): + """ + Parse cityscapes annotation files to a instance segmentation dataset dict. + + Args: + files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) + from_json (bool): whether to read annotations from the raw json file or the png files. + to_polygons (bool): whether to represent the segmentation as polygons + (COCO's format) instead of masks (cityscapes's format). + + Returns: + A dict in Detectron2 Dataset format. + """ + from cityscapesscripts.helpers.labels import id2label, name2label + + image_file, instance_id_file, _, json_file = files + + annos = [] + + if from_json: + from shapely.geometry import MultiPolygon, Polygon + + with PathManager.open(json_file, "r") as f: + jsonobj = json.load(f) + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": jsonobj["imgHeight"], + "width": jsonobj["imgWidth"], + } + + # `polygons_union` contains the union of all valid polygons. + polygons_union = Polygon() + + # CityscapesScripts draw the polygons in sequential order + # and each polygon *overwrites* existing ones. See + # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa + # We use reverse order, and each polygon *avoids* early ones. + # This will resolve the ploygon overlaps in the same way as CityscapesScripts. + for obj in jsonobj["objects"][::-1]: + if "deleted" in obj: # cityscapes data format specific + continue + label_name = obj["label"] + + try: + label = name2label[label_name] + except KeyError: + if label_name.endswith("group"): # crowd area + label = name2label[label_name[: -len("group")]] + else: + raise + if label.id < 0: # cityscapes data format + continue + + # Cityscapes's raw annotations uses integer coordinates + # Therefore +0.5 here + poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 + # CityscapesScript uses PIL.ImageDraw.polygon to rasterize + # polygons for evaluation. This function operates in integer space + # and draws each pixel whose center falls into the polygon. + # Therefore it draws a polygon which is 0.5 "fatter" in expectation. + # We therefore dilate the input polygon by 0.5 as our input. 
+ poly = Polygon(poly_coord).buffer(0.5, resolution=4) + + if not label.hasInstances or label.ignoreInEval: + # even if we won't store the polygon it still contributes to overlaps resolution + polygons_union = polygons_union.union(poly) + continue + + # Take non-overlapping part of the polygon + poly_wo_overlaps = poly.difference(polygons_union) + if poly_wo_overlaps.is_empty: + continue + polygons_union = polygons_union.union(poly) + + anno = {} + anno["iscrowd"] = label_name.endswith("group") + anno["category_id"] = label.id + + if isinstance(poly_wo_overlaps, Polygon): + poly_list = [poly_wo_overlaps] + elif isinstance(poly_wo_overlaps, MultiPolygon): + poly_list = poly_wo_overlaps.geoms + else: + raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) + + poly_coord = [] + for poly_el in poly_list: + # COCO API can work only with exterior boundaries now, hence we store only them. + # TODO: store both exterior and interior boundaries once other parts of the + # codebase support holes in polygons. + poly_coord.append(list(chain(*poly_el.exterior.coords))) + anno["segmentation"] = poly_coord + (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds + + anno["bbox"] = (xmin, ymin, xmax, ymax) + anno["bbox_mode"] = BoxMode.XYXY_ABS + + annos.append(anno) + else: + # See also the official annotation parsing scripts at + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa + with PathManager.open(instance_id_file, "rb") as f: + inst_image = np.asarray(Image.open(f), order="F") + # ids < 24 are stuff labels (filtering them first is about 5% faster) + flattened_ids = np.unique(inst_image[inst_image >= 24]) + + ret = { + "file_name": image_file, + "image_id": os.path.basename(image_file), + "height": inst_image.shape[0], + "width": inst_image.shape[1], + } + + for instance_id in flattened_ids: + # For non-crowd annotations, instance_id // 1000 is the label_id + # Crowd annotations have <1000 instance ids + label_id = instance_id // 1000 if instance_id >= 1000 else instance_id + label = id2label[label_id] + if not label.hasInstances or label.ignoreInEval: + continue + + anno = {} + anno["iscrowd"] = instance_id < 1000 + anno["category_id"] = label.id + + mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") + + inds = np.nonzero(mask) + ymin, ymax = inds[0].min(), inds[0].max() + xmin, xmax = inds[1].min(), inds[1].max() + anno["bbox"] = (xmin, ymin, xmax, ymax) + if xmax <= xmin or ymax <= ymin: + continue + anno["bbox_mode"] = BoxMode.XYXY_ABS + if to_polygons: + # This conversion comes from D4809743 and D5171122, + # when Mask-RCNN was first developed. + contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ + -2 + ] + polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] + # opencv's can produce invalid polygons + if len(polygons) == 0: + continue + anno["segmentation"] = polygons + else: + anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] + annos.append(anno) + ret["annotations"] = annos + return ret + + +if __name__ == "__main__": + """ + Test the cityscapes dataset loader. 
+ + Usage: + python -m detectron2.data.datasets.cityscapes \ + cityscapes/leftImg8bit/train cityscapes/gtFine/train + """ + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("image_dir") + parser.add_argument("gt_dir") + parser.add_argument("--type", choices=["instance", "semantic"], default="instance") + args = parser.parse_args() + from detectron2.data.catalog import Metadata + from detectron2.utils.visualizer import Visualizer + from cityscapesscripts.helpers.labels import labels + + logger = setup_logger(name=__name__) + + dirname = "cityscapes-data-vis" + os.makedirs(dirname, exist_ok=True) + + if args.type == "instance": + dicts = load_cityscapes_instances( + args.image_dir, args.gt_dir, from_json=True, to_polygons=True + ) + logger.info("Done loading {} samples.".format(len(dicts))) + + thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] + meta = Metadata().set(thing_classes=thing_classes) + + else: + dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) + logger.info("Done loading {} samples.".format(len(dicts))) + + stuff_names = [k.name for k in labels if k.trainId != 255] + stuff_colors = [k.color for k in labels if k.trainId != 255] + meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors) + + for d in dicts: + img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + # cv2.imshow("a", vis.get_image()[:, :, ::-1]) + # cv2.waitKey() + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/coco.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1ddea06182c43c9b819e1732351ec26366bb5f3c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/coco.py @@ -0,0 +1,481 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import datetime +import io +import json +import logging +import numpy as np +import os +import pycocotools.mask as mask_util +from fvcore.common.file_io import PathManager, file_lock +from fvcore.common.timer import Timer +from PIL import Image + +from detectron2.structures import Boxes, BoxMode, PolygonMasks + +from .. import DatasetCatalog, MetadataCatalog + +""" +This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". +""" + + +logger = logging.getLogger(__name__) + +__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json"] + + +def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): + """ + Load a json file with COCO's instances annotation format. 
+ Currently supports instance detection, instance segmentation, + and person keypoints annotations. + + Args: + json_file (str): full path to the json file in COCO instances annotation format. + image_root (str or path-like): the directory where the images in this json file exists. + dataset_name (str): the name of the dataset (e.g., coco_2017_train). + If provided, this function will also put "thing_classes" into + the metadata associated with this dataset. + extra_annotation_keys (list[str]): list of per-annotation keys that should also be + loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", + "category_id", "segmentation"). The values for these keys will be returned as-is. + For example, the densepose annotations are loaded in this way. + + Returns: + list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See + `Using Custom Datasets `_ ) + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from pycocotools.coco import COCO + + timer = Timer() + json_file = PathManager.get_local_path(json_file) + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + id_map = None + if dataset_name is not None: + meta = MetadataCatalog.get(dataset_name) + cat_ids = sorted(coco_api.getCatIds()) + cats = coco_api.loadCats(cat_ids) + # The categories in a custom json file may not be sorted. + thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] + meta.thing_classes = thing_classes + + # In COCO, certain category ids are artificially removed, + # and by convention they are always ignored. + # We deal with COCO's id issue and translate + # the category ids to contiguous ids in [0, 80). + + # It works by looking at the "categories" field in the json, therefore + # if users' own json also have incontiguous ids, we'll + # apply this mapping as well but print a warning. + if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): + if "coco" not in dataset_name: + logger.warning( + """ +Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. +""" + ) + id_map = {v: i for i, v in enumerate(cat_ids)} + meta.thing_dataset_id_to_contiguous_id = id_map + + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... + # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'iscrowd': 0, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + + if "minival" not in json_file: + # The popular valminusminival & minival annotations for COCO2014 contain this bug. + # However the ratio of buggy annotations there is tiny and does not affect accuracy. + # Therefore we explicitly white-list them. 
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + + logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) + + dataset_dicts = [] + + ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) + + num_instances_without_valid_segmentation = 0 + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + + # The original COCO valminusminival2014 & minival2014 annotation files + # actually contains bugs that, together with certain ways of using COCO API, + # can trigger this assertion. + assert anno["image_id"] == image_id + + assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' + + obj = {key: anno[key] for key in ann_keys if key in anno} + + segm = anno.get("segmentation", None) + if segm: # either list[list[float]] or dict(RLE) + if isinstance(segm, dict): + if isinstance(segm["counts"], list): + # convert to compressed RLE + segm = mask_util.frPyObjects(segm, *segm["size"]) + else: + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + num_instances_without_valid_segmentation += 1 + continue # ignore this instance + obj["segmentation"] = segm + + keypts = anno.get("keypoints", None) + if keypts: # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. + keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + obj["bbox_mode"] = BoxMode.XYWH_ABS + if id_map: + obj["category_id"] = id_map[obj["category_id"]] + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + if num_instances_without_valid_segmentation > 0: + logger.warning( + "Filtered out {} instances without valid segmentation. ".format( + num_instances_without_valid_segmentation + ) + + "There might be issues in your dataset generation process. " + "A valid polygon should be a list[float] with even length >= 6." + ) + return dataset_dicts + + +def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): + """ + Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are + treated as ground truth annotations and all files under "image_root" with "image_ext" extension + as input images. Ground truth and input images are matched using file paths relative to + "gt_root" and "image_root" respectively without taking into account file extensions. + This works for COCO as well as some other datasets. + + Args: + gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation + annotations are stored as images with integer values in pixels that represent + corresponding semantic labels. 
+ image_root (str): the directory where the input images are. + gt_ext (str): file extension for ground truth annotations. + image_ext (str): file extension for input images. + + Returns: + list[dict]: + a list of dicts in detectron2 standard format without instance-level + annotation. + + Notes: + 1. This function does not read the image and ground truth files. + The results do not have the "image" and "sem_seg" fields. + """ + + # We match input images with ground truth based on their relative filepaths (without file + # extensions) starting from 'image_root' and 'gt_root' respectively. + def file2id(folder_path, file_path): + # extract relative path starting from `folder_path` + image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) + # remove file extension + image_id = os.path.splitext(image_id)[0] + return image_id + + input_files = sorted( + (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), + key=lambda file_path: file2id(image_root, file_path), + ) + gt_files = sorted( + (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), + key=lambda file_path: file2id(gt_root, file_path), + ) + + assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) + + # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images + if len(input_files) != len(gt_files): + logger.warn( + "Directory {} and {} has {} and {} files, respectively.".format( + image_root, gt_root, len(input_files), len(gt_files) + ) + ) + input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] + gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] + intersect = list(set(input_basenames) & set(gt_basenames)) + # sort, otherwise each worker may obtain a list[dict] in different order + intersect = sorted(intersect) + logger.warn("Will use their intersection of {} files.".format(len(intersect))) + input_files = [os.path.join(image_root, f + image_ext) for f in intersect] + gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] + + logger.info( + "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) + ) + + dataset_dicts = [] + for (img_path, gt_path) in zip(input_files, gt_files): + record = {} + record["file_name"] = img_path + record["sem_seg_file_name"] = gt_path + dataset_dicts.append(record) + + return dataset_dicts + + +def convert_to_coco_dict(dataset_name): + """ + Convert an instance detection/segmentation or keypoint detection dataset + in detectron2's standard format into COCO json format. + + Generic dataset description can be found here: + https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset + + COCO data format description can be found here: + http://cocodataset.org/#format-data + + Args: + dataset_name (str): + name of the source dataset + Must be registered in DatastCatalog and in detectron2's standard format. 
+ Must have corresponding metadata "thing_classes" + Returns: + coco_dict: serializable dict in COCO json format + """ + + dataset_dicts = DatasetCatalog.get(dataset_name) + metadata = MetadataCatalog.get(dataset_name) + + # unmap the category mapping ids for COCO + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} + reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa + else: + reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa + + categories = [ + {"id": reverse_id_mapper(id), "name": name} + for id, name in enumerate(metadata.thing_classes) + ] + + logger.info("Converting dataset dicts into COCO format") + coco_images = [] + coco_annotations = [] + + for image_id, image_dict in enumerate(dataset_dicts): + coco_image = { + "id": image_dict.get("image_id", image_id), + "width": image_dict["width"], + "height": image_dict["height"], + "file_name": image_dict["file_name"], + } + coco_images.append(coco_image) + + anns_per_image = image_dict.get("annotations", []) + for annotation in anns_per_image: + # create a new dict with only COCO fields + coco_annotation = {} + + # COCO requirement: XYWH box format + bbox = annotation["bbox"] + bbox_mode = annotation["bbox_mode"] + bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS) + + # COCO requirement: instance area + if "segmentation" in annotation: + # Computing areas for instances by counting the pixels + segmentation = annotation["segmentation"] + # TODO: check segmentation type: RLE, BinaryMask or Polygon + if isinstance(segmentation, list): + polygons = PolygonMasks([segmentation]) + area = polygons.area()[0].item() + elif isinstance(segmentation, dict): # RLE + area = mask_util.area(segmentation).item() + else: + raise TypeError(f"Unknown segmentation type {type(segmentation)}!") + else: + # Computing areas using bounding boxes + bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + area = Boxes([bbox_xy]).area()[0].item() + + if "keypoints" in annotation: + keypoints = annotation["keypoints"] # list[int] + for idx, v in enumerate(keypoints): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # For COCO format consistency we substract 0.5 + # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 + keypoints[idx] = v - 0.5 + if "num_keypoints" in annotation: + num_keypoints = annotation["num_keypoints"] + else: + num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) + + # COCO requirement: + # linking annotations to images + # "id" field must start with 1 + coco_annotation["id"] = len(coco_annotations) + 1 + coco_annotation["image_id"] = coco_image["id"] + coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] + coco_annotation["area"] = float(area) + coco_annotation["iscrowd"] = annotation.get("iscrowd", 0) + coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"]) + + # Add optional fields + if "keypoints" in annotation: + coco_annotation["keypoints"] = keypoints + coco_annotation["num_keypoints"] = num_keypoints + + if "segmentation" in annotation: + seg = coco_annotation["segmentation"] = annotation["segmentation"] + if isinstance(seg, dict): # RLE + counts = seg["counts"] + if not isinstance(counts, str): + # make it json-serializable + seg["counts"] = counts.decode("ascii") + + coco_annotations.append(coco_annotation) + + 
logger.info( + "Conversion finished, " + f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" + ) + + info = { + "date_created": str(datetime.datetime.now()), + "description": "Automatically generated COCO json file for Detectron2.", + } + coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None} + if len(coco_annotations) > 0: + coco_dict["annotations"] = coco_annotations + return coco_dict + + +def convert_to_coco_json(dataset_name, output_file, allow_cached=True): + """ + Converts dataset into COCO format and saves it to a json file. + dataset_name must be registered in DatasetCatalog and in detectron2's standard format. + + Args: + dataset_name: + reference from the config file to the catalogs + must be registered in DatasetCatalog and in detectron2's standard format + output_file: path of json file that will be saved to + allow_cached: if json file is already present then skip conversion + """ + + # TODO: The dataset or the conversion script *may* change, + # a checksum would be useful for validating the cached data + + PathManager.mkdirs(os.path.dirname(output_file)) + with file_lock(output_file): + if PathManager.exists(output_file) and allow_cached: + logger.warning( + f"Using previously cached COCO format annotations at '{output_file}'. " + "You need to clear the cache file if your dataset has been modified." + ) + else: + logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") + coco_dict = convert_to_coco_dict(dataset_name) + + logger.info(f"Caching COCO format annotations at '{output_file}' ...") + with PathManager.open(output_file, "w") as f: + json.dump(coco_dict, f) + + +if __name__ == "__main__": + """ + Test the COCO json dataset loader. + + Usage: + python -m detectron2.data.datasets.coco \ + path/to/json path/to/image_root dataset_name + + "dataset_name" can be "coco_2014_minival_100", or other + pre-registered ones + """ + from detectron2.utils.logger import setup_logger + from detectron2.utils.visualizer import Visualizer + import detectron2.data.datasets # noqa # add pre-defined metadata + import sys + + logger = setup_logger(name=__name__) + assert sys.argv[3] in DatasetCatalog.list() + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "coco-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis.py new file mode 100644 index 0000000000000000000000000000000000000000..bcc6e50d0b99e10d3486eeb0df423b8bc99a070a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis.py @@ -0,0 +1,236 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import os +from fvcore.common.file_io import PathManager +from fvcore.common.timer import Timer + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode + +from .builtin_meta import _get_coco_instances_meta +from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES +from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES + +""" +This file contains functions to parse LVIS-format annotations into dicts in the +"Detectron2 format". +""" + +logger = logging.getLogger(__name__) + +__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] + + +def register_lvis_instances(name, metadata, json_file, image_root): + """ + Register a dataset in LVIS's json annotation format for instance detection and segmentation. + + Args: + name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". + metadata (dict): extra metadata associated with this dataset. It can be an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata + ) + + +def load_lvis_json(json_file, image_root, dataset_name=None): + """ + Load a json file in LVIS's annotation format. + + Args: + json_file (str): full path to the LVIS json annotation file. + image_root (str): the directory where the images in this json file exists. + dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). + If provided, this function will put "thing_classes" into the metadata + associated with this dataset. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + + Notes: + 1. This function does not read the image files. + The results do not have the "image" field. + """ + from lvis import LVIS + + json_file = PathManager.get_local_path(json_file) + + timer = Timer() + lvis_api = LVIS(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + + if dataset_name is not None: + meta = get_lvis_instances_meta(dataset_name) + MetadataCatalog.get(dataset_name).set(**meta) + + # sort indices for reproducible results + img_ids = sorted(lvis_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = lvis_api.load_imgs(img_ids) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. Example of anns[0]: + # [{'segmentation': [[192.81, + # 247.09, + # ... 
+ # 219.03, + # 249.06]], + # 'area': 1035.749, + # 'image_id': 1268, + # 'bbox': [192.81, 224.8, 74.73, 33.43], + # 'category_id': 16, + # 'id': 42986}, + # ...] + anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] + + # Sanity check that each annotation has a unique id + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( + json_file + ) + + imgs_anns = list(zip(imgs, anns)) + + logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) + + def get_file_name(img_root, img_dict): + # Determine the path including the split folder ("train2017", "val2017", "test2017") from + # the coco_url field. Example: + # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg' + split_folder, file_name = img_dict["coco_url"].split("/")[-2:] + return os.path.join(img_root + split_folder, file_name) + + dataset_dicts = [] + + for (img_dict, anno_dict_list) in imgs_anns: + record = {} + record["file_name"] = get_file_name(image_root, img_dict) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) + record["neg_category_ids"] = img_dict.get("neg_category_ids", []) + image_id = record["image_id"] = img_dict["id"] + + objs = [] + for anno in anno_dict_list: + # Check that the image_id in this annotation is the same as + # the image_id we're looking at. + # This fails only when the data parsing logic or the annotation file is buggy. + assert anno["image_id"] == image_id + obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} + obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed + segm = anno["segmentation"] # list[list[float]] + # filter out invalid polygons (< 3 points) + valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + assert len(segm) == len( + valid_segm + ), "Annotation contains an invalid polygon with < 3 points" + assert len(segm) > 0 + obj["segmentation"] = segm + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + + return dataset_dicts + + +def get_lvis_instances_meta(dataset_name): + """ + Load LVIS metadata. + + Args: + dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). 
+ + Returns: + dict: LVIS metadata with keys: thing_classes + """ + if "cocofied" in dataset_name: + return _get_coco_instances_meta() + if "v0.5" in dataset_name: + return _get_lvis_instances_meta_v0_5() + elif "v1" in dataset_name: + return _get_lvis_instances_meta_v1() + raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) + + +def _get_lvis_instances_meta_v0_5(): + assert len(LVIS_V0_5_CATEGORIES) == 1230 + cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +def _get_lvis_instances_meta_v1(): + assert len(LVIS_V1_CATEGORIES) == 1203 + cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES] + assert min(cat_ids) == 1 and max(cat_ids) == len( + cat_ids + ), "Category ids are not in [1, #categories], as expected" + # Ensure that the category list is sorted by id + lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"]) + thing_classes = [k["synonyms"][0] for k in lvis_categories] + meta = {"thing_classes": thing_classes} + return meta + + +if __name__ == "__main__": + """ + Test the LVIS json dataset loader. + + Usage: + python -m detectron2.data.datasets.lvis \ + path/to/json path/to/image_root dataset_name vis_limit + """ + import sys + import numpy as np + from detectron2.utils.logger import setup_logger + from PIL import Image + import detectron2.data.datasets # noqa # add pre-defined metadata + from detectron2.utils.visualizer import Visualizer + + logger = setup_logger(name=__name__) + meta = MetadataCatalog.get(sys.argv[3]) + + dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) + logger.info("Done loading {} samples.".format(len(dicts))) + + dirname = "lvis-data-vis" + os.makedirs(dirname, exist_ok=True) + for d in dicts[: int(sys.argv[4])]: + img = np.array(Image.open(d["file_name"])) + visualizer = Visualizer(img, metadata=meta) + vis = visualizer.draw_dataset_dict(d) + fpath = os.path.join(dirname, os.path.basename(d["file_name"])) + vis.save(fpath) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py new file mode 100644 index 0000000000000000000000000000000000000000..2e73c2a34e7f2506d3fd83c5bfb52388b4dba0e3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py @@ -0,0 +1,26 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Autogen with +# with open("lvis_v0.5_val.json", "r") as f: +# a = json.load(f) +# c = a["categories"] +# for x in c: +# del x["image_count"] +# del x["instance_count"] +# LVIS_CATEGORIES = repr(c) + " # noqa" + +# fmt: off +LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 
'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 
'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a 
bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to 
sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 
'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 
'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 
'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 
'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 
'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 
'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa
+# fmt: on
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v1_categories.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v1_categories.py
new file mode 100644
index 0000000000000000000000000000000000000000..c24b0c9d2a84dbb5af781d82c57fd0fd4bf4fde0
--- /dev/null
+++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/lvis_v1_categories.py
@@ -0,0 +1,29 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Autogen with
+# with open("lvis_v1_val.json", "r") as f:
+# a = json.load(f)
+# c = a["categories"]
+# for x in c:
+# del x["image_count"]
+# del x["instance_count"]
+# LVIS_CATEGORIES = repr(c) + " # noqa"
+# with open("/tmp/lvis_categories.py", "wt") as f:
+# f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}")
+# Then paste the contents of that file below
+
+# fmt: off
+LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset':
'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 
37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 
'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': 
['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 
'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film 
or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 
'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, 
{'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 
'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 
'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a target', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has a very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharp teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa +# fmt: on diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/pascal_voc.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/pascal_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..90b00377f856664401c07c141405c716f13d6e0e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/pascal_voc.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import os +import xml.etree.ElementTree as ET +from typing import List, Tuple, Union +from fvcore.common.file_io import PathManager + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode + +__all__ = ["load_voc_instances", "register_pascal_voc"] + + +# fmt: off +CLASS_NAMES = ( + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", + "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train", "tvmonitor" +) +# fmt: on + + +def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): + """ + Load Pascal VOC detection annotations to Detectron2 format. 
+ + Args: + dirname: Contain "Annotations", "ImageSets", "JPEGImages" + split (str): one of "train", "test", "val", "trainval" + class_names: list or tuple of class names + """ + with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: + fileids = np.loadtxt(f, dtype=np.str) + + # Needs to read many small annotation files. Makes sense at local + annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) + dicts = [] + for fileid in fileids: + anno_file = os.path.join(annotation_dirname, fileid + ".xml") + jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") + + with PathManager.open(anno_file) as f: + tree = ET.parse(f) + + r = { + "file_name": jpeg_file, + "image_id": fileid, + "height": int(tree.findall("./size/height")[0].text), + "width": int(tree.findall("./size/width")[0].text), + } + instances = [] + + for obj in tree.findall("object"): + cls = obj.find("name").text + # We include "difficult" samples in training. + # Based on limited experiments, they don't hurt accuracy. + # difficult = int(obj.find("difficult").text) + # if difficult == 1: + # continue + bbox = obj.find("bndbox") + bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] + # Original annotations are integers in the range [1, W or H] + # Assuming they mean 1-based pixel indices (inclusive), + # a box with annotation (xmin=1, xmax=W) covers the whole image. + # In coordinate space this is represented by (xmin=0, xmax=W) + bbox[0] -= 1.0 + bbox[1] -= 1.0 + instances.append( + {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} + ) + r["annotations"] = instances + dicts.append(r) + return dicts + + +def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): + DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) + MetadataCatalog.get(name).set( + thing_classes=list(class_names), dirname=dirname, year=year, split=split + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/register_coco.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/register_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..f901c2e621061a7a7ca6940435b259dffd6d5da6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/datasets/register_coco.py @@ -0,0 +1,142 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog + +from .coco import load_coco_json, load_sem_seg + +""" +This file contains functions to register a COCO-format dataset to the DatasetCatalog. 
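+
+A minimal usage sketch (the dataset name and paths below are hypothetical and only for
+illustration; `register_coco_instances` is defined later in this file):
+
+    register_coco_instances("my_coco_train", {}, "path/to/annotations.json", "path/to/images")
+    # the dataset can then be referred to by its name, e.g. in cfg.DATASETS.TRAIN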
+""" + +__all__ = ["register_coco_instances", "register_coco_panoptic_separated"] + + +def register_coco_instances(name, metadata, json_file, image_root): + """ + Register a dataset in COCO's json annotation format for + instance detection, instance segmentation and keypoint detection. + (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. + `instances*.json` and `person_keypoints*.json` in the dataset). + + This is an example of how to register a new dataset. + You can do something similar to this function, to register new datasets. + + Args: + name (str): the name that identifies a dataset, e.g. "coco_2014_train". + metadata (dict): extra metadata associated with this dataset. You can + leave it as an empty dict. + json_file (str): path to the json instance annotation file. + image_root (str or path-like): directory which contains all the images. + """ + assert isinstance(name, str), name + assert isinstance(json_file, (str, os.PathLike)), json_file + assert isinstance(image_root, (str, os.PathLike)), image_root + # 1. register a function which returns dicts + DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) + + # 2. Optionally, add metadata about this dataset, + # since they might be useful in evaluation, visualization or logging + MetadataCatalog.get(name).set( + json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata + ) + + +def register_coco_panoptic_separated( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + """ + Register a COCO panoptic segmentation dataset named `name`. + The annotations in this registered dataset will contain both instance annotations and + semantic annotations, each with its own contiguous ids. Hence it's called "separated". + + It follows the setting used by the PanopticFPN paper: + + 1. The instance annotations directly come from polygons in the COCO + instances annotation task, rather than from the masks in the COCO panoptic annotations. + + The two format have small differences: + Polygons in the instance annotations may have overlaps. + The mask annotations are produced by labeling the overlapped polygons + with depth ordering. + + 2. The semantic annotations are converted from panoptic annotations, where + all "things" are assigned a semantic id of 0. + All semantic categories will therefore have ids in contiguous + range [1, #stuff_categories]. + + This function will also register a pure semantic segmentation dataset + named ``name + '_stuffonly'``. + + Args: + name (str): the name that identifies a dataset, + e.g. "coco_2017_train_panoptic" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images + panoptic_json (str): path to the json panoptic annotation file + sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + "_separated" + DatasetCatalog.register( + panoptic_name, + lambda: merge_to_panoptic( + load_coco_json(instances_json, image_root, panoptic_name), + load_sem_seg(sem_seg_root, image_root), + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + sem_seg_root=sem_seg_root, + json_file=instances_json, # TODO rename + evaluator_type="coco_panoptic_seg", + **metadata + ) + + semantic_name = name + "_stuffonly" + DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, image_root=image_root, evaluator_type="sem_seg", **metadata + ) + + +def merge_to_panoptic(detection_dicts, sem_seg_dicts): + """ + Create dataset dicts for panoptic segmentation, by + merging two dicts using "file_name" field to match their entries. + + Args: + detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. + sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. + + Returns: + list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in + both detection_dicts and sem_seg_dicts that correspond to the same image. + The function assumes that the same key in different dicts has the same value. + """ + results = [] + sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} + assert len(sem_seg_file_to_entry) > 0 + + for det_dict in detection_dicts: + dic = copy.copy(det_dict) + dic.update(sem_seg_file_to_entry[dic["file_name"]]) + results.append(dic) + return results diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/detection_utils.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/detection_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..216f8d28d1c5c4a45655e26e5b388a67cca917c8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/detection_utils.py @@ -0,0 +1,603 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Common data processing utilities that are used in a +typical object detection data pipeline. +""" +import logging +import numpy as np +import pycocotools.mask as mask_util +import torch +from fvcore.common.file_io import PathManager +from PIL import Image + +from detectron2.structures import ( + BitMasks, + Boxes, + BoxMode, + Instances, + Keypoints, + PolygonMasks, + RotatedBoxes, + polygons_to_bitmask, +) + +from . 
import transforms as T +from .catalog import MetadataCatalog + +__all__ = [ + "SizeMismatchError", + "convert_image_to_rgb", + "check_image_size", + "transform_proposals", + "transform_instance_annotations", + "annotations_to_instances", + "annotations_to_instances_rotated", + "build_augmentation", + "build_transform_gen", + "create_keypoint_hflip_indices", + "filter_empty_instances", + "read_image", +] + + +class SizeMismatchError(ValueError): + """ + When loaded image has difference width/height compared with annotation. + """ + + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] +_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def convert_image_to_rgb(image, format): + """ + Convert an image from given format to RGB. + + Args: + image (np.ndarray or Tensor): an HWC image + format (str): the format of input image, also see `read_image` + + Returns: + (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 + """ + if isinstance(image, torch.Tensor): + image = image.cpu().numpy() + if format == "BGR": + image = image[:, :, [2, 1, 0]] + elif format == "YUV-BT.601": + image = np.dot(image, np.array(_M_YUV2RGB).T) + image = image * 255.0 + else: + if format == "L": + image = image[:, :, 0] + image = image.astype(np.uint8) + image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) + return image + + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. 
The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + exif = image.getexif() + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. + """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + + return convert_PIL_to_numpy(image, format) + + +def check_image_size(dataset_dict, image): + """ + Raise an error if the image does not match the size specified in the dict. + """ + if "width" in dataset_dict or "height" in dataset_dict: + image_wh = (image.shape[1], image.shape[0]) + expected_wh = (dataset_dict["width"], dataset_dict["height"]) + if not image_wh == expected_wh: + raise SizeMismatchError( + "Mismatched (W,H){}, got {}, expect {}".format( + " for image " + dataset_dict["file_name"] + if "file_name" in dataset_dict + else "", + image_wh, + expected_wh, + ) + ) + + # To ensure bbox always remap to original image size + if "width" not in dataset_dict: + dataset_dict["width"] = image.shape[1] + if "height" not in dataset_dict: + dataset_dict["height"] = image.shape[0] + + +def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): + """ + Apply transformations to the proposals in dataset_dict, if any. + + Args: + dataset_dict (dict): a dict read from the dataset, possibly + contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" + image_shape (tuple): height, width + transforms (TransformList): + proposal_topk (int): only keep top-K scoring proposals + min_box_size (int): proposals with either side smaller than this + threshold are removed + + The input dict is modified in-place, with abovementioned keys removed. A new + key "proposals" will be added. Its value is an `Instances` + object which contains the transformed proposals in its field + "proposal_boxes" and "objectness_logits". 
+ """ + if "proposal_boxes" in dataset_dict: + # Transform proposal boxes + boxes = transforms.apply_box( + BoxMode.convert( + dataset_dict.pop("proposal_boxes"), + dataset_dict.pop("proposal_bbox_mode"), + BoxMode.XYXY_ABS, + ) + ) + boxes = Boxes(boxes) + objectness_logits = torch.as_tensor( + dataset_dict.pop("proposal_objectness_logits").astype("float32") + ) + + boxes.clip(image_shape) + keep = boxes.nonempty(threshold=min_box_size) + boxes = boxes[keep] + objectness_logits = objectness_logits[keep] + + proposals = Instances(image_shape) + proposals.proposal_boxes = boxes[:proposal_topk] + proposals.objectness_logits = objectness_logits[:proposal_topk] + dataset_dict["proposals"] = proposals + + +def transform_instance_annotations( + annotation, transforms, image_size, *, keypoint_hflip_indices=None +): + """ + Apply transforms to box, segmentation and keypoints annotations of a single instance. + + It will use `transforms.apply_box` for the box, and + `transforms.apply_coords` for segmentation polygons & keypoints. + If you need anything more specially designed for each data structure, + you'll need to implement your own version of this function or the transforms. + + Args: + annotation (dict): dict of instance annotations for a single instance. + It will be modified in-place. + transforms (TransformList or list[Transform]): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + + Returns: + dict: + the same input dict with fields "bbox", "segmentation", "keypoints" + transformed according to `transforms`. + The "bbox_mode" field will be set to XYXY_ABS. + """ + if isinstance(transforms, (tuple, list)): + transforms = T.TransformList(transforms) + # bbox is 1d (per-instance bounding box) + bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) + # clip transformed bbox to image size + bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) + annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) + annotation["bbox_mode"] = BoxMode.XYXY_ABS + + if "segmentation" in annotation: + # each instance contains 1 or more polygons + segm = annotation["segmentation"] + if isinstance(segm, list): + # polygons + polygons = [np.asarray(p).reshape(-1, 2) for p in segm] + annotation["segmentation"] = [ + p.reshape(-1) for p in transforms.apply_polygons(polygons) + ] + elif isinstance(segm, dict): + # RLE + mask = mask_util.decode(segm) + mask = transforms.apply_segmentation(mask) + assert tuple(mask.shape[:2]) == image_size + annotation["segmentation"] = mask + else: + raise ValueError( + "Cannot transform segmentation of type '{}'!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict.".format(type(segm)) + ) + + if "keypoints" in annotation: + keypoints = transform_keypoint_annotations( + annotation["keypoints"], transforms, image_size, keypoint_hflip_indices + ) + annotation["keypoints"] = keypoints + + return annotation + + +def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): + """ + Transform keypoint annotations of an image. + If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0) + + Args: + keypoints (list[float]): Nx3 float in Detectron2's Dataset format. + Each point is represented by (x, y, visibility). 
+ transforms (TransformList): + image_size (tuple): the height, width of the transformed image + keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. + When `transforms` includes horizontal flip, will use the index + mapping to flip keypoints. + """ + # (N*3,) -> (N, 3) + keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) + keypoints_xy = transforms.apply_coords(keypoints[:, :2]) + + # Set all out-of-boundary points to "unlabeled" + inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1])) + inside = inside.all(axis=1) + keypoints[:, :2] = keypoints_xy + keypoints[:, 2][~inside] = 0 + + # This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + + # Alternative way: check if probe points was horizontally flipped. + # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) + # probe_aug = transforms.apply_coords(probe.copy()) + # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa + + # If flipped, swap each keypoint with its opposite-handed equivalent + if do_hflip: + assert keypoint_hflip_indices is not None + keypoints = keypoints[keypoint_hflip_indices, :] + + # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0 + keypoints[keypoints[:, 2] == 0] = 0 + return keypoints + + +def annotations_to_instances(annos, image_size, mask_format="polygon"): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + It will contain fields "gt_boxes", "gt_classes", + "gt_masks", "gt_keypoints", if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] + target = Instances(image_size) + target.gt_boxes = Boxes(boxes) + + classes = [obj["category_id"] for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + if len(annos) and "segmentation" in annos[0]: + segms = [obj["segmentation"] for obj in annos] + if mask_format == "polygon": + # TODO check type and provide better error + masks = PolygonMasks(segms) + else: + assert mask_format == "bitmask", mask_format + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image_size)) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a full-image segmentation mask " + "as a 2D ndarray.".format(type(segm)) + ) + # torch.from_numpy does not support array with negative stride. 
+ masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) + ) + target.gt_masks = masks + + if len(annos) and "keypoints" in annos[0]: + kpts = [obj.get("keypoints", []) for obj in annos] + target.gt_keypoints = Keypoints(kpts) + + return target + + +def annotations_to_instances_rotated(annos, image_size): + """ + Create an :class:`Instances` object used by the models, + from instance annotations in the dataset dict. + Compared to `annotations_to_instances`, this function is for rotated boxes only + + Args: + annos (list[dict]): a list of instance annotations in one image, each + element for one instance. + image_size (tuple): height, width + + Returns: + Instances: + Containing fields "gt_boxes", "gt_classes", + if they can be obtained from `annos`. + This is the format that builtin models expect. + """ + boxes = [obj["bbox"] for obj in annos] + target = Instances(image_size) + boxes = target.gt_boxes = RotatedBoxes(boxes) + boxes.clip(image_size) + + classes = [obj["category_id"] for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + target.gt_classes = classes + + return target + + +def filter_empty_instances(instances, by_box=True, by_mask=True, box_threshold=1e-5): + """ + Filter out empty instances in an `Instances` object. + + Args: + instances (Instances): + by_box (bool): whether to filter out instances with empty boxes + by_mask (bool): whether to filter out instances with empty masks + box_threshold (float): minimum width and height to be considered non-empty + + Returns: + Instances: the filtered instances. + """ + assert by_box or by_mask + r = [] + if by_box: + r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) + if instances.has("gt_masks") and by_mask: + r.append(instances.gt_masks.nonempty()) + + # TODO: can also filter visible keypoints + + if not r: + return instances + m = r[0] + for x in r[1:]: + m = m & x + return instances[m] + + +def create_keypoint_hflip_indices(dataset_names): + """ + Args: + dataset_names (list[str]): list of dataset names + Returns: + ndarray[int]: a vector of size=#keypoints, storing the + horizontally-flipped keypoint indices. + """ + + check_metadata_consistency("keypoint_names", dataset_names) + check_metadata_consistency("keypoint_flip_map", dataset_names) + + meta = MetadataCatalog.get(dataset_names[0]) + names = meta.keypoint_names + # TODO flip -> hflip + flip_map = dict(meta.keypoint_flip_map) + flip_map.update({v: k for k, v in flip_map.items()}) + flipped_names = [i if i not in flip_map else flip_map[i] for i in names] + flip_indices = [names.index(i) for i in flipped_names] + return np.asarray(flip_indices, dtype=np.int32) + + +def gen_crop_transform_with_instance(crop_size, image_size, instance): + """ + Generate a CropTransform so that the cropping region contains + the center of the given instance. + + Args: + crop_size (tuple): h, w in pixels + image_size (tuple): h, w + instance (dict): an annotation dict of one instance, in Detectron2's + dataset format. + """ + crop_size = np.asarray(crop_size, dtype=np.int32) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 + assert ( + image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] + ), "The annotation bounding box is outside of the image!" + assert ( + image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] + ), "Crop size is larger than image size!" 
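+    # Sample the crop's top-left corner so that the crop window both fits inside the
+    # image and still contains the (floored/ceiled) instance center computed above.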
+ + min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) + max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) + max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) + + y0 = np.random.randint(min_yx[0], max_yx[0] + 1) + x0 = np.random.randint(min_yx[1], max_yx[1] + 1) + return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) + + +def check_metadata_consistency(key, dataset_names): + """ + Check that the datasets have consistent metadata. + + Args: + key (str): a metadata key + dataset_names (list[str]): a list of dataset names + + Raises: + AttributeError: if the key does not exist in the metadata + ValueError: if the given datasets do not have the same metadata values defined by key + """ + if len(dataset_names) == 0: + return + logger = logging.getLogger(__name__) + entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] + for idx, entry in enumerate(entries_per_dataset): + if entry != entries_per_dataset[0]: + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) + ) + logger.error( + "Metadata '{}' for dataset '{}' is '{}'".format( + key, dataset_names[0], str(entries_per_dataset[0]) + ) + ) + raise ValueError("Datasets have different metadata '{}'!".format(key)) + + +def build_augmentation(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + + Returns: + list[Augmentation] + """ + if is_train: + min_size = cfg.INPUT.MIN_SIZE_TRAIN + max_size = cfg.INPUT.MAX_SIZE_TRAIN + sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING + else: + min_size = cfg.INPUT.MIN_SIZE_TEST + max_size = cfg.INPUT.MAX_SIZE_TEST + sample_style = "choice" + augmentation = [T.ResizeShortestEdge(min_size, + max_size, + sample_style)] + if is_train: + augmentation.append(T.RandomFlip()) + return augmentation + + +build_transform_gen = build_augmentation +""" +Alias for backward-compatibility. +""" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8d32e80bf926fa7fc7e62d868029be09e18093da --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler +from .grouped_batch_sampler import GroupedBatchSampler + +__all__ = [ + "GroupedBatchSampler", + "TrainingSampler", + "InferenceSampler", + "RepeatFactorTrainingSampler", +] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/distributed_sampler.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/distributed_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..0269be722c7d3e85a8b3880189e7781aa575e909 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/distributed_sampler.py @@ -0,0 +1,213 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import math +from collections import defaultdict +from typing import Optional +import torch +from torch.utils.data.sampler import Sampler + +from detectron2.utils import comm + + +class TrainingSampler(Sampler): + """ + In training, we only care about the "infinite stream" of training data. + So this sampler produces an infinite stream of indices and + all workers cooperate to correctly shuffle the indices and sample different indices. + + The samplers in each worker effectively produces `indices[worker_id::num_workers]` + where `indices` is an infinite stream of indices consisting of + `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) + or `range(size) + range(size) + ...` (if shuffle is False) + """ + + def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + self._size = size + assert size > 0 + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + if self._shuffle: + yield from torch.randperm(self._size, generator=g) + else: + yield from torch.arange(self._size) + + +class RepeatFactorTrainingSampler(Sampler): + """ + Similar to TrainingSampler, but a sample may appear more times than others based + on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS. 
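+
+    A usage sketch (the `repeat_thresh` value below is only an example):
+
+        rep_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
+            dataset_dicts, repeat_thresh=0.001
+        )
+        sampler = RepeatFactorTrainingSampler(rep_factors, shuffle=True)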
+ """ + + def __init__(self, repeat_factors, *, shuffle=True, seed=None): + """ + Args: + repeat_factors (Tensor): a float vector, the repeat factor for each indice. When it's + full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``. + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). + """ + self._shuffle = shuffle + if seed is None: + seed = comm.shared_random_seed() + self._seed = int(seed) + + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + # Split into whole number (_int_part) and fractional (_frac_part) parts. + self._int_part = torch.trunc(repeat_factors) + self._frac_part = repeat_factors - self._int_part + + @staticmethod + def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh): + """ + Compute (fractional) per-image repeat factors based on category frequency. + The repeat factor for an image is a function of the frequency of the rarest + category labeled in that image. The "frequency of category c" in [0, 1] is defined + as the fraction of images in the training set (without repeats) in which category c + appears. + See :paper:`lvis` (>= v2) Appendix B.2. + + Args: + dataset_dicts (list[dict]): annotations in Detectron2 dataset format. + repeat_thresh (float): frequency threshold below which data is repeated. + If the frequency is half of `repeat_thresh`, the image will be + repeated twice. + + Returns: + torch.Tensor: the i-th element is the repeat factor for the dataset image + at index i. + """ + # 1. For each category c, compute the fraction of images that contain it: f(c) + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: # For each image (without repeats) + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + # 2. For each category c, compute the category-level repeat factor: + # r(c) = max(1, sqrt(t / f(c))) + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + # 3. For each image I, compute the image-level repeat factor: + # r(I) = max_{c in I} r(c) + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) + + def _get_epoch_indices(self, generator): + """ + Create a list of dataset indices (with repeats) to use for one epoch. + + Args: + generator (torch.Generator): pseudo random number generator used for + stochastic rounding. + + Returns: + torch.Tensor: list of dataset indices to use in one epoch. Each index + is repeated based on its calculated repeat factor. 
+ """ + # Since repeat factors are fractional, we use stochastic rounding so + # that the target repeat factor is achieved in expectation over the + # course of training + rands = torch.rand(len(self._frac_part), generator=generator) + rep_factors = self._int_part + (rands < self._frac_part).float() + # Construct a list of indices in which we repeat images as specified + indices = [] + for dataset_index, rep_factor in enumerate(rep_factors): + indices.extend([dataset_index] * int(rep_factor.item())) + return torch.tensor(indices, dtype=torch.int64) + + def __iter__(self): + start = self._rank + yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + # Sample indices with repeats determined by stochastic rounding; each + # "epoch" may have a slightly different size due to the rounding. + indices = self._get_epoch_indices(g) + if self._shuffle: + randperm = torch.randperm(len(indices), generator=g) + yield from indices[randperm] + else: + yield from indices + + +class InferenceSampler(Sampler): + """ + Produce indices for inference. + Inference needs to run on the __exact__ set of samples, + therefore when the total number of samples is not divisible by the number of workers, + this sampler produces different number of samples on different workers. + """ + + def __init__(self, size: int): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + """ + self._size = size + assert size > 0 + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + shard_size = (self._size - 1) // self._world_size + 1 + begin = shard_size * self._rank + end = min(shard_size * (self._rank + 1), self._size) + self._local_indices = range(begin, end) + + def __iter__(self): + yield from self._local_indices + + def __len__(self): + return len(self._local_indices) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/grouped_batch_sampler.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/grouped_batch_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..9e1897c12a1f178ffcf2c7de9d353d4f080e867d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/samplers/grouped_batch_sampler.py @@ -0,0 +1,60 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from torch.utils.data.sampler import BatchSampler, Sampler + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. 
+ """ + + def __init__(self, sampler, group_ids, batch_size): + """ + Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a set of integers in the range [0, num_groups). + batch_size (int): Size of mini-batch. + """ + if not isinstance(sampler, Sampler): + raise ValueError( + "sampler should be an instance of " + "torch.utils.data.Sampler, but got sampler={}".format(sampler) + ) + self.sampler = sampler + self.group_ids = np.asarray(group_ids) + assert self.group_ids.ndim == 1 + self.batch_size = batch_size + groups = np.unique(self.group_ids).tolist() + + # buffer the indices of each group until batch size is reached + self.buffer_per_group = {k: [] for k in groups} + + def __iter__(self): + for idx in self.sampler: + group_id = self.group_ids[idx] + group_buffer = self.buffer_per_group[group_id] + group_buffer.append(idx) + if len(group_buffer) == self.batch_size: + yield group_buffer[:] # yield a copy of the list + del group_buffer[:] + + def __len__(self): + raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e048f26298827c1f840cc758280818ea1858143c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .transform import * +from fvcore.transforms.transform import * +from .augmentation import * +from .augmentation_impl import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..b82d041e09194eda58958b552fb89dcc89a76f42 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation.py @@ -0,0 +1,338 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import numpy as np +import pprint +from abc import ABCMeta, abstractmethod +from typing import List, Optional, Tuple, Union +from fvcore.transforms.transform import Transform, TransformList + +""" +Overview of the augmentation system: + +We have a design goal that aims at allowing: + (1) Arbitrary structures of input data (e.g. list[list[boxes]], dict[str, boxes], + multiple semantic segmentations for each image, etc) and arbitrary new data types + (rotated boxes, 3D meshes, densepose, etc) + (2) A list of augmentation to be applied sequentially + +`Augmentation` defines policies to create a `Transform` object from input data by +`get_transform` method. +A `Transform` object usually describes deterministic transformation, in the sense that it can +be re-applied on associated data, e.g. the geometry of an image and its segmentation masks need +to be transformed in the same way, instead of both being randomly augmented in inconsistent ways. +(If you're sure such re-application is not needed, then determinism is not a crucial requirement.) +An augmentation policy may need to access arbitrary input data to create a `Transform`, so it +declares the needed input data by its `input_args` attribute. Users are expected to provide them +when calling its `get_transform` method. + +`Augmentation` is not able to apply transforms to data: data associated with one sample may be +much more than what `Augmentation` gets. For example, >90% of the common augmentation policies +only need an image, but the actual input samples can be much more complicated. + +`AugInput` manages all inputs needed by `Augmentation` and implements the logic +to apply a sequence of augmentation. It defines how the inputs should be modified by a `Transform`, +because inputs needed by one `Augmentation` needs to be transformed to become arguments of the +next `Augmentation` in the sequence. + +`AugInput` does not need to contain all input data, because most augmentation policies +only need very few fields (e.g., >90% only need "image"). We provide `StandardAugInput` +that only contains "images", "boxes", "sem_seg", that are enough to create transforms +for most cases. In this way, users keep the responsibility and flexibility to apply transforms +to other (potentially new) data types and structures, e.g. keypoints, proposals boxes. + +To extend the system, one can do: +1. To add a new augmentation policy that only needs to use standard inputs + ("image", "boxes", "sem_seg"), writing a subclass of `Augmentation` is sufficient. +2. To use new data types or custom data structures, `StandardAugInput` can still be used as long + as the new data types or custom data structures are not needed by any augmentation policy. + The new data types or data structures can be transformed using the + transforms returned by `AugInput.apply_augmentations`. + The way new data types are transformed may need to declared using `Transform.register_type`. +3. (rare) To add new augmentation policies that need new data types or data structures, in + addition to implementing new `Augmentation`, a new `AugInput` is needed as well. 
+""" + + +__all__ = [ + "Augmentation", + "TransformGen", + "apply_transform_gens", + "AugInput", + "StandardAugInput", + "apply_augmentations", +] + + +def _check_img_dtype(img): + assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format( + type(img) + ) + assert not isinstance(img.dtype, np.integer) or ( + img.dtype == np.uint8 + ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format( + img.dtype + ) + assert img.ndim in [2, 3], img.ndim + + +class Augmentation(metaclass=ABCMeta): + """ + Augmentation defines policies/strategies to generate :class:`Transform` from data. + It is often used for pre-processing of input data. A policy typically contains + randomness, but it can also choose to deterministically generate a :class:`Transform`. + + A "policy" that generates a :class:`Transform` may, in the most general case, + need arbitrary information from input data in order to determine what transforms + to apply. Therefore, each :class:`Augmentation` instance defines the arguments + needed by its :meth:`get_transform` method with the :attr:`input_args` attribute. + When called with the positional arguments defined by the :attr:`input_args`, + the :meth:`get_transform` method executes the policy. + + Examples: + :: + # if a policy needs to know both image and semantic segmentation + assert aug.input_args == ("image", "sem_seg") + tfm: Transform = aug.get_transform(image, sem_seg) + new_image = tfm.apply_image(image) + + To implement a custom :class:`Augmentation`, define its :attr:`input_args` and + implement :meth:`get_transform`. + + Note that :class:`Augmentation` defines the policies to create a :class:`Transform`, + but not how to apply the actual transform to those data. + """ + + input_args: Tuple[str] = ("image",) + """ + Attribute of class instances that defines the argument(s) needed by + :meth:`get_transform`. Default to only "image", because most policies only + require knowing the image in order to determine the transform. + + Users can freely define arbitrary new args and their types in custom + :class:`Augmentation`. In detectron2 we use the following convention: + + * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. + * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes + of N instances. Each is in XYXY format in unit of absolute coordinates. + * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. + + We do not specify convention for other types and do not include builtin + :class:`Augmentation` that uses other types in detectron2. + """ + + def _init(self, params=None): + if params: + for k, v in params.items(): + if k != "self" and not k.startswith("_"): + setattr(self, k, v) + + # NOTE: in the future, can allow it to return list[Augmentation], + # to delegate augmentation to others + @abstractmethod + def get_transform(self, *args) -> Transform: + """ + Execute the policy to use input data to create transform(s). + + Args: + arguments must follow what's defined in :attr:`input_args`. + + Returns: + Return a :class:`Transform` instance, which is the transform to apply to inputs. + """ + pass + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. 
+ """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyAugmentation(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format(name) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + argstr.append("{}={}".format(name, pprint.pformat(attr))) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + __str__ = __repr__ + + +TransformGen = Augmentation +""" +Alias for Augmentation, since it is something that generates :class:`Transform`s +""" + + +class AugInput: + """ + A base class for anything on which a list of :class:`Augmentation` can be applied. + This class provides input arguments for :class:`Augmentation` to use, and defines how + to apply transforms to these data. + + An instance of this class must satisfy the following: + + * :class:`Augmentation` declares some data it needs as arguments. A :class:`AugInput` + must provide access to these data in the form of attribute access (``getattr``). + For example, if a :class:`Augmentation` to be applied needs "image" and "sem_seg" + arguments, this class must have the attribute "image" and "sem_seg" whose content + is as required by the :class:`Augmentation`s. + * This class must have a :meth:`transform(tfm: Transform) -> None` method which + in-place transforms all attributes stored in the class. + """ + + def transform(self, tfm: Transform) -> None: + raise NotImplementedError + + def apply_augmentations( + self, augmentations: List[Union[Augmentation, Transform]] + ) -> TransformList: + """ + Apply a list of Transform/Augmentation in-place and returned the applied transform. + Attributes of this class will be modified. + + Returns: + TransformList: + returns transformed inputs and the list of transforms applied. + The TransformList can then be applied to other data associated with the inputs. + """ + tfms = [] + for aug in augmentations: + if isinstance(aug, Augmentation): + args = [] + for f in aug.input_args: + try: + args.append(getattr(self, f)) + except AttributeError: + raise AttributeError( + f"Augmentation {aug} needs '{f}', which is not an attribute of {self}!" + ) + + tfm = aug.get_transform(*args) + assert isinstance(tfm, Transform), ( + f"{type(aug)}.get_transform must return an instance of Transform! " + "Got {type(tfm)} instead." + ) + else: + tfm = aug + self.transform(tfm) + tfms.append(tfm) + return TransformList(tfms) + + +class StandardAugInput(AugInput): + """ + A standard implementation of :class:`AugInput` for the majority of use cases. + This class provides the following standard attributes that are common to use by + Augmentation (augmentation policies). These are chosen because most + :class:`Augmentation` won't need anything more to define a augmentation policy. + After applying augmentations to these special attributes, the returned transforms + can then be used to transform other data structures that users have. + + Attributes: + image (ndarray): image in HW or HWC format. 
The meaning of C is up to users + boxes (ndarray or None): Nx4 boxes in XYXY_ABS mode + sem_seg (ndarray or None): HxW semantic segmentation mask + + Examples: + :: + input = StandardAugInput(image, boxes=boxes) + tfms = input.apply_augmentations(list_of_augmentations) + transformed_image = input.image + transformed_boxes = input.boxes + transformed_other_data = tfms.apply_other(other_data) + + An extended project that works with new data types may require augmentation + policies that need more inputs. An algorithm may need to transform inputs + in a way different from the standard approach defined in this class. In those + situations, users can implement new subclasses of :class:`AugInput` with differnt + attributes and the :meth:`transform` method. + """ + + def __init__( + self, + image: np.ndarray, + *, + boxes: Optional[np.ndarray] = None, + sem_seg: Optional[np.ndarray] = None, + ): + """ + Args: + image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or + floating point in range [0, 1] or [0, 255]. + boxes: (N,4) ndarray of float32. It represents the instance bounding boxes + of N instances. Each is in XYXY format in unit of absolute coordinates. + sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. + """ + _check_img_dtype(image) + self.image = image + self.boxes = boxes + self.sem_seg = sem_seg + + def transform(self, tfm: Transform) -> None: + """ + In-place transform all attributes of this class. + """ + self.image = tfm.apply_image(self.image) + if self.boxes is not None: + self.boxes = tfm.apply_box(self.boxes) + if self.sem_seg is not None: + self.sem_seg = tfm.apply_segmentation(self.sem_seg) + + +def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs): + """ + Use :meth:`AugInput.apply_augmentations` instead. + """ + if isinstance(inputs, np.ndarray): + # handle the common case of image-only Augmentation, also for backward compatibility + image_only = True + inputs = StandardAugInput(inputs) + else: + image_only = False + tfms = inputs.apply_augmentations(augmentations) + return inputs.image if image_only else inputs, tfms + + +apply_transform_gens = apply_augmentations +""" +Alias for backward-compatibility. +""" diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation_impl.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..4d7161937a5f7f6ca0671a5fc717acb7a78308f0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/augmentation_impl.py @@ -0,0 +1,497 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Implement many useful :class:`Augmentation`. 
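An end-to-end sketch of the in-place contract described above, assuming this package is importable; note that the returned TransformList can later be re-applied to data the augmentations never saw::

    import numpy as np
    import detectron2.data.transforms as T

    image = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    boxes = np.array([[10.0, 20.0, 110.0, 220.0]], dtype=np.float32)   # XYXY_ABS

    aug_input = T.StandardAugInput(image, boxes=boxes)
    tfms = aug_input.apply_augmentations([
        T.ResizeShortestEdge(short_edge_length=[256], max_size=512, sample_style="choice"),
        T.RandomFlip(prob=1.0, horizontal=True),
    ])

    new_image, new_boxes = aug_input.image, aug_input.boxes    # both were modified in place
    mask = np.zeros((480, 640), dtype=np.uint8)                # same geometry as the original image
    new_mask = tfms.apply_segmentation(mask)                   # transformed consistently, later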
+""" +import numpy as np +import sys +from fvcore.transforms.transform import ( + BlendTransform, + CropTransform, + HFlipTransform, + NoOpTransform, + Transform, + VFlipTransform, +) +from PIL import Image + +from .augmentation import Augmentation +from .transform import ExtentTransform, ResizeTransform, RotationTransform + +__all__ = [ + "RandomApply", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomLighting", + "RandomRotation", + "Resize", + "ResizeShortestEdge", + "RandomCrop_CategoryAreaConstraint", +] + + +class RandomApply(Augmentation): + """ + Randomly apply the wrapper transformation with a given probability. + """ + + def __init__(self, transform, prob=0.5): + """ + Args: + transform (Transform, Augmentation): the transform to be wrapped + by the `RandomApply`. The `transform` can either be a + `Transform` or `Augmentation` instance. + prob (float): probability between 0.0 and 1.0 that + the wrapper transformation is applied + """ + super().__init__() + assert isinstance(transform, (Transform, Augmentation)), ( + f"The given transform must either be a Transform or Augmentation instance. " + f"Not {type(transform)}" + ) + assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" + self.prob = prob + self.transform = transform + if isinstance(transform, Augmentation): + self.input_args = transform.input_args + + def get_transform(self, img): + do = self._rand_range() < self.prob + if do: + if isinstance(self.transform, Augmentation): + return self.transform.get_transform(img) + else: + return self.transform + else: + return NoOpTransform() + + +class RandomFlip(Augmentation): + """ + Flip the image horizontally or vertically with the given probability. + """ + + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): + """ + Args: + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping + """ + super().__init__() + + if horizontal and vertical: + raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") + self._init(locals()) + + def get_transform(self, img): + h, w = img.shape[:2] + do = self._rand_range() < self.prob + if do: + if self.horizontal: + return HFlipTransform(w) + elif self.vertical: + return VFlipTransform(h) + else: + return NoOpTransform() + + +class Resize(Augmentation): + """ Resize image to a fixed target size""" + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int + interp: PIL interpolation method + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._init(locals()) + + def get_transform(self, img): + return ResizeTransform( + img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp + ) + + +class ResizeShortestEdge(Augmentation): + """ + Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + def __init__( + self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. 
+ If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + if self.is_range: + assert len(short_edge_length) == 2, ( + "short_edge_length must be two values using 'range' sample style." + f" Got {short_edge_length}!" + ) + self._init(locals()) + + def get_transform(self, img): + h, w = img.shape[:2] + if self.is_range: + size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + + scale = size * 1.0 / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + + if max(newh, neww) > self.max_size: + scale = self.max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + return ResizeTransform(h, w, newh, neww, self.interp) + + +class RandomRotation(Augmentation): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around the given center. + """ + + def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): + """ + Args: + angle (list[float]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the angle (in degrees). + If ``sample_style=="choice"``, a list of angles to sample from + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (list[[float, float]]): If ``sample_style=="range"``, + a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, + [0, 0] being the top left of the image and [1, 1] the bottom right. + If ``sample_style=="choice"``, a list of centers to sample from + Default: None, which means that the center of rotation is the center of the image + center has no effect if expand=True because it only affects shifting + """ + super().__init__() + assert sample_style in ["range", "choice"], sample_style + self.is_range = sample_style == "range" + if isinstance(angle, (float, int)): + angle = (angle, angle) + if center is not None and isinstance(center[0], (float, int)): + center = (center, center) + self._init(locals()) + + def get_transform(self, img): + h, w = img.shape[:2] + center = None + if self.is_range: + angle = np.random.uniform(self.angle[0], self.angle[1]) + if self.center is not None: + center = ( + np.random.uniform(self.center[0][0], self.center[1][0]), + np.random.uniform(self.center[0][1], self.center[1][1]), + ) + else: + angle = np.random.choice(self.angle) + if self.center is not None: + center = np.random.choice(self.center) + + if center is not None: + center = (w * center[0], h * center[1]) # Convert to absolute coordinates + + if angle % 360 == 0: + return NoOpTransform() + + return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) + + +class RandomCrop(Augmentation): + """ + Randomly crop a subimage out of an image. + """ + + def __init__(self, crop_type: str, crop_size): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". + See `config/defaults.py` for explanation. 
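The sizing rule in ResizeShortestEdge.get_transform is easy to check by hand; a standalone sketch of the same arithmetic::

    def target_size(h, w, size, max_size):
        scale = size / min(h, w)                      # scale the shorter edge to `size`
        newh, neww = (size, scale * w) if h < w else (scale * h, size)
        if max(newh, neww) > max_size:                # then cap the longer edge at `max_size`
            rescale = max_size / max(newh, neww)
            newh, neww = newh * rescale, neww * rescale
        return int(newh + 0.5), int(neww + 0.5)

    print(target_size(480, 640, size=800, max_size=1333))    # (800, 1067)
    print(target_size(480, 1920, size=800, max_size=1333))   # (333, 1333) -- capped by max_size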
+ crop_size (tuple[float]): the relative ratio or absolute pixels of + height and width + """ + super().__init__() + assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] + self._init(locals()) + + def get_transform(self, img): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) + h0 = np.random.randint(h - croph + 1) + w0 = np.random.randint(w - cropw + 1) + return CropTransform(w0, h0, cropw, croph) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return (min(self.crop_size[0], h), min(self.crop_size[1], w)) + elif self.crop_type == "absolute_range": + assert self.crop_size[0] <= self.crop_size[1] + ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) + cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) + return ch, cw + else: + NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +class RandomCrop_CategoryAreaConstraint(Augmentation): + """ + Similar to :class:`RandomCrop`, but find a cropping window such that no single category + occupies a ratio of more than `single_category_max_area` in semantic segmentation ground + truth, which can cause unstability in training. The function attempts to find such a valid + cropping window for at most 10 times. + """ + + input_args = ("image", "sem_seg") + + def __init__( + self, + crop_type: str, + crop_size, + single_category_max_area: float = 1.0, + ignored_category: int = None, + ): + """ + Args: + crop_type, crop_size: same as in :class:`RandomCrop` + single_category_max_area: the maximum allowed area ratio of a + category. Set to 1.0 to disable + ignored_category: allow this category in the semantic segmentation + ground truth to exceed the area ratio. Usually set to the category + that's ignored in training. + """ + self.crop_aug = RandomCrop(crop_type, crop_size) + self._init(locals()) + + def get_transform(self, image, sem_seg): + if self.single_category_max_area >= 1.0: + return self.crop_aug.get_transform(image) + else: + h, w = sem_seg.shape + for _ in range(10): + crop_size = self.crop_aug.get_crop_size((h, w)) + y0 = np.random.randint(h - crop_size[0] + 1) + x0 = np.random.randint(w - crop_size[1] + 1) + sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + if self.ignored_category is not None: + cnt = cnt[labels != self.ignored_category] + if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area: + break + crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) + return crop_tfm + + +class RandomExtent(Augmentation): + """ + Outputs an image by cropping a random "subrect" of the source image. + + The subrect can be parameterized to include pixels outside the source image, + in which case they will be set to zeros (i.e. black). The size of the output + image will vary with the size of the random subrect. 
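The four crop_type modes of RandomCrop.get_crop_size differ only in how crop_size is interpreted; a quick sketch on a 480x640 image (the two deterministic modes computed, the two random modes described)::

    h, w = 480, 640

    # "relative": crop_size is a (height, width) ratio of the input
    ch, cw = 0.5, 0.75
    print(int(h * ch + 0.5), int(w * cw + 0.5))       # 240 480

    # "absolute": crop_size is in pixels, clamped to the image size
    print(min(800, h), min(512, w))                   # 480 512

    # "relative_range": per-axis ratio drawn uniformly from [crop_size, 1]
    # "absolute_range": per-axis pixel size drawn uniformly from [crop_size[0], crop_size[1]]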
+ """ + + def __init__(self, scale_range, shift_range): + """ + Args: + output_size (h, w): Dimensions of output image + scale_range (l, h): Range of input-to-output size scaling factor + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. + """ + super().__init__() + self._init(locals()) + + def get_transform(self, img): + img_h, img_w = img.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + return ExtentTransform( + src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), + output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), + ) + + +class RandomContrast(Augmentation): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, img): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w) + + +class RandomBrightness(Augmentation): + """ + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation + intensity_max (float): Maximum augmentation + """ + super().__init__() + self._init(locals()) + + def get_transform(self, img): + w = np.random.uniform(self.intensity_min, self.intensity_max) + return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) + + +class RandomSaturation(Augmentation): + """ + Randomly transforms saturation of an RGB image. + Input images are assumed to have 'RGB' channel order. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max): + """ + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). 
+ """ + super().__init__() + self._init(locals()) + + def get_transform(self, img): + assert img.shape[-1] == 3, "RandomSaturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) + + +class RandomLighting(Augmentation): + """ + The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet. + Input images are assumed to have 'RGB' channel order. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale): + """ + Args: + scale (float): Standard deviation of principal component weighting. + """ + super().__init__() + self._init(locals()) + self.eigen_vecs = np.array( + [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def get_transform(self, img): + assert img.shape[-1] == 3, "RandomLighting only works on RGB images" + weights = np.random.normal(scale=self.scale, size=3) + return BlendTransform( + src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/transform.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..10ebd1112b56b62df605528399331b33700131f3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/data/transforms/transform.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch.nn.functional as F +from fvcore.transforms.transform import ( + CropTransform, + HFlipTransform, + NoOpTransform, + Transform, + TransformList, +) +from PIL import Image + +try: + import cv2 # noqa +except ImportError: + # OpenCV is an optional dependency at the moment + pass + +__all__ = [ + "ExtentTransform", + "ResizeTransform", + "RotationTransform", + "ColorTransform", + "PILColorTransform", +] + + +class ExtentTransform(Transform): + """ + Extracts a subregion from the source image and scales it to the output size. + + The fill color is used to map pixels from the source rect that fall outside + the source image. 
+ + See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform + """ + + def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + """ + Args: + src_rect (x0, y0, x1, y1): src coordinates + output_size (h, w): dst image size + interp: PIL interpolation methods + fill: Fill color used when src_rect extends outside image + """ + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + h, w = self.output_size + ret = Image.fromarray(img).transform( + size=(w, h), + method=Image.EXTENT, + data=self.src_rect, + resample=interp if interp else self.interp, + fill=self.fill, + ) + return np.asarray(ret) + + def apply_coords(self, coords): + # Transform image center from source coordinates into output coordinates + # and then map the new origin to the corner of the output image. + h, w = self.output_size + x0, y0, x1, y1 = self.src_rect + new_coords = coords.astype(np.float32) + new_coords[:, 0] -= 0.5 * (x0 + x1) + new_coords[:, 1] -= 0.5 * (y0 + y1) + new_coords[:, 0] *= w / (x1 - x0) + new_coords[:, 1] *= h / (y1 - y0) + new_coords[:, 0] += 0.5 * w + new_coords[:, 1] += 0.5 * h + return new_coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. + """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + + if img.dtype == np.uint8: + pil_image = Image.fromarray(img) + interp_method = interp if interp is not None else self.interp + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + else: + # PIL only supports uint8 + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = {Image.BILINEAR: "bilinear", Image.BICUBIC: "bicubic"} + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[self.interp] + img = F.interpolate(img, (self.new_h, self.new_w), mode=mode, align_corners=False) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +class RotationTransform(Transform): + """ + This method returns a copy of this image, rotated the given + number of degrees counter clockwise around its center. 
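Coordinates follow the same scaling as the image in ResizeTransform; a small check, assuming this package is importable (apply_coords modifies its argument, hence the copy)::

    import numpy as np
    from detectron2.data.transforms import ResizeTransform

    tfm = ResizeTransform(h=480, w=640, new_h=240, new_w=320)    # downscale by 2 on both axes
    coords = np.array([[100.0, 200.0], [640.0, 480.0]])
    print(tfm.apply_coords(coords.copy()))
    # [[ 50. 100.]
    #  [320. 240.]]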
+ """ + + def __init__(self, h, w, angle, expand=True, center=None, interp=None): + """ + Args: + h, w (int): original image size + angle (float): degrees for rotation + expand (bool): choose if the image should be resized to fit the whole + rotated image (default), or simply cropped + center (tuple (width, height)): coordinates of the rotation center + if left to None, the center will be fit to the center of each image + center has no effect if expand=True because it only affects shifting + interp: cv2 interpolation method, default cv2.INTER_LINEAR + """ + super().__init__() + image_center = np.array((w / 2, h / 2)) + if center is None: + center = image_center + if interp is None: + interp = cv2.INTER_LINEAR + abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) + if expand: + # find the new width and height bounds + bound_w, bound_h = np.rint( + [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] + ).astype(int) + else: + bound_w, bound_h = w, h + + self._set_attributes(locals()) + self.rm_coords = self.create_rotation_matrix() + # Needed because of this problem https://github.com/opencv/opencv/issues/11784 + self.rm_image = self.create_rotation_matrix(offset=-0.5) + + def apply_image(self, img, interp=None): + """ + img should be a numpy array, formatted as Height * Width * Nchannels + """ + if len(img) == 0 or self.angle % 360 == 0: + return img + assert img.shape[:2] == (self.h, self.w) + interp = interp if interp is not None else self.interp + return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + + def apply_coords(self, coords): + """ + coords should be a N * 2 array-like, containing N couples of (x, y) points + """ + coords = np.asarray(coords, dtype=float) + if len(coords) == 0 or self.angle % 360 == 0: + return coords + return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) + return segmentation + + def create_rotation_matrix(self, offset=0): + center = (self.center[0] + offset, self.center[1] + offset) + rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) + if self.expand: + # Find the coordinates of the center of rotation in the new image + # The only point for which we know the future coordinates is the center of the image + rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] + new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center + # shift the rotation center to the new coordinates + rm[:, 2] += new_center + return rm + + def inverse(self): + """ + The inverse is to rotate it back with expand, and crop to get the original shape. + """ + if not self.expand: # Not possible to inverse if a part of the image is lost + raise NotImplementedError() + rotation = RotationTransform( + self.bound_h, self.bound_w, -self.angle, True, None, self.interp + ) + crop = CropTransform( + (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h + ) + return TransformList([rotation, crop]) + + +class ColorTransform(Transform): + """ + Generic wrapper for any photometric transforms. + These transformations should only affect the color space and + not the coordinate space of the image (e.g. 
annotation + coordinates such as bounding boxes should not be changed) + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in an ndarray and returns an ndarray. + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img): + return self.op(img) + + def apply_coords(self, coords): + return coords + + def apply_segmentation(self, segmentation): + return segmentation + + +class PILColorTransform(ColorTransform): + """ + Generic wrapper for PIL Photometric image transforms, + which affect the color space and not the coordinate + space of the image + """ + + def __init__(self, op): + """ + Args: + op (Callable): operation to be applied to the image, + which takes in a PIL Image and returns a transformed + PIL Image. + For reference on possible operations see: + - https://pillow.readthedocs.io/en/stable/ + """ + if not callable(op): + raise ValueError("op parameter should be callable") + super().__init__(op) + + def apply_image(self, img): + img = Image.fromarray(img) + return np.asarray(super().apply_image(img)) + + +def HFlip_rotated_box(transform, rotated_boxes): + """ + Apply the horizontal flip transform on rotated boxes. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + # Transform x_center + rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] + # Transform angle + rotated_boxes[:, 4] = -rotated_boxes[:, 4] + return rotated_boxes + + +def Resize_rotated_box(transform, rotated_boxes): + """ + Apply the resizing transform on rotated boxes. For details of how these (approximation) + formulas are derived, please refer to :meth:`RotatedBoxes.scale`. + + Args: + rotated_boxes (ndarray): Nx5 floating point array of + (x_center, y_center, width, height, angle_degrees) format + in absolute coordinates. + """ + scale_factor_x = transform.new_w * 1.0 / transform.w + scale_factor_y = transform.new_h * 1.0 / transform.h + rotated_boxes[:, 0] *= scale_factor_x + rotated_boxes[:, 1] *= scale_factor_y + theta = rotated_boxes[:, 4] * np.pi / 180.0 + c = np.cos(theta) + s = np.sin(theta) + rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) + rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) + rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi + + return rotated_boxes + + +HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +ResizeTransform.register_type("rotated_box", Resize_rotated_box) + +# not necessary any more with latest fvcore +NoOpTransform.register_type("rotated_box", lambda t, x: x) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dc41bdcc30c7902bae76687b145a5adad2d03818 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
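The rotated-box handlers registered above only adjust centers and angles; a pure-numpy sketch mirroring HFlip_rotated_box on one (x_center, y_center, width, height, angle) box::

    import numpy as np

    width = 200                                          # image width seen by the flip
    boxes = np.array([[30.0, 40.0, 10.0, 20.0, 15.0]])   # (x_center, y_center, width, height, angle)
    flipped = boxes.copy()
    flipped[:, 0] = width - flipped[:, 0]                # mirror the center horizontally
    flipped[:, 4] = -flipped[:, 4]                       # mirror the angle
    print(flipped)                                       # [[170.  40.  10.  20. -15.]]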
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .launch import * +from .train_loop import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] + + +# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) +# but still make them available here +from .hooks import * +from .defaults import * diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/defaults.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..82d69a7ccdcd3b1a6f24548a5b802d8ab488a889 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/defaults.py @@ -0,0 +1,643 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This file contains components with some default boilerplate logic user may need +in training / testing. They will not work for everyone, but many users may find them useful. + +The behavior of functions/classes in this file is subject to change, +since they are meant to represent the "common default behavior" people need in their projects. +""" + +import argparse +import logging +import os +import sys +from collections import OrderedDict +import torch +from fvcore.common.file_io import PathManager +from fvcore.nn.precise_bn import get_bn_modules +from torch.nn.parallel import DistributedDataParallel +from apex import amp + +import detectron2.data.transforms as T +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.evaluation import ( + DatasetEvaluator, + inference_on_dataset, + print_csv_format, + verify_results, +) +from detectron2.modeling import build_model +from detectron2.solver import build_lr_scheduler, build_optimizer +from detectron2.utils import comm +from detectron2.utils.collect_env import collect_env_info +from detectron2.utils.env import seed_all_rng +from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter +from detectron2.utils.logger import setup_logger + +from . 
import hooks +from .train_loop import SimpleTrainer + +import datetime + +__all__ = ["default_argument_parser", "default_setup", "DefaultPredictor", "DefaultTrainer"] + + +### add hook func +def print_tensor(name, tensors): + if isinstance(tensors, torch.Tensor): + print(name) + print(tensors.device) + elif isinstance(tensors, tuple) or isinstance(tensors, list): + for tensor in tensors: + print_tensor(name, tensor) + else: + print(name, type(tensors)) + + +def hook_func(name, module): + def hook_function(module, inputs, outputs): + print_tensor(name + 'inputs', inputs) + print_tensor(name + 'outputs', outputs) + + print(datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S.%f")) + + return hook_function + + +def hook_for_model(model): + for name, module in model.named_modules(): + module.register_forward_hook(hook_func('[forward]: ' + name, module)) + module.register_backward_hook(hook_func('[backward]: ' + name, module)) + +### end + +def default_argument_parser(epilog=None): + """ + Create a parser with some common arguments used by detectron2 users. + + Args: + epilog (str): epilog passed to ArgumentParser describing the usage. + + Returns: + argparse.ArgumentParser: + """ + parser = argparse.ArgumentParser( + epilog=epilog + or f""" +Examples: + +Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth + +Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") + parser.add_argument( + "--resume", + action="store_true", + help="whether to attempt to resume from the checkpoint directory", + ) + parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") + parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") + parser.add_argument('--device-ids',nargs='+') + parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") + parser.add_argument("--local_rank", type=int, default=0) + parser.add_argument( + "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" + ) + + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + default="tcp://127.0.0.1:{}".format(port), + help="initialization URL for pytorch distributed backend. See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + return parser + + +def default_setup(cfg, args): + """ + Perform some basic common setups at the beginning of a job, including: + + 1. Set up the detectron2 logger + 2. Log basic information about environment, cmdline arguments, and config + 3. 
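hook_for_model above is a blunt but effective way to trace which device every module's tensors land on; a self-contained sketch of the same idea on a toy model, using forward hooks only::

    import torch
    import torch.nn as nn

    def make_probe(name):
        def probe(module, inputs, outputs):
            devices = {str(t.device) for t in inputs if isinstance(t, torch.Tensor)}
            print(f"[forward] {name}: out={tuple(outputs.shape)}, inputs on {devices}")
        return probe

    model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
    for name, module in model.named_modules():
        if name:                                        # skip the top-level container itself
            module.register_forward_hook(make_probe(name))

    _ = model(torch.randn(2, 8))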
Backup the config to the output directory + + Args: + cfg (CfgNode): the full config to be used + args (argparse.NameSpace): the command line arguments to be logged + """ + output_dir = cfg.OUTPUT_DIR + if comm.is_main_process() and output_dir: + PathManager.mkdirs(output_dir) + + rank = comm.get_rank() + setup_logger(output_dir, distributed_rank=rank, name="fvcore") + logger = setup_logger(output_dir, distributed_rank=rank) + + logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())) + logger.info("Environment info:\n" + collect_env_info()) + + logger.info("Command line arguments: " + str(args)) + if hasattr(args, "config_file") and args.config_file != "": + logger.info( + "Contents of args.config_file={}:\n{}".format( + args.config_file, PathManager.open(args.config_file, "r").read() + ) + ) + + logger.info("Running with full config:\n{}".format(cfg)) + if comm.is_main_process() and output_dir: + # Note: some of our scripts may expect the existence of + # config.yaml in output directory + path = os.path.join(output_dir, "config.yaml") + with PathManager.open(path, "w") as f: + f.write(cfg.dump()) + logger.info("Full config saved to {}".format(path)) + + # make sure each worker has a different, yet deterministic seed if specified + seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank) + + # cudnn benchmark has large overhead. It shouldn't be used considering the small size of + # typical validation set. + if not (hasattr(args, "eval_only") and args.eval_only): + torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK + + +class DefaultPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a single input image. + + Compared to using the model directly, this class does the following additions: + + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take one input image and produce a single output, instead of a batch. + + If you'd like to do anything more fancy, please refer to its source code + as examples to build and use the model manually. + + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained from + cfg.DATASETS.TEST. + + Examples: + :: + pred = DefaultPredictor(cfg) + inputs = cv2.imread("input.jpg") + outputs = pred(inputs) + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + #self.model = build_model(self.cfg) + #edit by zsc + model = build_model(self.cfg) + if cfg.AMP: + model = amp.initialize( + model, + opt_level=cfg.OPT_LEVEL, + loss_scale=cfg.LOSS_SCALE_VALUE) + self.model = model + #end + self.model.eval() + if len(cfg.DATASETS.TEST): + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.aug = T.ResizeShortestEdge( + [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST + ) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, original_image): + """ + Args: + original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). + + Returns: + predictions (dict): + the output of the model for one image only. + See :doc:`/tutorials/models` for details about the format. 
+ """ + with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. + if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + + height, width = original_image.shape[:2] + image = self.aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + inputs = {"image": image, "height": height, "width": width} + predictions = self.model([inputs])[0] + return predictions + + +class DefaultTrainer(SimpleTrainer): + """ + A trainer with default training logic. + It is a subclass of `SimpleTrainer` which instantiates everything needed from the + config. It does the following: + + 1. Create model, optimizer, scheduler, dataloader from the given config. + 2. Load a checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when + `resume_or_load` is called. + 3. Register a few common hooks defined by the config. + + It is created to simplify the **standard model training workflow** and reduce code boilerplate + for users who only need the standard training workflow, with standard features. + It means this class makes *many assumptions* about your training logic that + may easily become invalid in a new research. In fact, any assumptions beyond those made in the + :class:`SimpleTrainer` are too much for research. + + The code of this class has been annotated about restrictive assumptions it mades. + When they do not work for you, you're encouraged to: + + 1. Overwrite methods of this class, OR: + 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and + nothing else. You can then add your own hooks if needed. OR: + 3. Write your own training loop similar to `tools/plain_train_net.py`. + + Also note that the behavior of this class, like other functions/classes in + this file, is not stable, since it is meant to represent the "common default behavior". + It is only guaranteed to work well with the standard models and training workflow in detectron2. + To obtain more stable behavior, write your own training logic with other public APIs. + + Examples: + :: + trainer = DefaultTrainer(cfg) + trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS + trainer.train() + + Attributes: + scheduler: + checkpointer (DetectionCheckpointer): + cfg (CfgNode): + """ + + def __init__(self, cfg): + """ + Args: + cfg (CfgNode): + """ + logger = logging.getLogger("detectron2") + if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 + setup_logger() + cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) + # Assume these objects must be constructed in this order. + model = self.build_model(cfg) + optimizer = self.build_optimizer(cfg, model) + if cfg.AMP: + model, optimizer = amp.initialize( + model, + optimizer, + opt_level=cfg.OPT_LEVEL, + loss_scale=cfg.LOSS_SCALE_VALUE) + + # add vovnet hook + hook_for_model(model) # add hook print + + data_loader = self.build_train_loader(cfg) + + # For training, wrap with DDP. But don't need this for inference. + if comm.get_world_size() > 1: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + super().__init__(model, data_loader, optimizer, + aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING) + + self.scheduler = self.build_lr_scheduler(cfg, optimizer) + # Assume no other objects need to be checkpointed. 
+ # We can later make it checkpoint the stateful hooks + self.checkpointer = DetectionCheckpointer( + # Assume you want to save checkpoints together with logs/statistics + model, + cfg.OUTPUT_DIR, + optimizer=optimizer, + scheduler=self.scheduler, + ) + self.start_iter = 0 + self.max_iter = cfg.SOLVER.MAX_ITER + self.cfg = cfg + self.register_hooks(self.build_hooks()) + + + def resume_or_load(self, resume=True): + """ + If `resume==True`, and last checkpoint exists, resume from it, load all checkpointables + (eg. optimizer and scheduler) and update iteration counter. + + Otherwise, load the model specified by the config (skip all checkpointables) and start from + the first iteration. + + Args: + resume (bool): whether to do resume or not + """ + checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) + if resume and self.checkpointer.has_checkpoint(): + self.start_iter = checkpoint.get("iteration", -1) + 1 + # The checkpoint stores the training iteration that just finished, thus we start + # at the next iteration (or iter zero if there's no checkpoint). + + def build_hooks(self): + """ + Build a list of default hooks, including timing, evaluation, + checkpointing, lr scheduling, precise BN, writing events. + + Returns: + list[HookBase]: + """ + cfg = self.cfg.clone() + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN + + ret = [ + hooks.IterationTimer(), + hooks.LRScheduler(self.optimizer, self.scheduler), + hooks.PreciseBN( + # Run at the same freq as (but before) evaluation. + cfg.TEST.EVAL_PERIOD, + self.model, + # Build a new data loader to not affect training + self.build_train_loader(cfg), + cfg.TEST.PRECISE_BN.NUM_ITER, + ) + if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) + else None, + ] + + # Do PreciseBN before checkpointer, because it updates the model and need to + # be saved by checkpointer. + # This is not always the best: if checkpointing has a different frequency, + # some checkpoints may have more precise statistics than others. + if comm.is_main_process(): + ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) + + def test_and_save_results(): + self._last_eval_results = self.test(self.cfg, self.model) + return self._last_eval_results + + # Do evaluation after checkpointer, because then if it fails, + # we can use the saved checkpoint to debug. + ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) + + if comm.is_main_process(): + # run writers in the end, so that evaluation metrics are written + ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) + return ret + + def build_writers(self): + """ + Build a list of writers to be used. By default it contains + writers that write metrics to the screen, + a json file, and a tensorboard event file respectively. + If you'd like a different list of writers, you can overwrite it in + your trainer. + + Returns: + list[EventWriter]: a list of :class:`EventWriter` objects. + + It is now implemented by: + :: + return [ + CommonMetricPrinter(self.max_iter), + JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), + TensorboardXWriter(self.cfg.OUTPUT_DIR), + ] + + """ + # Here the default print/log frequency of each writer is used. + return [ + # It may not always print what you want to see, since it prints "common" metrics only. 
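+            # NOTE: the TensorboardX writer below is left commented out in this port;
+            # presumably it can be re-enabled when tensorboardX is available on the target setup.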
+ CommonMetricPrinter(self.max_iter), + JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), + #TensorboardXWriter(self.cfg.OUTPUT_DIR), + ] + + def train(self): + """ + Run training. + + Returns: + OrderedDict of results, if evaluation is enabled. Otherwise None. + """ + super().train(self.start_iter, self.max_iter) + if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): + assert hasattr( + self, "_last_eval_results" + ), "No evaluation results obtained during training!" + verify_results(self.cfg, self._last_eval_results) + return self._last_eval_results + + @classmethod + def build_model(cls, cfg): + """ + Returns: + torch.nn.Module: + + It now calls :func:`detectron2.modeling.build_model`. + Overwrite it if you'd like a different model. + """ + model = build_model(cfg) + logger = logging.getLogger(__name__) + logger.info("Model:\n{}".format(model)) + return model + + @classmethod + def build_optimizer(cls, cfg, model): + """ + Returns: + torch.optim.Optimizer: + + It now calls :func:`detectron2.solver.build_optimizer`. + Overwrite it if you'd like a different optimizer. + """ + return build_optimizer(cfg, model) + + @classmethod + def build_lr_scheduler(cls, cfg, optimizer): + """ + It now calls :func:`detectron2.solver.build_lr_scheduler`. + Overwrite it if you'd like a different scheduler. + """ + return build_lr_scheduler(cfg, optimizer) + + @classmethod + def build_train_loader(cls, cfg): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_train_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_train_loader(cfg) + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + """ + Returns: + iterable + + It now calls :func:`detectron2.data.build_detection_test_loader`. + Overwrite it if you'd like a different data loader. + """ + return build_detection_test_loader(cfg, dataset_name) + + @classmethod + def build_evaluator(cls, cfg, dataset_name): + """ + Returns: + DatasetEvaluator or None + + It is not implemented by default. + """ + raise NotImplementedError( + """ +If you want DefaultTrainer to automatically run evaluation, +please implement `build_evaluator()` in subclasses (see train_net.py for example). +Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). +""" + ) + + @classmethod + def test(cls, cfg, model, evaluators=None): + """ + Args: + cfg (CfgNode): + model (nn.Module): + evaluators (list[DatasetEvaluator] or None): if None, will call + :meth:`build_evaluator`. Otherwise, must have the same length as + `cfg.DATASETS.TEST`. + + Returns: + dict: a dict of result metrics + """ + logger = logging.getLogger(__name__) + if isinstance(evaluators, DatasetEvaluator): + evaluators = [evaluators] + if evaluators is not None: + assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( + len(cfg.DATASETS.TEST), len(evaluators) + ) + + results = OrderedDict() + for idx, dataset_name in enumerate(cfg.DATASETS.TEST): + data_loader = cls.build_test_loader(cfg, dataset_name) + # When evaluators are passed in as arguments, + # implicitly assume that evaluators can be created before data_loader. + if evaluators is not None: + evaluator = evaluators[idx] + else: + try: + evaluator = cls.build_evaluator(cfg, dataset_name) + except NotImplementedError: + logger.warn( + "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " + "or implement its `build_evaluator` method." 
+ ) + results[dataset_name] = {} + continue + results_i = inference_on_dataset(model, data_loader, evaluator, cfg.MODEL.DEVICE) + results[dataset_name] = results_i + if comm.is_main_process(): + assert isinstance( + results_i, dict + ), "Evaluator must return a dict on the main process. Got {} instead.".format( + results_i + ) + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + + if len(results) == 1: + results = list(results.values())[0] + return results + + @staticmethod + def auto_scale_workers(cfg, num_workers: int): + """ + When the config is defined for certain number of workers (according to + ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of + workers currently in use, returns a new cfg where the total batch size + is scaled so that the per-GPU batch size stays the same as the + original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``. + + Other config options are also scaled accordingly: + * training steps and warmup steps are scaled inverse proportionally. + * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`. + + It returns the original config if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``. + + Returns: + CfgNode: a new config + """ + old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE + if old_world_size == 0 or old_world_size == num_workers: + return cfg + cfg = cfg.clone() + frozen = cfg.is_frozen() + cfg.defrost() + + assert ( + cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0 + ), "Invalid REFERENCE_WORLD_SIZE in config!" + scale = num_workers / old_world_size + bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale)) + lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale + max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale)) + warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale)) + cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS) + cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale)) + cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant + logger = logging.getLogger(__name__) + logger.info( + f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, " + f"max_iter={max_iter}, warmup={warmup_iter}." + ) + + if frozen: + cfg.freeze() + return cfg diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/hooks.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf32e94cf4457467f6e51cb9098654a6ea4c3e4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/hooks.py @@ -0,0 +1,519 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import itertools +import logging +import os +import tempfile +import time +from collections import Counter +import torch +from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer +from fvcore.common.file_io import PathManager +from fvcore.common.timer import Timer +from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats + +import detectron2.utils.comm as comm +from detectron2.evaluation.testing import flatten_results_dict +from detectron2.utils.events import EventStorage, EventWriter + +from .train_loop import HookBase + +__all__ = [ + "CallbackHook", + "IterationTimer", + "PeriodicWriter", + "PeriodicCheckpointer", + "LRScheduler", + "AutogradProfiler", + "EvalHook", + "PreciseBN", +] + + +""" +Implement some common hooks. +""" + + +class CallbackHook(HookBase): + """ + Create a hook using callback functions provided by the user. + """ + + def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): + """ + Each argument is a function that takes one argument: the trainer. + """ + self._before_train = before_train + self._before_step = before_step + self._after_step = after_step + self._after_train = after_train + + def before_train(self): + if self._before_train: + self._before_train(self.trainer) + + def after_train(self): + if self._after_train: + self._after_train(self.trainer) + # The functions may be closures that hold reference to the trainer + # Therefore, delete them to avoid circular reference. + del self._before_train, self._after_train + del self._before_step, self._after_step + + def before_step(self): + if self._before_step: + self._before_step(self.trainer) + + def after_step(self): + if self._after_step: + self._after_step(self.trainer) + + +class IterationTimer(HookBase): + """ + Track the time spent for each iteration (each run_step call in the trainer). + Print a summary in the end of training. + + This hook uses the time between the call to its :meth:`before_step` + and :meth:`after_step` methods. + Under the convention that :meth:`before_step` of all hooks should only + take negligible amount of time, the :class:`IterationTimer` hook should be + placed at the beginning of the list of hooks to obtain accurate timing. + """ + + def __init__(self, warmup_iter=3): + """ + Args: + warmup_iter (int): the number of iterations at the beginning to exclude + from timing. 
+ """ + self._warmup_iter = warmup_iter + self._step_timer = Timer() + self._start_time = time.perf_counter() + self._total_timer = Timer() + + def before_train(self): + self._start_time = time.perf_counter() + self._total_timer.reset() + self._total_timer.pause() + + def after_train(self): + logger = logging.getLogger(__name__) + total_time = time.perf_counter() - self._start_time + total_time_minus_hooks = self._total_timer.seconds() + hook_time = total_time - total_time_minus_hooks + + num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter + + if num_iter > 0 and total_time_minus_hooks > 0: + # Speed is meaningful only after warmup + # NOTE this format is parsed by grep in some scripts + logger.info( + "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( + num_iter, + str(datetime.timedelta(seconds=int(total_time_minus_hooks))), + total_time_minus_hooks / num_iter, + ) + ) + + logger.info( + "Total training time: {} ({} on hooks)".format( + str(datetime.timedelta(seconds=int(total_time))), + str(datetime.timedelta(seconds=int(hook_time))), + ) + ) + + def before_step(self): + self._step_timer.reset() + self._total_timer.resume() + + def after_step(self): + # +1 because we're in after_step + iter_done = self.trainer.iter - self.trainer.start_iter + 1 + if iter_done >= self._warmup_iter: + sec = self._step_timer.seconds() + self.trainer.storage.put_scalars(time=sec) + else: + self._start_time = time.perf_counter() + self._total_timer.reset() + + self._total_timer.pause() + + +class PeriodicWriter(HookBase): + """ + Write events to EventStorage (by calling ``writer.write()``) periodically. + + It is executed every ``period`` iterations and after the last iteration. + Note that ``period`` does not affect how data is smoothed by each writer. + """ + + def __init__(self, writers, period=20): + """ + Args: + writers (list[EventWriter]): a list of EventWriter objects + period (int): + """ + self._writers = writers + for w in writers: + assert isinstance(w, EventWriter), w + self._period = period + + def after_step(self): + if (self.trainer.iter + 1) % self._period == 0 or ( + self.trainer.iter == self.trainer.max_iter - 1 + ): + for writer in self._writers: + writer.write() + + def after_train(self): + for writer in self._writers: + writer.close() + + +class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): + """ + Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. + + Note that when used as a hook, + it is unable to save additional data other than what's defined + by the given `checkpointer`. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def before_train(self): + self.max_iter = self.trainer.max_iter + + def after_step(self): + # No way to use **kwargs + self.step(self.trainer.iter) + + +class LRScheduler(HookBase): + """ + A hook which executes a torch builtin LR scheduler and summarizes the LR. + It is executed after every iteration. 
+ """ + + def __init__(self, optimizer, scheduler): + """ + Args: + optimizer (torch.optim.Optimizer): + scheduler (torch.optim._LRScheduler) + """ + self._optimizer = optimizer + self._scheduler = scheduler + + # NOTE: some heuristics on what LR to summarize + # summarize the param group with most parameters + largest_group = max(len(g["params"]) for g in optimizer.param_groups) + + if largest_group == 1: + # If all groups have one parameter, + # then find the most common initial LR, and use it for summary + lr_count = Counter([g["lr"] for g in optimizer.param_groups]) + lr = lr_count.most_common()[0][0] + for i, g in enumerate(optimizer.param_groups): + if g["lr"] == lr: + self._best_param_group_id = i + break + else: + for i, g in enumerate(optimizer.param_groups): + if len(g["params"]) == largest_group: + self._best_param_group_id = i + break + + def after_step(self): + lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] + self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) + self._scheduler.step() + + +class AutogradProfiler(HookBase): + """ + A hook which runs `torch.autograd.profiler.profile`. + + Examples: + :: + hooks.AutogradProfiler( + lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR + ) + + The above example will run the profiler for iteration 10~20 and dump + results to ``OUTPUT_DIR``. We did not profile the first few iterations + because they are typically slower than the rest. + The result files can be loaded in the ``chrome://tracing`` page in chrome browser. + + Note: + When used together with NCCL on older version of GPUs, + autograd profiler may cause deadlock because it unnecessarily allocates + memory on every device it sees. The memory management calls, if + interleaved with NCCL calls, lead to deadlock on GPUs that do not + support ``cudaLaunchCooperativeKernelMultiDevice``. + """ + + def __init__(self, enable_predicate, output_dir, *, use_cuda=True): + """ + Args: + enable_predicate (callable[trainer -> bool]): a function which takes a trainer, + and returns whether to enable the profiler. + It will be called once every step, and can be used to select which steps to profile. + output_dir (str): the output directory to dump tracing files. + use_cuda (bool): same as in `torch.autograd.profiler.profile`. + """ + self._enable_predicate = enable_predicate + self._use_cuda = use_cuda + self._output_dir = output_dir + + def before_step(self): + if self._enable_predicate(self.trainer): + self._profiler = torch.autograd.profiler.profile(use_npu=self._use_cuda) + self._profiler.__enter__() + else: + self._profiler = None + + def after_step(self): + if self._profiler is None: + return + self._profiler.__exit__(None, None, None) + PathManager.mkdirs(self._output_dir) + out_file = os.path.join( + self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) + ) + if "://" not in out_file: + self._profiler.export_chrome_trace(out_file) + else: + # Support non-posix filesystems + with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: + tmp_file = os.path.join(d, "tmp.json") + self._profiler.export_chrome_trace(tmp_file) + with open(tmp_file) as f: + content = f.read() + with PathManager.open(out_file, "w") as f: + f.write(content) + + +class EvalHook(HookBase): + """ + Run an evaluation function periodically, and at the end of training. + + It is executed every ``eval_period`` iterations and after the last iteration. 
+ """ + + def __init__(self, eval_period, eval_function): + """ + Args: + eval_period (int): the period to run `eval_function`. + eval_function (callable): a function which takes no arguments, and + returns a nested dict of evaluation metrics. + + Note: + This hook must be enabled in all or none workers. + If you would like only certain workers to perform evaluation, + give other workers a no-op function (`eval_function=lambda: None`). + """ + self._period = eval_period + self._func = eval_function + + def _do_eval(self): + results = self._func() + + if results: + assert isinstance( + results, dict + ), "Eval function must return a dict. Got {} instead.".format(results) + + flattened_results = flatten_results_dict(results) + for k, v in flattened_results.items(): + try: + v = float(v) + except Exception: + raise ValueError( + "[EvalHook] eval_function should return a nested dict of float. " + "Got '{}: {}' instead.".format(k, v) + ) + self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) + + # Evaluation may take different time among workers. + # A barrier make them start the next iteration together. + comm.synchronize() + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self._do_eval() + + def after_train(self): + # func is likely a closure that holds reference to the trainer + # therefore we clean it to avoid circular reference in the end + del self._func + + +class PreciseBN(HookBase): + """ + The standard implementation of BatchNorm uses EMA in inference, which is + sometimes suboptimal. + This class computes the true average of statistics rather than the moving average, + and put true averages to every BN layer in the given model. + + It is executed every ``period`` iterations and after the last iteration. + """ + + def __init__(self, period, model, data_loader, num_iter): + """ + Args: + period (int): the period this hook is run, or 0 to not run during training. + The hook will always run in the end of training. + model (nn.Module): a module whose all BN layers in training mode will be + updated by precise BN. + Note that user is responsible for ensuring the BN layers to be + updated are in training mode when this hook is triggered. + data_loader (iterable): it will produce data to be run by `model(data)`. + num_iter (int): number of iterations used to compute the precise + statistics. + """ + self._logger = logging.getLogger(__name__) + if len(get_bn_modules(model)) == 0: + self._logger.info( + "PreciseBN is disabled because model does not contain BN layers in training mode." + ) + self._disabled = True + return + + self._model = model + self._data_loader = data_loader + self._num_iter = num_iter + self._period = period + self._disabled = False + + self._data_iter = None + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self.update_stats() + + def update_stats(self): + """ + Update the model with precise statistics. Users can manually call this method. + """ + if self._disabled: + return + + if self._data_iter is None: + self._data_iter = iter(self._data_loader) + + def data_loader(): + for num_iter in itertools.count(1): + if num_iter % 100 == 0: + self._logger.info( + "Running precise-BN ... 
{}/{} iterations.".format(num_iter, self._num_iter)
+                    )
+                # This way we can reuse the same iterator
+                yield next(self._data_iter)
+
+        with EventStorage():  # capture events in a new storage to discard them
+            self._logger.info(
+                "Running precise-BN for {} iterations... ".format(self._num_iter)
+                + "Note that this could produce different statistics every time."
+            )
+            update_bn_stats(self._model, data_loader(), self._num_iter)
+
+
+class ShowTraining(HookBase):
+    def __init__(self, model, start_iter):
+        self.start_iter = start_iter
+        self.model = model
+        # buffers for the intermediate results
+        self.fp_feature = {}
+        self.bp_gradient = {}
+
+        for name, module in self.model.named_modules():
+            if name not in ['proposal_generator.anchor_generator',
+                            'proposal_generator',
+                            'roi_heads',
+                            'proposal_generator.rpn_head']:
+                module.register_forward_hook(self.make_hook(name, 'forward'))
+                module.register_backward_hook(self.make_hook(name, 'backward'))
+
+    def make_hook(self, name, flag):
+        if flag == 'forward':
+            def hook(module_name, hook_input, output):
+                if isinstance(output, (tuple, list)):
+                    for idx, i in enumerate(output):
+                        print('forward: ', name + '_%d' % idx)
+                        print(output[idx].to('cpu'))
+                        self.fp_feature[name + '_%d' % idx] = \
+                            output[idx].to('cpu')
+                elif isinstance(output, dict):
+                    for key, value in output.items():
+                        print('forward: ', name + '_' + key)
+                        print(value.to('cpu'))
+                        self.fp_feature[name + '_' + key] = value.to('cpu')
+                else:
+                    print('forward: ', name)
+                    print(output.to('cpu'))
+                    self.fp_feature[name] = output.to('cpu')
+            return hook
+        elif flag == 'backward':
+            def hook(module_name, hook_input, output):
+                if isinstance(output, (tuple, list)):
+                    for idx, i in enumerate(output):
+                        print('backward: ', name + '_%d' % idx)
+                        print(output[idx].to('cpu'))
+                        self.bp_gradient[name + '_%d' % idx] \
+                            = output[idx].to('cpu')
+                elif isinstance(output, dict):
+                    for key, value in output.items():
+                        print('backward: ', name + '_' + key)
+                        print(value.to('cpu'))
+                        self.bp_gradient[name + '_' + key] = value.to('cpu')
+                else:
+                    print('backward: ', name)
+                    print(output.to('cpu'))
+                    self.bp_gradient[name] = output.to('cpu')
+            return hook
+        else:
+            assert False
+
+    def before_step(self):
+        self.fp_feature = {}
+        self.bp_gradient = {}
+
+    def after_step(self):
+        # collect the gradients of the parameters
+        model = self.model.cpu()
+        grad_dict = {}
+        for name, parms in model.named_parameters():
+            grad_dict[name] = parms.grad
+            print('-->name:', name, '-->grad_value', parms.grad)
+
+        with open('npu_fp_feature_%d.pth' %
+                  (self.start_iter), 'wb') as file_name:
+            torch.save(self.fp_feature, file_name)
+        with open('npu_bp_feature_%d.pth' %
+                  (self.start_iter), 'wb') as file_name:
+            torch.save(self.bp_gradient, file_name)
+        with open('npu_grad_dict_%d.pth' %
+                  (self.start_iter), 'wb') as file_name:
+            torch.save(grad_dict, file_name)
+        self.start_iter += 1
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/launch.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/launch.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fbab0ca9fc37fe0fb1958f411cf0ecd49aebea0
--- /dev/null
+++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/launch.py
@@ -0,0 +1,141 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import os + +from detectron2.utils import comm + +__all__ = ["launch"] + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch(main_func, num_gpus_per_machine,local_rank=0, num_machines=1, machine_rank=0, dist_url=None, args=()): + """ + Launch multi-gpu or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine`) on each machine. + + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of GPUs per machine + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to "auto" to automatically select a free port on localhost + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + # https://github.com/pytorch/pytorch/pull/14391 + # TODO prctl in spawned processes + #dist.init_process_group(backend='hccl',world_size=world_size, rank=local_rank) + #local_device = torch.device(f'npu:{local_rank}') + #torch.npu.set_device(local_device) + print("_________DDDDDDDDDPPPPPPPPPPP") + # https://github.com/pytorch/pytorch/pull/14391 + # TODO prctl in spawned processes + + if dist_url == "auto": + assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = str(_find_free_port()) + os.environ['WORLD_SIZE'] = str(world_size) + + mp.spawn( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), + daemon=False, + ) + + else: + argsd = dict(zip(args[0].opts[0::2], args[0].opts[1::2])) + #os.environ['KERNEL_NAME_ID'] = \ + # argsd['MODEL.DEVICE'].split(':')[-1] + torch.npu.set_device("npu:0") + + main_func(*args) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + args +): + assert torch.npu.is_available(), \ + "npu is not available. Please check your installation." 
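+    # e.g. with num_machines=2 and num_gpus_per_machine=8, machine_rank=1 and
+    # local_rank=3 give global_rank = 1 * 8 + 3 = 11, i.e. ranks are contiguous
+    # within each machine.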
+ global_rank = machine_rank * num_gpus_per_machine + local_rank + os.environ["RANK"] = str(local_rank) + os.environ['KERNEL_NAME_ID'] = str(local_rank) + + try: + dist.init_process_group( + backend="hccl", # init_method=dist_url, + world_size=world_size, rank=global_rank + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + #comm.synchronize() + assert num_gpus_per_machine <= torch.npu.device_count() + if args[0].device_ids: + device = 'npu:{}'.format(args[0].device_ids[local_rank]) + else: + device = 'npu:{}'.format(local_rank) + #comm.synchronize() + + #assert num_gpus_per_machine <= torch.cuda.device_count() + torch.npu.set_device(device) + + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + + # Setup the local process group (which contains ranks within the same machine) + assert comm._LOCAL_PROCESS_GROUP is None + num_machines = world_size // num_gpus_per_machine + for i in range(num_machines): + ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + comm._LOCAL_PROCESS_GROUP = pg + + main_func(*args) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/train_loop.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/train_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..c81a6fa0a071a7cffb9e70a5955039af6bacf0e9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/engine/train_loop.py @@ -0,0 +1,337 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import logging +import numpy as np +import time +import weakref +import torch +from apex import amp + +import detectron2.utils.comm as comm +from detectron2.utils.events import EventStorage + + + +__all__ = ["HookBase", "TrainerBase", "SimpleTrainer"] + + +try: + _nullcontext = contextlib.nullcontext # python 3.7+ +except AttributeError: + + @contextlib.contextmanager + def _nullcontext(enter_result=None): + yield enter_result + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + + Each hook can implement 4 methods. The way they are called is demonstrated + in the following snippet: + :: + hook.before_train() + for iter in range(start_iter, max_iter): + hook.before_step() + trainer.run_step() + hook.after_step() + hook.after_train() + + Notes: + 1. In the hook method, users can access `self.trainer` to access more + properties about the context (e.g., current iteration). + + 2. A hook that does something in :meth:`before_step` can often be + implemented equivalently in :meth:`after_step`. 
+ If the hook takes non-trivial time, it is strongly recommended to + implement the hook in :meth:`after_step` instead of :meth:`before_step`. + The convention is that :meth:`before_step` should only take negligible time. + + Following this convention will allow hooks that do care about the difference + between :meth:`before_step` and :meth:`after_step` (e.g., timer) to + function properly. + + Attributes: + trainer: A weak reference to the trainer object. Set by the trainer when the hook is + registered. + """ + + def before_train(self): + """ + Called before the first iteration. + """ + pass + + def after_train(self): + """ + Called after the last iteration. + """ + pass + + def before_step(self): + """ + Called before each iteration. + """ + pass + + def after_step(self): + """ + Called after each iteration. + """ + pass + + +class TrainerBase: + """ + Base class for iterative trainer with hooks. + + The only assumption we made here is: the training runs in a loop. + A subclass can implement what the loop is. + We made no assumptions about the existence of dataloader, optimizer, model, etc. + + Attributes: + iter(int): the current iteration. + + start_iter(int): The iteration to start with. + By convention the minimum possible value is 0. + + max_iter(int): The iteration to end training. + + storage(EventStorage): An EventStorage that's opened during the course of training. + """ + + def __init__(self): + self._hooks = [] + + def register_hooks(self, hooks): + """ + Register hooks to the trainer. The hooks are executed in the order + they are registered. + + Args: + hooks (list[Optional[HookBase]]): list of hooks + """ + hooks = [h for h in hooks if h is not None] + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. 
+ # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self._hooks.extend(hooks) + + def train(self, start_iter: int, max_iter: int): + """ + Args: + start_iter, max_iter (int): See docs above + """ + logger = logging.getLogger(__name__) + logger.info("Starting training from iteration {}".format(start_iter)) + + self.iter = self.start_iter = start_iter + self.max_iter = max_iter + + with EventStorage(start_iter) as self.storage: + try: + self.before_train() + for self.iter in range(start_iter, max_iter): + #if self.iter == 100: + # cann_profiling_path = './cann_profiling' + # with torch.autograd.profiler.profile(use_npu = True) as prof: + # print("begin training") + # self.before_step() + # self.run_step() + # self.after_step() + # torch.npu.synchronize() + # prof.export_chrome_trace("loss_dict6.prof") + # break + + self.before_step() + self.run_step() + self.after_step() + except Exception: + logger.exception("Exception during training:") + raise + finally: + self.after_train() + + def before_train(self): + for h in self._hooks: + h.before_train() + + def after_train(self): + for h in self._hooks: + h.after_train() + + def before_step(self): + for h in self._hooks: + h.before_step() + + def after_step(self): + for h in self._hooks: + h.after_step() + # this guarantees, that in each hook's after_step, storage.iter == trainer.iter + self.storage.step() + + def run_step(self): + raise NotImplementedError + + +class SimpleTrainer(TrainerBase): + """ + A simple trainer for the most common type of task: + single-cost single-optimizer single-data-source iterative optimization. + It assumes that every step, you: + + 1. Compute the loss with a data from the data_loader. + 2. Compute the gradients with the above loss. + 3. Update the model with the optimizer. + + All other tasks during training (checkpointing, logging, evaluation, LR schedule) + are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`. + + If you want to do anything fancier than this, + either subclass TrainerBase and implement your own `run_step`, + or write your own training loop. + """ + + def __init__(self, model, data_loader, optimizer, aspect_ratio_grouping=False): + """ + Args: + model: a torch Module. Takes a data from data_loader and returns a + dict oof losses. + data_loader: an iterable. Contains data to be used to call model. + optimizer: a torch optimizer. + """ + super().__init__() + + """ + We set the model to training mode in the trainer. + However it's valid to train a model that's in eval mode. + If you want your model (or a submodule of it) to behave + like evaluation during training, you can overwrite its train() method. + """ + model.train() + self.aspect_ratio_grouping = aspect_ratio_grouping + self.model = model + self.data_loader = data_loader + if self.aspect_ratio_grouping: + self._data_loader_iter = iter(data_loader) + self.optimizer = optimizer + + def run_step(self): + """ + Implement the standard training logic described above. + """ + assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" + start = time.perf_counter() + """ + If you want to do something with the data, you can wrap the dataloader. 
+ """ + if self.aspect_ratio_grouping: + data=next(self._data_loader_iter) + else: + data = self.data_loader.next() + data_time = time.perf_counter() - start + + """ + If you want to do something with the losses, you can wrap the model. + """ + # add prof + #cann_profiling_path = './cann_profiling' + + #with torch.autograd.profiler.profile(use_npu=True) as prof: + loss_dict = self.model(data) + #prof.export_chrome_trace("loss_dict6.prof") + + # torch.npu.synchronize() + #exit() + losses = sum(loss_dict.values()) + # end + """ + If you need to accumulate gradients or do something similar, you can + wrap the optimizer with your custom `zero_grad()` method. + """ + self.optimizer.zero_grad() + #print(torch.npu.synchronize(), "backward ") + # add prof + #with torch.autograd.profiler.profile(record_shapes=True,use_npu=True) as prof: + if self.cfg.AMP: + with amp.scale_loss(losses, self.optimizer) as scaled_loss: + + scaled_loss.backward() + else: + losses.backward() + # prof.export_chrome_trace("backward.prof") + # end + # use a new stream so the ops don't wait for DDP + with torch.npu.stream( + torch.npu.Stream() + ) if losses.device.type == "npu" else _nullcontext(): + metrics_dict = loss_dict + metrics_dict["data_time"] = data_time + self._write_metrics(metrics_dict) + self._detect_anomaly(losses, loss_dict) + + """ + If you need gradient clipping/scaling or other processing, you can + wrap the optimizer with your custom `step()` method. But it is + suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4 + """ + self.optimizer.step() + + def _detect_anomaly(self, losses, loss_dict): + if not torch.isfinite(losses).all(): + raise FloatingPointError( + "Loss became infinite or NaN at iteration={}!\nloss_dict = {}".format( + self.iter, loss_dict + ) + ) + + def _write_metrics(self, metrics_dict: dict): + """ + Args: + metrics_dict (dict): dict of scalar metrics + """ + metrics_dict = { + k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) + for k, v in metrics_dict.items() + } + # gather metrics among all workers for logging + # This assumes we do DDP-style training, which is currently the only + # supported method in detectron2. + all_metrics_dict = comm.gather(metrics_dict) + + if comm.is_main_process(): + if "data_time" in all_metrics_dict[0]: + # data_time among workers can have high variance. The actual latency + # caused by data_time is the maximum among workers. + data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) + self.storage.put_scalar("data_time", data_time) + + # average the rest metrics + metrics_dict = { + k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() + } + total_losses_reduced = sum(loss for loss in metrics_dict.values()) + + self.storage.put_scalar("total_loss", total_losses_reduced) + if len(metrics_dict) > 1: + self.storage.put_scalars(**metrics_dict) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d1f1a948349f8ac41d04a2e52a63752c916dafb0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator +from .coco_evaluation import COCOEvaluator +from .rotated_coco_evaluation import RotatedCOCOEvaluator +from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset +from .lvis_evaluation import LVISEvaluator +from .panoptic_evaluation import COCOPanopticEvaluator +from .pascal_voc_evaluation import PascalVOCDetectionEvaluator +from .sem_seg_evaluation import SemSegEvaluator +from .testing import print_csv_format, verify_results + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/cityscapes_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/cityscapes_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..8a4d1084a480d465958d07bdac219987c08e16e4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/cityscapes_evaluation.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import glob +import logging +import numpy as np +import os +import tempfile +from collections import OrderedDict +import torch +from fvcore.common.file_io import PathManager +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm + +from .evaluator import DatasetEvaluator + + +class CityscapesEvaluator(DatasetEvaluator): + """ + Base class for evaluation using cityscapes API. + """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): the name of the dataset. + It must have the following metadata associated with it: + "thing_classes", "gt_dir". 
+ """ + self._metadata = MetadataCatalog.get(dataset_name) + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") + self._temp_dir = self._working_dir.name + # All workers will write to the same results directory + # TODO this does not work in distributed training + self._temp_dir = comm.all_gather(self._temp_dir)[0] + if self._temp_dir != self._working_dir.name: + self._working_dir.cleanup() + self._logger.info( + "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) + ) + + +class CityscapesInstanceEvaluator(CityscapesEvaluator): + """ + Evaluate instance segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import name2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") + + output = output["instances"].to(self._cpu_device) + num_instances = len(output) + with open(pred_txt, "w") as fout: + for i in range(num_instances): + pred_class = output.pred_classes[i] + classes = self._metadata.thing_classes[pred_class] + class_id = name2label[classes].id + score = output.scores[i] + mask = output.pred_masks[i].numpy().astype("uint8") + png_filename = os.path.join( + self._temp_dir, basename + "_{}_{}.png".format(i, classes) + ) + + Image.fromarray(mask * 255).save(png_filename) + fout.write("{} {} {}\n".format(os.path.basename(png_filename), class_id, score)) + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP" and "AP50". + """ + comm.synchronize() + if comm.get_rank() > 0: + return + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + ret = OrderedDict() + ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} + self._working_dir.cleanup() + return ret + + +class CityscapesSemSegEvaluator(CityscapesEvaluator): + """ + Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. + + Note: + * It does not work in multi-machine distributed training. + * It contains a synchronization, therefore has to be used on all ranks. + * Only the main process runs evaluation. + """ + + def process(self, inputs, outputs): + from cityscapesscripts.helpers.labels import trainId2label + + for input, output in zip(inputs, outputs): + file_name = input["file_name"] + basename = os.path.splitext(os.path.basename(file_name))[0] + pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") + + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() + pred = 255 * np.ones(output.shape, dtype=np.uint8) + for train_id, label in trainId2label.items(): + if label.ignoreInEval: + continue + pred[output == train_id] = label.id + Image.fromarray(pred).save(pred_filename) + + def evaluate(self): + comm.synchronize() + if comm.get_rank() > 0: + return + # Load the Cityscapes eval script *after* setting the required env var, + # since the script reads CITYSCAPES_DATASET into global variables at load time. + import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval + + self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) + + # set some global states in cityscapes evaluation API, before evaluating + cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + + # These lines are adopted from + # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa + gt_dir = PathManager.get_local_path(self._metadata.gt_dir) + groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + ) + ret = OrderedDict() + ret["sem_seg"] = { + "IoU": 100.0 * results["averageScoreClasses"], + "iIoU": 100.0 * results["averageScoreInstClasses"], + "IoU_sup": 100.0 * results["averageScoreCategories"], + "iIoU_sup": 100.0 * results["averageScoreInstCategories"], + } + self._working_dir.cleanup() + return ret diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/coco_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..1186fdbcd2ff0f2d04b51817e27db1ce5837691d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/coco_evaluation.py @@ -0,0 +1,542 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from fvcore.common.file_io import PathManager +from pycocotools.coco import COCO +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.fast_eval_api import COCOeval_opt as COCOeval +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.logger import create_small_table + +from .evaluator import DatasetEvaluator + + +class COCOEvaluator(DatasetEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def __init__(self, dataset_name, cfg, distributed, output_dir=None): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have either the following corresponding metadata: + + "json_file": the path to the COCO format annotation + + Or it must be in detectron2's standard dataset format + so it can be converted to COCO format automatically. + cfg (CfgNode): config instance + distributed (True): if True, will collect results from all ranks and run evaluation + in the main process. + Otherwise, will evaluate the results in the current process. 
+ output_dir (str): optional, an output directory to dump all + results predicted on the dataset. The dump contains two files: + + 1. "instance_predictions.pth" a file in torch serialization + format that contains all the raw original predictions. + 2. "coco_instances_results.json" a json file in COCO's result + format. + """ + self._tasks = self._tasks_from_config(cfg) + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self._metadata = MetadataCatalog.get(dataset_name) + if not hasattr(self._metadata, "json_file"): + self._logger.info( + f"'{dataset_name}' is not registered by `register_coco_instances`." + " Therefore trying to convert it to COCO format ..." + ) + + cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") + self._metadata.json_file = cache_path + convert_to_coco_json(dataset_name, cache_path) + + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + self._kpt_oks_sigmas = cfg.TEST.KEYPOINT_OKS_SIGMAS + # Test set json files do not contain annotations (evaluation must be + # performed using the COCO evaluation server). + self._do_evaluation = "annotations" in self._coco_api.dataset + + def reset(self): + self._predictions = [] + + def _tasks_from_config(self, cfg): + """ + Returns: + tuple[str]: tasks that can be evaluated under the given configuration. + """ + tasks = ("bbox",) + if cfg.MODEL.MASK_ON: + tasks = tasks + ("segm",) + if cfg.MODEL.KEYPOINT_ON: + tasks = tasks + ("keypoints",) + return tasks + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. 
+ """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + # TODO this is ugly + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + keep_mask = instances.scores >= 0 + instances.scores = instances.scores[keep_mask] + instances.pred_boxes = instances.pred_boxes[keep_mask] + instances.pred_classes = instances.pred_classes[keep_mask] + if "pred_masks" in instances._fields.keys(): + instances.pred_masks = instances.pred_masks[keep_mask] + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def evaluate(self): + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return {} + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(set(self._tasks), predictions) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _eval_predictions(self, tasks, predictions): + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in coco_results: + category_id = result["category_id"] + assert ( + category_id in reverse_id_mapping + ), "A prediction has category_id={}, which is not available in the dataset.".format( + category_id + ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, coco_results, task, kpt_oks_sigmas=self._kpt_oks_sigmas + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. 
+ bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + def _derive_coco_results(self, coco_eval, iou_type, class_names=None): + """ + Derive the desired score numbers from summarized COCOeval. + + Args: + coco_eval (None or COCOEval): None represents no predictions from model. + iou_type (str): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. + + Returns: + a dict of {metric name: score} + """ + + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], + "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], + }[iou_type] + + if coco_eval is None: + self._logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + # the standard metrics + results = { + metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") + for idx, metric in enumerate(metrics) + } + self._logger.info( + "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) + ) + if not np.isfinite(sum(results.values())): + self._logger.info("Some metrics cannot be computed and is shown as NaN.") + + if class_names is None or len(class_names) <= 1: + return results + # Compute per-category AP + # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa + precisions = coco_eval.eval["precision"] + # precision has dims (iou, recall, cls, area range, max dets) + assert len(class_names) == precisions.shape[2] + + results_per_category = [] + for idx, name in enumerate(class_names): + # area range index 0: all area ranges + # max dets index -1: typically 100 per image + precision = precisions[:, :, idx, 0, -1] + precision = precision[precision > -1] + ap = np.mean(precision) if precision.size else float("nan") + results_per_category.append(("{}".format(name), float(ap * 100))) + + # tabulate it + N_COLS = min(6, len(results_per_category) * 2) + results_flatten = list(itertools.chain(*results_per_category)) + results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) + table = tabulate( + results_2d, + tablefmt="pipe", + floatfmt=".3f", + headers=["category", "AP"] * (N_COLS // 2), + numalign="left", + ) + self._logger.info("Per-category {} AP: \n".format(iou_type) + table) + + results.update({"AP-" + name: ap for name, ap in results_per_category}) + return 
results + + +def instances_to_coco_json(instances, img_id): + """ + Dump an "Instances" object to a COCO-format json that's used for evaluation. + + Args: + instances (Instances): + img_id (int): the image id + + Returns: + list[dict]: list of json annotations in COCO format. + """ + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + has_mask = instances.has("pred_masks") + if has_mask: + # use RLE to encode the masks, because they are too large and takes memory + # since this evaluator stores outputs of the entire dataset + rles = [ + mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] + for mask in instances.pred_masks + ] + for rle in rles: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which always produces strings cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the pycocotools/_mask.pyx does). + rle["counts"] = rle["counts"].decode("utf-8") + + has_keypoints = instances.has("pred_keypoints") + if has_keypoints: + keypoints = instances.pred_keypoints + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + if has_mask: + result["segmentation"] = rles[k] + if has_keypoints: + # In COCO annotations, + # keypoints coordinates are pixel indices. + # However our predictions are floating point coordinates. + # Therefore we subtract 0.5 to be consistent with the annotation format. + # This is the inverse of data loading logic in `datasets/coco.py`. + keypoints[k][:, :2] -= 0.5 + result["keypoints"] = keypoints[k].flatten().tolist() + results.append(result) + return results + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. 
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) + anno = coco_api.loadAnns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + for obj in anno + if obj["iscrowd"] == 0 + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_coco(coco_gt, coco_results, iou_type, kpt_oks_sigmas=None): + """ + Evaluate the coco results using COCOEval API. 
+ """ + assert len(coco_results) > 0 + + if iou_type == "segm": + coco_results = copy.deepcopy(coco_results) + # When evaluating mask AP, if the results contain bbox, cocoapi will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in coco_results: + c.pop("bbox", None) + + coco_dt = coco_gt.loadRes(coco_results) + coco_eval = COCOeval(coco_gt, coco_dt, iou_type) + + if iou_type == "keypoints": + # Use the COCO default keypoint OKS sigmas unless overrides are specified + if kpt_oks_sigmas: + assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" + coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) + # COCOAPI requires every detection and every gt to have keypoints, so + # we just take the first entry from both + num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 + num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 + num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) + assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( + f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " + f"Ground truth contains {num_keypoints_gt} keypoints. " + f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " + "They have to agree with each other. For meaning of OKS, please refer to " + "http://cocodataset.org/#keypoints-eval." + ) + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/evaluator.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..bfc5223d9f8e177bf1699120ec691d12c2249454 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/evaluator.py @@ -0,0 +1,212 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import datetime +import logging +import time +from collections import OrderedDict +from contextlib import contextmanager +import torch + +from detectron2.utils.comm import get_world_size, is_main_process +from detectron2.utils.logger import log_every_n_seconds + + +class DatasetEvaluator: + """ + Base class for a dataset evaluator. + + The function :func:`inference_on_dataset` runs the model over + all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. + + This class will accumulate information of the inputs/outputs (by :meth:`process`), + and produce evaluation results in the end (by :meth:`evaluate`). + """ + + def reset(self): + """ + Preparation for a new round of evaluation. + Should be called before starting a round of evaluation. 
+ """ + pass + + def process(self, inputs, outputs): + """ + Process the pair of inputs and outputs. + If they contain batches, the pairs can be consumed one-by-one using `zip`: + + .. code-block:: python + + for input_, output in zip(inputs, outputs): + # do evaluation on single input/output pair + ... + + Args: + inputs (list): the inputs that's used to call the model. + outputs (list): the return value of `model(inputs)` + """ + pass + + def evaluate(self): + """ + Evaluate/summarize the performance, after processing all input/output pairs. + + Returns: + dict: + A new evaluator class can return a dict of arbitrary format + as long as the user can process the results. + In our train_net.py, we expect the following format: + + * key: the name of the task (e.g., bbox) + * value: a dict of {metric name: score}, e.g.: {"AP50": 80} + """ + pass + + +class DatasetEvaluators(DatasetEvaluator): + """ + Wrapper class to combine multiple :class:`DatasetEvaluator` instances. + + This class dispatches every evaluation call to + all of its :class:`DatasetEvaluator`. + """ + + def __init__(self, evaluators): + """ + Args: + evaluators (list): the evaluators to combine. + """ + super().__init__() + self._evaluators = evaluators + + def reset(self): + for evaluator in self._evaluators: + evaluator.reset() + + def process(self, inputs, outputs): + for evaluator in self._evaluators: + evaluator.process(inputs, outputs) + + def evaluate(self): + results = OrderedDict() + for evaluator in self._evaluators: + result = evaluator.evaluate() + if is_main_process() and result is not None: + for k, v in result.items(): + assert ( + k not in results + ), "Different evaluators produce results with the same key {}".format(k) + results[k] = v + return results + + +def inference_on_dataset(model, data_loader, evaluator, device): + """ + Run model on the data_loader and evaluate the metrics with evaluator. + Also benchmark the inference speed of `model.forward` accurately. + The model will be used in eval mode. + + Args: + model (nn.Module): a module which accepts an object from + `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. + + If you wish to evaluate a model in `training` mode instead, you can + wrap the given model and override its behavior of `.eval()` and `.train()`. + data_loader: an iterable object with a length. + The elements it generates will be the inputs to the model. + evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want + to benchmark, but don't want to do any evaluation. 
+ + Returns: + The return value of `evaluator.evaluate()` + """ + num_devices = get_world_size() + logger = logging.getLogger(__name__) + logger.info("Start inference on {} images".format(len(data_loader))) + + total = len(data_loader) # inference data loader must have a fixed length + if evaluator is None: + # create a no-op evaluator + evaluator = DatasetEvaluators([]) + evaluator.reset() + + num_warmup = min(5, total - 1) + start_time = time.perf_counter() + total_compute_time = 0 + with inference_context(model), torch.no_grad(): + for idx, inputs in enumerate(data_loader): + for d in inputs: + d["image_preprocess"] = d["image_preprocess"].to(device) + if idx == num_warmup: + start_time = time.perf_counter() + total_compute_time = 0 + + start_compute_time = time.perf_counter() + outputs = model(inputs) + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + evaluator.process(inputs, outputs) + + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + seconds_per_img = total_compute_time / iters_after_start + if idx >= num_warmup * 2 or seconds_per_img > 5: + total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start + eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + "Inference done {}/{}. {:.4f} s / img. ETA={}".format( + idx + 1, total, seconds_per_img, str(eta) + ), + n=5, + ) + + + # Measure the time only for this worker (before the synchronization barrier) + total_time = time.perf_counter() - start_time + total_time_str = str(datetime.timedelta(seconds=total_time)) + # NOTE this format is parsed by grep + logger.info( + "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( + total_time_str, total_time / (total - num_warmup), num_devices + ) + ) + total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) + logger.info( + "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( + total_compute_time_str, total_compute_time / (total - num_warmup), num_devices + ) + ) + + results = evaluator.evaluate() + # An evaluator may return None when not in main process. + # Replace it by an empty dict instead to make it easier for downstream code to handle + if results is None: + results = {} + return results + + +@contextmanager +def inference_context(model): + """ + A context where the model is temporarily changed to eval mode, + and restored to previous mode afterwards. + + Args: + model: a torch Module + """ + training_mode = model.training + model.eval() + yield + model.train(training_mode) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/fast_eval_api.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/fast_eval_api.py new file mode 100644 index 0000000000000000000000000000000000000000..ff6e9eae08ce915c00492e2405961f2928a462bb --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/fast_eval_api.py @@ -0,0 +1,132 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import numpy as np +import time +from pycocotools.cocoeval import COCOeval + +from detectron2 import _C + + +class COCOeval_opt(COCOeval): + """ + This is a slightly modified version of the original COCO API, where the functions evaluateImg() + and accumulate() are implemented in C++ to speedup evaluation + """ + + def evaluate(self): + """ + Run per image evaluation on given images and store results in self.evalImgs_cpp, a + datastructure that isn't readable from Python but is used by a c++ implementation of + accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure + self.evalImgs because this datastructure is a computational bottleneck. + :return: None + """ + tic = time.time() + + print("Running per image evaluation...") + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + print("useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType)) + print("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds + } + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = _C.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances] + detected_instances = [[[o for c in i for o in c]] for i in detected_instances] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = _C.COCOevalEvaluateImages( + p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances + ) + self._evalImgs = None + + self._paramsEval = 
copy.deepcopy(self.params) + toc = time.time() + print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. Does not + support changing parameter settings from those used by self.evaluate() + """ + print("Accumulating evaluation results...") + tic = time.time() + if not hasattr(self, "_evalImgs_cpp"): + print("Please run evaluate() first") + + self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"]) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + print("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic)) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/lvis_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/lvis_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..68dbfc19ee84e94355732edd683f0a9f1abf87f8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/lvis_evaluation.py @@ -0,0 +1,363 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import itertools +import json +import logging +import os +import pickle +from collections import OrderedDict +import torch +from fvcore.common.file_io import PathManager + +import detectron2.utils.comm as comm +from detectron2.data import MetadataCatalog +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.logger import create_small_table + +from .coco_evaluation import instances_to_coco_json +from .evaluator import DatasetEvaluator + + +class LVISEvaluator(DatasetEvaluator): + """ + Evaluate object proposal and instance detection/segmentation outputs using + LVIS's metrics and evaluation API. + """ + + def __init__(self, dataset_name, cfg, distributed, output_dir=None): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + It must have the following corresponding metadata: + "json_file": the path to the LVIS format annotation + cfg (CfgNode): config instance + distributed (True): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + output_dir (str): optional, an output directory to dump results. 
+ """ + from lvis import LVIS + + self._tasks = self._tasks_from_config(cfg) + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self._metadata = MetadataCatalog.get(dataset_name) + json_file = PathManager.get_local_path(self._metadata.json_file) + self._lvis_api = LVIS(json_file) + # Test set json files do not contain annotations (evaluation must be + # performed using the LVIS evaluation server). + self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 + + def reset(self): + self._predictions = [] + + def _tasks_from_config(self, cfg): + """ + Returns: + tuple[str]: tasks that can be evaluated under the given configuration. + """ + tasks = ("bbox",) + if cfg.MODEL.MASK_ON: + tasks = tasks + ("segm",) + return tasks + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a LVIS model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def evaluate(self): + if self._distributed: + comm.synchronize() + predictions = comm.gather(self._predictions, dst=0) + predictions = list(itertools.chain(*predictions)) + + if not comm.is_main_process(): + return + else: + predictions = self._predictions + + if len(predictions) == 0: + self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") + return {} + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "instances_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._results = OrderedDict() + if "proposals" in predictions[0]: + self._eval_box_proposals(predictions) + if "instances" in predictions[0]: + self._eval_predictions(set(self._tasks), predictions) + # Copy so the caller can do whatever with results + return copy.deepcopy(self._results) + + def _eval_predictions(self, tasks, predictions): + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + + Args: + predictions (list[dict]): list of outputs from the model + """ + self._logger.info("Preparing results in the LVIS format ...") + lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # LVIS evaluator can be used to evaluate results for COCO dataset categories. + # In this case `_metadata` variable will have a field with COCO-specific category mapping. 
+ if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in lvis_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + else: + # unmap the category ids for LVIS (from 0-indexed to 1-indexed) + for result in lvis_results: + result["category_id"] += 1 + + if self._output_dir: + file_path = os.path.join(self._output_dir, "lvis_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(lvis_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + res = _evaluate_predictions_on_lvis( + self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _eval_box_proposals(self, predictions): + """ + Evaluate the box proposals in predictions. + Fill self._results with the metrics for "box_proposals" task. + """ + if self._output_dir: + # Saving generated box proposals to file. + # Predicted box_proposals are in XYXY_ABS mode. + bbox_mode = BoxMode.XYXY_ABS.value + ids, boxes, objectness_logits = [], [], [] + for prediction in predictions: + ids.append(prediction["image_id"]) + boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) + objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) + + proposal_data = { + "boxes": boxes, + "objectness_logits": objectness_logits, + "ids": ids, + "bbox_mode": bbox_mode, + } + with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: + pickle.dump(proposal_data, f) + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating bbox proposals ...") + res = {} + areas = {"all": "", "small": "s", "medium": "m", "large": "l"} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) + key = "AR{}@{:d}".format(suffix, limit) + res[key] = float(stats["ar"].item() * 100) + self._logger.info("Proposal metrics: \n" + create_small_table(res)) + self._results["box_proposals"] = res + + +# inspired from Detectron: +# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa +def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): + """ + Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official LVIS API recall evaluation code. However, + it produces slightly different results. 
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + "all": 0, + "small": 1, + "medium": 2, + "large": 3, + "96-128": 4, + "128-256": 5, + "256-512": 6, + "512-inf": 7, + } + area_ranges = [ + [0 ** 2, 1e5 ** 2], # all + [0 ** 2, 32 ** 2], # small + [32 ** 2, 96 ** 2], # medium + [96 ** 2, 1e5 ** 2], # large + [96 ** 2, 128 ** 2], # 96-128 + [128 ** 2, 256 ** 2], # 128-256 + [256 ** 2, 512 ** 2], # 256-512 + [512 ** 2, 1e5 ** 2], + ] # 512-inf + assert area in areas, "Unknown area range: {}".format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = [] + num_pos = 0 + + for prediction_dict in dataset_predictions: + predictions = prediction_dict["proposals"] + + # sort predictions in descending order + # TODO maybe remove this and make it explicit in the documentation + inds = predictions.objectness_logits.sort(descending=True)[1] + predictions = predictions[inds] + + ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) + anno = lvis_api.load_anns(ann_ids) + gt_boxes = [ + BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno + ] + gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes + gt_boxes = Boxes(gt_boxes) + gt_areas = torch.as_tensor([obj["area"] for obj in anno]) + + if len(gt_boxes) == 0 or len(predictions) == 0: + continue + + valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) + gt_boxes = gt_boxes[valid_gt_inds] + + num_pos += len(gt_boxes) + + if len(gt_boxes) == 0: + continue + + if limit is not None and len(predictions) > limit: + predictions = predictions[:limit] + + overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) + + _gt_overlaps = torch.zeros(len(gt_boxes)) + for j in range(min(len(predictions), len(gt_boxes))): + # find which proposal box maximally covers each gt box + # and get the iou amount of coverage for each gt box + max_overlaps, argmax_overlaps = overlaps.max(dim=0) + + # find which gt box is 'best' covered (i.e. 'best' = most iou) + gt_ovr, gt_ind = max_overlaps.max(dim=0) + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + + # append recorded iou coverage level + gt_overlaps.append(_gt_overlaps) + gt_overlaps = ( + torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) + ) + gt_overlaps, _ = torch.sort(gt_overlaps) + + if thresholds is None: + step = 0.05 + thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) + recalls = torch.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) + # ar = 2 * np.trapz(recalls, thresholds) + ar = recalls.mean() + return { + "ar": ar, + "recalls": recalls, + "thresholds": thresholds, + "gt_overlaps": gt_overlaps, + "num_pos": num_pos, + } + + +def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None): + """ + Args: + iou_type (str): + kpt_oks_sigmas (list[float]): + class_names (None or list[str]): if provided, will use it to predict + per-category AP. 
+ + Returns: + a dict of {metric name: score} + """ + metrics = { + "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], + }[iou_type] + + logger = logging.getLogger(__name__) + + if len(lvis_results) == 0: # TODO: check if needed + logger.warn("No predictions from the model!") + return {metric: float("nan") for metric in metrics} + + if iou_type == "segm": + lvis_results = copy.deepcopy(lvis_results) + # When evaluating mask AP, if the results contain bbox, LVIS API will + # use the box area as the area of the instance, instead of the mask area. + # This leads to a different definition of small/medium/large. + # We remove the bbox field to let mask AP use mask area. + for c in lvis_results: + c.pop("bbox", None) + + from lvis import LVISEval, LVISResults + + lvis_results = LVISResults(lvis_gt, lvis_results) + lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) + lvis_eval.run() + lvis_eval.print_results() + + # Pull the standard metrics from the LVIS results + results = lvis_eval.get_results() + results = {metric: float(results[metric] * 100) for metric in metrics} + logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) + return results diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/panoptic_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..41e04a2644f6044e5e048962eb9ba6b52859fc64 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/panoptic_evaluation.py @@ -0,0 +1,180 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import io +import itertools +import json +import logging +import os +import tempfile +from collections import OrderedDict +from fvcore.common.file_io import PathManager +from PIL import Image +from tabulate import tabulate + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm + +from .evaluator import DatasetEvaluator + +logger = logging.getLogger(__name__) + + +class COCOPanopticEvaluator(DatasetEvaluator): + """ + Evaluate Panoptic Quality metrics on COCO using PanopticAPI. + It saves panoptic segmentation prediction in `output_dir` + + It contains a synchronize call and has to be called from all workers. 
+ """ + + def __init__(self, dataset_name, output_dir): + """ + Args: + dataset_name (str): name of the dataset + output_dir (str): output directory to save results for evaluation + """ + self._metadata = MetadataCatalog.get(dataset_name) + self._thing_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + self._stuff_contiguous_id_to_dataset_id = { + v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() + } + + self._predictions_json = os.path.join(output_dir, "predictions.json") + + def reset(self): + self._predictions = [] + + def _convert_category_id(self, segment_info): + isthing = segment_info.pop("isthing", None) + if isthing is None: + # the model produces panoptic category id directly. No more conversion needed + return segment_info + if isthing is True: + segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ + segment_info["category_id"] + ] + return segment_info + + def process(self, inputs, outputs): + from panopticapi.utils import id2rgb + + for input, output in zip(inputs, outputs): + panoptic_img, segments_info = output["panoptic_seg"] + panoptic_img = panoptic_img.cpu().numpy() + + file_name = os.path.basename(input["file_name"]) + file_name_png = os.path.splitext(file_name)[0] + ".png" + with io.BytesIO() as out: + Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") + segments_info = [self._convert_category_id(x) for x in segments_info] + self._predictions.append( + { + "image_id": input["image_id"], + "file_name": file_name_png, + "png_string": out.getvalue(), + "segments_info": segments_info, + } + ) + + def evaluate(self): + comm.synchronize() + + self._predictions = comm.gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not comm.is_main_process(): + return + + # PanopticApi requires local files + gt_json = PathManager.get_local_path(self._metadata.panoptic_json) + gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) + + with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: + logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) + for p in self._predictions: + with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: + f.write(p.pop("png_string")) + + with open(gt_json, "r") as f: + json_data = json.load(f) + json_data["annotations"] = self._predictions + with PathManager.open(self._predictions_json, "w") as f: + f.write(json.dumps(json_data)) + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + gt_json, + PathManager.get_local_path(self._predictions_json), + gt_folder=gt_folder, + pred_folder=pred_dir, + ) + + res = {} + res["PQ"] = 100 * pq_res["All"]["pq"] + res["SQ"] = 100 * pq_res["All"]["sq"] + res["RQ"] = 100 * pq_res["All"]["rq"] + res["PQ_th"] = 100 * pq_res["Things"]["pq"] + res["SQ_th"] = 100 * pq_res["Things"]["sq"] + res["RQ_th"] = 100 * pq_res["Things"]["rq"] + res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] + res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] + res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] + + results = OrderedDict({"panoptic_seg": res}) + _print_panoptic_results(pq_res) + + return results + + +def _print_panoptic_results(pq_res): + headers = ["", "PQ", "SQ", "RQ", "#categories"] + data = [] + for name in ["All", "Things", "Stuff"]: + row = [name] + 
[pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] + data.append(row) + table = tabulate( + data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" + ) + logger.info("Panoptic Evaluation Results:\n" + table) + + +if __name__ == "__main__": + from detectron2.utils.logger import setup_logger + + logger = setup_logger() + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--gt-json") + parser.add_argument("--gt-dir") + parser.add_argument("--pred-json") + parser.add_argument("--pred-dir") + args = parser.parse_args() + + from panopticapi.evaluation import pq_compute + + with contextlib.redirect_stdout(io.StringIO()): + pq_res = pq_compute( + args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir + ) + _print_panoptic_results(pq_res) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/pascal_voc_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/pascal_voc_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..d63dbf66ab70fe22918bed90a2f3ce5db8f74905 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/pascal_voc_evaluation.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +import os +import tempfile +import xml.etree.ElementTree as ET +from collections import OrderedDict, defaultdict +from functools import lru_cache +import torch +from fvcore.common.file_io import PathManager + +from detectron2.data import MetadataCatalog +from detectron2.utils import comm + +from .evaluator import DatasetEvaluator + + +class PascalVOCDetectionEvaluator(DatasetEvaluator): + """ + Evaluate Pascal VOC style AP for Pascal VOC dataset. + It contains a synchronization, therefore has to be called from all ranks. + + Note that the concept of AP can be implemented in different ways and may not + produce identical results. This class mimics the implementation of the official + Pascal VOC Matlab API, and should produce similar but not identical results to the + official API. 
+ """ + + def __init__(self, dataset_name): + """ + Args: + dataset_name (str): name of the dataset, e.g., "voc_2007_test" + """ + self._dataset_name = dataset_name + meta = MetadataCatalog.get(dataset_name) + self._anno_file_template = os.path.join(meta.dirname, "Annotations", "{}.xml") + self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") + self._class_names = meta.thing_classes + assert meta.year in [2007, 2012], meta.year + self._is_2007 = meta.year == 2007 + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + def reset(self): + self._predictions = defaultdict(list) # class name -> list of prediction strings + + def process(self, inputs, outputs): + for input, output in zip(inputs, outputs): + image_id = input["image_id"] + instances = output["instances"].to(self._cpu_device) + boxes = instances.pred_boxes.tensor.numpy() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + for box, score, cls in zip(boxes, scores, classes): + xmin, ymin, xmax, ymax = box + # The inverse of data loading logic in `datasets/pascal_voc.py` + xmin += 1 + ymin += 1 + self._predictions[cls].append( + f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" + ) + + def evaluate(self): + """ + Returns: + dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". + """ + all_predictions = comm.gather(self._predictions, dst=0) + if not comm.is_main_process(): + return + predictions = defaultdict(list) + for predictions_per_rank in all_predictions: + for clsid, lines in predictions_per_rank.items(): + predictions[clsid].extend(lines) + del all_predictions + + self._logger.info( + "Evaluating {} using {} metric. " + "Note that results do not use the official Matlab API.".format( + self._dataset_name, 2007 if self._is_2007 else 2012 + ) + ) + + with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: + res_file_template = os.path.join(dirname, "{}.txt") + + aps = defaultdict(list) # iou -> ap per class + for cls_id, cls_name in enumerate(self._class_names): + lines = predictions.get(cls_id, [""]) + + with open(res_file_template.format(cls_name), "w") as f: + f.write("\n".join(lines)) + + for thresh in range(50, 100, 5): + rec, prec, ap = voc_eval( + res_file_template, + self._anno_file_template, + self._image_set_path, + cls_name, + ovthresh=thresh / 100.0, + use_07_metric=self._is_2007, + ) + aps[thresh].append(ap * 100) + + ret = OrderedDict() + mAP = {iou: np.mean(x) for iou, x in aps.items()} + ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} + return ret + + +############################################################################## +# +# Below code is modified from +# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py +# -------------------------------------------------------- +# Fast/er R-CNN +# Licensed under The MIT License [see LICENSE for details] +# Written by Bharath Hariharan +# -------------------------------------------------------- + +"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" + + +@lru_cache(maxsize=None) +def parse_rec(filename): + """Parse a PASCAL VOC xml file.""" + with PathManager.open(filename) as f: + tree = ET.parse(f) + objects = [] + for obj in tree.findall("object"): + obj_struct = {} + obj_struct["name"] = obj.find("name").text + obj_struct["pose"] = obj.find("pose").text + obj_struct["truncated"] = int(obj.find("truncated").text) + 
obj_struct["difficult"] = int(obj.find("difficult").text) + bbox = obj.find("bndbox") + obj_struct["bbox"] = [ + int(bbox.find("xmin").text), + int(bbox.find("ymin").text), + int(bbox.find("xmax").text), + int(bbox.find("ymax").text), + ] + objects.append(obj_struct) + + return objects + + +def voc_ap(rec, prec, use_07_metric=False): + """Compute VOC AP given precision and recall. If use_07_metric is true, uses + the VOC 07 11-point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0.0 + for t in np.arange(0.0, 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11.0 + else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], rec, [1.0])) + mpre = np.concatenate(([0.0], prec, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): + """rec, prec, ap = voc_eval(detpath, + annopath, + imagesetfile, + classname, + [ovthresh], + [use_07_metric]) + + Top level function that does the PASCAL VOC evaluation. + + detpath: Path to detections + detpath.format(classname) should produce the detection results file. + annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. + imagesetfile: Text file containing the list of images, one image per line. + classname: Category name (duh) + [ovthresh]: Overlap threshold (default = 0.5) + [use_07_metric]: Whether to use VOC07's 11 point AP computation + (default False) + """ + # assumes detections are in detpath.format(classname) + # assumes annotations are in annopath.format(imagename) + # assumes imagesetfile is a text file with each line an image name + + # first load gt + # read list of images + with PathManager.open(imagesetfile, "r") as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + # load annots + recs = {} + for imagename in imagenames: + recs[imagename] = parse_rec(annopath.format(imagename)) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj["name"] == classname] + bbox = np.array([x["bbox"] for x in R]) + difficult = np.array([x["difficult"] for x in R]).astype(np.bool) + # difficult = np.array([False for x in R]).astype(np.bool) # treat all "difficult" as GT + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, "r") as f: + lines = f.readlines() + + splitlines = [x.strip().split(" ") for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + 
BBGT = R["bbox"].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + ixmin = np.maximum(BBGT[:, 0], bb[0]) + iymin = np.maximum(BBGT[:, 1], bb[1]) + ixmax = np.minimum(BBGT[:, 2], bb[2]) + iymax = np.minimum(BBGT[:, 3], bb[3]) + iw = np.maximum(ixmax - ixmin + 1.0, 0.0) + ih = np.maximum(iymax - iymin + 1.0, 0.0) + inters = iw * ih + + # union + uni = ( + (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) + + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) + - inters + ) + + overlaps = inters / uni + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + + if ovmax > ovthresh: + if not R["difficult"][jmax]: + if not R["det"][jmax]: + tp[d] = 1.0 + R["det"][jmax] = 1 + else: + fp[d] = 1.0 + else: + fp[d] = 1.0 + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/rotated_coco_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/rotated_coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..63f53ab6a560cc37b84d7825694c5b108f8e902a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/rotated_coco_evaluation.py @@ -0,0 +1,217 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import itertools +import json +import numpy as np +import os +import torch +from fvcore.common.file_io import PathManager +from pycocotools.cocoeval import COCOeval, maskUtils + +from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated + +from .coco_evaluation import COCOEvaluator + + +class RotatedCOCOeval(COCOeval): + @staticmethod + def is_rotated(box_list): + if type(box_list) == np.ndarray: + return box_list.shape[1] == 5 + elif type(box_list) == list: + if box_list == []: # cannot decide the box_dim + return False + return np.all( + np.array( + [ + (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) + for obj in box_list + ] + ) + ) + return False + + @staticmethod + def boxlist_to_tensor(boxlist, output_box_dim): + if type(boxlist) == np.ndarray: + box_tensor = torch.from_numpy(boxlist) + elif type(boxlist) == list: + if boxlist == []: + return torch.zeros((0, output_box_dim), dtype=torch.float32) + else: + box_tensor = torch.FloatTensor(boxlist) + else: + raise Exception("Unrecognized boxlist type") + + input_box_dim = box_tensor.shape[1] + if input_box_dim != output_box_dim: + if input_box_dim == 4 and output_box_dim == 5: + box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + else: + raise Exception( + "Unable to convert from {}-dim box to {}-dim box".format( + input_box_dim, output_box_dim + ) + ) + return box_tensor + + def compute_iou_dt_gt(self, dt, gt, is_crowd): + if self.is_rotated(dt) or self.is_rotated(gt): + # TODO: take is_crowd into consideration + assert all(c == 0 for c in is_crowd) + dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) + gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) + return pairwise_iou_rotated(dt, gt) + else: + # This is the same as the classical COCO evaluation + return maskUtils.iou(dt, gt, is_crowd) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + assert p.iouType == "bbox", "unsupported iouType for iou computation" + + g = [g["bbox"] for g in gt] + d = [d["bbox"] for d in dt] + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + + # Note: this function is copied from cocoeval.py in cocoapi + # and the major difference is here. + ious = self.compute_iou_dt_gt(d, g, iscrowd) + return ious + + +class RotatedCOCOEvaluator(COCOEvaluator): + """ + Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, + with rotated boxes support. + Note: this uses IOU only and does not consider angle differences. + """ + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. 
+ """ + for input, output in zip(inputs, outputs): + prediction = {"image_id": input["image_id"]} + + if "instances" in output: + instances = output["instances"].to(self._cpu_device) + + prediction["instances"] = self.instances_to_json(instances, input["image_id"]) + if "proposals" in output: + prediction["proposals"] = output["proposals"].to(self._cpu_device) + self._predictions.append(prediction) + + def instances_to_json(self, instances, img_id): + num_instance = len(instances) + if num_instance == 0: + return [] + + boxes = instances.pred_boxes.tensor.numpy() + if boxes.shape[1] == 4: + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + boxes = boxes.tolist() + scores = instances.scores.tolist() + classes = instances.pred_classes.tolist() + + results = [] + for k in range(num_instance): + result = { + "image_id": img_id, + "category_id": classes[k], + "bbox": boxes[k], + "score": scores[k], + } + + results.append(result) + return results + + def _eval_predictions(self, tasks, predictions): + """ + Evaluate predictions on the given tasks. + Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + reverse_id_mapping = { + v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() + } + for result in coco_results: + result["category_id"] = reverse_id_mapping[result["category_id"]] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info("Evaluating predictions ...") + for task in sorted(tasks): + assert task == "bbox", "Task {} is not supported".format(task) + coco_eval = ( + self._evaluate_predictions_on_coco(self._coco_api, coco_results) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res + + def _evaluate_predictions_on_coco(self, coco_gt, coco_results): + """ + Evaluate the coco results using COCOEval API. + """ + assert len(coco_results) > 0 + + coco_dt = coco_gt.loadRes(coco_results) + + # Only bbox is supported for now + coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") + + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + return coco_eval diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/sem_seg_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/sem_seg_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..2b7f739d6fd4fc72a58bf0b5f78bb05f0ea0e23f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/sem_seg_evaluation.py @@ -0,0 +1,181 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import json +import logging +import numpy as np +import os +from collections import OrderedDict +import PIL.Image as Image +import pycocotools.mask as mask_util +import torch +from fvcore.common.file_io import PathManager + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.comm import all_gather, is_main_process, synchronize + +from .evaluator import DatasetEvaluator + + +class SemSegEvaluator(DatasetEvaluator): + """ + Evaluate semantic segmentation metrics. + """ + + def __init__(self, dataset_name, distributed, num_classes, ignore_label=255, output_dir=None): + """ + Args: + dataset_name (str): name of the dataset to be evaluated. + distributed (True): if True, will collect results from all ranks for evaluation. + Otherwise, will evaluate the results in the current process. + num_classes (int): number of classes + ignore_label (int): value in semantic segmentation ground truth. Predictions for the + corresponding pixels should be ignored. + output_dir (str): an output directory to dump results. + """ + self._dataset_name = dataset_name + self._distributed = distributed + self._output_dir = output_dir + self._num_classes = num_classes + self._ignore_label = ignore_label + self._N = num_classes + 1 + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self.input_file_to_gt_file = { + dataset_record["file_name"]: dataset_record["sem_seg_file_name"] + for dataset_record in DatasetCatalog.get(dataset_name) + } + + meta = MetadataCatalog.get(dataset_name) + # Dict that maps contiguous training ids to COCO category ids + try: + c2d = meta.stuff_dataset_id_to_contiguous_id + self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} + except AttributeError: + self._contiguous_id_to_dataset_id = None + self._class_names = meta.stuff_classes + + def reset(self): + self._conf_matrix = np.zeros((self._N, self._N), dtype=np.int64) + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a model. + It is a list of dicts. Each dict corresponds to an image and + contains keys like "height", "width", "file_name". + outputs: the outputs of a model. It is either list of semantic segmentation predictions + (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic + segmentation prediction in the same format. 
+ """ + for input, output in zip(inputs, outputs): + output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) + pred = np.array(output, dtype=np.int) + with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: + gt = np.array(Image.open(f), dtype=np.int) + + gt[gt == self._ignore_label] = self._num_classes + + self._conf_matrix += np.bincount( + self._N * pred.reshape(-1) + gt.reshape(-1), minlength=self._N ** 2 + ).reshape(self._N, self._N) + + self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) + + def evaluate(self): + """ + Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): + + * Mean intersection-over-union averaged across classes (mIoU) + * Frequency Weighted IoU (fwIoU) + * Mean pixel accuracy averaged across classes (mACC) + * Pixel Accuracy (pACC) + """ + if self._distributed: + synchronize() + conf_matrix_list = all_gather(self._conf_matrix) + self._predictions = all_gather(self._predictions) + self._predictions = list(itertools.chain(*self._predictions)) + if not is_main_process(): + return + + self._conf_matrix = np.zeros_like(self._conf_matrix) + for conf_matrix in conf_matrix_list: + self._conf_matrix += conf_matrix + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(self._predictions)) + + acc = np.full(self._num_classes, np.nan, dtype=np.float) + iou = np.full(self._num_classes, np.nan, dtype=np.float) + tp = self._conf_matrix.diagonal()[:-1].astype(np.float) + pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) + class_weights = pos_gt / np.sum(pos_gt) + pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) + acc_valid = pos_gt > 0 + acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] + iou_valid = (pos_gt + pos_pred) > 0 + union = pos_gt + pos_pred - tp + iou[acc_valid] = tp[acc_valid] / union[acc_valid] + macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) + miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) + fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) + pacc = np.sum(tp) / np.sum(pos_gt) + + res = {} + res["mIoU"] = 100 * miou + res["fwIoU"] = 100 * fiou + for i, name in enumerate(self._class_names): + res["IoU-{}".format(name)] = 100 * iou[i] + res["mACC"] = 100 * macc + res["pACC"] = 100 * pacc + for i, name in enumerate(self._class_names): + res["ACC-{}".format(name)] = 100 * acc[i] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(res, f) + results = OrderedDict({"sem_seg": res}) + self._logger.info(results) + return results + + def encode_json_sem_seg(self, sem_seg, input_file_name): + """ + Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 
+ See http://cocodataset.org/#format-results + """ + json_list = [] + for label in np.unique(sem_seg): + if self._contiguous_id_to_dataset_id is not None: + assert ( + label in self._contiguous_id_to_dataset_id + ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) + dataset_id = self._contiguous_id_to_dataset_id[label] + else: + dataset_id = int(label) + mask = (sem_seg == label).astype(np.uint8) + mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] + mask_rle["counts"] = mask_rle["counts"].decode("utf-8") + json_list.append( + {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} + ) + return json_list diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/testing.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f00715315221bf164260688c36179a490b9fe6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/evaluation/testing.py @@ -0,0 +1,91 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import numpy as np +import pprint +import sys +from collections import OrderedDict +from collections.abc import Mapping + + +def print_csv_format(results): + """ + Print main metrics in a format similar to Detectron, + so that they are easy to copypaste into a spreadsheet. + + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + """ + assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed + logger = logging.getLogger(__name__) + for task, res in results.items(): + # Don't print "AP-category" metrics since they are usually not tracked. 
+ important_res = [(k, v) for k, v in res.items() if "-" not in k] + logger.info("copypaste: Task: {}".format(task)) + logger.info("copypaste: " + ",".join([k[0] for k in important_res])) + logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) + + +def verify_results(cfg, results): + """ + Args: + results (OrderedDict[dict]): task_name -> {metric -> score} + + Returns: + bool: whether the verification succeeds or not + """ + expected_results = cfg.TEST.EXPECTED_RESULTS + if not len(expected_results): + return True + + ok = True + for task, metric, expected, tolerance in expected_results: + actual = results[task][metric] + if not np.isfinite(actual): + ok = False + diff = abs(actual - expected) + if diff > tolerance: + ok = False + + logger = logging.getLogger(__name__) + if not ok: + logger.error("Result verification failed!") + logger.error("Expected Results: " + str(expected_results)) + logger.error("Actual Results: " + pprint.pformat(results)) + + sys.exit(1) + else: + logger.info("Results verification passed.") + return ok + + +def flatten_results_dict(results): + """ + Expand a hierarchical dict of scalars into a flat dict of scalars. + If results[k1][k2][k3] = v, the returned dict will have the entry + {"k1/k2/k3": v}. + + Args: + results (dict): + """ + r = {} + for k, v in results.items(): + if isinstance(v, Mapping): + v = flatten_results_dict(v) + for kk, vv in v.items(): + r[k + "/" + kk] = vv + else: + r[k] = v + return r diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fcd33513fb81ef3aeb4d3c8d9732324dffa2646 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/README.md @@ -0,0 +1,13 @@ + +This directory contains code to prepare a detectron2 model for deployment. +Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. + +Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. + + +### Acknowledgements + +Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. + +Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who +help export Detectron2 models to TorchScript. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..48a1ebc1af741d4c4577e98ad0d71807e7f57e07 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
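flatten_results_dict above turns the nested task -> metric hierarchy into slash-joined scalar keys, which is convenient for logging to scalar writers. A quick illustration with made-up numbers, assuming detectron2 is importable from the path this patch adds:

from detectron2.evaluation.testing import flatten_results_dict

results = {"bbox": {"AP": 39.4, "AP50": 61.2}, "segm": {"AP": 35.1}}
print(flatten_results_dict(results))
# {'bbox/AP': 39.4, 'bbox/AP50': 61.2, 'segm/AP': 35.1}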
+# -*- coding: utf-8 -*- + + +from .api import * + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/api.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/api.py new file mode 100644 index 0000000000000000000000000000000000000000..dfc544d8e11b435fe73f163db62213835d8a3a42 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/api.py @@ -0,0 +1,299 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import logging +import os +import torch +from caffe2.proto import caffe2_pb2 +from torch import nn + +from detectron2.config import CfgNode as CN + +from .caffe2_inference import ProtobufDetectionModel +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph + +__all__ = [ + "add_export_config", + "export_caffe2_model", + "Caffe2Model", + "export_onnx_model", + "Caffe2Tracer", +] + + +def add_export_config(cfg): + """ + Args: + cfg (CfgNode): a detectron2 config + + Returns: + CfgNode: an updated config with new options that will be used + by :class:`Caffe2Tracer`. + """ + is_frozen = cfg.is_frozen() + cfg.defrost() + cfg.EXPORT_CAFFE2 = CN() + cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False + if is_frozen: + cfg.freeze() + return cfg + + +class Caffe2Tracer: + """ + Make a detectron2 model traceable with caffe2 style. + + An original detectron2 model may not be traceable, or + cannot be deployed directly after being traced, due to some reasons: + + 1. control flow in some ops + 2. custom ops + 3. complicated pre/post processing + + This class provides a traceable version of a detectron2 model by: + + 1. Rewrite parts of the model using ops in caffe2. Note that some ops do + not have GPU implementation. + 2. Define the inputs "after pre-processing" as inputs to the model + 3. Remove post-processing and produce raw layer outputs + + More specifically about inputs: all builtin models take two input tensors. + + 1. NCHW float "data" which is an image (usually in [0, 255]) + 2. Nx3 float "im_info", each row of which is (height, width, 1.0) + + After making a traceable model, the class provide methods to export such a + model to different deployment formats. + + The class currently only supports models using builtin meta architectures. + """ + + def __init__(self, cfg, model, inputs): + """ + Args: + cfg (CfgNode): a detectron2 config, with extra export-related options + added by :func:`add_export_config`. + model (nn.Module): a model built by + :func:`detectron2.modeling.build_model`. Weights have to be already + loaded to this model. + inputs: sample inputs that the given model takes for inference. + Will be used to trace the model. 
Random input with no detected objects + will not work if the model has data-dependent control flow (e.g., R-CNN). + """ + assert isinstance(cfg, CN), cfg + assert isinstance(model, torch.nn.Module), type(model) + if "EXPORT_CAFFE2" not in cfg: + cfg = add_export_config(cfg) # will just the defaults + + self.cfg = cfg + self.model = model + self.inputs = inputs + + def _get_traceable(self): + # TODO how to make it extensible to support custom models + C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[self.cfg.MODEL.META_ARCHITECTURE] + traceable_model = C2MetaArch(self.cfg, copy.deepcopy(self.model)) + traceable_inputs = traceable_model.get_caffe2_inputs(self.inputs) + return traceable_model, traceable_inputs + + def export_caffe2(self): + """ + Export the model to Caffe2's protobuf format. + The returned object can be saved with ``.save_protobuf()`` method. + The result can be loaded and executed using Caffe2 runtime. + + Returns: + Caffe2Model + """ + from .caffe2_export import export_caffe2_detection_model + + model, inputs = self._get_traceable() + predict_net, init_net = export_caffe2_detection_model(model, inputs) + return Caffe2Model(predict_net, init_net) + + def export_onnx(self): + """ + Export the model to ONNX format. + Note that the exported model contains custom ops only available in caffe2, therefore it + cannot be directly executed by other runtime. Post-processing or transformation passes + may be applied on the model to accommodate different runtimes. + + Returns: + onnx.ModelProto: an onnx model. + """ + from .caffe2_export import export_onnx_model as export_onnx_model_impl + + model, inputs = self._get_traceable() + return export_onnx_model_impl(model, (inputs,)) + + def export_torchscript(self): + """ + Export the model to a ``torch.jit.TracedModule`` by tracing. + The returned object can be saved to a file by ``.save()``. + + Returns: + torch.jit.TracedModule: a torch TracedModule + """ + model, inputs = self._get_traceable() + logger = logging.getLogger(__name__) + logger.info("Tracing the model with torch.jit.trace ...") + with torch.no_grad(): + return torch.jit.trace(model, (inputs,), optimize=True) + + +def export_caffe2_model(cfg, model, inputs): + """ + Export a detectron2 model to caffe2 format. + + Args: + cfg (CfgNode): a detectron2 config, with extra export-related options + added by :func:`add_export_config`. + model (nn.Module): a model built by + :func:`detectron2.modeling.build_model`. + It will be modified by this function. + inputs: sample inputs that the given model takes for inference. + Will be used to trace the model. + + Returns: + Caffe2Model + """ + return Caffe2Tracer(cfg, model, inputs).export_caffe2() + + +def export_onnx_model(cfg, model, inputs): + """ + Export a detectron2 model to ONNX format. + Note that the exported model contains custom ops only available in caffe2, therefore it + cannot be directly executed by other runtime. Post-processing or transformation passes + may be applied on the model to accommodate different runtimes. + + Args: + cfg (CfgNode): a detectron2 config, with extra export-related options + added by :func:`add_export_config`. + model (nn.Module): a model built by + :func:`detectron2.modeling.build_model`. + It will be modified by this function. + inputs: sample inputs that the given model takes for inference. + Will be used to trace the model. + Returns: + onnx.ModelProto: an onnx model. 
+ """ + return Caffe2Tracer(cfg, model, inputs).export_onnx() + + +class Caffe2Model(nn.Module): + """ + A wrapper around the traced model in caffe2's pb format. + """ + + def __init__(self, predict_net, init_net): + super().__init__() + self.eval() # always in eval mode + self._predict_net = predict_net + self._init_net = init_net + self._predictor = None + + @property + def predict_net(self): + """ + Returns: + core.Net: the underlying caffe2 predict net + """ + return self._predict_net + + @property + def init_net(self): + """ + Returns: + core.Net: the underlying caffe2 init net + """ + return self._init_net + + __init__.__HIDE_SPHINX_DOC__ = True + + def save_protobuf(self, output_dir): + """ + Save the model as caffe2's protobuf format. + + Args: + output_dir (str): the output directory to save protobuf files. + """ + logger = logging.getLogger(__name__) + logger.info("Saving model to {} ...".format(output_dir)) + os.makedirs(output_dir, exist_ok=True) + + with open(os.path.join(output_dir, "model.pb"), "wb") as f: + f.write(self._predict_net.SerializeToString()) + with open(os.path.join(output_dir, "model.pbtxt"), "w") as f: + f.write(str(self._predict_net)) + with open(os.path.join(output_dir, "model_init.pb"), "wb") as f: + f.write(self._init_net.SerializeToString()) + + def save_graph(self, output_file, inputs=None): + """ + Save the graph as SVG format. + + Args: + output_file (str): a SVG file + inputs: optional inputs given to the model. + If given, the inputs will be used to run the graph to record + shape of every tensor. The shape information will be + saved together with the graph. + """ + from .caffe2_export import run_and_save_graph + + if inputs is None: + save_graph(self._predict_net, output_file, op_only=False) + else: + size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) + device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") + inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) + inputs = [x.cpu().numpy() for x in inputs] + run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) + + @staticmethod + def load_protobuf(dir): + """ + Args: + dir (str): a directory used to save Caffe2Model with + :meth:`save_protobuf`. + The files "model.pb" and "model_init.pb" are needed. + + Returns: + Caffe2Model: the caffe2 model loaded from this directory. + """ + predict_net = caffe2_pb2.NetDef() + with open(os.path.join(dir, "model.pb"), "rb") as f: + predict_net.ParseFromString(f.read()) + + init_net = caffe2_pb2.NetDef() + with open(os.path.join(dir, "model_init.pb"), "rb") as f: + init_net.ParseFromString(f.read()) + + return Caffe2Model(predict_net, init_net) + + def __call__(self, inputs): + """ + An interface that wraps around a caffe2 model and mimics detectron2's models' + input & output format. This is used to compare the outputs of caffe2 model + with its original torch model. + + Due to the extra conversion between torch/caffe2, + this method is not meant for benchmark. 
+ """ + if self._predictor is None: + self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) + return self._predictor(inputs) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/c10.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/c10.py new file mode 100644 index 0000000000000000000000000000000000000000..7bb9ea92f90a6fcb629868fff52c9b4e8c8ce0b7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/c10.py @@ -0,0 +1,516 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import torch +import torch.nn.functional as F + +from detectron2.layers import cat +from detectron2.layers.roi_align_rotated import ROIAlignRotated +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference +from detectron2.structures import Boxes, ImageList, Instances, Keypoints + +from .shared import alias, to_device + + +""" +This file contains caffe2-compatible implementation of several detectrno2 components. +""" + + +class Caffe2Boxes(Boxes): + """ + Representing a list of detectron2.structures.Boxes from minibatch, each box + is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector + (batch index + 5 coordinates) for RotatedBoxes. + """ + + def __init__(self, tensor): + assert isinstance(tensor, torch.Tensor) + assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() + # TODO: make tensor immutable when dim is Nx5 for Boxes, + # and Nx6 for RotatedBoxes? + self.tensor = tensor + + +# TODO clean up this class, maybe just extend Instances +class InstancesList(object): + """ + Tensor representation of a list of Instances object for a batch of images. + + When dealing with a batch of images with Caffe2 ops, a list of bboxes + (instances) are usually represented by single Tensor with size + (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is + for providing common functions to convert between these two representations. + """ + + def __init__(self, im_info, indices, extra_fields=None): + # [N, 3] -> (H, W, Scale) + self.im_info = im_info + # [N,] -> indice of batch to which the instance belongs + self.indices = indices + # [N, ...] 
+ self.batch_extra_fields = extra_fields or {} + + self.image_size = self.im_info + + def get_fields(self): + """ like `get_fields` in the Instances object, + but return each field in tensor representations """ + ret = {} + for k, v in self.batch_extra_fields.items(): + # if isinstance(v, torch.Tensor): + # tensor_rep = v + # elif isinstance(v, (Boxes, Keypoints)): + # tensor_rep = v.tensor + # else: + # raise ValueError("Can't find tensor representation for: {}".format()) + ret[k] = v + return ret + + def has(self, name): + return name in self.batch_extra_fields + + def set(self, name, value): + data_len = len(value) + if len(self.batch_extra_fields): + assert ( + len(self) == data_len + ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) + self.batch_extra_fields[name] = value + + def __setattr__(self, name, val): + if name in ["im_info", "indices", "batch_extra_fields", "image_size"]: + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name): + if name not in self.batch_extra_fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self.batch_extra_fields[name] + + def __len__(self): + return len(self.indices) + + def flatten(self): + ret = [] + for _, v in self.batch_extra_fields.items(): + if isinstance(v, (Boxes, Keypoints)): + ret.append(v.tensor) + else: + ret.append(v) + return ret + + @staticmethod + def to_d2_instances_list(instances_list): + """ + Convert InstancesList to List[Instances]. The input `instances_list` can + also be a List[Instances], in this case this method is a non-op. + """ + if not isinstance(instances_list, InstancesList): + assert all(isinstance(x, Instances) for x in instances_list) + return instances_list + + ret = [] + for i, info in enumerate(instances_list.im_info): + instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) + + ids = instances_list.indices == i + for k, v in instances_list.batch_extra_fields.items(): + if isinstance(v, torch.Tensor): + instances.set(k, v[ids]) + continue + elif isinstance(v, Boxes): + instances.set(k, v[ids, -4:]) + continue + + target_type, tensor_source = v + assert isinstance(tensor_source, torch.Tensor) + assert tensor_source.shape[0] == instances_list.indices.shape[0] + tensor_source = tensor_source[ids] + + if issubclass(target_type, Boxes): + instances.set(k, Boxes(tensor_source[:, -4:])) + elif issubclass(target_type, Keypoints): + instances.set(k, Keypoints(tensor_source)) + elif issubclass(target_type, torch.Tensor): + instances.set(k, tensor_source) + else: + raise ValueError("Can't handle targe type: {}".format(target_type)) + + ret.append(instances) + return ret + + +class Caffe2Compatible(object): + def _get_tensor_mode(self): + return self._tensor_mode + + def _set_tensor_mode(self, v): + self._tensor_mode = v + + tensor_mode = property(_get_tensor_mode, _set_tensor_mode) + """ + If true, the model expects C2-style tensor only inputs/outputs format. 
+ """ + + +class Caffe2RPN(Caffe2Compatible, rpn.RPN): + def forward(self, images, features, gt_instances=None): + assert not self.training + + features = [features[f] for f in self.in_features] + objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) + + assert isinstance(images, ImageList) + if self.tensor_mode: + im_info = images.image_sizes + else: + im_info = torch.Tensor( + [[im_sz[0], im_sz[1], torch.Tensor([1.0])] for im_sz in images.image_sizes] + ).to(images.tensor.device) + assert isinstance(im_info, torch.Tensor) + + rpn_rois_list = [] + rpn_roi_probs_list = [] + for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( + objectness_logits_pred, + anchor_deltas_pred, + iter(self.anchor_generator.cell_anchors), + self.anchor_generator.strides, + ): + scores = scores.detach() + bbox_deltas = bbox_deltas.detach() + + rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( + scores, + bbox_deltas, + im_info, + cell_anchors_tensor, + spatial_scale=1.0 / feat_stride, + pre_nms_topN=self.pre_nms_topk[self.training], + post_nms_topN=self.post_nms_topk[self.training], + nms_thresh=self.nms_thresh, + min_size=self.min_box_size, + # correct_transform_coords=True, # deprecated argument + angle_bound_on=True, # Default + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, # Default + legacy_plus_one=False, + ) + rpn_rois_list.append(rpn_rois) + rpn_roi_probs_list.append(rpn_roi_probs) + + # For FPN in D2, in RPN all proposals from different levels are concated + # together, ranked and picked by top post_nms_topk. Then in ROIPooler + # it calculates level_assignments and calls the RoIAlign from + # the corresponding level. + + if len(objectness_logits_pred) == 1: + rpn_rois = rpn_rois_list[0] + rpn_roi_probs = rpn_roi_probs_list[0] + else: + assert len(rpn_rois_list) == len(rpn_roi_probs_list) + rpn_post_nms_topN = self.post_nms_topk[self.training] + + device = rpn_rois_list[0].device + input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] + + # TODO remove this after confirming rpn_max_level/rpn_min_level + # is not needed in CollectRpnProposals. + feature_strides = list(self.anchor_generator.strides) + rpn_min_level = int(math.log2(feature_strides[0])) + rpn_max_level = int(math.log2(feature_strides[-1])) + assert (rpn_max_level - rpn_min_level + 1) == len( + rpn_rois_list + ), "CollectRpnProposals requires continuous levels" + + rpn_rois = torch.ops._caffe2.CollectRpnProposals( + input_list, + # NOTE: in current implementation, rpn_max_level and rpn_min_level + # are not needed, only the subtraction of two matters and it + # can be infer from the number of inputs. Keep them now for + # consistency. 
+ rpn_max_level=2 + len(rpn_rois_list) - 1, + rpn_min_level=2, + rpn_post_nms_topN=rpn_post_nms_topN, + ) + rpn_rois = to_device(rpn_rois, device) + rpn_roi_probs = [] + + proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) + return proposals, {} + + @staticmethod + def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): + proposals = InstancesList( + im_info=im_info, + indices=rpn_rois[:, 0], + extra_fields={ + "proposal_boxes": Caffe2Boxes(rpn_rois), + "objectness_logits": (torch.Tensor, rpn_roi_probs), + }, + ) + if not tensor_mode: + proposals = InstancesList.to_d2_instances_list(proposals) + else: + proposals = [proposals] + return proposals + + +class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): + @staticmethod + def c2_preprocess(box_lists): + assert all(isinstance(x, Boxes) for x in box_lists) + if all(isinstance(x, Caffe2Boxes) for x in box_lists): + # input is pure-tensor based + assert len(box_lists) == 1 + pooler_fmt_boxes = box_lists[0].tensor + else: + pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) + return pooler_fmt_boxes + + def forward(self, x, box_lists): + assert not self.training + + pooler_fmt_boxes = self.c2_preprocess(box_lists) + num_level_assignments = len(self.level_poolers) + + if num_level_assignments == 1: + if isinstance(self.level_poolers[0], ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = self.level_poolers[0].aligned + + out = c2_roi_align( + x[0], + pooler_fmt_boxes, + order="NCHW", + spatial_scale=float(self.level_poolers[0].spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(self.level_poolers[0].sampling_ratio), + aligned=aligned, + ) + return out + + device = pooler_fmt_boxes.device + assert ( + self.max_level - self.min_level + 1 == 4 + ), "Currently DistributeFpnProposals only support 4 levels" + fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( + to_device(pooler_fmt_boxes, "cpu"), + roi_canonical_scale=self.canonical_box_size, + roi_canonical_level=self.canonical_level, + roi_max_level=self.max_level, + roi_min_level=self.min_level, + legacy_plus_one=False, + ) + fpn_outputs = [to_device(x, device) for x in fpn_outputs] + + rois_fpn_list = fpn_outputs[:-1] + rois_idx_restore_int32 = fpn_outputs[-1] + + roi_feat_fpn_list = [] + for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): + if isinstance(pooler, ROIAlignRotated): + c2_roi_align = torch.ops._caffe2.RoIAlignRotated + aligned = True + else: + c2_roi_align = torch.ops._caffe2.RoIAlign + aligned = bool(pooler.aligned) + + roi_feat_fpn = c2_roi_align( + x_level, + roi_fpn, + order="NCHW", + spatial_scale=float(pooler.spatial_scale), + pooled_h=int(self.output_size[0]), + pooled_w=int(self.output_size[1]), + sampling_ratio=int(pooler.sampling_ratio), + aligned=aligned, + ) + roi_feat_fpn_list.append(roi_feat_fpn) + + roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) + roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) + return roi_feat + + +class Caffe2FastRCNNOutputsInference: + def __init__(self, tensor_mode): + self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode + + def __call__(self, box_predictor, predictions, proposals): + """ equivalent to FastRCNNOutputLayers.inference """ + score_thresh = box_predictor.test_score_thresh + nms_thresh = box_predictor.test_nms_thresh + 
topk_per_image = box_predictor.test_topk_per_image + is_rotated = len(box_predictor.box2box_transform.weights) == 5 + + if is_rotated: + box_dim = 5 + assert box_predictor.box2box_transform.weights[4] == 1, ( + "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," + + " thus enforcing the angle weight to be 1 for now" + ) + box2box_transform_weights = box_predictor.box2box_transform.weights[:4] + else: + box_dim = 4 + box2box_transform_weights = box_predictor.box2box_transform.weights + + class_logits, box_regression = predictions + class_prob = F.softmax(class_logits, -1) + + assert box_regression.shape[1] % box_dim == 0 + cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 + + input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 + + rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals]) + device, dtype = rois.tensor.device, rois.tensor.dtype + if input_tensor_mode: + im_info = proposals[0].image_size + rois = rois.tensor + else: + im_info = torch.Tensor( + [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] + ) + batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(len(p) for p in proposals) + ], + dim=0, + ) + rois = torch.cat([batch_ids, rois.tensor], dim=1) + + roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( + to_device(rois, "cpu"), + to_device(box_regression, "cpu"), + to_device(im_info, "cpu"), + weights=box2box_transform_weights, + apply_scale=True, + rotated=is_rotated, + angle_bound_on=True, + angle_bound_lo=-180, + angle_bound_hi=180, + clip_angle_thresh=1.0, + legacy_plus_one=False, + ) + roi_pred_bbox = to_device(roi_pred_bbox, device) + roi_batch_splits = to_device(roi_batch_splits, device) + + nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( + to_device(class_prob, "cpu"), + to_device(roi_pred_bbox, "cpu"), + to_device(roi_batch_splits, "cpu"), + score_thresh=float(score_thresh), + nms=float(nms_thresh), + detections_per_im=int(topk_per_image), + soft_nms_enabled=False, + soft_nms_method="linear", + soft_nms_sigma=0.5, + soft_nms_min_score_thres=0.001, + rotated=is_rotated, + cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, + input_boxes_include_bg_cls=False, + output_classes_include_bg_cls=False, + legacy_plus_one=False, + ) + roi_score_nms = to_device(nms_outputs[0], device) + roi_bbox_nms = to_device(nms_outputs[1], device) + roi_class_nms = to_device(nms_outputs[2], device) + roi_batch_splits_nms = to_device(nms_outputs[3], device) + roi_keeps_nms = to_device(nms_outputs[4], device) + roi_keeps_size_nms = to_device(nms_outputs[5], device) + if not self.tensor_mode: + roi_class_nms = roi_class_nms.to(torch.int64) + + roi_batch_ids = cat( + [ + torch.full((b, 1), i, dtype=dtype, device=device) + for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) + ], + dim=0, + ) + + roi_class_nms = alias(roi_class_nms, "class_nms") + roi_score_nms = alias(roi_score_nms, "score_nms") + roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") + roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") + roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") + roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") + + results = InstancesList( + im_info=im_info, + indices=roi_batch_ids[:, 0], + extra_fields={ + "pred_boxes": Caffe2Boxes(roi_bbox_nms), + "scores": roi_score_nms, + "pred_classes": roi_class_nms, + }, + ) + + if not self.tensor_mode: + results = InstancesList.to_d2_instances_list(results) + 
batch_splits = roi_batch_splits_nms.int().tolist() + kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) + else: + results = [results] + kept_indices = [roi_keeps_nms] + + return results, kept_indices + + +class Caffe2MaskRCNNInference: + def __call__(self, pred_mask_logits, pred_instances): + """ equivalent to mask_head.mask_rcnn_inference """ + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + mask_probs_pred = pred_mask_logits.sigmoid() + mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") + pred_instances[0].pred_masks = mask_probs_pred + else: + mask_rcnn_inference(pred_mask_logits, pred_instances) + + +class Caffe2KeypointRCNNInference: + def __init__(self, use_heatmap_max_keypoint): + self.use_heatmap_max_keypoint = use_heatmap_max_keypoint + + def __call__(self, pred_keypoint_logits, pred_instances): + # just return the keypoint heatmap for now, + # there will be option to call HeatmapMaxKeypointOp + output = alias(pred_keypoint_logits, "kps_score") + if all(isinstance(x, InstancesList) for x in pred_instances): + assert len(pred_instances) == 1 + if self.use_heatmap_max_keypoint: + device = output.device + output = torch.ops._caffe2.HeatmapMaxKeypoint( + to_device(output, "cpu"), + pred_instances[0].pred_boxes.tensor, + should_output_softmax=True, # worth make it configerable? + ) + output = to_device(output, device) + output = alias(output, "keypoints_out") + pred_instances[0].pred_keypoints = output + return pred_keypoint_logits diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_export.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_export.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0919cb3c69eeed36e5d7388166f33e855221f5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_export.py @@ -0,0 +1,217 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import io +import logging +import numpy as np +from typing import List +import onnx +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python.onnx.backend import Caffe2Backend +from tabulate import tabulate +from termcolor import colored +from torch.onnx import OperatorExportTypes + +from .shared import ( + ScopedWS, + construct_init_net_from_params, + fuse_alias_placeholder, + fuse_copy_between_cpu_and_gpu, + get_params_from_init_net, + group_norm_replace_aten_with_caffe2, + infer_device_type, + remove_dead_end_ops, + remove_reshape_for_fc, + save_graph, +) + +logger = logging.getLogger(__name__) + + +def export_onnx_model(model, inputs): + """ + Trace and export a model to onnx format. 
+ + Args: + model (nn.Module): + inputs (tuple[args]): the model will be called by `model(*inputs)` + + Returns: + an onnx model + """ + assert isinstance(model, torch.nn.Module) + + # make sure all modules are in eval mode, onnx may change the training state + # of the module if the states are not consistent + def _check_eval(module): + assert not module.training + + model.apply(_check_eval) + + # Export the model to ONNX + with torch.no_grad(): + with io.BytesIO() as f: + torch.onnx.export( + model, + inputs, + f, + operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, + # verbose=True, # NOTE: uncomment this for debugging + # export_params=True, + ) + onnx_model = onnx.load_from_string(f.getvalue()) + + # Apply ONNX's Optimization + all_passes = onnx.optimizer.get_available_passes() + passes = ["fuse_bn_into_conv"] + assert all(p in all_passes for p in passes) + onnx_model = onnx.optimizer.optimize(onnx_model, passes) + return onnx_model + + +def _op_stats(net_def): + type_count = {} + for t in [op.type for op in net_def.op]: + type_count[t] = type_count.get(t, 0) + 1 + type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet + type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count + return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) + + +def _assign_device_option( + predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] +): + """ + ONNX exported network doesn't have concept of device, assign necessary + device option for each op in order to make it runable on GPU runtime. + """ + + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + def _assign_op_device_option(net_proto, net_ssa, blob_device_types): + for op, ssa_i in zip(net_proto.op, net_ssa): + if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + else: + devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] + assert all(d == devices[0] for d in devices) + if devices[0] == "cuda": + op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) + + # update ops in predict_net + predict_net_input_device_types = { + (name, 0): _get_device_type(tensor) + for name, tensor in zip(predict_net.external_input, tensor_inputs) + } + predict_net_device_types = infer_device_type( + predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" + ) + predict_net_ssa, _ = core.get_ssa(predict_net) + _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) + + # update ops in init_net + init_net_ssa, versions = core.get_ssa(init_net) + init_net_output_device_types = { + (name, versions[name]): predict_net_device_types[(name, 0)] + for name in init_net.external_output + } + init_net_device_types = infer_device_type( + init_net, known_status=init_net_output_device_types, device_name_style="pytorch" + ) + _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) + + +def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): + """ + Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. + + Arg: + model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py + tensor_inputs: a list of tensors that caffe2 model takes as input. 
+ """ + model = copy.deepcopy(model) + assert isinstance(model, torch.nn.Module) + assert hasattr(model, "encode_additional_info") + + # Export via ONNX + logger.info("Exporting a {} model via ONNX ...".format(type(model).__name__)) + onnx_model = export_onnx_model(model, (tensor_inputs,)) + # Convert ONNX model to Caffe2 protobuf + init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) + ops_table = [[op.type, op.input, op.output] for op in predict_net.op] + table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") + logger.info( + "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") + ) + + # Apply protobuf optimization + fuse_alias_placeholder(predict_net, init_net) + if any(t.device.type != "cpu" for t in tensor_inputs): + fuse_copy_between_cpu_and_gpu(predict_net) + remove_dead_end_ops(init_net) + _assign_device_option(predict_net, init_net, tensor_inputs) + params, device_options = get_params_from_init_net(init_net) + predict_net, params = remove_reshape_for_fc(predict_net, params) + init_net = construct_init_net_from_params(params, device_options) + group_norm_replace_aten_with_caffe2(predict_net) + + # Record necessary information for running the pb model in Detectron2 system. + model.encode_additional_info(predict_net, init_net) + + logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) + logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) + + return predict_net, init_net + + +def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): + """ + Run the caffe2 model on given inputs, recording the shape and draw the graph. + + predict_net/init_net: caffe2 model. + tensor_inputs: a list of tensors that caffe2 model takes as input. + graph_save_path: path for saving graph of exported model. + """ + + logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False) + + # Run the exported Caffe2 net + logger.info("Running ONNX exported model ...") + with ScopedWS("__ws_tmp__", True) as ws: + ws.RunNetOnce(init_net) + initialized_blobs = set(ws.Blobs()) + uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] + for name, blob in zip(uninitialized, tensor_inputs): + ws.FeedBlob(name, blob) + + try: + ws.RunNetOnce(predict_net) + except RuntimeError as e: + logger.warning("Encountered RuntimeError: \n{}".format(str(e))) + + ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} + blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} + + logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) + save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) + + return ws_blobs diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_inference.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..796206713d0e141487f02a486d78aafac4f4f594 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_inference.py @@ -0,0 +1,149 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import logging +import numpy as np +import torch +from caffe2.proto import caffe2_pb2 +from caffe2.python import core + +from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format +from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type + +logger = logging.getLogger(__name__) + + +class ProtobufModel(torch.nn.Module): + """ + A class works just like nn.Module in terms of inference, but running + caffe2 model under the hood. Input/Output are Dict[str, tensor] whose keys + are in external_input/output. + """ + + def __init__(self, predict_net, init_net): + logger.info("Initializing ProtobufModel ...") + super().__init__() + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + self.ws_name = "__ws_tmp__" + self.net = core.Net(predict_net) + + with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: + ws.RunNetOnce(init_net) + for blob in self.net.Proto().external_input: + if blob not in ws.Blobs(): + ws.CreateBlob(blob) + ws.CreateNet(self.net) + + self._error_msgs = set() + + def forward(self, inputs_dict): + assert all(inp in self.net.Proto().external_input for inp in inputs_dict) + with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: + for b, tensor in inputs_dict.items(): + ws.FeedBlob(b, tensor) + try: + ws.RunNet(self.net.Proto().name) + except RuntimeError as e: + if not str(e) in self._error_msgs: + self._error_msgs.add(str(e)) + logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) + logger.warning("Catch the error and use partial results.") + + outputs_dict = collections.OrderedDict( + [(b, ws.FetchBlob(b)) for b in self.net.Proto().external_output] + ) + # Remove outputs of current run, this is necessary in order to + # prevent fetching the result from previous run if the model fails + # in the middle. + for b in self.net.Proto().external_output: + # Needs to create uninitialized blob to make the net runable. + # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), + # but there'no such API. + ws.FeedBlob(b, "{}, a C++ native class of type nullptr (uninitialized).".format(b)) + + return outputs_dict + + +class ProtobufDetectionModel(torch.nn.Module): + """ + A class works just like a pytorch meta arch in terms of inference, but running + caffe2 model under the hood. + """ + + def __init__(self, predict_net, init_net, *, convert_outputs=None): + """ + Args: + predict_net, init_net (core.Net): caffe2 nets + convert_outptus (callable): a function that converts caffe2 + outputs to the same format of the original pytorch model. + By default, use the one defined in the caffe2 meta_arch. 
+ """ + super().__init__() + self.protobuf_model = ProtobufModel(predict_net, init_net) + self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) + self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") + + if convert_outputs is None: + meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") + meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] + self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) + else: + self._convert_outputs = convert_outputs + + def _infer_output_devices(self, inputs_dict): + def _get_device_type(torch_tensor): + assert torch_tensor.device.type in ["cpu", "cuda"] + assert torch_tensor.device.index == 0 + return torch_tensor.device.type + + predict_net = self.protobuf_model.net.Proto() + input_device_types = { + (name, 0): _get_device_type(tensor) for name, tensor in inputs_dict.items() + } + device_type_map = infer_device_type( + predict_net, known_status=input_device_types, device_name_style="pytorch" + ) + ssa, versions = core.get_ssa(predict_net) + versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] + output_devices = [device_type_map[outp] for outp in versioned_outputs] + return output_devices + + def _convert_inputs(self, batched_inputs): + # currently all models convert inputs in the same way + data, im_info = convert_batched_inputs_to_c2_format( + batched_inputs, self.size_divisibility, self.device + ) + return {"data": data, "im_info": im_info} + + def forward(self, batched_inputs): + c2_inputs = self._convert_inputs(batched_inputs) + c2_results = self.protobuf_model(c2_inputs) + + if any(t.device.type != "cpu" for _, t in c2_inputs.items()): + output_devices = self._infer_output_devices(c2_inputs) + else: + output_devices = ["cpu" for _ in self.protobuf_model.net.Proto().external_output] + + def _cast_caffe2_blob_to_torch_tensor(blob, device): + return torch.Tensor(blob).to(device) if isinstance(blob, np.ndarray) else None + + c2_results = { + name: _cast_caffe2_blob_to_torch_tensor(c2_results[name], device) + for name, device in zip(self.protobuf_model.net.Proto().external_output, output_devices) + } + + return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_modeling.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..9c6c4c77dea65f1da7075f738c819c762cb998aa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/caffe2_modeling.py @@ -0,0 +1,510 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +import io +import struct +import types +import torch + +from detectron2.modeling import meta_arch +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs +from detectron2.modeling.meta_arch.retinanet import permute_to_N_HWA_K +from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess +from detectron2.modeling.roi_heads import keypoint_head +from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes + +from .c10 import Caffe2Compatible +from .patcher import ROIHeadsPatcher, patch_generalized_rcnn +from .shared import ( + alias, + check_set_pb_arg, + get_pb_arg_floats, + get_pb_arg_valf, + get_pb_arg_vali, + get_pb_arg_vals, + mock_torch_nn_functional_interpolate, +) + + +def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): + """ + A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) + to detectron2's format (i.e. list of Instances instance). + This only works when the model follows the Caffe2 detectron's naming convention. + + Args: + image_sizes (List[List[int, int]]): [H, W] of every image. + tensor_outputs (Dict[str, Tensor]): external_output to its tensor. + + force_mask_on (Bool): if true, the it make sure there'll be pred_masks even + if the mask is not found from tensor_outputs (usually due to model crash) + """ + + results = [Instances(image_size) for image_size in image_sizes] + + batch_splits = tensor_outputs.get("batch_splits", None) + if batch_splits: + raise NotImplementedError() + assert len(image_sizes) == 1 + result = results[0] + + bbox_nms = tensor_outputs["bbox_nms"] + score_nms = tensor_outputs["score_nms"] + class_nms = tensor_outputs["class_nms"] + # Detection will always success because Conv support 0-batch + assert bbox_nms is not None + assert score_nms is not None + assert class_nms is not None + if bbox_nms.shape[1] == 5: + result.pred_boxes = RotatedBoxes(bbox_nms) + else: + result.pred_boxes = Boxes(bbox_nms) + result.scores = score_nms + result.pred_classes = class_nms.to(torch.int64) + + mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) + if mask_fcn_probs is not None: + # finish the mask pred + mask_probs_pred = mask_fcn_probs + num_masks = mask_probs_pred.shape[0] + class_pred = result.pred_classes + indices = torch.arange(num_masks, device=class_pred.device) + mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] + result.pred_masks = mask_probs_pred + elif force_mask_on: + # NOTE: there's no way to know the height/width of mask here, it won't be + # used anyway when batch size is 0, so just set them to 0. + result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) + + keypoints_out = tensor_outputs.get("keypoints_out", None) + kps_score = tensor_outputs.get("kps_score", None) + if keypoints_out is not None: + # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) + keypoints_tensor = keypoints_out + # NOTE: it's possible that prob is not calculated if "should_output_softmax" + # is set to False in HeatmapMaxKeypoint, so just using raw score, seems + # it doesn't affect mAP. TODO: check more carefully. 
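+ # keypoints_out is (N, 4, #keypoints); transpose to (N, #keypoints, 4) and keep (x, y, score)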
+ keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] + result.pred_keypoints = keypoint_xyp + elif kps_score is not None: + # keypoint heatmap to sparse data structure + pred_keypoint_logits = kps_score + keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) + + return results + + +def _cast_to_f32(f64): + return struct.unpack("f", struct.pack("f", f64))[0] + + +def set_caffe2_compatible_tensor_mode(model, enable=True): + def _fn(m): + if isinstance(m, Caffe2Compatible): + m.tensor_mode = enable + + model.apply(_fn) + + +def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): + """ + See get_caffe2_inputs() below. + """ + assert all(isinstance(x, dict) for x in batched_inputs) + assert all(x["image"].dim() == 3 for x in batched_inputs) + + images = [x["image"] for x in batched_inputs] + images = ImageList.from_tensors(images, size_divisibility) + + im_info = [] + for input_per_image, image_size in zip(batched_inputs, images.image_sizes): + target_height = input_per_image.get("height", image_size[0]) + target_width = input_per_image.get("width", image_size[1]) # noqa + # NOTE: The scale inside im_info is kept as convention and for providing + # post-processing information if further processing is needed. For + # current Caffe2 model definitions that don't include post-processing inside + # the model, this number is not used. + # NOTE: There can be a slight difference between width and height + # scales, using a single number can results in numerical difference + # compared with D2's post-processing. + scale = target_height / image_size[0] + im_info.append([image_size[0], image_size[1], scale]) + im_info = torch.Tensor(im_info) + + return images.tensor.to(device), im_info.to(device) + + +class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): + """ + Base class for caffe2-compatible implementation of a meta architecture. + The forward is traceable and its traced graph can be converted to caffe2 + graph through ONNX. + """ + + def __init__(self, cfg, torch_model): + """ + Args: + cfg (CfgNode): + torch_model (nn.Module): the detectron2 model (meta_arch) to be + converted. + """ + super().__init__() + self._wrapped_model = torch_model + self.eval() + set_caffe2_compatible_tensor_mode(self, True) + + def get_caffe2_inputs(self, batched_inputs): + """ + Convert pytorch-style structured inputs to caffe2-style inputs that + are tuples of tensors. + + Args: + batched_inputs (list[dict]): inputs to a detectron2 model + in its standard format. Each dict has "image" (CHW tensor), and optionally + "height" and "width". + + Returns: + tuple[Tensor]: + tuple of tensors that will be the inputs to the + :meth:`forward` method. For existing models, the first + is an NCHW tensor (padded and batched); the second is + a im_info Nx3 tensor, where the rows are + (height, width, unused legacy parameter) + """ + return convert_batched_inputs_to_c2_format( + batched_inputs, + self._wrapped_model.backbone.size_divisibility, + self._wrapped_model.device, + ) + + def encode_additional_info(self, predict_net, init_net): + """ + Save extra metadata that will be used by inference in the output protobuf. + """ + pass + + def forward(self, inputs): + """ + Run the forward in caffe2-style. It has to use caffe2-compatible ops + and the method will be used for tracing. + + Args: + inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. + They will be the inputs of the converted caffe2 graph. + + Returns: + tuple[Tensor]: output tensors. 
They will be the outputs of the + converted caffe2 graph. + """ + raise NotImplementedError + + def _caffe2_preprocess_image(self, inputs): + """ + Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. + It normalizes the input images, and the final caffe2 graph assumes the + inputs have been batched already. + """ + data, im_info = inputs + data = alias(data, "data") + im_info = alias(im_info, "im_info") + mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std + normalized_data = (data - mean) / std + normalized_data = alias(normalized_data, "normalized_data") + + # Pack (data, im_info) into ImageList which is recognized by self.inference. + images = ImageList(tensor=normalized_data, image_sizes=im_info) + return images + + @staticmethod + def get_outputs_converter(predict_net, init_net): + """ + Creates a function that converts outputs of the caffe2 model to + detectron2's standard format. + The function uses information in `predict_net` and `init_net` that are + available at inferene time. Therefore the function logic can be used in inference. + + The returned function has the following signature: + + def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs + + Where + + * batched_inputs (list[dict]): the original input format of the meta arch + * c2_inputs (dict[str, Tensor]): the caffe2 inputs. + * c2_results (dict[str, Tensor]): the caffe2 output format, + corresponding to the outputs of the :meth:`forward` function. + * detectron2_outputs: the original output format of the meta arch. + + This function can be used to compare the outputs of the original meta arch and + the converted caffe2 graph. + + Returns: + callable: a callable of the above signature. + """ + raise NotImplementedError + + +class Caffe2GeneralizedRCNN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.GeneralizedRCNN) + torch_model = patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + if not self.tensor_mode: + return self._wrapped_model.inference(inputs) + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + proposals, _ = self._wrapped_model.proposal_generator(images, features) + with self.roi_heads_patcher.mock_roi_heads(): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + return tuple(detector_results[0].flatten()) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + def f(batched_inputs, c2_inputs, c2_results): + image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] + results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +class Caffe2PanopticFPN(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.PanopticFPN) + torch_model = 
patch_generalized_rcnn(torch_model) + super().__init__(cfg, torch_model) + + self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + features = self._wrapped_model.backbone(images.tensor) + + sem_seg_results, _ = self._wrapped_model.sem_seg_head(features) + sem_seg_results = alias(sem_seg_results, "sem_seg") + + proposals, _ = self._wrapped_model.proposal_generator(images, features) + + with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode): + detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) + + return tuple(detector_results[0].flatten()) + (sem_seg_results,) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN") + + # Inference parameters: + check_set_pb_arg(predict_net, "combine_on", "i", self._wrapped_model.combine_on) + check_set_pb_arg( + predict_net, + "combine_overlap_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_overlap_threshold), + ) + check_set_pb_arg( + predict_net, + "combine_stuff_area_limit", + "i", + self._wrapped_model.combine_stuff_area_limit, + ) + check_set_pb_arg( + predict_net, + "combine_instances_confidence_threshold", + "f", + _cast_to_f32(self._wrapped_model.combine_instances_confidence_threshold), + ) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + combine_on = get_pb_arg_vali(predict_net, "combine_on", None) + combine_overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None) + combine_stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None) + combine_instances_confidence_threshold = get_pb_arg_valf( + predict_net, "combine_instances_confidence_threshold", None + ) + + def f(batched_inputs, c2_inputs, c2_results): + image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] + detector_results = assemble_rcnn_outputs_by_name( + image_sizes, c2_results, force_mask_on=True + ) + sem_seg_results = c2_results["sem_seg"] + + # copied from meta_arch/panoptic_fpn.py ... 
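+ # rescale semantic and instance predictions back to each image's original resolution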
+ processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + if combine_on: + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + combine_overlap_threshold, + combine_stuff_area_limit, + combine_instances_confidence_threshold, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + + return f + + +class Caffe2RetinaNet(Caffe2MetaArch): + def __init__(self, cfg, torch_model): + assert isinstance(torch_model, meta_arch.RetinaNet) + super().__init__(cfg, torch_model) + + @mock_torch_nn_functional_interpolate() + def forward(self, inputs): + assert self.tensor_mode + images = self._caffe2_preprocess_image(inputs) + + # explicitly return the images sizes to avoid removing "im_info" by ONNX + # since it's not used in the forward path + return_tensors = [images.image_sizes] + + features = self._wrapped_model.backbone(images.tensor) + features = [features[f] for f in self._wrapped_model.in_features] + for i, feature_i in enumerate(features): + features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) + return_tensors.append(features[i]) + + pred_logits, pred_anchor_deltas = self._wrapped_model.head(features) + for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)): + return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) + return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) + + return tuple(return_tensors) + + def encode_additional_info(self, predict_net, init_net): + size_divisibility = self._wrapped_model.backbone.size_divisibility + check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) + check_set_pb_arg( + predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") + ) + check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") + + # Inference parameters: + check_set_pb_arg( + predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.score_threshold) + ) + check_set_pb_arg(predict_net, "topk_candidates", "i", self._wrapped_model.topk_candidates) + check_set_pb_arg( + predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.nms_threshold) + ) + check_set_pb_arg( + predict_net, + "max_detections_per_image", + "i", + self._wrapped_model.max_detections_per_image, + ) + + check_set_pb_arg( + predict_net, + "bbox_reg_weights", + "floats", + [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], + ) + self._encode_anchor_generator_cfg(predict_net) + + def _encode_anchor_generator_cfg(self, predict_net): + # serialize anchor_generator for future use + serialized_anchor_generator = io.BytesIO() + torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) + # Ideally we can put anchor generating inside the model, then we don't + # need to store this information. 
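+ # store the serialized anchor generator as a string ("s") argument on predict_net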
+ bytes = serialized_anchor_generator.getvalue() + check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) + + @staticmethod + def get_outputs_converter(predict_net, init_net): + self = types.SimpleNamespace() + serialized_anchor_generator = io.BytesIO( + get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) + ) + self.anchor_generator = torch.load(serialized_anchor_generator) + bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) + self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) + self.score_threshold = get_pb_arg_valf(predict_net, "score_threshold", None) + self.topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) + self.nms_threshold = get_pb_arg_valf(predict_net, "nms_threshold", None) + self.max_detections_per_image = get_pb_arg_vali( + predict_net, "max_detections_per_image", None + ) + + # hack to reuse inference code from RetinaNet + self.inference = functools.partial(meta_arch.RetinaNet.inference, self) + self.inference_single_image = functools.partial( + meta_arch.RetinaNet.inference_single_image, self + ) + + def f(batched_inputs, c2_inputs, c2_results): + image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] + + num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) + pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] + pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] + + # For each feature level, feature should have the same batch size and + # spatial dimension as the box_cls and box_delta. + dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits] + anchors = self.anchor_generator(dummy_features) + + # self.num_classess can be inferred + self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4) + + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + results = self.inference(anchors, pred_logits, pred_anchor_deltas, image_sizes) + return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) + + return f + + +META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { + "GeneralizedRCNN": Caffe2GeneralizedRCNN, + "PanopticFPN": Caffe2PanopticFPN, + "RetinaNet": Caffe2RetinaNet, +} diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/patcher.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/patcher.py new file mode 100644 index 0000000000000000000000000000000000000000..a23c69a04454c0068eb5c93e4b328d43c504aa93 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/patcher.py @@ -0,0 +1,166 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
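+# Usage sketch (illustrative only; `model`, `cfg`, `images`, `features` and
+# `proposals` are assumed to already exist in detectron2's standard formats):
+#
+#     model = patch_generalized_rcnn(model)            # swap RPN / ROIPooler classes in place
+#     patcher = ROIHeadsPatcher(cfg, model.roi_heads)
+#     with patcher.mock_roi_heads():                   # mock box/mask/keypoint inference
+#         detector_results, _ = model.roi_heads(images, features, proposals)
+#
+# This mirrors how Caffe2GeneralizedRCNN in caffe2_modeling.py uses these helpers.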
+ +import contextlib +import mock +import torch + +from detectron2.modeling import poolers +from detectron2.modeling.proposal_generator import rpn +from detectron2.modeling.roi_heads import keypoint_head, mask_head +from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers + +from .c10 import ( + Caffe2Compatible, + Caffe2FastRCNNOutputsInference, + Caffe2KeypointRCNNInference, + Caffe2MaskRCNNInference, + Caffe2ROIPooler, + Caffe2RPN, +) + + +class GenericMixin(object): + pass + + +class Caffe2CompatibleConverter(object): + """ + A GenericUpdater which implements the `create_from` interface, by modifying + module object and assign it with another class replaceCls. + """ + + def __init__(self, replaceCls): + self.replaceCls = replaceCls + + def create_from(self, module): + # update module's class to the new class + assert isinstance(module, torch.nn.Module) + if issubclass(self.replaceCls, GenericMixin): + # replaceCls should act as mixin, create a new class on-the-fly + new_class = type( + "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), + (self.replaceCls, module.__class__), + {}, # {"new_method": lambda self: ...}, + ) + module.__class__ = new_class + else: + # replaceCls is complete class, this allow arbitrary class swap + module.__class__ = self.replaceCls + + # initialize Caffe2Compatible + if isinstance(module, Caffe2Compatible): + module.tensor_mode = False + + return module + + +def patch(model, target, updater, *args, **kwargs): + """ + recursively (post-order) update all modules with the target type and its + subclasses, make a initialization/composition/inheritance/... via the + updater.create_from. + """ + for name, module in model.named_children(): + model._modules[name] = patch(module, target, updater, *args, **kwargs) + if isinstance(model, target): + return updater.create_from(model, *args, **kwargs) + return model + + +def patch_generalized_rcnn(model): + ccc = Caffe2CompatibleConverter + model = patch(model, rpn.RPN, ccc(Caffe2RPN)) + model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) + + return model + + +@contextlib.contextmanager +def mock_fastrcnn_outputs_inference( + tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers +): + with mock.patch.object( + box_predictor_type, + "inference", + autospec=True, + side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): + with mock.patch( + "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +@contextlib.contextmanager +def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): + with mock.patch( + "{}.keypoint_rcnn_inference".format(patched_module), + side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), + ) as mocked_func: + yield + if check: + assert mocked_func.call_count > 0 + + +class ROIHeadsPatcher: + def __init__(self, cfg, heads): + self.heads = heads + + self.use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT + + @contextlib.contextmanager + def mock_roi_heads(self, tensor_mode=True): + """ + Patching several inference functions inside ROIHeads and its subclasses + + Args: + tensor_mode (bool): whether the inputs/outputs are caffe2's tensor + format or not. Default to True. 
+ """ + # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` + # are called inside the same file as BaseXxxHead due to using mock.patch. + kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ + mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ + + mock_ctx_managers = [ + mock_fastrcnn_outputs_inference( + tensor_mode=tensor_mode, + check=True, + box_predictor_type=type(self.heads.box_predictor), + ) + ] + if getattr(self.heads, "keypoint_on", False): + mock_ctx_managers += [ + mock_keypoint_rcnn_inference( + tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint + ) + ] + if getattr(self.heads, "mask_on", False): + mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] + + with contextlib.ExitStack() as stack: # python 3.3+ + for mgr in mock_ctx_managers: + stack.enter_context(mgr) + yield diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/shared.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..65fa9bf932174cd1db37cfebf4e0142021770f1f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/shared.py @@ -0,0 +1,1047 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import contextlib +import copy +import functools +import logging +import mock +import numpy as np +import os +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +import caffe2.python.utils as putils +import torch +import torch.nn.functional as F +from caffe2.proto import caffe2_pb2 +from caffe2.python import core, net_drawer, workspace +from torch.nn.functional import interpolate as interp + +logger = logging.getLogger(__name__) + + +# ==== torch/utils_toffee/cast.py ======================================= + + +def to_device(t, device_str): + """ + This function is a replacement of .to(another_device) such that it allows the + casting to be traced properly by explicitly calling the underlying copy ops. + It also avoids introducing unncessary op when casting to the same device. 
+ """ + src = t.device + dst = torch.device(device_str) + + if src == dst: + return t + elif src.type == "cuda" and dst.type == "cpu": + return torch.ops._caffe2.CopyGPUToCPU(t) + elif src.type == "cpu" and dst.type == "cuda": + return torch.ops._caffe2.CopyCPUToGPU(t) + else: + raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) + + +# ==== torch/utils_toffee/interpolate.py ======================================= + + +# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py +def BilinearInterpolation(tensor_in, up_scale): + assert up_scale % 2 == 0, "Scale should be even" + + def upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) + + kernel_size = int(up_scale) * 2 + bil_filt = upsample_filt(kernel_size) + + dim = int(tensor_in.shape[1]) + kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) + kernel[range(dim), range(dim), :, :] = bil_filt + + tensor_out = F.conv_transpose2d( + tensor_in, + weight=to_device(torch.Tensor(kernel), tensor_in.device), + bias=None, + stride=int(up_scale), + padding=int(up_scale / 2), + ) + + return tensor_out + + +# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if +# using dynamic `scale_factor` rather than static `size`. (T43166860) +# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. +def onnx_compatibale_interpolate( + input, size=None, scale_factor=None, mode="nearest", align_corners=None +): + # NOTE: The input dimensions are interpreted in the form: + # `mini-batch x channels x [optional depth] x [optional height] x width`. + if size is None and scale_factor is not None: + if input.dim() == 4: + if isinstance(scale_factor, (int, float)): + height_scale, width_scale = (scale_factor, scale_factor) + else: + assert isinstance(scale_factor, (tuple, list)) + assert len(scale_factor) == 2 + height_scale, width_scale = scale_factor + + assert not align_corners, "No matching C2 op for align_corners == True" + if mode == "nearest": + return torch.ops._caffe2.ResizeNearest( + input, order="NCHW", width_scale=width_scale, height_scale=height_scale + ) + elif mode == "bilinear": + logger.warning( + "Use F.conv_transpose2d for bilinear interpolate" + " because there's no such C2 op, this may cause significant" + " slowdown and the boundary pixels won't be as same as" + " using F.interpolate due to padding." 
+ ) + assert height_scale == width_scale + return BilinearInterpolation(input, up_scale=height_scale) + logger.warning("Output size is not static, it might cause ONNX conversion issue") + + return interp(input, size, scale_factor, mode, align_corners) + + +@contextlib.contextmanager +def mock_torch_nn_functional_interpolate(): + if torch.onnx.is_in_onnx_export(): + with mock.patch( + "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate + ): + yield + else: + yield + + +# ==== torch/utils_caffe2/ws_utils.py ========================================== + + +class ScopedWS(object): + def __init__(self, ws_name, is_reset, is_cleanup=False): + self.ws_name = ws_name + self.is_reset = is_reset + self.is_cleanup = is_cleanup + self.org_ws = "" + + def __enter__(self): + self.org_ws = workspace.CurrentWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.ws_name, True) + if self.is_reset: + workspace.ResetWorkspace() + + return workspace + + def __exit__(self, *args): + if self.is_cleanup: + workspace.ResetWorkspace() + if self.ws_name is not None: + workspace.SwitchWorkspace(self.org_ws) + + +def fetch_any_blob(name): + bb = None + try: + bb = workspace.FetchBlob(name) + except TypeError: + bb = workspace.FetchInt8Blob(name) + except Exception as e: + logger.error("Get blob {} error: {}".format(name, e)) + + return bb + + +# ==== torch/utils_caffe2/protobuf.py ========================================== + + +def get_pb_arg(pb, arg_name): + for x in pb.arg: + if x.name == arg_name: + return x + return None + + +def get_pb_arg_valf(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.f if arg is not None else default_val + + +def get_pb_arg_floats(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(float, arg.floats)) if arg is not None else default_val + + +def get_pb_arg_ints(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(map(int, arg.ints)) if arg is not None else default_val + + +def get_pb_arg_vali(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.i if arg is not None else default_val + + +def get_pb_arg_vals(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return arg.s if arg is not None else default_val + + +def get_pb_arg_valstrings(pb, arg_name, default_val): + arg = get_pb_arg(pb, arg_name) + return list(arg.strings) if arg is not None else default_val + + +def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False): + arg = get_pb_arg(pb, arg_name) + if arg is None: + arg = putils.MakeArgument(arg_name, arg_value) + assert hasattr(arg, arg_attr) + pb.arg.extend([arg]) + if allow_override and getattr(arg, arg_attr) != arg_value: + logger.warning( + "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) + ) + setattr(arg, arg_attr, arg_value) + else: + assert arg is not None + assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( + getattr(arg, arg_attr), arg_value + ) + + +def _create_const_fill_op_from_numpy(name, tensor, device_option=None): + assert type(tensor) == np.ndarray + kTypeNameMapper = { + np.dtype("float32"): "GivenTensorFill", + np.dtype("int32"): "GivenTensorIntFill", + np.dtype("int64"): "GivenTensorInt64Fill", + np.dtype("uint8"): "GivenTensorStringFill", + } + + args_dict = {} + if tensor.dtype == np.dtype("uint8"): + args_dict.update({"values": [str(tensor.data)], "shape": [1]}) + else: + args_dict.update({"values": tensor, "shape": 
tensor.shape}) + + if device_option is not None: + args_dict["device_option"] = device_option + + return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) + + +def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): + assert type(int8_tensor) == workspace.Int8Tensor + kTypeNameMapper = { + np.dtype("int32"): "Int8GivenIntTensorFill", + np.dtype("uint8"): "Int8GivenTensorFill", + } + + tensor = int8_tensor.data + assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] + values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor + + return core.CreateOperator( + kTypeNameMapper[tensor.dtype], + [], + [name], + values=values, + shape=tensor.shape, + Y_scale=int8_tensor.scale, + Y_zero_point=int8_tensor.zero_point, + ) + + +def create_const_fill_op( + name: str, + blob: Union[np.ndarray, workspace.Int8Tensor], + device_option: Optional[caffe2_pb2.DeviceOption] = None, +) -> caffe2_pb2.OperatorDef: + """ + Given a blob object, return the Caffe2 operator that creates this blob + as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. + """ + + tensor_type = type(blob) + assert tensor_type in [ + np.ndarray, + workspace.Int8Tensor, + ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( + name, type(blob) + ) + + if tensor_type == np.ndarray: + return _create_const_fill_op_from_numpy(name, blob, device_option) + elif tensor_type == workspace.Int8Tensor: + assert device_option is None + return _create_const_fill_op_from_c2_int8_tensor(name, blob) + + +def construct_init_net_from_params( + params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None +) -> caffe2_pb2.NetDef: + """ + Construct the init_net from params dictionary + """ + init_net = caffe2_pb2.NetDef() + device_options = device_options or {} + for name, blob in params.items(): + if isinstance(blob, str): + logger.warning( + ( + "Blob {} with type {} is not supported in generating init net," + " skipped.".format(name, type(blob)) + ) + ) + continue + init_net.op.extend( + [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] + ) + init_net.external_output.append(name) + return init_net + + +def get_producer_map(ssa): + """ + Return dict from versioned blob to (i, j), + where i is index of producer op, j is the index of output of that op. + """ + producer_map = {} + for i in range(len(ssa)): + outputs = ssa[i][1] + for j, outp in enumerate(outputs): + producer_map[outp] = (i, j) + return producer_map + + +def get_consumer_map(ssa): + """ + Return dict from versioned blob to list of (i, j), + where i is index of consumer op, j is the index of input of that op. + """ + consumer_map = collections.defaultdict(list) + for i in range(len(ssa)): + inputs = ssa[i][0] + for j, inp in enumerate(inputs): + consumer_map[inp].append((i, j)) + return consumer_map + + +def get_params_from_init_net( + init_net: caffe2_pb2.NetDef, +) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: + """ + Take the output blobs from init_net by running it. + Outputs: + params: dict from blob name to numpy array + device_options: dict from blob name to the device option of its creating op + """ + # NOTE: this assumes that the params is determined by producer op with the + # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. 
+ def _get_device_option(producer_op): + if producer_op.type == "CopyGPUToCPU": + return caffe2_pb2.DeviceOption() + else: + return producer_op.device_option + + with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: + ws.RunNetOnce(init_net) + params = {b: fetch_any_blob(b) for b in init_net.external_output} + ssa, versions = core.get_ssa(init_net) + producer_map = get_producer_map(ssa) + device_options = { + b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) + for b in init_net.external_output + } + return params, device_options + + +def _updater_raise(op, input_types, output_types): + raise RuntimeError( + "Failed to apply updater for op {} given input_types {} and" + " output_types {}".format(op, input_types, output_types) + ) + + +def _generic_status_identifier( + predict_net: caffe2_pb2.NetDef, + status_updater: Callable, + known_status: Dict[Tuple[str, int], Any], +) -> Dict[Tuple[str, int], Any]: + """ + Statically infer the status of each blob, the status can be such as device type + (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here + is versioned blob (Tuple[str, int]) in the format compatible with ssa. + Inputs: + predict_net: the caffe2 network + status_updater: a callable, given an op and the status of its input/output, + it returns the updated status of input/output. `None` is used for + representing unknown status. + known_status: a dict containing known status, used as initialization. + Outputs: + A dict mapping from versioned blob to its status + """ + ssa, versions = core.get_ssa(predict_net) + versioned_ext_input = [(b, 0) for b in predict_net.external_input] + versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] + all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) + + allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) + assert all(k in allowed_vbs for k in known_status) + assert all(v is not None for v in known_status.values()) + _known_status = copy.deepcopy(known_status) + + def _check_and_update(key, value): + assert value is not None + if key in _known_status: + if not _known_status[key] == value: + raise RuntimeError( + "Confilict status for {}, existing status {}, new status {}".format( + key, _known_status[key], value + ) + ) + _known_status[key] = value + + def _update_i(op, ssa_i): + versioned_inputs = ssa_i[0] + versioned_outputs = ssa_i[1] + + inputs_status = [_known_status.get(b, None) for b in versioned_inputs] + outputs_status = [_known_status.get(b, None) for b in versioned_outputs] + + new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) + + for versioned_blob, status in zip( + versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status + ): + if status is not None: + _check_and_update(versioned_blob, status) + + for op, ssa_i in zip(predict_net.op, ssa): + _update_i(op, ssa_i) + for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): + _update_i(op, ssa_i) + + # NOTE: This strictly checks all the blob from predict_net must be assgined + # a known status. However sometimes it's impossible (eg. having deadend op), + # we may relax this constraint if + for k in all_versioned_blobs: + if k not in _known_status: + raise NotImplementedError( + "Can not infer the status for {}. 
Currently only support the case where" + " a single forward and backward pass can identify status for all blobs.".format(k) + ) + + return _known_status + + +def infer_device_type( + predict_net: caffe2_pb2.NetDef, + known_status: Dict[Tuple[str, int], Any], + device_name_style: str = "caffe2", +) -> Dict[Tuple[str, int], str]: + """ Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob """ + + assert device_name_style in ["caffe2", "pytorch"] + _CPU_STR = "cpu" + _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" + + def _copy_cpu_to_gpu_updater(op, input_types, output_types): + if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: + _updater_raise(op, input_types, output_types) + return ([_CPU_STR], [_GPU_STR]) + + def _copy_gpu_to_cpu_updater(op, input_types, output_types): + if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: + _updater_raise(op, input_types, output_types) + return ([_GPU_STR], [_CPU_STR]) + + def _other_ops_updater(op, input_types, output_types): + non_none_types = [x for x in input_types + output_types if x is not None] + if len(non_none_types) > 0: + the_type = non_none_types[0] + if not all(x == the_type for x in non_none_types): + _updater_raise(op, input_types, output_types) + else: + the_type = None + return ([the_type for _ in op.input], [the_type for _ in op.output]) + + def _device_updater(op, *args, **kwargs): + return { + "CopyCPUToGPU": _copy_cpu_to_gpu_updater, + "CopyGPUToCPU": _copy_gpu_to_cpu_updater, + }.get(op.type, _other_ops_updater)(op, *args, **kwargs) + + return _generic_status_identifier(predict_net, _device_updater, known_status) + + +# ==== torch/utils_caffe2/vis.py =============================================== + + +def _modify_blob_names(ops, blob_rename_f): + ret = [] + + def _replace_list(blob_list, replaced_list): + del blob_list[:] + blob_list.extend(replaced_list) + + for x in ops: + cur = copy.deepcopy(x) + _replace_list(cur.input, list(map(blob_rename_f, cur.input))) + _replace_list(cur.output, list(map(blob_rename_f, cur.output))) + ret.append(cur) + + return ret + + +def _rename_blob(name, blob_sizes, blob_ranges): + def _list_to_str(bsize): + ret = ", ".join([str(x) for x in bsize]) + ret = "[" + ret + "]" + return ret + + ret = name + if blob_sizes is not None and name in blob_sizes: + ret += "\n" + _list_to_str(blob_sizes[name]) + if blob_ranges is not None and name in blob_ranges: + ret += "\n" + _list_to_str(blob_ranges[name]) + + return ret + + +# graph_name could not contain word 'graph' +def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): + blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) + return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) + + +def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): + graph = None + ops = net.op + if blob_rename_func is not None: + ops = _modify_blob_names(ops, blob_rename_func) + if not op_only: + graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") + else: + graph = net_drawer.GetPydotGraphMinimal( + ops, graph_name, rankdir="TB", minimal_dependency=True + ) + + try: + par_dir = os.path.dirname(file_name) + if not os.path.exists(par_dir): + os.makedirs(par_dir) + + format = os.path.splitext(os.path.basename(file_name))[-1] + if format == ".png": + graph.write_png(file_name) + elif format == ".pdf": + graph.write_pdf(file_name) + elif format == ".svg": + 
graph.write_svg(file_name) + else: + print("Incorrect format {}".format(format)) + except Exception as e: + print("Error when writing graph to image {}".format(e)) + + return graph + + +# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== + + +def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): + """ + For ONNX exported model, GroupNorm will be represented as ATen op, + this can be a drop in replacement from ATen to GroupNorm + """ + count = 0 + for op in predict_net.op: + if op.type == "ATen": + op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 + if op_name and op_name.decode() == "group_norm": + op.arg.remove(get_pb_arg(op, "operator")) + + if get_pb_arg_vali(op, "cudnn_enabled", None): + op.arg.remove(get_pb_arg(op, "cudnn_enabled")) + + num_groups = get_pb_arg_vali(op, "num_groups", None) + if num_groups is not None: + op.arg.remove(get_pb_arg(op, "num_groups")) + check_set_pb_arg(op, "group", "i", num_groups) + + op.type = "GroupNorm" + count += 1 + if count > 1: + logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) + + +# ==== torch/utils_toffee/alias.py ============================================= + + +def alias(x, name, is_backward=False): + if not torch.onnx.is_in_onnx_export(): + return x + assert isinstance(x, torch.Tensor) + return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) + + +def fuse_alias_placeholder(predict_net, init_net): + """ Remove AliasWithName placeholder and rename the input/output of it """ + # First we finish all the re-naming + for i, op in enumerate(predict_net.op): + if op.type == "AliasWithName": + assert len(op.input) == 1 + assert len(op.output) == 1 + name = get_pb_arg_vals(op, "name", None).decode() + is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) + rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) + rename_op_output(predict_net, i, 0, name) + + # Remove AliasWithName, should be very safe since it's a non-op + new_ops = [] + for op in predict_net.op: + if op.type != "AliasWithName": + new_ops.append(op) + else: + # safety check + assert op.input == op.output + assert op.input[0] == op.arg[0].s.decode() + del predict_net.op[:] + predict_net.op.extend(new_ops) + + +# ==== torch/utils_caffe2/graph_transform.py =================================== + + +class IllegalGraphTransformError(ValueError): + """ When a graph transform function call can't be executed. 
""" + + +def _rename_versioned_blob_in_proto( + proto: caffe2_pb2.NetDef, + old_name: str, + new_name: str, + version: int, + ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], + start_versions: Dict[str, int], + end_versions: Dict[str, int], +): + """ In given proto, rename all blobs with matched version """ + # Operater list + for op, i_th_ssa in zip(proto.op, ssa): + versioned_inputs, versioned_outputs = i_th_ssa + for i in range(len(op.input)): + if versioned_inputs[i] == (old_name, version): + op.input[i] = new_name + for i in range(len(op.output)): + if versioned_outputs[i] == (old_name, version): + op.output[i] = new_name + # external_input + if start_versions.get(old_name, 0) == version: + for i in range(len(proto.external_input)): + if proto.external_input[i] == old_name: + proto.external_input[i] = new_name + # external_output + if end_versions.get(old_name, 0) == version: + for i in range(len(proto.external_output)): + if proto.external_output[i] == old_name: + proto.external_output[i] = new_name + + +def rename_op_input( + predict_net: caffe2_pb2.NetDef, + init_net: caffe2_pb2.NetDef, + op_id: int, + input_id: int, + new_name: str, + from_producer: bool = False, +): + """ + Rename the op_id-th operator in predict_net, change it's input_id-th input's + name to the new_name. It also does automatic re-route and change + external_input and init_net if necessary. + - It requires the input is only consumed by this op. + - This function modifies predict_net and init_net in-place. + - When from_producer is enable, this also updates other operators that consumes + the same input. Be cautious because may trigger unintended behavior. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + assert isinstance(init_net, caffe2_pb2.NetDef) + + init_net_ssa, init_net_versions = core.get_ssa(init_net) + predict_net_ssa, predict_net_versions = core.get_ssa( + predict_net, copy.deepcopy(init_net_versions) + ) + + versioned_inputs, versioned_outputs = predict_net_ssa[op_id] + old_name, version = versioned_inputs[input_id] + + if from_producer: + producer_map = get_producer_map(predict_net_ssa) + if not (old_name, version) in producer_map: + raise NotImplementedError( + "Can't find producer, the input {} is probably from" + " init_net, this is not supported yet.".format(old_name) + ) + producer = producer_map[(old_name, version)] + rename_op_output(predict_net, producer[0], producer[1], new_name) + return + + def contain_targets(op_ssa): + return (old_name, version) in op_ssa[0] + + is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] + if sum(is_consumer) > 1: + raise IllegalGraphTransformError( + ( + "Input '{}' of operator(#{}) are consumed by other ops, please use" + + " rename_op_output on the producer instead. Offending op: \n{}" + ).format(old_name, op_id, predict_net.op[op_id]) + ) + + # update init_net + _rename_versioned_blob_in_proto( + init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions + ) + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, + old_name, + new_name, + version, + predict_net_ssa, + init_net_versions, + predict_net_versions, + ) + + +def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): + """ + Rename the op_id-th operator in predict_net, change it's output_id-th input's + name to the new_name. It also does automatic re-route and change + external_output and if necessary. + - It allows multiple consumers of its output. 
+ - This function modifies predict_net in-place, doesn't need init_net. + """ + assert isinstance(predict_net, caffe2_pb2.NetDef) + + ssa, blob_versions = core.get_ssa(predict_net) + + versioned_inputs, versioned_outputs = ssa[op_id] + old_name, version = versioned_outputs[output_id] + + # update predict_net + _rename_versioned_blob_in_proto( + predict_net, old_name, new_name, version, ssa, {}, blob_versions + ) + + +def get_sub_graph_external_input_output( + predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] +) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: + """ + Return the list of external input/output of sub-graph, + each element is tuple of the name and corresponding version in predict_net. + + external input/output is defined the same way as caffe2 NetDef. + """ + ssa, versions = core.get_ssa(predict_net) + + all_inputs = [] + all_outputs = [] + for op_id in sub_graph_op_indices: + all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] + all_outputs += list(ssa[op_id][1]) # ssa output won't repeat + + # for versioned blobs, external inputs are just those blob in all_inputs + # but not in all_outputs + ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] + + # external outputs are essentially outputs of this subgraph that are used + # outside of this sub-graph (including predict_net.external_output) + all_other_inputs = sum( + (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), + [(outp, versions[outp]) for outp in predict_net.external_output], + ) + ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] + + return ext_inputs, ext_outputs + + +class DiGraph: + """ A DAG representation of caffe2 graph, each vertice is a versioned blob. """ + + def __init__(self): + self.vertices = set() + self.graph = collections.defaultdict(list) + + def add_edge(self, u, v): + self.graph[u].append(v) + self.vertices.add(u) + self.vertices.add(v) + + # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ + def get_all_paths(self, s, d): + visited = {k: False for k in self.vertices} + path = [] + all_paths = [] + + def _get_all_paths_util(graph, u, d, visited, path): + visited[u] = True + path.append(u) + if u == d: + all_paths.append(copy.deepcopy(path)) + else: + for i in graph[u]: + if not visited[i]: + _get_all_paths_util(graph, i, d, visited, path) + path.pop() + visited[u] = False + + _get_all_paths_util(self.graph, s, d, visited, path) + return all_paths + + @staticmethod + def from_ssa(ssa): + graph = DiGraph() + for op_id in range(len(ssa)): + for inp in ssa[op_id][0]: + for outp in ssa[op_id][1]: + graph.add_edge(inp, outp) + return graph + + +def _get_dependency_chain(ssa, versioned_target, versioned_source): + """ + Return the index list of relevant operator to produce target blob from source blob, + if there's no dependency, return empty list. + """ + + # finding all paths between nodes can be O(N!), thus we can only search + # in the subgraph using the op starting from the first consumer of source blob + # to the producer of the target blob. 
+ consumer_map = get_consumer_map(ssa) + producer_map = get_producer_map(ssa) + start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 + end_op = ( + producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op + ) + sub_graph_ssa = ssa[start_op : end_op + 1] + if len(sub_graph_ssa) > 30: + logger.warning( + "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" + " might take non-trival time to find all paths between them.".format( + versioned_source, versioned_target, start_op, end_op + ) + ) + + dag = DiGraph.from_ssa(sub_graph_ssa) + paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends + ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] + return sorted(set().union(*[set(ops) for ops in ops_in_paths])) + + +def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: + """ + Idenfity the reshape sub-graph in a protobuf. + The reshape sub-graph is defined as matching the following pattern: + + (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐ + └-------------------------------------------> Reshape -> (output_blob) + + Return: + List of sub-graphs, each sub-graph is represented as a list of indices + of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] + """ + + ssa, _ = core.get_ssa(predict_net) + + ret = [] + for i, op in enumerate(predict_net.op): + if op.type == "Reshape": + assert len(op.input) == 2 + input_ssa = ssa[i][0] + data_source = input_ssa[0] + shape_source = input_ssa[1] + op_indices = _get_dependency_chain(ssa, shape_source, data_source) + ret.append(op_indices + [i]) + return ret + + +def remove_reshape_for_fc(predict_net, params): + """ + In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape + a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping + doesn't work well with ONNX and Int8 tools, and cause using extra + ops (eg. ExpandDims) that might not be available on mobile. + Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape + after exporting ONNX model. + """ + from caffe2.python import core + + # find all reshape sub-graph that can be removed, which is now all Reshape + # sub-graph whose output is only consumed by FC. + # TODO: to make it safer, we may need the actually value to better determine + # if a Reshape before FC is removable. + reshape_sub_graphs = identify_reshape_sub_graph(predict_net) + sub_graphs_to_remove = [] + for reshape_sub_graph in reshape_sub_graphs: + reshape_op_id = reshape_sub_graph[-1] + assert predict_net.op[reshape_op_id].type == "Reshape" + ssa, _ = core.get_ssa(predict_net) + reshape_output = ssa[reshape_op_id][1][0] + consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] + if all(predict_net.op[consumer].type == "FC" for consumer in consumers): + # safety check if the sub-graph is isolated, for this reshape sub-graph, + # it means it has one non-param external input and one external output. + ext_inputs, ext_outputs = get_sub_graph_external_input_output( + predict_net, reshape_sub_graph + ) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: + sub_graphs_to_remove.append(reshape_sub_graph) + + # perform removing subgraph by: + # 1: rename the Reshape's output to its input, then the graph can be + # seen as in-place itentify, meaning whose external input/output are the same. + # 2: simply remove those ops. 
+ remove_op_ids = [] + params_to_remove = [] + for sub_graph in sub_graphs_to_remove: + logger.info( + "Remove Reshape sub-graph:\n{}".format( + "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) + ) + ) + reshape_op_id = sub_graph[-1] + new_reshap_output = predict_net.op[reshape_op_id].input[0] + rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) + ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) + non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] + params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] + assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 + assert ext_outputs[0][0] == non_params_ext_inputs[0][0] + assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 + remove_op_ids.extend(sub_graph) + params_to_remove.extend(params_ext_inputs) + + predict_net = copy.deepcopy(predict_net) + new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] + del predict_net.op[:] + predict_net.op.extend(new_ops) + for versioned_params in params_to_remove: + name = versioned_params[0] + logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) + del params[name] + predict_net.external_input.remove(name) + + return predict_net, params + + +def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): + """ + In-place fuse extra copy ops between cpu/gpu for the following case: + a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 + -CopyBToA> c2 -NextOp2-> d2 + The fused network will look like: + a -NextOp1-> d1 + -NextOp2-> d2 + """ + + _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] + + def _fuse_once(predict_net): + ssa, blob_versions = core.get_ssa(predict_net) + consumer_map = get_consumer_map(ssa) + versioned_external_output = [ + (name, blob_versions[name]) for name in predict_net.external_output + ] + + for op_id, op in enumerate(predict_net.op): + if op.type in _COPY_OPS: + fw_copy_versioned_output = ssa[op_id][1][0] + consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] + reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] + + is_fusable = ( + len(consumer_ids) > 0 + and fw_copy_versioned_output not in versioned_external_output + and all( + predict_net.op[_op_id].type == reverse_op_type + and ssa[_op_id][1][0] not in versioned_external_output + for _op_id in consumer_ids + ) + ) + + if is_fusable: + for rv_copy_op_id in consumer_ids: + # making each NextOp uses "a" directly and removing Copy ops + rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] + next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] + predict_net.op[next_op_id].input[inp_id] = op.input[0] + # remove CopyOps + new_ops = [ + op + for i, op in enumerate(predict_net.op) + if i != op_id and i not in consumer_ids + ] + del predict_net.op[:] + predict_net.op.extend(new_ops) + return True + + return False + + # _fuse_once returns False is nothing can be fused + while _fuse_once(predict_net): + pass + + +def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): + """ remove ops if its output is not used or not in external_output """ + ssa, versions = core.get_ssa(net_def) + versioned_external_output = [(name, versions[name]) for name in net_def.external_output] + consumer_map = get_consumer_map(ssa) + removed_op_ids = set() + + def _is_dead_end(versioned_blob): + return not ( + versioned_blob in versioned_external_output + or ( + len(consumer_map[versioned_blob]) > 0 + and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) + ) + ) + + for i, ssa_i in reversed(list(enumerate(ssa))): + versioned_outputs = ssa_i[1] + if all(_is_dead_end(outp) for outp in versioned_outputs): + removed_op_ids.add(i) + + # simply removing those deadend ops should have no effect to external_output + new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] + del net_def.op[:] + net_def.op.extend(new_ops) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/torchscript.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/torchscript.py new file mode 100644 index 0000000000000000000000000000000000000000..edfc0e8d67624f8d941ceca8eddc6cc1ab7592bc --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/export/torchscript.py @@ -0,0 +1,167 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.util +import os +import sys +import tempfile +from contextlib import contextmanager +from typing import Dict +import torch + +# fmt: off +from detectron2.modeling.proposal_generator import RPN +# need an explicit import due to https://github.com/pytorch/pytorch/issues/38964 +from detectron2.structures import Boxes, Instances # noqa F401 + +# fmt: on + +_counter = 0 + + +def export_torchscript_with_instances(model, fields): + """ + Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since + attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult + for torchscript to support it out of the box. This function is made to support scripting + a model that uses :class:`Instances`. It does the following: + + 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, + but with all attributes been "static". + The attributes need to be statically declared in the ``fields`` argument. + 2. Register ``new_Instances`` to torchscript, and force torchscript to + use it when trying to compile ``Instances``. + + After this function, the process will be reverted. User should be able to script another model + using different fields. + + Example: + Assume that ``Instances`` in the model consist of two attributes named + ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and + :class:`Tensor` respectively during inference. You can call this function like: + + :: + fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"} + torchscipt_model = export_torchscript_with_instances(model, fields) + + Args: + model (nn.Module): The input model to be exported to torchscript. + fields (Dict[str, str]): Attribute names and corresponding type annotations that + ``Instances`` will use in the model. Note that all attributes used in ``Instances`` + need to be added, regarldess of whether they are inputs/outputs of the model. + Custom data type is not supported for now. 
+ + Returns: + torch.jit.ScriptModule: the input model in torchscript format + """ + with patch_instances(fields): + + # Also add some other hacks for torchscript: + # boolean as dictionary keys is unsupported: + # https://github.com/pytorch/pytorch/issues/41449 + # We annotate it this way to let torchscript interpret them as integers. + RPN.__annotations__["pre_nms_topk"] = Dict[int, int] + RPN.__annotations__["post_nms_topk"] = Dict[int, int] + + scripted_model = torch.jit.script(model) + return scripted_model + + +@contextmanager +def patch_instances(fields): + with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False + ) as f: + try: + cls_name, s = _gen_module(fields) + f.write(s) + f.flush() + f.close() + + module = _import(f.name) + new_instances = getattr(module, cls_name) + _ = torch.jit.script(new_instances) + + # let torchscript think Instances was scripted already + Instances.__torch_script_class__ = True + # let torchscript find new_instances when looking for the jit type of Instances + Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances) + yield new_instances + finally: + try: + del Instances.__torch_script_class__ + del Instances._jit_override_qualname + except AttributeError: + pass + sys.modules.pop(module.__name__) + + +# TODO: find a more automatic way to enable import of other classes +def _gen_imports(): + imports_str = """ +import torch +from torch import Tensor +import typing +from typing import * + +from detectron2.structures import Boxes + +""" + return imports_str + + +def _gen_class(fields): + def indent(level, s): + return " " * 4 * level + s + + lines = [] + + global _counter + _counter += 1 + + cls_name = "Instances_patched{}".format(_counter) + + lines.append( + f""" +class {cls_name}: + def __init__(self, image_size: Tuple[int, int]): + self.image_size = image_size +""" + ) + + for name, type_ in fields.items(): + lines.append(indent(2, f"self.{name} = torch.jit.annotate(Optional[{type_}], None)")) + # TODO add getter/setter when @property is supported + + return cls_name, os.linesep.join(lines) + + +def _gen_module(fields): + s = "" + s += _gen_imports() + cls_name, cls_def = _gen_class(fields) + s += cls_def + return cls_name, s + + +def _import(path): + # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly + spec = importlib.util.spec_from_file_location( + "{}{}".format(sys.modules[__name__].__name__, _counter), path + ) + module = importlib.util.module_from_spec(spec) + sys.modules[module.__name__] = module + spec.loader.exec_module(module) + return module diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fec5f4890c3c68a632cdd4df6c81c00576bad210 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm +from .deform_conv import DeformConv, ModulatedDeformConv +from .mask_ops import paste_masks_in_image +from .nms import batched_nms +from .nms import batched_nms_rotated +from .nms import nms_rotated,batched_nms_npu +from .roi_align import ROIAlign, roi_align +from .roi_align_rotated import ROIAlignRotated, roi_align_rotated +from .shape_spec import ShapeSpec +from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear, nonzero_tuple +from .blocks import CNNBlockBase +from .aspp import ASPP + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/aspp.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/aspp.py new file mode 100644 index 0000000000000000000000000000000000000000..86fd9f70895d6d6070d64713feaf362c532c0d05 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/aspp.py @@ -0,0 +1,136 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from .batch_norm import get_norm +from .wrappers import Conv2d + + +class ASPP(nn.Module): + """ + Atrous Spatial Pyramid Pooling (ASPP). + """ + + def __init__( + self, + in_channels, + out_channels, + dilations, + norm, + activation, + pool_kernel_size=None, + dropout: float = 0.0, + ): + """ + Args: + in_channels (int): number of input channels for ASPP. + out_channels (int): number of output channels. + dilations (list): a list of 3 dilations in ASPP. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. norm is + applied to all conv layers except the conv following + global average pooling. + activation (callable): activation function. + pool_kernel_size (tuple, list): the average pooling size (kh, kw) + for image pooling layer in ASPP. If set to None, it always + performs global average pooling. If not None, it must be + divisible by the shape of inputs in forward(). It is recommended + to use a fixed input feature size in training, and set this + option to match this size, so that it performs global average + pooling in training, and the size of the pooling window stays + consistent in inference. + dropout (float): apply dropout on the output of ASPP. 
It is used in + the official DeepLab implementation with a rate of 0.1: + https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa + """ + super(ASPP, self).__init__() + assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) + self.pool_kernel_size = pool_kernel_size + self.dropout = dropout + use_bias = norm == "" + self.convs = nn.ModuleList() + # conv 1x1 + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=activation, + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # atrous convs + for dilation in dilations: + self.convs.append( + Conv2d( + in_channels, + out_channels, + kernel_size=3, + padding=dilation, + dilation=dilation, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=activation, + ) + ) + weight_init.c2_xavier_fill(self.convs[-1]) + # image pooling + # We do not add BatchNorm because the spatial resolution is 1x1, + # the original TF implementation has BatchNorm. + if pool_kernel_size is None: + image_pooling = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=activation), + ) + else: + image_pooling = nn.Sequential( + nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), + Conv2d(in_channels, out_channels, 1, bias=True, activation=activation), + ) + weight_init.c2_xavier_fill(image_pooling[1]) + self.convs.append(image_pooling) + + self.project = Conv2d( + 5 * out_channels, + out_channels, + kernel_size=1, + bias=use_bias, + norm=get_norm(norm, out_channels), + activation=activation, + ) + weight_init.c2_xavier_fill(self.project) + + def forward(self, x): + size = x.shape[-2:] + if self.pool_kernel_size is not None: + if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: + raise ValueError( + "`pool_kernel_size` must be divisible by the shape of inputs. " + "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) + ) + res = [] + for conv in self.convs: + res.append(conv(x)) + res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) + res = torch.cat(res, dim=1) + res = self.project(res) + res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res + return res diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/batch_norm.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..82a6b8c987c90201fdaefd052d6f24d577ec7ad6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/batch_norm.py @@ -0,0 +1,253 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import torch +import torch.distributed as dist +from torch import nn +from torch.autograd.function import Function +from torch.nn import functional as F + +from detectron2.utils import comm, env + +from .wrappers import BatchNorm2d + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters are fixed. + + It contains non-trainable buffers called + "weight" and "bias", "running_mean", "running_var", + initialized to perform identity transformation. + + The pre-trained backbone models from Caffe2 only contain "weight" and "bias", + which are computed from the original four parameters of BN. + The affine transform `x * weight + bias` will perform the equivalent + computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. + When loading a backbone model from Caffe2, "running_mean" and "running_var" + will be left unchanged as identity transformation. + + Other pre-trained backbone models may contain all 4 parameters. + + The forward is implemented by `F.batch_norm(..., training=False)`. + """ + + _version = 3 + + def __init__(self, num_features, eps=1e-5): + super().__init__() + self.num_features = num_features + self.eps = eps + self.register_buffer("weight", torch.ones(num_features)) + self.register_buffer("bias", torch.zeros(num_features)) + self.register_buffer("running_mean", torch.zeros(num_features)) + self.register_buffer("running_var", torch.ones(num_features) - eps) + + def forward(self, x): + if x.requires_grad: + # When gradients are needed, F.batch_norm will use extra memory + # because its backward op computes gradients for weight/bias as well. + scale = self.weight * (self.running_var + self.eps).rsqrt() + bias = self.bias - self.running_mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + return x * scale + bias + else: + # When gradients are not needed, F.batch_norm is a single fused op + # and provide more optimization opportunities. + return F.batch_norm( + x, + self.running_mean, + self.running_var, + self.weight, + self.bias, + training=False, + eps=self.eps, + ) + + def _load_from_state_dict( + self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + # No running_mean/var in early versions + # This will silent the warnings + if prefix + "running_mean" not in state_dict: + state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) + if prefix + "running_var" not in state_dict: + state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) + + if version is not None and version < 3: + logger = logging.getLogger(__name__) + logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip("."))) + # In version < 3, running_var are used without +eps. + state_dict[prefix + "running_var"] -= self.eps + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + def __repr__(self): + return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) + + @classmethod + def convert_frozen_batchnorm(cls, module): + """ + Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. + + Args: + module (torch.nn.Module): + + Returns: + If module is BatchNorm/SyncBatchNorm, returns a new module. + Otherwise, in-place convert module and return it. 
+ + Similar to convert_sync_batchnorm in + https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py + """ + bn_module = nn.modules.batchnorm + bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) + res = module + if isinstance(module, bn_module): + res = cls(module.num_features) + if module.affine: + res.weight.data = module.weight.data.clone().detach() + res.bias.data = module.bias.data.clone().detach() + res.running_mean.data = module.running_mean.data + res.running_var.data = module.running_var.data + res.eps = module.eps + else: + for name, child in module.named_children(): + new_child = cls.convert_frozen_batchnorm(child) + if new_child is not child: + res.add_module(name, new_child) + return res + + +def get_norm(norm, out_channels): + """ + Args: + norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; + or a callable that takes a channel number and returns + the normalization layer as a nn.Module. + + Returns: + nn.Module or None: the normalization layer + """ + if isinstance(norm, str): + if len(norm) == 0: + return None + norm = { + "BN": BatchNorm2d, + # Fixed in https://github.com/pytorch/pytorch/pull/36382 + "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, + "FrozenBN": FrozenBatchNorm2d, + "GN": lambda channels: nn.GroupNorm(32, channels), + # for debugging: + "nnSyncBN": nn.SyncBatchNorm, + "naiveSyncBN": NaiveSyncBatchNorm, + }[norm] + return norm(out_channels) + + +class AllReduce(Function): + @staticmethod + def forward(ctx, input): + input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] + # Use allgather instead of allreduce since I don't trust in-place operations .. + dist.all_gather(input_list, input, async_op=False) + inputs = torch.stack(input_list, dim=0) + return torch.sum(inputs, dim=0) + + @staticmethod + def backward(ctx, grad_output): + dist.all_reduce(grad_output, async_op=False) + return grad_output + + +class NaiveSyncBatchNorm(BatchNorm2d): + """ + In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient + when the batch size on each worker is different. + (e.g., when scale augmentation is used, or when it is applied to mask head). + + This is a slower but correct alternative to `nn.SyncBatchNorm`. + + Note: + There isn't a single definition of Sync BatchNorm. + + When ``stats_mode==""``, this module computes overall statistics by using + statistics of each worker with equal weight. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (N, H, W). This mode does not support inputs with zero batch size. + + When ``stats_mode=="N"``, this module computes overall statistics by weighting + the statistics of each worker by their ``N``. The result is true statistics + of all samples (as if they are all on one worker) only when all workers + have the same (H, W). It is slower than ``stats_mode==""``. + + Even though the result of this module may not be the true statistics of all samples, + it may still be reasonable because it might be preferrable to assign equal weights + to all workers, regardless of their (H, W) dimension, instead of putting larger weight + on larger images. From preliminary experiments, little difference is found between such + a simplified implementation and an accurate computation of overall mean & variance. 
+ """ + + def __init__(self, *args, stats_mode="", **kwargs): + super().__init__(*args, **kwargs) + assert stats_mode in ["", "N"] + self._stats_mode = stats_mode + + def forward(self, input): + if comm.get_world_size() == 1 or not self.training: + return super().forward(input) + + B, C = input.shape[0], input.shape[1] + + mean = torch.mean(input, dim=[0, 2, 3]) + meansqr = torch.mean(input * input, dim=[0, 2, 3]) + + if self._stats_mode == "": + assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' + vec = torch.cat([mean, meansqr], dim=0) + vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) + mean, meansqr = torch.split(vec, C) + momentum = self.momentum + else: + if B == 0: + vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) + vec = vec + input.sum() # make sure there is gradient w.r.t input + else: + vec = torch.cat( + [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 + ) + vec = AllReduce.apply(vec * B) + + total_batch = vec[-1].detach() + momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 + total_batch = torch.max(total_batch, torch.ones_like(total_batch)) # avoid div-by-zero + mean, meansqr, _ = torch.split(vec / total_batch, C) + + var = meansqr - mean * mean + invstd = torch.rsqrt(var + self.eps) + scale = self.weight * invstd + bias = self.bias - mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + + self.running_mean += momentum * (mean.detach() - self.running_mean) + self.running_var += momentum * (var.detach() - self.running_var) + return input * scale + bias diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/blocks.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..7fab84efe1d903943e26ea4bc8af4eda5fa6e2ff --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/blocks.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from torch import nn + +from .batch_norm import FrozenBatchNorm2d + + +class CNNBlockBase(nn.Module): + """ + A CNN block is assumed to have input channels, output channels and a stride. + The input and output of `forward()` method must be NCHW tensors. + The method can perform arbitrary computation but must match the given + channels and stride specification. + + Attribute: + in_channels (int): + out_channels (int): + stride (int): + """ + + def __init__(self, in_channels, out_channels, stride): + """ + The `__init__` method of any subclass should also contain these arguments. 
+ + Args: + in_channels (int): + out_channels (int): + stride (int): + """ + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.stride = stride + + def freeze(self): + """ + Make this block not trainable. + This method sets all parameters to `requires_grad=False`, + and convert all BatchNorm layers to FrozenBatchNorm + + Returns: + the block itself + """ + for p in self.parameters(): + p.requires_grad = False + FrozenBatchNorm2d.convert_frozen_batchnorm(self) + return self diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..778ed3da0bae89820831bcd8a72ff7b9cad8d4dd --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/README.md @@ -0,0 +1,7 @@ + + +To add a new Op: + +1. Create a new directory +2. Implement new ops there +3. Delcare its Python interface in `vision.cpp`. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h new file mode 100644 index 0000000000000000000000000000000000000000..4acd134367d59e64b43e3a15b81f31a969ff3bf0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h @@ -0,0 +1,130 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#pragma once +#include + +namespace detectron2 { + +at::Tensor ROIAlign_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + bool aligned); + +at::Tensor ROIAlign_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio, + bool aligned); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor ROIAlign_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + bool aligned); + +at::Tensor ROIAlign_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio, + bool aligned); +#endif + +// Interface for Python +inline at::Tensor ROIAlign_forward( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + bool aligned) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlign_forward_cuda( + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + aligned); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlign_forward_cpu( + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + aligned); +} + +inline at::Tensor ROIAlign_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int 
pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio, + bool aligned) { + if (grad.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlign_backward_cuda( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio, + aligned); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlign_backward_cpu( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio, + aligned); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52fc83f8140b29de7b2ad3cb490b8cb672959e16 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp @@ -0,0 +1,508 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include "ROIAlign.h" + +namespace { + +// implementation taken from Caffe2 +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int iy_upper, + const int ix_upper, + T roi_start_h, + T roi_start_w, + T bin_size_h, + T bin_size_w, + int roi_bin_grid_h, + int roi_bin_grid_w, + std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T x = xx; + T y = yy; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y <= 0) { + y = 0; + } + if (x <= 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indices + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void ROIAlignForward( + const int nthreads, + const T* input, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* output, + bool aligned) { + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (aligned) { + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlign cannot have non-negative size!"); + } else { // for backward-compatibility only + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + // When the grid is empty, output zeros == 0/1, instead of NaN. + const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + // we want to precalculate indices and weights shared by all channels, + // this is the key point of optimization + std::vector> pre_calc( + roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); + pre_calc_for_bilinear_interpolate( + height, + width, + pooled_height, + pooled_width, + roi_bin_grid_h, + roi_bin_grid_w, + roi_start_h, + roi_start_w, + bin_size_h, + bin_size_w, + roi_bin_grid_h, + roi_bin_grid_w, + pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + PreCalc pc = pre_calc[pre_calc_index]; + output_val += pc.w1 * offset_input[pc.pos1] + + pc.w2 * offset_input[pc.pos2] + + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; + + pre_calc_index += 1; + } + } + output_val /= count; + + output[index] = output_val; + } // for pw + } // for ph + } // for c + } // for n +} + +template +void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +inline void add(T* address, const T& val) { + *address += val; +} + +template +void ROIAlignBackward( + const int nthreads, + // may not be contiguous, and should be indexed using n_stride, etc + const T* grad_output, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* grad_input, + const T* rois, + const int n_stride, + const int c_stride, + const int h_stride, + const int w_stride, + bool aligned) { + for (int index = 0; index < nthreads; index++) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_rois = rois + n * 5; + int roi_batch_ind = offset_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? 
(T)0.5 : (T)0.0; + T roi_start_w = offset_rois[1] * spatial_scale - offset; + T roi_start_h = offset_rois[2] * spatial_scale - offset; + T roi_end_w = offset_rois[3] * spatial_scale - offset; + T roi_end_h = offset_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (aligned) { + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlign do not have non-negative size!"); + } else { // for backward-compatibility only + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + int output_offset = n * n_stride + c * c_stride; + const T* offset_grad_output = grad_output + output_offset; + const T grad_output_this_bin = + offset_grad_output[ph * h_stride + pw * w_stride]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, + width, + y, + x, + w1, + w2, + w3, + w4, + x_low, + x_high, + y_low, + y_high, + index); + + T g1 = grad_output_this_bin * w1 / count; + T g2 = grad_output_this_bin * w2 / count; + T g3 = grad_output_this_bin * w3 / count; + T g4 = grad_output_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // for +} // ROIAlignBackward + +} // namespace + +namespace detectron2 { + +at::Tensor ROIAlign_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + bool aligned) { + AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_forward_cpu"; + at::checkAllSameType(c, {input_t, rois_t}); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + at::Tensor output = at::zeros( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + + auto output_size = num_rois * pooled_height * pooled_width * channels; + + if (output.numel() == 0) + return output; + + auto input_ = 
input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "ROIAlign_forward", [&] { + ROIAlignForward( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr(), + aligned); + }); + return output; +} + +at::Tensor ROIAlign_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio, + bool aligned) { + AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_backward_cpu"; + at::checkAllSameType(c, {grad_t, rois_t}); + + at::Tensor grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + // handle possibly empty gradients + if (grad.numel() == 0) { + return grad_input; + } + + // get stride values to ensure indexing into gradients is correct. + int n_stride = grad.stride(0); + int c_stride = grad.stride(1); + int h_stride = grad.stride(2); + int w_stride = grad.stride(3); + + auto rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "ROIAlign_forward", [&] { + ROIAlignBackward( + grad.numel(), + grad.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride, + aligned); + }); + return grad_input; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..2e05953b03089203d29bc304726afbca7ee5d464 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu @@ -0,0 +1,430 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include +#include +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +template +__device__ T bilinear_interpolate( + const T* bottom_data, + const int height, + const int width, + T y, + T x, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + return 0; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + T v1 = bottom_data[y_low * width + x_low]; + T v2 = bottom_data[y_low * width + x_high]; + T v3 = bottom_data[y_high * width + x_low]; + T v4 = bottom_data[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__global__ void RoIAlignForward( + const int nthreads, + const T* bottom_data, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* bottom_rois, + T* top_data, + bool aligned) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? (T)0.5 : (T)0.0; + T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; + T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; + T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; + T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { // for backward-compatibility only + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_bottom_data = + bottom_data + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + // When the grid is empty, output zeros == 0/1, instead of NaN. + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T val = bilinear_interpolate( + offset_bottom_data, height, width, y, x, index); + output_val += val; + } + } + output_val /= count; + + top_data[index] = output_val; + } +} + +template +__device__ void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high, + const int index /* index for debug only*/) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y <= 0) + y = 0; + if (x <= 0) + x = 0; + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = bottom_data[y_low * width + x_low]; + // T v2 = bottom_data[y_low * width + x_high]; + // T v3 = bottom_data[y_high * width + x_low]; + // T v4 = bottom_data[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +__global__ void RoIAlignBackwardFeature( + const int nthreads, + const T* top_diff, + const int num_rois, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* bottom_diff, + const T* bottom_rois, + bool aligned) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + + // Do not use rounding; this implementation detail is critical + T offset = aligned ? 
(T)0.5 : (T)0.0; + T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; + T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; + T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; + T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; + + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { // for backward-compatibility only + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T y = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, + width, + y, + x, + w1, + w2, + w3, + w4, + x_low, + x_high, + y_low, + y_high, + index); + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd( + offset_bottom_diff + y_low * width + x_low, static_cast(g1)); + atomicAdd( + offset_bottom_diff + y_low * width + x_high, static_cast(g2)); + atomicAdd( + offset_bottom_diff + y_high * width + x_low, static_cast(g3)); + atomicAdd( + offset_bottom_diff + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // CUDA_1D_KERNEL_LOOP +} // RoIAlignBackward + +namespace detectron2 { + +at::Tensor ROIAlign_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + bool aligned) { + AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_forward_cuda"; + at::checkAllSameGPU(c, {input_t, rois_t}); + at::checkAllSameType(c, {input_t, rois_t}); + at::cuda::CUDAGuard device_guard(input.device()); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( 
+ static_cast(output_size), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + if (output.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { + RoIAlignForward<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr(), + aligned); + }); + cudaDeviceSynchronize(); + AT_CUDA_CHECK(cudaGetLastError()); + return output; +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIAlign_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio, + bool aligned) { + AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; + at::CheckedFrom c = "ROIAlign_backward_cuda"; + at::checkAllSameGPU(c, {grad_t, rois_t}); + at::checkAllSameType(c, {grad_t, rois_t}); + at::cuda::CUDAGuard device_guard(grad.device()); + + auto num_rois = rois.size(0); + auto grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(grad.numel()), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; + } + + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { + RoIAlignBackwardFeature<<>>( + grad.numel(), + grad_.data_ptr(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr(), + aligned); + }); + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h new file mode 100644 index 0000000000000000000000000000000000000000..1d8a2b62e57a582f18816b6a2f6ed880dbce7ccf --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h @@ -0,0 +1,115 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +#pragma once +#include + +namespace detectron2 { + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio); + +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio); +#endif + +// Interface for Python +inline at::Tensor ROIAlignRotated_forward( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_forward_cuda( + input, + rois, + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlignRotated_forward_cpu( + input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); +} + +inline at::Tensor ROIAlignRotated_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + if (grad.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return ROIAlignRotated_backward_cuda( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + return ROIAlignRotated_backward_cpu( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7e5e1ffdccd0e2ced15fa34b4906388d371bffe2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp @@ -0,0 +1,522 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include "ROIAlignRotated.h" + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. 
+// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { +template +struct PreCalc { + int pos1; + int pos2; + int pos3; + int pos4; + T w1; + T w2; + T w3; + T w4; +}; + +template +void pre_calc_for_bilinear_interpolate( + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int iy_upper, + const int ix_upper, + T roi_start_h, + T roi_start_w, + T bin_size_h, + T bin_size_w, + int roi_bin_grid_h, + int roi_bin_grid_w, + T roi_center_h, + T roi_center_w, + T cos_theta, + T sin_theta, + std::vector>& pre_calc) { + int pre_calc_index = 0; + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + for (int iy = 0; iy < iy_upper; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < ix_upper; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + // In image space, (y, x) is the order for Right Handed System, + // and this is essentially multiplying the point by a rotation matrix + // to rotate it counterclockwise through angle theta. + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + // deal with: inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + PreCalc pc; + pc.pos1 = 0; + pc.pos2 = 0; + pc.pos3 = 0; + pc.pos4 = 0; + pc.w1 = 0; + pc.w2 = 0; + pc.w3 = 0; + pc.w4 = 0; + pre_calc[pre_calc_index] = pc; + pre_calc_index += 1; + continue; + } + + if (y < 0) { + y = 0; + } + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + // save weights and indices + PreCalc pc; + pc.pos1 = y_low * width + x_low; + pc.pos2 = y_low * width + x_high; + pc.pos3 = y_high * width + x_low; + pc.pos4 = y_high * width + x_high; + pc.w1 = w1; + pc.w2 = w2; + pc.w3 = w3; + pc.w4 = w4; + pre_calc[pre_calc_index] = pc; + + pre_calc_index += 1; + } + } + } + } +} + +template +void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +template +inline void add(T* address, const T& val) { + *address += val; +} + +} // namespace + +template +void ROIAlignRotatedForward( + const int nthreads, + const T* input, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* output) { + int n_rois = nthreads / channels / pooled_width / pooled_height; + // (n, c, ph, pw) is an element in the pooled output + // can be parallelized using omp + // #pragma omp parallel for num_threads(32) + for (int n = 0; n < n_rois; n++) { + int index_n = n * channels * pooled_width * pooled_height; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // We do average (integral) pooling inside a bin + const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 + + // we want to precalculate indices and weights shared by all channels, + // this is the key point of optimization + std::vector> pre_calc( + roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. 
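In the forward pass each sampling point (yy, xx) is generated relative to the RoI center (roi_start_h/w = -height/2, -width/2) and then rotated by theta and translated to the box center to obtain feature-map coordinates, exactly the `y = yy * cos_theta - xx * sin_theta + roi_center_h` pair used in pre_calc_for_bilinear_interpolate above. A minimal standalone sketch of that transform; the helper name and the demo values are illustrative only.

#include <cmath>
#include <cstdio>

// Map a point (yy, xx), given relative to the RoI center, into feature-map
// coordinates (y, x) for an RoI centered at (cy, cx) and rotated by
// angle_deg degrees (counterclockwise, matching current_roi[5]).
static void rotate_about_center(double yy, double xx,
                                double cy, double cx, double angle_deg,
                                double* y, double* x) {
  const double theta = angle_deg * M_PI / 180.0;
  const double c = std::cos(theta), s = std::sin(theta);
  *y = yy * c - xx * s + cy;
  *x = yy * s + xx * c + cx;
}

int main() {
  double y, x;
  // angle 0: the point keeps its offset from the center.
  rotate_about_center(-3.0, 2.0, 8.0, 8.0, 0.0, &y, &x);
  std::printf("%.2f %.2f\n", y, x);   // 5.00 10.00
  // angle 90: offset (-3, 2) from the center becomes offset (-2, -3).
  rotate_about_center(-3.0, 2.0, 8.0, 8.0, 90.0, &y, &x);
  std::printf("%.2f %.2f\n", y, x);   // 6.00 5.00
  return 0;
}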
+ T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + pre_calc_for_bilinear_interpolate( + height, + width, + pooled_height, + pooled_width, + roi_bin_grid_h, + roi_bin_grid_w, + roi_start_h, + roi_start_w, + bin_size_h, + bin_size_w, + roi_bin_grid_h, + roi_bin_grid_w, + roi_center_h, + roi_center_w, + cos_theta, + sin_theta, + pre_calc); + + for (int c = 0; c < channels; c++) { + int index_n_c = index_n + c * pooled_width * pooled_height; + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + int pre_calc_index = 0; + + for (int ph = 0; ph < pooled_height; ph++) { + for (int pw = 0; pw < pooled_width; pw++) { + int index = index_n_c + ph * pooled_width + pw; + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + PreCalc pc = pre_calc[pre_calc_index]; + output_val += pc.w1 * offset_input[pc.pos1] + + pc.w2 * offset_input[pc.pos2] + + pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; + + pre_calc_index += 1; + } + } + output_val /= count; + + output[index] = output_val; + } // for pw + } // for ph + } // for c + } // for n +} + +template +void ROIAlignRotatedBackward( + const int nthreads, + // may not be contiguous. should index using n_stride, etc + const T* grad_output, + const T& spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* grad_input, + const T* rois, + const int n_stride, + const int c_stride, + const int h_stride, + const int w_stride) { + for (int index = 0; index < nthreads; index++) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + AT_ASSERTM( + roi_width >= 0 && roi_height >= 0, + "ROIs in ROIAlignRotated do not have non-negative size!"); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_grad_input = + grad_input + ((roi_batch_ind * channels + c) * height * width); + + int output_offset = n * n_stride + c * c_stride; + const T* offset_grad_output = grad_output + output_offset; + const T grad_output_this_bin = + offset_grad_output[ph * h_stride + pw * w_stride]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. 
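In the backward pass each pooled bin averaged `count` bilinear samples, so the incoming gradient for that bin is scattered back as grad * w_i / count onto the four integer corners around every sample point (the add() calls that follow). A stripped-down, single-point version of that weight computation and splat, with a made-up 4x4 map:

#include <cstdio>

int main() {
  const int height = 4, width = 4;
  float grad_input[height * width] = {0};  // dL/d(feature map)

  // One sample point inside the map and the gradient reaching this bin.
  float y = 1.3f, x = 2.6f;
  const float grad_output_this_bin = 1.0f;
  const float count = 4.0f;                // e.g. a 2x2 sampling grid

  int y_low = (int)y, x_low = (int)x;
  int y_high = y_low + 1, x_high = x_low + 1;
  float ly = y - y_low, lx = x - x_low;
  float hy = 1.f - ly, hx = 1.f - lx;
  // Bilinear weights; they always sum to 1.
  float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

  grad_input[y_low * width + x_low]   += grad_output_this_bin * w1 / count;
  grad_input[y_low * width + x_high]  += grad_output_this_bin * w2 / count;
  grad_input[y_high * width + x_low]  += grad_output_this_bin * w3 / count;
  grad_input[y_high * width + x_high] += grad_output_this_bin * w4 / count;

  float total = 0.f;
  for (int i = 0; i < height * width; i++) total += grad_input[i];
  std::printf("splatted %.3f of the bin gradient\n", total);  // 0.250 = 1/count
  return 0;
}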
+ T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = grad_output_this_bin * w1 / count; + T g2 = grad_output_this_bin * w2 / count; + T g3 = grad_output_this_bin * w3 / count; + T g4 = grad_output_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + // atomic add is not needed for now since it is single threaded + add(offset_grad_input + y_low * width + x_low, static_cast(g1)); + add(offset_grad_input + y_low * width + x_high, static_cast(g2)); + add(offset_grad_input + y_high * width + x_low, static_cast(g3)); + add(offset_grad_input + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // for +} // ROIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlign_forward_cpu"; + at::checkAllSameType(c, {input_t, rois_t}); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + at::Tensor output = at::zeros( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + + auto output_size = num_rois * pooled_height * pooled_width * channels; + + if (output.numel() == 0) { + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedForward( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + return output; +} + +at::Tensor ROIAlignRotated_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); + AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; + at::checkAllSameType(c, {grad_t, rois_t}); + + at::Tensor grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + // handle possibly empty gradients + if (grad.numel() == 0) { + return 
grad_input; + } + + // get stride values to ensure indexing into gradients is correct. + int n_stride = grad.stride(0); + int c_stride = grad.stride(1); + int h_stride = grad.stride(2); + int w_stride = grad.stride(3); + + auto rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "ROIAlignRotated_forward", [&] { + ROIAlignRotatedBackward( + grad.numel(), + grad.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride); + }); + return grad_input; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..9c376fc6973b75b34967faf870a9f85a3ee430be --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu @@ -0,0 +1,443 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include +#include +#include + +// TODO make it in a common file +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + +// Note: this implementation originates from the Caffe2 ROIAlignRotated Op +// and PyTorch ROIAlign (non-rotated) Op implementations. +// The key difference between this implementation and those ones is +// we don't do "legacy offset" in this version, as there aren't many previous +// works, if any, using the "legacy" ROIAlignRotated Op. +// This would make the interface a bit cleaner. + +namespace detectron2 { + +namespace { + +template +__device__ T bilinear_interpolate( + const T* input, + const int height, + const int width, + T y, + T x) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + return 0; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + int y_low = (int)y; + int x_low = (int)x; + int y_high; + int x_high; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. 
- lx; + // do bilinear interpolation + T v1 = input[y_low * width + x_low]; + T v2 = input[y_low * width + x_high]; + T v3 = input[y_high * width + x_low]; + T v4 = input[y_high * width + x_high]; + T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + return val; +} + +template +__device__ void bilinear_interpolate_gradient( + const int height, + const int width, + T y, + T x, + T& w1, + T& w2, + T& w3, + T& w4, + int& x_low, + int& x_high, + int& y_low, + int& y_high) { + // deal with cases that inverse elements are out of feature map boundary + if (y < -1.0 || y > height || x < -1.0 || x > width) { + // empty + w1 = w2 = w3 = w4 = 0.; + x_low = x_high = y_low = y_high = -1; + return; + } + + if (y < 0) { + y = 0; + } + + if (x < 0) { + x = 0; + } + + y_low = (int)y; + x_low = (int)x; + + if (y_low >= height - 1) { + y_high = y_low = height - 1; + y = (T)y_low; + } else { + y_high = y_low + 1; + } + + if (x_low >= width - 1) { + x_high = x_low = width - 1; + x = (T)x_low; + } else { + x_high = x_low + 1; + } + + T ly = y - y_low; + T lx = x - x_low; + T hy = 1. - ly, hx = 1. - lx; + + // reference in forward + // T v1 = input[y_low * width + x_low]; + // T v2 = input[y_low * width + x_high]; + // T v3 = input[y_high * width + x_low]; + // T v4 = input[y_high * width + x_high]; + // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; + + return; +} + +} // namespace + +template +__global__ void RoIAlignRotatedForward( + const int nthreads, + const T* input, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + const T* rois, + T* top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + const T* offset_input = + input + (roi_batch_ind * channels + c) * height * width; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (inte gral) pooling inside a bin + const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 + + T output_val = 0.; + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T val = bilinear_interpolate(offset_input, height, width, y, x); + output_val += val; + } + } + output_val /= count; + + top_data[index] = output_val; + } +} + +template +__global__ void RoIAlignRotatedBackwardFeature( + const int nthreads, + const T* top_diff, + const int num_rois, + const T spatial_scale, + const int channels, + const int height, + const int width, + const int pooled_height, + const int pooled_width, + const int sampling_ratio, + T* bottom_diff, + const T* rois) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // (n, c, ph, pw) is an element in the pooled output + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const T* current_roi = rois + n * 6; + int roi_batch_ind = current_roi[0]; + + // Do not use rounding; this implementation detail is critical + // ROIAlignRotated supports align == true, i.e., continuous coordinate + // by default, thus the 0.5 offset + T offset = (T)0.5; + T roi_center_w = current_roi[1] * spatial_scale - offset; + T roi_center_h = current_roi[2] * spatial_scale - offset; + T roi_width = current_roi[3] * spatial_scale; + T roi_height = current_roi[4] * spatial_scale; + T theta = current_roi[5] * M_PI / 180.0; + T cos_theta = cos(theta); + T sin_theta = sin(theta); + + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + T* offset_bottom_diff = + bottom_diff + (roi_batch_ind * channels + c) * height * width; + + int top_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_top_diff = top_diff + top_offset; + const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; + + // We use roi_bin_grid to sample the grid and mimic integral + int roi_bin_grid_h = (sampling_ratio > 0) + ? sampling_ratio + : ceil(roi_height / pooled_height); // e.g., = 2 + int roi_bin_grid_w = + (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); + + // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). + // Appropriate translation needs to be applied after. + T roi_start_h = -roi_height / 2.0; + T roi_start_w = -roi_width / 2.0; + + // We do average (integral) pooling inside a bin + const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 + + for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 + { + const T yy = roi_start_h + ph * bin_size_h + + static_cast(iy + .5f) * bin_size_h / + static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T xx = roi_start_w + pw * bin_size_w + + static_cast(ix + .5f) * bin_size_w / + static_cast(roi_bin_grid_w); + + // Rotate by theta around the center and translate + T y = yy * cos_theta - xx * sin_theta + roi_center_h; + T x = yy * sin_theta + xx * cos_theta + roi_center_w; + + T w1, w2, w3, w4; + int x_low, x_high, y_low, y_high; + + bilinear_interpolate_gradient( + height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); + + T g1 = top_diff_this_bin * w1 / count; + T g2 = top_diff_this_bin * w2 / count; + T g3 = top_diff_this_bin * w3 / count; + T g4 = top_diff_this_bin * w4 / count; + + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { + atomicAdd( + offset_bottom_diff + y_low * width + x_low, static_cast(g1)); + atomicAdd( + offset_bottom_diff + y_low * width + x_high, static_cast(g2)); + atomicAdd( + offset_bottom_diff + y_high * width + x_low, static_cast(g3)); + atomicAdd( + offset_bottom_diff + y_high * width + x_high, static_cast(g4)); + } // if + } // ix + } // iy + } // CUDA_1D_KERNEL_LOOP +} // RoIAlignRotatedBackward + +at::Tensor ROIAlignRotated_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int sampling_ratio) { + AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; + + at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; + at::checkAllSameGPU(c, {input_t, rois_t}); + at::checkAllSameType(c, {input_t, rois_t}); + at::cuda::CUDAGuard device_guard(input.device()); + + auto num_rois = rois.size(0); + auto channels = input.size(1); + auto height = input.size(2); + auto width = input.size(3); + + auto output = at::empty( + {num_rois, channels, pooled_height, pooled_width}, input.options()); + auto output_size = num_rois * pooled_height * pooled_width * channels; + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(output_size), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + if (output.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return output; + } + + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + input.scalar_type(), "ROIAlignRotated_forward", [&] { + RoIAlignRotatedForward<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + rois_.data_ptr(), + output.data_ptr()); + }); + cudaDeviceSynchronize(); + AT_CUDA_CHECK(cudaGetLastError()); + return output; +} + +// TODO remove the dependency on input and use instead its sizes -> save memory +at::Tensor ROIAlignRotated_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const float spatial_scale, + const int pooled_height, + const int pooled_width, + const int batch_size, + const int channels, + const int height, + const int width, + const int sampling_ratio) { + AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); + AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); + + at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; + at::CheckedFrom c = "ROIAlign_backward_cuda"; + at::checkAllSameGPU(c, {grad_t, rois_t}); + at::checkAllSameType(c, {grad_t, rois_t}); + at::cuda::CUDAGuard device_guard(grad.device()); + + auto num_rois = rois.size(0); + auto grad_input = + at::zeros({batch_size, channels, height, width}, grad.options()); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + dim3 grid(std::min( + at::cuda::ATenCeilDiv( + static_cast(grad.numel()), static_cast(512)), + static_cast(4096))); + dim3 block(512); + + // handle possibly empty gradients + if (grad.numel() == 0) { + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; + } + + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES( + grad.scalar_type(), "ROIAlignRotated_backward", [&] { + RoIAlignRotatedBackwardFeature<<>>( + grad.numel(), + grad_.data_ptr(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + grad_input.data_ptr(), + rois_.data_ptr()); + }); + AT_CUDA_CHECK(cudaGetLastError()); + return grad_input; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..8c645abea878ae1f892cbb886ec461ec6988c052 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h @@ -0,0 +1,35 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#pragma once +#include + +namespace detectron2 { + +at::Tensor box_iou_rotated_cpu( + const at::Tensor& boxes1, + const at::Tensor& boxes2); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor box_iou_rotated_cuda( + const at::Tensor& boxes1, + const at::Tensor& boxes2); +#endif + +// Interface for Python +// inline is needed to prevent multiple function definitions when this header is +// included by different cpps +inline at::Tensor box_iou_rotated( + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); + if (boxes1.device().is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + + return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2b02d171077d96fcaf29b585fa6a678af1f2842 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +#include "box_iou_rotated.h" +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +template +void box_iou_rotated_cpu_kernel( + const at::Tensor& boxes1, + const at::Tensor& boxes2, + at::Tensor& ious) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + for (int i = 0; i < num_boxes1; i++) { + for (int j = 0; j < num_boxes2; j++) { + ious[i * num_boxes2 + j] = single_box_iou_rotated( + boxes1[i].data_ptr(), boxes2[j].data_ptr()); + } + } +} + +at::Tensor box_iou_rotated_cpu( + // input must be contiguous: + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + return ious.reshape(shape); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..e3403c11796cb313771b8b6350c793b9fbdfbcaa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu @@ -0,0 +1,130 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include +#include +#include +#include "box_iou_rotated_utils.h" + +namespace detectron2 { + +// 2D block with 32 * 16 = 512 threads per block +const int BLOCK_DIM_X = 32; +const int BLOCK_DIM_Y = 16; + +template +__global__ void box_iou_rotated_cuda_kernel( + const int n_boxes1, + const int n_boxes2, + const T* dev_boxes1, + const T* dev_boxes2, + T* dev_ious) { + const int row_start = blockIdx.x * blockDim.x; + const int col_start = blockIdx.y * blockDim.y; + + const int row_size = min(n_boxes1 - row_start, blockDim.x); + const int col_size = min(n_boxes2 - col_start, blockDim.y); + + __shared__ float block_boxes1[BLOCK_DIM_X * 5]; + __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; + + // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y + if (threadIdx.x < row_size && threadIdx.y == 0) { + block_boxes1[threadIdx.x * 5 + 0] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; + block_boxes1[threadIdx.x * 5 + 1] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; + block_boxes1[threadIdx.x * 5 + 2] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; + block_boxes1[threadIdx.x * 5 + 3] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; + block_boxes1[threadIdx.x * 5 + 4] = + dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; + } + + if (threadIdx.x < col_size && threadIdx.y == 0) { + block_boxes2[threadIdx.x * 5 + 0] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; + block_boxes2[threadIdx.x * 5 + 1] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; + block_boxes2[threadIdx.x * 5 + 2] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; + block_boxes2[threadIdx.x * 5 + 3] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; + block_boxes2[threadIdx.x * 5 + 4] = + dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size && threadIdx.y < col_size) { + int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; + dev_ious[offset] = 
single_box_iou_rotated( + block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); + } +} + +at::Tensor box_iou_rotated_cuda( + // input must be contiguous + const at::Tensor& boxes1, + const at::Tensor& boxes2) { + using scalar_t = float; + AT_ASSERTM( + boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); + AT_ASSERTM( + boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); + AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); + AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(boxes1.device()); + + auto num_boxes1 = boxes1.size(0); + auto num_boxes2 = boxes2.size(0); + + at::Tensor ious = + at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); + + bool transpose = false; + if (num_boxes1 > 0 && num_boxes2 > 0) { + scalar_t *data1 = boxes1.data_ptr(), + *data2 = boxes2.data_ptr(); + + if (num_boxes2 > 65535 * BLOCK_DIM_Y) { + AT_ASSERTM( + num_boxes1 <= 65535 * BLOCK_DIM_Y, + "Too many boxes for box_iou_rotated_cuda!"); + // x dim is allowed to be large, but y dim cannot, + // so we transpose the two to avoid "invalid configuration argument" + // error. We assume one of them is small. Otherwise the result is hard to + // fit in memory anyway. + std::swap(num_boxes1, num_boxes2); + std::swap(data1, data2); + transpose = true; + } + + const int blocks_x = + at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); + const int blocks_y = + at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); + + dim3 blocks(blocks_x, blocks_y); + dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + box_iou_rotated_cuda_kernel<<>>( + num_boxes1, + num_boxes2, + data1, + data2, + (scalar_t*)ious.data_ptr()); + + AT_CUDA_CHECK(cudaGetLastError()); + } + + // reshape from 1d array to 2d array + auto shape = std::vector{num_boxes1, num_boxes2}; + if (transpose) { + return ious.view(shape).t(); + } else { + return ious.view(shape); + } +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..e3257883b730cbef23ceaf432e65330e10be6553 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h @@ -0,0 +1,363 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +#pragma once + +#include +#include + +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 +// Designates functions callable from the host (CPU) and the device (GPU) +#define HOST_DEVICE __host__ __device__ +#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ +#else +#include +#define HOST_DEVICE +#define HOST_DEVICE_INLINE HOST_DEVICE inline +#endif + +namespace detectron2 { + +namespace { + +template +struct RotatedBox { + T x_ctr, y_ctr, w, h, a; +}; + +template +struct Point { + T x, y; + HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} + HOST_DEVICE_INLINE Point operator+(const Point& p) const { + return Point(x + p.x, y + p.y); + } + HOST_DEVICE_INLINE Point& operator+=(const Point& p) { + x += p.x; + y += p.y; + return *this; + } + HOST_DEVICE_INLINE Point operator-(const Point& p) const { + return Point(x - p.x, y - p.y); + } + HOST_DEVICE_INLINE Point operator*(const T coeff) const { + return Point(x * coeff, y * coeff); + } +}; + +template +HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { + return A.x * B.x + A.y * B.y; +} + +// R: result type. can be different from input type +template +HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { + return static_cast(A.x) * static_cast(B.y) - + static_cast(B.x) * static_cast(A.y); +} + +template +HOST_DEVICE_INLINE void get_rotated_vertices( + const RotatedBox& box, + Point (&pts)[4]) { + // M_PI / 180. == 0.01745329251 + double theta = box.a * 0.01745329251; + T cosTheta2 = (T)cos(theta) * 0.5f; + T sinTheta2 = (T)sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; + pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; + pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; + pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; + pts[2].x = 2 * box.x_ctr - pts[0].x; + pts[2].y = 2 * box.y_ctr - pts[0].y; + pts[3].x = 2 * box.x_ctr - pts[1].x; + pts[3].y = 2 * box.y_ctr - pts[1].y; +} + +template +HOST_DEVICE_INLINE int get_intersection_points( + const Point (&pts1)[4], + const Point (&pts2)[4], + Point (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + Point vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i] = pts1[(i + 1) % 4] - pts1[i]; + vec2[i] = pts2[(i + 1) % 4] - pts2[i]; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + T det = cross_2d(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (fabs(det) <= 1e-14) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + T t1 = cross_2d(vec2[j], vec12) / det; + T t2 = cross_2d(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto& AB = vec2[0]; + const auto& DA = vec2[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. 
P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto& AB = vec1[0]; + const auto& DA = vec1[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (int i = 0; i < 4; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && + (APdotAD <= ADdotAD)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +template +HOST_DEVICE_INLINE int convex_hull_graham( + const Point (&p)[24], + const int& num_in, + Point (&q)[24], + bool shift_to_zero = false) { + assert(num_in >= 2); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. + int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto& start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + auto tmp = q[0]; + q[0] = q[t]; + q[t] = tmp; + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + T dist[24]; +#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 + // compute distance to origin before sort, and sort them together with the + // points + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } + + // CUDA version + // In the future, we can potentially use thrust + // for sorting here to improve speed (though not guaranteed) + for (int i = 1; i < num_in - 1; i++) { + for (int j = i + 1; j < num_in; j++) { + T crossProduct = cross_2d(q[i], q[j]); + if ((crossProduct < -1e-6) || + (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { + auto q_tmp = q[i]; + q[i] = q[j]; + q[j] = q_tmp; + auto dist_tmp = dist[i]; + dist[i] = dist[j]; + dist[j] = dist_tmp; + } + } + } +#else + // CPU version + std::sort( + q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { + T temp = cross_2d(A, B); + if (fabs(temp) < 1e-6) { + return dot_2d(A, A) < dot_2d(B, B); + } else { + return temp > 0; + } + }); + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } +#endif + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1; + } + q[1] = q[k]; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
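get_intersection_points above decides whether a vertex P of one rectangle lies inside the other rectangle ABCD by projecting AP onto the two edge directions and requiring both projections to fall within the squared edge lengths. The standalone sketch below replays that projection test on an axis-aligned 4x2 rectangle; the tiny Pt struct is a stand-in for the Point template defined earlier in this header.

#include <cstdio>

struct Pt { double x, y; };
static double dot(Pt a, Pt b) { return a.x * b.x + a.y * b.y; }
static Pt sub(Pt a, Pt b) { return {a.x - b.x, a.y - b.y}; }

// True if P lies inside the rectangle with consecutive vertices A, B and
// side A->D, using the projection test from get_intersection_points.
static bool inside(Pt P, Pt A, Pt B, Pt D) {
  Pt AB = sub(B, A), AD = sub(D, A), AP = sub(P, A);
  double APdotAB = dot(AP, AB), APdotAD = dot(AP, AD);
  return APdotAB >= 0 && APdotAD >= 0 &&
         APdotAB <= dot(AB, AB) && APdotAD <= dot(AD, AD);
}

int main() {
  // Rectangle with corners (0,0), (4,0), (4,2), (0,2).
  Pt A{0, 0}, B{4, 0}, D{0, 2};
  std::printf("%d %d\n",
              (int)inside({1.0, 1.0}, A, B, D),   // 1: inside
              (int)inside({5.0, 1.0}, A, B, D));  // 0: outside
  return 0;
}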
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + while (m > 1) { + auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; + // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - + // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we + // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means + // round to nearest floating point). + if (q1.x * q2.y >= q2.x * q1.y) + m--; + else + break; + } + // Using double also helps, but float can solve the issue for now. + // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) + // >= 0) { + // m--; + // } + q[m++] = q[i]; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i] += start; + } + } + + return m; +} + +template +HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { + if (m <= 2) { + return 0; + } + + T area = 0; + for (int i = 1; i < m - 1; i++) { + area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.0; +} + +template +HOST_DEVICE_INLINE T rotated_boxes_intersection( + const RotatedBox& box1, + const RotatedBox& box2) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from rotated_rect_intersection_pts + Point intersectPts[24], orderedPts[24]; + + Point pts1[4]; + Point pts2[4]; + get_rotated_vertices(box1, pts1); + get_rotated_vertices(box2, pts2); + + int num = get_intersection_points(pts1, pts2, intersectPts); + + if (num <= 2) { + return 0.0; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. 
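polygon_area above is the shoelace formula specialized to a convex, consistently ordered vertex list: it fans triangles out from q[0] and sums the absolute cross products. A minimal version, checked on a unit square:

#include <cmath>
#include <cstdio>

struct Pt { double x, y; };
static double cross(Pt a, Pt b) { return a.x * b.y - b.x * a.y; }

// Area of a convex polygon given as an ordered vertex list (fan from q[0]).
static double polygon_area(const Pt* q, int m) {
  if (m <= 2) return 0.0;
  double area = 0.0;
  for (int i = 1; i < m - 1; i++) {
    Pt u = {q[i].x - q[0].x, q[i].y - q[0].y};
    Pt v = {q[i + 1].x - q[0].x, q[i + 1].y - q[0].y};
    area += std::fabs(cross(u, v));
  }
  return area / 2.0;
}

int main() {
  Pt unit_square[4] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};
  std::printf("%.2f\n", polygon_area(unit_square, 4));  // 1.00
  return 0;
}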
+ int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); + return polygon_area(orderedPts, num_convex); +} + +} // namespace + +template +HOST_DEVICE_INLINE T +single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { + // shift center to the middle point to achieve higher precision in result + RotatedBox box1, box2; + auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; + auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; + box1.x_ctr = box1_raw[0] - center_shift_x; + box1.y_ctr = box1_raw[1] - center_shift_y; + box1.w = box1_raw[2]; + box1.h = box1_raw[3]; + box1.a = box1_raw[4]; + box2.x_ctr = box2_raw[0] - center_shift_x; + box2.y_ctr = box2_raw[1] - center_shift_y; + box2.w = box2_raw[2]; + box2.h = box2_raw[3]; + box2.a = box2_raw[4]; + + T area1 = box1.w * box1.h; + T area2 = box2.w * box2.h; + if (area1 < 1e-14 || area2 < 1e-14) { + return 0.f; + } + + T intersection = rotated_boxes_intersection(box1, box2); + T iou = intersection / (area1 + area2 - intersection); + return iou; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6ce92c9d4b75be7285a6b8e21363d30fe02a92e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp @@ -0,0 +1,501 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include "cocoeval.h" +#include +#include +#include + +using namespace pybind11::literals; + +namespace detectron2 { + +namespace COCOeval { + +// Sort detections from highest score to lowest, such that +// detection_instances[detection_sorted_indices[t]] >= +// detection_instances[detection_sorted_indices[t+1]]. 
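single_box_iou_rotated, defined just above, first shifts both centers toward their midpoint (which keeps the clipping arithmetic well conditioned for boxes far from the origin) and then applies IoU = intersection / (area1 + area2 - intersection). A quick sanity check is that angle-0 boxes reproduce the ordinary axis-aligned IoU; the sketch below assumes box_iou_rotated_utils.h is on the include path, and the file name is made up.

// iou_rotated_sanity.cpp -- hypothetical check against the header-only
// box_iou_rotated_utils.h added above.
#include <cstdio>
#include "box_iou_rotated_utils.h"

int main() {
  // (x_ctr, y_ctr, w, h, angle_degrees); angle = 0 means axis-aligned.
  float a[5] = {2.0f, 2.0f, 4.0f, 4.0f, 0.0f};   // covers [0,4] x [0,4]
  float b[5] = {4.0f, 2.0f, 4.0f, 4.0f, 0.0f};   // covers [2,6] x [0,4]
  // Overlap is 2 x 4 = 8; union is 16 + 16 - 8 = 24; IoU = 1/3.
  float iou = detectron2::single_box_iou_rotated<float>(a, b);
  std::printf("%.4f\n", iou);  // ~0.3333
  return 0;
}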
Use stable_sort to match +// original COCO API +void SortInstancesByDetectionScore( + const std::vector& detection_instances, + std::vector* detection_sorted_indices) { + detection_sorted_indices->resize(detection_instances.size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_instances](size_t j1, size_t j2) { + return detection_instances[j1].score > detection_instances[j2].score; + }); +} + +// Partition the ground truth objects based on whether or not to ignore them +// based on area +void SortInstancesByIgnore( + const std::array& area_range, + const std::vector& ground_truth_instances, + std::vector* ground_truth_sorted_indices, + std::vector* ignores) { + ignores->clear(); + ignores->reserve(ground_truth_instances.size()); + for (auto o : ground_truth_instances) { + ignores->push_back( + o.ignore || o.area < area_range[0] || o.area > area_range[1]); + } + + ground_truth_sorted_indices->resize(ground_truth_instances.size()); + std::iota( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + 0); + std::stable_sort( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + [&ignores](size_t j1, size_t j2) { + return (int)(*ignores)[j1] < (int)(*ignores)[j2]; + }); +} + +// For each IOU threshold, greedily match each detected instance to a ground +// truth instance (if possible) and store the results +void MatchDetectionsToGroundTruth( + const std::vector& detection_instances, + const std::vector& detection_sorted_indices, + const std::vector& ground_truth_instances, + const std::vector& ground_truth_sorted_indices, + const std::vector& ignores, + const std::vector>& ious, + const std::vector& iou_thresholds, + const std::array& area_range, + ImageEvaluation* results) { + // Initialize memory to store return data matches and ignore + const int num_iou_thresholds = iou_thresholds.size(); + const int num_ground_truth = ground_truth_sorted_indices.size(); + const int num_detections = detection_sorted_indices.size(); + std::vector ground_truth_matches( + num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) 
{ + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
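MatchDetectionsToGroundTruth above walks detections in decreasing score order and, at each IoU threshold, greedily assigns every detection the best not-yet-matched ground truth whose IoU clears the threshold (crowd and ignore handling omitted here). A stripped-down, single-threshold version of that loop on a made-up IoU matrix:

#include <cstdio>
#include <vector>

int main() {
  // ious[d][g]: IoU between detection d (already sorted by score) and gt g.
  std::vector<std::vector<double>> ious = {
      {0.80, 0.10},   // detection 0
      {0.55, 0.60},   // detection 1
      {0.30, 0.45},   // detection 2
  };
  const double iou_threshold = 0.5;
  const int num_gt = 2;

  std::vector<int> gt_matched(num_gt, -1);      // which detection took gt g
  std::vector<int> det_match(ious.size(), -1);  // which gt detection d got

  for (size_t d = 0; d < ious.size(); ++d) {
    double best_iou = iou_threshold;
    int match = -1;
    for (int g = 0; g < num_gt; ++g) {
      if (gt_matched[g] >= 0) continue;         // gt already taken
      if (ious[d][g] >= best_iou) {             // best candidate so far
        best_iou = ious[d][g];
        match = g;
      }
    }
    if (match >= 0) {
      det_match[d] = match;
      gt_matched[match] = (int)d;
    }
  }
  for (size_t d = 0; d < det_match.size(); ++d)
    std::printf("det %zu -> gt %d\n", d, det_match[d]);  // 0->0, 1->1, 2->-1
  return 0;
}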
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
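EvaluateImages above stores one ImageEvaluation per (category, area range, image) triple in a single flat vector, indexed as c * num_area_ranges * num_images + a * num_images + i, with categories outermost and images innermost; Accumulate later relies on exactly this layout. The round-trip sketch below just spells out the arithmetic with arbitrary sizes:

#include <cassert>
#include <cstdio>

int main() {
  const int num_categories = 3, num_area_ranges = 4, num_images = 5;

  auto flat = [&](int c, int a, int i) {
    return c * num_area_ranges * num_images + a * num_images + i;
  };

  // Decode and check every index once.
  for (int c = 0; c < num_categories; ++c)
    for (int a = 0; a < num_area_ranges; ++a)
      for (int i = 0; i < num_images; ++i) {
        int idx = flat(c, a, i);
        assert(idx / (num_area_ranges * num_images) == c);
        assert((idx / num_images) % num_area_ranges == a);
        assert(idx % num_images == i);
      }
  std::printf("layout ok: %d evaluations\n",
              num_categories * num_area_ranges * num_images);  // 60
  return 0;
}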
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
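Both SortInstancesByDetectionScore and BuildSortedDetectionList above leave the data in place and instead argsort an index vector: fill it with std::iota, then std::stable_sort it by decreasing score, so ties keep their original order just like the Python COCO API. The idiom in isolation:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> scores = {0.3, 0.9, 0.9, 0.1};

  std::vector<uint64_t> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::stable_sort(order.begin(), order.end(),
                   [&scores](size_t j1, size_t j2) {
                     return scores[j1] > scores[j2];
                   });

  for (uint64_t j : order) std::printf("%llu ", (unsigned long long)j);
  std::printf("\n");  // 1 2 0 3  (the two 0.9s keep their original order)
  return 0;
}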
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
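+        // [Editorial note, not part of the original patch] Worked example of
+        // the flattened layout described above, with hypothetical sizes: for
+        // num_area_ranges == 4 and num_images == 100, the block for category
+        // c == 2 and area range a == 1 starts at
+        //   evaluations_index = 2 * 4 * 100 + 1 * 100 = 900
+        // and spans the next num_images == 100 ImageEvaluation entries, one
+        // per image id.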
+ const int64_t evaluations_index = + c * num_area_ranges * num_images + a * num_images; + int num_valid_ground_truth = BuildSortedDetectionList( + evaluations, + evaluations_index, + num_images, + max_detections[m], + &evaluation_indices, + &detection_scores, + &detection_sorted_indices, + &image_detection_indices); + + if (num_valid_ground_truth == 0) { + continue; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + // recalls_out is a flattened vectors representing a + // num_iou_thresholds X num_categories X num_area_ranges X + // num_max_detections matrix + const int64_t recalls_out_index = + t * num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + // precisions_out and scores_out are flattened vectors + // representing a num_iou_thresholds X num_recall_thresholds X + // num_categories X num_area_ranges X num_max_detections matrix + const int64_t precisions_out_stride = + num_categories * num_area_ranges * num_max_detections; + const int64_t precisions_out_index = t * num_recall_thresholds * + num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + ComputePrecisionRecallCurve( + precisions_out_index, + precisions_out_stride, + recalls_out_index, + recall_thresholds, + t, + num_iou_thresholds, + num_valid_ground_truth, + evaluations, + evaluation_indices, + detection_scores, + detection_sorted_indices, + image_detection_indices, + &precisions, + &recalls, + &precisions_out, + &scores_out, + &recalls_out); + } + } + } + } + + time_t rawtime; + struct tm local_time; + std::array buffer; + time(&rawtime); + localtime_r(&rawtime, &local_time); + strftime( + buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); + return py::dict( + "params"_a = params, + "counts"_a = std::vector({num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections}), + "date"_a = buffer, + "precision"_a = precisions_out, + "recall"_a = recalls_out, + "scores"_a = scores_out); +} + +} // namespace COCOeval + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h new file mode 100644 index 0000000000000000000000000000000000000000..396c5335037fff15b30ea9832bcbd5b48d6f822b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h @@ -0,0 +1,88 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#pragma once + +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace detectron2 { + +namespace COCOeval { + +// Annotation data for a single object instance in an image +struct InstanceAnnotation { + InstanceAnnotation( + uint64_t id, + double score, + double area, + bool is_crowd, + bool ignore) + : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} + uint64_t id; + double score = 0.; + double area = 0.; + bool is_crowd = false; + bool ignore = false; +}; + +// Stores intermediate results for evaluating detection results for a single +// image that has D detected instances and G ground truth instances. 
This stores +// matches between detected and ground truth instances +struct ImageEvaluation { + // For each of the D detected instances, the id of the matched ground truth + // instance, or 0 if unmatched + std::vector detection_matches; + + // The detection score of each of the D detected instances + std::vector detection_scores; + + // Marks whether or not each of G instances was ignored from evaluation (e.g., + // because it's outside area_range) + std::vector ground_truth_ignores; + + // Marks whether or not each of D instances was ignored from evaluation (e.g., + // because it's outside aRng) + std::vector detection_ignores; +}; + +template +using ImageCategoryInstances = std::vector>>; + +// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each +// combination of image, category, area range settings, and IOU thresholds to +// evaluate, it matches detected instances to ground truth instances and stores +// the results into a vector of ImageEvaluation results, which will be +// interpreted by the COCOeval::Accumulate() function to produce precion-recall +// curves. The parameters of nested vectors have the following semantics: +// image_category_ious[i][c][d][g] is the intersection over union of the d'th +// detected instance and g'th ground truth instance of +// category category_ids[c] in image image_ids[i] +// image_category_ground_truth_instances[i][c] is a vector of ground truth +// instances in image image_ids[i] of category category_ids[c] +// image_category_detection_instances[i][c] is a vector of detected +// instances in image image_ids[i] of category category_ids[c] +std::vector EvaluateImages( + const std::vector>& area_ranges, // vector of 2-tuples + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances); + +// C++ implementation of COCOeval.accumulate(), which generates precision +// recall curves for each set of category, IOU threshold, detection area range, +// and max number of detections parameters. It is assumed that the parameter +// evaluations is the return value of the functon COCOeval::EvaluateImages(), +// which was called with the same parameter settings params +py::dict Accumulate( + const py::object& params, + const std::vector& evalutations); + +} // namespace COCOeval +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cuda_version.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cuda_version.cu new file mode 100644 index 0000000000000000000000000000000000000000..1153a74f476db65a540a2848c4728df59651dfab --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/cuda_version.cu @@ -0,0 +1,26 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +#include + +namespace detectron2 { +int get_cudart_version() { +// Not a ROCM platform: Either HIP is not used, or +// it is used, but platform is not ROCM (i.e. it is CUDA) +#if !defined(__HIP_PLATFORM_HCC__) + return CUDART_VERSION; +#else + int version = 0; + +#if HIP_VERSION_MAJOR != 0 + // Create a convention similar to that of CUDA, as assumed by other + // parts of the code. 
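+  // [Editorial note, not part of the original patch] Worked example of the
+  // encoding computed just below: HIP 4.2 (HIP_VERSION_MAJOR == 4,
+  // HIP_VERSION_MINOR == 2) is reported as 4 * 100 + 2 == 402, a single
+  // integer loosely analogous to CUDA's CUDART_VERSION.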
+ + version = HIP_VERSION_MINOR; + version += (HIP_VERSION_MAJOR * 100); +#else + hipRuntimeGetVersion(&version); +#endif + return version; +#endif +} +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv.h new file mode 100644 index 0000000000000000000000000000000000000000..4e49a56a9a77b325b5a969ea823c3372f41fee29 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv.h @@ -0,0 +1,377 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#pragma once +#include + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step); + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step); + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias); + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias); + +#endif + +inline int deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_forward_cuda( + input, + weight, + offset, + output, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + im2col_step); 
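+    // [Editorial note, not part of the original patch] For reference, the CUDA
+    // implementation in deform_conv_cuda.cu (added later in this patch) derives
+    // the output spatial size from these arguments as
+    //   outputWidth  = (inputWidth  + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+    //   outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+    // e.g. a hypothetical inputWidth == 32 with kW == 3, padW == 1, dW == 1 and
+    // dilationW == 1 gives outputWidth == (32 + 2 - 3) / 1 + 1 == 32.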
+#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline int deform_conv_backward_input( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_input_cuda( + input, + offset, + gradOutput, + gradInput, + gradOffset, + weight, + columns, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline int deform_conv_backward_filter( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + if (gradOutput.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return deform_conv_backward_parameters_cuda( + input, + offset, + gradOutput, + gradWeight, + columns, + ones, + kW, + kH, + dW, + dH, + padW, + padH, + dilationW, + dilationH, + group, + deformable_group, + scale, + im2col_step); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline void modulated_deform_conv_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + const bool with_bias) { + if (input.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_forward( + input, + weight, + bias, + ones, + offset, + mask, + output, + columns, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +inline void modulated_deform_conv_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int 
deformable_group, + const bool with_bias) { + if (grad_output.is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); + TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); + TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); + TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); + return modulated_deform_conv_cuda_backward( + input, + weight, + bias, + ones, + offset, + mask, + columns, + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h, + kernel_w, + stride_h, + stride_w, + pad_h, + pad_w, + dilation_h, + dilation_w, + group, + deformable_group, + with_bias); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + AT_ERROR("Not implemented on the CPU"); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..5376db0cc4d93e245cfc9fea0f3b5715a1f88db2 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu @@ -0,0 +1,1131 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp +// Original license: Apache 2.0 + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c +// Original license: Apache 2.0 + +#include + +#include "deform_conv.h" + +#include +#include + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col); + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im); + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_offset); + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor 
data_col); + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im); + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask); + +void shape_check( + at::Tensor input, + at::Tensor offset, + at::Tensor* gradOutput, + at::Tensor weight, + int kH, + int kW, + int dH, + int dW, + int padH, + int padW, + int dilationH, + int dilationW, + int group, + int deformable_group) { + TORCH_CHECK( + weight.ndimension() == 4, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s", + weight.ndimension()); + + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + TORCH_CHECK( + kW > 0 && kH > 0, + "kernel size should be greater than zero, but got kH: %d kW: %d", + kH, + kW); + + TORCH_CHECK( + (weight.size(2) == kH && weight.size(3) == kW), + "kernel size should be consistent with weight, ", + "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", + kH, + kW, + weight.size(2), + weight.size(3)); + + TORCH_CHECK( + dW > 0 && dH > 0, + "stride should be greater than zero, but got dH: %d dW: %d", + dH, + dW); + + TORCH_CHECK( + dilationW > 0 && dilationH > 0, + "dilation should be greater than 0, but got dilationH: %d dilationW: %d", + dilationH, + dilationW); + + int ndim = input.ndimension(); + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + TORCH_CHECK( + ndim == 3 || ndim == 4, + "3D or 4D input tensor expected but got: %s", + ndim); + + long nInputPlane = weight.size(1) * group; + long inputHeight = input.size(dimh); + long inputWidth = input.size(dimw); + long nOutputPlane = weight.size(0); + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + TORCH_CHECK( + nInputPlane % deformable_group == 0, + "input channels must divide deformable group size"); + + if (outputWidth < 1 || outputHeight < 1) + AT_ERROR( + "Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, + inputHeight, + inputWidth, + nOutputPlane, + outputHeight, + outputWidth); + + TORCH_CHECK( + input.size(1) == nInputPlane, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, + input.size(1)); + + TORCH_CHECK( + (inputHeight >= kH && inputWidth >= kW), + "input image is smaller than kernel"); + + TORCH_CHECK( + (offset.size(2) == outputHeight && offset.size(3) == outputWidth), + "invalid spatial size of offset, expected height: %d width: %d, but " + "got height: %d width: %d", + outputHeight, + outputWidth, + offset.size(2), + offset.size(3)); + + TORCH_CHECK( + (offset.size(1) == deformable_group * 2 * kH * kW), + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + TORCH_CHECK( + gradOutput->size(dimf) == nOutputPlane, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, + gradOutput->size(dimf)); + + TORCH_CHECK( + (gradOutput->size(dimh) == outputHeight && + gradOutput->size(dimw) == outputWidth), + "invalid size of gradOutput, expected height: %d width: %d , but " + "got height: %d width: %d", + outputHeight, + outputWidth, + gradOutput->size(dimh), + gradOutput->size(dimw)); + } +} + +int deform_conv_forward_cuda( + at::Tensor input, + at::Tensor weight, + at::Tensor offset, + at::Tensor output, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly + // transpose output) todo: possibly change data indexing because of + // parallel_imgs + + shape_check( + input, + offset, + NULL, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input.unsqueeze_(0); + offset.unsqueeze_(0); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + output = output.view({batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < outputHeight * outputWidth) { + ones = at::ones({outputHeight, outputWidth}, input.options()); + } + + input = input.view({batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view({batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + at::Tensor output_buffer = at::zeros( + {batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}, + output.options()); + + output_buffer = output_buffer.view({output_buffer.size(0), + group, + output_buffer.size(1) / group, + 
output_buffer.size(2), + output_buffer.size(3)}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + output_buffer[elt][g] = output_buffer[elt][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output_buffer[elt][g]); + } + } + + output_buffer = + output_buffer.view({output_buffer.size(0), + output_buffer.size(1) * output_buffer.size(2), + output_buffer.size(3), + output_buffer.size(4)}); + + output_buffer = output_buffer.view({batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + output_buffer.transpose_(1, 2); + output.copy_(output_buffer); + output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + output = output.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_input_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradInput, + at::Tensor gradOffset, + at::Tensor weight, + at::Tensor columns, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + int im2col_step) { + shape_check( + input, + offset, + &gradOutput, + weight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + weight = weight.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view({1, input.size(0), input.size(1), input.size(2)}); + offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = weight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + // change order of grad output + gradOutput = gradOutput.view({batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + gradInput = gradInput.view({batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + input = input.view({batchSize / im2col_step, + im2col_step, + nInputPlane, 
+ inputHeight, + inputWidth}); + gradOffset = gradOffset.view({batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + offset = offset.view({batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + // divide into groups + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + gradOutput = gradOutput.view({gradOutput.size(0), + group, + gradOutput.size(1) / group, + gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4)}); + + for (int g = 0; g < group; g++) { + columns[g] = columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + gradOutput[elt][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradOutput = gradOutput.view({gradOutput.size(0), + gradOutput.size(1) * gradOutput.size(2), + gradOutput.size(3), + gradOutput.size(4), + gradOutput.size(5)}); + + deformable_col2im_coord( + columns, + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradOffset[elt]); + + deformable_col2im( + columns, + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + gradInput[elt]); + } + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + gradOffset = gradOffset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); + offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); + gradOffset = + gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); + } + + return 1; +} + +int deform_conv_backward_parameters_cuda( + at::Tensor input, + at::Tensor offset, + at::Tensor gradOutput, + at::Tensor gradWeight, // at::Tensor gradBias, + at::Tensor columns, + at::Tensor ones, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + int group, + int deformable_group, + float scale, + int im2col_step) { + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + + shape_check( + input, + offset, + &gradOutput, + gradWeight, + kH, + kW, + dH, + dW, + padH, + padW, + dilationH, + dilationW, + group, + deformable_group); + + input = input.contiguous(); + offset = offset.contiguous(); + gradOutput = gradOutput.contiguous(); + + int batch = 1; + + if (input.ndimension() == 3) { + // Force batch + batch = 0; + input = input.view( + at::IntList({1, input.size(0), input.size(1), input.size(2)})); + gradOutput = gradOutput.view( + {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); + } + + long batchSize = input.size(0); + long 
nInputPlane = input.size(1); + long inputHeight = input.size(2); + long inputWidth = input.size(3); + + long nOutputPlane = gradWeight.size(0); + + long outputWidth = + (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = + (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + + columns = at::zeros( + {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, + input.options()); + + gradOutput = gradOutput.view({batchSize / im2col_step, + im2col_step, + nOutputPlane, + outputHeight, + outputWidth}); + gradOutput.transpose_(1, 2); + + at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); + gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, + nOutputPlane, + im2col_step, + outputHeight, + outputWidth}); + gradOutputBuffer.copy_(gradOutput); + // gradOutput is not contiguous, so we do reshape (instead of view) next + gradOutputBuffer = gradOutputBuffer.reshape({batchSize / im2col_step, + nOutputPlane, + im2col_step * outputHeight, + outputWidth}); + + gradOutput.transpose_(1, 2); + gradOutput = + gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); + + input = input.view({batchSize / im2col_step, + im2col_step, + nInputPlane, + inputHeight, + inputWidth}); + offset = offset.view({batchSize / im2col_step, + im2col_step, + deformable_group * 2 * kH * kW, + outputHeight, + outputWidth}); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + deformable_im2col( + input[elt], + offset[elt], + nInputPlane, + inputHeight, + inputWidth, + kH, + kW, + padH, + padW, + dH, + dW, + dilationH, + dilationW, + im2col_step, + deformable_group, + columns); + + // divide into group + gradOutputBuffer = gradOutputBuffer.view({gradOutputBuffer.size(0), + group, + gradOutputBuffer.size(1) / group, + gradOutputBuffer.size(2), + gradOutputBuffer.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + gradWeight = gradWeight.view({group, + gradWeight.size(0) / group, + gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3)}); + + for (int g = 0; g < group; g++) { + gradWeight[g] = gradWeight[g] + .flatten(1) + .addmm_( + gradOutputBuffer[elt][g].flatten(1), + columns[g].transpose(1, 0), + 1.0, + scale) + .view_as(gradWeight[g]); + } + gradOutputBuffer = gradOutputBuffer.view( + {gradOutputBuffer.size(0), + gradOutputBuffer.size(1) * gradOutputBuffer.size(2), + gradOutputBuffer.size(3), + gradOutputBuffer.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), + gradWeight.size(2), + gradWeight.size(3), + gradWeight.size(4)}); + } + + input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); + offset = offset.view( + {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); + + if (batch == 0) { + gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); + input = input.view({nInputPlane, inputHeight, inputWidth}); + } + + return 1; +} + +void modulated_deform_conv_cuda_forward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor output, + at::Tensor columns, + int kernel_h, + int kernel_w, + const int stride_h, + const int stride_w, + const int pad_h, + const int pad_w, + const int dilation_h, + const int dilation_w, + const int group, + const int deformable_group, + 
const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_out = weight.size(0); + const int channels_kernel = weight.size(1); + const int kernel_h_ = weight.size(2); + const int kernel_w_ = weight.size(3); + + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + // resize output + output = output.view({batch, channels_out, height_out, width_out}).zero_(); + // resize temporary columns + columns = at::zeros( + {channels * kernel_h * kernel_w, 1 * height_out * width_out}, + input.options()); + + output = output.view({output.size(0), + group, + output.size(1) / group, + output.size(2), + output.size(3)}); + + for (int b = 0; b < batch; b++) { + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + // divide into group + weight = weight.view({group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + + for (int g = 0; g < group; g++) { + output[b][g] = output[b][g] + .flatten(1) + .addmm_(weight[g].flatten(1), columns[g]) + .view_as(output[b][g]); + } + + weight = weight.view({weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + output = output.view({output.size(0), + output.size(1) * output.size(2), + output.size(3), + output.size(4)}); + + if (with_bias) { + output += bias.view({1, bias.size(0), 1, 1}); + } +} + +void modulated_deform_conv_cuda_backward( + at::Tensor input, + at::Tensor weight, + at::Tensor bias, + at::Tensor ones, + at::Tensor offset, + at::Tensor mask, + at::Tensor columns, + at::Tensor grad_input, + at::Tensor grad_weight, + at::Tensor grad_bias, + at::Tensor grad_offset, + at::Tensor grad_mask, + at::Tensor grad_output, + int kernel_h, + int kernel_w, + int stride_h, + int stride_w, + int pad_h, + int pad_w, + int dilation_h, + int dilation_w, + int group, + int deformable_group, + const bool with_bias) { + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + + const int batch = input.size(0); + const int channels = input.size(1); + const int height = input.size(2); + const int width = input.size(3); + + const int channels_kernel = weight.size(1); + const int kernel_h_ = 
weight.size(2); + const int kernel_w_ = weight.size(3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + AT_ERROR( + "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, + kernel_w, + kernel_h_, + kernel_w_); + if (channels != channels_kernel * group) + AT_ERROR( + "Input shape and kernel channels wont match: (%d vs %d).", + channels, + channels_kernel * group); + + const int height_out = + (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = + (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (ones.ndimension() != 2 || + ones.size(0) * ones.size(1) < height_out * width_out) { + // Resize plane and fill with ones... + ones = at::ones({height_out, width_out}, input.options()); + } + + grad_input = grad_input.view({batch, channels, height, width}); + columns = at::zeros( + {channels * kernel_h * kernel_w, height_out * width_out}, + input.options()); + + grad_output = grad_output.view({grad_output.size(0), + group, + grad_output.size(1) / group, + grad_output.size(2), + grad_output.size(3)}); + + for (int b = 0; b < batch; b++) { + // divide int group + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + weight = weight.view({group, + weight.size(0) / group, + weight.size(1), + weight.size(2), + weight.size(3)}); + + for (int g = 0; g < group; g++) { + columns[g].addmm_( + weight[g].flatten(1).transpose(0, 1), + grad_output[b][g].flatten(1), + 0.0f, + 1.0f); + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + weight = weight.view({weight.size(0) * weight.size(1), + weight.size(2), + weight.size(3), + weight.size(4)}); + + // gradient w.r.t. input coordinate data + modulated_deformable_col2im_coord_cuda( + columns, + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_offset[b], + grad_mask[b]); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda( + columns, + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + grad_input[b]); + + // gradient w.r.t. 
weight, dWeight should accumulate across the batch and + // group + modulated_deformable_im2col_cuda( + input[b], + offset[b], + mask[b], + 1, + channels, + height, + width, + height_out, + width_out, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + deformable_group, + columns); + + columns = columns.view({group, columns.size(0) / group, columns.size(1)}); + grad_weight = grad_weight.view({group, + grad_weight.size(0) / group, + grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3)}); + if (with_bias) + grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); + + for (int g = 0; g < group; g++) { + grad_weight[g] = + grad_weight[g] + .flatten(1) + .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) + .view_as(grad_weight[g]); + if (with_bias) { + grad_bias[g] = + grad_bias[g] + .view({-1, 1}) + .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) + .view(-1); + } + } + + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), + grad_weight.size(2), + grad_weight.size(3), + grad_weight.size(4)}); + if (with_bias) + grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); + } + grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), + grad_output.size(2), + grad_output.size(3), + grad_output.size(4)}); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..841f3166c902e7f1c17fe58137d42a58e4f66d69 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu @@ -0,0 +1,1288 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +// modified from +// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu +// Original license: Apache 2.0 +// clang-format off + +// modify from +// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu + +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ********************* + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. + * \ref: https://arxiv.org/abs/1703.06211 + * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng + */ + +#include +#include +#include +#include +#include +#include + +using namespace at; + +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + + +namespace { + +const int CUDA_NUM_THREADS = 1024; +const int kMaxGridNum = 65535; + +inline int GET_BLOCKS(const int N) { + return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); +} + +} + +template +__device__ scalar_t deformable_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = 
argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const scalar_t* data_im_ptr = data_im + 
((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const scalar_t map_h = i * dilation_h + offset_h; + // const scalar_t map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = deformable_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val; + data_col_ptr += batch_size * height_col * width_col; + } + } + } +} + + +template +__global__ void deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index]; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + 
abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + + +template +__global__ void deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } + const scalar_t weight = get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + + +namespace detectron2 { + +void deformable_im2col( + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int 
dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + // todo: check parallel_imgs is correctly passed in + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im( + const at::Tensor data_col, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + at::Tensor grad_im) { + // todo: make sure parallel_imgs is passed in correctly + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = + channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + deformable_group, + height_col, + width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + + +void deformable_col2im_coord( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const int channels, + const int height, + const int width, + const int ksize_h, + const int ksize_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int parallel_imgs, + const int deformable_group, + 
at::Tensor grad_offset) { + int height_col = + (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = + (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * + deformable_group * parallel_imgs; + int channel_per_deformable_group = + channels * ksize_h * ksize_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + + deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + channels, + height, + width, + ksize_h, + ksize_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + parallel_imgs, + 2 * ksize_h * ksize_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_); + })); +} + +} // namespace detectron2 + + +template +__device__ scalar_t dmcn_im2col_bilinear( + const scalar_t* bottom_data, + const int data_width, + const int height, + const int width, + scalar_t h, + scalar_t w) { + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + scalar_t lh = h - h_low; + scalar_t lw = w - w_low; + scalar_t hh = 1 - lh, hw = 1 - lw; + + scalar_t v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + scalar_t v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + scalar_t v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + scalar_t v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +template +__device__ scalar_t dmcn_get_gradient_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int h, + const int w, + const int height, + const int width) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +template +__device__ scalar_t dmcn_get_coordinate_weight( + scalar_t argmax_h, + scalar_t argmax_w, + const int height, + const int width, + const scalar_t* im_data, + const int data_width, + const int bp_dir) { + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || + argmax_w >= width) { + // empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = 
floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + scalar_t weight = 0; + + if (bp_dir == 0) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } else if (bp_dir == 1) { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * + im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * + im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +template +__global__ void modulated_deformable_im2col_gpu_kernel( + const int n, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int num_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* data_col) { + CUDA_KERNEL_LOOP(index, n) { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + + scalar_t* data_col_ptr = data_col + + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * + // height + h_in) * width + w_in; + const scalar_t* data_im_ptr = + data_im + (b_col * num_channels + c_im) * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + + const scalar_t* data_mask_ptr = data_mask + + (b_col * deformable_group + deformable_group_index) * kernel_h * + kernel_w * height_col * width_col; + + for (int i = 0; i < kernel_h; ++i) { + for (int j = 0; j < kernel_w; ++j) { + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + + w_col; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_col) * 
width_col + w_col; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t val = static_cast(0); + const scalar_t h_im = h_in + i * dilation_h + offset_h; + const scalar_t w_im = w_in + j * dilation_w + offset_w; + // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { + if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { + // const float map_h = i * dilation_h + offset_h; + // const float map_w = j * dilation_w + offset_w; + // const int cur_height = height - h_in; + // const int cur_width = width - w_in; + // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, + // cur_width, map_h, map_w); + val = dmcn_im2col_bilinear( + data_im_ptr, width, height, width, h_im, w_im); + } + *data_col_ptr = val * mask; + data_col_ptr += batch_size * height_col * width_col; + // data_col_ptr += height_col * width_col; + } + } + } +} + +template +__global__ void modulated_deformable_col2im_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_im) { + CUDA_KERNEL_LOOP(index, n) { + const int j = (index / width_col / height_col / batch_size) % kernel_w; + const int i = + (index / width_col / height_col / batch_size / kernel_w) % kernel_h; + const int c = + index / width_col / height_col / batch_size / kernel_w / kernel_h; + // compute the start and end of the output + + const int deformable_group_index = c / channel_per_deformable_group; + + int w_out = index % width_col; + int h_out = (index / width_col) % height_col; + int b = (index / width_col / height_col) % batch_size; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + const int data_offset_h_ptr = + ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; + const int data_offset_w_ptr = + ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; + const int data_mask_hw_ptr = + ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; + const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const scalar_t cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) { + for (int dx = -2; dx <= 2; dx++) { + if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && + cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) { + int cur_bottom_grad_pos = + ((b * channels + c) 
* height + cur_h + dy) * width + cur_w + dx; + scalar_t weight = dmcn_get_gradient_weight( + cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +template +__global__ void modulated_deformable_col2im_coord_gpu_kernel( + const int n, + const scalar_t* data_col, + const scalar_t* data_im, + const scalar_t* data_offset, + const scalar_t* data_mask, + const int channels, + const int height, + const int width, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, + const int offset_channels, + const int deformable_group, + const int height_col, + const int width_col, + scalar_t* grad_offset, + scalar_t* grad_mask) { + CUDA_KERNEL_LOOP(index, n) { + scalar_t val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const scalar_t* data_col_ptr = data_col + + deformable_group_index * channel_per_deformable_group * batch_size * + width_col * height_col; + const scalar_t* data_im_ptr = data_im + + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const scalar_t* data_offset_ptr = data_offset + + (b * deformable_group + deformable_group_index) * 2 * kernel_h * + kernel_w * height_col * width_col; + const scalar_t* data_mask_ptr = data_mask + + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * + height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; + col_c += col_step) { + const int col_pos = + (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = + (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = + (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = + (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + + w_out); + const int data_mask_hw_ptr = + (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; + const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; + const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; + scalar_t inv_h = h_in + i * dilation_h + offset_h; + scalar_t inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } else { + mval += data_col_ptr[col_pos] * + dmcn_im2col_bilinear( + data_im_ptr + cnt * height * width, + width, + height, + width, + inv_h, + inv_w); + } + const scalar_t weight = dmcn_get_coordinate_weight( + inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * 
height * width, + width, + bp_dir); + val += weight * data_col_ptr[col_pos] * mask; + cnt += 1; + } + // KERNEL_ASSIGN(grad_offset[index], offset_req, val); + grad_offset[index] = val; + if (offset_c % 2 == 0) + // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + + // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * + // height_col + h) * width_col + w], mask_req, mval); + grad_mask + [(((b * deformable_group + deformable_group_index) * kernel_h * + kernel_w + + offset_c / 2) * + height_col + + h) * + width_col + + w] = mval; + } +} + + +namespace detectron2 { + +void modulated_deformable_im2col_cuda( + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kenerl_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor data_col) { + // num_axes should be smaller than block size + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = channels * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_im.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* data_col_ = data_col.data_ptr(); + + modulated_deformable_im2col_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_im_, + data_offset_, + data_mask_, + height_im, + width_im, + kernel_h, + kenerl_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + channels, + deformable_group, + height_col, + width_col, + data_col_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_im2col_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_cuda( + const at::Tensor data_col, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_im) { + const int channel_per_deformable_group = channels / deformable_group; + const int num_kernels = + channels * kernel_h * kernel_w * batch_size * height_col * width_col; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_im_ = grad_im.data_ptr(); + + modulated_deformable_col2im_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_offset_, + data_mask_, + 
channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + deformable_group, + height_col, + width_col, + grad_im_); + })); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +void modulated_deformable_col2im_coord_cuda( + const at::Tensor data_col, + const at::Tensor data_im, + const at::Tensor data_offset, + const at::Tensor data_mask, + const int batch_size, + const int channels, + const int height_im, + const int width_im, + const int height_col, + const int width_col, + const int kernel_h, + const int kernel_w, + const int pad_h, + const int pad_w, + const int stride_h, + const int stride_w, + const int dilation_h, + const int dilation_w, + const int deformable_group, + at::Tensor grad_offset, + at::Tensor grad_mask) { + const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * + kernel_w * deformable_group; + const int channel_per_deformable_group = + channels * kernel_h * kernel_w / deformable_group; + + at::cuda::CUDAGuard device_guard(data_col.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { + const scalar_t* data_col_ = data_col.data_ptr(); + const scalar_t* data_im_ = data_im.data_ptr(); + const scalar_t* data_offset_ = data_offset.data_ptr(); + const scalar_t* data_mask_ = data_mask.data_ptr(); + scalar_t* grad_offset_ = grad_offset.data_ptr(); + scalar_t* grad_mask_ = grad_mask.data_ptr(); + + modulated_deformable_col2im_coord_gpu_kernel<<< + GET_BLOCKS(num_kernels), + CUDA_NUM_THREADS, + 0, + stream>>>( + num_kernels, + data_col_, + data_im_, + data_offset_, + data_mask_, + channels, + height_im, + width_im, + kernel_h, + kernel_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + channel_per_deformable_group, + batch_size, + 2 * kernel_h * kernel_w * deformable_group, + deformable_group, + height_col, + width_col, + grad_offset_, + grad_mask_); + })); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf( + "error in modulated_deformable_col2im_coord_cuda: %s\n", + cudaGetErrorString(err)); + } +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h new file mode 100644 index 0000000000000000000000000000000000000000..0f37eabe4b6638827fc31872fd9ea41b6b25ff0a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h @@ -0,0 +1,39 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +#pragma once +#include + +namespace detectron2 { + +at::Tensor nms_rotated_cpu( + const at::Tensor& dets, + const at::Tensor& scores, + const float iou_threshold); + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor nms_rotated_cuda( + const at::Tensor& dets, + const at::Tensor& scores, + const float iou_threshold); +#endif + +// Interface for Python +// inline is needed to prevent multiple function definitions when this header is +// included by different cpps +inline at::Tensor nms_rotated( + const at::Tensor& dets, + const at::Tensor& scores, + const float iou_threshold) { + assert(dets.device().is_cuda() == scores.device().is_cuda()); + if (dets.device().is_cuda()) { +#if defined(WITH_CUDA) || defined(WITH_HIP) + return nms_rotated_cuda( + dets.contiguous(), scores.contiguous(), iou_threshold); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + } + + return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0658e388df005748c358dcbf3a1ad2a59da6cac8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp @@ -0,0 +1,75 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include "../box_iou_rotated/box_iou_rotated_utils.h" +#include "nms_rotated.h" + +namespace detectron2 { + +template +at::Tensor nms_rotated_cpu_kernel( + const at::Tensor& dets, + const at::Tensor& scores, + const float iou_threshold) { + // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, + // however, the code in this function is much shorter because + // we delegate the IoU computation for rotated boxes to + // the single_box_iou_rotated function in box_iou_rotated_utils.h + AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); + AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); + AT_ASSERTM( + dets.scalar_type() == scores.scalar_type(), + "dets should have the same type as scores"); + + if (dets.numel() == 0) { + return at::empty({0}, dets.options().dtype(at::kLong)); + } + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + + auto ndets = dets.size(0); + at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); + at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); + + auto suppressed = suppressed_t.data_ptr(); + auto keep = keep_t.data_ptr(); + auto order = order_t.data_ptr(); + + int64_t num_to_keep = 0; + + for (int64_t _i = 0; _i < ndets; _i++) { + auto i = order[_i]; + if (suppressed[i] == 1) { + continue; + } + + keep[num_to_keep++] = i; + + for (int64_t _j = _i + 1; _j < ndets; _j++) { + auto j = order[_j]; + if (suppressed[j] == 1) { + continue; + } + + auto ovr = single_box_iou_rotated( + dets[i].data_ptr(), dets[j].data_ptr()); + if (ovr >= iou_threshold) { + suppressed[j] = 1; + } + } + } + return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); +} + +at::Tensor nms_rotated_cpu( + // input must be contiguous + const at::Tensor& dets, + const at::Tensor& scores, + const float iou_threshold) { + auto result = at::empty({0}, dets.options()); + + AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), 
"nms_rotated", [&] { + result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); + }); + return result; +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..8117026d37c182831738f2bf01f92ea48f368779 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu @@ -0,0 +1,145 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include +#include +#include +#include +#ifdef WITH_CUDA +#include "../box_iou_rotated/box_iou_rotated_utils.h" +#endif +// TODO avoid this when pytorch supports "same directory" hipification +#ifdef WITH_HIP +#include "box_iou_rotated/box_iou_rotated_utils.h" +#endif + +using namespace detectron2; + +namespace { +int const threadsPerBlock = sizeof(unsigned long long) * 8; +} + +template +__global__ void nms_rotated_cuda_kernel( + const int n_boxes, + const float iou_threshold, + const T* dev_boxes, + unsigned long long* dev_mask) { + // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + // Compared to nms_cuda_kernel, where each box is represented with 4 values + // (x1, y1, x2, y2), each rotated box is represented with 5 values + // (x_center, y_center, width, height, angle_degrees) here. 
+ __shared__ T block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const T* cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + // Instead of devIoU used by original horizontal nms, here + // we use the single_box_iou_rotated function from box_iou_rotated_utils.h + if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > + iou_threshold) { + t |= 1ULL << i; + } + } + const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +namespace detectron2 { + +at::Tensor nms_rotated_cuda( + // input must be contiguous + const at::Tensor& dets, + const at::Tensor& scores, + float iou_threshold) { + // using scalar_t = float; + AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); + AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(dets.device()); + + auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); + auto dets_sorted = dets.index_select(0, order_t); + + auto dets_num = dets.size(0); + + const int col_blocks = + at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); + + at::Tensor mask = + at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); + + dim3 blocks(col_blocks, col_blocks); + dim3 threads(threadsPerBlock); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + AT_DISPATCH_FLOATING_TYPES( + dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { + nms_rotated_cuda_kernel<<>>( + dets_num, + iou_threshold, + dets_sorted.data_ptr(), + (unsigned long long*)mask.data_ptr()); + }); + + at::Tensor mask_cpu = mask.to(at::kCPU); + unsigned long long* mask_host = + (unsigned long long*)mask_cpu.data_ptr(); + + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + at::Tensor keep = + at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); + int64_t* keep_out = keep.data_ptr(); + + int num_to_keep = 0; + for (int i = 0; i < dets_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long* p = mask_host + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + + AT_CUDA_CHECK(cudaGetLastError()); + return order_t.index( + {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) + .to(order_t.device(), keep.scalar_type())}); +} + +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/vision.cpp b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/vision.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..200f2cde86c7405fbf286b458d4b2db3793fe479 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/csrc/vision.cpp @@ -0,0 +1,118 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +#include +#include "ROIAlign/ROIAlign.h" +#include "ROIAlignRotated/ROIAlignRotated.h" +#include "box_iou_rotated/box_iou_rotated.h" +#include "cocoeval/cocoeval.h" +#include "deformable/deform_conv.h" +#include "nms_rotated/nms_rotated.h" + +namespace detectron2 { + +#if defined(WITH_CUDA) || defined(WITH_HIP) +extern int get_cudart_version(); +#endif + +std::string get_cuda_version() { +#if defined(WITH_CUDA) || defined(WITH_HIP) + std::ostringstream oss; + +#if defined(WITH_CUDA) + oss << "CUDA "; +#else + oss << "HIP "; +#endif + + // copied from + // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 + auto printCudaStyleVersion = [&](int v) { + oss << (v / 1000) << "." << (v / 10 % 100); + if (v % 10 != 0) { + oss << "." << (v % 10); + } + }; + printCudaStyleVersion(get_cudart_version()); + return oss.str(); +#else // neither CUDA nor HIP + return std::string("not available"); +#endif +} + +// similar to +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp +std::string get_compiler_version() { + std::ostringstream ss; +#if defined(__GNUC__) +#ifndef __clang__ + +#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) +#error "GCC >= 4.9 is required!" +#endif + + { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } +#endif +#endif + +#if defined(__clang_major__) + { + ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." + << __clang_patchlevel__; + } +#endif + +#if defined(_MSC_VER) + { ss << "MSVC " << _MSC_FULL_VER; } +#endif + return ss.str(); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); + m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); + + m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); + + m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); + m.def( + "deform_conv_backward_input", + &deform_conv_backward_input, + "deform_conv_backward_input"); + m.def( + "deform_conv_backward_filter", + &deform_conv_backward_filter, + "deform_conv_backward_filter"); + m.def( + "modulated_deform_conv_forward", + &modulated_deform_conv_forward, + "modulated_deform_conv_forward"); + m.def( + "modulated_deform_conv_backward", + &modulated_deform_conv_backward, + "modulated_deform_conv_backward"); + + m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); + + m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); + m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); + + m.def( + "roi_align_rotated_forward", + &ROIAlignRotated_forward, + "Forward pass for Rotated ROI-Align Operator"); + m.def( + "roi_align_rotated_backward", + &ROIAlignRotated_backward, + "Backward pass for Rotated ROI-Align Operator"); + + m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); + m.def( + "COCOevalEvaluateImages", + &COCOeval::EvaluateImages, + "COCOeval::EvaluateImages"); + pybind11::class_(m, "InstanceAnnotation") + .def(pybind11::init()); + pybind11::class_(m, "ImageEvaluation") + .def(pybind11::init<>()); +} +} // namespace detectron2 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/deform_conv.py 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..bc1fe467bf794a573e6d3165f874727f15ac2649 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/deform_conv.py @@ -0,0 +1,507 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from functools import lru_cache +import torch +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from detectron2 import _C + +from .wrappers import _NewEmptyTensorOp + + +class _DeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + im2col_step=64, + ): + if input is not None and input.dim() != 4: + raise ValueError( + "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) + ) + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.im2col_step = im2col_step + + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) + ) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + if not input.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + _C.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + cur_im2col_step, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + else: + cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) + assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + _C.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + 
cur_im2col_step, + ) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + _C.deform_conv_backward_filter( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + weight.size(3), + weight.size(2), + ctx.stride[1], + ctx.stride[0], + ctx.padding[1], + ctx.padding[0], + ctx.dilation[1], + ctx.dilation[0], + ctx.groups, + ctx.deformable_groups, + 1, + cur_im2col_step, + ) + + return grad_input, grad_offset, grad_weight, None, None, None, None, None, None + + @staticmethod + def _output_size(input, weight, padding, dilation, stride): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = padding[d] + kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + "convolution input is too small (output would be {})".format( + "x".join(map(str, output_size)) + ) + ) + return output_size + + @staticmethod + @lru_cache(maxsize=128) + def _cal_im2col_step(input_size, default_size): + """ + Calculate proper im2col step size, which should be divisible by input_size and not larger + than prefer_size. Meanwhile the step size should be as large as possible to be more + efficient. So we choose the largest one among all divisors of input_size which are smaller + than prefer_size. + :param input_size: input batch size . + :param default_size: default preferred im2col step size. + :return: the largest proper step size. + """ + if input_size <= default_size: + return input_size + best_step = 1 + for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): + if input_size % step == 0: + if input_size // step <= default_size: + return input_size // step + best_step = step + + return best_step + + +class _ModulatedDeformConv(Function): + @staticmethod + def forward( + ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + ): + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(1) # fake tensor + if not input.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + if ( + weight.requires_grad + or mask.requires_grad + or offset.requires_grad + or input.requires_grad + ): + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + _C.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError("Deformable Conv is not supported on CPUs!") + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + _C.modulated_deform_conv_backward( + input, + 
weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + weight.shape[2], + weight.shape[3], + ctx.stride, + ctx.stride, + ctx.padding, + ctx.padding, + ctx.dilation, + ctx.dilation, + ctx.groups, + ctx.deformable_groups, + ctx.with_bias, + ) + if not ctx.with_bias: + grad_bias = None + + return ( + grad_input, + grad_offset, + grad_mask, + grad_weight, + grad_bias, + None, + None, + None, + None, + None, + ) + + @staticmethod + def _infer_shape(ctx, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + height_out = ( + height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) + ) // ctx.stride + 1 + width_out = ( + width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) + ) // ctx.stride + 1 + return n, channels_out, height_out, width_out + + +deform_conv = _DeformConv.apply +modulated_deform_conv = _ModulatedDeformConv.apply + + +class DeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=False, + norm=None, + activation=None, + ): + """ + Deformable convolution from :paper:`deformconv`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(DeformConv, self).__init__() + + assert not bias + assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( + in_channels, groups + ) + assert ( + out_channels % groups == 0 + ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deformable_groups = deformable_groups + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) + ) + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + + def forward(self, x, offset): + if x.numel() == 0: + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. 
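+            # Each output dimension follows the usual convolution arithmetic:
+            #   out = (in + 2 * pad - (dilation * (kernel - 1) + 1)) // stride + 1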
+ # This computes the height and width of the output tensor + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = deform_conv( + x, + offset, + self.weight, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=False" + return tmpstr + + +class ModulatedDeformConv(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + bias=True, + norm=None, + activation=None, + ): + """ + Modulated deformable convolution from :paper:`deformconv2`. + + Arguments are similar to :class:`Conv2D`. Extra arguments: + + Args: + deformable_groups (int): number of groups used in deformable convolution. + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + """ + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.deformable_groups = deformable_groups + self.with_bias = bias + self.norm = norm + self.activation = activation + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.bias = None + + nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") + if self.bias is not None: + nn.init.constant_(self.bias, 0) + + def forward(self, x, offset, mask): + if x.numel() == 0: + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + return _NewEmptyTensorOp.apply(x, output_shape) + + x = modulated_deform_conv( + x, + offset, + mask, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + self.deformable_groups, + ) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + def extra_repr(self): + tmpstr = "in_channels=" + str(self.in_channels) + tmpstr += ", out_channels=" + str(self.out_channels) + tmpstr += ", kernel_size=" + str(self.kernel_size) + tmpstr += ", stride=" + str(self.stride) + tmpstr += ", padding=" + str(self.padding) + tmpstr += ", dilation=" + str(self.dilation) + tmpstr += ", groups=" + str(self.groups) + tmpstr += ", deformable_groups=" + str(self.deformable_groups) + tmpstr += ", bias=" + str(self.with_bias) + return tmpstr diff --git 
a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/mask_ops.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f6ad215987ece426ae905aa0f8c76a227913c466 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/mask_ops.py @@ -0,0 +1,264 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import torch +from PIL import Image +from torch.nn import functional as F + +__all__ = ["paste_masks_in_image"] + + +BYTES_PER_FLOAT = 4 +# TODO: This memory limit may be too much or too little. It would be better to +# determine it based on available resources. +GPU_MEM_LIMIT = 1024 ** 3 # 1 GB memory limit + + +def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): + """ + Args: + masks: N, 1, H, W + boxes: N, 4 + img_h, img_w (int): + skip_empty (bool): only paste masks within the region that + tightly bound all boxes, and returns the results this region only. + An important optimization for CPU. + + Returns: + if skip_empty == False, a mask of shape (N, img_h, img_w) + if skip_empty == True, a mask of shape (N, h', w'), and the slice + object for the corresponding region. + """ + # On GPU, paste all masks together (up to chunk size) + # by using the entire image to sample the masks + # Compared to pasting them one by one, + # this has more operations but is faster on COCO-scale dataset. + device = masks.device + if skip_empty: + x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( + dtype=torch.int32 + ) + x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) + y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) + else: + x0_int, y0_int = 0, 0 + x1_int, y1_int = img_w, img_h + + boxes = boxes.to("cpu") + masks = masks.to("cpu") + x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 + + N = masks.shape[0] + + img_y = torch.arange(y0_int, y1_int, device="cpu", dtype=torch.float32) + 0.5 + img_x = torch.arange(x0_int, x1_int, device="cpu", dtype=torch.float32) + 0.5 + img_y = (img_y - y0) / (y1 - y0) * 2 - 1 + img_x = (img_x - x0) / (x1 - x0) * 2 - 1 + # img_x, img_y have shapes (N, w), (N, h) + + gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) + gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) + grid = torch.stack([gx, gy], dim=3) + + img_masks = F.grid_sample(masks.to(dtype=torch.float32), grid, align_corners=False) + + if skip_empty: + return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) + else: + return img_masks[:, 0], () + + +def paste_masks_in_image(masks, boxes, image_shape, threshold=0.5): + """ + Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. 
+ The location, height, and width for pasting each mask is determined by their + corresponding bounding boxes in boxes. + + Note: + This is a complicated but more accurate implementation. In actual deployment, it is + often enough to use a faster but less accurate implementation. + See :func:`paste_mask_in_image_old` in this file for an alternative implementation. + + Args: + masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of + detected object instances in the image and Hmask, Wmask are the mask width and mask + height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. + boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). + boxes[i] and masks[i] correspond to the same object instance. + image_shape (tuple): height, width + threshold (float): A threshold in [0, 1] for converting the (soft) masks to + binary masks. + + Returns: + img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the + number of detected object instances and Himage, Wimage are the image width + and height. img_masks[i] is a binary mask for object instance i. + """ + + assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" + N = len(masks) + if N == 0: + return masks.new_empty((0,) + image_shape, dtype=torch.uint8) + if not isinstance(boxes, torch.Tensor): + boxes = boxes.tensor + device = boxes.device + assert len(boxes) == N, boxes.shape + + img_h, img_w = image_shape + + # The actual implementation split the input into chunks, + # and paste them chunk by chunk. + if device.type == "cpu": + # CPU is most efficient when they are pasted one by one with skip_empty=True + # so that it performs minimal number of operations. + num_chunks = N + else: + # GPU benefits from parallelism for larger chunks, but may have memory issue + # int(img_h) because shape may be tensors in tracing + num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) + assert ( + num_chunks <= N + ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" + chunks = torch.chunk(torch.arange(N, device=device), num_chunks) + + img_masks = torch.zeros( + N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 + ) + for inds in chunks: + masks_chunk, spatial_inds = \ + _do_paste_mask(masks[inds.long(), None, :, :], + boxes[inds.long()], img_h, img_w, + skip_empty=device.type == "cpu") + + if threshold >= 0: + masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) + else: + # for visualization and debugging + masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) + img_masks[(inds.long(),) + spatial_inds] = masks_chunk.to(device) + return img_masks + + +# The below are the original paste function (from Detectron1) which has +# larger quantization error. +# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. + + +def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): + """ + Paste a single mask in an image. + This is a per-box implementation of :func:`paste_masks_in_image`. + This function has larger quantization error due to incorrect pixel + modeling and is not used any more. + + Args: + mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single + object instance. Values are in [0, 1]. + box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners + of the object instance. + img_h, img_w (int): Image height and width. 
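# The chunking in paste_masks_in_image above caps the memory of the pasted float masks at
# GPU_MEM_LIMIT. A rough, purely illustrative calculation of the chunk count (numbers made up):
import math

N, img_h, img_w = 100, 800, 1333                # hypothetical instance count and image size
BYTES_PER_FLOAT, GPU_MEM_LIMIT = 4, 1024 ** 3
num_chunks = int(math.ceil(N * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT))
# 100 * 800 * 1333 * 4 bytes ~= 0.40 GiB < 1 GiB, so a single chunk suffices here.
assert num_chunks == 1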
+ threshold (float): Mask binarization threshold in [0, 1]. + + Returns: + im_mask (Tensor): + The resized and binarized object mask pasted into the original + image plane (a tensor of shape (img_h, img_w)). + """ + # Conversion from continuous box coordinates to discrete pixel coordinates + # via truncation (cast to int32). This determines which pixels to paste the + # mask onto. + box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion + # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to + # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 + # pixels (not x1 - x0 pixels). + samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width + samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height + + # Resample the mask from it's original grid to the new samples_w x samples_h grid + mask = Image.fromarray(mask.cpu().numpy()) + mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) + mask = np.array(mask, copy=False) + + if threshold >= 0: + mask = np.array(mask > threshold, dtype=np.uint8) + mask = torch.from_numpy(mask) + else: + # for visualization and debugging, we also + # allow it to return an unmodified mask + mask = torch.from_numpy(mask * 255).to(torch.uint8) + + im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, img_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, img_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) + ] + return im_mask + + +# Our pixel modeling requires extrapolation for any continuous +# coordinate < 0.5 or > length - 0.5. When sampling pixels on the masks, +# we would like this extrapolation to be an interpolation between boundary values and zero, +# instead of using absolute zero or boundary values. +# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: +# masks, scale = pad_masks(masks[:, 0, :, :], 1) +# boxes = scale_boxes(boxes.tensor, scale) + + +def pad_masks(masks, padding): + """ + Args: + masks (tensor): A tensor of shape (B, M, M) representing B masks. + padding (int): Number of cells to pad on all sides. + + Returns: + The padded masks and the scale factor of the padding size / original size. + """ + B = masks.shape[0] + M = masks.shape[-1] + pad2 = 2 * padding + scale = float(M + pad2) / M + padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) + padded_masks[:, padding:-padding, padding:-padding] = masks + return padded_masks, scale + + +def scale_boxes(boxes, scale): + """ + Args: + boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 + coords representing the corners x0, y0, x1, y1, + scale (float): The box scaling factor. + + Returns: + Scaled boxes. 
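# Small sanity sketch for pad_masks above; scale_boxes then applies the matching factor to the
# boxes so they keep lining up with the padded masks. The tensors here are made up, and
# pad_masks is assumed to be the function defined in this module.
import torch

masks = torch.rand(8, 28, 28)
padded, scale = pad_masks(masks, padding=1)
# one cell of zero padding per side: (8, 28, 28) -> (8, 30, 30), scale = 30 / 28 ~= 1.0714
assert padded.shape == (8, 30, 30) and abs(scale - 30 / 28) < 1e-6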
+ """ + w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 + h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 + x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 + y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 + + w_half *= scale + h_half *= scale + + scaled_boxes = torch.zeros_like(boxes) + scaled_boxes[:, 0] = x_c - w_half + scaled_boxes[:, 2] = x_c + w_half + scaled_boxes[:, 1] = y_c - h_half + scaled_boxes[:, 3] = y_c + h_half + return scaled_boxes diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/nms.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..122e4197d4601337a8754e7fdd9f4e02e0676928 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/nms.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +import torch +from torchvision.ops import boxes as box_ops +from torchvision.ops import nms # BC-compat +import numpy as np + + +def batched_nms_npu(boxes, scores, idxs, iou_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Parameters + ---------- + boxes : Tensor[N, 4] + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + idxs : Tensor[N] + indices of the categories for each one of the boxes. + iou_threshold : float + discards all overlapping boxes + with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices of + the elements that have been kept by NMS, sorted + in decreasing order of scores + """ + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + + ''' + npu_nms_with_mask function detail + box_scores tensor (N,8),N为候选框个数,8为候选框坐标与置信度得分 + iou_threshold float IOU阈值 + selected_boxes tensor (N,5),返回过滤后并排过序的候选框,N为输出候选框个数,5为坐标与置信度得分 + selected_idx tensor 排过序的box在输入box列表中的位置索引 + selected_mask tensor 当前候选框是否可用的标志 + ''' + _, _, keep_mask = \ + torch.npu_nms_with_mask( + torch.cat([boxes, scores[..., None]], 1), iou_threshold) + return keep_mask + + +def batched_nms( + boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float +): + """ + Same as torchvision.ops.boxes.batched_nms, but safer. + """ + assert boxes.shape[-1] == 4 + # TODO may need better strategy. + # Investigate after having a fully-cuda NMS op. 
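# In English, the Chinese note inside batched_nms_npu above says: torch.npu_nms_with_mask takes
# box_scores, an (N, 8) tensor holding each candidate box's coordinates and confidence score,
# plus a float IoU threshold; it returns selected_boxes, an (N, 5) tensor of filtered,
# score-sorted boxes (coordinates + score), selected_idx, the positions of those sorted boxes in
# the input list, and selected_mask, a per-box flag marking whether the candidate is kept. Only
# the mask is used above, so batched_nms_npu yields a keep mask rather than the kept indices
# that torchvision's batched_nms returns.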
+ if len(boxes) < 40000: + + return box_ops.batched_nms(boxes, scores.float(), idxs, iou_threshold) + + result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()): + mask = (idxs == id).nonzero().view(-1) + keep = nms(boxes[mask], scores[mask], iou_threshold) + result_mask[mask[keep]] = True + keep = result_mask.nonzero().view(-1) + keep = keep[scores[keep].argsort(descending=True)] + return keep + + +# Note: this function (nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def nms_rotated(boxes, scores, iou_threshold): + """ + Performs non-maximum suppression (NMS) on the rotated boxes according + to their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as + RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they + can be representing completely different objects in certain tasks, e.g., OCR. + + As for the question of whether rotated-NMS should treat them as faraway boxes + even though their IOU is 1, it depends on the application and/or ground truth annotation. + + As an extreme example, consider a single character v and the square box around it. + + If the angle is 0 degree, the object (text) would be read as 'v'; + + If the angle is 90 degrees, the object (text) would become '>'; + + If the angle is 180 degrees, the object (text) would become '^'; + + If the angle is 270/-90 degrees, the object (text) would become '<' + + All of these cases have IoU of 1 to each other, and rotated NMS that only + uses IoU as criterion would only keep one of them with the highest score - + which, practically, still makes sense in most cases because typically + only one of theses orientations is the correct one. Also, it does not matter + as much if the box is only used to classify the object (instead of transcribing + them with a sequential OCR recognition model) later. + + On the other hand, when we use IoU to filter proposals that are close to the + ground truth during training, we should definitely take the angle into account if + we know the ground truth is labeled with the strictly correct orientation (as in, + upside-down words are annotated with -180 degrees even though they can be covered + with a 0/90/-90 degree box, etc.) + + The way the original dataset is annotated also matters. For example, if the dataset + is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, + we can estimate a minimum rotated bounding box to this polygon, but there's no way + we can tell the correct angle with 100% confidence (as shown above, there could be 4 different + rotated boxes, with angles differed by 90 degrees to each other, covering the exactly + same region). In that case we have to just use IoU to determine the box + proximity (as many detection benchmarks (even for text) do) unless there're other + assumptions we can make (like width is always larger than height, or the object is not + rotated by more than 90 degrees CCW/CW, etc.) + + In summary, not considering angles in rotated NMS seems to be a good option for now, + but we should be aware of its implications. + + Args: + boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in + (x_center, y_center, width, height, angle_degrees) format. 
+ scores (Tensor[N]): Scores for each one of the rotated boxes + iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold + + Returns: + keep (Tensor): int64 tensor with the indices of the elements that have been kept + by Rotated NMS, sorted in decreasing order of scores + """ + from detectron2 import _C + + return _C.nms_rotated(boxes, scores, iou_threshold) + + +# Note: this function (batched_nms_rotated) might be moved into +# torchvision/ops/boxes.py in the future +def batched_nms_rotated(boxes, scores, idxs, iou_threshold): + """ + Performs non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 5]): + boxes where NMS will be performed. They + are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + iou_threshold (float): + discards all overlapping boxes + with IoU < iou_threshold + + Returns: + Tensor: + int64 tensor with the indices of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + assert boxes.shape[-1] == 5 + + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + # Strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + + # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, + # which won't handle negative coordinates correctly. + # Here by using min_coordinate we can make sure the negative coordinates are + # correctly handled. + max_coordinate = ( + torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).max() + min_coordinate = ( + torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 + ).min() + offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) + boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes + boxes_for_nms[:, :2] += offsets[:, None] + keep = nms_rotated(boxes_for_nms, scores, iou_threshold) + return keep diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..c0cfabcba784789729ff5114b0786bebf2578eb0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align.py @@ -0,0 +1,147 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
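# Numeric illustration of the per-class offset trick in batched_nms_rotated (nms.py above):
# boxes are shifted by class_id * (max_coordinate - min_coordinate + 1) so boxes of different
# classes can never overlap, letting one class-agnostic NMS call handle all of them. The span
# value below is hypothetical.
import torch

boxes = torch.tensor([[10., 10., 4., 4., 0.],    # (x_ctr, y_ctr, w, h, angle), class 0
                      [10., 10., 4., 4., 0.]])   # identical geometry, class 1
idxs = torch.tensor([0, 1])
span = 201.0                                      # stands in for max_coordinate - min_coordinate + 1
offsets = idxs.to(boxes) * span
shifted = boxes.clone()
shifted[:, :2] += offsets[:, None]
# shifted[1] is now centered at (211., 211.), so it no longer suppresses the class-0 box.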
+import torch +from torch import nn + +from torch.nn.modules.utils import _pair +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from detectron2 import _C +class _ROIAlign(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + ctx.aligned = aligned + if str(input.device) == 'cpu': + output = _C.roi_align_forward( + input, roi, spatial_scale, + output_size[0], output_size[1], sampling_ratio, aligned) + else: + output = _C.roi_align_forward( + input.cpu(), roi.cpu(), spatial_scale, + output_size[0], output_size[1], sampling_ratio, aligned) + #output = torch.npu_roi_align( + # input, roi, spatial_scale, + # output_size[0], output_size[1], sampling_ratio, roi_end_mode) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + if str(grad_output.device) =='cpu': + grad_input = _C.roi_align_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ctx.aligned, + ) + else: + grad_input = _C.roi_align_backward( + grad_output.cpu(), + rois.cpu(), + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ctx.aligned, + ) + + #grad_input = torch.npu_roi_alignbk( + # grad_output, rois, ctx.input_shape, + # output_size[0], output_size[1], + # spatial_scale, sampling_ratio) + + return grad_input, None, None, None, None, None + +roi_align = _ROIAlign.apply + +# NOTE: torchvision's RoIAlign has a different default aligned=False +class ROIAlign(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. + aligned (bool): if False, use the legacy implementation in + Detectron. If True, align the results more perfectly. + + Note: + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). But the original + roi_align (aligned=False) does not subtract the 0.5 when computing neighboring + pixel indices and therefore it uses pixels with a slightly incorrect alignment + (relative to our pixel model) when performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; see + detectron2/tests/test_roi_align.py for verification. + + The difference does not make a difference to the model's performance if + ROIAlign is used together with conv layers. 
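# Rough sketch of the pixel model in the note above: a continuous coordinate c is interpolated
# from the pixels at floor(c - 0.5) and ceil(c - 0.5).
import math

c = 1.3
assert (math.floor(c - 0.5), math.ceil(c - 0.5)) == (0, 1)   # sampled at continuous coords 0.5, 1.5
# Without the 0.5 shift (roughly what aligned=False does), the same c would instead be read from
# pixels 1 and 2, which is the half-pixel misalignment that aligned=True corrects.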
+ """ + super(ROIAlign, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. + """ + assert rois.dim() == 2 and rois.size(1) == 5 + return roi_align( + input.float(), rois, self.output_size, + self.spatial_scale, self.sampling_ratio, self.aligned + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ", aligned=" + str(self.aligned) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align_rotated.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..cfd0d2710c0473bad2b6611a76d39cd90a2b3e7e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/roi_align_rotated.py @@ -0,0 +1,101 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from detectron2 import _C + + +class _ROIAlignRotated(Function): + @staticmethod + def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): + ctx.save_for_backward(roi) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + ctx.input_shape = input.size() + output = _C.roi_align_rotated_forward( + input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio + ) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + (rois,) = ctx.saved_tensors + output_size = ctx.output_size + spatial_scale = ctx.spatial_scale + sampling_ratio = ctx.sampling_ratio + bs, ch, h, w = ctx.input_shape + grad_input = _C.roi_align_rotated_backward( + grad_output, + rois, + spatial_scale, + output_size[0], + output_size[1], + bs, + ch, + h, + w, + sampling_ratio, + ) + return grad_input, None, None, None, None, None + + +roi_align_rotated = _ROIAlignRotated.apply + + +class ROIAlignRotated(nn.Module): + def __init__(self, output_size, spatial_scale, sampling_ratio): + """ + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each output + sample. 0 to take samples densely. 
+ + Note: + ROIAlignRotated supports continuous coordinate by default: + Given a continuous coordinate c, its two neighboring pixel indices (in our + pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, + c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled + from the underlying signal at continuous coordinates 0.5 and 1.5). + """ + super(ROIAlignRotated, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx6 boxes. First column is the index into N. + The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). + """ + assert rois.dim() == 2 and rois.size(1) == 6 + return roi_align_rotated( + input, rois, self.output_size, self.spatial_scale, self.sampling_ratio + ) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "output_size=" + str(self.output_size) + tmpstr += ", spatial_scale=" + str(self.spatial_scale) + tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) + tmpstr += ")" + return tmpstr diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/rotated_boxes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..9606fdfbf726c9a525b02c1b5b31c6138e47fb90 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/rotated_boxes.py @@ -0,0 +1,35 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import, division, print_function, unicode_literals + +from detectron2 import _C + + +def pairwise_iou_rotated(boxes1, boxes2): + """ + Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + Arguments: + boxes1 (Tensor[N, 5]) + boxes2 (Tensor[M, 5]) + + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + return _C.box_iou_rotated(boxes1, boxes2) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/shape_spec.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/shape_spec.py new file mode 100644 index 0000000000000000000000000000000000000000..56c6899025c13228acf365d4386b9d17bfb41c0a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/shape_spec.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import namedtuple + + +class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): + """ + A simple structure that contains basic shape specification about a tensor. + It is often used as the auxiliary inputs/outputs of models, + to obtain the shape inference ability among pytorch modules. + + Attributes: + channels: + height: + width: + stride: + """ + + def __new__(cls, *, channels=None, height=None, width=None, stride=None): + return super().__new__(cls, channels, height, width, stride) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/wrappers.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..659b446e5900fc297c481e7d8f3435d500a3bcc1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/layers/wrappers.py @@ -0,0 +1,239 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Wrappers around on some nn functions, mainly to support empty tensors. + +Ideally, add support directly in PyTorch to empty tensors in those functions. + +These can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +import math +from typing import List +import torch +from torch.nn.modules.utils import _ntuple + +from detectron2.utils.env import TORCH_VERSION + + +def cat(tensors: List[torch.Tensor], dim: int = 0): + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +class _NewEmptyTensorOp(torch.autograd.Function): + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return _NewEmptyTensorOp.apply(grad, shape), None + + +class Conv2d(torch.nn.Conv2d): + """ + A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. + """ + + def __init__(self, *args, **kwargs): + """ + Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: + + Args: + norm (nn.Module, optional): a normalization layer + activation (callable(Tensor) -> Tensor): a callable activation function + + It assumes that norm layer is used before activation. 
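# Minimal usage sketch for this Conv2d wrapper; the specific norm and activation below are only
# examples, not choices made by this file.
import torch
import torch.nn.functional as F
from torch import nn

conv = Conv2d(16, 32, kernel_size=3, padding=1,
              norm=nn.GroupNorm(4, 32),     # applied first, per the note above
              activation=F.relu)            # applied after the norm
y = conv(torch.rand(2, 16, 64, 64))         # -> (2, 32, 64, 64)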
+ """ + norm = kwargs.pop("norm", None) + activation = kwargs.pop("activation", None) + super().__init__(*args, **kwargs) + + self.norm = norm + self.activation = activation + + def forward(self, x): + if x.numel() == 0 and self.training: + # https://github.com/pytorch/pytorch/issues/12013 + assert not isinstance( + self.norm, torch.nn.SyncBatchNorm + ), "SyncBatchNorm does not support empty inputs!" + + if x.numel() == 0 and TORCH_VERSION <= (1, 4): + assert not isinstance( + self.norm, torch.nn.GroupNorm + ), "GroupNorm does not support empty inputs in PyTorch <=1.4!" + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. + # This computes the height and width of the output tensor + output_shape = [ + (i + 2 * p - (di * (k - 1) + 1)) // s + 1 + for i, p, di, k, s in zip( + x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride + ) + ] + output_shape = [x.shape[0], self.weight.shape[0]] + output_shape + empty = _NewEmptyTensorOp.apply(x, output_shape) + if self.training: + # This is to make DDP happy. + # DDP expects all workers to have gradient w.r.t the same set of parameters. + _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + _dummy + else: + return empty + + x = super().forward(x) + if self.norm is not None: + x = self.norm(x) + if self.activation is not None: + x = self.activation(x) + return x + + +if TORCH_VERSION > (1, 4): + ConvTranspose2d = torch.nn.ConvTranspose2d +else: + + class ConvTranspose2d(torch.nn.ConvTranspose2d): + """ + A wrapper around :class:`torch.nn.ConvTranspose2d` to support zero-size tensor. + """ + + def forward(self, x): + if x.numel() > 0: + return super(ConvTranspose2d, self).forward(x) + # get output shape + + # When input is empty, we want to return a empty tensor with "correct" shape, + # So that the following operations will not panic + # if they check for the shape of the tensor. + # This computes the height and width of the output tensor + output_shape = [ + (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op + for i, p, di, k, d, op in zip( + x.shape[-2:], + self.padding, + self.dilation, + self.kernel_size, + self.stride, + self.output_padding, + ) + ] + output_shape = [x.shape[0], self.out_channels] + output_shape + # This is to make DDP happy. + # DDP expects all workers to have gradient w.r.t the same set of parameters. + _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return _NewEmptyTensorOp.apply(x, output_shape) + _dummy + + +if TORCH_VERSION > (1, 4): + BatchNorm2d = torch.nn.BatchNorm2d +else: + + class BatchNorm2d(torch.nn.BatchNorm2d): + """ + A wrapper around :class:`torch.nn.BatchNorm2d` to support zero-size tensor. + """ + + def forward(self, x): + if x.numel() > 0: + return super(BatchNorm2d, self).forward(x) + # get output shape + output_shape = x.shape + return _NewEmptyTensorOp.apply(x, output_shape) + + +if TORCH_VERSION > (1, 5): + Linear = torch.nn.Linear +else: + + class Linear(torch.nn.Linear): + """ + A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. + Because of https://github.com/pytorch/pytorch/issues/34202 + """ + + def forward(self, x): + if x.numel() == 0: + output_shape = [x.shape[0], self.weight.shape[0]] + + empty = _NewEmptyTensorOp.apply(x, output_shape) + if self.training: + # This is to make DDP happy. + # DDP expects all workers to have gradient w.r.t the same set of parameters. 
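# Worked example of the empty-input shape formula used in the wrappers above,
# (i + 2*p - (di*(k - 1) + 1)) // s + 1: with a hypothetical input side i = 64, padding p = 1,
# dilation di = 1, kernel k = 3 and stride s = 2, it gives (64 + 2 - 3) // 2 + 1 = 31 + 1 = 32,
# matching what the real convolution would produce on a non-empty input.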
+ _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + _dummy + else: + return empty + + x = super().forward(x) + return x + + +def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): + """ + A wrapper around :func:`torch.nn.functional.interpolate` to support zero-size tensor. + """ + if TORCH_VERSION > (1, 4) or input.numel() > 0: + return torch.nn.functional.interpolate( + input, size, scale_factor, mode, align_corners=align_corners + ) + + def _check_size_scale_factor(dim): + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if ( + scale_factor is not None + and isinstance(scale_factor, tuple) + and len(scale_factor) != dim + ): + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + + def _output_size(dim): + _check_size_scale_factor(dim) + if size is not None: + return size + scale_factors = _ntuple(dim)(scale_factor) + # math.floor might return float in py2.7 + return [int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)] + + output_shape = tuple(_output_size(2)) + output_shape = input.shape[:-2] + output_shape + return _NewEmptyTensorOp.apply(input, output_shape) + + +def nonzero_tuple(x): + """ + A 'as_tuple=True' version of torch.nonzero to support torchscript. + because of https://github.com/pytorch/pytorch/issues/38718 + """ + if x.dim() == 0: + return x.unsqueeze(0).nonzero().unbind(1) + return x.nonzero().unbind(1) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0a02809ffb695b79c8eb1e4fdad9c5c989a79faf --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Model Zoo API for Detectron2: a collection of functions to create common model architectures and +optionally load pre-trained weights as released in +`MODEL_ZOO.md `_. 
+""" +from .model_zoo import get, get_config_file, get_checkpoint_url + +__all__ = ["get_checkpoint_url", "get", "get_config_file"] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/model_zoo.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..b885c8073c680bde1be2753925b98b4e5f585967 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/model_zoo/model_zoo.py @@ -0,0 +1,164 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import pkg_resources +import torch + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.modeling import build_model + + +class _ModelZooUrls(object): + """ + Mapping from names to officially released Detectron2 pre-trained models. + """ + + S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl + CONFIG_PATH_TO_URL_SUFFIX = { + # COCO Detection with Faster R-CNN + "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl", + "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl", + "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl", + "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl", + "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl", + "COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl", + "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl", + "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl", + # COCO Detection with RetinaNet + "COCO-Detection/retinanet_R_50_FPN_1x.yaml": "190397773/model_final_bfca0b.pkl", + "COCO-Detection/retinanet_R_50_FPN_3x.yaml": "190397829/model_final_5bd44e.pkl", + "COCO-Detection/retinanet_R_101_FPN_3x.yaml": "190397697/model_final_971ab9.pkl", + # COCO Detection with RPN and Fast R-CNN + "COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl", + "COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl", + "COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl", + # COCO Instance Segmentation Baselines with Mask R-CNN + "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl", + 
"COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl", + "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl", + "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa + # COCO Person Keypoint Detection Baselines with Keypoint R-CNN + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl", + "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": "137849621/model_final_a6e10b.pkl", + "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl", + "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl", + # COCO Panoptic Segmentation Baselines with Panoptic FPN + "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl", + "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl", + # LVIS Instance Segmentation Baselines with Mask R-CNN + "LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl", + "LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl", + "LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa + # Cityscapes & Pascal VOC Baselines + "Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl", + "PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl", + # Other Settings + "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl", + "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl", + "Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl", + "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl", + "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl", + "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa + # D1 Comparisons + "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa + "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa + "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl", + } + + +def get_checkpoint_url(config_path): + """ + Returns the URL to the model trained using the given config + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: a URL to the model + """ + name = config_path.replace(".yaml", "") + if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: + suffix = 
_ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path] + return _ModelZooUrls.S3_PREFIX + name + "/" + suffix + raise RuntimeError("{} not available in Model Zoo!".format(name)) + + +def get_config_file(config_path): + """ + Returns path to a builtin config file. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + Returns: + str: the real path to the config file. + """ + cfg_file = pkg_resources.resource_filename( + "detectron2.model_zoo", os.path.join("configs", config_path) + ) + if not os.path.exists(cfg_file): + raise RuntimeError("{} not available in Model Zoo!".format(config_path)) + return cfg_file + + +def get(config_path, trained: bool = False): + """ + Get a model specified by relative path under Detectron2's official ``configs/`` directory. + + Args: + config_path (str): config file name relative to detectron2's "configs/" + directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + trained (bool): If True, will initialize the model with the trained model zoo weights. + If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used + instead; this will typically (though not always) initialize a subset of weights using + an ImageNet pre-trained model, while randomly initializing the other weights. + + Returns: + nn.Module: a detectron2 model + + Example: + :: + from detectron2 import model_zoo + model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) + """ + cfg_file = get_config_file(config_path) + + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if trained: + cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + return model diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64ddfe472b935e7c2afb5b475a827ecf17688644 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/__init__.py @@ -0,0 +1,63 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
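# How get_checkpoint_url() in model_zoo.py above assembles a URL from the table: the config name
# keeps its directory, drops ".yaml", and gets the stored suffix appended to S3_PREFIX.
from detectron2 import model_zoo

url = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
# -> https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl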
+from detectron2.layers import ShapeSpec + +from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY +from .backbone import ( + BACKBONE_REGISTRY, + FPN, + Backbone, + ResNet, + ResNetBlockBase, + build_backbone, + build_resnet_backbone, + make_stage, +) +from .meta_arch import ( + META_ARCH_REGISTRY, + SEM_SEG_HEADS_REGISTRY, + GeneralizedRCNN, + PanopticFPN, + ProposalNetwork, + RetinaNet, + SemanticSegmentor, + build_model, + build_sem_seg_head, +) +from .postprocessing import detector_postprocess +from .proposal_generator import ( + PROPOSAL_GENERATOR_REGISTRY, + build_proposal_generator, + RPN_HEAD_REGISTRY, + build_rpn_head, +) +from .roi_heads import ( + ROI_BOX_HEAD_REGISTRY, + ROI_HEADS_REGISTRY, + ROI_KEYPOINT_HEAD_REGISTRY, + ROI_MASK_HEAD_REGISTRY, + ROIHeads, + StandardROIHeads, + BaseMaskRCNNHead, + BaseKeypointRCNNHead, + build_box_head, + build_keypoint_head, + build_mask_head, + build_roi_heads, +) +from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA + +_EXCLUDE = {"ShapeSpec"} +__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/anchor_generator.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..aa4bc7b4af3b2536c9f6d6da048a80c8e5fc2b55 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/anchor_generator.py @@ -0,0 +1,397 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from typing import List +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, RotatedBoxes +from detectron2.utils.registry import Registry + +ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") +ANCHOR_GENERATOR_REGISTRY.__doc__ = """ +Registry for modules that creates object detection anchors for feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. 
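# Hedged sketch of how a project plugs a new generator into this registry; the class name and
# body are hypothetical, only the calling convention obj(cfg, input_shape) comes from the doc above.
@ANCHOR_GENERATOR_REGISTRY.register()
class MyAnchorGenerator(nn.Module):
    def __init__(self, cfg, input_shape):
        super().__init__()

    def forward(self, features):
        return []

# It is then selected purely by config: cfg.MODEL.ANCHOR_GENERATOR.NAME = "MyAnchorGenerator".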
+""" + + +class BufferList(nn.Module): + """ + Similar to nn.ParameterList, but for buffers + """ + + def __init__(self, buffers): + super(BufferList, self).__init__() + for i, buffer in enumerate(buffers): + self.register_buffer(str(i), buffer) + + def __len__(self): + return len(self._buffers) + + def __iter__(self): + return iter(self._buffers.values()) + + +def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device): + grid_height, grid_width = size + shifts_x = torch.arange( + offset * stride, grid_width * stride, step=stride, dtype=torch.float32, device=device + ) + shifts_y = torch.arange( + offset * stride, grid_height * stride, step=stride, dtype=torch.float32, device=device + ) + + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + return shift_x, shift_y + + +def _broadcast_params(params, num_features, name): + """ + If one size (or aspect ratio) is specified and there are multiple feature + maps, we "broadcast" anchors of that single size (or aspect ratio) + over all feature maps. + + If params is list[float], or list[list[float]] with len(params) == 1, repeat + it num_features time. + + Returns: + list[list[float]]: param for each feature + """ + assert isinstance( + params, (list, tuple) + ), f"{name} in anchor generator has to be a list! Got {params}." + assert len(params), f"{name} in anchor generator cannot be empty!" + if not isinstance(params[0], (list, tuple)): # list[float] + return [params] * num_features + if len(params) == 1: + return list(params) * num_features + assert len(params) == num_features, ( + f"Got {name} of length {len(params)} in anchor generator, " + f"but the number of input features is {num_features}!" + ) + return params + + +@ANCHOR_GENERATOR_REGISTRY.register() +class DefaultAnchorGenerator(nn.Module): + """ + Compute anchors in the standard ways described in + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". + """ + + box_dim: torch.jit.Final[int] = 4 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If sizes is list[list[float]], sizes[i] is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If sizes is list[float], the sizes are used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. 
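# Illustration of the "broadcast" rule implemented by _broadcast_params above, for a
# hypothetical model with 4 feature maps:
#   _broadcast_params([32, 64], 4, "sizes")                   -> [[32, 64]] * 4  (reused for every map)
#   _broadcast_params([[32], [64], [128], [256]], 4, "sizes") -> returned unchanged, one entry per map
# Any other length fails the assertion, because it cannot be matched one-to-one to the features.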
+ """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + } + + def _calculate_anchors(self, sizes, aspect_ratios): + cell_anchors = [ + self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) + ] + return BufferList(cell_anchors) + + @property + def num_cell_anchors(self): + """ + Alias of `num_anchors`. + """ + return self.num_anchors + + @property + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios and 5 sizes, the number of anchors is 15. + (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) + + In standard RPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes: List[List[int]]): + """ + Returns: + list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 + """ + anchors = [] + # buffers() not supported by torchscript. use named_buffers() instead + buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()] + for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + if shifts.dtype != base_anchors.dtype: + anchors.append( + (shifts.type(base_anchors.dtype).view(-1, 1, 4) + + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + else: + anchors.append((shifts.view(-1, 1, 4) + + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes and aspect_ratios centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes + in XYXY format. + """ + + # This is different from the anchor generator defined in the original Faster R-CNN + # code or Detectron. They yield the same AP, however the old version defines cell + # anchors in a less natural way with a shift relative to the feature grid and + # quantization that results in slightly different sizes for different aspect ratios. + # See also https://github.com/facebookresearch/Detectron/issues/227 + + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... 
+ # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 + anchors.append([x0, y0, x1, y1]) + return torch.tensor(anchors) + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[Boxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [Boxes(x) for x in anchors_over_all_feature_maps] + + +@ANCHOR_GENERATOR_REGISTRY.register() +class RotatedAnchorGenerator(nn.Module): + """ + Compute rotated anchors used by Rotated RPN (RRPN), described in + "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". + """ + + box_dim: int = 5 + """ + the dimension of each anchor box. + """ + + @configurable + def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): + """ + This interface is experimental. + + Args: + sizes (list[list[float]] or list[float]): + If sizes is list[list[float]], sizes[i] is the list of anchor sizes + (i.e. sqrt of anchor area) to use for the i-th feature map. + If sizes is list[float], the sizes are used for all feature maps. + Anchor sizes are given in absolute lengths in units of + the input image; they do not dynamically scale if the input image size changes. + aspect_ratios (list[list[float]] or list[float]): list of aspect ratios + (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. + strides (list[int]): stride of each input feature. + angles (list[list[float]] or list[float]): list of angles (in degrees CCW) + to use for anchors. Same "broadcast" rule for `sizes` applies. + offset (float): Relative offset between the center of the first anchor and the top-left + corner of the image. Value has to be in [0, 1). + Recommend to use 0.5, which means half stride. + """ + super().__init__() + + self.strides = strides + self.num_features = len(self.strides) + sizes = _broadcast_params(sizes, self.num_features, "sizes") + aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") + angles = _broadcast_params(angles, self.num_features, "angles") + self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) + + self.offset = offset + assert 0.0 <= self.offset < 1.0, self.offset + + @classmethod + def from_config(cls, cfg, input_shape: List[ShapeSpec]): + return { + "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, + "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, + "strides": [x.stride for x in input_shape], + "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, + "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, + } + + def _calculate_anchors(self, sizes, aspect_ratios, angles): + cell_anchors = [ + self.generate_cell_anchors(size, aspect_ratio, angle).float() + for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) + ] + return BufferList(cell_anchors) + + @property + def num_cell_anchors(self): + """ + Alias of `num_anchors`. 
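# Numeric check of the size / aspect-ratio algebra in generate_cell_anchors above: for size
# s = 64 and aspect ratio a = 0.5, w = sqrt(64**2 / 0.5) ~= 90.51 and h = 0.5 * w ~= 45.25, so
# w * h ~= 4096 = s**2 (the anchor keeps the requested area) and h / w = 0.5 as requested; the
# emitted XYXY anchor is (-w/2, -h/2, w/2, h/2), centered at the origin.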
+ """ + return self.num_anchors + + @property + def num_anchors(self): + """ + Returns: + list[int]: Each int is the number of anchors at every pixel + location, on that feature map. + For example, if at every pixel we use anchors of 3 aspect + ratios, 2 sizes and 5 angles, the number of anchors is 30. + (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS + and ANCHOR_GENERATOR.ANGLES in config) + + In standard RRPN models, `num_anchors` on every feature map is the same. + """ + return [len(cell_anchors) for cell_anchors in self.cell_anchors] + + def _grid_anchors(self, grid_sizes): + anchors = [] + for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): + shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) + zeros = torch.zeros_like(shift_x) + shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) + + anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) + + return anchors + + def generate_cell_anchors( + self, + sizes=(32, 64, 128, 256, 512), + aspect_ratios=(0.5, 1, 2), + angles=(-90, -60, -30, 0, 30, 60, 90), + ): + """ + Generate a tensor storing canonical anchor boxes, which are all anchor + boxes of different sizes, aspect_ratios, angles centered at (0, 0). + We can later build the set of anchors for a full feature map by + shifting and tiling these tensors (see `meth:_grid_anchors`). + + Args: + sizes (tuple[float]): + aspect_ratios (tuple[float]]): + angles (tuple[float]]): + + Returns: + Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) + storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. + """ + anchors = [] + for size in sizes: + area = size ** 2.0 + for aspect_ratio in aspect_ratios: + # s * s = w * h + # a = h / w + # ... some algebra ... + # w = sqrt(s * s / a) + # h = a * w + w = math.sqrt(area / aspect_ratio) + h = aspect_ratio * w + anchors.extend([0, 0, w, h, a] for a in angles) + + return torch.tensor(anchors) + + def forward(self, features): + """ + Args: + features (list[Tensor]): list of backbone feature maps on which to generate anchors. + + Returns: + list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map + (i.e. the cell anchors repeated over all locations in the feature map). + The number of anchors of each feature map is Hi x Wi x num_cell_anchors, + where Hi, Wi are resolution of the feature map divided by anchor stride. + """ + grid_sizes = [feature_map.shape[-2:] for feature_map in features] + anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) + return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] + + +def build_anchor_generator(cfg, input_shape): + """ + Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. + """ + anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME + return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..101884f83948da4dde5ed51bdc816cb46cf845de --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip + +from .backbone import Backbone +from .fpn import FPN +from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage + +__all__ = [k for k in globals().keys() if not k.startswith("_")] +# TODO can expose more resnet blocks after careful consideration diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/backbone.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..89af1f828dd8b4216dc91da348f85d20f0f8f413 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/backbone.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from abc import ABCMeta, abstractmethod +import torch.nn as nn + +from detectron2.layers import ShapeSpec + +__all__ = ["Backbone"] + + +class Backbone(nn.Module, metaclass=ABCMeta): + """ + Abstract base class for network backbones. + """ + + def __init__(self): + """ + The `__init__` method of any subclass can specify its own set of arguments. + """ + super().__init__() + + @abstractmethod + def forward(self): + """ + Subclasses must override this method, but adhere to the same return type. + + Returns: + dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor + """ + pass + + @property + def size_divisibility(self): + """ + Some backbones require the input height and width to be divisible by a + specific integer. This is typically true for encoder / decoder type networks + with lateral connection (e.g., FPN) for which feature maps need to match + dimension in the "bottom up" and "top down" paths. Set to 0 if no specific + input size divisibility is required. 
+ """ + return 0 + + def output_shape(self): + """ + Returns: + dict[str->ShapeSpec] + """ + # this is a backward-compatible default + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/build.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/build.py new file mode 100644 index 0000000000000000000000000000000000000000..4e3a9626a61a3a7232275543d5492b38e8ad6012 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/build.py @@ -0,0 +1,46 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from detectron2.layers import ShapeSpec +from detectron2.utils.registry import Registry + +from .backbone import Backbone + +BACKBONE_REGISTRY = Registry("BACKBONE") +BACKBONE_REGISTRY.__doc__ = """ +Registry for backbones, which extract feature maps from images + +The registered object must be a callable that accepts two arguments: + +1. A :class:`detectron2.config.CfgNode` +2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. + +It must returns an instance of :class:`Backbone`. +""" + + +def build_backbone(cfg, input_shape=None): + """ + Build a backbone from `cfg.MODEL.BACKBONE.NAME`. + + Returns: + an instance of :class:`Backbone` + """ + if input_shape is None: + input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) + + backbone_name = cfg.MODEL.BACKBONE.NAME + backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) + assert isinstance(backbone, Backbone) + return backbone diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/fpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..3b15cded161b4db74674d10b899381bc3e8d04d1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/fpn.py @@ -0,0 +1,258 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import math +import fvcore.nn.weight_init as weight_init +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import Conv2d, ShapeSpec, get_norm + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY +from .resnet import build_resnet_backbone + +__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] + + +class FPN(Backbone): + """ + This module implements :paper:`FPN`. + It creates pyramid features built on top of some input feature maps. + """ + + def __init__( + self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" + ): + """ + Args: + bottom_up (Backbone): module representing the bottom up subnetwork. + Must be a subclass of :class:`Backbone`. The multi-scale feature + maps generated by the bottom up network, and listed in `in_features`, + are used to generate FPN levels. + in_features (list[str]): names of the input feature maps coming + from the backbone to which FPN is attached. For example, if the + backbone produces ["res2", "res3", "res4"], any *contiguous* sublist + of these may be used; order must be from high to low resolution. + out_channels (int): number of channels in the output feature maps. + norm (str): the normalization to use. + top_block (nn.Module or None): if provided, an extra operation will + be performed on the output of the last (smallest resolution) + FPN output, and the result will extend the result list. The top_block + further downsamples the feature map. It must have an attribute + "num_levels", meaning the number of extra FPN levels added by + this block, and "in_feature", which is a string representing + its input feature (e.g., p5). + fuse_type (str): types for fusing the top down features and the lateral + ones. It can be "sum" (default), which sums up element-wise; or "avg", + which takes the element-wise mean of the two. + """ + super(FPN, self).__init__() + assert isinstance(bottom_up, Backbone) + + # Feature map strides and channels from the bottom up network (e.g. ResNet) + input_shapes = bottom_up.output_shape() + strides = [input_shapes[f].stride for f in in_features] + in_channels_per_feature = [input_shapes[f].channels for f in in_features] + + _assert_strides_are_log2_contiguous(strides) + lateral_convs = [] + output_convs = [] + + use_bias = norm == "" + for idx, in_channels in enumerate(in_channels_per_feature): + lateral_norm = get_norm(norm, out_channels) + output_norm = get_norm(norm, out_channels) + + lateral_conv = Conv2d( + in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm + ) + output_conv = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=use_bias, + norm=output_norm, + ) + weight_init.c2_xavier_fill(lateral_conv) + weight_init.c2_xavier_fill(output_conv) + stage = int(math.log2(strides[idx])) + self.add_module("fpn_lateral{}".format(stage), lateral_conv) + self.add_module("fpn_output{}".format(stage), output_conv) + + lateral_convs.append(lateral_conv) + output_convs.append(output_conv) + # Place convs into top-down order (from low to high resolution) + # to make the top-down computation in forward clearer. + self.lateral_convs = lateral_convs[::-1] + self.output_convs = output_convs[::-1] + self.top_block = top_block + self.in_features = in_features + self.bottom_up = bottom_up + # Return feature names are "p", like ["p2", "p3", ..., "p6"] + self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} + # top block output feature maps. 
+ if self.top_block is not None: + for s in range(stage, stage + self.top_block.num_levels): + self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) + + self._out_features = list(self._out_feature_strides.keys()) + self._out_feature_channels = {k: out_channels for k in self._out_features} + self._size_divisibility = strides[-1] + assert fuse_type in {"avg", "sum"} + self._fuse_type = fuse_type + + @property + def size_divisibility(self): + return self._size_divisibility + + def forward(self, x): + """ + Args: + input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to + feature map tensor for each feature level in high to low resolution order. + + Returns: + dict[str->Tensor]: + mapping from feature map name to FPN feature map tensor + in high to low resolution order. Returned feature names follow the FPN + paper convention: "p", where stage has stride = 2 ** stage e.g., + ["p2", "p3", ..., "p6"]. + """ + # Reverse feature maps into top-down order (from low to high resolution) + bottom_up_features = self.bottom_up(x) + x = [bottom_up_features[f] for f in self.in_features[::-1]] + results = [] + prev_features = self.lateral_convs[0](x[0]) + results.append(self.output_convs[0](prev_features)) + for features, lateral_conv, output_conv in zip( + x[1:], self.lateral_convs[1:], self.output_convs[1:] + ): + top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest") + lateral_features = lateral_conv(features) + prev_features = lateral_features + top_down_features + if self._fuse_type == "avg": + prev_features /= 2 + results.insert(0, output_conv(prev_features)) + + if self.top_block is not None: + top_block_in_feature = bottom_up_features.get(self.top_block.in_feature, None) + if top_block_in_feature is None: + top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] + results.extend(self.top_block(top_block_in_feature)) + assert len(self._out_features) == len(results) + return dict(zip(self._out_features, results)) + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + +def _assert_strides_are_log2_contiguous(strides): + """ + Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". + """ + for i, stride in enumerate(strides[1:], 1): + assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( + stride, strides[i - 1] + ) + + +class LastLevelMaxPool(nn.Module): + """ + This module is used in the original FPN to generate a downsampled + P6 feature from P5. + """ + + def __init__(self): + super().__init__() + self.num_levels = 1 + self.in_feature = "p5" + + def forward(self, x): + return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] + + +class LastLevelP6P7(nn.Module): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7 from + C5 feature. 
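The p-level naming and the log2-contiguity check used by FPN above can be reproduced by hand; a standalone sanity sketch (no detectron2 import needed):

import math

strides = [4, 8, 16, 32]          # e.g. the res2..res5 strides of the ResNet bottom-up network
for i, stride in enumerate(strides[1:], 1):
    assert stride == 2 * strides[i - 1], "strides must be log2-contiguous"

names = ["p{}".format(int(math.log2(s))) for s in strides]
assert names == ["p2", "p3", "p4", "p5"]
# LastLevelMaxPool appends one extra level after p5, named p6, at stride 64.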
+ """ + + def __init__(self, in_channels, out_channels, in_feature="res5"): + super().__init__() + self.num_levels = 2 + self.in_feature = in_feature + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + weight_init.c2_xavier_fill(module) + + def forward(self, c5): + p6 = self.p6(c5) + p7 = self.p7(F.relu(p6)) + return [p6, p7] + + +@BACKBONE_REGISTRY.register() +def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelMaxPool(), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone + + +@BACKBONE_REGISTRY.register() +def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): + """ + Args: + cfg: a detectron2 CfgNode + + Returns: + backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. + """ + bottom_up = build_resnet_backbone(cfg, input_shape) + in_features = cfg.MODEL.FPN.IN_FEATURES + out_channels = cfg.MODEL.FPN.OUT_CHANNELS + in_channels_p6p7 = bottom_up.output_shape()["res5"].channels + backbone = FPN( + bottom_up=bottom_up, + in_features=in_features, + out_channels=out_channels, + norm=cfg.MODEL.FPN.NORM, + top_block=LastLevelP6P7(in_channels_p6p7, out_channels), + fuse_type=cfg.MODEL.FPN.FUSE_TYPE, + ) + return backbone diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/resnet.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d0a7c700d05d477e7a549c23ed5590479cafee2e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/backbone/resnet.py @@ -0,0 +1,656 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import numpy as np +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn.functional as F +from torch import nn + +from detectron2.layers import ( + CNNBlockBase, + Conv2d, + DeformConv, + ModulatedDeformConv, + ShapeSpec, + get_norm, +) + +from .backbone import Backbone +from .build import BACKBONE_REGISTRY + +__all__ = [ + "ResNetBlockBase", + "BasicBlock", + "BottleneckBlock", + "DeformBottleneckBlock", + "BasicStem", + "ResNet", + "make_stage", + "build_resnet_backbone", +] + + +class BasicBlock(CNNBlockBase): + """ + The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, + with two 3x3 conv layers and a projection shortcut if needed. 
+ """ + + def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): + """ + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + stride (int): Stride for the first conv. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + self.conv2 = Conv2d( + out_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + out = self.conv2(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BottleneckBlock(CNNBlockBase): + """ + The standard bottleneck residual block used by ResNet-50, 101 and 152 + defined in :paper:`ResNet`. It contains 3 conv layers with kernels + 1x1, 3x3, 1x1, and a projection shortcut if needed. + """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + ): + """ + Args: + bottleneck_channels (int): number of output channels for the 3x3 + "bottleneck" conv layers. + num_groups (int): number of groups for the 3x3 conv layer. + norm (str or callable): normalization for all conv layers. + See :func:`layers.get_norm` for supported format. + stride_in_1x1 (bool): when stride>1, whether to put stride in the + first 1x1 convolution or the bottleneck 3x3 convolution. + dilation (int): the dilation rate of the 3x3 conv layer. 
+ """ + super().__init__(in_channels, out_channels, stride) + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + # The original MSRA ResNet models have stride in the first 1x1 conv + # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have + # stride in the 3x3 conv + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv2 = Conv2d( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + # Zero-initialize the last normalization in each residual branch, + # so that at the beginning, the residual branch starts with zeros, + # and each residual block behaves like an identity. + # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "For BN layers, the learnable scaling coefficient γ is initialized + # to be 1, except for each residual block's last BN + # where γ is initialized to be 0." + + # nn.init.constant_(self.conv3.norm.weight, 0) + # TODO this somehow hurts performance when training GN models from scratch. + # Add it as an option when we need to use this code to train a backbone. + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + out = self.conv2(out) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class DeformBottleneckBlock(CNNBlockBase): + """ + Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` + in the 3x3 convolution. 
+ """ + + def __init__( + self, + in_channels, + out_channels, + *, + bottleneck_channels, + stride=1, + num_groups=1, + norm="BN", + stride_in_1x1=False, + dilation=1, + deform_modulated=False, + deform_num_groups=1, + ): + super().__init__(in_channels, out_channels, stride) + self.deform_modulated = deform_modulated + + if in_channels != out_channels: + self.shortcut = Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=stride, + bias=False, + norm=get_norm(norm, out_channels), + ) + else: + self.shortcut = None + + stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) + + self.conv1 = Conv2d( + in_channels, + bottleneck_channels, + kernel_size=1, + stride=stride_1x1, + bias=False, + norm=get_norm(norm, bottleneck_channels), + ) + + if deform_modulated: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + self.conv2_offset = Conv2d( + bottleneck_channels, + offset_channels * deform_num_groups, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + dilation=dilation, + ) + self.conv2 = deform_conv_op( + bottleneck_channels, + bottleneck_channels, + kernel_size=3, + stride=stride_3x3, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + deformable_groups=deform_num_groups, + norm=get_norm(norm, bottleneck_channels), + ) + + self.conv3 = Conv2d( + bottleneck_channels, + out_channels, + kernel_size=1, + bias=False, + norm=get_norm(norm, out_channels), + ) + + for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: + if layer is not None: # shortcut can be None + weight_init.c2_msra_fill(layer) + + nn.init.constant_(self.conv2_offset.weight, 0) + nn.init.constant_(self.conv2_offset.bias, 0) + + def forward(self, x): + out = self.conv1(x) + out = F.relu_(out) + + if self.deform_modulated: + offset_mask = self.conv2_offset(out) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + out = self.conv2(out, offset, mask) + else: + offset = self.conv2_offset(out) + out = self.conv2(out, offset) + out = F.relu_(out) + + out = self.conv3(out) + + if self.shortcut is not None: + shortcut = self.shortcut(x) + else: + shortcut = x + + out += shortcut + out = F.relu_(out) + return out + + +class BasicStem(CNNBlockBase): + """ + The standard ResNet stem (layers before the first residual block). + """ + + def __init__(self, in_channels=3, out_channels=64, norm="BN"): + """ + Args: + norm (str or callable): norm after the first conv layer. + See :func:`layers.get_norm` for supported format. + """ + super().__init__(in_channels, out_channels, 4) + self.in_channels = in_channels + self.conv1 = Conv2d( + in_channels, + out_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False, + norm=get_norm(norm, out_channels), + ) + weight_init.c2_msra_fill(self.conv1) + + def forward(self, x): + x = self.conv1(x) + x = F.relu_(x) + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + return x + + +class ResNet(Backbone): + """ + Implement :paper:`ResNet`. + """ + + def __init__(self, stem, stages, num_classes=None, out_features=None): + """ + Args: + stem (nn.Module): a stem module + stages (list[list[CNNBlockBase]]): several (typically 4) stages, + each contains multiple :class:`CNNBlockBase`. + num_classes (None or int): if None, will not perform classification. 
+ Otherwise, will create a linear layer. + out_features (list[str]): name of the layers whose outputs should + be returned in forward. Can be anything in "stem", "linear", or "res2" ... + If None, will return the output of the last layer. + """ + super().__init__() + self.stem = stem + self.num_classes = num_classes + + current_stride = self.stem.stride + self._out_feature_strides = {"stem": current_stride} + self._out_feature_channels = {"stem": self.stem.out_channels} + + self.stages_and_names = [] + for i, blocks in enumerate(stages): + assert len(blocks) > 0, len(blocks) + for block in blocks: + assert isinstance(block, CNNBlockBase), block + + name = "res" + str(i + 2) + stage = nn.Sequential(*blocks) + + self.add_module(name, stage) + self.stages_and_names.append((stage, name)) + + self._out_feature_strides[name] = current_stride = int( + current_stride * np.prod([k.stride for k in blocks]) + ) + self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels + + if num_classes is not None: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.linear = nn.Linear(curr_channels, num_classes) + + # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": + # "The 1000-way fully-connected layer is initialized by + # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." + nn.init.normal_(self.linear.weight, std=0.01) + name = "linear" + + if out_features is None: + out_features = [name] + self._out_features = out_features + assert len(self._out_features) + children = [x[0] for x in self.named_children()] + for out_feature in self._out_features: + assert out_feature in children, "Available children: {}".format(", ".join(children)) + + def forward(self, x): + """ + Args: + x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. + + Returns: + dict[str->Tensor] + """ + assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!" + outputs = {} + x = self.stem(x) + if "stem" in self._out_features: + outputs["stem"] = x + for stage, name in self.stages_and_names: + x = stage(x) + if name in self._out_features: + outputs[name] = x + if self.num_classes is not None: + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.linear(x) + if "linear" in self._out_features: + outputs["linear"] = x + return outputs + + def output_shape(self): + return { + name: ShapeSpec( + channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] + ) + for name in self._out_features + } + + def freeze(self, freeze_at=0): + """ + Freeze the first several stages of the ResNet. Commonly used in + fine-tuning. + + Layers that produce the same feature map spatial size are defined as one + "stage" by :paper:`FPN`. + + Args: + freeze_at (int): number of stages to freeze. + `1` means freezing the stem. `2` means freezing the stem and + one residual stage, etc. + + Returns: + nn.Module: this ResNet itself + """ + if freeze_at >= 1: + self.stem.freeze() + for idx, (stage, _) in enumerate(self.stages_and_names, start=2): + if freeze_at >= idx: + for block in stage.children(): + block.freeze() + return self + + @staticmethod + def make_stage( + block_class, num_blocks, first_stride=None, *, in_channels, out_channels, **kwargs + ): + """ + Create a list of blocks of the same type that forms one ResNet stage. + + Args: + block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this + stage. 
A module of this type must not change spatial resolution of inputs unless its + stride != 1. + num_blocks (int): number of blocks in this stage + first_stride (int): deprecated + in_channels (int): input channels of the entire stage. + out_channels (int): output channels of **every block** in the stage. + kwargs: other arguments passed to the constructor of + `block_class`. If the argument name is "xx_per_block", the + argument is a list of values to be passed to each block in the + stage. Otherwise, the same argument is passed to every block + in the stage. + + Returns: + list[nn.Module]: a list of block module. + + Examples: + :: + stages = ResNet.make_stage( + BottleneckBlock, 3, in_channels=16, out_channels=64, + bottleneck_channels=16, num_groups=1, + stride_per_block=[2, 1, 1], + dilations_per_block=[1, 1, 2] + ) + + Usually, layers that produce the same feature map spatial size are defined as one + "stage" (in :paper:`FPN`). In this case ``stride_per_block[1:]`` should all be 1. + """ + if first_stride is not None: + assert "stride" not in kwargs and "stride_per_block" not in kwargs + kwargs["stride_per_block"] = [first_stride] + [1] * (num_blocks - 1) + logger = logging.getLogger(__name__) + logger.warning( + "ResNet.make_stage(first_stride=) is deprecated! " + "Use 'stride_per_block' or 'stride' instead." + ) + + blocks = [] + for i in range(num_blocks): + curr_kwargs = {} + for k, v in kwargs.items(): + if k.endswith("_per_block"): + assert len(v) == num_blocks, ( + f"Argument '{k}' of make_stage should have the " + f"same length as num_blocks={num_blocks}." + ) + newk = k[: -len("_per_block")] + assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" + curr_kwargs[newk] = v[i] + else: + curr_kwargs[k] = v + + blocks.append( + block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs) + ) + in_channels = out_channels + return blocks + + +ResNetBlockBase = CNNBlockBase +""" +Alias for backward compatibiltiy. +""" + + +def make_stage(*args, **kwargs): + """ + Deprecated alias for backward compatibiltiy. + """ + return ResNet.make_stage(*args, **kwargs) + + +@BACKBONE_REGISTRY.register() +def build_resnet_backbone(cfg, input_shape): + """ + Create a ResNet instance from config. + + Returns: + ResNet: a :class:`ResNet` instance. + """ + # need registration of new blocks/stems? 
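The "_per_block" argument convention documented above is simple to mirror standalone (no detectron2 import needed); this sketch reproduces the expansion that make_stage performs before constructing each block:

num_blocks = 3
kwargs = {"stride_per_block": [2, 1, 1], "norm": "BN"}

per_block_kwargs = []
for i in range(num_blocks):
    curr = {}
    for k, v in kwargs.items():
        if k.endswith("_per_block"):
            # one value per block, stripped of the suffix
            assert len(v) == num_blocks
            curr[k[: -len("_per_block")]] = v[i]
        else:
            # everything else is forwarded unchanged to every block
            curr[k] = v
    per_block_kwargs.append(curr)

assert per_block_kwargs == [
    {"stride": 2, "norm": "BN"},
    {"stride": 1, "norm": "BN"},
    {"stride": 1, "norm": "BN"},
]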
+ norm = cfg.MODEL.RESNETS.NORM + stem = BasicStem( + in_channels=input_shape.channels, + out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, + norm=norm, + ) + + # fmt: off + freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT + out_features = cfg.MODEL.RESNETS.OUT_FEATURES + depth = cfg.MODEL.RESNETS.DEPTH + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group + in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION + deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE + deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED + deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS + # fmt: on + assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) + + num_blocks_per_stage = { + 18: [2, 2, 2, 2], + 34: [3, 4, 6, 3], + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + 152: [3, 8, 36, 3], + }[depth] + + if depth in [18, 34]: + assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" + assert not any( + deform_on_per_stage + ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" + assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" + assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" + + stages = [] + + # Avoid creating variables without gradients + # It consumes extra memory and may cause allreduce to fail + out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] + max_stage_idx = max(out_stage_idx) + for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): + dilation = res5_dilation if stage_idx == 5 else 1 + first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 + stage_kargs = { + "num_blocks": num_blocks_per_stage[idx], + "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), + "in_channels": in_channels, + "out_channels": out_channels, + "norm": norm, + } + # Use BasicBlock for R18 and R34. + if depth in [18, 34]: + stage_kargs["block_class"] = BasicBlock + else: + stage_kargs["bottleneck_channels"] = bottleneck_channels + stage_kargs["stride_in_1x1"] = stride_in_1x1 + stage_kargs["dilation"] = dilation + stage_kargs["num_groups"] = num_groups + if deform_on_per_stage[idx]: + stage_kargs["block_class"] = DeformBottleneckBlock + stage_kargs["deform_modulated"] = deform_modulated + stage_kargs["deform_num_groups"] = deform_num_groups + else: + stage_kargs["block_class"] = BottleneckBlock + blocks = ResNet.make_stage(**stage_kargs) + in_channels = out_channels + out_channels *= 2 + bottleneck_channels *= 2 + stages.append(blocks) + return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/box_regression.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/box_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..34cc36374cf534d0d88731ba8591eb43a25861f6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/box_regression.py @@ -0,0 +1,240 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from typing import Tuple +import torch + +# Value for clamping large dw and dh predictions. The heuristic is that we clamp +# such that dw and dh are no larger than what would transform a 16px box into a +# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). +_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) + + +__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated"] + + +@torch.jit.script +class Box2BoxTransform(object): + """ + The box-to-box transform defined in R-CNN. The transformation is parameterized + by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height + by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). + """ + + def __init__( + self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP + ): + """ + Args: + weights (4-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set + such that the deltas have unit variance; now they are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): source boxes, e.g., object proposals + target_boxes (Tensor): target of the transformation, e.g., ground-truth + boxes. + """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_widths = src_boxes[:, 2] - src_boxes[:, 0] + src_heights = src_boxes[:, 3] - src_boxes[:, 1] + src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths + src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights + + target_widths = target_boxes[:, 2] - target_boxes[:, 0] + target_heights = target_boxes[:, 3] - target_boxes[:, 1] + target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths + target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights + + wx, wy, ww, wh = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + + deltas = torch.stack((dx, dy, dw, dh), dim=1) + # assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. 
+ + Args: + deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. + deltas[i] represents k potentially different class-specific + box transformations for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 4) + """ + boxes = boxes.to(deltas.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + if dw.dtype == torch.float32: + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + elif dw.dtype == torch.float16: + dw = torch.clamp(dw.to(torch.float32), + max=self.scale_clamp).to(torch.float16) + dh = torch.clamp(dh.to(torch.float32), + max=self.scale_clamp).to(torch.float16) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 + return pred_boxes + + +@torch.jit.script +class Box2BoxTransformRotated(object): + """ + The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized + by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height + by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), + and rotate a box's angle by da (radians). + Note: angles of deltas are in radians while angles of boxes are in degrees. + """ + + def __init__( + self, + weights: Tuple[float, float, float, float, float], + scale_clamp: float = _DEFAULT_SCALE_CLAMP, + ): + """ + Args: + weights (5-element tuple): Scaling factors that are applied to the + (dx, dy, dw, dh, da) deltas. These are treated as + hyperparameters of the system. + scale_clamp (float): When predicting deltas, the predicted box scaling + factors (dw and dh) are clamped such that they are <= scale_clamp. + """ + self.weights = weights + self.scale_clamp = scale_clamp + + def get_deltas(self, src_boxes, target_boxes): + """ + Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used + to transform the `src_boxes` into the `target_boxes`. That is, the relation + ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless + any delta is too large and is clamped). + + Args: + src_boxes (Tensor): Nx5 source boxes, e.g., object proposals + target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth + boxes. 
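get_deltas and apply_deltas are designed to be inverses of each other; a hedged round-trip check (it assumes this vendored detectron2 copy is importable, and uses the common (10, 10, 5, 5) weights purely as an example):

import torch
from detectron2.modeling.box_regression import Box2BoxTransform

tfm = Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0))
src = torch.tensor([[0.0, 0.0, 16.0, 16.0]])
tgt = torch.tensor([[2.0, 4.0, 30.0, 20.0]])

deltas = tfm.get_deltas(src, tgt)            # (dx, dy, dw, dh) scaled by the weights
rec = tfm.apply_deltas(deltas, src)
assert torch.allclose(rec, tgt, atol=1e-4)   # the round trip recovers the target box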
+ """ + assert isinstance(src_boxes, torch.Tensor), type(src_boxes) + assert isinstance(target_boxes, torch.Tensor), type(target_boxes) + + src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) + + target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( + target_boxes, dim=1 + ) + + wx, wy, ww, wh, wa = self.weights + dx = wx * (target_ctr_x - src_ctr_x) / src_widths + dy = wy * (target_ctr_y - src_ctr_y) / src_heights + dw = ww * torch.log(target_widths / src_widths) + dh = wh * torch.log(target_heights / src_heights) + # Angles of deltas are in radians while angles of boxes are in degrees. + # the conversion to radians serve as a way to normalize the values + da = target_angles - src_angles + da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + da *= wa * math.pi / 180.0 + + deltas = torch.stack((dx, dy, dw, dh, da), dim=1) + assert ( + (src_widths > 0).all().item() + ), "Input boxes to Box2BoxTransformRotated are not valid!" + return deltas + + def apply_deltas(self, deltas, boxes): + """ + Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. + + Args: + deltas (Tensor): transformation deltas of shape (N, k*5). + deltas[i] represents box transformation for the single box boxes[i]. + boxes (Tensor): boxes to transform, of shape (N, 5) + """ + assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5 + + boxes = boxes.to(deltas.dtype).unsqueeze(2) + + ctr_x = boxes[:, 0] + ctr_y = boxes[:, 1] + widths = boxes[:, 2] + heights = boxes[:, 3] + angles = boxes[:, 4] + + wx, wy, ww, wh, wa = self.weights + + dx = deltas[:, 0::5] / wx + dy = deltas[:, 1::5] / wy + dw = deltas[:, 2::5] / ww + dh = deltas[:, 3::5] / wh + da = deltas[:, 4::5] / wa + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.scale_clamp) + dh = torch.clamp(dh, max=self.scale_clamp) + + pred_boxes = torch.zeros_like(deltas) + pred_boxes[:, 0::5] = dx * widths + ctr_x # x_ctr + pred_boxes[:, 1::5] = dy * heights + ctr_y # y_ctr + pred_boxes[:, 2::5] = torch.exp(dw) * widths # width + pred_boxes[:, 3::5] = torch.exp(dh) * heights # height + + # Following original RRPN implementation, + # angles of deltas are in radians while angles of boxes are in degrees. + pred_angle = da * 180.0 / math.pi + angles + pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) + + pred_boxes[:, 4::5] = pred_angle + + return pred_boxes diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/matcher.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..ad53ce1916cb8a19efb69a684338e8a8acae6130 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/matcher.py @@ -0,0 +1,158 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List +import torch + +from detectron2.layers import nonzero_tuple + + +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be matched to zero or more predicted elements. + + The matching is determined by the MxN match_quality_matrix, that characterizes + how well each (ground-truth, prediction)-pair match each other. For example, + if the elements are boxes, this matrix may contain box intersection-over-union + overlap values. + + The matcher returns (a) a vector of length N containing the index of the + ground-truth element m in [0, M) that matches to prediction n in [0, N). + (b) a vector of length N containing the labels for each prediction. + """ + + def __init__( + self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False + ): + """ + Args: + thresholds (list): a list of thresholds used to stratify predictions + into levels. + labels (list): a list of values to label predictions belonging at + each level. A label can be one of {-1, 0, 1} signifying + {ignore, negative class, positive class}, respectively. + allow_low_quality_matches (bool): if True, produce additional matches + for predictions with maximum match quality lower than high_threshold. + See set_low_quality_matches_ for more details. + + For example, + thresholds = [0.3, 0.5] + labels = [0, -1, 1] + All predictions with iou < 0.3 will be marked with 0 and + thus will be considered as false positives while training. + All predictions with 0.3 <= iou < 0.5 will be marked with -1 and + thus will be ignored. + All predictions with 0.5 <= iou will be marked with 1 and + thus will be considered as true positives. + """ + # Add -inf and +inf to first and last position in thresholds + thresholds = thresholds[:] + assert thresholds[0] > 0 + thresholds.insert(0, -float("inf")) + thresholds.append(float("inf")) + # Currently torchscript does not support all + generator + assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) + assert all([l in [-1, 0, 1] for l in labels]) + assert len(labels) == len(thresholds) - 1 + self.thresholds = thresholds + self.labels = labels + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted + elements. All elements must be >= 0 (due to the us of `torch.nonzero` + for selecting indices in :meth:`set_low_quality_matches_`). 
+ + Returns: + matches (Tensor[int64]): a vector of length N, where matches[i] is a matched + ground-truth index in [0, M) + match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates + whether a prediction is a true or false positive or ignored + """ + assert match_quality_matrix.dim() == 2 + if match_quality_matrix.numel() == 0: + default_matches = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), 0, dtype=torch.int32 + ) + # When no gt boxes exist, we define IOU = 0 and therefore set labels + # to `self.labels[0]`, which usually defaults to background class 0 + # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds + default_match_labels = match_quality_matrix.new_full( + (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int32 + ) + return default_matches, default_match_labels + + assert torch.all(match_quality_matrix >= 0) + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + + # add nup because they use different metrics + + # matched_vals, matches = match_quality_matrix.max(dim=0) + device = match_quality_matrix.device + matched_vals, matches = match_quality_matrix.npu().max(dim=0) + matched_vals = matched_vals.to(device) + matches = matches.to(device) + #end + + match_labels = matches.new_full(matches.size(), 1, dtype=torch.int32) + + for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): + low_high = (matched_vals >= low) & (matched_vals < high) + match_labels = torch.where( + low_high, match_labels.new_full( + match_labels.size(), l), match_labels) + + if self.allow_low_quality_matches: + match_labels = self.set_low_quality_matches_( + match_labels, match_quality_matrix) + + return matches, match_labels + + def set_low_quality_matches_(self, match_labels, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth G find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth G. + + This function implements the RPN assignment case (i) in Sec. 3.1.2 of + :paper:`Faster R-CNN`. + """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find the highest quality match available, even if it is low, including ties. + # Note that the matches qualities must be positive due to the use of + # `torch.nonzero`. + pred_inds_with_highest_quality_mask = \ + (match_quality_matrix == highest_quality_foreach_gt[:, None]) \ + & (match_quality_matrix > highest_quality_foreach_gt.new_full( + highest_quality_foreach_gt[:, None].shape, 0)) + pred_inds_with_highest_quality_mask = \ + pred_inds_with_highest_quality_mask.any(0) + # If an anchor was labeled positive only due to a low-quality match + # with gt_A, but it has larger overlap + # with gt_B, it's matched index will still be gt_B. + # This follows the implementation in Detectron, + # and is found to have no significant impact. 
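Because this vendored Matcher routes the max-reduction through `.npu()`, the sketch below illustrates the thresholds/labels bucketing standalone on CPU instead of calling the class; the IoU values are made up for illustration and follow the thresholds=[0.3, 0.5], labels=[0, -1, 1] example from the docstring:

import torch

thresholds = [-float("inf"), 0.3, 0.5, float("inf")]   # as padded in __init__
labels = [0, -1, 1]                                     # negative / ignore band / positive

iou = torch.tensor([[0.10, 0.40, 0.70],                 # 2 ground-truth boxes x 3 predictions
                    [0.05, 0.35, 0.90]])
matched_vals, matches = iou.max(dim=0)                  # best ground truth for every prediction

match_labels = torch.full_like(matches, 1, dtype=torch.int8)
for l, low, high in zip(labels, thresholds[:-1], thresholds[1:]):
    in_band = (matched_vals >= low) & (matched_vals < high)
    match_labels[in_band] = l

assert matches.tolist() == [0, 0, 1]                    # predictions 0 and 1 best overlap gt 0; prediction 2 overlaps gt 1
assert match_labels.tolist() == [0, -1, 1]              # false positive / ignored / true positive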
+ match_labels3 = torch.where( + pred_inds_with_highest_quality_mask, + match_labels.new_full(match_labels.size(), 1),match_labels) + return match_labels3 diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4251863c84a45a6ed77dcb6f08c35f5006dcafd8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .build import META_ARCH_REGISTRY, build_model # isort:skip + +from .panoptic_fpn import PanopticFPN + +# import all the meta_arch, so they will be registered +from .rcnn import GeneralizedRCNN, ProposalNetwork +from .retinanet import RetinaNet +from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head + + +__all__ = list(globals().keys()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/build.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/build.py new file mode 100644 index 0000000000000000000000000000000000000000..a6df047565aba313821edb61317350de970909d7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/build.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from detectron2.utils.registry import Registry + + +META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip +META_ARCH_REGISTRY.__doc__ = """ +Registry for meta-architectures, i.e. the whole model. + +The registered object will be called with `obj(cfg)` +and expected to return a `nn.Module` object. +""" + + +def build_model(cfg): + """ + Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. + Note that it does not load any weights from ``cfg``. 
+ """ + meta_arch = cfg.MODEL.META_ARCHITECTURE + model = META_ARCH_REGISTRY.get(meta_arch)(cfg) + model.to(torch.device(cfg.MODEL.DEVICE)) + return model diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..9072bdbe903e76f7eec138ca383cf713d5d169f4 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py @@ -0,0 +1,231 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn + +from detectron2.structures import ImageList + +from ..backbone import build_backbone +from ..postprocessing import detector_postprocess, sem_seg_postprocess +from ..proposal_generator import build_proposal_generator +from ..roi_heads import build_roi_heads +from .build import META_ARCH_REGISTRY +from .semantic_seg import build_sem_seg_head + +__all__ = ["PanopticFPN"] + + +@META_ARCH_REGISTRY.register() +class PanopticFPN(nn.Module): + """ + Implement the paper :paper:`PanopticFPN`. + """ + + def __init__(self, cfg): + super().__init__() + + self.instance_loss_weight = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT + + # options when combining instance & semantic outputs + self.combine_on = cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED + self.combine_overlap_threshold = cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH + self.combine_stuff_area_limit = cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT + self.combine_instances_confidence_threshold = ( + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH + ) + + self.backbone = build_backbone(cfg) + self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) + self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) + self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) + + self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) + self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "instances": Instances + * "sem_seg": semantic segmentation ground truth. + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + each dict is the results for one image. 
The dict contains the following keys: + + * "instances": see :meth:`GeneralizedRCNN.forward` for its format. + * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. + * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. + See the return value of + :func:`combine_semantic_and_instance_outputs` for its format. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + features = self.backbone(images.tensor) + + if "proposals" in batched_inputs[0]: + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + proposal_losses = {} + + if "sem_seg" in batched_inputs[0]: + gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] + gt_sem_seg = ImageList.from_tensors( + gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + else: + gt_sem_seg = None + sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) + + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + if self.proposal_generator: + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + detector_results, detector_losses = self.roi_heads( + images, features, proposals, gt_instances + ) + + if self.training: + losses = {} + losses.update(sem_seg_losses) + losses.update({k: v * self.instance_loss_weight for k, v in detector_losses.items()}) + losses.update(proposal_losses) + return losses + + processed_results = [] + for sem_seg_result, detector_result, input_per_image, image_size in zip( + sem_seg_results, detector_results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) + detector_r = detector_postprocess(detector_result, height, width) + + processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) + + if self.combine_on: + panoptic_r = combine_semantic_and_instance_outputs( + detector_r, + sem_seg_r.argmax(dim=0), + self.combine_overlap_threshold, + self.combine_stuff_area_limit, + self.combine_instances_confidence_threshold, + ) + processed_results[-1]["panoptic_seg"] = panoptic_r + return processed_results + + +def combine_semantic_and_instance_outputs( + instance_results, + semantic_results, + overlap_threshold, + stuff_area_limit, + instances_confidence_threshold, +): + """ + Implement a simple combining logic following + "combine_semantic_and_instance_predictions.py" in panopticapi + to produce panoptic segmentation outputs. + + Args: + instance_results: output of :func:`detector_postprocess`. + semantic_results: an (H, W) tensor, each is the contiguous semantic + category id + + Returns: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". 
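+
+    Example of one possible ``segments_info`` (illustrative values only; "thing"
+    entries also carry "score" and "instance_id", as produced below)::
+
+        [{"id": 1, "isthing": True, "score": 0.97, "category_id": 17, "instance_id": 2},
+         {"id": 2, "isthing": False, "category_id": 5, "area": 20481}]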
+ """ + panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) + + # sort instance outputs by scores + sorted_inds = torch.argsort(-instance_results.scores) + + current_segment_id = 0 + segments_info = [] + + instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) + + # Add instances one-by-one, check for overlaps with existing ones + for inst_id in sorted_inds: + score = instance_results.scores[inst_id].item() + if score < instances_confidence_threshold: + break + mask = instance_masks[inst_id] # H,W + mask_area = mask.sum().item() + + if mask_area == 0: + continue + + intersect = (mask > 0) & (panoptic_seg > 0) + intersect_area = intersect.sum().item() + + if intersect_area * 1.0 / mask_area > overlap_threshold: + continue + + if intersect_area > 0: + mask = mask & (panoptic_seg == 0) + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": True, + "score": score, + "category_id": instance_results.pred_classes[inst_id].item(), + "instance_id": inst_id.item(), + } + ) + + # Add semantic results to remaining empty areas + semantic_labels = torch.unique(semantic_results).cpu().tolist() + for semantic_label in semantic_labels: + if semantic_label == 0: # 0 is a special "thing" class + continue + mask = (semantic_results == semantic_label) & (panoptic_seg == 0) + mask_area = mask.sum().item() + if mask_area < stuff_area_limit: + continue + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + segments_info.append( + { + "id": current_segment_id, + "isthing": False, + "category_id": semantic_label, + "area": mask_area, + } + ) + + return panoptic_seg, segments_info diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/rcnn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..7ca7243b11c290054bf0616d780f41a6a33c6663 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/rcnn.py @@ -0,0 +1,380 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import numpy as np +from typing import Optional, Tuple +import torch +from torch import nn +from torch.nn import functional as F +from detectron2.config import configurable +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.structures import ImageList +from detectron2.utils.events import get_event_storage +from detectron2.utils.logger import log_first_n + +from ..backbone import Backbone, build_backbone +from ..postprocessing import detector_postprocess +from ..proposal_generator import build_proposal_generator +from ..roi_heads import build_roi_heads +from .build import META_ARCH_REGISTRY + +import pdb +__all__ = ["GeneralizedRCNN", "ProposalNetwork"] + + + +### add hook func +def print_tensor(name, tensors): + if isinstance(tensors, torch.Tensor): + print(name) + elif isinstance(tensors, list) or isinstance(tensors,tuple) : + for tensor in tensors: + print_tensor(name, tensor) + else: + print(name, type(tensors)) + + +def hook_func(name, module): + def hook_function(module, inputs, outputs): + print_tensor(name + 'inputs', inputs) + print_tensor(name + 'outputs', outputs) + return hook_function + + +def hook_for_model(model): + for name, module in model.named_modules(): + #module.register_forward_hook(hook_func('[forward]: ' + name, module)) + module.register_backward_hook(hook_func('[backward]: ' + name, module)) + +### end + + +@META_ARCH_REGISTRY.register() +class GeneralizedRCNN(nn.Module): + """ + Generalized R-CNN. Any models that contains the following three components: + 1. Per-image feature extraction (aka backbone) + 2. Region proposal generation + 3. Per-region feature extraction and prediction + """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + proposal_generator: nn.Module, + roi_heads: nn.Module, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + input_format: Optional[str] = None, + vis_period: int = 0, + amp: int = 0, + opt_level: str = None, + ): + """ + NOTE: this interface is experimental. + + Args: + backbone: a backbone module, must follow detectron2's backbone interface + proposal_generator: a module that generates proposals using backbone features + roi_heads: a ROI head that performs per-region computation + pixel_mean, pixel_std: list or tuple with #channels element, + representing the per-channel mean and std to be used to normalize + the input image + input_format: describe the meaning of channels of input. Needed by visualization + vis_period: the period to run visualization. Set to 0 to disable. + """ + super().__init__() + self.backbone = backbone + self.proposal_generator = proposal_generator + self.roi_heads = roi_heads + + self.input_format = input_format + self.vis_period = vis_period + if vis_period > 0: + assert input_format is not None, "input_format is required for visualization!" + self.amp = amp + self.opt_level = opt_level + self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1)) + self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1)) + assert ( + self.pixel_mean.shape == self.pixel_std.shape + ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" 
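+
+    # Illustrative note (not part of the upstream detectron2 source): since
+    # __init__ is decorated with @configurable, the model can be built either
+    # from explicit keyword arguments, e.g.
+    #
+    #     model = GeneralizedRCNN(
+    #         backbone=backbone,
+    #         proposal_generator=rpn,
+    #         roi_heads=roi_heads,
+    #         pixel_mean=[103.530, 116.280, 123.675],
+    #         pixel_std=[1.0, 1.0, 1.0],
+    #     )
+    #
+    # or from a config via GeneralizedRCNN(cfg), which is translated into the
+    # keyword form by from_config() below.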
+ + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + return { + "backbone": backbone, + "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), + "roi_heads": build_roi_heads(cfg, backbone.output_shape()), + "input_format": cfg.INPUT.FORMAT, + "vis_period": cfg.VIS_PERIOD, + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + "amp": cfg.AMP, + "opt_level": cfg.OPT_LEVEL, + } + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, proposals): + """ + A function used to visualize images and proposals. It shows ground truth + bounding boxes on the original image and up to 20 predicted object + proposals on the original image. Users can implement different + visualization functions for different models. + + Args: + batched_inputs (list): a list that contains input to the model. + proposals (list): a list that contains predicted proposals. Both + batched_inputs and proposals should have the same length. + """ + from detectron2.utils.visualizer import Visualizer + + storage = get_event_storage() + max_vis_prop = 20 + + for input, prop in zip(batched_inputs, proposals): + img = input["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) + anno_img = v_gt.get_image() + box_size = min(len(prop.proposal_boxes), max_vis_prop) + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances( + boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() + ) + prop_img = v_pred.get_image() + vis_img = np.concatenate((anno_img, prop_img), axis=1) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = "Left: GT bounding boxes; Right: Predicted proposals" + storage.put_image(vis_name, vis_img) + break # only visualize one image in a batch + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances (optional): groundtruth :class:`Instances` + * proposals (optional): :class:`Instances`, precomputed proposals. + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "instances" whose value is a :class:`Instances`. 
+ The :class:`Instances` object has the following keys: + "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" + """ + if not self.training: + return self.inference(batched_inputs) + + images = self.preprocess_image_v2(batched_inputs) + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"] for x in batched_inputs] + else: + gt_instances = None + + #with torch.autograd.profiler.profile(record_shapes = True, use_npu=True) as prof: + + features = self.backbone(images.tensor) + + + + if self.proposal_generator: + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + proposal_losses = {} + print(torch.npu.synchronize(),"proposals end") + #print("features", features.shape) + #print("proposals", proposals.shape) + #state = {"features": features, "proposals": proposals} + #torch.save(state, "/home/zsclzy/centermask2/model3.pth") + + #with torch.autograd.profiler.profile(record_shapes = True, use_npu=True) as prof: + #hook_for_model(self.roi_heads) + _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) + #print(prof.key_averages().table(sort_by="self_cpu_time_total")) + #prof.export_chrome_trace("detector_npu.prof") + + + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + self.visualize_training(batched_inputs, proposals) + + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + return losses + + def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): + """ + Run inference on the given inputs. + + Args: + batched_inputs (list[dict]): same as in :meth:`forward` + detected_instances (None or list[Instances]): if not None, it + contains an `Instances` object per image. The `Instances` + object contains "pred_boxes" and "pred_classes" which are + known boxes in the image. + The inference will then skip the detection of bounding boxes, + and only predict other per-ROI outputs. + do_postprocess (bool): whether to apply post-processing on the outputs. + + Returns: + same as in :meth:`forward`. + """ + assert not self.training + images = self.preprocess_image_v3(batched_inputs) + + features = self.backbone(images.tensor) + + if detected_instances is None: + if self.proposal_generator: + proposals, _ = self.proposal_generator(images, features, None) + else: + assert "proposals" in batched_inputs[0] + proposals = [x["proposals"].to(self.device) for x in batched_inputs] + + results, _ = self.roi_heads(images, features, proposals, None) + else: + detected_instances = [x.to(self.device) for x in detected_instances] + results = self.roi_heads.forward_with_given_boxes(features, detected_instances) + + if do_postprocess: + return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) + else: + return results + + def preprocess_image_v2(self, batched_inputs): + """ + use dataset results. + """ + images = [x["image_preprocess"] for x in batched_inputs] + images = torch.cat(images, dim=0) + image_sizes = [x["image_size"] for x in batched_inputs] + return ImageList(images, image_sizes) + + def preprocess_image_v3(self, batched_inputs): + """ + use dataset results. 
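+        Unlike :meth:`preprocess_image_v2`, this variant normalizes the raw
+        "image" tensors here and pads each one to a fixed 1344x1344 canvas
+        (shape sketch inferred from the padding code below; presumably done to
+        keep tensor shapes static on the NPU)::
+
+            input image  : (3, 800, 1216)
+            padded image : (3, 1344, 1344), zero-filled on the right/bottom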
+ """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + image_pres = [] + image_sizes = [] + + for image in images: + image_size = image.shape + put_size = [0, 1344-image_size[-1], + 0, 1344-image_size[-2]] + image_pre = F.pad(image, put_size, value=0) + image_pre = image_pre.unsqueeze(0) + image_pres.append(image_pre) + image_sizes.append(torch.tensor(image.shape[-2:]).tolist()) + images = torch.cat(image_pres, dim=0) + return ImageList(images, image_sizes) + + @staticmethod + def _postprocess(instances, batched_inputs, image_sizes): + """ + Rescale the output instances to the target size. + """ + # note: private function; subject to changes + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + instances, batched_inputs, image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + +@META_ARCH_REGISTRY.register() +class ProposalNetwork(nn.Module): + """ + A meta architecture that only predicts object proposals. + """ + + def __init__(self, cfg): + super().__init__() + self.backbone = build_backbone(cfg) + self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) + + self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) + self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + Same as in :class:`GeneralizedRCNN.forward` + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "proposals" whose value is a + :class:`Instances` with keys "proposal_boxes" and "objectness_logits". + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + features = self.backbone(images.tensor) + + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + elif "targets" in batched_inputs[0]: + log_first_n( + logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 + ) + gt_instances = [x["targets"].to(self.device) for x in batched_inputs] + else: + gt_instances = None + proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) + # In training, the proposals are not useful at all but we generate them anyway. + # This makes RPN-only models about 5% slower. 
+ if self.training: + return proposal_losses + + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + proposals, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"proposals": r}) + return processed_results diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/retinanet.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..57127669f47bd1422b919a58ae94c57ccd4b507c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/retinanet.py @@ -0,0 +1,461 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import numpy as np +from typing import List +import torch +from fvcore.nn import sigmoid_focal_loss_jit, smooth_l1_loss +from torch import nn +from torch.nn import functional as F + +from detectron2.data.detection_utils import convert_image_to_rgb +from detectron2.layers import ShapeSpec, batched_nms, cat +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..anchor_generator import build_anchor_generator +from ..backbone import build_backbone +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..postprocessing import detector_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["RetinaNet"] + + +def permute_to_N_HWA_K(tensor, K): + """ + Transpose/reshape a tensor from (N, (Ai x K), H, W) to (N, (HxWxAi), K) + """ + assert tensor.dim() == 4, tensor.shape + N, _, H, W = tensor.shape + tensor = tensor.view(N, -1, K, H, W) + tensor = tensor.permute(0, 3, 4, 1, 2) + tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) + return tensor + + +@META_ARCH_REGISTRY.register() +class RetinaNet(nn.Module): + """ + Implement RetinaNet in :paper:`RetinaNet`. 
+ """ + + def __init__(self, cfg): + super().__init__() + # fmt: off + self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES + self.in_features = cfg.MODEL.RETINANET.IN_FEATURES + # Loss parameters: + self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA + self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA + self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA + # Inference parameters: + self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST + self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST + self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST + self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE + # Vis parameters + self.vis_period = cfg.VIS_PERIOD + self.input_format = cfg.INPUT.FORMAT + # fmt: on + + self.backbone = build_backbone(cfg) + + backbone_shape = self.backbone.output_shape() + feature_shapes = [backbone_shape[f] for f in self.in_features] + self.head = RetinaNetHead(cfg, feature_shapes) + self.anchor_generator = build_anchor_generator(cfg, feature_shapes) + + # Matching and loss + self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) + self.anchor_matcher = Matcher( + cfg.MODEL.RETINANET.IOU_THRESHOLDS, + cfg.MODEL.RETINANET.IOU_LABELS, + allow_low_quality_matches=True, + ) + + self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) + self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) + + """ + In Detectron1, loss is normalized by number of foreground samples in the batch. + When batch size is 1 per GPU, #foreground has a large variance and + using it lead to lower performance. Here we maintain an EMA of #foreground to + stabilize the normalizer. + """ + self.loss_normalizer = 100 # initialize with any reasonable #fg that's not too small + self.loss_normalizer_momentum = 0.9 + + @property + def device(self): + return self.pixel_mean.device + + def visualize_training(self, batched_inputs, results): + """ + A function used to visualize ground truth images and final network predictions. + It shows ground truth bounding boxes on the original image and up to 20 + predicted object bounding boxes on the original image. + + Args: + batched_inputs (list): a list that contains input to the model. + results (List[Instances]): a list of #images elements. + """ + from detectron2.utils.visualizer import Visualizer + + assert len(batched_inputs) == len( + results + ), "Cannot visualize inputs and results of different sizes" + storage = get_event_storage() + max_boxes = 20 + + image_index = 0 # only visualize a single image + img = batched_inputs[image_index]["image"] + img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) + v_gt = Visualizer(img, None) + v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) + anno_img = v_gt.get_image() + processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) + predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() + + v_pred = Visualizer(img, None) + v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) + prop_img = v_pred.get_image() + vis_img = np.vstack((anno_img, prop_img)) + vis_img = vis_img.transpose(2, 0, 1) + vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" + storage.put_image(vis_name, vis_img) + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 
+ Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + + * image: Tensor, image in (C, H, W) format. + * instances: Instances + + Other information that's included in the original dicts, such as: + + * "height", "width" (int): the output resolution of the model, used in inference. + See :meth:`postprocess` for details. + Returns: + dict[str: Tensor]: + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + images = self.preprocess_image(batched_inputs) + features = self.backbone(images.tensor) + features = [features[f] for f in self.in_features] + + anchors = self.anchor_generator(features) + pred_logits, pred_anchor_deltas = self.head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] + pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] + + if self.training: + assert "instances" in batched_inputs[0], "Instance annotations are missing in training!" + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + + gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) + losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes) + + if self.vis_period > 0: + storage = get_event_storage() + if storage.iter % self.vis_period == 0: + results = self.inference( + anchors, pred_logits, pred_anchor_deltas, images.image_sizes + ) + self.visualize_training(batched_inputs, results) + + return losses + else: + results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes) + processed_results = [] + for results_per_image, input_per_image, image_size in zip( + results, batched_inputs, images.image_sizes + ): + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + r = detector_postprocess(results_per_image, height, width) + processed_results.append({"instances": r}) + return processed_results + + def losses(self, anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes): + """ + Args: + anchors (list[Boxes]): a list of #feature level Boxes + gt_labels, gt_boxes: see output of :meth:`RetinaNet.label_anchors`. + Their shapes are (N, R) and (N, R, 4), respectively, where R is + the total number of anchors across levels, i.e. sum(Hi x Wi x Ai) + pred_logits, pred_anchor_deltas: both are list[Tensor]. Each element in the + list corresponds to one level and has shape (N, Hi * Wi * Ai, K or 4). + Where K is the number of classes used in `pred_logits`. + + Returns: + dict[str, Tensor]: + mapping from a named loss to a scalar tensor + storing the loss. Used during training only. 
The dict keys are: + "loss_cls" and "loss_box_reg" + """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, R) + anchors = type(anchors[0]).cat(anchors).tensor # (R, 4) + gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes] + gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, R, 4) + + valid_mask = gt_labels >= 0 + pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) + num_pos_anchors = pos_mask.sum().item() + get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) + self.loss_normalizer = self.loss_normalizer_momentum * self.loss_normalizer + ( + 1 - self.loss_normalizer_momentum + ) * max(num_pos_anchors, 1) + + # classification and regression loss + gt_labels_target = F.one_hot(gt_labels[valid_mask], num_classes=self.num_classes + 1)[ + :, :-1 + ] # no loss for the last (background) class + loss_cls = sigmoid_focal_loss_jit( + cat(pred_logits, dim=1)[valid_mask], + gt_labels_target.to(pred_logits[0].dtype), + alpha=self.focal_loss_alpha, + gamma=self.focal_loss_gamma, + reduction="sum", + ) + + loss_box_reg = smooth_l1_loss( + cat(pred_anchor_deltas, dim=1)[pos_mask], + gt_anchor_deltas[pos_mask], + beta=self.smooth_l1_loss_beta, + reduction="sum", + ) + return { + "loss_cls": loss_cls / self.loss_normalizer, + "loss_box_reg": loss_box_reg / self.loss_normalizer, + } + + @torch.no_grad() + def label_anchors(self, anchors, gt_instances): + """ + Args: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contains anchors of this image on the specific feature level. + gt_instances (list[Instances]): a list of N `Instances`s. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps (sum(Hi * Wi * A)). + Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background. + list[Tensor]: + i-th element is a Rx4 tensor, where R is the total number of anchors across + feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as foreground. + """ + anchors = Boxes.cat(anchors) # Rx4 + + gt_labels = [] + matched_gt_boxes = [] + for gt_per_image in gt_instances: + match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors) + matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix) + del match_quality_matrix + + if len(gt_per_image) > 0: + matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs] + + gt_labels_i = gt_per_image.gt_classes[matched_idxs] + # Anchors with label 0 are treated as background. + gt_labels_i[anchor_labels == 0] = self.num_classes + # Anchors with label -1 are ignored. + gt_labels_i[anchor_labels == -1] = -1 + else: + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes + + gt_labels.append(gt_labels_i) + matched_gt_boxes.append(matched_gt_boxes_i) + + return gt_labels, matched_gt_boxes + + def inference(self, anchors, pred_logits, pred_anchor_deltas, image_sizes): + """ + Arguments: + anchors (list[Boxes]): A list of #feature level Boxes. + The Boxes contain anchors of this image on the specific feature level. + pred_logits, pred_anchor_deltas: list[Tensor], one per level. 
Each + has shape (N, Hi * Wi * Ai, K or 4) + image_sizes (List[torch.Size]): the input image sizes + + Returns: + results (List[Instances]): a list of #images elements. + """ + results = [] + for img_idx, image_size in enumerate(image_sizes): + pred_logits_per_image = [x[img_idx] for x in pred_logits] + deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] + results_per_image = self.inference_single_image( + anchors, pred_logits_per_image, deltas_per_image, tuple(image_size) + ) + results.append(results_per_image) + return results + + def inference_single_image(self, anchors, box_cls, box_delta, image_size): + """ + Single-image inference. Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Arguments: + anchors (list[Boxes]): list of #feature levels. Each entry contains + a Boxes object, which contains all the anchors in that feature level. + box_cls (list[Tensor]): list of #feature levels. Each entry contains + tensor of size (H x W x A, K) + box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. + image_size (tuple(H, W)): a tuple of the image height and width. + + Returns: + Same as `inference`, but for only one image. + """ + boxes_all = [] + scores_all = [] + class_idxs_all = [] + + # Iterate over every feature level + for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): + # (HxWxAxK,) + box_cls_i = box_cls_i.flatten().sigmoid_() + + # Keep top k top scoring indices only. + num_topk = min(self.topk_candidates, box_reg_i.size(0)) + # torch.sort is actually faster than .topk (at least on GPUs) + predicted_prob, topk_idxs = box_cls_i.sort(descending=True) + predicted_prob = predicted_prob[:num_topk] + topk_idxs = topk_idxs[:num_topk] + + # filter out the proposals with low confidence score + keep_idxs = predicted_prob > self.score_threshold + predicted_prob = predicted_prob[keep_idxs] + topk_idxs = topk_idxs[keep_idxs] + + anchor_idxs = topk_idxs // self.num_classes + classes_idxs = topk_idxs % self.num_classes + + box_reg_i = box_reg_i[anchor_idxs] + anchors_i = anchors_i[anchor_idxs] + # predict boxes + predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) + + boxes_all.append(predicted_boxes) + scores_all.append(predicted_prob) + class_idxs_all.append(classes_idxs) + + boxes_all, scores_all, class_idxs_all = [ + cat(x) for x in [boxes_all, scores_all, class_idxs_all] + ] + keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold) + keep = keep[: self.max_detections_per_image] + + result = Instances(image_size) + result.pred_boxes = Boxes(boxes_all[keep]) + result.scores = scores_all[keep] + result.pred_classes = class_idxs_all[keep] + return result + + def preprocess_image(self, batched_inputs): + """ + Normalize, pad and batch the input images. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + return images + + +class RetinaNetHead(nn.Module): + """ + The head used in RetinaNet for object classification and box regression. + It has two subnets for the two tasks, with a common structure but separate parameters. 
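+
+    Illustrative output shapes, assuming A anchors per location and K classes
+    (consistent with the :meth:`forward` docstring below)::
+
+        logits[l]   : (N, A*K, Hl, Wl)   # classification subnet
+        bbox_reg[l] : (N, A*4, Hl, Wl)   # box regression subnet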
+ """ + + def __init__(self, cfg, input_shape: List[ShapeSpec]): + super().__init__() + # fmt: off + in_channels = input_shape[0].channels + num_classes = cfg.MODEL.RETINANET.NUM_CLASSES + num_convs = cfg.MODEL.RETINANET.NUM_CONVS + prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB + num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors + # fmt: on + assert ( + len(set(num_anchors)) == 1 + ), "Using different number of anchors between levels is not currently supported!" + num_anchors = num_anchors[0] + + cls_subnet = [] + bbox_subnet = [] + for _ in range(num_convs): + cls_subnet.append( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + ) + cls_subnet.append(nn.ReLU()) + bbox_subnet.append( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + ) + bbox_subnet.append(nn.ReLU()) + + self.cls_subnet = nn.Sequential(*cls_subnet) + self.bbox_subnet = nn.Sequential(*bbox_subnet) + self.cls_score = nn.Conv2d( + in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 + ) + self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1) + + # Initialization + for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: + for layer in modules.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.normal_(layer.weight, mean=0, std=0.01) + torch.nn.init.constant_(layer.bias, 0) + + # Use prior in model initialization to improve stability + bias_value = -(math.log((1 - prior_prob) / prior_prob)) + torch.nn.init.constant_(self.cls_score.bias, bias_value) + + def forward(self, features): + """ + Arguments: + features (list[Tensor]): FPN feature map tensors in high to low resolution. + Each tensor in the list correspond to different feature levels. + + Returns: + logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). + The tensor predicts the classification probability + at each spatial position for each of the A anchors and K object + classes. + bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). + The tensor predicts 4-vector (dx,dy,dw,dh) box + regression values for every anchor. These values are the + relative offset between the anchor and the ground truth box. + """ + logits = [] + bbox_reg = [] + for feature in features: + logits.append(self.cls_score(self.cls_subnet(feature))) + bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) + return logits, bbox_reg diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/semantic_seg.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/semantic_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..52418c0932f96a48b78b34cefb89e4f6539ae5aa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/meta_arch/semantic_seg.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from typing import Dict +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.layers import Conv2d, ShapeSpec +from detectron2.structures import ImageList +from detectron2.utils.registry import Registry + +from ..backbone import build_backbone +from ..postprocessing import sem_seg_postprocess +from .build import META_ARCH_REGISTRY + +__all__ = ["SemanticSegmentor", "SEM_SEG_HEADS_REGISTRY", "SemSegFPNHead", "build_sem_seg_head"] + + +SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") +SEM_SEG_HEADS_REGISTRY.__doc__ = """ +Registry for semantic segmentation heads, which make semantic segmentation predictions +from feature maps. +""" + + +@META_ARCH_REGISTRY.register() +class SemanticSegmentor(nn.Module): + """ + Main class for semantic segmentation architectures. + """ + + def __init__(self, cfg): + super().__init__() + self.backbone = build_backbone(cfg) + self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) + self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) + self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + + For now, each item in the list is a dict that contains: + + * "image": Tensor, image in (C, H, W) format. + * "sem_seg": semantic segmentation ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + + + Returns: + list[dict]: + Each dict is the output for one input image. + The dict contains one key "sem_seg" whose value is a + Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. + """ + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.backbone.size_divisibility) + + features = self.backbone(images.tensor) + + if "sem_seg" in batched_inputs[0]: + targets = [x["sem_seg"].to(self.device) for x in batched_inputs] + targets = ImageList.from_tensors( + targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value + ).tensor + else: + targets = None + results, losses = self.sem_seg_head(features, targets) + + if self.training: + return losses + + processed_results = [] + for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): + height = input_per_image.get("height") + width = input_per_image.get("width") + r = sem_seg_postprocess(result, image_size, height, width) + processed_results.append({"sem_seg": r}) + return processed_results + + +def build_sem_seg_head(cfg, input_shape): + """ + Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. + """ + name = cfg.MODEL.SEM_SEG_HEAD.NAME + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +@SEM_SEG_HEADS_REGISTRY.register() +class SemSegFPNHead(nn.Module): + """ + A semantic segmentation head described in :paper:`PanopticFPN`. 
+ It takes FPN features as input and merges information from all + levels of the FPN into single output. + """ + + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): + super().__init__() + + # fmt: off + self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES + feature_strides = {k: v.stride for k, v in input_shape.items()} + feature_channels = {k: v.channels for k, v in input_shape.items()} + self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE + num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES + conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM + self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE + norm = cfg.MODEL.SEM_SEG_HEAD.NORM + self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT + # fmt: on + + self.scale_heads = [] + for in_feature in self.in_features: + head_ops = [] + head_length = max( + 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) + ) + for k in range(head_length): + norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None + conv = Conv2d( + feature_channels[in_feature] if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not norm, + norm=norm_module, + activation=F.relu, + ) + weight_init.c2_msra_fill(conv) + head_ops.append(conv) + if feature_strides[in_feature] != self.common_stride: + head_ops.append( + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) + ) + self.scale_heads.append(nn.Sequential(*head_ops)) + self.add_module(in_feature, self.scale_heads[-1]) + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + weight_init.c2_msra_fill(self.predictor) + + def forward(self, features, targets=None): + """ + Returns: + In training, returns (None, dict of losses) + In inference, returns (CxHxW logits, {}) + """ + x = self.layers(features) + if self.training: + return None, self.losses(x, targets) + else: + x = F.interpolate( + x, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + return x, {} + + def layers(self, features): + for i, f in enumerate(self.in_features): + if i == 0: + x = self.scale_heads[i](features[f]) + else: + x = x + self.scale_heads[i](features[f]) + x = self.predictor(x) + return x + + def losses(self, predictions, targets): + predictions = F.interpolate( + predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False + ) + loss = F.cross_entropy( + predictions, targets, reduction="mean", ignore_index=self.ignore_value + ) + losses = {"loss_sem_seg": loss * self.loss_weight} + return losses diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/poolers.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/poolers.py new file mode 100644 index 0000000000000000000000000000000000000000..121b1a83694d2688ae3ea46d3c78951465177a5d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/poolers.py @@ -0,0 +1,272 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import math +import sys +from typing import List +import torch +from torch import nn +# from torchvision.ops import RoIPool + +from detectron2.layers import ROIAlign, ROIAlignRotated, cat, nonzero_tuple +from detectron2.structures import Boxes + +""" +To export ROIPooler to torchscript, in this file, +variables that should be annotated with +`Union[List[Boxes], List[RotatedBoxes]]` +are only annotated with `List[Boxes]`. + +TODO: Correct these annotations +when torchscript support `Union`. +https://github.com/pytorch/pytorch/issues/41412 +""" + +__all__ = ["ROIPooler"] + + +def assign_boxes_to_levels( + box_lists: List[Boxes], + min_level: int, + max_level: int, + canonical_box_size: int, + canonical_level: int, +): + """ + Map each box in `box_lists` to a feature map level index and return the assignment + vector. + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, + where N is the number of images in the batch. + min_level (int): Smallest feature map level index. The input is considered index 0, + the output of stage 1 is index 1, and so. + max_level (int): Largest feature map level index. + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). + canonical_level (int): The feature map level index on which a canonically-sized box + should be placed. + + Returns: + A tensor of length M, where M is the total number of boxes aggregated over all + N batch images. The memory layout corresponds to the concatenation of boxes + from all images. Each element is the feature map index, as an offset from + `self.min_level`, for the corresponding box (so value i means the box is at + `self.min_level + i`). + """ + box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) + # Eqn.(1) in FPN paper + level_assignments = torch.floor( + canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8) + ) + # clamp level to (min, max), in case the box size is too large or too small + # for the available feature maps + level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) + return level_assignments.to(torch.int32) - min_level + + +def _fmt_box_list(box_tensor, batch_index: int): + repeated_index = torch.full( + (len(box_tensor), 1), batch_index, + dtype=box_tensor.dtype, device=box_tensor.device + ) + return cat((repeated_index, box_tensor), dim=1) + + +def convert_boxes_to_pooler_format(box_lists: List[Boxes]): + """ + Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops + (see description under Returns). + + Args: + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + + Returns: + When input is list[Boxes]: + A tensor of shape (M, 5), where M is the total number of boxes aggregated over all + N batch images. + The 5 columns are (batch index, x0, y0, x1, y1), where batch index + is the index in [0, N) identifying which batch image the box with corners at + (x0, y0, x1, y1) comes from. + When input is list[RotatedBoxes]: + A tensor of shape (M, 6), where M is the total number of boxes aggregated over all + N batch images. + The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), + where batch index is the index in [0, N) identifying which batch image the + rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. 
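+
+    Illustrative example: two images with 1 and 2 boxes respectively yield a
+    (3, 5) tensor of the form::
+
+        [[0, x0, y0, x1, y1],
+         [1, x0, y0, x1, y1],
+         [1, x0, y0, x1, y1]]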
+ """ + pooler_fmt_boxes = cat( + [_fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 + ) + + return pooler_fmt_boxes + + +class ROIPooler(nn.Module): + """ + Region of interest feature map pooler that supports pooling from one or more + feature maps. + """ + + def __init__( + self, + output_size, + scales, + sampling_ratio, + pooler_type, + canonical_box_size=224, + canonical_level=4, + ): + """ + Args: + output_size (int, tuple[int] or list[int]): output size of the pooled region, + e.g., 14 x 14. If tuple or list is given, the length must be 2. + scales (list[float]): The scale for each low-level pooling op relative to + the input image. For a feature map with stride s relative to the input + image, scale is defined as a 1 / s. The stride must be power of 2. + When there are multiple scales, they must form a pyramid, i.e. they must be + a monotically decreasing geometric sequence with a factor of 1/2. + sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. + pooler_type (string): Name of the type of pooling operation that should be applied. + For instance, "ROIPool" or "ROIAlignV2". + canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default + is heuristically defined as 224 pixels in the FPN paper (based on ImageNet + pre-training). + canonical_level (int): The feature map level index from which a canonically-sized box + should be placed. The default is defined as level 4 (stride=16) in the FPN paper, + i.e., a box of size 224x224 will be placed on the feature with stride=16. + The box placement for all boxes will be determined from their sizes w.r.t + canonical_box_size. For example, a box whose area is 4x that of a canonical box + should be used to pool features from feature level ``canonical_level+1``. + + Note that the actual input feature maps given to this module may not have + sufficiently many levels for the input boxes. If the boxes are too large or too + small for the input feature maps, the closest level will be used. + """ + super().__init__() + + if isinstance(output_size, int): + output_size = (output_size, output_size) + assert len(output_size) == 2 + assert isinstance(output_size[0], int) and isinstance(output_size[1], int) + self.output_size = output_size + + if pooler_type == "ROIAlign": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False + ) + for scale in scales + ) + elif pooler_type == "ROIAlignV2": + self.level_poolers = nn.ModuleList( + ROIAlign( + output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True + ) + for scale in scales + ) + elif pooler_type == "ROIPool": + self.level_poolers = nn.ModuleList( + RoIPool(output_size, spatial_scale=scale) for scale in scales + ) + elif pooler_type == "ROIAlignRotated": + self.level_poolers = nn.ModuleList( + ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) + for scale in scales + ) + else: + raise ValueError("Unknown pooler type: {}".format(pooler_type)) + + # Map scale (defined as 1 / stride) to its feature map level under the + # assumption that stride is a power of 2. + min_level = -(math.log2(scales[0])) + max_level = -(math.log2(scales[-1])) + assert math.isclose(min_level, int(min_level)) and math.isclose( + max_level, int(max_level) + ), "Featuremap stride is not power of 2!" 
+ self.min_level = int(min_level) + self.max_level = int(max_level) + assert ( + len(scales) == self.max_level - self.min_level + 1 + ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" + assert 0 <= self.min_level and self.min_level <= self.max_level + self.canonical_level = canonical_level + assert canonical_box_size > 0 + self.canonical_box_size = canonical_box_size + + def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]): + """ + Args: + x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those + used to construct this module. + box_lists (list[Boxes] | list[RotatedBoxes]): + A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. + The box coordinates are defined on the original image and + will be scaled by the `scales` argument of :class:`ROIPooler`. + + Returns: + Tensor: + A tensor of shape (M, C, output_size, output_size) where M is the total number of + boxes aggregated over all N batch images and C is the number of channels in `x`. + """ + num_level_assignments = len(self.level_poolers) + + assert isinstance(x, list) and isinstance( + box_lists, list + ), "Arguments to pooler must be lists" + assert ( + len(x) == num_level_assignments + ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( + num_level_assignments, len(x) + ) + + assert len(box_lists) == x[0].size( + 0 + ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( + x[0].size(0), len(box_lists) + ) + + pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) + + if num_level_assignments == 1: + return self.level_poolers[0](x[0], pooler_fmt_boxes) + + level_assignments = assign_boxes_to_levels( + box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level + ) + + num_boxes = len(pooler_fmt_boxes) + num_channels = x[0].shape[1] + output_size = self.output_size[0] + + dtype, device = x[0].dtype, x[0].device + output = torch.zeros( + (num_boxes, num_channels, output_size, output_size), + dtype=dtype, device=device) + for level, pooler in enumerate(self.level_poolers): + inds_mask = (level_assignments == level) + inds_num = inds_mask.sum().item() + if inds_num != 0: + temp = pooler(x[level], pooler_fmt_boxes).npu_format_cast(0) + if dtype == torch.float16: + output = torch.where( + inds_mask[:, None, None, None].repeat( + 1, num_channels, output_size, output_size), + temp.half(), output) + elif dtype == torch.float32: + output = torch.where( + inds_mask[:, None, None, None].repeat( + 1, num_channels, output_size, output_size), + temp, output) + return output diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/postprocessing.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/postprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7a30f739eee6619a582b5c32ff226d60dc6caf --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/postprocessing.py @@ -0,0 +1,112 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from torch.nn import functional as F + +from detectron2.layers import paste_masks_in_image +from detectron2.structures import Instances +from detectron2.utils.memory import retry_if_cuda_oom + + +def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): + """ + Resize the output instances. + The input images are often resized when entering an object detector. + As a result, we often need the outputs of the detector in a different + resolution from its inputs. + + This function will resize the raw outputs of an R-CNN detector + to produce outputs according to the desired output resolution. + + Args: + results (Instances): the raw outputs from the detector. + `results.image_size` contains the input image resolution the detector sees. + This object might be modified in-place. + output_height, output_width: the desired output resolution. + + Returns: + Instances: the resized output from the model, based on the output resolution + """ + + # Converts integer tensors to float temporaries + # to ensure true division is performed when + # computing scale_x and scale_y. + if isinstance(output_width, torch.Tensor): + output_width_tmp = output_width.float() + else: + output_width_tmp = output_width + + if isinstance(output_height, torch.Tensor): + output_height_tmp = output_height.float() + else: + output_height_tmp = output_height + + scale_x, scale_y = ( + output_width_tmp / results.image_size[1], + output_height_tmp / results.image_size[0], + ) + results = Instances((output_height, output_width), **results.get_fields()) + + if results.has("pred_boxes"): + output_boxes = results.pred_boxes + elif results.has("proposal_boxes"): + output_boxes = results.proposal_boxes + + output_boxes.scale(scale_x, scale_y) + output_boxes.clip(results.image_size) + + if results.has("pred_masks"): + results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( + results.pred_masks[:, 0, :, :], # N, 1, M, M + results.pred_boxes, + results.image_size, + threshold=mask_threshold, + ) + + hw_mask = output_boxes.nonempty() + keep_mask = (results.scores != -1.0) & hw_mask + results.scores[~keep_mask] = -1.0 + + if results.has("pred_keypoints"): + results.pred_keypoints[:, :, 0] *= scale_x + results.pred_keypoints[:, :, 1] *= scale_y + + return results + + +def sem_seg_postprocess(result, img_size, output_height, output_width): + """ + Return semantic segmentation predictions in the original resolution. + + The input images are often resized when entering semantic segmentor. Moreover, in same + cases, they also padded inside segmentor to be divisible by maximum network stride. + As a result, we often need the predictions of the segmentor in a different + resolution from its inputs. + + Args: + result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), + where C is the number of classes, and H, W are the height and width of the prediction. + img_size (tuple): image size that segmentor is taking as input. + output_height, output_width: the desired output resolution. 
+ + Returns: + semantic segmentation prediction (Tensor): A tensor of the shape + (C, output_height, output_width) that contains per-pixel soft predictions. + """ + result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) + result = F.interpolate( + result, size=(output_height, output_width), mode="bilinear", align_corners=False + )[0] + return result diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e50daf7cc8dc8d07f94e139b95e99c60804ee26 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator +from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN + +__all__ = list(globals().keys()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/build.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/build.py new file mode 100644 index 0000000000000000000000000000000000000000..0648f70ace8b150db36a4519d092554ed5a21977 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/build.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from detectron2.utils.registry import Registry + +PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") +PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ +Registry for proposal generator, which produces object proposals from feature maps. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + +from . import rpn, rrpn # noqa F401 isort:skip + + +def build_proposal_generator(cfg, input_shape): + """ + Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. + The name can be "PrecomputedProposals" to use no proposal generator. 
+ """ + name = cfg.MODEL.PROPOSAL_GENERATOR.NAME + if name == "PrecomputedProposals": + return None + + return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..693c50eaff2643ad7d9e5527b66b2173ec64b81f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py @@ -0,0 +1,174 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import math +from typing import List, Tuple +import torch + +from detectron2.layers import batched_nms, cat +from detectron2.structures import Boxes, Instances + +logger = logging.getLogger(__name__) + + +def find_top_rpn_proposals( + proposals: List[torch.Tensor], + pred_objectness_logits: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + nms_thresh: float, + pre_nms_topk: int, + post_nms_topk: int, + min_box_size: float, + training: bool, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps for each image. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. + min_box_size (float): minimum proposal box side length in pixels (absolute units + wrt input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + list[Instances]: list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. 
+ """ + num_images = len(image_sizes) + device = proposals[0].device + topk_scores_list = [] # #lvl Tensor, each of shape N x topk + topk_proposals_list = [] + level_ids_list = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)): + Hi_Wi_A = logits_i.shape[1] + num_proposals_i = min(pre_nms_topk, Hi_Wi_A) + topk_scores_i, topk_idx = torch.topk(logits_i, num_proposals_i, dim=1) + topk_proposals_i = proposals_i[batch_idx[:, None].long(), + topk_idx.long()] # N x topk x 4 + topk_proposals_list.append(topk_proposals_i) + topk_scores_list.append(topk_scores_i) + level_ids_list.append( + torch.full((num_proposals_i,), level_id, + dtype=torch.int32, device=device)) + + results: List[Instances] = [] + for n, image_size in enumerate(image_sizes): + level_keep_list = [] + level_boxes_list = [] + level_scores_per_img = [] + + for level in range(len(topk_proposals_list)): + topk_proposals = topk_proposals_list[level] + topk_scores = topk_scores_list[level] + level_ids = level_ids_list[level] + + boxes = Boxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + lvl = level_ids + + if not training: + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img.float()) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + lvl = lvl[valid_mask] + + if scores_per_img.dtype != torch.float32: + scores_per_img = scores_per_img.to(torch.float32) + + keep_mask = batched_nms(boxes.tensor, + scores_per_img, lvl, nms_thresh) + + level_keep_list.append(keep_mask) + level_boxes_list.append(boxes) + level_scores_per_img.append(scores_per_img) + + keep_mask = cat(level_keep_list, dim=0) + boxes = Boxes.cat(level_boxes_list) + scores_per_img = cat(level_scores_per_img, dim=0) + scores_per_img = scores_per_img * keep_mask.float() + + topk_scores_i, indice = torch.topk(scores_per_img, post_nms_topk) + + res = Instances(image_size) + res.proposal_boxes = boxes[indice.long()] + res.objectness_logits = topk_scores_i + results.append(res) + return results + +def add_ground_truth_to_proposals(gt_boxes, proposals): + """ + Call `add_ground_truth_to_proposals_single_image` for all images. + + Args: + gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes + representing the gound-truth for image i. + proposals (list[Instances]): list of N elements. Element i is a Instances + representing the proposals for image i. + + Returns: + list[Instances]: list of N Instances. Each is the proposals for the image, + with field "proposal_boxes" and "objectness_logits". + """ + assert gt_boxes is not None + + assert len(proposals) == len(gt_boxes) + if len(proposals) == 0: + return proposals + + return [ + add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) + for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) + ] + + +def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): + """ + Augment `proposals` with ground-truth boxes from `gt_boxes`. + + Args: + Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals + per image. + + Returns: + Same as `add_ground_truth_to_proposals`, but for only one image. 
+ """ + device = proposals.objectness_logits.device + gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) + gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) + + # Concatenating gt_boxes with proposals requires them to have the same fields + gt_proposal = Instances(proposals.image_size) + gt_proposal.proposal_boxes = gt_boxes + gt_proposal.objectness_logits = gt_logits + new_proposals = Instances.cat([proposals, gt_proposal]) + + return new_proposals diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..cbb097537550077a9af168f796e5aab1323f7431 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rpn.py @@ -0,0 +1,554 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict, List, Optional, Tuple, Union +import torch +import torch.nn.functional as F +from fvcore.nn import giou_loss, smooth_l1_loss +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, cat +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage +from detectron2.utils.memory import retry_if_cuda_oom +from detectron2.utils.registry import Registry + +from ..anchor_generator import build_anchor_generator +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..sampling import subsample_labels +from .build import PROPOSAL_GENERATOR_REGISTRY +from .proposal_utils import find_top_rpn_proposals + +RPN_HEAD_REGISTRY = Registry("RPN_HEAD") +RPN_HEAD_REGISTRY.__doc__ = """ +Registry for RPN heads, which take feature maps and perform +objectness classification and bounding box regression for anchors. + +The registered object will be called with `obj(cfg, input_shape)`. +The call should return a `nn.Module` object. +""" + + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + L: number of feature maps per image on which RPN is run + A: number of cell anchors (must be the same for all feature maps) + Hi, Wi: height and width of the i-th feature map + B: size of the box parameterization + +Naming convention: + + objectness: refers to the binary classification of an anchor as object vs. not object. + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`), or 5d for rotated boxes. + + pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use + sigmoid(pred_objectness_logits) to estimate P(object). 
+ + gt_labels: ground-truth binary classification labels for objectness + + pred_anchor_deltas: predicted box2box transform deltas + + gt_anchor_deltas: ground-truth box2box transform deltas +""" + + +def build_rpn_head(cfg, input_shape): + """ + Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. + """ + name = cfg.MODEL.RPN.HEAD_NAME + return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +@RPN_HEAD_REGISTRY.register() +class StandardRPNHead(nn.Module): + """ + Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. + Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts + objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas + specifying how to deform each anchor into an object proposal. + """ + + @configurable + def __init__(self, *, in_channels: int, num_anchors: int, box_dim: int = 4): + """ + NOTE: this interface is experimental. + + Args: + in_channels (int): number of input feature channels. When using multiple + input features, they must have the same number of channels. + num_anchors (int): number of anchors to predict for *each spatial position* + on the feature map. The total number of anchors for each + feature map will be `num_anchors * H * W`. + box_dim (int): dimension of a box, which is also the number of box regression + predictions to make for each anchor. An axis aligned box has + box_dim=4, while a rotated box has box_dim=5. + """ + super().__init__() + # 3x3 conv for the hidden representation + self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + # 1x1 conv for predicting objectness logits + self.objectness_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1) + # 1x1 conv for predicting box2box transform deltas + self.anchor_deltas = nn.Conv2d(in_channels, num_anchors * box_dim, kernel_size=1, stride=1) + + for l in [self.conv, self.objectness_logits, self.anchor_deltas]: + nn.init.normal_(l.weight, std=0.01) + nn.init.constant_(l.bias, 0) + + @classmethod + def from_config(cls, cfg, input_shape): + # Standard RPN is shared across levels: + in_channels = [s.channels for s in input_shape] + assert len(set(in_channels)) == 1, "Each level must have the same channel!" + in_channels = in_channels[0] + + # RPNHead should take the same input as anchor generator + # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. + anchor_generator = build_anchor_generator(cfg, input_shape) + num_anchors = anchor_generator.num_anchors + box_dim = anchor_generator.box_dim + assert ( + len(set(num_anchors)) == 1 + ), "Each level must have the same number of anchors per spatial position" + return {"in_channels": in_channels, "num_anchors": num_anchors[0], "box_dim": box_dim} + + def forward(self, features: List[torch.Tensor]): + """ + Args: + features (list[Tensor]): list of feature maps + + Returns: + list[Tensor]: A list of L elements. + Element i is a tensor of shape (N, A, Hi, Wi) representing + the predicted objectness logits for all anchors. A is the number of cell anchors. + list[Tensor]: A list of L elements. Element i is a tensor of shape + (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors + to proposals. 
+ """ + pred_objectness_logits = [] + pred_anchor_deltas = [] + for x in features: + t = F.relu(self.conv(x)) + pred_objectness_logits.append(self.objectness_logits(t)) + pred_anchor_deltas.append(self.anchor_deltas(t)) + return pred_objectness_logits, pred_anchor_deltas + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RPN(nn.Module): + """ + Region Proposal Network, introduced by :paper:`Faster R-CNN`. + """ + + @configurable + def __init__( + self, + *, + in_features: List[str], + head: nn.Module, + anchor_generator: nn.Module, + anchor_matcher: Matcher, + box2box_transform: Box2BoxTransform, + batch_size_per_image: int, + positive_fraction: float, + pre_nms_topk: Tuple[float, float], + post_nms_topk: Tuple[float, float], + nms_thresh: float = 0.7, + min_box_size: float = 0.0, + anchor_boundary_thresh: float = -1.0, + loss_weight: Union[float, Dict[str, float]] = 1.0, + box_reg_loss_type: str = "smooth_l1", + smooth_l1_beta: float = 0.0, + amp=0, + opt_level=None, + fix_shape=None + ): + """ + NOTE: this interface is experimental. + + Args: + in_features (list[str]): list of names of input features to use + head (nn.Module): a module that predicts logits and regression deltas + for each level from a list of per-level features + anchor_generator (nn.Module): a module that creates anchors from a + list of features. Usually an instance of :class:`AnchorGenerator` + anchor_matcher (Matcher): label the anchors by matching them with ground truth. + box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to + instance boxes + batch_size_per_image (int): number of anchors per image to sample for training + positive_fraction (float): fraction of foreground anchors to sample for training + pre_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select before NMS, in + training and testing. + post_nms_topk (tuple[float]): (train, test) that represents the + number of top k proposals to select after NMS, in + training and testing. + nms_thresh (float): NMS threshold used to de-duplicate the predicted proposals + min_box_size (float): remove proposal boxes with any side smaller than this threshold, + in the unit of input image pixels + anchor_boundary_thresh (float): legacy option + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all rpn losses together, or a dict of individual weightings. Valid dict keys are: + "loss_rpn_cls" - applied to classification loss + "loss_rpn_loc" - applied to box regression loss + box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". + smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to + use L1 loss. 
Only used when `box_reg_loss_type` is "smooth_l1" + """ + super().__init__() + self.in_features = in_features + self.rpn_head = head + self.anchor_generator = anchor_generator + self.anchor_matcher = anchor_matcher + self.box2box_transform = box2box_transform + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + # Map from self.training state to train/test settings + self.pre_nms_topk = {True: pre_nms_topk[0], False: pre_nms_topk[1]} + self.post_nms_topk = {True: post_nms_topk[0], False: post_nms_topk[1]} + self.nms_thresh = nms_thresh + self.min_box_size = float(min_box_size) + self.anchor_boundary_thresh = anchor_boundary_thresh + if isinstance(loss_weight, float): + loss_weight = {"loss_rpn_cls": loss_weight, "loss_rpn_loc": loss_weight} + self.loss_weight = loss_weight + self.box_reg_loss_type = box_reg_loss_type + self.smooth_l1_beta = smooth_l1_beta + + self.static_shape = True + self.anchor_gened = False + self.amp = amp + self.opt_level = opt_level + self.fix_shape = fix_shape + + @classmethod + def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): + in_features = cfg.MODEL.RPN.IN_FEATURES + ret = { + "in_features": in_features, + "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE, + "nms_thresh": cfg.MODEL.RPN.NMS_THRESH, + "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION, + "loss_weight": { + "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT, + "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT, + }, + "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS), + "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE, + "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA, + } + + ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOPK_TEST) + ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, cfg.MODEL.RPN.POST_NMS_TOPK_TEST) + + ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features]) + ret["anchor_matcher"] = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, + allow_low_quality_matches=True) + ret["head"] = build_rpn_head(cfg, [input_shape[f] for f in in_features]) + ret["amp"] = cfg.AMP + ret["opt_level"] = cfg.OPT_LEVEL + ret["fix_shape"] = cfg.INPUT.FIX_SHAPE + return ret + + def _subsample_labels(self, label): + """ + Randomly sample a subset of positive and negative examples, and overwrite + the label vector to the ignore value (-1) for all elements that are not + included in the sample. + + Args: + labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. + """ + pos_idx, neg_idx = subsample_labels( + label, self.batch_size_per_image, self.positive_fraction, 0 + ) + # Fill with the ignore label (-1), then set positive and negative labels + label.fill_(-1) + label[pos_idx.long()] = 1 + label[neg_idx.long()] = 0 + return label + + @torch.jit.unused + @torch.no_grad() + def label_and_sample_anchors( + self, anchors: List[Boxes], gt_instances: List[Instances] + ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + Args: + anchors (list[Boxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across all feature maps R = sum(Hi * Wi * A). 
+ Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative + class; 1 = positive class. + list[Tensor]: + i-th element is a Rx4 tensor. The values are the matched gt boxes for each + anchor. Values are undefined for those anchors not labeled as 1. + """ + anchors = Boxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + image_sizes = [x.image_size for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): + """ + image_size_i: (h, w) for the i-th image + gt_boxes_i: ground-truth boxes for i-th image + """ + + match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + del match_quality_matrix + + if self.anchor_boundary_thresh >= 0: + # Discard anchors that go out of the boundaries of the image + # NOTE: This is legacy functionality that is turned off by default in Detectron2 + anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh) + gt_labels_i[~anchors_inside_image] = -1 + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = torch.index_select( + gt_boxes_i.tensor, 0, matched_idxs.long()) + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.jit.unused + def losses( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + gt_labels: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + gt_boxes: List[torch.Tensor], + ) -> Dict[str, torch.Tensor]: + """ + Return the losses from a set of RPN predictions and their associated ground-truth. + + Args: + anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each + has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). + pred_objectness_logits (list[Tensor]): A list of L elements. + Element i is a tensor of shape (N, Hi*Wi*A) representing + the predicted objectness logits for all anchors. + gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape + (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors + to proposals. + gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. + + Returns: + dict[loss name -> loss value]: A dict mapping from loss name to loss value. + Loss names are: `loss_rpn_cls` for objectness classification and + `loss_rpn_loc` for proposal localization. 
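+
+ Note: in the "smooth_l1" branch below, the regression targets are computed
+ with `torch.npu_bounding_box_encode`, and both losses are normalized by
+ `self.batch_size_per_image * num_images`.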
+ """ + num_images = len(gt_labels) + gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) + + # Log the number of positive/negative anchors per-image that's used in training + pos_mask = gt_labels == 1 + num_pos_anchors = pos_mask.sum().item() + num_neg_anchors = (gt_labels == 0).sum().item() + storage = get_event_storage() + storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) + storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) + + if self.box_reg_loss_type == "smooth_l1": + anchors = type(anchors[0]).cat(anchors).tensor # Ax(4 or 5) + gt_anchor_deltas = [torch.npu_bounding_box_encode + (anchors, k, 0, 0, 0, 0, 1, 1, 1, 1) + for k in gt_boxes] + gt_anchor_deltas = torch.stack( + gt_anchor_deltas) # (N, sum(Hi*Wi*Ai), 4 or 5) + if self.amp and (self.opt_level == "O1" or self.opt_level == "O2"): + input = cat(pred_anchor_deltas, dim=1) * \ + pos_mask[:, :, None].half() + target = gt_anchor_deltas.half() * \ + pos_mask[:, :, None].half() + else: + input = cat(pred_anchor_deltas, dim=1) * \ + pos_mask[:, :, None].float() + target = gt_anchor_deltas * pos_mask[:, :, None].float() + localization_loss = smooth_l1_loss( + input, target, self.smooth_l1_beta, reduction="sum",) + + elif self.box_reg_loss_type == "giou": + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + pred_proposals = cat(pred_proposals, dim=1) + pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1]) + pos_mask = pos_mask.view(-1) + localization_loss = giou_loss( + pred_proposals[pos_mask], cat(gt_boxes)[pos_mask], reduction="sum" + ) + else: + raise ValueError(f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'") + + valid_mask = gt_labels >= 0 # already fixed, the num be RPN.BATCH_SIZE_PER_IMAGE(256)*num_images + objectness_loss = F.binary_cross_entropy_with_logits( + cat(pred_objectness_logits, dim=1).float(), + gt_labels.float(), + reduction="none", + ) + objectness_loss = (objectness_loss * valid_mask.float()).sum() + normalizer = self.batch_size_per_image * num_images + losses = { + "loss_rpn_cls": objectness_loss / normalizer, + "loss_rpn_loc": localization_loss / normalizer, + } + losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + return losses + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + gt_instances: Optional[List[Instances]] = None, + ): + """ + Args: + images (ImageList): input images of length `N` + features (dict[str, Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + gt_instances (list[Instances], optional): a length `N` list of `Instances`s. + Each `Instances` stores ground-truth instances for the corresponding image. 
+ + Returns: + proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" + loss: dict[Tensor] or None + """ + features = [features[f] for f in self.in_features] + if self.static_shape: + if self.anchor_gened: + anchors = self.anchors + else: + anchors = self.anchor_generator(features) + self.anchors = anchors + self.anchor_gened = True + else: + anchors = self.anchor_generator(features) + + pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) + # Transpose the Hi*Wi*A dimension to the middle: + pred_objectness_logits = [ + # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) + score.npu_format_cast(0).permute(0, 2, 3, 1).flatten(1) + for score in pred_objectness_logits + ] + pred_anchor_deltas = [ + # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) + x.npu_format_cast(0).view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) + .permute(0, 3, 4, 1, 2) + .flatten(1, -2) + for x in pred_anchor_deltas + ] + + if self.training: + assert gt_instances is not None, "RPN requires gt_instances in training!" + gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) + losses = self.losses( + anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes + ) + else: + losses = {} + proposals = self.predict_proposals( + anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes + ) + return proposals, losses + + # TODO: use torch.no_grad when torchscript supports it. + # https://github.com/pytorch/pytorch/pull/41371 + def predict_proposals( + self, + anchors: List[Boxes], + pred_objectness_logits: List[torch.Tensor], + pred_anchor_deltas: List[torch.Tensor], + image_sizes: List[Tuple[int, int]], + ): + """ + Decode all the predicted box regression deltas to proposals. Find the top proposals + by applying NMS and removing boxes that are too small. + + Returns: + proposals (list[Instances]): list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i, sorted by their + objectness score in descending order. + """ + # The proposals are treated as fixed for approximate joint training with roi heads. + # This approach ignores the derivative w.r.t. the proposal boxes’ coordinates that + # are also network responses, so is approximate. + pred_objectness_logits = [t.detach() for t in pred_objectness_logits] + pred_anchor_deltas = [t.detach() for t in pred_anchor_deltas] + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + # https://github.com/pytorch/pytorch/issues/41449 + self.pre_nms_topk[int(self.training)], + self.post_nms_topk[int(self.training)], + self.min_box_size, + self.training, + ) + + def _decode_proposals(self, anchors: List[Boxes], pred_anchor_deltas: List[torch.Tensor]): + """ + Transform anchors into proposals by applying the predicted anchor deltas. + + Returns: + proposals (list[Tensor]): A list of L tensors. 
Tensor i has shape + (N, Hi*Wi*A, B) + """ + N = pred_anchor_deltas[0].shape[0] + proposals = [] + # For each feature map + for anchors_i, pred_anchor_deltas_i in zip(anchors, pred_anchor_deltas): + B = anchors_i.tensor.size(1) + pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) + # Expand anchors to shape (N*Hi*Wi*A, B) + anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) + proposals_i = torch.npu_bounding_box_decode( + anchors_i, pred_anchor_deltas_i, 0, 0, 0, 0, 1, 1, 1, 1, + (self.fix_shape[1], self.fix_shape[0]), 16/1000) + # Append feature map proposals with shape (N, Hi*Wi*A, B) + proposals.append(proposals_i.view(N, -1, B)) + return proposals diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rrpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rrpn.py new file mode 100644 index 0000000000000000000000000000000000000000..6a64931031311450eff98f031ee599268eada623 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/proposal_generator/rrpn.py @@ -0,0 +1,209 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import logging +from typing import Dict, List +import torch + +from detectron2.layers import ShapeSpec, batched_nms_rotated, cat +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.memory import retry_if_cuda_oom + +from ..box_regression import Box2BoxTransformRotated +from .build import PROPOSAL_GENERATOR_REGISTRY +from .rpn import RPN + +logger = logging.getLogger(__name__) + + +def find_top_rrpn_proposals( + proposals, + pred_objectness_logits, + image_sizes, + nms_thresh, + pre_nms_topk, + post_nms_topk, + min_box_size, + training, +): + """ + For each feature map, select the `pre_nms_topk` highest scoring proposals, + apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` + highest scoring proposals among all the feature maps if `training` is True, + otherwise, returns the highest `post_nms_topk` scoring proposals for each + feature map. + + Args: + proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). + All proposal predictions on the feature maps. + pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). + image_sizes (list[tuple]): sizes (h, w) for each image + nms_thresh (float): IoU threshold to use for NMS + pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is per + feature map. + post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. + When RRPN is run on multiple feature maps (as in FPN) this number is total, + over all feature maps. 
+ min_box_size(float): minimum proposal box side length in pixels (absolute units wrt + input images). + training (bool): True if proposals are to be used in training, otherwise False. + This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." + comment. + + Returns: + proposals (list[Instances]): list of N Instances. The i-th Instances + stores post_nms_topk object proposals for image i. + """ + num_images = len(image_sizes) + device = proposals[0].device + + # 1. Select top-k anchor for every level and every image + topk_scores = [] # #lvl Tensor, each of shape N x topk + topk_proposals = [] + level_ids = [] # #lvl Tensor, each of shape (topk,) + batch_idx = torch.arange(num_images, device=device) + for level_id, proposals_i, logits_i in zip( + itertools.count(), proposals, pred_objectness_logits + ): + Hi_Wi_A = logits_i.shape[1] + num_proposals_i = min(pre_nms_topk, Hi_Wi_A) + + # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) + # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) + logits_i, idx = logits_i.sort(descending=True, dim=1) + topk_scores_i = logits_i[batch_idx, :num_proposals_i] + topk_idx = idx[batch_idx, :num_proposals_i] + + # each is N x topk + topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 + + topk_proposals.append(topk_proposals_i) + topk_scores.append(topk_scores_i) + level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) + + # 2. Concat all levels together + topk_scores = cat(topk_scores, dim=1) + topk_proposals = cat(topk_proposals, dim=1) + level_ids = cat(level_ids, dim=0) + + # 3. For each image, run a per-level NMS, and choose topk results. + results = [] + for n, image_size in enumerate(image_sizes): + boxes = RotatedBoxes(topk_proposals[n]) + scores_per_img = topk_scores[n] + valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores_per_img = scores_per_img[valid_mask] + boxes.clip(image_size) + + # filter empty boxes + keep = boxes.nonempty(threshold=min_box_size) + lvl = level_ids + if keep.sum().item() != len(boxes): + boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], level_ids[keep]) + + keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) + # In Detectron1, there was different behavior during training vs. testing. + # (https://github.com/facebookresearch/Detectron/issues/459) + # During training, topk is over the proposals from *all* images in the training batch. + # During testing, it is over the proposals for each image separately. + # As a result, the training behavior becomes batch-dependent, + # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. + # This bug is addressed in Detectron2 to make the behavior independent of batch size. + keep = keep[:post_nms_topk] + + res = Instances(image_size) + res.proposal_boxes = boxes[keep] + res.objectness_logits = scores_per_img[keep] + results.append(res) + return results + + +@PROPOSAL_GENERATOR_REGISTRY.register() +class RRPN(RPN): + """ + Rotated Region Proposal Network described in :paper:`RRPN`. 
+ """ + + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): + box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) + super().__init__(cfg, input_shape, box2box_transform=box2box_transform) + if self.anchor_boundary_thresh >= 0: + raise NotImplementedError( + "anchor_boundary_thresh is a legacy option not implemented for RRPN." + ) + + @torch.no_grad() + def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): + """ + Args: + anchors (list[RotatedBoxes]): anchors for each feature map. + gt_instances: the ground-truth instances for each image. + + Returns: + list[Tensor]: + List of #img tensors. i-th element is a vector of labels whose length is + the total number of anchors across feature maps. Label values are in {-1, 0, 1}, + with meanings: -1 = ignore; 0 = negative class; 1 = positive class. + list[Tensor]: + i-th element is a Nx5 tensor, where N is the total number of anchors across + feature maps. The values are the matched gt boxes for each anchor. + Values are undefined for those anchors not labeled as 1. + """ + anchors = RotatedBoxes.cat(anchors) + + gt_boxes = [x.gt_boxes for x in gt_instances] + del gt_instances + + gt_labels = [] + matched_gt_boxes = [] + for gt_boxes_i in gt_boxes: + """ + gt_boxes_i: ground-truth boxes for i-th image + """ + match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) + matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) + # Matching is memory-expensive and may result in CPU tensors. But the result is small + gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) + + # A vector of labels (-1, 0, 1) for each anchor + gt_labels_i = self._subsample_labels(gt_labels_i) + + if len(gt_boxes_i) == 0: + # These values won't be used anyway since the anchor is labeled as background + matched_gt_boxes_i = torch.zeros_like(anchors.tensor) + else: + # TODO wasted indexing computation for ignored boxes + matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor + + gt_labels.append(gt_labels_i) # N,AHW + matched_gt_boxes.append(matched_gt_boxes_i) + return gt_labels, matched_gt_boxes + + @torch.no_grad() + def predict_proposals(self, anchors, pred_objectness_logits, pred_anchor_deltas, image_sizes): + pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) + return find_top_rrpn_proposals( + pred_proposals, + pred_objectness_logits, + image_sizes, + self.nms_thresh, + self.pre_nms_topk[self.training], + self.post_nms_topk[self.training], + self.min_box_size, + self.training, + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..182415045bcdf109bb2242cfd11e481d44b81a4a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head +from .keypoint_head import ROI_KEYPOINT_HEAD_REGISTRY, build_keypoint_head, BaseKeypointRCNNHead +from .mask_head import ROI_MASK_HEAD_REGISTRY, build_mask_head, BaseMaskRCNNHead +from .roi_heads import ( + ROI_HEADS_REGISTRY, + ROIHeads, + Res5ROIHeads, + StandardROIHeads, + build_roi_heads, + select_foreground_proposals, +) +from .rotated_fast_rcnn import RROIHeads +from .fast_rcnn import FastRCNNOutputLayers + +from . import cascade_rcnn # isort:skip + +__all__ = list(globals().keys()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/box_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/box_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ffcd75f06bc22afa15bb49de0e2cb9d0dd2f3a79 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/box_head.py @@ -0,0 +1,130 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm +from detectron2.utils.registry import Registry + +__all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] + +ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") +ROI_BOX_HEAD_REGISTRY.__doc__ = """ +Registry for box heads, which make box predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +@ROI_BOX_HEAD_REGISTRY.register() +class FastRCNNConvFCHead(nn.Module): + """ + A head with several 3x3 conv layers (each followed by norm & relu) and then + several fc layers (each followed by relu). + """ + + @configurable + def __init__( + self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature. + conv_dims (list[int]): the output dimensions of the conv layers + fc_dims (list[int]): the output dimensions of the fc layers + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. 
+ """ + super().__init__() + assert len(conv_dims) + len(fc_dims) > 0 + + self._output_size = (input_shape.channels, input_shape.height, input_shape.width) + + self.conv_norm_relus = [] + for k, conv_dim in enumerate(conv_dims): + conv = Conv2d( + self._output_size[0], + conv_dim, + kernel_size=3, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=F.relu, + ) + self.add_module("conv{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) + + self.fcs = [] + for k, fc_dim in enumerate(fc_dims): + fc = Linear(np.prod(self._output_size), fc_dim) + self.add_module("fc{}".format(k + 1), fc) + self.fcs.append(fc) + self._output_size = fc_dim + + for layer in self.conv_norm_relus: + weight_init.c2_msra_fill(layer) + for layer in self.fcs: + weight_init.c2_xavier_fill(layer) + + @classmethod + def from_config(cls, cfg, input_shape): + num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV + conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM + num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC + fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM + return { + "input_shape": input_shape, + "conv_dims": [conv_dim] * num_conv, + "fc_dims": [fc_dim] * num_fc, + "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, + } + + def forward(self, x): + for layer in self.conv_norm_relus: + x = layer(x) + if len(self.fcs): + if x.dim() > 2: + x = torch.flatten(x, start_dim=1) + for layer in self.fcs: + x = F.relu(layer(x)) + return x + + @property + def output_shape(self): + """ + Returns: + ShapeSpec: the output feature shape + """ + o = self._output_size + if isinstance(o, int): + return ShapeSpec(channels=o) + else: + return ShapeSpec(channels=o[0], height=o[1], width=o[2]) + + +def build_box_head(cfg, input_shape): + """ + Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_BOX_HEAD.NAME + return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..c6c08b32e73ac4562c96abd73d41706cd60638f7 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py @@ -0,0 +1,311 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List +import torch +from torch import nn +from torch.autograd.function import Function + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec +from detectron2.structures import Boxes, Instances, pairwise_iou +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransform +from ..matcher import Matcher +from ..poolers import ROIPooler +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + + +class _ScaleGradient(Function): + @staticmethod + def forward(ctx, input, scale): + ctx.scale = scale + return input + + @staticmethod + def backward(ctx, grad_output): + return grad_output * ctx.scale, None + + +@ROI_HEADS_REGISTRY.register() +class CascadeROIHeads(StandardROIHeads): + """ + Implement :paper:`Cascade R-CNN`. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_heads: List[nn.Module], + box_predictors: List[nn.Module], + proposal_matchers: List[Matcher], + **kwargs, + ): + """ + NOTE: this interface is experimental. + + Args: + box_pooler (ROIPooler): pooler that extracts region features from given boxes + box_heads (list[nn.Module]): box head for each cascade stage + box_predictors (list[nn.Module]): box predictor for each cascade stage + proposal_matchers (list[Matcher]): matcher with different IoU thresholds to + match boxes with ground truth for each stage. The first matcher matches + RPN proposals with ground truth, the other matchers use boxes predicted + by the previous stage as proposals and match them with ground truth. + """ + assert "proposal_matcher" not in kwargs, ( + "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " + "of one 'proposal_matcher='." + ) + # The first matcher matches RPN proposals with ground truth, done in the base class + kwargs["proposal_matcher"] = proposal_matchers[0] + num_stages = self.num_cascade_stages = len(box_heads) + box_heads = nn.ModuleList(box_heads) + box_predictors = nn.ModuleList(box_predictors) + assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" + assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" + super().__init__( + box_in_features=box_in_features, + box_pooler=box_pooler, + box_head=box_heads, + box_predictor=box_predictors, + **kwargs, + ) + self.proposal_matchers = proposal_matchers + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret.pop("proposal_matcher") + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS + cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS + assert len(cascade_bbox_reg_weights) == len(cascade_ious) + assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ + "CascadeROIHeads only support class-agnostic regression now!" 
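+ # The first cascade stage matches the incoming RPN proposals, so its IoU
+ # threshold is required to equal the one ROI_HEADS uses for those proposals.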
+ assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + pooled_shape = ShapeSpec( + channels=in_channels, width=pooler_resolution, height=pooler_resolution + ) + + box_heads, box_predictors, proposal_matchers = [], [], [] + for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): + box_head = build_box_head(cfg, pooled_shape) + box_heads.append(box_head) + box_predictors.append( + FastRCNNOutputLayers( + cfg, + box_head.output_shape, + box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), + ) + ) + proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_heads": box_heads, + "box_predictors": box_predictors, + "proposal_matchers": proposal_matchers, + } + + def forward(self, images, features, proposals, targets=None): + del images + if self.training: + proposals = self.label_and_sample_proposals(proposals, targets) + + if self.training: + # Need targets to box head + losses = self._forward_box(features, proposals, targets) + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def _forward_box(self, features, proposals, targets=None): + """ + Args: + features, targets: the same as in + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + """ + features = [features[f] for f in self.box_in_features] + head_outputs = [] # (predictor, predictions, proposals) + prev_pred_boxes = None + image_sizes = [x.image_size for x in proposals] + for k in range(self.num_cascade_stages): + if k > 0: + # The output boxes of the previous stage are used to create the input + # proposals of the next stage. + proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) + if self.training: + proposals = self._match_and_label_boxes(proposals, k, targets) + predictions = self._run_stage(features, proposals, k) + prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) + head_outputs.append((self.box_predictor[k], predictions, proposals)) + + if self.training: + losses = {} + storage = get_event_storage() + for stage, (predictor, predictions, proposals) in enumerate(head_outputs): + with storage.name_scope("stage{}".format(stage)): + stage_losses = predictor.losses(predictions, proposals) + losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) + return losses + else: + # Each is a list[Tensor] of length #image. 
Each tensor is Ri x (K+1) + scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] + + # Average the scores across heads + scores = [ + sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) + for scores_per_image in zip(*scores_per_stage) + ] + # Use the boxes of the last head + predictor, predictions, proposals = head_outputs[-1] + boxes = predictor.predict_boxes(predictions, proposals) + pred_instances, _ = fast_rcnn_inference( + boxes, + scores, + image_sizes, + predictor.test_score_thresh, + predictor.test_nms_thresh, + predictor.test_topk_per_image, + ) + return pred_instances + + @torch.no_grad() + def _match_and_label_boxes(self, proposals, stage, targets): + """ + Match proposals with groundtruth using the matcher at the given stage. + Label the proposals as foreground or background based on the match. + + Args: + proposals (list[Instances]): One Instances for each image, with + the field "proposal_boxes". + stage (int): the current stage + targets (list[Instances]): the ground truth instances + + Returns: + list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" + """ + num_fg_samples, num_bg_samples = [], [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + # proposal_labels are 0 or 1 + matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) + if len(targets_per_image) > 0: + gt_classes = targets_per_image.gt_classes[matched_idxs] + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[proposal_labels == 0] = self.num_classes + gt_boxes = targets_per_image.gt_boxes[matched_idxs] + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) + ) + proposals_per_image.gt_classes = gt_classes + proposals_per_image.gt_boxes = gt_boxes + + num_fg_samples.append((proposal_labels == 1).sum().item()) + num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) + + # Log the number of fg/bg samples in each stage + storage = get_event_storage() + storage.put_scalar( + "stage{}/roi_head/num_fg_samples".format(stage), + sum(num_fg_samples) / len(num_fg_samples), + ) + storage.put_scalar( + "stage{}/roi_head/num_bg_samples".format(stage), + sum(num_bg_samples) / len(num_bg_samples), + ) + return proposals + + def _run_stage(self, features, proposals, stage): + """ + Args: + features (list[Tensor]): #lvl input features to ROIHeads + proposals (list[Instances]): #image Instances, with the field "proposal_boxes" + stage (int): the current stage + + Returns: + Same output as `FastRCNNOutputLayers.forward()`. + """ + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + # The original implementation averages the losses among heads, + # but scale up the parameter gradients of the heads. + # This is equivalent to adding the losses among heads, + # but scale down the gradients on features. 
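+ # _ScaleGradient is the identity in the forward pass and multiplies the
+ # incoming gradient by 1 / num_cascade_stages in the backward pass, so the
+ # shared pooled features receive the average of the per-stage gradients while
+ # each stage's head still gets the full gradient of its own loss.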
+ box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) + box_features = self.box_head[stage](box_features) + return self.box_predictor[stage](box_features) + + def _create_proposals_from_boxes(self, boxes, image_sizes): + """ + Args: + boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 + image_sizes (list[tuple]): list of image shapes in (h, w) + + Returns: + list[Instances]: per-image proposals with the given boxes. + """ + # Just like RPN, the proposals should not have gradients + boxes = [Boxes(b.detach()) for b in boxes] + proposals = [] + for boxes_per_image, image_size in zip(boxes, image_sizes): + boxes_per_image.clip(image_size) + if self.training: + # do not filter empty boxes at inference time, + # because the scores from each stage need to be aligned and added later + boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] + prop = Instances(image_size) + prop.proposal_boxes = boxes_per_image + proposals.append(prop) + return proposals diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d7c3e07bf12c87205c45279e04890eec0d8461 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py @@ -0,0 +1,578 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from typing import Dict, Union +import torch +from fvcore.nn import giou_loss, smooth_l1_loss +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Linear, ShapeSpec, batched_nms, cat, nonzero_tuple +from detectron2.modeling.box_regression import Box2BoxTransform +from detectron2.structures import Boxes, Instances +from detectron2.utils.events import get_event_storage + +__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] + + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransform`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted box2box transform deltas for transforming proposals + to detection box predictions. 
+ + gt_proposal_deltas: ground-truth box2box transform deltas +""" + + +def fast_rcnn_inference(boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image): + """ + Call `fast_rcnn_inference_single_image` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 4) if doing + class-specific regression, or (Ri, 4) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. + kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def fast_rcnn_inference_single_image( + boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image +): + """ + Single-image inference. + Return bounding-box detection results by thresholding + on scores and applying non-maximum suppression (NMS). + + Args: + Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference`, but for only one image. + """ + valid_mask = torch.isfinite(boxes.float()).all(dim=1) & \ + torch.isfinite(scores.float()).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // 4 + # Convert to Boxes to use the `clip` function ... 
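# --- Editor's sketch (not part of the patch) ---------------------------------
# For reference, the stock detectron2 per-image inference that the
# torch.npu_batch_nms path below replaces filters by score and then runs
# class-wise NMS, roughly as follows (a sketch, not the NPU implementation):
import torch
from torchvision.ops import batched_nms

def single_image_nms_sketch(boxes, scores, score_thresh, nms_thresh, topk):
    # boxes: R x C x 4, scores: R x C (background column already dropped)
    filter_mask = scores > score_thresh                    # R x C boolean
    filter_inds = filter_mask.nonzero()                    # M x 2: (proposal idx, class idx)
    boxes = boxes[filter_inds[:, 0], filter_inds[:, 1]]    # M x 4
    scores = scores[filter_mask]                           # M
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk >= 0:
        keep = keep[:topk]
    return boxes[keep], scores[keep], filter_inds[keep, 1]
# ------------------------------------------------------------------------------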
+ boxes = Boxes(boxes.reshape(-1, 4)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, + num_bbox_reg_classes, 4) # R x C x 4 (1000, 80, 4) + + + if boxes.dtype == torch.float32: + boxes = boxes.reshape(1, 1000, num_bbox_reg_classes, 4).half() + scores = scores.reshape( + 1,scores.shape[0],num_bbox_reg_classes).half() + else: + boxes = boxes.reshape(1, 1000, num_bbox_reg_classes, 4) + scores = scores.reshape(1,scores.shape[0],num_bbox_reg_classes) + + nmsed_boxex, nmsed_scores, nmsed_classes, nmsed_num = \ + torch.npu_batch_nms(boxes, scores, score_thresh, nms_thresh, 400, 400) + + result = Instances(image_shape) + result.pred_boxes = Boxes(nmsed_boxex.reshape(nmsed_boxex.shape[1:])) + result.scores = nmsed_scores.reshape(nmsed_scores.shape[1]) + result.pred_classes = nmsed_classes.reshape(nmsed_classes.shape[1]) + return result, None + + +class FastRCNNOutputs: + """ + An internal implementation that stores information about outputs of a Fast R-CNN head, + and provides methods that are used to decode the outputs of a Fast R-CNN head. + """ + + def __init__( + self, + box2box_transform, + pred_class_logits, + pred_proposal_deltas, + proposals, + smooth_l1_beta=0.0, + box_reg_loss_type="smooth_l1", + ): + """ + Args: + box2box_transform (Box2BoxTransform/Box2BoxTransformRotated): + box2box transform instance for proposal-to-detection transformations. + pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class + logits for all R predicted object instances. + Each row corresponds to a predicted object instance. + pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for + class-specific or class-agnostic regression. It stores the predicted deltas that + transform proposals into final box detections. + B is the box dimension (4 or 5). + When B is 4, each row is [dx, dy, dw, dh (, ....)]. + When B is 5, each row is [dx, dy, dw, dh, da (, ....)]. + proposals (list[Instances]): A list of N Instances, where Instances i stores the + proposals for image i, in the field "proposal_boxes". + When training, each Instances must have ground-truth labels + stored in the field "gt_classes" and "gt_boxes". + The total number of all instances must be equal to R. + smooth_l1_beta (float): The transition point between L1 and L2 loss in + the smooth L1 loss function. When set to 0, the loss becomes L1. When + set to +inf, the loss becomes constant 0. + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + """ + self.box2box_transform = box2box_transform + self.num_preds_per_image = [len(p) for p in proposals] + self.pred_class_logits = pred_class_logits + self.pred_proposal_deltas = pred_proposal_deltas + self.smooth_l1_beta = smooth_l1_beta + self.box_reg_loss_type = box_reg_loss_type + + self.image_shapes = [x.image_size for x in proposals] + + if len(proposals): + box_type = type(proposals[0].proposal_boxes) + # cat(..., dim=0) concatenates over all images in the batch + self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) + assert ( + not self.proposals.tensor.requires_grad + ), "Proposals should not require gradients!" + + # The following fields should exist only when training. 
+ if proposals[0].has("gt_boxes"): + self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals]) + assert proposals[0].has("gt_classes") + self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) + else: + self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device)) + self._no_instances = len(proposals) == 0 # no instances found + + def _log_accuracy(self): + """ + Log the accuracy metrics to EventStorage. + """ + num_instances = self.gt_classes.numel() + pred_classes = self.pred_class_logits.argmax(dim=1) + bg_class_ind = self.pred_class_logits.shape[1] - 1 + + fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind) + num_fg = fg_inds.nonzero().numel() + fg_gt_classes = self.gt_classes[fg_inds] + fg_pred_classes = pred_classes[fg_inds] + + num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() + num_accurate = (pred_classes == self.gt_classes).nonzero().numel() + fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() + + storage = get_event_storage() + if num_instances > 0: + storage.put_scalar("fast_rcnn/cls_accuracy", num_accurate / num_instances) + if num_fg > 0: + storage.put_scalar("fast_rcnn/fg_cls_accuracy", fg_num_accurate / num_fg) + storage.put_scalar("fast_rcnn/false_negative", num_false_negative / num_fg) + + def softmax_cross_entropy_loss(self): + """ + Compute the softmax cross entropy loss for box classification. + + Returns: + scalar Tensor + """ + if self._no_instances: + return 0.0 * self.pred_class_logits.sum() + else: + return F.cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean") + + def box_reg_loss(self): + """ + Compute the smooth L1 loss for box regression. + + Returns: + scalar Tensor + """ + if self._no_instances: + return 0.0 * self.pred_proposal_deltas.sum() + + box_dim = self.gt_boxes.tensor.size(1) # 4 or 5 + cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim + device = self.pred_proposal_deltas.device + + bg_class_ind = self.pred_class_logits.shape[1] - 1 + + # Box delta loss is only computed between the prediction for the gt class k + # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions + # for non-gt classes and background. + # Empty fg_inds produces a valid loss of zero as long as the size_average + # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally + # and would produce a nan loss). 
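# --- Editor's sketch (not part of the patch) ---------------------------------
# Tiny worked example of the class-specific column gather implemented just
# below: for class-specific regression, proposal i reads the 4 columns that
# belong to its ground-truth class.
import torch

box_dim = 4
gt_classes = torch.tensor([2, 0, 5])                      # gt class per proposal
cols = box_dim * gt_classes[:, None] + torch.arange(box_dim)
# cols -> [[ 8,  9, 10, 11],
#          [ 0,  1,  2,  3],
#          [20, 21, 22, 23]]
rows = torch.arange(len(gt_classes))[:, None]
# pred_proposal_deltas[rows, cols] would then select each proposal's 4 deltas
# out of the (R, K * 4) prediction matrix.
# ------------------------------------------------------------------------------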
+ # fg_inds = nonzero_tuple((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind))[0] + fg_inds = ((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)) + if cls_agnostic_bbox_reg: + # pred_proposal_deltas only corresponds to foreground class for agnostic + gt_class_cols = torch.arange(box_dim, device=device) + else: + gt_classes_num = self.gt_classes.shape[0] + gt_class_cols_all = box_dim * self.gt_classes[:, None] \ + + torch.arange(box_dim, device=device) + gt_class_cols_all_mask = ((gt_class_cols_all >= 0) & (gt_class_cols_all < 320)) + gt_class_cols_all[~gt_class_cols_all_mask] = 0 + fg_inds_all = torch.arange(gt_classes_num, device=device) + + if self.box_reg_loss_type == "smooth_l1": + gt_proposal_deltas = torch.npu_bounding_box_encode( + self.proposals.tensor, self.gt_boxes.tensor, + 0, 0, 0, 0, 0.1, 0.1, 0.2, 0.2) + input_ = self.pred_proposal_deltas[fg_inds_all.long()[:, None], gt_class_cols_all.long()] + target = gt_proposal_deltas + input_[~fg_inds] = 0 + target[~fg_inds] = 0 + loss_box_reg = smooth_l1_loss(input_, target, self.smooth_l1_beta, reduction="sum") + + + elif self.box_reg_loss_type == "giou": + loss_box_reg = giou_loss( + self._predict_boxes()[fg_inds[:, None].long(), gt_class_cols.long()], + self.gt_boxes.tensor[fg_inds.long()], + reduction="sum", + ) + else: + raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'") + + # The loss is normalized using the total number of regions (R), not the number + # of foreground regions even though the box regression loss is only defined on + # foreground regions. Why? Because doing so gives equal training influence to + # each foreground example. To see how, consider two different minibatches: + # (1) Contains a single foreground region + # (2) Contains 100 foreground regions + # If we normalize by the number of foreground regions, the single example in + # minibatch (1) will be given 100 times as much influence as each foreground + # example in minibatch (2). Normalizing by the total number of regions, R, + # means that the single example in minibatch (1) and each of the 100 examples + # in minibatch (2) are given equal influence. + loss_box_reg = loss_box_reg / self.gt_classes.numel() + return loss_box_reg + + def _predict_boxes(self): + """ + Returns: + Tensor: A Tensors of predicted class-specific or class-agnostic boxes + for all images in a batch. Element i has shape (Ri, K * B) or (Ri, B), where Ri is + the number of predicted objects for image i and B is the box dimension (4 or 5) + """ + return self.box2box_transform.apply_deltas(self.pred_proposal_deltas, self.proposals.tensor) + + """ + A subclass is expected to have the following methods because + they are used to query information about the head predictions. + """ + + def losses(self): + """ + Compute the default losses for box head in Fast(er) R-CNN, + with softmax cross entropy loss and smooth L1 loss. + + Returns: + A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg". 
+ """ + return {"loss_cls": self.softmax_cross_entropy_loss(), "loss_box_reg": self.box_reg_loss()} + + def predict_boxes(self): + """ + Deprecated + """ + return self._predict_boxes().split(self.num_preds_per_image, dim=0) + + def predict_probs(self): + """ + Deprecated + """ + probs = F.softmax(self.pred_class_logits, dim=-1) + return probs.split(self.num_preds_per_image, dim=0) + + def inference(self, score_thresh, nms_thresh, topk_per_image): + """ + Deprecated + """ + boxes = self.predict_boxes() + scores = self.predict_probs() + image_shapes = self.image_shapes + return fast_rcnn_inference( + boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image + ) + + +class FastRCNNOutputLayers(nn.Module): + """ + Two linear layers for predicting Fast R-CNN outputs: + (1) proposal-to-detection box regression deltas + (2) classification scores + """ + + @configurable + def __init__( + self, + input_shape: ShapeSpec, + *, + box2box_transform, + num_classes: int, + test_score_thresh: float = 0.0, + test_nms_thresh: float = 0.5, + test_topk_per_image: int = 100, + cls_agnostic_bbox_reg: bool = False, + smooth_l1_beta: float = 0.0, + box_reg_loss_type: str = "smooth_l1", + loss_weight: Union[float, Dict[str, float]] = 1.0, + ): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature to this module + box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): + num_classes (int): number of foreground classes + test_score_thresh (float): threshold to filter predictions results. + test_nms_thresh (float): NMS threshold for prediction results. + test_topk_per_image (int): number of top predictions to produce per image. + cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression + smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if + `box_reg_loss_type` is "smooth_l1" + box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou" + loss_weight (float|dict): weights to use for losses. Can be single float for weighting + all losses, or a dict of individual weightings. 
Valid dict keys are: + "loss_cls" - applied to classification loss + "loss_box_reg" - applied to box regression loss + """ + super().__init__() + if isinstance(input_shape, int): # some backward compatibility + input_shape = ShapeSpec(channels=input_shape) + input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) + # prediction layer for num_classes foreground classes and one background class (hence + 1) + self.cls_score = Linear(input_size, num_classes + 1) + num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes + box_dim = len(box2box_transform.weights) + self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim) + + nn.init.normal_(self.cls_score.weight, std=0.01) + nn.init.normal_(self.bbox_pred.weight, std=0.001) + for l in [self.cls_score, self.bbox_pred]: + nn.init.constant_(l.bias, 0) + + self.box2box_transform = box2box_transform + self.smooth_l1_beta = smooth_l1_beta + self.test_score_thresh = test_score_thresh + self.test_nms_thresh = test_nms_thresh + self.test_topk_per_image = test_topk_per_image + self.box_reg_loss_type = box_reg_loss_type + if isinstance(loss_weight, float): + loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} + self.loss_weight = loss_weight + + @classmethod + def from_config(cls, cfg, input_shape): + return { + "input_shape": input_shape, + "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), + # fmt: off + "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, + "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, + "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, + "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE, + "box_reg_loss_type" : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, + "loss_weight" : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT}, + # fmt: on + } + + def forward(self, x): + """ + Args: + x: per-region features of shape (N, ...) for N bounding boxes to predict. + + Returns: + Tensor: shape (N,K+1), scores for each of the N box. Each row contains the scores for + K object categories and 1 background class. + Tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), or (N,4) + for class-agnostic regression. + """ + if x.dim() > 2: + x = torch.flatten(x, start_dim=1) + scores = self.cls_score(x) + proposal_deltas = self.bbox_pred(x) + return scores, proposal_deltas + + # TODO: move the implementation to this class. + def losses(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, + ``gt_classes`` are expected. + + Returns: + Dict[str, Tensor]: dict of losses + """ + scores, proposal_deltas = predictions + losses = FastRCNNOutputs( + self.box2box_transform, + scores, + proposal_deltas, + proposals, + self.smooth_l1_beta, + self.box_reg_loss_type, + ).losses() + return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} + + def inference(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Instances]: same as `fast_rcnn_inference`. 
+ list[Tensor]: same as `fast_rcnn_inference`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + return fast_rcnn_inference( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + def predict_boxes_for_gt_classes(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were used + to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected. + + Returns: + list[Tensor]: A list of Tensors of predicted boxes for GT classes in case of + class-specific box head. Element i of the list has shape (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + scores, proposal_deltas = predictions + proposal_boxes = [p.proposal_boxes for p in proposals] + proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor + N, B = proposal_boxes.shape + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, proposal_boxes + ) # Nx(KxB) + + K = predict_boxes.shape[1] // B + if K > 1: + gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) + # Some proposals are ignored or have a background class. Their gt_classes + # cannot be used as index. + gt_classes = gt_classes.clamp_(0, K - 1) + + predict_boxes = predict_boxes.view(N, K, B)[ + torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes + ] + num_prop_per_image = [len(p) for p in proposals] + return predict_boxes.split(num_prop_per_image) + + def predict_boxes(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. The ``proposal_boxes`` field is expected. + + Returns: + list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes + for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is + the number of proposals for image i and B is the box dimension (4 or 5) + """ + if not len(proposals): + return [] + _, proposal_deltas = predictions + num_prop_per_image = [len(p) for p in proposals] + proposal_boxes = [p.proposal_boxes for p in proposals] + proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor + predict_boxes = self.box2box_transform.apply_deltas( + proposal_deltas, proposal_boxes + ) # Nx(KxB) + return predict_boxes.split(num_prop_per_image) + + def predict_probs(self, predictions, proposals): + """ + Args: + predictions: return values of :meth:`forward()`. + proposals (list[Instances]): proposals that match the features that were + used to compute predictions. + + Returns: + list[Tensor]: A list of Tensors of predicted class probabilities for each image. + Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. 
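# --- Editor's sketch (not part of the patch) ---------------------------------
# Shape check for the two prediction heads described above (standalone
# example; the sizes are illustrative, 80 foreground classes as in COCO):
import torch
import torch.nn as nn

num_classes, box_dim, feat_dim, N = 80, 4, 1024, 512
cls_score = nn.Linear(feat_dim, num_classes + 1)          # +1 for background
bbox_pred = nn.Linear(feat_dim, num_classes * box_dim)    # class-specific deltas

x = torch.randn(N, feat_dim)
scores, deltas = cls_score(x), bbox_pred(x)
# scores: (512, 81)  -> split per image and softmax'd by predict_probs
# deltas: (512, 320) -> decoded against proposal boxes by predict_boxes
# ------------------------------------------------------------------------------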
+ """ + scores, _ = predictions + num_inst_per_image = [len(p) for p in proposals] + probs = F.softmax(scores, dim=-1) + return probs.split(num_inst_per_image, dim=0) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/keypoint_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/keypoint_head.py new file mode 100644 index 0000000000000000000000000000000000000000..22266f43a981f1a5161251912aa3c287610b4186 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/keypoint_head.py @@ -0,0 +1,275 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate +from detectron2.structures import Instances, heatmaps_to_keypoints +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +_TOTAL_SKIPPED = 0 + + +__all__ = [ + "ROI_KEYPOINT_HEAD_REGISTRY", + "build_keypoint_head", + "BaseKeypointRCNNHead", + "KRCNNConvDeconvUpsampleHead", +] + + +ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") +ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ +Registry for keypoint heads, which make keypoint predictions from per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def build_keypoint_head(cfg, input_shape): + """ + Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME + return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) + + +def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): + """ + Arguments: + pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number + of instances in the batch, K is the number of keypoints, and S is the side length + of the keypoint heatmap. The values are spatial logits. + instances (list[Instances]): A list of M Instances, where M is the batch size. + These instances are predictions from the model + that are in 1:1 correspondence with pred_keypoint_logits. + Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` + instance. + normalizer (float): Normalize the loss by this amount. + If not specified, we normalize by the number of visible keypoints in the minibatch. + + Returns a scalar tensor containing the loss. 
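# --- Editor's sketch (not part of the patch) ---------------------------------
# The loss described above treats every visible keypoint as one S*S-way
# classification over heatmap bins; a standalone sketch of that reduction:
import torch
import torch.nn.functional as F

N, K, S = 2, 17, 56
pred_keypoint_logits = torch.randn(N, K, S, S)
keypoint_targets = torch.randint(0, S * S, (N * K,))      # gt bin per keypoint
valid = torch.arange(N * K)                               # pretend all are visible

logits = pred_keypoint_logits.view(N * K, S * S)
loss = F.cross_entropy(logits[valid], keypoint_targets[valid], reduction="sum")
loss = loss / valid.numel()                               # "visible" normalizer
# ------------------------------------------------------------------------------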
+ """ + heatmaps = [] + valid = [] + + keypoint_side_len = pred_keypoint_logits.shape[2] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + keypoints = instances_per_image.gt_keypoints + heatmaps_per_image, valid_per_image = keypoints.to_heatmap( + instances_per_image.proposal_boxes.tensor, keypoint_side_len + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + if len(heatmaps): + keypoint_targets = cat(heatmaps, dim=0) + valid = cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if len(heatmaps) == 0 or valid.numel() == 0: + global _TOTAL_SKIPPED + _TOTAL_SKIPPED += 1 + storage = get_event_storage() + storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) + return pred_keypoint_logits.sum() * 0 + + N, K, H, W = pred_keypoint_logits.shape + pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy( + pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" + ) + + # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch + if normalizer is None: + normalizer = valid.numel() + keypoint_loss /= normalizer + + return keypoint_loss + + +def keypoint_rcnn_inference(pred_keypoint_logits, pred_instances): + """ + Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) + and add it to the `pred_instances` as a `pred_keypoints` field. + + Args: + pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number + of instances in the batch, K is the number of keypoints, and S is the side length of + the keypoint heatmap. The values are spatial logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images. + + Returns: + None. Each element in pred_instances will contain an extra "pred_keypoints" field. + The field is a tensor of shape (#instance, K, 3) where the last + dimension corresponds to (x, y, score). + The scores are larger than 0. + """ + # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) + bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) + + keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits.detach(), bboxes_flat.detach()) + num_instances_per_image = [len(i) for i in pred_instances] + keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) + + for keypoint_results_per_image, instances_per_image in zip(keypoint_results, pred_instances): + # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) + instances_per_image.pred_keypoints = keypoint_results_per_image + + +class BaseKeypointRCNNHead(nn.Module): + """ + Implement the basic Keypoint R-CNN losses and inference logic described in :paper:`Mask R-CNN`. + """ + + @configurable + def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): + """ + NOTE: this interface is experimental. + + Args: + num_keypoints (int): number of keypoints to predict + loss_weight (float): weight to multiple on the keypoint loss + loss_normalizer (float or str): + If float, divide the loss by `loss_normalizer * #images`. + If 'visible', the loss is normalized by the total number of + visible keypoints across images. 
+ """ + super().__init__() + self.num_keypoints = num_keypoints + self.loss_weight = loss_weight + assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer + self.loss_normalizer = loss_normalizer + + @classmethod + def from_config(cls, cfg, input_shape): + ret = { + "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, + "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, + } + normalize_by_visible = ( + cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS + ) # noqa + if not normalize_by_visible: + batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE + positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION + ret["loss_normalizer"] = ( + ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction + ) + else: + ret["loss_normalizer"] = "visible" + return ret + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses if in training. The predicted "instances" if in inference. + """ + x = self.layers(x) + if self.training: + num_images = len(instances) + normalizer = ( + None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer + ) + return { + "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) + * self.loss_weight + } + else: + keypoint_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from regional input features. + """ + raise NotImplementedError + + +@ROI_KEYPOINT_HEAD_REGISTRY.register() +class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead): + """ + A standard keypoint head containing a series of 3x3 convs, followed by + a transpose convolution and bilinear interpolation for upsampling. + """ + + @configurable + def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + conv_dims: an iterable of output channel counts for each conv in the head + e.g. (512, 512, 512) for three convs outputting 512 channels. 
+ """ + super().__init__(num_keypoints=num_keypoints, **kwargs) + + # default up_scale to 2 (this can be made an option) + up_scale = 2 + in_channels = input_shape.channels + + self.blocks = [] + for idx, layer_channels in enumerate(conv_dims, 1): + module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) + self.add_module("conv_fcn{}".format(idx), module) + self.blocks.append(module) + in_channels = layer_channels + + deconv_kernel = 4 + self.score_lowres = ConvTranspose2d( + in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 + ) + self.up_scale = up_scale + + for name, param in self.named_parameters(): + if "bias" in name: + nn.init.constant_(param, 0) + elif "weight" in name: + # Caffe2 implementation uses MSRAFill, which in fact + # corresponds to kaiming_normal_ in PyTorch + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + ret["input_shape"] = input_shape + ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS + return ret + + def layers(self, x): + for layer in self.blocks: + x = F.relu(layer(x)) + x = self.score_lowres(x) + x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) + return x diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/mask_head.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/mask_head.py new file mode 100644 index 0000000000000000000000000000000000000000..278533f62e714fb2ad75d130a6694ff6c949ecca --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/mask_head.py @@ -0,0 +1,311 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm +from detectron2.structures import Instances +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +__all__ = [ + "BaseMaskRCNNHead", + "MaskRCNNConvUpsampleHead", + "build_mask_head", + "ROI_MASK_HEAD_REGISTRY", +] + + +ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") +ROI_MASK_HEAD_REGISTRY.__doc__ = """ +Registry for mask heads, which predicts instance masks given +per-region features. + +The registered object will be called with `obj(cfg, input_shape)`. +""" + + +def mask_rcnn_loss(pred_mask_logits, instances, vis_period=0): + """ + Compute the mask prediction loss defined in the Mask R-CNN paper. 
+ + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. These instances are in 1:1 + correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, + ...) associated with each instance are stored in fields. + vis_period (int): the period (in steps) to dump visualization. + + Returns: + mask_loss (Tensor): A scalar tensor containing the loss. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + total_num_masks = pred_mask_logits.size(0) + mask_side_len = pred_mask_logits.size(2) + assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" + + gt_classes = [] + gt_masks = [] + fg_selection_mask_list = [] + for instances_per_image in instances: + if len(instances_per_image) == 0: + continue + if not cls_agnostic_mask: + gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) + gt_classes.append(gt_classes_per_image) + + gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( + instances_per_image.proposal_boxes.tensor, mask_side_len + ).to(device=pred_mask_logits.device) + # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len + gt_masks.append(gt_masks_per_image) + fg_selection_mask = (instances_per_image.gt_classes != -1) \ + & (instances_per_image.gt_classes != 80) + fg_selection_mask_list.append(fg_selection_mask) + + fg_selection_masks = cat(fg_selection_mask_list, dim=0) + if len(gt_masks) == 0: + return pred_mask_logits.sum() * 0 + + gt_masks = cat(gt_masks, dim=0) + + if cls_agnostic_mask: + pred_mask_logits = pred_mask_logits[:, 0] + else: + gt_classes = cat(gt_classes, dim=0) + indices_all = torch.arange( + total_num_masks, device=pred_mask_logits.device) + gt_classes[~fg_selection_masks] = 0 + pred_mask_logits = pred_mask_logits[ + indices_all.long(), gt_classes.long()] + pred_mask_logits[~fg_selection_masks] = -1e4 + gt_masks[~fg_selection_masks] = 0 + + # if gt_masks.dtype == torch.bool: + # gt_masks_bool = gt_masks + # else: + # # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) + # gt_masks_bool = gt_masks > 0.5 + gt_masks = gt_masks.to(dtype=torch.float32) + loss_num = fg_selection_masks.sum().item() \ + * pred_mask_logits.shape[-2] * pred_mask_logits.shape[-1] + + # # Log the training accuracy (using gt classes and 0.5 threshold) + # mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool + # mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) + # num_positive = gt_masks_bool.sum().item() + # false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( + # gt_masks_bool.numel() - num_positive, 1.0 + # ) + # false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) + + # storage = get_event_storage() + # storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) + # storage.put_scalar("mask_rcnn/false_positive", false_positive) + # storage.put_scalar("mask_rcnn/false_negative", false_negative) + # if vis_period > 0 and storage.iter % vis_period == 0: + # pred_masks = pred_mask_logits.sigmoid() + # vis_masks = torch.cat([pred_masks, gt_masks], axis=2) + # name = "Left: mask 
prediction; Right: mask GT" + # for idx, vis_mask in enumerate(vis_masks): + # vis_mask = torch.stack([vis_mask] * 3, axis=0) + # storage.put_image(name + f" ({idx})", vis_mask) + mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits.float(), gt_masks, reduction="sum") / loss_num + return mask_loss + + +def mask_rcnn_inference(pred_mask_logits, pred_instances): + """ + Convert pred_mask_logits to estimated foreground probability masks while also + extracting only the masks for the predicted classes in pred_instances. For each + predicted box, the mask of the same class is attached to the instance by adding a + new "pred_masks" field to pred_instances. + + Args: + pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) + for class-specific or class-agnostic, where B is the total number of predicted masks + in all images, C is the number of foreground classes, and Hmask, Wmask are the height + and width of the mask predictions. The values are logits. + pred_instances (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Each Instances must have field "pred_classes". + + Returns: + None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, + Wmask) for predicted class. Note that the masks are returned as a soft (non-quantized) + masks the resolution predicted by the network; post-processing steps, such as resizing + the predicted masks to the original image resolution and/or binarizing them, is left + to the caller. + """ + cls_agnostic_mask = pred_mask_logits.size(1) == 1 + + if cls_agnostic_mask: + mask_probs_pred = pred_mask_logits.sigmoid() + else: + # Select masks corresponding to the predicted classes + num_masks = pred_mask_logits.shape[0] + class_pred = cat([i.pred_classes for i in pred_instances]) + indices = torch.arange(num_masks, device=class_pred.device) + # mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() + mask_probs_pred = pred_mask_logits[indices.long(), + class_pred.long()][:, None].sigmoid() + # mask_probs_pred.shape: (B, 1, Hmask, Wmask) + + num_boxes_per_image = [len(i) for i in pred_instances] + mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) + + for prob, instances in zip(mask_probs_pred, pred_instances): + instances.pred_masks = prob # (1, Hmask, Wmask) + + +class BaseMaskRCNNHead(nn.Module): + """ + Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` + """ + + @configurable + def __init__(self, *, vis_period=0): + """ + NOTE: this interface is experimental. + + Args: + vis_period (int): visualization period + """ + super().__init__() + self.vis_period = vis_period + + @classmethod + def from_config(cls, cfg, input_shape): + return {"vis_period": cfg.VIS_PERIOD} + + def forward(self, x, instances: List[Instances]): + """ + Args: + x: input region feature(s) provided by :class:`ROIHeads`. + instances (list[Instances]): contains the boxes & labels corresponding + to the input features. + Exact format is up to its caller to decide. + Typically, this is the foreground instances in training, with + "proposal_boxes" field and other gt annotations. + In inference, it contains boxes that are already predicted. + + Returns: + A dict of losses in training. The predicted "instances" in inference. 
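# --- Editor's sketch (not part of the patch) ---------------------------------
# Both mask_rcnn_loss and mask_rcnn_inference above pick one of the C mask
# channels per instance according to its class; the gather boils down to this
# (illustrative shapes):
import torch
import torch.nn.functional as F

B, C, M = 8, 80, 28
pred_mask_logits = torch.randn(B, C, M, M)
classes = torch.randint(0, C, (B,))                       # gt or predicted class
gt_masks = torch.randint(0, 2, (B, M, M)).float()

per_class_logits = pred_mask_logits[torch.arange(B), classes]     # (B, M, M)
loss = F.binary_cross_entropy_with_logits(per_class_logits, gt_masks)
probs = per_class_logits[:, None].sigmoid()               # (B, 1, M, M) at inference
# ------------------------------------------------------------------------------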
+ """ + x = self.layers(x) + if self.training: + return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period)} + else: + mask_rcnn_inference(x, instances) + return instances + + def layers(self, x): + """ + Neural network layers that makes predictions from input features. + """ + raise NotImplementedError + + +@ROI_MASK_HEAD_REGISTRY.register() +class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead): + """ + A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). + Predictions are made with a final 1x1 conv layer. + """ + + @configurable + def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): + """ + NOTE: this interface is experimental. + + Args: + input_shape (ShapeSpec): shape of the input feature + num_classes (int): the number of classes. 1 if using class agnostic prediction. + conv_dims (list[int]): a list of N>0 integers representing the output dimensions + of N-1 conv layers and the last upsample layer. + conv_norm (str or callable): normalization for the conv layers. + See :func:`detectron2.layers.get_norm` for supported types. + """ + super().__init__(**kwargs) + assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" + + self.conv_norm_relus = [] + + cur_channels = input_shape.channels + for k, conv_dim in enumerate(conv_dims[:-1]): + conv = Conv2d( + cur_channels, + conv_dim, + kernel_size=3, + stride=1, + padding=1, + bias=not conv_norm, + norm=get_norm(conv_norm, conv_dim), + activation=F.relu, + ) + self.add_module("mask_fcn{}".format(k + 1), conv) + self.conv_norm_relus.append(conv) + cur_channels = conv_dim + + self.deconv = ConvTranspose2d( + cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 + ) + cur_channels = conv_dims[-1] + + self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) + + for layer in self.conv_norm_relus + [self.deconv]: + weight_init.c2_msra_fill(layer) + # use normal distribution initialization for mask prediction layer + nn.init.normal_(self.predictor.weight, std=0.001) + if self.predictor.bias is not None: + nn.init.constant_(self.predictor.bias, 0) + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg, input_shape) + conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM + num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV + ret.update( + conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose + conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, + input_shape=input_shape, + ) + if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: + ret["num_classes"] = 1 + else: + ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES + return ret + + def layers(self, x): + for layer in self.conv_norm_relus: + x = layer(x) + x = F.relu(self.deconv(x)) + return self.predictor(x) + + +def build_mask_head(cfg, input_shape): + """ + Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. + """ + name = cfg.MODEL.ROI_MASK_HEAD.NAME + return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/roi_heads.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/roi_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..f34913fa04d38a897bf26ead03975e7a2f3a6eb1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/roi_heads.py @@ -0,0 +1,878 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import logging +import numpy as np +from typing import Dict, List, Optional, Tuple, Union +import torch +from torch import nn + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, nonzero_tuple +from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou, BitMasks, PolygonMasks +from detectron2.utils.events import get_event_storage +from detectron2.utils.registry import Registry + +from ..backbone.resnet import BottleneckBlock, make_stage +from ..matcher import Matcher +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from ..sampling import subsample_labels, subsample_labels_all +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .keypoint_head import build_keypoint_head +from .mask_head import build_mask_head + +ROI_HEADS_REGISTRY = Registry("ROI_HEADS") +ROI_HEADS_REGISTRY.__doc__ = """ +Registry for ROI heads in a generalized R-CNN model. +ROIHeads take feature maps and region proposals, and +perform per-region computation. + +The registered object will be called with `obj(cfg, input_shape)`. +The call is expected to return an :class:`ROIHeads`. +""" + +logger = logging.getLogger(__name__) + + +def build_roi_heads(cfg, input_shape): + """ + Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. + """ + name = cfg.MODEL.ROI_HEADS.NAME + return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) + + +def select_foreground_proposals( + proposals: List[Instances], bg_label: int +) -> Tuple[List[Instances], List[torch.Tensor]]: + """ + Given a list of N Instances (for N images), each containing a `gt_classes` field, + return a list of Instances that contain only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. 
+ """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fg_idxs = fg_selection_mask.nonzero().squeeze(1) + fg_proposals.append(proposals_per_image[fg_idxs]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + + +def select_foreground_proposals_fix_shape( + proposals: List[Instances], bg_label: int, +) -> Tuple[List[Instances], List[torch.Tensor]]: + """ + Given a list of N Instances (for N images), + each containing a `gt_classes` field, + return a list of Instances that contain + only instances with `gt_classes != -1 && + gt_classes != bg_label`. + + Args: + proposals (list[Instances]): A list of N Instances, + where N is the number of + images in the batch. + bg_label: label index of background class. + + Returns: + list[Instances]: N Instances, + each contains only the selected foreground instances. + list[Tensor]: N boolean vector, correspond to the selection mask of + each Instances object. True for selected instances. + """ + assert isinstance(proposals, (list, tuple)) + assert isinstance(proposals[0], Instances) + assert proposals[0].has("gt_classes") + fg_proposals = [] + fg_selection_masks = [] + for proposals_per_image in proposals: + gt_classes = proposals_per_image.gt_classes + fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) + fix_num = 128 # max 128 + fg_selection_mask_num = fg_selection_mask.sum().item() + if fg_selection_mask_num < fix_num: + bg_index_all = nonzero_tuple(~fg_selection_mask)[0] + bg_index_index_keep = torch.randperm( + bg_index_all.numel(), device=bg_index_all.device + )[:(fix_num-fg_selection_mask_num)] + bg_index_keep = bg_index_all[bg_index_index_keep] + fg_selection_mask[bg_index_keep.long()] = True + elif fg_selection_mask_num > fix_num: + fg_index_all = nonzero_tuple(fg_selection_mask)[0] + fg_index_index_del = torch.randperm( + fg_index_all.numel(), device=fg_index_all.device + )[:(fg_selection_mask_num-fix_num)] + fg_index_del = fg_index_all[fg_index_index_del] + fg_selection_mask[fg_index_del.long()] = False + + fg_proposals.append(proposals_per_image[fg_selection_mask]) + fg_selection_masks.append(fg_selection_mask) + return fg_proposals, fg_selection_masks + + +def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: + """ + Args: + proposals (list[Instances]): a list of N Instances, where N is the + number of images. + + Returns: + proposals: only contains proposals with at least one visible keypoint. + + Note that this is still slightly different from Detectron. + In Detectron, proposals for training keypoint head are re-sampled from + all the proposals with IOU>threshold & >=1 visible keypoint. + + Here, the proposals are first sampled from all proposals with + IOU>threshold, then proposals with no visible keypoint are filtered out. + This strategy seems to make no difference on Detectron and is easier to implement. 
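# --- Editor's sketch (not part of the patch) ---------------------------------
# select_foreground_proposals_fix_shape above pads with random background
# indices (or drops random foreground indices) so each image always selects
# exactly `fix_num` proposals, keeping downstream mask-head shapes static for
# the NPU. The core of that trick, in isolation:
import torch

def fix_count_mask(fg_mask: torch.Tensor, fix_num: int = 128) -> torch.Tensor:
    n_fg = int(fg_mask.sum())
    if n_fg < fix_num:
        bg_idx = (~fg_mask).nonzero(as_tuple=True)[0]
        pick = bg_idx[torch.randperm(bg_idx.numel())[: fix_num - n_fg]]
        fg_mask[pick] = True
    elif n_fg > fix_num:
        fg_idx = fg_mask.nonzero(as_tuple=True)[0]
        drop = fg_idx[torch.randperm(fg_idx.numel())[: n_fg - fix_num]]
        fg_mask[drop] = False
    return fg_mask
# ------------------------------------------------------------------------------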
+ """ + ret = [] + all_num_fg = [] + for proposals_per_image in proposals: + # If empty/unannotated image (hard negatives), skip filtering for train + if len(proposals_per_image) == 0: + ret.append(proposals_per_image) + continue + gt_keypoints = proposals_per_image.gt_keypoints.tensor + # #fg x K x 3 + vis_mask = gt_keypoints[:, :, 2] >= 1 + xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] + proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 + kp_in_box = ( + (xs >= proposal_boxes[:, :, 0]) + & (xs <= proposal_boxes[:, :, 2]) + & (ys >= proposal_boxes[:, :, 1]) + & (ys <= proposal_boxes[:, :, 3]) + ) + selection = (kp_in_box & vis_mask).any(dim=1) + selection_idxs = nonzero_tuple(selection)[0] + all_num_fg.append(selection_idxs.numel()) + ret.append(proposals_per_image[selection_idxs]) + + storage = get_event_storage() + storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) + return ret + + +class ROIHeads(torch.nn.Module): + """ + ROIHeads perform all per-region computation in an R-CNN. + + It typically contains logic to + + 1. (in training only) match proposals with ground truth and sample them + 2. crop the regions and extract per-region features using proposals + 3. make per-region predictions with different heads + + It can have many variants, implemented as subclasses of this class. + This base class contains the logic to match/sample proposals. + But it is not necessary to inherit this class if the sampling logic is not needed. + """ + + @configurable + def __init__( + self, + *, + num_classes, + batch_size_per_image, + positive_fraction, + proposal_matcher, + proposal_append_gt=True + ): + """ + NOTE: this interface is experimental. + + Args: + num_classes (int): number of classes. Used to label background proposals. + batch_size_per_image (int): number of proposals to sample for training + positive_fraction (float): fraction of positive (foreground) proposals + to sample for training. + proposal_matcher (Matcher): matcher that matches proposals and ground truth + proposal_append_gt (bool): whether to include ground truth as proposals as well + """ + super().__init__() + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + self.num_classes = num_classes + self.proposal_matcher = proposal_matcher + self.proposal_append_gt = proposal_append_gt + + @classmethod + def from_config(cls, cfg): + return { + "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, + "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, + "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, + "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, + # Matcher to assign box proposals to gt boxes + "proposal_matcher": Matcher( + cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, + cfg.MODEL.ROI_HEADS.IOU_LABELS, + allow_low_quality_matches=False, + ), + } + + def _sample_proposals( + self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Based on the matching between N proposals and M groundtruth, + sample the proposals and set their classification labels. + + Args: + matched_idxs (Tensor): a vector of length N, each is the best-matched + gt index in [0, M) for each proposal. + matched_labels (Tensor): a vector of length N, the matcher's label + (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. + gt_classes (Tensor): a vector of length M. + + Returns: + Tensor: a vector of indices of sampled proposals. 
Each is in [0, N). + Tensor: a vector of the same length, the classification label for + each sampled proposal. Each sample is labeled as either a category in + [0, num_classes) or the background (num_classes). + """ + has_gt = gt_classes.numel() > 0 + # Get the corresponding GT for each proposal + if has_gt: + gt_classes = gt_classes[matched_idxs.long()].int() + # Label unmatched proposals (0 label from matcher) as background (label=num_classes) + gt_classes[matched_labels == 0] = self.num_classes + # Label ignore proposals (-1 label) + gt_classes[matched_labels == -1] = -1 + else: + gt_classes = torch.zeros_like(matched_idxs) + self.num_classes + + sampled_fg_idxs, sampled_bg_idxs = subsample_labels( + gt_classes, self.batch_size_per_image, self.positive_fraction, self.num_classes + ) + sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0).long() + return sampled_idxs, gt_classes[sampled_idxs] + + @torch.no_grad() + def label_and_sample_proposals( + self, proposals: List[Instances], targets: List[Instances] + ) -> List[Instances]: + """ + Prepare some proposals to be used to train the ROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth + boxes, with a fraction of positives that is no larger than + ``self.positive_fraction``. + + Args: + See :meth:`ROIHeads.forward` + + Returns: + list[Instances]: + length `N` list of `Instances`s containing the proposals + sampled for training. Each `Instances` has the following fields: + + - proposal_boxes: the proposal boxes + - gt_boxes: the ground-truth box that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + + Other fields such as "gt_classes", "gt_masks", that's included in `targets`. + """ + gt_boxes = [x.gt_boxes for x in targets] + # Augment proposals with ground-truth boxes. + # In the case of learned proposals (e.g., RPN), when training starts + # the proposals will be low quality due to random initialization. + # It's possible that none of these initial + # proposals have high enough overlap with the gt objects to be used + # as positive examples for the second stage components (box head, + # cls head, mask head). Adding the gt boxes to the set of proposals + # ensures that the second stage components will have some positive + # examples from the start of training. For RPN, this augmentation improves + # convergence and empirically improves box AP on COCO by about 0.5 + # points (under one tested configuration). + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + # Set target attributes of the sampled proposals: + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + # We index all the attributes of targets that start with "gt_" + # and have not been added to proposals yet (="gt_classes"). 
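# --- Editor's sketch (not part of the patch) ---------------------------------
# _sample_proposals above delegates the fg/bg balancing to subsample_labels;
# assuming its usual detectron2 semantics, the sampling is roughly:
import torch

def subsample_sketch(gt_classes, batch_size_per_image=512,
                     positive_fraction=0.25, bg_label=80):
    positive = ((gt_classes != -1) & (gt_classes != bg_label)).nonzero(as_tuple=True)[0]
    negative = (gt_classes == bg_label).nonzero(as_tuple=True)[0]

    num_pos = min(positive.numel(), int(batch_size_per_image * positive_fraction))
    num_neg = min(negative.numel(), batch_size_per_image - num_pos)

    pos_idx = positive[torch.randperm(positive.numel())[:num_pos]]
    neg_idx = negative[torch.randperm(negative.numel())[:num_neg]]
    return pos_idx, neg_idx
# ------------------------------------------------------------------------------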
+ if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + # NOTE: here the indexing waste some compute, because heads + # like masks, keypoints, etc, will filter the proposals again, + # (by foreground/background, or number of keypoints in the image, etc) + # so we essentially index the data twice. + for (trg_name, trg_value) in targets_per_image.get_fields().items(): + if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): + proposals_per_image.set(trg_name, trg_value[sampled_targets]) + else: + gt_boxes = Boxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)) + ) + proposals_per_image.gt_boxes = gt_boxes + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + Args: + images (ImageList): + features (dict[str,Tensor]): input data as a mapping from feature + map name to tensor. Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + proposals (list[Instances]): length `N` list of `Instances`. The i-th + `Instances` contains object proposals for the i-th input image, + with fields "proposal_boxes" and "objectness_logits". + targets (list[Instances], optional): length `N` list of `Instances`. The i-th + `Instances` contains the ground-truth per-instance annotations + for the i-th input image. Specify `targets` during training only. + It may have the following fields: + + - gt_boxes: the bounding box of each instance. + - gt_classes: the label for each instance with a category ranging in [0, #class]. + - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. + - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. + + Returns: + list[Instances]: length `N` list of `Instances` containing the + detected instances. Returned during inference only; may be [] during training. + + dict[str->Tensor]: + mapping from a named loss to a tensor storing the loss. Used during training only. + """ + raise NotImplementedError() + + +@ROI_HEADS_REGISTRY.register() +class Res5ROIHeads(ROIHeads): + """ + The ROIHeads in a typical "C4" R-CNN model, where + the box and mask head share the cropping and + the per-region feature computation by a Res5 block. 
+ """ + + def __init__(self, cfg, input_shape): + super().__init__(cfg) + + # fmt: off + self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + pooler_scales = (1.0 / input_shape[self.in_features[0]].stride, ) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + self.mask_on = cfg.MODEL.MASK_ON + # fmt: on + assert not cfg.MODEL.KEYPOINT_ON + assert len(self.in_features) == 1 + + self.pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + + self.res5, out_channels = self._build_res5_block(cfg) + self.box_predictor = FastRCNNOutputLayers( + cfg, ShapeSpec(channels=out_channels, height=1, width=1) + ) + + if self.mask_on: + self.mask_head = build_mask_head( + cfg, + ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), + ) + + def _build_res5_block(self, cfg): + # fmt: off + stage_channel_factor = 2 ** 3 # res5 is 8x res2 + num_groups = cfg.MODEL.RESNETS.NUM_GROUPS + width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP + bottleneck_channels = num_groups * width_per_group * stage_channel_factor + out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor + stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 + norm = cfg.MODEL.RESNETS.NORM + assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ + "Deformable conv is not yet supported in res5 head." + # fmt: on + + blocks = make_stage( + BottleneckBlock, + 3, + first_stride=2, + in_channels=out_channels // 2, + bottleneck_channels=bottleneck_channels, + out_channels=out_channels, + num_groups=num_groups, + norm=norm, + stride_in_1x1=stride_in_1x1, + ) + return nn.Sequential(*blocks), out_channels + + def _shared_roi_transform(self, features, boxes): + x = self.pooler(features, boxes) + return self.res5(x) + + def forward(self, images, features, proposals, targets=None): + """ + See :meth:`ROIHeads.forward`. + """ + del images + + if self.training: + assert targets + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + proposal_boxes = [x.proposal_boxes for x in proposals] + box_features = self._shared_roi_transform( + [features[f] for f in self.in_features], proposal_boxes + ) + predictions = self.box_predictor(box_features.mean(dim=[2, 3])) + + if self.training: + del features + losses = self.box_predictor.losses(predictions, proposals) + if self.mask_on: + proposals, fg_selection_masks = select_foreground_proposals( + proposals, self.num_classes + ) + # Since the ROI feature transform is shared between boxes and masks, + # we don't need to recompute features. The mask loss is only defined + # on foreground proposals, so we need to select out the foreground + # features. + mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] + del box_features + losses.update(self.mask_head(mask_features, proposals)) + return [], losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes(self, features, instances): + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. 
+ + Returns: + instances (Instances): + the same `Instances` object, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + if self.mask_on: + features = [features[f] for f in self.in_features] + x = self._shared_roi_transform(features, [x.pred_boxes for x in instances]) + return self.mask_head(x, instances) + else: + return instances + + +@ROI_HEADS_REGISTRY.register() +class StandardROIHeads(ROIHeads): + """ + It's "standard" in a sense that there is no ROI transform sharing + or feature sharing between tasks. + Each head independently processes the input features by each head's + own pooler and head. + + This class is used by most models, such as FPN and C5. + To implement more models, you can subclass it and implement a different + :meth:`forward()` or a head. + """ + + @configurable + def __init__( + self, + *, + box_in_features: List[str], + box_pooler: ROIPooler, + box_head: nn.Module, + box_predictor: nn.Module, + mask_in_features: Optional[List[str]] = None, + mask_pooler: Optional[ROIPooler] = None, + mask_head: Optional[nn.Module] = None, + keypoint_in_features: Optional[List[str]] = None, + keypoint_pooler: Optional[ROIPooler] = None, + keypoint_head: Optional[nn.Module] = None, + train_on_pred_boxes: bool = False, + **kwargs + ): + """ + NOTE: this interface is experimental. + + Args: + box_in_features (list[str]): list of feature names to use for the box head. + box_pooler (ROIPooler): pooler to extra region features for box head + box_head (nn.Module): transform features to make box predictions + box_predictor (nn.Module): make box predictions from the feature. + Should have the same interface as :class:`FastRCNNOutputLayers`. + mask_in_features (list[str]): list of feature names to use for the mask head. + None if not using mask head. + mask_pooler (ROIPooler): pooler to extra region features for mask head + mask_head (nn.Module): transform features to make mask predictions + keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask*``. + train_on_pred_boxes (bool): whether to use proposal boxes or + predicted boxes from the box head to train other heads. + """ + super().__init__(**kwargs) + # keep self.in_features for backward compatibility + self.in_features = self.box_in_features = box_in_features + self.box_pooler = box_pooler + self.box_head = box_head + self.box_predictor = box_predictor + + self.mask_on = mask_in_features is not None + if self.mask_on: + self.mask_in_features = mask_in_features + self.mask_pooler = mask_pooler + self.mask_head = mask_head + self.keypoint_on = keypoint_in_features is not None + if self.keypoint_on: + self.keypoint_in_features = keypoint_in_features + self.keypoint_pooler = keypoint_pooler + self.keypoint_head = keypoint_head + + self.train_on_pred_boxes = train_on_pred_boxes + + @classmethod + def from_config(cls, cfg, input_shape): + ret = super().from_config(cfg) + ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES + # Subclasses that have not been updated to use from_config style construction + # may have overridden _init_*_head methods. In this case, those overridden methods + # will not be classmethods and we need to avoid trying to call them here. + # We test for this with ismethod which only returns True for bound methods of cls. + # Such subclasses will need to handle calling their overridden _init_*_head methods. 
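# Illustrative, standalone sketch (not part of the patched code): the inspect.ismethod()
# check used just below tells bound classmethods apart from legacy instance-method
# overrides. Class names here are made up.
import inspect

class NewStyleHead:
    @classmethod
    def _init_box_head(cls):
        return {}

class LegacyStyleHead(NewStyleHead):
    def _init_box_head(self):          # overridden as a plain instance method
        return {}

print(inspect.ismethod(NewStyleHead._init_box_head))     # True: bound classmethod
print(inspect.ismethod(LegacyStyleHead._init_box_head))   # False: plain function on the class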
+ if inspect.ismethod(cls._init_box_head): + ret.update(cls._init_box_head(cfg, input_shape)) + if inspect.ismethod(cls._init_mask_head): + ret.update(cls._init_mask_head(cfg, input_shape)) + if inspect.ismethod(cls._init_keypoint_head): + ret.update(cls._init_keypoint_head(cfg, input_shape)) + return ret + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + + # If StandardROIHeads is applied on multiple feature maps (as in FPN), + # then we share the same predictors and therefore the channel counts must be the same + in_channels = [input_shape[f].channels for f in in_features] + # Check all channel counts are equal + assert len(set(in_channels)) == 1, in_channels + in_channels = in_channels[0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + # Here we split "box head" and "box predictor", which is mainly due to historical reasons. + # They are used together so the "box predictor" layers should be part of the "box head". + # New subclasses of ROIHeads do not need "box predictor"s. + box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @classmethod + def _init_mask_head(cls, cfg, input_shape): + if not cfg.MODEL.MASK_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"mask_in_features": in_features} + ret["mask_pooler"] = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + ret["mask_head"] = build_mask_head( + cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) + ) + return ret + + @classmethod + def _init_keypoint_head(cls, cfg, input_shape): + if not cfg.MODEL.KEYPOINT_ON: + return {} + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa + sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE + # fmt: on + + in_channels = [input_shape[f].channels for f in in_features][0] + + ret = {"keypoint_in_features": in_features} + ret["keypoint_pooler"] = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + ret["keypoint_head"] = build_keypoint_head( + cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) + ) + return ret + + def forward( + self, + images: ImageList, + features: Dict[str, 
torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: + """ + See :class:`ROIHeads.forward`. + """ + del images + if self.training: + assert targets + proposals = self.label_and_sample_proposals(proposals, targets) + del targets + + if self.training: + losses = self._forward_box(features, proposals) + # Usually the original proposals used by the box head are used by the mask, keypoint + # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes + # predicted by the box head. + losses.update(self._forward_mask(features, proposals)) + losses.update(self._forward_keypoint(features, proposals)) + return proposals, losses + else: + pred_instances = self._forward_box(features, proposals) + # During inference cascaded prediction is used: the mask and keypoints heads are only + # applied to the top scoring box detections. + pred_instances = self.forward_with_given_boxes(features, pred_instances) + return pred_instances, {} + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> List[Instances]: + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (list[Instances]): + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + assert not self.training + assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") + + instances = self._forward_mask(features, instances) + instances = self._forward_keypoint(features, instances) + return instances + + def _forward_box( + self, features: Dict[str, torch.Tensor], proposals: List[Instances] + ) -> Union[Dict[str, torch.Tensor], List[Instances]]: + """ + Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, + the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + proposals (list[Instances]): the per-image object proposals with + their matching ground truth. + Each has fields "proposal_boxes", and "objectness_logits", + "gt_classes", "gt_boxes". + + Returns: + In training, a dict of losses. + In inference, a list of `Instances`, the predicted instances. + """ + features = [features[f] for f in self.box_in_features] + box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) + box_features = self.box_head(box_features) + predictions = self.box_predictor(box_features) + del box_features + + if self.training: + losses = self.box_predictor.losses(predictions, proposals) + # proposals is modified in-place below, so losses must be computed first. 
+ if self.train_on_pred_boxes: + with torch.no_grad(): + pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( + predictions, proposals + ) + for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): + proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) + return losses + else: + pred_instances, _ = self.box_predictor.inference(predictions, proposals) + return pred_instances + + def _forward_mask( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> Union[Dict[str, torch.Tensor], List[Instances]]: + """ + Forward logic of the mask prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict masks. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_masks" and return it. + """ + if not self.mask_on: + return {} if self.training else instances + + features = [features[f] for f in self.mask_in_features] + + if self.training: + # The loss is only defined on positive proposals. + proposals, _ = select_foreground_proposals_fix_shape(instances, self.num_classes) + proposal_boxes = [x.proposal_boxes for x in proposals] + mask_features = self.mask_pooler(features, proposal_boxes) + return self.mask_head(mask_features, proposals) + else: + pred_boxes = [x.pred_boxes for x in instances] + mask_features = self.mask_pooler(features, pred_boxes) + return self.mask_head(mask_features, instances) + + def _forward_keypoint( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ) -> Union[Dict[str, torch.Tensor], List[Instances]]: + """ + Forward logic of the keypoint prediction branch. + + Args: + features (dict[str, Tensor]): mapping from feature map names to tensor. + Same as in :meth:`ROIHeads.forward`. + instances (list[Instances]): the per-image instances to train/predict keypoints. + In training, they can be the proposals. + In inference, they can be the predicted boxes. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "pred_keypoints" and return it. + """ + if not self.keypoint_on: + return {} if self.training else instances + + features = [features[f] for f in self.keypoint_in_features] + + if self.training: + # The loss is defined on positive proposals with >=1 visible keypoints. + proposals, _ = select_foreground_proposals(instances, self.num_classes) + proposals = select_proposals_with_visible_keypoints(proposals) + proposal_boxes = [x.proposal_boxes for x in proposals] + + keypoint_features = self.keypoint_pooler(features, proposal_boxes) + return self.keypoint_head(keypoint_features, proposals) + else: + pred_boxes = [x.pred_boxes for x in instances] + keypoint_features = self.keypoint_pooler(features, pred_boxes) + return self.keypoint_head(keypoint_features, instances) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..21d9b7abe636617c9bcfcbac1edaf1eb9f32f226 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py @@ -0,0 +1,289 @@ +# Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.layers import ShapeSpec, batched_nms_rotated +from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated +from detectron2.utils.events import get_event_storage + +from ..box_regression import Box2BoxTransformRotated +from ..poolers import ROIPooler +from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals +from .box_head import build_box_head +from .fast_rcnn import FastRCNNOutputLayers +from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads + +logger = logging.getLogger(__name__) + +""" +Shape shorthand in this module: + + N: number of images in the minibatch + R: number of ROIs, combined over all images, in the minibatch + Ri: number of ROIs in image i + K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. + +Naming convention: + + deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box + transform (see :class:`box_regression.Box2BoxTransformRotated`). + + pred_class_logits: predicted class scores in [-inf, +inf]; use + softmax(pred_class_logits) to estimate P(class). + + gt_classes: ground-truth classification labels in [0, K], where [0, K) represent + foreground object classes and K represents the background class. + + pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals + to detection box predictions. + + gt_proposal_deltas: ground-truth rotated box2box transform deltas +""" + + +def fast_rcnn_inference_rotated( + boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image +): + """ + Call `fast_rcnn_inference_single_image_rotated` for all images. + + Args: + boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic + boxes for each image. Element i has shape (Ri, K * 5) if doing + class-specific regression, or (Ri, 5) if doing class-agnostic + regression, where Ri is the number of predicted objects for image i. + This is compatible with the output of :meth:`FastRCNNOutputs.predict_boxes`. + scores (list[Tensor]): A list of Tensors of predicted class scores for each image. + Element i has shape (Ri, K + 1), where Ri is the number of predicted objects + for image i. Compatible with the output of :meth:`FastRCNNOutputs.predict_probs`. + image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. + score_thresh (float): Only return detections with a confidence score exceeding this + threshold. + nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. + topk_per_image (int): The number of top scoring detections to return. Set < 0 to return + all detections. + + Returns: + instances: (list[Instances]): A list of N instances, one for each image in the batch, + that stores the topk most confidence detections. 
+ kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates + the corresponding boxes/scores index in [0, Ri) from the input, for image i. + """ + result_per_image = [ + fast_rcnn_inference_single_image_rotated( + boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image + ) + for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) + ] + return [x[0] for x in result_per_image], [x[1] for x in result_per_image] + + +def fast_rcnn_inference_single_image_rotated( + boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image +): + """ + Single-image inference. Return rotated bounding-box detection results by thresholding + on scores and applying rotated non-maximum suppression (Rotated NMS). + + Args: + Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes + per image. + + Returns: + Same as `fast_rcnn_inference_rotated`, but for only one image. + """ + valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) + if not valid_mask.all(): + boxes = boxes[valid_mask] + scores = scores[valid_mask] + + B = 5 # box dimension + scores = scores[:, :-1] + num_bbox_reg_classes = boxes.shape[1] // B + # Convert to Boxes to use the `clip` function ... + boxes = RotatedBoxes(boxes.reshape(-1, B)) + boxes.clip(image_shape) + boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B + # Filter results based on detection scores + filter_mask = scores > score_thresh # R x K + # R' x 2. First column contains indices of the R predictions; + # Second column contains indices of classes. + filter_inds = filter_mask.nonzero() + if num_bbox_reg_classes == 1: + boxes = boxes[filter_inds[:, 0], 0] + else: + boxes = boxes[filter_mask] + scores = scores[filter_mask] + + # Apply per-class Rotated NMS + keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) + if topk_per_image >= 0: + keep = keep[:topk_per_image] + boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] + + result = Instances(image_shape) + result.pred_boxes = RotatedBoxes(boxes) + result.scores = scores + result.pred_classes = filter_inds[:, 1] + + return result, filter_inds[:, 0] + + +class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): + """ + Two linear layers for predicting Rotated Fast R-CNN outputs. + """ + + @classmethod + def from_config(cls, cfg, input_shape): + args = super().from_config(cfg, input_shape) + args["box2box_transform"] = Box2BoxTransformRotated( + weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS + ) + return args + + def inference(self, predictions, proposals): + """ + Returns: + list[Instances]: same as `fast_rcnn_inference_rotated`. + list[Tensor]: same as `fast_rcnn_inference_rotated`. + """ + boxes = self.predict_boxes(predictions, proposals) + scores = self.predict_probs(predictions, proposals) + image_shapes = [x.image_size for x in proposals] + + return fast_rcnn_inference_rotated( + boxes, + scores, + image_shapes, + self.test_score_thresh, + self.test_nms_thresh, + self.test_topk_per_image, + ) + + +@ROI_HEADS_REGISTRY.register() +class RROIHeads(StandardROIHeads): + """ + This class is used by Rotated Fast R-CNN to detect rotated boxes. + For now, it only supports box predictions but not mask or keypoints. + """ + + @configurable + def __init__(self, **kwargs): + """ + NOTE: this interface is experimental. 
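# Illustrative, standalone sketch (not part of the patched code): the score-threshold
# bookkeeping used by fast_rcnn_inference_single_image_rotated above, on toy scores.
import torch

scores = torch.tensor([[0.9, 0.1],        # R = 3 proposals, K = 2 classes
                       [0.2, 0.7],
                       [0.05, 0.02]])
filter_mask = scores > 0.5                # R x K boolean mask
filter_inds = filter_mask.nonzero()       # rows of (proposal index, class index)
kept_scores = scores[filter_mask]
# filter_inds -> [[0, 0], [1, 1]]; kept_scores -> [0.9, 0.7]; the class column of
# filter_inds is what lets batched_nms_rotated run NMS independently per class.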
+ """ + super().__init__(**kwargs) + assert ( + not self.mask_on and not self.keypoint_on + ), "Mask/Keypoints not supported in Rotated ROIHeads." + assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" + + @classmethod + def _init_box_head(cls, cfg, input_shape): + # fmt: off + in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES + pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION + pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) + sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO + pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE + # fmt: on + assert pooler_type in ["ROIAlignRotated"], pooler_type + # assume all channel counts are equal + in_channels = [input_shape[f].channels for f in in_features][0] + + box_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type=pooler_type, + ) + box_head = build_box_head( + cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) + ) + # This line is the only difference v.s. StandardROIHeads + box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) + return { + "box_in_features": in_features, + "box_pooler": box_pooler, + "box_head": box_head, + "box_predictor": box_predictor, + } + + @torch.no_grad() + def label_and_sample_proposals(self, proposals, targets): + """ + Prepare some proposals to be used to train the RROI heads. + It performs box matching between `proposals` and `targets`, and assigns + training labels to the proposals. + It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, + with a fraction of positives that is no larger than `self.positive_sample_fraction. + + Args: + See :meth:`StandardROIHeads.forward` + + Returns: + list[Instances]: length `N` list of `Instances`s containing the proposals + sampled for training. 
Each `Instances` has the following fields: + - proposal_boxes: the rotated proposal boxes + - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to + (this is only meaningful if the proposal has a label > 0; if label = 0 + then the ground-truth box is random) + - gt_classes: the ground-truth classification lable for each proposal + """ + gt_boxes = [x.gt_boxes for x in targets] + if self.proposal_append_gt: + proposals = add_ground_truth_to_proposals(gt_boxes, proposals) + + proposals_with_gt = [] + + num_fg_samples = [] + num_bg_samples = [] + for proposals_per_image, targets_per_image in zip(proposals, targets): + has_gt = len(targets_per_image) > 0 + match_quality_matrix = pairwise_iou_rotated( + targets_per_image.gt_boxes, proposals_per_image.proposal_boxes + ) + matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) + sampled_idxs, gt_classes = self._sample_proposals( + matched_idxs, matched_labels, targets_per_image.gt_classes + ) + + proposals_per_image = proposals_per_image[sampled_idxs] + proposals_per_image.gt_classes = gt_classes + + if has_gt: + sampled_targets = matched_idxs[sampled_idxs] + proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] + else: + gt_boxes = RotatedBoxes( + targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5)) + ) + proposals_per_image.gt_boxes = gt_boxes + + num_bg_samples.append((gt_classes == self.num_classes).sum().item()) + num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) + proposals_with_gt.append(proposals_per_image) + + # Log the number of fg/bg samples that are selected for training ROI heads + storage = get_event_storage() + storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) + storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) + + return proposals_with_gt diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/sampling.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..4af65ff7c13d750a0f9ce39deba8a6732a3af7f5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/sampling.py @@ -0,0 +1,172 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +import numpy as np + +from detectron2.layers import nonzero_tuple + +__all__ = ["subsample_labels", "subsample_labels_all"] + +def subsample_labels_bk( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. 
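# Illustrative, standalone sketch (not part of the patched code): the randperm-based
# sampling kept here as subsample_labels_bk, run on a toy label vector. The background
# label value below is chosen arbitrarily for the example.
import torch

labels = torch.tensor([2, 5, 5, -1, 0, 5, 1])        # -1 = ignore, 5 = background here
num_samples, positive_fraction, bg_label = 4, 0.5, 5

positive = ((labels != -1) & (labels != bg_label)).nonzero(as_tuple=True)[0]
negative = (labels == bg_label).nonzero(as_tuple=True)[0]
num_pos = min(positive.numel(), int(num_samples * positive_fraction))
num_neg = min(negative.numel(), num_samples - num_pos)
pos_idx = positive[torch.randperm(positive.numel())[:num_pos]]
neg_idx = negative[torch.randperm(negative.numel())[:num_neg]]
# pos_idx and neg_idx index into `labels`; together they hold at most num_samples entries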
+ + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. + + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. + """ + positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] + negative = nonzero_tuple(labels == bg_label)[0] + num_pos = int(num_samples * positive_fraction) + num_pos = min(positive.numel(), num_pos) + num_neg = num_samples - num_pos + num_neg = min(negative.numel(), num_neg) + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx = positive[perm1] + neg_idx = negative[perm2] + return pos_idx, neg_idx + + +def subsample_labels( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. 
+ """ + # positive = (labels != -1) & (labels != bg_label) + # negative = labels == bg_label + # num_pos = int(num_samples * positive_fraction) + # pos_idx, pos_mask = torch.npu_random_choice_with_mask(positive, num_pos) + # num_pos = min(pos_mask.sum(), num_pos) + # neg_idx, neg_mask = torch.npu_random_choice_with_mask(negative, num_samples) + # num_neg = min(neg_mask.sum(), num_samples - num_pos) + # return pos_idx.reshape((-1,))[:num_pos], neg_idx.reshape((-1,))[:num_neg] + positive = (labels != -1) & (labels != bg_label) + negative = labels == bg_label + + num_pos = int(num_samples * positive_fraction) + + pos_idx, pos_mask = torch.npu_random_choice_with_mask(positive, num_pos) + # protect against not enough positive examples + num_pos = min(pos_mask.sum(), num_pos) + neg_idx, neg_mask = torch.npu_random_choice_with_mask(negative, num_samples) + num_neg = min(neg_mask.sum(), num_samples - num_pos) + # protect against not enough negative examples + #num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + #perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + #perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + #pos_idx = positive[perm1] + #neg_idx = negative[perm2] + return pos_idx.reshape((-1,))[:num_pos], neg_idx.reshape((-1,))[:num_neg] + +def subsample_labels_all( + labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int +): + """ + Return `num_samples` (or fewer, if not enough found) + random samples from `labels` which is a mixture of positives & negatives. + It will try to return as many positives as possible without + exceeding `positive_fraction * num_samples`, and then try to + fill the remaining slots with negatives. + Args: + labels (Tensor): (N, ) label vector with values: + * -1: ignore + * bg_label: background ("negative") class + * otherwise: one or more foreground ("positive") classes + num_samples (int): The total number of labels with value >= 0 to return. + Values that are not sampled will be filled with -1 (ignore). + positive_fraction (float): The number of subsampled labels with values > 0 + is `min(num_positives, int(positive_fraction * num_samples))`. The number + of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. + In order words, if there are not enough positives, the sample is filled with + negatives. If there are also not enough negatives, then as many elements are + sampled as is possible. + bg_label (int): label index of background ("negative") class. + Returns: + pos_idx, neg_idx (Tensor): + 1D vector of indices. The total length of both is `num_samples` or fewer. 
+ """ + positive_mask = ((labels != -1) & (labels != bg_label)).int() + negative_mask = (labels == bg_label).int() + positive = nonzero_tuple(positive_mask)[0] + negative = nonzero_tuple(negative_mask)[0] + num_pos = int(num_samples * positive_fraction) + num_pos = min(positive.numel(), num_pos) + num_neg = num_samples - num_pos + num_neg = min(negative.numel(), num_neg) + num_pos_unuse = positive.numel() - num_pos + num_neg_unuse = negative.numel() - num_neg + if num_pos_unuse != 0: + perm1_un = torch.randperm( + positive.numel(), device=positive.device + )[:num_pos_unuse] + positive_unuse = positive[perm1_un] + positive_mask[positive_unuse.long()] = 0 + if num_neg_unuse != 0: + perm2_un = torch.randperm( + negative.numel(), device=negative.device + )[:num_neg_unuse] + negative_unuse = negative[perm2_un] + negative_mask[negative_unuse.long()] = 0 + sampled_idxs_mask = positive_mask + negative_mask + sampled_idxs = (nonzero_tuple(sampled_idxs_mask)[0]).long() + return sampled_idxs diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/test_time_augmentation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..4398aaf7df0b8c5e930e493ff190180594068e7b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/modeling/test_time_augmentation.py @@ -0,0 +1,304 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import numpy as np +from contextlib import contextmanager +from itertools import count +import torch +from fvcore.transforms import HFlipTransform, NoOpTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from detectron2.data.detection_utils import read_image +from detectron2.data.transforms import ( + RandomFlip, + ResizeShortestEdge, + ResizeTransform, + apply_augmentations, +) +from detectron2.structures import Boxes, Instances + +from .meta_arch import GeneralizedRCNN +from .postprocessing import detector_postprocess +from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image + +__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] + + +class DatasetMapperTTA: + """ + Implement test-time augmentation for detection data. + It is a callable which takes a dataset dict from a detection dataset, + and returns a list of dataset dicts where the images + are augmented from the input image by the transformations defined in the config. + This is used for test-time augmentation. + """ + + def __init__(self, cfg): + self.min_sizes = cfg.TEST.AUG.MIN_SIZES + self.max_size = cfg.TEST.AUG.MAX_SIZE + self.flip = cfg.TEST.AUG.FLIP + self.image_format = cfg.INPUT.FORMAT + + def __call__(self, dataset_dict): + """ + Args: + dict: a dict in standard model input format. See tutorials for details. 
+ + Returns: + list[dict]: + a list of dicts, which contain augmented version of the input image. + The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. + Each dict has field "transforms" which is a TransformList, + containing the transforms that are used to generate this image. + """ + numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() + shape = numpy_image.shape + orig_shape = (dataset_dict["height"], dataset_dict["width"]) + if shape[:2] != orig_shape: + # It transforms the "original" image in the dataset to the input image + pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) + else: + pre_tfm = NoOpTransform() + + # Create all combinations of augmentations to use + aug_candidates = [] # each element is a list[Augmentation] + for min_size in self.min_sizes: + resize = ResizeShortestEdge(min_size, self.max_size) + aug_candidates.append([resize]) # resize only + if self.flip: + flip = RandomFlip(prob=1.0) + aug_candidates.append([resize, flip]) # resize + flip + + # Apply all the augmentations + ret = [] + for aug in aug_candidates: + new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) + torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) + + dic = copy.deepcopy(dataset_dict) + dic["transforms"] = pre_tfm + tfms + dic["image"] = torch_image + ret.append(dic) + return ret + + +class GeneralizedRCNNWithTTA(nn.Module): + """ + A GeneralizedRCNN with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=3): + """ + Args: + cfg (CfgNode): + model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + assert isinstance( + model, GeneralizedRCNN + ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) + self.cfg = cfg.clone() + assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" + assert ( + not self.cfg.MODEL.LOAD_PROPOSALS + ), "TTA for pre-computed proposals is not supported yet" + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + @contextmanager + def _turn_off_roi_heads(self, attrs): + """ + Open a context where some heads in `model.roi_heads` are temporarily turned off. + Args: + attr (list[str]): the attribute in `model.roi_heads` which can be used + to turn off a specific head, e.g., "mask_on", "keypoint_on". + """ + roi_heads = self.model.roi_heads + old = {} + for attr in attrs: + try: + old[attr] = getattr(roi_heads, attr) + except AttributeError: + # The head may not be implemented in certain ROIHeads + pass + + if len(old.keys()) == 0: + yield + else: + for attr in old.keys(): + setattr(roi_heads, attr, False) + yield + for attr in old.keys(): + setattr(roi_heads, attr, old[attr]) + + def _batch_inference(self, batched_inputs, detected_instances=None): + """ + Execute inference on a list of inputs, + using batch size = self.batch_size, instead of the length of the list. 
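# Illustrative, standalone sketch (not part of the patched code): the fixed-size batching
# performed by _batch_inference, with a plain list standing in for the augmented inputs.
inputs = list(range(8))
batch_size = 3                                     # mirrors the default batch_size=3
batches = [inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)]
print(batches)   # [[0, 1, 2], [3, 4, 5], [6, 7]] -- the last batch may be smaller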
+ + Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` + """ + if detected_instances is None: + detected_instances = [None] * len(batched_inputs) + + outputs = [] + inputs, instances = [], [] + for idx, input, instance in zip(count(), batched_inputs, detected_instances): + inputs.append(input) + instances.append(instance) + if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: + outputs.extend( + self.model.inference( + inputs, + instances if instances[0] is not None else None, + do_postprocess=False, + ) + ) + inputs, instances = [], [] + return outputs + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`GeneralizedRCNN.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.image_format) + image = torch.from_numpy(image).permute(2, 0, 1) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + # Detect boxes from all augmented versions + with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): + # temporarily disable roi heads + all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) + # merge all detected boxes to obtain final predictions for boxes + merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) + + if self.cfg.MODEL.MASK_ON: + # Use the detected boxes to obtain masks + augmented_instances = self._rescale_detected_boxes( + augmented_inputs, merged_instances, tfms + ) + # run forward on the detected boxes + outputs = self._batch_inference(augmented_inputs, augmented_instances) + # Delete now useless variables to avoid being out of memory + del augmented_inputs, augmented_instances + # average the predictions + merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) + merged_instances = detector_postprocess(merged_instances, *orig_shape) + return {"instances": merged_instances} + else: + return {"instances": merged_instances} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms + + def _get_augmented_boxes(self, augmented_inputs, tfms): + # 1: forward with all augmented images + outputs = self._batch_inference(augmented_inputs) + # 2: union the results + all_boxes = [] + all_scores = [] + all_classes = [] + for output, tfm in zip(outputs, tfms): + # Need to inverse the transforms on boxes, to obtain results on original image + pred_boxes = output.pred_boxes.tensor + original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) + all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) + + all_scores.extend(output.scores) + all_classes.extend(output.pred_classes) + all_boxes = torch.cat(all_boxes, dim=0) + return all_boxes, all_scores, all_classes + + def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): + # select from the union of all results + num_boxes = len(all_boxes) + num_classes = 
self.cfg.MODEL.ROI_HEADS.NUM_CLASSES + # +1 because fast_rcnn_inference expects background scores as well + all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) + for idx, cls, score in zip(count(), all_classes, all_scores): + all_scores_2d[idx, cls] = score + + merged_instances, _ = fast_rcnn_inference_single_image( + all_boxes, + all_scores_2d, + shape_hw, + 1e-8, + self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, + self.cfg.TEST.DETECTIONS_PER_IMAGE, + ) + + return merged_instances + + def _rescale_detected_boxes(self, augmented_inputs, merged_instances, tfms): + augmented_instances = [] + for input, tfm in zip(augmented_inputs, tfms): + # Transform the target box to the augmented image's coordinate space + pred_boxes = merged_instances.pred_boxes.tensor.cpu().numpy() + pred_boxes = torch.from_numpy(tfm.apply_box(pred_boxes)) + + aug_instances = Instances( + image_size=input["image"].shape[1:3], + pred_boxes=Boxes(pred_boxes), + pred_classes=merged_instances.pred_classes, + scores=merged_instances.scores, + ) + augmented_instances.append(aug_instances) + return augmented_instances + + def _reduce_pred_masks(self, outputs, tfms): + # Should apply inverse transforms on masks. + # We assume only resize & flip are used. pred_masks is a scale-invariant + # representation, so we handle flip specially + for output, tfm in zip(outputs, tfms): + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + output.pred_masks = output.pred_masks.flip(dims=[3]) + all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) + avg_pred_masks = torch.mean(all_pred_masks, dim=0) + return avg_pred_masks diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5141f66066ffe3dae34477a387d793d78b185aa1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .build import build_lr_scheduler, build_optimizer +from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/build.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/build.py new file mode 100644 index 0000000000000000000000000000000000000000..ab1392e56f9ac71d4bb0ed3e7917c7bdadeeedc0 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/build.py @@ -0,0 +1,188 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from enum import Enum +from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union +import torch +import apex + +from detectron2.config import CfgNode + +from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR + +_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] +_GradientClipper = Callable[[_GradientClipperInput], None] + + +class GradientClipType(Enum): + VALUE = "value" + NORM = "norm" + + +def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: + """ + Creates gradient clipping closure to clip by value or by norm, + according to the provided config. + """ + cfg = cfg.clone() + + def clip_grad_norm(p: _GradientClipperInput): + torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) + + def clip_grad_value(p: _GradientClipperInput): + torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) + + _GRADIENT_CLIP_TYPE_TO_CLIPPER = { + GradientClipType.VALUE: clip_grad_value, + GradientClipType.NORM: clip_grad_norm, + } + return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] + + +def _generate_optimizer_class_with_gradient_clipping( + optimizer_type: Type[torch.optim.Optimizer], gradient_clipper: _GradientClipper +) -> Type[torch.optim.Optimizer]: + """ + Dynamically creates a new type that inherits the type of a given instance + and overrides the `step` method to add gradient clipping + """ + + def optimizer_wgc_step(self, closure=None): + for group in self.param_groups: + for p in group["params"]: + gradient_clipper(p) + super(type(self), self).step(closure) + + OptimizerWithGradientClip = type( + optimizer_type.__name__ + "WithGradientClip", + (optimizer_type,), + {"step": optimizer_wgc_step}, + ) + return OptimizerWithGradientClip + + +def maybe_add_gradient_clipping( + cfg: CfgNode, optimizer: torch.optim.Optimizer +) -> torch.optim.Optimizer: + """ + If gradient clipping is enabled through config options, wraps the existing + optimizer instance of some type OptimizerType to become an instance + of the new dynamically created class OptimizerTypeWithGradientClip + that inherits OptimizerType and overrides the `step` method to + include gradient clipping. 
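# Illustrative, standalone sketch (not part of the patched code): the "swap __class__ to a
# dynamically created subclass whose step() clips first" mechanism described here, using
# plain SGD and clip-by-norm with made-up values.
import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)

def step_with_clip(self, closure=None):
    for group in self.param_groups:
        for p in group["params"]:
            if p.grad is not None:
                torch.nn.utils.clip_grad_norm_(p, max_norm=1.0)   # clip this param's grad
    return super(type(self), self).step(closure)

SGDWithGradientClip = type("SGDWithGradientClip", (torch.optim.SGD,), {"step": step_with_clip})
opt.__class__ = SGDWithGradientClip    # same trick as maybe_add_gradient_clipping

model(torch.randn(3, 4)).sum().backward()
opt.step()                             # gradients are clipped before the parameter update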
+ + Args: + cfg: CfgNode + configuration options + optimizer: torch.optim.Optimizer + existing optimizer instance + + Return: + optimizer: torch.optim.Optimizer + either the unmodified optimizer instance (if gradient clipping is + disabled), or the same instance with adjusted __class__ to override + the `step` method and include gradient clipping + """ + if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: + return optimizer + grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) + OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( + type(optimizer), grad_clipper + ) + optimizer.__class__ = OptimizerWithGradientClip + return optimizer + + +def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: + """ + Build an optimizer from config. + """ + norm_module_types = ( + torch.nn.BatchNorm1d, + torch.nn.BatchNorm2d, + torch.nn.BatchNorm3d, + torch.nn.SyncBatchNorm, + # NaiveSyncBatchNorm inherits from BatchNorm2d + torch.nn.GroupNorm, + torch.nn.InstanceNorm1d, + torch.nn.InstanceNorm2d, + torch.nn.InstanceNorm3d, + torch.nn.LayerNorm, + torch.nn.LocalResponseNorm, + ) + + params: List[Dict[str, Any]] = [] + params_1 = [] + params_2 = [] + memo: Set[torch.nn.parameter.Parameter] = set() + for module in model.modules(): + for key, value in module.named_parameters(recurse=False): + if not value.requires_grad: + continue + # Avoid duplicating parameters + if value in memo: + continue + memo.add(value) + lr = cfg.SOLVER.BASE_LR + weight_decay = cfg.SOLVER.WEIGHT_DECAY + weight_decay_1 = cfg.SOLVER.WEIGHT_DECAY + if isinstance(module, norm_module_types): + weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM + if key == "bias": + # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 + # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer + # hyperparameters are by default exactly the same as for regular + # weights. + lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR + weight_decay_1 = cfg.SOLVER.WEIGHT_DECAY_BIAS + params_1.append(value) + else: + params_2.append(value) + + params = [{"params": params_1, "lr": lr, "weight_decay": weight_decay_1}, + {"params": params_2, "lr": lr, "weight_decay": weight_decay}] + + optimizer = apex.optimizers.NpuFusedSGD( + params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV + ) + optimizer = maybe_add_gradient_clipping(cfg, optimizer) + return optimizer + + +def build_lr_scheduler( + cfg: CfgNode, optimizer: torch.optim.Optimizer +) -> torch.optim.lr_scheduler._LRScheduler: + """ + Build a LR scheduler from config. 
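# Illustrative, standalone sketch (not part of the patched code): the warmup-scaled
# half-cosine schedule implemented by WarmupCosineLR in lr_scheduler.py below, written
# as plain math with made-up constants.
import math

base_lr, max_iters, warmup_iters, warmup_factor = 0.01, 1000, 100, 0.001

def lr_at(it):
    if it >= warmup_iters:
        w = 1.0
    else:
        alpha = it / warmup_iters
        w = warmup_factor * (1 - alpha) + alpha        # linear warmup
    return base_lr * w * 0.5 * (1.0 + math.cos(math.pi * it / max_iters))

# lr_at(0) is about 1e-5, lr_at(100) is roughly base_lr, and lr_at(1000) decays to 0.0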
+ """ + name = cfg.SOLVER.LR_SCHEDULER_NAME + if name == "WarmupMultiStepLR": + return WarmupMultiStepLR( + optimizer, + cfg.SOLVER.STEPS, + cfg.SOLVER.GAMMA, + warmup_factor=cfg.SOLVER.WARMUP_FACTOR, + warmup_iters=cfg.SOLVER.WARMUP_ITERS, + warmup_method=cfg.SOLVER.WARMUP_METHOD, + ) + elif name == "WarmupCosineLR": + return WarmupCosineLR( + optimizer, + cfg.SOLVER.MAX_ITER, + warmup_factor=cfg.SOLVER.WARMUP_FACTOR, + warmup_iters=cfg.SOLVER.WARMUP_ITERS, + warmup_method=cfg.SOLVER.WARMUP_METHOD, + ) + else: + raise ValueError("Unknown LR scheduler: {}".format(name)) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/lr_scheduler.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..33965c3b19c53647ac11f1a7f0ae9fe0b9ef8884 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/solver/lr_scheduler.py @@ -0,0 +1,129 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from bisect import bisect_right +from typing import List +import torch + +# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes +# only on epoch boundaries. We typically use iteration based schedules instead. +# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean +# "iteration" instead. + +# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating +# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. + + +class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + milestones: List[int], + gamma: float = 0.1, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. 
Got {}", milestones + ) + self.milestones = milestones + self.gamma = gamma + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + return [ + base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): + def __init__( + self, + optimizer: torch.optim.Optimizer, + max_iters: int, + warmup_factor: float = 0.001, + warmup_iters: int = 1000, + warmup_method: str = "linear", + last_epoch: int = -1, + ): + self.max_iters = max_iters + self.warmup_factor = warmup_factor + self.warmup_iters = warmup_iters + self.warmup_method = warmup_method + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + warmup_factor = _get_warmup_factor_at_iter( + self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor + ) + # Different definitions of half-cosine with warmup are possible. For + # simplicity we multiply the standard half-cosine schedule by the warmup + # factor. An alternative is to start the period of the cosine at warmup_iters + # instead of at 0. In the case that warmup_iters << max_iters the two are + # very close to each other. + return [ + base_lr + * warmup_factor + * 0.5 + * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) + for base_lr in self.base_lrs + ] + + def _compute_values(self) -> List[float]: + # The new interface + return self.get_lr() + + +def _get_warmup_factor_at_iter( + method: str, iter: int, warmup_iters: int, warmup_factor: float +) -> float: + """ + Return the learning rate warmup factor at a specific iteration. + See :paper:`ImageNet in 1h` for more details. + + Args: + method (str): warmup method; either "constant" or "linear". + iter (int): iteration at which to calculate the warmup factor. + warmup_iters (int): the number of warmup iterations. + warmup_factor (float): the base warmup factor (the meaning changes according + to the method used). + + Returns: + float: the effective warmup factor at the given iteration. + """ + if iter >= warmup_iters: + return 1.0 + + if method == "constant": + return warmup_factor + elif method == "linear": + alpha = iter / warmup_iters + return warmup_factor * (1 - alpha) + alpha + else: + raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..29a10afde2a38aa8550ed923ae4c05b381951347 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
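# --- Editor's sketch (not part of the patch): numerical behaviour of the
# linear branch of _get_warmup_factor_at_iter for the default
# warmup_factor=0.001, warmup_iters=1000.
def linear_warmup(it, warmup_iters=1000, warmup_factor=0.001):
    if it >= warmup_iters:
        return 1.0
    alpha = it / warmup_iters
    return warmup_factor * (1 - alpha) + alpha

print(linear_warmup(0))     # 0.001  -> LR starts at base_lr * 0.001
print(linear_warmup(500))   # 0.5005 -> roughly halfway back to the base LR
print(linear_warmup(1000))  # 1.0    -> warmup finished, plain schedule takes over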
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .boxes import Boxes, BoxMode, pairwise_iou +from .image_list import ImageList + +from .instances import Instances +from .keypoints import Keypoints, heatmaps_to_keypoints +from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask +from .rotated_boxes import RotatedBoxes +from .rotated_boxes import pairwise_iou as pairwise_iou_rotated + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/boxes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..d6220b1bea271dfd0d04a62fc31cbc4f2baf7be3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/boxes.py @@ -0,0 +1,378 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import numpy as np +from enum import IntEnum, unique +from typing import Any, List, Tuple, Union +import torch + +_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] + + +@unique +class BoxMode(IntEnum): + """ + Enum of different ways to represent a box. + """ + + XYXY_ABS = 0 + """ + (x0, y0, x1, y1) in absolute floating points coordinates. + The coordinates in range [0, width or height]. + """ + XYWH_ABS = 1 + """ + (x0, y0, w, h) in absolute floating points coordinates. + """ + XYXY_REL = 2 + """ + Not yet supported! + (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. + """ + XYWH_REL = 3 + """ + Not yet supported! + (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. + """ + XYWHA_ABS = 4 + """ + (xc, yc, w, h, a) in absolute floating points coordinates. + (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. + """ + + @staticmethod + def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: + """ + Args: + box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 + from_mode, to_mode (BoxMode) + + Returns: + The converted box of the same type. 
+ """ + if from_mode == to_mode: + return box + + original_type = type(box) + is_numpy = isinstance(box, np.ndarray) + single_box = isinstance(box, (list, tuple)) + if single_box: + assert len(box) == 4 or len(box) == 5, ( + "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," + " where k == 4 or 5" + ) + arr = torch.tensor(box)[None, :] + else: + # avoid modifying the input box + if is_numpy: + arr = torch.from_numpy(np.asarray(box)).clone() + else: + arr = box.clone() + + assert to_mode.value not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ] and from_mode.value not in [ + BoxMode.XYXY_REL, + BoxMode.XYWH_REL, + ], "Relative mode not yet supported!" + + if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: + assert ( + arr.shape[-1] == 5 + ), "The last dimension of input shape must be 5 for XYWHA format" + original_dtype = arr.dtype + arr = arr.double() + + w = arr[:, 2] + h = arr[:, 3] + a = arr[:, 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + new_w = c * w + s * h + new_h = c * h + s * w + + # convert center to top-left corner + arr[:, 0] -= new_w / 2.0 + arr[:, 1] -= new_h / 2.0 + # bottom-right corner + arr[:, 2] = arr[:, 0] + new_w + arr[:, 3] = arr[:, 1] + new_h + + arr = arr[:, :4].to(dtype=original_dtype) + elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: + original_dtype = arr.dtype + arr = arr.double() + arr[:, 0] += arr[:, 2] / 2.0 + arr[:, 1] += arr[:, 3] / 2.0 + angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) + arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) + else: + if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: + arr[:, 2] += arr[:, 0] + arr[:, 3] += arr[:, 1] + elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: + arr[:, 2] -= arr[:, 0] + arr[:, 3] -= arr[:, 1] + else: + raise NotImplementedError( + "Conversion from BoxMode {} to {} is not supported yet".format( + from_mode, to_mode + ) + ) + + if single_box: + return original_type(arr.flatten().tolist()) + if is_numpy: + return arr.numpy() + else: + return arr + + +class Boxes: + """ + This structure stores a list of boxes as a Nx4 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + + Attributes: + tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((0, 4)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() + + self.tensor = tensor + + def clone(self) -> "Boxes": + """ + Clone the Boxes. 
+ + Returns: + Boxes + """ + return Boxes(self.tensor.clone()) + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any): + if len(args) == 1 and args[0] == torch.float16 and self.tensor.dtype == torch.float32: + return self + return Boxes(self.tensor.to(*args, **kwargs)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + return area + + def clip(self, box_size: Tuple[int, int]) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + Args: + box_size (height, width): The clipping box's size. + """ + assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" + h, w = box_size + self.tensor[:, 0].clamp_(min=0, max=w) + self.tensor[:, 1].clamp_(min=0, max=h) + self.tensor[:, 2].clamp_(min=0, max=w) + self.tensor[:, 3].clamp_(min=0, max=h) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: + a binary vector which represents whether each box is empty + (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] - box[:, 0] + heights = box[:, 3] - box[:, 1] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item): + """ + Args: + item: int, slice, or a BoolTensor + + Returns: + Boxes: Create a new :class:`Boxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned Boxes might share storage with this Boxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Boxes(self.tensor[item].view(1, -1)) + + # npu2cpu change + if isinstance(item, slice): + b_item = self.tensor[item] + elif item.dtype == torch.int32: + b_item = self.tensor[item.long()] + else: + b_item = self.tensor[item] + + assert b_item.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) + return Boxes(b_item) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "Boxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box. + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + Returns: + a binary vector, indicating whether each box is inside the reference box. + """ + height, width = box_size + inds_inside = ( + (self.tensor[..., 0] >= -boundary_threshold) + & (self.tensor[..., 1] >= -boundary_threshold) + & (self.tensor[..., 2] < width + boundary_threshold) + & (self.tensor[..., 3] < height + boundary_threshold) + ) + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). 
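# --- Editor's sketch (not part of the patch): basic Boxes operations (area,
# clip, nonempty, boolean indexing), again assuming the package is importable
# as detectron2.structures.
import torch
from detectron2.structures import Boxes

boxes = Boxes(torch.tensor([[10., 10., 50., 30.],
                            [-5.,  0., 20., 20.],     # sticks out of the image
                            [40., 40., 40., 60.]]))   # zero width
print(boxes.area())                 # tensor([800., 500.,   0.])
boxes.clip((100, 100))              # (height, width): clamps the second box to x1=0
print(boxes.nonempty())             # tensor([ True,  True, False])
print(boxes[boxes.nonempty()])      # Boxes containing only the two non-degenerate rows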
+ """ + return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the box with horizontal and vertical scaling factors + """ + self.tensor[:, 0::2] *= scale_x + self.tensor[:, 1::2] *= scale_y + + # classmethod not supported by torchscript. TODO try staticmethod + @classmethod + @torch.jit.unused + def cat(cls, boxes_list): + """ + Concatenates a list of Boxes into a single Boxes + + Arguments: + boxes_list (list[Boxes]) + + Returns: + Boxes: the concatenated Boxes + """ + assert isinstance(boxes_list, (list, tuple)) + if len(boxes_list) == 0: + return cls(torch.empty(0)) + assert all([isinstance(box, Boxes) for box in boxes_list]) + device = boxes_list[0].tensor.device + cat_boxes = cls(torch.cat([b.tensor.cpu() for b in boxes_list], dim=0).to(device)) + return cat_boxes + + @property + def device(self) -> torch.device: + return self.tensor.device + + # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript + # https://github.com/pytorch/pytorch/issues/18627 + @torch.jit.unused + def __iter__(self): + """ + Yield a box as a Tensor of shape (4,) at a time. + """ + yield from self.tensor + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the IoU (intersection over union) + between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + out = torch.npu_ptiou(boxes2.tensor, boxes1.tensor) + return out + + +def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: + """ + Compute pairwise intersection over union (IOU) of two sets of matched + boxes. The box order must be (xmin, ymin, xmax, ymax). + Similar to boxlist_iou, but computes only diagonal elements of the matrix + Arguments: + boxes1: (Boxes) bounding boxes, sized [N,4]. + boxes2: (Boxes) bounding boxes, sized [N,4]. + Returns: + (tensor) iou, sized [N]. + """ + assert len(boxes1) == len( + boxes2 + ), "boxlists should have the same" "number of entries, got {}, {}".format( + len(boxes1), len(boxes2) + ) + area1 = boxes1.area() # [N] + area2 = boxes2.area() # [N] + box1, box2 = boxes1.tensor, boxes2.tensor + lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] + rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] + wh = (rb - lt).clamp(min=0) # [N,2] + inter = wh[:, 0] * wh[:, 1] # [N] + iou = inter / (area1 + area2 - inter) # [N] + return iou diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/image_list.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/image_list.py new file mode 100644 index 0000000000000000000000000000000000000000..eea4edc85a799e6ac50ced80b1548dc6b59af849 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/image_list.py @@ -0,0 +1,136 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import division +from typing import Any, List, Sequence, Tuple +import torch +from torch.nn import functional as F + + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w) + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. + + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> torch.device: + return self.tensor.device + + @staticmethod + # https://github.com/pytorch/pytorch/issues/39308 + @torch.jit.unused + def from_tensors( + tensors: Sequence[torch.Tensor], + size_divisibility: int = 0, + pad_value: float = 0.0, + mean = 0.0, std = 1.0 + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. + This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad + + Returns: + an `ImageList`. + """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape + # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors + max_size = ( + # In tracing mode, x.shape[i] is Tensor, and should not be converted + # to int: this will cause the traced graph to have hard-coded shapes. + # Instead we should make max_size a Tensor that depends on these tensors. + # Using torch.stack twice seems to be the best way to convert + # list[list[ScalarTensor]] to a Tensor + torch.stack( + [ + torch.stack([torch.as_tensor(dim) for dim in size]) + for size in [tuple(img.shape) for img in tensors] + ] + ) + .max(0) + .values + ) + + if size_divisibility > 1: + stride = size_divisibility + image_sizes = [tuple(im.shape[-2:]) for im in tensors] + + if len(tensors) == 0: + # This seems slightly (2%) faster. 
+ # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + if all(x == 0 for x in padding_size): # https://github.com/pytorch/pytorch/issues/31734 + batched_imgs = tensors[0].unsqueeze(0) + else: + padded = F.pad(tensors[0], padding_size, value=pad_value) + batched_imgs = padded.unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore use tuple() + batch_shape = (len(tensors),) + tuple(max_size) + batched_imgs = tensors[0].new_full(batch_shape, -1.0) + for img, pad_img in zip(tensors, batched_imgs): + pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) + mask = pad_img >= 0 + pad_img.sub_(mean) + pad_img.div_(std) + pad_img.mul_(mask.half()) + + return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/instances.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/instances.py new file mode 100644 index 0000000000000000000000000000000000000000..42bd5150ff0702e883d70ae2ec62ddaa1afabe38 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/instances.py @@ -0,0 +1,204 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from typing import Any, Dict, List, Tuple, Union +import torch + + +class Instances: + """ + This class represents a list of instances in an image. + It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". + All fields must have the same ``__len__`` which is the number of instances. + + All other (non-field) attributes of this class are considered private: + they must start with '_' and are not modifiable by a user. + + Some basic usage: + + 1. Set/get/check a field: + + .. code-block:: python + + instances.gt_boxes = Boxes(...) + print(instances.pred_masks) # a tensor of shape (N, H, W) + print('gt_masks' in instances) + + 2. ``len(instances)`` returns the number of instances + 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields + and returns a new :class:`Instances`. + Typically, ``indices`` is a integer vector of indices, + or a binary mask of length ``num_instances`` + """ + + def __init__(self, image_size: Tuple[int, int], **kwargs: Any): + """ + Args: + image_size (height, width): the spatial size of the image. + kwargs: fields to add to this `Instances`. 
+ """ + self._image_size = image_size + self._fields: Dict[str, Any] = {} + for k, v in kwargs.items(): + self.set(k, v) + + @property + def image_size(self) -> Tuple[int, int]: + """ + Returns: + tuple: height, width + """ + return self._image_size + + def __setattr__(self, name: str, val: Any) -> None: + if name.startswith("_"): + super().__setattr__(name, val) + else: + self.set(name, val) + + def __getattr__(self, name: str) -> Any: + if name == "_fields" or name not in self._fields: + raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) + return self._fields[name] + + def set(self, name: str, value: Any) -> None: + """ + Set the field named `name` to `value`. + The length of `value` must be the number of instances, + and must agree with other existing fields in this object. + """ + data_len = len(value) + self._fields[name] = value + + def has(self, name: str) -> bool: + """ + Returns: + bool: whether the field called `name` exists. + """ + return name in self._fields + + def remove(self, name: str) -> None: + """ + Remove the field called `name`. + """ + del self._fields[name] + + def get(self, name: str) -> Any: + """ + Returns the field called `name`. + """ + return self._fields[name] + + def get_fields(self) -> Dict[str, Any]: + """ + Returns: + dict: a dict which maps names (str) to data of the fields + + Modifying the returned dict will modify this instance. + """ + return self._fields + + # Tensor-like methods + def to(self, *args: Any, **kwargs: Any) -> "Instances": + """ + Returns: + Instances: all fields are called with a `to(device)`, if the field has this method. + """ + ret = Instances(self._image_size) + for k, v in self._fields.items(): + if hasattr(v, "to"): + v = v.to(*args, **kwargs) + ret.set(k, v) + return ret + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": + """ + Args: + item: an index-like object and will be used to index all the fields. + + Returns: + If `item` is a string, return the data in the corresponding field. + Otherwise, returns an `Instances` where all fields are indexed by `item`. 
+ """ + if type(item) == int: + if item >= len(self) or item < -len(self): + raise IndexError("Instances index out of range!") + else: + item = slice(item, None, len(self)) + + ret = Instances(self._image_size) + for k, v in self._fields.items(): + ret.set(k, v[item]) + return ret + + def __len__(self) -> int: + for v in self._fields.values(): + return len(v) + raise NotImplementedError("Empty Instances does not support __len__!") + + def __iter__(self): + raise NotImplementedError("`Instances` object is not iterable!") + + @staticmethod + def cat(instance_lists: List["Instances"]) -> "Instances": + """ + Args: + instance_lists (list[Instances]) + + Returns: + Instances + """ + assert all(isinstance(i, Instances) for i in instance_lists) + assert len(instance_lists) > 0 + if len(instance_lists) == 1: + return instance_lists[0] + + image_size = instance_lists[0].image_size + for i in instance_lists[1:]: + assert i.image_size == image_size + ret = Instances(image_size) + for k in instance_lists[0]._fields.keys(): + values = [i.get(k) for i in instance_lists] + v0 = values[0] + + if isinstance(v0, torch.Tensor): + #edit by zsc + #values = torch.cat(values,dim=0) + device = v0.device + if values[1].dtype != values[0].dtype: + torch_type = values[0].dtype + values = torch.cat([value.type(torch_type).cpu() for value in values], dim=0) + else: + values = torch.cat([value.cpu() for value in values], dim=0) + values = values.to(device) + ##ends + elif isinstance(v0, list): + values = list(itertools.chain(*values)) + elif hasattr(type(v0), "cat"): + values = type(v0).cat(values) + else: + raise ValueError("Unsupported type {} for concatenation".format(type(v0))) + ret.set(k, values) + return ret + + def __str__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self)) + s += "image_height={}, ".format(self._image_size[0]) + s += "image_width={}, ".format(self._image_size[1]) + s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) + return s + + __repr__ = __str__ diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/keypoints.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..1e52829b3a939e2ea741397795aa6b3241002b28 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/keypoints.py @@ -0,0 +1,225 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from typing import Any, List, Tuple, Union +import torch + +from detectron2.layers import interpolate + + +class Keypoints: + """ + Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property + containing the x,y location and visibility flag of each keypoint. 
This tensor has shape + (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. + + The visibility flag follows the COCO format and must be one of three integers: + * v=0: not labeled (in which case x=y=0) + * v=1: labeled but not visible + * v=2: labeled and visible + """ + + def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): + """ + Arguments: + keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. + The shape should be (N, K, 3) where N is the number of + instances, and K is the number of keypoints per instance. + """ + device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") + keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) + assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape + self.tensor = keypoints + + def __len__(self) -> int: + return self.tensor.size(0) + + def to(self, *args: Any, **kwargs: Any) -> "Keypoints": + return type(self)(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: + """ + Convert keypoint annotations to a heatmap of one-hot labels for training, + as described in :paper:`Mask R-CNN`. + + Arguments: + boxes: Nx4 tensor, the boxes to draw the keypoints to + + Returns: + heatmaps: + A tensor of shape (N, K), each element is integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: + A tensor of shape (N, K) containing whether each keypoint is in the roi or not. + """ + return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": + """ + Create a new `Keypoints` by indexing on this `Keypoints`. + + The following usage are allowed: + + 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. + 2. `new_kpts = kpts[2:10]`: return a slice of key points. + 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor + with `length = len(kpts)`. Nonzero elements in the vector will be selected. + + Note that the returned Keypoints might share storage with this Keypoints, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return Keypoints([self.tensor[item]]) + return Keypoints(self.tensor[item]) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + +# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) +def _keypoints_to_heatmap( + keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. + + Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the + closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the + continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): + d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. + + Arguments: + keypoints: tensor of keypoint locations in of shape (N, K, 3). + rois: Nx4 tensor of rois in xyxy format + heatmap_size: integer side length of square heatmap. 
+ + Returns: + heatmaps: A tensor of shape (N, K) containing an integer spatial label + in the range [0, heatmap_size**2 - 1] for each keypoint in the input. + valid: A tensor of shape (N, K) containing whether each keypoint is in + the roi or not. + """ + + if rois.numel() == 0: + return rois.new().long(), rois.new().long() + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) + scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) + + offset_x = offset_x[:, None] + offset_y = offset_y[:, None] + scale_x = scale_x[:, None] + scale_y = scale_y[:, None] + + x = keypoints[..., 0] + y = keypoints[..., 1] + + x_boundary_inds = x == rois[:, 2][:, None] + y_boundary_inds = y == rois[:, 3][:, None] + + x = (x - offset_x) * scale_x + x = x.floor().long() + y = (y - offset_y) * scale_y + y = y.floor().long() + + x[x_boundary_inds] = heatmap_size - 1 + y[y_boundary_inds] = heatmap_size - 1 + + valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) + vis = keypoints[..., 2] > 0 + valid = (valid_loc & vis).long() + + lin_ind = y * heatmap_size + x + heatmaps = lin_ind * valid + + return heatmaps, valid + + +@torch.no_grad() +def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: + """ + Extract predicted keypoint locations from heatmaps. + + Args: + maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for + each ROI and each keypoint. + rois (Tensor): (#ROIs, 4). The box of each ROI. + + Returns: + Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to + (x, y, logit, score) for each keypoint. + + When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, + we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from + Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. 
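# --- Editor's sketch (not part of the patch): the spatial label produced by
# Keypoints.to_heatmap (which wraps _keypoints_to_heatmap above) for a single
# visible keypoint inside a 112x112 roi mapped onto a 56x56 heatmap.
import torch
from detectron2.structures import Keypoints

kpts = Keypoints(torch.tensor([[[30.0, 40.0, 2.0]]]))   # one instance, one keypoint (x, y, v)
rois = torch.tensor([[0.0, 0.0, 112.0, 112.0]])         # the box the keypoint is drawn into
heatmaps, valid = kpts.to_heatmap(rois, heatmap_size=56)
print(heatmaps)   # tensor([[1135]])  ->  y*56 + x with x=floor(30*0.5)=15, y=floor(40*0.5)=20
print(valid)      # tensor([[1]])     ->  inside the roi and labeled visible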
+ """ + offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) + heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) + widths_ceil = widths.ceil() + heights_ceil = heights.ceil() + + num_rois, num_keypoints = maps.shape[:2] + xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) + + width_corrections = widths / widths_ceil + height_corrections = heights / heights_ceil + + keypoints_idx = torch.arange(num_keypoints, device=maps.device) + + for i in range(num_rois): + outsize = (int(heights_ceil[i]), int(widths_ceil[i])) + roi_map = interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False).squeeze( + 0 + ) # #keypoints x H x W + + # softmax over the spatial region + max_score, _ = roi_map.view(num_keypoints, -1).max(1) + max_score = max_score.view(num_keypoints, 1, 1) + tmp_full_resolution = (roi_map - max_score).exp_() + tmp_pool_resolution = (maps[i] - max_score).exp_() + # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, + # so that the scores of objects of different absolute sizes will be more comparable + roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) + + w = roi_map.shape[2] + pos = roi_map.view(num_keypoints, -1).argmax(1) + + x_int = pos % w + y_int = (pos - x_int) // w + + assert ( + roi_map_scores[keypoints_idx, y_int, x_int] + == roi_map_scores.view(num_keypoints, -1).max(1)[0] + ).all() + + x = (x_int.float() + 0.5) * width_corrections[i] + y = (y_int.float() + 0.5) * height_corrections[i] + + xy_preds[i, :, 0] = x + offset_x[i] + xy_preds[i, :, 1] = y + offset_y[i] + xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] + xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] + + return xy_preds diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/masks.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/masks.py new file mode 100644 index 0000000000000000000000000000000000000000..3933471d084b42fc2e0e055d7452d1660e7dc0f1 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/masks.py @@ -0,0 +1,442 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import copy +import itertools +import numpy as np +from typing import Any, Iterator, List, Union +import pycocotools.mask as mask_util +import torch + +from detectron2.layers.roi_align import ROIAlign + +from .boxes import Boxes + + +def polygon_area(x, y): + # Using the shoelace formula + # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: + """ + Args: + polygons (list[ndarray]): each array has shape (Nx2,) + height, width (int) + + Returns: + ndarray: a bool mask of shape (height, width) + """ + assert len(polygons) > 0, "COCOAPI does not support empty polygons" + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + return mask_util.decode(rle).astype(np.bool) + + +def rasterize_polygons_within_box( + polygons: List[np.ndarray], box: np.ndarray, mask_size: int +) -> torch.Tensor: + """ + Rasterize the polygons into a mask image and + crop the mask content in the given box. + The cropped mask is resized to (mask_size, mask_size). + + This function is used when generating training targets for mask head in Mask R-CNN. + Given original ground-truth masks for an image, new ground-truth mask + training targets in the size of `mask_size x mask_size` + must be provided for each predicted box. This function will be called to + produce such targets. + + Args: + polygons (list[ndarray[float]]): a list of polygons, which represents an instance. + box: 4-element numpy array + mask_size (int): + + Returns: + Tensor: BoolTensor of shape (mask_size, mask_size) + """ + # 1. Shift the polygons w.r.t the boxes + w, h = box[2] - box[0], box[3] - box[1] + + polygons = copy.deepcopy(polygons) + for p in polygons: + p[0::2] = p[0::2] - box[0] + p[1::2] = p[1::2] - box[1] + + # 2. Rescale the polygons to the new box size + # max() to avoid division by small number + ratio_h = mask_size / max(h, 0.1) + ratio_w = mask_size / max(w, 0.1) + + if ratio_h == ratio_w: + for p in polygons: + p *= ratio_h + else: + for p in polygons: + p[0::2] *= ratio_w + p[1::2] *= ratio_h + + # 3. Rasterize the polygons with coco api + mask = polygons_to_bitmask(polygons, mask_size, mask_size) + mask = torch.from_numpy(mask) + return mask + + +class BitMasks: + """ + This class stores the segmentation masks for all objects in one image, in + the form of bitmaps. + + Attributes: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + + def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): + """ + Args: + tensor: bool Tensor of N,H,W, representing N instances in the image. + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) + assert tensor.dim() == 3, tensor.size() + self.image_size = tensor.shape[1:] + self.tensor = tensor + + def to(self, *args: Any, **kwargs: Any) -> "BitMasks": + return BitMasks(self.tensor.to(*args, **kwargs)) + + @property + def device(self) -> torch.device: + return self.tensor.device + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": + """ + Returns: + BitMasks: Create a new :class:`BitMasks` by indexing. + + The following usage are allowed: + + 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. + 2. `new_masks = masks[2:10]`: return a slice of masks. + 3. 
`new_masks = masks[vector]`, where vector is a torch.BoolTensor + with `length = len(masks)`. Nonzero elements in the vector will be selected. + + Note that the returned object might share storage with this object, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return BitMasks(self.tensor[item].view(1, -1)) + + if item.dtype == torch.int32: + m = self.tensor[item.long()] + else: + m = self.tensor[item] + + assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( + item, m.shape + ) + return BitMasks(m) + + def __iter__(self) -> torch.Tensor: + yield from self.tensor + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.tensor)) + return s + + def __len__(self) -> int: + return self.tensor.shape[0] + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: a BoolTensor which represents + whether each mask is empty (False) or non-empty (True). + """ + return self.tensor.flatten(1).any(dim=1) + + @staticmethod + def from_polygon_masks( + polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int + ) -> "BitMasks": + """ + Args: + polygon_masks (list[list[ndarray]] or PolygonMasks) + height, width (int) + """ + if isinstance(polygon_masks, PolygonMasks): + polygon_masks = polygon_masks.polygons + masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] + return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each bitmask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + It has less reconstruction error compared to rasterization with polygons. + However we observe no difference in accuracy, + but BitMasks requires more memory to store all the masks. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: + A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. + """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + device = self.tensor.device + + batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] + rois = torch.cat([batch_inds, boxes], dim=1) + bit_masks = self.tensor.to(dtype=torch.float32) + rois = rois.to(device=device) + + output = ( + ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) + .forward(bit_masks[:, None, :, :], rois) + .squeeze(1) + ) + output = output >= 0.5 + return output + + def get_bounding_boxes(self) -> None: + # not needed now + raise NotImplementedError + + @staticmethod + def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": + """ + Concatenates a list of BitMasks into a single BitMasks + + Arguments: + bitmasks_list (list[BitMasks]) + + Returns: + BitMasks: the concatenated BitMasks + """ + assert isinstance(bitmasks_list, (list, tuple)) + assert len(bitmasks_list) > 0 + assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) + + cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) + return cat_bitmasks + + +class PolygonMasks: + """ + This class stores the segmentation masks for all objects in one image, in the form of polygons. + + Attributes: + polygons: list[list[ndarray]]. 
Each ndarray is a float64 vector representing a polygon. + """ + + def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): + """ + Arguments: + polygons (list[list[np.ndarray]]): The first + level of the list correspond to individual instances, + the second level to all the polygons that compose the + instance, and the third level to the polygon coordinates. + The third level array should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + """ + assert isinstance(polygons, list), ( + "Cannot create PolygonMasks: Expect a list of list of polygons per image. " + "Got '{}' instead.".format(type(polygons)) + ) + + def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: + # Use float64 for higher precision, because why not? + # Always put polygons on CPU (self.to is a no-op) since they + # are supposed to be small tensors. + # May need to change this assumption if GPU placement becomes useful + if isinstance(t, torch.Tensor): + t = t.cpu().numpy() + return np.asarray(t).astype("float64") + + def process_polygons( + polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] + ) -> List[np.ndarray]: + assert isinstance(polygons_per_instance, list), ( + "Cannot create polygons: Expect a list of polygons per instance. " + "Got '{}' instead.".format(type(polygons_per_instance)) + ) + # transform the polygon to a tensor + polygons_per_instance = [_make_array(p) for p in polygons_per_instance] + for polygon in polygons_per_instance: + assert len(polygon) % 2 == 0 and len(polygon) >= 6 + return polygons_per_instance + + self.polygons: List[List[np.ndarray]] = [ + process_polygons(polygons_per_instance) for polygons_per_instance in polygons + ] + + def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": + return self + + @property + def device(self) -> torch.device: + return torch.device("cpu") + + def get_bounding_boxes(self) -> Boxes: + """ + Returns: + Boxes: tight bounding boxes around polygon masks. + """ + boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) + for idx, polygons_per_instance in enumerate(self.polygons): + minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) + maxxy = torch.zeros(2, dtype=torch.float32) + for polygon in polygons_per_instance: + coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) + minxy = torch.min(minxy, torch.min(coords, dim=0).values) + maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) + boxes[idx, :2] = minxy + boxes[idx, 2:] = maxxy + return Boxes(boxes) + + def nonempty(self) -> torch.Tensor: + """ + Find masks that are non-empty. + + Returns: + Tensor: + a BoolTensor which represents whether each mask is empty (False) or not (True). + """ + keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] + return torch.from_numpy(np.asarray(keep, dtype=np.bool)) + + def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": + """ + Support indexing over the instances and return a `PolygonMasks` object. + `item` can be: + + 1. An integer. It will return an object with only one instance. + 2. A slice. It will return an object with the selected instances. + 3. A list[int]. It will return an object with the selected instances, + correpsonding to the indices in the list. + 4. A vector mask of type BoolTensor, whose length is num_instances. + It will return an object with the instances whose mask is nonzero. 
+ """ + if isinstance(item, int): + selected_polygons = [self.polygons[item]] + elif isinstance(item, slice): + selected_polygons = self.polygons[item] + elif isinstance(item, list): + selected_polygons = [self.polygons[i] for i in item] + elif isinstance(item, torch.Tensor): + # Polygons is a list, so we have to move the indices back to CPU. + if item.dtype == torch.bool: + assert item.dim() == 1, item.shape + item = item.nonzero().squeeze(1).cpu().numpy().tolist() + elif item.dtype in [torch.int32, torch.int64]: + item = item.cpu().numpy().tolist() + else: + raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) + selected_polygons = [self.polygons[i] for i in item] + return PolygonMasks(selected_polygons) + + def __iter__(self) -> Iterator[List[np.ndarray]]: + """ + Yields: + list[ndarray]: the polygons for one instance. + Each Tensor is a float64 vector representing a polygon. + """ + return iter(self.polygons) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "num_instances={})".format(len(self.polygons)) + return s + + def __len__(self) -> int: + return len(self.polygons) + + def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: + """ + Crop each mask by the given box, and resize results to (mask_size, mask_size). + This can be used to prepare training targets for Mask R-CNN. + + Args: + boxes (Tensor): Nx4 tensor storing the boxes for each mask + mask_size (int): the size of the rasterized mask. + + Returns: + Tensor: A bool tensor of shape (N, mask_size, mask_size), where + N is the number of predicted boxes for this image. + """ + assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) + + device = boxes.device + # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise + # (several small tensors for representing a single instance mask) + boxes = boxes.to(torch.device("cpu")) + + results = [ + rasterize_polygons_within_box(poly, box.numpy(), mask_size) + for poly, box in zip(self.polygons, boxes) + ] + """ + poly: list[list[float]], the polygons for one instance + box: a tensor of shape (4,) + """ + if len(results) == 0: + return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) + return torch.stack(results, dim=0).to(device=device) + + def area(self): + """ + Computes area of the mask. 
+ Only works with Polygons, using the shoelace formula: + https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates + + Returns: + Tensor: a vector, area for each instance + """ + + area = [] + for polygons_per_instance in self.polygons: + area_per_instance = 0 + for p in polygons_per_instance: + area_per_instance += polygon_area(p[0::2], p[1::2]) + area.append(area_per_instance) + + return torch.tensor(area) + + @staticmethod + def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": + """ + Concatenates a list of PolygonMasks into a single PolygonMasks + + Arguments: + polymasks_list (list[PolygonMasks]) + + Returns: + PolygonMasks: the concatenated PolygonMasks + """ + assert isinstance(polymasks_list, (list, tuple)) + assert len(polymasks_list) > 0 + assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) + + cat_polymasks = type(polymasks_list[0])( + list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) + ) + return cat_polymasks diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/rotated_boxes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..775b2f3606eceff206112942ba9eb964155d5228 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/structures/rotated_boxes.py @@ -0,0 +1,494 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from typing import Any, Iterator, Tuple, Union +import torch + +from detectron2.layers.rotated_boxes import pairwise_iou_rotated + +from .boxes import Boxes + + +class RotatedBoxes(Boxes): + """ + This structure stores a list of rotated boxes as a Nx5 torch.Tensor. + It supports some common methods about boxes + (`area`, `clip`, `nonempty`, etc), + and also behaves like a Tensor + (support indexing, `to(device)`, `.device`, and iteration over all boxes) + """ + + def __init__(self, tensor: torch.Tensor): + """ + Args: + tensor (Tensor[float]): a Nx5 matrix. Each row is + (x_center, y_center, width, height, angle), + in which angle is represented in degrees. + While there's no strict range restriction for it, + the recommended principal range is between [-180, 180) degrees. + + Assume we have a horizontal box B = (x_center, y_center, width, height), + where width is along the x-axis and height is along the y-axis. + The rotated box B_rot (x_center, y_center, width, height, angle) + can be seen as: + + 1. When angle == 0: + B_rot == B + 2. When angle > 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; + 3. When angle < 0: + B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. 
+ + Mathematically, since the right-handed coordinate system for image space + is (y, x), where y is top->down and x is left->right, the 4 vertices of the + rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from + the vertices of the horizontal rectangle (y_i, x_i) (i = 1, 2, 3, 4) + in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, + (y_c, x_c) is the center of the rectangle): + + .. math:: + + yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, + + xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, + + which is the standard rigid-body rotation transformation. + + Intuitively, the angle is + (1) the rotation angle from y-axis in image space + to the height vector (top->down in the box's local coordinate system) + of the box in CCW, and + (2) the rotation angle from x-axis in image space + to the width vector (left->right in the box's local coordinate system) + of the box in CCW. + + More intuitively, consider the following horizontal box ABCD represented + in (x1, y1, x2, y2): (3, 2, 7, 4), + covering the [3, 7] x [2, 4] region of the continuous coordinate system + which looks like this: + + .. code:: none + + O--------> x + | + | A---B + | | | + | D---C + | + v y + + Note that each capital letter represents one 0-dimensional geometric point + instead of a 'square pixel' here. + + In the example above, using (x, y) to represent a point we have: + + .. math:: + + O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) + + We name vector AB = vector DC as the width vector in box's local coordinate system, and + vector AD = vector BC as the height vector in box's local coordinate system. Initially, + when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis + in the image space, respectively. + + For better illustration, we denote the center of the box as E, + + .. code:: none + + O--------> x + | + | A---B + | | E | + | D---C + | + v y + + where the center E = ((3+7)/2, (2+4)/2) = (5, 3). + + Also, + + .. math:: + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Therefore, the corresponding representation for the same shape in rotated box in + (x_center, y_center, width, height, angle) format is: + + (5, 3, 4, 2, 0), + + Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees + CCW (counter-clockwise) by definition. It looks like this: + + .. code:: none + + O--------> x + | B-C + | | | + | |E| + | | | + | A-D + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CCW with regard to E: + A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) + + Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to + vector AD or vector BC (the top->down height vector in box's local coordinate system), + or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right + width vector in box's local coordinate system). + + .. math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) + by definition? It looks like this: + + .. code:: none + + O--------> x + | D-A + | | | + | |E| + | | | + | C-B + v y + + The center E is still located at the same point (5, 3), while the vertices + ABCD are rotated by 90 degrees CW with regard to E: + A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) + + .. 
math:: + + width = |AB| = |CD| = 5 - 1 = 4, + height = |AD| = |BC| = 6 - 4 = 2. + + This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU + will be 1. However, these two will generate different RoI Pooling results and + should not be treated as an identical box. + + On the other hand, it's easy to see that (X, Y, W, H, A) is identical to + (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be + identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is + equivalent to rotating the same shape 90 degrees CW. + + We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): + + .. code:: none + + O--------> x + | + | C---D + | | E | + | B---A + | + v y + + .. math:: + + A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), + + width = |AB| = |CD| = 7 - 3 = 4, + height = |AD| = |BC| = 4 - 2 = 2. + + Finally, this is a very inaccurate (heavily quantized) illustration of + how (5, 3, 4, 2, 60) looks like in case anyone wonders: + + .. code:: none + + O--------> x + | B\ + | / C + | /E / + | A / + | `D + v y + + It's still a rectangle with center of (5, 3), width of 4 and height of 2, + but its angle (and thus orientation) is somewhere between + (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). + """ + device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") + tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) + if tensor.numel() == 0: + # Use reshape, so we don't end up creating a new tensor that does not depend on + # the inputs (and consequently confuses jit) + tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) + assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() + + self.tensor = tensor + + def clone(self) -> "RotatedBoxes": + """ + Clone the RotatedBoxes. + + Returns: + RotatedBoxes + """ + return RotatedBoxes(self.tensor.clone()) + + def to(self, *args: Any, **kwargs: Any) -> "RotatedBoxes": + return RotatedBoxes(self.tensor.to(*args, **kwargs)) + + def area(self) -> torch.Tensor: + """ + Computes the area of all the boxes. + + Returns: + torch.Tensor: a vector with areas of each box. + """ + box = self.tensor + area = box[:, 2] * box[:, 3] + return area + + def normalize_angles(self) -> None: + """ + Restrict angles to the range of [-180, 180) degrees + """ + self.tensor[:, 4] = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 + + def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None: + """ + Clip (in place) the boxes by limiting x coordinates to the range [0, width] + and y coordinates to the range [0, height]. + + For RRPN: + Only clip boxes that are almost horizontal with a tolerance of + clip_angle_threshold to maintain backward compatibility. + + Rotated boxes beyond this threshold are not clipped for two reasons: + + 1. There are potentially multiple ways to clip a rotated box to make it + fit within the image. + 2. It's tricky to make the entire rectangular box fit within the image + and still be able to not leave out pixels of interest. + + Therefore we rely on ops like RoIAlignRotated to safely handle this. + + Args: + box_size (height, width): The clipping box's size. + clip_angle_threshold: + Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), + we do the clipping as horizontal boxes. 
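+
+ Example (illustrative sketch; the box values and image size below are hypothetical)::
+
+ boxes = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 0.5]]))
+ # abs(angle) = 0.5 <= clip_angle_threshold (default 1.0), so this box is clipped
+ # the same way a horizontal box would be
+ boxes.clip((480, 640))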
+ """ + h, w = box_size + + # normalize angles to be within (-180, 180] degrees + self.normalize_angles() + + idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] + + # convert to (x1, y1, x2, y2) + x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 + y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 + x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 + y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 + + # clip + x1.clamp_(min=0, max=w) + y1.clamp_(min=0, max=h) + x2.clamp_(min=0, max=w) + y2.clamp_(min=0, max=h) + + # convert back to (xc, yc, w, h) + self.tensor[idx, 0] = (x1 + x2) / 2.0 + self.tensor[idx, 1] = (y1 + y2) / 2.0 + # make sure widths and heights do not increase due to numerical errors + self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) + self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) + + def nonempty(self, threshold: float = 0.0) -> torch.Tensor: + """ + Find boxes that are non-empty. + A box is considered empty, if either of its side is no larger than threshold. + + Returns: + Tensor: a binary vector which represents + whether each box is empty (False) or non-empty (True). + """ + box = self.tensor + widths = box[:, 2] + heights = box[:, 3] + keep = (widths > threshold) & (heights > threshold) + return keep + + def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "RotatedBoxes": + """ + Returns: + RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. + + The following usage are allowed: + + 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. + 2. `new_boxes = boxes[2:10]`: return a slice of boxes. + 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor + with `length = len(boxes)`. Nonzero elements in the vector will be selected. + + Note that the returned RotatedBoxes might share storage with this RotatedBoxes, + subject to Pytorch's indexing semantics. + """ + if isinstance(item, int): + return RotatedBoxes(self.tensor[item].view(1, -1)) + b = self.tensor[item] + assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( + item + ) + return RotatedBoxes(b) + + def __len__(self) -> int: + return self.tensor.shape[0] + + def __repr__(self) -> str: + return "RotatedBoxes(" + str(self.tensor) + ")" + + def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: + """ + Args: + box_size (height, width): Size of the reference box covering + [0, width] x [0, height] + boundary_threshold (int): Boxes that extend beyond the reference box + boundary by more than boundary_threshold are considered "outside". + + For RRPN, it might not be necessary to call this function since it's common + for rotated box to extend to outside of the image boundaries + (the clip function only clips the near-horizontal boxes) + + Returns: + a binary vector, indicating whether each box is inside the reference box. 
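+
+ Example (illustrative sketch; the reference image size is hypothetical)::
+
+ keep = boxes.inside_box((480, 640)) # bool vector, one entry per box
+ boxes = boxes[keep] # keep boxes whose bounding rectangle lies inside the image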
+ """ + height, width = box_size + + cnt_x = self.tensor[..., 0] + cnt_y = self.tensor[..., 1] + half_w = self.tensor[..., 2] / 2.0 + half_h = self.tensor[..., 3] / 2.0 + a = self.tensor[..., 4] + c = torch.abs(torch.cos(a * math.pi / 180.0)) + s = torch.abs(torch.sin(a * math.pi / 180.0)) + # This basically computes the horizontal bounding rectangle of the rotated box + max_rect_dx = c * half_w + s * half_h + max_rect_dy = c * half_h + s * half_w + + inds_inside = ( + (cnt_x - max_rect_dx >= -boundary_threshold) + & (cnt_y - max_rect_dy >= -boundary_threshold) + & (cnt_x + max_rect_dx < width + boundary_threshold) + & (cnt_y + max_rect_dy < height + boundary_threshold) + ) + + return inds_inside + + def get_centers(self) -> torch.Tensor: + """ + Returns: + The box centers in a Nx2 array of (x, y). + """ + return self.tensor[:, :2] + + def scale(self, scale_x: float, scale_y: float) -> None: + """ + Scale the rotated box with horizontal and vertical scaling factors + Note: when scale_factor_x != scale_factor_y, + the rotated box does not preserve the rectangular shape when the angle + is not a multiple of 90 degrees under resize transformation. + Instead, the shape is a parallelogram (that has skew) + Here we make an approximation by fitting a rotated rectangle to the parallelogram. + """ + self.tensor[:, 0] *= scale_x + self.tensor[:, 1] *= scale_y + theta = self.tensor[:, 4] * math.pi / 180.0 + c = torch.cos(theta) + s = torch.sin(theta) + + # In image space, y is top->down and x is left->right + # Consider the local coordintate system for the rotated box, + # where the box center is located at (0, 0), and the four vertices ABCD are + # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) + # the midpoint of the left edge AD of the rotated box E is: + # E = (A+D)/2 = (-w / 2, 0) + # the midpoint of the top edge AB of the rotated box F is: + # F(0, -h / 2) + # To get the old coordinates in the global system, apply the rotation transformation + # (Note: the right-handed coordinate system for image space is yOx): + # (old_x, old_y) = (s * y + c * x, c * y - s * x) + # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) + # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) + # After applying the scaling factor (sfx, sfy): + # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) + # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) + # The new width after scaling tranformation becomes: + + # w(new) = |E(new) - O| * 2 + # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 + # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w + # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y + self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) + + # h(new) = |F(new) - O| * 2 + # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 + # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h + # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] + # + # For example, + # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; + # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x + self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) + + # The angle is the rotation angle from y-axis in image space to the height + # vector (top->down in the box's local coordinate system) of the box in CCW. 
+ # + # angle(new) = angle_yOx(O - F(new)) + # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) + # = atan2(sfx * s * h / 2, sfy * c * h / 2) + # = atan2(sfx * s, sfy * c) + # + # For example, + # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) + self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi + + @property + def device(self) -> str: + return self.tensor.device + + def __iter__(self) -> Iterator[torch.Tensor]: + """ + Yield a box as a Tensor of shape (5,) at a time. + """ + yield from self.tensor + + +def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: + """ + Given two lists of rotated boxes of size N and M, + compute the IoU (intersection over union) + between __all__ N x M pairs of boxes. + The box order must be (x_center, y_center, width, height, angle). + + Args: + boxes1, boxes2 (RotatedBoxes): + two `RotatedBoxes`. Contains N & M rotated boxes, respectively. + + Returns: + Tensor: IoU, sized [N,M]. + """ + + return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9765b24a730b77556104187ac3ef5439ab0859fd --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/README.md @@ -0,0 +1,5 @@ +# Utility functions + +This folder contain utility functions that are not used in the +core library, but are useful for building models or training +code using the config system. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..12a395bf51b720fb3ea42528addd2a312b27e44d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/analysis.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..e6120cff919006fcaccd260c3a6d68de608fcb46 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/analysis.py @@ -0,0 +1,177 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -*- coding: utf-8 -*- + +import logging +import typing +import torch +from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table +from torch import nn + +from detectron2.structures import BitMasks, Boxes, ImageList, Instances + +from .logger import log_first_n + +__all__ = [ + "activation_count_operators", + "flop_count_operators", + "parameter_count_table", + "parameter_count", +] + +FLOPS_MODE = "flops" +ACTIVATIONS_MODE = "activations" + + +# some extra ops to ignore from counting. +_IGNORED_OPS = [ + "aten::add", + "aten::add_", + "aten::batch_norm", + "aten::constant_pad_nd", + "aten::div", + "aten::div_", + "aten::exp", + "aten::log2", + "aten::max_pool2d", + "aten::meshgrid", + "aten::mul", + "aten::mul_", + "aten::nonzero_numpy", + "aten::relu", + "aten::relu_", + "aten::rsub", + "aten::sigmoid", + "aten::sigmoid_", + "aten::softmax", + "aten::sort", + "aten::sqrt", + "aten::sub", + "aten::upsample_nearest2d", + "prim::PythonOp", + "torchvision::nms", +] + + +def flop_count_operators( + model: nn.Module, inputs: list, **kwargs +) -> typing.DefaultDict[str, float]: + """ + Implement operator-level flops counting using jit. + This is a wrapper of fvcore.nn.flop_count, that supports standard detection models + in detectron2. + + Note: + The function runs the input through the model to compute flops. + The flops of a detection model is often input-dependent, for example, + the flops of box & mask head depends on the number of proposals & + the number of detected objects. + Therefore, the flops counting using a single input may not accurately + reflect the computation cost of a model. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. + """ + return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) + + +def activation_count_operators( + model: nn.Module, inputs: list, **kwargs +) -> typing.DefaultDict[str, float]: + """ + Implement operator-level activations counting using jit. + This is a wrapper of fvcore.nn.activation_count, that supports standard detection models + in detectron2. + + Note: + The function runs the input through the model to compute activations. + The activations of a detection model is often input-dependent, for example, + the activations of box & mask head depends on the number of proposals & + the number of detected objects. + + Args: + model: a detectron2 model that takes `list[dict]` as input. + inputs (list[dict]): inputs to model, in detectron2's standard format. 
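+
+ Example (sketch; assumes ``model`` and ``inputs`` follow the detectron2 format
+ described above, with a single image in ``inputs``)::
+
+ counts = activation_count_operators(model, inputs) # dict of per-operator activation counts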
+ """ + return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) + + +def _flatten_to_tuple(outputs): + result = [] + if isinstance(outputs, torch.Tensor): + result.append(outputs) + elif isinstance(outputs, (list, tuple)): + for v in outputs: + result.extend(_flatten_to_tuple(v)) + elif isinstance(outputs, dict): + for _, v in outputs.items(): + result.extend(_flatten_to_tuple(v)) + elif isinstance(outputs, Instances): + result.extend(_flatten_to_tuple(outputs.get_fields())) + elif isinstance(outputs, (Boxes, BitMasks, ImageList)): + result.append(outputs.tensor) + else: + log_first_n( + logging.WARN, + f"Output of type {type(outputs)} not included in flops/activations count.", + n=10, + ) + return tuple(result) + + +def _wrapper_count_operators( + model: nn.Module, inputs: list, mode: str, **kwargs +) -> typing.DefaultDict[str, float]: + + # ignore some ops + supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} + supported_ops.update(kwargs.pop("supported_ops", {})) + kwargs["supported_ops"] = supported_ops + + assert len(inputs) == 1, "Please use batch size=1" + tensor_input = inputs[0]["image"] + + class WrapModel(nn.Module): + def __init__(self, model): + super().__init__() + if isinstance( + model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) + ): + self.model = model.module + else: + self.model = model + + def forward(self, image): + # jit requires the input/output to be Tensors + inputs = [{"image": image}] + outputs = self.model.forward(inputs) + # Only the subgraph that computes the returned tuple of tensor will be + # counted. So we flatten everything we found to tuple of tensors. + return _flatten_to_tuple(outputs) + + old_train = model.training + with torch.no_grad(): + if mode == FLOPS_MODE: + ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) + elif mode == ACTIVATIONS_MODE: + ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) + else: + raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) + # compatible with change in fvcore + if isinstance(ret, tuple): + ret = ret[0] + model.train(old_train) + return ret diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/collect_env.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/collect_env.py new file mode 100644 index 0000000000000000000000000000000000000000..c8ab1442c667aa5bac81e864e5aca9ebb9aa4c18 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/collect_env.py @@ -0,0 +1,196 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import importlib +import numpy as np +import os +import re +import subprocess +import sys +from collections import defaultdict +import PIL +import torch +import torchvision +from tabulate import tabulate + +__all__ = ["collect_env_info"] + + +def collect_torch_env(): + try: + import torch.__config__ + + return torch.__config__.show() + except ImportError: + # compatible with older versions of pytorch + from torch.utils.collect_env import get_pretty_env_info + + return get_pretty_env_info() + + +def get_env_module(): + var_name = "DETECTRON2_ENV_MODULE" + return var_name, os.environ.get(var_name, "") + + +def detect_compute_compatibility(CUDA_HOME, so_file): + try: + cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") + if os.path.isfile(cuobjdump): + output = subprocess.check_output( + "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True + ) + output = output.decode("utf-8").strip().split("\n") + sm = [] + for line in output: + line = re.findall(r"\.sm_[0-9]*\.", line)[0] + sm.append(line.strip(".")) + sm = sorted(set(sm)) + return ", ".join(sm) + else: + return so_file + "; cannot find cuobjdump" + except Exception: + # unhandled failure + return so_file + + +def collect_env_info(): + has_gpu = torch.cuda.is_available() # true for both CUDA & ROCM + torch_version = torch.__version__ + + # NOTE: the use of CUDA_HOME and ROCM_HOME requires the CUDA/ROCM build deps, though in + # theory detectron2 should be made runnable with only the corresponding runtimes + from torch.utils.cpp_extension import CUDA_HOME + + has_rocm = False + if tuple(map(int, torch_version.split(".")[:2])) >= (1, 5): + from torch.utils.cpp_extension import ROCM_HOME + + if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None): + has_rocm = True + has_cuda = has_gpu and (not has_rocm) + + data = [] + data.append(("sys.platform", sys.platform)) + data.append(("Python", sys.version.replace("\n", ""))) + data.append(("numpy", np.__version__)) + + try: + import detectron2 # noqa + + data.append( + ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) + ) + except ImportError: + data.append(("detectron2", "failed to import")) + + try: + from detectron2 import _C + except ImportError: + data.append(("detectron2._C", "failed to import")) + + # print system compilers when extension fails to build + if sys.platform != "win32": # don't know what to do for windows + try: + # this is how torch/utils/cpp_extensions.py choose compiler + cxx = os.environ.get("CXX", "c++") + cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True) + cxx = cxx.decode("utf-8").strip().split("\n")[0] + except subprocess.SubprocessError: + cxx = "Not found" + data.append(("Compiler", cxx)) + + if has_cuda and CUDA_HOME is not None: + try: + nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") + nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True) + nvcc = nvcc.decode("utf-8").strip().split("\n")[-1] + except subprocess.SubprocessError: + nvcc = "Not found" + data.append(("CUDA compiler", nvcc)) + else: + # print compilers that are used to build extension + data.append(("Compiler", _C.get_compiler_version())) + data.append(("CUDA compiler", _C.get_cuda_version())) # cuda or hip + if has_cuda: + data.append( + ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) + ) + + data.append(get_env_module()) + data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__))) + data.append(("PyTorch debug build", torch.version.debug)) + + 
data.append(("GPU available", has_gpu)) + if has_gpu: + devices = defaultdict(list) + for k in range(torch.cuda.device_count()): + devices[torch.cuda.get_device_name(k)].append(str(k)) + for name, devids in devices.items(): + data.append(("GPU " + ",".join(devids), name)) + + if has_rocm: + data.append(("ROCM_HOME", str(ROCM_HOME))) + else: + data.append(("CUDA_HOME", str(CUDA_HOME))) + + cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) + if cuda_arch_list: + data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) + data.append(("Pillow", PIL.__version__)) + + try: + data.append( + ( + "torchvision", + str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), + ) + ) + if has_cuda: + try: + torchvision_C = importlib.util.find_spec("torchvision._C").origin + msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) + data.append(("torchvision arch flags", msg)) + except ImportError: + data.append(("torchvision._C", "failed to find")) + except AttributeError: + data.append(("torchvision", "unknown")) + + try: + import fvcore + + data.append(("fvcore", fvcore.__version__)) + except ImportError: + pass + + try: + import cv2 + + data.append(("cv2", cv2.__version__)) + except ImportError: + pass + env_str = tabulate(data) + "\n" + env_str += collect_torch_env() + return env_str + + +if __name__ == "__main__": + try: + import detectron2 # noqa + except ImportError: + print(collect_env_info()) + else: + from detectron2.utils.collect_env import collect_env_info + + print(collect_env_info()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/colormap.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..27ebe872c3751bc615523935ae879b82e5400bb5 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/colormap.py @@ -0,0 +1,153 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. 
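+
+Example (illustrative)::
+
+    from detectron2.utils.colormap import colormap, random_color
+    palette = colormap(rgb=True, maximum=1)      # float32 array of shape (N, 3), values in [0, 1]
+    color = random_color(rgb=True, maximum=255)  # a single color as 3 numbers in [0, 255]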
+""" + +import numpy as np + +__all__ = ["colormap", "random_color"] + +# fmt: off +# RGB: +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] +).astype(np.float32).reshape(-1, 3) +# fmt: on + + +def colormap(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] + """ + assert maximum in [255, 1], maximum + c = _COLORS * maximum + if not rgb: + c = c[:, ::-1] + return c + + +def random_color(rgb=False, maximum=255): + """ + Args: + rgb (bool): whether to return RGB colors or BGR colors. + maximum (int): either 255 or 1 + + Returns: + ndarray: a vector of 3 numbers + """ + idx = np.random.randint(0, len(_COLORS)) + ret = _COLORS[idx] * maximum + if not rgb: + ret = ret[::-1] + return ret + + +if __name__ == "__main__": + import cv2 + + size = 100 + H, W = 10, 10 + canvas = np.random.rand(H * size, W * size, 3).astype("float32") + for h in range(H): + for w in range(W): + idx = h * W + w + if idx >= len(_COLORS): + break + canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] + cv2.imshow("a", canvas) + cv2.waitKey(0) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/comm.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c16e03b5bf44ea76ed79a381832f34aa710b94 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/comm.py @@ -0,0 +1,409 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +""" + +import functools +import logging +import numpy as np +import pickle +import torch +import os +import torch.distributed as dist +from detectron2.structures import Boxes +from detectron2.structures import Instances + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + +def set_device(obj, device='cpu'): + if isinstance(obj, (tuple, list)): + dump = [] + for item in obj: + dump.append(set_device(item, device)) + return dump + elif isinstance(obj, dict): + dump = {} + for key, value in obj.items(): + dump[key] = set_device(value, device) + return dump + elif isinstance(obj, Boxes): + dump = Boxes(obj.tensor.to(device)) + return dump + elif isinstance(obj, Instances): + dump = Instances(obj._image_size) + for key, value in obj._fields.items(): + dump.set(key, set_device(value, device)) + return dump + elif isinstance(obj, torch.Tensor): + return obj.to(device) + else: + return obj + + +def dump_tensor(output, name): + dump = set_device(output, 'cpu') + torch.save(dump,name) + print('%s dump success!' %(name)) + + +def load_tensor(name, device): + output = torch.load(name) + dump = set_device(output, device) + print('%s load success!' 
% (name)) + return dump + + +def pres_check(gpu_path, npu_path): + gpu_list = [] + npu_list = [] + file_list = os.listdir(npu_path) + for item in file_list: + if item.endswith('npu.dat'): + npu_list.append(os.path.join(npu_path, item)) + gpu_list.append(os.path.join(gpu_path, item[:-7] + 'gpu.dat')) + print('all compare file:', gpu_list) + + for npu_item,gpu_item in zip(npu_list, gpu_list): + print('start check %s and %s'%(npu_item, gpu_item)) + gpu_data = torch.load(gpu_item) + npu_data = torch.load(npu_item) + pres_check_item(gpu_data, npu_data, gpu_item) + + +def pres_check_item(gpu_data, npu_data, gpu_item): + assert(type(gpu_data) == type(npu_data)) + if isinstance(gpu_data, (tuple, list)): + for g_item, n_item in zip(gpu_data, npu_data): + pres_check_item(g_item, n_item, gpu_item) + elif isinstance(gpu_data, dict): + for key, val in gpu_data.items(): + pres_check_item(val, npu_data[key], gpu_item + '_' + key) + elif isinstance(gpu_data, Boxes): + pres_check_item(gpu_data.tensor, npu_data.tensor, gpu_item) + elif isinstance(gpu_data, Instances): + for key, val in gpu_data._fields.items(): + pres_check_item(val, npu_data._fields[key], gpu_item + '_' + key) + elif isinstance(gpu_data, torch.Tensor): + g_np = gpu_data.detach().numpy() + n_np = npu_data.detach().numpy() + compare_res(g_np, n_np, os.path.basename(gpu_item)) + +def compare_res(x, y, testcase_name, prec=None, prec16=None): + # pytorch guixiaobing + threshold = 1.e-4 + threshold2 = 1.e-3 + if prec is None: + prec = threshold + if prec16 is None: + prec16 = threshold2 + size = x.size + if torch.is_tensor(x) and torch.is_tensor(y): + x = x.numpy() + y = y.numpy() + if (x.shape != y.shape): + print("%s shpae error"%(testcase_name)) + return + if (x.dtype != y.dtype): + if(x.dtype == np.int8) or (x.dtype == np.int64): + x = np.int32(x) + else: + print("%s dtype error, %s, %s"%(testcase_name, x.dtype, y.dtype)) + return + dtype_list = [np.bool, np.int32, np.float16, np.float32] + if x.dtype not in dtype_list: + print("%s required dtype in [np.bool, np.int32, np.float16, np.float32]"%(testcase_name)) + return + if x.dtype == np.bool: + result = np.equal(x, y) + if result.all() == False: + print("%s error" % testcase_name) + return + elif (x.dtype == np.int32): + result = np.equal(x, y) + err_cnt = size-result.sum() + if result.all() == False: + print("%s error, err_cnt: %d, all_cnt: %d, err_ratio: %f" %(testcase_name,err_cnt, size, float(err_cnt)/size)) + return + elif (x.dtype == np.float16): + result = np.abs(y - x) + result = np.less_equal(result, prec16 * np.abs(x)) + err_cnt = np.sum(result == False) + if result.all() == False: + if err_cnt > size * prec16: + print("%s error, err_cnt: %d, all_cnt: %d, err_ratio: %f" %(testcase_name,err_cnt, size, float(err_cnt)/size)) + return + elif (x.dtype == np.float32): + result = np.abs(y - x) + result = np.less_equal(result, prec * np.abs(x)) + err_cnt = np.sum(result == False) + if result.all() == False: + if err_cnt > size * prec: + print("%s error, err_cnt: %d, all_cnt: %d, err_ratio: %f" %(testcase_name,err_cnt, size, float(err_cnt)/size)) + return + else: + print("%s required numpy object"%(testcase_name)) + return + print("%s success, err_cnt: %d, all_cnt: %d, err_ratio: %f" %(testcase_name,err_cnt, size, float(err_cnt)/size)) + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 
0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "hccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "hccl"] + device = torch.device("cpu" if backend == "gloo" else "npu") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. 
+ + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + + Returns: + a dict with the same keys as input_dict, after reduction. 
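+
+ Example (sketch; the loss tensors below are hypothetical and the default process
+ group is assumed to be initialized)::
+
+ loss_dict = {"loss_cls": loss_cls.detach(), "loss_box_reg": loss_box_reg.detach()}
+ reduced = reduce_dict(loss_dict) # on rank 0: values averaged over all processes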
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/env.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..ffb0ab830c6da55471008976237831a846fe6e3e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/env.py @@ -0,0 +1,135 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import importlib.util +import logging +import numpy as np +import os +import random +import sys +from datetime import datetime +import torch + +__all__ = ["seed_all_rng"] + + +TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) +""" +PyTorch version as a tuple of 2 ints. Useful for comparison. +""" + + +def seed_all_rng(seed=None): + """ + Set the random seed for the RNG in torch, numpy and python. + + Args: + seed (int): if None, will use a strong random seed. + """ + if seed is None: + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + logger = logging.getLogger(__name__) + logger.info("Using a generated random seed {}".format(seed)) + np.random.seed(seed) + torch.set_rng_state(torch.manual_seed(seed).get_state()) + random.seed(seed) + + +# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path +def _import_file(module_name, file_path, make_importable=False): + spec = importlib.util.spec_from_file_location(module_name, file_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if make_importable: + sys.modules[module_name] = module + return module + + +def _configure_libraries(): + """ + Configurations for some libraries. 
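+
+ For example, setting the environment variable ``DETECTRON2_DISABLE_CV2=1`` before
+ launching disables importing cv2 entirely (this option is handled just below).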
+ """ + # An environment option to disable `import cv2` globally, + # in case it leads to negative performance impact + disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) + if disable_cv2: + sys.modules["cv2"] = None + else: + # Disable opencl in opencv since its interaction with cuda often has negative effects + # This envvar is supported after OpenCV 3.4.0 + os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" + try: + import cv2 + + if int(cv2.__version__.split(".")[0]) >= 3: + cv2.ocl.setUseOpenCL(False) + except ImportError: + pass + + def get_version(module, digit=2): + return tuple(map(int, module.__version__.split(".")[:digit])) + + # fmt: off + assert get_version(torch) >= (1, 4), "Requires torch>=1.4" + import fvcore + assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1" + import yaml + assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" + # fmt: on + + +_ENV_SETUP_DONE = False + + +def setup_environment(): + """Perform environment setup work. The default setup is a no-op, but this + function allows the user to specify a Python source file or a module in + the $DETECTRON2_ENV_MODULE environment variable, that performs + custom setup work that may be necessary to their computing environment. + """ + global _ENV_SETUP_DONE + if _ENV_SETUP_DONE: + return + _ENV_SETUP_DONE = True + + _configure_libraries() + + custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") + + if custom_module_path: + setup_custom_environment(custom_module_path) + else: + # The default setup is a no-op + pass + + +def setup_custom_environment(custom_module): + """ + Load custom environment setup by importing a Python source file or a + module, and run the setup function. + """ + if custom_module.endswith(".py"): + module = _import_file("detectron2.utils.env.custom_module", custom_module) + else: + module = importlib.import_module(custom_module) + assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( + "Custom environment module defined in {} does not have the " + "required callable attribute 'setup_environment'." + ).format(custom_module) + module.setup_environment() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/events.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..29c16f6893fc7f6d948d40465aeaa7812b22637b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/events.py @@ -0,0 +1,446 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import datetime +import json +import logging +import os +import time +from collections import defaultdict +from contextlib import contextmanager +import torch +from fvcore.common.file_io import PathManager +from fvcore.common.history_buffer import HistoryBuffer +from torch.utils.tensorboard import SummaryWriter + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. + Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" + return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + + It saves scalars as one json per line (instead of a big json) for easy parsing. + + Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 20, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 40, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = PathManager.open(json_file, "a") + self._window_size = window_size + + def write(self): + storage = get_event_storage() + to_save = {"iteration": storage.iter} + to_save.update(storage.latest_with_smoothing_hint(self._window_size)) + self._file_handle.write(json.dumps(to_save, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + + self._writer = SummaryWriter(log_dir, **kwargs) + + def write(self): + storage = get_event_storage() + for k, v in storage.latest_with_smoothing_hint(self._window_size).items(): + self._writer.add_scalar(k, v, storage.iter) + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. 
+ if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. + storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter): + """ + Args: + max_iter (int): the maximum number of iterations to train. + Used to compute ETA. + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._last_write = None + + def write(self): + storage = get_event_storage() + iteration = storage.iter + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + + eta_string = None + try: + iter_time = storage.history("time").global_avg() + eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + iter_time = None + # estimate eta on our own - more noisy + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + + try: + lr = "{:.6f}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}{fps}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.3f}".format(k, v.median(20)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time="time: {:.4f} ".format(iter_time) if iter_time is not None else "", + data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", + fps="fps: {:.4f} ".format(2/iter_time) if iter_time is not None else "", + lr=lr, + memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + + In the future we may add support for storing / logging other types of data if needed. 
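+
+ Example (illustrative sketch)::
+
+ with EventStorage(start_iter=0) as storage:
+     storage.put_scalar("loss", 0.5)
+     storage.step()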
+ """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(HistoryBuffer) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + def put_image(self, img_name, img_tensor): + """ + Add an `img_tensor` associated with `img_name`, to be shown on + tensorboard. + + Args: + img_name (str): The name of the image to put into tensorboard. + img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + Tensor of shape `[channel, height, width]` where `channel` is + 3. The image format should be RGB. The elements in img_tensor + can either have values in [0, 1] (float32) or [0, 255] (uint8). + The `img_tensor` will be visualized in tensorboard. + """ + self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, smoothing_hint=True): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + value = float(value) + history.update(value, self._iter) + self._latest_scalars[name] = value + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + def put_scalars(self, *, smoothing_hint=True, **kwargs): + """ + Put multiple scalars from keyword arguments. + + Examples: + + storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + """ + for k, v in kwargs.items(): + self.put_scalar(k, v, smoothing_hint=smoothing_hint) + + def put_histogram(self, hist_name, hist_tensor, bins=1000): + """ + Create a histogram from a tensor. + + Args: + hist_name (str): The name of the histogram to put into tensorboard. + hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + into a histogram. + bins (int): Number of histogram bins. 
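+
+ Example (illustrative; the tensor ``widths`` and the bin count are hypothetical)::
+
+ storage.put_histogram("anchor_widths", widths, bins=100)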
+ """ + ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + + # Create a histogram with PyTorch + hist_counts = torch.histc(hist_tensor, bins=bins) + hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + + # Parameter for the add_histogram_raw function of SummaryWriter + hist_params = dict( + tag=hist_name, + min=ht_min, + max=ht_max, + num=len(hist_tensor), + sum=float(hist_tensor.sum()), + sum_squares=float(torch.sum(hist_tensor ** 2)), + bucket_limits=hist_edges[1:].tolist(), + bucket_counts=hist_counts.tolist(), + global_step=self._iter, + ) + self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + HistoryBuffer: the scalar history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[name -> number]: the scalars that's added in the current iteration. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + + This provides a default behavior that other writers can use. + """ + result = {} + for k, v in self._latest_scalars.items(): + result[k] = self._history[k].median(window_size) if self._smoothing_hints[k] else v + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. + """ + return self._smoothing_hints + + def step(self): + """ + User should call this function at the beginning of each iteration, to + notify the storage of the start of a new iteration. + The storage will then be able to associate the new data with the + correct iteration number. + """ + self._iter += 1 + self._latest_scalars = {} + + @property + def iter(self): + return self._iter + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. + """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. 
+ """ + self._histograms = [] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/file_io.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/file_io.py new file mode 100644 index 0000000000000000000000000000000000000000..a9d70ca875172514a19b2141142131244f87c430 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/file_io.py @@ -0,0 +1,35 @@ +from fvcore.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler, PathManagerBase + +__all__ = ["PathManager", "PathHandler"] + + +PathManager = PathManagerBase() +""" +This is a detectron2 project-specific PathManager. +We try to stay away from global PathManager in fvcore as it +introduces potential conflicts among other libraries. +""" + + +class Detectron2Handler(PathHandler): + """ + Resolve anything that's hosted under detectron2's namespace. + """ + + PREFIX = "detectron2://" + S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" + + def _get_supported_prefixes(self): + return [self.PREFIX] + + def _get_local_path(self, path): + name = path[len(self.PREFIX) :] + return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) + + def _open(self, path, mode="r", **kwargs): + return PathManager.open(self._get_local_path(path), mode, **kwargs) + + +PathManager.register_handler(HTTPURLHandler()) +PathManager.register_handler(OneDrivePathHandler()) +PathManager.register_handler(Detectron2Handler()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/logger.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..a6187a3b300aeada38a355f168f44f5330866c94 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/logger.py @@ -0,0 +1,234 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import functools +import logging +import os +import sys +import time +from collections import Counter +from fvcore.common.file_io import PathManager +from tabulate import tabulate +from termcolor import colored + + +class _ColorfulFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + self._root_name = kwargs.pop("root_name") + "." + self._abbrev_name = kwargs.pop("abbrev_name", "") + if len(self._abbrev_name): + self._abbrev_name = self._abbrev_name + "." 
+ super(_ColorfulFormatter, self).__init__(*args, **kwargs) + + def formatMessage(self, record): + record.name = record.name.replace(self._root_name, self._abbrev_name) + log = super(_ColorfulFormatter, self).formatMessage(record) + if record.levelno == logging.WARNING: + prefix = colored("WARNING", "red", attrs=["blink"]) + elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: + prefix = colored("ERROR", "red", attrs=["blink", "underline"]) + else: + return log + return prefix + " " + log + + +@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers +def setup_logger( + output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None +): + """ + Initialize the detectron2 logger and set its verbosity level to "DEBUG". + + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger + abbrev_name (str): an abbreviation of the module, to avoid long names in logs. + Set to "" to not log the root module in logs. + By default, will abbreviate "detectron2" to "d2" and leave other + modules unchanged. + + Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + if abbrev_name is None: + abbrev_name = "d2" if name == "detectron2" else name + + plain_formatter = logging.Formatter( + "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" + ) + # stdout logging: master only + if distributed_rank == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + if color: + formatter = _ColorfulFormatter( + colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", + datefmt="%m/%d %H:%M:%S", + root_name=name, + abbrev_name=str(abbrev_name), + ) + else: + formatter = plain_formatter + ch.setFormatter(formatter) + logger.addHandler(ch) + + # file logging: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + if distributed_rank > 0: + filename = filename + ".rank{}".format(distributed_rank) + PathManager.mkdirs(os.path.dirname(filename)) + + fh = logging.StreamHandler(_cached_log_stream(filename)) + fh.setLevel(logging.DEBUG) + fh.setFormatter(plain_formatter) + logger.addHandler(fh) + + return logger + + +# cache the opened file object, so that different calls to `setup_logger` +# with the same file name can safely write to the same file. +@functools.lru_cache(maxsize=None) +def _cached_log_stream(filename): + return PathManager.open(filename, "a") + + +""" +Below are some other convenient logging methods. 
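# --- Editor's note (not part of the patch): how the setup_logger() helper above
# is typically called; the output directory is illustrative. Rank 0 also logs to
# stdout, and every rank appends to its own log file under `output`.
from detectron2.utils.logger import setup_logger

logger = setup_logger(output="./output", distributed_rank=0, name="detectron2")
logger.info("arguments parsed")   # goes to stdout and ./output/log.txt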
+They are mainly adopted from +https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py +""" + + +def _find_caller(): + """ + Returns: + str: module name of the caller + tuple: a hashable key to be used to identify different callers + """ + frame = sys._getframe(2) + while frame: + code = frame.f_code + if os.path.join("utils", "logger.") not in code.co_filename: + mod_name = frame.f_globals["__name__"] + if mod_name == "__main__": + mod_name = "detectron2" + return mod_name, (code.co_filename, frame.f_lineno, code.co_name) + frame = frame.f_back + + +_LOG_COUNTER = Counter() +_LOG_TIMER = {} + + +def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): + """ + Log only for the first n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + key (str or tuple[str]): the string(s) can be one of "caller" or + "message", which defines how to identify duplicated logs. + For example, if called with `n=1, key="caller"`, this function + will only log the first call from the same caller, regardless of + the message content. + If called with `n=1, key="message"`, this function will log the + same content only once, even if they are called from different places. + If called with `n=1, key=("caller", "message")`, this function + will not log only if the same caller has logged the same message before. + """ + if isinstance(key, str): + key = (key,) + assert len(key) > 0 + + caller_module, caller_key = _find_caller() + hash_key = () + if "caller" in key: + hash_key = hash_key + caller_key + if "message" in key: + hash_key = hash_key + (msg,) + + _LOG_COUNTER[hash_key] += 1 + if _LOG_COUNTER[hash_key] <= n: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n(lvl, msg, n=1, *, name=None): + """ + Log once per n times. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + _LOG_COUNTER[key] += 1 + if n == 1 or _LOG_COUNTER[key] % n == 1: + logging.getLogger(name or caller_module).log(lvl, msg) + + +def log_every_n_seconds(lvl, msg, n=1, *, name=None): + """ + Log no more than once per n seconds. + + Args: + lvl (int): the logging level + msg (str): + n (int): + name (str): name of the logger to use. Will use the caller's module by default. + """ + caller_module, key = _find_caller() + last_logged = _LOG_TIMER.get(key, None) + current_time = time.time() + if last_logged is None or current_time - last_logged >= n: + logging.getLogger(name or caller_module).log(lvl, msg) + _LOG_TIMER[key] = current_time + + +def create_small_table(small_dict): + """ + Create a small table using the keys of small_dict as headers. This is only + suitable for small dictionaries. + + Args: + small_dict (dict): a result dictionary of only a few items. + + Returns: + str: the table as a string. 
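# --- Editor's note (not part of the patch): sketch of the rate-limited helpers
# above; the messages and loop are illustrative. log_first_n de-duplicates by
# caller and/or message, log_every_n_seconds throttles by wall-clock time.
import logging
from detectron2.utils.logger import log_first_n, log_every_n_seconds

for _ in range(1000):
    log_first_n(logging.WARNING, "annotation has no 'keypoints' field", n=1)
    log_every_n_seconds(logging.INFO, "still processing ...", n=5)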
+ """ + keys, values = tuple(zip(*small_dict.items())) + table = tabulate( + [values], + headers=keys, + tablefmt="pipe", + floatfmt=".3f", + stralign="center", + numalign="center", + ) + return table diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/memory.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..ca782a292ecae247a09d33415c36ac845ecbbab9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/memory.py @@ -0,0 +1,97 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from contextlib import contextmanager +from functools import wraps +import torch + +__all__ = ["retry_if_cuda_oom"] + + +@contextmanager +def _ignore_torch_cuda_oom(): + """ + A context which ignores CUDA OOM exception from pytorch. + """ + try: + yield + except RuntimeError as e: + # NOTE: the string may change? + if "CUDA out of memory. " in str(e): + pass + else: + raise + + +def retry_if_cuda_oom(func): + """ + Makes a function retry itself after encountering + pytorch's CUDA OOM error. + It will first retry after calling `torch.cuda.empty_cache()`. + + If that still fails, it will then retry by trying to convert inputs to CPUs. + In this case, it expects the function to dispatch to CPU implementation. + The return values may become CPU tensors as well and it's user's + responsibility to convert it back to CUDA tensor if needed. + + Args: + func: a stateless callable that takes tensor-like objects as arguments + + Returns: + a callable which retries `func` if OOM is encountered. + + Examples: + :: + output = retry_if_cuda_oom(some_torch_function)(input1, input2) + # output may be on CPU even if inputs are on GPU + + Note: + 1. When converting inputs to CPU, it will only look at each argument and check + if it has `.device` and `.to` for conversion. Nested structures of tensors + are not supported. + + 2. Since the function might be called more than once, it has to be + stateless. + """ + + def maybe_to_cpu(x): + try: + like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") + except AttributeError: + like_gpu_tensor = False + if like_gpu_tensor: + return x.to(device="cpu") + else: + return x + + @wraps(func) + def wrapped(*args, **kwargs): + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Clear cache and retry + torch.cuda.empty_cache() + with _ignore_torch_cuda_oom(): + return func(*args, **kwargs) + + # Try on CPU. This slows down the code significantly, therefore print a notice. 
+ logger = logging.getLogger(__name__) + logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) + new_args = (maybe_to_cpu(x) for x in args) + new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} + return func(*new_args, **new_kwargs) + + return wrapped diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/registry.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..4c9706de13a27c87f11383d91e26efca93273729 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/registry.py @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Keep this module for backward compatibility. +from fvcore.common.registry import Registry # noqa + +__all__ = ["Registry"] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/serialize.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/serialize.py new file mode 100644 index 0000000000000000000000000000000000000000..7f323352052f039c3363078f3c3eb82bcc2fcf5c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/serialize.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cloudpickle + + +class PicklableWrapper(object): + """ + Wrap an object to make it more picklable, note that it uses + heavy weight serialization libraries that are slower than pickle. + It's best to use it only on closures (which are usually not picklable). + + This is a simplified version of + https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py + """ + + def __init__(self, obj): + self._obj = obj + + def __reduce__(self): + s = cloudpickle.dumps(self._obj) + return cloudpickle.loads, (s,) + + def __call__(self, *args, **kwargs): + return self._obj(*args, **kwargs) + + def __getattr__(self, attr): + # Ensure that the wrapped object can be used seamlessly as the previous object. 
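# --- Editor's note (not part of the patch): the intended use of PicklableWrapper
# above is making a closure survive pickling (e.g. for dataloader workers); the
# example values are illustrative.
import pickle
from detectron2.utils.serialize import PicklableWrapper

def make_mapper(scale):
    return PicklableWrapper(lambda x: x * scale)   # closures are not normally picklable

mapper = pickle.loads(pickle.dumps(make_mapper(2.0)))
assert mapper(3.0) == 6.0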
+ if attr not in ["_obj"]: + return getattr(self._obj, attr) + return getattr(self, attr) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/video_visualizer.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/video_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..516463ff84f69ed8f1a1b2e8869ade2ae1bbd26b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/video_visualizer.py @@ -0,0 +1,248 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pycocotools.mask as mask_util + +from detectron2.utils.visualizer import ( + ColorMode, + Visualizer, + _create_text_labels, + _PanopticPrediction, +) + +from .colormap import random_color + + +class _DetectedInstance: + """ + Used to store data about detected objects in video frame, + in order to transfer color to objects in the future frames. + + Attributes: + label (int): + bbox (tuple[float]): + mask_rle (dict): + color (tuple[float]): RGB colors in range (0, 1) + ttl (int): time-to-live for the instance. For example, if ttl=2, + the instance color can be transferred to objects in the next two frames. + """ + + __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] + + def __init__(self, label, bbox, mask_rle, color, ttl): + self.label = label + self.bbox = bbox + self.mask_rle = mask_rle + self.color = color + self.ttl = ttl + + +class VideoVisualizer: + def __init__(self, metadata, instance_mode=ColorMode.IMAGE): + """ + Args: + metadata (MetadataCatalog): image metadata. + """ + self.metadata = metadata + self._old_instances = [] + assert instance_mode in [ + ColorMode.IMAGE, + ColorMode.IMAGE_BW, + ], "Other mode not supported yet." + self._instance_mode = instance_mode + + def draw_instance_predictions(self, frame, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. 
+ """ + frame_visualizer = Visualizer(frame, self.metadata) + num_instances = len(predictions) + if num_instances == 0: + return frame_visualizer.output + + boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = predictions.pred_masks + # mask IOU is not yet enabled + # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) + # assert len(masks_rles) == num_instances + else: + masks = None + + detected = [ + _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + + labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) + + if self._instance_mode == ColorMode.IMAGE_BW: + # any() returns uint8 tensor + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + (masks.any(dim=0) > 0).numpy() if masks is not None else None + ) + alpha = 0.3 + else: + alpha = 0.5 + + frame_visualizer.overlay_instances( + boxes=None if masks is not None else boxes, # boxes are a bit distracting + masks=masks, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + + return frame_visualizer.output + + def draw_sem_seg(self, frame, sem_seg, area_threshold=None): + """ + Args: + sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), + each value is the integer label. + area_threshold (Optional[int]): only draw segmentations larger than the threshold + """ + # don't need to do anything special + frame_visualizer = Visualizer(frame, self.metadata) + frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) + return frame_visualizer.output + + def draw_panoptic_seg_predictions( + self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 + ): + frame_visualizer = Visualizer(frame, self.metadata) + pred = _PanopticPrediction(panoptic_seg, segments_info) + + if self._instance_mode == ColorMode.IMAGE_BW: + frame_visualizer.output.img = frame_visualizer._create_grayscale_image( + pred.non_empty_mask() + ) + + # draw mask for all semantic segments first i.e. 
"stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + frame_visualizer.draw_binary_mask( + mask, + color=mask_color, + text=self.metadata.stuff_classes[category_idx], + alpha=alpha, + area_threshold=area_threshold, + ) + + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return frame_visualizer.output + # draw mask for all instances second + masks, sinfo = list(zip(*all_instances)) + num_instances = len(masks) + masks_rles = mask_util.encode( + np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") + ) + assert len(masks_rles) == num_instances + + category_ids = [x["category_id"] for x in sinfo] + detected = [ + _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) + for i in range(num_instances) + ] + colors = self._assign_colors(detected) + labels = [self.metadata.thing_classes[k] for k in category_ids] + + frame_visualizer.overlay_instances( + boxes=None, + masks=masks, + labels=labels, + keypoints=None, + assigned_colors=colors, + alpha=alpha, + ) + return frame_visualizer.output + + def _assign_colors(self, instances): + """ + Naive tracking heuristics to assign same color to the same instance, + will update the internal state of tracked instances. + + Returns: + list[tuple[float]]: list of colors. + """ + + # Compute iou with either boxes or masks: + is_crowd = np.zeros((len(instances),), dtype=np.bool) + if instances[0].bbox is None: + assert instances[0].mask_rle is not None + # use mask iou only when box iou is None + # because box seems good enough + rles_old = [x.mask_rle for x in self._old_instances] + rles_new = [x.mask_rle for x in instances] + ious = mask_util.iou(rles_old, rles_new, is_crowd) + threshold = 0.5 + else: + boxes_old = [x.bbox for x in self._old_instances] + boxes_new = [x.bbox for x in instances] + ious = mask_util.iou(boxes_old, boxes_new, is_crowd) + threshold = 0.6 + if len(ious) == 0: + ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") + + # Only allow matching instances of the same label: + for old_idx, old in enumerate(self._old_instances): + for new_idx, new in enumerate(instances): + if old.label != new.label: + ious[old_idx, new_idx] = 0 + + matched_new_per_old = np.asarray(ious).argmax(axis=1) + max_iou_per_old = np.asarray(ious).max(axis=1) + + # Try to find match for each old instance: + extra_instances = [] + for idx, inst in enumerate(self._old_instances): + if max_iou_per_old[idx] > threshold: + newidx = matched_new_per_old[idx] + if instances[newidx].color is None: + instances[newidx].color = inst.color + continue + # If an old instance does not match any new instances, + # keep it for the next frame in case it is just missed by the detector + inst.ttl -= 1 + if inst.ttl > 0: + extra_instances.append(inst) + + # Assign random color to newly-detected instances: + for inst in instances: + if inst.color is None: + inst.color = random_color(rgb=True, maximum=1) + self._old_instances = instances[:] + extra_instances + return [d.color for d in instances] diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/visualizer.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..3780cad55ba8034eb475f11163831782adbe3729 --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/detectron2/utils/visualizer.py @@ -0,0 +1,1180 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import colorsys +import logging +import math +import numpy as np +from enum import Enum, unique +import cv2 +import matplotlib as mpl +import matplotlib.colors as mplc +import matplotlib.figure as mplfigure +import pycocotools.mask as mask_util +import torch +from fvcore.common.file_io import PathManager +from matplotlib.backends.backend_agg import FigureCanvasAgg +from PIL import Image + +from detectron2.data import MetadataCatalog +from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes + +from .colormap import random_color + +logger = logging.getLogger(__name__) + +__all__ = ["ColorMode", "VisImage", "Visualizer"] + + +_SMALL_OBJECT_AREA_THRESH = 1000 +_LARGE_MASK_AREA_THRESH = 120000 +_OFF_WHITE = (1.0, 1.0, 240.0 / 255) +_BLACK = (0, 0, 0) +_RED = (1.0, 0, 0) + +_KEYPOINT_THRESHOLD = 0.05 + + +@unique +class ColorMode(Enum): + """ + Enum of different color modes to use for instance visualizations. + """ + + IMAGE = 0 + """ + Picks a random color for every instance and overlay segmentations with low opacity. + """ + SEGMENTATION = 1 + """ + Let instances of the same category have similar colors + (from metadata.thing_colors), and overlay them with + high opacity. This provides more attention on the quality of segmentation. + """ + IMAGE_BW = 2 + """ + Same as IMAGE, but convert all areas without masks to gray-scale. + Only available for drawing per-instance mask predictions. + """ + + +class GenericMask: + """ + Attribute: + polygons (list[ndarray]): list[ndarray]: polygons for this mask. + Each ndarray has format [x, y, x, y, ...] 
+ mask (ndarray): a binary mask + """ + + def __init__(self, mask_or_polygons, height, width): + self._mask = self._polygons = self._has_holes = None + self.height = height + self.width = width + + m = mask_or_polygons + if isinstance(m, dict): + # RLEs + assert "counts" in m and "size" in m + if isinstance(m["counts"], list): # uncompressed RLEs + h, w = m["size"] + assert h == height and w == width + m = mask_util.frPyObjects(m, h, w) + self._mask = mask_util.decode(m)[:, :] + return + + if isinstance(m, list): # list[ndarray] + self._polygons = [np.asarray(x).reshape(-1) for x in m] + return + + if isinstance(m, np.ndarray): # assumed to be a binary mask + assert m.shape[1] != 2, m.shape + assert m.shape == (height, width), m.shape + self._mask = m.astype("uint8") + return + + raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) + + @property + def mask(self): + if self._mask is None: + self._mask = self.polygons_to_mask(self._polygons) + return self._mask + + @property + def polygons(self): + if self._polygons is None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + return self._polygons + + @property + def has_holes(self): + if self._has_holes is None: + if self._mask is not None: + self._polygons, self._has_holes = self.mask_to_polygons(self._mask) + else: + self._has_holes = False # if original format is polygon, does not have holes + return self._has_holes + + def mask_to_polygons(self, mask): + # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level + # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. + # Internal contours (holes) are placed in hierarchy-2. + # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
+ mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr + res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + hierarchy = res[-1] + if hierarchy is None: # empty mask + return [], False + has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 + res = res[-2] + res = [x.flatten() for x in res] + res = [x for x in res if len(x) >= 6] + return res, has_holes + + def polygons_to_mask(self, polygons): + rle = mask_util.frPyObjects(polygons, self.height, self.width) + rle = mask_util.merge(rle) + return mask_util.decode(rle)[:, :] + + def area(self): + return self.mask.sum() + + def bbox(self): + p = mask_util.frPyObjects(self.polygons, self.height, self.width) + p = mask_util.merge(p) + bbox = mask_util.toBbox(p) + bbox[2] += bbox[0] + bbox[3] += bbox[1] + return bbox + + +class _PanopticPrediction: + def __init__(self, panoptic_seg, segments_info): + self._seg = panoptic_seg + + self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info + segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) + areas = areas.numpy() + sorted_idxs = np.argsort(-areas) + self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] + self._seg_ids = self._seg_ids.tolist() + for sid, area in zip(self._seg_ids, self._seg_areas): + if sid in self._sinfo: + self._sinfo[sid]["area"] = float(area) + + def non_empty_mask(self): + """ + Returns: + (H, W) array, a mask for all pixels that have a prediction + """ + empty_ids = [] + for id in self._seg_ids: + if id not in self._sinfo: + empty_ids.append(id) + if len(empty_ids) == 0: + return np.zeros(self._seg.shape, dtype=np.uint8) + assert ( + len(empty_ids) == 1 + ), ">1 ids corresponds to no labels. This is currently not supported" + return (self._seg != empty_ids[0]).numpy().astype(np.bool) + + def semantic_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or sinfo["isthing"]: + # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. + continue + yield (self._seg == sid).numpy().astype(np.bool), sinfo + + def instance_masks(self): + for sid in self._seg_ids: + sinfo = self._sinfo.get(sid) + if sinfo is None or not sinfo["isthing"]: + continue + mask = (self._seg == sid).numpy().astype(np.bool) + if mask.sum() > 0: + yield mask, sinfo + + +def _create_text_labels(classes, scores, class_names): + """ + Args: + classes (list[int] or None): + scores (list[float] or None): + class_names (list[str] or None): + + Returns: + list[str] or None + """ + labels = None + if classes is not None and class_names is not None and len(class_names) > 0: + labels = [class_names[i] for i in classes] + if scores is not None: + if labels is None: + labels = ["{:.0f}%".format(s * 100) for s in scores] + else: + labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] + return labels + + +class VisImage: + def __init__(self, img, scale=1.0): + """ + Args: + img (ndarray): an RGB image of shape (H, W, 3). + scale (float): scale the input image + """ + self.img = img + self.scale = scale + self.width, self.height = img.shape[1], img.shape[0] + self._setup_figure(img) + + def _setup_figure(self, img): + """ + Args: + Same as in :meth:`__init__()`. + + Returns: + fig (matplotlib.pyplot.figure): top level container for all the image plot elements. + ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
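# --- Editor's note (not part of the patch): a small sketch of the GenericMask
# conversions defined above; the 4x4 mask is made up. COCO RLE dicts, polygon
# lists and binary (H, W) arrays are all accepted and converted lazily.
import numpy as np
from detectron2.utils.visualizer import GenericMask

m = np.zeros((4, 4), dtype=np.uint8)
m[:2, :2] = 1                       # a 2x2 square
gm = GenericMask(m, height=4, width=4)
print(gm.area())                    # 4
print(gm.bbox())                    # XYXY box around the square
print(gm.polygons)                  # polygon(s) recovered via cv2.findContours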
+ """ + fig = mplfigure.Figure(frameon=False) + self.dpi = fig.get_dpi() + # add a small 1e-2 to avoid precision lost due to matplotlib's truncation + # (https://github.com/matplotlib/matplotlib/issues/15363) + fig.set_size_inches( + (self.width * self.scale + 1e-2) / self.dpi, + (self.height * self.scale + 1e-2) / self.dpi, + ) + self.canvas = FigureCanvasAgg(fig) + # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) + ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) + ax.axis("off") + ax.set_xlim(0.0, self.width) + ax.set_ylim(self.height) + + self.fig = fig + self.ax = ax + + def save(self, filepath): + """ + Args: + filepath (str): a string that contains the absolute path, including the file name, where + the visualized image will be saved. + """ + if filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"): + # faster than matplotlib's imshow + cv2.imwrite(filepath, self.get_image()[:, :, ::-1]) + else: + # support general formats (e.g. pdf) + self.ax.imshow(self.img, interpolation="nearest") + self.fig.savefig(filepath) + + def get_image(self): + """ + Returns: + ndarray: + the visualized image of shape (H, W, 3) (RGB) in uint8 type. + The shape is scaled w.r.t the input image using the given `scale` argument. + """ + canvas = self.canvas + s, (width, height) = canvas.print_to_buffer() + if (self.width, self.height) != (width, height): + img = cv2.resize(self.img, (width, height)) + else: + img = self.img + + # buf = io.BytesIO() # works for cairo backend + # canvas.print_rgba(buf) + # width, height = self.width, self.height + # s = buf.getvalue() + + buffer = np.frombuffer(s, dtype="uint8") + + # imshow is slow. blend manually (still quite slow) + img_rgba = buffer.reshape(height, width, 4) + rgb, alpha = np.split(img_rgba, [3], axis=2) + + try: + import numexpr as ne # fuse them with numexpr + + visualized_image = ne.evaluate("img * (1 - alpha / 255.0) + rgb * (alpha / 255.0)") + except ImportError: + alpha = alpha.astype("float32") / 255.0 + visualized_image = img * (1 - alpha) + rgb * alpha + + visualized_image = visualized_image.astype("uint8") + + return visualized_image + + +class Visualizer: + """ + Visualizer that draws data about detection/segmentation on images. + + It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` + that draw primitive objects to images, as well as high-level wrappers like + `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` + that draw composite data in some pre-defined style. + + Note that the exact visualization style for the high-level wrappers are subject to change. + Style such as color, opacity, label contents, visibility of labels, or even the visibility + of objects themselves (e.g. when the object is too small) may change according + to different heuristics, as long as the results still look visually reasonable. + To obtain a consistent style, implement custom drawing functions with the primitive + methods instead. + + This visualizer focuses on high rendering quality rather than performance. It is not + designed to be used for real-time applications. + """ + + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): + """ + Args: + img_rgb: a numpy array of shape (H, W, C), where H and W correspond to + the height and width of the image respectively. C is the number of + color channels. The image is required to be in RGB format since that + is a requirement of the Matplotlib library. The image is also expected + to be in the range [0, 255]. 
+ metadata (MetadataCatalog): image metadata. + instance_mode (ColorMode): defines one of the pre-defined style for drawing + instances on an image. + """ + self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) + if metadata is None: + metadata = MetadataCatalog.get("__nonexist__") + self.metadata = metadata + self.output = VisImage(self.img, scale=scale) + self.cpu_device = torch.device("cpu") + + # too small texts are useless, therefore clamp to 9 + self._default_font_size = max( + np.sqrt(self.output.height * self.output.width) // 90, 10 // scale + ) + self._instance_mode = instance_mode + + def draw_instance_predictions(self, predictions): + """ + Draw instance-level prediction results on an image. + + Args: + predictions (Instances): the output of an instance detection/segmentation + model. Following fields will be used to draw: + "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). + + Returns: + output (VisImage): image object with visualizations. + """ + boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None + scores = predictions.scores if predictions.has("scores") else None + classes = predictions.pred_classes if predictions.has("pred_classes") else None + labels = _create_text_labels(classes.long(), scores, self.metadata.get("thing_classes", None)) + keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None + + if predictions.has("pred_masks"): + masks = np.asarray(predictions.pred_masks) + masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] + else: + masks = None + + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes + ] + alpha = 0.8 + else: + colors = None + alpha = 0.5 + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image( + (predictions.pred_masks.any(dim=0) > 0).numpy() + ) + alpha = 0.3 + + self.overlay_instances( + masks=masks, + boxes=boxes, + labels=labels, + keypoints=keypoints, + assigned_colors=colors, + alpha=alpha, + ) + return self.output + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): + """ + Draw semantic segmentation predictions/labels. + + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + + Returns: + output (VisImage): image object with visualizations. + """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = self.metadata.stuff_classes[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + def draw_panoptic_seg_predictions( + self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 + ): + """ + Draw panoptic prediction results on an image. 
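# --- Editor's note (not part of the patch): the usual way the Visualizer above
# is driven from an inference loop. `predictor`, the image path and the dataset
# name are all hypothetical.
import cv2
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer

img_bgr = cv2.imread("input.jpg")
outputs = predictor(img_bgr)                             # hypothetical, e.g. a DefaultPredictor
v = Visualizer(img_bgr[:, :, ::-1],                      # BGR -> RGB as required
               metadata=MetadataCatalog.get("coco_2017_val"))
vis = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imwrite("output.jpg", vis.get_image()[:, :, ::-1])   # back to BGR for OpenCV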
+ + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = _PanopticPrediction(panoptic_seg, segments_info) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.img = self._create_grayscale_image(pred.non_empty_mask()) + + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=_OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) + + try: + colors = [random_color(rgb=True, maximum=1) for k in category_ids] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + def draw_dataset_dict(self, dic): + """ + Draw annotations/segmentaions in Detectron2 Dataset format. + + Args: + dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + annos = dic.get("annotations", None) + if annos: + if "segmentation" in annos[0]: + masks = [x["segmentation"] for x in annos] + else: + masks = None + if "keypoints" in annos[0]: + keypts = [x["keypoints"] for x in annos] + keypts = np.array(keypts).reshape(len(annos), -1, 3) + else: + keypts = None + + boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos] + + labels = [x["category_id"] for x in annos] + colors = None + if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): + colors = [ + self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels + ] + names = self.metadata.get("thing_classes", None) + if names: + labels = [names[i] for i in labels] + labels = [ + "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") + for i, a in zip(labels, annos) + ] + self.overlay_instances( + labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors + ) + + sem_seg = dic.get("sem_seg", None) + if sem_seg is None and "sem_seg_file_name" in dic: + with PathManager.open(dic["sem_seg_file_name"], "rb") as f: + sem_seg = Image.open(f) + sem_seg = np.asarray(sem_seg, dtype="uint8") + if sem_seg is not None: + self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) + return self.output + + def overlay_instances( + self, + *, + boxes=None, + labels=None, + masks=None, + keypoints=None, + assigned_colors=None, + alpha=0.5 + ): + """ + Args: + boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, + or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, + or a :class:`RotatedBoxes`, + or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image, + labels (list[str]): the text to be displayed for each instance. + masks (masks-like object): Supported types are: + + * :class:`detectron2.structures.PolygonMasks`, + :class:`detectron2.structures.BitMasks`. + * list[list[ndarray]]: contains the segmentation masks for all objects in one image. + The first level of the list corresponds to individual instances. The second + level to all the polygon that compose the instance, and the third level + to the polygon coordinates. The third level should have the format of + [x0, y0, x1, y1, ..., xn, yn] (n >= 3). + * list[ndarray]: each ndarray is a binary mask of shape (H, W). + * list[dict]: each dict is a COCO-style RLE. + keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), + where the N is the number of instances and K is the number of keypoints. + The last dimension corresponds to (x, y, visibility or score). + assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. 
+ """ + num_instances = None + if boxes is not None: + boxes = self._convert_boxes(boxes) + num_instances = len(boxes) + if masks is not None: + masks = self._convert_masks(masks) + if num_instances: + assert len(masks) == num_instances + else: + num_instances = len(masks) + if keypoints is not None: + if num_instances: + assert len(keypoints) == num_instances + else: + num_instances = len(keypoints) + keypoints = self._convert_keypoints(keypoints) + if labels is not None: + assert len(labels) == num_instances + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + if boxes is not None and boxes.shape[1] == 5: + return self.overlay_rotated_instances( + boxes=boxes, labels=labels, assigned_colors=assigned_colors + ) + + # Display in largest to smallest order to reduce occlusion. + areas = None + if boxes is not None: + areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) + elif masks is not None: + areas = np.asarray([x.area() for x in masks]) + + if areas is not None: + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] if boxes is not None else None + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None + assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] + keypoints = keypoints[sorted_idxs] if keypoints is not None else None + + for i in range(num_instances): + color = assigned_colors[i] + if boxes is not None: + self.draw_box(boxes[i], edge_color=color) + + if masks is not None: + for segment in masks[i].polygons: + self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) + + if labels is not None: + # first get a box + if boxes is not None: + x0, y0, x1, y1 = boxes[i] + text_pos = (x0, y0) # if drawing boxes, put text on the box corner. + horiz_align = "left" + elif masks is not None: + x0, y0, x1, y1 = masks[i].bbox() + + # draw text in the center (defined by median) when box is not drawn + # median is less sensitive to outliers. + text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] + horiz_align = "center" + else: + continue # drawing the box confidence for keypoints isn't very useful. + # for small objects, draw text at the side to avoid occlusion + instance_area = (y1 - y0) * (x1 - x0) + if ( + instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale + or y1 - y0 < 40 * self.output.scale + ): + if y1 >= self.output.height - 5: + text_pos = (x1, y0) + else: + text_pos = (x0, y1) + + height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) + * 0.5 + * self._default_font_size + ) + self.draw_text( + labels[i], + text_pos, + color=lighter_color, + horizontal_alignment=horiz_align, + font_size=font_size, + ) + + # draw keypoints + if keypoints is not None: + for keypoints_per_instance in keypoints: + self.draw_and_connect_keypoints(keypoints_per_instance) + + return self.output + + def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): + """ + Args: + boxes (ndarray): an Nx5 numpy array of + (x_center, y_center, width, height, angle_degrees) format + for the N objects in a single image. + labels (list[str]): the text to be displayed for each instance. 
+ assigned_colors (list[matplotlib.colors]): a list of colors, where each color + corresponds to each mask or box in the image. Refer to 'matplotlib.colors' + for full list of formats that the colors are accepted in. + + Returns: + output (VisImage): image object with visualizations. + """ + num_instances = len(boxes) + + if assigned_colors is None: + assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] + if num_instances == 0: + return self.output + + # Display in largest to smallest order to reduce occlusion. + if boxes is not None: + areas = boxes[:, 2] * boxes[:, 3] + + sorted_idxs = np.argsort(-areas).tolist() + # Re-order overlapped instances in descending order. + boxes = boxes[sorted_idxs] + labels = [labels[k] for k in sorted_idxs] if labels is not None else None + colors = [assigned_colors[idx] for idx in sorted_idxs] + + for i in range(num_instances): + self.draw_rotated_box_with_label( + boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None + ) + + return self.output + + def draw_and_connect_keypoints(self, keypoints): + """ + Draws keypoints of an instance and follows the rules for keypoint connections + to draw lines between appropriate keypoints. This follows color heuristics for + line color. + + Args: + keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints + and the last dimension corresponds to (x, y, probability). + + Returns: + output (VisImage): image object with visualizations. + """ + visible = {} + keypoint_names = self.metadata.get("keypoint_names") + for idx, keypoint in enumerate(keypoints): + # draw keypoint + x, y, prob = keypoint + if prob > _KEYPOINT_THRESHOLD: + self.draw_circle((x, y), color=_RED) + if keypoint_names: + keypoint_name = keypoint_names[idx] + visible[keypoint_name] = (x, y) + + if self.metadata.get("keypoint_connection_rules"): + for kp0, kp1, color in self.metadata.keypoint_connection_rules: + if kp0 in visible and kp1 in visible: + x0, y0 = visible[kp0] + x1, y1 = visible[kp1] + color = tuple(x / 255.0 for x in color) + self.draw_line([x0, x1], [y0, y1], color=color) + + # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip + # Note that this strategy is specific to person keypoints. + # For other keypoints, it should just do nothing + try: + ls_x, ls_y = visible["left_shoulder"] + rs_x, rs_y = visible["right_shoulder"] + mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 + except KeyError: + pass + else: + # draw line from nose to mid-shoulder + nose_x, nose_y = visible.get("nose", (None, None)) + if nose_x is not None: + self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) + + try: + # draw line from mid-shoulder to mid-hip + lh_x, lh_y = visible["left_hip"] + rh_x, rh_y = visible["right_hip"] + except KeyError: + pass + else: + mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 + self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) + return self.output + + """ + Primitive drawing functions: + """ + + def draw_text( + self, + text, + position, + *, + font_size=None, + color="g", + horizontal_alignment="center", + rotation=0 + ): + """ + Args: + text (str): class label + position (tuple): a tuple of the x and y coordinates to place text on image. + font_size (int, optional): font of the text. If not provided, a font size + proportional to the image width is calculated and used. + color: color of the text. 
Refer to `matplotlib.colors` for full list + of formats that are accepted. + horizontal_alignment (str): see `matplotlib.text.Text` + rotation: rotation angle in degrees CCW + + Returns: + output (VisImage): image object with text drawn. + """ + if not font_size: + font_size = self._default_font_size + + # since the text background is dark, we don't want the text to be dark + color = np.maximum(list(mplc.to_rgb(color)), 0.2) + color[np.argmax(color)] = max(0.8, np.max(color)) + + x, y = position + self.output.ax.text( + x, + y, + text, + size=font_size * self.output.scale, + family="sans-serif", + bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, + verticalalignment="top", + horizontalalignment=horizontal_alignment, + color=color, + zorder=10, + rotation=rotation, + ) + return self.output + + def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): + """ + Args: + box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 + are the coordinates of the image's top left corner. x1 and y1 are the + coordinates of the image's bottom right corner. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + + Returns: + output (VisImage): image object with box drawn. + """ + x0, y0, x1, y1 = box_coord + width = x1 - x0 + height = y1 - y0 + + linewidth = max(self._default_font_size / 4, 1) + + self.output.ax.add_patch( + mpl.patches.Rectangle( + (x0, y0), + width, + height, + fill=False, + edgecolor=edge_color, + linewidth=linewidth * self.output.scale, + alpha=alpha, + linestyle=line_style, + ) + ) + return self.output + + def draw_rotated_box_with_label( + self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None + ): + """ + Draw a rotated box with label on its top-left corner. + + Args: + rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), + where cnt_x and cnt_y are the center coordinates of the box. + w and h are the width and height of the box. angle represents how + many degrees the box is rotated CCW with regard to the 0-degree box. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + edge_color: color of the outline of the box. Refer to `matplotlib.colors` + for full list of formats that are accepted. + line_style (string): the string to use to create the outline of the boxes. + label (string): label for rotated box. It will not be rendered when set to None. + + Returns: + output (VisImage): image object with box drawn. 
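# --- Editor's note (not part of the patch): how the primitive draw_* calls above
# compose; the coordinates, label and colors are made up. Each call draws onto
# the shared VisImage held in `self.output` and returns it.
import numpy as np
from detectron2.utils.visualizer import Visualizer

v = Visualizer(np.zeros((480, 640, 3), dtype=np.uint8))   # blank RGB canvas
v.draw_box((50, 60, 200, 220), edge_color="lime")
v.draw_text("person 97%", (50, 60), horizontal_alignment="left")
v.output.save("primitives.png")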
+ """ + cnt_x, cnt_y, w, h, angle = rotated_box + area = w * h + # use thinner lines when the box is small + linewidth = self._default_font_size / ( + 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 + ) + + theta = angle * math.pi / 180.0 + c = math.cos(theta) + s = math.sin(theta) + rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] + # x: left->right ; y: top->down + rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] + for k in range(4): + j = (k + 1) % 4 + self.draw_line( + [rotated_rect[k][0], rotated_rect[j][0]], + [rotated_rect[k][1], rotated_rect[j][1]], + color=edge_color, + linestyle="--" if k == 1 else line_style, + linewidth=linewidth, + ) + + if label is not None: + text_pos = rotated_rect[1] # topleft corner + + height_ratio = h / np.sqrt(self.output.height * self.output.width) + label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) + font_size = ( + np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size + ) + self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) + + return self.output + + def draw_circle(self, circle_coord, color, radius=3): + """ + Args: + circle_coord (list(int) or tuple(int)): contains the x and y coordinates + of the center of the circle. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + radius (int): radius of the circle. + + Returns: + output (VisImage): image object with box drawn. + """ + x, y = circle_coord + self.output.ax.add_patch( + mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) + ) + return self.output + + def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): + """ + Args: + x_data (list[int]): a list containing x values of all the points being drawn. + Length of list should match the length of y_data. + y_data (list[int]): a list containing y values of all the points being drawn. + Length of list should match the length of x_data. + color: color of the line. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + linestyle: style of the line. Refer to `matplotlib.lines.Line2D` + for a full list of formats that are accepted. + linewidth (float or None): width of the line. When it's None, + a default value will be computed and used. + + Returns: + output (VisImage): image object with line drawn. + """ + if linewidth is None: + linewidth = self._default_font_size / 3 + linewidth = max(linewidth, 1) + self.output.ax.add_line( + mpl.lines.Line2D( + x_data, + y_data, + linewidth=linewidth * self.output.scale, + color=color, + linestyle=linestyle, + ) + ) + return self.output + + def draw_binary_mask( + self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 + ): + """ + Args: + binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and + W is the image width. Each value in the array is either a 0 or 1 value of uint8 + type. + color: color of the mask. Refer to `matplotlib.colors` for a full list of + formats that are accepted. If None, will pick a random color. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. + text (str): if None, will be drawn in the object's center of mass. + alpha (float): blending efficient. Smaller values lead to more transparent masks. 
+ area_threshold (float): a connected component small than this will not be shown. + + Returns: + output (VisImage): image object with mask drawn. + """ + if color is None: + color = random_color(rgb=True, maximum=1) + color = mplc.to_rgb(color) + + has_valid_segment = False + binary_mask = binary_mask.astype("uint8") # opencv needs uint8 + mask = GenericMask(binary_mask, self.output.height, self.output.width) + shape2d = (binary_mask.shape[0], binary_mask.shape[1]) + + if not mask.has_holes: + # draw polygons for regular masks + for segment in mask.polygons: + area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) + if area < (area_threshold or 0): + continue + has_valid_segment = True + segment = segment.reshape(-1, 2) + self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) + else: + rgba = np.zeros(shape2d + (4,), dtype="float32") + rgba[:, :, :3] = color + rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha + has_valid_segment = True + self.output.ax.imshow(rgba) + + if text is not None and has_valid_segment: + # TODO sometimes drawn on wrong objects. the heuristics here can improve. + lighter_color = self._change_color_brightness(color, brightness_factor=0.7) + _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) + largest_component_id = np.argmax(stats[1:, -1]) + 1 + + # draw text on the largest component, as well as other very large components. + for cid in range(1, _num_cc): + if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: + # median is more stable than centroid + # center = centroids[largest_component_id] + center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] + self.draw_text(text, center, color=lighter_color) + return self.output + + def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): + """ + Args: + segment: numpy array of shape Nx2, containing all the points in the polygon. + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a + full list of formats that are accepted. If not provided, a darker shade + of the polygon color will be used instead. + alpha (float): blending efficient. Smaller values lead to more transparent masks. + + Returns: + output (VisImage): image object with polygon drawn. + """ + if edge_color is None: + # make edge color darker than the polygon color + if alpha > 0.8: + edge_color = self._change_color_brightness(color, brightness_factor=-0.7) + else: + edge_color = color + edge_color = mplc.to_rgb(edge_color) + (1,) + + polygon = mpl.patches.Polygon( + segment, + fill=True, + facecolor=mplc.to_rgb(color) + (alpha,), + edgecolor=edge_color, + linewidth=max(self._default_font_size // 15 * self.output.scale, 1), + ) + self.output.ax.add_patch(polygon) + return self.output + + """ + Internal methods: + """ + + def _jitter(self, color): + """ + Randomly modifies given color to produce a slightly different color than the color given. + + Args: + color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color + picked. The values in the list are in the [0.0, 1.0] range. + + Returns: + jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the + color after being jittered. The values in the list are in the [0.0, 1.0] range. 
+ """ + color = mplc.to_rgb(color) + vec = np.random.rand(3) + # better to do it in another color space + vec = vec / np.linalg.norm(vec) * 0.5 + res = np.clip(vec + color, 0, 1) + return tuple(res) + + def _create_grayscale_image(self, mask=None): + """ + Create a grayscale version of the original image. + The colors in masked area, if given, will be kept. + """ + img_bw = self.img.astype("f4").mean(axis=2) + img_bw = np.stack([img_bw] * 3, axis=2) + if mask is not None: + img_bw[mask] = self.img[mask] + return img_bw + + def _change_color_brightness(self, color, brightness_factor): + """ + Depending on the brightness_factor, gives a lighter or darker color i.e. a color with + less or more saturation than the original color. + + Args: + color: color of the polygon. Refer to `matplotlib.colors` for a full list of + formats that are accepted. + brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of + 0 will correspond to no change, a factor in [-1.0, 0) range will result in + a darker color and a factor in (0, 1.0] range will result in a lighter color. + + Returns: + modified_color (tuple[double]): a tuple containing the RGB values of the + modified color. Each value in the tuple is in the [0.0, 1.0] range. + """ + assert brightness_factor >= -1.0 and brightness_factor <= 1.0 + color = mplc.to_rgb(color) + polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) + modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) + modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness + modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness + modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) + return modified_color + + def _convert_boxes(self, boxes): + """ + Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. + """ + if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): + return boxes.tensor.numpy() + else: + return np.asarray(boxes) + + def _convert_masks(self, masks_or_polygons): + """ + Convert different format of masks or polygons to a tuple of masks and polygons. + + Returns: + list[GenericMask]: + """ + + m = masks_or_polygons + if isinstance(m, PolygonMasks): + m = m.polygons + if isinstance(m, BitMasks): + m = m.tensor.numpy() + if isinstance(m, torch.Tensor): + m = m.numpy() + ret = [] + for x in m: + if isinstance(x, GenericMask): + ret.append(x) + else: + ret.append(GenericMask(x, self.output.height, self.output.width)) + return ret + + def _convert_keypoints(self, keypoints): + if isinstance(keypoints, Keypoints): + keypoints = keypoints.tensor + keypoints = np.asarray(keypoints) + return keypoints + + def get_output(self): + """ + Returns: + output (VisImage): the image output containing the visualizations added + to the image. 
+ """ + return self.output diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/requirements.txt b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d547c4650ac742f7f8a18cf0ae7e0ca90032224 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/requirements.txt @@ -0,0 +1,7 @@ +torch==1.5.0 +apex +torchvision==0.2.2.post2 +fvcore +pycocotools +cloudpickle +tensorboard \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.cfg b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..b09bba99ca88d5cc900d1cc7fb0947d0443522be --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.cfg @@ -0,0 +1,26 @@ +[isort] +line_length=100 +multi_line_output=3 +include_trailing_comma=True +known_standard_library=numpy,setuptools,mock +skip=./datasets,docs +skip_glob=*/__init__.py +known_myself=detectron2 +known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx +no_lines_before=STDLIB,THIRDPARTY +sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER +default_section=FIRSTPARTY + +[mypy] +python_version=3.6 +ignore_missing_imports = True +warn_unused_configs = True +disallow_untyped_defs = True +check_untyped_defs = True +warn_unused_ignores = True +warn_redundant_casts = True +show_column_numbers = True +follow_imports = silent +allow_redefinition = True +; Require all functions to be annotated +disallow_incomplete_defs = True diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..89a7560322541dc20ed9b5dd3d35e5ef8400d042 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/setup.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import shutil +from os import path +from setuptools import find_packages, setup +from typing import List +import torch +from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension +from torch.utils.hipify import hipify_python + +torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] +assert torch_ver >= [1, 4], "Requires PyTorch >= 1.4" + + +def get_version(): + init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py") + init_py = open(init_py_path, "r").readlines() + version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] + version = version_line.split("=")[-1].strip().strip("'\"") + + # The following is used to build release packages. 
+ # Users should never use it. + suffix = os.getenv("D2_VERSION_SUFFIX", "") + version = version + suffix + if os.getenv("BUILD_NIGHTLY", "0") == "1": + from datetime import datetime + + date_str = datetime.today().strftime("%y%m%d") + version = version + ".dev" + date_str + + new_init_py = [l for l in init_py if not l.startswith("__version__")] + new_init_py.append('__version__ = "{}"\n'.format(version)) + with open(init_py_path, "w") as f: + f.write("".join(new_init_py)) + return version + + +def get_extensions(): + this_dir = path.dirname(path.abspath(__file__)) + extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc") + + main_source = path.join(extensions_dir, "vision.cpp") + sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) + + is_rocm_pytorch = False + if torch_ver >= [1, 5]: + from torch.utils.cpp_extension import ROCM_HOME + + is_rocm_pytorch = ( + True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False + ) + + if is_rocm_pytorch: + hipify_python.hipify( + project_directory=this_dir, + output_directory=this_dir, + includes="/detectron2/layers/csrc/*", + show_detailed=True, + is_pytorch_extension=True, + ) + + # Current version of hipify function in pytorch creates an intermediate directory + # named "hip" at the same level of the path hierarchy if a "cuda" directory exists, + # or modifying the hierarchy, if it doesn't. Once pytorch supports + # "same directory" hipification (PR pendeing), the source_cuda will be set + # similarly in both cuda and hip paths, and the explicit header file copy + # (below) will not be needed. + source_cuda = glob.glob(path.join(extensions_dir, "**", "hip", "*.hip")) + glob.glob( + path.join(extensions_dir, "hip", "*.hip") + ) + + shutil.copy( + "detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h", + "detectron2/layers/csrc/box_iou_rotated/hip/box_iou_rotated_utils.h", + ) + shutil.copy( + "detectron2/layers/csrc/deformable/deform_conv.h", + "detectron2/layers/csrc/deformable/hip/deform_conv.h", + ) + + else: + source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob( + path.join(extensions_dir, "*.cu") + ) + + sources = [main_source] + sources + + extension = CppExtension + + extra_compile_args = {"cxx": []} + define_macros = [] + + if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) or os.getenv( + "FORCE_CUDA", "0" + ) == "1": + extension = CUDAExtension + sources += source_cuda + + if not is_rocm_pytorch: + define_macros += [("WITH_CUDA", None)] + extra_compile_args["nvcc"] = [ + "-O3", + "-DCUDA_HAS_FP16=1", + "-D__CUDA_NO_HALF_OPERATORS__", + "-D__CUDA_NO_HALF_CONVERSIONS__", + "-D__CUDA_NO_HALF2_OPERATORS__", + ] + else: + define_macros += [("WITH_HIP", None)] + extra_compile_args["nvcc"] = [] + + # It's better if pytorch can do this by default .. + CC = os.environ.get("CC", None) + if CC is not None: + extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) + + include_dirs = [extensions_dir] + + ext_modules = [ + extension( + "detectron2._C", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +def get_model_zoo_configs() -> List[str]: + """ + Return a list of configs to include in package for model zoo. Copy over these configs inside + detectron2/model_zoo. + """ + + # Use absolute paths while symlinking. 
+ source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs") + destination = path.join( + path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs" + ) + # Symlink the config directory inside package to have a cleaner pip install. + + # Remove stale symlink/directory from a previous build. + if path.exists(source_configs_dir): + if path.islink(destination): + os.unlink(destination) + elif path.isdir(destination): + shutil.rmtree(destination) + + if not path.exists(destination): + try: + os.symlink(source_configs_dir, destination) + except OSError: + # Fall back to copying if symlink fails: ex. on Windows. + shutil.copytree(source_configs_dir, destination) + + config_paths = glob.glob("configs/**/*.yaml", recursive=True) + return config_paths + + +setup( + name="detectron2", + version=get_version(), + author="FAIR", + url="https://github.com/facebookresearch/detectron2", + description="Detectron2 is FAIR's next-generation research " + "platform for object detection and segmentation.", + packages=find_packages(exclude=("configs", "tests*")), + package_data={"detectron2.model_zoo": get_model_zoo_configs()}, + python_requires=">=3.6", + install_requires=[ + "termcolor>=1.1", + "Pillow>=7.0", # or use pillow-simd for better performance + "yacs>=0.1.6", + "tabulate", + "cloudpickle", + "matplotlib", + "mock", + "tqdm>4.29.0", + "tensorboard", + "fvcore>=0.1.1", + "pycocotools>=2.0.1", + "future", # used by caffe2 + "pydot", # used to save caffe2 SVGs + ], + extras_require={ + "all": ["shapely", "psutil"], + "dev": [ + "flake8==3.8.1", + "isort==4.3.21", + "black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2", + "flake8-bugbear", + "flake8-comprehensions", + ], + }, + ext_modules=get_extensions(), + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, +) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f560384045ab4f6bc2beabef1170308fca117eb3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/README.md @@ -0,0 +1,9 @@ +## Unit Tests + +To run the unittests, do: +``` +cd detectron2 +python -m unittest discover -v -s ./tests +``` + +There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..12a395bf51b720fb3ea42528addd2a312b27e44d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
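The README above wires the new tests into standard `unittest` discovery; the same discovery can also be driven from Python when a shell one-liner is not convenient. A minimal sketch (not part of the patched files), assuming it is run from the detectron2 directory added by this patch:

```
# Programmatic equivalent of `python -m unittest discover -v -s ./tests`.
import unittest

suite = unittest.defaultTestLoader.discover("./tests")
unittest.TextTestRunner(verbosity=2).run(suite)
```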
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3af8b0462a3096662f6df47471acde3ff88583 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..56c12d47ee490ab20b02dfe63eae35f687896c6b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco.py @@ -0,0 +1,117 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import numpy as np +import os +import tempfile +import unittest +import pycocotools.mask as mask_util + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json +from detectron2.structures import BoxMode + + +def make_mask(): + """ + Makes a donut shaped binary mask. + """ + H = 100 + W = 100 + mask = np.zeros([H, W], dtype=np.uint8) + for x in range(W): + for y in range(H): + d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y])) + if d > 10 and d < 20: + mask[y, x] = 1 + return mask + + +def uncompressed_rle(mask): + l = mask.flatten(order="F").tolist() + counts = [] + p = False + cnt = 0 + for i in l: + if i == p: + cnt += 1 + else: + counts.append(cnt) + p = i + cnt = 1 + counts.append(cnt) + return {"counts": counts, "size": [mask.shape[0], mask.shape[1]]} + + +def make_dataset_dicts(mask, compressed: bool = True): + """ + Returns a list of dicts that represents a single COCO data point for + object detection. The single instance given by `mask` is represented by + RLE, either compressed or uncompressed. 
+ """ + record = {} + record["file_name"] = "test" + record["image_id"] = 0 + record["height"] = mask.shape[0] + record["width"] = mask.shape[1] + + y, x = np.nonzero(mask) + if compressed: + segmentation = mask_util.encode(np.asarray(mask, order="F")) + else: + segmentation = uncompressed_rle(mask) + min_x = np.min(x) + max_x = np.max(x) + min_y = np.min(y) + max_y = np.max(y) + obj = { + "bbox": [min_x, min_y, max_x, max_y], + "bbox_mode": BoxMode.XYXY_ABS, + "category_id": 0, + "iscrowd": 0, + "segmentation": segmentation, + } + record["annotations"] = [obj] + return [record] + + +class TestRLEToJson(unittest.TestCase): + def test(self): + # Make a dummy dataset. + mask = make_mask() + DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask)) + MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"]) + + # Dump to json. + json_dict = convert_to_coco_dict("test_dataset") + with tempfile.TemporaryDirectory() as tmpdir: + json_file_name = os.path.join(tmpdir, "test.json") + with open(json_file_name, "w") as f: + json.dump(json_dict, f) + # Load from json. + dicts = load_coco_json(json_file_name, "") + + # Check the loaded mask matches the original. + anno = dicts[0]["annotations"][0] + loaded_mask = mask_util.decode(anno["segmentation"]) + self.assertTrue(np.array_equal(loaded_mask, mask)) + + def test_uncompressed_RLE(self): + mask = make_mask() + rle = mask_util.encode(np.asarray(mask, order="F")) + uncompressed = uncompressed_rle(mask) + compressed = mask_util.frPyObjects(uncompressed, *rle["size"]) + self.assertEqual(rle, compressed) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco_evaluation.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..93bf47ba0514e3a16e650816edf16dd11755c97d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_coco_evaluation.py @@ -0,0 +1,134 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
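`TestRLEToJson` above round-trips a mask through COCO json via compressed RLE, and `test_uncompressed_RLE` checks that a hand-built uncompressed RLE converts to the same compressed form. A small hand-checked sketch of that relationship (the 4x4 mask and its run lengths are illustrative, not taken from the test data):

```
# Uncompressed RLE stores column-major run lengths, starting with the run of
# zeros; frPyObjects turns it into the compressed form that encode() produces.
import numpy as np
import pycocotools.mask as mask_util

mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 1                      # a 2x2 block of ones

compressed = mask_util.encode(np.asarray(mask, order="F"))
uncompressed = {"counts": [5, 2, 2, 2, 5], "size": [4, 4]}

assert mask_util.frPyObjects(uncompressed, 4, 4) == compressed
assert np.array_equal(mask_util.decode(compressed), mask)
```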
+import contextlib +import copy +import io +import json +import numpy as np +import os +import tempfile +import unittest +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from detectron2.evaluation.fast_eval_api import COCOeval_opt + + +class TestCOCOeval(unittest.TestCase): + def test(self): + # A small set of images/categories from COCO val + # fmt: off + detections = [{"image_id": 139, "category_id": 1, "bbox": [417.3332824707031, 159.27003479003906, 47.66064453125, 143.00193786621094], "score": 0.9949821829795837, "segmentation": {"size": [426, 640], "counts": "Tc`52W=3N0N4aNN^E7]:4XE1g:8kDMT;U100000001O1gE[Nk8h1dFiNY9Z1aFkN]9g2J3NdN`FlN`9S1cFRN07]9g1bFoM6;X9c1cFoM=8R9g1bFQN>3U9Y30O01OO1O001N2O1N1O4L4L5UNoE3V:CVF6Q:@YF9l9@ZF 0 else 0.0 + msg = "%s: comparing COCO APIs, %s differs by %f" % (name, k, abs_diff) + self.assertTrue(abs_diff < 1e-4, msg=msg) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_detection_utils.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_detection_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a669e2099acaa93248924f18463c46dd7f1f3e50 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_detection_utils.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
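`TestCOCOeval` above reduces to one pattern: run the reference `COCOeval` and the optimized `COCOeval_opt` on identical ground truth and detections, then compare the accumulated arrays. A condensed sketch of that pattern, where `coco_gt` and `coco_dt` are placeholders for already-loaded `COCO` objects (they are not defined here):

```
import numpy as np
from pycocotools.cocoeval import COCOeval

from detectron2.evaluation.fast_eval_api import COCOeval_opt


def accumulated(evaluator_cls, coco_gt, coco_dt, iou_type="bbox"):
    # Run a full evaluate/accumulate/summarize pass and return the raw arrays.
    ev = evaluator_cls(coco_gt, coco_dt, iou_type)
    ev.evaluate()
    ev.accumulate()
    ev.summarize()
    return ev.eval  # contains "precision", "recall", "scores", ...


# ref = accumulated(COCOeval, coco_gt, coco_dt)
# opt = accumulated(COCOeval_opt, coco_gt, coco_dt)
# for key in ("precision", "recall", "scores"):
#     assert np.allclose(ref[key], opt[key], atol=1e-4)
```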
+ +import copy +import numpy as np +import os +import unittest +import pycocotools.mask as mask_util +from fvcore.common.file_io import PathManager + +from detectron2.data import MetadataCatalog, detection_utils +from detectron2.data import transforms as T +from detectron2.structures import BitMasks, BoxMode + + +class TestTransformAnnotations(unittest.TestCase): + def test_transform_simple_annotation(self): + transforms = T.TransformList([T.HFlipTransform(400)]) + anno = { + "bbox": np.asarray([10, 10, 200, 300]), + "bbox_mode": BoxMode.XYXY_ABS, + "category_id": 3, + "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]], + } + + output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400)) + self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300])) + self.assertEqual(len(output["segmentation"]), len(anno["segmentation"])) + self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10])) + + detection_utils.annotations_to_instances([output, output], (400, 400)) + + def test_flip_keypoints(self): + transforms = T.TransformList([T.HFlipTransform(400)]) + anno = { + "bbox": np.asarray([10, 10, 200, 300]), + "bbox_mode": BoxMode.XYXY_ABS, + "keypoints": np.random.rand(17, 3) * 50 + 15, + } + + output = detection_utils.transform_instance_annotations( + copy.deepcopy(anno), + transforms, + (400, 400), + keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices( + ["keypoints_coco_2017_train"] + ), + ) + # The first keypoint is nose + self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0])) + # The last 16 keypoints are 8 left-right pairs + self.assertTrue( + np.allclose( + output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1], + 400 - anno["keypoints"][1:, 0].reshape(-1, 2), + ) + ) + self.assertTrue( + np.allclose( + output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :], + anno["keypoints"][1:, 1:].reshape(-1, 2, 2), + ) + ) + + def test_crop(self): + transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)]) + keypoints = np.random.rand(17, 3) * 50 + 15 + keypoints[:, 2] = 2 + anno = { + "bbox": np.asarray([10, 10, 200, 400]), + "bbox_mode": BoxMode.XYXY_ABS, + "keypoints": keypoints, + } + + output = detection_utils.transform_instance_annotations( + copy.deepcopy(anno), transforms, (10, 10) + ) + # box is shifted and cropped + self.assertTrue((output["bbox"] == np.asarray([0, 0, 0, 10])).all()) + # keypoints are no longer visible + self.assertTrue((output["keypoints"][:, 2] == 0).all()) + + def test_transform_RLE(self): + transforms = T.TransformList([T.HFlipTransform(400)]) + mask = np.zeros((300, 400), order="F").astype("uint8") + mask[:, :200] = 1 + + anno = { + "bbox": np.asarray([10, 10, 200, 300]), + "bbox_mode": BoxMode.XYXY_ABS, + "segmentation": mask_util.encode(mask[:, :, None])[0], + "category_id": 3, + } + output = detection_utils.transform_instance_annotations( + copy.deepcopy(anno), transforms, (300, 400) + ) + mask = output["segmentation"] + self.assertTrue((mask[:, 200:] == 1).all()) + self.assertTrue((mask[:, :200] == 0).all()) + + inst = detection_utils.annotations_to_instances( + [output, output], (400, 400), mask_format="bitmask" + ) + self.assertTrue(isinstance(inst.gt_masks, BitMasks)) + + def test_transform_RLE_resize(self): + transforms = T.TransformList( + [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")] + ) + mask = np.zeros((300, 400), order="F").astype("uint8") + mask[:, :200] = 1 + + anno = { + "bbox": 
np.asarray([10, 10, 200, 300]), + "bbox_mode": BoxMode.XYXY_ABS, + "segmentation": mask_util.encode(mask[:, :, None])[0], + "category_id": 3, + } + output = detection_utils.transform_instance_annotations( + copy.deepcopy(anno), transforms, (400, 400) + ) + + inst = detection_utils.annotations_to_instances( + [output, output], (400, 400), mask_format="bitmask" + ) + self.assertTrue(isinstance(inst.gt_masks, BitMasks)) + + def test_gen_crop(self): + instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} + t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance) + # the box center must fall into the cropped region + self.assertTrue(t.x0 <= 55 <= t.x0 + t.w) + + def test_gen_crop_outside_boxes(self): + instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} + with self.assertRaises(AssertionError): + detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance) + + def test_read_sem_seg(self): + cityscapes_dir = MetadataCatalog.get("cityscapes_fine_sem_seg_val").gt_dir + sem_seg_gt_path = os.path.join( + cityscapes_dir, "frankfurt", "frankfurt_000001_083852_gtFine_labelIds.png" + ) + if not PathManager.exists(sem_seg_gt_path): + raise unittest.SkipTest( + "Semantic segmentation ground truth {} not found.".format(sem_seg_gt_path) + ) + sem_seg = detection_utils.read_image(sem_seg_gt_path, "L") + self.assertEqual(sem_seg.ndim, 3) + self.assertEqual(sem_seg.shape[2], 1) + self.assertEqual(sem_seg.dtype, np.uint8) + self.assertEqual(sem_seg.max(), 32) + self.assertEqual(sem_seg.min(), 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_rotation_transform.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_rotation_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..6e2693ec21e3b150bd3fdae8e93da36b026bfe32 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_rotation_transform.py @@ -0,0 +1,85 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
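The detection-utils tests above all go through `transform_instance_annotations`; its basic call pattern, pulled out into a standalone sketch that reuses the horizontal-flip numbers the first test asserts on:

```
import numpy as np

from detectron2.data import detection_utils
from detectron2.data import transforms as T
from detectron2.structures import BoxMode

# Flip annotations defined on a 400px-wide image.
transforms = T.TransformList([T.HFlipTransform(400)])
anno = {
    "bbox": np.asarray([10, 10, 200, 300]),
    "bbox_mode": BoxMode.XYXY_ABS,
    "category_id": 3,
    "segmentation": [[10, 10, 100, 100, 100, 10]],
}
out = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
assert np.allclose(out["bbox"], [200, 10, 390, 300])  # x-coordinates mirrored
```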
+import numpy as np +import unittest + +from detectron2.data.transforms.transform import RotationTransform + + +class TestRotationTransform(unittest.TestCase): + def assertEqualsArrays(self, a1, a2): + self.assertTrue(np.allclose(a1, a2)) + + def randomData(self, h=5, w=5): + image = np.random.rand(h, w) + coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) + return image, coords, h, w + + def test180(self): + image, coords, h, w = self.randomData(6, 6) + rot = RotationTransform(h, w, 180, expand=False, center=None) + self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) + rotated_coords = [[w - c[0], h - c[1]] for c in coords] + self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) + + def test45_coords(self): + _, coords, h, w = self.randomData(4, 6) + rot = RotationTransform(h, w, 45, expand=False, center=None) + rotated_coords = [ + [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] + for (x, y) in coords + ] + self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) + + def test90(self): + image, coords, h, w = self.randomData() + rot = RotationTransform(h, w, 90, expand=False, center=None) + self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) + rotated_coords = [[c[1], w - c[0]] for c in coords] + self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) + + def test90_expand(self): # non-square image + image, coords, h, w = self.randomData(h=5, w=8) + rot = RotationTransform(h, w, 90, expand=True, center=None) + self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) + rotated_coords = [[c[1], w - c[0]] for c in coords] + self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) + + def test_center_expand(self): + # center has no effect if expand=True because it only affects shifting + image, coords, h, w = self.randomData(h=5, w=8) + angle = np.random.randint(360) + rot1 = RotationTransform(h, w, angle, expand=True, center=None) + rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) + rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) + rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) + for r1 in [rot1, rot2, rot3, rot4]: + for r2 in [rot1, rot2, rot3, rot4]: + self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) + self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) + + def test_inverse_transform(self): + image, coords, h, w = self.randomData(h=5, w=8) + rot = RotationTransform(h, w, 90, expand=True, center=None) + rot_image = rot.apply_image(image) + self.assertEqualsArrays(rot.inverse().apply_image(rot_image), image) + rot = RotationTransform(h, w, 65, expand=True, center=None) + rotated_coords = rot.apply_coords(coords) + self.assertEqualsArrays(rot.inverse().apply_coords(rotated_coords), coords) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_sampler.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..44e8a67b89178196f1563630446488330967e775 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_sampler.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from torch.utils.data.sampler import SequentialSampler + +from detectron2.data.samplers import GroupedBatchSampler + + +class TestGroupedBatchSampler(unittest.TestCase): + def test_missing_group_id(self): + sampler = SequentialSampler(list(range(100))) + group_ids = [1] * 100 + samples = GroupedBatchSampler(sampler, group_ids, 2) + + for mini_batch in samples: + self.assertEqual(len(mini_batch), 2) + + def test_groups(self): + sampler = SequentialSampler(list(range(100))) + group_ids = [1, 0] * 50 + samples = GroupedBatchSampler(sampler, group_ids, 2) + + for mini_batch in samples: + self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_transforms.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..8371cfc5e15f45e54558c56904b6e4af05885b88 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/data/test_transforms.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
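The `GroupedBatchSampler` tests above check a single invariant: every yielded mini-batch draws its indices from one group. A small sketch of that behaviour with illustrative group ids (only fully formed batches are asserted on):

```
from torch.utils.data.sampler import SequentialSampler

from detectron2.data.samplers import GroupedBatchSampler

group_ids = [0, 1, 1, 0, 1, 0, 0, 1]  # e.g. aspect-ratio buckets per image
sampler = SequentialSampler(list(range(len(group_ids))))
for batch in GroupedBatchSampler(sampler, group_ids, 2):
    # Every index in a batch shares the same group id.
    assert len({group_ids[i] for i in batch}) == 1
```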
+ +import logging +import numpy as np +import unittest +from unittest import mock +from PIL import Image, ImageOps + +from detectron2.config import get_cfg +from detectron2.data import detection_utils +from detectron2.data import transforms as T +from detectron2.utils.logger import setup_logger + +logger = logging.getLogger(__name__) + + +class TestTransforms(unittest.TestCase): + def setUp(self): + setup_logger() + + def test_apply_rotated_boxes(self): + np.random.seed(125) + cfg = get_cfg() + is_train = True + augs = detection_utils.build_augmentation(cfg, is_train) + image = np.random.rand(200, 300) + image, transforms = T.apply_augmentations(augs, image) + image_shape = image.shape[:2] # h, w + assert image_shape == (800, 1200) + annotation = {"bbox": [179, 97, 62, 40, -56]} + + boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) + transformed_bbox = transforms.apply_rotated_box(boxes)[0] + + expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) + err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) + assert np.allclose(transformed_bbox, expected_bbox), err_msg + + def test_apply_rotated_boxes_unequal_scaling_factor(self): + np.random.seed(125) + h, w = 400, 200 + newh, neww = 800, 800 + image = np.random.rand(h, w) + augs = [] + augs.append(T.Resize(shape=(newh, neww))) + image, transforms = T.apply_augmentations(augs, image) + image_shape = image.shape[:2] # h, w + assert image_shape == (newh, neww) + + boxes = np.array( + [ + [150, 100, 40, 20, 0], + [150, 100, 40, 20, 30], + [150, 100, 40, 20, 90], + [150, 100, 40, 20, -90], + ], + dtype=np.float64, + ) + transformed_boxes = transforms.apply_rotated_box(boxes) + + expected_bboxes = np.array( + [ + [600, 200, 160, 40, 0], + [600, 200, 144.22205102, 52.91502622, 49.10660535], + [600, 200, 80, 80, 90], + [600, 200, 80, 80, -90], + ], + dtype=np.float64, + ) + err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) + assert np.allclose(transformed_boxes, expected_bboxes), err_msg + + def test_print_augmentation(self): + t = T.RandomCrop("relative", (100, 100)) + self.assertTrue(str(t) == "RandomCrop(crop_type='relative', crop_size=(100, 100))") + + t = T.RandomFlip(prob=0.5) + self.assertTrue(str(t) == "RandomFlip(prob=0.5)") + + t = T.RandomFlip() + self.assertTrue(str(t) == "RandomFlip()") + + def test_random_apply_prob_out_of_range_check(self): + test_probabilities = {0.0: True, 0.5: True, 1.0: True, -0.01: False, 1.01: False} + + for given_probability, is_valid in test_probabilities.items(): + if not is_valid: + self.assertRaises(AssertionError, T.RandomApply, None, prob=given_probability) + else: + T.RandomApply(T.NoOpTransform(), prob=given_probability) + + def test_random_apply_wrapping_aug_probability_occured_evaluation(self): + transform_mock = mock.MagicMock(name="MockTransform", spec=T.Augmentation) + image_mock = mock.MagicMock(name="MockImage") + random_apply = T.RandomApply(transform_mock, prob=0.001) + + with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): + transform = random_apply.get_transform(image_mock) + transform_mock.get_transform.assert_called_once_with(image_mock) + self.assertIsNot(transform, transform_mock) + + def test_random_apply_wrapping_std_transform_probability_occured_evaluation(self): + transform_mock = mock.MagicMock(name="MockTransform", spec=T.Transform) + image_mock = mock.MagicMock(name="MockImage") + random_apply = T.RandomApply(transform_mock, prob=0.001) + + with 
mock.patch.object(random_apply, "_rand_range", return_value=0.0001): + transform = random_apply.get_transform(image_mock) + self.assertIs(transform, transform_mock) + + def test_random_apply_probability_not_occured_evaluation(self): + transform_mock = mock.MagicMock(name="MockTransform", spec=T.Augmentation) + image_mock = mock.MagicMock(name="MockImage") + random_apply = T.RandomApply(transform_mock, prob=0.001) + + with mock.patch.object(random_apply, "_rand_range", return_value=0.9): + transform = random_apply.get_transform(image_mock) + transform_mock.get_transform.assert_not_called() + self.assertIsInstance(transform, T.NoOpTransform) + + def test_augmentation_input_args(self): + input_shape = (100, 100) + output_shape = (50, 50) + + # define two augmentations with different args + class TG1(T.Augmentation): + input_args = ("image", "sem_seg") + + def get_transform(self, image, sem_seg): + return T.ResizeTransform( + input_shape[0], input_shape[1], output_shape[0], output_shape[1] + ) + + class TG2(T.Augmentation): + def get_transform(self, image): + assert image.shape[:2] == output_shape # check that TG1 is applied + return T.HFlipTransform(output_shape[1]) + + image = np.random.rand(*input_shape).astype("float32") + sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8") + inputs = T.StandardAugInput(image, sem_seg=sem_seg) # provide two args + tfms = inputs.apply_augmentations([TG1(), TG2()]) + self.assertIsInstance(tfms[0], T.ResizeTransform) + self.assertIsInstance(tfms[1], T.HFlipTransform) + self.assertTrue(inputs.image.shape[:2] == output_shape) + self.assertTrue(inputs.sem_seg.shape[:2] == output_shape) + + class TG3(T.Augmentation): + input_args = ("image", "nonexist") + + def get_transform(self, image, nonexist): + pass + + with self.assertRaises(AttributeError): + inputs.apply_augmentations([TG3()]) + + def test_color_transforms(self): + rand_img = np.random.random((100, 100, 3)) * 255 + rand_img = rand_img.astype("uint8") + + # Test no-op + noop_transform = T.ColorTransform(lambda img: img) + self.assertTrue(np.array_equal(rand_img, noop_transform.apply_image(rand_img))) + + # Test a ImageOps operation + magnitude = np.random.randint(0, 256) + solarize_transform = T.PILColorTransform(lambda img: ImageOps.solarize(img, magnitude)) + expected_img = ImageOps.solarize(Image.fromarray(rand_img), magnitude) + self.assertTrue(np.array_equal(expected_img, solarize_transform.apply_image(rand_img))) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3af8b0462a3096662f6df47471acde3ff88583 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_mask_ops.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..6ae0dd06e309be1a5cf0012ce1ff6666a037ca9a --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_mask_ops.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import io +import numpy as np +import unittest +from collections import defaultdict +import torch +import tqdm +from fvcore.common.benchmark import benchmark +from fvcore.common.file_io import PathManager +from pycocotools.coco import COCO +from tabulate import tabulate +from torch.nn import functional as F + +from detectron2.data import MetadataCatalog +from detectron2.layers.mask_ops import ( + pad_masks, + paste_mask_in_image_old, + paste_masks_in_image, + scale_boxes, +) +from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks +from detectron2.structures.masks import polygons_to_bitmask + + +def iou_between_full_image_bit_masks(a, b): + intersect = (a & b).sum() + union = (a | b).sum() + return intersect / union + + +def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5): + x0, y0, x1, y1 = box[0], box[1], box[2], box[3] + + img_h, img_w = full_image_bit_mask.shape + + mask_y = np.arange(0.0, mask_size) + 0.5 # mask y sample coords in [0.5, mask_size - 0.5] + mask_x = np.arange(0.0, mask_size) + 0.5 # mask x sample coords in [0.5, mask_size - 0.5] + mask_y = mask_y / mask_size * (y1 - y0) + y0 + mask_x = mask_x / mask_size * (x1 - x0) + x0 + + mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1 + mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1 + gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x)) + ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32) + + full_image_bit_mask = torch.from_numpy(full_image_bit_mask) + mask = F.grid_sample( + full_image_bit_mask[None, None, :, :].to(dtype=torch.float32), + ind[None, :, :, :], + align_corners=True, + ) + + return mask[0, 0] >= threshold + + +class TestMaskCropPaste(unittest.TestCase): + def setUp(self): + json_file = MetadataCatalog.get("coco_2017_val_100").json_file + if not PathManager.isfile(json_file): + raise unittest.SkipTest("{} not found".format(json_file)) + with contextlib.redirect_stdout(io.StringIO()): + json_file = PathManager.get_local_path(json_file) + self.coco = COCO(json_file) + + def test_crop_paste_consistency(self): + """ + rasterize_polygons_within_box (used in training) + and + paste_masks_in_image (used in inference) + should be inverse operations to each other. + + This function runs several implementation of the above two operations and prints + the reconstruction error. 
+ """ + + anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False)) # avoid crowd annotations + + selected_anns = anns[:100] + + ious = [] + for ann in tqdm.tqdm(selected_anns): + results = self.process_annotation(ann) + ious.append([k[2] for k in results]) + + ious = np.array(ious) + mean_ious = ious.mean(axis=0) + table = [] + res_dic = defaultdict(dict) + for row, iou in zip(results, mean_ious): + table.append((row[0], row[1], iou)) + res_dic[row[0]][row[1]] = iou + print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple")) + # assert that the reconstruction is good: + self.assertTrue(res_dic["polygon"]["aligned"] > 0.94) + self.assertTrue(res_dic["roialign"]["aligned"] > 0.95) + + def process_annotation(self, ann, mask_side_len=28): + # Parse annotation data + img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] + height, width = img_info["height"], img_info["width"] + gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]] + gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) + + # Run rasterize .. + torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4) + box_bitmasks = { + "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], + "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), + "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize( + torch_gt_bbox, mask_side_len + )[0], + } + + # Run paste .. + results = defaultdict(dict) + for k, box_bitmask in box_bitmasks.items(): + padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) + scaled_boxes = scale_boxes(torch_gt_bbox, scale) + + r = results[k] + r["old"] = paste_mask_in_image_old( + padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5 + ) + r["aligned"] = paste_masks_in_image( + box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width) + )[0] + + table = [] + for rasterize_method, r in results.items(): + for paste_method, mask in r.items(): + mask = np.asarray(mask) + iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask) + table.append((rasterize_method, paste_method, iou)) + return table + + def test_polygon_area(self): + # Draw polygon boxes + for d in [5.0, 10.0, 1000.0]: + polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]]) + area = polygon.area()[0] + target = d ** 2 + self.assertEqual(area, target) + + # Draw polygon triangles + for d in [5.0, 10.0, 1000.0]: + polygon = PolygonMasks([[[0, 0, 0, d, d, d]]]) + area = polygon.area()[0] + target = d ** 2 / 2 + self.assertEqual(area, target) + + +def benchmark_paste(): + S = 800 + H, W = image_shape = (S, S) + N = 64 + torch.manual_seed(42) + masks = torch.rand(N, 28, 28) + + center = torch.rand(N, 2) * 600 + 100 + wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50) + x0y0 = torch.clamp(center - wh * 0.5, min=0.0) + x1y1 = torch.clamp(center + wh * 0.5, max=S) + boxes = Boxes(torch.cat([x0y0, x1y1], axis=1)) + + def func(device, n=3): + m = masks.to(device=device) + b = boxes.to(device=device) + + def bench(): + for _ in range(n): + paste_masks_in_image(m, b, image_shape) + if device.type == "cuda": + torch.cuda.synchronize() + + return bench + + specs = [{"device": torch.device("cpu"), "n": 3}] + if torch.cuda.is_available(): + specs.append({"device": torch.device("cuda"), "n": 3}) + + benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2) + + +if __name__ == "__main__": 
+ benchmark_paste() + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..3931aa7c6c8afb0e74789fc6ad5ee327b049dd51 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms.py @@ -0,0 +1,52 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import, division, print_function, unicode_literals +import unittest +import torch + +from detectron2.layers import batched_nms +from detectron2.utils.env import TORCH_VERSION + + +class TestNMS(unittest.TestCase): + def _create_tensors(self, N): + boxes = torch.rand(N, 4) * 100 + # Note: the implementation of this function in torchvision is: + # boxes[:, 2:] += torch.rand(N, 2) * 100 + # but it does not guarantee non-negative widths/heights constraints: + # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: + boxes[:, 2:] += boxes[:, :2] + scores = torch.rand(N) + return boxes, scores + + @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") + def test_nms_scriptability(self): + N = 2000 + num_classes = 50 + boxes, scores = self._create_tensors(N) + idxs = torch.randint(0, num_classes, (N,)) + scripted_batched_nms = torch.jit.script(batched_nms) + err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}" + + for iou in [0.2, 0.5, 0.8]: + keep_ref = batched_nms(boxes, scores, idxs, iou) + backup = boxes.clone() + scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou) + assert torch.allclose(boxes, backup), "boxes modified by jit-scripted batched_nms" + self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou)) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms_rotated.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..cff4b88fb6aaa3b6a2558224db1fe294a0bdeb5e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_nms_rotated.py @@ -0,0 +1,200 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import, division, print_function, unicode_literals +import numpy as np +import unittest +import torch +from torchvision import ops + +from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated + + +def nms_edit_distance(keep1, keep2): + """ + Compare the "keep" result of two nms call. + They are allowed to be different in terms of edit distance + due to floating point precision issues, e.g., + if a box happen to have an IoU of 0.5 with another box, + one implentation may choose to keep it while another may discard it. + """ + if torch.equal(keep1, keep2): + # they should be equal most of the time + return 0 + keep1, keep2 = tuple(keep1.cpu()), tuple(keep2.cpu()) + m, n = len(keep1), len(keep2) + + # edit distance with DP + f = [np.arange(n + 1), np.arange(n + 1)] + for i in range(m): + cur_row = i % 2 + other_row = (i + 1) % 2 + f[other_row][0] = i + 1 + for j in range(n): + f[other_row][j + 1] = ( + f[cur_row][j] + if keep1[i] == keep2[j] + else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1 + ) + return f[m % 2][n] + + +class TestNMSRotated(unittest.TestCase): + def reference_horizontal_nms(self, boxes, scores, iou_threshold): + """ + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob) + iou_threshold: intersection over union threshold. + Returns: + picked: a list of indexes of the kept boxes + """ + picked = [] + _, indexes = scores.sort(descending=True) + while len(indexes) > 0: + current = indexes[0] + picked.append(current.item()) + if len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[1:] + rest_boxes = boxes[indexes, :] + iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1) + indexes = indexes[iou <= iou_threshold] + + return torch.as_tensor(picked) + + def _create_tensors(self, N): + boxes = torch.rand(N, 4) * 100 + # Note: the implementation of this function in torchvision is: + # boxes[:, 2:] += torch.rand(N, 2) * 100 + # but it does not guarantee non-negative widths/heights constraints: + # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: + boxes[:, 2:] += boxes[:, :2] + scores = torch.rand(N) + return boxes, scores + + def test_batched_nms_rotated_0_degree_cpu(self): + N = 2000 + num_classes = 50 + boxes, scores = self._create_tensors(N) + idxs = torch.randint(0, num_classes, (N,)) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" + for iou in [0.2, 0.5, 0.8]: + backup = boxes.clone() + keep_ref = batched_nms(boxes, scores, idxs, iou) + assert torch.allclose(boxes, backup), "boxes modified by batched_nms" + backup = rotated_boxes.clone() + keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou) + assert torch.allclose( + rotated_boxes, backup + ), "rotated_boxes modified by batched_nms_rotated" + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_batched_nms_rotated_0_degree_cuda(self): + N = 2000 + num_classes = 50 + boxes, scores = self._create_tensors(N) + idxs = 
torch.randint(0, num_classes, (N,)) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" + for iou in [0.2, 0.5, 0.8]: + backup = boxes.clone() + keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou) + self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms") + backup = rotated_boxes.clone() + keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou) + self.assertTrue( + torch.allclose(rotated_boxes, backup), + "rotated_boxes modified by batched_nms_rotated", + ) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 2, err_msg.format(iou)) + + def test_nms_rotated_0_degree_cpu(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.5]: + keep_ref = self.reference_horizontal_nms(boxes, scores, iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + def test_nms_rotated_90_degrees_cpu(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]: + # widths and heights are intentionally swapped here for 90 degrees case + # so that the reference horizontal nms could be used + rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1] + rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0] + + rotated_boxes[:, 4] = torch.ones(N) * 90 + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.2, 0.5, 0.8]: + keep_ref = self.reference_horizontal_nms(boxes, scores, iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + def test_nms_rotated_180_degrees_cpu(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + rotated_boxes[:, 4] = torch.ones(N) * 180 + err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" + for iou in [0.2, 0.5, 0.8]: + keep_ref = self.reference_horizontal_nms(boxes, scores, iou) + keep = nms_rotated(rotated_boxes, scores, iou) + self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_nms_rotated_0_degree_cuda(self): + N = 1000 + boxes, scores = self._create_tensors(N) + rotated_boxes = torch.zeros(N, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = 
boxes[:, 3] - boxes[:, 1] + err_msg = "Rotated NMS incompatible between CPU and CUDA for IoU={}" + + for iou in [0.2, 0.5, 0.8]: + r_cpu = nms_rotated(rotated_boxes, scores, iou) + r_cuda = nms_rotated(rotated_boxes.cuda(), scores.cuda(), iou) + self.assertLessEqual(nms_edit_distance(r_cpu, r_cuda.cpu()), 1, err_msg.format(iou)) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..f64a144822697d55d9677f62d2db5601baedeaa8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align.py @@ -0,0 +1,165 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import unittest +import cv2 +import torch +from fvcore.common.benchmark import benchmark + +from detectron2.layers.roi_align import ROIAlign + + +class ROIAlignTest(unittest.TestCase): + def test_forward_output(self): + input = np.arange(25).reshape(5, 5).astype("float32") + """ + 0 1 2 3 4 + 5 6 7 8 9 + 10 11 12 13 14 + 15 16 17 18 19 + 20 21 22 23 24 + """ + + output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) + output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) + + # without correction: + old_results = [ + [7.5, 8, 8.5, 9], + [10, 10.5, 11, 11.5], + [12.5, 13, 13.5, 14], + [15, 15.5, 16, 16.5], + ] + + # with 0.5 correction: + correct_results = [ + [4.5, 5.0, 5.5, 6.0], + [7.0, 7.5, 8.0, 8.5], + [9.5, 10.0, 10.5, 11.0], + [12.0, 12.5, 13.0, 13.5], + ] + # This is an upsampled version of [[6, 7], [11, 12]] + + self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) + self.assertTrue( + np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) + ) + + # Also see similar issues in tensorflow at + # https://github.com/tensorflow/tensorflow/issues/26278 + + def test_resize(self): + H, W = 30, 30 + input = np.random.rand(H, W).astype("float32") * 100 + box = [10, 10, 20, 20] + output = self._simple_roialign(input, box, (5, 5), aligned=True) + + input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) + box2x = [x / 2 for x in box] + output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) + diff = np.abs(output2x - output) + self.assertTrue(diff.max() < 1e-4) + + def _simple_roialign(self, img, box, resolution, aligned=True): + """ + RoiAlign with scale 1.0 and 0 sample ratio. 
+        """
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+        op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
+        input = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        output = op.forward(input, rois)
+        if torch.cuda.is_available():
+            output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
+            self.assertTrue(torch.allclose(output, output_cuda))
+        return output[0, 0]
+
+    def _simple_roialign_with_grad(self, img, box, resolution, device):
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+
+        op = ROIAlign(resolution, 1.0, 0, aligned=True)
+        input = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        input = input.to(device=device)
+        rois = rois.to(device=device)
+        input.requires_grad = True
+        output = op.forward(input, rois)
+        return input, output
+
+    def test_empty_box(self):
+        img = np.random.rand(5, 5)
+        box = [3, 4, 5, 4]
+        o = self._simple_roialign(img, box, 7)
+        self.assertTrue(o.shape == (7, 7))
+        self.assertTrue((o == 0).all())
+
+        for dev in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):
+            input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev))
+            output.sum().backward()
+            self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input)))
+
+    def test_empty_batch(self):
+        input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
+        rois = torch.zeros(0, 5, dtype=torch.float32)
+        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
+        output = op.forward(input, rois)
+        self.assertTrue(output.shape == (0, 3, 7, 7))
+
+
+def benchmark_roi_align():
+    from detectron2 import _C
+
+    def random_boxes(mean_box, stdev, N, maxsize):
+        ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float)
+        ret.clamp_(min=0, max=maxsize)
+        return ret
+
+    def func(N, C, H, W, nboxes_per_img):
+        input = torch.rand(N, C, H, W)
+        boxes = []
+        batch_idx = []
+        for k in range(N):
+            b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
+            # try smaller boxes:
+            # b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
+            boxes.append(b)
+            batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k)
+        boxes = torch.cat(boxes, axis=0)
+        batch_idx = torch.cat(batch_idx, axis=0)
+        boxes = torch.cat([batch_idx, boxes], axis=1)
+
+        input = input.cuda()
+        boxes = boxes.cuda()
+
+        def bench():
+            _C.roi_align_forward(input, boxes, 1.0, 7, 7, 0, True)
+            torch.cuda.synchronize()
+
+        return bench
+
+    args = [dict(N=2, C=512, H=256, W=256, nboxes_per_img=500)]
+    benchmark(func, "cuda_roialign", args, num_iters=20, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    if torch.cuda.is_available():
+        benchmark_roi_align()
+    unittest.main()
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align_rotated.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align_rotated.py
new file mode 100644
index 0000000000000000000000000000000000000000..389e6eea32ce18aac08a040a0d9ce88271f20ca2
--- /dev/null
+++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/layers/test_roi_align_rotated.py
@@ -0,0 +1,189 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import unittest +import cv2 +import torch +from torch.autograd import Variable, gradcheck + +from detectron2.layers.roi_align import ROIAlign +from detectron2.layers.roi_align_rotated import ROIAlignRotated + +logger = logging.getLogger(__name__) + + +class ROIAlignRotatedTest(unittest.TestCase): + def _box_to_rotated_box(self, box, angle): + return [ + (box[0] + box[2]) / 2.0, + (box[1] + box[3]) / 2.0, + box[2] - box[0], + box[3] - box[1], + angle, + ] + + def _rot90(self, img, num): + num = num % 4 # note: -1 % 4 == 3 + for _ in range(num): + img = img.transpose(0, 1).flip(0) + return img + + def test_forward_output_0_90_180_270(self): + for i in range(4): + # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees + img = torch.arange(25, dtype=torch.float32).reshape(5, 5) + """ + 0 1 2 3 4 + 5 6 7 8 9 + 10 11 12 13 14 + 15 16 17 18 19 + 20 21 22 23 24 + """ + box = [1, 1, 3, 3] + rotated_box = self._box_to_rotated_box(box=box, angle=90 * i) + + result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4)) + + # Here's an explanation for 0 degree case: + # point 0 in the original input lies at [0.5, 0.5] + # (the center of bin [0, 1] x [0, 1]) + # point 1 in the original input lies at [1.5, 0.5], etc. + # since the resolution is (4, 4) that divides [1, 3] x [1, 3] + # into 4 x 4 equal bins, + # the top-left bin is [1, 1.5] x [1, 1.5], and its center + # (1.25, 1.25) lies at the 3/4 position + # between point 0 and point 1, point 5 and point 6, + # point 0 and point 5, point 1 and point 6, so it can be calculated as + # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5 + result_expected = torch.tensor( + [ + [4.5, 5.0, 5.5, 6.0], + [7.0, 7.5, 8.0, 8.5], + [9.5, 10.0, 10.5, 11.0], + [12.0, 12.5, 13.0, 13.5], + ] + ) + # This is also an upsampled version of [[6, 7], [11, 12]] + + # When the box is rotated by 90 degrees CCW, + # the result would be rotated by 90 degrees CW, thus it's -i here + result_expected = self._rot90(result_expected, -i) + + assert torch.allclose(result, result_expected) + + def test_resize(self): + H, W = 30, 30 + input = torch.rand(H, W) * 100 + box = [10, 10, 20, 20] + rotated_box = self._box_to_rotated_box(box, angle=0) + output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5)) + + input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) + input2x = torch.from_numpy(input2x) + box2x = [x / 2 for x in box] + rotated_box2x = self._box_to_rotated_box(box2x, angle=0) + output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5)) + assert torch.allclose(output2x, output) + + def _simple_roi_align_rotated(self, img, box, resolution): + """ + RoiAlignRotated with scale 1.0 and 0 sample ratio. 
+ """ + op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0) + input = img[None, None, :, :] + + rois = [0] + list(box) + rois = torch.tensor(rois, dtype=torch.float32)[None, :] + result_cpu = op.forward(input, rois) + if torch.cuda.is_available(): + result_cuda = op.forward(input.cuda(), rois.cuda()) + assert torch.allclose(result_cpu, result_cuda.cpu()) + return result_cpu[0, 0] + + def test_empty_box(self): + img = torch.rand(5, 5) + out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7)) + self.assertTrue((out == 0).all()) + + def test_roi_align_rotated_gradcheck_cpu(self): + dtype = torch.float64 + device = torch.device("cpu") + roi_align_rotated_op = ROIAlignRotated( + output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1 + ).to(dtype=dtype, device=device) + x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) + # roi format is (batch index, x_center, y_center, width, height, angle) + rois = torch.tensor( + [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], + dtype=dtype, + device=device, + ) + + def func(input): + return roi_align_rotated_op(input, rois) + + assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU" + assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU" + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_roi_align_rotated_gradient_cuda(self): + """ + Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU, + and compare the result with ROIAlign + """ + # torch.manual_seed(123) + dtype = torch.float64 + device = torch.device("cuda") + pool_h, pool_w = (5, 5) + + roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to( + device=device + ) + + roi_align_rotated = ROIAlignRotated( + output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2 + ).to(device=device) + + x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) + # x_rotated = x.clone() won't work (will lead to grad_fun=CloneBackward)! + x_rotated = Variable(x.data.clone(), requires_grad=True) + + # roi_rotated format is (batch index, x_center, y_center, width, height, angle) + rois_rotated = torch.tensor( + [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], + dtype=dtype, + device=device, + ) + + y_rotated = roi_align_rotated(x_rotated, rois_rotated) + s_rotated = y_rotated.sum() + s_rotated.backward() + + # roi format is (batch index, x1, y1, x2, y2) + rois = torch.tensor( + [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device + ) + + y = roi_align(x, rois) + s = y.sum() + s.backward() + + assert torch.allclose( + x.grad, x_rotated.grad + ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA" + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3af8b0462a3096662f6df47471acde3ff88583 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_anchor_generator.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f8234935e44baf32f60483d5525e56a2b552174c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_anchor_generator.py @@ -0,0 +1,135 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import unittest +import torch + +from detectron2.config import get_cfg +from detectron2.layers import ShapeSpec +from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator +from detectron2.utils.env import TORCH_VERSION + +logger = logging.getLogger(__name__) + + +class TestAnchorGenerator(unittest.TestCase): + def test_default_anchor_generator(self): + cfg = get_cfg() + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] + + anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)]) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + anchors = anchor_generator([features["stage3"]]) + expected_anchor_tensor = torch.tensor( + [ + [-32.0, -8.0, 32.0, 8.0], + [-16.0, -16.0, 16.0, 16.0], + [-8.0, -32.0, 8.0, 32.0], + [-64.0, -16.0, 64.0, 16.0], + [-32.0, -32.0, 32.0, 32.0], + [-16.0, -64.0, 16.0, 64.0], + [-28.0, -8.0, 36.0, 8.0], # -28.0 == -32.0 + STRIDE (4) + [-12.0, -16.0, 20.0, 16.0], + [-4.0, -32.0, 12.0, 32.0], + [-60.0, -16.0, 68.0, 16.0], + [-28.0, -32.0, 36.0, 32.0], + [-12.0, -64.0, 20.0, 64.0], + ] + ) + + assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) + + def test_default_anchor_generator_centered(self): + # test explicit args + anchor_generator = DefaultAnchorGenerator( + sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4] + ) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + expected_anchor_tensor = torch.tensor( + [ + [-30.0, -6.0, 34.0, 10.0], + [-14.0, -14.0, 18.0, 18.0], + [-6.0, -30.0, 10.0, 34.0], + [-62.0, -14.0, 66.0, 18.0], + [-30.0, -30.0, 34.0, 34.0], + [-14.0, -62.0, 18.0, 66.0], + [-26.0, -6.0, 38.0, 10.0], + [-10.0, -14.0, 22.0, 18.0], + [-2.0, -30.0, 14.0, 34.0], + [-58.0, -14.0, 70.0, 18.0], + [-26.0, -30.0, 38.0, 34.0], 
+ [-10.0, -62.0, 22.0, 66.0], + ] + ) + + anchors = anchor_generator([features["stage3"]]) + assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) + + if TORCH_VERSION >= (1, 6): + anchors = torch.jit.script(anchor_generator)([features["stage3"]]) + assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) + + def test_rrpn_anchor_generator(self): + cfg = get_cfg() + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45] # test single list[float] + anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)]) + + # only the last two dimensions of features matter here + num_images = 2 + features = {"stage3": torch.rand(num_images, 96, 1, 2)} + anchors = anchor_generator([features["stage3"]]) + expected_anchor_tensor = torch.tensor( + [ + [0.0, 0.0, 64.0, 16.0, 0.0], + [0.0, 0.0, 64.0, 16.0, 45.0], + [0.0, 0.0, 32.0, 32.0, 0.0], + [0.0, 0.0, 32.0, 32.0, 45.0], + [0.0, 0.0, 16.0, 64.0, 0.0], + [0.0, 0.0, 16.0, 64.0, 45.0], + [0.0, 0.0, 128.0, 32.0, 0.0], + [0.0, 0.0, 128.0, 32.0, 45.0], + [0.0, 0.0, 64.0, 64.0, 0.0], + [0.0, 0.0, 64.0, 64.0, 45.0], + [0.0, 0.0, 32.0, 128.0, 0.0], + [0.0, 0.0, 32.0, 128.0, 45.0], + [4.0, 0.0, 64.0, 16.0, 0.0], # 4.0 == 0.0 + STRIDE (4) + [4.0, 0.0, 64.0, 16.0, 45.0], + [4.0, 0.0, 32.0, 32.0, 0.0], + [4.0, 0.0, 32.0, 32.0, 45.0], + [4.0, 0.0, 16.0, 64.0, 0.0], + [4.0, 0.0, 16.0, 64.0, 45.0], + [4.0, 0.0, 128.0, 32.0, 0.0], + [4.0, 0.0, 128.0, 32.0, 45.0], + [4.0, 0.0, 64.0, 64.0, 0.0], + [4.0, 0.0, 64.0, 64.0, 45.0], + [4.0, 0.0, 32.0, 128.0, 0.0], + [4.0, 0.0, 32.0, 128.0, 45.0], + ] + ) + + assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_box2box_transform.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_box2box_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..228d38699cf2746edf6d2cc9a89f41a0d4704419 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_box2box_transform.py @@ -0,0 +1,77 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import unittest +import torch + +from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated + +logger = logging.getLogger(__name__) + + +def random_boxes(mean_box, stdev, N): + return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) + + +class TestBox2BoxTransform(unittest.TestCase): + def test_reconstruction(self): + weights = (5, 5, 10, 10) + b2b_tfm = Box2BoxTransform(weights=weights) + src_boxes = random_boxes([10, 10, 20, 20], 1, 10) + dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) + + devices = [torch.device("cpu")] + if torch.cuda.is_available(): + devices.append(torch.device("cuda")) + for device in devices: + src_boxes = src_boxes.to(device=device) + dst_boxes = dst_boxes.to(device=device) + deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) + dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) + assert torch.allclose(dst_boxes, dst_boxes_reconstructed) + + +def random_rotated_boxes(mean_box, std_length, std_angle, N): + return torch.cat( + [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 + ) + torch.tensor(mean_box, dtype=torch.float) + + +class TestBox2BoxTransformRotated(unittest.TestCase): + def test_reconstruction(self): + weights = (5, 5, 10, 10, 1) + b2b_transform = Box2BoxTransformRotated(weights=weights) + src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) + dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) + + devices = [torch.device("cpu")] + if torch.cuda.is_available(): + devices.append(torch.device("cuda")) + for device in devices: + src_boxes = src_boxes.to(device=device) + dst_boxes = dst_boxes.to(device=device) + deltas = b2b_transform.get_deltas(src_boxes, dst_boxes) + dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) + assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) + # angle difference has to be normalized + assert torch.allclose( + (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, + torch.zeros_like(dst_boxes[:, 4]), + atol=1e-4, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_fast_rcnn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b3ff8d56f176fe94c8af0b7a65a89c5a1cf6cd7c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_fast_rcnn.py @@ -0,0 +1,119 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import unittest +import torch + +from detectron2.layers import ShapeSpec +from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated +from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers +from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers +from detectron2.structures import Boxes, Instances, RotatedBoxes +from detectron2.utils.events import EventStorage + +logger = logging.getLogger(__name__) + + +class FastRCNNTest(unittest.TestCase): + def test_fast_rcnn(self): + torch.manual_seed(132) + + box_head_output_size = 8 + + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=5, + ) + feature_pooled = torch.rand(2, box_head_output_size) + predictions = box_predictor(feature_pooled) + + proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) + gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + proposal = Instances((10, 10)) + proposal.proposal_boxes = Boxes(proposal_boxes) + proposal.gt_boxes = Boxes(gt_boxes) + proposal.gt_classes = torch.tensor([1, 2]) + + with EventStorage(): # capture events in a new storage to discard them + losses = box_predictor.losses(predictions, [proposal]) + + expected_losses = { + "loss_cls": torch.tensor(1.7951188087), + "loss_box_reg": torch.tensor(4.0357131958), + } + for name in expected_losses.keys(): + assert torch.allclose(losses[name], expected_losses[name]) + + def test_fast_rcnn_empty_batch(self, device="cpu"): + box_predictor = FastRCNNOutputLayers( + ShapeSpec(channels=10), + box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), + num_classes=8, + ).to(device=device) + + logits = torch.randn(0, 100, requires_grad=True, device=device) + deltas = torch.randn(0, 4, requires_grad=True, device=device) + losses = box_predictor.losses([logits, deltas], []) + for value in losses.values(): + self.assertTrue(torch.allclose(value, torch.zeros_like(value))) + sum(losses.values()).backward() + self.assertTrue(logits.grad is not None) + self.assertTrue(deltas.grad is not None) + + predictions, _ = box_predictor.inference([logits, deltas], []) + self.assertEqual(len(predictions), 0) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_fast_rcnn_empty_batch_cuda(self): + self.test_fast_rcnn_empty_batch(device=torch.device("cuda")) + + def test_fast_rcnn_rotated(self): + torch.manual_seed(132) + box_head_output_size = 8 + + box_predictor = RotatedFastRCNNOutputLayers( + ShapeSpec(channels=box_head_output_size), + box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)), + num_classes=5, + ) + feature_pooled = torch.rand(2, box_head_output_size) + predictions = box_predictor(feature_pooled) + proposal_boxes = torch.tensor( + [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 + ) + gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) + proposal = Instances((10, 10)) + proposal.proposal_boxes = RotatedBoxes(proposal_boxes) + proposal.gt_boxes = RotatedBoxes(gt_boxes) + proposal.gt_classes = torch.tensor([1, 2]) + + with EventStorage(): # capture events in a new storage to discard them + losses = box_predictor.losses(predictions, [proposal]) + + # Note: the expected losses are slightly different even if + # the boxes are essentially the same as in the FastRCNNOutput test, because + # bbox_pred in 
FastRCNNOutputLayers have different Linear layers/initialization + # between the two cases. + expected_losses = { + "loss_cls": torch.tensor(1.7920907736), + "loss_box_reg": torch.tensor(4.0410838127), + } + for name in expected_losses.keys(): + assert torch.allclose(losses[name], expected_losses[name]) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_matcher.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..7a4bb8f9ce5a66a47845c52611200d48fb08c5a8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_matcher.py @@ -0,0 +1,58 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from typing import List +import torch + +from detectron2.config import get_cfg +from detectron2.modeling.matcher import Matcher +from detectron2.utils.env import TORCH_VERSION + + +class TestMatcher(unittest.TestCase): + # need https://github.com/pytorch/pytorch/pull/38378 + @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") + def test_scriptability(self): + cfg = get_cfg() + anchor_matcher = Matcher( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True + ) + match_quality_matrix = torch.tensor( + [[0.15, 0.45, 0.2, 0.6], [0.3, 0.65, 0.05, 0.1], [0.05, 0.4, 0.25, 0.4]] + ) + expected_matches = torch.tensor([1, 1, 2, 0]) + expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8) + + matches, match_labels = anchor_matcher(match_quality_matrix) + self.assertTrue(torch.allclose(matches, expected_matches)) + self.assertTrue(torch.allclose(match_labels, expected_match_labels)) + + # nonzero_tuple must be import explicitly to let jit know what it is. 
+ # https://github.com/pytorch/pytorch/issues/38964 + from detectron2.layers import nonzero_tuple # noqa F401 + + def f(thresholds: List[float], labels: List[int]): + return Matcher(thresholds, labels, allow_low_quality_matches=True) + + scripted_anchor_matcher = torch.jit.script(f)( + cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS + ) + matches, match_labels = scripted_anchor_matcher(match_quality_matrix) + self.assertTrue(torch.allclose(matches, expected_matches)) + self.assertTrue(torch.allclose(match_labels, expected_match_labels)) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_model_e2e.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_model_e2e.py new file mode 100644 index 0000000000000000000000000000000000000000..3d16e336c2449e0221420945a15653ee793fe22b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_model_e2e.py @@ -0,0 +1,170 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import numpy as np +import unittest +import torch + +import detectron2.model_zoo as model_zoo +from detectron2.config import get_cfg +from detectron2.modeling import build_model +from detectron2.structures import BitMasks, Boxes, ImageList, Instances +from detectron2.utils.events import EventStorage + + +def get_model_zoo(config_path): + """ + Like model_zoo.get, but do not load any weights (even pretrained) + """ + cfg_file = model_zoo.get_config_file(config_path) + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + return build_model(cfg) + + +def create_model_input(img, inst=None): + if inst is not None: + return {"image": img, "instances": inst} + else: + return {"image": img} + + +def get_empty_instance(h, w): + inst = Instances((h, w)) + inst.gt_boxes = Boxes(torch.rand(0, 4)) + inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) + inst.gt_masks = BitMasks(torch.rand(0, h, w)) + return inst + + +def get_regular_bitmask_instances(h, w): + inst = Instances((h, w)) + inst.gt_boxes = Boxes(torch.rand(3, 4)) + inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] + inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) + inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) + return inst + + +class ModelE2ETest: + def setUp(self): + torch.manual_seed(43) + self.model = get_model_zoo(self.CONFIG_PATH) + + def _test_eval(self, input_sizes): + inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] + self.model.eval() + self.model(inputs) + + def _test_train(self, input_sizes, instances): + assert len(input_sizes) == len(instances) + inputs = [ + create_model_input(torch.rand(3, s[0], s[1]), inst) + for s, inst in zip(input_sizes, instances) + ] + self.model.train() + with EventStorage(): + losses = 
self.model(inputs) + sum(losses.values()).backward() + del losses + + def _inf_tensor(self, *shape): + return 1.0 / torch.zeros(*shape, device=self.model.device) + + def _nan_tensor(self, *shape): + return torch.zeros(*shape, device=self.model.device).fill_(float("nan")) + + def test_empty_data(self): + instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)] + self._test_eval([(200, 250), (200, 249)]) + self._test_train([(200, 250), (200, 249)], instances) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_eval_tocpu(self): + model = get_model_zoo(self.CONFIG_PATH).cpu() + model.eval() + input_sizes = [(200, 250), (200, 249)] + inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] + model(inputs) + + +class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase): + CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" + + def test_half_empty_data(self): + instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)] + self._test_train([(200, 250), (200, 249)], instances) + + # This test is flaky because in some environment the output features are zero due to relu + # def test_rpn_inf_nan_data(self): + # self.model.eval() + # for tensor in [self._inf_tensor, self._nan_tensor]: + # images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + # features = { + # "p2": tensor(1, 256, 256, 256), + # "p3": tensor(1, 256, 128, 128), + # "p4": tensor(1, 256, 64, 64), + # "p5": tensor(1, 256, 32, 32), + # "p6": tensor(1, 256, 16, 16), + # } + # props, _ = self.model.proposal_generator(images, features) + # self.assertEqual(len(props[0]), 0) + + def test_roiheads_inf_nan_data(self): + self.model.eval() + for tensor in [self._inf_tensor, self._nan_tensor]: + images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + features = { + "p2": tensor(1, 256, 256, 256), + "p3": tensor(1, 256, 128, 128), + "p4": tensor(1, 256, 64, 64), + "p5": tensor(1, 256, 32, 32), + "p6": tensor(1, 256, 16, 16), + } + props = [Instances((510, 510))] + props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device) + props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1) + det, _ = self.model.roi_heads(images, features, props) + self.assertEqual(len(det[0]), 0) + + +class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase): + CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml" + + def test_inf_nan_data(self): + self.model.eval() + self.model.score_threshold = -999999999 + for tensor in [self._inf_tensor, self._nan_tensor]: + images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) + features = [ + tensor(1, 256, 128, 128), + tensor(1, 256, 64, 64), + tensor(1, 256, 32, 32), + tensor(1, 256, 16, 16), + tensor(1, 256, 8, 8), + ] + anchors = self.model.anchor_generator(features) + _, pred_anchor_deltas = self.model.head(features) + HWAs = [np.prod(x.shape[-3:]) // 4 for x in pred_anchor_deltas] + + pred_logits = [tensor(1, HWA, self.model.num_classes) for HWA in HWAs] + pred_anchor_deltas = [tensor(1, HWA, 4) for HWA in HWAs] + det = self.model.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes) + # all predictions (if any) are infinite or nan + if len(det[0]): + self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_heads.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_heads.py new file mode 100644 index 
0000000000000000000000000000000000000000..48f337eaf47239eb34ddfdb3cb2ae84eba911001 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_heads.py @@ -0,0 +1,149 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import unittest +import torch + +from detectron2.config import get_cfg +from detectron2.layers import ShapeSpec +from detectron2.modeling.proposal_generator.build import build_proposal_generator +from detectron2.modeling.roi_heads import StandardROIHeads, build_roi_heads +from detectron2.structures import BitMasks, Boxes, ImageList, Instances, RotatedBoxes +from detectron2.utils.events import EventStorage + +logger = logging.getLogger(__name__) + +""" +Make sure the losses of ROIHeads/RPN do not change, to avoid +breaking the forward logic by mistake. +This relies on assumption that pytorch's RNG is stable. +""" + + +class ROIHeadsTest(unittest.TestCase): + def test_roi_heads(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" + cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 + cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" + cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) + cfg.MODEL.MASK_ON = True + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} + + image_shape = (15, 15) + gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + gt_instance0 = Instances(image_shape) + gt_instance0.gt_boxes = Boxes(gt_boxes0) + gt_instance0.gt_classes = torch.tensor([2, 1]) + gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) + gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) + gt_instance1 = Instances(image_shape) + gt_instance1.gt_boxes = Boxes(gt_boxes1) + gt_instance1.gt_classes = torch.tensor([1, 2]) + gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) + gt_instances = [gt_instance0, gt_instance1] + + proposal_generator = build_proposal_generator(cfg, feature_shape) + roi_heads = StandardROIHeads(cfg, feature_shape) + + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator(images, features, gt_instances) + _, detector_losses = roi_heads(images, features, proposals, gt_instances) + + detector_losses.update(proposal_losses) + expected_losses = { + "loss_cls": 4.5253729820251465, + "loss_box_reg": 0.009785720147192478, + "loss_mask": 0.693184494972229, + "loss_rpn_cls": 0.08186662942171097, + "loss_rpn_loc": 0.1104838103055954, + } + succ = all( + torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) + for name in 
detector_losses.keys() + ) + self.assertTrue( + succ, + "Losses has changed! New losses: {}".format( + {k: v.item() for k, v in detector_losses.items()} + ), + ) + + def test_rroi_heads(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" + cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" + cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" + cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" + cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 + cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) + cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" + cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" + cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} + + image_shape = (15, 15) + gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) + gt_instance0 = Instances(image_shape) + gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) + gt_instance0.gt_classes = torch.tensor([2, 1]) + gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) + gt_instance1 = Instances(image_shape) + gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) + gt_instance1.gt_classes = torch.tensor([1, 2]) + gt_instances = [gt_instance0, gt_instance1] + + proposal_generator = build_proposal_generator(cfg, feature_shape) + roi_heads = build_roi_heads(cfg, feature_shape) + + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator(images, features, gt_instances) + _, detector_losses = roi_heads(images, features, proposals, gt_instances) + + detector_losses.update(proposal_losses) + expected_losses = { + "loss_cls": 4.365657806396484, + "loss_box_reg": 0.0015851043863222003, + "loss_rpn_cls": 0.2427729219198227, + "loss_rpn_loc": 0.3646621108055115, + } + succ = all( + torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) + for name in detector_losses.keys() + ) + self.assertTrue( + succ, + "Losses has changed! New losses: {}".format( + {k: v.item() for k, v in detector_losses.items()} + ), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_pooler.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_pooler.py new file mode 100644 index 0000000000000000000000000000000000000000..7824cd7df53a7fd978d26451c489e4d11bf7d40d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_roi_pooler.py @@ -0,0 +1,142 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import unittest +import torch + +from detectron2.modeling.poolers import ROIPooler +from detectron2.structures import Boxes, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION + +logger = logging.getLogger(__name__) + + +class TestROIPooler(unittest.TestCase): + def _rand_boxes(self, num_boxes, x_max, y_max): + coords = torch.rand(num_boxes, 4) + coords[:, 0] *= x_max + coords[:, 1] *= y_max + coords[:, 2] *= x_max + coords[:, 3] *= y_max + boxes = torch.zeros(num_boxes, 4) + boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2]) + boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3]) + boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2]) + boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3]) + return boxes + + def _test_roialignv2_roialignrotated_match(self, device): + pooler_resolution = 14 + canonical_level = 4 + canonical_scale_factor = 2 ** canonical_level + pooler_scales = (1.0 / canonical_scale_factor,) + sampling_ratio = 0 + + N, C, H, W = 2, 4, 10, 8 + N_rois = 10 + std = 11 + mean = 0 + feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean + + features = [feature.to(device)] + + rois = [] + rois_rotated = [] + for _ in range(N): + boxes = self._rand_boxes( + num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor + ) + + rotated_boxes = torch.zeros(N_rois, 5) + rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 + rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 + rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] + rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + rois.append(Boxes(boxes).to(device)) + rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) + + roialignv2_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignV2", + ) + + roialignv2_out = roialignv2_pooler(features, rois) + + roialignrotated_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignRotated", + ) + + roialignrotated_out = roialignrotated_pooler(features, rois_rotated) + + self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) + + def test_roialignv2_roialignrotated_match_cpu(self): + self._test_roialignv2_roialignrotated_match(device="cpu") + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_roialignv2_roialignrotated_match_cuda(self): + self._test_roialignv2_roialignrotated_match(device="cuda") + + def _test_scriptability(self, device): + pooler_resolution = 14 + canonical_level = 4 + canonical_scale_factor = 2 ** canonical_level + pooler_scales = (1.0 / canonical_scale_factor,) + sampling_ratio = 0 + + N, C, H, W = 2, 4, 10, 8 + N_rois = 10 + std = 11 + mean = 0 + feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean + + features = [feature.to(device)] + + rois = [] + for _ in range(N): + boxes = self._rand_boxes( + num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor + ) + + rois.append(Boxes(boxes).to(device)) + + roialignv2_pooler = ROIPooler( + output_size=pooler_resolution, + scales=pooler_scales, + sampling_ratio=sampling_ratio, + pooler_type="ROIAlignV2", + ) + + roialignv2_out = roialignv2_pooler(features, rois) + scripted_roialignv2_out = torch.jit.script(roialignv2_pooler)(features, rois) + self.assertTrue(torch.equal(roialignv2_out, scripted_roialignv2_out)) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_scriptability_cpu(self): + 
self._test_scriptability(device="cpu") + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_scriptability_gpu(self): + self._test_scriptability(device="cuda") + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_rpn.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..81dec6a5252172ca8be1681da506fea017fb1b72 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/modeling/test_rpn.py @@ -0,0 +1,269 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import unittest +import torch + +from detectron2.config import get_cfg +from detectron2.export.torchscript import export_torchscript_with_instances +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone import build_backbone +from detectron2.modeling.proposal_generator import RPN, build_proposal_generator +from detectron2.modeling.proposal_generator.proposal_utils import find_top_rpn_proposals +from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes +from detectron2.utils.env import TORCH_VERSION +from detectron2.utils.events import EventStorage + +logger = logging.getLogger(__name__) + + +class RPNTest(unittest.TestCase): + def test_rpn(self): + torch.manual_seed(121) + cfg = get_cfg() + backbone = build_backbone(cfg) + proposal_generator = RPN(cfg, backbone.output_shape()) + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + image_shape = (15, 15) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) + gt_instances = Instances(image_shape) + gt_instances.gt_boxes = Boxes(gt_boxes) + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator( + images, features, [gt_instances[0], gt_instances[1]] + ) + + expected_losses = { + "loss_rpn_cls": torch.tensor(0.0804563984), + "loss_rpn_loc": torch.tensor(0.0990132466), + } + for name in expected_losses.keys(): + err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( + name, proposal_losses[name], expected_losses[name] + ) + self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) + + expected_proposal_boxes = [ + Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])), + Boxes( + torch.tensor( + [ + [0, 0, 30, 20], + [0, 0, 16.7862777710, 13.1362524033], + [0, 0, 30, 13.3173446655], + [0, 0, 10.8602609634, 20], + [7.7165775299, 0, 27.3875980377, 20], + ] + ) + ), + ] + + 
expected_objectness_logits = [ + torch.tensor([0.1225359365, -0.0133192837]), + torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]), + ] + + for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( + proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits + ): + self.assertEqual(len(proposal), len(expected_proposal_box)) + self.assertEqual(proposal.image_size, im_size) + self.assertTrue( + torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor) + ) + self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit)) + + @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version") + def test_rpn_scriptability(self): + cfg = get_cfg() + proposal_generator = RPN(cfg, {"res4": ShapeSpec(channels=1024, stride=16)}).eval() + num_images = 2 + images_tensor = torch.rand(num_images, 30, 40) + image_sizes = [(32, 32), (30, 40)] + images = ImageList(images_tensor, image_sizes) + features = {"res4": torch.rand(num_images, 1024, 1, 2)} + + fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"} + proposal_generator_ts = export_torchscript_with_instances(proposal_generator, fields) + + proposals, _ = proposal_generator(images, features) + proposals_ts, _ = proposal_generator_ts(images, features) + + for proposal, proposal_ts in zip(proposals, proposals_ts): + self.assertEqual(proposal.image_size, proposal_ts.image_size) + self.assertTrue( + torch.equal(proposal.proposal_boxes.tensor, proposal_ts.proposal_boxes.tensor) + ) + self.assertTrue(torch.equal(proposal.objectness_logits, proposal_ts.objectness_logits)) + + def test_rrpn(self): + torch.manual_seed(121) + cfg = get_cfg() + cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" + cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] + cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) + cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" + backbone = build_backbone(cfg) + proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) + num_images = 2 + images_tensor = torch.rand(num_images, 20, 30) + image_sizes = [(10, 10), (20, 30)] + images = ImageList(images_tensor, image_sizes) + image_shape = (15, 15) + num_channels = 1024 + features = {"res4": torch.rand(num_images, num_channels, 1, 2)} + gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) + gt_instances = Instances(image_shape) + gt_instances.gt_boxes = RotatedBoxes(gt_boxes) + with EventStorage(): # capture events in a new storage to discard them + proposals, proposal_losses = proposal_generator( + images, features, [gt_instances[0], gt_instances[1]] + ) + + expected_losses = { + "loss_rpn_cls": torch.tensor(0.043263837695121765), + "loss_rpn_loc": torch.tensor(0.14432406425476074), + } + for name in expected_losses.keys(): + err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( + name, proposal_losses[name], expected_losses[name] + ) + self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) + + expected_proposal_boxes = [ + RotatedBoxes( + torch.tensor( + [ + [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873], + [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475], + [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040], + [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227], + 
[0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738], + [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409], + [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737], + [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970], + [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134], + [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086], + [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125], + [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789], + ] + ) + ), + RotatedBoxes( + torch.tensor( + [ + [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899], + [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234], + [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494], + [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994], + [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251], + [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217], + [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078], + [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463], + [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767], + [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884], + [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270], + [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991], + [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784], + [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201], + ] + ) + ), + ] + + expected_objectness_logits = [ + torch.tensor( + [ + 0.10111768, + 0.09112845, + 0.08466332, + 0.07589971, + 0.06650183, + 0.06350251, + 0.04299347, + 0.01864817, + 0.00986163, + 0.00078543, + -0.04573630, + -0.04799230, + ] + ), + torch.tensor( + [ + 0.11373727, + 0.09377633, + 0.05281663, + 0.05143715, + 0.04040275, + 0.03250912, + 0.01307789, + 0.01177734, + 0.00038105, + -0.00540255, + -0.01194804, + -0.01461012, + -0.03061717, + -0.03599222, + ] + ), + ] + + torch.set_printoptions(precision=8, sci_mode=False) + + for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( + proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits + ): + self.assertEqual(len(proposal), len(expected_proposal_box)) + self.assertEqual(proposal.image_size, im_size) + # It seems that there's some randomness in the result across different machines: + # This test can be run on a local machine for 100 times with exactly the same result, + # However, a different machine might produce slightly different results, + # thus the atol here. 
+ err_msg = "computed proposal boxes = {}, expected {}".format( + proposal.proposal_boxes.tensor, expected_proposal_box.tensor + ) + self.assertTrue( + torch.allclose( + proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5 + ), + err_msg, + ) + + err_msg = "computed objectness logits = {}, expected {}".format( + proposal.objectness_logits, expected_objectness_logit + ) + self.assertTrue( + torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5), + err_msg, + ) + + def test_rpn_proposals_inf(self): + N, Hi, Wi, A = 3, 3, 3, 3 + proposals = [torch.rand(N, Hi * Wi * A, 4)] + pred_logits = [torch.rand(N, Hi * Wi * A)] + pred_logits[0][1][3:5].fill_(float("inf")) + find_top_rpn_proposals(proposals, pred_logits, [(10, 10)], 0.5, 1000, 1000, 0, False) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/__init__.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3af8b0462a3096662f6df47471acde3ff88583 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_boxes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..cd5cf4803254e95c9724cc413c90c09be7bf5e90 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_boxes.py @@ -0,0 +1,206 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import math +import numpy as np +import unittest +import torch + +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.env import TORCH_VERSION + + +class TestBoxMode(unittest.TestCase): + def _convert_xy_to_wh(self, x): + return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + + def _convert_xywha_to_xyxy(self, x): + return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS) + + def _convert_xywh_to_xywha(self, x): + return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) + + def test_box_convert_list(self): + for tp in [list, tuple]: + box = tp([5.0, 5.0, 10.0, 10.0]) + output = self._convert_xy_to_wh(box) + self.assertIsInstance(output, tp) + self.assertIsInstance(output[0], float) + self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0])) + + with self.assertRaises(Exception): + self._convert_xy_to_wh([box]) + + def test_box_convert_array(self): + box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + def test_box_convert_cpu_tensor(self): + box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + output = output.numpy() + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_box_convert_cuda_tensor(self): + box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda() + output = self._convert_xy_to_wh(box) + self.assertEqual(output.dtype, box.dtype) + self.assertEqual(output.shape, box.shape) + self.assertEqual(output.device, box.device) + output = output.cpu().numpy() + self.assertTrue((output[0] == [5, 5, 5, 5]).all()) + self.assertTrue((output[1] == [1, 1, 1, 2]).all()) + + def test_box_convert_xywha_to_xyxy_list(self): + for tp in [list, tuple]: + box = tp([50, 50, 30, 20, 0]) + output = self._convert_xywha_to_xyxy(box) + self.assertIsInstance(output, tp) + self.assertEqual(output, tp([35, 40, 65, 60])) + + with self.assertRaises(Exception): + self._convert_xywha_to_xyxy([box]) + + def test_box_convert_xywha_to_xyxy_array(self): + for dtype in [np.float64, np.float32]: + box = np.asarray( + [ + [50, 50, 30, 20, 0], + [50, 50, 30, 20, 90], + [1, 1, math.sqrt(2), math.sqrt(2), -45], + ], + dtype=dtype, + ) + output = self._convert_xywha_to_xyxy(box) + self.assertEqual(output.dtype, box.dtype) + expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) + self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_box_convert_xywha_to_xyxy_tensor(self): + for dtype in [torch.float32, torch.float64]: + box = torch.tensor( + [ + [50, 50, 30, 20, 0], + [50, 50, 30, 20, 90], + [1, 1, math.sqrt(2), math.sqrt(2), -45], + ], + dtype=dtype, + ) + output = self._convert_xywha_to_xyxy(box) + self.assertEqual(output.dtype, box.dtype) + expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) + + self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_box_convert_xywh_to_xywha_list(self): + for tp in [list, tuple]: + box = tp([50, 50, 30, 20]) + output = self._convert_xywh_to_xywha(box) + self.assertIsInstance(output, 
tp) + self.assertEqual(output, tp([65, 60, 30, 20, 0])) + + with self.assertRaises(Exception): + self._convert_xywh_to_xywha([box]) + + def test_box_convert_xywh_to_xywha_array(self): + for dtype in [np.float64, np.float32]: + box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) + output = self._convert_xywh_to_xywha(box) + self.assertEqual(output.dtype, box.dtype) + expected = np.asarray( + [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype + ) + self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_box_convert_xywh_to_xywha_tensor(self): + for dtype in [torch.float32, torch.float64]: + box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) + output = self._convert_xywh_to_xywha(box) + self.assertEqual(output.dtype, box.dtype) + expected = torch.tensor( + [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype + ) + + self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) + + def test_json_serializable(self): + payload = {"box_mode": BoxMode.XYWH_REL} + try: + json.dumps(payload) + except Exception: + self.fail("JSON serialization failed") + + def test_json_deserializable(self): + payload = '{"box_mode": 2}' + obj = json.loads(payload) + try: + obj["box_mode"] = BoxMode(obj["box_mode"]) + except Exception: + self.fail("JSON deserialization failed") + + +class TestBoxIOU(unittest.TestCase): + def test_pairwise_iou(self): + boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) + + boxes2 = torch.tensor( + [ + [0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 0.5, 1.0], + [0.0, 0.0, 1.0, 0.5], + [0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 1.0, 1.0], + [0.5, 0.5, 1.5, 1.5], + ] + ) + + expected_ious = torch.tensor( + [ + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + ] + ) + + ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) + + self.assertTrue(torch.allclose(ious, expected_ious)) + + +class TestBoxes(unittest.TestCase): + def test_empty_cat(self): + x = Boxes.cat([]) + self.assertTrue(x.tensor.shape, (0, 4)) + + # require https://github.com/pytorch/pytorch/pull/39336 + @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") + def test_scriptability(self): + def func(x): + boxes = Boxes(x) + return boxes.area() + + f = torch.jit.script(func) + f(torch.rand((3, 4))) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_imagelist.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_imagelist.py new file mode 100644 index 0000000000000000000000000000000000000000..8dfe470722954b336621670455d16ae5a832a2bd --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_imagelist.py @@ -0,0 +1,72 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from typing import List, Sequence, Tuple +import torch + +from detectron2.structures import ImageList +from detectron2.utils.env import TORCH_VERSION + + +class TestImageList(unittest.TestCase): + def test_imagelist_padding_shape(self): + class TensorToImageList(torch.nn.Module): + def forward(self, tensors: Sequence[torch.Tensor]): + return ImageList.from_tensors(tensors, 4).tensor + + func = torch.jit.trace( + TensorToImageList(), ([torch.ones((3, 10, 10), dtype=torch.float32)],) + ) + ret = func([torch.ones((3, 15, 20), dtype=torch.float32)]) + self.assertEqual(list(ret.shape), [1, 3, 16, 20], str(ret.shape)) + + func = torch.jit.trace( + TensorToImageList(), + ( + [ + torch.ones((3, 16, 10), dtype=torch.float32), + torch.ones((3, 13, 11), dtype=torch.float32), + ], + ), + ) + ret = func( + [ + torch.ones((3, 25, 20), dtype=torch.float32), + torch.ones((3, 10, 10), dtype=torch.float32), + ] + ) + # does not support calling with different #images + self.assertEqual(list(ret.shape), [2, 3, 28, 20], str(ret.shape)) + + @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") + def test_imagelist_scriptability(self): + image_nums = 2 + image_tensor = torch.randn((image_nums, 10, 20), dtype=torch.float32) + image_shape = [(10, 20)] * image_nums + + def f(image_tensor, image_shape: List[Tuple[int, int]]): + return ImageList(image_tensor, image_shape) + + ret = f(image_tensor, image_shape) + ret_script = torch.jit.script(f)(image_tensor, image_shape) + + self.assertEqual(len(ret), len(ret_script)) + for i in range(image_nums): + self.assertTrue(torch.equal(ret[i], ret_script[i])) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_instances.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_instances.py new file mode 100644 index 0000000000000000000000000000000000000000..e681f90411a135cf14648682dfdb19ede507c031 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_instances.py @@ -0,0 +1,72 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest +import torch + +from detectron2.export.torchscript import patch_instances +from detectron2.structures import Instances +from detectron2.utils.env import TORCH_VERSION + + +class TestInstances(unittest.TestCase): + def test_int_indexing(self): + attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) + attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) + instances = Instances((100, 100)) + instances.attr1 = attr1 + instances.attr2 = attr2 + for i in range(-len(instances), len(instances)): + inst = instances[i] + self.assertEqual((inst.attr1 == attr1[i]).all(), True) + self.assertEqual((inst.attr2 == attr2[i]).all(), True) + + self.assertRaises(IndexError, lambda: instances[len(instances)]) + self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) + + @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version") + def test_script_new_fields(self): + class f(torch.nn.Module): + def forward(self, x: Instances): + proposal_boxes = x.proposal_boxes # noqa F841 + objectness_logits = x.objectness_logits # noqa F841 + return x + + class g(torch.nn.Module): + def forward(self, x: Instances): + mask = x.mask # noqa F841 + return x + + class g2(torch.nn.Module): + def forward(self, x: Instances): + proposal_boxes = x.proposal_boxes # noqa F841 + return x + + fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"} + with patch_instances(fields): + torch.jit.script(f()) + + # can't script anymore after exiting the context + with self.assertRaises(Exception): + torch.jit.script(g2()) + + new_fields = {"mask": "Tensor"} + with patch_instances(new_fields): + torch.jit.script(g()) + with self.assertRaises(Exception): + torch.jit.script(g2()) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_rotated_boxes.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_rotated_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..6b7ba7c1503844064579dbbcf433afeae04ce021 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/structures/test_rotated_boxes.py @@ -0,0 +1,370 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import, division, print_function, unicode_literals +import logging +import math +import random +import unittest +import torch +from fvcore.common.benchmark import benchmark + +from detectron2.layers.rotated_boxes import pairwise_iou_rotated +from detectron2.structures.boxes import Boxes +from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou + +logger = logging.getLogger(__name__) + + +class TestRotatedBoxesLayer(unittest.TestCase): + def test_iou_0_dim_cpu(self): + boxes1 = torch.rand(0, 5, dtype=torch.float32) + boxes2 = torch.rand(10, 5, dtype=torch.float32) + expected_ious = torch.zeros(0, 10, dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + boxes1 = torch.rand(10, 5, dtype=torch.float32) + boxes2 = torch.rand(0, 5, dtype=torch.float32) + expected_ious = torch.zeros(10, 0, dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_0_dim_cuda(self): + boxes1 = torch.rand(0, 5, dtype=torch.float32) + boxes2 = torch.rand(10, 5, dtype=torch.float32) + expected_ious = torch.zeros(0, 10, dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + boxes1 = torch.rand(10, 5, dtype=torch.float32) + boxes2 = torch.rand(0, 5, dtype=torch.float32) + expected_ious = torch.zeros(10, 0, dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + def test_iou_half_overlap_cpu(self): + boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) + boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) + expected_ious = torch.tensor([[0.5]], dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious, expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_half_overlap_cuda(self): + boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) + boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) + expected_ious = torch.tensor([[0.5]], dtype=torch.float32) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) + + def test_iou_precision(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device) + boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device) + iou = 8.3 / 10.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_iou_too_many_boxes_cuda(self): + s1, s2 = 5, 1289035 + boxes1 = torch.zeros(s1, 5) + boxes2 = torch.zeros(s2, 5) + ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) + self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2)) + + def test_iou_extreme(self): + # Cause floating point issues in cuda kernels (#1266) + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], 
device=device) + boxes2 = torch.tensor( + [ + [ + -1.117407639806935e17, + 1.3858420478349148e18, + 1000.0000610351562, + 1000.0000610351562, + 1612.0, + ] + ], + device=device, + ) + ious = pairwise_iou_rotated(boxes1, boxes2) + self.assertTrue(ious.min() >= 0, ious) + + +class TestRotatedBoxesStructure(unittest.TestCase): + def test_clip_area_0_degree(self): + for _ in range(50): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) + # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2) + boxes_4d = torch.zeros(num_boxes, 4) + boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0 + boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0 + boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0 + boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0 + + image_size = (500, 600) + test_boxes_4d = Boxes(boxes_4d) + test_boxes_5d = RotatedBoxes(boxes_5d) + # Before clip + areas_4d = test_boxes_4d.area() + areas_5d = test_boxes_5d.area() + self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) + # After clip + test_boxes_4d.clip(image_size) + test_boxes_5d.clip(image_size) + areas_4d = test_boxes_4d.area() + areas_5d = test_boxes_5d.area() + self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) + + def test_clip_area_arbitrary_angle(self): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + clip_angle_threshold = random.uniform(0, 180) + + image_size = (500, 600) + test_boxes_5d = RotatedBoxes(boxes_5d) + # Before clip + areas_before = test_boxes_5d.area() + # After clip + test_boxes_5d.clip(image_size, clip_angle_threshold) + areas_diff = test_boxes_5d.area() - areas_before + + # the areas should only decrease after clipping + self.assertTrue(torch.all(areas_diff <= 0)) + # whenever the box is clipped (thus the area shrinks), + # the angle for the box must be within the clip_angle_threshold + # Note that the clip function will normalize the angle range + # to be within (-180, 180] + self.assertTrue( + torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold) + ) + + def test_normalize_angles(self): + # torch.manual_seed(0) + for _ in range(50): + num_boxes = 100 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + rotated_boxes = RotatedBoxes(boxes_5d) + normalized_boxes = rotated_boxes.clone() + normalized_boxes.normalize_angles() + self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180)) + self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180)) + # x, y, w, h should not change + self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4])) + # the cos/sin values 
of the angles should stay the same + + self.assertTrue( + torch.allclose( + torch.cos(boxes_5d[:, 4] * math.pi / 180), + torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180), + atol=1e-5, + ) + ) + + self.assertTrue( + torch.allclose( + torch.sin(boxes_5d[:, 4] * math.pi / 180), + torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180), + atol=1e-5, + ) + ) + + def test_pairwise_iou_0_degree(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor( + [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor( + [ + [0.5, 0.5, 1.0, 1.0, 0.0], + [0.25, 0.5, 0.5, 1.0, 0.0], + [0.5, 0.25, 1.0, 0.5, 0.0], + [0.25, 0.25, 0.5, 0.5, 0.0], + [0.75, 0.75, 0.5, 0.5, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0], + ], + dtype=torch.float32, + device=device, + ) + expected_ious = torch.tensor( + [ + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], + ], + dtype=torch.float32, + device=device, + ) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_45_degrees(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor( + [ + [1, 1, math.sqrt(2), math.sqrt(2), 45], + [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45], + ], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device) + expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_orthogonal(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device) + boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device) + iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0) + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_large_close_boxes(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + boxes1 = torch.tensor( + [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]], + dtype=torch.float32, + device=device, + ) + boxes2 = torch.tensor( + [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]], + dtype=torch.float32, + device=device, + ) + iou = 364.259155 / 364.259186 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_many_boxes(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + num_boxes1 = 100 + num_boxes2 = 200 + boxes1 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32, device=device + ) + for i in range(num_boxes1) + ] + ) + boxes2 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], + dtype=torch.float32, + device=device, + ) + for i in range(num_boxes2) + ] + ) + expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device) + for i in range(min(num_boxes1, num_boxes2)): + expected_ious[i][i] = 
(1 + 9 * i / num_boxes2) / 10.0 + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_issue1207_simplified(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + # Simplified test case of D2-issue-1207 + boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device) + boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device) + iou = 0.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_pairwise_iou_issue1207(self): + for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: + # The original test case in D2-issue-1207 + boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) + boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device) + + iou = 0.0 + expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) + + ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) + self.assertTrue(torch.allclose(ious, expected_ious)) + + def test_empty_cat(self): + x = RotatedBoxes.cat([]) + self.assertTrue(x.tensor.shape, (0, 5)) + + +def benchmark_rotated_iou(): + num_boxes1 = 200 + num_boxes2 = 500 + boxes1 = torch.stack( + [ + torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32) + for i in range(num_boxes1) + ] + ) + boxes2 = torch.stack( + [ + torch.tensor( + [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], dtype=torch.float32 + ) + for i in range(num_boxes2) + ] + ) + + def func(dev, n=1): + b1 = boxes1.to(device=dev) + b2 = boxes2.to(device=dev) + + def bench(): + for _ in range(n): + pairwise_iou_rotated(b1, b2) + if dev.type == "cuda": + torch.cuda.synchronize() + + return bench + + # only run it once per timed loop, since it's slow + args = [{"dev": torch.device("cpu"), "n": 1}] + if torch.cuda.is_available(): + args.append({"dev": torch.device("cuda"), "n": 10}) + + benchmark(func, "rotated_iou", args, warmup_iters=3) + + +if __name__ == "__main__": + unittest.main() + benchmark_rotated_iou() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_checkpoint.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..71f936e67ec353b1b6583b299ca5b866e254e243 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_checkpoint.py @@ -0,0 +1,61 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest +from collections import OrderedDict +import torch +from torch import nn + +from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts +from detectron2.utils.logger import setup_logger + + +class TestCheckpointer(unittest.TestCase): + def setUp(self): + setup_logger() + + def create_complex_model(self): + m = nn.Module() + m.block1 = nn.Module() + m.block1.layer1 = nn.Linear(2, 3) + m.layer2 = nn.Linear(3, 2) + m.res = nn.Module() + m.res.layer2 = nn.Linear(3, 2) + + state_dict = OrderedDict() + state_dict["layer1.weight"] = torch.rand(3, 2) + state_dict["layer1.bias"] = torch.rand(3) + state_dict["layer2.weight"] = torch.rand(2, 3) + state_dict["layer2.bias"] = torch.rand(2) + state_dict["res.layer2.weight"] = torch.rand(2, 3) + state_dict["res.layer2.bias"] = torch.rand(2) + return m, state_dict + + def test_complex_model_loaded(self): + for add_data_parallel in [False, True]: + model, state_dict = self.create_complex_model() + if add_data_parallel: + model = nn.DataParallel(model) + model_sd = model.state_dict() + + align_and_update_state_dicts(model_sd, state_dict) + for loaded, stored in zip(model_sd.values(), state_dict.values()): + # different tensor references + self.assertFalse(id(loaded) == id(stored)) + # same content + self.assertTrue(loaded.equal(stored)) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_config.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..7c758c8cc39ca92ba5a6a9f2abcfe74c3636243c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_config.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import os +import tempfile +import unittest +import torch + +from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config +from detectron2.layers import ShapeSpec + +_V0_CFG = """ +MODEL: + RPN_HEAD: + NAME: "TEST" +VERSION: 0 +""" + +_V1_CFG = """ +MODEL: + WEIGHT: "/path/to/weight" +""" + + +class TestConfigVersioning(unittest.TestCase): + def test_upgrade_downgrade_consistency(self): + cfg = get_cfg() + # check that custom is preserved + cfg.USER_CUSTOM = 1 + + down = downgrade_config(cfg, to_version=0) + up = upgrade_config(down) + self.assertTrue(up == cfg) + + def _merge_cfg_str(self, cfg, merge_str): + f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) + try: + f.write(merge_str) + f.close() + cfg.merge_from_file(f.name) + finally: + os.remove(f.name) + return cfg + + def test_auto_upgrade(self): + cfg = get_cfg() + latest_ver = cfg.VERSION + cfg.USER_CUSTOM = 1 + + self._merge_cfg_str(cfg, _V0_CFG) + + self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") + self.assertEqual(cfg.VERSION, latest_ver) + + def test_guess_v1(self): + cfg = get_cfg() + latest_ver = cfg.VERSION + self._merge_cfg_str(cfg, _V1_CFG) + self.assertEqual(cfg.VERSION, latest_ver) + + +class _TestClassA(torch.nn.Module): + @configurable + def __init__(self, arg1, arg2, arg3=3): + super().__init__() + self.arg1 = arg1 + self.arg2 = arg2 + self.arg3 = arg3 + assert arg1 == 1 + assert arg2 == 2 + assert arg3 == 3 + + @classmethod + def from_config(cls, cfg): + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + return args + + +class _TestClassB(_TestClassA): + @configurable + def __init__(self, input_shape, arg1, arg2, arg3=3): + """ + Doc of _TestClassB + """ + assert input_shape == "shape" + super().__init__(arg1, arg2, arg3) + + @classmethod + def from_config(cls, cfg, input_shape): # test extra positional arg in from_config + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + args["input_shape"] = input_shape + return args + + +class _LegacySubClass(_TestClassB): + # an old subclass written in cfg style + def __init__(self, cfg, input_shape, arg4=4): + super().__init__(cfg, input_shape) + assert self.arg1 == 1 + assert self.arg2 == 2 + assert self.arg3 == 3 + + +class _NewSubClassNewInit(_TestClassB): + # test new subclass with a new __init__ + @configurable + def __init__(self, input_shape, arg4=4, **kwargs): + super().__init__(input_shape, **kwargs) + assert self.arg1 == 1 + assert self.arg2 == 2 + assert self.arg3 == 3 + + +class _LegacySubClassNotCfg(_TestClassB): + # an old subclass written in cfg style, but argument is not called "cfg" + def __init__(self, config, input_shape): + super().__init__(config, input_shape) + assert self.arg1 == 1 + assert self.arg2 == 2 + assert self.arg3 == 3 + + +class _TestClassC(_TestClassB): + @classmethod + def from_config(cls, cfg, input_shape, **kwargs): # test extra kwarg overwrite + args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} + args["input_shape"] = input_shape + args.update(kwargs) + return args + + +class _TestClassD(_TestClassA): + @configurable + def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3): + assert input_shape == "shape" + super().__init__(arg1, arg2, arg3) + + # _TestClassA.from_config does not have input_shape args. 
+ # Test whether input_shape will be forwarded to __init__ + + +class TestConfigurable(unittest.TestCase): + def testInitWithArgs(self): + _ = _TestClassA(arg1=1, arg2=2, arg3=3) + _ = _TestClassB("shape", arg1=1, arg2=2) + _ = _TestClassC("shape", arg1=1, arg2=2) + _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3) + + def testPatchedAttr(self): + self.assertTrue("Doc" in _TestClassB.__init__.__doc__) + self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int) + + def testInitWithCfg(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 2 + cfg.ARG3 = 3 + _ = _TestClassA(cfg) + _ = _TestClassB(cfg, input_shape="shape") + _ = _TestClassC(cfg, input_shape="shape") + _ = _TestClassD(cfg, input_shape="shape") + _ = _LegacySubClass(cfg, input_shape="shape") + _ = _NewSubClassNewInit(cfg, input_shape="shape") + _ = _LegacySubClassNotCfg(cfg, input_shape="shape") + with self.assertRaises(TypeError): + # disallow forwarding positional args to __init__ since it's prone to errors + _ = _TestClassD(cfg, "shape") + + # call with kwargs instead + _ = _TestClassA(cfg=cfg) + _ = _TestClassB(cfg=cfg, input_shape="shape") + _ = _TestClassC(cfg=cfg, input_shape="shape") + _ = _TestClassD(cfg=cfg, input_shape="shape") + _ = _LegacySubClass(cfg=cfg, input_shape="shape") + _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape") + _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape") + + def testInitWithCfgOverwrite(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 999 # wrong config + with self.assertRaises(AssertionError): + _ = _TestClassA(cfg, arg3=3) + + # overwrite arg2 with correct config later: + _ = _TestClassA(cfg, arg2=2, arg3=3) + _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3) + + # call with kwargs cfg=cfg instead + _ = _TestClassA(cfg=cfg, arg2=2, arg3=3) + _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3) + + def testInitWithCfgWrongArgs(self): + cfg = get_cfg() + cfg.ARG1 = 1 + cfg.ARG2 = 2 + with self.assertRaises(TypeError): + _ = _TestClassB(cfg, "shape", not_exist=1) + with self.assertRaises(TypeError): + _ = _TestClassC(cfg, "shape", not_exist=1) + with self.assertRaises(TypeError): + _ = _TestClassD(cfg, "shape", not_exist=1) + + def testBadClass(self): + class _BadClass1: + @configurable + def __init__(self, a=1, b=2): + pass + + class _BadClass2: + @configurable + def __init__(self, a=1, b=2): + pass + + def from_config(self, cfg): # noqa + pass + + class _BadClass3: + @configurable + def __init__(self, a=1, b=2): + pass + + # bad name: must be cfg + @classmethod + def from_config(cls, config): # noqa + pass + + with self.assertRaises(AttributeError): + _ = _BadClass1(a=1) + + with self.assertRaises(TypeError): + _ = _BadClass2(a=1) + + with self.assertRaises(TypeError): + _ = _BadClass3(get_cfg()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_engine.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..633b973f5fae8d58c99f2e75abdeb2a33f47a3aa --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_engine.py @@ -0,0 +1,43 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import unittest +import torch +from torch import nn + +from detectron2.engine import SimpleTrainer + + +class SimpleModel(nn.Sequential): + def forward(self, x): + return {"loss": x.sum() + sum([x.mean() for x in self.parameters()])} + + +class TestTrainer(unittest.TestCase): + def test_simple_trainer(self, device="cpu"): + device = torch.device(device) + model = SimpleModel(nn.Linear(10, 10)).to(device) + + def data_loader(): + while True: + yield torch.rand(3, 3).to(device) + + trainer = SimpleTrainer(model, data_loader(), torch.optim.SGD(model.parameters(), 0.1)) + trainer.train(0, 10) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def test_simple_trainer_cuda(self): + self.test_simple_trainer(device="cuda") diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_export_caffe2.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_export_caffe2.py new file mode 100644 index 0000000000000000000000000000000000000000..868fb2d16c32fafedcabdd01e7003f99a19e097b --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_export_caffe2.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# -*- coding: utf-8 -*- + +import copy +import numpy as np +import os +import tempfile +import unittest +import cv2 +import torch +from fvcore.common.file_io import PathManager + +from detectron2 import model_zoo +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import DatasetCatalog +from detectron2.modeling import build_model +from detectron2.utils.logger import setup_logger + + +@unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.") +class TestCaffe2Export(unittest.TestCase): + def setUp(self): + setup_logger() + + def _test_model(self, config_path, device="cpu"): + # requires extra dependencies + from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model + + cfg = get_cfg() + cfg.merge_from_file(model_zoo.get_config_file(config_path)) + cfg = add_export_config(cfg) + cfg.MODEL.DEVICE = device + + inputs = [{"image": self._get_test_image()}] + model = build_model(cfg) + DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path)) + c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs)) + + with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: + c2_model.save_protobuf(d) + c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) + c2_model = Caffe2Model.load_protobuf(d) + c2_model(inputs)[0]["instances"] + + def _get_test_image(self): + try: + file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] + assert PathManager.exists(file_name) + except Exception: + self.skipTest("COCO dataset not available.") + + with PathManager.open(file_name, "rb") as f: + buf = f.read() + img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR) + assert img is not None, file_name + return torch.from_numpy(img.transpose(2, 0, 1)) + + def testMaskRCNN(self): + self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") + def testMaskRCNNGPU(self): + self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") + + def testRetinaNet(self): + self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") + + def testPanopticFPN(self): + self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_analysis.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..ff04665b138db9fa8f8563e4a316caa05474ffd6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_analysis.py @@ -0,0 +1,71 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import unittest +import torch + +import detectron2.model_zoo as model_zoo +from detectron2.config import get_cfg +from detectron2.modeling import build_model +from detectron2.utils.analysis import flop_count_operators, parameter_count + + +def get_model_zoo(config_path): + """ + Like model_zoo.get, but do not load any weights (even pretrained) + """ + cfg_file = model_zoo.get_config_file(config_path) + cfg = get_cfg() + cfg.merge_from_file(cfg_file) + if not torch.cuda.is_available(): + cfg.MODEL.DEVICE = "cpu" + return build_model(cfg) + + +class RetinaNetTest(unittest.TestCase): + def setUp(self): + self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml") + + def test_flop(self): + # RetinaNet supports flop-counting with random inputs + inputs = [{"image": torch.rand(3, 800, 800)}] + res = flop_count_operators(self.model, inputs) + self.assertTrue(int(res["conv"]), 146) # 146B flops + + def test_param_count(self): + res = parameter_count(self.model) + self.assertTrue(res[""], 37915572) + self.assertTrue(res["backbone"], 31452352) + + +class FasterRCNNTest(unittest.TestCase): + def setUp(self): + self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") + + def test_flop(self): + # Faster R-CNN supports flop-counting with random inputs + inputs = [{"image": torch.rand(3, 800, 800)}] + res = flop_count_operators(self.model, inputs) + + # This only checks flops for backbone & proposal generator + # Flops for box head is not conv, and depends on #proposals, which is + # almost 0 for random inputs. + self.assertTrue(int(res["conv"]), 117) + + def test_param_count(self): + res = parameter_count(self.model) + self.assertTrue(res[""], 41699936) + self.assertTrue(res["backbone"], 26799296) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_zoo.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_zoo.py new file mode 100644 index 0000000000000000000000000000000000000000..9472f635748d4bd4179ec1e330d6073fb892617d --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_model_zoo.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +import unittest + +from detectron2 import model_zoo +from detectron2.modeling import FPN, GeneralizedRCNN + +logger = logging.getLogger(__name__) + + +class TestModelZoo(unittest.TestCase): + def test_get_returns_model(self): + model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) + self.assertIsInstance(model, GeneralizedRCNN) + self.assertIsInstance(model.backbone, FPN) + + def test_get_invalid_model(self): + self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") + + def test_get_url(self): + url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") + self.assertEqual( + url, + "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_visualizer.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..4c5f8f27e43c1640728bf8120dbec97c0c9fe73c --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tests/test_visualizer.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import unittest +import cv2 +import torch + +from detectron2.data import MetadataCatalog +from detectron2.structures import BoxMode, Instances, RotatedBoxes +from detectron2.utils.visualizer import Visualizer + + +class TestVisualizer(unittest.TestCase): + def _random_data(self): + H, W = 100, 100 + N = 10 + img = np.random.rand(H, W, 3) * 255 + boxxy = np.random.rand(N, 2) * (H // 2) + boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1) + + def _rand_poly(): + return np.random.rand(3, 2).flatten() * H + + polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)] + + mask = np.zeros_like(img[:, :, 0], dtype=np.bool) + mask[:10, 10:20] = 1 + + labels = [str(i) for i in range(N)] + return img, boxes, labels, polygons, [mask] * N + + @property + def metadata(self): + return MetadataCatalog.get("coco_2017_train") + + def test_draw_dataset_dict(self): + img = np.random.rand(512, 512, 3) * 255 + dic = { + "annotations": [ + { + "bbox": [ + 368.9946492271106, + 330.891438763377, + 13.148537455410235, + 13.644708680142685, + ], + "bbox_mode": BoxMode.XYWH_ABS, + "category_id": 0, + "iscrowd": 1, + "segmentation": { + "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2", + "size": [512, 512], + }, + } + ], + "height": 512, + "image_id": 1, + "width": 512, + } + v = Visualizer(img, self.metadata) + v.draw_dataset_dict(dic) + + def test_overlay_instances(self): + img, boxes, labels, polygons, masks = self._random_data() + + v = Visualizer(img, self.metadata) + output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + # Test 2x scaling + v = Visualizer(img, self.metadata, scale=2.0) + output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape[0], img.shape[0] * 2) + + # Test overlay masks + v = Visualizer(img, self.metadata) + output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + def test_overlay_instances_no_boxes(self): + img, boxes, labels, polygons, _ = self._random_data() + v = Visualizer(img, self.metadata) + v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image() + + def test_draw_instance_predictions(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.asarray(masks)) + + v = Visualizer(img, self.metadata) + v.draw_instance_predictions(inst) + + def test_draw_empty_mask_predictions(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks))) + + v = Visualizer(img, self.metadata) + v.draw_instance_predictions(inst) + + def test_correct_output_shape(self): + img = np.random.rand(928, 928, 3) * 255 + v = Visualizer(img, self.metadata) + out = v.output.get_image() + self.assertEqual(out.shape, img.shape) + + def test_overlay_rotated_instances(self): + H, W = 100, 150 + img = np.random.rand(H, W, 3) * 255 + num_boxes = 50 + boxes_5d = torch.zeros(num_boxes, 5) + boxes_5d[:, 0] = 
torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W) + boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H) + boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) + boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) + boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) + rotated_boxes = RotatedBoxes(boxes_5d) + labels = [str(i) for i in range(num_boxes)] + + v = Visualizer(img, self.metadata) + output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image() + self.assertEqual(output.shape, img.shape) + + def test_draw_no_metadata(self): + img, boxes, _, _, masks = self._random_data() + num_inst = len(boxes) + inst = Instances((img.shape[0], img.shape[1])) + inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) + inst.scores = torch.rand(num_inst) + inst.pred_boxes = torch.from_numpy(boxes) + inst.pred_masks = torch.from_numpy(np.asarray(masks)) + + v = Visualizer(img, MetadataCatalog.get("asdfasdf")) + v.draw_instance_predictions(inst) + + def test_draw_binary_mask(self): + img, boxes, _, _, masks = self._random_data() + img[:, :, 0] = 0 # remove red color + mask = masks[0] + mask_with_hole = np.zeros_like(mask).astype("uint8") + mask_with_hole = cv2.rectangle(mask_with_hole, (10, 10), (50, 50), 1, 5) + + for m in [mask, mask_with_hole]: + v = Visualizer(img) + o = v.draw_binary_mask(m, color="red", text="test") + o = o.get_image().astype("float32") + # red color is drawn on the image + self.assertTrue(o[:, :, 0].sum() > 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/README.md b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3733863970218bf8bdf9b32420163f4c858e209e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/README.md @@ -0,0 +1,45 @@ + +This directory contains a few scripts that use detectron2. + + +* `train_net.py` + +An example training script that's made to train builtin models of detectron2. + +For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). + +* `plain_train_net.py` + +Similar to `train_net.py`, but implements a training loop instead of using `Trainer`. +This script includes fewer features but it may be more friendly to hackers. + +* `benchmark.py` + +Benchmark the training speed, inference speed or data loading speed of a given config. + +Usage: +``` +python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags] +``` + +* `visualize_json_results.py` + +Visualize the json instance detection/segmentation results dumped by `COCOEvalutor` or `LVISEvaluator` + +Usage: +``` +python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val +``` +If not using a builtin dataset, you'll need your own script or modify this script. + +* `visualize_data.py` + +Visualize ground truth raw annotations or training data (after preprocessing/augmentations). + +Usage: +``` +python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show] +``` + +NOTE: the script does not stop by itself when using `--source dataloader` because a training +dataloader is usually infinite. 
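The `analyze_model.py` tool added in the next file wraps detectron2's analysis helpers (flops, activations, parameter counts) behind a config-driven CLI. As a minimal illustrative sketch of its `parameter` task only (not part of the patch itself; the toy `torch.nn` module below is a stand-in so the snippet runs without a config or weights), the underlying helper can be called directly:

```python
# Sketch: count parameters with the same helper analyze_model.py uses,
# but on a toy module instead of a built detectron2 model.
import torch.nn as nn
from detectron2.utils.analysis import parameter_count_table

toy_model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 8, kernel_size=3, padding=1),
)

# Same call the tool's do_parameter() makes, just with a smaller max_depth.
print(parameter_count_table(toy_model, max_depth=2))
```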
diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/analyze_model.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/analyze_model.py new file mode 100644 index 0000000000000000000000000000000000000000..ebb6454bdfb1a65e018c1b869426615e31687c1e --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/analyze_model.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +from collections import Counter +import tqdm + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import build_detection_test_loader +from detectron2.engine import default_argument_parser +from detectron2.modeling import build_model +from detectron2.utils.analysis import ( + activation_count_operators, + flop_count_operators, + parameter_count_table, +) +from detectron2.utils.logger import setup_logger + +logger = logging.getLogger("detectron2") + + +def setup(args): + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.DATALOADER.NUM_WORKERS = 0 + cfg.merge_from_list(args.opts) + cfg.freeze() + setup_logger() + return cfg + + +def do_flop(cfg): + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + model.eval() + + counts = Counter() + total_flops = [] + for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa + count = flop_count_operators(model, data) + counts += count + total_flops.append(sum(count.values())) + logger.info( + "(G)Flops for Each Type of Operators:\n" + str([(k, v / idx) for k, v in counts.items()]) + ) + logger.info("Total (G)Flops: {}±{}".format(np.mean(total_flops), np.std(total_flops))) + + +def do_activation(cfg): + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + model = build_model(cfg) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + model.eval() + + counts = Counter() + total_activations = [] + for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa + count = activation_count_operators(model, data) + counts += count + total_activations.append(sum(count.values())) + logger.info( + "(Million) Activations for Each Type of Operators:\n" + + str([(k, v / idx) for k, v in counts.items()]) + ) + logger.info( + "Total (Million) Activations: {}±{}".format( + np.mean(total_activations), np.std(total_activations) + ) + ) + + +def do_parameter(cfg): + model = build_model(cfg) + logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5)) + + +def do_structure(cfg): + model = build_model(cfg) + logger.info("Model Structure:\n" + str(model)) + + +if __name__ == "__main__": + parser = default_argument_parser( + epilog=""" +Examples: + +To show parameters of a model: +$ ./analyze_model.py --tasks parameter \\ + 
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml + +Flops and activations are data-dependent, therefore inputs and model weights +are needed to count them: + +$ ./analyze_model.py --num-inputs 100 --tasks flop \\ + --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\ + MODEL.WEIGHTS /path/to/model.pkl +""" + ) + parser.add_argument( + "--tasks", + choices=["flop", "activation", "parameter", "structure"], + required=True, + nargs="+", + ) + parser.add_argument( + "--num-inputs", + default=100, + type=int, + help="number of inputs used to compute statistics for flops/activations, " + "both are data dependent.", + ) + args = parser.parse_args() + assert not args.eval_only + assert args.num_gpus == 1 + + cfg = setup(args) + + for task in args.tasks: + { + "flop": do_flop, + "activation": do_activation, + "parameter": do_parameter, + "structure": do_structure, + }[task](cfg) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/benchmark.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..9b4dbe467a01f0e9455b89e0f8e09242d263a961 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/benchmark.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +A script to benchmark builtin models. + +Note: this script has an extra dependency of psutil. +""" + +import itertools +import logging +import psutil +import torch +import tqdm +from fvcore.common.timer import Timer +from torch.nn.parallel import DistributedDataParallel + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import ( + DatasetFromList, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.engine import SimpleTrainer, default_argument_parser, hooks, launch +from detectron2.modeling import build_model +from detectron2.solver import build_optimizer +from detectron2.utils import comm +from detectron2.utils.events import CommonMetricPrinter +from detectron2.utils.logger import setup_logger + +logger = logging.getLogger("detectron2") + + +def setup(args): + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. 
+ cfg.merge_from_list(args.opts) + cfg.freeze() + setup_logger(distributed_rank=comm.get_rank()) + return cfg + + +def benchmark_data(args): + cfg = setup(args) + + timer = Timer() + dataloader = build_detection_train_loader(cfg) + logger.info("Initialize loader using {} seconds.".format(timer.seconds())) + + timer.reset() + itr = iter(dataloader) + for i in range(10): # warmup + next(itr) + if i == 0: + startup_time = timer.seconds() + timer = Timer() + max_iter = 1000 + for _ in tqdm.trange(max_iter): + next(itr) + logger.info( + "{} iters ({} images) in {} seconds.".format( + max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() + ) + ) + logger.info("Startup time: {} seconds".format(startup_time)) + vram = psutil.virtual_memory() + logger.info( + "RAM Usage: {:.2f}/{:.2f} GB".format( + (vram.total - vram.available) / 1024 ** 3, vram.total / 1024 ** 3 + ) + ) + + # test for a few more rounds + for _ in range(10): + timer = Timer() + max_iter = 1000 + for _ in tqdm.trange(max_iter): + next(itr) + logger.info( + "{} iters ({} images) in {} seconds.".format( + max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() + ) + ) + + +def benchmark_train(args): + cfg = setup(args) + model = build_model(cfg) + logger.info("Model:\n{}".format(model)) + if comm.get_world_size() > 1: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + optimizer = build_optimizer(cfg, model) + checkpointer = DetectionCheckpointer(model, optimizer=optimizer) + checkpointer.load(cfg.MODEL.WEIGHTS) + + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 + data_loader = build_detection_train_loader(cfg) + dummy_data = list(itertools.islice(data_loader, 100)) + + def f(): + data = DatasetFromList(dummy_data, copy=False) + while True: + yield from data + + max_iter = 400 + trainer = SimpleTrainer(model, f(), optimizer) + trainer.register_hooks( + [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])] + ) + trainer.train(1, max_iter) + + +@torch.no_grad() +def benchmark_eval(args): + cfg = setup(args) + model = build_model(cfg) + model.eval() + logger.info("Model:\n{}".format(model)) + DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) + + cfg.defrost() + cfg.DATALOADER.NUM_WORKERS = 0 + data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) + dummy_data = list(itertools.islice(data_loader, 100)) + + def f(): + while True: + yield from DatasetFromList(dummy_data, copy=False) + + for _ in range(5): # warmup + model(dummy_data[0]) + + max_iter = 400 + timer = Timer() + with tqdm.tqdm(total=max_iter) as pbar: + for idx, d in enumerate(f()): + if idx == max_iter: + break + model(d) + pbar.update() + logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) + + +if __name__ == "__main__": + parser = default_argument_parser() + parser.add_argument("--task", choices=["train", "eval", "data"], required=True) + args = parser.parse_args() + assert not args.eval_only + + if args.task == "data": + f = benchmark_data + elif args.task == "train": + """ + Note: training speed may not be representative. + The training cost of a R-CNN model varies with the content of the data + and the quality of the model. + """ + f = benchmark_train + elif args.task == "eval": + f = benchmark_eval + # only benchmark single-GPU inference. 
+ assert args.num_gpus == 1 and args.num_machines == 1 + launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,)) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/convert-torchvision-to-d2.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/convert-torchvision-to-d2.py new file mode 100644 index 0000000000000000000000000000000000000000..ad95c5339f2b41c1b9b841abf75f422853812c3f --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/convert-torchvision-to-d2.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pickle as pkl +import sys +import torch + +""" +Usage: + # download one of the ResNet{18,34,50,101,152} models from torchvision: + wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth + # run the conversion + ./convert-torchvision-to-d2.py r50.pth r50.pkl + + # Then, use r50.pkl with the following changes in config: + +MODEL: + WEIGHTS: "/path/to/r50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STRIDE_IN_1X1: False +INPUT: + FORMAT: "RGB" + + These models typically produce slightly worse results than the + pre-trained ResNets we use in official configs, which are the + original ResNet models released by MSRA. +""" + +if __name__ == "__main__": + input = sys.argv[1] + + obj = torch.load(input, map_location="cpu") + + newmodel = {} + for k in list(obj.keys()): + old_k = k + if "layer" not in k: + k = "stem." + k + for t in [1, 2, 3, 4]: + k = k.replace("layer{}".format(t), "res{}".format(t + 1)) + for t in [1, 2, 3]: + k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) + k = k.replace("downsample.0", "shortcut") + k = k.replace("downsample.1", "shortcut.norm") + print(old_k, "->", k) + newmodel[k] = obj.pop(old_k).detach().numpy() + + res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} + + with open(sys.argv[2], "wb") as f: + pkl.dump(res, f) + if obj: + print("Unconverted keys:", obj.keys()) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/plain_train_net.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/plain_train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..87509075144904c118b9942db1810c9d397668d8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/plain_train_net.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Detectron2 training script with a plain training loop. + +This script reads a given config file and runs the training or evaluation. +It is an entry point that is able to train standard models in detectron2. + +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as a library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. + +Compared to "train_net.py", this script supports fewer default features. +It also includes fewer abstraction, therefore is easier to add custom logic. +""" + +import logging +import os +from collections import OrderedDict +import torch +from torch.nn.parallel import DistributedDataParallel + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer +from detectron2.config import get_cfg +from detectron2.data import ( + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.engine import default_argument_parser, default_setup, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + inference_on_dataset, + print_csv_format, +) +from detectron2.modeling import build_model +from detectron2.solver import build_lr_scheduler, build_optimizer +from detectron2.utils.events import ( + CommonMetricPrinter, + EventStorage, + JSONWriter, + TensorboardXWriter, +) + +logger = logging.getLogger("detectron2") + + +def get_evaluator(cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." 
+ return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesSemSegEvaluator(dataset_name) + if evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + if evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) + ) + if len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + +def do_test(cfg, model): + results = OrderedDict() + for dataset_name in cfg.DATASETS.TEST: + data_loader = build_detection_test_loader(cfg, dataset_name) + evaluator = get_evaluator( + cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + ) + results_i = inference_on_dataset(model, data_loader, evaluator) + results[dataset_name] = results_i + if comm.is_main_process(): + logger.info("Evaluation results for {} in csv format:".format(dataset_name)) + print_csv_format(results_i) + if len(results) == 1: + results = list(results.values())[0] + return results + + +def do_train(cfg, model, resume=False): + model.train() + optimizer = build_optimizer(cfg, model) + scheduler = build_lr_scheduler(cfg, optimizer) + + checkpointer = DetectionCheckpointer( + model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler + ) + start_iter = ( + checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 + ) + max_iter = cfg.SOLVER.MAX_ITER + + periodic_checkpointer = PeriodicCheckpointer( + checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter + ) + + writers = ( + [ + CommonMetricPrinter(max_iter), + JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")), + TensorboardXWriter(cfg.OUTPUT_DIR), + ] + if comm.is_main_process() + else [] + ) + + # compared to "train_net.py", we do not support accurate timing and + # precise BN here, because they are not trivial to implement in a small training loop + data_loader = build_detection_train_loader(cfg) + logger.info("Starting training from iteration {}".format(start_iter)) + with EventStorage(start_iter) as storage: + for data, iteration in zip(data_loader, range(start_iter, max_iter)): + iteration = iteration + 1 + storage.step() + + loss_dict = model(data) + losses = sum(loss_dict.values()) + assert torch.isfinite(losses).all(), loss_dict + + loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + if comm.is_main_process(): + storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) + + optimizer.zero_grad() + losses.backward() + optimizer.step() + storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) + scheduler.step() + + if ( + cfg.TEST.EVAL_PERIOD > 0 + and iteration % cfg.TEST.EVAL_PERIOD == 0 + and iteration != max_iter + ): + do_test(cfg, model) + # Compared to "train_net.py", the test results are not dumped to EventStorage + comm.synchronize() + + if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter): + for writer in writers: + writer.write() + periodic_checkpointer.step(iteration) + + +def setup(args): + """ + Create configs and perform basic setups. 
+ """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup( + cfg, args + ) # if you don't like any of the default setup, write your own setup code + return cfg + + +def main(args): + cfg = setup(args) + + model = build_model(cfg) + logger.info("Model:\n{}".format(model)) + if args.eval_only: + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + return do_test(cfg, model) + + distributed = comm.get_world_size() > 1 + if distributed: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + + do_train(cfg, model, resume=args.resume) + return do_test(cfg, model) + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/train_net.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..fe8a0cdf85ce840ff71dab2278a3c35cf54f7554 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/train_net.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Detection Training Script. + +This scripts reads a given config file and runs the training or evaluation. +It is an entry point that is made to train standard models in detectron2. + +In order to let one script support training of many models, +this script contains logic that are specific to these built-in models and therefore +may not be suitable for your own project. +For example, your research project perhaps only needs a single "evaluator". + +Therefore, we recommend you to use detectron2 as an library and take +this file as an example of how to use the library. +You may want to write your own script with your datasets and other customizations. 
+""" + +import logging +import os +from collections import OrderedDict +import torch +from apex import amp + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from detectron2.modeling import GeneralizedRCNNWithTTA + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains pre-defined default logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can write your + own training loop. You can use "tools/plain_train_net.py" as an example. + """ + def __init__(self, cfg, args): + super(Trainer, self).__init__(cfg, args) + self.args = args + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. 
+ logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop (see plain_train_net.py) or + subclassing the trainer. + """ + trainer = Trainer(cfg, args) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + + if args.eval_only: + # model = Trainer.build_model(cfg) + model = trainer.model + model.eval() + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + + if cfg.DEBUG_MODE: + trainer.register_hooks([hooks.ShowTraining(trainer.model, trainer.start_iter)]) + + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_data.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_data.py new file mode 100644 index 0000000000000000000000000000000000000000..b0a142622e5a8a7c533e6a56b403da9cd0720947 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_data.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import os +from itertools import chain +import cv2 +import tqdm + +from detectron2.config import get_cfg +from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader +from detectron2.data import detection_utils as utils +from detectron2.data.build import filter_images_with_few_keypoints +from detectron2.utils.logger import setup_logger +from detectron2.utils.visualizer import Visualizer + + +def setup(args): + cfg = get_cfg() + if args.config_file: + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + return cfg + + +def parse_args(in_args=None): + parser = argparse.ArgumentParser(description="Visualize ground-truth data") + parser.add_argument( + "--source", + choices=["annotation", "dataloader"], + required=True, + help="visualize the annotations or the data loader (with pre-processing)", + ) + parser.add_argument("--config-file", metavar="FILE", help="path to config file") + parser.add_argument("--output-dir", default="./", help="path to output directory") + parser.add_argument("--show", action="store_true", help="show output in a window") + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + return parser.parse_args(in_args) + + +if __name__ == "__main__": + args = parse_args() + logger = setup_logger() + logger.info("Arguments: " + str(args)) + cfg = setup(args) + + dirname = args.output_dir + os.makedirs(dirname, exist_ok=True) + metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) + + def output(vis, fname): + if args.show: + print(fname) + cv2.imshow("window", vis.get_image()[:, :, ::-1]) + cv2.waitKey() + else: + filepath = os.path.join(dirname, fname) + print("Saving to {} ...".format(filepath)) + vis.save(filepath) + + scale = 2.0 if args.show else 1.0 + if args.source == "dataloader": + train_data_loader = build_detection_train_loader(cfg) + for batch in train_data_loader: + for per_image in batch: + # Pytorch tensor is in (C, H, W) format + img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy() + img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT) + + visualizer = Visualizer(img, metadata=metadata, scale=scale) + target_fields = per_image["instances"].get_fields() + labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] + vis = visualizer.overlay_instances( + labels=labels, + boxes=target_fields.get("gt_boxes", None), + masks=target_fields.get("gt_masks", None), + keypoints=target_fields.get("gt_keypoints", None), + ) + output(vis, str(per_image["image_id"]) + ".jpg") + else: + dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) + if cfg.MODEL.KEYPOINT_ON: + dicts = filter_images_with_few_keypoints(dicts, 1) + for dic in tqdm.tqdm(dicts): + img = utils.read_image(dic["file_name"], "RGB") + visualizer = Visualizer(img, metadata=metadata, scale=scale) + vis = visualizer.draw_dataset_dict(dic) + output(vis, os.path.basename(dic["file_name"])) diff --git a/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_json_results.py b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_json_results.py new file mode 100644 index 0000000000000000000000000000000000000000..7dec699e875f3be6cdf8dc882b22b51107330c20 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/models/detectron2/tools/visualize_json_results.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json +import numpy as np +import os +from collections import defaultdict +import cv2 +import tqdm +from fvcore.common.file_io import PathManager + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import Boxes, BoxMode, Instances +from detectron2.utils.logger import setup_logger +from detectron2.utils.visualizer import Visualizer + + +def create_instances(predictions, image_size): + ret = Instances(image_size) + + score = np.asarray([x["score"] for x in predictions]) + chosen = (score > args.conf_threshold).nonzero()[0] + score = score[chosen] + bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) + bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) + + labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) + + ret.scores = score + ret.pred_boxes = Boxes(bbox) + ret.pred_classes = labels + + try: + ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] + except KeyError: + pass + return ret + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="A script that visualizes the json predictions from COCO or LVIS dataset." 
+ ) + parser.add_argument("--input", required=True, help="JSON file produced by the model") + parser.add_argument("--output", required=True, help="output directory") + parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") + parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") + args = parser.parse_args() + + logger = setup_logger() + + with PathManager.open(args.input, "r") as f: + predictions = json.load(f) + + pred_by_image = defaultdict(list) + for p in predictions: + pred_by_image[p["image_id"]].append(p) + + dicts = list(DatasetCatalog.get(args.dataset)) + metadata = MetadataCatalog.get(args.dataset) + if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): + + def dataset_id_map(ds_id): + return metadata.thing_dataset_id_to_contiguous_id[ds_id] + + elif "lvis" in args.dataset: + # LVIS results are in the same format as COCO results, but have a different + # mapping from dataset category id to contiguous category id in [0, #categories - 1] + def dataset_id_map(ds_id): + return ds_id - 1 + + else: + raise ValueError("Unsupported dataset: {}".format(args.dataset)) + + os.makedirs(args.output, exist_ok=True) + + for dic in tqdm.tqdm(dicts): + img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] + basename = os.path.basename(dic["file_name"]) + + predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) + vis = Visualizer(img, metadata) + vis_pred = vis.draw_instance_predictions(predictions).get_image() + + vis = Visualizer(img, metadata) + vis_gt = vis.draw_dataset_dict(dic).get_image() + + concat = np.concatenate((vis_pred, vis_gt), axis=1) + cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) diff --git a/PyTorch/contrib/cv/others/CenterMask2/requirements.txt b/PyTorch/contrib/cv/others/CenterMask2/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b8f90aed8fcd6a2bfa1c467ed7c6991d5ad9ac8 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/requirements.txt @@ -0,0 +1,67 @@ +absl-py==1.0.0 +antlr4-python3-runtime==4.8 +asn1crypto==0.24.0 +attrs==21.4.0 +auto-tune==0.1.0 +cachetools==5.0.0 +certifi==2018.11.29 +cffi==1.11.5 +chardet==3.0.4 +cloudpickle==2.0.0 +conda==4.5.12 +cryptography==2.3.1 +cycler==0.11.0 +Cython==0.29.24 +decorator==5.1.0 +future==0.18.2 +google-auth==2.6.5 +google-auth-oauthlib==0.4.6 +grpcio==1.45.0 +hccl==0.1.0 +hccl-parser==0.1 +idna==2.8 +importlib-metadata==4.11.3 +iopath==0.1.9 +kiwisolver==1.3.2 +Markdown==3.3.6 +matplotlib==3.4.3 +mock==4.0.3 +mpmath==1.2.1 +numpy==1.21.0 +oauthlib==3.2.0 +omegaconf==2.1.1 +op-gen==0.1 +op-test-frame==0.1 +Pillow==8.4.0 +portalocker==2.4.0 +protobuf==3.20.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocotools==2.0.2 +pycosat==0.6.3 +pycparser==2.19 +pydot==1.4.2 +pyOpenSSL==18.0.0 +pyparsing==3.0.5 +PySocks==1.6.8 +python-dateutil==2.8.2 +PyYAML==6.0 +requests==2.21.0 +requests-oauthlib==1.3.1 +rsa==4.8 +ruamel-yaml==0.15.71 +six==1.11.0 +sympy==1.9 +tabulate==0.8.9 +te==0.4.0 +tensorboard==2.8.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +termcolor==1.1.0 +topi==0.4.0 +tqdm==4.64.0 +typing-extensions==4.2.0 +urllib3==1.24.1 +Werkzeug==2.1.1 +yacs==0.1.8 +zipp==3.8.0 diff --git a/PyTorch/contrib/cv/others/CenterMask2/test/train_eval_8p.sh b/PyTorch/contrib/cv/others/CenterMask2/test/train_eval_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..9bea1478f05dfc04100f1b52ceafc283634cdc3d --- /dev/null +++ 
b/PyTorch/contrib/cv/others/CenterMask2/test/train_eval_8p.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size resume RANK_SIZE +# 网络名称,同目录名称 +Network="centermask2" +# 训练batch_size +batch_size=32 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" +# checkpoint文件路径,以实际路径为准 +pth_path="" +# 训练epoch +#train_epochs=200 +# 学习率 +learning_rate=0.01 +# 加载数据进程数 +workers=184 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --pth_path* ]];then + pth_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 校验是否传入 pth_path , 验证脚本需要传入此参数 +if [[ $pth_path == "" ]];then + echo "[Error] para \"pth_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +python ./models/centermask2/train_net.py \ + --config-file ./models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml \ + --device-ids 0 1 2 3 4 5 6 7 \ + --num-gpus 8 \ + --eval-only \ + MODEL.WEIGHTS $pth_path\ + SOLVER.MAX_ITER 4000\ + SOLVER.BASE_LR 0.01\ + SOLVER.CHECKPOINT_PERIOD 3700\ + OPT_LEVEL O1\ + LOSS_SCALE_VALUE None > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" + +#输出训练精度,需要模型审视修改 +train_accuracy_bbox=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==3{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +train_accuracy_sgem=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==6{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : bbox:${train_accuracy_bbox};segm:${train_accuracy_sgem}" +echo "E2E Training Duration sec : $e2e_time" + +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep -a "iter:" 
${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "total_loss:" '{print $NF}'| awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/others/CenterMask2/test/train_finetune_1p.sh b/PyTorch/contrib/cv/others/CenterMask2/test/train_finetune_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..3875a3d89e0335d477b8d69c75b9480c724ca0d6 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/test/train_finetune_1p.sh @@ -0,0 +1,144 @@ +#!/bin/bash +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="centermask2_1" +# 训练batch_size +batch_size=2 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +# checkpoint文件路径,以实际路径为准 +pth_path="" +# 训练epoch +train_epochs=30 +# 指定训练所使用的npu device卡id +device_id=0 +# 加载数据进程数 +workers=128 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + + fi +done + +# 校验是否传入data_path,不需要修改 + echo $data_path +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi + +export 'DETECTRON2_DATASETS'=$data_path +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +python ./models/centermask2/train_net.py \ + --config-file ./models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml \ + --device-ids 0 \ + --num-gpus 1\ + SOLVER.IMS_PER_BATCH 2\ + SOLVER.BASE_LR 0.0001 \ + SOLVER.MAX_ITER 4000 \ + SOLVER.CHECKPOINT_PERIOD 3700\ + OPT_LEVEL O2\ + LOSS_SCALE_VALUE 128> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'fps' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $26}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy_bbox=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==3{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +train_accuracy_sgem=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==6{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : bbox:${train_accuracy_bbox};segm:${train_accuracy_sgem}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep -a "iter:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "total_loss:" '{print $NF}'| awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log 
+echo "TrainAccuracyBBox = ${train_accuracy_bbox}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracySgem = ${train_accuracy_sgem}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/others/CenterMask2/test/train_full_8p.sh b/PyTorch/contrib/cv/others/CenterMask2/test/train_full_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..47e8d905d8b7616dfe611662c548a64af2159bf9 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/test/train_full_8p.sh @@ -0,0 +1,113 @@ +#!/bin/bash +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="centermask2_1" +# 训练batch_size +batch_size=32 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" +# 训练epoch +train_epochs=30 +# 加载数据进程数 +workers=128 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done +# 校验是否传入data_path,不需要修改 + echo $data_path +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +export 'DETECTRON2_DATASETS'=$data_path +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +python ./models/centermask2/train_net.py \ + --config-file ./models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml \ + --device-ids 0 1 2 3 4 5 6 7\ + --num-gpus 8\ + SOLVER.IMS_PER_BATCH 32\ + SOLVER.MAX_ITER 4000 \ + SOLVER.BASE_LR 0.01 \ + SOLVER.CHECKPOINT_PERIOD 3700 \ + OPT_LEVEL O1\ + LOSS_SCALE_VALUE None > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'fps' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $26}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +#输出训练精度,需要模型审视修改 +train_accuracy_bbox=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==3{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +train_accuracy_sgem=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==6{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` 
+#打印,不需要修改 +echo "Final Train Accuracy : bbox:${train_accuracy_bbox};segm:${train_accuracy_sgem}" +echo "E2E Training Duration sec : $e2e_time" +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep -a "iter:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "total_loss:" '{print $NF}'| awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracyBBox = ${train_accuracy_bbox}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracySgem = ${train_accuracy_sgem}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_1p.sh b/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_1p.sh new file mode 100644 index 0000000000000000000000000000000000000000..62f40c8687c5a46644d7441cc650d5d302ce2cc3 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_1p.sh @@ -0,0 +1,121 @@ +#!/bin/bash +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="centermask2" +# 训练batch_size +batch_size=2 +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +# 训练epoch +train_epochs=30 +# 指定训练所使用的npu device卡id +device_id=0 +# 加载数据进程数 +workers=128 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done +# 校验是否传入data_path,不需要修改 + echo $data_path +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 +if [ $ASCEND_DEVICE_ID ];then + echo "device id is ${ASCEND_DEVICE_ID}" +elif [ ${device_id} ];then + export ASCEND_DEVICE_ID=${device_id} + echo "device id is ${ASCEND_DEVICE_ID}" +else + "[Error] device id must be config" + exit 1 +fi +export 'DETECTRON2_DATASETS'=$data_path 
+###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +python ./models/centermask2/train_net.py \ + --config-file ./models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml \ + --device-ids 0 \ + --num-gpus 1\ + SOLVER.MAX_ITER 4000\ + SOLVER.BASE_LR 0.0001\ + SOLVER.CHECKPOINT_PERIOD 3700\ + OPT_LEVEL O2\ + LOSS_SCALE_VALUE 128> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'fps' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $26}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +#输出训练精度,需要模型审视修改 +train_accuracy_bbox=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==3{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +train_accuracy_sgem=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==6{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : bbox:${train_accuracy_bbox};segm:${train_accuracy_sgem}" +echo "E2E Training Duration sec : $e2e_time" +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'pref' +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep -a "iter:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "total_loss:" '{print $NF}'| awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log 
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_8p.sh b/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..8014366c417800c4b99c007e79281df45c6d0765 --- /dev/null +++ b/PyTorch/contrib/cv/others/CenterMask2/test/train_performance_8p.sh @@ -0,0 +1,130 @@ +#!/bin/bash +################基础配置参数,需要模型审视修改################## +# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE +# 网络名称,同目录名称 +Network="centermask2" +# 训练batch_size +batch_size=32 +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +# 训练epoch +train_epochs=30 + +# 加载数据进程数 +workers=128 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --workers* ]];then + workers=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 + echo $data_path +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +export 'DETECTRON2_DATASETS'=$data_path +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + + +#################创建日志输出目录,不需要修改################# +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + + +#################启动训练脚本################# +#训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi +python ./models/centermask2/train_net.py \ + --config-file ./models/centermask2/configs/centermask/zsclzy_model_config_amp.yaml \ + --device-ids 0 1 2 3 4 5 6 7\ + --num-gpus 8\ + SOLVER.MAX_ITER 4000\ + SOLVER.BASE_LR 0.01\ + SOLVER.CHECKPOINT_PERIOD 3700\ + OPT_LEVEL O1\ + LOSS_SCALE_VALUE None > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +wait + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'fps' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $26}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy_bbox=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==3{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +train_accuracy_sgem=`grep -a 'copypaste' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'NR==6{print}' |awk -F " " '{print $5}'|awk -F "," '{print $1}'` +#打印,不需要修改 +echo 
"Final Train Accuracy : bbox:${train_accuracy_bbox};segm:${train_accuracy_sgem}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'pref' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep -a "iter:" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "total_loss:" '{print $NF}'| awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/others/CycleGAN/CycleGAN_NetLoad.py b/PyTorch/contrib/cv/others/CycleGAN/CycleGAN_NetLoad.py index eb500107040c180a59742c8137b383ecb0bb91e9..2effb5f2ba84a07b38b3ccb38c9bb517368e14f9 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/CycleGAN_NetLoad.py +++ b/PyTorch/contrib/cv/others/CycleGAN/CycleGAN_NetLoad.py @@ -1,92 +1,92 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from collections import OrderedDict -import torch -from models import networks_adapt as networks - - -class load_networks(): - def __init__(self, opt): - self.opt = opt - self.gpu = 0 - self.netG_A = networks.define_G(self.opt.input_nc, self.opt.output_nc, self.opt.ngf, self.opt.netG, - self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, - self.gpu) - self.netG_B = networks.define_G(self.opt.output_nc, self.opt.input_nc, self.opt.ngf, self.opt.netG, - self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, - self.gpu) - if (opt.npu >= 1): - self.device = torch.device('npu:{}'.format(self.gpu)) - if (opt.npu == 0): - self.device = torch.device('cuda:{}'.format(self.gpu)) - else: - self.device = torch.device("cpu") - - def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): - """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" - key = keys[i] - if i + 1 == len(keys): # at the end, pointing to a parameter/buffer - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'running_mean' or key == 'running_var'): - if getattr(module, key) is None: - state_dict.pop('.'.join(keys)) - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'num_batches_tracked'): - state_dict.pop('.'.join(keys)) - else: - self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) - - def proc_nodes_module(self, checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - - def loadnetworks(self, net, load_path): - state_dict = torch.load(load_path, map_location=torch.device('cpu')) - state_dict = self.proc_nodes_module(state_dict) - if hasattr(state_dict, '_metadata'): - del state_dict._metadata - # patch InstanceNorm checkpoints prior to 0.4 - for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop - self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) - net.load_state_dict(state_dict) - return net - - def get_networks(self, load_patha, load_pathb): - model_Ga = self.loadnetworks(self.netG_A, load_patha) - model_Gb = self.loadnetworks(self.netG_B, load_pathb) - return model_Ga, model_Gb +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +from collections import OrderedDict +import torch +from models import networks_adapt as networks + + +class load_networks(): + def __init__(self, opt): + self.opt = opt + self.gpu = 0 + self.netG_A = networks.define_G(self.opt.input_nc, self.opt.output_nc, self.opt.ngf, self.opt.netG, + self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, + self.gpu) + self.netG_B = networks.define_G(self.opt.output_nc, self.opt.input_nc, self.opt.ngf, self.opt.netG, + self.opt.norm, not self.opt.no_dropout, self.opt.init_type, self.opt.init_gain, + self.gpu) + if (opt.npu >= 1): + self.device = torch.device('npu:{}'.format(self.gpu)) + if (opt.npu == 0): + self.device = torch.device('cuda:{}'.format(self.gpu)) + else: + self.device = torch.device("cpu") + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def proc_nodes_module(self, checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." 
in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + + def loadnetworks(self, net, load_path): + state_dict = torch.load(load_path, map_location=torch.device('cpu')) + state_dict = self.proc_nodes_module(state_dict) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + # patch InstanceNorm checkpoints prior to 0.4 + for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop + self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) + net.load_state_dict(state_dict) + return net + + def get_networks(self, load_patha, load_pathb): + model_Ga = self.loadnetworks(self.netG_A, load_patha) + model_Gb = self.loadnetworks(self.netG_B, load_pathb) + return model_Ga, model_Gb diff --git a/PyTorch/contrib/cv/others/CycleGAN/LICENSE b/PyTorch/contrib/cv/others/CycleGAN/LICENSE index 4e1ad12a819e98036586f198d3873933f1892331..eb1309d6c1e79cfb4dad830ae04bfca945f5568e 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/LICENSE +++ b/PyTorch/contrib/cv/others/CycleGAN/LICENSE @@ -1,31 +1,31 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ============================================================================ \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CycleGAN/Readme.md b/PyTorch/contrib/cv/others/CycleGAN/Readme.md index 6eece9b767c2489f4a548198b61d069bde48b925..f3e1aa0e9acf1979f819df0174ed48d1da8d66c8 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/Readme.md +++ b/PyTorch/contrib/cv/others/CycleGAN/Readme.md @@ -1,172 +1,172 @@ -# CycleGAN 训练 - -## Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks - -it's an approach for learning to translate an image from a source domain $X$ to a target domain $Y$ in the absence of paired examples - -For more detail:https://arxiv.org/abs/1703.10593v6 - -## - -## Requirements - - **You need to install CNN5.0.3 to ensure the normal training of the model!!** -and use pytorch, you can use pip or conda to install the requirements - -``` -# for pip -torch>=1.5.0 -torchvision>=0.5.0 -dominate>=2.4.0 -visdom>=0.1.8.8 -``` - -## 数据集准备 - -1.从以下网址获取maps.zip作为训练集 - -http://efrosgans.eecs.berkeley.edu/cyclegan/datasets/ - -文件结构如下: - - -``` -CycleGAN -|-- datasets -| |-- maps -| | |-- testA -| | |-- testB -| | |--trainA -| | |--trainB -| | |--valA -| | |--valB -|-- test - |--train_full_1p.sh - |--train_full_8p.sh - |--train_performance_1p.sh - |--train_performance_8p.sh -|-- models - |--cycle_gan_model_adapt.py - |--networks_adapt.py -|--util - |--html.py - |--visualizer_adapt.py - |--util.py - |--visualizer_adapt.py -|-- dataloader.py -|-- parse.py -|-- train.py -|--env_npu.sh - -``` - -将数据集按照以上结构放在代码目录下 - -## 安装 - -请注意,本模型使用了新版本的pytorch以及CANN包,具体版本为:torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_aarch64.whl,Ascend-cann-toolkit_5.0.3_linux-aarch64.run; - -source 环境变量 - -``` -bash ./env_npu.sh -``` - - -## TRAIN - -### 单p训练 - -source 环境变量 - -``` -bash./env_npu.sh -``` - -运行单p脚本 - -``` -bash ./test/train_full_1p.sh -``` - - - -### 多p训练 - -source 环境变量 - -``` -source ./env_npu.sh -``` - -运行8p脚本 - -``` -bash ./test/train_full_8p.sh -``` - -模型保存在./checkpoints目录下,以数字命名的pth文件是当前epoch训练得到的权重文件,可用来恢复训练; - -运行日志保存至./目录下 - -## TEST - -测试精度 - - - -``` -由于论文为人眼观察生成效果的真假,所以这里省略,不过下面的demon提供将生成结果以网页的形式更为直观的展现出来 -``` - - - - -## Demo -然后运行以下脚本,执行demo.py: - -``` -python3.7.5 demon.py --pu_ids='0' \ - --prof=0 \ - --multiprocessing_distributed=0 \ - --distributed=1 \ - --npu=1 \ - --dataroot=./datasets/maps \ - --checkpoints_dir=./checkpoints_1pbs1_O1_sacle_1024_torchadam \ - --model_ga_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth \ - 
--model_gb_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth >>npu8pbs1_demon.log 2>&1 & -``` - -请指定需要测试的模型路径,将--checkpoints_dir、--model_ga_path、--model_gb_path所指向的参数替换掉既可替换掉即可,最后的输出结果存放在根目录的result目录下,点击index.html既可查看,结果展示请在支持浏览器的系统查看。 - -## 注意事项 -1、超参说明 -``` ---pu_ids='0,1,2,3,4,5,6,7'------------------------------------------指定几张卡训练,必须使用连续的卡号 ---prof=0------------------------------------------------------------是否测试性能,当为0时,不测试性能,为1则在大于等于10个epoch后输出prof文件 ---multiprocessing_distributed=1-------------------------------------是否执行多核训练,多卡必须为1,单卡设置为0既可 ---distributed=1-----------------------------------------------------该参数不可更改 ---npu=1-------------------------------------------------------------是否使用Npu开始训练,如果在Npu平台训练则必须使用1,GPU平台则必须为0 ---dataroot=./datasets/maps------------------------------------------数据集的目录 ---checkpoints_dir=./checkpoints_8pbs1_O1_sacle_1024_torchadam-------存放训练权重的目录 ---batch_size=1------------------------------------------------------指定训练时每个step输入多少样本,多卡训练不建议调高,单卡可适当调高为2。bs过大, - 会导致判别器过早收敛,进而造成生辰效果不佳 ---isapex=True-------------------------------------------------------是否开启混合精度进行训练,一般是开启的 ---apex_type="O1"----------------------------------------------------如果开启混合精度训练,建议使用O1模式,O2模式不收敛。当然O0也是可以的 ---loss_scale=1024---------------------------------------------------指定混合精度训练时的loss放大倍数,loss放大倍数也可以被指定为dynamic ---log_path="npu8pbs1.txt"-------------------------------------------只存放与模型有关的日志,不存放与后台输出有关的其他调试日志 ---num_epoch_start=0-------------------------------------------------从第几个epoch开始训练,如果开启继续训练,则需要指定该参数 ---num_epoch=200-----------------------------------------------------默认训练200个epoch,不可调高,但可以调低 ---n_epochs=100------------------------------------------------------权重衰减参数,默认前100个epoch保持学习率不变,后面开始慢慢线性衰减 ---lr=1e-4-----------------------------------------------------------baseline的学习率 ---line_scale=1------------------------------------------------------baseline的学习率的放大倍数,单卡为1,8卡训练建议设为2,其他卡酌情调参 ---n_epochs=100------------------------------------------------------与n_epochs保持一致 ---n_epochs_decay=100------------------------------------------------与n_epochs保持一致 ---pool_size=50-------------------------------------------------------该参数如果为单卡,使用50既可,如果为8卡,建议设置为16,其他卡酌情调参,一般多卡要调低且数 - 值为4的倍数 ---lambda_A=10--------------------------------------------------------论文超参 ---lambda_B=10--------------------------------------------------------论文超参 ---loadweight=199_pu0-----------------------------------------------------指定多少个epoch开始继续训练,重新训练默认参数既可 ---model_ga_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth--存放权重的目录,运行demon的时候需要 ---model_gb_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth--存放权重的目录,运行demon的时候需要_ -``` +# CycleGAN 训练 + +## Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks + +it's an approach for learning to translate an image from a source domain $X$ to a target domain $Y$ in the absence of paired examples + +For more detail:https://arxiv.org/abs/1703.10593v6 + +## + +## Requirements + + **You need to install CNN5.0.3 to ensure the normal training of the model!!** +and use pytorch, you can use pip or conda to install the requirements + +``` +# for pip +torch>=1.5.0 +torchvision>=0.5.0 +dominate>=2.4.0 +visdom>=0.1.8.8 +``` + +## 数据集准备 + +1.从以下网址获取maps.zip作为训练集 + +http://efrosgans.eecs.berkeley.edu/cyclegan/datasets/ + +文件结构如下: + + +``` +CycleGAN +|-- datasets +| |-- maps +| | |-- testA +| | |-- testB +| | |--trainA +| | |--trainB +| | |--valA +| | |--valB +|-- test + |--train_full_1p.sh + |--train_full_8p.sh + 
|--train_performance_1p.sh + |--train_performance_8p.sh +|-- models + |--cycle_gan_model_adapt.py + |--networks_adapt.py +|--util + |--html.py + |--visualizer_adapt.py + |--util.py + |--visualizer_adapt.py +|-- dataloader.py +|-- parse.py +|-- train.py +|--env_npu.sh + +``` + +将数据集按照以上结构放在代码目录下 + +## 安装 + +请注意,本模型使用了新版本的pytorch以及CANN包,具体版本为:torch-1.5.0+ascend.post3.20210930-cp37-cp37m-linux_aarch64.whl,Ascend-cann-toolkit_5.0.3_linux-aarch64.run; + +source 环境变量 + +``` +bash ./env_npu.sh +``` + + +## TRAIN + +### 单p训练 + +source 环境变量 + +``` +bash./env_npu.sh +``` + +运行单p脚本 + +``` +bash ./test/train_full_1p.sh +``` + + + +### 多p训练 + +source 环境变量 + +``` +source ./env_npu.sh +``` + +运行8p脚本 + +``` +bash ./test/train_full_8p.sh +``` + +模型保存在./checkpoints目录下,以数字命名的pth文件是当前epoch训练得到的权重文件,可用来恢复训练; + +运行日志保存至./目录下 + +## TEST + +测试精度 + + + +``` +由于论文为人眼观察生成效果的真假,所以这里省略,不过下面的demon提供将生成结果以网页的形式更为直观的展现出来 +``` + + + + +## Demo +然后运行以下脚本,执行demo.py: + +``` +python3.7.5 demon.py --pu_ids='0' \ + --prof=0 \ + --multiprocessing_distributed=0 \ + --distributed=1 \ + --npu=1 \ + --dataroot=./datasets/maps \ + --checkpoints_dir=./checkpoints_1pbs1_O1_sacle_1024_torchadam \ + --model_ga_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth \ + --model_gb_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth >>npu8pbs1_demon.log 2>&1 & +``` + +请指定需要测试的模型路径,将--checkpoints_dir、--model_ga_path、--model_gb_path所指向的参数替换掉既可替换掉即可,最后的输出结果存放在根目录的result目录下,点击index.html既可查看,结果展示请在支持浏览器的系统查看。 + +## 注意事项 +1、超参说明 +``` +--pu_ids='0,1,2,3,4,5,6,7'------------------------------------------指定几张卡训练,必须使用连续的卡号 +--prof=0------------------------------------------------------------是否测试性能,当为0时,不测试性能,为1则在大于等于10个epoch后输出prof文件 +--multiprocessing_distributed=1-------------------------------------是否执行多核训练,多卡必须为1,单卡设置为0既可 +--distributed=1-----------------------------------------------------该参数不可更改 +--npu=1-------------------------------------------------------------是否使用Npu开始训练,如果在Npu平台训练则必须使用1,GPU平台则必须为0 +--dataroot=./datasets/maps------------------------------------------数据集的目录 +--checkpoints_dir=./checkpoints_8pbs1_O1_sacle_1024_torchadam-------存放训练权重的目录 +--batch_size=1------------------------------------------------------指定训练时每个step输入多少样本,多卡训练不建议调高,单卡可适当调高为2。bs过大, + 会导致判别器过早收敛,进而造成生辰效果不佳 +--isapex=True-------------------------------------------------------是否开启混合精度进行训练,一般是开启的 +--apex_type="O1"----------------------------------------------------如果开启混合精度训练,建议使用O1模式,O2模式不收敛。当然O0也是可以的 +--loss_scale=1024---------------------------------------------------指定混合精度训练时的loss放大倍数,loss放大倍数也可以被指定为dynamic +--log_path="npu8pbs1.txt"-------------------------------------------只存放与模型有关的日志,不存放与后台输出有关的其他调试日志 +--num_epoch_start=0-------------------------------------------------从第几个epoch开始训练,如果开启继续训练,则需要指定该参数 +--num_epoch=200-----------------------------------------------------默认训练200个epoch,不可调高,但可以调低 +--n_epochs=100------------------------------------------------------权重衰减参数,默认前100个epoch保持学习率不变,后面开始慢慢线性衰减 +--lr=1e-4-----------------------------------------------------------baseline的学习率 +--line_scale=1------------------------------------------------------baseline的学习率的放大倍数,单卡为1,8卡训练建议设为2,其他卡酌情调参 +--n_epochs=100------------------------------------------------------与n_epochs保持一致 +--n_epochs_decay=100------------------------------------------------与n_epochs保持一致 +--pool_size=50-------------------------------------------------------该参数如果为单卡,使用50既可,如果为8卡,建议设置为16,其他卡酌情调参,一般多卡要调低且数 + 值为4的倍数 +--lambda_A=10--------------------------------------------------------论文超参 
+--lambda_B=10--------------------------------------------------------论文超参 +--loadweight=199_pu0-----------------------------------------------------指定多少个epoch开始继续训练,重新训练默认参数既可 +--model_ga_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth--存放权重的目录,运行demon的时候需要 +--model_gb_path=./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth--存放权重的目录,运行demon的时候需要_ +``` diff --git a/PyTorch/contrib/cv/others/CycleGAN/dataloader.py b/PyTorch/contrib/cv/others/CycleGAN/dataloader.py index 4cdc1e610cf9d470aea1a020dcfd7d52b55c2e05..71b12c2f9f127ecc567273443ae0d52e0f81fd8e 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/dataloader.py +++ b/PyTorch/contrib/cv/others/CycleGAN/dataloader.py @@ -1,128 +1,128 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import importlib -import torch.utils.data -#from data.base_dataset import BaseDataset - - -def find_dataset_using_name(dataset_name): - """Import the module "data/[dataset_name]_dataset.py". - - In the file, the class called DatasetNameDataset() will - be instantiated. It has to be a subclass of BaseDataset, - and it is case-insensitive. - """ - dataset_filename = "data." + dataset_name + "_dataset" - datasetlib = importlib.import_module(dataset_filename) - - dataset = None - target_dataset_name = dataset_name.replace('_', '') + 'dataset' - for name, cls in datasetlib.__dict__.items(): - if name.lower() == target_dataset_name.lower(): - dataset = cls - - if dataset is None: - raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." 
% (dataset_filename, target_dataset_name)) - - return dataset - - -def get_option_setter(dataset_name): - """Return the static method of the dataset class.""" - dataset_class = find_dataset_using_name(dataset_name) - return dataset_class.modify_commandline_options - - -def create_dataset(opt): - """Create a dataset given the option. - - This function wraps the class CustomDatasetDataLoader. - This is the main interface between this package and 'train.py'/'test.py' - """ - dataset_class = find_dataset_using_name(opt.dataset_mode) - datasets = dataset_class(opt) - train_sampler = torch.utils.data.distributed.DistributedSampler(datasets) - data_loader = CustomDatasetDataLoader(opt,datasets,train_sampler) - dataset = data_loader.load_data() - return dataset,train_sampler - - -class CustomDatasetDataLoader(): - """Wrapper class of Dataset class that performs multi-threaded data loading""" - - def __init__(self, opt,dataset,train_sampler): - """Initialize this class - - Step 1: create a dataset instance given the name [dataset_mode] - Step 2: create a multi-threaded data loader. - """ - self.opt = opt - - self.dataset=dataset - - print("dataset [%s] was created" % type(self.dataset).__name__) - if(opt.ngpus_per_node>1 and opt.multiprocessing_distributed>=1): - self.dataloader = torch.utils.data.DataLoader( - self.dataset, - batch_size=opt.batch_size, - shuffle=(train_sampler is None), - pin_memory=False, - num_workers=int(opt.num_threads), - sampler=train_sampler, - drop_last=True) - #self.dataloader = torch.utils.data.DataLoader( - # self.dataset, - # batch_size=opt.batch_size, - # shuffle=not opt.serial_batches, - # num_workers=int(opt.num_threads), - # ) - else: - self.dataloader = torch.utils.data.DataLoader( - self.dataset, - batch_size=opt.batch_size, - shuffle=not opt.serial_batches, - num_workers=int(opt.num_threads), - ) - - def load_data(self): - return self - - def __len__(self): - """Return the number of data in the dataset""" - return min(len(self.dataset), self.opt.max_dataset_size) - - def __iter__(self): - """Return a batch of data""" - for i, data in enumerate(self.dataloader): - if i * self.opt.batch_size >= self.opt.max_dataset_size: - break - yield data +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import importlib +import torch.utils.data +#from data.base_dataset import BaseDataset + + +def find_dataset_using_name(dataset_name): + """Import the module "data/[dataset_name]_dataset.py". + + In the file, the class called DatasetNameDataset() will + be instantiated. It has to be a subclass of BaseDataset, + and it is case-insensitive. + """ + dataset_filename = "data." + dataset_name + "_dataset" + datasetlib = importlib.import_module(dataset_filename) + + dataset = None + target_dataset_name = dataset_name.replace('_', '') + 'dataset' + for name, cls in datasetlib.__dict__.items(): + if name.lower() == target_dataset_name.lower(): + dataset = cls + + if dataset is None: + raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) + + return dataset + + +def get_option_setter(dataset_name): + """Return the static method of the dataset class.""" + dataset_class = find_dataset_using_name(dataset_name) + return dataset_class.modify_commandline_options + + +def create_dataset(opt): + """Create a dataset given the option. + + This function wraps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + """ + dataset_class = find_dataset_using_name(opt.dataset_mode) + datasets = dataset_class(opt) + train_sampler = torch.utils.data.distributed.DistributedSampler(datasets) + data_loader = CustomDatasetDataLoader(opt,datasets,train_sampler) + dataset = data_loader.load_data() + return dataset,train_sampler + + +class CustomDatasetDataLoader(): + """Wrapper class of Dataset class that performs multi-threaded data loading""" + + def __init__(self, opt,dataset,train_sampler): + """Initialize this class + + Step 1: create a dataset instance given the name [dataset_mode] + Step 2: create a multi-threaded data loader. 
+ """ + self.opt = opt + + self.dataset=dataset + + print("dataset [%s] was created" % type(self.dataset).__name__) + if(opt.ngpus_per_node>1 and opt.multiprocessing_distributed>=1): + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + batch_size=opt.batch_size, + shuffle=(train_sampler is None), + pin_memory=False, + num_workers=int(opt.num_threads), + sampler=train_sampler, + drop_last=True) + #self.dataloader = torch.utils.data.DataLoader( + # self.dataset, + # batch_size=opt.batch_size, + # shuffle=not opt.serial_batches, + # num_workers=int(opt.num_threads), + # ) + else: + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + batch_size=opt.batch_size, + shuffle=not opt.serial_batches, + num_workers=int(opt.num_threads), + ) + + def load_data(self): + return self + + def __len__(self): + """Return the number of data in the dataset""" + return min(len(self.dataset), self.opt.max_dataset_size) + + def __iter__(self): + """Return a batch of data""" + for i, data in enumerate(self.dataloader): + if i * self.opt.batch_size >= self.opt.max_dataset_size: + break + yield data diff --git a/PyTorch/contrib/cv/others/CycleGAN/demon.py b/PyTorch/contrib/cv/others/CycleGAN/demon.py index 1090328622f263aaf2a4bc0f89bc9b1ce45dff8f..cbcff3e5542702c29d21089ae167bce2241af40f 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/demon.py +++ b/PyTorch/contrib/cv/others/CycleGAN/demon.py @@ -1,189 +1,189 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
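The `CustomDatasetDataLoader` wrapper shown above switches to a `DistributedSampler`-backed `DataLoader` for multi-card runs and caps iteration at `max_dataset_size` in `__iter__`. The standalone sketch below illustrates that pattern with a toy `TensorDataset` and explicit `num_replicas`/`rank` values (assumptions made so that no process group is needed); it is not taken from the patch.

```python
# Minimal sketch of the data-loading pattern used above: a DistributedSampler-backed
# DataLoader whose iteration is capped at max_dataset_size.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(100).float())            # stand-in dataset
sampler = DistributedSampler(dataset, num_replicas=8, rank=0, shuffle=True)
loader = DataLoader(dataset, batch_size=1, sampler=sampler,
                    num_workers=0, pin_memory=False, drop_last=True)

max_dataset_size = 10
for i, batch in enumerate(loader):
    if i * loader.batch_size >= max_dataset_size:             # same cap as __iter__ above
        break
    # a training step would consume `batch` here
```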
-# ============================================================================ -import os -import torchvision.transforms as transforms -from PIL import Image -import torch.onnx -from torch.utils.data import Dataset -from torchvision.datasets.folder import IMG_EXTENSIONS -from parse import parse_args -from CycleGAN_NetLoad import load_networks - - -def make_power(img, base): - ow, oh = img.size - h = int(round(oh / base) * base) - w = int(round(ow / base) * base) - if h == oh and w == ow: - return img - - -def preprocess(image_shape): - process = transforms.Compose([ - transforms.Lambda(lambda img: make_power(img, base=4)), - transforms.Resize(image_shape), - transforms.ToTensor(), - transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) - return process - - -def postprocess(img_tensor): - inv_normalize = transforms.Normalize( - mean=(-1, -1, -1), - std=(2.0, 2.0, 2.0)) - to_PIL_image = transforms.ToPILImage() - return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) - - -def make_dataset(dir, max_dataset_size=float("inf")): - images = [] - assert os.path.isdir(dir), '%s is not a valid directory' % dir - - for root, _, fnames in sorted(os.walk(dir)): - for fname in fnames: - path = os.path.join(root, fname) - images.append(path) - return images[:min(max_dataset_size, len(images))] - - -def default_loader(path): - return Image.open(path).convert('RGB') - - -class ImageFolder(Dataset): - def __init__(self, root, transform=None, return_paths=True, - loader=default_loader): - imgs = make_dataset(root + '/testA') - if len(imgs) == 0: - raise (RuntimeError("Found 0 images in: " + root + "\n" + - "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) - self.root = root - self.imgs = imgs - self.transform = transform - self.return_paths = return_paths - self.loader = loader - - def __getitem__(self, index): - path = self.imgs[index] - img = self.loader(path) - if self.transform is not None: - img = self.transform(img) - if self.return_paths: - return img, path - else: - return img - - def __len__(self): - return len(self.imgs) - - -def deal_tensor(datas, outputs): - res_img = postprocess(datas) - res_gimg = postprocess(outputs) - - -def main(): - paser = parse_args(True, True) - opt = paser.initialize() - htmlres = '' - - pathroot = './result/' - images_name = 'img' - if (os.path.exists(pathroot + images_name) == False): - os.makedirs(pathroot + images_name) - f = open(pathroot + 'index.html', 'w') - lnetworks = load_networks(opt) - bachsize = opt.batch_size - loc_cpu = 'cpu' - loc = 'npu:1' - transform = preprocess((256, 256)) - model_Ga, _ = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) - model_Ga.eval() - datasets = ImageFolder(opt.dataroot, transform) - dataloader = torch.utils.data.DataLoader(datasets, batch_size=bachsize, shuffle=True, num_workers=4) - - count = 0 - for i, (x, x_path) in enumerate(dataloader): - count += 1 - if (count > 10): - break - temp = str(x_path).split('/') - img_name = temp[4].split(',')[0].split('\'')[0] - src_real = temp[3] - src_g = temp[3] + 'G' - if (os.path.exists(pathroot + images_name + '/' + src_real) == False): - os.makedirs(pathroot + images_name + '/' + src_real) - if (os.path.exists(pathroot + images_name + '/' + src_g) == False): - os.makedirs(pathroot + images_name + '/' + src_g) - x1 = postprocess(x) - realsrc = images_name + '/' + src_real + '/' + img_name - fakesrc = images_name + '/' + src_g + '/' + img_name - y = model_Ga(x.to(loc)) - y = postprocess(y.to(loc_cpu)) - x1.save(pathroot + realsrc) - 
y.save(pathroot + fakesrc)
-        htmlres += '''
-        <div>
-            <p>
-                %s
-            </p>
-            <img src="%s">
-        </div>
-        <div>
-            <p>
-                %s
-            </p>
-            <img src="%s">
-        </div>
- ''' % (img_name.split('.')[0], realsrc, img_name.split('.')[0] + '_fake', fakesrc) - - htmlshow = """ - - - %s - - """ % (htmlres) - f.write(htmlshow) - f.close() - - -if __name__ == '__main__': - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
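The `preprocess()`/`postprocess()` pair in `demon.py` above is an affine map and its inverse: `Normalize(mean=0.5, std=0.5)` sends `[0, 1]` to `[-1, 1]` via `y = 2x - 1`, and `Normalize(mean=-1, std=2)` sends it back via `x = (y + 1) / 2`. A short illustrative check (not part of the patch):

```python
# Illustrative only: verify that the postprocess() normalization above undoes
# the Normalize(mean=0.5, std=0.5) applied in preprocess().
import torch
import torchvision.transforms as transforms

x = torch.rand(3, 8, 8)                                   # toy image in [0, 1]
fwd = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
inv = transforms.Normalize(mean=(-1, -1, -1), std=(2.0, 2.0, 2.0))
assert torch.allclose(inv(fwd(x)), x, atol=1e-6)          # round-trips back to x
```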
+# ============================================================================ +import os +import torchvision.transforms as transforms +from PIL import Image +import torch.onnx +from torch.utils.data import Dataset +from torchvision.datasets.folder import IMG_EXTENSIONS +from parse import parse_args +from CycleGAN_NetLoad import load_networks + + +def make_power(img, base): + ow, oh = img.size + h = int(round(oh / base) * base) + w = int(round(ow / base) * base) + if h == oh and w == ow: + return img + + +def preprocess(image_shape): + process = transforms.Compose([ + transforms.Lambda(lambda img: make_power(img, base=4)), + transforms.Resize(image_shape), + transforms.ToTensor(), + transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) + return process + + +def postprocess(img_tensor): + inv_normalize = transforms.Normalize( + mean=(-1, -1, -1), + std=(2.0, 2.0, 2.0)) + to_PIL_image = transforms.ToPILImage() + return to_PIL_image(inv_normalize(img_tensor[0]).clamp(0, 1)) + + +def make_dataset(dir, max_dataset_size=float("inf")): + images = [] + assert os.path.isdir(dir), '%s is not a valid directory' % dir + + for root, _, fnames in sorted(os.walk(dir)): + for fname in fnames: + path = os.path.join(root, fname) + images.append(path) + return images[:min(max_dataset_size, len(images))] + + +def default_loader(path): + return Image.open(path).convert('RGB') + + +class ImageFolder(Dataset): + def __init__(self, root, transform=None, return_paths=True, + loader=default_loader): + imgs = make_dataset(root + '/testA') + if len(imgs) == 0: + raise (RuntimeError("Found 0 images in: " + root + "\n" + + "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) + self.root = root + self.imgs = imgs + self.transform = transform + self.return_paths = return_paths + self.loader = loader + + def __getitem__(self, index): + path = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + if self.return_paths: + return img, path + else: + return img + + def __len__(self): + return len(self.imgs) + + +def deal_tensor(datas, outputs): + res_img = postprocess(datas) + res_gimg = postprocess(outputs) + + +def main(): + paser = parse_args(True, True) + opt = paser.initialize() + htmlres = '' + + pathroot = './result/' + images_name = 'img' + if (os.path.exists(pathroot + images_name) == False): + os.makedirs(pathroot + images_name) + f = open(pathroot + 'index.html', 'w') + lnetworks = load_networks(opt) + bachsize = opt.batch_size + loc_cpu = 'cpu' + loc = 'npu:1' + transform = preprocess((256, 256)) + model_Ga, _ = lnetworks.get_networks(opt.model_ga_path, opt.model_gb_path) + model_Ga.eval() + datasets = ImageFolder(opt.dataroot, transform) + dataloader = torch.utils.data.DataLoader(datasets, batch_size=bachsize, shuffle=True, num_workers=4) + + count = 0 + for i, (x, x_path) in enumerate(dataloader): + count += 1 + if (count > 10): + break + temp = str(x_path).split('/') + img_name = temp[4].split(',')[0].split('\'')[0] + src_real = temp[3] + src_g = temp[3] + 'G' + if (os.path.exists(pathroot + images_name + '/' + src_real) == False): + os.makedirs(pathroot + images_name + '/' + src_real) + if (os.path.exists(pathroot + images_name + '/' + src_g) == False): + os.makedirs(pathroot + images_name + '/' + src_g) + x1 = postprocess(x) + realsrc = images_name + '/' + src_real + '/' + img_name + fakesrc = images_name + '/' + src_g + '/' + img_name + y = model_Ga(x.to(loc)) + y = postprocess(y.to(loc_cpu)) + x1.save(pathroot + realsrc) + 
y.save(pathroot + fakesrc)
+        htmlres += '''
+        <div>
+            <p>
+                %s
+            </p>
+            <img src="%s">
+        </div>
+        <div>
+            <p>
+                %s
+            </p>
+            <img src="%s">
+        </div>
+ ''' % (img_name.split('.')[0], realsrc, img_name.split('.')[0] + '_fake', fakesrc) + + htmlshow = """ + + + %s + + """ % (htmlres) + f.write(htmlshow) + f.close() + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/others/CycleGAN/models/cycle_gan_model_adapt.py b/PyTorch/contrib/cv/others/CycleGAN/models/cycle_gan_model_adapt.py index 56a9fe915730b1dcaf702ecfa76dfdcaacdb0768..a5c1e39cae4bad192af8de9a3fbc3597a1ab0c09 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/models/cycle_gan_model_adapt.py +++ b/PyTorch/contrib/cv/others/CycleGAN/models/cycle_gan_model_adapt.py @@ -1,421 +1,421 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import os -import sys -from collections import OrderedDict -import torch -import itertools -from util.image_pool import ImagePool -from . import networks_adapt as networks -from torch.nn.parallel import DistributedDataParallel as DDP -# from .npu_fused_adam import NpuFusedAdam as adam -from torch.optim import Adam as adam - -if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") -try: - import apex - from apex import amp -except ImportError: - amp = None - - -class CycleGANModel(): - """ - This class implements the CycleGAN model, for learning image-to-image translation without paired data. - - The model training requires '--dataset_mode unaligned' dataset. - By default, it uses a '--netG resnet_9blocks' ResNet generator, - a '--netD basic' discriminator (PatchGAN introduced by pix2pix), - and a least-square GANs objective ('--gan_mode lsgan'). - - CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf - """ - - @staticmethod - def modify_commandline_options(parser, is_train=True): - """Add new dataset-specific options, and rewrite default values for existing options. 
- - Parameters: - parser -- original option parser - is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. - - Returns: - the modified parser. - - For CycleGAN, in addition to GAN losses, we introduce lambda_A, lambda_B, and lambda_identity for the following losses. - A (source domain), B (target domain). - Generators: G_A: A -> B; G_B: B -> A. - Discriminators: D_A: G_A(A) vs. B; D_B: G_B(B) vs. A. - Forward cycle loss: lambda_A * ||G_B(G_A(A)) - A|| (Eqn. (2) in the paper) - Backward cycle loss: lambda_B * ||G_A(G_B(B)) - B|| (Eqn. (2) in the paper) - Identity loss (optional): lambda_identity * (||G_A(B) - B|| * lambda_B + ||G_B(A) - A|| * lambda_A) (Sec 5.2 "Photo generation from paintings" in the paper) - Dropout is not used in the original CycleGAN paper. - """ - parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout - if is_train: - parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)') - parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)') - parser.add_argument('--lambda_identity', type=float, default=0.5, - help='use identity mapping. Setting lambda_identity other than 0 has an effect of' - ' scaling the weight of the identity mapping loss. For example, if the weight of ' - 'the identity loss should be 10 times smaller than the weight of the reconstruction' - ' loss, please set lambda_identity = 0.1') - - return parser - - def __init__(self, opt): - """Initialize the CycleGAN class. - - Parameters: - opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions - """ - # specify the training losses you want to print out. The training/test scripts will call - self.opt = opt - self.isTrain = opt.isTrain - self.optimizers = [] - if (opt.npu < 1): - self.device = torch.device('cuda:{}'.format(self.opt.gpu)) # get device name: CPU or GPU - # self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) # get device name: CPU or GPU - else: - # self.device = torch.device('npu:{}'.format(self.gpu_ids)) if self.gpu_ids else torch.device( 'cpu') # get device name: CPU or GPU - self.device = torch.device('npu:{}'.format(self.opt.gpu)) # get device name: CPU or GPU - self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir - self.loss_names = ['D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B'] - # specify the images you want to save/display. The training/test scripts will call - - visual_names_A = ['real_A', 'fake_B', 'rec_A'] - visual_names_B = ['real_B', 'fake_A', 'rec_B'] - if self.isTrain and self.opt.lambda_identity > 0.0: # if identity loss is used, we also visualize idt_B=G_A(B) ad idt_A=G_A(B) - visual_names_A.append('idt_B') - visual_names_B.append('idt_A') - - self.visual_names = visual_names_A + visual_names_B # combine visualizations for A and B - # specify the models you want to save to the disk. The training/test scripts - # will call and . - if self.isTrain: - self.model_names = ['G_A', 'G_B', 'D_A', 'D_B'] - else: # during test time, only load Gs - self.model_names = ['G_A', 'G_B'] - - # define networks (both Generators and discriminators) - # The naming is different from those used in the paper. - # Code (vs. 
paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X) - self.netG_A = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm, - not opt.no_dropout, opt.init_type, opt.init_gain, self.opt.gpu) - self.netG_B = networks.define_G(opt.output_nc, opt.input_nc, opt.ngf, opt.netG, opt.norm, - not opt.no_dropout, opt.init_type, opt.init_gain, self.opt.gpu) - if self.isTrain: # define discriminators - self.netD_A = networks.define_D(opt.output_nc, opt.ndf, opt.netD, - opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.opt.gpu) - self.netD_B = networks.define_D(opt.input_nc, opt.ndf, opt.netD, - opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.opt.gpu) - - if self.isTrain: - if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels - assert (opt.input_nc == opt.output_nc) - self.fake_A_pool = ImagePool(opt.pool_size) # create image buffer to store previously generated images - self.fake_B_pool = ImagePool(opt.pool_size) # create image buffer to store previously generated images - # define loss functions - self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device) # define GAN loss. - self.criterionCycle = torch.nn.L1Loss() - self.criterionIdt = torch.nn.L1Loss() - # initialize optimizers; schedulers will be automatically created by function . - self.optimizer_G = adam(itertools.chain(self.netG_A.parameters(), self.netG_B.parameters()), - lr=opt.lr, betas=(opt.beta1, 0.999)) - self.optimizer_D = adam(itertools.chain(self.netD_A.parameters(), self.netD_B.parameters()), - lr=opt.lr, betas=(opt.beta1, 0.999)) - self.optimizers.append(self.optimizer_G) - self.optimizers.append(self.optimizer_D) - self.lr_scheduler_G = networks.get_scheduler(self.optimizer_G, self.opt) - self.lr_scheduler_D = networks.get_scheduler(self.optimizer_D, self.opt) - # self.scaler = GradScaler() - amp.register_float_function(torch, 'sigmoid') - amp.register_float_function(torch, 'softmax') - amp.register_float_function(torch, 'tanh') - if (self.opt.isapex): - [self.netG_A, self.netG_B, self.netD_A, self.netD_B], [self.optimizer_G, self.optimizer_D] = \ - amp.initialize([self.netG_A, self.netG_B, self.netD_A, self.netD_B], - [self.optimizer_G, self.optimizer_D], opt_level=self.opt.apex_type, - loss_scale=self.opt.loss_scale) - if (self.opt.distributed >= 1): - temp = bool(1 - opt.npu) - self.netG_A = DDP(self.netG_A, [self.opt.gpu], broadcast_buffers=temp) - self.netG_B = DDP(self.netG_B, [self.opt.gpu], broadcast_buffers=temp) - self.netD_A = DDP(self.netD_A, [self.opt.gpu], broadcast_buffers=temp) - self.netD_B = DDP(self.netD_B, [self.opt.gpu], broadcast_buffers=temp) - - def setup(self, opt): - """Load and print networks; create schedulers - - Parameters: - opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions - """ - if self.isTrain: - self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] - if not self.isTrain or opt.continue_train: - load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch - self.load_networks(load_suffix) - self.print_networks(opt.verbose) - - def update_learning_rate(self): - old_lr = self.optimizers[0].param_groups[0]['lr'] - for scheduler in self.schedulers: - if self.opt.lr_policy == 'plateau': - scheduler.step(self.metric) - else: - scheduler.step() - self.lr_scheduler_G.step() - self.lr_scheduler_D.step() - - def set_input(self, input): - """Unpack input data from the dataloader and perform necessary pre-processing 
steps. - - Parameters: - input (dict): include the data itself and its metadata information. - - The option 'direction' can be used to swap domain A and domain B. - """ - AtoB = self.opt.direction == 'AtoB' - - self.real_A = input['A' if AtoB else 'B'].to(self.device) - self.real_B = input['B' if AtoB else 'A'].to(self.device) - self.image_paths = input['A_paths' if AtoB else 'B_paths'] - - def forward(self): - """Run forward pass; called by both functions and .""" - self.fake_B = self.netG_A(self.real_A) # G_A(A) - self.rec_A = self.netG_B(self.fake_B) # G_B(G_A(A)) - self.fake_A = self.netG_B(self.real_B) # G_B(B) - self.rec_B = self.netG_A(self.fake_A) # G_A(G_B(B)) - - def backward_D_basic(self, netD, real, fake): - """Calculate GAN loss for the discriminator - - Parameters: - netD (network) -- the discriminator D - real (tensor array) -- real images - fake (tensor array) -- images generated by a generator - - Return the discriminator loss. - We also call loss_D.backward() to calculate the gradients. - """ - # Real - pred_real = netD(real) - loss_D_real = self.criterionGAN(pred_real, True) - # Fake - pred_fake = netD(fake.detach()) - loss_D_fake = self.criterionGAN(pred_fake, False) - # Combined loss and calculate gradients - loss_D = (loss_D_real + loss_D_fake) * 0.5 - if (self.opt.isapex): - with amp.scale_loss(loss_D, self.optimizer_D) as scaled_lossd: - scaled_lossd.backward() - else: - loss_D.backward() - return loss_D - - def backward_D_A(self): - """Calculate GAN loss for discriminator D_A""" - fake_B = self.fake_B_pool.query(self.fake_B) - self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B) - - def backward_D_B(self): - """Calculate GAN loss for discriminator D_B""" - fake_A = self.fake_A_pool.query(self.fake_A) - self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A) - - def backward_G(self): - """Calculate the loss for generators G_A and G_B""" - lambda_idt = self.opt.lambda_identity - lambda_A = self.opt.lambda_A - lambda_B = self.opt.lambda_B - # Identity loss - if lambda_idt > 0: - # G_A should be identity if real_B is fed: ||G_A(B) - B|| - self.idt_A = self.netG_A(self.real_B) - self.loss_idt_A = self.criterionIdt(self.idt_A, self.real_B) * lambda_B * lambda_idt - # G_B should be identity if real_A is fed: ||G_B(A) - A|| - self.idt_B = self.netG_B(self.real_A) - self.loss_idt_B = self.criterionIdt(self.idt_B, self.real_A) * lambda_A * lambda_idt - else: - self.loss_idt_A = 0 - self.loss_idt_B = 0 - - # GAN loss D_A(G_A(A)) - self.loss_G_A = self.criterionGAN(self.netD_A(self.fake_B), True) - # GAN loss D_B(G_B(B)) - self.loss_G_B = self.criterionGAN(self.netD_B(self.fake_A), True) - # Forward cycle loss || G_B(G_A(A)) - A|| - self.loss_cycle_A = self.criterionCycle(self.rec_A, self.real_A) * lambda_A - # Backward cycle loss || G_A(G_B(B)) - B|| - self.loss_cycle_B = self.criterionCycle(self.rec_B, self.real_B) * lambda_B - # combined loss and calculate gradients - self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B - if (self.opt.isapex == True): - with amp.scale_loss(self.loss_G, self.optimizer_G) as scaled_lossg: - scaled_lossg.backward() - else: - self.loss_G.backward() - - def optimize_parameters(self): - """Calculate losses, gradients, and update network weights; called in every training iteration""" - # forwar - self.forward() # compute fake images and reconstruction images. 
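backward_D_A and backward_D_B above do not feed the freshly generated fakes straight to the discriminators; they first route them through an ImagePool. The pool class itself (util.image_pool) is not shown in this patch, so the following is only a minimal sketch of the buffer-and-replay behaviour it is assumed to implement (the name SimpleImagePool and the 50/50 replay rule are illustrative, following the usual CycleGAN recipe):

import random
import torch

class SimpleImagePool:
    """Illustrative stand-in for util.image_pool.ImagePool (assumed behaviour)."""

    def __init__(self, pool_size=50):           # --pool_size defaults to 50
        self.pool_size = pool_size
        self.images = []

    def query(self, images):
        if self.pool_size == 0:                  # buffering disabled
            return images
        out = []
        for img in images:                       # iterate over the batch dimension
            img = img.unsqueeze(0)
            if len(self.images) < self.pool_size:
                self.images.append(img)          # still filling the buffer
                out.append(img)
            elif random.random() > 0.5:          # replay an old fake, stash the new one
                idx = random.randrange(self.pool_size)
                out.append(self.images[idx].clone())
                self.images[idx] = img
            else:                                # use the fresh fake as-is
                out.append(img)
        return torch.cat(out, 0)

Training the discriminators on a mix of current and historical fakes is the stabilisation trick the CycleGAN paper borrows from Shrivastava et al.; the repository's real ImagePool may differ in detail.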
- # G_A and G_B - self.set_requires_grad([self.netD_A, self.netD_B], False) # Ds require no gradients when optimizing Gs - self.optimizer_G.zero_grad() # set G_A and G_B's gradients to zero - self.backward_G() # calculate gradients for G_A and G_B - self.optimizer_G.step() # update G_A and G_B's weights - # D_A and D_B - self.set_requires_grad([self.netD_A, self.netD_B], True) - self.optimizer_D.zero_grad() # set D_A and D_B's gradients to zero - self.backward_D_A() # calculate gradients for D_A - self.backward_D_B() # calculate graidents for D_B - self.optimizer_D.step() # update D_A and D_B's weights - - def get_current_visuals(self): - """Return visualization images. train.py will display these images with visdom, and save the images to a HTML""" - visual_ret = OrderedDict() - for name in self.visual_names: - if isinstance(name, str): - visual_ret[name] = getattr(self, name) - return visual_ret - - def get_current_losses(self): - """Return traning losses / errors. train.py will print out these errors on console, and save them to a file""" - errors_ret = OrderedDict() - for name in self.loss_names: - if isinstance(name, str): - errors_ret[name] = float( - getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number - return errors_ret - - def save_networks(self, epoch): - """Save all the networks to the disk. - - Parameters: - epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) - """ - for name in self.model_names: - if isinstance(name, str): - save_filename = '%s_net_%s.pth' % (epoch, name) - save_path = os.path.join(self.save_dir, save_filename) - save_path1 = os.path.join(self.save_dir, 'a' + save_filename) - net = getattr(self, 'net' + name) - - if self.opt.distributed >= 1 and torch.cuda.is_available(): - torch.save(net.cpu().module.state_dict(), save_path) - if (self.opt.npu >= 1): - net.npu(self.opt.gpu) - else: - net.cuda(self.opt.gpu) - else: - torch.save(net.cpu().state_dict(), save_path) - if (self.opt.npu >= 1): - net.npu(self.opt.gpu) - else: - net.cuda(self.opt.gpu) - - def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): - """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" - key = keys[i] - if i + 1 == len(keys): # at the end, pointing to a parameter/buffer - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'running_mean' or key == 'running_var'): - if getattr(module, key) is None: - state_dict.pop('.'.join(keys)) - if module.__class__.__name__.startswith('InstanceNorm') and \ - (key == 'num_batches_tracked'): - state_dict.pop('.'.join(keys)) - else: - self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) - - def load_networks(self, epoch): - """Load all the networks from the disk. 
- - Parameters: - epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) - """ - for name in self.model_names: - if isinstance(name, str): - load_filename = '%s_net_%s.pth' % (epoch, name) - load_path = os.path.join(self.save_dir, load_filename) - net = getattr(self, 'net' + name) - if isinstance(net, torch.nn.DataParallel): - net = net.module - print('loading the model from %s' % load_path) - # if you are using PyTorch newer than 0.4 (e.g., built from - # GitHub source), you can remove str() on self.device - state_dict = torch.load(load_path, map_location=str(self.device)) - if hasattr(state_dict, '_metadata'): - del state_dict._metadata - - # patch InstanceNorm checkpoints prior to 0.4 - for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop - self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) - net.load_state_dict(state_dict) - - def print_networks(self, verbose): - """Print the total number of parameters in the network and (if verbose) network architecture - - Parameters: - verbose (bool) -- if verbose: print the network architecture - """ - print('---------- Networks initialized -------------') - for name in self.model_names: - if isinstance(name, str): - net = getattr(self, 'net' + name) - num_params = 0 - for param in net.parameters(): - num_params += param.numel() - if verbose: - print(net) - print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) - print('-----------------------------------------------') - - def set_requires_grad(self, nets, requires_grad=False): - """Set requies_grad=Fasle for all the networks to avoid unnecessary computations - Parameters: - nets (network list) -- a list of networks - requires_grad (bool) -- whether the networks require gradients or not - """ - if not isinstance(nets, list): - nets = [nets] - for net in nets: - if net is not None: - for param in net.parameters(): - param.requires_grad = requires_grad - - def compute_visuals(self): - """Calculate additional output images for visdom and HTML visualization""" - pass +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import os +import sys +from collections import OrderedDict +import torch +import itertools +from util.image_pool import ImagePool +from . import networks_adapt as networks +from torch.nn.parallel import DistributedDataParallel as DDP +# from .npu_fused_adam import NpuFusedAdam as adam +from torch.optim import Adam as adam + +if sys.version_info < (3, 0): + raise RuntimeError("Apex currently only supports Python 3. Aborting.") +try: + import apex + from apex import amp +except ImportError: + amp = None + + +class CycleGANModel(): + """ + This class implements the CycleGAN model, for learning image-to-image translation without paired data. + + The model training requires '--dataset_mode unaligned' dataset. + By default, it uses a '--netG resnet_9blocks' ResNet generator, + a '--netD basic' discriminator (PatchGAN introduced by pix2pix), + and a least-square GANs objective ('--gan_mode lsgan'). + + CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf + """ + + @staticmethod + def modify_commandline_options(parser, is_train=True): + """Add new dataset-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + + For CycleGAN, in addition to GAN losses, we introduce lambda_A, lambda_B, and lambda_identity for the following losses. + A (source domain), B (target domain). + Generators: G_A: A -> B; G_B: B -> A. + Discriminators: D_A: G_A(A) vs. B; D_B: G_B(B) vs. A. + Forward cycle loss: lambda_A * ||G_B(G_A(A)) - A|| (Eqn. (2) in the paper) + Backward cycle loss: lambda_B * ||G_A(G_B(B)) - B|| (Eqn. (2) in the paper) + Identity loss (optional): lambda_identity * (||G_A(B) - B|| * lambda_B + ||G_B(A) - A|| * lambda_A) (Sec 5.2 "Photo generation from paintings" in the paper) + Dropout is not used in the original CycleGAN paper. + """ + parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout + if is_train: + parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)') + parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)') + parser.add_argument('--lambda_identity', type=float, default=0.5, + help='use identity mapping. Setting lambda_identity other than 0 has an effect of' + ' scaling the weight of the identity mapping loss. For example, if the weight of ' + 'the identity loss should be 10 times smaller than the weight of the reconstruction' + ' loss, please set lambda_identity = 0.1') + + return parser + + def __init__(self, opt): + """Initialize the CycleGAN class. + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + # specify the training losses you want to print out. 
The training/test scripts will call + self.opt = opt + self.isTrain = opt.isTrain + self.optimizers = [] + if (opt.npu < 1): + self.device = torch.device('cuda:{}'.format(self.opt.gpu)) # get device name: CPU or GPU + # self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) # get device name: CPU or GPU + else: + # self.device = torch.device('npu:{}'.format(self.gpu_ids)) if self.gpu_ids else torch.device( 'cpu') # get device name: CPU or GPU + self.device = torch.device('npu:{}'.format(self.opt.gpu)) # get device name: CPU or GPU + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir + self.loss_names = ['D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B'] + # specify the images you want to save/display. The training/test scripts will call + + visual_names_A = ['real_A', 'fake_B', 'rec_A'] + visual_names_B = ['real_B', 'fake_A', 'rec_B'] + if self.isTrain and self.opt.lambda_identity > 0.0: # if identity loss is used, we also visualize idt_B=G_A(B) ad idt_A=G_A(B) + visual_names_A.append('idt_B') + visual_names_B.append('idt_A') + + self.visual_names = visual_names_A + visual_names_B # combine visualizations for A and B + # specify the models you want to save to the disk. The training/test scripts + # will call and . + if self.isTrain: + self.model_names = ['G_A', 'G_B', 'D_A', 'D_B'] + else: # during test time, only load Gs + self.model_names = ['G_A', 'G_B'] + + # define networks (both Generators and discriminators) + # The naming is different from those used in the paper. + # Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X) + self.netG_A = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm, + not opt.no_dropout, opt.init_type, opt.init_gain, self.opt.gpu) + self.netG_B = networks.define_G(opt.output_nc, opt.input_nc, opt.ngf, opt.netG, opt.norm, + not opt.no_dropout, opt.init_type, opt.init_gain, self.opt.gpu) + if self.isTrain: # define discriminators + self.netD_A = networks.define_D(opt.output_nc, opt.ndf, opt.netD, + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.opt.gpu) + self.netD_B = networks.define_D(opt.input_nc, opt.ndf, opt.netD, + opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.opt.gpu) + + if self.isTrain: + if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels + assert (opt.input_nc == opt.output_nc) + self.fake_A_pool = ImagePool(opt.pool_size) # create image buffer to store previously generated images + self.fake_B_pool = ImagePool(opt.pool_size) # create image buffer to store previously generated images + # define loss functions + self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device) # define GAN loss. + self.criterionCycle = torch.nn.L1Loss() + self.criterionIdt = torch.nn.L1Loss() + # initialize optimizers; schedulers will be automatically created by function . 
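The GAN criterion constructed a few lines above, networks.GANLoss(opt.gan_mode), is not part of this patch. With the default --gan_mode lsgan it is assumed to reduce to a mean-squared-error test against real/fake target labels, roughly as in this sketch (the class name LSGANLossSketch and the real=1.0 / fake=0.0 targets are illustrative assumptions):

import torch
import torch.nn as nn

class LSGANLossSketch(nn.Module):
    """Illustrative least-squares GAN criterion; not the repository's GANLoss."""

    def __init__(self):
        super().__init__()
        self.register_buffer('real_label', torch.tensor(1.0))
        self.register_buffer('fake_label', torch.tensor(0.0))
        self.loss = nn.MSELoss()                 # lsgan -> mean squared error

    def forward(self, prediction, target_is_real):
        label = self.real_label if target_is_real else self.fake_label
        return self.loss(prediction, label.expand_as(prediction))

criterion = LSGANLossSketch()
d_out = torch.randn(4, 1, 30, 30)                # stand-in for a PatchGAN output map
print(criterion(d_out, True), criterion(d_out, False))

The vanilla mode would use the cross-entropy objective instead, as the --gan_mode help text notes.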
+ self.optimizer_G = adam(itertools.chain(self.netG_A.parameters(), self.netG_B.parameters()), + lr=opt.lr, betas=(opt.beta1, 0.999)) + self.optimizer_D = adam(itertools.chain(self.netD_A.parameters(), self.netD_B.parameters()), + lr=opt.lr, betas=(opt.beta1, 0.999)) + self.optimizers.append(self.optimizer_G) + self.optimizers.append(self.optimizer_D) + self.lr_scheduler_G = networks.get_scheduler(self.optimizer_G, self.opt) + self.lr_scheduler_D = networks.get_scheduler(self.optimizer_D, self.opt) + # self.scaler = GradScaler() + amp.register_float_function(torch, 'sigmoid') + amp.register_float_function(torch, 'softmax') + amp.register_float_function(torch, 'tanh') + if (self.opt.isapex): + [self.netG_A, self.netG_B, self.netD_A, self.netD_B], [self.optimizer_G, self.optimizer_D] = \ + amp.initialize([self.netG_A, self.netG_B, self.netD_A, self.netD_B], + [self.optimizer_G, self.optimizer_D], opt_level=self.opt.apex_type, + loss_scale=self.opt.loss_scale) + if (self.opt.distributed >= 1): + temp = bool(1 - opt.npu) + self.netG_A = DDP(self.netG_A, [self.opt.gpu], broadcast_buffers=temp) + self.netG_B = DDP(self.netG_B, [self.opt.gpu], broadcast_buffers=temp) + self.netD_A = DDP(self.netD_A, [self.opt.gpu], broadcast_buffers=temp) + self.netD_B = DDP(self.netD_B, [self.opt.gpu], broadcast_buffers=temp) + + def setup(self, opt): + """Load and print networks; create schedulers + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + if self.isTrain: + self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] + if not self.isTrain or opt.continue_train: + load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch + self.load_networks(load_suffix) + self.print_networks(opt.verbose) + + def update_learning_rate(self): + old_lr = self.optimizers[0].param_groups[0]['lr'] + for scheduler in self.schedulers: + if self.opt.lr_policy == 'plateau': + scheduler.step(self.metric) + else: + scheduler.step() + self.lr_scheduler_G.step() + self.lr_scheduler_D.step() + + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input (dict): include the data itself and its metadata information. + + The option 'direction' can be used to swap domain A and domain B. + """ + AtoB = self.opt.direction == 'AtoB' + + self.real_A = input['A' if AtoB else 'B'].to(self.device) + self.real_B = input['B' if AtoB else 'A'].to(self.device) + self.image_paths = input['A_paths' if AtoB else 'B_paths'] + + def forward(self): + """Run forward pass; called by both functions and .""" + self.fake_B = self.netG_A(self.real_A) # G_A(A) + self.rec_A = self.netG_B(self.fake_B) # G_B(G_A(A)) + self.fake_A = self.netG_B(self.real_B) # G_B(B) + self.rec_B = self.netG_A(self.fake_A) # G_A(G_B(B)) + + def backward_D_basic(self, netD, real, fake): + """Calculate GAN loss for the discriminator + + Parameters: + netD (network) -- the discriminator D + real (tensor array) -- real images + fake (tensor array) -- images generated by a generator + + Return the discriminator loss. + We also call loss_D.backward() to calculate the gradients. 
+ """ + # Real + pred_real = netD(real) + loss_D_real = self.criterionGAN(pred_real, True) + # Fake + pred_fake = netD(fake.detach()) + loss_D_fake = self.criterionGAN(pred_fake, False) + # Combined loss and calculate gradients + loss_D = (loss_D_real + loss_D_fake) * 0.5 + if (self.opt.isapex): + with amp.scale_loss(loss_D, self.optimizer_D) as scaled_lossd: + scaled_lossd.backward() + else: + loss_D.backward() + return loss_D + + def backward_D_A(self): + """Calculate GAN loss for discriminator D_A""" + fake_B = self.fake_B_pool.query(self.fake_B) + self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B) + + def backward_D_B(self): + """Calculate GAN loss for discriminator D_B""" + fake_A = self.fake_A_pool.query(self.fake_A) + self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A) + + def backward_G(self): + """Calculate the loss for generators G_A and G_B""" + lambda_idt = self.opt.lambda_identity + lambda_A = self.opt.lambda_A + lambda_B = self.opt.lambda_B + # Identity loss + if lambda_idt > 0: + # G_A should be identity if real_B is fed: ||G_A(B) - B|| + self.idt_A = self.netG_A(self.real_B) + self.loss_idt_A = self.criterionIdt(self.idt_A, self.real_B) * lambda_B * lambda_idt + # G_B should be identity if real_A is fed: ||G_B(A) - A|| + self.idt_B = self.netG_B(self.real_A) + self.loss_idt_B = self.criterionIdt(self.idt_B, self.real_A) * lambda_A * lambda_idt + else: + self.loss_idt_A = 0 + self.loss_idt_B = 0 + + # GAN loss D_A(G_A(A)) + self.loss_G_A = self.criterionGAN(self.netD_A(self.fake_B), True) + # GAN loss D_B(G_B(B)) + self.loss_G_B = self.criterionGAN(self.netD_B(self.fake_A), True) + # Forward cycle loss || G_B(G_A(A)) - A|| + self.loss_cycle_A = self.criterionCycle(self.rec_A, self.real_A) * lambda_A + # Backward cycle loss || G_A(G_B(B)) - B|| + self.loss_cycle_B = self.criterionCycle(self.rec_B, self.real_B) * lambda_B + # combined loss and calculate gradients + self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B + if (self.opt.isapex == True): + with amp.scale_loss(self.loss_G, self.optimizer_G) as scaled_lossg: + scaled_lossg.backward() + else: + self.loss_G.backward() + + def optimize_parameters(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + # forwar + self.forward() # compute fake images and reconstruction images. + # G_A and G_B + self.set_requires_grad([self.netD_A, self.netD_B], False) # Ds require no gradients when optimizing Gs + self.optimizer_G.zero_grad() # set G_A and G_B's gradients to zero + self.backward_G() # calculate gradients for G_A and G_B + self.optimizer_G.step() # update G_A and G_B's weights + # D_A and D_B + self.set_requires_grad([self.netD_A, self.netD_B], True) + self.optimizer_D.zero_grad() # set D_A and D_B's gradients to zero + self.backward_D_A() # calculate gradients for D_A + self.backward_D_B() # calculate graidents for D_B + self.optimizer_D.step() # update D_A and D_B's weights + + def get_current_visuals(self): + """Return visualization images. train.py will display these images with visdom, and save the images to a HTML""" + visual_ret = OrderedDict() + for name in self.visual_names: + if isinstance(name, str): + visual_ret[name] = getattr(self, name) + return visual_ret + + def get_current_losses(self): + """Return traning losses / errors. 
train.py will print out these errors on console, and save them to a file""" + errors_ret = OrderedDict() + for name in self.loss_names: + if isinstance(name, str): + errors_ret[name] = float( + getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number + return errors_ret + + def save_networks(self, epoch): + """Save all the networks to the disk. + + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + save_filename = '%s_net_%s.pth' % (epoch, name) + save_path = os.path.join(self.save_dir, save_filename) + save_path1 = os.path.join(self.save_dir, 'a' + save_filename) + net = getattr(self, 'net' + name) + + if self.opt.distributed >= 1 and torch.cuda.is_available(): + torch.save(net.cpu().module.state_dict(), save_path) + if (self.opt.npu >= 1): + net.npu(self.opt.gpu) + else: + net.cuda(self.opt.gpu) + else: + torch.save(net.cpu().state_dict(), save_path) + if (self.opt.npu >= 1): + net.npu(self.opt.gpu) + else: + net.cuda(self.opt.gpu) + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def load_networks(self, epoch): + """Load all the networks from the disk. 
+ + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + load_filename = '%s_net_%s.pth' % (epoch, name) + load_path = os.path.join(self.save_dir, load_filename) + net = getattr(self, 'net' + name) + if isinstance(net, torch.nn.DataParallel): + net = net.module + print('loading the model from %s' % load_path) + # if you are using PyTorch newer than 0.4 (e.g., built from + # GitHub source), you can remove str() on self.device + state_dict = torch.load(load_path, map_location=str(self.device)) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + + # patch InstanceNorm checkpoints prior to 0.4 + for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop + self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) + net.load_state_dict(state_dict) + + def print_networks(self, verbose): + """Print the total number of parameters in the network and (if verbose) network architecture + + Parameters: + verbose (bool) -- if verbose: print the network architecture + """ + print('---------- Networks initialized -------------') + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, 'net' + name) + num_params = 0 + for param in net.parameters(): + num_params += param.numel() + if verbose: + print(net) + print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) + print('-----------------------------------------------') + + def set_requires_grad(self, nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary computations + Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad + + def compute_visuals(self): + """Calculate additional output images for visdom and HTML visualization""" + pass diff --git a/PyTorch/contrib/cv/others/CycleGAN/modelzoo_level.txt b/PyTorch/contrib/cv/others/CycleGAN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/CycleGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/CycleGAN/parse.py b/PyTorch/contrib/cv/others/CycleGAN/parse.py index 64e6560084a0b19548d46c13b90079560c73e5dd..a58d344312f5af5e8bfe2f52826d19c5864e00e9 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/parse.py +++ b/PyTorch/contrib/cv/others/CycleGAN/parse.py @@ -1,236 +1,236 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import argparse -import torch - - -class parse_args(): - def __init__(self, isTrain=True, isTest=False): - self.isTrain = isTrain - self.isTest = isTest - self.parser = argparse.ArgumentParser(description='Pytorch CycleGAN training') - - def initialize(self): - parser = self.parser - parser.add_argument('--model_ga_path', type=str, - default='./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth', - help='path for modelga') - parser.add_argument('--model_gb_path', type=str, - default='./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth', - help='path for modelga') - parser.add_argument('--prof', type=int, default=1, help='whether to get prof file') - parser.add_argument('--num_epoch', type=int, default=240, help='whether to get prof file1') - parser.add_argument('--line_scale', type=float, default=2, help='whether to get prof file1') - parser.add_argument('--num_epoch_start', type=int, default=0, help='whether to get prof file1') - parser.add_argument('--loadweight', default='latest', help='whether to get prof file1') - parser.add_argument('--prof_file', type=str, default='./output.prof', help='whether to get prof file') - parser.add_argument('--log_path', type=str, default='gpu1p.txt', help='whether to get prof file') - parser.add_argument('--multiprocessing_distributed', type=int, default=1, - help='Use multi-processing distributed training to launch,if it is eaqul to 1 or more than ,start to npu/gpu Multi-card training ') - parser.add_argument('--world_size', type=int, default=1, help='word__size') - parser.add_argument('--distributed', type=int, default=1, - help='whether to use distributed to fastern training,if it is eaqul to 1 or more than ,start to npu/gpu Multi-card training') - parser.add_argument('--rank', default=0, type=int, help='rank') - parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') - parser.add_argument('--npu', type=int, default=0, help='whether to use npu to fastern training') - parser.add_argument('--pu_ids', type=str, default='0,1', - help='gpu ids(npu ids): e.g. 0 0,1,2, 0,2. 
use -1 for CPU') - - parser.add_argument('--isapex', default=True, help='whether to use apex to fastern training') - parser.add_argument('--apex_type', type=str, default="O1", help='O0,O1,O2,O3') - parser.add_argument('--loss_scale', default=None, help='loss_scale:1,128,dynamic') - parser.add_argument('--dataroot', type=str, default='./datasets/maps', - help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') - parser.add_argument('--name', type=str, default='maps_cycle_gan', - help='name of the experiment. It decides where to store samples and models') - - parser.add_argument('--checkpoints_dir', type=str, default='./re_checkpoints2p_bs1', - help='models are saved here') - # model parameters - parser.add_argument('--model', type=str, default='cycle_gan', - help='chooses which model to use. [cycle_gan| pix2pix | test | colorization]') - parser.add_argument('--input_nc', type=int, default=3, - help='# of input image channels: 3 for RGB and 1 for grayscale') - parser.add_argument('--output_nc', type=int, default=3, - help='# of output image channels: 3 for RGB and 1 for grayscale') - parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer') - parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer') - parser.add_argument('--netD', type=str, default='basic', - help='specify discriminator architecture [basic | n_layers | pixel].' - ' The basic model is a 70x70 PatchGAN. n_layers allows you to ' - 'specify the layers in the discriminator') - parser.add_argument('--netG', type=str, default='resnet_9blocks', - help='specify generator architecture [resnet_9blocks | resnet_6blocks | ' - 'unet_256 | unet_128]') - parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers') - parser.add_argument('--norm', type=str, default='instance', - help='instance normalization or batch normalization [instance | batch | none]') - parser.add_argument('--init_type', type=str, default='normal', - help='network initialization [normal | xavier | kaiming | orthogonal]') - parser.add_argument('--init_gain', type=float, default=0.02, - help='scaling factor for normal, xavier and orthogonal.') - parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') - # dataset parameters - parser.add_argument('--dataset_mode', type=str, default='unaligned', - help='chooses how datasets are loaded. [unaligned | aligned | single | colorization]') - parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA') - parser.add_argument('--serial_batches', action='store_true', - help='if true, takes images in order to make batches, otherwise takes them randomly') - parser.add_argument('--num_threads', default=8, type=int, help='# threads for loading data') - parser.add_argument('--batch_size', type=int, default=4, help='input batch size') - parser.add_argument('--load_size', type=int, default=286, help='scale images to this size') - parser.add_argument('--crop_size', type=int, default=256, help='then crop to this size') - parser.add_argument('--max_dataset_size', type=int, default=float("inf"), - help='Maximum number of samples allowed per dataset. 
If the dataset directory ' - 'contains more than max_dataset_size, only a subset is loaded.') - parser.add_argument('--preprocess', type=str, default='resize_and_crop', - help='scaling and cropping of images at load time [resize_and_crop | crop | ' - 'scale_width | scale_width_and_crop | none]') - parser.add_argument('--no_flip', action='store_true', - help='if specified, do not flip the images for data augmentation') - parser.add_argument('--display_winsize', type=int, default=256, - help='display window size for both visdom and HTML') - # additional parameters - parser.add_argument('--epoch', type=str, default='latest', - help='which epoch to load? set to latest to use latest cached model') - parser.add_argument('--load_iter', type=int, default='0', - help='which iteration to load? if load_iter > 0, the code will load models by iter_' - '[load_iter]; otherwise, the code will load models by [epoch]') - parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') - parser.add_argument('--suffix', default='', type=str, - help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}') - parser.add_argument( - "--cache-dataset", - dest="cache_dataset", - help="Cache the datasets for quicker initialization. It also serializes the transforms", - action="store_true", - ) - parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout - if (self.isTrain): - # network saving and loading parameters - parser.add_argument('--display_freq', type=int, default=400, - help='frequency of showing training results on screen') - parser.add_argument('--display_ncols', type=int, default=4, - help='if positive, display all images in a single visdom web panel with ' - 'certain number of images per row.') - parser.add_argument('--display_id', type=int, default=-1, help='window id of the web display') - parser.add_argument('--display_server', type=str, default="http://localhost", - help='visdom server of the web display') - parser.add_argument('--display_env', type=str, default='main', - help='visdom display environment name (default is "main")') - parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display') - parser.add_argument('--update_html_freq', type=int, default=1000, - help='frequency of saving training results to html') - parser.add_argument('--print_freq', type=int, default=100, - help='frequency of showing training results on console') - parser.add_argument('--no_html', action='store_true', - help='do not save intermediate training results to [' - 'opt.checkpoints_dir]/[opt.name]/web/') - # network saving and loading parameters - parser.add_argument('--save_latest_freq', type=int, default=5000, - help='frequency of saving the latest results') - parser.add_argument('--save_epoch_freq', type=int, default=5, - help='frequency of saving checkpoints at the end of epochs') - parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') - parser.add_argument('--continue_train', action='store_true', - help='continue training: load the latest model') - parser.add_argument('--epoch_count', type=int, default=1, - help='the starting epoch count, we save the model ' - 'by , +, ...') - parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') - # training parameters - parser.add_argument('--n_epochs', type=int, default=100, - help='number of epochs with the initial learning rate') - 
parser.add_argument('--n_epochs_decay', type=int, default=100, - help='number of epochs to linearly decay learning rate to zero') - parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') - parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') - parser.add_argument('--gan_mode', type=str, default='lsgan', - help='the type of GAN objective. [vanilla| lsgan | wgangp]. vanilla GAN loss is' - ' the cross-entropy objective used in the original GAN paper.') - parser.add_argument('--pool_size', type=int, default=50, - help='the size of image buffer that stores previously generated images') - parser.add_argument('--lr_policy', type=str, default='linear', - help='learning rate policy. [linear | step | plateau | cosine]') - parser.add_argument('--lr_decay_iters', type=int, default=50, - help='multiply by a gamma every lr_decay_iters iterations') - parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)') - parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)') - parser.add_argument('--lambda_identity', type=float, default=0.5, - help='use identity mapping. Setting lambda_identity other than 0 has an effect of' - ' scaling the weight of the identity mapping loss. For example, if the weight of' - ' the identity loss should be 10 times smaller than the weight of the ' - 'reconstruction loss, please set lambda_identity = 0.1') - parser = parser.parse_args() - parser.process_device_map = self.device_id_to_process_device_map(parser.pu_ids) - return parser - - def device_id_to_process_device_map(self, device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - return process_device_map - - def change_parser(self, isTrain=True, isTest=False): - self.isTest = isTest - self.isTrain = isTrain - self.parser = None - return self.initialize() - - def printParser(self): - pasers = self.parser.parse_args() - message = '' - message += '----------------- Options ---------------\n' - for k, v in sorted(vars(pasers).items()): - comment = '' - default = self.parser.get_default(k) - # if v != default: - # comment = '\t[default: %s]' % str(default) - message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) - message += '----------------- End -------------------' - print(message) - - def init_distributed_mode(self, ngpus_per_node, gpu): - opt = self.parser.parse_args() - if opt.multiprocessing_distributed >= 1: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - opt.rank = opt.rank * ngpus_per_node + gpu - if (opt.npu < 1): - torch.distributed.init_process_group(backend='nccl', init_method='env://', world_size=opt.world_size, - rank=opt.rank) - elif (opt.npu >= 1): - torch.distributed.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.rank) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import argparse +import torch + + +class parse_args(): + def __init__(self, isTrain=True, isTest=False): + self.isTrain = isTrain + self.isTest = isTest + self.parser = argparse.ArgumentParser(description='Pytorch CycleGAN training') + + def initialize(self): + parser = self.parser + parser.add_argument('--model_ga_path', type=str, + default='./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_A.pth', + help='path for modelga') + parser.add_argument('--model_gb_path', type=str, + default='./checkpoints_8pbs1/maps_cycle_gan/175_pu0_net_G_B.pth', + help='path for modelga') + parser.add_argument('--prof', type=int, default=1, help='whether to get prof file') + parser.add_argument('--num_epoch', type=int, default=240, help='whether to get prof file1') + parser.add_argument('--line_scale', type=float, default=2, help='whether to get prof file1') + parser.add_argument('--num_epoch_start', type=int, default=0, help='whether to get prof file1') + parser.add_argument('--loadweight', default='latest', help='whether to get prof file1') + parser.add_argument('--prof_file', type=str, default='./output.prof', help='whether to get prof file') + parser.add_argument('--log_path', type=str, default='gpu1p.txt', help='whether to get prof file') + parser.add_argument('--multiprocessing_distributed', type=int, default=1, + help='Use multi-processing distributed training to launch,if it is eaqul to 1 or more than ,start to npu/gpu Multi-card training ') + parser.add_argument('--world_size', type=int, default=1, help='word__size') + parser.add_argument('--distributed', type=int, default=1, + help='whether to use distributed to fastern training,if it is eaqul to 1 or more than ,start to npu/gpu Multi-card training') + parser.add_argument('--rank', default=0, type=int, help='rank') + parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') + parser.add_argument('--npu', type=int, default=0, help='whether to use npu to fastern training') + parser.add_argument('--pu_ids', type=str, default='0,1', + help='gpu ids(npu ids): e.g. 0 0,1,2, 0,2. 
use -1 for CPU') + + parser.add_argument('--isapex', default=True, help='whether to use apex to fastern training') + parser.add_argument('--apex_type', type=str, default="O1", help='O0,O1,O2,O3') + parser.add_argument('--loss_scale', default=None, help='loss_scale:1,128,dynamic') + parser.add_argument('--dataroot', type=str, default='./datasets/maps', + help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') + parser.add_argument('--name', type=str, default='maps_cycle_gan', + help='name of the experiment. It decides where to store samples and models') + + parser.add_argument('--checkpoints_dir', type=str, default='./re_checkpoints2p_bs1', + help='models are saved here') + # model parameters + parser.add_argument('--model', type=str, default='cycle_gan', + help='chooses which model to use. [cycle_gan| pix2pix | test | colorization]') + parser.add_argument('--input_nc', type=int, default=3, + help='# of input image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--output_nc', type=int, default=3, + help='# of output image channels: 3 for RGB and 1 for grayscale') + parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer') + parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer') + parser.add_argument('--netD', type=str, default='basic', + help='specify discriminator architecture [basic | n_layers | pixel].' + ' The basic model is a 70x70 PatchGAN. n_layers allows you to ' + 'specify the layers in the discriminator') + parser.add_argument('--netG', type=str, default='resnet_9blocks', + help='specify generator architecture [resnet_9blocks | resnet_6blocks | ' + 'unet_256 | unet_128]') + parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers') + parser.add_argument('--norm', type=str, default='instance', + help='instance normalization or batch normalization [instance | batch | none]') + parser.add_argument('--init_type', type=str, default='normal', + help='network initialization [normal | xavier | kaiming | orthogonal]') + parser.add_argument('--init_gain', type=float, default=0.02, + help='scaling factor for normal, xavier and orthogonal.') + parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator') + # dataset parameters + parser.add_argument('--dataset_mode', type=str, default='unaligned', + help='chooses how datasets are loaded. [unaligned | aligned | single | colorization]') + parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA') + parser.add_argument('--serial_batches', action='store_true', + help='if true, takes images in order to make batches, otherwise takes them randomly') + parser.add_argument('--num_threads', default=8, type=int, help='# threads for loading data') + parser.add_argument('--batch_size', type=int, default=4, help='input batch size') + parser.add_argument('--load_size', type=int, default=286, help='scale images to this size') + parser.add_argument('--crop_size', type=int, default=256, help='then crop to this size') + parser.add_argument('--max_dataset_size', type=int, default=float("inf"), + help='Maximum number of samples allowed per dataset. 
If the dataset directory ' + 'contains more than max_dataset_size, only a subset is loaded.') + parser.add_argument('--preprocess', type=str, default='resize_and_crop', + help='scaling and cropping of images at load time [resize_and_crop | crop | ' + 'scale_width | scale_width_and_crop | none]') + parser.add_argument('--no_flip', action='store_true', + help='if specified, do not flip the images for data augmentation') + parser.add_argument('--display_winsize', type=int, default=256, + help='display window size for both visdom and HTML') + # additional parameters + parser.add_argument('--epoch', type=str, default='latest', + help='which epoch to load? set to latest to use latest cached model') + parser.add_argument('--load_iter', type=int, default='0', + help='which iteration to load? if load_iter > 0, the code will load models by iter_' + '[load_iter]; otherwise, the code will load models by [epoch]') + parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') + parser.add_argument('--suffix', default='', type=str, + help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}') + parser.add_argument( + "--cache-dataset", + dest="cache_dataset", + help="Cache the datasets for quicker initialization. It also serializes the transforms", + action="store_true", + ) + parser.set_defaults(no_dropout=True) # default CycleGAN did not use dropout + if (self.isTrain): + # network saving and loading parameters + parser.add_argument('--display_freq', type=int, default=400, + help='frequency of showing training results on screen') + parser.add_argument('--display_ncols', type=int, default=4, + help='if positive, display all images in a single visdom web panel with ' + 'certain number of images per row.') + parser.add_argument('--display_id', type=int, default=-1, help='window id of the web display') + parser.add_argument('--display_server', type=str, default="http://localhost", + help='visdom server of the web display') + parser.add_argument('--display_env', type=str, default='main', + help='visdom display environment name (default is "main")') + parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display') + parser.add_argument('--update_html_freq', type=int, default=1000, + help='frequency of saving training results to html') + parser.add_argument('--print_freq', type=int, default=100, + help='frequency of showing training results on console') + parser.add_argument('--no_html', action='store_true', + help='do not save intermediate training results to [' + 'opt.checkpoints_dir]/[opt.name]/web/') + # network saving and loading parameters + parser.add_argument('--save_latest_freq', type=int, default=5000, + help='frequency of saving the latest results') + parser.add_argument('--save_epoch_freq', type=int, default=5, + help='frequency of saving checkpoints at the end of epochs') + parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') + parser.add_argument('--continue_train', action='store_true', + help='continue training: load the latest model') + parser.add_argument('--epoch_count', type=int, default=1, + help='the starting epoch count, we save the model ' + 'by , +, ...') + parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') + # training parameters + parser.add_argument('--n_epochs', type=int, default=100, + help='number of epochs with the initial learning rate') + 
parser.add_argument('--n_epochs_decay', type=int, default=100, + help='number of epochs to linearly decay learning rate to zero') + parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') + parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') + parser.add_argument('--gan_mode', type=str, default='lsgan', + help='the type of GAN objective. [vanilla| lsgan | wgangp]. vanilla GAN loss is' + ' the cross-entropy objective used in the original GAN paper.') + parser.add_argument('--pool_size', type=int, default=50, + help='the size of image buffer that stores previously generated images') + parser.add_argument('--lr_policy', type=str, default='linear', + help='learning rate policy. [linear | step | plateau | cosine]') + parser.add_argument('--lr_decay_iters', type=int, default=50, + help='multiply by a gamma every lr_decay_iters iterations') + parser.add_argument('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)') + parser.add_argument('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)') + parser.add_argument('--lambda_identity', type=float, default=0.5, + help='use identity mapping. Setting lambda_identity other than 0 has an effect of' + ' scaling the weight of the identity mapping loss. For example, if the weight of' + ' the identity loss should be 10 times smaller than the weight of the ' + 'reconstruction loss, please set lambda_identity = 0.1') + parser = parser.parse_args() + parser.process_device_map = self.device_id_to_process_device_map(parser.pu_ids) + return parser + + def device_id_to_process_device_map(self, device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + return process_device_map + + def change_parser(self, isTrain=True, isTest=False): + self.isTest = isTest + self.isTrain = isTrain + self.parser = None + return self.initialize() + + def printParser(self): + pasers = self.parser.parse_args() + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(pasers).items()): + comment = '' + default = self.parser.get_default(k) + # if v != default: + # comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) + + def init_distributed_mode(self, ngpus_per_node, gpu): + opt = self.parser.parse_args() + if opt.multiprocessing_distributed >= 1: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + opt.rank = opt.rank * ngpus_per_node + gpu + if (opt.npu < 1): + torch.distributed.init_process_group(backend='nccl', init_method='env://', world_size=opt.world_size, + rank=opt.rank) + elif (opt.npu >= 1): + torch.distributed.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.rank) diff --git a/PyTorch/contrib/cv/others/CycleGAN/requirements.txt b/PyTorch/contrib/cv/others/CycleGAN/requirements.txt index 4038ca0bc4742f5fd062ff7a7456ff8cf01fdcd7..8a81295f99d9675b70c8644abaa5efbe4daa9e13 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/requirements.txt +++ b/PyTorch/contrib/cv/others/CycleGAN/requirements.txt @@ -1,4 +1,4 @@ -torch>=1.4.0 -torchvision>=0.5.0 -dominate>=2.4.0 +torch>=1.4.0 +torchvision>=0.5.0 +dominate>=2.4.0 visdom>=0.1.8.8 \ No 
newline at end of file diff --git a/PyTorch/contrib/cv/others/CycleGAN/train.py b/PyTorch/contrib/cv/others/CycleGAN/train.py index 4396d1904c452ae9be682aaccaf8f27313e57a28..0e54494a621b9cc00b8e37b01a57c6d1ba1bde87 100644 --- a/PyTorch/contrib/cv/others/CycleGAN/train.py +++ b/PyTorch/contrib/cv/others/CycleGAN/train.py @@ -1,130 +1,130 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
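Both copies of parse.py above derive opt.process_device_map from the comma-separated --pu_ids string via device_id_to_process_device_map. A standalone restatement of that mapping with a worked example:

def device_id_to_process_device_map(device_list):
    """Same mapping parse.py builds: spawned process index -> physical device id."""
    devices = sorted(int(x) for x in device_list.split(","))
    return {process_id: device_id for process_id, device_id in enumerate(devices)}

# With the default --pu_ids '0,1' two workers are pinned to devices 0 and 1;
# an unsorted list such as '2,0,5' is sorted first:
print(device_id_to_process_device_map("2,0,5"))   # -> {0: 0, 1: 2, 2: 5}

main_worker in train.py then indexes this dict with its spawned process index to pick the NPU/GPU it binds to.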
-# ============================================================================ -import os -import time -import torch -from dataloader import create_dataset -from parse import parse_args -from util.visualizer_adapt import Visualizer -import torch.multiprocessing as mp -from models.cycle_gan_model_adapt import CycleGANModel as create_model -from torch import distributed as dist - - -def main(opt): - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '23112' - if opt.distributed >= 1: - ngpus_per_node = len(opt.process_device_map) - opt.ngpus_per_node = ngpus_per_node - if (ngpus_per_node == 1): - ngpus_per_node = 0 - opt.total_iters = 0 - if opt.multiprocessing_distributed >= 1: - opt.world_size = ngpus_per_node * opt.world_size - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, opt)) - else: - main_worker(ngpus_per_node, opt, opt) - - -def main_worker(gpu, ngpus_per_node, args): - opt = args - print([args.process_device_map, gpu]) - opt.gpu = args.process_device_map[gpu] - if (opt.distributed >= 1): - opt.rank = gpu - if opt.multiprocessing_distributed >= 1: - opt.rank = gpu - if (opt.npu < 1): - torch.distributed.init_process_group(backend='nccl', init_method='env://', world_size=opt.world_size, - rank=opt.rank) - elif (opt.npu >= 1): - torch.npu.set_device(gpu) - torch.distributed.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.rank) - dataset, train_sampler = create_dataset(opt) # create a dataset given opt.dataset_mode and other options - dataset_size = len(dataset) # get the number of images in the dataset. - print('The number of training images = %d' % dataset_size) - opt.isTrain = True - model = create_model(opt) # create a model given opt.model and other options - model.setup(opt) - visualizer = Visualizer(opt) # create a visualizer that display/save images and plots - for epoch in range(opt.num_epoch_start, opt.num_epoch): - visualizer.reset() # reset the visualizer: make sure it saves the results to HTML at least once every epoch - if (opt.ngpus_per_node > 1): - train_sampler.set_epoch(epoch) - for i, data in enumerate(dataset): # inner loop within one epoch - iter_start_time = time.time() # timer for computation per iteration - opt.total_iters += (opt.batch_size * opt.ngpus_per_node) - if (opt.prof >= 1 and i > 10): - if (opt.npu == False): - with torch.autograd.profiler.profile(use_cuda=True) as prof: - model.set_input(data) - model.optimize_parameters() - print(prof.key_averages().table()) - prof.export_chrome_trace(opt.prof_file) # "output.prof" - opt.prof = False - else: - with torch.autograd.profiler.profile(use_npu=True) as prof: - model.set_input(data) - model.optimize_parameters() - print(prof.key_averages().table()) - prof.export_chrome_trace(opt.prof_file) # - opt.prof = False - else: - model.set_input(data) - model.optimize_parameters() - if opt.total_iters % opt.save_latest_freq == 0: # print training losses and save logging information to the disk - model.save_networks(epoch) - # model.save_networks(epoch) - if opt.total_iters % opt.display_freq == 0: # display images on visdom and save images to a HTML file - t_comp = (time.time() - iter_start_time) / opt.batch_size - fps = opt.batch_size * opt.ngpus_per_node / t_comp - losses = model.get_current_losses() - visualizer.print_current_losses(epoch, fps, losses, t_comp) - # print_current_losses(opt, epoch, fps, losses, t_comp) - save_result = opt.total_iters % opt.update_html_freq == 0 - model.compute_visuals() - 
visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) - model.update_learning_rate() # Update learning rates - - if epoch % opt.save_epoch_freq == 0: # cache our model every epochs - print('saving the model at the end of epoch %d, iters %d' % (epoch, opt.total_iters)) - model.save_networks('latest_pu' + str(opt.gpu)) - model.save_networks(str(epoch) + '_pu' + str(opt.gpu)) - dist.barrier() - - -if __name__ == '__main__': - paser = parse_args(True, False) - opt = paser.initialize() - paser.printParser() - main(opt) +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +import os +import time +import torch +from dataloader import create_dataset +from parse import parse_args +from util.visualizer_adapt import Visualizer +import torch.multiprocessing as mp +from models.cycle_gan_model_adapt import CycleGANModel as create_model +from torch import distributed as dist + + +def main(opt): + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '23112' + if opt.distributed >= 1: + ngpus_per_node = len(opt.process_device_map) + opt.ngpus_per_node = ngpus_per_node + if (ngpus_per_node == 1): + ngpus_per_node = 0 + opt.total_iters = 0 + if opt.multiprocessing_distributed >= 1: + opt.world_size = ngpus_per_node * opt.world_size + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, opt)) + else: + main_worker(ngpus_per_node, opt, opt) + + +def main_worker(gpu, ngpus_per_node, args): + opt = args + print([args.process_device_map, gpu]) + opt.gpu = args.process_device_map[gpu] + if (opt.distributed >= 1): + opt.rank = gpu + if opt.multiprocessing_distributed >= 1: + opt.rank = gpu + if (opt.npu < 1): + torch.distributed.init_process_group(backend='nccl', init_method='env://', world_size=opt.world_size, + rank=opt.rank) + elif (opt.npu >= 1): + torch.npu.set_device(gpu) + torch.distributed.init_process_group(backend='hccl', world_size=opt.world_size, rank=opt.rank) + dataset, train_sampler = create_dataset(opt) # create a dataset given opt.dataset_mode and other options + dataset_size = len(dataset) # get the number of images in the dataset. + print('The number of training images = %d' % dataset_size) + opt.isTrain = True + model = create_model(opt) # create a model given opt.model and other options + model.setup(opt) + visualizer = Visualizer(opt) # create a visualizer that display/save images and plots + for epoch in range(opt.num_epoch_start, opt.num_epoch): + visualizer.reset() # reset the visualizer: make sure it saves the results to HTML at least once every epoch + if (opt.ngpus_per_node > 1): + train_sampler.set_epoch(epoch) + for i, data in enumerate(dataset): # inner loop within one epoch + iter_start_time = time.time() # timer for computation per iteration + opt.total_iters += (opt.batch_size * opt.ngpus_per_node) + if (opt.prof >= 1 and i > 10): + if (opt.npu == False): + with torch.autograd.profiler.profile(use_cuda=True) as prof: + model.set_input(data) + model.optimize_parameters() + print(prof.key_averages().table()) + prof.export_chrome_trace(opt.prof_file) # "output.prof" + opt.prof = False + else: + with torch.autograd.profiler.profile(use_npu=True) as prof: + model.set_input(data) + model.optimize_parameters() + print(prof.key_averages().table()) + prof.export_chrome_trace(opt.prof_file) # + opt.prof = False + else: + model.set_input(data) + model.optimize_parameters() + if opt.total_iters % opt.save_latest_freq == 0: # print training losses and save logging information to the disk + model.save_networks(epoch) + # model.save_networks(epoch) + if opt.total_iters % opt.display_freq == 0: # display images on visdom and save images to a HTML file + t_comp = (time.time() - iter_start_time) / opt.batch_size + fps = opt.batch_size * opt.ngpus_per_node / t_comp + losses = model.get_current_losses() + visualizer.print_current_losses(epoch, fps, losses, t_comp) + # print_current_losses(opt, epoch, fps, losses, t_comp) + save_result = opt.total_iters % opt.update_html_freq == 0 + model.compute_visuals() + 
visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) + model.update_learning_rate() # Update learning rates + + if epoch % opt.save_epoch_freq == 0: # cache our model every epochs + print('saving the model at the end of epoch %d, iters %d' % (epoch, opt.total_iters)) + model.save_networks('latest_pu' + str(opt.gpu)) + model.save_networks(str(epoch) + '_pu' + str(opt.gpu)) + dist.barrier() + + +if __name__ == '__main__': + paser = parse_args(True, False) + opt = paser.initialize() + paser.printParser() + main(opt) diff --git a/PyTorch/contrib/cv/others/DCGAN/LICENSE b/PyTorch/contrib/cv/others/DCGAN/LICENSE index 753842b6720f7980d411ecf2c78eb4ef220b9df8..f49a4e16e68b128803cc2dcea614603632b04eac 100644 --- a/PyTorch/contrib/cv/others/DCGAN/LICENSE +++ b/PyTorch/contrib/cv/others/DCGAN/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/DCGAN/dcgan.py b/PyTorch/contrib/cv/others/DCGAN/dcgan.py index e130984cedbe4368913fd927cd3fefdea50df582..e486b5863eef079ab85b2816fd8a327cb7551132 100644 --- a/PyTorch/contrib/cv/others/DCGAN/dcgan.py +++ b/PyTorch/contrib/cv/others/DCGAN/dcgan.py @@ -1,82 +1,82 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import torch -import torch.nn as nn - - -def weights_init_normal(m): - class_name = m.__class__.__name__ - if class_name.find("Conv") != -1: - torch.nn.init.normal_(m.weight.data, 0.0, 0.02) - elif class_name.find("BatchNorm2d") != -1: - torch.nn.init.normal_(m.weight.data, 1.0, 0.02) - torch.nn.init.constant_(m.bias.data, 0.0) - - -class Generator(nn.Module): - def __init__(self, img_size, latent_dim, channels): - super(Generator, self).__init__() - - self.init_size = img_size // 4 - self.l1 = nn.Sequential(nn.Linear(latent_dim, 128 * self.init_size ** 2)) - - self.conv_blocks = nn.Sequential( - nn.BatchNorm2d(128), - nn.Upsample(scale_factor=2), - nn.Conv2d(128, 128, 3, stride=1, padding=1), - nn.BatchNorm2d(128, 0.8), - nn.LeakyReLU(0.2, inplace=True), - nn.Upsample(scale_factor=2), - nn.Conv2d(128, 32, 3, stride=1, padding=1), - nn.BatchNorm2d(32, 0.8), - nn.LeakyReLU(0.2, inplace=True), - nn.Conv2d(32, channels, 3, stride=1, padding=1), - nn.Tanh() - ) - - def forward(self, z): - out = self.l1(z) - out = out.view(out.shape[0], 128, self.init_size, self.init_size) - img = self.conv_blocks(out) - return img - - -class Discriminator(nn.Module): - def __init__(self, img_size, channels): - super(Discriminator, self).__init__() - - def discriminator_block(in_filters, out_filters, bn=True): - block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)] - if bn: - block.append(nn.BatchNorm2d(out_filters, 0.8)) - return block - - self.model = nn.Sequential( - *discriminator_block(channels, 16, bn=False), - *discriminator_block(16, 32), - *discriminator_block(32, 64), - *discriminator_block(64, 128) - ) - - # The height and width of down_sampled image - ds_size = img_size // 2 ** 4 - self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1)) - - def forward(self, img): - out = self.model(img) - out = out.view(out.shape[0], -1) - validity = self.adv_layer(out) - - return validity +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import torch +import torch.nn as nn + + +def weights_init_normal(m): + class_name = m.__class__.__name__ + if class_name.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif class_name.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + + +class Generator(nn.Module): + def __init__(self, img_size, latent_dim, channels): + super(Generator, self).__init__() + + self.init_size = img_size // 4 + self.l1 = nn.Sequential(nn.Linear(latent_dim, 128 * self.init_size ** 2)) + + self.conv_blocks = nn.Sequential( + nn.BatchNorm2d(128), + nn.Upsample(scale_factor=2), + nn.Conv2d(128, 128, 3, stride=1, padding=1), + nn.BatchNorm2d(128, 0.8), + nn.LeakyReLU(0.2, inplace=True), + nn.Upsample(scale_factor=2), + nn.Conv2d(128, 32, 3, stride=1, padding=1), + nn.BatchNorm2d(32, 0.8), + nn.LeakyReLU(0.2, inplace=True), + nn.Conv2d(32, channels, 3, stride=1, padding=1), + nn.Tanh() + ) + + def forward(self, z): + out = self.l1(z) + out = out.view(out.shape[0], 128, self.init_size, self.init_size) + img = self.conv_blocks(out) + return img + + +class Discriminator(nn.Module): + def __init__(self, img_size, channels): + super(Discriminator, self).__init__() + + def discriminator_block(in_filters, out_filters, bn=True): + block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)] + if bn: + block.append(nn.BatchNorm2d(out_filters, 0.8)) + return block + + self.model = nn.Sequential( + *discriminator_block(channels, 16, bn=False), + *discriminator_block(16, 32), + *discriminator_block(32, 64), + *discriminator_block(64, 128) + ) + + # The height and width of down_sampled image + ds_size = img_size // 2 ** 4 + self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1)) + + def forward(self, img): + out = self.model(img) + out = out.view(out.shape[0], -1) + validity = self.adv_layer(out) + + return validity diff --git a/PyTorch/contrib/cv/others/DCGAN/get_mnist.py b/PyTorch/contrib/cv/others/DCGAN/get_mnist.py index 443b0bd63be4669b5f8402d2f23c8ac7466adb67..6e3a45b7ac555f781ed9103d3f3b4d437f6a61e1 100644 --- a/PyTorch/contrib/cv/others/DCGAN/get_mnist.py +++ b/PyTorch/contrib/cv/others/DCGAN/get_mnist.py @@ -1,30 +1,30 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import torchvision.datasets as datasets -import argparse - -parser = argparse.ArgumentParser(description="MNIST dataset") -parser.add_argument('--data_path', metavar='DIR', type=str, default="./data", - help='path to dataset') - -if __name__ == "__main__": - args = parser.parse_args() - print("MNIST target folder : ", args.data_path) - print("start download...") - train_dataset = datasets.MNIST( - args.data_path, - train=True, - download=True) - print("download done...") +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torchvision.datasets as datasets +import argparse + +parser = argparse.ArgumentParser(description="MNIST dataset") +parser.add_argument('--data_path', metavar='DIR', type=str, default="./data", + help='path to dataset') + +if __name__ == "__main__": + args = parser.parse_args() + print("MNIST target folder : ", args.data_path) + print("start download...") + train_dataset = datasets.MNIST( + args.data_path, + train=True, + download=True) + print("download done...") diff --git a/PyTorch/contrib/cv/others/DCGAN/main.py b/PyTorch/contrib/cv/others/DCGAN/main.py index 0abc7517f0f4adba9cb11ae3d510268bac8f7c4f..a1c157ca33df01ef63f6a6ade01b1d0499f618e2 100644 --- a/PyTorch/contrib/cv/others/DCGAN/main.py +++ b/PyTorch/contrib/cv/others/DCGAN/main.py @@ -1,510 +1,510 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import argparse -import os -import time -import apex -from apex import amp -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -from torchvision.utils import save_image - -from dcgan import Generator, Discriminator, weights_init_normal - -parser = argparse.ArgumentParser(description="pytorch DCGAN implementation") -## dcgan parameters -parser.add_argument('--data', metavar='DIR', type=str, default="./data", - help='path to dataset') -parser.add_argument("--n-epochs", type=int, default=200, - help="number of epochs of training") -parser.add_argument("--batch-size", type=int, default=64, - help="size of the batches") -parser.add_argument("--lr", type=float, default=0.0002, - help="adam: learning rate") -parser.add_argument("--b1", type=float, default=0.5, - help="adam: decay of first order momentum of gradient") -parser.add_argument("--b2", type=float, default=0.999, - help="adam: decay of first order momentum of gradient") -parser.add_argument("--n-cpu", type=int, default=8, - help="number of cpu threads to use during batch generation") -parser.add_argument("--latent_dim", type=int, default=100, - help="dimensionality of the latent space") -parser.add_argument("--img_size", type=int, default=32, - help="size of each image dimension") -parser.add_argument("--channels", type=int, default=1, - help="number of image channels") -parser.add_argument("--sample_interval", type=int, default=400, - help="interval between image sampling") -## add useful parameters : such as resume,evaluate -parser.add_argument('--checkpoint-path', default=None, type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', default=False, - help='evaluate model : generate (n_samples) samples,saved in dir(validate)') -parser.add_argument('--n-samples', type=int, default=10, - help="amount of samples in function(validate)") -parser.add_argument('-p', '--print-freq', default=10, type=int, metavar='N', - help='print frequency (default 10)') -## parameters for distribute training -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -## for ascend 910 -parser.add_argument('--device', default='npu', type=str, help='npu or gpu') -parser.add_argument('--addr', default='10.136.181.115', - type=str, help='master addr') -parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', - type=str, help='device id list') -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=None, type=float, - help='loss scale using in amp, default None means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') 
-parser.add_argument('--prof', default=False, action='store_true', - help='use profiling to evaluate the performance of model') - - -def device_id_to_process_device_map(device_list): - devices = device_list.split(",") - devices = [int(x) for x in devices] - devices.sort() - - process_device_map = dict() - for process_id, device_id in enumerate(devices): - process_device_map[process_id] = device_id - - return process_device_map - - -def get_device_name(device_type, device_order): - if device_type == 'npu': - device_name = 'npu:{}'.format(device_order) - else: - device_name = 'cuda:{}'.format(device_order) - - return device_name - - -def main(): - args = parser.parse_args() - print(args.device_list) - args.process_device_map = device_id_to_process_device_map(args.device_list) - - # add start_epoch - args.start_epoch = 0 - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.device == 'npu': - ngpus_per_node = len(args.process_device_map) - else: - if args.gpu is None: - ngpus_per_node = len(args.process_device_map) - else: - ngpus_per_node = 1 - print('ngpus_per_node:', ngpus_per_node) - - args.world_size = ngpus_per_node * args.world_size - args.distributed = args.world_size > 1 - - # create folders - if not args.distributed or (args.distributed and args.rank == args.process_device_map[0]): - if not os.path.exists("./images/"): - os.makedirs("./images/") - if not os.path.exists("./samples/"): - os.makedirs("./samples/") - - main_worker(args.rank, ngpus_per_node, args) - - -def main_worker(gpu, ngpus_per_node, args): - args.gpu = args.process_device_map[gpu] - if args.distributed: - if args.device == 'npu': - dist.init_process_group(backend=args.dist_backend, - # init_method=args.dist_url, - world_size=args.world_size, - rank=args.rank) - else: - dist.init_process_group(backend=args.dist_backend, - init_method=args.dist_url, - world_size=args.world_size, - rank=args.rank) - - print('rank: {} / {}'.format(args.rank, args.world_size)) - - # init device - device_loc = get_device_name(args.device, args.gpu) - args.loc = device_loc - - # set device - print('set_device ', device_loc) - if args.device == 'npu': - torch.npu.set_device(device_loc) - else: - torch.cuda.set_device(args.gpu) - - # create model - G = Generator(args.img_size, args.latent_dim, args.channels) - D = Discriminator(args.img_size, args.channels) - # initialize weights - G.apply(weights_init_normal) - D.apply(weights_init_normal) - if args.checkpoint_path: - print("=> using pre-trained model dcgan,device(%d)" % args.gpu) - print("loading model of yours...,device(%d)" % args.gpu) - checkpoint = torch.load(args.checkpoint_path, map_location="cpu") - G.load_state_dict({k.replace('module.', ''): v for k, v in checkpoint["G"].items()}) - D.load_state_dict({k.replace('module.', ''): v for k, v in checkpoint["D"].items()}) - else: - print("=> creating model dcgan,device(%d)" % args.gpu) - - print('model to device_loc(%s)...' 
% device_loc) - G = G.to(device_loc) - D = D.to(device_loc) - - if args.distributed: - args.batch_size = int(args.batch_size / args.world_size) - args.n_cpu = int((args.n_cpu + ngpus_per_node - 1) / ngpus_per_node) - args.sample_interval = int(args.sample_interval / ngpus_per_node) - - # define optimizer, apply apex - optimizer_G = apex.optimizers.NpuFusedAdam(G.parameters(), lr=args.lr, betas=(args.b1, args.b2)) - optimizer_D = apex.optimizers.NpuFusedAdam(D.parameters(), lr=args.lr, betas=(args.b1, args.b2)) - - if args.amp: - [D, G], [optimizer_D, optimizer_G] = amp.initialize( - [D, G], [optimizer_D, optimizer_G], opt_level=args.opt_level, loss_scale=args.loss_scale, num_losses=3, - combine_grad=True) - - if args.evaluate: - print("evaluate mode...", " device(%d)," % args.gpu) - validate(G, args) - return - - if args.checkpoint_path: - args.start_epoch = checkpoint['epoch'] - optimizer_G.load_state_dict(checkpoint['optimizer_G']) - optimizer_D.load_state_dict(checkpoint['optimizer_D']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) - - if args.distributed: - G = torch.nn.parallel.DistributedDataParallel(G, device_ids=[args.gpu], broadcast_buffers=False) - D = torch.nn.parallel.DistributedDataParallel(D, device_ids=[args.gpu], broadcast_buffers=False) - - # Loss function - adversarial_loss = nn.BCEWithLogitsLoss().to(device_loc) - - cudnn.benchmark = True - - # Data loading code - data_path = args.data - print("dataset path : %s" % data_path) - train_dataset = datasets.MNIST( - data_path, - train=True, - download=False, - transform=transforms.Compose( - [transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])] - )) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.n_cpu, pin_memory=False, sampler=train_sampler, drop_last=True) - - if args.prof: - print("profiling mode...", " device(%d)," % args.gpu) - profiling(train_loader, G, D, optimizer_G, optimizer_D, adversarial_loss, args) - return - - # start training - print("train mode...", " device(%d)," % args.gpu) - fixed_z = torch.randn((5, args.latent_dim), dtype=torch.float32) - # Configure input - fixed_z = fixed_z.to(device_loc, non_blocking=True).to(torch.float) - for epoch in range(args.start_epoch, args.n_epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - # train for one epoch - train(train_loader, - G, D, - optimizer_G, optimizer_D, - adversarial_loss, - epoch, args, - ngpus_per_node) - - if not args.distributed or (args.distributed and args.gpu == args.process_device_map[0]): - # save fixed imgs - G.eval() - fixed_imgs = G(fixed_z) - save_image(fixed_imgs[:5], "samples/fixed_images-epoch_%03d.png" % epoch, nrow=5, normalize=True) - ############## npu modify begin ############# - if args.amp: - torch.save({ - 'epoch': epoch + 1, - 'arch': 'dcgan', - 'G': G.state_dict(), - 'D': D.state_dict(), - 'optimizer_G': optimizer_G.state_dict(), - 'optimizer_D': optimizer_D.state_dict(), - 'amp': amp.state_dict(), - }, "checkpoint-amp-epoch_%d.pth" % (epoch + 1)) - - if os.path.exists("checkpoint-amp-epoch_%d.pth" % epoch): - os.remove("checkpoint-amp-epoch_%d.pth" % epoch) - else: - torch.save({ - 'epoch': epoch + 1, - 'arch': 'dcgan', - 'G': G.state_dict(), - 
'D': D.state_dict(), - 'optimizer_G': optimizer_G.state_dict(), - 'optimizer_D': optimizer_D.state_dict(), - }, "checkpoint-epoch_%d.pth" % (epoch + 1)) - if os.path.exists("checkpoint-epoch_%d.pth" % epoch): - os.remove("checkpoint-epoch_%d.pth" % epoch) - ############## npu modify end ############# - # train loop done - - -def profiling(train_loader, generator, discriminator, optimizer_G, optimizer_D, loss, args): - generator.train() - discriminator.train() - - def update(step=None): - start_time = time.time() - valid = torch.ones(imgs.size(0), 1, requires_grad=False) - fake = torch.zeros(imgs.size(0), 1, requires_grad=False) - # Sample noise as generator input - z = torch.randn((imgs.size(0), args.latent_dim), dtype=torch.float32) - # Configure input - real_imgs = imgs.to(args.loc, non_blocking=True).to(torch.float) - valid = valid.to(args.loc, non_blocking=True).to(torch.float) - fake = fake.to(args.loc, non_blocking=True).to(torch.float) - z = z.to(args.loc, non_blocking=True).to(torch.float) - # update D - discriminator.zero_grad() - output = discriminator(real_imgs) - errD_real = loss(output, valid) - with amp.scale_loss(errD_real, optimizer_D, loss_id=0) as errD_real_scaled: - errD_real_scaled.backward() - gen_imgs = generator(z) - output = discriminator(gen_imgs.detach()) - errD_fake = loss(output, fake) - with amp.scale_loss(errD_fake, optimizer_D, loss_id=1) as errD_fake_scaled: - errD_fake_scaled.backward() - errD = errD_real + errD_fake - optimizer_D.step() - # update G - generator.zero_grad() - output = discriminator(gen_imgs) - errG = loss(output, valid) - with amp.scale_loss(errG, optimizer_G, loss_id=2) as errG_scaled: - errG_scaled.backward() - optimizer_G.step() - if step is not None: - print('iter: %d, loss: %.2f, time: %.2f' % (step, errG.item(), (time.time() - start_time))) - - for i, (imgs, _) in enumerate(train_loader): - if i < 20: - update(step=i) - else: - if args.device == 'npu': - with torch.autograd.profiler.profile(use_npu=True) as prof: - update() - else: - with torch.autograd.profiler.profile(use_cuda=True) as prof: - update() - break - prof.export_chrome_trace("dcgan.prof") - - -def train(train_loader, generator, discriminator, optimizer_G, optimizer_D, loss, epoch, args, ngpus_per_node): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - G_loss = AverageMeter('G_Loss', ':.4e') - D_loss = AverageMeter('D_Loss', ':.4e') - D_real = AverageMeter('D_real', ':.4e') - D_fake = AverageMeter('D_fake', ':.4e') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, G_loss, D_loss, D_real, D_fake], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - generator.train() - discriminator.train() - - end = time.time() - for i, (imgs, _) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - valid = torch.ones(imgs.size(0), 1, requires_grad=False) - fake = torch.zeros(imgs.size(0), 1, requires_grad=False) - # Sample noise as generator input - z = torch.randn((imgs.size(0), args.latent_dim), dtype=torch.float32) - # Configure input - real_imgs = imgs.to(args.loc, non_blocking=True).to(torch.float) - valid = valid.to(args.loc, non_blocking=True).to(torch.float) - fake = fake.to(args.loc, non_blocking=True).to(torch.float) - z = z.to(args.loc, non_blocking=True).to(torch.float) - - # update D - discriminator.zero_grad() - output = discriminator(real_imgs) - errD_real = loss(output, valid) - with amp.scale_loss(errD_real, optimizer_D, loss_id=0) as 
errD_real_scaled: - errD_real_scaled.backward() - - gen_imgs = generator(z) - output = discriminator(gen_imgs.detach()) - errD_fake = loss(output, fake) - with amp.scale_loss(errD_fake, optimizer_D, loss_id=1) as errD_fake_scaled: - errD_fake_scaled.backward() - errD = errD_real + errD_fake - optimizer_D.step() - - # update G - generator.zero_grad() - output = discriminator(gen_imgs) - errG = loss(output, valid) - with amp.scale_loss(errG, optimizer_G, loss_id=2) as errG_scaled: - errG_scaled.backward() - optimizer_G.step() - - D_loss.update(errD.item(), real_imgs.size(0)) - D_fake.update(errD_fake.item(), real_imgs.size(0)) - D_real.update(errD_real.item(), real_imgs.size(0)) - G_loss.update(errG.item(), real_imgs.size(0)) - - # measure elapsed time - cost_time = time.time() - end - batch_time.update(cost_time) - end = time.time() - - if not args.distributed or (args.distributed and args.gpu == args.process_device_map[0]): - if i % args.print_freq == 0: - progress.display(i) - - batches_done = epoch * len(train_loader) + i - if batches_done % args.sample_interval == 0: - save_image(gen_imgs.data[:25], "images/%06d.png" % batches_done, nrow=5, normalize=True) - - if batch_time.avg: - print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, - 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size * args.world_size / batch_time.avg)) - # train loop done - - -def validate(generator, args): - batch_time = AverageMeter('Time', ':6.3f') - print("start generate random image...(validate mode)") - generator.eval() - - if not os.path.exists("./validate/"): - os.makedirs("validate") - end = time.time() - with torch.no_grad(): - for i in range(args.n_samples): - z = torch.randn((25, args.latent_dim), dtype=torch.float32) - z = z.to(args.loc, non_blocking=True) - # gen images - images = generator(z) - batch_time.update(time.time() - end) - end = time.time() - save_image(images.data[:25], "validate/%03d.jpg" % i, nrow=5, normalize=True) - if batch_time.avg: - print("[npu id:", args.gpu, "]", "batch_size:", 25, - 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - 25 / batch_time.avg)) - # train loop done - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=2): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -if __name__ == "__main__": - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import argparse +import os +import time +import apex +from apex import amp +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +from torchvision.utils import save_image + +from dcgan import Generator, Discriminator, weights_init_normal + +parser = argparse.ArgumentParser(description="pytorch DCGAN implementation") +## dcgan parameters +parser.add_argument('--data', metavar='DIR', type=str, default="./data", + help='path to dataset') +parser.add_argument("--n-epochs", type=int, default=200, + help="number of epochs of training") +parser.add_argument("--batch-size", type=int, default=64, + help="size of the batches") +parser.add_argument("--lr", type=float, default=0.0002, + help="adam: learning rate") +parser.add_argument("--b1", type=float, default=0.5, + help="adam: decay of first order momentum of gradient") +parser.add_argument("--b2", type=float, default=0.999, + help="adam: decay of first order momentum of gradient") +parser.add_argument("--n-cpu", type=int, default=8, + help="number of cpu threads to use during batch generation") +parser.add_argument("--latent_dim", type=int, default=100, + help="dimensionality of the latent space") +parser.add_argument("--img_size", type=int, default=32, + help="size of each image dimension") +parser.add_argument("--channels", type=int, default=1, + help="number of image channels") +parser.add_argument("--sample_interval", type=int, default=400, + help="interval between image sampling") +## add useful parameters : such as resume,evaluate +parser.add_argument('--checkpoint-path', default=None, type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', default=False, + help='evaluate model : generate (n_samples) samples,saved in dir(validate)') +parser.add_argument('--n-samples', type=int, default=10, + help="amount of samples in function(validate)") +parser.add_argument('-p', '--print-freq', default=10, type=int, metavar='N', + help='print frequency (default 10)') +## parameters for distribute training +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +## for ascend 910 +parser.add_argument('--device', default='npu', type=str, help='npu 
or gpu') +parser.add_argument('--addr', default='10.136.181.115', + type=str, help='master addr') +parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', + type=str, help='device id list') +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=None, type=float, + help='loss scale using in amp, default None means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--prof', default=False, action='store_true', + help='use profiling to evaluate the performance of model') + + +def device_id_to_process_device_map(device_list): + devices = device_list.split(",") + devices = [int(x) for x in devices] + devices.sort() + + process_device_map = dict() + for process_id, device_id in enumerate(devices): + process_device_map[process_id] = device_id + + return process_device_map + + +def get_device_name(device_type, device_order): + if device_type == 'npu': + device_name = 'npu:{}'.format(device_order) + else: + device_name = 'cuda:{}'.format(device_order) + + return device_name + + +def main(): + args = parser.parse_args() + print(args.device_list) + args.process_device_map = device_id_to_process_device_map(args.device_list) + + # add start_epoch + args.start_epoch = 0 + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.device == 'npu': + ngpus_per_node = len(args.process_device_map) + else: + if args.gpu is None: + ngpus_per_node = len(args.process_device_map) + else: + ngpus_per_node = 1 + print('ngpus_per_node:', ngpus_per_node) + + args.world_size = ngpus_per_node * args.world_size + args.distributed = args.world_size > 1 + + # create folders + if not args.distributed or (args.distributed and args.rank == args.process_device_map[0]): + if not os.path.exists("./images/"): + os.makedirs("./images/") + if not os.path.exists("./samples/"): + os.makedirs("./samples/") + + main_worker(args.rank, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + args.gpu = args.process_device_map[gpu] + if args.distributed: + if args.device == 'npu': + dist.init_process_group(backend=args.dist_backend, + # init_method=args.dist_url, + world_size=args.world_size, + rank=args.rank) + else: + dist.init_process_group(backend=args.dist_backend, + init_method=args.dist_url, + world_size=args.world_size, + rank=args.rank) + + print('rank: {} / {}'.format(args.rank, args.world_size)) + + # init device + device_loc = get_device_name(args.device, args.gpu) + args.loc = device_loc + + # set device + print('set_device ', device_loc) + if args.device == 'npu': + torch.npu.set_device(device_loc) + else: + torch.cuda.set_device(args.gpu) + + # create model + G = Generator(args.img_size, args.latent_dim, args.channels) + D = Discriminator(args.img_size, args.channels) + # initialize weights + G.apply(weights_init_normal) + D.apply(weights_init_normal) + if args.checkpoint_path: + print("=> using pre-trained model dcgan,device(%d)" % args.gpu) + print("loading model of yours...,device(%d)" % args.gpu) + checkpoint = torch.load(args.checkpoint_path, map_location="cpu") + G.load_state_dict({k.replace('module.', ''): v for k, v in checkpoint["G"].items()}) + D.load_state_dict({k.replace('module.', ''): v for k, v in checkpoint["D"].items()}) + else: + print("=> creating model dcgan,device(%d)" % args.gpu) + + print('model to device_loc(%s)...' 
% device_loc) + G = G.to(device_loc) + D = D.to(device_loc) + + if args.distributed: + args.batch_size = int(args.batch_size / args.world_size) + args.n_cpu = int((args.n_cpu + ngpus_per_node - 1) / ngpus_per_node) + args.sample_interval = int(args.sample_interval / ngpus_per_node) + + # define optimizer, apply apex + optimizer_G = apex.optimizers.NpuFusedAdam(G.parameters(), lr=args.lr, betas=(args.b1, args.b2)) + optimizer_D = apex.optimizers.NpuFusedAdam(D.parameters(), lr=args.lr, betas=(args.b1, args.b2)) + + if args.amp: + [D, G], [optimizer_D, optimizer_G] = amp.initialize( + [D, G], [optimizer_D, optimizer_G], opt_level=args.opt_level, loss_scale=args.loss_scale, num_losses=3, + combine_grad=True) + + if args.evaluate: + print("evaluate mode...", " device(%d)," % args.gpu) + validate(G, args) + return + + if args.checkpoint_path: + args.start_epoch = checkpoint['epoch'] + optimizer_G.load_state_dict(checkpoint['optimizer_G']) + optimizer_D.load_state_dict(checkpoint['optimizer_D']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) + + if args.distributed: + G = torch.nn.parallel.DistributedDataParallel(G, device_ids=[args.gpu], broadcast_buffers=False) + D = torch.nn.parallel.DistributedDataParallel(D, device_ids=[args.gpu], broadcast_buffers=False) + + # Loss function + adversarial_loss = nn.BCEWithLogitsLoss().to(device_loc) + + cudnn.benchmark = True + + # Data loading code + data_path = args.data + print("dataset path : %s" % data_path) + train_dataset = datasets.MNIST( + data_path, + train=True, + download=False, + transform=transforms.Compose( + [transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])] + )) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.n_cpu, pin_memory=False, sampler=train_sampler, drop_last=True) + + if args.prof: + print("profiling mode...", " device(%d)," % args.gpu) + profiling(train_loader, G, D, optimizer_G, optimizer_D, adversarial_loss, args) + return + + # start training + print("train mode...", " device(%d)," % args.gpu) + fixed_z = torch.randn((5, args.latent_dim), dtype=torch.float32) + # Configure input + fixed_z = fixed_z.to(device_loc, non_blocking=True).to(torch.float) + for epoch in range(args.start_epoch, args.n_epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + # train for one epoch + train(train_loader, + G, D, + optimizer_G, optimizer_D, + adversarial_loss, + epoch, args, + ngpus_per_node) + + if not args.distributed or (args.distributed and args.gpu == args.process_device_map[0]): + # save fixed imgs + G.eval() + fixed_imgs = G(fixed_z) + save_image(fixed_imgs[:5], "samples/fixed_images-epoch_%03d.png" % epoch, nrow=5, normalize=True) + ############## npu modify begin ############# + if args.amp: + torch.save({ + 'epoch': epoch + 1, + 'arch': 'dcgan', + 'G': G.state_dict(), + 'D': D.state_dict(), + 'optimizer_G': optimizer_G.state_dict(), + 'optimizer_D': optimizer_D.state_dict(), + 'amp': amp.state_dict(), + }, "checkpoint-amp-epoch_%d.pth" % (epoch + 1)) + + if os.path.exists("checkpoint-amp-epoch_%d.pth" % epoch): + os.remove("checkpoint-amp-epoch_%d.pth" % epoch) + else: + torch.save({ + 'epoch': epoch + 1, + 'arch': 'dcgan', + 'G': G.state_dict(), + 
'D': D.state_dict(), + 'optimizer_G': optimizer_G.state_dict(), + 'optimizer_D': optimizer_D.state_dict(), + }, "checkpoint-epoch_%d.pth" % (epoch + 1)) + if os.path.exists("checkpoint-epoch_%d.pth" % epoch): + os.remove("checkpoint-epoch_%d.pth" % epoch) + ############## npu modify end ############# + # train loop done + + +def profiling(train_loader, generator, discriminator, optimizer_G, optimizer_D, loss, args): + generator.train() + discriminator.train() + + def update(step=None): + start_time = time.time() + valid = torch.ones(imgs.size(0), 1, requires_grad=False) + fake = torch.zeros(imgs.size(0), 1, requires_grad=False) + # Sample noise as generator input + z = torch.randn((imgs.size(0), args.latent_dim), dtype=torch.float32) + # Configure input + real_imgs = imgs.to(args.loc, non_blocking=True).to(torch.float) + valid = valid.to(args.loc, non_blocking=True).to(torch.float) + fake = fake.to(args.loc, non_blocking=True).to(torch.float) + z = z.to(args.loc, non_blocking=True).to(torch.float) + # update D + discriminator.zero_grad() + output = discriminator(real_imgs) + errD_real = loss(output, valid) + with amp.scale_loss(errD_real, optimizer_D, loss_id=0) as errD_real_scaled: + errD_real_scaled.backward() + gen_imgs = generator(z) + output = discriminator(gen_imgs.detach()) + errD_fake = loss(output, fake) + with amp.scale_loss(errD_fake, optimizer_D, loss_id=1) as errD_fake_scaled: + errD_fake_scaled.backward() + errD = errD_real + errD_fake + optimizer_D.step() + # update G + generator.zero_grad() + output = discriminator(gen_imgs) + errG = loss(output, valid) + with amp.scale_loss(errG, optimizer_G, loss_id=2) as errG_scaled: + errG_scaled.backward() + optimizer_G.step() + if step is not None: + print('iter: %d, loss: %.2f, time: %.2f' % (step, errG.item(), (time.time() - start_time))) + + for i, (imgs, _) in enumerate(train_loader): + if i < 20: + update(step=i) + else: + if args.device == 'npu': + with torch.autograd.profiler.profile(use_npu=True) as prof: + update() + else: + with torch.autograd.profiler.profile(use_cuda=True) as prof: + update() + break + prof.export_chrome_trace("dcgan.prof") + + +def train(train_loader, generator, discriminator, optimizer_G, optimizer_D, loss, epoch, args, ngpus_per_node): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + G_loss = AverageMeter('G_Loss', ':.4e') + D_loss = AverageMeter('D_Loss', ':.4e') + D_real = AverageMeter('D_real', ':.4e') + D_fake = AverageMeter('D_fake', ':.4e') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, G_loss, D_loss, D_real, D_fake], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + generator.train() + discriminator.train() + + end = time.time() + for i, (imgs, _) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + valid = torch.ones(imgs.size(0), 1, requires_grad=False) + fake = torch.zeros(imgs.size(0), 1, requires_grad=False) + # Sample noise as generator input + z = torch.randn((imgs.size(0), args.latent_dim), dtype=torch.float32) + # Configure input + real_imgs = imgs.to(args.loc, non_blocking=True).to(torch.float) + valid = valid.to(args.loc, non_blocking=True).to(torch.float) + fake = fake.to(args.loc, non_blocking=True).to(torch.float) + z = z.to(args.loc, non_blocking=True).to(torch.float) + + # update D + discriminator.zero_grad() + output = discriminator(real_imgs) + errD_real = loss(output, valid) + with amp.scale_loss(errD_real, optimizer_D, loss_id=0) as 
errD_real_scaled: + errD_real_scaled.backward() + + gen_imgs = generator(z) + output = discriminator(gen_imgs.detach()) + errD_fake = loss(output, fake) + with amp.scale_loss(errD_fake, optimizer_D, loss_id=1) as errD_fake_scaled: + errD_fake_scaled.backward() + errD = errD_real + errD_fake + optimizer_D.step() + + # update G + generator.zero_grad() + output = discriminator(gen_imgs) + errG = loss(output, valid) + with amp.scale_loss(errG, optimizer_G, loss_id=2) as errG_scaled: + errG_scaled.backward() + optimizer_G.step() + + D_loss.update(errD.item(), real_imgs.size(0)) + D_fake.update(errD_fake.item(), real_imgs.size(0)) + D_real.update(errD_real.item(), real_imgs.size(0)) + G_loss.update(errG.item(), real_imgs.size(0)) + + # measure elapsed time + cost_time = time.time() - end + batch_time.update(cost_time) + end = time.time() + + if not args.distributed or (args.distributed and args.gpu == args.process_device_map[0]): + if i % args.print_freq == 0: + progress.display(i) + + batches_done = epoch * len(train_loader) + i + if batches_done % args.sample_interval == 0: + save_image(gen_imgs.data[:25], "images/%06d.png" % batches_done, nrow=5, normalize=True) + + if batch_time.avg: + print("[npu id:", args.gpu, "]", "batch_size:", args.world_size * args.batch_size, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + args.batch_size * args.world_size / batch_time.avg)) + # train loop done + + +def validate(generator, args): + batch_time = AverageMeter('Time', ':6.3f') + print("start generate random image...(validate mode)") + generator.eval() + + if not os.path.exists("./validate/"): + os.makedirs("validate") + end = time.time() + with torch.no_grad(): + for i in range(args.n_samples): + z = torch.randn((25, args.latent_dim), dtype=torch.float32) + z = z.to(args.loc, non_blocking=True) + # gen images + images = generator(z) + batch_time.update(time.time() - end) + end = time.time() + save_image(images.data[:25], "validate/%03d.jpg" % i, nrow=5, normalize=True) + if batch_time.avg: + print("[npu id:", args.gpu, "]", "batch_size:", 25, + 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( + 25 / batch_time.avg)) + # train loop done + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=2): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +if __name__ == "__main__": + main() diff --git 
a/PyTorch/contrib/cv/others/DCGAN/modelzoo_level.txt b/PyTorch/contrib/cv/others/DCGAN/modelzoo_level.txt index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e 100644 --- a/PyTorch/contrib/cv/others/DCGAN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/DCGAN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/DCGAN/requirements.txt b/PyTorch/contrib/cv/others/DCGAN/requirements.txt index dd1af6e127fb102fc10d031b41231ea42cff0387..ea5a8512c14e2f7cdbf5a5b0b46f485f1a032afc 100644 --- a/PyTorch/contrib/cv/others/DCGAN/requirements.txt +++ b/PyTorch/contrib/cv/others/DCGAN/requirements.txt @@ -1,3 +1,3 @@ -torch -apex +torch +apex torchvision \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/Dockerfile b/PyTorch/contrib/cv/others/GAN_Pytorch/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/Dockerfile +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/README.md b/PyTorch/contrib/cv/others/GAN_Pytorch/README.md index 96151c3e2174db8b1ca7fb5c1813ded4b0930bb7..be1d95ec6e43049d93d20d7716bd64dfd7a22fb5 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/README.md +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/README.md @@ -1,46 +1,46 @@ -# GAN 训练 -This implements training of RDN on the DIV2K_x2 dataset. -- Reference implementation: -``` -url=https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py -``` - - - -## Requirements # - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- The MNIST Dataset can be downloaded from the links below.Move the datasets to directory ./data . - - Train Set : [Download Mnist](https://wwr.lanzoui.com/iSBOeu43dkf) - -## Training # -To train a model, change the working directory to `./test`,then run: - -```bash -# 1p train perf -bash train_performance_1p.sh - -# 8p train perf -bash train_performance_8p.sh - -# 8p train full -bash train_full_8p.sh - -# 8p eval -bash train_eval_8p.sh - -# finetuning -bash train_finetune_1p.sh -``` -After running,you can see the results in `./output` - -## GAN training result # - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 997 | 1 | 200 | O1 | -| - | 11795 | 8 | 200 | O1 | - - - +# GAN 训练 +This implements training of RDN on the DIV2K_x2 dataset. +- Reference implementation: +``` +url=https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/gan/gan.py +``` + + + +## Requirements # + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- The MNIST Dataset can be downloaded from the links below.Move the datasets to directory ./data . 
+ - Train Set : [Download Mnist](https://wwr.lanzoui.com/iSBOeu43dkf) + +## Training # +To train a model, change the working directory to `./test`,then run: + +```bash +# 1p train perf +bash train_performance_1p.sh + +# 8p train perf +bash train_performance_8p.sh + +# 8p train full +bash train_full_8p.sh + +# 8p eval +bash train_eval_8p.sh + +# finetuning +bash train_finetune_1p.sh +``` +After running,you can see the results in `./output` + +## GAN training result # + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 997 | 1 | 200 | O1 | +| - | 11795 | 8 | 200 | O1 | + + + diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/docker_start.sh b/PyTorch/contrib/cv/others/GAN_Pytorch/docker_start.sh index 46ce9a02ec0532d6db324beaee7f7eab501b4565..944bca3cdac8e3f2d47ceb0e2b6eb181a405de11 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/docker_start.sh +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/docker_start.sh @@ -1,25 +1,25 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/main.py b/PyTorch/contrib/cv/others/GAN_Pytorch/main.py index 87b96cdccb4ff7e9a845418bd5f1632bd2c33c53..892cbad98135c15dc82a2db870f15d12590ceafa 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/main.py +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/main.py @@ -1,387 +1,387 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
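The DCGAN script earlier in this patch and the GAN main.py that follows share the same alternating adversarial update: the discriminator is stepped on a real batch plus a detached fake batch, then the generator is stepped against the `valid` target. A minimal CPU-only sketch of that pattern is below; the toy linear models, sizes, and plain `backward()` calls are stand-ins for the patch's Generator/Discriminator and its `amp.scale_loss(..., loss_id=...)` wrappers.

```python
import torch
import torch.nn as nn

latent_dim, img_dim, batch = 16, 32, 8

# Hypothetical stand-ins for the Generator / Discriminator used in this patch.
G = nn.Sequential(nn.Linear(latent_dim, img_dim), nn.Tanh())
D = nn.Sequential(nn.Linear(img_dim, 1))           # raw logits, pairs with BCEWithLogitsLoss

opt_G = torch.optim.Adam(G.parameters(), lr=2e-4, betas=(0.5, 0.999))
opt_D = torch.optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.999))
loss_fn = nn.BCEWithLogitsLoss()

real_imgs = torch.randn(batch, img_dim)            # stands in for a real MNIST batch
valid = torch.ones(batch, 1)
fake = torch.zeros(batch, 1)
z = torch.randn(batch, latent_dim)

# --- discriminator step: real batch + detached fake batch ---
D.zero_grad()
err_d_real = loss_fn(D(real_imgs), valid)
err_d_real.backward()                              # the patch wraps this in amp.scale_loss(loss_id=0)
gen_imgs = G(z)
err_d_fake = loss_fn(D(gen_imgs.detach()), fake)   # detach so this step sends no gradient into G
err_d_fake.backward()                              # loss_id=1 in the patch
opt_D.step()

# --- generator step: try to make D predict "valid" on the fakes ---
G.zero_grad()
err_g = loss_fn(D(gen_imgs), valid)
err_g.backward()                                   # loss_id=2 in the patch
opt_G.step()
print(float(err_d_real + err_d_fake), float(err_g))
```

The `detach()` on the fake batch is what keeps the discriminator step from pushing gradients into the generator; the generator step then reuses the same `gen_imgs`, this time without detaching, so its loss backpropagates through both networks but only `opt_G` is stepped.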
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import argparse -import os -import sys -import numpy as np -import time -import matplotlib.pyplot as plt -import datetime - -import torchvision.transforms as transforms -from torchvision.utils import save_image -from torchvision import datasets - -import torch -from torch.autograd import Variable -import torch.nn as nn -import torch.multiprocessing as mp -from torch.nn.parallel import DistributedDataParallel as DDP -from torch.utils.data import DataLoader - -from models import Generator, Discriminator - -try: - import apex - from apex import amp -except ImportError: - amp = None - -def flush_print(func): - def new_print(*args, **kwargs): - func(*args, **kwargs) - sys.stdout.flush() - return new_print - -print = flush_print(print) - - -def train_one_epoch(generator, discriminator, optimizer_G, optimizer_D, adversarial_loss, - epoch, args, dataloader, Tensor,LOSS_G,LOSS_D,device): - batch_time = AverageMeter('Time', ':6.3f', start_count_index=5) - G_loss = AverageMeter('g_loss', ':6.3f', start_count_index=0) - D_loss = AverageMeter('d_loss', ':6. 3f', start_count_index=0) - - for i, (imgs,_) in enumerate(dataloader): - - start_time = time.time() - valid = Variable(Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False) - fake = Variable(Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False) - - # Configure input - real_imgs = Variable(imgs.type(Tensor)).to(device) - - # ----------------- - # Train Generator - # ----------------- - - optimizer_G.zero_grad() - - # Sample noise as generator input - z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], args.latent_dim)))).to(device) - - # Generate a batch of images - gen_imgs = generator(z) - - # Loss measures generator's ability to fool the discriminator - g_loss = adversarial_loss(discriminator(gen_imgs), valid) - - G_loss.update(g_loss.item(), len(gen_imgs)) - if args.apex: - with amp.scale_loss(g_loss, optimizer_G) as scaled_loss: - scaled_loss.backward() - else: - G_loss.backward() - optimizer_G.step() - - # --------------------- - # Train Discriminator - # --------------------- - - optimizer_D.zero_grad() - - # Measure discriminator's ability to classify real from generated samples - real_loss = adversarial_loss(discriminator(real_imgs), valid) - fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake) - d_loss = (real_loss + fake_loss) / 2 - - D_loss.update(d_loss.item(), len(real_imgs)) - if args.apex: - with amp.scale_loss(d_loss, optimizer_D) as scaled_loss: - scaled_loss.backward() - else: - d_loss.backward() - optimizer_D.step() - batch_time.update(time.time() - start_time) - if args.n_epochs == 1: - print( - "[Epoch %d] [step %d] [D loss: %f] [G loss: %f]" - % (epoch, i, D_loss.avg, G_loss.avg) - ) - batches_done = epoch * len(dataloader)+ i - if batches_done % args.sample_interval == 0 and args.is_master_node and args.n_epochs != 1: - save_image(gen_imgs.data[:25], "training_images/%d.png" % batches_done, nrow=5, normalize=True) - if args.is_master_node: - print( - "[Epoch %d] [D loss: %f] [G loss: %f] FPS:%.3f" - % (epoch, D_loss.avg,G_loss.avg,args.batch_size*args.gpus/batch_time.avg) - ) - LOSS_G.append(G_loss.avg) - LOSS_D.append(D_loss.avg) - - - -def main(args): - - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = '29688' - - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex 
currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - # if args.output_dir: - # os.mkdir(args.output_dir) - - if args.distributed: - - mp.spawn(main_worker, nprocs=args.gpus, - args=(args,)) - else: - main_worker(args.gpus, args) - -def main_worker(nprocs, args): - local_rank = 0 - if args.distributed: - torch.distributed.init_process_group(backend="hccl", - init_method='env://', - world_size=args.nodes * args.gpus, - rank=nprocs) - local_rank = torch.distributed.get_rank() - args.is_master_node = not args.distributed or local_rank == 0 - if args.is_master_node: - print(args) - args.device_id = args.device_id + local_rank - print('device_id=', args.device_id) - device = torch.device(f'npu:{args.device_id}') # npu - torch.npu.set_device(device) # for npu - print("Downloading dataset...") - # Configure data loader - os.makedirs("../data/mnist", exist_ok=True) - train_dataset = datasets.MNIST( - "../../data/mnist", - train=True, - download=True, - transform=transforms.Compose( - [transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])] - )) - - print("Creating dataloader") - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler( - train_dataset) - else: - train_sampler = None - dataloader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=( - train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler) - - if args.is_master_node: - print("Creating model") - # create model - Tensor = torch.npu.FloatTensor - LOSS_G=[] - LOSS_D=[] - os.makedirs("../output", exist_ok=True) - os.chdir("../output") - generator = Generator() - discriminator = Discriminator() - if args.pretrained: - print("=> using pre-trained model GAN") - generator = Generator() - discriminator = Discriminator() - print("loading model of yours...") - checkpoint = torch.load(r'./checkpoint.pth.tar',map_location='cpu') - from collections import OrderedDict - new_state_dict_g = OrderedDict() - new_state_dict_d = OrderedDict() - for k, v in checkpoint['state_dict_G'].items(): - name = k.replace("module.", "") # remove `module.` - new_state_dict_g[name] = v - for k, v in checkpoint['state_dict_D'].items(): - name = k.replace("module.", "") # remove `module.` - new_state_dict_d[name] = v - # load params - generator.load_state_dict(new_state_dict_g) - discriminator.load_state_dict(new_state_dict_d) - LOSS_D = checkpoint['loss_d'] - LOSS_G = checkpoint['loss_g'] - args.start_epoch = checkpoint['epoch'] - - generator = generator.to(device) - discriminator = discriminator.to(device) - - adversarial_loss = nn.BCELoss().to(device) - - optimizer_G = apex.optimizers.NpuFusedAdam(generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) - optimizer_D = apex.optimizers.NpuFusedAdam(discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) - - if args.apex: - amp.register_float_function(torch, 'sigmoid') - amp.register_half_function(torch, 'addmm') - generator, optimizer_G = amp.initialize(generator, optimizer_G, - opt_level='O1', loss_scale=128,combine_grad=True) - - discriminator, optimizer_D = amp.initialize(discriminator, optimizer_D, - opt_level='O1', loss_scale=128,combine_grad=True) - - if args.distributed: - generator = DDP(generator, device_ids=[local_rank], broadcast_buffers=False) - discriminator = DDP(discriminator, 
device_ids=[local_rank], broadcast_buffers=False) - - if args.test_only : - Tensor = torch.npu.FloatTensor - generator = Generator().npu() - checkpoint = torch.load(r'./checkpoint.pth.tar', map_location='cpu') - - loss_d = checkpoint['loss_d'] - loss_g = checkpoint['loss_g'] - x = range(len(loss_d)) - plt.figure() - - plt.plot(x, loss_d, color='r', label="loss_d") - plt.plot(x, loss_g, color='g', label="loss_g") - plt.legend() - plt.xlabel('epoch') - plt.ylabel('value') - plt.savefig('LOSS_{}p_{}_{}.jpg'.format(args.gpus, args.lr, args.batch_size)) - - # create new OrderedDict that does not contain `module.` - from collections import OrderedDict - new_state_dict = OrderedDict() - for k, v in checkpoint['state_dict_G'].items(): - name = k.replace("module.", "") # remove `module.` - new_state_dict[name] = v - # load params - generator.load_state_dict(new_state_dict) - os.makedirs("image", exist_ok=True) - for i in range(200): - z = Variable(Tensor(np.random.normal(0, 1, (64, 100)))).npu() - - # Generate a batch of images - gen_imgs = generator(z) - - save_image(gen_imgs.data[:25], "image/%d.png" % i, nrow=5, normalize=True) - print("Generate done!") - return - - if args.is_master_node: - print("Start training") - start_time = time.time() - os.makedirs("training_images",exist_ok=True) - for epoch in range(args.start_epoch, args.n_epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - - # train for one epoch - train_one_epoch(generator, discriminator, optimizer_G, optimizer_D, adversarial_loss, - epoch, args, dataloader,Tensor, LOSS_G,LOSS_D,device) - - if epoch == 50 or epoch == 199: - if args.apex and args.is_master_node: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'GAN', - 'state_dict_G': generator.state_dict(), - 'state_dict_D': discriminator.state_dict(), - 'optimizer_G': optimizer_G.state_dict(), - 'optimizer_D': optimizer_D.state_dict(), - 'loss_g': LOSS_G, - 'loss_d': LOSS_D, - 'apex': amp.state_dict() - }) - elif args.is_master_node: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': 'GAN', - 'state_dict_G': generator.state_dict(), - 'state_dict_D': discriminator.state_dict(), - 'optimizer_G': optimizer_G.state_dict(), - 'optimizer_D': optimizer_D.state_dict(), - 'loss_g': LOSS_G, - 'loss_d': LOSS_D - }) - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - if args.is_master_node: - print('Training time {}'.format(total_time_str)) - -def save_checkpoint(state, filename='./checkpoint.pth.tar'): - torch.save(state, filename) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=10): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -def parse_args(): - parser = argparse.ArgumentParser(description='PyTorch GAN Training') - parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training") - parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") - 
parser.add_argument("--lr", type=float, default=0.0008, help="adam: learning rate") - parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient") - parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") - parser.add_argument("--sample_interval", type=int, default=400, help="interval betwen image samples") - parser.add_argument("--latent_dim", type=int, default=100, help="dimensionality of the latent space") - parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") - parser.add_argument("--channels", type=int, default=1, help="number of image channels") - parser.add_argument("--gpus", type=int, default=8, help="num of gpus of per node") - parser.add_argument("--nodes", type=int, default=1) - parser.add_argument('--device_id', default=0, type=int, help='device id') - parser.add_argument("--test_only", type=int, default=None, help="only generate images") - parser.add_argument('--start_epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') - parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') - parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') - - parser.add_argument('--distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') - ## for ascend 910 - parser.add_argument('--addr', default='127.0.0.1', - type=str, help='master addr') - parser.add_argument('--workers', default=16, type=int, - help='numbers of worker') - parser.add_argument('--apex', default=False, action='store_true', - help='use apex to train the model') - args = parser.parse_args() - return args -if __name__ == '__main__': - args = parse_args() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import argparse +import os +import sys +import numpy as np +import time +import matplotlib.pyplot as plt +import datetime + +import torchvision.transforms as transforms +from torchvision.utils import save_image +from torchvision import datasets + +import torch +from torch.autograd import Variable +import torch.nn as nn +import torch.multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader + +from models import Generator, Discriminator + +try: + import apex + from apex import amp +except ImportError: + amp = None + +def flush_print(func): + def new_print(*args, **kwargs): + func(*args, **kwargs) + sys.stdout.flush() + return new_print + +print = flush_print(print) + + +def train_one_epoch(generator, discriminator, optimizer_G, optimizer_D, adversarial_loss, + epoch, args, dataloader, Tensor,LOSS_G,LOSS_D,device): + batch_time = AverageMeter('Time', ':6.3f', start_count_index=5) + G_loss = AverageMeter('g_loss', ':6.3f', start_count_index=0) + D_loss = AverageMeter('d_loss', ':6. 3f', start_count_index=0) + + for i, (imgs,_) in enumerate(dataloader): + + start_time = time.time() + valid = Variable(Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False) + fake = Variable(Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False) + + # Configure input + real_imgs = Variable(imgs.type(Tensor)).to(device) + + # ----------------- + # Train Generator + # ----------------- + + optimizer_G.zero_grad() + + # Sample noise as generator input + z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], args.latent_dim)))).to(device) + + # Generate a batch of images + gen_imgs = generator(z) + + # Loss measures generator's ability to fool the discriminator + g_loss = adversarial_loss(discriminator(gen_imgs), valid) + + G_loss.update(g_loss.item(), len(gen_imgs)) + if args.apex: + with amp.scale_loss(g_loss, optimizer_G) as scaled_loss: + scaled_loss.backward() + else: + G_loss.backward() + optimizer_G.step() + + # --------------------- + # Train Discriminator + # --------------------- + + optimizer_D.zero_grad() + + # Measure discriminator's ability to classify real from generated samples + real_loss = adversarial_loss(discriminator(real_imgs), valid) + fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake) + d_loss = (real_loss + fake_loss) / 2 + + D_loss.update(d_loss.item(), len(real_imgs)) + if args.apex: + with amp.scale_loss(d_loss, optimizer_D) as scaled_loss: + scaled_loss.backward() + else: + d_loss.backward() + optimizer_D.step() + batch_time.update(time.time() - start_time) + if args.n_epochs == 1: + print( + "[Epoch %d] [step %d] [D loss: %f] [G loss: %f]" + % (epoch, i, D_loss.avg, G_loss.avg) + ) + batches_done = epoch * len(dataloader)+ i + if batches_done % args.sample_interval == 0 and args.is_master_node and args.n_epochs != 1: + save_image(gen_imgs.data[:25], "training_images/%d.png" % batches_done, nrow=5, normalize=True) + if args.is_master_node: + print( + "[Epoch %d] [D loss: %f] [G loss: %f] FPS:%.3f" + % (epoch, D_loss.avg,G_loss.avg,args.batch_size*args.gpus/batch_time.avg) + ) + LOSS_G.append(G_loss.avg) + LOSS_D.append(D_loss.avg) + + + +def main(args): + + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = '29688' + + if args.apex: + if sys.version_info < (3, 0): + raise RuntimeError("Apex currently only supports Python 3. Aborting.") + if amp is None: + raise RuntimeError("Failed to import apex. 
Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") + # if args.output_dir: + # os.mkdir(args.output_dir) + + if args.distributed: + + mp.spawn(main_worker, nprocs=args.gpus, + args=(args,)) + else: + main_worker(args.gpus, args) + +def main_worker(nprocs, args): + local_rank = 0 + if args.distributed: + torch.distributed.init_process_group(backend="hccl", + init_method='env://', + world_size=args.nodes * args.gpus, + rank=nprocs) + local_rank = torch.distributed.get_rank() + args.is_master_node = not args.distributed or local_rank == 0 + if args.is_master_node: + print(args) + args.device_id = args.device_id + local_rank + print('device_id=', args.device_id) + device = torch.device(f'npu:{args.device_id}') # npu + torch.npu.set_device(device) # for npu + print("Downloading dataset...") + # Configure data loader + os.makedirs("../data/mnist", exist_ok=True) + train_dataset = datasets.MNIST( + "../../data/mnist", + train=True, + download=True, + transform=transforms.Compose( + [transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])] + )) + + print("Creating dataloader") + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + else: + train_sampler = None + dataloader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=( + train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + if args.is_master_node: + print("Creating model") + # create model + Tensor = torch.npu.FloatTensor + LOSS_G=[] + LOSS_D=[] + os.makedirs("../output", exist_ok=True) + os.chdir("../output") + generator = Generator() + discriminator = Discriminator() + if args.pretrained: + print("=> using pre-trained model GAN") + generator = Generator() + discriminator = Discriminator() + print("loading model of yours...") + checkpoint = torch.load(r'./checkpoint.pth.tar',map_location='cpu') + from collections import OrderedDict + new_state_dict_g = OrderedDict() + new_state_dict_d = OrderedDict() + for k, v in checkpoint['state_dict_G'].items(): + name = k.replace("module.", "") # remove `module.` + new_state_dict_g[name] = v + for k, v in checkpoint['state_dict_D'].items(): + name = k.replace("module.", "") # remove `module.` + new_state_dict_d[name] = v + # load params + generator.load_state_dict(new_state_dict_g) + discriminator.load_state_dict(new_state_dict_d) + LOSS_D = checkpoint['loss_d'] + LOSS_G = checkpoint['loss_g'] + args.start_epoch = checkpoint['epoch'] + + generator = generator.to(device) + discriminator = discriminator.to(device) + + adversarial_loss = nn.BCELoss().to(device) + + optimizer_G = apex.optimizers.NpuFusedAdam(generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) + optimizer_D = apex.optimizers.NpuFusedAdam(discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) + + if args.apex: + amp.register_float_function(torch, 'sigmoid') + amp.register_half_function(torch, 'addmm') + generator, optimizer_G = amp.initialize(generator, optimizer_G, + opt_level='O1', loss_scale=128,combine_grad=True) + + discriminator, optimizer_D = amp.initialize(discriminator, optimizer_D, + opt_level='O1', loss_scale=128,combine_grad=True) + + if args.distributed: + generator = DDP(generator, device_ids=[local_rank], broadcast_buffers=False) + discriminator = DDP(discriminator, device_ids=[local_rank], broadcast_buffers=False) + + if args.test_only : + Tensor = torch.npu.FloatTensor + generator = 
Generator().npu() + checkpoint = torch.load(r'./checkpoint.pth.tar', map_location='cpu') + + loss_d = checkpoint['loss_d'] + loss_g = checkpoint['loss_g'] + x = range(len(loss_d)) + plt.figure() + + plt.plot(x, loss_d, color='r', label="loss_d") + plt.plot(x, loss_g, color='g', label="loss_g") + plt.legend() + plt.xlabel('epoch') + plt.ylabel('value') + plt.savefig('LOSS_{}p_{}_{}.jpg'.format(args.gpus, args.lr, args.batch_size)) + + # create new OrderedDict that does not contain `module.` + from collections import OrderedDict + new_state_dict = OrderedDict() + for k, v in checkpoint['state_dict_G'].items(): + name = k.replace("module.", "") # remove `module.` + new_state_dict[name] = v + # load params + generator.load_state_dict(new_state_dict) + os.makedirs("image", exist_ok=True) + for i in range(200): + z = Variable(Tensor(np.random.normal(0, 1, (64, 100)))).npu() + + # Generate a batch of images + gen_imgs = generator(z) + + save_image(gen_imgs.data[:25], "image/%d.png" % i, nrow=5, normalize=True) + print("Generate done!") + return + + if args.is_master_node: + print("Start training") + start_time = time.time() + os.makedirs("training_images",exist_ok=True) + for epoch in range(args.start_epoch, args.n_epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + # train for one epoch + train_one_epoch(generator, discriminator, optimizer_G, optimizer_D, adversarial_loss, + epoch, args, dataloader,Tensor, LOSS_G,LOSS_D,device) + + if epoch == 50 or epoch == 199: + if args.apex and args.is_master_node: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'GAN', + 'state_dict_G': generator.state_dict(), + 'state_dict_D': discriminator.state_dict(), + 'optimizer_G': optimizer_G.state_dict(), + 'optimizer_D': optimizer_D.state_dict(), + 'loss_g': LOSS_G, + 'loss_d': LOSS_D, + 'apex': amp.state_dict() + }) + elif args.is_master_node: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': 'GAN', + 'state_dict_G': generator.state_dict(), + 'state_dict_D': discriminator.state_dict(), + 'optimizer_G': optimizer_G.state_dict(), + 'optimizer_D': optimizer_D.state_dict(), + 'loss_g': LOSS_G, + 'loss_d': LOSS_D + }) + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + if args.is_master_node: + print('Training time {}'.format(total_time_str)) + +def save_checkpoint(state, filename='./checkpoint.pth.tar'): + torch.save(state, filename) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=10): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +def parse_args(): + parser = argparse.ArgumentParser(description='PyTorch GAN Training') + parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training") + parser.add_argument("--batch_size", type=int, default=128, help="size of the batches") + parser.add_argument("--lr", type=float, default=0.0008, help="adam: learning rate") + parser.add_argument("--b1", type=float, 
default=0.5, help="adam: decay of first order momentum of gradient") + parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient") + parser.add_argument("--sample_interval", type=int, default=400, help="interval betwen image samples") + parser.add_argument("--latent_dim", type=int, default=100, help="dimensionality of the latent space") + parser.add_argument("--img_size", type=int, default=28, help="size of each image dimension") + parser.add_argument("--channels", type=int, default=1, help="number of image channels") + parser.add_argument("--gpus", type=int, default=8, help="num of gpus of per node") + parser.add_argument("--nodes", type=int, default=1) + parser.add_argument('--device_id', default=0, type=int, help='device id') + parser.add_argument("--test_only", type=int, default=None, help="only generate images") + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') + parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') + parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') + + parser.add_argument('--distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + ## for ascend 910 + parser.add_argument('--addr', default='127.0.0.1', + type=str, help='master addr') + parser.add_argument('--workers', default=16, type=int, + help='numbers of worker') + parser.add_argument('--apex', default=False, action='store_true', + help='use apex to train the model') + args = parser.parse_args() + return args +if __name__ == '__main__': + args = parse_args() main(args) \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/models.py b/PyTorch/contrib/cv/others/GAN_Pytorch/models.py index 6e6f1a998465dd292d956195ce630082f74df933..f04632ce980380e4a1060658946c11c4b323847b 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/models.py +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/models.py @@ -1,68 +1,68 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import torch.nn as nn -import numpy as np - -channels = 1 -image_size = 28 -img_shape =(channels,image_size,image_size) -latent_dim = 100 - -class Generator(nn.Module): - def __init__(self): - super(Generator, self).__init__() - - def block(in_feat, out_feat, normalize=True): - layers = [nn.Linear(in_feat, out_feat)] - if normalize: - layers.append(nn.BatchNorm1d(out_feat, 0.8)) - layers.append(nn.LeakyReLU(0.2, inplace=True)) - return layers - - self.model = nn.Sequential( - *block(latent_dim, 128, normalize=False), - *block(128, 256), - *block(256, 512), - *block(512, 1024), - nn.Linear(1024, int(np.prod(img_shape))), - nn.Tanh() - ) - - def forward(self, z): - img = self.model(z) - img = img.view(img.size(0), *img_shape) - return img - -class Discriminator(nn.Module): - def __init__(self): - super(Discriminator, self).__init__() - - self.model = nn.Sequential( - nn.Linear(int(np.prod(img_shape)), 512), - nn.LeakyReLU(0.2, inplace=True), - nn.Linear(512, 256), - nn.LeakyReLU(0.2, inplace=True), - nn.Linear(256, 1), - nn.Sigmoid(), - ) - - def forward(self, img): - img_flat = img.view(img.size(0), -1) - validity = self.model(img_flat) - - return validity - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
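A quick shape check for the MLP Generator and Discriminator defined in models.py, assuming the file from this patch is importable from the working directory: a batch of 100-dimensional latent vectors maps to 1x28x28 images, and the discriminator returns one sigmoid score per image.

```python
import torch
from models import Generator, Discriminator   # models.py from this patch

G, D = Generator(), Discriminator()
z = torch.randn(16, 100)                       # latent_dim = 100
imgs = G(z)                                    # Tanh output reshaped to the image grid
scores = D(imgs)                               # flattened internally, sigmoid output
print(imgs.shape, scores.shape)                # torch.Size([16, 1, 28, 28]) torch.Size([16, 1])
```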
+# ============================================================================ + +import torch.nn as nn +import numpy as np + +channels = 1 +image_size = 28 +img_shape =(channels,image_size,image_size) +latent_dim = 100 + +class Generator(nn.Module): + def __init__(self): + super(Generator, self).__init__() + + def block(in_feat, out_feat, normalize=True): + layers = [nn.Linear(in_feat, out_feat)] + if normalize: + layers.append(nn.BatchNorm1d(out_feat, 0.8)) + layers.append(nn.LeakyReLU(0.2, inplace=True)) + return layers + + self.model = nn.Sequential( + *block(latent_dim, 128, normalize=False), + *block(128, 256), + *block(256, 512), + *block(512, 1024), + nn.Linear(1024, int(np.prod(img_shape))), + nn.Tanh() + ) + + def forward(self, z): + img = self.model(z) + img = img.view(img.size(0), *img_shape) + return img + +class Discriminator(nn.Module): + def __init__(self): + super(Discriminator, self).__init__() + + self.model = nn.Sequential( + nn.Linear(int(np.prod(img_shape)), 512), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(512, 256), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(256, 1), + nn.Sigmoid(), + ) + + def forward(self, img): + img_flat = img.view(img.size(0), -1) + validity = self.model(img_flat) + + return validity + + diff --git a/PyTorch/contrib/cv/others/GAN_Pytorch/modelzoo_level.txt b/PyTorch/contrib/cv/others/GAN_Pytorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/GAN_Pytorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/GAN_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/Pix2Pix/README.md b/PyTorch/contrib/cv/others/Pix2Pix/README.md index f1b3173677ba3fe268fecba18ca2ac5fb7df269a..c76a5fa338a64b429b1698e01e7631aa3c700494 100644 --- a/PyTorch/contrib/cv/others/Pix2Pix/README.md +++ b/PyTorch/contrib/cv/others/Pix2Pix/README.md @@ -1,38 +1,38 @@ -# Pix2Pix - url=https://gitee.com/iiiimp/modelzoo/tree/master/contrib/PyTorch/Research/cv/gan/Pix2Pix - branch=master - -# 精度性能 - - | 名称 | 精度 | 性能 | - | :------: | :------: | :------: | - | GPU-1p | - | 15 | - | GPU-8p | - | 31 | - | NPU-1p | - | 8 | - | NPU-8p | - | 8 | -# 自验报告 - - # 1p train perf - # 是否正确输出了性能log文件 - bash ./test/train_performance_1p.sh\ --data_path=./datasets/facades - # 验收结果: OK - # 备注: 目标性能8FPS;验收测试性能8FPS; - - # 8p train perf - # 是否正确输出了性能log文件 - bash ./test/train_performance_8p.sh\ --data_path=./datasets/facades - # 验收结果: OK - # 备注: 目标性能15FPS;验收测试性能8PS; - - # 8p train full - # 是否正确输出了性能精度log文件,是否正确保存了模型文件 - bash ./test/train_full_8p.sh\ --data_path=./datasets/facades - # 验收结果: OK - # 备注:直接看图片效果 - - # 8p eval - # 是否正确输出了性能精度log文件 - bash ./test/train_eval_8p.sh\ --data_path=./datasets/facades --pth_path=./checkpoints/facades_pix2pix_npu_8p_full - # 验收结果: OK - # 备注:直接看图片效果 - +# Pix2Pix + url=https://gitee.com/iiiimp/modelzoo/tree/master/contrib/PyTorch/Research/cv/gan/Pix2Pix + branch=master + +# 精度性能 + + | 名称 | 精度 | 性能 | + | :------: | :------: | :------: | + | GPU-1p | - | 15 | + | GPU-8p | - | 31 | + | NPU-1p | - | 8 | + | NPU-8p | - | 8 | +# 自验报告 + + # 1p train perf + # 是否正确输出了性能log文件 + bash ./test/train_performance_1p.sh\ --data_path=./datasets/facades + # 验收结果: OK + # 备注: 目标性能8FPS;验收测试性能8FPS; + + # 8p train perf + # 是否正确输出了性能log文件 + bash ./test/train_performance_8p.sh\ --data_path=./datasets/facades + # 验收结果: OK + # 备注: 目标性能15FPS;验收测试性能8PS; + + # 8p 
train full + # 是否正确输出了性能精度log文件,是否正确保存了模型文件 + bash ./test/train_full_8p.sh\ --data_path=./datasets/facades + # 验收结果: OK + # 备注:直接看图片效果 + + # 8p eval + # 是否正确输出了性能精度log文件 + bash ./test/train_eval_8p.sh\ --data_path=./datasets/facades --pth_path=./checkpoints/facades_pix2pix_npu_8p_full + # 验收结果: OK + # 备注:直接看图片效果 + diff --git a/PyTorch/contrib/cv/others/Pix2Pix/modelzoo_level.txt b/PyTorch/contrib/cv/others/Pix2Pix/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/Pix2Pix/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/Pix2Pix/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/Pix2Pix/precision.py b/PyTorch/contrib/cv/others/Pix2Pix/precision.py index c14e8e8abc80d21f8af8667757ddafbdc0e3623f..99dc5002230f276cde25dc3bf83c514a26bb4c6e 100644 --- a/PyTorch/contrib/cv/others/Pix2Pix/precision.py +++ b/PyTorch/contrib/cv/others/Pix2Pix/precision.py @@ -1,198 +1,198 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) Soumith Chintala 2016, -# All rights reserved -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
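The precision.py script below compares CPU and NPU runs by registering forward/backward hooks on every module, caching inputs and outputs under unique keys, and finally diffing the two dictionaries together with per-parameter gradients. A CPU-only sketch of the capture step with a toy model; the real script additionally reloads the same initial weights on the NPU side before comparing:

```python
import torch
import torch.nn as nn

def hook_func(name, save_dict):
    def hook(module, inputs, outputs):
        # Mirror the patch: suffix duplicate keys so repeated calls don't overwrite.
        key, idx = name, 0
        while key in save_dict:
            key, idx = "%s-%d" % (name, idx), idx + 1
        save_dict[key] = outputs
    return hook

model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
captured = {}
for mod_name, module in model.named_modules():
    module.register_forward_hook(hook_func("[forward]:" + mod_name, captured))

out = model(torch.randn(3, 8))
out.sum().backward()
for name, param in model.named_parameters():
    captured["[grad]:" + name] = param.grad

print(sorted(captured))   # per-module forward outputs plus per-parameter grads
```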
- -# -*- coding: utf-8 -*- -"""用于精度比对 -""" - -import torch -import torch.nn as nn -import torchvision -import apex -from apex import amp -import copy -from models import networks - -##### 需自行改写部分 start ##### -# 获得模型 -def get_model(): - model = networks.define_G(3, 3, 64, 'unet_256', 'instance', - True, 'normal', 0.02, '[0]') - # model = networks.define_D(6, 64, 'basic', - # 3, 'instance','normal', 0.02, '[0]') - # 用于避免BN或者Dropout带来的影响,如果遇到无法evalbackward的现象,请注掉该行 - # model.eval() - - return model - -# 获得输入tensor -input_tensor = torch.randn(1, 3, 256, 256) -# input_tensor = torch.randn(1, 6, 256, 256) - -# 设置npu_device -npu_device = 'npu:0' - -# 设置amp -AMP_MODE = True - -# 设置NPU prof 文件输出 -NPU_PROF = True - -##### 需自行改写部分 end ##### - -def cri_func(x): - base_func = nn.CrossEntropyLoss() - shape_list = x.shape - N = shape_list[0] - R = 1 - if len(shape_list) > 1: - for r in shape_list[1:]: - R *= r - T = torch.randint(0,R, size=(N,)).to(x.device) - if str(T.device).startswith('npu'): - T = T.int() - return base_func(x.reshape(N, -1), T) - -# 设置hook -def hook_func(name, save_dict, module): - def hook_function(module, inputs, outputs): - inputs_key = name + '_inputs' - idx = 0 - while inputs_key in save_dict: - inputs_key = inputs_key.split('-')[0] + '-%d'%idx - idx +=1 - save_dict[inputs_key] = inputs - - outputs_key = name + '_outputs' - idx = 0 - while outputs_key in save_dict: - outputs_key = outputs_key.split('-')[0] + '-%d'%idx - idx +=1 - save_dict[outputs_key] = outputs - return hook_function - -##### CPU ##### -# CPU固定输入和权重 -model = get_model() -optimizer = torch.optim.SGD(model.parameters(), 0.1) -state_dict = copy.deepcopy(model.state_dict()) - -# CPU注册hook,cpu_dict用于存储对比对象 -cpu_dict = {} -for name, module in model.named_modules(): - module.register_forward_hook(hook_func('[forward]:' + name, cpu_dict, module)) - module.register_backward_hook(hook_func('[backward]:' + name, cpu_dict, module)) - -# CPU运行正反向,获取正反向每个module的输入输出和所有参数的grad -out = model(input_tensor) -loss = cri_func(out) -optimizer.zero_grad() -loss.backward() -optimizer.step() -for name, param in model.named_parameters(): - cpu_dict["[grad]:" + name] = param.grad - -##### NPU ##### -# 重新定义模型,清理模型状态,并加装权重,保持初始化一致 -model = get_model() -optimizer = torch.optim.SGD(model.parameters(), 0.1) -model.load_state_dict(state_dict) - -# NPU注册hook,npu_dict用于存储对比对象 -npu_dict = {} -for name, module in model.named_modules(): - module.register_forward_hook(hook_func('[forward]:' + name, npu_dict, module)) - module.register_backward_hook(hook_func('[backward]:' + name, npu_dict, module)) - -# 将model和input_tensor放到npu -torch.npu.set_device(npu_device) -model = model.npu() -input_tensor = input_tensor.npu() - -# amp可选项,不适用请注释 -if AMP_MODE: - optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), 0.1) - model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=1.0, combine_grad=True) - -# NPU运行正反向,获取正反向每个module的输入输出和所有参数的grad -out = model(input_tensor) -loss = cri_func(out) -optimizer.zero_grad() -if AMP_MODE: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() -else: - loss.backward() -optimizer.step() -for name, param in model.named_parameters(): - npu_dict["[grad]:" + name] = param.grad - - -##### ComPare ##### -# 递归得到对比值 -def compare(x1, x2, prefix=''): - if isinstance(x1, tuple): - if x1: - for idx in range(len(x1)): - try: - compare(x1[idx], x2[idx], prefix=prefix + '.%d' % idx) - except Exception as e: - # print(str(e)) - print(prefix, 'failed.') - elif isinstance(x1, torch.Tensor) 
and isinstance(x2, torch.Tensor): - try: - l1_error = (x1.half().float() - x2.cpu()).abs().mean() - rel_error = l1_error / (x1.abs().mean()) - print(prefix, 'l1_error: ', l1_error, 'rel_error', rel_error) - if l1_error * rel_error > 10 : - print('\n###\n',prefix, 'should checked!','\n###\n') - except Exception as e: - # print(str(e)) - print(prefix, 'failed.') - -for k in cpu_dict: - compare(cpu_dict[k], npu_dict[k], prefix=k) - -# 需要profiling的时候额外输出一次 -if NPU_PROF: - with torch.autograd.profiler.profile(use_npu=True) as prof: - out = model(input_tensor) - loss = cri_func(out) - optimizer.zero_grad() - if AMP_MODE: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - prof.export_chrome_trace("netD output.prof") # "output.prof"为输出文件地址 - - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
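The `precision.py` script in this hunk registers forward/backward hooks on every module so that a CPU run and an NPU run of the same Pix2Pix network can be compared layer by layer through L1 and relative error. A minimal, self-contained sketch of that capture-and-compare idea follows; it runs on CPU only and uses a float64 copy as a stand-in for the second (NPU/AMP) run, so the toy model, shapes, and the `attach_forward_hooks` helper are illustrative assumptions rather than code from the patch.

```python
import copy

import torch
import torch.nn as nn

def attach_forward_hooks(model, store):
    """Record every module's forward output under its qualified name."""
    for name, module in model.named_modules():
        def hook(mod, inputs, output, key=name):
            store[key] = output.detach().clone()
        module.register_forward_hook(hook)

torch.manual_seed(0)
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
model64 = copy.deepcopy(model).double()   # second run; precision.py puts the NPU/AMP model here
x = torch.randn(2, 8)

ref, test = {}, {}
attach_forward_hooks(model, ref)
attach_forward_hooks(model64, test)

model(x)              # reference run (float32 on CPU)
model64(x.double())   # comparison run (float64 stands in for the NPU run)

for key in ref:
    l1 = (ref[key] - test[key].float()).abs().mean().item()
    rel = l1 / (ref[key].abs().mean().item() + 1e-12)
    print(f"{key or '<root>'}: l1={l1:.3e} rel={rel:.3e}")
```

In the script above, `cpu_dict`/`npu_dict` play the role of `ref`/`test`, backward hooks are registered as well, and parameter gradients are additionally compared via `param.grad`.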
+ +# -*- coding: utf-8 -*- +"""用于精度比对 +""" + +import torch +import torch.nn as nn +import torchvision +import apex +from apex import amp +import copy +from models import networks + +##### 需自行改写部分 start ##### +# 获得模型 +def get_model(): + model = networks.define_G(3, 3, 64, 'unet_256', 'instance', + True, 'normal', 0.02, '[0]') + # model = networks.define_D(6, 64, 'basic', + # 3, 'instance','normal', 0.02, '[0]') + # 用于避免BN或者Dropout带来的影响,如果遇到无法evalbackward的现象,请注掉该行 + # model.eval() + + return model + +# 获得输入tensor +input_tensor = torch.randn(1, 3, 256, 256) +# input_tensor = torch.randn(1, 6, 256, 256) + +# 设置npu_device +npu_device = 'npu:0' + +# 设置amp +AMP_MODE = True + +# 设置NPU prof 文件输出 +NPU_PROF = True + +##### 需自行改写部分 end ##### + +def cri_func(x): + base_func = nn.CrossEntropyLoss() + shape_list = x.shape + N = shape_list[0] + R = 1 + if len(shape_list) > 1: + for r in shape_list[1:]: + R *= r + T = torch.randint(0,R, size=(N,)).to(x.device) + if str(T.device).startswith('npu'): + T = T.int() + return base_func(x.reshape(N, -1), T) + +# 设置hook +def hook_func(name, save_dict, module): + def hook_function(module, inputs, outputs): + inputs_key = name + '_inputs' + idx = 0 + while inputs_key in save_dict: + inputs_key = inputs_key.split('-')[0] + '-%d'%idx + idx +=1 + save_dict[inputs_key] = inputs + + outputs_key = name + '_outputs' + idx = 0 + while outputs_key in save_dict: + outputs_key = outputs_key.split('-')[0] + '-%d'%idx + idx +=1 + save_dict[outputs_key] = outputs + return hook_function + +##### CPU ##### +# CPU固定输入和权重 +model = get_model() +optimizer = torch.optim.SGD(model.parameters(), 0.1) +state_dict = copy.deepcopy(model.state_dict()) + +# CPU注册hook,cpu_dict用于存储对比对象 +cpu_dict = {} +for name, module in model.named_modules(): + module.register_forward_hook(hook_func('[forward]:' + name, cpu_dict, module)) + module.register_backward_hook(hook_func('[backward]:' + name, cpu_dict, module)) + +# CPU运行正反向,获取正反向每个module的输入输出和所有参数的grad +out = model(input_tensor) +loss = cri_func(out) +optimizer.zero_grad() +loss.backward() +optimizer.step() +for name, param in model.named_parameters(): + cpu_dict["[grad]:" + name] = param.grad + +##### NPU ##### +# 重新定义模型,清理模型状态,并加装权重,保持初始化一致 +model = get_model() +optimizer = torch.optim.SGD(model.parameters(), 0.1) +model.load_state_dict(state_dict) + +# NPU注册hook,npu_dict用于存储对比对象 +npu_dict = {} +for name, module in model.named_modules(): + module.register_forward_hook(hook_func('[forward]:' + name, npu_dict, module)) + module.register_backward_hook(hook_func('[backward]:' + name, npu_dict, module)) + +# 将model和input_tensor放到npu +torch.npu.set_device(npu_device) +model = model.npu() +input_tensor = input_tensor.npu() + +# amp可选项,不适用请注释 +if AMP_MODE: + optimizer = apex.optimizers.NpuFusedSGD(model.parameters(), 0.1) + model, optimizer = amp.initialize(model, optimizer, opt_level='O2', loss_scale=1.0, combine_grad=True) + +# NPU运行正反向,获取正反向每个module的输入输出和所有参数的grad +out = model(input_tensor) +loss = cri_func(out) +optimizer.zero_grad() +if AMP_MODE: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() +else: + loss.backward() +optimizer.step() +for name, param in model.named_parameters(): + npu_dict["[grad]:" + name] = param.grad + + +##### ComPare ##### +# 递归得到对比值 +def compare(x1, x2, prefix=''): + if isinstance(x1, tuple): + if x1: + for idx in range(len(x1)): + try: + compare(x1[idx], x2[idx], prefix=prefix + '.%d' % idx) + except Exception as e: + # print(str(e)) + print(prefix, 'failed.') + elif isinstance(x1, torch.Tensor) 
and isinstance(x2, torch.Tensor): + try: + l1_error = (x1.half().float() - x2.cpu()).abs().mean() + rel_error = l1_error / (x1.abs().mean()) + print(prefix, 'l1_error: ', l1_error, 'rel_error', rel_error) + if l1_error * rel_error > 10 : + print('\n###\n',prefix, 'should checked!','\n###\n') + except Exception as e: + # print(str(e)) + print(prefix, 'failed.') + +for k in cpu_dict: + compare(cpu_dict[k], npu_dict[k], prefix=k) + +# 需要profiling的时候额外输出一次 +if NPU_PROF: + with torch.autograd.profiler.profile(use_npu=True) as prof: + out = model(input_tensor) + loss = cri_func(out) + optimizer.zero_grad() + if AMP_MODE: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + prof.export_chrome_trace("netD output.prof") # "output.prof"为输出文件地址 + + diff --git a/PyTorch/contrib/cv/others/Pix2Pix/pytorch_prof.py b/PyTorch/contrib/cv/others/Pix2Pix/pytorch_prof.py index afb06b4b978436cd03d61ba2f71e041e20d7dba7..d79a619de98089f04918148bd6f1d14c2b1eaa02 100644 --- a/PyTorch/contrib/cv/others/Pix2Pix/pytorch_prof.py +++ b/PyTorch/contrib/cv/others/Pix2Pix/pytorch_prof.py @@ -1,141 +1,141 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright (c) Soumith Chintala 2016, -# All rights reserved -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""pytorch_prof.py -""" - -import torch -import torch.optim as optim -import torch.nn as nn -import time -import argparse -from models import networks - -def build_model(): - # 请自定义模型并加载预训练模型 - # import torchvision - # model = torchvision.models.resnet50(pretrained=True) - model = networks.define_G(3, 3, 64, 'unet_256', 'instance', - True, 'normal', 0.02, '[0]') - return model - - -def get_raw_data(): - # input_tensor = torch.randn(2, 3, 224, 224) - input_tensor = torch.randn(1, 3, 256, 256) - return input_tensor - - -def criterion(x): - base_func = nn.CrossEntropyLoss() - shape_list = x.shape - N = shape_list[0] - R = 1 - if len(shape_list) > 1: - for r in shape_list[1:]: - R *= r - T = torch.randint(0,R, size=(N,)).to(x.device) - if str(T.device).startswith('npu'): - T = T.int() - return base_func(x.reshape(N, -1), T) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='PyTorch Prof') - parser.add_argument('--device', type=str, default='cpu', - help='set which type of device used. 
Support cuda:0(device_id), npu:0(device_id).') - parser.add_argument('--amp', default=False, action='store_true', - help='use amp during prof') - parser.add_argument('--loss-scale', default=64.0, type=float, - help='loss scale using in amp, default 64.0, -1 means dynamic') - parser.add_argument('--opt-level', default='O2', type=str, - help='opt-level using in amp, default O2') - parser.add_argument('--FusedSGD', default=False, action='store_true', - help='use FusedSGD during prof') - - args = parser.parse_args() - - # 1.准备工作 - if args.device.startswith('cuda'): - torch.cuda.set_device(args.device) - prof_kwargs = {'use_cuda': True} - elif args.device.startswith('npu'): - torch.npu.set_device(args.device) - prof_kwargs = {'use_npu': True} - else: - prof_kwargs = {} - - # 2.构建模型 - model = build_model() - if args.FusedSGD: - from apex.optimizers import NpuFusedSGD - optimizer = NpuFusedSGD(model.parameters(), lr=0.01) - model = model.to(args.device) - if args.amp: - from apex import amp - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, - loss_scale=None if args.loss_scale == -1 else args.loss_scale, - combine_grad=True) - else: - optimizer = optim.SGD(model.parameters(), lr=0.01) - model = model.to(args.device) - if args.amp: - from apex import amp - model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, - loss_scale=None if args.loss_scale == -1 else args.loss_scale) - - # 3.生成input - input_tensor = get_raw_data() - input_tensor = input_tensor.to(args.device) - - # 先运行一次,保证prof得到的性能是正确的 - def run(): - output_tensor = model(input_tensor) - loss = criterion(output_tensor) - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - return loss - for i in range(5): - start_time = time.time() - loss = run() - print('iter: %d, loss: %.2f, time: %.2f'%(i, loss, (time.time() - start_time)*1000)) - - # 4. 执行forward+profiling - with torch.autograd.profiler.profile(**prof_kwargs) as prof: - run() - print(prof.key_averages().table()) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright (c) Soumith Chintala 2016, +# All rights reserved +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://spdx.org/licenses/BSD-3-Clause.html +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
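`pytorch_prof.py` in this hunk warms the model up for a few iterations and then wraps a single forward/backward/optimizer step in `torch.autograd.profiler.profile`, printing the operator table and exporting a Chrome trace. A stripped-down, CPU-only sketch of that pattern is shown below; the toy model, input shape, and output file name are assumptions for illustration, not values from the patch.

```python
import torch
import torch.nn as nn
import torch.optim as optim

# Toy setup standing in for the Pix2Pix generator profiled by pytorch_prof.py.
model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
                      nn.Flatten(), nn.Linear(8 * 32 * 32, 10))
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
x = torch.randn(2, 3, 32, 32)
target = torch.randint(0, 10, (2,))

def step():
    optimizer.zero_grad()
    loss = criterion(model(x), target)
    loss.backward()
    optimizer.step()
    return loss

for _ in range(3):        # warm-up so the profiled step is representative
    step()

# The script above passes use_cuda/use_npu here to enable device-side timers.
with torch.autograd.profiler.profile() as prof:
    step()

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
prof.export_chrome_trace("cpu_prof.json")   # inspect via chrome://tracing
```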
+ +# -*- coding: utf-8 -*- +"""pytorch_prof.py +""" + +import torch +import torch.optim as optim +import torch.nn as nn +import time +import argparse +from models import networks + +def build_model(): + # 请自定义模型并加载预训练模型 + # import torchvision + # model = torchvision.models.resnet50(pretrained=True) + model = networks.define_G(3, 3, 64, 'unet_256', 'instance', + True, 'normal', 0.02, '[0]') + return model + + +def get_raw_data(): + # input_tensor = torch.randn(2, 3, 224, 224) + input_tensor = torch.randn(1, 3, 256, 256) + return input_tensor + + +def criterion(x): + base_func = nn.CrossEntropyLoss() + shape_list = x.shape + N = shape_list[0] + R = 1 + if len(shape_list) > 1: + for r in shape_list[1:]: + R *= r + T = torch.randint(0,R, size=(N,)).to(x.device) + if str(T.device).startswith('npu'): + T = T.int() + return base_func(x.reshape(N, -1), T) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='PyTorch Prof') + parser.add_argument('--device', type=str, default='cpu', + help='set which type of device used. Support cuda:0(device_id), npu:0(device_id).') + parser.add_argument('--amp', default=False, action='store_true', + help='use amp during prof') + parser.add_argument('--loss-scale', default=64.0, type=float, + help='loss scale using in amp, default 64.0, -1 means dynamic') + parser.add_argument('--opt-level', default='O2', type=str, + help='opt-level using in amp, default O2') + parser.add_argument('--FusedSGD', default=False, action='store_true', + help='use FusedSGD during prof') + + args = parser.parse_args() + + # 1.准备工作 + if args.device.startswith('cuda'): + torch.cuda.set_device(args.device) + prof_kwargs = {'use_cuda': True} + elif args.device.startswith('npu'): + torch.npu.set_device(args.device) + prof_kwargs = {'use_npu': True} + else: + prof_kwargs = {} + + # 2.构建模型 + model = build_model() + if args.FusedSGD: + from apex.optimizers import NpuFusedSGD + optimizer = NpuFusedSGD(model.parameters(), lr=0.01) + model = model.to(args.device) + if args.amp: + from apex import amp + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, + loss_scale=None if args.loss_scale == -1 else args.loss_scale, + combine_grad=True) + else: + optimizer = optim.SGD(model.parameters(), lr=0.01) + model = model.to(args.device) + if args.amp: + from apex import amp + model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, + loss_scale=None if args.loss_scale == -1 else args.loss_scale) + + # 3.生成input + input_tensor = get_raw_data() + input_tensor = input_tensor.to(args.device) + + # 先运行一次,保证prof得到的性能是正确的 + def run(): + output_tensor = model(input_tensor) + loss = criterion(output_tensor) + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + return loss + for i in range(5): + start_time = time.time() + loss = run() + print('iter: %d, loss: %.2f, time: %.2f'%(i, loss, (time.time() - start_time)*1000)) + + # 4. 
执行forward+profiling + with torch.autograd.profiler.profile(**prof_kwargs) as prof: + run() + print(prof.key_averages().table()) prof.export_chrome_trace("pytorch_prof_%s.prof" % args.device) \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/RDN/Dockerfile b/PyTorch/contrib/cv/others/RDN/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/others/RDN/Dockerfile +++ b/PyTorch/contrib/cv/others/RDN/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/RDN/README.md b/PyTorch/contrib/cv/others/RDN/README.md index 6a58d58d887d38860d71c58b8523425e8ae603f8..bbda601483f9461c50c3d758fdd7efee9f99010e 100644 --- a/PyTorch/contrib/cv/others/RDN/README.md +++ b/PyTorch/contrib/cv/others/RDN/README.md @@ -1,51 +1,51 @@ -# RDN 训练 -# Residual Dense Network for Image Super-Resolution -This implements training of RDN on the DIV2K_x2 dataset. -- Reference implementation: -``` -url=https://github.com/yjn870/RDN-pytorch -``` - -## RDN Detail # - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. -Therefore, RDN is re-implemented using semantics such as custom OP. - - -## Requirements # - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- The DIV2k, Set5 Dataset can be downloaded from the links below.Move the datasets to directory ./data . - - Train Set : [Download DIV2k](https://www.dropbox.com/s/41sn4eie37hp6rh/DIV2K_x2.h5?dl=0) - - Test Set : [Download Set5](https://www.dropbox.com/s/pd52pkmaik1ri0h/rdn_x2.pth?dl=0) - -## Training # -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -```bash -# 1p train perf -bash test/train_performance_1p.sh - -# 8p train perf -bash test/train_performance_8p.sh - -# 8p train full -bash test/train_full_8p.sh - -# 8p eval -bash test/train_eval_8p.sh - -# finetuning -bash test/train_finetune_1p.sh -``` - -## RDN training result # - -| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 240 | 1 | 800 | O1 | -| 37.95 | 1716 | 8 | 800 | O1 | - - - +# RDN 训练 +# Residual Dense Network for Image Super-Resolution +This implements training of RDN on the DIV2K_x2 dataset. +- Reference implementation: +``` +url=https://github.com/yjn870/RDN-pytorch +``` + +## RDN Detail # + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. +Therefore, RDN is re-implemented using semantics such as custom OP. + + +## Requirements # + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- The DIV2k, Set5 Dataset can be downloaded from the links below.Move the datasets to directory ./data . 
+ - Train Set : [Download DIV2k](https://www.dropbox.com/s/41sn4eie37hp6rh/DIV2K_x2.h5?dl=0) + - Test Set : [Download Set5](https://www.dropbox.com/s/pd52pkmaik1ri0h/rdn_x2.pth?dl=0) + +## Training # +To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + +```bash +# 1p train perf +bash test/train_performance_1p.sh + +# 8p train perf +bash test/train_performance_8p.sh + +# 8p train full +bash test/train_full_8p.sh + +# 8p eval +bash test/train_eval_8p.sh + +# finetuning +bash test/train_finetune_1p.sh +``` + +## RDN training result # + +| Acc@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 240 | 1 | 800 | O1 | +| 37.95 | 1716 | 8 | 800 | O1 | + + + diff --git a/PyTorch/contrib/cv/others/RDN/docker_start.sh b/PyTorch/contrib/cv/others/RDN/docker_start.sh index 46ce9a02ec0532d6db324beaee7f7eab501b4565..944bca3cdac8e3f2d47ceb0e2b6eb181a405de11 100644 --- a/PyTorch/contrib/cv/others/RDN/docker_start.sh +++ b/PyTorch/contrib/cv/others/RDN/docker_start.sh @@ -1,25 +1,25 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/RDN/modelzoo_level.txt b/PyTorch/contrib/cv/others/RDN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/RDN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/RDN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/modelzoo_level.txt b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/modelzoo_level.txt index 4987c1069692fa42cf124e9045a3d42b733b2a79..0c22703439d27ef96518c74688e17502e7209c62 100644 --- a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/modelzoo_level.txt +++ 
b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/modelzoo_level.txt @@ -1,6 +1,6 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/requirements.txt b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/requirements.txt index 148f855f4f1b3d43fc499cd9ebae3f404561dcee..7cd93126a9ce5ff9a38a4a4e2c6da87a34d0d968 100644 --- a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/requirements.txt +++ b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/requirements.txt @@ -1,5 +1,5 @@ -h5py==3.3.0 -numpy==1.20.2 -Pillow==8.2.0 -tqdm==4.19.9 +h5py==3.3.0 +numpy==1.20.2 +Pillow==8.2.0 +tqdm==4.19.9 scikit-image==0.16.2 \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/edsr_x2/loss.py b/PyTorch/contrib/cv/others/edsr_x2/loss.py index 98219c473cdc2215f0f8c4547006fa0c288375d7..c89c9ce15945db2b5c897a737726e607cc30ee23 100644 --- a/PyTorch/contrib/cv/others/edsr_x2/loss.py +++ b/PyTorch/contrib/cv/others/edsr_x2/loss.py @@ -1,152 +1,152 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -import os -from importlib import import_module - -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class Loss(nn.modules.loss._Loss): - def __init__(self, args, ckp): - super(Loss, self).__init__() - print("Preparing loss function:") - - self.n_GPUs = args.n_GPUs - self.loss = [] - self.loss_module = nn.ModuleList() - for loss in args.loss.split("+"): - weight, loss_type = loss.split("*") - if loss_type == "MSE": - loss_function = nn.MSELoss() - elif loss_type == "L1": - loss_function = nn.L1Loss() - elif loss_type.find("VGG") >= 0: - module = import_module("loss.vgg") - loss_function = getattr(module, "VGG")( - loss_type[3:], rgb_range=args.rgb_range - ) - elif loss_type.find("GAN") >= 0: - module = import_module("loss.adversarial") - loss_function = getattr(module, "Adversarial")(args, loss_type) - - self.loss.append( - {"type": loss_type, "weight": float( - weight), "function": loss_function} - ) - if loss_type.find("GAN") >= 0: - self.loss.append( - {"type": "DIS", "weight": 1, "function": None}) - - if len(self.loss) > 1: - self.loss.append({"type": "Total", "weight": 0, "function": None}) - - for l in self.loss: - if l["function"] is not None: - print("{:.3f} * {}".format(l["weight"], l["type"])) - self.loss_module.append(l["function"]) - - self.log = torch.Tensor() - - device = torch.device("cpu" if args.cpu else "cuda") - if args.use_npu: - device = args.device - self.loss_module.to(device) - if not args.cpu and args.n_GPUs > 1: - self.loss_module = nn.DataParallel( - self.loss_module, range(args.n_GPUs)) - - if args.load != "": - self.load(ckp.dir, cpu=args.cpu) - - def forward(self, sr, hr): - losses = [] - for i, l in enumerate(self.loss): - if l["function"] is not None: - loss = l["function"](sr, hr) - effective_loss = l["weight"] * loss - losses.append(effective_loss) - self.log[-1, i] += effective_loss.item() - elif l["type"] == "DIS": - self.log[-1, i] += self.loss[i - 1]["function"].loss - - loss_sum = sum(losses) - if len(self.loss) > 1: - self.log[-1, -1] += loss_sum.item() - - return loss_sum - - def step(self): - for l in self.get_loss_module(): - if hasattr(l, "scheduler"): - l.scheduler.step() - - def start_log(self): - self.log = torch.cat((self.log, torch.zeros(1, len(self.loss)))) - - def end_log(self, n_batches): - self.log[-1].div_(n_batches) - - def display_loss(self, batch): - n_samples = batch + 1 - log = [] - for l, c in zip(self.loss, self.log[-1]): - log.append("[{}: {:.4f}]".format(l["type"], c / n_samples)) - - return "".join(log) - - def get_loss_module(self): - if self.n_GPUs == 1: - return self.loss_module - else: - return self.loss_module.module - - def save(self, apath): - torch.save(self.state_dict(), os.path.join(apath, "loss.pt")) - torch.save(self.log, os.path.join(apath, "loss_log.pt")) - - def load(self, apath, cpu=False): - if cpu: - kwargs = {"map_location": lambda storage, loc: storage} - else: - kwargs = {} - - self.load_state_dict(torch.load( - os.path.join(apath, "loss.pt"), **kwargs)) - self.log = torch.load(os.path.join(apath, "loss_log.pt")) - for l in self.get_loss_module(): - if hasattr(l, "scheduler"): - for _ in range(len(self.log)): - l.scheduler.step() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
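The EDSR `Loss` class removed and re-added by this hunk builds its criterion from a specification string of the form `weight*TYPE+weight*TYPE` (for example `1*L1` or `1*MSE+0.1*VGG54`), instantiating one weighted term per `+`-separated entry and summing the weighted terms in `forward`. The snippet below is a minimal illustration of that parsing-and-weighting scheme, restricted to the plain MSE/L1 terms; the helper names and the example specification are assumptions, and the VGG/GAN branches are omitted because they need the extra loss modules imported by the class.

```python
import torch
import torch.nn as nn

def build_losses(spec):
    """Parse a 'weight*TYPE+weight*TYPE' string into (weight, type, module) terms."""
    table = {"MSE": nn.MSELoss(), "L1": nn.L1Loss()}
    terms = []
    for item in spec.split("+"):
        weight, loss_type = item.split("*")
        terms.append((float(weight), loss_type, table[loss_type]))
    return terms

def total_loss(terms, sr, hr):
    # Weighted sum over all configured terms, mirroring Loss.forward's loss_sum.
    return sum(weight * fn(sr, hr) for weight, _, fn in terms)

terms = build_losses("1*L1+0.5*MSE")   # e.g. args.loss = "1*L1+0.5*MSE"
sr, hr = torch.rand(2, 3, 16, 16), torch.rand(2, 3, 16, 16)
print(total_loss(terms, sr, hr))
```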
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import os +from importlib import import_module + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Loss(nn.modules.loss._Loss): + def __init__(self, args, ckp): + super(Loss, self).__init__() + print("Preparing loss function:") + + self.n_GPUs = args.n_GPUs + self.loss = [] + self.loss_module = nn.ModuleList() + for loss in args.loss.split("+"): + weight, loss_type = loss.split("*") + if loss_type == "MSE": + loss_function = nn.MSELoss() + elif loss_type == "L1": + loss_function = nn.L1Loss() + elif loss_type.find("VGG") >= 0: + module = import_module("loss.vgg") + loss_function = getattr(module, "VGG")( + loss_type[3:], rgb_range=args.rgb_range + ) + elif loss_type.find("GAN") >= 0: + module = import_module("loss.adversarial") + loss_function = getattr(module, "Adversarial")(args, loss_type) + + self.loss.append( + {"type": loss_type, "weight": float( + weight), "function": loss_function} + ) + if loss_type.find("GAN") >= 0: + self.loss.append( + {"type": "DIS", "weight": 1, "function": None}) + + if len(self.loss) > 1: + self.loss.append({"type": "Total", "weight": 0, "function": None}) + + for l in self.loss: + if l["function"] is not None: + print("{:.3f} * {}".format(l["weight"], l["type"])) + self.loss_module.append(l["function"]) + + self.log = torch.Tensor() + + device = torch.device("cpu" if args.cpu else "cuda") + if args.use_npu: + device = args.device + self.loss_module.to(device) + if not args.cpu and args.n_GPUs > 1: + self.loss_module = nn.DataParallel( + self.loss_module, range(args.n_GPUs)) + + if args.load != "": + self.load(ckp.dir, cpu=args.cpu) + + def forward(self, sr, hr): + losses = [] + for i, l in enumerate(self.loss): + if l["function"] is not None: + loss = l["function"](sr, hr) + effective_loss = l["weight"] * loss + losses.append(effective_loss) + self.log[-1, i] += effective_loss.item() + elif l["type"] == "DIS": + 
self.log[-1, i] += self.loss[i - 1]["function"].loss + + loss_sum = sum(losses) + if len(self.loss) > 1: + self.log[-1, -1] += loss_sum.item() + + return loss_sum + + def step(self): + for l in self.get_loss_module(): + if hasattr(l, "scheduler"): + l.scheduler.step() + + def start_log(self): + self.log = torch.cat((self.log, torch.zeros(1, len(self.loss)))) + + def end_log(self, n_batches): + self.log[-1].div_(n_batches) + + def display_loss(self, batch): + n_samples = batch + 1 + log = [] + for l, c in zip(self.loss, self.log[-1]): + log.append("[{}: {:.4f}]".format(l["type"], c / n_samples)) + + return "".join(log) + + def get_loss_module(self): + if self.n_GPUs == 1: + return self.loss_module + else: + return self.loss_module.module + + def save(self, apath): + torch.save(self.state_dict(), os.path.join(apath, "loss.pt")) + torch.save(self.log, os.path.join(apath, "loss_log.pt")) + + def load(self, apath, cpu=False): + if cpu: + kwargs = {"map_location": lambda storage, loc: storage} + else: + kwargs = {} + + self.load_state_dict(torch.load( + os.path.join(apath, "loss.pt"), **kwargs)) + self.log = torch.load(os.path.join(apath, "loss_log.pt")) + for l in self.get_loss_module(): + if hasattr(l, "scheduler"): + for _ in range(len(self.log)): + l.scheduler.step() diff --git a/PyTorch/contrib/cv/others/stargan/Dockerfile b/PyTorch/contrib/cv/others/stargan/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/others/stargan/Dockerfile +++ b/PyTorch/contrib/cv/others/stargan/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/stargan/README.md b/PyTorch/contrib/cv/others/stargan/README.md index 90ce46ed16e3bf2047a58d4f2434057875aa16c0..54be720a4bd1d865c8526a480a4ae3989c46a830 100644 --- a/PyTorch/contrib/cv/others/stargan/README.md +++ b/PyTorch/contrib/cv/others/stargan/README.md @@ -1,55 +1,55 @@ -# StarGAN 训练 -This implements training of StarGAN on the CelebA dataset. -- Reference implementation: -``` -url=https://github.com/yunjey/stargan -``` - - - -## Requirements # - -- Install Packages -- `pip install -r requirements.txt` -- The CelebA dataset can be downloaded from the [link](https://www.dropbox.com/s/d1kjpkqklf0uw77/celeba.zip?dl=0). You can use `wget` to download as well. - - `wget -N https://www.dropbox.com/s/d1kjpkqklf0uw77/celeba.zip?dl=0` -- Move the datasets to root directory and run the script `unzip_dataset.sh`. 
- - `bash ./unzip_dataset.sh` - - - -## Training # -To train a model, change the working directory to `./NPU`,then run: - -```bash -# 1p train perf -bash ./test/train_performance_1p.sh '[your_dataset_path]' - -# 8p train perf -bash ./test/train_performance_8p.sh '[your_dataset_path]' - -# 1p train full -bash ./test/train_full_1p.sh '[your_dataset_path]' - -# 8p train full -bash ./test/train_full_8p.sh '[your_dataset_path]' - -# finetuning -bash ./test/train_finetune_1p.sh '[your_dataset_path]' -``` -After running,you can see the results in `./NPU/stargan_full_8p/samples` or `./NPU/stargan_full_1p/samples` - - - - -## GAN training result # - -| Type | FPS | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | -| NPU-1p | 95 | 1 | O1 | -| NPU-8p | 615 | 50 | O1 | -| GPU-1p | 62 | 1 | O1 | -| GPU-8p | 517 | 50 | O1 | - - - +# StarGAN 训练 +This implements training of StarGAN on the CelebA dataset. +- Reference implementation: +``` +url=https://github.com/yunjey/stargan +``` + + + +## Requirements # + +- Install Packages +- `pip install -r requirements.txt` +- The CelebA dataset can be downloaded from the [link](https://www.dropbox.com/s/d1kjpkqklf0uw77/celeba.zip?dl=0). You can use `wget` to download as well. + - `wget -N https://www.dropbox.com/s/d1kjpkqklf0uw77/celeba.zip?dl=0` +- Move the datasets to root directory and run the script `unzip_dataset.sh`. + - `bash ./unzip_dataset.sh` + + + +## Training # +To train a model, change the working directory to `./NPU`,then run: + +```bash +# 1p train perf +bash ./test/train_performance_1p.sh '[your_dataset_path]' + +# 8p train perf +bash ./test/train_performance_8p.sh '[your_dataset_path]' + +# 1p train full +bash ./test/train_full_1p.sh '[your_dataset_path]' + +# 8p train full +bash ./test/train_full_8p.sh '[your_dataset_path]' + +# finetuning +bash ./test/train_finetune_1p.sh '[your_dataset_path]' +``` +After running,you can see the results in `./NPU/stargan_full_8p/samples` or `./NPU/stargan_full_1p/samples` + + + + +## GAN training result # + +| Type | FPS | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | +| NPU-1p | 95 | 1 | O1 | +| NPU-8p | 615 | 50 | O1 | +| GPU-1p | 62 | 1 | O1 | +| GPU-8p | 517 | 50 | O1 | + + + diff --git a/PyTorch/contrib/cv/others/stargan/docker_start.sh b/PyTorch/contrib/cv/others/stargan/docker_start.sh index 46ce9a02ec0532d6db324beaee7f7eab501b4565..944bca3cdac8e3f2d47ceb0e2b6eb181a405de11 100644 --- a/PyTorch/contrib/cv/others/stargan/docker_start.sh +++ b/PyTorch/contrib/cv/others/stargan/docker_start.sh @@ -1,25 +1,25 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + 
--device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/contrib/cv/others/stargan/modelzoo_level.txt b/PyTorch/contrib/cv/others/stargan/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/others/stargan/modelzoo_level.txt +++ b/PyTorch/contrib/cv/others/stargan/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_ch.md b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_ch.md index 0b511652cb6e16ad0fe9b3c6e1a806cb726ba7a1..c52f043e2768c6411556e7d03dfae58b3ad5b437 100644 --- a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_ch.md +++ b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_ch.md @@ -56,8 +56,8 @@ ${3DMPPE_ROOTNET} ## 训练模型 - 注意,`test`目录下的`output`文件夹也会保存代码运行的日志。 -- 运行 `train_1p.py` 或 `train_8p.py` 进行模型训练: - +- 运行 `train_1p.py` 或 `train_8p.py` 进行模型训练: + ``` # 1p train perf @@ -85,7 +85,7 @@ bash test/train_full_8p.sh --data_path=xxx # 其它说明 # -- 运行 `demo.py`: +- 运行 `demo.py`: 进入 `demo` 文件夹。运行demo的输入文件已经提供(`input.jpg`),运行结束后会在该目录下得到输出的图片。将 `snapshot_XX.pth` 放置在 `./output/model_dump/` 目录下。 修改 `run_demo.sh` 中 `test_epoch` 的参数为 `XX` ,与刚才的 `.pth` 文件的数字对应。最后,运行指令: ``` diff --git a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_en.md b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_en.md index c8d3de5dcea4b91ba1bea185c36f5b4a1477b559..677449b5bb9ee3e49b8506547bef6a8c301eb1b9 100644 --- a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_en.md +++ b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/README_en.md @@ -41,7 +41,7 @@ ${3DMPPE_ROOTNET} | | | | |-- ... ## image files | | | |-- MuPoTS-3D.json ``` -- You need to follow directory structure of the `output` as below. +- You need to follow directory structure of the `output` as below. ``` ${3DMPPE_ROOTNET} |-- output @@ -56,7 +56,7 @@ ${3DMPPE_ROOTNET} ## Training # -- Note that the `output` folder under the `test` directory will also save the code running log. +- Note that the `output` folder under the `test` directory will also save the code running log. - To train a model, run `train_1p.py` or `train_8p.py`: ```bash @@ -85,7 +85,7 @@ bash test/train_full_8p.sh --data_path=xxx # Else # -- run `demo.py`: +- run `demo.py`: Enter the demo folder. The input file for running the demo has been provided(`input.jpg`). After running, the output pictures will be obtained in this directory. First, place `snapshot_XX.pth` in directory `./output/model_dump/`. Then, Change the parameter `test_epoch` of `run_demo.sh` to `XX` ,which corresponds to the number of `.pth` file just now. 
Finally, run the command: ``` diff --git a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/common/timer.py b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/common/timer.py index 994424e9862d51ca78ec23174bfb41dbb8a18896..3653f621de41911c573c55ae61affe375d52423e 100644 --- a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/common/timer.py +++ b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/common/timer.py @@ -1,53 +1,53 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -import time - -class Timer(object): - """A simple timer.""" - def __init__(self): - self.total_time = 0. - self.calls = 0 - self.start_time = 0. - self.diff = 0. - self.average_time = 0. - self.warm_up = 0 - - def tic(self): - # using time.time instead of time.clock because time time.clock - # does not normalize for multithreading - self.start_time = time.time() - - def toc(self, average=True): - self.diff = time.time() - self.start_time - if self.warm_up < 10: - self.warm_up += 1 - return self.diff - else: - self.total_time += self.diff - self.calls += 1 - self.average_time = self.total_time / self.calls - - if average: - return self.average_time - else: - return self.diff +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import time + +class Timer(object): + """A simple timer.""" + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. 
+ self.warm_up = 0 + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + if self.warm_up < 10: + self.warm_up += 1 + return self.diff + else: + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + + if average: + return self.average_time + else: + return self.diff diff --git a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/modelzoo_level.txt b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/requirements.txt b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/requirements.txt index 5655d0ed6729f55249b51c2f156d01a1689ab29c..0865b11eec36034a3bc5cf747759e41b490e8c47 100644 --- a/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/requirements.txt +++ b/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet/requirements.txt @@ -1,14 +1,14 @@ -torch==1.5.0+ascend.post3.20210930 -apex==0.1+ascend.20210930 -torchvision==0.2.2.post3 -cycler==0.10.0 -Cpython==0.29.24 -matplotlib==3.4.3 -numpy==1.21.1 -opencv-python==4.5.3.56 -Pillow==8.2.0 -pycocotools==2.0.2 -future==0.18.2 -scikit-learn==1.0 -scipy==1.7.1 +torch==1.5.0+ascend.post3.20210930 +apex==0.1+ascend.20210930 +torchvision==0.2.2.post3 +cycler==0.10.0 +Cpython==0.29.24 +matplotlib==3.4.3 +numpy==1.21.1 +opencv-python==4.5.3.56 +Pillow==8.2.0 +pycocotools==2.0.2 +future==0.18.2 +scikit-learn==1.0 +scipy==1.7.1 tqdm==4.62.3 \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md index 5faa89a351fd80dc14556f619b2cabe6ece7bcd9..4d9e9f6805eea5fe4ae5842e501e93f04f0f7b86 100644 --- a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md +++ b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md @@ -1,93 +1,93 @@ -# HigherHRNet - -This implements training of HigherHRNet on the COCO dataset, mainly modified from GitHub - HRNet/HigherHRNet-Human-Pose-Estimation - -1. Install package dependencies. Make sure the python environment >=3.7 - - ```bash - pip install -r requirements.txt - ``` -2. Install COCOAPI: - -``` -# COCOAPI=/path/to/clone/cocoapi - -git clone https://github.com/cocodataset/cocoapi.git $COCOAPI -cd $COCOAPI/PythonAPI - -# Install into global site-packages - -make install - -# Alternatively, if you do not have permissions or prefer - -# not to install the COCO API into global site-packages - -python3 setup.py install --user -Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. -``` - - - -3. 
Download pretrained models from the releases of HigherHRNet-Human-Pose-Estimation to the specified directory - - ```txt - ${POSE_ROOT} - `-- models - `-- pytorch - |-- imagenet - | `-- hrnet_w32-36af842e.pth - `-- pose_coco - `-- pose_higher_hrnet_w32_512.pth - ``` - -### Data Preparation - -Please download or link COCO to ${POSE_ROOT}/data/coco/, and make them look like this: - -```txt -${POSE_ROOT}/data/coco/ -|-- annotations -| |-- person_keypoints_train2017.json -| `-- person_keypoints_val2017.json -|-- person_detection_results -| |-- COCO_val2017_detections_AP_H_56_person.json -| `-- COCO_test-dev2017_detections_AP_H_609_person.json -`-- images - |-- train2017 - | |-- 000000000009.jpg - | |-- ... - `-- val2017 - |-- 000000000139.jpg - |-- ... -``` -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -```bash -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=real_data_path - -#test 8p accuracy -bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path - -``` - -## HigherHRNet training result - -| 名称 | 精度 | 性能 | -| :----: | :--: | :------: | -| NPU-8p | 66.9 | 2.2s/step | -| GPU-8p | 67.1 | 1.2s/step | -| NPU-1p | | 1.1s/step | +# HigherHRNet + +This implements training of HigherHRNet on the COCO dataset, mainly modified from GitHub - HRNet/HigherHRNet-Human-Pose-Estimation + +1. Install package dependencies. Make sure the python environment >=3.7 + + ```bash + pip install -r requirements.txt + ``` +2. Install COCOAPI: + +``` +# COCOAPI=/path/to/clone/cocoapi + +git clone https://github.com/cocodataset/cocoapi.git $COCOAPI +cd $COCOAPI/PythonAPI + +# Install into global site-packages + +make install + +# Alternatively, if you do not have permissions or prefer + +# not to install the COCO API into global site-packages + +python3 setup.py install --user +Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. +``` + + + +3. Download pretrained models from the releases of HigherHRNet-Human-Pose-Estimation to the specified directory + + ```txt + ${POSE_ROOT} + `-- models + `-- pytorch + |-- imagenet + | `-- hrnet_w32-36af842e.pth + `-- pose_coco + `-- pose_higher_hrnet_w32_512.pth + ``` + +### Data Preparation + +Please download or link COCO to ${POSE_ROOT}/data/coco/, and make them look like this: + +```txt +${POSE_ROOT}/data/coco/ +|-- annotations +| |-- person_keypoints_train2017.json +| `-- person_keypoints_val2017.json +|-- person_detection_results +| |-- COCO_val2017_detections_AP_H_56_person.json +| `-- COCO_test-dev2017_detections_AP_H_609_person.json +`-- images + |-- train2017 + | |-- 000000000009.jpg + | |-- ... + `-- val2017 + |-- 000000000139.jpg + |-- ... 
+``` +## Training + +To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: + +```bash +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=real_data_path + +#test 8p accuracy +bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path + +``` + +## HigherHRNet training result + +| 名称 | 精度 | 性能 | +| :----: | :--: | :------: | +| NPU-8p | 66.9 | 2.2s/step | +| GPU-8p | 67.1 | 1.2s/step | +| NPU-1p | | 1.1s/step | | GPU-1p | | 0.7s/step| \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/requirements.txt b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/requirements.txt index 720188a90e23c475b75d763a66bd651a64793194..03bc1448f394f5f5dbd929e8b7a198425b58acfe 100644 --- a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/requirements.txt +++ b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/requirements.txt @@ -1,13 +1,13 @@ -EasyDict==1.7 -opencv-python -Cython -scipy -pandas -pyyaml -json_tricks -scikit-image -tensorboardX -yacs -cffi -munkres +EasyDict==1.7 +opencv-python +Cython +scipy +pandas +pyyaml +json_tricks +scikit-image +tensorboardX +yacs +cffi +munkres tqdm \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/docs/merge_docs.sh b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/docs/merge_docs.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_camera.json b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_camera.json old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_data.json b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_data.json old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_joint_3d.json b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/interhand2d/test_interhand2d_joint_3d.json old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/panoptic/test_panoptic.json b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tests/data/panoptic/test_panoptic.json old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tools/dist_test.sh 
b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tools/dist_test.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tools/dist_train.sh b/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch/mmpose-master/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/LICENSE b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/LICENSE index 753842b6720f7980d411ecf2c78eb4ef220b9df8..f49a4e16e68b128803cc2dcea614603632b04eac 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/LICENSE +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/datasets/__init__.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/datasets/__init__.py index 453a036caaabcd54ddbca118df386e4268f6574f..6147909dea67fb89ce7466da833d27c97062e5c3 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/datasets/__init__.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/datasets/__init__.py @@ -1,14 +1,14 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # ============================================================================ \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/models/__init__.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/models/__init__.py index 453a036caaabcd54ddbca118df386e4268f6574f..6147909dea67fb89ce7466da833d27c97062e5c3 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/models/__init__.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/models/__init__.py @@ -1,14 +1,14 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ============================================================================ \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modelzoo_level.txt b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modelzoo_level.txt +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modules/__init__.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modules/__init__.py index 453a036caaabcd54ddbca118df386e4268f6574f..6147909dea67fb89ce7466da833d27c97062e5c3 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modules/__init__.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/modules/__init__.py @@ -1,14 +1,14 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # ============================================================================ \ No newline at end of file diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/multi_epochs_dataloaders.py b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/multi_epochs_dataloaders.py index 49b9ae910a987752af23c44d4235d54e1a74334d..256ccca3c96ab7e44d267217f714a3b8d7228522 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/multi_epochs_dataloaders.py +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/multi_epochs_dataloaders.py @@ -1,46 +1,46 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -import torch - - -class MultiEpochsDataLoader(torch.utils.data.DataLoader): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._DataLoader__initialized = False - self.batch_sampler = _RepeatSampler(self.batch_sampler) - self._DataLoader__initialized = True - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for _ in range(len(self)): - yield next(self.iterator) - - -class _RepeatSampler(object): - """ Sampler that repeats forever. - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch + + +class MultiEpochsDataLoader(torch.utils.data.DataLoader): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._DataLoader__initialized = False + self.batch_sampler = _RepeatSampler(self.batch_sampler) + self._DataLoader__initialized = True + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for _ in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler(object): + """ Sampler that repeats forever. + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) diff --git a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/requirements.txt b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/requirements.txt index 279fbeb14a7d2d6eb23261c89567549beb078bed..aa1d3fb15f1c63aa0f1b11501b354d4770b42da3 100644 --- a/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/requirements.txt +++ b/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose/requirements.txt @@ -1,2 +1,2 @@ -opencv-python -pycocotools==2.0.2 +opencv-python +pycocotools==2.0.2 diff --git a/PyTorch/contrib/cv/pose_estimation/MSPN/dataset/MPII/mpii.py b/PyTorch/contrib/cv/pose_estimation/MSPN/dataset/MPII/mpii.py index 58f7422a2617f27af3f7c0ffc195cda5480ff2d7..42d224cdb8b3019f0892b90494a464085667e443 100644 --- a/PyTorch/contrib/cv/pose_estimation/MSPN/dataset/MPII/mpii.py +++ b/PyTorch/contrib/cv/pose_estimation/MSPN/dataset/MPII/mpii.py @@ -1,198 +1,198 @@ -# encoding: utf-8 -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -@author: Wenbo Li -@contact: fenglinglwb@gmail.com -""" - -import cv2 -import json -import numpy as np -import os -from scipy.io import loadmat -from collections import OrderedDict - -from dataset.JointsDataset import JointsDataset - -class MPIIDataset(JointsDataset): - - def __init__(self, DATASET, stage, transform=None): - super().__init__(DATASET, stage, transform) - self.cur_dir = os.path.split(os.path.realpath(__file__))[0] - - self.train_gt_file = 'train.json' - self.train_gt_path = os.path.join(self.cur_dir, 'gt_json', - self.train_gt_file) - - self.val_gt_file = 'valid.json' - self.val_gt_path = os.path.join(self.cur_dir, 'gt_json', - self.val_gt_file) - self.val_gt_mat = os.path.join(self.cur_dir, 'gt_json', 'valid.mat') - - self.test_det_file = 'test.json' - self.test_det_path = os.path.join(self.cur_dir, 'det_json', - self.test_det_file) - - self.data = self._get_data() - self.data_num = len(self.data) - - def _get_data(self): - data = list() - - if self.stage == 'train': - mpii = json.load(open(self.train_gt_path)) - elif self.stage == 'val': - mpii = json.load(open(self.val_gt_path)) - else: - mpii = json.load(open(self.test_det_path)) - - for d in mpii: - img_name = d['image'] - img_id = img_name.split('.')[0] - img_path = os.path.join(self.cur_dir, 'images', img_name) - - center = np.array(d['center'], dtype=np.float32) - scale = np.array([d['scale'], d['scale']], dtype=np.float32) - - if center[0] != -1: - center[1] = center[1] + 15 * scale[1] - center -= 1 - - if self.stage == 'test': - joints = np.zeros((self.keypoint_num, 3), dtype=np.float32) - else: - joints = np.array(d['joints'], dtype=np.float32) - joints -= 1 - joints_vis = np.array(d['joints_vis'], dtype=np.float32) - joints_vis = joints_vis.reshape(-1, 1) * 2 - joints = np.concatenate((joints, joints_vis), axis=1) - - data.append(dict(center=center, - img_id=img_id, - img_path=img_path, - img_name=img_name, - joints=joints, - scale=scale)) - - return data - - # referring msra high resolution - def evaluate(self, preds): - preds = preds[:, :, 0:2] + 1.0 - - SC_BIAS = 0.6 - threshold = 0.5 - - gt_file = os.path.join(self.val_gt_mat) - gt_dict = loadmat(gt_file) - dataset_joints = gt_dict['dataset_joints'] - jnt_missing = gt_dict['jnt_missing'] - pos_gt_src = gt_dict['pos_gt_src'] - headboxes_src = gt_dict['headboxes_src'] - - pos_pred_src = np.transpose(preds, [1, 2, 0]) - - head = np.where(dataset_joints == 'head')[1][0] - lsho = np.where(dataset_joints == 'lsho')[1][0] - lelb = np.where(dataset_joints == 'lelb')[1][0] - lwri = np.where(dataset_joints == 'lwri')[1][0] - lhip = np.where(dataset_joints == 'lhip')[1][0] - lkne = np.where(dataset_joints == 'lkne')[1][0] - lank = np.where(dataset_joints == 'lank')[1][0] - - rsho = np.where(dataset_joints == 'rsho')[1][0] - relb = np.where(dataset_joints == 'relb')[1][0] - rwri = np.where(dataset_joints == 'rwri')[1][0] - rkne = np.where(dataset_joints == 'rkne')[1][0] - rank = np.where(dataset_joints == 'rank')[1][0] - rhip = np.where(dataset_joints == 'rhip')[1][0] - - 
jnt_visible = 1 - jnt_missing - uv_error = pos_pred_src - pos_gt_src - uv_err = np.linalg.norm(uv_error, axis=1) - headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] - headsizes = np.linalg.norm(headsizes, axis=0) - headsizes *= SC_BIAS - scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) - scaled_uv_err = np.divide(uv_err, scale) - scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) - jnt_count = np.sum(jnt_visible, axis=1) - less_than_threshold = np.multiply((scaled_uv_err <= threshold), - jnt_visible) - PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) - - rng = np.arange(0, 0.5+0.01, 0.01) - pckAll = np.zeros((len(rng), 16)) - - for r in range(len(rng)): - threshold = rng[r] - less_than_threshold = np.multiply(scaled_uv_err <= threshold, - jnt_visible) - pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), - jnt_count) - - PCKh = np.ma.array(PCKh, mask=False) - PCKh.mask[6:8] = True - - jnt_count = np.ma.array(jnt_count, mask=False) - jnt_count.mask[6:8] = True - jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) - - name_value = [ - ('Head', PCKh[head]), - ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), - ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), - ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), - ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), - ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), - ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), - ('Mean', np.sum(PCKh * jnt_ratio)), - ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) - ] - name_value = OrderedDict(name_value) - - print(name_value) - - def visualize(self, img, joints, score=None): - pairs = [[0, 1], [1, 2], [2, 6], [3, 4], [3, 6], [4, 5], [6, 7], - [7, 8], [8, 9], [8, 12], [8, 13], [10, 11], [11, 12], - [13, 14], [14, 15]] - color = np.random.randint(0, 256, (self.keypoint_num, 3)).tolist() - - for i in range(self.keypoint_num): - if joints[i, 0] > 0 and joints[i, 1] > 0: - cv2.circle(img, tuple(joints[i, :2]), 2, tuple(color[i]), 2) - if score: - cv2.putText(img, score, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, - (128, 255, 0), 2) - - def draw_line(img, p1, p2): - c = (0, 0, 255) - if p1[0] > 0 and p1[1] > 0 and p2[0] > 0 and p2[1] > 0: - cv2.line(img, tuple(p1), tuple(p2), c, 2) - - for pair in pairs: - draw_line(img, joints[pair[0] - 1], joints[pair[1] - 1]) - - return img - - -if __name__ == '__main__': - from dataset.attribute import load_dataset - dataset = load_dataset('MPII') - mpii = MPIIDataset(dataset, 'val') - print(mpii.data_num) - +# encoding: utf-8 +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +@author: Wenbo Li +@contact: fenglinglwb@gmail.com +""" + +import cv2 +import json +import numpy as np +import os +from scipy.io import loadmat +from collections import OrderedDict + +from dataset.JointsDataset import JointsDataset + +class MPIIDataset(JointsDataset): + + def __init__(self, DATASET, stage, transform=None): + super().__init__(DATASET, stage, transform) + self.cur_dir = os.path.split(os.path.realpath(__file__))[0] + + self.train_gt_file = 'train.json' + self.train_gt_path = os.path.join(self.cur_dir, 'gt_json', + self.train_gt_file) + + self.val_gt_file = 'valid.json' + self.val_gt_path = os.path.join(self.cur_dir, 'gt_json', + self.val_gt_file) + self.val_gt_mat = os.path.join(self.cur_dir, 'gt_json', 'valid.mat') + + self.test_det_file = 'test.json' + self.test_det_path = os.path.join(self.cur_dir, 'det_json', + self.test_det_file) + + self.data = self._get_data() + self.data_num = len(self.data) + + def _get_data(self): + data = list() + + if self.stage == 'train': + mpii = json.load(open(self.train_gt_path)) + elif self.stage == 'val': + mpii = json.load(open(self.val_gt_path)) + else: + mpii = json.load(open(self.test_det_path)) + + for d in mpii: + img_name = d['image'] + img_id = img_name.split('.')[0] + img_path = os.path.join(self.cur_dir, 'images', img_name) + + center = np.array(d['center'], dtype=np.float32) + scale = np.array([d['scale'], d['scale']], dtype=np.float32) + + if center[0] != -1: + center[1] = center[1] + 15 * scale[1] + center -= 1 + + if self.stage == 'test': + joints = np.zeros((self.keypoint_num, 3), dtype=np.float32) + else: + joints = np.array(d['joints'], dtype=np.float32) + joints -= 1 + joints_vis = np.array(d['joints_vis'], dtype=np.float32) + joints_vis = joints_vis.reshape(-1, 1) * 2 + joints = np.concatenate((joints, joints_vis), axis=1) + + data.append(dict(center=center, + img_id=img_id, + img_path=img_path, + img_name=img_name, + joints=joints, + scale=scale)) + + return data + + # referring msra high resolution + def evaluate(self, preds): + preds = preds[:, :, 0:2] + 1.0 + + SC_BIAS = 0.6 + threshold = 0.5 + + gt_file = os.path.join(self.val_gt_mat) + gt_dict = loadmat(gt_file) + dataset_joints = gt_dict['dataset_joints'] + jnt_missing = gt_dict['jnt_missing'] + pos_gt_src = gt_dict['pos_gt_src'] + headboxes_src = gt_dict['headboxes_src'] + + pos_pred_src = np.transpose(preds, [1, 2, 0]) + + head = np.where(dataset_joints == 'head')[1][0] + lsho = np.where(dataset_joints == 'lsho')[1][0] + lelb = np.where(dataset_joints == 'lelb')[1][0] + lwri = np.where(dataset_joints == 'lwri')[1][0] + lhip = np.where(dataset_joints == 'lhip')[1][0] + lkne = np.where(dataset_joints == 'lkne')[1][0] + lank = np.where(dataset_joints == 'lank')[1][0] + + rsho = np.where(dataset_joints == 'rsho')[1][0] + relb = np.where(dataset_joints == 'relb')[1][0] + rwri = np.where(dataset_joints == 'rwri')[1][0] + rkne = np.where(dataset_joints == 'rkne')[1][0] + rank = np.where(dataset_joints == 'rank')[1][0] + rhip = np.where(dataset_joints == 'rhip')[1][0] + + jnt_visible = 1 - jnt_missing + uv_error = pos_pred_src - pos_gt_src + uv_err = np.linalg.norm(uv_error, axis=1) + headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] + headsizes = np.linalg.norm(headsizes, axis=0) + headsizes *= SC_BIAS + scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) + scaled_uv_err = np.divide(uv_err, scale) + scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) + jnt_count = 
np.sum(jnt_visible, axis=1) + less_than_threshold = np.multiply((scaled_uv_err <= threshold), + jnt_visible) + PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) + + rng = np.arange(0, 0.5+0.01, 0.01) + pckAll = np.zeros((len(rng), 16)) + + for r in range(len(rng)): + threshold = rng[r] + less_than_threshold = np.multiply(scaled_uv_err <= threshold, + jnt_visible) + pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), + jnt_count) + + PCKh = np.ma.array(PCKh, mask=False) + PCKh.mask[6:8] = True + + jnt_count = np.ma.array(jnt_count, mask=False) + jnt_count.mask[6:8] = True + jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) + + name_value = [ + ('Head', PCKh[head]), + ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), + ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), + ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), + ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), + ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), + ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), + ('Mean', np.sum(PCKh * jnt_ratio)), + ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) + ] + name_value = OrderedDict(name_value) + + print(name_value) + + def visualize(self, img, joints, score=None): + pairs = [[0, 1], [1, 2], [2, 6], [3, 4], [3, 6], [4, 5], [6, 7], + [7, 8], [8, 9], [8, 12], [8, 13], [10, 11], [11, 12], + [13, 14], [14, 15]] + color = np.random.randint(0, 256, (self.keypoint_num, 3)).tolist() + + for i in range(self.keypoint_num): + if joints[i, 0] > 0 and joints[i, 1] > 0: + cv2.circle(img, tuple(joints[i, :2]), 2, tuple(color[i]), 2) + if score: + cv2.putText(img, score, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, + (128, 255, 0), 2) + + def draw_line(img, p1, p2): + c = (0, 0, 255) + if p1[0] > 0 and p1[1] > 0 and p2[0] > 0 and p2[1] > 0: + cv2.line(img, tuple(p1), tuple(p2), c, 2) + + for pair in pairs: + draw_line(img, joints[pair[0] - 1], joints[pair[1] - 1]) + + return img + + +if __name__ == '__main__': + from dataset.attribute import load_dataset + dataset = load_dataset('MPII') + mpii = MPIIDataset(dataset, 'val') + print(mpii.data_num) + diff --git a/PyTorch/contrib/cv/semantic_segmentation/3DUNet/docker/r.txt b/PyTorch/contrib/cv/semantic_segmentation/3DUNet/docker/r.txt index 37416fff58f8fc989cc253859dd432f9a15e7a4e..ad452c9c59521eaaf81944da89a56925871f6413 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/3DUNet/docker/r.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/3DUNet/docker/r.txt @@ -1,10 +1,10 @@ -scipy>=1.4.1 -numpy>=1.18.2 - -nibabel>=3.0.2 -tensorboard>=2.2.0 -torchsummary>=1.5.1 -torchnet>=0.0.4 -matplotlib>=3.2.1 -Pillow>=7.0.0 - +scipy>=1.4.1 +numpy>=1.18.2 + +nibabel>=3.0.2 +tensorboard>=2.2.0 +torchsummary>=1.5.1 +torchnet>=0.0.4 +matplotlib>=3.2.1 +Pillow>=7.0.0 + diff --git a/PyTorch/contrib/cv/semantic_segmentation/3DUNet/lib/medloaders/multi_epochs_dataloader.py b/PyTorch/contrib/cv/semantic_segmentation/3DUNet/lib/medloaders/multi_epochs_dataloader.py index 49b9ae910a987752af23c44d4235d54e1a74334d..256ccca3c96ab7e44d267217f714a3b8d7228522 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/3DUNet/lib/medloaders/multi_epochs_dataloader.py +++ b/PyTorch/contrib/cv/semantic_segmentation/3DUNet/lib/medloaders/multi_epochs_dataloader.py @@ -1,46 +1,46 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import torch - - -class MultiEpochsDataLoader(torch.utils.data.DataLoader): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._DataLoader__initialized = False - self.batch_sampler = _RepeatSampler(self.batch_sampler) - self._DataLoader__initialized = True - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for _ in range(len(self)): - yield next(self.iterator) - - -class _RepeatSampler(object): - """ Sampler that repeats forever. - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch + + +class MultiEpochsDataLoader(torch.utils.data.DataLoader): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._DataLoader__initialized = False + self.batch_sampler = _RepeatSampler(self.batch_sampler) + self._DataLoader__initialized = True + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for _ in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler(object): + """ Sampler that repeats forever. + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) diff --git a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/Dockerfile b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/Dockerfile index 46016ca43f6b7760b2fa65a87361c1fb13ece2bc..95d754fbec64997ceb0c1c1176641d3b115217dc 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/Dockerfile +++ b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/Dockerfile @@ -1,21 +1,21 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME -RUN mkdir ./deeplabv3_requirement -WORKDIR ./deeplabv3_requirement -COPY requirements.txt . 
-RUN mkdir requirements -COPY requirements/* ./requirements/ -RUN pip3.7 install -r requirements.txt -RUN apt-get update && apt-get install -y git 2to3 libgl1-mesa-glx -RUN git config --global http.sslverify false -RUN git clone -b v1.3.9 --depth=1 https://github.com/open-mmlab/mmcv.git -WORKDIR ./mmcv -ENV MMCV_WITH_OPS=1 -ENV MAX_JOBS=8 -RUN python3.7 setup.py build_ext -RUN python3.7 setup.py develop -RUN pip3.7 uninstall opencv-python -RUN pip3.7 install opencv-python-headless -COPY mmcv_need/_functions.py ./mmcv/parallel/ -COPY mmcv_need/scatter_gather.py ./mmcv/parallel/ -COPY mmcv_need/dist_utils.py ./mmcv/runner/ +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME +RUN mkdir ./deeplabv3_requirement +WORKDIR ./deeplabv3_requirement +COPY requirements.txt . +RUN mkdir requirements +COPY requirements/* ./requirements/ +RUN pip3.7 install -r requirements.txt +RUN apt-get update && apt-get install -y git 2to3 libgl1-mesa-glx +RUN git config --global http.sslverify false +RUN git clone -b v1.3.9 --depth=1 https://github.com/open-mmlab/mmcv.git +WORKDIR ./mmcv +ENV MMCV_WITH_OPS=1 +ENV MAX_JOBS=8 +RUN python3.7 setup.py build_ext +RUN python3.7 setup.py develop +RUN pip3.7 uninstall opencv-python +RUN pip3.7 install opencv-python-headless +COPY mmcv_need/_functions.py ./mmcv/parallel/ +COPY mmcv_need/scatter_gather.py ./mmcv/parallel/ +COPY mmcv_need/dist_utils.py ./mmcv/runner/ diff --git a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/README.md b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/README.md index d05d1035196d1290a6d8399e9168247b1db01699..c22e463aeb8484652f80fb64ae599124d2265f2a 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/README.md @@ -1,108 +1,108 @@ -# DeeplabV3模型使用说明 - -## Requirements -* NPU配套的run包安装 -* Python 3.7.5 -* PyTorch(NPU版本) -* apex(NPU版本) -* mmcv-full 1.3.9 - -### Dataset Prepare -1. 下载cityscapes数据集 - -2. 新建文件夹data - -3. 将cityscas数据集放于data目录下 - - ```shell - ln -s /path/to/cityscapes/ ./data - ``` - -4. 处理数据集,`**labelTrainIds.png` 被用来训练 - - ```shell - python3 tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8 - # python3 tools/convert_datasets/cityscapes.py /path/to/cityscapes --nproc 8 - ``` - -### 预训练模型下载 -* 若无法自动下载,可手动下载resnet50_v1c.pth,并放到/root/.cache/torch/checkpoints/文件夹下。 - -### 脚本环境安装 -#### 运行env_set.sh脚本,进行MMCV和mmsegmentation的安装 -```shell -bash env_set.sh -``` -编译mmcv耗时较长,请耐心等待 - -### 手动环境安装 -#### Build MMSEG from source - -1. 下载项目zip文件并解压 -3. 于npu服务器解压DeeplabV3_for_PyTorch压缩包 -4. 执行以下命令,安装mmsegmentation -```shell -cd DeeplabV3_for_PyTorch -pip3.7 install -r requirements.txt -pip3.7 install -e . -pip3.7 list | grep mm -``` - - -#### Build MMCV - -##### MMCV full version with cpu -```shell -source ./test/env_npu.sh -cd .. 
-git clone -b v1.3.9 --depth=1 https://github.com/open-mmlab/mmcv.git -export MMCV_WITH_OPS=1 -export MAX_JOBS=8 - -cd mmcv -python3.7 setup.py build_ext -python3.7 setup.py develop -pip3.7 list | grep mmcv -# 安装opencv-python-headless, 规避cv2引入错误 -pip3.7 uninstall opencv-python -pip3.7 install opencv-python-headless -``` - -##### Modified MMCV -将mmcv_need目录下的文件替换到mmcv的安装目录下。 - -```shell -cd ../DeeplabV3_for_PyTorch -/bin/cp -f mmcv_need/_functions.py ../mmcv/mmcv/parallel/ -/bin/cp -f mmcv_need/scatter_gather.py ../mmcv/parallel/ -/bin/cp -f mmcv_need/dist_utils.py ../mmcv/mmcv/runner/ -``` - -## Training - -```shell -# training 1p accuracy -bash ./test/train_full_1p.sh --data_path=real_data_path - -# training 1p performance -bash ./test/train_performance_1p.sh --data_path=real_data_path - -# training 8p accuracy -bash ./test/train_full_8p.sh --data_path=real_data_path - -# training 8p performance -bash ./test/train_performance_8p.sh --data_path=real_data_path -``` - - -## hipcc检查问题 -若在训练模型时,有报"which: no hipcc in (/usr/local/sbin:..." 的日志打印问题, -而hipcc是amd和nvidia平台需要的,npu并不需要。 -建议在torch/utils/cpp_extension.py文件中修改代码,当检查hipcc时,抑制输出。 -将 hipcc = subprocess.check_output(['which', 'hipcc']).decode().rstrip('\r\n')修改为 -hipcc = subprocess.check_output(['which', 'hipcc'], stderr=subporcess.DEVNULL).decode().rstrip('\r\n') - -## 报No module named 'mmcv._ext'问题 -在宿主机上训练模型,有时会报No module named 'mmcv._ext'问题(按照setup.py build_ext安装一般不会遇到此问题),或者别的带有mmcv的报错。 -解决方法:这一般是因为宿主机上安装了多个版本的mmcv,而训练脚本调用到了不匹配DeeplabV3模型使用的mmcv,因此报mmcv的错误。 +# DeeplabV3模型使用说明 + +## Requirements +* NPU配套的run包安装 +* Python 3.7.5 +* PyTorch(NPU版本) +* apex(NPU版本) +* mmcv-full 1.3.9 + +### Dataset Prepare +1. 下载cityscapes数据集 + +2. 新建文件夹data + +3. 将cityscas数据集放于data目录下 + + ```shell + ln -s /path/to/cityscapes/ ./data + ``` + +4. 处理数据集,`**labelTrainIds.png` 被用来训练 + + ```shell + python3 tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8 + # python3 tools/convert_datasets/cityscapes.py /path/to/cityscapes --nproc 8 + ``` + +### 预训练模型下载 +* 若无法自动下载,可手动下载resnet50_v1c.pth,并放到/root/.cache/torch/checkpoints/文件夹下。 + +### 脚本环境安装 +#### 运行env_set.sh脚本,进行MMCV和mmsegmentation的安装 +```shell +bash env_set.sh +``` +编译mmcv耗时较长,请耐心等待 + +### 手动环境安装 +#### Build MMSEG from source + +1. 下载项目zip文件并解压 +3. 于npu服务器解压DeeplabV3_for_PyTorch压缩包 +4. 执行以下命令,安装mmsegmentation +```shell +cd DeeplabV3_for_PyTorch +pip3.7 install -r requirements.txt +pip3.7 install -e . +pip3.7 list | grep mm +``` + + +#### Build MMCV + +##### MMCV full version with cpu +```shell +source ./test/env_npu.sh +cd .. 
+git clone -b v1.3.9 --depth=1 https://github.com/open-mmlab/mmcv.git +export MMCV_WITH_OPS=1 +export MAX_JOBS=8 + +cd mmcv +python3.7 setup.py build_ext +python3.7 setup.py develop +pip3.7 list | grep mmcv +# 安装opencv-python-headless, 规避cv2引入错误 +pip3.7 uninstall opencv-python +pip3.7 install opencv-python-headless +``` + +##### Modified MMCV +将mmcv_need目录下的文件替换到mmcv的安装目录下。 + +```shell +cd ../DeeplabV3_for_PyTorch +/bin/cp -f mmcv_need/_functions.py ../mmcv/mmcv/parallel/ +/bin/cp -f mmcv_need/scatter_gather.py ../mmcv/parallel/ +/bin/cp -f mmcv_need/dist_utils.py ../mmcv/mmcv/runner/ +``` + +## Training + +```shell +# training 1p accuracy +bash ./test/train_full_1p.sh --data_path=real_data_path + +# training 1p performance +bash ./test/train_performance_1p.sh --data_path=real_data_path + +# training 8p accuracy +bash ./test/train_full_8p.sh --data_path=real_data_path + +# training 8p performance +bash ./test/train_performance_8p.sh --data_path=real_data_path +``` + + +## hipcc检查问题 +若在训练模型时,有报"which: no hipcc in (/usr/local/sbin:..." 的日志打印问题, +而hipcc是amd和nvidia平台需要的,npu并不需要。 +建议在torch/utils/cpp_extension.py文件中修改代码,当检查hipcc时,抑制输出。 +将 hipcc = subprocess.check_output(['which', 'hipcc']).decode().rstrip('\r\n')修改为 +hipcc = subprocess.check_output(['which', 'hipcc'], stderr=subporcess.DEVNULL).decode().rstrip('\r\n') + +## 报No module named 'mmcv._ext'问题 +在宿主机上训练模型,有时会报No module named 'mmcv._ext'问题(按照setup.py build_ext安装一般不会遇到此问题),或者别的带有mmcv的报错。 +解决方法:这一般是因为宿主机上安装了多个版本的mmcv,而训练脚本调用到了不匹配DeeplabV3模型使用的mmcv,因此报mmcv的错误。 为了解决这个问题,建议在启动训练脚本前,先导入已经安装的符合DeeplabV3模型需要的mmcv路径的环境变量。export PYTHONPATH=mmcv的路径:$PYTHONPATH \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md b/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md index 0299b55f7e6ce51e547e5c5b132bd2c69a5198ed..69cd3ae9a1ca69d85fd41155382641917b3be654 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md @@ -1,52 +1,52 @@ -# ENet 训练 -This implements training of ENet on the Cityscapes dataset. -- Reference implementation: -``` -url=https://github.com/Tramac/awesome-semantic-segmentation-pytorch -``` - - - -## Requirements # - -- Install Packages -- `pip install -r requirements.txt` -- The Cityscapes dataset can be downloaded from the [link](https://www.cityscapes-dataset.com/). -- Move the datasets to root directory and run the script `unzip.sh`. 
- - `bash ./unzip.sh` - - - -## Training # -To train a model, change the working directory to `./NPU`,then run: - -```bash -# 1p train perf -bash ./test/train_performance_1p.sh '[your_dataset_path]' - -# 8p train perf -bash ./test/train_performance_8p.sh '[your_dataset_path]' - -# 1p train full -bash ./test/train_full_1p.sh '[your_dataset_path]' - -# 8p train full -bash ./test/train_full_8p.sh '[your_dataset_path]' - -# finetuning -bash ./test/train_finetune_1p.sh '[your_dataset_path]' -``` -After running,you can see the results in `./NPU/stargan_full_8p/samples` or `./NPU/stargan_full_1p/samples` - - - - -## GAN training result # - -| Type | FPS | Epochs | AMP_Type | -| :----: | :-----: | :----: | :------: | -| NPU-1p | 14.398 | 400 | O2 | -| NPU-8p | 74.310 | 400 | O2 | -| GPU-1p | 21.885 | 400 | O2 | -| GPU-8p | 161.495 | 400 | O2 | - +# ENet 训练 +This implements training of ENet on the Cityscapes dataset. +- Reference implementation: +``` +url=https://github.com/Tramac/awesome-semantic-segmentation-pytorch +``` + + + +## Requirements # + +- Install Packages +- `pip install -r requirements.txt` +- The Cityscapes dataset can be downloaded from the [link](https://www.cityscapes-dataset.com/). +- Move the datasets to root directory and run the script `unzip.sh`. + - `bash ./unzip.sh` + + + +## Training # +To train a model, change the working directory to `./NPU`,then run: + +```bash +# 1p train perf +bash ./test/train_performance_1p.sh '[your_dataset_path]' + +# 8p train perf +bash ./test/train_performance_8p.sh '[your_dataset_path]' + +# 1p train full +bash ./test/train_full_1p.sh '[your_dataset_path]' + +# 8p train full +bash ./test/train_full_8p.sh '[your_dataset_path]' + +# finetuning +bash ./test/train_finetune_1p.sh '[your_dataset_path]' +``` +After running,you can see the results in `./NPU/stargan_full_8p/samples` or `./NPU/stargan_full_1p/samples` + + + + +## GAN training result # + +| Type | FPS | Epochs | AMP_Type | +| :----: | :-----: | :----: | :------: | +| NPU-1p | 14.398 | 400 | O2 | +| NPU-8p | 74.310 | 400 | O2 | +| GPU-1p | 21.885 | 400 | O2 | +| GPU-8p | 161.495 | 400 | O2 | + diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/__init__.py index abca8fd29e3cfef8d9892aa3818a80fb7a0e110d..ee12af033180db2f910f67edba7c79891c0650f3 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/__init__.py @@ -1,15 +1,15 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from . import nn, models, utils, data \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/avg_enet.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/avg_enet.py index 8a8b6838768d6f97bec0b40c112cfdba24de38e0..6a6d5c6399ed35c953450a8fd03bafcde7237cfc 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/avg_enet.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/avg_enet.py @@ -1,550 +1,550 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Efficient Neural Network""" -import torch -import torch.nn as nn - -__all__ = ['ENet', 'get_enet', 'get_enet_citys'] - - -class ENet(nn.Module): - """Efficient Neural Network""" - - def __init__(self, nclass, backbone='', aux=False, jpu=False, pretrained_base=None, **kwargs): - super(ENet, self).__init__() - self.initial = InitialBlock(13, **kwargs) - - self.bottleneck1_0 = Bottleneck(16, 16, 64, downsampling=True, **kwargs) - self.bottleneck1_1 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_2 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_3 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_4 = Bottleneck(64, 16, 64, **kwargs) - - self.bottleneck2_0 = Bottleneck(64, 32, 128, downsampling=True, **kwargs) - self.bottleneck2_1 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck2_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) - self.bottleneck2_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck2_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) - self.bottleneck2_5 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck2_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) - self.bottleneck2_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck2_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) - - self.bottleneck3_1 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck3_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) - self.bottleneck3_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck3_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) - self.bottleneck3_5 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck3_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) - self.bottleneck3_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck3_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) - - self.bottleneck4_0 = UpsamplingBottleneck(128, 16, 64, **kwargs) - self.bottleneck4_1 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck4_2 = Bottleneck(64, 16, 64, **kwargs) - - self.bottleneck5_0 = UpsamplingBottleneck(64, 4, 16, **kwargs) - 
self.bottleneck5_1 = Bottleneck(16, 4, 16, **kwargs) - - self.fullconv = nn.ConvTranspose2d(16, nclass, 2, 2, bias=False) - - self.__setattr__('exclusive', ['bottleneck1_0', 'bottleneck1_1', 'bottleneck1_2', 'bottleneck1_3', - 'bottleneck1_4', 'bottleneck2_0', 'bottleneck2_1', 'bottleneck2_2', - 'bottleneck2_3', 'bottleneck2_4', 'bottleneck2_5', 'bottleneck2_6', - 'bottleneck2_7', 'bottleneck2_8', 'bottleneck3_1', 'bottleneck3_2', - 'bottleneck3_3', 'bottleneck3_4', 'bottleneck3_5', 'bottleneck3_6', - 'bottleneck3_7', 'bottleneck3_8', 'bottleneck4_0', 'bottleneck4_1', - 'bottleneck4_2', 'bottleneck5_0', 'bottleneck5_1', 'fullconv']) - - def forward(self, x): - # init - x = self.initial(x) - - # stage 1 - #x, max_indices1 = self.bottleneck1_0(x) - x = self.bottleneck1_0(x) - x = self.bottleneck1_1(x) - x = self.bottleneck1_2(x) - x = self.bottleneck1_3(x) - x = self.bottleneck1_4(x) - - # stage 2 - #x, max_indices2 = self.bottleneck2_0(x) - x = self.bottleneck2_0(x) - x = self.bottleneck2_1(x) - x = self.bottleneck2_2(x) - x = self.bottleneck2_3(x) - x = self.bottleneck2_4(x) - x = self.bottleneck2_5(x) - x = self.bottleneck2_6(x) - x = self.bottleneck2_7(x) - x = self.bottleneck2_8(x) - - # stage 3 - x = self.bottleneck3_1(x) - x = self.bottleneck3_2(x) - x = self.bottleneck3_3(x) - x = self.bottleneck3_4(x) - x = self.bottleneck3_6(x) - x = self.bottleneck3_7(x) - x = self.bottleneck3_8(x) - - # stage 4 - #x = self.bottleneck4_0(x, max_indices2) - x = self.bottleneck4_0(x) - x = self.bottleneck4_1(x) - x = self.bottleneck4_2(x) - - # stage 5 - #x = self.bottleneck5_0(x, max_indices1) - x = self.bottleneck5_0(x) - x = self.bottleneck5_1(x) - - # out - x = self.fullconv(x) - return tuple([x]) - - -class InitialBlock(nn.Module): - """ENet initial block""" - - def __init__(self, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): - super(InitialBlock, self).__init__() - self.conv = nn.Conv2d(3, out_channels, 3, 2, 1, bias=False) - self.maxpool = nn.MaxPool2d(2, 2) - self.bn = norm_layer(out_channels + 3) - self.act = nn.PReLU() - - def forward(self, x): - x_conv = self.conv(x) - x_pool = self.maxpool(x) - x = torch.cat([x_conv, x_pool], dim=1) - x = self.bn(x) - x = self.act(x) - return x - - -class Bottleneck(nn.Module): - """Bottlenecks include regular, asymmetric, downsampling, dilated""" - - def __init__(self, in_channels, inter_channels, out_channels, dilation=1, asymmetric=False, - downsampling=False, norm_layer=nn.BatchNorm2d, **kwargs): - self.npu = kwargs['npu'] - - super(Bottleneck, self).__init__() - self.downsamping = downsampling - if downsampling: - #self.maxpool = nn.MaxPool2d(2, 2, return_indices=True) - self.avgpool = nn.AvgPool2d((2, 2), stride=(2,2)) - self.conv_down = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels) - ) - - self.conv1 = nn.Sequential( - nn.Conv2d(in_channels, inter_channels, 1, bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - - if downsampling: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, 2, stride=2, bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - else: - if asymmetric: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, (5, 1), padding=(2, 0), bias=False), - nn.Conv2d(inter_channels, inter_channels, (1, 5), padding=(0, 2), bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - else: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, 3, dilation=dilation, padding=dilation, bias=False), - 
norm_layer(inter_channels), - nn.PReLU() - ) - self.conv3 = nn.Sequential( - nn.Conv2d(inter_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.Dropout2d(0.1) - ) - self.act = nn.PReLU() - - def forward(self, x): - identity = x - if self.downsamping: - ''' - if self.npu: - identity = x.cpu().to(torch.float32) - identity, max_indices = self.maxpool(identity) - identity = identity.npu().to(torch.float16) - else: - identity, max_indices = self.maxpool(identity) - ''' - identity = self.avgpool(identity) - identity = self.conv_down(identity) - - out = self.conv1(x) - out = self.conv2(out) - out = self.conv3(out) - out = self.act(out + identity) - - if self.downsamping: - return out#, max_indices - else: - return out - - -class UpsamplingBottleneck(nn.Module): - """upsampling Block""" - - def __init__(self, in_channels, inter_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): - self.npu = kwargs['npu'] - - super(UpsamplingBottleneck, self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels) - ) - #self.upsampling = nn.MaxUnpool2d(2) - self.upsampling = nn.Upsample(scale_factor=2, mode='nearest') - - self.block = nn.Sequential( - nn.Conv2d(in_channels, inter_channels, 1, bias=False), - norm_layer(inter_channels), - nn.PReLU(), - nn.ConvTranspose2d(inter_channels, inter_channels, 2, 2, bias=False), - norm_layer(inter_channels), - nn.PReLU(), - nn.Conv2d(inter_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.Dropout2d(0.1) - ) - self.act = nn.PReLU() - - def forward(self, x):#, max_indices): - out_up = self.conv(x) - ''' - if self.npu: - out_up = out_up.to(torch.float32) - max_indices = max_indices.to(torch.int64).npu() - out_up = self.upsampling(out_up, max_indices).to(torch.half) - else: - out_up = self.upsampling(out_up, max_indices) - ''' - out_up = self.upsampling(out_up) - out_ext = self.block(x) - out = self.act(out_up + out_ext) - return out - - -def get_enet(dataset='citys', backbone='', pretrained=False, root='~/.torch/models', pretrained_base=True, **kwargs): - acronyms = { - 'pascal_voc': 'pascal_voc', - 'pascal_aug': 'pascal_aug', - 'ade20k': 'ade', - 'coco': 'coco', - 'citys': 'citys', - } - from core.data.dataloader import datasets - model = ENet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs) - if pretrained: - from .model_store import get_model_file - device = torch.device(kwargs['local_rank']) - model.load_state_dict(torch.load(get_model_file('enet_%s' % (acronyms[dataset]), root=root), - map_location=device)) - return model - - -def get_enet_citys(**kwargs): - return get_enet('citys', '', **kwargs) - - -if __name__ == '__main__': - img = torch.randn(1, 3, 512, 512) - model = get_enet_citys() - output = model(img) - - -"""Efficient Neural Network""" -import torch -import torch.nn as nn - -__all__ = ['ENet', 'get_enet', 'get_enet_citys'] - - -class ENet(nn.Module): - """Efficient Neural Network""" - - def __init__(self, nclass, backbone='', aux=False, jpu=False, pretrained_base=None, **kwargs): - super(ENet, self).__init__() - self.initial = InitialBlock(13, **kwargs) - - self.bottleneck1_0 = Bottleneck(16, 16, 64, downsampling=True, **kwargs) - self.bottleneck1_1 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_2 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_3 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck1_4 = Bottleneck(64, 16, 64, **kwargs) - - self.bottleneck2_0 = Bottleneck(64, 32, 128, 
downsampling=True, **kwargs) - self.bottleneck2_1 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck2_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) - self.bottleneck2_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck2_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) - self.bottleneck2_5 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck2_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) - self.bottleneck2_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck2_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) - - self.bottleneck3_1 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck3_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) - self.bottleneck3_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck3_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) - self.bottleneck3_5 = Bottleneck(128, 32, 128, **kwargs) - self.bottleneck3_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) - self.bottleneck3_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) - self.bottleneck3_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) - - self.bottleneck4_0 = UpsamplingBottleneck(128, 16, 64, **kwargs) - self.bottleneck4_1 = Bottleneck(64, 16, 64, **kwargs) - self.bottleneck4_2 = Bottleneck(64, 16, 64, **kwargs) - - self.bottleneck5_0 = UpsamplingBottleneck(64, 4, 16, **kwargs) - self.bottleneck5_1 = Bottleneck(16, 4, 16, **kwargs) - - self.fullconv = nn.ConvTranspose2d(16, nclass, 2, 2, bias=False) - - self.__setattr__('exclusive', ['bottleneck1_0', 'bottleneck1_1', 'bottleneck1_2', 'bottleneck1_3', - 'bottleneck1_4', 'bottleneck2_0', 'bottleneck2_1', 'bottleneck2_2', - 'bottleneck2_3', 'bottleneck2_4', 'bottleneck2_5', 'bottleneck2_6', - 'bottleneck2_7', 'bottleneck2_8', 'bottleneck3_1', 'bottleneck3_2', - 'bottleneck3_3', 'bottleneck3_4', 'bottleneck3_5', 'bottleneck3_6', - 'bottleneck3_7', 'bottleneck3_8', 'bottleneck4_0', 'bottleneck4_1', - 'bottleneck4_2', 'bottleneck5_0', 'bottleneck5_1', 'fullconv']) - - def forward(self, x): - # init - x = self.initial(x) - - # stage 1 - x, max_indices1 = self.bottleneck1_0(x) - #x = self.bottleneck1_0(x) - x = self.bottleneck1_1(x) - x = self.bottleneck1_2(x) - x = self.bottleneck1_3(x) - x = self.bottleneck1_4(x) - - # stage 2 - x, max_indices2 = self.bottleneck2_0(x) - #x = self.bottleneck2_0(x) - x = self.bottleneck2_1(x) - x = self.bottleneck2_2(x) - x = self.bottleneck2_3(x) - x = self.bottleneck2_4(x) - x = self.bottleneck2_5(x) - x = self.bottleneck2_6(x) - x = self.bottleneck2_7(x) - x = self.bottleneck2_8(x) - - # stage 3 - x = self.bottleneck3_1(x) - x = self.bottleneck3_2(x) - x = self.bottleneck3_3(x) - x = self.bottleneck3_4(x) - x = self.bottleneck3_6(x) - x = self.bottleneck3_7(x) - x = self.bottleneck3_8(x) - - # stage 4 - x = self.bottleneck4_0(x, max_indices2) - #x = self.bottleneck4_0(x) - x = self.bottleneck4_1(x) - x = self.bottleneck4_2(x) - - # stage 5 - x = self.bottleneck5_0(x, max_indices1) - #x = self.bottleneck5_0(x) - x = self.bottleneck5_1(x) - - # out - x = self.fullconv(x) - return tuple([x]) - - -class InitialBlock(nn.Module): - """ENet initial block""" - - def __init__(self, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): - super(InitialBlock, self).__init__() - self.conv = nn.Conv2d(3, out_channels, 3, 2, 1, bias=False) - self.maxpool = nn.MaxPool2d(2, 2) - self.bn = norm_layer(out_channels + 3) - self.act = nn.PReLU() - - def forward(self, x): - x_conv = self.conv(x) - x_pool = self.maxpool(x) - x = torch.cat([x_conv, x_pool], dim=1) 
- x = self.bn(x) - x = self.act(x) - return x - - -class Bottleneck(nn.Module): - """Bottlenecks include regular, asymmetric, downsampling, dilated""" - - def __init__(self, in_channels, inter_channels, out_channels, dilation=1, asymmetric=False, - downsampling=False, norm_layer=nn.BatchNorm2d, **kwargs): - self.npu = kwargs['npu'] - - super(Bottleneck, self).__init__() - self.downsamping = downsampling - if downsampling: - self.maxpool = nn.MaxPool2d(2, 2, return_indices=True) - #self.avgpool = nn.AvgPool2d((2, 2), stride=(2,2)) - self.conv_down = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels) - ) - - self.conv1 = nn.Sequential( - nn.Conv2d(in_channels, inter_channels, 1, bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - - if downsampling: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, 2, stride=2, bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - else: - if asymmetric: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, (5, 1), padding=(2, 0), bias=False), - nn.Conv2d(inter_channels, inter_channels, (1, 5), padding=(0, 2), bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - else: - self.conv2 = nn.Sequential( - nn.Conv2d(inter_channels, inter_channels, 3, dilation=dilation, padding=dilation, bias=False), - norm_layer(inter_channels), - nn.PReLU() - ) - self.conv3 = nn.Sequential( - nn.Conv2d(inter_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.Dropout2d(0.1) - ) - self.act = nn.PReLU() - - def forward(self, x): - identity = x - if self.downsamping: - if self.npu: - identity = x.cpu().to(torch.float32) - identity, max_indices = self.maxpool(identity) - identity = identity.npu().to(torch.float16) - else: - identity, max_indices = self.maxpool(identity) - #identity = self.avgpool(identity) - identity = self.conv_down(identity) - - out = self.conv1(x) - out = self.conv2(out) - out = self.conv3(out) - out = self.act(out + identity) - - if self.downsamping: - return out, max_indices - else: - return out - - -class UpsamplingBottleneck(nn.Module): - """upsampling Block""" - - def __init__(self, in_channels, inter_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): - self.npu = kwargs['npu'] - - super(UpsamplingBottleneck, self).__init__() - self.conv = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels) - ) - self.upsampling = nn.MaxUnpool2d(2) - #self.upsampling = nn.Upsample(scale_factor=2, mode='nearest') - - self.block = nn.Sequential( - nn.Conv2d(in_channels, inter_channels, 1, bias=False), - norm_layer(inter_channels), - nn.PReLU(), - nn.ConvTranspose2d(inter_channels, inter_channels, 2, 2, bias=False), - norm_layer(inter_channels), - nn.PReLU(), - nn.Conv2d(inter_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.Dropout2d(0.1) - ) - self.act = nn.PReLU() - - def forward(self, x, max_indices): - out_up = self.conv(x) - - if self.npu: - out_up = out_up.to(torch.float32) - max_indices = max_indices.to(torch.int64).npu() - out_up = self.upsampling(out_up, max_indices).to(torch.half) - else: - out_up = self.upsampling(out_up, max_indices) - - #out_up = self.upsampling(out_up) - out_ext = self.block(x) - out = self.act(out_up + out_ext) - return out - - -def get_enet(dataset='citys', backbone='', pretrained=False, root='~/.torch/models', pretrained_base=True, **kwargs): - acronyms = { - 'pascal_voc': 'pascal_voc', - 'pascal_aug': 'pascal_aug', - 'ade20k': 
'ade', - 'coco': 'coco', - 'citys': 'citys', - } - from core.data.dataloader import datasets - model = ENet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs) - if pretrained: - from .model_store import get_model_file - device = torch.device(kwargs['local_rank']) - model.load_state_dict(torch.load(get_model_file('enet_%s' % (acronyms[dataset]), root=root), - map_location=device)) - return model - - -def get_enet_citys(**kwargs): - return get_enet('citys', '', **kwargs) - - -if __name__ == '__main__': - img = torch.randn(1, 3, 512, 512) - model = get_enet_citys() - output = model(img) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Efficient Neural Network""" +import torch +import torch.nn as nn + +__all__ = ['ENet', 'get_enet', 'get_enet_citys'] + + +class ENet(nn.Module): + """Efficient Neural Network""" + + def __init__(self, nclass, backbone='', aux=False, jpu=False, pretrained_base=None, **kwargs): + super(ENet, self).__init__() + self.initial = InitialBlock(13, **kwargs) + + self.bottleneck1_0 = Bottleneck(16, 16, 64, downsampling=True, **kwargs) + self.bottleneck1_1 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_2 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_3 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_4 = Bottleneck(64, 16, 64, **kwargs) + + self.bottleneck2_0 = Bottleneck(64, 32, 128, downsampling=True, **kwargs) + self.bottleneck2_1 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck2_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) + self.bottleneck2_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck2_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) + self.bottleneck2_5 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck2_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) + self.bottleneck2_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck2_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) + + self.bottleneck3_1 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck3_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) + self.bottleneck3_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck3_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) + self.bottleneck3_5 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck3_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) + self.bottleneck3_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck3_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) + + self.bottleneck4_0 = UpsamplingBottleneck(128, 16, 64, **kwargs) + self.bottleneck4_1 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck4_2 = Bottleneck(64, 16, 64, **kwargs) + + self.bottleneck5_0 = UpsamplingBottleneck(64, 4, 16, **kwargs) + self.bottleneck5_1 = Bottleneck(16, 4, 16, **kwargs) + + self.fullconv = nn.ConvTranspose2d(16, nclass, 2, 2, bias=False) + + self.__setattr__('exclusive', ['bottleneck1_0', 'bottleneck1_1', 'bottleneck1_2', 'bottleneck1_3', + 
'bottleneck1_4', 'bottleneck2_0', 'bottleneck2_1', 'bottleneck2_2', + 'bottleneck2_3', 'bottleneck2_4', 'bottleneck2_5', 'bottleneck2_6', + 'bottleneck2_7', 'bottleneck2_8', 'bottleneck3_1', 'bottleneck3_2', + 'bottleneck3_3', 'bottleneck3_4', 'bottleneck3_5', 'bottleneck3_6', + 'bottleneck3_7', 'bottleneck3_8', 'bottleneck4_0', 'bottleneck4_1', + 'bottleneck4_2', 'bottleneck5_0', 'bottleneck5_1', 'fullconv']) + + def forward(self, x): + # init + x = self.initial(x) + + # stage 1 + #x, max_indices1 = self.bottleneck1_0(x) + x = self.bottleneck1_0(x) + x = self.bottleneck1_1(x) + x = self.bottleneck1_2(x) + x = self.bottleneck1_3(x) + x = self.bottleneck1_4(x) + + # stage 2 + #x, max_indices2 = self.bottleneck2_0(x) + x = self.bottleneck2_0(x) + x = self.bottleneck2_1(x) + x = self.bottleneck2_2(x) + x = self.bottleneck2_3(x) + x = self.bottleneck2_4(x) + x = self.bottleneck2_5(x) + x = self.bottleneck2_6(x) + x = self.bottleneck2_7(x) + x = self.bottleneck2_8(x) + + # stage 3 + x = self.bottleneck3_1(x) + x = self.bottleneck3_2(x) + x = self.bottleneck3_3(x) + x = self.bottleneck3_4(x) + x = self.bottleneck3_6(x) + x = self.bottleneck3_7(x) + x = self.bottleneck3_8(x) + + # stage 4 + #x = self.bottleneck4_0(x, max_indices2) + x = self.bottleneck4_0(x) + x = self.bottleneck4_1(x) + x = self.bottleneck4_2(x) + + # stage 5 + #x = self.bottleneck5_0(x, max_indices1) + x = self.bottleneck5_0(x) + x = self.bottleneck5_1(x) + + # out + x = self.fullconv(x) + return tuple([x]) + + +class InitialBlock(nn.Module): + """ENet initial block""" + + def __init__(self, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): + super(InitialBlock, self).__init__() + self.conv = nn.Conv2d(3, out_channels, 3, 2, 1, bias=False) + self.maxpool = nn.MaxPool2d(2, 2) + self.bn = norm_layer(out_channels + 3) + self.act = nn.PReLU() + + def forward(self, x): + x_conv = self.conv(x) + x_pool = self.maxpool(x) + x = torch.cat([x_conv, x_pool], dim=1) + x = self.bn(x) + x = self.act(x) + return x + + +class Bottleneck(nn.Module): + """Bottlenecks include regular, asymmetric, downsampling, dilated""" + + def __init__(self, in_channels, inter_channels, out_channels, dilation=1, asymmetric=False, + downsampling=False, norm_layer=nn.BatchNorm2d, **kwargs): + self.npu = kwargs['npu'] + + super(Bottleneck, self).__init__() + self.downsamping = downsampling + if downsampling: + #self.maxpool = nn.MaxPool2d(2, 2, return_indices=True) + self.avgpool = nn.AvgPool2d((2, 2), stride=(2,2)) + self.conv_down = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels) + ) + + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + + if downsampling: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, 2, stride=2, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + else: + if asymmetric: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, (5, 1), padding=(2, 0), bias=False), + nn.Conv2d(inter_channels, inter_channels, (1, 5), padding=(0, 2), bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + else: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, 3, dilation=dilation, padding=dilation, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + self.conv3 = nn.Sequential( + nn.Conv2d(inter_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.Dropout2d(0.1) + ) + self.act = nn.PReLU() + + def forward(self, x): + 
identity = x + if self.downsamping: + ''' + if self.npu: + identity = x.cpu().to(torch.float32) + identity, max_indices = self.maxpool(identity) + identity = identity.npu().to(torch.float16) + else: + identity, max_indices = self.maxpool(identity) + ''' + identity = self.avgpool(identity) + identity = self.conv_down(identity) + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + out = self.act(out + identity) + + if self.downsamping: + return out#, max_indices + else: + return out + + +class UpsamplingBottleneck(nn.Module): + """upsampling Block""" + + def __init__(self, in_channels, inter_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): + self.npu = kwargs['npu'] + + super(UpsamplingBottleneck, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels) + ) + #self.upsampling = nn.MaxUnpool2d(2) + self.upsampling = nn.Upsample(scale_factor=2, mode='nearest') + + self.block = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.PReLU(), + nn.ConvTranspose2d(inter_channels, inter_channels, 2, 2, bias=False), + norm_layer(inter_channels), + nn.PReLU(), + nn.Conv2d(inter_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.Dropout2d(0.1) + ) + self.act = nn.PReLU() + + def forward(self, x):#, max_indices): + out_up = self.conv(x) + ''' + if self.npu: + out_up = out_up.to(torch.float32) + max_indices = max_indices.to(torch.int64).npu() + out_up = self.upsampling(out_up, max_indices).to(torch.half) + else: + out_up = self.upsampling(out_up, max_indices) + ''' + out_up = self.upsampling(out_up) + out_ext = self.block(x) + out = self.act(out_up + out_ext) + return out + + +def get_enet(dataset='citys', backbone='', pretrained=False, root='~/.torch/models', pretrained_base=True, **kwargs): + acronyms = { + 'pascal_voc': 'pascal_voc', + 'pascal_aug': 'pascal_aug', + 'ade20k': 'ade', + 'coco': 'coco', + 'citys': 'citys', + } + from core.data.dataloader import datasets + model = ENet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs) + if pretrained: + from .model_store import get_model_file + device = torch.device(kwargs['local_rank']) + model.load_state_dict(torch.load(get_model_file('enet_%s' % (acronyms[dataset]), root=root), + map_location=device)) + return model + + +def get_enet_citys(**kwargs): + return get_enet('citys', '', **kwargs) + + +if __name__ == '__main__': + img = torch.randn(1, 3, 512, 512) + model = get_enet_citys() + output = model(img) + + +"""Efficient Neural Network""" +import torch +import torch.nn as nn + +__all__ = ['ENet', 'get_enet', 'get_enet_citys'] + + +class ENet(nn.Module): + """Efficient Neural Network""" + + def __init__(self, nclass, backbone='', aux=False, jpu=False, pretrained_base=None, **kwargs): + super(ENet, self).__init__() + self.initial = InitialBlock(13, **kwargs) + + self.bottleneck1_0 = Bottleneck(16, 16, 64, downsampling=True, **kwargs) + self.bottleneck1_1 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_2 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_3 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck1_4 = Bottleneck(64, 16, 64, **kwargs) + + self.bottleneck2_0 = Bottleneck(64, 32, 128, downsampling=True, **kwargs) + self.bottleneck2_1 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck2_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) + self.bottleneck2_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + 
self.bottleneck2_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) + self.bottleneck2_5 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck2_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) + self.bottleneck2_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck2_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) + + self.bottleneck3_1 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck3_2 = Bottleneck(128, 32, 128, dilation=2, **kwargs) + self.bottleneck3_3 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck3_4 = Bottleneck(128, 32, 128, dilation=4, **kwargs) + self.bottleneck3_5 = Bottleneck(128, 32, 128, **kwargs) + self.bottleneck3_6 = Bottleneck(128, 32, 128, dilation=8, **kwargs) + self.bottleneck3_7 = Bottleneck(128, 32, 128, asymmetric=True, **kwargs) + self.bottleneck3_8 = Bottleneck(128, 32, 128, dilation=16, **kwargs) + + self.bottleneck4_0 = UpsamplingBottleneck(128, 16, 64, **kwargs) + self.bottleneck4_1 = Bottleneck(64, 16, 64, **kwargs) + self.bottleneck4_2 = Bottleneck(64, 16, 64, **kwargs) + + self.bottleneck5_0 = UpsamplingBottleneck(64, 4, 16, **kwargs) + self.bottleneck5_1 = Bottleneck(16, 4, 16, **kwargs) + + self.fullconv = nn.ConvTranspose2d(16, nclass, 2, 2, bias=False) + + self.__setattr__('exclusive', ['bottleneck1_0', 'bottleneck1_1', 'bottleneck1_2', 'bottleneck1_3', + 'bottleneck1_4', 'bottleneck2_0', 'bottleneck2_1', 'bottleneck2_2', + 'bottleneck2_3', 'bottleneck2_4', 'bottleneck2_5', 'bottleneck2_6', + 'bottleneck2_7', 'bottleneck2_8', 'bottleneck3_1', 'bottleneck3_2', + 'bottleneck3_3', 'bottleneck3_4', 'bottleneck3_5', 'bottleneck3_6', + 'bottleneck3_7', 'bottleneck3_8', 'bottleneck4_0', 'bottleneck4_1', + 'bottleneck4_2', 'bottleneck5_0', 'bottleneck5_1', 'fullconv']) + + def forward(self, x): + # init + x = self.initial(x) + + # stage 1 + x, max_indices1 = self.bottleneck1_0(x) + #x = self.bottleneck1_0(x) + x = self.bottleneck1_1(x) + x = self.bottleneck1_2(x) + x = self.bottleneck1_3(x) + x = self.bottleneck1_4(x) + + # stage 2 + x, max_indices2 = self.bottleneck2_0(x) + #x = self.bottleneck2_0(x) + x = self.bottleneck2_1(x) + x = self.bottleneck2_2(x) + x = self.bottleneck2_3(x) + x = self.bottleneck2_4(x) + x = self.bottleneck2_5(x) + x = self.bottleneck2_6(x) + x = self.bottleneck2_7(x) + x = self.bottleneck2_8(x) + + # stage 3 + x = self.bottleneck3_1(x) + x = self.bottleneck3_2(x) + x = self.bottleneck3_3(x) + x = self.bottleneck3_4(x) + x = self.bottleneck3_6(x) + x = self.bottleneck3_7(x) + x = self.bottleneck3_8(x) + + # stage 4 + x = self.bottleneck4_0(x, max_indices2) + #x = self.bottleneck4_0(x) + x = self.bottleneck4_1(x) + x = self.bottleneck4_2(x) + + # stage 5 + x = self.bottleneck5_0(x, max_indices1) + #x = self.bottleneck5_0(x) + x = self.bottleneck5_1(x) + + # out + x = self.fullconv(x) + return tuple([x]) + + +class InitialBlock(nn.Module): + """ENet initial block""" + + def __init__(self, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): + super(InitialBlock, self).__init__() + self.conv = nn.Conv2d(3, out_channels, 3, 2, 1, bias=False) + self.maxpool = nn.MaxPool2d(2, 2) + self.bn = norm_layer(out_channels + 3) + self.act = nn.PReLU() + + def forward(self, x): + x_conv = self.conv(x) + x_pool = self.maxpool(x) + x = torch.cat([x_conv, x_pool], dim=1) + x = self.bn(x) + x = self.act(x) + return x + + +class Bottleneck(nn.Module): + """Bottlenecks include regular, asymmetric, downsampling, dilated""" + + def __init__(self, in_channels, inter_channels, out_channels, dilation=1, 
asymmetric=False, + downsampling=False, norm_layer=nn.BatchNorm2d, **kwargs): + self.npu = kwargs['npu'] + + super(Bottleneck, self).__init__() + self.downsamping = downsampling + if downsampling: + self.maxpool = nn.MaxPool2d(2, 2, return_indices=True) + #self.avgpool = nn.AvgPool2d((2, 2), stride=(2,2)) + self.conv_down = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels) + ) + + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + + if downsampling: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, 2, stride=2, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + else: + if asymmetric: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, (5, 1), padding=(2, 0), bias=False), + nn.Conv2d(inter_channels, inter_channels, (1, 5), padding=(0, 2), bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + else: + self.conv2 = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, 3, dilation=dilation, padding=dilation, bias=False), + norm_layer(inter_channels), + nn.PReLU() + ) + self.conv3 = nn.Sequential( + nn.Conv2d(inter_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.Dropout2d(0.1) + ) + self.act = nn.PReLU() + + def forward(self, x): + identity = x + if self.downsamping: + if self.npu: + identity = x.cpu().to(torch.float32) + identity, max_indices = self.maxpool(identity) + identity = identity.npu().to(torch.float16) + else: + identity, max_indices = self.maxpool(identity) + #identity = self.avgpool(identity) + identity = self.conv_down(identity) + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + out = self.act(out + identity) + + if self.downsamping: + return out, max_indices + else: + return out + + +class UpsamplingBottleneck(nn.Module): + """upsampling Block""" + + def __init__(self, in_channels, inter_channels, out_channels, norm_layer=nn.BatchNorm2d, **kwargs): + self.npu = kwargs['npu'] + + super(UpsamplingBottleneck, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels) + ) + self.upsampling = nn.MaxUnpool2d(2) + #self.upsampling = nn.Upsample(scale_factor=2, mode='nearest') + + self.block = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.PReLU(), + nn.ConvTranspose2d(inter_channels, inter_channels, 2, 2, bias=False), + norm_layer(inter_channels), + nn.PReLU(), + nn.Conv2d(inter_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.Dropout2d(0.1) + ) + self.act = nn.PReLU() + + def forward(self, x, max_indices): + out_up = self.conv(x) + + if self.npu: + out_up = out_up.to(torch.float32) + max_indices = max_indices.to(torch.int64).npu() + out_up = self.upsampling(out_up, max_indices).to(torch.half) + else: + out_up = self.upsampling(out_up, max_indices) + + #out_up = self.upsampling(out_up) + out_ext = self.block(x) + out = self.act(out_up + out_ext) + return out + + +def get_enet(dataset='citys', backbone='', pretrained=False, root='~/.torch/models', pretrained_base=True, **kwargs): + acronyms = { + 'pascal_voc': 'pascal_voc', + 'pascal_aug': 'pascal_aug', + 'ade20k': 'ade', + 'coco': 'coco', + 'citys': 'citys', + } + from core.data.dataloader import datasets + model = ENet(datasets[dataset].NUM_CLASS, backbone=backbone, pretrained_base=pretrained_base, **kwargs) + if pretrained: + from 
.model_store import get_model_file + device = torch.device(kwargs['local_rank']) + model.load_state_dict(torch.load(get_model_file('enet_%s' % (acronyms[dataset]), root=root), + map_location=device)) + return model + + +def get_enet_citys(**kwargs): + return get_enet('citys', '', **kwargs) + + +if __name__ == '__main__': + img = torch.randn(1, 3, 512, 512) + model = get_enet_citys() + output = model(img) diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/ocnet.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/ocnet.py index e59df3ce96908439cb3f860af8ff9f43153c7026..4eea5bd8d9b6e0ab6beed1952f715b07204a2328 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/ocnet.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/models/ocnet.py @@ -1,359 +1,359 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" Object Context Network for Scene Parsing""" -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .segbase import SegBaseModel -from .fcn import _FCNHead - -__all__ = ['OCNet', 'get_ocnet', 'get_base_ocnet_resnet101_citys', - 'get_pyramid_ocnet_resnet101_citys', 'get_asp_ocnet_resnet101_citys'] - - -class OCNet(SegBaseModel): - r"""OCNet - - Parameters - ---------- - nclass : int - Number of categories for the training dataset. - backbone : string - Pre-trained dilated backbone network type (default:'resnet50'; 'resnet50', - 'resnet101' or 'resnet152'). - norm_layer : object - Normalization layer used in backbone network (default: :class:`nn.BatchNorm`; - for Synchronized Cross-GPU BachNormalization). - aux : bool - Auxiliary loss. - Reference: - Yuhui Yuan, Jingdong Wang. "OCNet: Object Context Network for Scene Parsing." - arXiv preprint arXiv:1809.00916 (2018). 
- """ - - def __init__(self, nclass, backbone='resnet101', oc_arch='base', aux=False, pretrained_base=True, **kwargs): - super(OCNet, self).__init__(nclass, aux, backbone, pretrained_base=pretrained_base, **kwargs) - self.head = _OCHead(nclass, oc_arch, **kwargs) - if self.aux: - self.auxlayer = _FCNHead(1024, nclass, **kwargs) - - self.__setattr__('exclusive', ['head', 'auxlayer'] if aux else ['head']) - - def forward(self, x): - size = x.size()[2:] - _, _, c3, c4 = self.base_forward(x) - outputs = [] - x = self.head(c4) - x = F.interpolate(x, size, mode='bilinear', align_corners=True) - outputs.append(x) - - if self.aux: - auxout = self.auxlayer(c3) - auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) - outputs.append(auxout) - return tuple(outputs) - - -class _OCHead(nn.Module): - def __init__(self, nclass, oc_arch, norm_layer=nn.BatchNorm2d, **kwargs): - super(_OCHead, self).__init__() - if oc_arch == 'base': - self.context = nn.Sequential( - nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), - norm_layer(512), - nn.ReLU(True), - BaseOCModule(512, 512, 256, 256, scales=([1]), norm_layer=norm_layer, **kwargs)) - elif oc_arch == 'pyramid': - self.context = nn.Sequential( - nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), - norm_layer(512), - nn.ReLU(True), - PyramidOCModule(512, 512, 256, 512, scales=([1, 2, 3, 6]), norm_layer=norm_layer, **kwargs)) - elif oc_arch == 'asp': - self.context = ASPOCModule(2048, 512, 256, 512, norm_layer=norm_layer, **kwargs) - else: - raise ValueError("Unknown OC architecture!") - - self.out = nn.Conv2d(512, nclass, 1) - - def forward(self, x): - x = self.context(x) - return self.out(x) - - -class BaseAttentionBlock(nn.Module): - """The basic implementation for self-attention block/non-local block.""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scale=1, norm_layer=nn.BatchNorm2d, **kwargs): - super(BaseAttentionBlock, self).__init__() - self.scale = scale - self.key_channels = key_channels - self.value_channels = value_channels - if scale > 1: - self.pool = nn.MaxPool2d(scale) - - self.f_value = nn.Conv2d(in_channels, value_channels, 1) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels, key_channels, 1), - norm_layer(key_channels), - nn.ReLU(True) - ) - self.f_query = self.f_key - self.W = nn.Conv2d(value_channels, out_channels, 1) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - def forward(self, x): - batch_size, c, w, h = x.size() - if self.scale > 1: - x = self.pool(x) - - value = self.f_value(x).view(batch_size, self.value_channels, -1).permute(0, 2, 1) - query = self.f_query(x).view(batch_size, self.key_channels, -1).permute(0, 2, 1) - key = self.f_key(x).view(batch_size, self.key_channels, -1) - - sim_map = torch.bmm(query, key) * (self.key_channels ** -.5) - sim_map = F.softmax(sim_map, dim=-1) - - context = torch.bmm(sim_map, value).permute(0, 2, 1).contiguous() - context = context.view(batch_size, self.value_channels, *x.size()[2:]) - context = self.W(context) - if self.scale > 1: - context = F.interpolate(context, size=(w, h), mode='bilinear', align_corners=True) - - return context - - -class BaseOCModule(nn.Module): - """Base-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scales=([1]), norm_layer=nn.BatchNorm2d, concat=True, **kwargs): - super(BaseOCModule, self).__init__() - self.stages = nn.ModuleList([ - BaseAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) 
- for scale in scales]) - in_channels = in_channels * 2 if concat else in_channels - self.project = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.05) - ) - self.concat = concat - - def forward(self, x): - priors = [stage(x) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - if self.concat: - context = torch.cat([context, x], 1) - out = self.project(context) - return out - - -class PyramidAttentionBlock(nn.Module): - """The basic implementation for pyramid self-attention block/non-local block""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scale=1, norm_layer=nn.BatchNorm2d, **kwargs): - super(PyramidAttentionBlock, self).__init__() - self.scale = scale - self.value_channels = value_channels - self.key_channels = key_channels - - self.f_value = nn.Conv2d(in_channels, value_channels, 1) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels, key_channels, 1), - norm_layer(key_channels), - nn.ReLU(True) - ) - self.f_query = self.f_key - self.W = nn.Conv2d(value_channels, out_channels, 1) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - def forward(self, x): - batch_size, c, w, h = x.size() - - local_x = list() - local_y = list() - step_w, step_h = w // self.scale, h // self.scale - for i in range(self.scale): - for j in range(self.scale): - start_x, start_y = step_w * i, step_h * j - end_x, end_y = min(start_x + step_w, w), min(start_y + step_h, h) - if i == (self.scale - 1): - end_x = w - if j == (self.scale - 1): - end_y = h - local_x += [start_x, end_x] - local_y += [start_y, end_y] - - value = self.f_value(x) - query = self.f_query(x) - key = self.f_key(x) - - local_list = list() - local_block_cnt = (self.scale ** 2) * 2 - for i in range(0, local_block_cnt, 2): - value_local = value[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - query_local = query[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - key_local = key[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - - w_local, h_local = value_local.size(2), value_local.size(3) - value_local = value_local.contiguous().view(batch_size, self.value_channels, -1).permute(0, 2, 1) - query_local = query_local.contiguous().view(batch_size, self.key_channels, -1).permute(0, 2, 1) - key_local = key_local.contiguous().view(batch_size, self.key_channels, -1) - - sim_map = torch.bmm(query_local, key_local) * (self.key_channels ** -.5) - sim_map = F.softmax(sim_map, dim=-1) - - context_local = torch.bmm(sim_map, value_local).permute(0, 2, 1).contiguous() - context_local = context_local.view(batch_size, self.value_channels, w_local, h_local) - local_list.append(context_local) - - context_list = list() - for i in range(0, self.scale): - row_tmp = list() - for j in range(self.scale): - row_tmp.append(local_list[j + i * self.scale]) - context_list.append(torch.cat(row_tmp, 3)) - - context = torch.cat(context_list, 2) - context = self.W(context) - - return context - - -class PyramidOCModule(nn.Module): - """Pyramid-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scales=([1]), norm_layer=nn.BatchNorm2d, **kwargs): - super(PyramidOCModule, self).__init__() - self.stages = nn.ModuleList([ - PyramidAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) - for scale in scales]) - self.up_dr = nn.Sequential( - nn.Conv2d(in_channels, in_channels * 
len(scales), 1), - norm_layer(in_channels * len(scales)), - nn.ReLU(True) - ) - self.project = nn.Sequential( - nn.Conv2d(in_channels * len(scales) * 2, out_channels, 1), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.05) - ) - - def forward(self, x): - priors = [stage(x) for stage in self.stages] - context = [self.up_dr(x)] - for i in range(len(priors)): - context += [priors[i]] - context = torch.cat(context, 1) - out = self.project(context) - return out - - -class ASPOCModule(nn.Module): - """ASP-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - atrous_rates=(12, 24, 36), norm_layer=nn.BatchNorm2d, **kwargs): - super(ASPOCModule, self).__init__() - self.context = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=1), - norm_layer(out_channels), - nn.ReLU(True), - BaseOCModule(out_channels, out_channels, key_channels, value_channels, ([2]), norm_layer, False, **kwargs)) - - rate1, rate2, rate3 = tuple(atrous_rates) - self.b1 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate1, dilation=rate1, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b2 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate2, dilation=rate2, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b3 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate3, dilation=rate3, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b4 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - - self.project = nn.Sequential( - nn.Conv2d(out_channels * 5, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - feat1 = self.context(x) - feat2 = self.b1(x) - feat3 = self.b2(x) - feat4 = self.b3(x) - feat5 = self.b4(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) - out = self.project(out) - return out - - -def get_ocnet(dataset='citys', backbone='resnet50', oc_arch='base', pretrained=False, root='~/.torch/models', - pretrained_base=True, **kwargs): - acronyms = { - 'pascal_voc': 'pascal_voc', - 'pascal_aug': 'pascal_aug', - 'ade20k': 'ade', - 'coco': 'coco', - 'citys': 'citys', - } - from ..data.dataloader import datasets - model = OCNet(datasets[dataset].NUM_CLASS, backbone=backbone, oc_arch=oc_arch, - pretrained_base=pretrained_base, **kwargs) - if pretrained: - from .model_store import get_model_file - device = torch.device(kwargs['local_rank']) - model.load_state_dict(torch.load(get_model_file('%s_ocnet_%s_%s' % ( - oc_arch, backbone, acronyms[dataset]), root=root), - map_location=device)) - return model - - -def get_base_ocnet_resnet101_citys(**kwargs): - return get_ocnet('citys', 'resnet101', 'base', **kwargs) - - -def get_pyramid_ocnet_resnet101_citys(**kwargs): - return get_ocnet('citys', 'resnet101', 'pyramid', **kwargs) - - -def get_asp_ocnet_resnet101_citys(**kwargs): - return get_ocnet('citys', 'resnet101', 'asp', **kwargs) - - -if __name__ == '__main__': - img = torch.randn(1, 3, 256, 256) - model = get_asp_ocnet_resnet101_citys() - outputs = model(img) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Object Context Network for Scene Parsing""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .segbase import SegBaseModel +from .fcn import _FCNHead + +__all__ = ['OCNet', 'get_ocnet', 'get_base_ocnet_resnet101_citys', + 'get_pyramid_ocnet_resnet101_citys', 'get_asp_ocnet_resnet101_citys'] + + +class OCNet(SegBaseModel): + r"""OCNet + + Parameters + ---------- + nclass : int + Number of categories for the training dataset. + backbone : string + Pre-trained dilated backbone network type (default:'resnet50'; 'resnet50', + 'resnet101' or 'resnet152'). + norm_layer : object + Normalization layer used in backbone network (default: :class:`nn.BatchNorm`; + for Synchronized Cross-GPU BachNormalization). + aux : bool + Auxiliary loss. + Reference: + Yuhui Yuan, Jingdong Wang. "OCNet: Object Context Network for Scene Parsing." + arXiv preprint arXiv:1809.00916 (2018). + """ + + def __init__(self, nclass, backbone='resnet101', oc_arch='base', aux=False, pretrained_base=True, **kwargs): + super(OCNet, self).__init__(nclass, aux, backbone, pretrained_base=pretrained_base, **kwargs) + self.head = _OCHead(nclass, oc_arch, **kwargs) + if self.aux: + self.auxlayer = _FCNHead(1024, nclass, **kwargs) + + self.__setattr__('exclusive', ['head', 'auxlayer'] if aux else ['head']) + + def forward(self, x): + size = x.size()[2:] + _, _, c3, c4 = self.base_forward(x) + outputs = [] + x = self.head(c4) + x = F.interpolate(x, size, mode='bilinear', align_corners=True) + outputs.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) + outputs.append(auxout) + return tuple(outputs) + + +class _OCHead(nn.Module): + def __init__(self, nclass, oc_arch, norm_layer=nn.BatchNorm2d, **kwargs): + super(_OCHead, self).__init__() + if oc_arch == 'base': + self.context = nn.Sequential( + nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), + norm_layer(512), + nn.ReLU(True), + BaseOCModule(512, 512, 256, 256, scales=([1]), norm_layer=norm_layer, **kwargs)) + elif oc_arch == 'pyramid': + self.context = nn.Sequential( + nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), + norm_layer(512), + nn.ReLU(True), + PyramidOCModule(512, 512, 256, 512, scales=([1, 2, 3, 6]), norm_layer=norm_layer, **kwargs)) + elif oc_arch == 'asp': + self.context = ASPOCModule(2048, 512, 256, 512, norm_layer=norm_layer, **kwargs) + else: + raise ValueError("Unknown OC architecture!") + + self.out = nn.Conv2d(512, nclass, 1) + + def forward(self, x): + x = self.context(x) + return self.out(x) + + +class BaseAttentionBlock(nn.Module): + """The basic implementation for self-attention block/non-local block.""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scale=1, norm_layer=nn.BatchNorm2d, **kwargs): + super(BaseAttentionBlock, self).__init__() + self.scale = scale + self.key_channels = key_channels + self.value_channels = value_channels + if scale > 1: + self.pool = nn.MaxPool2d(scale) + + self.f_value = nn.Conv2d(in_channels, value_channels, 1) + self.f_key = nn.Sequential( + nn.Conv2d(in_channels, 
key_channels, 1), + norm_layer(key_channels), + nn.ReLU(True) + ) + self.f_query = self.f_key + self.W = nn.Conv2d(value_channels, out_channels, 1) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + def forward(self, x): + batch_size, c, w, h = x.size() + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x).view(batch_size, self.value_channels, -1).permute(0, 2, 1) + query = self.f_query(x).view(batch_size, self.key_channels, -1).permute(0, 2, 1) + key = self.f_key(x).view(batch_size, self.key_channels, -1) + + sim_map = torch.bmm(query, key) * (self.key_channels ** -.5) + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.bmm(sim_map, value).permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.value_channels, *x.size()[2:]) + context = self.W(context) + if self.scale > 1: + context = F.interpolate(context, size=(w, h), mode='bilinear', align_corners=True) + + return context + + +class BaseOCModule(nn.Module): + """Base-OC""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scales=([1]), norm_layer=nn.BatchNorm2d, concat=True, **kwargs): + super(BaseOCModule, self).__init__() + self.stages = nn.ModuleList([ + BaseAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) + for scale in scales]) + in_channels = in_channels * 2 if concat else in_channels + self.project = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.05) + ) + self.concat = concat + + def forward(self, x): + priors = [stage(x) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + if self.concat: + context = torch.cat([context, x], 1) + out = self.project(context) + return out + + +class PyramidAttentionBlock(nn.Module): + """The basic implementation for pyramid self-attention block/non-local block""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scale=1, norm_layer=nn.BatchNorm2d, **kwargs): + super(PyramidAttentionBlock, self).__init__() + self.scale = scale + self.value_channels = value_channels + self.key_channels = key_channels + + self.f_value = nn.Conv2d(in_channels, value_channels, 1) + self.f_key = nn.Sequential( + nn.Conv2d(in_channels, key_channels, 1), + norm_layer(key_channels), + nn.ReLU(True) + ) + self.f_query = self.f_key + self.W = nn.Conv2d(value_channels, out_channels, 1) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + def forward(self, x): + batch_size, c, w, h = x.size() + + local_x = list() + local_y = list() + step_w, step_h = w // self.scale, h // self.scale + for i in range(self.scale): + for j in range(self.scale): + start_x, start_y = step_w * i, step_h * j + end_x, end_y = min(start_x + step_w, w), min(start_y + step_h, h) + if i == (self.scale - 1): + end_x = w + if j == (self.scale - 1): + end_y = h + local_x += [start_x, end_x] + local_y += [start_y, end_y] + + value = self.f_value(x) + query = self.f_query(x) + key = self.f_key(x) + + local_list = list() + local_block_cnt = (self.scale ** 2) * 2 + for i in range(0, local_block_cnt, 2): + value_local = value[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + query_local = query[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + key_local = key[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + + w_local, h_local = value_local.size(2), value_local.size(3) + value_local = 
value_local.contiguous().view(batch_size, self.value_channels, -1).permute(0, 2, 1) + query_local = query_local.contiguous().view(batch_size, self.key_channels, -1).permute(0, 2, 1) + key_local = key_local.contiguous().view(batch_size, self.key_channels, -1) + + sim_map = torch.bmm(query_local, key_local) * (self.key_channels ** -.5) + sim_map = F.softmax(sim_map, dim=-1) + + context_local = torch.bmm(sim_map, value_local).permute(0, 2, 1).contiguous() + context_local = context_local.view(batch_size, self.value_channels, w_local, h_local) + local_list.append(context_local) + + context_list = list() + for i in range(0, self.scale): + row_tmp = list() + for j in range(self.scale): + row_tmp.append(local_list[j + i * self.scale]) + context_list.append(torch.cat(row_tmp, 3)) + + context = torch.cat(context_list, 2) + context = self.W(context) + + return context + + +class PyramidOCModule(nn.Module): + """Pyramid-OC""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scales=([1]), norm_layer=nn.BatchNorm2d, **kwargs): + super(PyramidOCModule, self).__init__() + self.stages = nn.ModuleList([ + PyramidAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) + for scale in scales]) + self.up_dr = nn.Sequential( + nn.Conv2d(in_channels, in_channels * len(scales), 1), + norm_layer(in_channels * len(scales)), + nn.ReLU(True) + ) + self.project = nn.Sequential( + nn.Conv2d(in_channels * len(scales) * 2, out_channels, 1), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.05) + ) + + def forward(self, x): + priors = [stage(x) for stage in self.stages] + context = [self.up_dr(x)] + for i in range(len(priors)): + context += [priors[i]] + context = torch.cat(context, 1) + out = self.project(context) + return out + + +class ASPOCModule(nn.Module): + """ASP-OC""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + atrous_rates=(12, 24, 36), norm_layer=nn.BatchNorm2d, **kwargs): + super(ASPOCModule, self).__init__() + self.context = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=1), + norm_layer(out_channels), + nn.ReLU(True), + BaseOCModule(out_channels, out_channels, key_channels, value_channels, ([2]), norm_layer, False, **kwargs)) + + rate1, rate2, rate3 = tuple(atrous_rates) + self.b1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate1, dilation=rate1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b2 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate2, dilation=rate2, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b3 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate3, dilation=rate3, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b4 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + + self.project = nn.Sequential( + nn.Conv2d(out_channels * 5, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.1) + ) + + def forward(self, x): + feat1 = self.context(x) + feat2 = self.b1(x) + feat3 = self.b2(x) + feat4 = self.b3(x) + feat5 = self.b4(x) + out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) + out = self.project(out) + return out + + +def get_ocnet(dataset='citys', backbone='resnet50', oc_arch='base', pretrained=False, root='~/.torch/models', + pretrained_base=True, **kwargs): + acronyms = { + 'pascal_voc': 'pascal_voc', + 'pascal_aug': 
'pascal_aug', + 'ade20k': 'ade', + 'coco': 'coco', + 'citys': 'citys', + } + from ..data.dataloader import datasets + model = OCNet(datasets[dataset].NUM_CLASS, backbone=backbone, oc_arch=oc_arch, + pretrained_base=pretrained_base, **kwargs) + if pretrained: + from .model_store import get_model_file + device = torch.device(kwargs['local_rank']) + model.load_state_dict(torch.load(get_model_file('%s_ocnet_%s_%s' % ( + oc_arch, backbone, acronyms[dataset]), root=root), + map_location=device)) + return model + + +def get_base_ocnet_resnet101_citys(**kwargs): + return get_ocnet('citys', 'resnet101', 'base', **kwargs) + + +def get_pyramid_ocnet_resnet101_citys(**kwargs): + return get_ocnet('citys', 'resnet101', 'pyramid', **kwargs) + + +def get_asp_ocnet_resnet101_citys(**kwargs): + return get_ocnet('citys', 'resnet101', 'asp', **kwargs) + + +if __name__ == '__main__': + img = torch.randn(1, 3, 256, 256) + model = get_asp_ocnet_resnet101_citys() + outputs = model(img) diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/eval.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/eval.py index 0633c0615df2c223f219a6ef44b1b8d92588bb75..0bbb7efdd2f5e70e7d1ecfb9174dc7ca460650c6 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/eval.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/eval.py @@ -1,143 +1,143 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
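# BaseAttentionBlock above implements non-local (self-)attention over all spatial positions:
# queries and keys are compared with a scaled dot product, softmax-normalised, and used to mix
# the values. A minimal standalone sketch of just that computation (names and shapes are
# illustrative, not part of the module):
import torch
import torch.nn.functional as F

def spatial_self_attention(query, key, value):
    # query, key: B x Ck x H x W ; value: B x Cv x H x W
    b, ck, h, w = query.shape
    cv = value.shape[1]
    q = query.view(b, ck, -1).permute(0, 2, 1)        # B x HW x Ck
    k = key.view(b, ck, -1)                           # B x Ck x HW
    v = value.view(b, cv, -1).permute(0, 2, 1)        # B x HW x Cv
    sim = torch.bmm(q, k) * (ck ** -0.5)              # B x HW x HW, scaled as in the module
    attn = F.softmax(sim, dim=-1)                     # attention over all positions
    context = torch.bmm(attn, v).permute(0, 2, 1).contiguous()
    return context.view(b, cv, h, w)                  # B x Cv x H x W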
- -from __future__ import print_function - -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(cur_path)[0] -sys.path.append(root_path) - -import torch -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn - -from torchvision import transforms -from core.data.dataloader import get_segmentation_dataset -from core.models.model_zoo import get_segmentation_model -from core.utils.score import SegmentationMetric -from core.utils.visualize import get_color_pallete -from core.utils.logger import setup_logger -from core.utils.distributed import synchronize, get_rank, make_data_sampler, make_batch_data_sampler - -from train import parse_args - - -class Evaluator(object): - def __init__(self, args): - self.args = args - loc = 'npu:{}'.format(self.args.local_rank) - #self.device = torch.device(args.device) - if args.device == 'npu': - self.device = torch.npu.set_device(loc) - else: - self.device = torch.device(args.device) - - # image transform - input_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize([.485, .456, .406], [.229, .224, .225]), - ]) - - # dataset and dataloader - val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='testval', transform=input_transform) - val_sampler = make_data_sampler(val_dataset, False, args.distributed) - val_batch_sampler = make_batch_data_sampler(val_sampler, images_per_batch=1) - self.val_loader = data.DataLoader(dataset=val_dataset, - batch_sampler=val_batch_sampler, - num_workers=args.workers, - pin_memory=True) - - # create network - BatchNorm2d = nn.BatchNorm2d#nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d - self.model = get_segmentation_model(model=args.model, dataset=args.dataset, backbone=args.backbone, - aux=args.aux, pretrained=True, pretrained_base=False, - local_rank=args.local_rank, - norm_layer=BatchNorm2d).to(loc) - if args.distributed: - self.model = nn.parallel.DistributedDataParallel(self.model, - device_ids=[args.local_rank], output_device=args.local_rank) - self.model.to(loc) - - self.metric = SegmentationMetric(val_dataset.num_class) - - def eval(self): - loc = 'npu:{}'.format(self.args.local_rank) - self.metric.reset() - self.model.eval() - if self.args.distributed: - model = self.model.module - else: - model = self.model - logger.info("Start validation, Total sample: {:d}".format(len(self.val_loader))) - for i, (image, target, filename) in enumerate(self.val_loader): - image = image.to(loc) - target = target.to(loc) - target = target.to(torch.int32) - - with torch.no_grad(): - outputs = model(image) - self.metric.update(outputs[0], target) - pixAcc, mIoU = self.metric.get() - logger.info("Sample: {:d}, validation pixAcc: {:.3f}, mIoU: {:.3f}".format( - i + 1, pixAcc * 100, mIoU * 100)) - - if self.args.save_pred: - pred = torch.argmax(outputs[0], 1) - pred = pred.cpu().data.numpy() - - predict = pred.squeeze(0) - mask = get_color_pallete(predict, self.args.dataset) - mask.save(os.path.join(outdir, os.path.splitext(filename[0])[0] + '.png')) - synchronize() - - -if __name__ == '__main__': - args = parse_args() - num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 - args.distributed = num_gpus > 1 - if args.device == "npu": - args.device = "npu" - elif not args.no_cuda and torch.cuda.is_available(): - cudnn.benchmark = True - args.device = "cuda" - else: - args.distributed = False - args.device = "cpu" - if args.distributed: - torch.npu.set_device(args.local_rank) - 
torch.distributed.init_process_group(backend=args.dist_backend, init_method="env://") - synchronize() - - # TODO: optim code - args.save_pred = True - if args.save_pred: - outdir = '../runs/pred_pic/{}_{}_{}'.format(args.model, args.backbone, args.dataset) - if args.model == "enet": - outdir = '../runs/pred_pic/{}_{}'.format(args.model, args.dataset) - if not os.path.exists(outdir): - os.makedirs(outdir) - - log_filename = '{}_{}_{}_log.txt'.format(args.model, args.backbone, args.dataset) - if args.model == "enet": - log_filename = '{}_{}_log.txt'.format(args.model, args.dataset) - logger = setup_logger("semantic_segmentation", args.log_dir, get_rank(), - filename=log_filename, mode='a+') - - evaluator = Evaluator(args) - evaluator.eval() - torch.npu.empty_cache() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn + +from torchvision import transforms +from core.data.dataloader import get_segmentation_dataset +from core.models.model_zoo import get_segmentation_model +from core.utils.score import SegmentationMetric +from core.utils.visualize import get_color_pallete +from core.utils.logger import setup_logger +from core.utils.distributed import synchronize, get_rank, make_data_sampler, make_batch_data_sampler + +from train import parse_args + + +class Evaluator(object): + def __init__(self, args): + self.args = args + loc = 'npu:{}'.format(self.args.local_rank) + #self.device = torch.device(args.device) + if args.device == 'npu': + self.device = torch.npu.set_device(loc) + else: + self.device = torch.device(args.device) + + # image transform + input_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([.485, .456, .406], [.229, .224, .225]), + ]) + + # dataset and dataloader + val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='testval', transform=input_transform) + val_sampler = make_data_sampler(val_dataset, False, args.distributed) + val_batch_sampler = make_batch_data_sampler(val_sampler, images_per_batch=1) + self.val_loader = data.DataLoader(dataset=val_dataset, + batch_sampler=val_batch_sampler, + num_workers=args.workers, + pin_memory=True) + + # create network + BatchNorm2d = nn.BatchNorm2d#nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d + self.model = get_segmentation_model(model=args.model, dataset=args.dataset, backbone=args.backbone, + aux=args.aux, pretrained=True, pretrained_base=False, + local_rank=args.local_rank, + norm_layer=BatchNorm2d).to(loc) + if args.distributed: + self.model = nn.parallel.DistributedDataParallel(self.model, + device_ids=[args.local_rank], output_device=args.local_rank) + self.model.to(loc) + + self.metric = SegmentationMetric(val_dataset.num_class) + + def 
eval(self): + loc = 'npu:{}'.format(self.args.local_rank) + self.metric.reset() + self.model.eval() + if self.args.distributed: + model = self.model.module + else: + model = self.model + logger.info("Start validation, Total sample: {:d}".format(len(self.val_loader))) + for i, (image, target, filename) in enumerate(self.val_loader): + image = image.to(loc) + target = target.to(loc) + target = target.to(torch.int32) + + with torch.no_grad(): + outputs = model(image) + self.metric.update(outputs[0], target) + pixAcc, mIoU = self.metric.get() + logger.info("Sample: {:d}, validation pixAcc: {:.3f}, mIoU: {:.3f}".format( + i + 1, pixAcc * 100, mIoU * 100)) + + if self.args.save_pred: + pred = torch.argmax(outputs[0], 1) + pred = pred.cpu().data.numpy() + + predict = pred.squeeze(0) + mask = get_color_pallete(predict, self.args.dataset) + mask.save(os.path.join(outdir, os.path.splitext(filename[0])[0] + '.png')) + synchronize() + + +if __name__ == '__main__': + args = parse_args() + num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.distributed = num_gpus > 1 + if args.device == "npu": + args.device = "npu" + elif not args.no_cuda and torch.cuda.is_available(): + cudnn.benchmark = True + args.device = "cuda" + else: + args.distributed = False + args.device = "cpu" + if args.distributed: + torch.npu.set_device(args.local_rank) + torch.distributed.init_process_group(backend=args.dist_backend, init_method="env://") + synchronize() + + # TODO: optim code + args.save_pred = True + if args.save_pred: + outdir = '../runs/pred_pic/{}_{}_{}'.format(args.model, args.backbone, args.dataset) + if args.model == "enet": + outdir = '../runs/pred_pic/{}_{}'.format(args.model, args.dataset) + if not os.path.exists(outdir): + os.makedirs(outdir) + + log_filename = '{}_{}_{}_log.txt'.format(args.model, args.backbone, args.dataset) + if args.model == "enet": + log_filename = '{}_{}_log.txt'.format(args.model, args.dataset) + logger = setup_logger("semantic_segmentation", args.log_dir, get_rank(), + filename=log_filename, mode='a+') + + evaluator = Evaluator(args) + evaluator.eval() + torch.npu.empty_cache() diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/proc_node_module.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/proc_node_module.py index c139998dc9a92d2570632335540867dca6e5ee57..31cf2b16c7245ef358776030c28149440dda1cbb 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/proc_node_module.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/proc_node_module.py @@ -1,40 +1,40 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from collections import OrderedDict -import os - -def proc_nodes_module(checkpoint): - new_state_dict = OrderedDict() - for k, v in checkpoint.items(): - if "module." 
in k: - name = k.replace("module.", "") - else: - name = k - new_state_dict[name] = v - return new_state_dict - -root ='~/.torch/models' -root = os.path.expanduser(root) -file_path = os.path.join(root, 'enet_citys.pth') -checkpoint = torch.load(file_path, map_location='cpu') -checkpoint = proc_nodes_module(checkpoint) - - -#directory = os.path.expanduser(args.save_dir) -directory = os.path.expanduser(root) -filename = 'enet_citys.pth' -filename = os.path.join(directory, filename) -torch.save(checkpoint, filename) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from collections import OrderedDict +import os + +def proc_nodes_module(checkpoint): + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if "module." in k: + name = k.replace("module.", "") + else: + name = k + new_state_dict[name] = v + return new_state_dict + +root ='~/.torch/models' +root = os.path.expanduser(root) +file_path = os.path.join(root, 'enet_citys.pth') +checkpoint = torch.load(file_path, map_location='cpu') +checkpoint = proc_nodes_module(checkpoint) + + +#directory = os.path.expanduser(args.save_dir) +directory = os.path.expanduser(root) +filename = 'enet_citys.pth' +filename = os.path.join(directory, filename) +torch.save(checkpoint, filename) diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/train.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/train.py index e4e27aa58c51a83e371d7e047e60c51f622cacb5..137742ca7019d80dd551ea01cb659fbfc2b41d00 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/train.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/scripts/train.py @@ -1,449 +1,449 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
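# proc_nodes_module above is needed because checkpoints saved from a model wrapped in
# nn.DataParallel / DistributedDataParallel store every key with a leading "module." prefix,
# which a bare (unwrapped) model cannot load. A compact equivalent, assuming the prefix can
# only occur at the start of a key:
from collections import OrderedDict

def strip_module_prefix(state_dict):
    return OrderedDict(
        (k[len("module."):] if k.startswith("module.") else k, v)
        for k, v in state_dict.items())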
- -# -*- coding: utf-8 -*- - -import argparse -import time -import datetime -import os -import shutil -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(cur_path)[0] -sys.path.append(root_path) - -CALCULATE_DEVICE = "npu:0" - -import torch -import torch.nn as nn -import torch.utils.data as data -import torch.backends.cudnn as cudnn - -import torch.npu - -from torchvision import transforms -from core.data.dataloader import get_segmentation_dataset -from core.models.model_zoo import get_segmentation_model -from core.utils.loss import get_segmentation_loss -from core.utils.distributed import * -from core.utils.logger import setup_logger -from core.utils.lr_scheduler import WarmupPolyLR -from core.utils.score import SegmentationMetric -from apex import amp - - -def parse_args(): - parser = argparse.ArgumentParser(description='Semantic Segmentation Training With Pytorch') - # model and dataset - parser.add_argument('--model', type=str, default='fcn', - choices=['fcn32s', 'fcn16s', 'fcn8s', 'fcn', 'psp', 'deeplabv3', - 'deeplabv3_plus', 'danet', 'denseaspp', 'bisenet', 'encnet', - 'dunet', 'icnet', 'enet', 'ocnet', 'psanet', 'cgnet', 'espnet', - 'lednet', 'dfanet'], - help='model name (default: fcn32s)') - parser.add_argument('--backbone', type=str, default='resnet50', - choices=['vgg16', 'resnet18', 'resnet50', 'resnet101', 'resnet152', - 'densenet121', 'densenet161', 'densenet169', 'densenet201'], - help='backbone name (default: vgg16)') - parser.add_argument('--dataset', type=str, default='pascal_voc', - choices=['pascal_voc', 'pascal_aug', 'ade20k', 'citys', 'sbu'], - help='dataset name (default: pascal_voc)') - parser.add_argument('--base-size', type=int, default=520, - help='base image size') - parser.add_argument('--crop-size', type=int, default=480, - help='crop image size') - parser.add_argument('--workers', '-j', type=int, default=4, - metavar='N', help='dataloader threads') - # training hyper params - parser.add_argument('--jpu', action='store_true', default=False, - help='JPU') - parser.add_argument('--use-ohem', type=bool, default=False, - help='OHEM Loss for cityscapes dataset') - parser.add_argument('--aux', action='store_true', default=False, - help='Auxiliary loss') - parser.add_argument('--aux-weight', type=float, default=0.4, - help='auxiliary loss weight') - parser.add_argument('--batch-size', type=int, default=4, metavar='N', - help='input batch size for training (default: 8)') - parser.add_argument('--start_epoch', type=int, default=0, - metavar='N', help='start epochs (default:0)') - parser.add_argument('--epochs', type=int, default=50, metavar='N', - help='number of epochs to train (default: 50)') - parser.add_argument('--lr', type=float, default=1e-4, metavar='LR', - help='learning rate (default: 1e-4)') - parser.add_argument('--momentum', type=float, default=0.9, metavar='M', - help='momentum (default: 0.9)') - parser.add_argument('--weight-decay', type=float, default=1e-4, metavar='M', - help='w-decay (default: 5e-4)') - parser.add_argument('--warmup-iters', type=int, default=0, - help='warmup iters') - parser.add_argument('--warmup-factor', type=float, default=1.0 / 3, - help='lr = warmup_factor * lr') - parser.add_argument('--warmup-method', type=str, default='linear', - help='method of warmup') - # cuda setting - parser.add_argument('--no-cuda', action='store_true', default=False, - help='disables CUDA training') - parser.add_argument('--local_rank', type=int, default=0) - # checkpoint and log - 
parser.add_argument('--resume', type=str, default=None, - help='put the path to resuming file if needed') - parser.add_argument('--save-dir', default='~/.torch/models', - help='Directory for saving checkpoint models') - parser.add_argument('--save-epoch', type=int, default=10, - help='save model every checkpoint-epoch') - parser.add_argument('--log-dir', default='../runs/logs/', - help='Directory for saving checkpoint models') - parser.add_argument('--log-iter', type=int, default=10, - help='print log every log-iter') - # evaluation only - parser.add_argument('--val-epoch', type=int, default=1, - help='run validation every val-epoch') - parser.add_argument('--skip-val', action='store_true', default=False, - help='skip validation during training') - # apex - parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') - parser.add_argument('--loss-scale', default=128.0, type=float, - help='loss scale using in amp, default -1 means dynamic') - parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') - - # npu setting - parser.add_argument('--device', default='npu', type=str, - help='npu or gpu') - parser.add_argument('--dist-backend', default='hccl', type=str, - help='distributed backend') - - args = parser.parse_args() - - # default settings for epochs, batch_size and lr - if args.epochs is None: - epoches = { - 'coco': 30, - 'pascal_aug': 80, - 'pascal_voc': 50, - 'pcontext': 80, - 'ade20k': 160, - 'citys': 120, - 'sbu': 160, - } - args.epochs = epoches[args.dataset.lower()] - if args.lr is None: - lrs = { - 'coco': 0.004, - 'pascal_aug': 0.001, - 'pascal_voc': 0.0001, - 'pcontext': 0.001, - 'ade20k': 0.01, - 'citys': 0.01, - 'sbu': 0.001, - } - args.lr = lrs[args.dataset.lower()] / 8 * args.batch_size - return args - - -class Trainer(object): - def __init__(self, args): - self.args = args - # self.device = torch.device(args.device) - - loc = 'npu:{}'.format(args.local_rank) - if args.device == "npu": - self.device = torch.npu.set_device(loc) - else: - self.device = torch.device(args.device) - - # image transform - input_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize([.485, .456, .406], [.229, .224, .225]), - ]) - # dataset and dataloader - data_kwargs = {'transform': input_transform, 'base_size': args.base_size, 'crop_size': args.crop_size} - train_dataset = get_segmentation_dataset(args.dataset, split='train', mode='train', **data_kwargs) - val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='val', **data_kwargs) - args.iters_per_epoch = len(train_dataset) // (args.num_gpus * args.batch_size) - args.max_iters = args.epochs * args.iters_per_epoch - - train_sampler = make_data_sampler(train_dataset, shuffle=True, distributed=args.distributed) - train_batch_sampler = make_batch_data_sampler(train_sampler, args.batch_size, args.max_iters) - val_sampler = make_data_sampler(val_dataset, False, args.distributed) - val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size) - - self.train_loader = data.DataLoader(dataset=train_dataset, - batch_sampler=train_batch_sampler, - num_workers=args.workers, - pin_memory=True) - self.val_loader = data.DataLoader(dataset=val_dataset, - batch_sampler=val_batch_sampler, - num_workers=args.workers, - pin_memory=True) - - # create network - BatchNorm2d = nn.BatchNorm2d#nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d - self.model = get_segmentation_model(model=args.model, 
dataset=args.dataset, backbone=args.backbone, - aux=args.aux, jpu=args.jpu, norm_layer=BatchNorm2d).to(loc) - - # resume checkpoint if needed - if args.resume: - if os.path.isfile(args.resume): - name, ext = os.path.splitext(args.resume) - assert ext == '.pkl' or '.pth', 'Sorry only .pth and .pkl files supported.' - print('Resuming training, loading {}...'.format(args.resume)) - self.model.load_state_dict(torch.load(args.resume, map_location=lambda storage, loc: storage)) - - # create criterion - self.criterion = get_segmentation_loss(args.model, use_ohem=args.use_ohem, aux=args.aux, - aux_weight=args.aux_weight, ignore_index=-1).to(loc) # (args.device) - - # optimizer, for model just includes pretrained, head and auxlayer - params_list = list() - if hasattr(self.model, 'pretrained'): - params_list.append({'params': self.model.pretrained.parameters(), 'lr': args.lr}) - if hasattr(self.model, 'exclusive'): - for module in self.model.exclusive: - params_list.append({'params': getattr(self.model, module).parameters(), 'lr': args.lr * 10}) - self.optimizer = torch.optim.SGD(params_list, - lr=args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - if args.amp: - self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level=args.opt_level, - loss_scale=args.loss_scale) - - # lr scheduling - self.lr_scheduler = WarmupPolyLR(self.optimizer, - max_iters=args.max_iters, - power=0.9, - warmup_factor=args.warmup_factor, - warmup_iters=args.warmup_iters, - warmup_method=args.warmup_method) - - if args.distributed: - self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[args.local_rank], - output_device=args.local_rank, find_unused_parameters=True, broadcast_buffers=False) - - # evaluation metrics - self.metric = SegmentationMetric(train_dataset.num_class) - - self.best_pred = 0.0 - - def train(self): - batch_time = AverageMeter('Time', ':6.3f', start_count_index=5) - - loc = 'npu:{}'.format(self.args.local_rank) - - save_to_disk = get_rank() == 0 - epochs, max_iters = self.args.epochs, self.args.max_iters - log_per_iters, val_per_iters = self.args.log_iter, self.args.val_epoch * self.args.iters_per_epoch - save_per_iters = self.args.save_epoch * self.args.iters_per_epoch - start_time = time.time() - logger.info('Start training, Total Epochs: {:d} = Total Iterations {:d}'.format(epochs, max_iters)) - - end = time.time() - - self.model.train() - for iteration, (images, targets, _) in enumerate(self.train_loader): - iteration = iteration + 1 - self.lr_scheduler.step() - - # if 'npu' in CALCULATE_DEVICE: - targets = targets.to(torch.int32) - images = images.to(loc) - targets = targets.to(loc) - - # with torch.autograd.profiler.profile(use_npu=True) as prof: - outputs = self.model(images) - - loss_dict = self.criterion(outputs, targets) - - losses = sum(loss for loss in loss_dict.values()) - - # reduce losses over all GPUs for logging purposes - loss_dict_reduced = reduce_loss_dict(loss_dict) - losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - - self.optimizer.zero_grad() - - # losses.backward() - if self.args.amp: - with amp.scale_loss(losses, self.optimizer) as scaled_loss: - scaled_loss.backward() - else: - losses.backward() - - self.optimizer.step() - - # print(prof.key_averages().table(sort_by="self_cpu_time_total")) - # prof.export_chrome_trace("output.prof") # "output.prof"为输出文件地址 - - eta_seconds = ((time.time() - start_time) / iteration) * (max_iters - iteration) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 
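# The WarmupPolyLR constructed above lives in core.utils.lr_scheduler and is not shown in this
# patch; a typical "poly" schedule with a linear warmup, matching the arguments passed here
# (power=0.9, warmup_factor, warmup_iters, warmup_method), looks roughly like this sketch:
def poly_lr(base_lr, cur_iter, max_iters, power=0.9,
            warmup_factor=1.0 / 3, warmup_iters=0, warmup_method='linear'):
    if warmup_method == 'linear' and cur_iter < warmup_iters:
        alpha = cur_iter / warmup_iters
        factor = warmup_factor * (1 - alpha) + alpha  # ramps linearly from warmup_factor to 1
        return base_lr * factor
    return base_lr * (1 - cur_iter / max_iters) ** power  # polynomial decay after warmup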
- - if iteration % log_per_iters == 0 and save_to_disk: - logger.info( - "Iters: {:d}/{:d} || Lr: {:.6f} || Loss: {:.4f} || Cost Time: {} || Estimated Time: {}".format( - iteration, max_iters, self.optimizer.param_groups[0]['lr'], losses_reduced.item(), - str(datetime.timedelta(seconds=int(time.time() - start_time))), eta_string)) - - if iteration % self.args.iters_per_epoch == 0 and batch_time.avg > 0: - logger.info("Epoch: {:d}/{:d} || FPS img/s: {:.3f}".format( - iteration // self.args.iters_per_epoch, epochs, - args.num_gpus * self.args.batch_size / batch_time.avg)) - - if iteration % save_per_iters == 0 and save_to_disk and self.args.local_rank == 0: - save_checkpoint(self.model, self.args, is_best=False) - - batch_time.update(time.time() - end) - - if not self.args.skip_val and iteration % val_per_iters == 0: - self.validation() - self.model.train() - - end = time.time() - - if self.args.local_rank == 0: - save_checkpoint(self.model, self.args, is_best=False) - total_training_time = time.time() - start_time - total_training_str = str(datetime.timedelta(seconds=total_training_time)) - logger.info( - "Total training time: {} ({:.4f}s / it)".format( - total_training_str, total_training_time / max_iters)) - - def validation(self): - loc = 'npu:{}'.format(self.args.local_rank) - - # total_inter, total_union, total_correct, total_label = 0, 0, 0, 0 - is_best = False - self.metric.reset() - #if self.args.distributed: - #model = self.model.module - #else: - model = self.model - torch.npu.empty_cache() # TODO check if it helps - model.eval() - for i, (image, target, filename) in enumerate(self.val_loader): - # if 'npu' in CALCULATE_DEVICE: - # target = target.to(torch.int32) - target = target.to(torch.int32) - image = image.to(loc) - target = target.to(loc) - with torch.no_grad(): - outputs = model(image) - self.metric.update(outputs[0], target) - pixAcc, mIoU = self.metric.get() - logger.info("Sample: {:d}, Validation pixAcc: {:.3f}, mIoU: {:.3f}".format(i + 1, pixAcc, mIoU)) - - new_pred = (pixAcc + mIoU) / 2 - if new_pred > self.best_pred: - is_best = True - self.best_pred = new_pred - if self.args.local_rank == 0: - save_checkpoint(self.model, self.args, is_best) - synchronize() - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=10): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -def save_checkpoint(model, args, is_best=False): - """Save Checkpoint""" - directory = os.path.expanduser(args.save_dir) - if not os.path.exists(directory): - os.makedirs(directory) - - filename = '{}_{}_{}.pth'.format(args.model, args.backbone, args.dataset) - if args.model == "enet": - filename = '{}_{}.pth'.format(args.model, args.dataset) - filename = os.path.join(directory, filename) - - #if args.distributed: - #model = model.module - torch.save(model.state_dict(), filename) - if is_best: - best_filename = '{}_{}_{}_best_model.pth'.format(args.model, args.backbone, args.dataset) - if args.model == "enet": - 
best_filename = '{}_{}_best_model.pth'.format(args.model, args.dataset) - best_filename = os.path.join(directory, best_filename) - shutil.copyfile(filename, best_filename) - - -if __name__ == '__main__': - args = parse_args() - - os.environ['MASTER_ADDR'] = '127.0.0.1' # 可以使用当前真实ip或者'127.0.0.1' - os.environ['MASTER_PORT'] = '29688' # 随意一个可使用的port即可 - - # reference maskrcnn-benchmark - num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 - args.num_gpus = num_gpus - #args.num_gpus = 1 - args.distributed = num_gpus > 1 - - if args.device == "npu": - args.device = "npu" - elif not args.no_cuda and torch.cuda.is_available(): - cudnn.benchmark = True - args.device = "cuda" - else: - args.distributed = False - args.device = "cpu" - if args.distributed: - loc = 'npu:{}'.format(args.local_rank) - torch.npu.set_device(loc) - torch.distributed.init_process_group(backend=args.dist_backend, init_method="env://") - synchronize() - args.lr = args.lr * num_gpus - - logger_filename = '{}_{}_{}_log.txt'.format(args.model, args.backbone, args.dataset) - if args.model == "enet": - logger_filename = '{}_{}_log.txt'.format(args.model, args.dataset) - logger = setup_logger("semantic_segmentation", args.log_dir, get_rank(), filename=logger_filename) - logger.info("Using {} GPUs".format(num_gpus)) - logger.info(args) - - trainer = Trainer(args) - trainer.train() - torch.npu.empty_cache() - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
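# Condensed view of the per-process distributed setup done in __main__ above (it assumes the
# Ascend torch_npu plugin is installed and the launcher exports WORLD_SIZE/LOCAL_RANK). The two
# environment variables are the rendezvous address ('127.0.0.1' or the node's real IP) and any
# free port; each process then binds its own NPU before joining the HCCL process group.
import os
import torch
import torch.distributed as dist

def init_distributed_npu(local_rank, backend='hccl'):
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29688')
    torch.npu.set_device('npu:{}'.format(local_rank))
    dist.init_process_group(backend=backend, init_method='env://')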
+ +# -*- coding: utf-8 -*- + +import argparse +import time +import datetime +import os +import shutil +import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +CALCULATE_DEVICE = "npu:0" + +import torch +import torch.nn as nn +import torch.utils.data as data +import torch.backends.cudnn as cudnn + +import torch.npu + +from torchvision import transforms +from core.data.dataloader import get_segmentation_dataset +from core.models.model_zoo import get_segmentation_model +from core.utils.loss import get_segmentation_loss +from core.utils.distributed import * +from core.utils.logger import setup_logger +from core.utils.lr_scheduler import WarmupPolyLR +from core.utils.score import SegmentationMetric +from apex import amp + + +def parse_args(): + parser = argparse.ArgumentParser(description='Semantic Segmentation Training With Pytorch') + # model and dataset + parser.add_argument('--model', type=str, default='fcn', + choices=['fcn32s', 'fcn16s', 'fcn8s', 'fcn', 'psp', 'deeplabv3', + 'deeplabv3_plus', 'danet', 'denseaspp', 'bisenet', 'encnet', + 'dunet', 'icnet', 'enet', 'ocnet', 'psanet', 'cgnet', 'espnet', + 'lednet', 'dfanet'], + help='model name (default: fcn32s)') + parser.add_argument('--backbone', type=str, default='resnet50', + choices=['vgg16', 'resnet18', 'resnet50', 'resnet101', 'resnet152', + 'densenet121', 'densenet161', 'densenet169', 'densenet201'], + help='backbone name (default: vgg16)') + parser.add_argument('--dataset', type=str, default='pascal_voc', + choices=['pascal_voc', 'pascal_aug', 'ade20k', 'citys', 'sbu'], + help='dataset name (default: pascal_voc)') + parser.add_argument('--base-size', type=int, default=520, + help='base image size') + parser.add_argument('--crop-size', type=int, default=480, + help='crop image size') + parser.add_argument('--workers', '-j', type=int, default=4, + metavar='N', help='dataloader threads') + # training hyper params + parser.add_argument('--jpu', action='store_true', default=False, + help='JPU') + parser.add_argument('--use-ohem', type=bool, default=False, + help='OHEM Loss for cityscapes dataset') + parser.add_argument('--aux', action='store_true', default=False, + help='Auxiliary loss') + parser.add_argument('--aux-weight', type=float, default=0.4, + help='auxiliary loss weight') + parser.add_argument('--batch-size', type=int, default=4, metavar='N', + help='input batch size for training (default: 8)') + parser.add_argument('--start_epoch', type=int, default=0, + metavar='N', help='start epochs (default:0)') + parser.add_argument('--epochs', type=int, default=50, metavar='N', + help='number of epochs to train (default: 50)') + parser.add_argument('--lr', type=float, default=1e-4, metavar='LR', + help='learning rate (default: 1e-4)') + parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='momentum (default: 0.9)') + parser.add_argument('--weight-decay', type=float, default=1e-4, metavar='M', + help='w-decay (default: 5e-4)') + parser.add_argument('--warmup-iters', type=int, default=0, + help='warmup iters') + parser.add_argument('--warmup-factor', type=float, default=1.0 / 3, + help='lr = warmup_factor * lr') + parser.add_argument('--warmup-method', type=str, default='linear', + help='method of warmup') + # cuda setting + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--local_rank', type=int, default=0) + # checkpoint and log + 
parser.add_argument('--resume', type=str, default=None, + help='put the path to resuming file if needed') + parser.add_argument('--save-dir', default='~/.torch/models', + help='Directory for saving checkpoint models') + parser.add_argument('--save-epoch', type=int, default=10, + help='save model every checkpoint-epoch') + parser.add_argument('--log-dir', default='../runs/logs/', + help='Directory for saving checkpoint models') + parser.add_argument('--log-iter', type=int, default=10, + help='print log every log-iter') + # evaluation only + parser.add_argument('--val-epoch', type=int, default=1, + help='run validation every val-epoch') + parser.add_argument('--skip-val', action='store_true', default=False, + help='skip validation during training') + # apex + parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') + parser.add_argument('--loss-scale', default=128.0, type=float, + help='loss scale using in amp, default -1 means dynamic') + parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') + + # npu setting + parser.add_argument('--device', default='npu', type=str, + help='npu or gpu') + parser.add_argument('--dist-backend', default='hccl', type=str, + help='distributed backend') + + args = parser.parse_args() + + # default settings for epochs, batch_size and lr + if args.epochs is None: + epoches = { + 'coco': 30, + 'pascal_aug': 80, + 'pascal_voc': 50, + 'pcontext': 80, + 'ade20k': 160, + 'citys': 120, + 'sbu': 160, + } + args.epochs = epoches[args.dataset.lower()] + if args.lr is None: + lrs = { + 'coco': 0.004, + 'pascal_aug': 0.001, + 'pascal_voc': 0.0001, + 'pcontext': 0.001, + 'ade20k': 0.01, + 'citys': 0.01, + 'sbu': 0.001, + } + args.lr = lrs[args.dataset.lower()] / 8 * args.batch_size + return args + + +class Trainer(object): + def __init__(self, args): + self.args = args + # self.device = torch.device(args.device) + + loc = 'npu:{}'.format(args.local_rank) + if args.device == "npu": + self.device = torch.npu.set_device(loc) + else: + self.device = torch.device(args.device) + + # image transform + input_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([.485, .456, .406], [.229, .224, .225]), + ]) + # dataset and dataloader + data_kwargs = {'transform': input_transform, 'base_size': args.base_size, 'crop_size': args.crop_size} + train_dataset = get_segmentation_dataset(args.dataset, split='train', mode='train', **data_kwargs) + val_dataset = get_segmentation_dataset(args.dataset, split='val', mode='val', **data_kwargs) + args.iters_per_epoch = len(train_dataset) // (args.num_gpus * args.batch_size) + args.max_iters = args.epochs * args.iters_per_epoch + + train_sampler = make_data_sampler(train_dataset, shuffle=True, distributed=args.distributed) + train_batch_sampler = make_batch_data_sampler(train_sampler, args.batch_size, args.max_iters) + val_sampler = make_data_sampler(val_dataset, False, args.distributed) + val_batch_sampler = make_batch_data_sampler(val_sampler, args.batch_size) + + self.train_loader = data.DataLoader(dataset=train_dataset, + batch_sampler=train_batch_sampler, + num_workers=args.workers, + pin_memory=True) + self.val_loader = data.DataLoader(dataset=val_dataset, + batch_sampler=val_batch_sampler, + num_workers=args.workers, + pin_memory=True) + + # create network + BatchNorm2d = nn.BatchNorm2d#nn.SyncBatchNorm if args.distributed else nn.BatchNorm2d + self.model = get_segmentation_model(model=args.model, 
dataset=args.dataset, backbone=args.backbone, + aux=args.aux, jpu=args.jpu, norm_layer=BatchNorm2d).to(loc) + + # resume checkpoint if needed + if args.resume: + if os.path.isfile(args.resume): + name, ext = os.path.splitext(args.resume) + assert ext == '.pkl' or '.pth', 'Sorry only .pth and .pkl files supported.' + print('Resuming training, loading {}...'.format(args.resume)) + self.model.load_state_dict(torch.load(args.resume, map_location=lambda storage, loc: storage)) + + # create criterion + self.criterion = get_segmentation_loss(args.model, use_ohem=args.use_ohem, aux=args.aux, + aux_weight=args.aux_weight, ignore_index=-1).to(loc) # (args.device) + + # optimizer, for model just includes pretrained, head and auxlayer + params_list = list() + if hasattr(self.model, 'pretrained'): + params_list.append({'params': self.model.pretrained.parameters(), 'lr': args.lr}) + if hasattr(self.model, 'exclusive'): + for module in self.model.exclusive: + params_list.append({'params': getattr(self.model, module).parameters(), 'lr': args.lr * 10}) + self.optimizer = torch.optim.SGD(params_list, + lr=args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + if args.amp: + self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level=args.opt_level, + loss_scale=args.loss_scale) + + # lr scheduling + self.lr_scheduler = WarmupPolyLR(self.optimizer, + max_iters=args.max_iters, + power=0.9, + warmup_factor=args.warmup_factor, + warmup_iters=args.warmup_iters, + warmup_method=args.warmup_method) + + if args.distributed: + self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[args.local_rank], + output_device=args.local_rank, find_unused_parameters=True, broadcast_buffers=False) + + # evaluation metrics + self.metric = SegmentationMetric(train_dataset.num_class) + + self.best_pred = 0.0 + + def train(self): + batch_time = AverageMeter('Time', ':6.3f', start_count_index=5) + + loc = 'npu:{}'.format(self.args.local_rank) + + save_to_disk = get_rank() == 0 + epochs, max_iters = self.args.epochs, self.args.max_iters + log_per_iters, val_per_iters = self.args.log_iter, self.args.val_epoch * self.args.iters_per_epoch + save_per_iters = self.args.save_epoch * self.args.iters_per_epoch + start_time = time.time() + logger.info('Start training, Total Epochs: {:d} = Total Iterations {:d}'.format(epochs, max_iters)) + + end = time.time() + + self.model.train() + for iteration, (images, targets, _) in enumerate(self.train_loader): + iteration = iteration + 1 + self.lr_scheduler.step() + + # if 'npu' in CALCULATE_DEVICE: + targets = targets.to(torch.int32) + images = images.to(loc) + targets = targets.to(loc) + + # with torch.autograd.profiler.profile(use_npu=True) as prof: + outputs = self.model(images) + + loss_dict = self.criterion(outputs, targets) + + losses = sum(loss for loss in loss_dict.values()) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = reduce_loss_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + + self.optimizer.zero_grad() + + # losses.backward() + if self.args.amp: + with amp.scale_loss(losses, self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + losses.backward() + + self.optimizer.step() + + # print(prof.key_averages().table(sort_by="self_cpu_time_total")) + # prof.export_chrome_trace("output.prof") # "output.prof"为输出文件地址 + + eta_seconds = ((time.time() - start_time) / iteration) * (max_iters - iteration) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 
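# The mixed-precision path above follows the standard apex.amp pattern: wrap model and optimizer
# once with amp.initialize, then scale the loss before backward so FP16 gradients do not
# underflow. Stripped-down view of that flow (illustrative; apex must be installed, and the real
# criterion above returns a dict of losses rather than a single scalar):
from apex import amp

def setup_amp(model, optimizer, opt_level='O2', loss_scale=128.0):
    # one-time wrapping, mirroring the call in Trainer.__init__ above
    return amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=loss_scale)

def amp_step(model, optimizer, criterion, images, targets):
    outputs = model(images)
    loss = criterion(outputs, targets)
    optimizer.zero_grad()
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()  # backward on the scaled loss keeps FP16 gradients in range
    optimizer.step()
    return loss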
+ + if iteration % log_per_iters == 0 and save_to_disk: + logger.info( + "Iters: {:d}/{:d} || Lr: {:.6f} || Loss: {:.4f} || Cost Time: {} || Estimated Time: {}".format( + iteration, max_iters, self.optimizer.param_groups[0]['lr'], losses_reduced.item(), + str(datetime.timedelta(seconds=int(time.time() - start_time))), eta_string)) + + if iteration % self.args.iters_per_epoch == 0 and batch_time.avg > 0: + logger.info("Epoch: {:d}/{:d} || FPS img/s: {:.3f}".format( + iteration // self.args.iters_per_epoch, epochs, + args.num_gpus * self.args.batch_size / batch_time.avg)) + + if iteration % save_per_iters == 0 and save_to_disk and self.args.local_rank == 0: + save_checkpoint(self.model, self.args, is_best=False) + + batch_time.update(time.time() - end) + + if not self.args.skip_val and iteration % val_per_iters == 0: + self.validation() + self.model.train() + + end = time.time() + + if self.args.local_rank == 0: + save_checkpoint(self.model, self.args, is_best=False) + total_training_time = time.time() - start_time + total_training_str = str(datetime.timedelta(seconds=total_training_time)) + logger.info( + "Total training time: {} ({:.4f}s / it)".format( + total_training_str, total_training_time / max_iters)) + + def validation(self): + loc = 'npu:{}'.format(self.args.local_rank) + + # total_inter, total_union, total_correct, total_label = 0, 0, 0, 0 + is_best = False + self.metric.reset() + #if self.args.distributed: + #model = self.model.module + #else: + model = self.model + torch.npu.empty_cache() # TODO check if it helps + model.eval() + for i, (image, target, filename) in enumerate(self.val_loader): + # if 'npu' in CALCULATE_DEVICE: + # target = target.to(torch.int32) + target = target.to(torch.int32) + image = image.to(loc) + target = target.to(loc) + with torch.no_grad(): + outputs = model(image) + self.metric.update(outputs[0], target) + pixAcc, mIoU = self.metric.get() + logger.info("Sample: {:d}, Validation pixAcc: {:.3f}, mIoU: {:.3f}".format(i + 1, pixAcc, mIoU)) + + new_pred = (pixAcc + mIoU) / 2 + if new_pred > self.best_pred: + is_best = True + self.best_pred = new_pred + if self.args.local_rank == 0: + save_checkpoint(self.model, self.args, is_best) + synchronize() + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=10): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + +def save_checkpoint(model, args, is_best=False): + """Save Checkpoint""" + directory = os.path.expanduser(args.save_dir) + if not os.path.exists(directory): + os.makedirs(directory) + + filename = '{}_{}_{}.pth'.format(args.model, args.backbone, args.dataset) + if args.model == "enet": + filename = '{}_{}.pth'.format(args.model, args.dataset) + filename = os.path.join(directory, filename) + + #if args.distributed: + #model = model.module + torch.save(model.state_dict(), filename) + if is_best: + best_filename = '{}_{}_{}_best_model.pth'.format(args.model, args.backbone, args.dataset) + if args.model == "enet": + 
best_filename = '{}_{}_best_model.pth'.format(args.model, args.dataset) + best_filename = os.path.join(directory, best_filename) + shutil.copyfile(filename, best_filename) + + +if __name__ == '__main__': + args = parse_args() + + os.environ['MASTER_ADDR'] = '127.0.0.1' # 可以使用当前真实ip或者'127.0.0.1' + os.environ['MASTER_PORT'] = '29688' # 随意一个可使用的port即可 + + # reference maskrcnn-benchmark + num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 + args.num_gpus = num_gpus + #args.num_gpus = 1 + args.distributed = num_gpus > 1 + + if args.device == "npu": + args.device = "npu" + elif not args.no_cuda and torch.cuda.is_available(): + cudnn.benchmark = True + args.device = "cuda" + else: + args.distributed = False + args.device = "cpu" + if args.distributed: + loc = 'npu:{}'.format(args.local_rank) + torch.npu.set_device(loc) + torch.distributed.init_process_group(backend=args.dist_backend, init_method="env://") + synchronize() + args.lr = args.lr * num_gpus + + logger_filename = '{}_{}_{}_log.txt'.format(args.model, args.backbone, args.dataset) + if args.model == "enet": + logger_filename = '{}_{}_log.txt'.format(args.model, args.dataset) + logger = setup_logger("semantic_segmentation", args.log_dir, get_rank(), filename=logger_filename) + logger.info("Using {} GPUs".format(num_gpus)) + logger.info(args) + + trainer = Trainer(args) + trainer.train() + torch.npu.empty_cache() + diff --git a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/modelzoo_level.txt index 4987c1069692fa42cf124e9045a3d42b733b2a79..0c22703439d27ef96518c74688e17502e7209c62 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/modelzoo_level.txt @@ -1,6 +1,6 @@ -GPUStatus:OK -NPUMigrationStatus:OK -FuncStatus:OK -PrecisionStatus:OK -AutoTune:OK +GPUStatus:OK +NPUMigrationStatus:OK +FuncStatus:OK +PrecisionStatus:OK +AutoTune:OK PerfStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/README.md b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/README.md index cd43babf0736f67151a98118a112645e56842c27..df221c9595b22acd5d65698a1ed2548cc4189f55 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/README.md @@ -1,154 +1,154 @@ -# FastSCNN 训练 - -# the real-time image segmentation FastSCNN - -Fast segmentation convolutional neural network (Fast-SCNN), an above real-time semantic segmentation model on high resolution image data (1024x2048px) suits to efficient computation on embedded devices with low memory. Building on existing two-branch methods for fast segmentation, the 'learning to downsample' module computes low-level features for multiple resolution branches simultaneously. FastSCNN combines spatial detail at high resolution with deep features extracted at lower resolution, yielding an accuracy of 68.0% mean intersection over union at 123.5 frames per second on Cityscapes. 
- -For more detail:https://arxiv.org/abs/1902.04502 - -## - -## Requirements - -use pytorch, you can use pip or conda to install the requirements - -``` -# for pip -cd $project -pip install -r requirements.txt -CANN 20210617_5.0.T205 -torchvision==0.6.0 -``` - - - -## 数据集准备 - -1.从以下网址获取leftImg8bit_trainvaltest.zip作为训练集 - -https://www.cityscapes-dataset.com/downloads/ - -2.从以往网址获取gtFine_trainvaltest.zip作为标签 - -https://www.cityscapes-dataset.com/downloads/ - -文件结构如下: - - -``` -FastSCNN -|-- configs -|-- datasets -| |-- cityscapes -| | |-- gtFine -| | | |-- test -| | | |-- train -| | | `-- val -| | `-- leftImg8bit -| | |-- test -| | |-- train -| | `-- val -|-- docs -|-- test -|-- segmentron -|-- tools - -``` - -将数据集按照以上结构放在代码目录下 - -## 安装 - -请注意,本模型使用了新版本的pytorch以及CANN包,具体版本为:20210617_5.0.T205; - -![](C:\Users\dilig\Pictures\image-20210824164049265 (2).png) - -source 环境变量 - -``` -source ./test/env.sh -``` - -安装 - -``` -python3.7.5 setup.py develop -``` - -或使用sh脚本安装 - -``` -bash ./test/setup.sh -``` - - - -## TRAIN - -### 单p训练 - -source 环境变量 - -``` -source ./test/env.sh -``` - -运行单p脚本 - -``` -bash ./test/run1p.sh -``` - - - -### 多p训练 - -source 环境变量 - -``` -source ./test/env.sh -``` - -运行8p脚本 - -``` -bash ./test/run8p.sh -``` - -模型保存在./runs/checkpoints目录下,以数字命名的pth文件是当前epoch训练得到的权重文件,可用来恢复训练,best_model.pth是当前训练出的最优模型; - -运行日志保存至./runs/logs目录下 - -## TEST - -测试精度 - -使用sh文件 - -``` -bash test/eval.sh -``` - -测试之前请指定测试的模型路径。打开./test/eval.sh文件,最后一行如下所示: - -``` -python3.7.5 -u ./tools/eval.py --config-file configs/cityscapes_fast_scnn.yaml TEST.TEST_MODEL_PATH runs/checkpoints/FastSCNN__cityscape_2021-07-15-00-51/best_model.pth -``` - -请指定需要测试的模型路径,将runs/checkpoints/FastSCNN__cityscape_2021-07-15-00-51/best_model.pth替换掉即可。 - -## Demo -修改 configs/cityscapes_fast_scnn.yaml中的TEST_MODEL_PATH: 'runs/checkpoints/fast_scnn__cityscape_2019-11-19-02-02/best_model.pth'替换成自己实际存储的模型路径,demo.py的输入图片默认时tool/demo_vis.png,运行的结果保存在runs/demo_result目录下,然后运行以下脚本,执行demo.py: -``` -python3.7 tools/demo.py --config-file configs/cityscapes_fast_scnn.yaml -``` - -### 精度对比 - -GPU8p loss scale使用O1 128混合精度获得的结果为:mIoU:64.46 - -NPU8p loss scale使用O1 128混合精度获得的结果为: mIoU:63.914 - -## 注意事项 - -由于在./FastSCNN/segmentron/modules/csrc/vision.cpp中添加了Licence,可能会导致程序在调用此文件时报错,只需要删除Licence就可以使用 +# FastSCNN 训练 + +# the real-time image segmentation FastSCNN + +Fast segmentation convolutional neural network (Fast-SCNN), an above real-time semantic segmentation model on high resolution image data (1024x2048px) suits to efficient computation on embedded devices with low memory. Building on existing two-branch methods for fast segmentation, the 'learning to downsample' module computes low-level features for multiple resolution branches simultaneously. FastSCNN combines spatial detail at high resolution with deep features extracted at lower resolution, yielding an accuracy of 68.0% mean intersection over union at 123.5 frames per second on Cityscapes. 
+ +For more detail:https://arxiv.org/abs/1902.04502 + +## + +## Requirements + +use pytorch, you can use pip or conda to install the requirements + +``` +# for pip +cd $project +pip install -r requirements.txt +CANN 20210617_5.0.T205 +torchvision==0.6.0 +``` + + + +## 数据集准备 + +1.从以下网址获取leftImg8bit_trainvaltest.zip作为训练集 + +https://www.cityscapes-dataset.com/downloads/ + +2.从以往网址获取gtFine_trainvaltest.zip作为标签 + +https://www.cityscapes-dataset.com/downloads/ + +文件结构如下: + + +``` +FastSCNN +|-- configs +|-- datasets +| |-- cityscapes +| | |-- gtFine +| | | |-- test +| | | |-- train +| | | `-- val +| | `-- leftImg8bit +| | |-- test +| | |-- train +| | `-- val +|-- docs +|-- test +|-- segmentron +|-- tools + +``` + +将数据集按照以上结构放在代码目录下 + +## 安装 + +请注意,本模型使用了新版本的pytorch以及CANN包,具体版本为:20210617_5.0.T205; + +![](C:\Users\dilig\Pictures\image-20210824164049265 (2).png) + +source 环境变量 + +``` +source ./test/env.sh +``` + +安装 + +``` +python3.7.5 setup.py develop +``` + +或使用sh脚本安装 + +``` +bash ./test/setup.sh +``` + + + +## TRAIN + +### 单p训练 + +source 环境变量 + +``` +source ./test/env.sh +``` + +运行单p脚本 + +``` +bash ./test/run1p.sh +``` + + + +### 多p训练 + +source 环境变量 + +``` +source ./test/env.sh +``` + +运行8p脚本 + +``` +bash ./test/run8p.sh +``` + +模型保存在./runs/checkpoints目录下,以数字命名的pth文件是当前epoch训练得到的权重文件,可用来恢复训练,best_model.pth是当前训练出的最优模型; + +运行日志保存至./runs/logs目录下 + +## TEST + +测试精度 + +使用sh文件 + +``` +bash test/eval.sh +``` + +测试之前请指定测试的模型路径。打开./test/eval.sh文件,最后一行如下所示: + +``` +python3.7.5 -u ./tools/eval.py --config-file configs/cityscapes_fast_scnn.yaml TEST.TEST_MODEL_PATH runs/checkpoints/FastSCNN__cityscape_2021-07-15-00-51/best_model.pth +``` + +请指定需要测试的模型路径,将runs/checkpoints/FastSCNN__cityscape_2021-07-15-00-51/best_model.pth替换掉即可。 + +## Demo +修改 configs/cityscapes_fast_scnn.yaml中的TEST_MODEL_PATH: 'runs/checkpoints/fast_scnn__cityscape_2019-11-19-02-02/best_model.pth'替换成自己实际存储的模型路径,demo.py的输入图片默认时tool/demo_vis.png,运行的结果保存在runs/demo_result目录下,然后运行以下脚本,执行demo.py: +``` +python3.7 tools/demo.py --config-file configs/cityscapes_fast_scnn.yaml +``` + +### 精度对比 + +GPU8p loss scale使用O1 128混合精度获得的结果为:mIoU:64.46 + +NPU8p loss scale使用O1 128混合精度获得的结果为: mIoU:63.914 + +## 注意事项 + +由于在./FastSCNN/segmentron/modules/csrc/vision.cpp中添加了Licence,可能会导致程序在调用此文件时报错,只需要删除Licence就可以使用 diff --git a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/modelzoo_level.txt index 0673c9bd59b5838ddfddd72ee5f4c355afb112c0..05a5423b13b33ed85d11be6e57b361f5039249be 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK,but performance on npu less than performance on gpu +FuncStatus:OK +PerfStatus:OK,but performance on npu less than performance on gpu PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/models/ocnet.py b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/models/ocnet.py index ecd42b5f76306229804a4c6f65be90dee7f95ff4..942039eddfa3e7e800c4b71d22833f2c7742d332 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/models/ocnet.py +++ b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/models/ocnet.py @@ -1,308 +1,308 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .segbase import SegBaseModel -from .model_zoo import MODEL_REGISTRY -from ..modules import _FCNHead -from ..config import cfg - -__all__ = ['OCNet'] - - -@MODEL_REGISTRY.register() -class OCNet(SegBaseModel): - r"""OCNet - Reference: - Yuhui Yuan, Jingdong Wang. "OCNet: Object Context Network for Scene Parsing." - arXiv preprint arXiv:1809.00916 (2018). - """ - - def __init__(self): - super(OCNet, self).__init__() - oc_arch = cfg.MODEL.OCNet.OC_ARCH - self.head = _OCHead(self.nclass, oc_arch, norm_layer=self.norm_layer) - if self.aux: - self.auxlayer = _FCNHead(1024, self.nclass, norm_layer=self.norm_layer) - - self.__setattr__('decoder', ['head', 'auxlayer'] if self.aux else ['head']) - - def forward(self, x): - size = x.size()[2:] - _, _, c3, c4 = self.base_forward(x) - outputs = [] - x = self.head(c4) - x = F.interpolate(x, size, mode='bilinear', align_corners=True) - outputs.append(x) - - if self.aux: - auxout = self.auxlayer(c3) - auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) - outputs.append(auxout) - return tuple(outputs) - - -class _OCHead(nn.Module): - def __init__(self, nclass, oc_arch, norm_layer=nn.BatchNorm2d, **kwargs): - super(_OCHead, self).__init__() - if oc_arch == 'base': - self.context = nn.Sequential( - nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), - norm_layer(512), - nn.ReLU(True), - BaseOCModule(512, 512, 256, 256, scales=([1]), norm_layer=norm_layer, **kwargs)) - elif oc_arch == 'pyramid': - self.context = nn.Sequential( - nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), - norm_layer(512), - nn.ReLU(True), - PyramidOCModule(512, 512, 256, 512, scales=([1, 2, 3, 6]), norm_layer=norm_layer, **kwargs)) - elif oc_arch == 'asp': - self.context = ASPOCModule(2048, 512, 256, 512, norm_layer=norm_layer, **kwargs) - else: - raise ValueError("Unknown OC architecture!") - - self.out = nn.Conv2d(512, nclass, 1) - - def forward(self, x): - x = self.context(x) - return self.out(x) - - -class BaseAttentionBlock(nn.Module): - """The basic implementation for self-attention block/non-local block.""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scale=1, norm_layer=nn.BatchNorm2d, **kwargs): - super(BaseAttentionBlock, self).__init__() - self.scale = scale - self.key_channels = key_channels - self.value_channels = value_channels - if scale > 1: - self.pool = nn.MaxPool2d(scale) - - self.f_value = nn.Conv2d(in_channels, value_channels, 1) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels, key_channels, 1), - norm_layer(key_channels), - nn.ReLU(True) - ) - self.f_query = self.f_key - self.W = nn.Conv2d(value_channels, out_channels, 1) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - def forward(self, x): - batch_size, c, w, h = x.size() - if self.scale > 1: - x = self.pool(x) - - value = self.f_value(x).view(batch_size, self.value_channels, -1).permute(0, 2, 1) - query = self.f_query(x).view(batch_size, self.key_channels, -1).permute(0, 2, 1) - key = self.f_key(x).view(batch_size, self.key_channels, 
-1) - - sim_map = torch.bmm(query, key) * (self.key_channels ** -.5) - sim_map = F.softmax(sim_map, dim=-1) - - context = torch.bmm(sim_map, value).permute(0, 2, 1).contiguous() - context = context.view(batch_size, self.value_channels, *x.size()[2:]) - context = self.W(context) - if self.scale > 1: - context = F.interpolate(context, size=(w, h), mode='bilinear', align_corners=True) - - return context - - -class BaseOCModule(nn.Module): - """Base-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scales=([1]), norm_layer=nn.BatchNorm2d, concat=True, **kwargs): - super(BaseOCModule, self).__init__() - self.stages = nn.ModuleList([ - BaseAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) - for scale in scales]) - in_channels = in_channels * 2 if concat else in_channels - self.project = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.05) - ) - self.concat = concat - - def forward(self, x): - priors = [stage(x) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - if self.concat: - context = torch.cat([context, x], 1) - out = self.project(context) - return out - - -class PyramidAttentionBlock(nn.Module): - """The basic implementation for pyramid self-attention block/non-local block""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scale=1, norm_layer=nn.BatchNorm2d, **kwargs): - super(PyramidAttentionBlock, self).__init__() - self.scale = scale - self.value_channels = value_channels - self.key_channels = key_channels - - self.f_value = nn.Conv2d(in_channels, value_channels, 1) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels, key_channels, 1), - norm_layer(key_channels), - nn.ReLU(True) - ) - self.f_query = self.f_key - self.W = nn.Conv2d(value_channels, out_channels, 1) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - def forward(self, x): - batch_size, c, w, h = x.size() - - local_x = list() - local_y = list() - step_w, step_h = w // self.scale, h // self.scale - for i in range(self.scale): - for j in range(self.scale): - start_x, start_y = step_w * i, step_h * j - end_x, end_y = min(start_x + step_w, w), min(start_y + step_h, h) - if i == (self.scale - 1): - end_x = w - if j == (self.scale - 1): - end_y = h - local_x += [start_x, end_x] - local_y += [start_y, end_y] - - value = self.f_value(x) - query = self.f_query(x) - key = self.f_key(x) - - local_list = list() - local_block_cnt = (self.scale ** 2) * 2 - for i in range(0, local_block_cnt, 2): - value_local = value[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - query_local = query[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - key_local = key[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] - - w_local, h_local = value_local.size(2), value_local.size(3) - value_local = value_local.contiguous().view(batch_size, self.value_channels, -1).permute(0, 2, 1) - query_local = query_local.contiguous().view(batch_size, self.key_channels, -1).permute(0, 2, 1) - key_local = key_local.contiguous().view(batch_size, self.key_channels, -1) - - sim_map = torch.bmm(query_local, key_local) * (self.key_channels ** -.5) - sim_map = F.softmax(sim_map, dim=-1) - - context_local = torch.bmm(sim_map, value_local).permute(0, 2, 1).contiguous() - context_local = context_local.view(batch_size, self.value_channels, w_local, h_local) - 
local_list.append(context_local) - - context_list = list() - for i in range(0, self.scale): - row_tmp = list() - for j in range(self.scale): - row_tmp.append(local_list[j + i * self.scale]) - context_list.append(torch.cat(row_tmp, 3)) - - context = torch.cat(context_list, 2) - context = self.W(context) - - return context - - -class PyramidOCModule(nn.Module): - """Pyramid-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - scales=([1]), norm_layer=nn.BatchNorm2d, **kwargs): - super(PyramidOCModule, self).__init__() - self.stages = nn.ModuleList([ - PyramidAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) - for scale in scales]) - self.up_dr = nn.Sequential( - nn.Conv2d(in_channels, in_channels * len(scales), 1), - norm_layer(in_channels * len(scales)), - nn.ReLU(True) - ) - self.project = nn.Sequential( - nn.Conv2d(in_channels * len(scales) * 2, out_channels, 1), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.05) - ) - - def forward(self, x): - priors = [stage(x) for stage in self.stages] - context = [self.up_dr(x)] - for i in range(len(priors)): - context += [priors[i]] - context = torch.cat(context, 1) - out = self.project(context) - return out - - -class ASPOCModule(nn.Module): - """ASP-OC""" - - def __init__(self, in_channels, out_channels, key_channels, value_channels, - atrous_rates=(12, 24, 36), norm_layer=nn.BatchNorm2d, **kwargs): - super(ASPOCModule, self).__init__() - self.context = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=1), - norm_layer(out_channels), - nn.ReLU(True), - BaseOCModule(out_channels, out_channels, key_channels, value_channels, ([2]), norm_layer, False, **kwargs)) - - rate1, rate2, rate3 = tuple(atrous_rates) - self.b1 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate1, dilation=rate1, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b2 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate2, dilation=rate2, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b3 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 3, padding=rate3, dilation=rate3, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - self.b4 = nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.ReLU(True)) - - self.project = nn.Sequential( - nn.Conv2d(out_channels * 5, out_channels, 1, bias=False), - norm_layer(out_channels), - nn.ReLU(True), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - feat1 = self.context(x) - feat2 = self.b1(x) - feat3 = self.b2(x) - feat4 = self.b3(x) - feat5 = self.b4(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) - out = self.project(out) - return out +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
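The BaseAttentionBlock in this file is a standard non-local / self-attention block: 1x1 convolutions produce query, key and value maps, a (HW x HW) similarity matrix is formed with a batched matmul, scaled by key_channels^-0.5 and softmax-normalised, then applied to the values. A minimal standalone walkthrough of that shape algebra, using plain tensors instead of the module and arbitrarily chosen sizes:

```python
import torch
import torch.nn.functional as F

# Arbitrary example sizes: batch 2, key/value channels 8/16, a 4x6 feature map.
B, Ck, Cv, H, W = 2, 8, 16, 4, 6

query = torch.randn(B, Ck, H, W).view(B, Ck, -1).permute(0, 2, 1)  # (B, HW, Ck)
key   = torch.randn(B, Ck, H, W).view(B, Ck, -1)                   # (B, Ck, HW)
value = torch.randn(B, Cv, H, W).view(B, Cv, -1).permute(0, 2, 1)  # (B, HW, Cv)

sim_map = torch.bmm(query, key) * (Ck ** -0.5)   # (B, HW, HW) scaled dot products
sim_map = F.softmax(sim_map, dim=-1)             # each position attends over all HW positions

context = torch.bmm(sim_map, value)              # (B, HW, Cv) weighted sum of values
context = context.permute(0, 2, 1).reshape(B, Cv, H, W)  # back to a feature map

print(sim_map.shape, context.shape)  # torch.Size([2, 24, 24]) torch.Size([2, 16, 4, 6])
```

The pyramid variant below applies exactly the same computation independently inside each cell of a scale x scale grid, which keeps the similarity matrices small for large feature maps.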
+import torch +import torch.nn as nn +import torch.nn.functional as F + +from .segbase import SegBaseModel +from .model_zoo import MODEL_REGISTRY +from ..modules import _FCNHead +from ..config import cfg + +__all__ = ['OCNet'] + + +@MODEL_REGISTRY.register() +class OCNet(SegBaseModel): + r"""OCNet + Reference: + Yuhui Yuan, Jingdong Wang. "OCNet: Object Context Network for Scene Parsing." + arXiv preprint arXiv:1809.00916 (2018). + """ + + def __init__(self): + super(OCNet, self).__init__() + oc_arch = cfg.MODEL.OCNet.OC_ARCH + self.head = _OCHead(self.nclass, oc_arch, norm_layer=self.norm_layer) + if self.aux: + self.auxlayer = _FCNHead(1024, self.nclass, norm_layer=self.norm_layer) + + self.__setattr__('decoder', ['head', 'auxlayer'] if self.aux else ['head']) + + def forward(self, x): + size = x.size()[2:] + _, _, c3, c4 = self.base_forward(x) + outputs = [] + x = self.head(c4) + x = F.interpolate(x, size, mode='bilinear', align_corners=True) + outputs.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + auxout = F.interpolate(auxout, size, mode='bilinear', align_corners=True) + outputs.append(auxout) + return tuple(outputs) + + +class _OCHead(nn.Module): + def __init__(self, nclass, oc_arch, norm_layer=nn.BatchNorm2d, **kwargs): + super(_OCHead, self).__init__() + if oc_arch == 'base': + self.context = nn.Sequential( + nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), + norm_layer(512), + nn.ReLU(True), + BaseOCModule(512, 512, 256, 256, scales=([1]), norm_layer=norm_layer, **kwargs)) + elif oc_arch == 'pyramid': + self.context = nn.Sequential( + nn.Conv2d(2048, 512, 3, 1, padding=1, bias=False), + norm_layer(512), + nn.ReLU(True), + PyramidOCModule(512, 512, 256, 512, scales=([1, 2, 3, 6]), norm_layer=norm_layer, **kwargs)) + elif oc_arch == 'asp': + self.context = ASPOCModule(2048, 512, 256, 512, norm_layer=norm_layer, **kwargs) + else: + raise ValueError("Unknown OC architecture!") + + self.out = nn.Conv2d(512, nclass, 1) + + def forward(self, x): + x = self.context(x) + return self.out(x) + + +class BaseAttentionBlock(nn.Module): + """The basic implementation for self-attention block/non-local block.""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scale=1, norm_layer=nn.BatchNorm2d, **kwargs): + super(BaseAttentionBlock, self).__init__() + self.scale = scale + self.key_channels = key_channels + self.value_channels = value_channels + if scale > 1: + self.pool = nn.MaxPool2d(scale) + + self.f_value = nn.Conv2d(in_channels, value_channels, 1) + self.f_key = nn.Sequential( + nn.Conv2d(in_channels, key_channels, 1), + norm_layer(key_channels), + nn.ReLU(True) + ) + self.f_query = self.f_key + self.W = nn.Conv2d(value_channels, out_channels, 1) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + def forward(self, x): + batch_size, c, w, h = x.size() + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x).view(batch_size, self.value_channels, -1).permute(0, 2, 1) + query = self.f_query(x).view(batch_size, self.key_channels, -1).permute(0, 2, 1) + key = self.f_key(x).view(batch_size, self.key_channels, -1) + + sim_map = torch.bmm(query, key) * (self.key_channels ** -.5) + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.bmm(sim_map, value).permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.value_channels, *x.size()[2:]) + context = self.W(context) + if self.scale > 1: + context = F.interpolate(context, size=(w, h), mode='bilinear', align_corners=True) + + return context + + +class 
BaseOCModule(nn.Module): + """Base-OC""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scales=([1]), norm_layer=nn.BatchNorm2d, concat=True, **kwargs): + super(BaseOCModule, self).__init__() + self.stages = nn.ModuleList([ + BaseAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) + for scale in scales]) + in_channels = in_channels * 2 if concat else in_channels + self.project = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.05) + ) + self.concat = concat + + def forward(self, x): + priors = [stage(x) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + if self.concat: + context = torch.cat([context, x], 1) + out = self.project(context) + return out + + +class PyramidAttentionBlock(nn.Module): + """The basic implementation for pyramid self-attention block/non-local block""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + scale=1, norm_layer=nn.BatchNorm2d, **kwargs): + super(PyramidAttentionBlock, self).__init__() + self.scale = scale + self.value_channels = value_channels + self.key_channels = key_channels + + self.f_value = nn.Conv2d(in_channels, value_channels, 1) + self.f_key = nn.Sequential( + nn.Conv2d(in_channels, key_channels, 1), + norm_layer(key_channels), + nn.ReLU(True) + ) + self.f_query = self.f_key + self.W = nn.Conv2d(value_channels, out_channels, 1) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + def forward(self, x): + batch_size, c, w, h = x.size() + + local_x = list() + local_y = list() + step_w, step_h = w // self.scale, h // self.scale + for i in range(self.scale): + for j in range(self.scale): + start_x, start_y = step_w * i, step_h * j + end_x, end_y = min(start_x + step_w, w), min(start_y + step_h, h) + if i == (self.scale - 1): + end_x = w + if j == (self.scale - 1): + end_y = h + local_x += [start_x, end_x] + local_y += [start_y, end_y] + + value = self.f_value(x) + query = self.f_query(x) + key = self.f_key(x) + + local_list = list() + local_block_cnt = (self.scale ** 2) * 2 + for i in range(0, local_block_cnt, 2): + value_local = value[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + query_local = query[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + key_local = key[:, :, local_x[i]:local_x[i + 1], local_y[i]:local_y[i + 1]] + + w_local, h_local = value_local.size(2), value_local.size(3) + value_local = value_local.contiguous().view(batch_size, self.value_channels, -1).permute(0, 2, 1) + query_local = query_local.contiguous().view(batch_size, self.key_channels, -1).permute(0, 2, 1) + key_local = key_local.contiguous().view(batch_size, self.key_channels, -1) + + sim_map = torch.bmm(query_local, key_local) * (self.key_channels ** -.5) + sim_map = F.softmax(sim_map, dim=-1) + + context_local = torch.bmm(sim_map, value_local).permute(0, 2, 1).contiguous() + context_local = context_local.view(batch_size, self.value_channels, w_local, h_local) + local_list.append(context_local) + + context_list = list() + for i in range(0, self.scale): + row_tmp = list() + for j in range(self.scale): + row_tmp.append(local_list[j + i * self.scale]) + context_list.append(torch.cat(row_tmp, 3)) + + context = torch.cat(context_list, 2) + context = self.W(context) + + return context + + +class PyramidOCModule(nn.Module): + """Pyramid-OC""" + + def __init__(self, in_channels, out_channels, 
key_channels, value_channels, + scales=([1]), norm_layer=nn.BatchNorm2d, **kwargs): + super(PyramidOCModule, self).__init__() + self.stages = nn.ModuleList([ + PyramidAttentionBlock(in_channels, out_channels, key_channels, value_channels, scale, norm_layer, **kwargs) + for scale in scales]) + self.up_dr = nn.Sequential( + nn.Conv2d(in_channels, in_channels * len(scales), 1), + norm_layer(in_channels * len(scales)), + nn.ReLU(True) + ) + self.project = nn.Sequential( + nn.Conv2d(in_channels * len(scales) * 2, out_channels, 1), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.05) + ) + + def forward(self, x): + priors = [stage(x) for stage in self.stages] + context = [self.up_dr(x)] + for i in range(len(priors)): + context += [priors[i]] + context = torch.cat(context, 1) + out = self.project(context) + return out + + +class ASPOCModule(nn.Module): + """ASP-OC""" + + def __init__(self, in_channels, out_channels, key_channels, value_channels, + atrous_rates=(12, 24, 36), norm_layer=nn.BatchNorm2d, **kwargs): + super(ASPOCModule, self).__init__() + self.context = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=1), + norm_layer(out_channels), + nn.ReLU(True), + BaseOCModule(out_channels, out_channels, key_channels, value_channels, ([2]), norm_layer, False, **kwargs)) + + rate1, rate2, rate3 = tuple(atrous_rates) + self.b1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate1, dilation=rate1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b2 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate2, dilation=rate2, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b3 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=rate3, dilation=rate3, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b4 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + + self.project = nn.Sequential( + nn.Conv2d(out_channels * 5, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.1) + ) + + def forward(self, x): + feat1 = self.context(x) + feat2 = self.b1(x) + feat3 = self.b2(x) + feat4 = self.b3(x) + feat5 = self.b4(x) + out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) + out = self.project(out) + return out diff --git a/PyTorch/contrib/cv/semantic_segmentation/HRNet_SEG_for_Pytorch/figures/OCR.PNG b/PyTorch/contrib/cv/semantic_segmentation/HRNet_SEG_for_Pytorch/figures/OCR.PNG deleted file mode 100644 index 4b72e990fd559ff19bd91756b08475cff30b5aa9..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/semantic_segmentation/HRNet_SEG_for_Pytorch/figures/OCR.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/README.md b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/README.md index f1a53a63d9a35f55e20506591c29765b79e70292..f31f065a189498710654ba793117f6a27b7353ba 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/README.md @@ -1,65 +1,65 @@ -# HRnet-OCR - -## 模型简介 - -- 参考实现: - -``` -url=https:https://github.com/NVIDIA/semantic-segmentation -branch=master -commit_id=7726b144c2cc0b8e09c67eabb78f027efdf3f0fa -``` - -- 模型原理:HRnet-OCR模型为图像分割网络,通过将注意力机制和多尺度预测的方法结合,实现了更快速的训练模型并保持更高精度。 - -## Requirements - -- CANN 5.0.3.1 -- torch 1.5.0+ascend.post3.20210930 -- apex 0.1+ascend.20210930 -- tensor-fused-plugin 0.1+ascend -- te 0.4.0 -- python 3.7.5 -- 
runx 0.0.11 -- torchvision 0.6.0 - -## 配置数据集路径 - -采用Cityscapes数据集 - -参考源码仓的方式获取数据集:https://github.com/NVIDIA/semantic-segmentation - -获取数据集后需按照源代码仓Download/Prepare Data指示配置数据集路径 - -## 配置预训练模型 - -预训练模型权重在作者源代码仓中均已给出,配置路径请参照源代码仓Download Weights进行配置 - -## NPU 单卡训练命令 - -- 训练(注:训练结束后模型将自动打印评估结果): - -``` -nohup bash test/train_full_1p.sh --data_path=./large_asset_dir/ & -``` - -- 性能: - -``` -nohup bash test/train_performance_1p.sh --data_path=./large_asset_dir/ & -``` - -## NPU 8卡训练命令 - -- 训练(注:训练结束后模型将自动打印评估结果): - -``` -nohup bash test/train_full_8p.sh --data_path=./large_asset_dir/ & -``` - -- 性能: - -``` -nohup bash test/train_performance_8p.sh --data_path=./large_asset_dir/ & -``` - +# HRnet-OCR + +## 模型简介 + +- 参考实现: + +``` +url=https:https://github.com/NVIDIA/semantic-segmentation +branch=master +commit_id=7726b144c2cc0b8e09c67eabb78f027efdf3f0fa +``` + +- 模型原理:HRnet-OCR模型为图像分割网络,通过将注意力机制和多尺度预测的方法结合,实现了更快速的训练模型并保持更高精度。 + +## Requirements + +- CANN 5.0.3.1 +- torch 1.5.0+ascend.post3.20210930 +- apex 0.1+ascend.20210930 +- tensor-fused-plugin 0.1+ascend +- te 0.4.0 +- python 3.7.5 +- runx 0.0.11 +- torchvision 0.6.0 + +## 配置数据集路径 + +采用Cityscapes数据集 + +参考源码仓的方式获取数据集:https://github.com/NVIDIA/semantic-segmentation + +获取数据集后需按照源代码仓Download/Prepare Data指示配置数据集路径 + +## 配置预训练模型 + +预训练模型权重在作者源代码仓中均已给出,配置路径请参照源代码仓Download Weights进行配置 + +## NPU 单卡训练命令 + +- 训练(注:训练结束后模型将自动打印评估结果): + +``` +nohup bash test/train_full_1p.sh --data_path=./large_asset_dir/ & +``` + +- 性能: + +``` +nohup bash test/train_performance_1p.sh --data_path=./large_asset_dir/ & +``` + +## NPU 8卡训练命令 + +- 训练(注:训练结束后模型将自动打印评估结果): + +``` +nohup bash test/train_full_8p.sh --data_path=./large_asset_dir/ & +``` + +- 性能: + +``` +nohup bash test/train_performance_8p.sh --data_path=./large_asset_dir/ & +``` + diff --git a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/modelzoo_level.txt index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/utils/results_page.py b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/utils/results_page.py index 80c7ad5529d95a7f7c17574e6d9ede44abf2581c..840902241107847a14328eda5aca3c051483d685 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/utils/results_page.py +++ b/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR/utils/results_page.py @@ -1,276 +1,276 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -""" -Copyright 2020 Nvidia Corporation - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" - -import glob -import os -import numpy as np - -id2cat = { - 0: 'road', - 1: 'sidewalk', - 2: 'building', - 3: 'wall', - 4: 'fence', - 5: 'pole', - 6: 'traffic_light', - 7: 'traffic_sign', - 8: 'vegetation', - 9: 'terrain', - 10: 'sky', - 11: 'person', - 12: 'rider', - 13: 'car', - 14: 'truck', - 15: 'bus', - 16: 'train', - 17: 'motorcycle', - 18: 'bicycle'} - -# Leaderboard mapillary -sota_iu_results = { - 0: 98.4046, - 1: 85.0224, - 2: 93.6462, - 3: 61.7487, - 4: 63.8885, - 5: 67.6745, - 6: 77.43, - 7: 80.8351, - 8: 93.7341, - 9: 71.8774, - 10: 95.6122, - 11: 86.7228, - 12: 72.7778, - 13: 95.7033, - 14: 79.9019, - 15: 93.0954, - 16: 89.7196, - 17: 72.5731, - 18: 78.2172, - 255: 0} - - -class ResultsPage(object): - ''' - This creates an HTML page of embedded images, useful for showing evaluation results. - - Usage: - ip = ImagePage(html_fn) - - # Add a table with N images ... - ip.add_table((img, descr), (img, descr), ...) 
- - # Generate html page - ip.write_page() - ''' - - def __init__(self, experiment_name, html_filename): - self.experiment_name = experiment_name - self.html_filename = html_filename - self.outfile = open(self.html_filename, 'w') - self.items = [] - - def _print_header(self): - header = ''' - - - Experiment = {} - - '''.format(self.experiment_name) - self.outfile.write(header) - - def _print_footer(self): - self.outfile.write(''' -''') - - def _print_table_header(self, table_name): - table_hdr = '''

{}

- - '''.format(table_name) - self.outfile.write(table_hdr) - - def _print_table_footer(self): - table_ftr = ''' -
''' - self.outfile.write(table_ftr) - - def _print_table_guts(self, img_fn, descr): - table = ''' -

- - -
-

{descr}

-

- '''.format(img_fn=img_fn, descr=descr) - self.outfile.write(table) - - def add_table(self, img_label_pairs, table_heading=''): - """ - :img_label_pairs: A list of pairs of [img,label] - """ - self.items.append([img_label_pairs, table_heading]) - - def _write_table(self, table, heading): - img, _descr = table[0] - self._print_table_header(heading) - for img, descr in table: - self._print_table_guts(img, descr) - self._print_table_footer() - - def write_page(self): - self._print_header() - - for table, heading in self.items: - self._write_table(table, heading) - - self._print_footer() - - def _print_page_start(self): - page_start = ''' - - -Experiment = EXP_NAME - - -''' - self.outfile.write(page_start) - - def _print_table_start(self, caption, hdr): - self.outfile.write(''' - - '''.format(caption)) - for hdr_col in hdr: - self.outfile.write(' '.format(hdr_col)) - self.outfile.write(' ') - - def _print_table_row(self, row): - self.outfile.write(' ') - for i in row: - self.outfile.write(' '.format(i)) - # Create Links - fp_link = 'false positive Top N'.format(row[ - 1]) - fn_link = 'false_negative Top N'.format(row[ - 1]) - self.outfile.write(' '.format(fp_link)) - self.outfile.write(' '.format(fn_link)) - self.outfile.write(' ') - - def _print_table_end(self): - self.outfile.write('
{}
{}
{}{}{}
') - - def _print_page_end(self): - self.outfile.write(''' - -''') - - def create_main(self, iu, hist): - self._print_page_start() - #_print_table_style() - # Calculate all of the terms: - iu_false_positive = hist.sum(axis=1) - np.diag(hist) - iu_false_negative = hist.sum(axis=0) - np.diag(hist) - iu_true_positive = np.diag(hist) - - hdr = ("Class ID", "Class", "IoU", "Sota-IU", "TP", - "FP", "FN", "precision", "recall", "", "") - self._print_table_start("Mean IoU Results", hdr) - for iu_score, index in iu: - class_name = id2cat[index] - iu_string = '{:5.2f}'.format(iu_score * 100) - total_pixels = hist.sum() - tp = '{:5.2f}'.format(100 * iu_true_positive[index] / total_pixels) - fp = '{:5.2f}'.format( - iu_false_positive[index] / iu_true_positive[index]) - fn = '{:5.2f}'.format( - iu_false_negative[index] / iu_true_positive[index]) - precision = '{:5.2f}'.format( - iu_true_positive[index] / (iu_true_positive[index] + iu_false_positive[index])) - recall = '{:5.2f}'.format( - iu_true_positive[index] / (iu_true_positive[index] + iu_false_negative[index])) - sota = '{:5.2f}'.format(sota_iu_results[index]) - row = (index, class_name, iu_string, sota, - tp, fp, fn, precision, recall) - self._print_table_row(row) - self._print_table_end() - self._print_page_end() - - -def main(): - images = glob.glob('dump_imgs_train/*.png') - images = [i for i in images if 'mask' not in i] - - ip = ResultsPage('test page', 'dd.html') - for img in images: - basename = os.path.splitext(img)[0] - mask_img = basename + '_mask.png' - ip.add_table(((img, 'image'), (mask_img, 'mask'))) - ip.write_page() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +""" +Copyright 2020 Nvidia Corporation + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" + +import glob +import os +import numpy as np + +id2cat = { + 0: 'road', + 1: 'sidewalk', + 2: 'building', + 3: 'wall', + 4: 'fence', + 5: 'pole', + 6: 'traffic_light', + 7: 'traffic_sign', + 8: 'vegetation', + 9: 'terrain', + 10: 'sky', + 11: 'person', + 12: 'rider', + 13: 'car', + 14: 'truck', + 15: 'bus', + 16: 'train', + 17: 'motorcycle', + 18: 'bicycle'} + +# Leaderboard mapillary +sota_iu_results = { + 0: 98.4046, + 1: 85.0224, + 2: 93.6462, + 3: 61.7487, + 4: 63.8885, + 5: 67.6745, + 6: 77.43, + 7: 80.8351, + 8: 93.7341, + 9: 71.8774, + 10: 95.6122, + 11: 86.7228, + 12: 72.7778, + 13: 95.7033, + 14: 79.9019, + 15: 93.0954, + 16: 89.7196, + 17: 72.5731, + 18: 78.2172, + 255: 0} + + +class ResultsPage(object): + ''' + This creates an HTML page of embedded images, useful for showing evaluation results. + + Usage: + ip = ImagePage(html_fn) + + # Add a table with N images ... + ip.add_table((img, descr), (img, descr), ...) + + # Generate html page + ip.write_page() + ''' + + def __init__(self, experiment_name, html_filename): + self.experiment_name = experiment_name + self.html_filename = html_filename + self.outfile = open(self.html_filename, 'w') + self.items = [] + + def _print_header(self): + header = ''' + + + Experiment = {} + + '''.format(self.experiment_name) + self.outfile.write(header) + + def _print_footer(self): + self.outfile.write(''' +''') + + def _print_table_header(self, table_name): + table_hdr = '''

{}

+ + '''.format(table_name) + self.outfile.write(table_hdr) + + def _print_table_footer(self): + table_ftr = ''' +
''' + self.outfile.write(table_ftr) + + def _print_table_guts(self, img_fn, descr): + table = ''' +

+ + +
+

{descr}

+

+ '''.format(img_fn=img_fn, descr=descr) + self.outfile.write(table) + + def add_table(self, img_label_pairs, table_heading=''): + """ + :img_label_pairs: A list of pairs of [img,label] + """ + self.items.append([img_label_pairs, table_heading]) + + def _write_table(self, table, heading): + img, _descr = table[0] + self._print_table_header(heading) + for img, descr in table: + self._print_table_guts(img, descr) + self._print_table_footer() + + def write_page(self): + self._print_header() + + for table, heading in self.items: + self._write_table(table, heading) + + self._print_footer() + + def _print_page_start(self): + page_start = ''' + + +Experiment = EXP_NAME + + +''' + self.outfile.write(page_start) + + def _print_table_start(self, caption, hdr): + self.outfile.write(''' + + '''.format(caption)) + for hdr_col in hdr: + self.outfile.write(' '.format(hdr_col)) + self.outfile.write(' ') + + def _print_table_row(self, row): + self.outfile.write(' ') + for i in row: + self.outfile.write(' '.format(i)) + # Create Links + fp_link = 'false positive Top N'.format(row[ + 1]) + fn_link = 'false_negative Top N'.format(row[ + 1]) + self.outfile.write(' '.format(fp_link)) + self.outfile.write(' '.format(fn_link)) + self.outfile.write(' ') + + def _print_table_end(self): + self.outfile.write('
{}
{}
{}{}{}
') + + def _print_page_end(self): + self.outfile.write(''' + +''') + + def create_main(self, iu, hist): + self._print_page_start() + #_print_table_style() + # Calculate all of the terms: + iu_false_positive = hist.sum(axis=1) - np.diag(hist) + iu_false_negative = hist.sum(axis=0) - np.diag(hist) + iu_true_positive = np.diag(hist) + + hdr = ("Class ID", "Class", "IoU", "Sota-IU", "TP", + "FP", "FN", "precision", "recall", "", "") + self._print_table_start("Mean IoU Results", hdr) + for iu_score, index in iu: + class_name = id2cat[index] + iu_string = '{:5.2f}'.format(iu_score * 100) + total_pixels = hist.sum() + tp = '{:5.2f}'.format(100 * iu_true_positive[index] / total_pixels) + fp = '{:5.2f}'.format( + iu_false_positive[index] / iu_true_positive[index]) + fn = '{:5.2f}'.format( + iu_false_negative[index] / iu_true_positive[index]) + precision = '{:5.2f}'.format( + iu_true_positive[index] / (iu_true_positive[index] + iu_false_positive[index])) + recall = '{:5.2f}'.format( + iu_true_positive[index] / (iu_true_positive[index] + iu_false_negative[index])) + sota = '{:5.2f}'.format(sota_iu_results[index]) + row = (index, class_name, iu_string, sota, + tp, fp, fn, precision, recall) + self._print_table_row(row) + self._print_table_end() + self._print_page_end() + + +def main(): + images = glob.glob('dump_imgs_train/*.png') + images = [i for i in images if 'mask' not in i] + + ip = ResultsPage('test page', 'dd.html') + for img in images: + basename = os.path.splitext(img)[0] + mask_img = basename + '_mask.png' + ip.add_table(((img, 'image'), (mask_img, 'mask'))) + ip.write_page() diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/__init__.py index 55887fca321859e7fe8df0054b4761e83a7b4210..945cb920d49428f81bc1c2597ffa7a2a61e8ef81 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/dataset/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/dataset/__init__.py index 55887fca321859e7fe8df0054b4761e83a7b4210..945cb920d49428f81bc1c2597ffa7a2a61e8ef81 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/dataset/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/dataset/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/__init__.py index 55887fca321859e7fe8df0054b4761e83a7b4210..945cb920d49428f81bc1c2597ffa7a2a61e8ef81 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/config.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/config.py index f9a2ffbacd4e577cbd4bd3df4f9dd8a7ec8e3805..49411a50bc9ac37bf956900db07b396858660c51 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/config.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/config.py @@ -1,156 +1,156 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -------------------------------------------------------- -# Configurations for domain adaptation -# Copyright (c) 2019 valeo.ai -# -# Written by Tuan-Hung Vu -# Adapted from https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py -# -------------------------------------------------------- - -import os.path as osp - -import numpy as np -from easydict import EasyDict - -from advent.utils import project_root -from advent.utils.serialization import yaml_load - - -cfg = EasyDict() - -# COMMON CONFIGS -# source domain -cfg.SOURCE = 'GTA' -# target domain -cfg.TARGET = 'Cityscapes' -# Number of workers for dataloading -cfg.NUM_WORKERS = 4 -# List of training images -cfg.DATA_LIST_SOURCE = str(project_root / 'advent/dataset/gta5_list/{}.txt') -cfg.DATA_LIST_TARGET = str(project_root / 'advent/dataset/cityscapes_list/{}.txt') -# Directories -cfg.DATA_DIRECTORY_SOURCE = str(project_root / 'data/GTA5') -cfg.DATA_DIRECTORY_TARGET = str(project_root / 'data/Cityscapes') -# Number of object classes -cfg.NUM_CLASSES = 19 -# Exp dirs -cfg.EXP_NAME = '' -cfg.EXP_ROOT = project_root / 'experiments' -cfg.EXP_ROOT_SNAPSHOT = osp.join(cfg.EXP_ROOT, 'snapshots') -cfg.EXP_ROOT_LOGS = osp.join(cfg.EXP_ROOT, 'logs') -# CUDA -cfg.GPU_ID = 0 - -# TRAIN CONFIGS -cfg.TRAIN = EasyDict() -cfg.TRAIN.SET_SOURCE = 'all' -cfg.TRAIN.SET_TARGET = 'train' -cfg.TRAIN.BATCH_SIZE_SOURCE = 1 -cfg.TRAIN.BATCH_SIZE_TARGET = 1 -cfg.TRAIN.IGNORE_LABEL = 255 -cfg.TRAIN.INPUT_SIZE_SOURCE = (1280, 720) -cfg.TRAIN.INPUT_SIZE_TARGET = (1024, 512) -# Class info -cfg.TRAIN.INFO_SOURCE = '' -cfg.TRAIN.INFO_TARGET = str(project_root / 'advent/dataset/cityscapes_list/info.json') -# Segmentation network params -cfg.TRAIN.MODEL = 'DeepLabv2' -cfg.TRAIN.MULTI_LEVEL = True -cfg.TRAIN.RESTORE_FROM = '' -cfg.TRAIN.IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) -cfg.TRAIN.LEARNING_RATE = 2.5e-4 -cfg.TRAIN.MOMENTUM = 0.9 -cfg.TRAIN.WEIGHT_DECAY = 0.0005 -cfg.TRAIN.POWER = 0.9 -cfg.TRAIN.LAMBDA_SEG_MAIN = 1.0 -cfg.TRAIN.LAMBDA_SEG_AUX = 0.1 # weight of conv4 prediction. Used in multi-level setting. 
-# Domain adaptation -cfg.TRAIN.DA_METHOD = 'AdvEnt' -# Adversarial training params -cfg.TRAIN.LEARNING_RATE_D = 1e-4 -cfg.TRAIN.LAMBDA_ADV_MAIN = 0.001 -cfg.TRAIN.LAMBDA_ADV_AUX = 0.0002 -# MinEnt params -cfg.TRAIN.LAMBDA_ENT_MAIN = 0.001 -cfg.TRAIN.LAMBDA_ENT_AUX = 0.0002 -# Other params -cfg.TRAIN.MAX_ITERS = 250000 -cfg.TRAIN.EARLY_STOP = 120000 -cfg.TRAIN.SAVE_PRED_EVERY = 2000 -cfg.TRAIN.SNAPSHOT_DIR = '' -cfg.TRAIN.RANDOM_SEED = 1234 -cfg.TRAIN.TENSORBOARD_LOGDIR = '' -cfg.TRAIN.TENSORBOARD_VIZRATE = 100 - -# TEST CONFIGS -cfg.TEST = EasyDict() -cfg.TEST.MODE = 'best' # {'single', 'best'} -# model -cfg.TEST.MODEL = ('DeepLabv2',) -cfg.TEST.MODEL_WEIGHT = (1.0,) -cfg.TEST.MULTI_LEVEL = (True,) -cfg.TEST.IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) -cfg.TEST.RESTORE_FROM = ('',) -cfg.TEST.SNAPSHOT_DIR = ('',) # used in 'best' mode -cfg.TEST.SNAPSHOT_STEP = 2000 # used in 'best' mode -cfg.TEST.SNAPSHOT_MAXITER = 120000 # used in 'best' mode -# Test sets -cfg.TEST.SET_TARGET = 'val' -cfg.TEST.BATCH_SIZE_TARGET = 1 -cfg.TEST.INPUT_SIZE_TARGET = (1024, 512) -cfg.TEST.OUTPUT_SIZE_TARGET = (2048, 1024) -cfg.TEST.INFO_TARGET = str(project_root / 'advent/dataset/cityscapes_list/info.json') -cfg.TEST.WAIT_MODEL = True - - -def _merge_a_into_b(a, b): - """Merge config dictionary a into config dictionary b, clobbering the - options in b whenever they are also specified in a. - """ - if type(a) is not EasyDict: - return - - for k, v in a.items(): - # a must specify keys that are in b - # if not b.has_key(k): - if k not in b: - raise KeyError(f'{k} is not a valid config key') - - # the types must match, too - old_type = type(b[k]) - if old_type is not type(v): - if isinstance(b[k], np.ndarray): - v = np.array(v, dtype=b[k].dtype) - else: - raise ValueError(f'Type mismatch ({type(b[k])} vs. {type(v)}) ' - f'for config key: {k}') - - # recursively merge dicts - if type(v) is EasyDict: - try: - _merge_a_into_b(a[k], b[k]) - except Exception: - print(f'Error under config key: {k}') - raise - else: - b[k] = v - - -def cfg_from_file(filename): - """Load a config file and merge it into the default options. - """ - yaml_cfg = EasyDict(yaml_load(filename)) - _merge_a_into_b(yaml_cfg, cfg) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
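`_merge_a_into_b` above overrides the defaults in `cfg` with whatever a YAML experiment file specifies, enforcing that every overridden key already exists and keeps its type, with a special case that coerces lists back into numpy arrays. A small standalone sketch of that merge behaviour, re-implemented with easydict rather than calling the repo's function (the keys mirror a couple of the defaults shown above; the override values are illustrative):

```python
import numpy as np
from easydict import EasyDict

defaults = EasyDict({
    'NUM_CLASSES': 19,
    'TRAIN': EasyDict({
        'LEARNING_RATE': 2.5e-4,
        'IMG_MEAN': np.array([104.0, 116.7, 122.7], dtype=np.float32),
    }),
})

def merge(a, b):
    """Same rules as _merge_a_into_b: unknown keys and type changes are errors,
    lists are coerced to match an existing ndarray, nested dicts recurse."""
    for k, v in a.items():
        if k not in b:
            raise KeyError(f'{k} is not a valid config key')
        if isinstance(b[k], EasyDict) and isinstance(v, dict):
            merge(EasyDict(v), b[k])
        elif type(b[k]) is not type(v):
            if isinstance(b[k], np.ndarray):
                b[k] = np.array(v, dtype=b[k].dtype)   # list from YAML -> ndarray
            else:
                raise ValueError(f'Type mismatch for config key: {k}')
        else:
            b[k] = v

# What a loaded YAML override might look like after yaml_load + EasyDict.
override = {'TRAIN': {'LEARNING_RATE': 1e-4, 'IMG_MEAN': [100.0, 110.0, 120.0]}}
merge(EasyDict(override), defaults)
print(defaults.TRAIN.LEARNING_RATE, defaults.TRAIN.IMG_MEAN.dtype)  # 0.0001 float32
```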
+ +# -------------------------------------------------------- +# Configurations for domain adaptation +# Copyright (c) 2019 valeo.ai +# +# Written by Tuan-Hung Vu +# Adapted from https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py +# -------------------------------------------------------- + +import os.path as osp + +import numpy as np +from easydict import EasyDict + +from advent.utils import project_root +from advent.utils.serialization import yaml_load + + +cfg = EasyDict() + +# COMMON CONFIGS +# source domain +cfg.SOURCE = 'GTA' +# target domain +cfg.TARGET = 'Cityscapes' +# Number of workers for dataloading +cfg.NUM_WORKERS = 4 +# List of training images +cfg.DATA_LIST_SOURCE = str(project_root / 'advent/dataset/gta5_list/{}.txt') +cfg.DATA_LIST_TARGET = str(project_root / 'advent/dataset/cityscapes_list/{}.txt') +# Directories +cfg.DATA_DIRECTORY_SOURCE = str(project_root / 'data/GTA5') +cfg.DATA_DIRECTORY_TARGET = str(project_root / 'data/Cityscapes') +# Number of object classes +cfg.NUM_CLASSES = 19 +# Exp dirs +cfg.EXP_NAME = '' +cfg.EXP_ROOT = project_root / 'experiments' +cfg.EXP_ROOT_SNAPSHOT = osp.join(cfg.EXP_ROOT, 'snapshots') +cfg.EXP_ROOT_LOGS = osp.join(cfg.EXP_ROOT, 'logs') +# CUDA +cfg.GPU_ID = 0 + +# TRAIN CONFIGS +cfg.TRAIN = EasyDict() +cfg.TRAIN.SET_SOURCE = 'all' +cfg.TRAIN.SET_TARGET = 'train' +cfg.TRAIN.BATCH_SIZE_SOURCE = 1 +cfg.TRAIN.BATCH_SIZE_TARGET = 1 +cfg.TRAIN.IGNORE_LABEL = 255 +cfg.TRAIN.INPUT_SIZE_SOURCE = (1280, 720) +cfg.TRAIN.INPUT_SIZE_TARGET = (1024, 512) +# Class info +cfg.TRAIN.INFO_SOURCE = '' +cfg.TRAIN.INFO_TARGET = str(project_root / 'advent/dataset/cityscapes_list/info.json') +# Segmentation network params +cfg.TRAIN.MODEL = 'DeepLabv2' +cfg.TRAIN.MULTI_LEVEL = True +cfg.TRAIN.RESTORE_FROM = '' +cfg.TRAIN.IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) +cfg.TRAIN.LEARNING_RATE = 2.5e-4 +cfg.TRAIN.MOMENTUM = 0.9 +cfg.TRAIN.WEIGHT_DECAY = 0.0005 +cfg.TRAIN.POWER = 0.9 +cfg.TRAIN.LAMBDA_SEG_MAIN = 1.0 +cfg.TRAIN.LAMBDA_SEG_AUX = 0.1 # weight of conv4 prediction. Used in multi-level setting. 
+# Domain adaptation +cfg.TRAIN.DA_METHOD = 'AdvEnt' +# Adversarial training params +cfg.TRAIN.LEARNING_RATE_D = 1e-4 +cfg.TRAIN.LAMBDA_ADV_MAIN = 0.001 +cfg.TRAIN.LAMBDA_ADV_AUX = 0.0002 +# MinEnt params +cfg.TRAIN.LAMBDA_ENT_MAIN = 0.001 +cfg.TRAIN.LAMBDA_ENT_AUX = 0.0002 +# Other params +cfg.TRAIN.MAX_ITERS = 250000 +cfg.TRAIN.EARLY_STOP = 120000 +cfg.TRAIN.SAVE_PRED_EVERY = 2000 +cfg.TRAIN.SNAPSHOT_DIR = '' +cfg.TRAIN.RANDOM_SEED = 1234 +cfg.TRAIN.TENSORBOARD_LOGDIR = '' +cfg.TRAIN.TENSORBOARD_VIZRATE = 100 + +# TEST CONFIGS +cfg.TEST = EasyDict() +cfg.TEST.MODE = 'best' # {'single', 'best'} +# model +cfg.TEST.MODEL = ('DeepLabv2',) +cfg.TEST.MODEL_WEIGHT = (1.0,) +cfg.TEST.MULTI_LEVEL = (True,) +cfg.TEST.IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) +cfg.TEST.RESTORE_FROM = ('',) +cfg.TEST.SNAPSHOT_DIR = ('',) # used in 'best' mode +cfg.TEST.SNAPSHOT_STEP = 2000 # used in 'best' mode +cfg.TEST.SNAPSHOT_MAXITER = 120000 # used in 'best' mode +# Test sets +cfg.TEST.SET_TARGET = 'val' +cfg.TEST.BATCH_SIZE_TARGET = 1 +cfg.TEST.INPUT_SIZE_TARGET = (1024, 512) +cfg.TEST.OUTPUT_SIZE_TARGET = (2048, 1024) +cfg.TEST.INFO_TARGET = str(project_root / 'advent/dataset/cityscapes_list/info.json') +cfg.TEST.WAIT_MODEL = True + + +def _merge_a_into_b(a, b): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + if type(a) is not EasyDict: + return + + for k, v in a.items(): + # a must specify keys that are in b + # if not b.has_key(k): + if k not in b: + raise KeyError(f'{k} is not a valid config key') + + # the types must match, too + old_type = type(b[k]) + if old_type is not type(v): + if isinstance(b[k], np.ndarray): + v = np.array(v, dtype=b[k].dtype) + else: + raise ValueError(f'Type mismatch ({type(b[k])} vs. {type(v)}) ' + f'for config key: {k}') + + # recursively merge dicts + if type(v) is EasyDict: + try: + _merge_a_into_b(a[k], b[k]) + except Exception: + print(f'Error under config key: {k}') + raise + else: + b[k] = v + + +def cfg_from_file(filename): + """Load a config file and merge it into the default options. + """ + yaml_cfg = EasyDict(yaml_load(filename)) + _merge_a_into_b(yaml_cfg, cfg) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/eval_UDA.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/eval_UDA.py index caff904345384ab98e11f02ea9baabb25a3e73a7..11a700a2f8c94f40d7bc654fd9b46c048a3137e9 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/eval_UDA.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/eval_UDA.py @@ -1,157 +1,157 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
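The evaluation loop below accumulates a num_classes x num_classes confusion matrix with `fast_hist` over every test image and then derives per-class IoU with `per_class_iu`; mIoU is the nan-mean over classes. A compact standalone sketch of that computation, using the usual confusion-matrix formulation rather than the repo's `advent.utils.func` implementations:

```python
import numpy as np

def fast_hist_sketch(label, pred, n):
    """Confusion matrix: rows = ground-truth class, cols = predicted class.
    Pixels whose label is outside [0, n) (e.g. the 255 ignore label) are dropped."""
    k = (label >= 0) & (label < n)
    return np.bincount(n * label[k].astype(int) + pred[k], minlength=n ** 2).reshape(n, n)

def per_class_iu_sketch(hist):
    """IoU per class: TP / (TP + FP + FN); classes never seen give NaN."""
    tp = np.diag(hist)
    return tp / (hist.sum(axis=1) + hist.sum(axis=0) - tp)

# Toy 2-class example: 6 pixels, one ignored (255), one mistake.
label = np.array([0, 0, 1, 1, 1, 255])
pred  = np.array([0, 1, 1, 1, 1, 0])
hist = fast_hist_sketch(label, pred, n=2)
iou = per_class_iu_sketch(hist)
print(hist)                        # [[1 1]
                                   #  [0 3]]
print(iou, np.nanmean(iou) * 100)  # [0.5  0.75] 62.5
```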
- -# -------------------------------------------------------- -# Domain adpatation evaluation -# Copyright (c) 2019 valeo.ai -# -# Written by Tuan-Hung Vu -# -------------------------------------------------------- - -import os.path as osp -import time - -import numpy as np -import torch -from torch import nn -from tqdm import tqdm - -from advent.utils.func import per_class_iu, fast_hist -from advent.utils.serialization import pickle_dump, pickle_load - - -def evaluate_domain_adaptation( models, test_loader, cfg, - fixed_test_size=True, - verbose=True): - device = cfg.GPU_ID - interp = None - if fixed_test_size: - interp = nn.Upsample(size=(cfg.TEST.OUTPUT_SIZE_TARGET[1], cfg.TEST.OUTPUT_SIZE_TARGET[0]), mode='bilinear', align_corners=True) - # eval - if cfg.TEST.MODE == 'single': - eval_single(cfg, models, - device, test_loader, interp, fixed_test_size, - verbose) - elif cfg.TEST.MODE == 'best': - eval_best(cfg, models, - device, test_loader, interp, fixed_test_size, - verbose) - else: - raise NotImplementedError(f"Not yet supported test mode {cfg.TEST.MODE}") - - -def eval_single(cfg, models, - device, test_loader, interp, - fixed_test_size, verbose): - assert len(cfg.TEST.RESTORE_FROM) == len(models), 'Number of models are not matched' - for checkpoint, model in zip(cfg.TEST.RESTORE_FROM, models): - load_checkpoint_for_evaluation(model, checkpoint, device) - # eval - hist = np.zeros((cfg.NUM_CLASSES, cfg.NUM_CLASSES)) - for index, batch in tqdm(enumerate(test_loader)): - image, label, _, name = batch - if not fixed_test_size: - interp = nn.Upsample(size=(label.shape[1], label.shape[2]), mode='bilinear', align_corners=True) - with torch.no_grad(): - output = None - for model, model_weight in zip(models, cfg.TEST.MODEL_WEIGHT): - pred_main = model(image.cuda(device))[1] - output_ = interp(pred_main).cpu().data[0].numpy() - if output is None: - output = model_weight * output_ - else: - output += model_weight * output_ - assert output is not None, 'Output is None' - output = output.transpose(1, 2, 0) - output = np.argmax(output, axis=2) - label = label.numpy()[0] - hist += fast_hist(label.flatten(), output.flatten(), cfg.NUM_CLASSES) - inters_over_union_classes = per_class_iu(hist) - print(f'mIoU = \t{round(np.nanmean(inters_over_union_classes) * 100, 2)}') - if verbose: - display_stats(cfg, test_loader.dataset.class_names, inters_over_union_classes) - - -def eval_best(cfg, models, - device, test_loader, interp, - fixed_test_size, verbose): - assert len(models) == 1, 'Not yet supported multi models in this mode' - assert osp.exists(cfg.TEST.SNAPSHOT_DIR[0]), 'SNAPSHOT_DIR is not found' - start_iter = cfg.TEST.SNAPSHOT_STEP - step = cfg.TEST.SNAPSHOT_STEP - max_iter = cfg.TEST.SNAPSHOT_MAXITER - cache_path = osp.join(cfg.TEST.SNAPSHOT_DIR[0], 'all_res.pkl') - if osp.exists(cache_path): - all_res = pickle_load(cache_path) - else: - all_res = {} - cur_best_miou = -1 - cur_best_model = '' - for i_iter in range(start_iter, max_iter + 1, step): - restore_from = osp.join(cfg.TEST.SNAPSHOT_DIR[0], f'model_{i_iter}.pth') - if not osp.exists(restore_from): - # continue - if cfg.TEST.WAIT_MODEL: - print('Waiting for model..!') - while not osp.exists(restore_from): - time.sleep(5) - print("Evaluating model", restore_from) - if i_iter not in all_res.keys(): - load_checkpoint_for_evaluation(models[0], restore_from, device) - # eval - hist = np.zeros((cfg.NUM_CLASSES, cfg.NUM_CLASSES)) - # for index, batch in enumerate(test_loader): - # image, _, _, name = batch - test_iter = iter(test_loader) - for index 
in tqdm(range(len(test_loader))): - image, label, _, name = next(test_iter) - if not fixed_test_size: - interp = nn.Upsample(size=(label.shape[1], label.shape[2]), mode='bilinear', align_corners=True) - with torch.no_grad(): - pred_main = models[0](image.cuda(device))[1] - output = interp(pred_main).cpu().data[0].numpy() - output = output.transpose(1, 2, 0) - output = np.argmax(output, axis=2) - label = label.numpy()[0] - hist += fast_hist(label.flatten(), output.flatten(), cfg.NUM_CLASSES) - if verbose and index > 0 and index % 100 == 0: - print('{:d} / {:d}: {:0.2f}'.format( - index, len(test_loader), 100 * np.nanmean(per_class_iu(hist)))) - inters_over_union_classes = per_class_iu(hist) - all_res[i_iter] = inters_over_union_classes - pickle_dump(all_res, cache_path) - else: - inters_over_union_classes = all_res[i_iter] - computed_miou = round(np.nanmean(inters_over_union_classes) * 100, 2) - if cur_best_miou < computed_miou: - cur_best_miou = computed_miou - cur_best_model = restore_from - print('\tCurrent mIoU:', computed_miou) - print('\tCurrent best model:', cur_best_model) - print('\tCurrent best mIoU:', cur_best_miou) - if verbose: - display_stats(cfg, test_loader.dataset.class_names, inters_over_union_classes) - - -def load_checkpoint_for_evaluation(model, checkpoint, device): - saved_state_dict = torch.load(checkpoint) - model.load_state_dict(saved_state_dict) - model.eval() - model.cuda(device) - - -def display_stats(cfg, name_classes, inters_over_union_classes): - for ind_class in range(cfg.NUM_CLASSES): - print(name_classes[ind_class] - + '\t' + str(round(inters_over_union_classes[ind_class] * 100, 2))) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -------------------------------------------------------- +# Domain adpatation evaluation +# Copyright (c) 2019 valeo.ai +# +# Written by Tuan-Hung Vu +# -------------------------------------------------------- + +import os.path as osp +import time + +import numpy as np +import torch +from torch import nn +from tqdm import tqdm + +from advent.utils.func import per_class_iu, fast_hist +from advent.utils.serialization import pickle_dump, pickle_load + + +def evaluate_domain_adaptation( models, test_loader, cfg, + fixed_test_size=True, + verbose=True): + device = cfg.GPU_ID + interp = None + if fixed_test_size: + interp = nn.Upsample(size=(cfg.TEST.OUTPUT_SIZE_TARGET[1], cfg.TEST.OUTPUT_SIZE_TARGET[0]), mode='bilinear', align_corners=True) + # eval + if cfg.TEST.MODE == 'single': + eval_single(cfg, models, + device, test_loader, interp, fixed_test_size, + verbose) + elif cfg.TEST.MODE == 'best': + eval_best(cfg, models, + device, test_loader, interp, fixed_test_size, + verbose) + else: + raise NotImplementedError(f"Not yet supported test mode {cfg.TEST.MODE}") + + +def eval_single(cfg, models, + device, test_loader, interp, + fixed_test_size, verbose): + assert len(cfg.TEST.RESTORE_FROM) == len(models), 'Number of models are not matched' + for checkpoint, model in zip(cfg.TEST.RESTORE_FROM, models): + load_checkpoint_for_evaluation(model, checkpoint, device) + # eval + hist = np.zeros((cfg.NUM_CLASSES, cfg.NUM_CLASSES)) + for index, batch in tqdm(enumerate(test_loader)): + image, label, _, name = batch + if not fixed_test_size: + interp = nn.Upsample(size=(label.shape[1], label.shape[2]), mode='bilinear', align_corners=True) + with torch.no_grad(): + output = None + for model, model_weight in zip(models, cfg.TEST.MODEL_WEIGHT): + pred_main = model(image.cuda(device))[1] + output_ = interp(pred_main).cpu().data[0].numpy() + if output is None: + output = model_weight * output_ + else: + output += model_weight * output_ + assert output is not None, 'Output is None' + output = output.transpose(1, 2, 0) + output = np.argmax(output, axis=2) + label = label.numpy()[0] + hist += fast_hist(label.flatten(), output.flatten(), cfg.NUM_CLASSES) + inters_over_union_classes = per_class_iu(hist) + print(f'mIoU = \t{round(np.nanmean(inters_over_union_classes) * 100, 2)}') + if verbose: + display_stats(cfg, test_loader.dataset.class_names, inters_over_union_classes) + + +def eval_best(cfg, models, + device, test_loader, interp, + fixed_test_size, verbose): + assert len(models) == 1, 'Not yet supported multi models in this mode' + assert osp.exists(cfg.TEST.SNAPSHOT_DIR[0]), 'SNAPSHOT_DIR is not found' + start_iter = cfg.TEST.SNAPSHOT_STEP + step = cfg.TEST.SNAPSHOT_STEP + max_iter = cfg.TEST.SNAPSHOT_MAXITER + cache_path = osp.join(cfg.TEST.SNAPSHOT_DIR[0], 'all_res.pkl') + if osp.exists(cache_path): + all_res = pickle_load(cache_path) + else: + all_res = {} + cur_best_miou = -1 + cur_best_model = '' + for i_iter in range(start_iter, max_iter + 1, step): + restore_from = osp.join(cfg.TEST.SNAPSHOT_DIR[0], f'model_{i_iter}.pth') + if not osp.exists(restore_from): + # continue + if cfg.TEST.WAIT_MODEL: + print('Waiting for model..!') + while not osp.exists(restore_from): + time.sleep(5) + print("Evaluating model", restore_from) + if i_iter not in all_res.keys(): + load_checkpoint_for_evaluation(models[0], restore_from, device) + # eval + hist = np.zeros((cfg.NUM_CLASSES, cfg.NUM_CLASSES)) + # for index, batch in enumerate(test_loader): + # image, _, _, name = batch + test_iter = iter(test_loader) + for index 
in tqdm(range(len(test_loader))): + image, label, _, name = next(test_iter) + if not fixed_test_size: + interp = nn.Upsample(size=(label.shape[1], label.shape[2]), mode='bilinear', align_corners=True) + with torch.no_grad(): + pred_main = models[0](image.cuda(device))[1] + output = interp(pred_main).cpu().data[0].numpy() + output = output.transpose(1, 2, 0) + output = np.argmax(output, axis=2) + label = label.numpy()[0] + hist += fast_hist(label.flatten(), output.flatten(), cfg.NUM_CLASSES) + if verbose and index > 0 and index % 100 == 0: + print('{:d} / {:d}: {:0.2f}'.format( + index, len(test_loader), 100 * np.nanmean(per_class_iu(hist)))) + inters_over_union_classes = per_class_iu(hist) + all_res[i_iter] = inters_over_union_classes + pickle_dump(all_res, cache_path) + else: + inters_over_union_classes = all_res[i_iter] + computed_miou = round(np.nanmean(inters_over_union_classes) * 100, 2) + if cur_best_miou < computed_miou: + cur_best_miou = computed_miou + cur_best_model = restore_from + print('\tCurrent mIoU:', computed_miou) + print('\tCurrent best model:', cur_best_model) + print('\tCurrent best mIoU:', cur_best_miou) + if verbose: + display_stats(cfg, test_loader.dataset.class_names, inters_over_union_classes) + + +def load_checkpoint_for_evaluation(model, checkpoint, device): + saved_state_dict = torch.load(checkpoint) + model.load_state_dict(saved_state_dict) + model.eval() + model.cuda(device) + + +def display_stats(cfg, name_classes, inters_over_union_classes): + for ind_class in range(cfg.NUM_CLASSES): + print(name_classes[ind_class] + + '\t' + str(round(inters_over_union_classes[ind_class] * 100, 2))) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/train_UDA.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/train_UDA.py index e7266b67933cb6c778d932c0e53d9c021393aa90..899fc87bdcc0ddcb46feec1b74f5001d1edd9eb3 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/train_UDA.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/domain_adaptation/train_UDA.py @@ -1,356 +1,356 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# -------------------------------------------------------- -# Domain adpatation training -# Copyright (c) 2019 valeo.ai -# -# Written by Tuan-Hung Vu -# -------------------------------------------------------- -import os -import sys -from pathlib import Path - -import os.path as osp -import numpy as np -import torch -import torch.backends.cudnn as cudnn -import torch.nn.functional as F -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch import nn -from torchvision.utils import make_grid -from tqdm import tqdm - -from advent.model.discriminator import get_fc_discriminator -from advent.utils.func import adjust_learning_rate, adjust_learning_rate_discriminator -from advent.utils.func import loss_calc, bce_loss -from advent.utils.loss import entropy_loss -from advent.utils.func import prob_2_entropy -from advent.utils.viz_segmask import colorize_mask - - -def train_advent(model, trainloader, targetloader, cfg): - ''' UDA training with advent - ''' - # Create the model and start the training. - input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE - input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET - device = cfg.GPU_ID - num_classes = cfg.NUM_CLASSES - viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) - if viz_tensorboard: - writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) - - # SEGMNETATION NETWORK - model.train() - model.to(device) - cudnn.benchmark = True - cudnn.enabled = True - - # DISCRIMINATOR NETWORK - # feature-level - d_aux = get_fc_discriminator(num_classes=num_classes) - d_aux.train() - d_aux.to(device) - - # seg maps, i.e. output, level - d_main = get_fc_discriminator(num_classes=num_classes) - d_main.train() - d_main.to(device) - - # OPTIMIZERS - # segnet's optimizer - optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), - lr=cfg.TRAIN.LEARNING_RATE, - momentum=cfg.TRAIN.MOMENTUM, - weight_decay=cfg.TRAIN.WEIGHT_DECAY) - - # discriminators' optimizers - optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, - betas=(0.9, 0.99)) - optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, - betas=(0.9, 0.99)) - - # interpolate output segmaps - interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', - align_corners=True) - interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', - align_corners=True) - - # labels for adversarial training - source_label = 0 - target_label = 1 - trainloader_iter = enumerate(trainloader) - targetloader_iter = enumerate(targetloader) - for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP + 1)): - - # reset optimizers - optimizer.zero_grad() - optimizer_d_aux.zero_grad() - optimizer_d_main.zero_grad() - # adapt LR if needed - adjust_learning_rate(optimizer, i_iter, cfg) - adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg) - adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg) - - # UDA Training - # only train segnet. 
Don't accumulate grads in disciminators - for param in d_aux.parameters(): - param.requires_grad = False - for param in d_main.parameters(): - param.requires_grad = False - # train on source - _, batch = trainloader_iter.__next__() - images_source, labels, _, _ = batch - pred_src_aux, pred_src_main = model(images_source.cuda(device)) - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = interp(pred_src_aux) - loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) - else: - loss_seg_src_aux = 0 - pred_src_main = interp(pred_src_main) - loss_seg_src_main = loss_calc(pred_src_main, labels, device) - loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main - + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) - loss.backward() - - # adversarial training ot fool the discriminator - _, batch = targetloader_iter.__next__() - images, _, _, _ = batch - pred_trg_aux, pred_trg_main = model(images.cuda(device)) - if cfg.TRAIN.MULTI_LEVEL: - pred_trg_aux = interp_target(pred_trg_aux) - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) - loss_adv_trg_aux = bce_loss(d_out_aux, source_label) - else: - loss_adv_trg_aux = 0 - pred_trg_main = interp_target(pred_trg_main) - d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) - loss_adv_trg_main = bce_loss(d_out_main, source_label) - loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main - + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux) - loss = loss - loss.backward() - - # Train discriminator networks - # enable training mode on discriminator networks - for param in d_aux.parameters(): - param.requires_grad = True - for param in d_main.parameters(): - param.requires_grad = True - # train with source - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = pred_src_aux.detach() - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux))) - loss_d_aux = bce_loss(d_out_aux, source_label) - loss_d_aux = loss_d_aux / 2 - loss_d_aux.backward() - pred_src_main = pred_src_main.detach() - d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main))) - loss_d_main = bce_loss(d_out_main, source_label) - loss_d_main = loss_d_main / 2 - loss_d_main.backward() - - # train with target - if cfg.TRAIN.MULTI_LEVEL: - pred_trg_aux = pred_trg_aux.detach() - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) - loss_d_aux = bce_loss(d_out_aux, target_label) - loss_d_aux = loss_d_aux / 2 - loss_d_aux.backward() - else: - loss_d_aux = 0 - pred_trg_main = pred_trg_main.detach() - d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) - loss_d_main = bce_loss(d_out_main, target_label) - loss_d_main = loss_d_main / 2 - loss_d_main.backward() - - optimizer.step() - if cfg.TRAIN.MULTI_LEVEL: - optimizer_d_aux.step() - optimizer_d_main.step() - - current_losses = {'loss_seg_src_aux': loss_seg_src_aux, - 'loss_seg_src_main': loss_seg_src_main, - 'loss_adv_trg_aux': loss_adv_trg_aux, - 'loss_adv_trg_main': loss_adv_trg_main, - 'loss_d_aux': loss_d_aux, - 'loss_d_main': loss_d_main} - print_losses(current_losses, i_iter) - - if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: - print('taking snapshot ...') - print('exp =', cfg.TRAIN.SNAPSHOT_DIR) - snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR) - torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth') - torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth') - torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth') - if i_iter >= cfg.TRAIN.EARLY_STOP - 1: - break - sys.stdout.flush() - - # Visualize with tensorboard - if viz_tensorboard: - log_losses_tensorboard(writer, 
current_losses, i_iter) - - if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: - draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') - draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') - - -def draw_in_tensorboard(writer, images, i_iter, pred_main, num_classes, type_): - grid_image = make_grid(images[:3].clone().cpu().data, 3, normalize=True) - writer.add_image(f'Image - {type_}', grid_image, i_iter) - - grid_image = make_grid(torch.from_numpy(np.array(colorize_mask(np.asarray( - np.argmax(F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0), - axis=2), dtype=np.uint8)).convert('RGB')).transpose(2, 0, 1)), 3, - normalize=False, range=(0, 255)) - writer.add_image(f'Prediction - {type_}', grid_image, i_iter) - - output_sm = F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0) - output_ent = np.sum(-np.multiply(output_sm, np.log2(output_sm)), axis=2, - keepdims=False) - grid_image = make_grid(torch.from_numpy(output_ent), 3, normalize=True, - range=(0, np.log2(num_classes))) - writer.add_image(f'Entropy - {type_}', grid_image, i_iter) - - -def train_minent(model, trainloader, targetloader, cfg): - ''' UDA training with minEnt - ''' - # Create the model and start the training. - input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE - input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET - device = cfg.GPU_ID - num_classes = cfg.NUM_CLASSES - viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) - if viz_tensorboard: - writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) - - # SEGMNETATION NETWORK - model.train() - model.to(device) - cudnn.benchmark = True - cudnn.enabled = True - - # OPTIMIZERS - # segnet's optimizer - optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), - lr=cfg.TRAIN.LEARNING_RATE, - momentum=cfg.TRAIN.MOMENTUM, - weight_decay=cfg.TRAIN.WEIGHT_DECAY) - - # interpolate output segmaps - interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', - align_corners=True) - interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', - align_corners=True) - - trainloader_iter = enumerate(trainloader) - targetloader_iter = enumerate(targetloader) - for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): - - # reset optimizers - optimizer.zero_grad() - - # adapt LR if needed - adjust_learning_rate(optimizer, i_iter, cfg) - - # UDA Training - # train on source - _, batch = trainloader_iter.__next__() - images_source, labels, _, _ = batch - pred_src_aux, pred_src_main = model(images_source.cuda(device)) - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = interp(pred_src_aux) - loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) - else: - loss_seg_src_aux = 0 - pred_src_main = interp(pred_src_main) - loss_seg_src_main = loss_calc(pred_src_main, labels, device) - loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main - + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) - loss.backward() - - # adversarial training with minent - _, batch = targetloader_iter.__next__() - images, _, _, _ = batch - pred_trg_aux, pred_trg_main = model(images.cuda(device)) - pred_trg_aux = interp_target(pred_trg_aux) - pred_trg_main = interp_target(pred_trg_main) - pred_prob_trg_aux = F.softmax(pred_trg_aux) - pred_prob_trg_main = F.softmax(pred_trg_main) - - loss_target_entp_aux = entropy_loss(pred_prob_trg_aux) - loss_target_entp_main = entropy_loss(pred_prob_trg_main) - loss = (cfg.TRAIN.LAMBDA_ENT_AUX * loss_target_entp_aux - + 
cfg.TRAIN.LAMBDA_ENT_MAIN * loss_target_entp_main) - loss.backward() - optimizer.step() - - current_losses = {'loss_seg_src_aux': loss_seg_src_aux, - 'loss_seg_src_main': loss_seg_src_main, - 'loss_ent_aux': loss_target_entp_aux, - 'loss_ent_main': loss_target_entp_main} - - print_losses(current_losses, i_iter) - - if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: - print('taking snapshot ...') - print('exp =', cfg.TRAIN.SNAPSHOT_DIR) - torch.save(model.state_dict(), - osp.join(cfg.TRAIN.SNAPSHOT_DIR, f'model_{i_iter}.pth')) - if i_iter >= cfg.TRAIN.EARLY_STOP - 1: - break - sys.stdout.flush() - - # Visualize with tensorboard - if viz_tensorboard: - log_losses_tensorboard(writer, current_losses, i_iter) - - if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: - draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') - draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') - - -def print_losses(current_losses, i_iter): - list_strings = [] - for loss_name, loss_value in current_losses.items(): - list_strings.append(f'{loss_name} = {to_numpy(loss_value):.3f} ') - full_string = ' '.join(list_strings) - tqdm.write(f'iter = {i_iter} {full_string}') - - -def log_losses_tensorboard(writer, current_losses, i_iter): - for loss_name, loss_value in current_losses.items(): - writer.add_scalar(f'data/{loss_name}', to_numpy(loss_value), i_iter) - - -def to_numpy(tensor): - if isinstance(tensor, (int, float)): - return tensor - else: - return tensor.data.cpu().numpy() - - -def train_domain_adaptation(model, trainloader, targetloader, cfg): - if cfg.TRAIN.DA_METHOD == 'MinEnt': - train_minent(model, trainloader, targetloader, cfg) - elif cfg.TRAIN.DA_METHOD == 'AdvEnt': - train_advent(model, trainloader, targetloader, cfg) - else: - raise NotImplementedError(f"Not yet supported DA method {cfg.TRAIN.DA_METHOD}") +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -------------------------------------------------------- +# Domain adpatation training +# Copyright (c) 2019 valeo.ai +# +# Written by Tuan-Hung Vu +# -------------------------------------------------------- +import os +import sys +from pathlib import Path + +import os.path as osp +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.nn.functional as F +import torch.optim as optim +from tensorboardX import SummaryWriter +from torch import nn +from torchvision.utils import make_grid +from tqdm import tqdm + +from advent.model.discriminator import get_fc_discriminator +from advent.utils.func import adjust_learning_rate, adjust_learning_rate_discriminator +from advent.utils.func import loss_calc, bce_loss +from advent.utils.loss import entropy_loss +from advent.utils.func import prob_2_entropy +from advent.utils.viz_segmask import colorize_mask + + +def train_advent(model, trainloader, targetloader, cfg): + ''' UDA training with advent + ''' + # Create the model and start the training. 
+ input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE + input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET + device = cfg.GPU_ID + num_classes = cfg.NUM_CLASSES + viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) + if viz_tensorboard: + writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) + + # SEGMNETATION NETWORK + model.train() + model.to(device) + cudnn.benchmark = True + cudnn.enabled = True + + # DISCRIMINATOR NETWORK + # feature-level + d_aux = get_fc_discriminator(num_classes=num_classes) + d_aux.train() + d_aux.to(device) + + # seg maps, i.e. output, level + d_main = get_fc_discriminator(num_classes=num_classes) + d_main.train() + d_main.to(device) + + # OPTIMIZERS + # segnet's optimizer + optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), + lr=cfg.TRAIN.LEARNING_RATE, + momentum=cfg.TRAIN.MOMENTUM, + weight_decay=cfg.TRAIN.WEIGHT_DECAY) + + # discriminators' optimizers + optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, + betas=(0.9, 0.99)) + optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, + betas=(0.9, 0.99)) + + # interpolate output segmaps + interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', + align_corners=True) + interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', + align_corners=True) + + # labels for adversarial training + source_label = 0 + target_label = 1 + trainloader_iter = enumerate(trainloader) + targetloader_iter = enumerate(targetloader) + for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP + 1)): + + # reset optimizers + optimizer.zero_grad() + optimizer_d_aux.zero_grad() + optimizer_d_main.zero_grad() + # adapt LR if needed + adjust_learning_rate(optimizer, i_iter, cfg) + adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg) + adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg) + + # UDA Training + # only train segnet. 
Don't accumulate grads in disciminators + for param in d_aux.parameters(): + param.requires_grad = False + for param in d_main.parameters(): + param.requires_grad = False + # train on source + _, batch = trainloader_iter.__next__() + images_source, labels, _, _ = batch + pred_src_aux, pred_src_main = model(images_source.cuda(device)) + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = interp(pred_src_aux) + loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) + else: + loss_seg_src_aux = 0 + pred_src_main = interp(pred_src_main) + loss_seg_src_main = loss_calc(pred_src_main, labels, device) + loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) + loss.backward() + + # adversarial training ot fool the discriminator + _, batch = targetloader_iter.__next__() + images, _, _, _ = batch + pred_trg_aux, pred_trg_main = model(images.cuda(device)) + if cfg.TRAIN.MULTI_LEVEL: + pred_trg_aux = interp_target(pred_trg_aux) + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) + loss_adv_trg_aux = bce_loss(d_out_aux, source_label) + else: + loss_adv_trg_aux = 0 + pred_trg_main = interp_target(pred_trg_main) + d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) + loss_adv_trg_main = bce_loss(d_out_main, source_label) + loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main + + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux) + loss = loss + loss.backward() + + # Train discriminator networks + # enable training mode on discriminator networks + for param in d_aux.parameters(): + param.requires_grad = True + for param in d_main.parameters(): + param.requires_grad = True + # train with source + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = pred_src_aux.detach() + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux))) + loss_d_aux = bce_loss(d_out_aux, source_label) + loss_d_aux = loss_d_aux / 2 + loss_d_aux.backward() + pred_src_main = pred_src_main.detach() + d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main))) + loss_d_main = bce_loss(d_out_main, source_label) + loss_d_main = loss_d_main / 2 + loss_d_main.backward() + + # train with target + if cfg.TRAIN.MULTI_LEVEL: + pred_trg_aux = pred_trg_aux.detach() + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) + loss_d_aux = bce_loss(d_out_aux, target_label) + loss_d_aux = loss_d_aux / 2 + loss_d_aux.backward() + else: + loss_d_aux = 0 + pred_trg_main = pred_trg_main.detach() + d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) + loss_d_main = bce_loss(d_out_main, target_label) + loss_d_main = loss_d_main / 2 + loss_d_main.backward() + + optimizer.step() + if cfg.TRAIN.MULTI_LEVEL: + optimizer_d_aux.step() + optimizer_d_main.step() + + current_losses = {'loss_seg_src_aux': loss_seg_src_aux, + 'loss_seg_src_main': loss_seg_src_main, + 'loss_adv_trg_aux': loss_adv_trg_aux, + 'loss_adv_trg_main': loss_adv_trg_main, + 'loss_d_aux': loss_d_aux, + 'loss_d_main': loss_d_main} + print_losses(current_losses, i_iter) + + if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: + print('taking snapshot ...') + print('exp =', cfg.TRAIN.SNAPSHOT_DIR) + snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR) + torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth') + torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth') + torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth') + if i_iter >= cfg.TRAIN.EARLY_STOP - 1: + break + sys.stdout.flush() + + # Visualize with tensorboard + if viz_tensorboard: + log_losses_tensorboard(writer, 
current_losses, i_iter) + + if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: + draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') + draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') + + +def draw_in_tensorboard(writer, images, i_iter, pred_main, num_classes, type_): + grid_image = make_grid(images[:3].clone().cpu().data, 3, normalize=True) + writer.add_image(f'Image - {type_}', grid_image, i_iter) + + grid_image = make_grid(torch.from_numpy(np.array(colorize_mask(np.asarray( + np.argmax(F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0), + axis=2), dtype=np.uint8)).convert('RGB')).transpose(2, 0, 1)), 3, + normalize=False, range=(0, 255)) + writer.add_image(f'Prediction - {type_}', grid_image, i_iter) + + output_sm = F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0) + output_ent = np.sum(-np.multiply(output_sm, np.log2(output_sm)), axis=2, + keepdims=False) + grid_image = make_grid(torch.from_numpy(output_ent), 3, normalize=True, + range=(0, np.log2(num_classes))) + writer.add_image(f'Entropy - {type_}', grid_image, i_iter) + + +def train_minent(model, trainloader, targetloader, cfg): + ''' UDA training with minEnt + ''' + # Create the model and start the training. + input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE + input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET + device = cfg.GPU_ID + num_classes = cfg.NUM_CLASSES + viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) + if viz_tensorboard: + writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) + + # SEGMNETATION NETWORK + model.train() + model.to(device) + cudnn.benchmark = True + cudnn.enabled = True + + # OPTIMIZERS + # segnet's optimizer + optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), + lr=cfg.TRAIN.LEARNING_RATE, + momentum=cfg.TRAIN.MOMENTUM, + weight_decay=cfg.TRAIN.WEIGHT_DECAY) + + # interpolate output segmaps + interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', + align_corners=True) + interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', + align_corners=True) + + trainloader_iter = enumerate(trainloader) + targetloader_iter = enumerate(targetloader) + for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): + + # reset optimizers + optimizer.zero_grad() + + # adapt LR if needed + adjust_learning_rate(optimizer, i_iter, cfg) + + # UDA Training + # train on source + _, batch = trainloader_iter.__next__() + images_source, labels, _, _ = batch + pred_src_aux, pred_src_main = model(images_source.cuda(device)) + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = interp(pred_src_aux) + loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) + else: + loss_seg_src_aux = 0 + pred_src_main = interp(pred_src_main) + loss_seg_src_main = loss_calc(pred_src_main, labels, device) + loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) + loss.backward() + + # adversarial training with minent + _, batch = targetloader_iter.__next__() + images, _, _, _ = batch + pred_trg_aux, pred_trg_main = model(images.cuda(device)) + pred_trg_aux = interp_target(pred_trg_aux) + pred_trg_main = interp_target(pred_trg_main) + pred_prob_trg_aux = F.softmax(pred_trg_aux) + pred_prob_trg_main = F.softmax(pred_trg_main) + + loss_target_entp_aux = entropy_loss(pred_prob_trg_aux) + loss_target_entp_main = entropy_loss(pred_prob_trg_main) + loss = (cfg.TRAIN.LAMBDA_ENT_AUX * loss_target_entp_aux + + 
cfg.TRAIN.LAMBDA_ENT_MAIN * loss_target_entp_main) + loss.backward() + optimizer.step() + + current_losses = {'loss_seg_src_aux': loss_seg_src_aux, + 'loss_seg_src_main': loss_seg_src_main, + 'loss_ent_aux': loss_target_entp_aux, + 'loss_ent_main': loss_target_entp_main} + + print_losses(current_losses, i_iter) + + if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: + print('taking snapshot ...') + print('exp =', cfg.TRAIN.SNAPSHOT_DIR) + torch.save(model.state_dict(), + osp.join(cfg.TRAIN.SNAPSHOT_DIR, f'model_{i_iter}.pth')) + if i_iter >= cfg.TRAIN.EARLY_STOP - 1: + break + sys.stdout.flush() + + # Visualize with tensorboard + if viz_tensorboard: + log_losses_tensorboard(writer, current_losses, i_iter) + + if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: + draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') + draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') + + +def print_losses(current_losses, i_iter): + list_strings = [] + for loss_name, loss_value in current_losses.items(): + list_strings.append(f'{loss_name} = {to_numpy(loss_value):.3f} ') + full_string = ' '.join(list_strings) + tqdm.write(f'iter = {i_iter} {full_string}') + + +def log_losses_tensorboard(writer, current_losses, i_iter): + for loss_name, loss_value in current_losses.items(): + writer.add_scalar(f'data/{loss_name}', to_numpy(loss_value), i_iter) + + +def to_numpy(tensor): + if isinstance(tensor, (int, float)): + return tensor + else: + return tensor.data.cpu().numpy() + + +def train_domain_adaptation(model, trainloader, targetloader, cfg): + if cfg.TRAIN.DA_METHOD == 'MinEnt': + train_minent(model, trainloader, targetloader, cfg) + elif cfg.TRAIN.DA_METHOD == 'AdvEnt': + train_advent(model, trainloader, targetloader, cfg) + else: + raise NotImplementedError(f"Not yet supported DA method {cfg.TRAIN.DA_METHOD}") diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/model/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/model/__init__.py index 55887fca321859e7fe8df0054b4761e83a7b4210..945cb920d49428f81bc1c2597ffa7a2a61e8ef81 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/model/__init__.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/model/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. 
\ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent+minent_pretrained.yml b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent+minent_pretrained.yml index 7e274313c576a26dc1bab94889b2134713162ddf..a423a0b559a7cd13b2d126e2bc5aebe4cccdddf6 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent+minent_pretrained.yml +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent+minent_pretrained.yml @@ -1,18 +1,18 @@ -SOURCE: GTA -TARGET: Cityscapes -EXP_NAME: GTA2Cityscapes_AdvEnt_MinEnt -NUM_WORKERS: 4 -TEST: - MODE: single - MODEL: - - DeepLabv2 - - DeepLabv2 - MULTI_LEVEL: - - True - - True - RESTORE_FROM: - - ../../pretrained_models/gta2cityscapes_advent.pth - - ../../pretrained_models/gta2cityscapes_minent_ER.pth - MODEL_WEIGHT: - - 0.5 +SOURCE: GTA +TARGET: Cityscapes +EXP_NAME: GTA2Cityscapes_AdvEnt_MinEnt +NUM_WORKERS: 4 +TEST: + MODE: single + MODEL: + - DeepLabv2 + - DeepLabv2 + MULTI_LEVEL: + - True + - True + RESTORE_FROM: + - ../../pretrained_models/gta2cityscapes_advent.pth + - ../../pretrained_models/gta2cityscapes_minent_ER.pth + MODEL_WEIGHT: + - 0.5 - 0.5 \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_cyclegan_pretrained.yml b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_cyclegan_pretrained.yml index 881a9defef166e82c2560b6a3d78c66db23153be..3840244b80109421bb2519e94d78615cbbe48c66 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_cyclegan_pretrained.yml +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_cyclegan_pretrained.yml @@ -1,8 +1,8 @@ -SOURCE: GTA -TARGET: Cityscapes -EXP_NAME: GTA2Cityscapes_AdvEnt_CycleGAN -NUM_WORKERS: 4 -TEST: - MODE: single - RESTORE_FROM: +SOURCE: GTA +TARGET: Cityscapes +EXP_NAME: GTA2Cityscapes_AdvEnt_CycleGAN +NUM_WORKERS: 4 +TEST: + MODE: single + RESTORE_FROM: - ../../pretrained_models/gta2cityscapes_advent_cyclegan.pth \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_pretrained.yml b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_pretrained.yml index 85a2f7ac4d8a216b820d5dcb0a42ee7cb1fd2041..2e35e98857835f39081fa1c63fa70630f75c9796 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_pretrained.yml +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/advent_pretrained.yml @@ -1,8 +1,8 @@ -SOURCE: GTA -TARGET: Cityscapes -EXP_NAME: GTA2Cityscapes_AdvEnt -NUM_WORKERS: 4 -TEST: - MODE: single - RESTORE_FROM: +SOURCE: GTA +TARGET: Cityscapes +EXP_NAME: GTA2Cityscapes_AdvEnt +NUM_WORKERS: 4 +TEST: + MODE: single + RESTORE_FROM: - ../../pretrained_models/gta2cityscapes_advent.pth \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent.yml b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent.yml index 3ddef6dc8fcfa1044ff5fc2d676c48482cb7c12c..6c85e845459d45ac98e1a19fd55a32176e95d300 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent.yml +++ 
b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent.yml @@ -1,12 +1,12 @@ -SOURCE: GTA -TARGET: Cityscapes -NUM_WORKERS: 4 -TRAIN: - DA_METHOD: MinEnt - MODEL: DeepLabv2 - RESTORE_FROM: ../../pretrained_models/DeepLab_resnet_pretrained_imagenet.pth - MULTI_LEVEL: True - LAMBDA_ENT_MAIN: 0.001 - LAMBDA_ENT_AUX: 0.0002 -TEST: +SOURCE: GTA +TARGET: Cityscapes +NUM_WORKERS: 4 +TRAIN: + DA_METHOD: MinEnt + MODEL: DeepLabv2 + RESTORE_FROM: ../../pretrained_models/DeepLab_resnet_pretrained_imagenet.pth + MULTI_LEVEL: True + LAMBDA_ENT_MAIN: 0.001 + LAMBDA_ENT_AUX: 0.0002 +TEST: MODE: best \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent_pretrained.yml b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent_pretrained.yml index 7385d864837661ce7e7fda6f5e79db87c83e3015..5ccf3a3c17916983435c5ca0dca6fcfd86209ace 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent_pretrained.yml +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/configs/minent_pretrained.yml @@ -1,9 +1,9 @@ -SOURCE: GTA -TARGET: Cityscapes -EXP_NAME: GTA2Cityscapes_MinEnt -NUM_WORKERS: 4 -TEST: - MODE: single - RESTORE_FROM: - - ../../pretrained_models/gta2cityscapes_minent_ER.pth +SOURCE: GTA +TARGET: Cityscapes +EXP_NAME: GTA2Cityscapes_MinEnt +NUM_WORKERS: 4 +TEST: + MODE: single + RESTORE_FROM: + - ../../pretrained_models/gta2cityscapes_minent_ER.pth # - ../../pretrained_models/gta2cityscapes_minent.pth \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/test.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/test.py index d6545bbdc329cf8ebfcd0504a7eeeda59435bf9f..f88cfed0c9805e8a37c20594740f4d94e77960f0 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/test.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/test.py @@ -1,109 +1,109 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# -------------------------------------------------------- -# AdvEnt training -# Copyright (c) 2019 valeo.ai -# -# Written by Tuan-Hung Vu -# -------------------------------------------------------- -import sys -sys.path.append('/home/feipan/IntraDA/ADVENT') -import pdb - -import argparse -import os -import os.path as osp -import pprint -import warnings - -from torch.utils import data - -from advent.model.deeplabv2 import get_deeplab_v2 -from advent.dataset.cityscapes import CityscapesDataSet -from advent.domain_adaptation.config import cfg, cfg_from_file -from advent.domain_adaptation.eval_UDA import evaluate_domain_adaptation - -warnings.filterwarnings("ignore", message="numpy.dtype size changed") -warnings.filterwarnings("ignore") - - - -def get_arguments(): - """ - Parse input arguments - """ - parser = argparse.ArgumentParser(description="Code for evaluation") - parser.add_argument('--cfg', type=str, default=None, - help='optional config file', ) - parser.add_argument("--exp-suffix", type=str, default=None, - help="optional experiment suffix") - return parser.parse_args() - - -def main(config_file, exp_suffix): - # LOAD ARGS - assert config_file is not None, 'Missing cfg file' - cfg_from_file(config_file) - # auto-generate exp name if not specified - if cfg.EXP_NAME == '': - cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' - if exp_suffix: - cfg.EXP_NAME += f'_{exp_suffix}' - # auto-generate snapshot path if not specified - if cfg.TEST.SNAPSHOT_DIR[0] == '': - cfg.TEST.SNAPSHOT_DIR[0] = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) - os.makedirs(cfg.TEST.SNAPSHOT_DIR[0], exist_ok=True) - - print('Using config:') - pprint.pprint(cfg) - # load models - models = [] - n_models = len(cfg.TEST.MODEL) - if cfg.TEST.MODE == 'best': - assert n_models == 1, 'Not yet supported' - for i in range(n_models): - if cfg.TEST.MODEL[i] == 'DeepLabv2': - model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, - multi_level=cfg.TEST.MULTI_LEVEL[i]) - else: - raise NotImplementedError(f"Not yet supported {cfg.TEST.MODEL[i]}") - models.append(model) - - if os.environ.get('ADVENT_DRY_RUN', '0') == '1': - return - - # dataloaders - pdb.set_trace() - test_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, - list_path=cfg.DATA_LIST_TARGET, - set=cfg.TEST.SET_TARGET, - info_path=cfg.TEST.INFO_TARGET, - crop_size=cfg.TEST.INPUT_SIZE_TARGET, - mean=cfg.TEST.IMG_MEAN, - labels_size=cfg.TEST.OUTPUT_SIZE_TARGET) - test_loader = data.DataLoader(test_dataset, - batch_size=cfg.TEST.BATCH_SIZE_TARGET, - num_workers=cfg.NUM_WORKERS, - shuffle=False, - pin_memory=True) - # eval - evaluate_domain_adaptation(models, test_loader, cfg) - - -if __name__ == '__main__': - args = get_arguments() - print('Called with args:') - print(args) - main(args.cfg, args.exp_suffix) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -------------------------------------------------------- +# AdvEnt training +# Copyright (c) 2019 valeo.ai +# +# Written by Tuan-Hung Vu +# -------------------------------------------------------- +import sys +sys.path.append('/home/feipan/IntraDA/ADVENT') +import pdb + +import argparse +import os +import os.path as osp +import pprint +import warnings + +from torch.utils import data + +from advent.model.deeplabv2 import get_deeplab_v2 +from advent.dataset.cityscapes import CityscapesDataSet +from advent.domain_adaptation.config import cfg, cfg_from_file +from advent.domain_adaptation.eval_UDA import evaluate_domain_adaptation + +warnings.filterwarnings("ignore", message="numpy.dtype size changed") +warnings.filterwarnings("ignore") + + + +def get_arguments(): + """ + Parse input arguments + """ + parser = argparse.ArgumentParser(description="Code for evaluation") + parser.add_argument('--cfg', type=str, default=None, + help='optional config file', ) + parser.add_argument("--exp-suffix", type=str, default=None, + help="optional experiment suffix") + return parser.parse_args() + + +def main(config_file, exp_suffix): + # LOAD ARGS + assert config_file is not None, 'Missing cfg file' + cfg_from_file(config_file) + # auto-generate exp name if not specified + if cfg.EXP_NAME == '': + cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' + if exp_suffix: + cfg.EXP_NAME += f'_{exp_suffix}' + # auto-generate snapshot path if not specified + if cfg.TEST.SNAPSHOT_DIR[0] == '': + cfg.TEST.SNAPSHOT_DIR[0] = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) + os.makedirs(cfg.TEST.SNAPSHOT_DIR[0], exist_ok=True) + + print('Using config:') + pprint.pprint(cfg) + # load models + models = [] + n_models = len(cfg.TEST.MODEL) + if cfg.TEST.MODE == 'best': + assert n_models == 1, 'Not yet supported' + for i in range(n_models): + if cfg.TEST.MODEL[i] == 'DeepLabv2': + model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, + multi_level=cfg.TEST.MULTI_LEVEL[i]) + else: + raise NotImplementedError(f"Not yet supported {cfg.TEST.MODEL[i]}") + models.append(model) + + if os.environ.get('ADVENT_DRY_RUN', '0') == '1': + return + + # dataloaders + pdb.set_trace() + test_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, + list_path=cfg.DATA_LIST_TARGET, + set=cfg.TEST.SET_TARGET, + info_path=cfg.TEST.INFO_TARGET, + crop_size=cfg.TEST.INPUT_SIZE_TARGET, + mean=cfg.TEST.IMG_MEAN, + labels_size=cfg.TEST.OUTPUT_SIZE_TARGET) + test_loader = data.DataLoader(test_dataset, + batch_size=cfg.TEST.BATCH_SIZE_TARGET, + num_workers=cfg.NUM_WORKERS, + shuffle=False, + pin_memory=True) + # eval + evaluate_domain_adaptation(models, test_loader, cfg) + + +if __name__ == '__main__': + args = get_arguments() + print('Called with args:') + print(args) + main(args.cfg, args.exp_suffix) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/train.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/train.py index 80a915ab207ca3fa3820ae08f94b2a61146449ab..3109bd535d9fb0aee80d3ec8b9e4e11224c95b0a 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/train.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/scripts/train.py @@ -1,159 +1,159 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -------------------------------------------------------- -# AdvEnt training -# Copyright (c) 2019 valeo.ai -# -# Written by Tuan-Hung Vu -# -------------------------------------------------------- -import argparse -import os -import os.path as osp -import pprint -import random -import warnings - -import numpy as np -import yaml -import torch -from torch.utils import data - -from advent.model.deeplabv2 import get_deeplab_v2 -from advent.dataset.gta5 import GTA5DataSet -from advent.dataset.cityscapes import CityscapesDataSet -from advent.domain_adaptation.config import cfg, cfg_from_file -from advent.domain_adaptation.train_UDA import train_domain_adaptation - -warnings.filterwarnings("ignore", message="numpy.dtype size changed") -warnings.filterwarnings("ignore") - - -def get_arguments(): - """ - Parse input arguments - """ - parser = argparse.ArgumentParser(description="Code for domain adaptation (DA) training") - parser.add_argument('--cfg', type=str, default=None, - help='optional config file', ) - parser.add_argument("--random-train", action="store_true", - help="not fixing random seed.") - parser.add_argument("--tensorboard", action="store_true", - help="visualize training loss with tensorboardX.") - parser.add_argument("--viz-every-iter", type=int, default=None, - help="visualize results.") - parser.add_argument("--exp-suffix", type=str, default=None, - help="optional experiment suffix") - return parser.parse_args() - - -def main(): - # LOAD ARGS - args = get_arguments() - print('Called with args:') - print(args) - - assert args.cfg is not None, 'Missing cfg file' - cfg_from_file(args.cfg) - # auto-generate exp name if not specified - if cfg.EXP_NAME == '': - cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' - - if args.exp_suffix: - cfg.EXP_NAME += f'_{args.exp_suffix}' - # auto-generate snapshot path if not specified - if cfg.TRAIN.SNAPSHOT_DIR == '': - cfg.TRAIN.SNAPSHOT_DIR = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) - os.makedirs(cfg.TRAIN.SNAPSHOT_DIR, exist_ok=True) - # tensorboard - if args.tensorboard: - if cfg.TRAIN.TENSORBOARD_LOGDIR == '': - cfg.TRAIN.TENSORBOARD_LOGDIR = osp.join(cfg.EXP_ROOT_LOGS, 'tensorboard', cfg.EXP_NAME) - os.makedirs(cfg.TRAIN.TENSORBOARD_LOGDIR, exist_ok=True) - if args.viz_every_iter is not None: - cfg.TRAIN.TENSORBOARD_VIZRATE = args.viz_every_iter - else: - cfg.TRAIN.TENSORBOARD_LOGDIR = '' - print('Using config:') - pprint.pprint(cfg) - - # INIT - _init_fn = None - if not args.random_train: - torch.manual_seed(cfg.TRAIN.RANDOM_SEED) - torch.cuda.manual_seed(cfg.TRAIN.RANDOM_SEED) - np.random.seed(cfg.TRAIN.RANDOM_SEED) - random.seed(cfg.TRAIN.RANDOM_SEED) - - def _init_fn(worker_id): - np.random.seed(cfg.TRAIN.RANDOM_SEED + worker_id) - - if os.environ.get('ADVENT_DRY_RUN', '0') == '1': - return - - # LOAD SEGMENTATION NET - assert osp.exists(cfg.TRAIN.RESTORE_FROM), f'Missing init model {cfg.TRAIN.RESTORE_FROM}' - if cfg.TRAIN.MODEL == 'DeepLabv2': - model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, multi_level=cfg.TRAIN.MULTI_LEVEL) - saved_state_dict = torch.load(cfg.TRAIN.RESTORE_FROM) 
- if 'DeepLab_resnet_pretrained_imagenet' in cfg.TRAIN.RESTORE_FROM: - new_params = model.state_dict().copy() - for i in saved_state_dict: - i_parts = i.split('.') - if not i_parts[1] == 'layer5': - new_params['.'.join(i_parts[1:])] = saved_state_dict[i] - model.load_state_dict(new_params) - else: - model.load_state_dict(saved_state_dict) - else: - raise NotImplementedError(f"Not yet supported {cfg.TRAIN.MODEL}") - print('Model loaded') - - # DATALOADERS - source_dataset = GTA5DataSet(root=cfg.DATA_DIRECTORY_SOURCE, - list_path=cfg.DATA_LIST_SOURCE, - set=cfg.TRAIN.SET_SOURCE, - max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_SOURCE, - crop_size=cfg.TRAIN.INPUT_SIZE_SOURCE, - mean=cfg.TRAIN.IMG_MEAN) - source_loader = data.DataLoader(source_dataset, - batch_size=cfg.TRAIN.BATCH_SIZE_SOURCE, - num_workers=cfg.NUM_WORKERS, - shuffle=True, - pin_memory=True, - worker_init_fn=_init_fn) - - target_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, - list_path=cfg.DATA_LIST_TARGET, - set=cfg.TRAIN.SET_TARGET, - info_path=cfg.TRAIN.INFO_TARGET, - max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_TARGET, - crop_size=cfg.TRAIN.INPUT_SIZE_TARGET, - mean=cfg.TRAIN.IMG_MEAN) - target_loader = data.DataLoader(target_dataset, - batch_size=cfg.TRAIN.BATCH_SIZE_TARGET, - num_workers=cfg.NUM_WORKERS, - shuffle=True, - pin_memory=True, - worker_init_fn=_init_fn) - - with open(osp.join(cfg.TRAIN.SNAPSHOT_DIR, 'train_cfg.yml'), 'w') as yaml_file: - yaml.dump(cfg, yaml_file, default_flow_style=False) - - # UDA TRAINING - train_domain_adaptation(model, source_loader, target_loader, cfg) - - -if __name__ == '__main__': - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# -------------------------------------------------------- +# AdvEnt training +# Copyright (c) 2019 valeo.ai +# +# Written by Tuan-Hung Vu +# -------------------------------------------------------- +import argparse +import os +import os.path as osp +import pprint +import random +import warnings + +import numpy as np +import yaml +import torch +from torch.utils import data + +from advent.model.deeplabv2 import get_deeplab_v2 +from advent.dataset.gta5 import GTA5DataSet +from advent.dataset.cityscapes import CityscapesDataSet +from advent.domain_adaptation.config import cfg, cfg_from_file +from advent.domain_adaptation.train_UDA import train_domain_adaptation + +warnings.filterwarnings("ignore", message="numpy.dtype size changed") +warnings.filterwarnings("ignore") + + +def get_arguments(): + """ + Parse input arguments + """ + parser = argparse.ArgumentParser(description="Code for domain adaptation (DA) training") + parser.add_argument('--cfg', type=str, default=None, + help='optional config file', ) + parser.add_argument("--random-train", action="store_true", + help="not fixing random seed.") + parser.add_argument("--tensorboard", action="store_true", + help="visualize training loss with tensorboardX.") + parser.add_argument("--viz-every-iter", type=int, default=None, + help="visualize results.") + parser.add_argument("--exp-suffix", type=str, default=None, + help="optional experiment suffix") + return parser.parse_args() + + +def main(): + # LOAD ARGS + args = get_arguments() + print('Called with args:') + print(args) + + assert args.cfg is not None, 'Missing cfg file' + cfg_from_file(args.cfg) + # auto-generate exp name if not specified + if cfg.EXP_NAME == '': + cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' + + if args.exp_suffix: + cfg.EXP_NAME += f'_{args.exp_suffix}' + # auto-generate snapshot path if not specified + if cfg.TRAIN.SNAPSHOT_DIR == '': + cfg.TRAIN.SNAPSHOT_DIR = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) + os.makedirs(cfg.TRAIN.SNAPSHOT_DIR, exist_ok=True) + # tensorboard + if args.tensorboard: + if cfg.TRAIN.TENSORBOARD_LOGDIR == '': + cfg.TRAIN.TENSORBOARD_LOGDIR = osp.join(cfg.EXP_ROOT_LOGS, 'tensorboard', cfg.EXP_NAME) + os.makedirs(cfg.TRAIN.TENSORBOARD_LOGDIR, exist_ok=True) + if args.viz_every_iter is not None: + cfg.TRAIN.TENSORBOARD_VIZRATE = args.viz_every_iter + else: + cfg.TRAIN.TENSORBOARD_LOGDIR = '' + print('Using config:') + pprint.pprint(cfg) + + # INIT + _init_fn = None + if not args.random_train: + torch.manual_seed(cfg.TRAIN.RANDOM_SEED) + torch.cuda.manual_seed(cfg.TRAIN.RANDOM_SEED) + np.random.seed(cfg.TRAIN.RANDOM_SEED) + random.seed(cfg.TRAIN.RANDOM_SEED) + + def _init_fn(worker_id): + np.random.seed(cfg.TRAIN.RANDOM_SEED + worker_id) + + if os.environ.get('ADVENT_DRY_RUN', '0') == '1': + return + + # LOAD SEGMENTATION NET + assert osp.exists(cfg.TRAIN.RESTORE_FROM), f'Missing init model {cfg.TRAIN.RESTORE_FROM}' + if cfg.TRAIN.MODEL == 'DeepLabv2': + model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, multi_level=cfg.TRAIN.MULTI_LEVEL) + saved_state_dict = torch.load(cfg.TRAIN.RESTORE_FROM) + if 'DeepLab_resnet_pretrained_imagenet' in cfg.TRAIN.RESTORE_FROM: + new_params = model.state_dict().copy() + for i in saved_state_dict: + i_parts = i.split('.') + if not i_parts[1] == 'layer5': + new_params['.'.join(i_parts[1:])] = saved_state_dict[i] + model.load_state_dict(new_params) + else: + model.load_state_dict(saved_state_dict) + else: + raise NotImplementedError(f"Not yet supported {cfg.TRAIN.MODEL}") 
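    # What the key remapping above does, shown on hypothetical checkpoint keys:
    # the ImageNet-pretrained DeepLab checkpoint stores parameters under a
    # wrapper prefix, so each key is split on '.', the leading prefix is
    # dropped, and anything under 'layer5' (the classifier head, which is
    # retrained for the segmentation classes) is skipped rather than copied.
    #
    #   saved = {'Scale.layer1.conv1.weight': 'w1',
    #            'Scale.layer5.conv2d_list.0.weight': 'w5'}
    #   kept = {'.'.join(k.split('.')[1:]): v
    #           for k, v in saved.items() if k.split('.')[1] != 'layer5'}
    #   # kept == {'layer1.conv1.weight': 'w1'}
    #
    # The 'Scale.' prefix is illustrative; the actual prefix depends on how the
    # checkpoint was saved.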
+ print('Model loaded') + + # DATALOADERS + source_dataset = GTA5DataSet(root=cfg.DATA_DIRECTORY_SOURCE, + list_path=cfg.DATA_LIST_SOURCE, + set=cfg.TRAIN.SET_SOURCE, + max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_SOURCE, + crop_size=cfg.TRAIN.INPUT_SIZE_SOURCE, + mean=cfg.TRAIN.IMG_MEAN) + source_loader = data.DataLoader(source_dataset, + batch_size=cfg.TRAIN.BATCH_SIZE_SOURCE, + num_workers=cfg.NUM_WORKERS, + shuffle=True, + pin_memory=True, + worker_init_fn=_init_fn) + + target_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, + list_path=cfg.DATA_LIST_TARGET, + set=cfg.TRAIN.SET_TARGET, + info_path=cfg.TRAIN.INFO_TARGET, + max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_TARGET, + crop_size=cfg.TRAIN.INPUT_SIZE_TARGET, + mean=cfg.TRAIN.IMG_MEAN) + target_loader = data.DataLoader(target_dataset, + batch_size=cfg.TRAIN.BATCH_SIZE_TARGET, + num_workers=cfg.NUM_WORKERS, + shuffle=True, + pin_memory=True, + worker_init_fn=_init_fn) + + with open(osp.join(cfg.TRAIN.SNAPSHOT_DIR, 'train_cfg.yml'), 'w') as yaml_file: + yaml.dump(cfg, yaml_file, default_flow_style=False) + + # UDA TRAINING + train_domain_adaptation(model, source_loader, target_loader, cfg) + + +if __name__ == '__main__': + main() diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/func.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/func.py index a57a26d522c9e169ccad5c561b297dbccb5b4cfe..0ff54adcd3010608be3cfd0b58478b9da322feaa 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/func.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/func.py @@ -1,75 +1,75 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import torch -import torch.nn as nn - -from advent.utils.loss import cross_entropy_2d - - -def bce_loss(y_pred, y_label): - y_truth_tensor = torch.FloatTensor(y_pred.size()) - y_truth_tensor.fill_(y_label) - y_truth_tensor = y_truth_tensor.to(y_pred.get_device()) - return nn.BCEWithLogitsLoss()(y_pred, y_truth_tensor) - - -def loss_calc(pred, label, device): - """ - This function returns cross entropy loss for semantic segmentation - """ - # out shape batch_size x channels x h x w -> batch_size x channels x h x w - # label shape h x w x 1 x batch_size -> batch_size x 1 x h x w - label = label.long().to(device) - return cross_entropy_2d(pred, label) - - -def lr_poly(base_lr, iter, max_iter, power): - """ Poly_LR scheduler - """ - return base_lr * ((1 - float(iter) / max_iter) ** power) - - -def _adjust_learning_rate(optimizer, i_iter, cfg, learning_rate): - lr = lr_poly(learning_rate, i_iter, cfg.TRAIN.MAX_ITERS, cfg.TRAIN.POWER) - optimizer.param_groups[0]['lr'] = lr - if len(optimizer.param_groups) > 1: - optimizer.param_groups[1]['lr'] = lr * 10 - - -def adjust_learning_rate(optimizer, i_iter, cfg): - """ adject learning rate for main segnet - """ - _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE) - - -def adjust_learning_rate_discriminator(optimizer, i_iter, cfg): - _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE_D) - - -def prob_2_entropy(prob): - """ convert probabilistic prediction maps to weighted self-information maps - """ - n, c, h, w = prob.size() - return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c) - - -def fast_hist(a, b, n): - k = (a >= 0) & (a < n) - return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) - - -def per_class_iu(hist): - return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
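# Utility functions shared by the UDA training and evaluation code: the
# discriminator binary cross-entropy (bce_loss), the segmentation CE wrapper
# (loss_calc), the poly learning-rate schedule (lr_poly and the adjust_*
# helpers), the ADVENT entropy map (prob_2_entropy), and the confusion-matrix
# / IoU helpers (fast_hist, per_class_iu).
#
# A worked example of the poly schedule, assuming a base LR of 2.5e-4 (the
# actual value comes from cfg.TRAIN.LEARNING_RATE):
#
#   lr_poly(2.5e-4, 30000, 120000, 0.9) = 2.5e-4 * 0.75 ** 0.9 ≈ 1.93e-4
#
# prob_2_entropy rescales the per-pixel entropy by log2(C), so summing its
# output over the C channels yields a value in [0, 1] for every pixel.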
+ +import numpy as np +import torch +import torch.nn as nn + +from advent.utils.loss import cross_entropy_2d + + +def bce_loss(y_pred, y_label): + y_truth_tensor = torch.FloatTensor(y_pred.size()) + y_truth_tensor.fill_(y_label) + y_truth_tensor = y_truth_tensor.to(y_pred.get_device()) + return nn.BCEWithLogitsLoss()(y_pred, y_truth_tensor) + + +def loss_calc(pred, label, device): + """ + This function returns cross entropy loss for semantic segmentation + """ + # out shape batch_size x channels x h x w -> batch_size x channels x h x w + # label shape h x w x 1 x batch_size -> batch_size x 1 x h x w + label = label.long().to(device) + return cross_entropy_2d(pred, label) + + +def lr_poly(base_lr, iter, max_iter, power): + """ Poly_LR scheduler + """ + return base_lr * ((1 - float(iter) / max_iter) ** power) + + +def _adjust_learning_rate(optimizer, i_iter, cfg, learning_rate): + lr = lr_poly(learning_rate, i_iter, cfg.TRAIN.MAX_ITERS, cfg.TRAIN.POWER) + optimizer.param_groups[0]['lr'] = lr + if len(optimizer.param_groups) > 1: + optimizer.param_groups[1]['lr'] = lr * 10 + + +def adjust_learning_rate(optimizer, i_iter, cfg): + """ adject learning rate for main segnet + """ + _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE) + + +def adjust_learning_rate_discriminator(optimizer, i_iter, cfg): + _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE_D) + + +def prob_2_entropy(prob): + """ convert probabilistic prediction maps to weighted self-information maps + """ + n, c, h, w = prob.size() + return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c) + + +def fast_hist(a, b, n): + k = (a >= 0) & (a < n) + return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n) + + +def per_class_iu(hist): + return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/viz_segmask.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/viz_segmask.py index 0fc82ad3ec2aa717de7fb257324bda28c830b9b2..949396b6c78c9a95e2e6a8f5bdf78df3a137b9fd 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/viz_segmask.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/advent/utils/viz_segmask.py @@ -1,34 +1,34 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
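# The palette defined below is the standard 19-class Cityscapes color map
# (road, sidewalk, building, ..., bicycle), zero-padded to a full 256-color
# palette. colorize_mask() turns an H x W array of train IDs into a paletted
# PIL image; a minimal usage sketch, assuming `pred` is a numpy array of
# per-pixel class indices:
#
#   colorize_mask(pred).save('pred_color.png')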
- -import numpy as np -from PIL import Image - -palette = [128, 64, 128, 244, 35, 232, 70, 70, 70, 102, 102, 156, - 190, 153, 153, 153, 153, 153, 250, - 170, 30, - 220, 220, 0, 107, 142, 35, 152, 251, 152, - 70, 130, 180, 220, 20, 60, 255, 0, 0, 0, 0, - 142, 0, 0, 70, - 0, 60, 100, 0, 80, 100, 0, 0, 230, 119, 11, 32] -zero_pad = 256 * 3 - len(palette) -for i in range(zero_pad): - palette.append(0) - - -def colorize_mask(mask): - # mask: numpy array of the mask - new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') - new_mask.putpalette(palette) - return new_mask +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from PIL import Image + +palette = [128, 64, 128, 244, 35, 232, 70, 70, 70, 102, 102, 156, + 190, 153, 153, 153, 153, 153, 250, + 170, 30, + 220, 220, 0, 107, 142, 35, 152, 251, 152, + 70, 130, 180, 220, 20, 60, 255, 0, 0, 0, 0, + 142, 0, 0, 70, + 0, 60, 100, 0, 80, 100, 0, 0, 230, 119, 11, 32] +zero_pad = 256 * 3 - len(palette) +for i in range(zero_pad): + palette.append(0) + + +def colorize_mask(mask): + # mask: numpy array of the mask + new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P') + new_mask.putpalette(palette) + return new_mask diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/requirements.txt b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/requirements.txt index fa5371544150086c29001df6fadecfcffe27b5ee..e03d22e457e28db0000e1ea2235adb1a432521c3 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/requirements.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/ADVENT/requirements.txt @@ -1,2 +1,2 @@ -torch==1.5.0 -torchvision==0.6.0 +torch==1.5.0 +torchvision==0.6.0 diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/LICENSE b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/LICENSE index eeac88fb9dc15a1427b41173cf5f136327230c49..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/LICENSE +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/README.md b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/README.md index 2de9b5ac6a87e095d9b9e79e1ae48e90c4129058..22445b46eb4b62dfdfec1fcec6d784886d66950f 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/README.md @@ -1,77 +1,77 @@ -# IntraDA - -- 参考实现: -``` -url=https://github.com/feipan664/IntraDA.git -branch=master -commit_id=070b0b702fe94a34288eba4ca990410b5aaadc4a -``` - -## IntraDA Detail - -- 增加了混合精度训练 -- 增加了多卡分布式训练 -- 优化了loss在NPU上的计算效率 - -## Requirements - -- CANN 5.0.2 -- torch 1.5.0+ascend.post3.20210824 -- apex 0.1+ascend.20210824 -- 安装ADVENT - ``` - cd IntraDA/ADVENT - pip3 install -e . - ``` -- 下载[CityScapes数据集](https://www.cityscapes-dataset.com/downloads/) - 在IntraDA/ADVENT目录下创建data文件夹,将数据集按照如下结构放入data目录: - ``` - |-- ADVENT - | |-- data - | | `-- Cityscapes - | | |-- gtFine - | | `-- leftImg8bit - ``` -- 下载以下两个预训练模型: - ImageNet预训练模型: - https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/semantic_segmentation/IntraDA/DeepLab_resnet_pretrained_imagenet.pth - ADVENT warmup模型: - https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/semantic_segmentation/IntraDA/gta2cityscapes_advent.pth - 在IntraDA/ADVENT目录下创建pretrained_models文件夹,将以上2个模型放入改文件夹,目录结构如下: - ``` - |-- ADVENT - | |-- pretrained_models - | | |-- DeepLab_resnet_pretrained_imagenet.pth - | | `-- gta2cityscapes_advent.pth - ``` -- 生成训练用的伪标签及数据集分组文件: - ``` - cd IntraDA/entropy_rank/ - bash gen_color_mask_npu.sh - ``` - - -## Training - -```bash -cd IntraDA/intrada - -# 1p train perf 运行 500 step, 输出 performance_1p.log 文件 -bash test/train_performance_1p.sh - -# 8p train perf 运行 500 step, 输出 performance_8p.log 文件 -bash test/train_performance_8p.sh - -# 8p train full 完整训练并保存checkpoints,中间不会测试 -bash test/train_full_8p.sh - -# eval 测试8p训练保存的 checkpoints 得到精度信息 -bash test/train_eval_8p.sh -``` - -## IntraDA training result - -| mIoU | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| | 2.7 | 1 | - | O2 | -| 42.55 | 21 | 8 | - | O2 | +# IntraDA + +- 参考实现: +``` +url=https://github.com/feipan664/IntraDA.git +branch=master +commit_id=070b0b702fe94a34288eba4ca990410b5aaadc4a +``` + +## IntraDA Detail + +- 增加了混合精度训练 +- 增加了多卡分布式训练 +- 优化了loss在NPU上的计算效率 + +## Requirements + +- CANN 5.0.2 +- torch 1.5.0+ascend.post3.20210824 +- apex 0.1+ascend.20210824 +- 安装ADVENT + ``` + cd IntraDA/ADVENT + pip3 install -e . 
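  # optional sanity check that the editable install is importable:
  # python -c "import advent; print(advent.__file__)"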
+ ``` +- 下载[CityScapes数据集](https://www.cityscapes-dataset.com/downloads/) + 在IntraDA/ADVENT目录下创建data文件夹,将数据集按照如下结构放入data目录: + ``` + |-- ADVENT + | |-- data + | | `-- Cityscapes + | | |-- gtFine + | | `-- leftImg8bit + ``` +- 下载以下两个预训练模型: + ImageNet预训练模型: + https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/semantic_segmentation/IntraDA/DeepLab_resnet_pretrained_imagenet.pth + ADVENT warmup模型: + https://ascend-pytorch-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/semantic_segmentation/IntraDA/gta2cityscapes_advent.pth + 在IntraDA/ADVENT目录下创建pretrained_models文件夹,将以上2个模型放入改文件夹,目录结构如下: + ``` + |-- ADVENT + | |-- pretrained_models + | | |-- DeepLab_resnet_pretrained_imagenet.pth + | | `-- gta2cityscapes_advent.pth + ``` +- 生成训练用的伪标签及数据集分组文件: + ``` + cd IntraDA/entropy_rank/ + bash gen_color_mask_npu.sh + ``` + + +## Training + +```bash +cd IntraDA/intrada + +# 1p train perf 运行 500 step, 输出 performance_1p.log 文件 +bash test/train_performance_1p.sh + +# 8p train perf 运行 500 step, 输出 performance_8p.log 文件 +bash test/train_performance_8p.sh + +# 8p train full 完整训练并保存checkpoints,中间不会测试 +bash test/train_full_8p.sh + +# eval 测试8p训练保存的 checkpoints 得到精度信息 +bash test/train_eval_8p.sh +``` + +## IntraDA training result + +| mIoU | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| | 2.7 | 1 | - | O2 | +| 42.55 | 21 | 8 | - | O2 | diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/adaptsegnet.PNG b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/adaptsegnet.PNG deleted file mode 100644 index a9267aadc430b587f9641ab9820d4c9d3e35be2e..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/adaptsegnet.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/advent.PNG b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/advent.PNG deleted file mode 100644 index d737e14f12538b08e715becc5e1b95e6ed19fa20..0000000000000000000000000000000000000000 Binary files a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/figure/advent.PNG and /dev/null differ diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/test.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/test.py index 77bf700ab35061280ad6277fac340b3941714b8d..9830c7b3a3bf490403e642d7a99b4723be3c1f5b 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/test.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/test.py @@ -1,114 +1,114 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
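# Evaluation entry point: it builds the model(s) listed in cfg.TEST.MODEL and
# evaluates them on the Cityscapes split given by cfg.TEST.SET_TARGET.
# Example invocation, with a placeholder config path:
#
#   python test.py --cfg /path/to/intrada_config.yml --device_type npu --device_id 0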
- -#-------------------------------------------------------------------- -# modified from "ADVENT/advent/scripts/test.py" by Tuan-Hung Vu -#-------------------------------------------------------------------- -import argparse -import os -import os.path as osp -import pprint -import warnings -import torch - -from torch.utils import data - -from advent.model.deeplabv2 import get_deeplab_v2 -from advent.dataset.cityscapes import CityscapesDataSet -from advent.domain_adaptation.config import cfg, cfg_from_file -# from advent.domain_adaptation.eval_UDA import evaluate_domain_adaptation -from eval_UDA import evaluate_domain_adaptation - -warnings.filterwarnings("ignore", message="numpy.dtype size changed") -warnings.filterwarnings("ignore") - - -def get_arguments(): - """ - Parse input arguments - """ - parser = argparse.ArgumentParser(description="Code for evaluation") - parser.add_argument('--cfg', type=str, default=None, - help='optional config file', ) - parser.add_argument("--exp-suffix", type=str, default=None, - help="optional experiment suffix") - parser.add_argument('--device_type', type=str, default='npu') - parser.add_argument('--device_id', type=int, ) - return parser.parse_args() - - -def main(config_file, exp_suffix): - # LOAD ARGS - assert config_file is not None, 'Missing cfg file' - cfg_from_file(config_file) - # auto-generate exp name if not specified - # pdb.set_trace() - if cfg.EXP_NAME == '': - cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' - if exp_suffix: - cfg.EXP_NAME += f'_{exp_suffix}' - # auto-generate snapshot path if not specified - # pdb.set_trace() - if cfg.TEST.SNAPSHOT_DIR[0] == '': - cfg.TEST.SNAPSHOT_DIR[0] = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) - os.makedirs(cfg.TEST.SNAPSHOT_DIR[0], exist_ok=True) - - device = torch.device("{}:{}".format(args.device_type, args.device_id)) - if args.device_type == 'npu': - torch.npu.set_device(args.device_id) - elif args.device_type == 'cuda': - torch.cuda.set_device(args.device_id) - - print('Using config:') - pprint.pprint(cfg) - # load models - models = [] - n_models = len(cfg.TEST.MODEL) - if cfg.TEST.MODE == 'best': - assert n_models == 1, 'Not yet supported' - for i in range(n_models): - if cfg.TEST.MODEL[i] == 'DeepLabv2': - model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, - multi_level=cfg.TEST.MULTI_LEVEL[i]) - else: - raise NotImplementedError(f"Not yet supported {cfg.TEST.MODEL[i]}") - models.append(model) - - if os.environ.get('ADVENT_DRY_RUN', '0') == '1': - return - - # dataloaders - # pdb.set_trace() - test_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, - list_path='../ADVENT/advent/dataset/cityscapes_list/{}.txt', - set=cfg.TEST.SET_TARGET, - info_path=cfg.TEST.INFO_TARGET, - crop_size=cfg.TEST.INPUT_SIZE_TARGET, - mean=cfg.TEST.IMG_MEAN, - labels_size=cfg.TEST.OUTPUT_SIZE_TARGET) - test_loader = data.DataLoader(test_dataset, - batch_size=cfg.TEST.BATCH_SIZE_TARGET, - num_workers=cfg.NUM_WORKERS, - shuffle=False, - pin_memory=True) - # eval - # pdb.set_trace() - evaluate_domain_adaptation(models, test_loader, cfg, device) - - -if __name__ == '__main__': - args = get_arguments() - print('Called with args:') - print(args) - main(args.cfg, args.exp_suffix) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#-------------------------------------------------------------------- +# modified from "ADVENT/advent/scripts/test.py" by Tuan-Hung Vu +#-------------------------------------------------------------------- +import argparse +import os +import os.path as osp +import pprint +import warnings +import torch + +from torch.utils import data + +from advent.model.deeplabv2 import get_deeplab_v2 +from advent.dataset.cityscapes import CityscapesDataSet +from advent.domain_adaptation.config import cfg, cfg_from_file +# from advent.domain_adaptation.eval_UDA import evaluate_domain_adaptation +from eval_UDA import evaluate_domain_adaptation + +warnings.filterwarnings("ignore", message="numpy.dtype size changed") +warnings.filterwarnings("ignore") + + +def get_arguments(): + """ + Parse input arguments + """ + parser = argparse.ArgumentParser(description="Code for evaluation") + parser.add_argument('--cfg', type=str, default=None, + help='optional config file', ) + parser.add_argument("--exp-suffix", type=str, default=None, + help="optional experiment suffix") + parser.add_argument('--device_type', type=str, default='npu') + parser.add_argument('--device_id', type=int, ) + return parser.parse_args() + + +def main(config_file, exp_suffix): + # LOAD ARGS + assert config_file is not None, 'Missing cfg file' + cfg_from_file(config_file) + # auto-generate exp name if not specified + # pdb.set_trace() + if cfg.EXP_NAME == '': + cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' + if exp_suffix: + cfg.EXP_NAME += f'_{exp_suffix}' + # auto-generate snapshot path if not specified + # pdb.set_trace() + if cfg.TEST.SNAPSHOT_DIR[0] == '': + cfg.TEST.SNAPSHOT_DIR[0] = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) + os.makedirs(cfg.TEST.SNAPSHOT_DIR[0], exist_ok=True) + + device = torch.device("{}:{}".format(args.device_type, args.device_id)) + if args.device_type == 'npu': + torch.npu.set_device(args.device_id) + elif args.device_type == 'cuda': + torch.cuda.set_device(args.device_id) + + print('Using config:') + pprint.pprint(cfg) + # load models + models = [] + n_models = len(cfg.TEST.MODEL) + if cfg.TEST.MODE == 'best': + assert n_models == 1, 'Not yet supported' + for i in range(n_models): + if cfg.TEST.MODEL[i] == 'DeepLabv2': + model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, + multi_level=cfg.TEST.MULTI_LEVEL[i]) + else: + raise NotImplementedError(f"Not yet supported {cfg.TEST.MODEL[i]}") + models.append(model) + + if os.environ.get('ADVENT_DRY_RUN', '0') == '1': + return + + # dataloaders + # pdb.set_trace() + test_dataset = CityscapesDataSet(root=cfg.DATA_DIRECTORY_TARGET, + list_path='../ADVENT/advent/dataset/cityscapes_list/{}.txt', + set=cfg.TEST.SET_TARGET, + info_path=cfg.TEST.INFO_TARGET, + crop_size=cfg.TEST.INPUT_SIZE_TARGET, + mean=cfg.TEST.IMG_MEAN, + labels_size=cfg.TEST.OUTPUT_SIZE_TARGET) + test_loader = data.DataLoader(test_dataset, + batch_size=cfg.TEST.BATCH_SIZE_TARGET, + num_workers=cfg.NUM_WORKERS, + shuffle=False, + pin_memory=True) + # eval + # pdb.set_trace() + evaluate_domain_adaptation(models, test_loader, cfg, device) + + +if __name__ 
== '__main__': + args = get_arguments() + print('Called with args:') + print(args) + main(args.cfg, args.exp_suffix) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train.py index ce51c5899142a38ff2be0330976f2dedc43b9cdc..89fbde254ae220ad769d69e1201c06e60f3fb0cb 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train.py @@ -1,197 +1,197 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#-------------------------------------------------------------------- -# modified from "ADVENT/advent/scripts/train.py" by Tuan-Hung Vu -#-------------------------------------------------------------------- -import argparse -import os -import os.path as osp -import pprint -import random -import warnings - -import numpy as np -import yaml -import torch -from torch.utils import data - -from advent.model.deeplabv2 import get_deeplab_v2 -from advent.dataset.gta5 import GTA5DataSet -from advent.dataset.cityscapes import CityscapesDataSet as CityscapesDataSet_hard -from cityscapes import CityscapesDataSet as CityscapesDataSet_easy -from advent.domain_adaptation.config import cfg, cfg_from_file -from train_UDA import train_domain_adaptation - - -warnings.filterwarnings("ignore", message="numpy.dtype size changed") -warnings.filterwarnings("ignore") - - -def get_arguments(): - """ - Parse input arguments - """ - parser = argparse.ArgumentParser(description="Code for domain adaptation (DA) training") - parser.add_argument('--cfg', type=str, default=None, - help='optional config file', ) - parser.add_argument("--random-train", action="store_true", - help="not fixing random seed.") - parser.add_argument("--tensorboard", action="store_true", - help="visualize training loss with tensorboardX.") - parser.add_argument("--viz_every_iter", type=int, default=None, - help="visualize results.") - parser.add_argument("--exp-suffix", type=str, default=None, - help="optional experiment suffix") - parser.add_argument('--rank', type=int, default=0) - parser.add_argument('--device_type', type=str, default='npu') - parser.add_argument('--device_id', type=int, ) - parser.add_argument('--world_size', type=int, default=1) - parser.add_argument('--distributed', action='store_true', default=False) - parser.add_argument('--performance_log', action='store_true', default=False) - return parser.parse_args() - - -def main(): - # LOAD ARGS - args = get_arguments() - print('Called with args:') - print(args) - - # pdb.set_trace() - - assert args.cfg is not None, 'Missing cfg file' - cfg_from_file(args.cfg) - cfg.distributed = args.distributed - ddp_backend = "hccl" if args.device_type == "npu" else "nccl" - - if cfg.distributed: - torch.distributed.init_process_group(backend=ddp_backend, world_size=args.world_size, rank=args.rank) - device = torch.device("{}:{}".format(args.device_type ,args.device_id)) - cfg.device_id = 
args.device_id - cfg.rank = args.rank - cfg.world_size = args.world_size - cfg.device_type = args.device_type - cfg.performance_log = args.performance_log - if args.device_type == 'cuda': - torch.cuda.set_device(args.device_id) - elif args.device_type == 'npu': - torch.npu.set_device(args.device_id) - - cfg.is_master_node = args.world_size == 1 or args.device_id == 0 - - # auto-generate exp name if not specified - if cfg.EXP_NAME == '': - cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' - - if args.exp_suffix: - cfg.EXP_NAME += f'_{args.exp_suffix}' - # auto-generate snapshot path if not specified - if cfg.TRAIN.SNAPSHOT_DIR == '': - cfg.TRAIN.SNAPSHOT_DIR = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) - os.makedirs(cfg.TRAIN.SNAPSHOT_DIR, exist_ok=True) - # tensorboard - if args.tensorboard: - if cfg.TRAIN.TENSORBOARD_LOGDIR == '': - cfg.TRAIN.TENSORBOARD_LOGDIR = osp.join(cfg.EXP_ROOT_LOGS, 'tensorboard', cfg.EXP_NAME) - os.makedirs(cfg.TRAIN.TENSORBOARD_LOGDIR, exist_ok=True) - if args.viz_every_iter is not None: - cfg.TRAIN.TENSORBOARD_VIZRATE = args.viz_every_iter - else: - cfg.TRAIN.TENSORBOARD_LOGDIR = '' - if cfg.is_master_node: - print('Using config:') - pprint.pprint(cfg) - - # INIT - _init_fn = None - if not args.random_train: - torch.manual_seed(cfg.TRAIN.RANDOM_SEED) - if args.device_type == 'cuda': - torch.cuda.manual_seed(cfg.TRAIN.RANDOM_SEED) - elif args.device_type == 'npu': - torch.npu.manual_seed(cfg.TRAIN.RANDOM_SEED) - np.random.seed(cfg.TRAIN.RANDOM_SEED) - random.seed(cfg.TRAIN.RANDOM_SEED) - - def _init_fn(worker_id): - np.random.seed(cfg.TRAIN.RANDOM_SEED + worker_id) - - if os.environ.get('ADVENT_DRY_RUN', '0') == '1': - return - - # LOAD SEGMENTATION NET - assert osp.exists(cfg.TRAIN.RESTORE_FROM), f'Missing init model {cfg.TRAIN.RESTORE_FROM}' - if cfg.TRAIN.MODEL == 'DeepLabv2': - model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, multi_level=cfg.TRAIN.MULTI_LEVEL) - saved_state_dict = torch.load(cfg.TRAIN.RESTORE_FROM) - if 'DeepLab_resnet_pretrained_imagenet' in cfg.TRAIN.RESTORE_FROM: - new_params = model.state_dict().copy() - for i in saved_state_dict: - i_parts = i.split('.') - if not i_parts[1] == 'layer5': - new_params['.'.join(i_parts[1:])] = saved_state_dict[i] - model.load_state_dict(new_params) - else: - model.load_state_dict(saved_state_dict) - else: - raise NotImplementedError(f"Not yet supported {cfg.TRAIN.MODEL}") - print('Model loaded') - - # DATALOADERS - # pdb.set_trace() - easy_dataset = CityscapesDataSet_easy(root=cfg.DATA_DIRECTORY_SOURCE, - list_path=cfg.DATA_LIST_SOURCE, - max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_SOURCE * args.world_size, - crop_size=cfg.TRAIN.INPUT_SIZE_SOURCE, - mean=cfg.TRAIN.IMG_MEAN) - if cfg.distributed: - easy_sampler = torch.utils.data.distributed.DistributedSampler(easy_dataset) - easy_loader = data.DataLoader(easy_dataset, - batch_size=cfg.TRAIN.BATCH_SIZE_SOURCE, - num_workers=cfg.NUM_WORKERS, - shuffle=(not cfg.distributed), - pin_memory=False, - sampler=easy_sampler if cfg.distributed else None, - worker_init_fn=_init_fn) - - # pdb.set_trace() - hard_dataset = CityscapesDataSet_hard(root=cfg.DATA_DIRECTORY_TARGET, - list_path=cfg.DATA_LIST_TARGET, - set=cfg.TRAIN.SET_TARGET, - info_path=cfg.TRAIN.INFO_TARGET, - max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_TARGET * args.world_size, - crop_size=cfg.TRAIN.INPUT_SIZE_TARGET, - mean=cfg.TRAIN.IMG_MEAN) - if cfg.distributed: - hard_sampler = torch.utils.data.distributed.DistributedSampler(hard_dataset) - 
hard_loader = data.DataLoader(hard_dataset, - batch_size=cfg.TRAIN.BATCH_SIZE_TARGET, - num_workers=cfg.NUM_WORKERS, - shuffle=(not cfg.distributed), - pin_memory=False, - sampler=hard_sampler if cfg.distributed else None, - worker_init_fn=_init_fn) - - with open(osp.join(cfg.TRAIN.SNAPSHOT_DIR, 'train_cfg.yml'), 'w') as yaml_file: - yaml.dump(cfg, yaml_file, default_flow_style=False) - - # pdb.set_trace() - train_domain_adaptation(model, easy_loader, hard_loader, device, cfg) - - -if __name__ == '__main__': - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - main() +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#-------------------------------------------------------------------- +# modified from "ADVENT/advent/scripts/train.py" by Tuan-Hung Vu +#-------------------------------------------------------------------- +import argparse +import os +import os.path as osp +import pprint +import random +import warnings + +import numpy as np +import yaml +import torch +from torch.utils import data + +from advent.model.deeplabv2 import get_deeplab_v2 +from advent.dataset.gta5 import GTA5DataSet +from advent.dataset.cityscapes import CityscapesDataSet as CityscapesDataSet_hard +from cityscapes import CityscapesDataSet as CityscapesDataSet_easy +from advent.domain_adaptation.config import cfg, cfg_from_file +from train_UDA import train_domain_adaptation + + +warnings.filterwarnings("ignore", message="numpy.dtype size changed") +warnings.filterwarnings("ignore") + + +def get_arguments(): + """ + Parse input arguments + """ + parser = argparse.ArgumentParser(description="Code for domain adaptation (DA) training") + parser.add_argument('--cfg', type=str, default=None, + help='optional config file', ) + parser.add_argument("--random-train", action="store_true", + help="not fixing random seed.") + parser.add_argument("--tensorboard", action="store_true", + help="visualize training loss with tensorboardX.") + parser.add_argument("--viz_every_iter", type=int, default=None, + help="visualize results.") + parser.add_argument("--exp-suffix", type=str, default=None, + help="optional experiment suffix") + parser.add_argument('--rank', type=int, default=0) + parser.add_argument('--device_type', type=str, default='npu') + parser.add_argument('--device_id', type=int, ) + parser.add_argument('--world_size', type=int, default=1) + parser.add_argument('--distributed', action='store_true', default=False) + parser.add_argument('--performance_log', action='store_true', default=False) + return parser.parse_args() + + +def main(): + # LOAD ARGS + args = get_arguments() + print('Called with args:') + print(args) + + # pdb.set_trace() + + assert args.cfg is not None, 'Missing cfg file' + cfg_from_file(args.cfg) + cfg.distributed = args.distributed + ddp_backend = "hccl" if args.device_type == "npu" else "nccl" + + if cfg.distributed: + torch.distributed.init_process_group(backend=ddp_backend, world_size=args.world_size, rank=args.rank) + device = 
torch.device("{}:{}".format(args.device_type ,args.device_id)) + cfg.device_id = args.device_id + cfg.rank = args.rank + cfg.world_size = args.world_size + cfg.device_type = args.device_type + cfg.performance_log = args.performance_log + if args.device_type == 'cuda': + torch.cuda.set_device(args.device_id) + elif args.device_type == 'npu': + torch.npu.set_device(args.device_id) + + cfg.is_master_node = args.world_size == 1 or args.device_id == 0 + + # auto-generate exp name if not specified + if cfg.EXP_NAME == '': + cfg.EXP_NAME = f'{cfg.SOURCE}2{cfg.TARGET}_{cfg.TRAIN.MODEL}_{cfg.TRAIN.DA_METHOD}' + + if args.exp_suffix: + cfg.EXP_NAME += f'_{args.exp_suffix}' + # auto-generate snapshot path if not specified + if cfg.TRAIN.SNAPSHOT_DIR == '': + cfg.TRAIN.SNAPSHOT_DIR = osp.join(cfg.EXP_ROOT_SNAPSHOT, cfg.EXP_NAME) + os.makedirs(cfg.TRAIN.SNAPSHOT_DIR, exist_ok=True) + # tensorboard + if args.tensorboard: + if cfg.TRAIN.TENSORBOARD_LOGDIR == '': + cfg.TRAIN.TENSORBOARD_LOGDIR = osp.join(cfg.EXP_ROOT_LOGS, 'tensorboard', cfg.EXP_NAME) + os.makedirs(cfg.TRAIN.TENSORBOARD_LOGDIR, exist_ok=True) + if args.viz_every_iter is not None: + cfg.TRAIN.TENSORBOARD_VIZRATE = args.viz_every_iter + else: + cfg.TRAIN.TENSORBOARD_LOGDIR = '' + if cfg.is_master_node: + print('Using config:') + pprint.pprint(cfg) + + # INIT + _init_fn = None + if not args.random_train: + torch.manual_seed(cfg.TRAIN.RANDOM_SEED) + if args.device_type == 'cuda': + torch.cuda.manual_seed(cfg.TRAIN.RANDOM_SEED) + elif args.device_type == 'npu': + torch.npu.manual_seed(cfg.TRAIN.RANDOM_SEED) + np.random.seed(cfg.TRAIN.RANDOM_SEED) + random.seed(cfg.TRAIN.RANDOM_SEED) + + def _init_fn(worker_id): + np.random.seed(cfg.TRAIN.RANDOM_SEED + worker_id) + + if os.environ.get('ADVENT_DRY_RUN', '0') == '1': + return + + # LOAD SEGMENTATION NET + assert osp.exists(cfg.TRAIN.RESTORE_FROM), f'Missing init model {cfg.TRAIN.RESTORE_FROM}' + if cfg.TRAIN.MODEL == 'DeepLabv2': + model = get_deeplab_v2(num_classes=cfg.NUM_CLASSES, multi_level=cfg.TRAIN.MULTI_LEVEL) + saved_state_dict = torch.load(cfg.TRAIN.RESTORE_FROM) + if 'DeepLab_resnet_pretrained_imagenet' in cfg.TRAIN.RESTORE_FROM: + new_params = model.state_dict().copy() + for i in saved_state_dict: + i_parts = i.split('.') + if not i_parts[1] == 'layer5': + new_params['.'.join(i_parts[1:])] = saved_state_dict[i] + model.load_state_dict(new_params) + else: + model.load_state_dict(saved_state_dict) + else: + raise NotImplementedError(f"Not yet supported {cfg.TRAIN.MODEL}") + print('Model loaded') + + # DATALOADERS + # pdb.set_trace() + easy_dataset = CityscapesDataSet_easy(root=cfg.DATA_DIRECTORY_SOURCE, + list_path=cfg.DATA_LIST_SOURCE, + max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_SOURCE * args.world_size, + crop_size=cfg.TRAIN.INPUT_SIZE_SOURCE, + mean=cfg.TRAIN.IMG_MEAN) + if cfg.distributed: + easy_sampler = torch.utils.data.distributed.DistributedSampler(easy_dataset) + easy_loader = data.DataLoader(easy_dataset, + batch_size=cfg.TRAIN.BATCH_SIZE_SOURCE, + num_workers=cfg.NUM_WORKERS, + shuffle=(not cfg.distributed), + pin_memory=False, + sampler=easy_sampler if cfg.distributed else None, + worker_init_fn=_init_fn) + + # pdb.set_trace() + hard_dataset = CityscapesDataSet_hard(root=cfg.DATA_DIRECTORY_TARGET, + list_path=cfg.DATA_LIST_TARGET, + set=cfg.TRAIN.SET_TARGET, + info_path=cfg.TRAIN.INFO_TARGET, + max_iters=cfg.TRAIN.MAX_ITERS * cfg.TRAIN.BATCH_SIZE_TARGET * args.world_size, + crop_size=cfg.TRAIN.INPUT_SIZE_TARGET, + mean=cfg.TRAIN.IMG_MEAN) + if cfg.distributed: + 
hard_sampler = torch.utils.data.distributed.DistributedSampler(hard_dataset) + hard_loader = data.DataLoader(hard_dataset, + batch_size=cfg.TRAIN.BATCH_SIZE_TARGET, + num_workers=cfg.NUM_WORKERS, + shuffle=(not cfg.distributed), + pin_memory=False, + sampler=hard_sampler if cfg.distributed else None, + worker_init_fn=_init_fn) + + with open(osp.join(cfg.TRAIN.SNAPSHOT_DIR, 'train_cfg.yml'), 'w') as yaml_file: + yaml.dump(cfg, yaml_file, default_flow_style=False) + + # pdb.set_trace() + train_domain_adaptation(model, easy_loader, hard_loader, device, cfg) + + +if __name__ == '__main__': + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + main() diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train_UDA.py b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train_UDA.py index 417abb8798ca32446db271efc7433a4b4844304e..a27de683550f4d570430b737918c8449eabe3b27 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train_UDA.py +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/intrada/train_UDA.py @@ -1,469 +1,469 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#-------------------------------------------------------------------- -# modified from "ADVENT/advent/domain_adaptation/train_UDA.py" by Tuan-Hung Vu -#-------------------------------------------------------------------- -import os -import sys -import time -from pathlib import Path - -import os.path as osp -import numpy as np -import torch -#import torch.backends.cudnn as cudnn -import torch.nn.functional as F -import torch.optim as optim -from tensorboardX import SummaryWriter -from torch import nn -from torchvision.utils import make_grid -from tqdm import tqdm -from collections import OrderedDict - -from advent.model.discriminator import get_fc_discriminator -from advent.utils.func import adjust_learning_rate, adjust_learning_rate_discriminator -from advent.utils.func import loss_calc, bce_loss -from advent.utils.loss import entropy_loss -from advent.utils.func import prob_2_entropy -from advent.utils.viz_segmask import colorize_mask - -import apex -from apex import amp - -def load_checkpoint_for_evaluation(model, checkpoint, device): - saved_state_dict = torch.load(checkpoint, map_location="cpu") - new_state_dict = OrderedDict() - for k,v in saved_state_dict.items(): - if k[:7] != "module.": - name = k - else: - name = k[7:] - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.eval() - model.to(device) - - -def train_advent(model, trainloader, targetloader, device, cfg): - ''' UDA training with advent - ''' - # Create the model and start the training. 
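The patched `train.py` above boots distributed training by choosing the collective backend from the device type (`hccl` for Ascend NPUs, `nccl` for CUDA) and binding each process to a single device before the samplers are built. A condensed sketch of that bootstrap, assuming the Ascend plugin provides `torch.npu` and reusing the address/port hard-coded in the patch (`init_distributed` is an illustrative helper name):

```python
import os
import torch
import torch.distributed as dist

def init_distributed(device_type: str, device_id: int, rank: int, world_size: int) -> torch.device:
    """Pick the collective backend from the device type and bind this process to one device."""
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29688')
    backend = 'hccl' if device_type == 'npu' else 'nccl'
    if world_size > 1:
        dist.init_process_group(backend=backend, world_size=world_size, rank=rank)
    device = torch.device(f'{device_type}:{device_id}')
    if device_type == 'cuda':
        torch.cuda.set_device(device_id)
    elif device_type == 'npu':
        torch.npu.set_device(device_id)   # requires the torch_npu / Ascend plugin
    return device
```

With that device in hand, the script wraps each dataset in a `DistributedSampler` and turns off shuffling in the `DataLoader`, exactly as in the hunk above.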
- # pdb.set_trace() - input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE - input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET - # device = cfg.GPU_ID - num_classes = cfg.NUM_CLASSES - viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) - if viz_tensorboard: - writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) - - print(device) - # SEGMNETATION NETWORK - model.train() - model.to(device) - # cudnn.benchmark = True - # cudnn.enabled = True - - # DISCRIMINATOR NETWORK - # feature-level - d_aux = get_fc_discriminator(num_classes=num_classes) - d_aux.train() - d_aux.to(device) - # restore_from = cfg.TRAIN.RESTORE_FROM_aux - # print("Load Discriminator:", restore_from) - # load_checkpoint_for_evaluation(d_aux, restore_from, device) - - - # seg maps, i.e. output, level - d_main = get_fc_discriminator(num_classes=num_classes) - d_main.train() - d_main.to(device) - - # restore_from = cfg.TRAIN.RESTORE_FROM_main - # print("Load Discriminator:", restore_from) - # load_checkpoint_for_evaluation(d_main, restore_from, device) - - # OPTIMIZERS - # segnet's optimizer - optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), - lr=cfg.TRAIN.LEARNING_RATE, - momentum=cfg.TRAIN.MOMENTUM, - weight_decay=cfg.TRAIN.WEIGHT_DECAY) - model, optimizer = amp.initialize(model, optimizer, opt_level="O0",loss_scale=128.0) - if cfg.distributed: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.device_id], find_unused_parameters=True) - - # discriminators' optimizers - optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, - betas=(0.9, 0.99)) - optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, - betas=(0.9, 0.99)) - d_aux, optimizer_d_aux = amp.initialize(d_aux, optimizer_d_aux, opt_level="O0",loss_scale=128.0) - d_main, optimizer_d_main = amp.initialize(d_main, optimizer_d_main, opt_level="O0",loss_scale=128.0) - if cfg.distributed: - d_aux = torch.nn.parallel.DistributedDataParallel(d_aux, device_ids=[cfg.device_id],find_unused_parameters=True) - d_main = torch.nn.parallel.DistributedDataParallel(d_main, device_ids=[cfg.device_id], find_unused_parameters=True) - - # interpolate output segmaps - interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', - align_corners=True) - interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', - align_corners=True) - - # labels for adversarial training - source_label = 0 - target_label = 1 - trainloader_iter = enumerate(trainloader) - targetloader_iter = enumerate(targetloader) - for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): - - # reset optimizers - optimizer.zero_grad() - optimizer_d_aux.zero_grad() - optimizer_d_main.zero_grad() - # adapt LR if needed - adjust_learning_rate(optimizer, i_iter, cfg) - adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg) - adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg) - - # UDA Training - # only train segnet. 
Don't accumulate grads in disciminators - for param in d_aux.parameters(): - param.requires_grad = False - for param in d_main.parameters(): - param.requires_grad = False - # train on source - _, batch = trainloader_iter.__next__() - images_source, labels, _, _ = batch - images_source, labels = images_source.to(device), labels.to(device) - # debug: - # labels=labels.numpy() - # from matplotlib import pyplot as plt - # import numpy as np - # plt.figure(1), plt.imshow(labels[0]), plt.ion(), plt.colorbar(), plt.show() - pred_src_aux, pred_src_main = model(images_source) - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = interp(pred_src_aux) - loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) - else: - loss_seg_src_aux = 0 - pred_src_main = interp(pred_src_main) - loss_seg_src_main = loss_calc(pred_src_main, labels, device) - # pdb.set_trace() - loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main - + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) - #loss.backward() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - - # adversarial training ot fool the discriminator - _, batch = targetloader_iter.__next__() - images, _, _, _ = batch - images = images.to(device) - pred_trg_aux, pred_trg_main = model(images) - if cfg.TRAIN.MULTI_LEVEL: - pred_trg_aux = interp_target(pred_trg_aux) - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) - loss_adv_trg_aux = bce_loss(d_out_aux, source_label) - else: - loss_adv_trg_aux = 0 - pred_trg_main = interp_target(pred_trg_main) - d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) - loss_adv_trg_main = bce_loss(d_out_main, source_label) - loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main - + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux) - loss = loss - #loss.backward() - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - - # Train discriminator networks - # enable training mode on discriminator networks - for param in d_aux.parameters(): - param.requires_grad = True - for param in d_main.parameters(): - param.requires_grad = True - # train with source - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = pred_src_aux.detach() - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux))) - loss_d_aux = bce_loss(d_out_aux, source_label) - loss_d_aux = loss_d_aux / 2 - with amp.scale_loss(loss_d_aux, optimizer_d_aux) as scaled_loss: - scaled_loss.backward() - # loss_d_aux.backward() - pred_src_main = pred_src_main.detach() - d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main))) - loss_d_main = bce_loss(d_out_main, source_label) - loss_d_main = loss_d_main / 2 - #loss_d_main.backward() - with amp.scale_loss(loss_d_main, optimizer_d_main) as scaled_loss: - scaled_loss.backward() - - # train with target - if cfg.TRAIN.MULTI_LEVEL: - pred_trg_aux = pred_trg_aux.detach() - d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) - loss_d_aux = bce_loss(d_out_aux, target_label) - loss_d_aux = loss_d_aux / 2 - #loss_d_aux.backward() - with amp.scale_loss(loss_d_aux, optimizer_d_aux) as scaled_loss: - scaled_loss.backward() - else: - loss_d_aux = 0 - pred_trg_main = pred_trg_main.detach() - d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) - loss_d_main = bce_loss(d_out_main, target_label) - loss_d_main = loss_d_main / 2 - #loss_d_main.backward() - with amp.scale_loss(loss_d_main, optimizer_d_main) as scaled_loss: - scaled_loss.backward() - - optimizer.step() - if cfg.TRAIN.MULTI_LEVEL: - optimizer_d_aux.step() - optimizer_d_main.step() - - current_losses = {'loss_seg_src_aux': 
loss_seg_src_aux, - 'loss_seg_src_main': loss_seg_src_main, - 'loss_adv_trg_aux': loss_adv_trg_aux, - 'loss_adv_trg_main': loss_adv_trg_main, - 'loss_d_aux': loss_d_aux, - 'loss_d_main': loss_d_main} - print_losses(current_losses, i_iter) - - if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0 and cfg.rank == 0: - print('taking snapshot ...') - print('exp =', cfg.TRAIN.SNAPSHOT_DIR) - snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR) - torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth') - torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth') - torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth') - if i_iter >= cfg.TRAIN.EARLY_STOP - 1: - break - sys.stdout.flush() - - # Visualize with tensorboard - if viz_tensorboard: - log_losses_tensorboard(writer, current_losses, i_iter) - - if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: - draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') - draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') - - -def draw_in_tensorboard(writer, images, i_iter, pred_main, num_classes, type_): - grid_image = make_grid(images[:3].clone().cpu().data, 3, normalize=True) - writer.add_image(f'Image - {type_}', grid_image, i_iter) - - grid_image = make_grid(torch.from_numpy(np.array(colorize_mask(np.asarray( - np.argmax(F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0), - axis=2), dtype=np.uint8)).convert('RGB')).transpose(2, 0, 1)), 3, - normalize=False, range=(0, 255)) - writer.add_image(f'Prediction - {type_}', grid_image, i_iter) - - output_sm = F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0) - output_ent = np.sum(-np.multiply(output_sm, np.log2(output_sm)), axis=2, - keepdims=False) - grid_image = make_grid(torch.from_numpy(output_ent), 3, normalize=True, - range=(0, np.log2(num_classes))) - writer.add_image(f'Entropy - {type_}', grid_image, i_iter) - - -def train_minent(model, trainloader, targetloader, device, cfg): - ''' UDA training with minEnt - ''' - # Create the model and start the training. 
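The adversarial loop in `train_advent` above alternates two roles: the segmentation network is updated so the discriminator labels target-domain outputs as `source_label`, then the discriminators are updated on detached predictions with the true domain labels, each loss halved. A stripped-down sketch of one such round; it feeds plain softmax maps to the discriminator and uses an ordinary `backward()`, whereas the patch additionally applies `prob_2_entropy` and apex loss scaling (`adversarial_step`, this `bce_loss`, and the `lambda_adv` default are illustrative stand-ins for the ADVENT helpers and config values):

```python
import torch
import torch.nn.functional as F

def bce_loss(d_out: torch.Tensor, label: float) -> torch.Tensor:
    """BCE-with-logits against a constant domain label (0 = source, 1 = target)."""
    return F.binary_cross_entropy_with_logits(d_out, torch.full_like(d_out, label))

def adversarial_step(seg_logits_src, seg_logits_trg, discriminator,
                     optimizer_seg, optimizer_d, lambda_adv=0.001,
                     source_label=0.0, target_label=1.0):
    optimizer_seg.zero_grad()
    optimizer_d.zero_grad()

    # 1) segmenter update: freeze D, push target predictions toward the "source" label
    for p in discriminator.parameters():
        p.requires_grad = False
    loss_adv = lambda_adv * bce_loss(discriminator(F.softmax(seg_logits_trg, dim=1)), source_label)
    loss_adv.backward()

    # 2) discriminator update: unfreeze D, train on detached outputs with true labels
    for p in discriminator.parameters():
        p.requires_grad = True
    loss_d = 0.5 * bce_loss(discriminator(F.softmax(seg_logits_src.detach(), dim=1)), source_label)
    loss_d = loss_d + 0.5 * bce_loss(discriminator(F.softmax(seg_logits_trg.detach(), dim=1)), target_label)
    loss_d.backward()

    optimizer_seg.step()
    optimizer_d.step()
    return loss_adv.detach(), loss_d.detach()
```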
- input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE - input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET - # device = cfg.GPU_ID - num_classes = cfg.NUM_CLASSES - viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) - if viz_tensorboard: - writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) - - # SEGMNETATION NETWORK - model.train() - model.to(device) - # cudnn.benchmark = True - # cudnn.enabled = True - - # OPTIMIZERS - # segnet's optimizer - # optimizer_fn = apex.optimizers.NpuFusedSGD if cfg.device_type == 'npu' else optim.SGD - optimizer_fn = optim.SGD - optimizer = optimizer_fn(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), - lr=cfg.TRAIN.LEARNING_RATE, - momentum=cfg.TRAIN.MOMENTUM, - weight_decay=cfg.TRAIN.WEIGHT_DECAY) - - model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=128.0) - - if cfg.distributed: - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.device_id]) - - # interpolate output segmaps - interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', - align_corners=True) - interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', - align_corners=True) - - trainloader_iter = enumerate(trainloader) - targetloader_iter = enumerate(targetloader) - - # FPS statistics - time_meter = AverageMeter(name='time_avg') - num_devices = cfg.world_size - batch_size = cfg.TRAIN.BATCH_SIZE_SOURCE - - for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): - - # time start - time_start = time.time() - - # reset optimizers - optimizer.zero_grad() - - # adapt LR if needed - adjust_learning_rate(optimizer, i_iter, cfg) - - # UDA Training - # train on source - _, batch = trainloader_iter.__next__() - images_source, labels, _, _ = batch - images_source, labels = images_source.to(device), labels.to(device) - pred_src_aux, pred_src_main = model(images_source) - if cfg.TRAIN.MULTI_LEVEL: - pred_src_aux = interp(pred_src_aux) - loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) - else: - loss_seg_src_aux = 0 - pred_src_main = interp(pred_src_main) - loss_seg_src_main = loss_calc(pred_src_main, labels, device) - loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main - + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - # loss.backward() - - # adversarial training with minent - _, batch = targetloader_iter.__next__() - images, _, _, _ = batch - images = images.to(device) - pred_trg_aux, pred_trg_main = model(images) - pred_trg_aux = interp_target(pred_trg_aux) - pred_trg_main = interp_target(pred_trg_main) - pred_prob_trg_aux = F.softmax(pred_trg_aux) - pred_prob_trg_main = F.softmax(pred_trg_main) - - loss_target_entp_aux = entropy_loss(pred_prob_trg_aux) - loss_target_entp_main = entropy_loss(pred_prob_trg_main) - loss = (cfg.TRAIN.LAMBDA_ENT_AUX * loss_target_entp_aux - + cfg.TRAIN.LAMBDA_ENT_MAIN * loss_target_entp_main) - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - # loss.backward() - optimizer.step() - - # update time statistics - time_cost = time.time() - time_start - time_meter.update(time_cost) - - current_losses = {'loss_seg_src_aux': loss_seg_src_aux, - 'loss_seg_src_main': loss_seg_src_main, - 'loss_ent_aux': loss_target_entp_aux, - 'loss_ent_main': loss_target_entp_main, - 'FPS': num_devices * batch_size / time_meter.avg if time_meter.avg > 1e-6 else -1} - - if cfg.is_master_node: - print_losses(current_losses, i_iter) - - if i_iter % 
cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0 and cfg.is_master_node: - print('taking snapshot ...') - print('exp =', cfg.TRAIN.SNAPSHOT_DIR) - torch.save(model.state_dict(), - osp.join(cfg.TRAIN.SNAPSHOT_DIR, f'model_{i_iter}.pth')) - if i_iter >= cfg.TRAIN.EARLY_STOP - 1: - break - sys.stdout.flush() - - # Visualize with tensorboard - if viz_tensorboard: - log_losses_tensorboard(writer, current_losses, i_iter) - - if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: - draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') - draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') - - -def print_losses(current_losses, i_iter): - list_strings = [] - for loss_name, loss_value in current_losses.items(): - list_strings.append(f'{loss_name} = {to_numpy(loss_value):.3f} ') - full_string = ' '.join(list_strings) - tqdm.write(f'iter = {i_iter} {full_string}') - - -def log_losses_tensorboard(writer, current_losses, i_iter): - for loss_name, loss_value in current_losses.items(): - writer.add_scalar(f'data/{loss_name}', to_numpy(loss_value), i_iter) - - -def to_numpy(tensor): - if isinstance(tensor, (int, float)): - return tensor - else: - return tensor.data.cpu().numpy() - - -def train_domain_adaptation(model, trainloader, targetloader, device, cfg): - if cfg.TRAIN.DA_METHOD == 'MinEnt': - if cfg.performance_log: - cfg.TRAIN.EARLY_STOP = 500 - train_minent(model, trainloader, targetloader, device, cfg) - elif cfg.TRAIN.DA_METHOD == 'AdvEnt': - train_advent(model, trainloader, targetloader, device, cfg) - else: - raise NotImplementedError(f"Not yet supported DA method {cfg.TRAIN.DA_METHOD}") - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', start_count_index=10): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = start_count_index - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.N = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.N): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.N) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
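The FPS figure that `train_minent` logs above comes from averaging the per-iteration wall time while discarding the first few warm-up iterations, then dividing the global batch size by that average. A simplified re-implementation of that bookkeeping (the patch's `AverageMeter` also tracks the raw sum and last value; the timings and sizes below are made up for illustration):

```python
class SimpleTimeMeter:
    """Average of values, ignoring the first `warmup` updates."""
    def __init__(self, warmup=10):
        self.warmup = warmup
        self.count = 0
        self.total = 0.0

    def update(self, value):
        self.count += 1
        if self.count > self.warmup:
            self.total += value

    @property
    def avg(self):
        effective = self.count - self.warmup
        return self.total / effective if effective > 0 else 0.0

meter = SimpleTimeMeter(warmup=10)
for step_time in [0.30] * 50:            # pretend every iteration took 0.30 s
    meter.update(step_time)

world_size, batch_per_device = 8, 1      # stands in for cfg.world_size and cfg.TRAIN.BATCH_SIZE_SOURCE
fps = world_size * batch_per_device / meter.avg if meter.avg > 1e-6 else -1
print(f'FPS ~ {fps:.1f}')                # about 26.7 images/s with these toy numbers
```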
+ +#-------------------------------------------------------------------- +# modified from "ADVENT/advent/domain_adaptation/train_UDA.py" by Tuan-Hung Vu +#-------------------------------------------------------------------- +import os +import sys +import time +from pathlib import Path + +import os.path as osp +import numpy as np +import torch +#import torch.backends.cudnn as cudnn +import torch.nn.functional as F +import torch.optim as optim +from tensorboardX import SummaryWriter +from torch import nn +from torchvision.utils import make_grid +from tqdm import tqdm +from collections import OrderedDict + +from advent.model.discriminator import get_fc_discriminator +from advent.utils.func import adjust_learning_rate, adjust_learning_rate_discriminator +from advent.utils.func import loss_calc, bce_loss +from advent.utils.loss import entropy_loss +from advent.utils.func import prob_2_entropy +from advent.utils.viz_segmask import colorize_mask + +import apex +from apex import amp + +def load_checkpoint_for_evaluation(model, checkpoint, device): + saved_state_dict = torch.load(checkpoint, map_location="cpu") + new_state_dict = OrderedDict() + for k,v in saved_state_dict.items(): + if k[:7] != "module.": + name = k + else: + name = k[7:] + new_state_dict[name] = v + model.load_state_dict(new_state_dict) + model.eval() + model.to(device) + + +def train_advent(model, trainloader, targetloader, device, cfg): + ''' UDA training with advent + ''' + # Create the model and start the training. + # pdb.set_trace() + input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE + input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET + # device = cfg.GPU_ID + num_classes = cfg.NUM_CLASSES + viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) + if viz_tensorboard: + writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) + + print(device) + # SEGMNETATION NETWORK + model.train() + model.to(device) + # cudnn.benchmark = True + # cudnn.enabled = True + + # DISCRIMINATOR NETWORK + # feature-level + d_aux = get_fc_discriminator(num_classes=num_classes) + d_aux.train() + d_aux.to(device) + # restore_from = cfg.TRAIN.RESTORE_FROM_aux + # print("Load Discriminator:", restore_from) + # load_checkpoint_for_evaluation(d_aux, restore_from, device) + + + # seg maps, i.e. 
output, level + d_main = get_fc_discriminator(num_classes=num_classes) + d_main.train() + d_main.to(device) + + # restore_from = cfg.TRAIN.RESTORE_FROM_main + # print("Load Discriminator:", restore_from) + # load_checkpoint_for_evaluation(d_main, restore_from, device) + + # OPTIMIZERS + # segnet's optimizer + optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), + lr=cfg.TRAIN.LEARNING_RATE, + momentum=cfg.TRAIN.MOMENTUM, + weight_decay=cfg.TRAIN.WEIGHT_DECAY) + model, optimizer = amp.initialize(model, optimizer, opt_level="O0",loss_scale=128.0) + if cfg.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.device_id], find_unused_parameters=True) + + # discriminators' optimizers + optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, + betas=(0.9, 0.99)) + optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, + betas=(0.9, 0.99)) + d_aux, optimizer_d_aux = amp.initialize(d_aux, optimizer_d_aux, opt_level="O0",loss_scale=128.0) + d_main, optimizer_d_main = amp.initialize(d_main, optimizer_d_main, opt_level="O0",loss_scale=128.0) + if cfg.distributed: + d_aux = torch.nn.parallel.DistributedDataParallel(d_aux, device_ids=[cfg.device_id],find_unused_parameters=True) + d_main = torch.nn.parallel.DistributedDataParallel(d_main, device_ids=[cfg.device_id], find_unused_parameters=True) + + # interpolate output segmaps + interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', + align_corners=True) + interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', + align_corners=True) + + # labels for adversarial training + source_label = 0 + target_label = 1 + trainloader_iter = enumerate(trainloader) + targetloader_iter = enumerate(targetloader) + for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): + + # reset optimizers + optimizer.zero_grad() + optimizer_d_aux.zero_grad() + optimizer_d_main.zero_grad() + # adapt LR if needed + adjust_learning_rate(optimizer, i_iter, cfg) + adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg) + adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg) + + # UDA Training + # only train segnet. 
Don't accumulate grads in disciminators + for param in d_aux.parameters(): + param.requires_grad = False + for param in d_main.parameters(): + param.requires_grad = False + # train on source + _, batch = trainloader_iter.__next__() + images_source, labels, _, _ = batch + images_source, labels = images_source.to(device), labels.to(device) + # debug: + # labels=labels.numpy() + # from matplotlib import pyplot as plt + # import numpy as np + # plt.figure(1), plt.imshow(labels[0]), plt.ion(), plt.colorbar(), plt.show() + pred_src_aux, pred_src_main = model(images_source) + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = interp(pred_src_aux) + loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) + else: + loss_seg_src_aux = 0 + pred_src_main = interp(pred_src_main) + loss_seg_src_main = loss_calc(pred_src_main, labels, device) + # pdb.set_trace() + loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) + #loss.backward() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + + # adversarial training ot fool the discriminator + _, batch = targetloader_iter.__next__() + images, _, _, _ = batch + images = images.to(device) + pred_trg_aux, pred_trg_main = model(images) + if cfg.TRAIN.MULTI_LEVEL: + pred_trg_aux = interp_target(pred_trg_aux) + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) + loss_adv_trg_aux = bce_loss(d_out_aux, source_label) + else: + loss_adv_trg_aux = 0 + pred_trg_main = interp_target(pred_trg_main) + d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) + loss_adv_trg_main = bce_loss(d_out_main, source_label) + loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main + + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux) + loss = loss + #loss.backward() + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + + # Train discriminator networks + # enable training mode on discriminator networks + for param in d_aux.parameters(): + param.requires_grad = True + for param in d_main.parameters(): + param.requires_grad = True + # train with source + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = pred_src_aux.detach() + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux))) + loss_d_aux = bce_loss(d_out_aux, source_label) + loss_d_aux = loss_d_aux / 2 + with amp.scale_loss(loss_d_aux, optimizer_d_aux) as scaled_loss: + scaled_loss.backward() + # loss_d_aux.backward() + pred_src_main = pred_src_main.detach() + d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main))) + loss_d_main = bce_loss(d_out_main, source_label) + loss_d_main = loss_d_main / 2 + #loss_d_main.backward() + with amp.scale_loss(loss_d_main, optimizer_d_main) as scaled_loss: + scaled_loss.backward() + + # train with target + if cfg.TRAIN.MULTI_LEVEL: + pred_trg_aux = pred_trg_aux.detach() + d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) + loss_d_aux = bce_loss(d_out_aux, target_label) + loss_d_aux = loss_d_aux / 2 + #loss_d_aux.backward() + with amp.scale_loss(loss_d_aux, optimizer_d_aux) as scaled_loss: + scaled_loss.backward() + else: + loss_d_aux = 0 + pred_trg_main = pred_trg_main.detach() + d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) + loss_d_main = bce_loss(d_out_main, target_label) + loss_d_main = loss_d_main / 2 + #loss_d_main.backward() + with amp.scale_loss(loss_d_main, optimizer_d_main) as scaled_loss: + scaled_loss.backward() + + optimizer.step() + if cfg.TRAIN.MULTI_LEVEL: + optimizer_d_aux.step() + optimizer_d_main.step() + + current_losses = {'loss_seg_src_aux': 
loss_seg_src_aux, + 'loss_seg_src_main': loss_seg_src_main, + 'loss_adv_trg_aux': loss_adv_trg_aux, + 'loss_adv_trg_main': loss_adv_trg_main, + 'loss_d_aux': loss_d_aux, + 'loss_d_main': loss_d_main} + print_losses(current_losses, i_iter) + + if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0 and cfg.rank == 0: + print('taking snapshot ...') + print('exp =', cfg.TRAIN.SNAPSHOT_DIR) + snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR) + torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth') + torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth') + torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth') + if i_iter >= cfg.TRAIN.EARLY_STOP - 1: + break + sys.stdout.flush() + + # Visualize with tensorboard + if viz_tensorboard: + log_losses_tensorboard(writer, current_losses, i_iter) + + if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: + draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') + draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') + + +def draw_in_tensorboard(writer, images, i_iter, pred_main, num_classes, type_): + grid_image = make_grid(images[:3].clone().cpu().data, 3, normalize=True) + writer.add_image(f'Image - {type_}', grid_image, i_iter) + + grid_image = make_grid(torch.from_numpy(np.array(colorize_mask(np.asarray( + np.argmax(F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0), + axis=2), dtype=np.uint8)).convert('RGB')).transpose(2, 0, 1)), 3, + normalize=False, range=(0, 255)) + writer.add_image(f'Prediction - {type_}', grid_image, i_iter) + + output_sm = F.softmax(pred_main).cpu().data[0].numpy().transpose(1, 2, 0) + output_ent = np.sum(-np.multiply(output_sm, np.log2(output_sm)), axis=2, + keepdims=False) + grid_image = make_grid(torch.from_numpy(output_ent), 3, normalize=True, + range=(0, np.log2(num_classes))) + writer.add_image(f'Entropy - {type_}', grid_image, i_iter) + + +def train_minent(model, trainloader, targetloader, device, cfg): + ''' UDA training with minEnt + ''' + # Create the model and start the training. 
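Both the discriminator input (via `prob_2_entropy`) and the `Entropy - T/S` images logged by `draw_in_tensorboard` above are built from the per-pixel entropy of the softmax output, which is bounded by log2(num_classes). A small sketch of that computation; ADVENT's own helper may scale or clamp slightly differently:

```python
import torch
import torch.nn.functional as F

def entropy_map(logits: torch.Tensor, eps: float = 1e-30) -> torch.Tensor:
    """(N, C, H, W) logits -> (N, H, W) per-pixel entropy in bits."""
    probs = F.softmax(logits, dim=1)
    return -(probs * torch.log2(probs + eps)).sum(dim=1)

logits = torch.randn(2, 19, 65, 129)     # e.g. 19 Cityscapes classes
ent = entropy_map(logits)
print(ent.shape, ent.max().item())       # the maximum stays below log2(19), about 4.25 bits
```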
+ input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE + input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET + # device = cfg.GPU_ID + num_classes = cfg.NUM_CLASSES + viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) + if viz_tensorboard: + writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) + + # SEGMNETATION NETWORK + model.train() + model.to(device) + # cudnn.benchmark = True + # cudnn.enabled = True + + # OPTIMIZERS + # segnet's optimizer + # optimizer_fn = apex.optimizers.NpuFusedSGD if cfg.device_type == 'npu' else optim.SGD + optimizer_fn = optim.SGD + optimizer = optimizer_fn(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), + lr=cfg.TRAIN.LEARNING_RATE, + momentum=cfg.TRAIN.MOMENTUM, + weight_decay=cfg.TRAIN.WEIGHT_DECAY) + + model, optimizer = amp.initialize(model, optimizer, opt_level='O2',loss_scale=128.0) + + if cfg.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.device_id]) + + # interpolate output segmaps + interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', + align_corners=True) + interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', + align_corners=True) + + trainloader_iter = enumerate(trainloader) + targetloader_iter = enumerate(targetloader) + + # FPS statistics + time_meter = AverageMeter(name='time_avg') + num_devices = cfg.world_size + batch_size = cfg.TRAIN.BATCH_SIZE_SOURCE + + for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): + + # time start + time_start = time.time() + + # reset optimizers + optimizer.zero_grad() + + # adapt LR if needed + adjust_learning_rate(optimizer, i_iter, cfg) + + # UDA Training + # train on source + _, batch = trainloader_iter.__next__() + images_source, labels, _, _ = batch + images_source, labels = images_source.to(device), labels.to(device) + pred_src_aux, pred_src_main = model(images_source) + if cfg.TRAIN.MULTI_LEVEL: + pred_src_aux = interp(pred_src_aux) + loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) + else: + loss_seg_src_aux = 0 + pred_src_main = interp(pred_src_main) + loss_seg_src_main = loss_calc(pred_src_main, labels, device) + loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + # loss.backward() + + # adversarial training with minent + _, batch = targetloader_iter.__next__() + images, _, _, _ = batch + images = images.to(device) + pred_trg_aux, pred_trg_main = model(images) + pred_trg_aux = interp_target(pred_trg_aux) + pred_trg_main = interp_target(pred_trg_main) + pred_prob_trg_aux = F.softmax(pred_trg_aux) + pred_prob_trg_main = F.softmax(pred_trg_main) + + loss_target_entp_aux = entropy_loss(pred_prob_trg_aux) + loss_target_entp_main = entropy_loss(pred_prob_trg_main) + loss = (cfg.TRAIN.LAMBDA_ENT_AUX * loss_target_entp_aux + + cfg.TRAIN.LAMBDA_ENT_MAIN * loss_target_entp_main) + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + # loss.backward() + optimizer.step() + + # update time statistics + time_cost = time.time() - time_start + time_meter.update(time_cost) + + current_losses = {'loss_seg_src_aux': loss_seg_src_aux, + 'loss_seg_src_main': loss_seg_src_main, + 'loss_ent_aux': loss_target_entp_aux, + 'loss_ent_main': loss_target_entp_main, + 'FPS': num_devices * batch_size / time_meter.avg if time_meter.avg > 1e-6 else -1} + + if cfg.is_master_node: + print_losses(current_losses, i_iter) + + if i_iter % 
cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0 and cfg.is_master_node: + print('taking snapshot ...') + print('exp =', cfg.TRAIN.SNAPSHOT_DIR) + torch.save(model.state_dict(), + osp.join(cfg.TRAIN.SNAPSHOT_DIR, f'model_{i_iter}.pth')) + if i_iter >= cfg.TRAIN.EARLY_STOP - 1: + break + sys.stdout.flush() + + # Visualize with tensorboard + if viz_tensorboard: + log_losses_tensorboard(writer, current_losses, i_iter) + + if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: + draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') + draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S') + + +def print_losses(current_losses, i_iter): + list_strings = [] + for loss_name, loss_value in current_losses.items(): + list_strings.append(f'{loss_name} = {to_numpy(loss_value):.3f} ') + full_string = ' '.join(list_strings) + tqdm.write(f'iter = {i_iter} {full_string}') + + +def log_losses_tensorboard(writer, current_losses, i_iter): + for loss_name, loss_value in current_losses.items(): + writer.add_scalar(f'data/{loss_name}', to_numpy(loss_value), i_iter) + + +def to_numpy(tensor): + if isinstance(tensor, (int, float)): + return tensor + else: + return tensor.data.cpu().numpy() + + +def train_domain_adaptation(model, trainloader, targetloader, device, cfg): + if cfg.TRAIN.DA_METHOD == 'MinEnt': + if cfg.performance_log: + cfg.TRAIN.EARLY_STOP = 500 + train_minent(model, trainloader, targetloader, device, cfg) + elif cfg.TRAIN.DA_METHOD == 'AdvEnt': + train_advent(model, trainloader, targetloader, device, cfg) + else: + raise NotImplementedError(f"Not yet supported DA method {cfg.TRAIN.DA_METHOD}") + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', start_count_index=10): + self.name = name + self.fmt = fmt + self.reset() + self.start_count_index = start_count_index + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + if self.count == 0: + self.N = n + + self.val = val + self.count += n + if self.count > (self.start_count_index * self.N): + self.sum += val * n + self.avg = self.sum / (self.count - self.start_count_index * self.N) + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/requirements.txt b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/requirements.txt index e427e81e69ce3e849d80fbc605484a8b3b48471e..5abfd47cd40ccd92fff6a47601041a14052295ae 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/IntraDA/requirements.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/IntraDA/requirements.txt @@ -1,4 +1,4 @@ -#torch==1.5.0 -#torchvision==0.6.0 - - +#torch==1.5.0 +#torchvision==0.6.0 + + diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/createTrainIdLabelImgs.py 
b/PyTorch/contrib/cv/semantic_segmentation/PointRend/createTrainIdLabelImgs.py index dc521ade892a8ec2f995ebc923144965eb3160b9..d9b7bde86c12c00613634cfb76cff3c9530634a2 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/PointRend/createTrainIdLabelImgs.py +++ b/PyTorch/contrib/cv/semantic_segmentation/PointRend/createTrainIdLabelImgs.py @@ -1,71 +1,71 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function, absolute_import, division -import os, glob, sys - -# cityscapes imports -from cityscapesscripts.helpers.csHelpers import printError -from cityscapesscripts.preparation.json2instanceImg import json2instanceImg - - -# The main method -def main(data_path): - # Where to look for Cityscapes - cityscapesPath = data_path - # how to search for all ground truth - searchFine = os.path.join( cityscapesPath , "gtFine" , "*" , "*" , "*_gt*_polygons.json" ) - searchCoarse = os.path.join( cityscapesPath , "gtCoarse" , "*" , "*" , "*_gt*_polygons.json" ) - - # search files - filesFine = glob.glob( searchFine ) - filesFine.sort() - filesCoarse = glob.glob( searchCoarse ) - filesCoarse.sort() - - # concatenate fine and coarse - files = filesFine + filesCoarse - # files = filesFine # use this line if fine is enough for now. - - # quit if we did not find anything - if not files: - printError( "Did not find any files. Please consult the README." ) - - # a bit verbose - print("Processing {} annotation files".format(len(files))) - - # iterate through files - progress = 0 - print("Progress: {:>3} %".format( progress * 100 / len(files) ), end=' ') - for f in files: - # create the output filename - dst = f.replace( "_polygons.json" , "_instanceTrainIds.png" ) - - # do the conversion - try: - json2instanceImg( f , dst , "trainIds" ) - except: - print("Failed to convert: {}".format(f)) - raise - - # status - progress += 1 - print("\rProgress: {:>3} %".format( progress * 100 / len(files) ), end=' ') - sys.stdout.flush() - - -# call the main -if __name__ == "__main__": - data_path = sys.argv[1] +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
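The removed and re-added `createTrainIdLabelImgs.py` above boils down to globbing every `*_gt*_polygons.json` annotation and handing it to the official `cityscapesscripts` converter with the `trainIds` encoding. A condensed sketch, assuming `cityscapesscripts` is installed and only the `gtFine` split is needed (the script above also scans `gtCoarse`); the dataset path is a placeholder:

```python
import glob
import os
from cityscapesscripts.preparation.json2instanceImg import json2instanceImg

def convert_annotations(cityscapes_root: str) -> None:
    """Render Cityscapes polygon annotations to *_instanceTrainIds.png files."""
    pattern = os.path.join(cityscapes_root, 'gtFine', '*', '*', '*_gt*_polygons.json')
    files = sorted(glob.glob(pattern))
    print(f'Processing {len(files)} annotation files')
    for src in files:
        dst = src.replace('_polygons.json', '_instanceTrainIds.png')
        json2instanceImg(src, dst, 'trainIds')   # 'ids' would keep the raw label ids instead

if __name__ == '__main__':
    convert_annotations('/path/to/cityscapes')
```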
+ +from __future__ import print_function, absolute_import, division +import os, glob, sys + +# cityscapes imports +from cityscapesscripts.helpers.csHelpers import printError +from cityscapesscripts.preparation.json2instanceImg import json2instanceImg + + +# The main method +def main(data_path): + # Where to look for Cityscapes + cityscapesPath = data_path + # how to search for all ground truth + searchFine = os.path.join( cityscapesPath , "gtFine" , "*" , "*" , "*_gt*_polygons.json" ) + searchCoarse = os.path.join( cityscapesPath , "gtCoarse" , "*" , "*" , "*_gt*_polygons.json" ) + + # search files + filesFine = glob.glob( searchFine ) + filesFine.sort() + filesCoarse = glob.glob( searchCoarse ) + filesCoarse.sort() + + # concatenate fine and coarse + files = filesFine + filesCoarse + # files = filesFine # use this line if fine is enough for now. + + # quit if we did not find anything + if not files: + printError( "Did not find any files. Please consult the README." ) + + # a bit verbose + print("Processing {} annotation files".format(len(files))) + + # iterate through files + progress = 0 + print("Progress: {:>3} %".format( progress * 100 / len(files) ), end=' ') + for f in files: + # create the output filename + dst = f.replace( "_polygons.json" , "_instanceTrainIds.png" ) + + # do the conversion + try: + json2instanceImg( f , dst , "trainIds" ) + except: + print("Failed to convert: {}".format(f)) + raise + + # status + progress += 1 + print("\rProgress: {:>3} %".format( progress * 100 / len(files) ), end=' ') + sys.stdout.flush() + + +# call the main +if __name__ == "__main__": + data_path = sys.argv[1] main(data_path) \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_ade20k_sem_seg.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_ade20k_sem_seg.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_cocofied_lvis.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_cocofied_lvis.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_for_tests.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_for_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_panoptic_fpn.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/datasets/prepare_panoptic_fpn.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/demo/demo.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/demo/demo.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/linter.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/linter.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/build_all_wheels.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/build_all_wheels.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/build_wheel.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/build_wheel.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/gen_install_table.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/gen_install_table.py old mode 100755 new mode 100644 diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/gen_wheel_index.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/gen_wheel_index.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/pkg_helpers.bash b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/packaging/pkg_helpers.bash old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/parse_results.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/parse_results.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/run_inference_tests.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/run_inference_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/run_instant_tests.sh b/PyTorch/contrib/cv/semantic_segmentation/PointRend/dev/run_instant_tests.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/projects/PointRend/train_net.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/projects/PointRend/train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/analyze_model.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/analyze_model.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/benchmark.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/benchmark.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/convert-torchvision-to-d2.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/convert-torchvision-to-d2.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/deploy/export_model.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/deploy/export_model.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/lazyconfig_train_net.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/lazyconfig_train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/plain_train_net.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/plain_train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/train_net.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/train_net.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/visualize_data.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/visualize_data.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/visualize_json_results.py b/PyTorch/contrib/cv/semantic_segmentation/PointRend/tools/visualize_json_results.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/dataset.py b/PyTorch/contrib/cv/semantic_segmentation/RCAN/dataset.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/main.py b/PyTorch/contrib/cv/semantic_segmentation/RCAN/main.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/model.py b/PyTorch/contrib/cv/semantic_segmentation/RCAN/model.py old mode 100755 new mode 100644 diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test.py b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/run_1p_prof.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/run_1p_prof.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/run_test.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/run_test.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/set_npu_env.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/set_npu_env.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_finetuning_1p.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_finetuning_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_full_1p.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_full_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_full_8p.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_full_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_performance_1p.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_performance_1p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_performance_8p.sh b/PyTorch/contrib/cv/semantic_segmentation/RCAN/test/train_performance_8p.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RCAN/utils.py b/PyTorch/contrib/cv/semantic_segmentation/RCAN/utils.py old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/Dockerfile b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/Dockerfile index 7e712fe1a166790798f57a2f2762c47394beb625..30a31af55804dd79571d2a36e6107a844cb7e549 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/Dockerfile +++ b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . +ARG FROM_IMAGE_NAME +FROM $FROM_IMAGE_NAME + +COPY requirements.txt . RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/README.md b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/README.md index c76ab910ec220273414e18a1f0b6a4a2a31ec338..00f9295deaf325e57e3ef421789d60fdaac86ba9 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/README.md @@ -1,79 +1,79 @@ -# RefineNet - -This repository is an NPU implementation of the ["RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation"](https://arxiv.org/abs/1611.06612), referring to https://github.com/DrSleep/refinenet-pytorch - - - -## Requirements - -See requirements.txt - -- PyTorch 1.5.0 -- torchvision 0.6.0 -- Numpy 1.15.1 -- Pillow 5.3.0 -- h5py 2.8.0 -- tqdm 4.28.1 -- h5py 3.4.0 -- opencv-python 3.4.4.19 -- albumentations 0.4.5 -- install densetorch as follow: - -```bash - git clone https://github.com/DrSleep/DenseTorch - cd ./DenseTorch - python setup.py install -``` - -## Training - -The processed VOC dataset can be downloaded from [Download](https://pan.baidu.com/s/12wHGhby5vEcG6isQpnpcMQ) with extraction code: vnhb (about 9 G), put it in ./VOC. 
Or, you can download it by: -```bash -bash load_data.sh -``` - -The training common: - -```bash -# 1p train perf -bash test/train_performance_1p.sh - -# 8p train perf -bash test/train_performance_8p.sh - -# 8p train full -bash test/train_full_8p.sh - -# finetuning -bash test/train_finetune_1p.sh -``` - -In the first running, it requires time to downloaded the model pre-trained on ImageNet. Or you can manually download it by: -```shell -cd ~ -mkdir .torch -mkdir .torch/models -cd .torch/models -wget https://download.pytorch.org/models/resnet101-5d3b4d8f.pth -mv resnet101-5d3b4d8f.pth 101_imagenet.pth.tar -``` -Log path: ./log/ - -Saved model path: ./model/ - -## Training result - -| IOU | FPS | NPU_nums | BS/NPU | AMP_type | -|-----------|-------|-------|-----------------|-----------| -| 78.56 | 25.56 | 1 | 16 | O2 | -| 77.34 | 159.46| 8 | 8 | O2 | - - - - -## Citation -``` -RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation -Guosheng Lin, Anton Milan, Chunhua Shen, Ian Reid -In CVPR 2017 -``` +# RefineNet + +This repository is an NPU implementation of the ["RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation"](https://arxiv.org/abs/1611.06612), referring to https://github.com/DrSleep/refinenet-pytorch + + + +## Requirements + +See requirements.txt + +- PyTorch 1.5.0 +- torchvision 0.6.0 +- Numpy 1.15.1 +- Pillow 5.3.0 +- h5py 2.8.0 +- tqdm 4.28.1 +- h5py 3.4.0 +- opencv-python 3.4.4.19 +- albumentations 0.4.5 +- install densetorch as follow: + +```bash + git clone https://github.com/DrSleep/DenseTorch + cd ./DenseTorch + python setup.py install +``` + +## Training + +The processed VOC dataset can be downloaded from [Download](https://pan.baidu.com/s/12wHGhby5vEcG6isQpnpcMQ) with extraction code: vnhb (about 9 G), put it in ./VOC. Or, you can download it by: +```bash +bash load_data.sh +``` + +The training common: + +```bash +# 1p train perf +bash test/train_performance_1p.sh + +# 8p train perf +bash test/train_performance_8p.sh + +# 8p train full +bash test/train_full_8p.sh + +# finetuning +bash test/train_finetune_1p.sh +``` + +In the first running, it requires time to downloaded the model pre-trained on ImageNet. Or you can manually download it by: +```shell +cd ~ +mkdir .torch +mkdir .torch/models +cd .torch/models +wget https://download.pytorch.org/models/resnet101-5d3b4d8f.pth +mv resnet101-5d3b4d8f.pth 101_imagenet.pth.tar +``` +Log path: ./log/ + +Saved model path: ./model/ + +## Training result + +| IOU | FPS | NPU_nums | BS/NPU | AMP_type | +|-----------|-------|-------|-----------------|-----------| +| 78.56 | 25.56 | 1 | 16 | O2 | +| 77.34 | 159.46| 8 | 8 | O2 | + + + + +## Citation +``` +RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation +Guosheng Lin, Anton Milan, Chunhua Shen, Ian Reid +In CVPR 2017 +``` diff --git a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/models/refinenet.py b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/models/refinenet.py index 07027e25da303fc3fde829db22b93929cfc8b35e..64eafea2b3dcb0cb29f646ea37afdaebf9341502 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/models/refinenet.py +++ b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/models/refinenet.py @@ -1,336 +1,336 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
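The RefineNet README above has you `wget` the ImageNet-pretrained ResNet-101 into `~/.torch/models/101_imagenet.pth.tar` so the first training run skips the download. A small Python equivalent of that manual step, using the same torchvision URL quoted in the README (`fetch_pretrained_resnet101` is an illustrative name):

```python
import os
from urllib.request import urlretrieve

def fetch_pretrained_resnet101(model_dir: str = '~/.torch/models') -> str:
    """Cache the ImageNet-pretrained ResNet-101 where the RefineNet code expects it."""
    model_dir = os.path.expanduser(model_dir)
    os.makedirs(model_dir, exist_ok=True)
    target = os.path.join(model_dir, '101_imagenet.pth.tar')
    if not os.path.exists(target):
        urlretrieve('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', target)
    return target

print(fetch_pretrained_resnet101())
```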
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch.nn as nn -import torch.nn.functional as F -import torch - -import numpy as np - -IMG_SCALE = 1./255 -IMG_MEAN = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3)) -IMG_STD = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3)) - -def maybe_download(model_name, model_url, model_dir=None, map_location=None): - import os, sys - from six.moves import urllib - if model_dir is None: - torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) - model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) - if not os.path.exists(model_dir): - os.makedirs(model_dir) - filename = '{}.pth.tar'.format(model_name) - cached_file = os.path.join(model_dir, filename) - if not os.path.exists(cached_file): - url = model_url - sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) - urllib.request.urlretrieve(url, cached_file) - return torch.load(cached_file, map_location=map_location) - -def prepare_img(img): - return (img * IMG_SCALE - IMG_MEAN) / IMG_STD - -def batchnorm(in_planes): - "batch norm 2d" - return nn.BatchNorm2d(in_planes, affine=True, eps=1e-5, momentum=0.1) - -def conv3x3(in_planes, out_planes, stride=1, bias=False): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=bias) - -def conv1x1(in_planes, out_planes, stride=1, bias=False): - "1x1 convolution" - return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, - padding=0, bias=bias) - -def convbnrelu(in_planes, out_planes, kernel_size, stride=1, groups=1, act=True): - "conv-batchnorm-relu" - if act: - return nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=int(kernel_size / 2.), groups=groups, bias=False), - batchnorm(out_planes), - nn.ReLU6(inplace=True)) - else: - return nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=int(kernel_size / 2.), groups=groups, bias=False), - batchnorm(out_planes)) - -class CRPBlock(nn.Module): - - def __init__(self, in_planes, out_planes, n_stages, max_pooling=True): - super(CRPBlock, self).__init__() - for i in range(n_stages): - setattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'), - conv3x3(in_planes if (i == 0) else out_planes, - out_planes, stride=1, - bias=False)) - self.stride = 1 - self.n_stages = n_stages - if max_pooling: self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) - else: self.maxpool = nn.MaxPool2d(kernel_size=1, stride=1, padding=0) - - def forward(self, x): - top = x - for i in range(self.n_stages): - top = self.maxpool(top) - top = getattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'))(top) - x = top + x - return x - -stages_suffixes = {0 : '_conv', - 1 : '_conv_relu_varout_dimred'} - -class RCUBlock(nn.Module): - - def __init__(self, in_planes, out_planes, n_blocks, n_stages): - super(RCUBlock, self).__init__() - for i in range(n_blocks): - for j in range(n_stages): - setattr(self, '{}{}'.format(i + 1, stages_suffixes[j]), - conv3x3(in_planes if (i == 0) and (j == 0) else out_planes, - 
out_planes, stride=1, - bias=(j == 0))) - self.stride = 1 - self.n_blocks = n_blocks - self.n_stages = n_stages - - def forward(self, x): - for i in range(self.n_blocks): - residual = x - for j in range(self.n_stages): - x = F.relu(x) - x = getattr(self, '{}{}'.format(i + 1, stages_suffixes[j]))(x) - x += residual - return x - -data_info = { - 21: 'VOC', - } - -models_urls = { - '101_voc' : 'https://cloudstor.aarnet.edu.au/plus/s/Owmttk9bdPROwc6/download', - - '101_imagenet': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', - } - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class RefineNet(nn.Module): - - def __init__(self, block, layers, num_classes=21): - self.inplanes = 64 - super(RefineNet, self).__init__() - self.do = nn.Dropout(p=0.5) - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.p_ims1d2_outl1_dimred = conv3x3(2048, 512, bias=False) - self.adapt_stage1_b = self._make_rcu(512, 512, 2, 2) - self.mflow_conv_g1_pool = self._make_crp(512, 512, 4) - self.mflow_conv_g1_b = self._make_rcu(512, 512, 3, 2) - self.mflow_conv_g1_b3_joint_varout_dimred = conv3x3(512, 256, bias=False) - self.p_ims1d2_outl2_dimred = conv3x3(1024, 256, bias=False) - self.adapt_stage2_b = self._make_rcu(256, 256, 2, 2) - self.adapt_stage2_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) - self.mflow_conv_g2_pool = self._make_crp(256, 256, 4) - self.mflow_conv_g2_b = self._make_rcu(256, 256, 3, 2) - self.mflow_conv_g2_b3_joint_varout_dimred = conv3x3(256, 256, bias=False) - - self.p_ims1d2_outl3_dimred = 
conv3x3(512, 256, bias=False) - self.adapt_stage3_b = self._make_rcu(256, 256, 2, 2) - self.adapt_stage3_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) - self.mflow_conv_g3_pool = self._make_crp(256, 256, 4) - self.mflow_conv_g3_b = self._make_rcu(256, 256, 3, 2) - self.mflow_conv_g3_b3_joint_varout_dimred = conv3x3(256, 256, bias=False) - - self.p_ims1d2_outl4_dimred = conv3x3(256, 256, bias=False) - self.adapt_stage4_b = self._make_rcu(256, 256, 2, 2) - self.adapt_stage4_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) - self.mflow_conv_g4_pool = self._make_crp(256, 256, 4, max_pooling=False) - self.mflow_conv_g4_b = self._make_rcu(256, 256, 3, 2) - - self.clf_conv = nn.Conv2d(256, num_classes, kernel_size=3, stride=1, - padding=1, bias=True) - - def _make_crp(self, in_planes, out_planes, stages, max_pooling=True): - layers = [CRPBlock(in_planes, out_planes, stages, max_pooling)] - return nn.Sequential(*layers) - - def _make_rcu(self, in_planes, out_planes, blocks, stages): - layers = [RCUBlock(in_planes, out_planes, blocks, stages)] - return nn.Sequential(*layers) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - l1 = self.layer1(x) - l2 = self.layer2(l1) - l3 = self.layer3(l2) - l4 = self.layer4(l3) - - l4 = self.do(l4) - l3 = self.do(l3) - - - x4 = self.p_ims1d2_outl1_dimred(l4) - x4 = self.adapt_stage1_b(x4) - x4 = self.relu(x4) - x4 = self.mflow_conv_g1_pool(x4) - x4 = self.mflow_conv_g1_b(x4) - x4 = self.mflow_conv_g1_b3_joint_varout_dimred(x4) - x4 = nn.Upsample(size=l3.size()[2:], mode='bilinear', align_corners=True)(x4) - - x3 = self.p_ims1d2_outl2_dimred(l3) - x3 = self.adapt_stage2_b(x3) - x3 = self.adapt_stage2_b2_joint_varout_dimred(x3) - x3 = x3 + x4 - x3 = F.relu(x3) - x3 = self.mflow_conv_g2_pool(x3) - x3 = self.mflow_conv_g2_b(x3) - x3 = self.mflow_conv_g2_b3_joint_varout_dimred(x3) - x3 = nn.Upsample(size=l2.size()[2:], mode='bilinear', align_corners=True)(x3) - - x2 = self.p_ims1d2_outl3_dimred(l2) - x2 = self.adapt_stage3_b(x2) - x2 = self.adapt_stage3_b2_joint_varout_dimred(x2) - x2 = x2 + x3 - x2 = F.relu(x2) - x2 = self.mflow_conv_g3_pool(x2) - x2 = self.mflow_conv_g3_b(x2) - x2 = self.mflow_conv_g3_b3_joint_varout_dimred(x2) - x2 = nn.Upsample(size=l1.size()[2:], mode='bilinear', align_corners=True)(x2) - - - x1 = self.p_ims1d2_outl4_dimred(l1) - x1 = self.adapt_stage4_b(x1) - x1 = self.adapt_stage4_b2_joint_varout_dimred(x1) - x1 = x1 + x2 - x1 = F.relu(x1) - - x1 = self.mflow_conv_g4_pool(x1) - x1 = self.mflow_conv_g4_b(x1) - x1 = self.do(x1) - - out = self.clf_conv(x1) - return out - - -def rf101(num_classes, imagenet=False, pretrained=True, **kwargs): - model = RefineNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, **kwargs) - if imagenet: - key = '101_imagenet' - url = models_urls[key] - model.load_state_dict(maybe_download(key, url), strict=False) - elif pretrained: - dataset = data_info.get(num_classes, None) - if dataset: - bname = 
'101_' + dataset.lower() - key = 'rf' + bname - url = models_urls[bname] - model.load_state_dict(maybe_download(key, url), strict=False) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch.nn as nn +import torch.nn.functional as F +import torch + +import numpy as np + +IMG_SCALE = 1./255 +IMG_MEAN = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3)) +IMG_STD = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3)) + +def maybe_download(model_name, model_url, model_dir=None, map_location=None): + import os, sys + from six.moves import urllib + if model_dir is None: + torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) + model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) + if not os.path.exists(model_dir): + os.makedirs(model_dir) + filename = '{}.pth.tar'.format(model_name) + cached_file = os.path.join(model_dir, filename) + if not os.path.exists(cached_file): + url = model_url + sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) + urllib.request.urlretrieve(url, cached_file) + return torch.load(cached_file, map_location=map_location) + +def prepare_img(img): + return (img * IMG_SCALE - IMG_MEAN) / IMG_STD + +def batchnorm(in_planes): + "batch norm 2d" + return nn.BatchNorm2d(in_planes, affine=True, eps=1e-5, momentum=0.1) + +def conv3x3(in_planes, out_planes, stride=1, bias=False): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=bias) + +def conv1x1(in_planes, out_planes, stride=1, bias=False): + "1x1 convolution" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, + padding=0, bias=bias) + +def convbnrelu(in_planes, out_planes, kernel_size, stride=1, groups=1, act=True): + "conv-batchnorm-relu" + if act: + return nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=int(kernel_size / 2.), groups=groups, bias=False), + batchnorm(out_planes), + nn.ReLU6(inplace=True)) + else: + return nn.Sequential(nn.Conv2d(in_planes, out_planes, kernel_size, stride=stride, padding=int(kernel_size / 2.), groups=groups, bias=False), + batchnorm(out_planes)) + +class CRPBlock(nn.Module): + + def __init__(self, in_planes, out_planes, n_stages, max_pooling=True): + super(CRPBlock, self).__init__() + for i in range(n_stages): + setattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'), + conv3x3(in_planes if (i == 0) else out_planes, + out_planes, stride=1, + bias=False)) + self.stride = 1 + self.n_stages = n_stages + if max_pooling: self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) + else: self.maxpool = nn.MaxPool2d(kernel_size=1, stride=1, padding=0) + + def forward(self, x): + top = x + for i in range(self.n_stages): + top = self.maxpool(top) + top = getattr(self, '{}_{}'.format(i + 1, 'outvar_dimred'))(top) + x = top + x + return x + +stages_suffixes = {0 : '_conv', + 1 : 
'_conv_relu_varout_dimred'} + +class RCUBlock(nn.Module): + + def __init__(self, in_planes, out_planes, n_blocks, n_stages): + super(RCUBlock, self).__init__() + for i in range(n_blocks): + for j in range(n_stages): + setattr(self, '{}{}'.format(i + 1, stages_suffixes[j]), + conv3x3(in_planes if (i == 0) and (j == 0) else out_planes, + out_planes, stride=1, + bias=(j == 0))) + self.stride = 1 + self.n_blocks = n_blocks + self.n_stages = n_stages + + def forward(self, x): + for i in range(self.n_blocks): + residual = x + for j in range(self.n_stages): + x = F.relu(x) + x = getattr(self, '{}{}'.format(i + 1, stages_suffixes[j]))(x) + x += residual + return x + +data_info = { + 21: 'VOC', + } + +models_urls = { + '101_voc' : 'https://cloudstor.aarnet.edu.au/plus/s/Owmttk9bdPROwc6/download', + + '101_imagenet': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + } + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class RefineNet(nn.Module): + + def __init__(self, block, layers, num_classes=21): + self.inplanes = 64 + super(RefineNet, self).__init__() + self.do = nn.Dropout(p=0.5) + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.p_ims1d2_outl1_dimred = conv3x3(2048, 512, bias=False) + self.adapt_stage1_b = self._make_rcu(512, 512, 2, 2) + self.mflow_conv_g1_pool = self._make_crp(512, 512, 4) + self.mflow_conv_g1_b = self._make_rcu(512, 512, 3, 2) + self.mflow_conv_g1_b3_joint_varout_dimred = conv3x3(512, 256, bias=False) + self.p_ims1d2_outl2_dimred = conv3x3(1024, 256, bias=False) + 
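# Hedged shape-check for the CRP and RCU blocks defined above. Assumes this
# model file is saved and importable as `refinenet` (an illustrative name, not
# part of the repo); only torch is otherwise required.
import torch
from refinenet import CRPBlock, RCUBlock

x = torch.randn(1, 256, 32, 32)
crp = CRPBlock(256, 256, n_stages=4)              # 4 rounds of 5x5 max-pool + 3x3 conv, summed back into x
rcu = RCUBlock(256, 256, n_blocks=2, n_stages=2)  # 2 residual units, each ReLU -> conv -> ReLU -> conv
print(crp(x).shape, rcu(x).shape)                 # both keep torch.Size([1, 256, 32, 32])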
self.adapt_stage2_b = self._make_rcu(256, 256, 2, 2) + self.adapt_stage2_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) + self.mflow_conv_g2_pool = self._make_crp(256, 256, 4) + self.mflow_conv_g2_b = self._make_rcu(256, 256, 3, 2) + self.mflow_conv_g2_b3_joint_varout_dimred = conv3x3(256, 256, bias=False) + + self.p_ims1d2_outl3_dimred = conv3x3(512, 256, bias=False) + self.adapt_stage3_b = self._make_rcu(256, 256, 2, 2) + self.adapt_stage3_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) + self.mflow_conv_g3_pool = self._make_crp(256, 256, 4) + self.mflow_conv_g3_b = self._make_rcu(256, 256, 3, 2) + self.mflow_conv_g3_b3_joint_varout_dimred = conv3x3(256, 256, bias=False) + + self.p_ims1d2_outl4_dimred = conv3x3(256, 256, bias=False) + self.adapt_stage4_b = self._make_rcu(256, 256, 2, 2) + self.adapt_stage4_b2_joint_varout_dimred = conv3x3(256, 256, bias=False) + self.mflow_conv_g4_pool = self._make_crp(256, 256, 4, max_pooling=False) + self.mflow_conv_g4_b = self._make_rcu(256, 256, 3, 2) + + self.clf_conv = nn.Conv2d(256, num_classes, kernel_size=3, stride=1, + padding=1, bias=True) + + def _make_crp(self, in_planes, out_planes, stages, max_pooling=True): + layers = [CRPBlock(in_planes, out_planes, stages, max_pooling)] + return nn.Sequential(*layers) + + def _make_rcu(self, in_planes, out_planes, blocks, stages): + layers = [RCUBlock(in_planes, out_planes, blocks, stages)] + return nn.Sequential(*layers) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + l1 = self.layer1(x) + l2 = self.layer2(l1) + l3 = self.layer3(l2) + l4 = self.layer4(l3) + + l4 = self.do(l4) + l3 = self.do(l3) + + + x4 = self.p_ims1d2_outl1_dimred(l4) + x4 = self.adapt_stage1_b(x4) + x4 = self.relu(x4) + x4 = self.mflow_conv_g1_pool(x4) + x4 = self.mflow_conv_g1_b(x4) + x4 = self.mflow_conv_g1_b3_joint_varout_dimred(x4) + x4 = nn.Upsample(size=l3.size()[2:], mode='bilinear', align_corners=True)(x4) + + x3 = self.p_ims1d2_outl2_dimred(l3) + x3 = self.adapt_stage2_b(x3) + x3 = self.adapt_stage2_b2_joint_varout_dimred(x3) + x3 = x3 + x4 + x3 = F.relu(x3) + x3 = self.mflow_conv_g2_pool(x3) + x3 = self.mflow_conv_g2_b(x3) + x3 = self.mflow_conv_g2_b3_joint_varout_dimred(x3) + x3 = nn.Upsample(size=l2.size()[2:], mode='bilinear', align_corners=True)(x3) + + x2 = self.p_ims1d2_outl3_dimred(l2) + x2 = self.adapt_stage3_b(x2) + x2 = self.adapt_stage3_b2_joint_varout_dimred(x2) + x2 = x2 + x3 + x2 = F.relu(x2) + x2 = self.mflow_conv_g3_pool(x2) + x2 = self.mflow_conv_g3_b(x2) + x2 = self.mflow_conv_g3_b3_joint_varout_dimred(x2) + x2 = nn.Upsample(size=l1.size()[2:], mode='bilinear', align_corners=True)(x2) + + + x1 = self.p_ims1d2_outl4_dimred(l1) + x1 = self.adapt_stage4_b(x1) + x1 = self.adapt_stage4_b2_joint_varout_dimred(x1) + x1 = x1 + x2 + x1 = F.relu(x1) + + x1 = self.mflow_conv_g4_pool(x1) + x1 = self.mflow_conv_g4_b(x1) + x1 = self.do(x1) + + out = self.clf_conv(x1) + return out + + +def 
rf101(num_classes, imagenet=False, pretrained=True, **kwargs): + model = RefineNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, **kwargs) + if imagenet: + key = '101_imagenet' + url = models_urls[key] + model.load_state_dict(maybe_download(key, url), strict=False) + elif pretrained: + dataset = data_info.get(num_classes, None) + if dataset: + bname = '101_' + dataset.lower() + key = 'rf' + bname + url = models_urls[bname] + model.load_state_dict(maybe_download(key, url), strict=False) return model \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/src/load_dataset.py b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/src/load_dataset.py index c011e1f9f3b61b059fa419cf134ec46adfe77913..b74edc85e57013c4bcd2d598b79bb731ad8bf5e5 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/RefineNet/src/load_dataset.py +++ b/PyTorch/contrib/cv/semantic_segmentation/RefineNet/src/load_dataset.py @@ -1,73 +1,73 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
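# Hedged end-to-end sketch for the rf101() factory above: normalise an HWC
# uint8 image with prepare_img(), permute to NCHW and run a forward pass.
# `refinenet` is an assumed module name for this file; pretrained=False skips
# the weight download in maybe_download().
import numpy as np
import torch
from refinenet import rf101, prepare_img

model = rf101(num_classes=21, imagenet=False, pretrained=False).eval()
img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)        # stand-in for a real image
inp = torch.from_numpy(prepare_img(img).transpose(2, 0, 1)[None]).float()
with torch.no_grad():
    logits = model(inp)          # 21-class score map at 1/4 resolution: (1, 21, 120, 160)
print(logits.shape)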
-# ============================================================================ -import os -import re -import sys -from tqdm import tqdm -from time import time -sys.path.append('./') -# general libs -import logging -import numpy as np - -# pytorch libs -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.utils.data import DataLoader - -# densetorch wrapper -import densetorch as dt - -# configuration for light-weight refinenet -from arguments import get_arguments -from data import get_datasets, get_transforms -from network import get_segmenter -from optimisers import get_optimisers, get_lr_schedulers -from apex import amp -import torch.multiprocessing as mp - -def setup_data_loaders(args): - train_transforms, val_transforms = get_transforms( - crop_size=args.crop_size, - shorter_side=args.shorter_side, - low_scale=args.low_scale, - high_scale=args.high_scale, - img_mean=args.img_mean, - img_std=args.img_std, - img_scale=args.img_scale, - ignore_label=args.ignore_label, - num_stages=args.num_stages, - augmentations_type=args.augmentations_type, - dataset_type=args.dataset_type, - ) - train_sets, val_set = get_datasets( - train_dir=args.train_dir, - val_dir=args.val_dir, - train_list_path=args.train_list_path, - val_list_path=args.val_list_path, - train_transforms=train_transforms, - val_transforms=val_transforms, - masks_names=("segm",), - dataset_type=args.dataset_type, - stage_names=args.stage_names, - train_download=args.train_download, - val_download=args.val_download, - ) - - -args = get_arguments() -setup_data_loaders(args) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import os +import re +import sys +from tqdm import tqdm +from time import time +sys.path.append('./') +# general libs +import logging +import numpy as np + +# pytorch libs +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader + +# densetorch wrapper +import densetorch as dt + +# configuration for light-weight refinenet +from arguments import get_arguments +from data import get_datasets, get_transforms +from network import get_segmenter +from optimisers import get_optimisers, get_lr_schedulers +from apex import amp +import torch.multiprocessing as mp + +def setup_data_loaders(args): + train_transforms, val_transforms = get_transforms( + crop_size=args.crop_size, + shorter_side=args.shorter_side, + low_scale=args.low_scale, + high_scale=args.high_scale, + img_mean=args.img_mean, + img_std=args.img_std, + img_scale=args.img_scale, + ignore_label=args.ignore_label, + num_stages=args.num_stages, + augmentations_type=args.augmentations_type, + dataset_type=args.dataset_type, + ) + train_sets, val_set = get_datasets( + train_dir=args.train_dir, + val_dir=args.val_dir, + train_list_path=args.train_list_path, + val_list_path=args.val_list_path, + train_transforms=train_transforms, + val_transforms=val_transforms, + masks_names=("segm",), + dataset_type=args.dataset_type, + stage_names=args.stage_names, + train_download=args.train_download, + val_download=args.val_download, + ) + + +args = get_arguments() +setup_data_loaders(args) + diff --git a/PyTorch/contrib/cv/semantic_segmentation/SETR/env_npu.sh b/PyTorch/contrib/cv/semantic_segmentation/SETR/env_npu.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/SETR/mmcv-need/fp16_utils.py b/PyTorch/contrib/cv/semantic_segmentation/SETR/mmcv-need/fp16_utils.py index 44d1bdd7d86e9736bb2afde0d6fd01a4d35bbee2..b1bd4a135221e28ecda9e955c111ce9c6ed6eaf2 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/SETR/mmcv-need/fp16_utils.py +++ b/PyTorch/contrib/cv/semantic_segmentation/SETR/mmcv-need/fp16_utils.py @@ -1,381 +1,381 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
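# Hedged follow-up to the load_dataset.py setup above: get_datasets() returns
# (train_sets, val_set) as torch-style datasets, which can be wired into
# DataLoaders like this. The helper name make_loaders is illustrative, not part
# of the repo; batch_size/num_workers mirror the script's arguments.
from torch.utils.data import DataLoader

def make_loaders(train_sets, val_set, batch_size, num_workers):
    train_loaders = [
        DataLoader(ds, batch_size=batch_size, shuffle=True, drop_last=True,
                   num_workers=num_workers, pin_memory=True)
        for ds in train_sets                      # one loader per training stage
    ]
    val_loader = DataLoader(val_set, batch_size=1, shuffle=False,
                            num_workers=num_workers, pin_memory=True)
    return train_loaders, val_loader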
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -import functools -import warnings -from collections import abc -from inspect import getfullargspec - -import numpy as np -import torch -import torch.nn as nn - -from .dist_utils import allreduce_grads as _allreduce_grads - - -def cast_tensor_type(inputs, src_type, dst_type): - """Recursively convert Tensor in inputs from src_type to dst_type. - - Args: - inputs: Inputs that to be casted. - src_type (torch.dtype): Source type.. - dst_type (torch.dtype): Destination type. - - Returns: - The same type with inputs, but all contained Tensors have been cast. - """ - if isinstance(inputs, torch.Tensor): - return inputs.to(dst_type) - elif isinstance(inputs, str): - return inputs - elif isinstance(inputs, np.ndarray): - return inputs - elif isinstance(inputs, abc.Mapping): - return type(inputs)({ - k: cast_tensor_type(v, src_type, dst_type) - for k, v in inputs.items() - }) - elif isinstance(inputs, abc.Iterable): - return type(inputs)( - cast_tensor_type(item, src_type, dst_type) for item in inputs) - else: - return inputs - - -def auto_fp16(apply_to=None, out_fp32=False): - """Decorator to enable fp16 training automatically. - - This decorator is useful when you write custom modules and want to support - mixed precision training. If inputs arguments are fp32 tensors, they will - be converted to fp16 automatically. Arguments other than fp32 tensors are - ignored. - - Args: - apply_to (Iterable, optional): The argument names to be converted. - `None` indicates all arguments. - out_fp32 (bool): Whether to convert the output back to fp32. - - Example: - - >>> import torch.nn as nn - >>> class MyModule1(nn.Module): - >>> - >>> # Convert x and y to fp16 - >>> @auto_fp16() - >>> def forward(self, x, y): - >>> pass - - >>> import torch.nn as nn - >>> class MyModule2(nn.Module): - >>> - >>> # convert pred to fp16 - >>> @auto_fp16(apply_to=('pred', )) - >>> def do_something(self, pred, others): - >>> pass - """ - - def auto_fp16_wrapper(old_func): - - @functools.wraps(old_func) - def new_func(*args, **kwargs): - # check if the module has set the attribute `fp16_enabled`, if not, - # just fallback to the original method. 
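# Hedged usage sketch for cast_tensor_type() above: it walks mappings and
# iterables and converts only the tensors, leaving strings and ndarrays
# untouched. Assumes this patched file is importable as `fp16_utils`
# (it mirrors mmcv.runner.fp16_utils).
import torch
from fp16_utils import cast_tensor_type

batch = {'img': torch.randn(2, 3, 8, 8),
         'meta': ['a.png', 'b.png'],
         'boxes': [torch.zeros(2, 4), torch.ones(1, 4)]}
half_batch = cast_tensor_type(batch, torch.float, torch.half)
print(half_batch['img'].dtype, half_batch['boxes'][0].dtype)   # torch.float16 torch.float16
print(half_batch['meta'])                                      # strings pass through unchanged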
- if not isinstance(args[0], torch.nn.Module): - raise TypeError('@auto_fp16 can only be used to decorate the ' - 'method of nn.Module') - if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): - return old_func(*args, **kwargs) - # get the arg spec of the decorated method - args_info = getfullargspec(old_func) - # get the argument names to be casted - args_to_cast = args_info.args if apply_to is None else apply_to - # convert the args that need to be processed - new_args = [] - # NOTE: default args are not taken into consideration - if args: - arg_names = args_info.args[:len(args)] - for i, arg_name in enumerate(arg_names): - if arg_name in args_to_cast: - new_args.append( - cast_tensor_type(args[i], torch.float, torch.half)) - else: - new_args.append(args[i]) - # convert the kwargs that need to be processed - new_kwargs = {} - if kwargs: - for arg_name, arg_value in kwargs.items(): - if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type( - arg_value, torch.float, torch.half) - else: - new_kwargs[arg_name] = arg_value - # apply converted arguments to the decorated method - output = old_func(*new_args, **new_kwargs) - # cast the results back to fp32 if necessary - if out_fp32: - output = cast_tensor_type(output, torch.half, torch.float) - return output - - return new_func - - return auto_fp16_wrapper - - -def force_fp32(apply_to=None, out_fp16=False): - """Decorator to convert input arguments to fp32 in force. - - This decorator is useful when you write custom modules and want to support - mixed precision training. If there are some inputs that must be processed - in fp32 mode, then this decorator can handle it. If inputs arguments are - fp16 tensors, they will be converted to fp32 automatically. Arguments other - than fp16 tensors are ignored. - - Args: - apply_to (Iterable, optional): The argument names to be converted. - `None` indicates all arguments. - out_fp16 (bool): Whether to convert the output back to fp16. - - Example: - - >>> import torch.nn as nn - >>> class MyModule1(nn.Module): - >>> - >>> # Convert x and y to fp32 - >>> @force_fp32() - >>> def loss(self, x, y): - >>> pass - - >>> import torch.nn as nn - >>> class MyModule2(nn.Module): - >>> - >>> # convert pred to fp32 - >>> @force_fp32(apply_to=('pred', )) - >>> def post_process(self, pred, others): - >>> pass - """ - - def force_fp32_wrapper(old_func): - - @functools.wraps(old_func) - def new_func(*args, **kwargs): - # check if the module has set the attribute `fp16_enabled`, if not, - # just fallback to the original method. 
- if not isinstance(args[0], torch.nn.Module): - raise TypeError('@force_fp32 can only be used to decorate the ' - 'method of nn.Module') - if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): - return old_func(*args, **kwargs) - # get the arg spec of the decorated method - args_info = getfullargspec(old_func) - # get the argument names to be casted - args_to_cast = args_info.args if apply_to is None else apply_to - # convert the args that need to be processed - new_args = [] - if args: - arg_names = args_info.args[:len(args)] - for i, arg_name in enumerate(arg_names): - if arg_name in args_to_cast: - new_args.append( - cast_tensor_type(args[i], torch.half, torch.float)) - else: - new_args.append(args[i]) - # convert the kwargs that need to be processed - new_kwargs = dict() - if kwargs: - for arg_name, arg_value in kwargs.items(): - if arg_name in args_to_cast: - new_kwargs[arg_name] = cast_tensor_type( - arg_value, torch.half, torch.float) - else: - new_kwargs[arg_name] = arg_value - # apply converted arguments to the decorated method - output = old_func(*new_args, **new_kwargs) - # cast the results back to fp32 if necessary - if out_fp16: - output = cast_tensor_type(output, torch.float, torch.half) - return output - - return new_func - - return force_fp32_wrapper - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - warnings.warning( - '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' - 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') - _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) - - -def wrap_fp16_model(model): - """Wrap the FP32 model to FP16. - - 1. Convert FP32 model to FP16. - 2. Remain some necessary layers to be FP32, e.g., normalization layers. - - Args: - model (nn.Module): Model in FP32. - """ - # convert model to fp16 - model.half() - # patch the normalization layers to make it work in fp32 mode - patch_norm_fp32(model) - # set `fp16_enabled` flag - for m in model.modules(): - if hasattr(m, 'fp16_enabled'): - m.fp16_enabled = True - - -def patch_norm_fp32(module): - """Recursively convert normalization layers from FP16 to FP32. - - Args: - module (nn.Module): The modules to be converted in FP16. - - Returns: - nn.Module: The converted module, the normalization layers have been - converted to FP32. - """ - if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm, nn.LayerNorm)): - module.float() - # if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': - module.forward = patch_forward_method(module.forward, torch.half, - torch.float) - for child in module.children(): - patch_norm_fp32(child) - return module - - -def patch_forward_method(func, src_type, dst_type, convert_output=True): - """Patch the forward method of a module. - - Args: - func (callable): The original forward method. - src_type (torch.dtype): Type of input arguments to be converted from. - dst_type (torch.dtype): Type of input arguments to be converted to. - convert_output (bool): Whether to convert the output back to src_type. - - Returns: - callable: The patched forward method. - """ - - def new_forward(*args, **kwargs): - output = func(*cast_tensor_type(args, src_type, dst_type), - **cast_tensor_type(kwargs, src_type, dst_type)) - if convert_output: - output = cast_tensor_type(output, dst_type, src_type) - return output - - return new_forward - - -class LossScaler: - """Class that manages loss scaling in mixed precision training which - supports both dynamic or static mode. 
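# Hedged sketch of how the auto_fp16/force_fp32 decorators above are used:
# casting only activates once fp16_enabled is True (normally set by
# wrap_fp16_model). Assumes the file is importable as `fp16_utils`.
import torch
import torch.nn as nn
from fp16_utils import auto_fp16, force_fp32

class Head(nn.Module):
    def __init__(self):
        super().__init__()
        self.fp16_enabled = False        # flipped to True by wrap_fp16_model()
        self.fc = nn.Linear(8, 2)

    @auto_fp16(apply_to=('x',))          # inputs cast fp32 -> fp16 when enabled
    def forward(self, x):
        return self.fc(x)

    @force_fp32(apply_to=('pred',))      # loss maths kept in fp32 for stability
    def loss(self, pred, target):
        return nn.functional.mse_loss(pred, target)

head = Head()
out = head(torch.randn(4, 8))            # fp16_enabled=False: plain fp32 path
print(head.loss(out, torch.zeros(4, 2)))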
- - The implementation refers to - https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. - Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. - It's important to understand how :class:`LossScaler` operates. - Loss scaling is designed to combat the problem of underflowing - gradients encountered at long times when training fp16 networks. - Dynamic loss scaling begins by attempting a very high loss - scale. Ironically, this may result in OVERflowing gradients. - If overflowing gradients are encountered, :class:`FP16_Optimizer` then - skips the update step for this particular iteration/minibatch, - and :class:`LossScaler` adjusts the loss scale to a lower value. - If a certain number of iterations occur without overflowing gradients - detected,:class:`LossScaler` increases the loss scale once more. - In this way :class:`LossScaler` attempts to "ride the edge" of always - using the highest loss scale possible without incurring overflow. - - Args: - init_scale (float): Initial loss scale value, default: 2**32. - scale_factor (float): Factor used when adjusting the loss scale. - Default: 2. - mode (str): Loss scaling mode. 'dynamic' or 'static' - scale_window (int): Number of consecutive iterations without an - overflow to wait before increasing the loss scale. Default: 1000. - """ - - def __init__(self, - init_scale=2**32, - mode='dynamic', - scale_factor=2., - scale_window=1000): - self.cur_scale = init_scale - self.cur_iter = 0 - assert mode in ('dynamic', - 'static'), 'mode can only be dynamic or static' - self.mode = mode - self.last_overflow_iter = -1 - self.scale_factor = scale_factor - self.scale_window = scale_window - - def has_overflow(self, params): - """Check if params contain overflow.""" - if self.mode != 'dynamic': - return False - for p in params: - if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): - return True - return False - - def _has_inf_or_nan(x): - """Check if params contain NaN.""" - try: - cpu_sum = float(x.float().sum()) - except RuntimeError as instance: - if 'value cannot be converted' not in instance.args[0]: - raise - return True - else: - if cpu_sum == float('inf') or cpu_sum == -float('inf') \ - or cpu_sum != cpu_sum: - return True - return False - - def update_scale(self, overflow): - """update the current loss scale value when overflow happens.""" - if self.mode != 'dynamic': - return - if overflow: - self.cur_scale = max(self.cur_scale / self.scale_factor, 1) - self.last_overflow_iter = self.cur_iter - else: - if (self.cur_iter - self.last_overflow_iter) % \ - self.scale_window == 0: - self.cur_scale *= self.scale_factor - self.cur_iter += 1 - - @property - def loss_scale(self): - return self.cur_scale +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
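# Hedged sketch of the dynamic loss-scaling loop implied by LossScaler above
# (toy CPU example; assumes the class is importable as fp16_utils.LossScaler).
import torch
import torch.nn as nn
from fp16_utils import LossScaler

model = nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = LossScaler(init_scale=2**16, mode='dynamic', scale_window=4)

for step in range(8):
    x, y = torch.randn(16, 4), torch.randn(16, 1)
    loss = nn.functional.mse_loss(model(x), y)
    (loss * scaler.loss_scale).backward()             # scale up so small grads survive in fp16
    overflow = scaler.has_overflow(model.parameters())
    if not overflow:
        for p in model.parameters():
            p.grad.div_(scaler.loss_scale)            # unscale before the optimizer step
        opt.step()
    opt.zero_grad()
    scaler.update_scale(overflow)                     # shrink on overflow, grow after a clean window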
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +import functools +import warnings +from collections import abc +from inspect import getfullargspec + +import numpy as np +import torch +import torch.nn as nn + +from .dist_utils import allreduce_grads as _allreduce_grads + + +def cast_tensor_type(inputs, src_type, dst_type): + """Recursively convert Tensor in inputs from src_type to dst_type. + + Args: + inputs: Inputs that to be casted. + src_type (torch.dtype): Source type.. + dst_type (torch.dtype): Destination type. + + Returns: + The same type with inputs, but all contained Tensors have been cast. + """ + if isinstance(inputs, torch.Tensor): + return inputs.to(dst_type) + elif isinstance(inputs, str): + return inputs + elif isinstance(inputs, np.ndarray): + return inputs + elif isinstance(inputs, abc.Mapping): + return type(inputs)({ + k: cast_tensor_type(v, src_type, dst_type) + for k, v in inputs.items() + }) + elif isinstance(inputs, abc.Iterable): + return type(inputs)( + cast_tensor_type(item, src_type, dst_type) for item in inputs) + else: + return inputs + + +def auto_fp16(apply_to=None, out_fp32=False): + """Decorator to enable fp16 training automatically. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If inputs arguments are fp32 tensors, they will + be converted to fp16 automatically. Arguments other than fp32 tensors are + ignored. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp32 (bool): Whether to convert the output back to fp32. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. 
+ if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. 
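# Hedged illustration of wrap_fp16_model()/patch_norm_fp32() above: parameters
# go to fp16 while normalisation layers are patched back to fp32. Assumes the
# module is importable as `fp16_utils`; no forward pass is run, so this also
# works on CPU.
import torch.nn as nn
from fp16_utils import wrap_fp16_model

net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU())
wrap_fp16_model(net)
print(net[0].weight.dtype)   # torch.float16  (converted by model.half())
print(net[1].weight.dtype)   # torch.float32  (restored by patch_norm_fp32)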
+ if not isinstance(args[0], torch.nn.Module): + raise TypeError('@force_fp32 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.half, torch.float)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = dict() + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.half, torch.float) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp16: + output = cast_tensor_type(output, torch.float, torch.half) + return output + + return new_func + + return force_fp32_wrapper + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + warnings.warning( + '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be ' + 'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads') + _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb) + + +def wrap_fp16_model(model): + """Wrap the FP32 model to FP16. + + 1. Convert FP32 model to FP16. + 2. Remain some necessary layers to be FP32, e.g., normalization layers. + + Args: + model (nn.Module): Model in FP32. + """ + # convert model to fp16 + model.half() + # patch the normalization layers to make it work in fp32 mode + patch_norm_fp32(model) + # set `fp16_enabled` flag + for m in model.modules(): + if hasattr(m, 'fp16_enabled'): + m.fp16_enabled = True + + +def patch_norm_fp32(module): + """Recursively convert normalization layers from FP16 to FP32. + + Args: + module (nn.Module): The modules to be converted in FP16. + + Returns: + nn.Module: The converted module, the normalization layers have been + converted to FP32. + """ + if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm, nn.LayerNorm)): + module.float() + # if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3': + module.forward = patch_forward_method(module.forward, torch.half, + torch.float) + for child in module.children(): + patch_norm_fp32(child) + return module + + +def patch_forward_method(func, src_type, dst_type, convert_output=True): + """Patch the forward method of a module. + + Args: + func (callable): The original forward method. + src_type (torch.dtype): Type of input arguments to be converted from. + dst_type (torch.dtype): Type of input arguments to be converted to. + convert_output (bool): Whether to convert the output back to src_type. + + Returns: + callable: The patched forward method. + """ + + def new_forward(*args, **kwargs): + output = func(*cast_tensor_type(args, src_type, dst_type), + **cast_tensor_type(kwargs, src_type, dst_type)) + if convert_output: + output = cast_tensor_type(output, dst_type, src_type) + return output + + return new_forward + + +class LossScaler: + """Class that manages loss scaling in mixed precision training which + supports both dynamic or static mode. 
+ + The implementation refers to + https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py. + Indirectly, by supplying ``mode='dynamic'`` for dynamic loss scaling. + It's important to understand how :class:`LossScaler` operates. + Loss scaling is designed to combat the problem of underflowing + gradients encountered at long times when training fp16 networks. + Dynamic loss scaling begins by attempting a very high loss + scale. Ironically, this may result in OVERflowing gradients. + If overflowing gradients are encountered, :class:`FP16_Optimizer` then + skips the update step for this particular iteration/minibatch, + and :class:`LossScaler` adjusts the loss scale to a lower value. + If a certain number of iterations occur without overflowing gradients + detected,:class:`LossScaler` increases the loss scale once more. + In this way :class:`LossScaler` attempts to "ride the edge" of always + using the highest loss scale possible without incurring overflow. + + Args: + init_scale (float): Initial loss scale value, default: 2**32. + scale_factor (float): Factor used when adjusting the loss scale. + Default: 2. + mode (str): Loss scaling mode. 'dynamic' or 'static' + scale_window (int): Number of consecutive iterations without an + overflow to wait before increasing the loss scale. Default: 1000. + """ + + def __init__(self, + init_scale=2**32, + mode='dynamic', + scale_factor=2., + scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', + 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') \ + or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % \ + self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + @property + def loss_scale(self): + return self.cur_scale diff --git a/PyTorch/contrib/cv/semantic_segmentation/SETR/tools/dist_train.sh b/PyTorch/contrib/cv/semantic_segmentation/SETR/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git 
a/PyTorch/contrib/cv/semantic_segmentation/Wseg/configs/voc_resnet38.yaml b/PyTorch/contrib/cv/semantic_segmentation/Wseg/configs/voc_resnet38.yaml index 7870288e6fe79fbd17946c4e5ee5036bba63a379..b89a2dd6539132dca08fc3a7dc7efc953974fe61 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/Wseg/configs/voc_resnet38.yaml +++ b/PyTorch/contrib/cv/semantic_segmentation/Wseg/configs/voc_resnet38.yaml @@ -1,32 +1,32 @@ -NUM_GPUS: 1 -DATASET: - CROP_SIZE: 321 - SCALE_FROM: 0.9 - SCALE_TO: 1.0 -TRAIN: - BATCH_SIZE: 16 - NUM_EPOCHS: 24 - NUM_WORKERS: 8 - PRETRAIN: 5 -NET: - BACKBONE: "resnet38" - MODEL: "ae" - PRE_WEIGHTS_PATH: "./models/weights/ilsvrc-cls_rna-a1_cls1000_ep-0001.pth" - LR: 0.001 - OPT: "SGD" - LOSS: "SoftMargin" - WEIGHT_DECAY: 0.0005 - PAMR_ITER: 10 - FOCAL_LAMBDA: 0.01 - FOCAL_P: 3 - SG_PSI: 0.3 -TEST: - METHOD: "multiscale" - DATA_ROOT: "./data" - FLIP: True - BATCH_SIZE: 8 - PAD_SIZE: [1024, 1024] - SCALES: [1, 0.5, 1.5, 2.0] - FP_CUT_SCORE: 0.1 - BG_POW: 3 - USE_GT_LABELS: False +NUM_GPUS: 1 +DATASET: + CROP_SIZE: 321 + SCALE_FROM: 0.9 + SCALE_TO: 1.0 +TRAIN: + BATCH_SIZE: 16 + NUM_EPOCHS: 24 + NUM_WORKERS: 8 + PRETRAIN: 5 +NET: + BACKBONE: "resnet38" + MODEL: "ae" + PRE_WEIGHTS_PATH: "./models/weights/ilsvrc-cls_rna-a1_cls1000_ep-0001.pth" + LR: 0.001 + OPT: "SGD" + LOSS: "SoftMargin" + WEIGHT_DECAY: 0.0005 + PAMR_ITER: 10 + FOCAL_LAMBDA: 0.01 + FOCAL_P: 3 + SG_PSI: 0.3 +TEST: + METHOD: "multiscale" + DATA_ROOT: "./data" + FLIP: True + BATCH_SIZE: 8 + PAD_SIZE: [1024, 1024] + SCALES: [1, 0.5, 1.5, 2.0] + FP_CUT_SCORE: 0.1 + BG_POW: 3 + USE_GT_LABELS: False diff --git a/PyTorch/contrib/cv/semantic_segmentation/Wseg/core/config.py b/PyTorch/contrib/cv/semantic_segmentation/Wseg/core/config.py index dee2341357703ac9083ef5869b11ce6d4c2cb9e8..fcc5d5b80853e824835467cd836e03aba16ea6f4 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/Wseg/core/config.py +++ b/PyTorch/contrib/cv/semantic_segmentation/Wseg/core/config.py @@ -1,261 +1,261 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
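# Hedged sketch tying the voc_resnet38.yaml above to the cfg machinery defined
# in core/config.py below: the YAML is merged into the global AttrDict and can
# then be overridden from the command line. Assumes the Wseg layout where this
# module is importable as core.config.
from core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg

merge_cfg_from_file('configs/voc_resnet38.yaml')
merge_cfg_from_list(['TRAIN.BATCH_SIZE', '8', 'NET.LR', '0.0005'])  # values are literal_eval'ed and type-checked
assert_and_infer_cfg()                    # freeze cfg for the rest of the run
print(cfg.TRAIN.BATCH_SIZE, cfg.NET.LR)   # -> 8 0.0005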
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - - -import yaml -import six -import os -import os.path as osp -import copy -from ast import literal_eval - -import numpy as np -from packaging import version - -from utils.collections import AttrDict - -__C = AttrDict() -# Consumers can get config by: -# from fast_rcnn_config import cfg -cfg = __C - -__C.NUM_GPUS = 1 -# Random note: avoid using '.ON' as a config key since yaml converts it to True; -# prefer 'ENABLED' instead - -# ---------------------------------------------------------------------------- # -# Training options -# ---------------------------------------------------------------------------- # -__C.TRAIN = AttrDict() -__C.TRAIN.BATCH_SIZE = 20 -__C.TRAIN.NUM_EPOCHS = 15 -__C.TRAIN.NUM_WORKERS = 4 -__C.TRAIN.MASK_LOSS = 0.0 -__C.TRAIN.PRETRAIN = 5 - -# ---------------------------------------------------------------------------- # -# Inference options -# ---------------------------------------------------------------------------- # -__C.TEST = AttrDict() -__C.TEST.METHOD = "multiscale" # multiscale | crop -__C.TEST.DATA_ROOT = "/data/your_directory" -__C.TEST.SCALES = [1, 0.5, 1.5, 2.0] -__C.TEST.FLIP = True -__C.TEST.PAD_SIZE = [1024, 1024] -__C.TEST.CROP_SIZE = [448, 448] -__C.TEST.CROP_GRID_SIZE = [2, 2] -__C.TEST.BATCH_SIZE = 8 -__C.TEST.BG_POW = 3 -__C.TEST.NUM_CLASSES = 21 - -# use ground-truth labels to remove -# false positive masks -__C.TEST.USE_GT_LABELS = False - -# if class confidence does not exceed this threshold -# the mask is removed (count as false positive) -# used only if MASKS.USE_GT_LABELS is False -__C.TEST.FP_CUT_SCORE = 0.1 - -# ---------------------------------------------------------------------------- # -# Dataset options -# ---------------------------------------------------------------------------- # -__C.DATASET = AttrDict() - -__C.DATASET.CROP_SIZE = 321 -__C.DATASET.SCALE_FROM = 0.9 -__C.DATASET.SCALE_TO = 1.0 -__C.DATASET.PATH = "data/images" - -# ---------------------------------------------------------------------------- # -# Network options -# ---------------------------------------------------------------------------- # -__C.NET = AttrDict() -__C.NET.MODEL = 'vgg16' -__C.NET.BACKBONE = 'resnet50' -__C.NET.PRE_WEIGHTS_PATH = "" -__C.NET.OPT = 'SGD' -__C.NET.LR = 0.001 -__C.NET.BETA1 = 0.5 -__C.NET.MOMENTUM = 0.9 -__C.NET.WEIGHT_DECAY = 1e-5 -__C.NET.LOSS = 'SoftMargin' -__C.NET.MASK_LOSS_BCE = 1.0 -__C.NET.BG_SCORE = 0.1 # background score (only for CAM) -__C.NET.FOCAL_P = 3 -__C.NET.FOCAL_LAMBDA = 0.01 -__C.NET.PAMR_KERNEL = [1, 2, 4, 8, 12, 24] -__C.NET.PAMR_ITER = 10 -__C.NET.SG_PSI = 0.3 - -# Mask Inference -__C.MASKS = AttrDict() - -# CRF options -__C.MASKS.CRF = AttrDict() -__C.MASKS.CRF.ALPHA_LOW = 4 -__C.MASKS.CRF.ALPHA_HIGH = 32 - -# [Infered value] -__C.CUDA = False - -__C.DEBUG = False - -# [Infered value] -__C.PYTORCH_VERSION_LESS_THAN_040 = False - -def assert_and_infer_cfg(make_immutable=True): - """Call this function in your script after you have finished setting all cfg - values that are necessary (e.g., merging a config from a file, merging - command line config options, etc.). By default, this function will also - mark the global cfg as immutable to prevent changing the global cfg settings - during script execution (which can lead to hard to debug errors or code - that's harder to understand than is necessary). 
- """ - if make_immutable: - cfg.immutable(True) - - -def merge_cfg_from_file(cfg_filename): - """Load a yaml config file and merge it into the global config.""" - with open(cfg_filename, 'r') as f: - if hasattr(yaml, "FullLoader"): - yaml_cfg = AttrDict(yaml.load(f, Loader=yaml.FullLoader)) - else: - yaml_cfg = AttrDict(yaml.load(f)) - - _merge_a_into_b(yaml_cfg, __C) - -cfg_from_file = merge_cfg_from_file - - -def merge_cfg_from_cfg(cfg_other): - """Merge `cfg_other` into the global config.""" - _merge_a_into_b(cfg_other, __C) - - -def merge_cfg_from_list(cfg_list): - """Merge config keys, values in a list (e.g., from command line) into the - global config. For example, `cfg_list = ['TEST.NMS', 0.5]`. - """ - assert len(cfg_list) % 2 == 0 - for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]): - key_list = full_key.split('.') - d = __C - for subkey in key_list[:-1]: - assert subkey in d, 'Non-existent key: {}'.format(full_key) - d = d[subkey] - subkey = key_list[-1] - assert subkey in d, 'Non-existent key: {}'.format(full_key) - value = _decode_cfg_value(v) - value = _check_and_coerce_cfg_value_type( - value, d[subkey], subkey, full_key - ) - d[subkey] = value - -cfg_from_list = merge_cfg_from_list - - -def _merge_a_into_b(a, b, stack=None): - """Merge config dictionary a into config dictionary b, clobbering the - options in b whenever they are also specified in a. - """ - assert isinstance(a, AttrDict), 'Argument `a` must be an AttrDict' - assert isinstance(b, AttrDict), 'Argument `b` must be an AttrDict' - - for k, v_ in a.items(): - full_key = '.'.join(stack) + '.' + k if stack is not None else k - # a must specify keys that are in b - if k not in b: - raise KeyError('Non-existent config key: {}'.format(full_key)) - - v = copy.deepcopy(v_) - v = _decode_cfg_value(v) - v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key) - - # Recursively merge dicts - if isinstance(v, AttrDict): - try: - stack_push = [k] if stack is None else stack + [k] - _merge_a_into_b(v, b[k], stack=stack_push) - except BaseException: - raise - else: - b[k] = v - - -def _decode_cfg_value(v): - """Decodes a raw config value (e.g., from a yaml config files or command - line argument) into a Python object. - """ - # Configs parsed from raw yaml will contain dictionary keys that need to be - # converted to AttrDict objects - if isinstance(v, dict): - return AttrDict(v) - # All remaining processing is only applied to strings - if not isinstance(v, six.string_types): - return v - # Try to interpret `v` as a: - # string, number, tuple, list, dict, boolean, or None - try: - v = literal_eval(v) - # The following two excepts allow v to pass through when it represents a - # string. - # - # Longer explanation: - # The type of v is always a string (before calling literal_eval), but - # sometimes it *represents* a string and other times a data structure, like - # a list. In the case that v represents a string, what we got back from the - # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is - # ok with '"foo"', but will raise a ValueError if given 'foo'. In other - # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval - # will raise a SyntaxError. - except ValueError: - pass - except SyntaxError: - pass - return v - - -def _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key): - """Checks that `value_a`, which is intended to replace `value_b` is of the - right type. 
The type is correct if it matches exactly or is one of a few - cases in which the type can be easily coerced. - """ - # The types must match (with some exceptions) - type_b = type(value_b) - type_a = type(value_a) - if type_a is type_b: - return value_a - - # Exceptions: numpy arrays, strings, tuple<->list - if isinstance(value_b, np.ndarray): - value_a = np.array(value_a, dtype=value_b.dtype) - elif isinstance(value_b, six.string_types): - value_a = str(value_a) - elif isinstance(value_a, tuple) and isinstance(value_b, list): - value_a = list(value_a) - elif isinstance(value_a, list) and isinstance(value_b, tuple): - value_a = tuple(value_a) - else: - raise ValueError( - 'Type mismatch ({} vs. {}) with values ({} vs. {}) for config ' - 'key: {}'.format(type_b, type_a, value_b, value_a, full_key) - ) - return value_a +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + + +import yaml +import six +import os +import os.path as osp +import copy +from ast import literal_eval + +import numpy as np +from packaging import version + +from utils.collections import AttrDict + +__C = AttrDict() +# Consumers can get config by: +# from fast_rcnn_config import cfg +cfg = __C + +__C.NUM_GPUS = 1 +# Random note: avoid using '.ON' as a config key since yaml converts it to True; +# prefer 'ENABLED' instead + +# ---------------------------------------------------------------------------- # +# Training options +# ---------------------------------------------------------------------------- # +__C.TRAIN = AttrDict() +__C.TRAIN.BATCH_SIZE = 20 +__C.TRAIN.NUM_EPOCHS = 15 +__C.TRAIN.NUM_WORKERS = 4 +__C.TRAIN.MASK_LOSS = 0.0 +__C.TRAIN.PRETRAIN = 5 + +# ---------------------------------------------------------------------------- # +# Inference options +# ---------------------------------------------------------------------------- # +__C.TEST = AttrDict() +__C.TEST.METHOD = "multiscale" # multiscale | crop +__C.TEST.DATA_ROOT = "/data/your_directory" +__C.TEST.SCALES = [1, 0.5, 1.5, 2.0] +__C.TEST.FLIP = True +__C.TEST.PAD_SIZE = [1024, 1024] +__C.TEST.CROP_SIZE = [448, 448] +__C.TEST.CROP_GRID_SIZE = [2, 2] +__C.TEST.BATCH_SIZE = 8 +__C.TEST.BG_POW = 3 +__C.TEST.NUM_CLASSES = 21 + +# use ground-truth labels to remove +# false positive masks +__C.TEST.USE_GT_LABELS = False + +# if class confidence does not exceed this threshold +# the mask is removed (count as false positive) +# used only if MASKS.USE_GT_LABELS is False +__C.TEST.FP_CUT_SCORE = 0.1 + +# ---------------------------------------------------------------------------- # +# Dataset options +# ---------------------------------------------------------------------------- # +__C.DATASET = AttrDict() + +__C.DATASET.CROP_SIZE = 321 +__C.DATASET.SCALE_FROM = 0.9 +__C.DATASET.SCALE_TO = 1.0 +__C.DATASET.PATH = "data/images" + +# 
---------------------------------------------------------------------------- # +# Network options +# ---------------------------------------------------------------------------- # +__C.NET = AttrDict() +__C.NET.MODEL = 'vgg16' +__C.NET.BACKBONE = 'resnet50' +__C.NET.PRE_WEIGHTS_PATH = "" +__C.NET.OPT = 'SGD' +__C.NET.LR = 0.001 +__C.NET.BETA1 = 0.5 +__C.NET.MOMENTUM = 0.9 +__C.NET.WEIGHT_DECAY = 1e-5 +__C.NET.LOSS = 'SoftMargin' +__C.NET.MASK_LOSS_BCE = 1.0 +__C.NET.BG_SCORE = 0.1 # background score (only for CAM) +__C.NET.FOCAL_P = 3 +__C.NET.FOCAL_LAMBDA = 0.01 +__C.NET.PAMR_KERNEL = [1, 2, 4, 8, 12, 24] +__C.NET.PAMR_ITER = 10 +__C.NET.SG_PSI = 0.3 + +# Mask Inference +__C.MASKS = AttrDict() + +# CRF options +__C.MASKS.CRF = AttrDict() +__C.MASKS.CRF.ALPHA_LOW = 4 +__C.MASKS.CRF.ALPHA_HIGH = 32 + +# [Infered value] +__C.CUDA = False + +__C.DEBUG = False + +# [Infered value] +__C.PYTORCH_VERSION_LESS_THAN_040 = False + +def assert_and_infer_cfg(make_immutable=True): + """Call this function in your script after you have finished setting all cfg + values that are necessary (e.g., merging a config from a file, merging + command line config options, etc.). By default, this function will also + mark the global cfg as immutable to prevent changing the global cfg settings + during script execution (which can lead to hard to debug errors or code + that's harder to understand than is necessary). + """ + if make_immutable: + cfg.immutable(True) + + +def merge_cfg_from_file(cfg_filename): + """Load a yaml config file and merge it into the global config.""" + with open(cfg_filename, 'r') as f: + if hasattr(yaml, "FullLoader"): + yaml_cfg = AttrDict(yaml.load(f, Loader=yaml.FullLoader)) + else: + yaml_cfg = AttrDict(yaml.load(f)) + + _merge_a_into_b(yaml_cfg, __C) + +cfg_from_file = merge_cfg_from_file + + +def merge_cfg_from_cfg(cfg_other): + """Merge `cfg_other` into the global config.""" + _merge_a_into_b(cfg_other, __C) + + +def merge_cfg_from_list(cfg_list): + """Merge config keys, values in a list (e.g., from command line) into the + global config. For example, `cfg_list = ['TEST.NMS', 0.5]`. + """ + assert len(cfg_list) % 2 == 0 + for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]): + key_list = full_key.split('.') + d = __C + for subkey in key_list[:-1]: + assert subkey in d, 'Non-existent key: {}'.format(full_key) + d = d[subkey] + subkey = key_list[-1] + assert subkey in d, 'Non-existent key: {}'.format(full_key) + value = _decode_cfg_value(v) + value = _check_and_coerce_cfg_value_type( + value, d[subkey], subkey, full_key + ) + d[subkey] = value + +cfg_from_list = merge_cfg_from_list + + +def _merge_a_into_b(a, b, stack=None): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + assert isinstance(a, AttrDict), 'Argument `a` must be an AttrDict' + assert isinstance(b, AttrDict), 'Argument `b` must be an AttrDict' + + for k, v_ in a.items(): + full_key = '.'.join(stack) + '.' 
+ k if stack is not None else k + # a must specify keys that are in b + if k not in b: + raise KeyError('Non-existent config key: {}'.format(full_key)) + + v = copy.deepcopy(v_) + v = _decode_cfg_value(v) + v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key) + + # Recursively merge dicts + if isinstance(v, AttrDict): + try: + stack_push = [k] if stack is None else stack + [k] + _merge_a_into_b(v, b[k], stack=stack_push) + except BaseException: + raise + else: + b[k] = v + + +def _decode_cfg_value(v): + """Decodes a raw config value (e.g., from a yaml config files or command + line argument) into a Python object. + """ + # Configs parsed from raw yaml will contain dictionary keys that need to be + # converted to AttrDict objects + if isinstance(v, dict): + return AttrDict(v) + # All remaining processing is only applied to strings + if not isinstance(v, six.string_types): + return v + # Try to interpret `v` as a: + # string, number, tuple, list, dict, boolean, or None + try: + v = literal_eval(v) + # The following two excepts allow v to pass through when it represents a + # string. + # + # Longer explanation: + # The type of v is always a string (before calling literal_eval), but + # sometimes it *represents* a string and other times a data structure, like + # a list. In the case that v represents a string, what we got back from the + # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is + # ok with '"foo"', but will raise a ValueError if given 'foo'. In other + # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval + # will raise a SyntaxError. + except ValueError: + pass + except SyntaxError: + pass + return v + + +def _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key): + """Checks that `value_a`, which is intended to replace `value_b` is of the + right type. The type is correct if it matches exactly or is one of a few + cases in which the type can be easily coerced. + """ + # The types must match (with some exceptions) + type_b = type(value_b) + type_a = type(value_a) + if type_a is type_b: + return value_a + + # Exceptions: numpy arrays, strings, tuple<->list + if isinstance(value_b, np.ndarray): + value_a = np.array(value_a, dtype=value_b.dtype) + elif isinstance(value_b, six.string_types): + value_a = str(value_a) + elif isinstance(value_a, tuple) and isinstance(value_b, list): + value_a = list(value_a) + elif isinstance(value_a, list) and isinstance(value_b, tuple): + value_a = tuple(value_a) + else: + raise ValueError( + 'Type mismatch ({} vs. {}) with values ({} vs. {}) for config ' + 'key: {}'.format(type_b, type_a, value_b, value_a, full_key) + ) + return value_a diff --git a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/dist_utils.py b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/dist_utils.py index 91346643e8f70fb97a407de4b6fda417cd9fa6ee..9e3f2b072b230c45b051270b5730c30822c74a2c 100644 --- a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/dist_utils.py +++ b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/dist_utils.py @@ -1,185 +1,185 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -import os -import subprocess -from collections import OrderedDict - -import torch -import torch.multiprocessing as mp -from torch import distributed as dist -from torch._utils import (_flatten_dense_tensors, _take_tensors, - _unflatten_dense_tensors) - -from mmcv.utils import TORCH_VERSION - - -def init_dist(launcher, backend='nccl', **kwargs): - if mp.get_start_method(allow_none=True) is None: - mp.set_start_method('spawn') - if launcher == 'pytorch': - _init_dist_pytorch(backend, **kwargs) - elif launcher == 'mpi': - _init_dist_mpi(backend, **kwargs) - elif launcher == 'slurm': - _init_dist_slurm(backend, **kwargs) - else: - raise ValueError(f'Invalid launcher type: {launcher}') - - -def _init_dist_pytorch(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['RANK']) - offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) - num_gpus = torch.npu.device_count() - torch.npu.set_device((rank + offset) % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_mpi(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['OMPI_COMM_WORLD_RANK']) - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(rank % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_slurm(backend, port=None): - """Initialize slurm distributed training environment. - If argument ``port`` is not specified, then the master port will be system - environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system - environment variable, then a default port ``29500`` will be used. - Args: - backend (str): Backend of torch.distributed. - port (int, optional): Master port. Defaults to None. 
- """ - proc_id = int(os.environ['SLURM_PROCID']) - ntasks = int(os.environ['SLURM_NTASKS']) - node_list = os.environ['SLURM_NODELIST'] - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(proc_id % num_gpus) - addr = subprocess.getoutput( - f'scontrol show hostname {node_list} | head -n1') - # specify master port - if port is not None: - os.environ['MASTER_PORT'] = str(port) - elif 'MASTER_PORT' in os.environ: - pass # use MASTER_PORT in the environment variable - else: - # 29500 is torch.distributed default port - os.environ['MASTER_PORT'] = '29500' - # use MASTER_ADDR in the environment variable if it already exists - if 'MASTER_ADDR' not in os.environ: - os.environ['MASTER_ADDR'] = addr - os.environ['WORLD_SIZE'] = str(ntasks) - os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) - os.environ['RANK'] = str(proc_id) - dist.init_process_group(backend=backend) - - -def get_dist_info(): - if TORCH_VERSION < '1.0': - initialized = dist._initialized - else: - if dist.is_available(): - initialized = dist.is_initialized() - else: - initialized = False - if initialized: - rank = dist.get_rank() - world_size = dist.get_world_size() - else: - rank = 0 - world_size = 1 - return rank, world_size - - -def master_only(func): - - @functools.wraps(func) - def wrapper(*args, **kwargs): - rank, _ = get_dist_info() - if rank == 0: - return func(*args, **kwargs) - - return wrapper - - -def allreduce_params(params, coalesce=True, bucket_size_mb=-1): - """Allreduce parameters. - Args: - params (list[torch.Parameters]): List of parameters or buffers of a - model. - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - _, world_size = get_dist_info() - if world_size == 1: - return - params = [param.data for param in params] - if coalesce: - _allreduce_coalesced(params, world_size, bucket_size_mb) - else: - for tensor in params: - dist.all_reduce(tensor.div_(world_size)) - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - """Allreduce gradients. - Args: - params (list[torch.Parameters]): List of parameters of a model - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - grads = [ - param.grad.data for param in params - if param.requires_grad and param.grad is not None - ] - _, world_size = get_dist_info() - if world_size == 1: - return - if coalesce: - _allreduce_coalesced(grads, world_size, bucket_size_mb) - else: - for tensor in grads: - dist.all_reduce(tensor.div_(world_size)) - - -def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): - if bucket_size_mb > 0: - bucket_size_bytes = bucket_size_mb * 1024 * 1024 - buckets = _take_tensors(tensors, bucket_size_bytes) - else: - buckets = OrderedDict() - for tensor in tensors: - tp = tensor.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(tensor) - buckets = buckets.values() - - for bucket in buckets: - flat_tensors = _flatten_dense_tensors(bucket) - dist.all_reduce(flat_tensors) - flat_tensors.div_(world_size) - for tensor, synced in zip( - bucket, _unflatten_dense_tensors(flat_tensors, bucket)): +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from mmcv.utils import TORCH_VERSION + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) + num_gpus = torch.npu.device_count() + torch.npu.set_device((rank + offset) % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if TORCH_VERSION < '1.0': + initialized = dist._initialized + else: + if dist.is_available(): + initialized = dist.is_initialized() + else: + initialized = False + if initialized: + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. 
+ """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): tensor.copy_(synced) \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/distributed.py b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/distributed.py index fb2bae922930929dc6fa46ad612463cc335dc208..4c89d09fc490488789ce565a7bad43ce7ded1568 100644 --- a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/distributed.py +++ b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/distributed.py @@ -1,119 +1,119 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch.nn.parallel.distributed import (DistributedDataParallel, - _find_tensors) - -from mmcv import print_log -from mmcv.utils import TORCH_VERSION -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(DistributedDataParallel): - """The DDP module that supports DataContainer. - - MMDDP has two main differences with PyTorch DDP: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data. - - It implement two APIs ``train_step()`` and ``val_step()``. - """ - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - """train_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.train_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. 
- if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids and False: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.train_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - output = self.module.train_step(*inputs[0], **kwargs[0]) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output - - def val_step(self, *inputs, **kwargs): - """val_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.val_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. - if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.val_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - output = self.module.val_step(*inputs, **kwargs) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from mmcv import print_log +from mmcv.utils import TORCH_VERSION +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. 
+ + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids and False: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. 
+ if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output diff --git a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/optimizer.py b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/optimizer.py index 59eff597b8c798f9189b569b970778be1335dfd5..4219fcf100e49d5f49f9c3fa05959cfbb53601fa 100644 --- a/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/optimizer.py +++ b/PyTorch/contrib/cv/video/C3D/additional_need/mmcv/optimizer.py @@ -1,182 +1,182 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -from collections import defaultdict -from itertools import chain - -from torch.nn.utils import clip_grad - -from ..dist_utils import allreduce_grads -from ..fp16_utils import LossScaler, wrap_fp16_model -from .hook import HOOKS, Hook -from apex import amp +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
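# --- Editorial sketch (not part of the upstream patch) ------------------------
# The OptimizerHook added in this optimizer.py hunk drives its backward pass
# through NVIDIA apex loss scaling (amp.scale_loss) before calling
# optimizer.step().  The standalone sketch below shows that same amp pattern in
# isolation; the toy model, optimizer and training loop are placeholders, and
# opt_level='O2' mirrors the AMP_Type reported in the training tables of this
# repository.  It is an illustration of the pattern, not code from the patch.
import torch
from apex import amp  # apex is required by the patched hook as well
from torch.nn.utils import clip_grad

model = torch.nn.Linear(16, 4).cuda()                      # placeholder network
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
model, optimizer = amp.initialize(model, optimizer, opt_level='O2')

for _ in range(10):                                        # placeholder loop
    inputs = torch.randn(8, 16).cuda()
    labels = torch.randint(0, 4, (8,)).cuda()
    optimizer.zero_grad()
    loss = torch.nn.functional.cross_entropy(model(inputs), labels)
    # scale the loss so fp16 gradients do not underflow, then backpropagate
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    # clip on the fp32 master params, mirroring OptimizerHook.clip_grads
    clip_grad.clip_grad_norm_(amp.master_params(optimizer),
                              max_norm=40, norm_type=2)
    optimizer.step()
# ------------------------------------------------------------------------------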
+ +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook +from apex import amp import time - -@HOOKS.register_module() -class OptimizerHook(Hook): - - def __init__(self, grad_clip=None): - self.grad_clip = grad_clip - - def clip_grads(self, params): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - #todo add a line - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **self.grad_clip) - - def after_train_iter(self, runner): - runner.optimizer.zero_grad() - with amp.scale_loss(runner.outputs['loss'], runner.optimizer ) as scaled_loss: - scaled_loss.backward() - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.npu().parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + #todo add a line + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + with amp.scale_loss(runner.outputs['loss'], runner.optimizer ) as scaled_loss: + scaled_loss.backward() + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.npu().parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) runner.optimizer.step() runner.batch_time.update(time.time()-runner.end) - runner.end = time.time() - - - -@HOOKS.register_module() -class Fp16OptimizerHook(OptimizerHook): - """FP16 optimizer hook. - - The steps of fp16 optimizer is as follows. - 1. Scale the loss value. - 2. BP in the fp16 model. - 2. Copy gradients from fp16 model to fp32 weights. - 3. Update fp32 weights. - 4. Copy updated parameters from fp32 weights to fp16 model. - - Refer to https://arxiv.org/abs/1710.03740 for more details. - - Args: - loss_scale (float | str | dict): Scale factor multiplied with loss. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of LossScaler. - Defaults to 512. - """ - - def __init__(self, - grad_clip=None, - coalesce=True, - bucket_size_mb=-1, - loss_scale=512., - distributed=True): - self.grad_clip = grad_clip - self.coalesce = coalesce - self.bucket_size_mb = bucket_size_mb - self.distributed = distributed - if loss_scale == 'dynamic': - self.loss_scaler = LossScaler(mode='dynamic') - elif isinstance(loss_scale, float): - self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') - elif isinstance(loss_scale, dict): - self.loss_scaler = LossScaler(**loss_scale) - else: - raise ValueError('loss_scale must be of type float, dict, or ' - f'"dynamic", got {loss_scale}') - - def before_run(self, runner): - """Preparing steps before Mixed Precision Training. - - 1. Make a master copy of fp32 weights for optimization. - 2. Convert the main model from fp32 to fp16. 
- """ - # keep a copy of fp32 weights - old_groups = runner.optimizer.param_groups - runner.optimizer.param_groups = copy.deepcopy( - runner.optimizer.param_groups) - state = defaultdict(dict) - p_map = { - old_p: p - for old_p, p in zip( - chain(*(g['params'] for g in old_groups)), - chain(*(g['params'] for g in runner.optimizer.param_groups))) - } - for k, v in runner.optimizer.state.items(): - state[p_map[k]] = v - runner.optimizer.state = state - # convert model to fp16 - wrap_fp16_model(runner.model) - - def copy_grads_to_fp32(self, fp16_net, fp32_weights): - """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): - if fp16_param.grad is not None: - if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new(fp32_param.size()) - fp32_param.grad.copy_(fp16_param.grad) - - def copy_params_to_fp16(self, fp16_net, fp32_weights): - """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): - fp16_param.data.copy_(fp32_param.data) - - def after_train_iter(self, runner): - """Backward optimization steps for Mixed Precision Training. For - dynamic loss scaling, please refer `loss_scalar.py` - - 1. Scale the loss by a scale factor. - 2. Backward the loss to obtain the gradients (fp16). - 3. Copy gradients from the model to the fp32 weight copy. - 4. Scale the gradients back and update the fp32 weight copy. - 5. Copy back the params from fp32 weight copy to the fp16 model. - """ - # clear grads of last iteration - runner.model.zero_grad() - runner.optimizer.zero_grad() - # scale the loss value - scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale - scaled_loss.backward() - # copy fp16 grads in the model to fp32 params in the optimizer - - fp32_weights = [] - for param_group in runner.optimizer.param_groups: - fp32_weights += param_group['params'] - self.copy_grads_to_fp32(runner.model, fp32_weights) - # allreduce grads - if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) - - has_overflow = self.loss_scaler.has_overflow(fp32_weights) - # if has overflow, skip this iteration - if not has_overflow: - # scale the gradients back - for param in fp32_weights: - if param.grad is not None: - param.grad.div_(self.loss_scaler.loss_scale) - if self.grad_clip is not None: - grad_norm = self.clip_grads(fp32_weights) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) - # update fp32 params - runner.optimizer.step() - # copy fp32 params to the fp16 model - self.copy_params_to_fp16(runner.model, fp32_weights) - self.loss_scaler.update_scale(has_overflow) - if has_overflow: - runner.logger.warning('Check overflow, downscale loss scale ' - f'to {self.loss_scaler.cur_scale}') + runner.end = time.time() + + + +@HOOKS.register_module() +class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook. + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. + + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor multiplied with loss. + If loss_scale is a float, static loss scaling will be used with + the specified scale. 
If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. 
+ """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') diff --git a/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py b/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py index a2efdbe74322f30057214c44af9804bd7ae93c19..f5475765077c996f0a54187b24a09986a57851d7 100644 --- a/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py +++ b/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. 
valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=16, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.0025, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 60 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=16, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.0025, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 60 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-1p-npu/' \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py 
b/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py index 7afb668be6e510b5e9fcf7e0165cb2b036068a97..7f68a5166d8d438584c147721985f6d136581b00 100644 --- a/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py +++ b/PyTorch/contrib/cv/video/C3D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = '/home/linus/rawframes/' -data_root_val = '/home/linus/rawframes/' -split = 1 # official train/test splits. valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=42, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.008, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = 
dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', teps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 70 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = '/home/linus/rawframes/' +data_root_val = '/home/linus/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=42, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + 
data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.008, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', teps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 70 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-8p-npu/' diff --git a/PyTorch/contrib/cv/video/C3D/tools/dist_train.sh b/PyTorch/contrib/cv/video/C3D/tools/dist_train.sh old mode 100755 new mode 100644 diff --git a/PyTorch/contrib/cv/video/GloRe/README.md b/PyTorch/contrib/cv/video/GloRe/README.md index f26ecfaec3fe7082f5c32c1a3386bf4e4b40e0b3..3b6e55924a976142ff69d48b4cef8ddd65031949 100644 --- a/PyTorch/contrib/cv/video/GloRe/README.md +++ b/PyTorch/contrib/cv/video/GloRe/README.md @@ -1,50 +1,50 @@ -# GloRe 训练 -# Graph-Based Global Reasoning Networks -This implements training of GloRe on the UCF-101 dataset. -- Reference implementation: -``` -url=https: https://github.com/facebookresearch/GloRe -``` - -## GloRe Detail # - -As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. -Therefore, GloRe is re-implemented using semantics such as custom OP. - - -## Requirements # - -- Install PyTorch ([pytorch.org](http://pytorch.org)) -- `pip install -r requirements.txt` -- The UCF-101 Dataset can be downloaded from the links below.Move the datasets to directory ./dataset/UCF101/raw/data . - - Train Set : [Download UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) - - Test Set : [Download UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) -- The pretrained model can be downloaded from the links below. Move the datasets to directory ./network/pretrain . - - Pretrained model : [Download pth](https://dl.fbaipublicfiles.com/glore/kinetics/resnet50-lite_3d_8x8_w-glore_2-3_ep-0000.pth). Create directory ./network/pretrained/ and place pretrained model under directory ./network/pretrained/ - -## Training # -To train a model, run `train_kinetics.py`: - -```bash -# 1p train perf -bash test/train_performance_1p.sh - -# 8p train perf -bash test/train_performance_8p.sh - -# 8p train full -bash test/train_full_8p.sh - -# finetuning -bash test/train_finetune_1p.sh -``` - -## GloRe training result # - -| ACC@1 | FPS | Npu_nums | Epochs | AMP_Type | -| :------: | :------: | :------: | :------: | :------: | -| - | 11.647 | 1 | 90 | O2 | -| 92.39 | 141.31 | 8 | 90 | O2 | - - - +# GloRe 训练 +# Graph-Based Global Reasoning Networks +This implements training of GloRe on the UCF-101 dataset. +- Reference implementation: +``` +url=https: https://github.com/facebookresearch/GloRe +``` + +## GloRe Detail # + +As of the current date, Ascend-Pytorch is still inefficient for contiguous operations. +Therefore, GloRe is re-implemented using semantics such as custom OP. 
+ + +## Requirements # + +- Install PyTorch ([pytorch.org](http://pytorch.org)) +- `pip install -r requirements.txt` +- The UCF-101 Dataset can be downloaded from the links below.Move the datasets to directory ./dataset/UCF101/raw/data . + - Train Set : [Download UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) + - Test Set : [Download UCF-101](https://www.crcv.ucf.edu/data/UCF101/UCF101.rar) +- The pretrained model can be downloaded from the links below. Move the datasets to directory ./network/pretrain . + - Pretrained model : [Download pth](https://dl.fbaipublicfiles.com/glore/kinetics/resnet50-lite_3d_8x8_w-glore_2-3_ep-0000.pth). Create directory ./network/pretrained/ and place pretrained model under directory ./network/pretrained/ + +## Training # +To train a model, run `train_kinetics.py`: + +```bash +# 1p train perf +bash test/train_performance_1p.sh + +# 8p train perf +bash test/train_performance_8p.sh + +# 8p train full +bash test/train_full_8p.sh + +# finetuning +bash test/train_finetune_1p.sh +``` + +## GloRe training result # + +| ACC@1 | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | +| - | 11.647 | 1 | 90 | O2 | +| 92.39 | 141.31 | 8 | 90 | O2 | + + + diff --git a/PyTorch/contrib/cv/video/GloRe/modelzoo_level.txt b/PyTorch/contrib/cv/video/GloRe/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/contrib/cv/video/GloRe/modelzoo_level.txt +++ b/PyTorch/contrib/cv/video/GloRe/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/NonLocal/config/default_runtime.py b/PyTorch/contrib/cv/video/NonLocal/config/default_runtime.py index 41329bf37107afc6238c8360bc11dd80efd93324..7652836b33c6b6c3927a2e0bea18322d4cc13788 100644 --- a/PyTorch/contrib/cv/video/NonLocal/config/default_runtime.py +++ b/PyTorch/contrib/cv/video/NonLocal/config/default_runtime.py @@ -1,27 +1,27 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -checkpoint_config = dict(interval=5) -log_config = dict( - interval=1, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook'), - ]) -# runtime settings -dist_params = dict(backend='nccl') -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +checkpoint_config = dict(interval=5) +log_config = dict( + interval=1, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +# runtime settings +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/PyTorch/contrib/cv/video/NonLocal/config/tsm_r50.py b/PyTorch/contrib/cv/video/NonLocal/config/tsm_r50.py index d7124c2e442bf4ce8cddfa80f30a3a0d080d2f96..6015b73cb2f4abfd05a31ac3e7cc532243f58879 100644 --- a/PyTorch/contrib/cv/video/NonLocal/config/tsm_r50.py +++ b/PyTorch/contrib/cv/video/NonLocal/config/tsm_r50.py @@ -1,35 +1,35 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# model settings -model = dict( - type='Recognizer2D', - backbone=dict( - type='ResNetTSM', - pretrained='torchvision://resnet50', - depth=50, - norm_eval=False, - shift_div=8), - cls_head=dict( - type='TSMHead', - num_classes=400, - in_channels=2048, - spatial_type='avg', - consensus=dict(type='AvgConsensus', dim=1), - dropout_ratio=0.5, - init_std=0.001, - is_shift=True), - # model training and testing settings - train_cfg=None, - test_cfg=dict(average_clips='prob')) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +# model settings +model = dict( + type='Recognizer2D', + backbone=dict( + type='ResNetTSM', + pretrained='torchvision://resnet50', + depth=50, + norm_eval=False, + shift_div=8), + cls_head=dict( + type='TSMHead', + num_classes=400, + in_channels=2048, + spatial_type='avg', + consensus=dict(type='AvgConsensus', dim=1), + dropout_ratio=0.5, + init_std=0.001, + is_shift=True), + # model training and testing settings + train_cfg=None, + test_cfg=dict(average_clips='prob')) diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/apis/train.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/apis/train.py index 59f1bc0a8ab759f36a0485076f0585add1bd384e..d50c7f7e6ae9f8487dea82a33bb3351356bbaaf8 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/apis/train.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/apis/train.py @@ -1,267 +1,267 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import copy as cp -import os.path as osp - -from apex import amp - -import torch -from mmcv.parallel import MMDataParallel, MMDistributedDataParallel -from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, - build_optimizer, get_dist_info) -from mmcv.runner.hooks import Fp16OptimizerHook - -from ..core import (DistEvalHook, EvalHook, OmniSourceDistSamplerSeedHook, - OmniSourceRunner) -from ..datasets import build_dataloader, build_dataset -from ..utils import PreciseBNHook, get_root_logger -from .test import multi_gpu_test - - -def train_model(model, - dataset, - cfg, - distributed=False, - validate=False, - test=dict(test_best=False, test_last=False), - timestamp=None, - meta=None): - """Train model entry function. - - Args: - model (nn.Module): The model to be trained. - dataset (:obj:`Dataset`): Train dataset. - cfg (dict): The config dict for training. - distributed (bool): Whether to use distributed training. - Default: False. - validate (bool): Whether to do evaluation. Default: False. - test (dict): The testing option, with two keys: test_last & test_best. - The value is True or False, indicating whether to test the - corresponding checkpoint. - Default: dict(test_best=False, test_last=False). - timestamp (str | None): Local time for runner. Default: None. - meta (dict | None): Meta dict to record some important information. 
- Default: None - """ - logger = get_root_logger(log_level=cfg.log_level) - - # prepare data loaders - dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] - - dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get( - 'workers_per_gpu', 1), - num_gpus=len(cfg.gpu_ids), - dist=distributed, - seed=cfg.seed) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('train_dataloader', {})) - - if cfg.omnisource: - # The option can override videos_per_gpu - train_ratio = cfg.data.get('train_ratio', [1] * len(dataset)) - omni_videos_per_gpu = cfg.data.get('omni_videos_per_gpu', None) - if omni_videos_per_gpu is None: - dataloader_settings = [dataloader_setting] * len(dataset) - else: - dataloader_settings = [] - for videos_per_gpu in omni_videos_per_gpu: - this_setting = cp.deepcopy(dataloader_setting) - this_setting['videos_per_gpu'] = videos_per_gpu - dataloader_settings.append(this_setting) - data_loaders = [ - build_dataloader(ds, **setting) - for ds, setting in zip(dataset, dataloader_settings) - ] - - else: - data_loaders = [ - build_dataloader(ds, **dataloader_setting) for ds in dataset - ] - - # build runner - optimizer = build_optimizer(model, cfg.optimizer) - - # Allow Amp to perform casts as required by the opt_level - if cfg.AMP: - # model, optimizer = amp.initialize(model.cuda(), - # optimizer, - # opt_level=cfg.OPT_LEVEL, - # loss_scale=cfg.LOSS_SCALE) - model, optimizer = amp.initialize(model.npu(), - optimizer, - opt_level=cfg.OPT_LEVEL, - loss_scale=cfg.LOSS_SCALE, - combine_grad=True) - - # put model on gpus - if distributed: - find_unused_parameters = cfg.get('find_unused_parameters', False) - # Sets the `find_unused_parameters` parameter in - # torch.nn.parallel.DistributedDataParallel - model = MMDistributedDataParallel( - model, - device_ids=[torch.npu.current_device()], - broadcast_buffers=False, - find_unused_parameters=find_unused_parameters) - # model = MMDistributedDataParallel( - # model, - # device_ids=[torch.cuda.current_device()], - # broadcast_buffers=False, - # find_unused_parameters=find_unused_parameters) - - else: - # In 1-p training, we don't use Dataparallel - # model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), - # device_ids=cfg.gpu_ids) - model = model.npu() - - Runner = OmniSourceRunner if cfg.omnisource else EpochBasedRunner - runner = Runner(model, - optimizer=optimizer, - work_dir=cfg.work_dir, - logger=logger, - meta=meta, - distributed=distributed) - # an ugly workaround to make .log and .log.json filenames the same - runner.timestamp = timestamp - - # fp16 setting - fp16_cfg = cfg.get('fp16', None) - if fp16_cfg is not None: - optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config, - **fp16_cfg, - distributed=distributed) - elif distributed and 'type' not in cfg.optimizer_config: - optimizer_config = OptimizerHook(**cfg.optimizer_config) - else: - optimizer_config = cfg.optimizer_config - - # register hooks - runner.register_training_hooks(cfg.lr_config, optimizer_config, - cfg.checkpoint_config, cfg.log_config, - cfg.get('momentum_config', None)) - if distributed: - if cfg.omnisource: - runner.register_hook(OmniSourceDistSamplerSeedHook()) - else: - runner.register_hook(DistSamplerSeedHook()) - - # precise bn setting - if cfg.get('precise_bn', False): - precise_bn_dataset = build_dataset(cfg.data.train) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=0, # save memory and time - num_gpus=len(cfg.gpu_ids), - 
dist=distributed, - seed=cfg.seed) - data_loader_precise_bn = build_dataloader(precise_bn_dataset, - **dataloader_setting) - precise_bn_hook = PreciseBNHook(data_loader_precise_bn, - **cfg.get('precise_bn')) - runner.register_hook(precise_bn_hook) - - if validate: - eval_cfg = cfg.get('evaluation', {}) - val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get('workers_per_gpu', 1), - # cfg.gpus will be ignored if distributed - num_gpus=len(cfg.gpu_ids), - dist=distributed, - shuffle=False) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('val_dataloader', {})) - val_dataloader = build_dataloader(val_dataset, **dataloader_setting) - eval_hook = DistEvalHook(val_dataloader, **eval_cfg) if distributed \ - else EvalHook(val_dataloader, **eval_cfg) - runner.register_hook(eval_hook) - - if cfg.resume_from: - runner.resume(cfg.resume_from) - elif cfg.load_from: - runner.load_checkpoint(cfg.load_from) - runner_kwargs = dict() - if cfg.omnisource: - runner_kwargs = dict(train_ratio=train_ratio) - runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs) - - if test['test_last'] or test['test_best']: - best_ckpt_path = None - if test['test_best']: - if hasattr(eval_hook, 'best_ckpt_path'): - best_ckpt_path = eval_hook.best_ckpt_path - - if best_ckpt_path is None or not osp.exists(best_ckpt_path): - test['test_best'] = False - if best_ckpt_path is None: - runner.logger.info('Warning: test_best set as True, but ' - 'is not applicable ' - '(eval_hook.best_ckpt_path is None)') - else: - runner.logger.info('Warning: test_best set as True, but ' - 'is not applicable (best_ckpt ' - f'{best_ckpt_path} not found)') - if not test['test_last']: - return - - test_dataset = build_dataset(cfg.data.test, dict(test_mode=True)) - gpu_collect = cfg.get('evaluation', {}).get('gpu_collect', False) - tmpdir = cfg.get('evaluation', {}).get('tmpdir', - osp.join(cfg.work_dir, 'tmp')) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get('workers_per_gpu', 1), - num_gpus=len(cfg.gpu_ids), - dist=distributed, - shuffle=False) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('test_dataloader', {})) - - test_dataloader = build_dataloader(test_dataset, **dataloader_setting) - - names, ckpts = [], [] - - if test['test_last']: - names.append('last') - ckpts.append(None) - if test['test_best']: - names.append('best') - ckpts.append(best_ckpt_path) - - for name, ckpt in zip(names, ckpts): - if ckpt is not None: - runner.load_checkpoint(ckpt) - - outputs = multi_gpu_test(runner.model, test_dataloader, tmpdir, - gpu_collect) - rank, _ = get_dist_info() - if rank == 0: - out = osp.join(cfg.work_dir, f'{name}_pred.pkl') - test_dataset.dump_results(outputs, out) - - eval_cfg = cfg.get('evaluation', {}) - for key in [ - 'interval', 'tmpdir', 'start', 'gpu_collect', - 'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers' - ]: - eval_cfg.pop(key, None) - - eval_res = test_dataset.evaluate(outputs, **eval_cfg) - runner.logger.info(f'Testing results of the {name} checkpoint') - for metric_name, val in eval_res.items(): - runner.logger.info(f'{metric_name}: {val:.04f}') +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import copy as cp +import os.path as osp + +from apex import amp + +import torch +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, + build_optimizer, get_dist_info) +from mmcv.runner.hooks import Fp16OptimizerHook + +from ..core import (DistEvalHook, EvalHook, OmniSourceDistSamplerSeedHook, + OmniSourceRunner) +from ..datasets import build_dataloader, build_dataset +from ..utils import PreciseBNHook, get_root_logger +from .test import multi_gpu_test + + +def train_model(model, + dataset, + cfg, + distributed=False, + validate=False, + test=dict(test_best=False, test_last=False), + timestamp=None, + meta=None): + """Train model entry function. + + Args: + model (nn.Module): The model to be trained. + dataset (:obj:`Dataset`): Train dataset. + cfg (dict): The config dict for training. + distributed (bool): Whether to use distributed training. + Default: False. + validate (bool): Whether to do evaluation. Default: False. + test (dict): The testing option, with two keys: test_last & test_best. + The value is True or False, indicating whether to test the + corresponding checkpoint. + Default: dict(test_best=False, test_last=False). + timestamp (str | None): Local time for runner. Default: None. + meta (dict | None): Meta dict to record some important information. 
+ Default: None + """ + logger = get_root_logger(log_level=cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + + dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get( + 'workers_per_gpu', 1), + num_gpus=len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('train_dataloader', {})) + + if cfg.omnisource: + # The option can override videos_per_gpu + train_ratio = cfg.data.get('train_ratio', [1] * len(dataset)) + omni_videos_per_gpu = cfg.data.get('omni_videos_per_gpu', None) + if omni_videos_per_gpu is None: + dataloader_settings = [dataloader_setting] * len(dataset) + else: + dataloader_settings = [] + for videos_per_gpu in omni_videos_per_gpu: + this_setting = cp.deepcopy(dataloader_setting) + this_setting['videos_per_gpu'] = videos_per_gpu + dataloader_settings.append(this_setting) + data_loaders = [ + build_dataloader(ds, **setting) + for ds, setting in zip(dataset, dataloader_settings) + ] + + else: + data_loaders = [ + build_dataloader(ds, **dataloader_setting) for ds in dataset + ] + + # build runner + optimizer = build_optimizer(model, cfg.optimizer) + + # Allow Amp to perform casts as required by the opt_level + if cfg.AMP: + # model, optimizer = amp.initialize(model.cuda(), + # optimizer, + # opt_level=cfg.OPT_LEVEL, + # loss_scale=cfg.LOSS_SCALE) + model, optimizer = amp.initialize(model.npu(), + optimizer, + opt_level=cfg.OPT_LEVEL, + loss_scale=cfg.LOSS_SCALE, + combine_grad=True) + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel + model = MMDistributedDataParallel( + model, + device_ids=[torch.npu.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + # model = MMDistributedDataParallel( + # model, + # device_ids=[torch.cuda.current_device()], + # broadcast_buffers=False, + # find_unused_parameters=find_unused_parameters) + + else: + # In 1-p training, we don't use Dataparallel + # model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), + # device_ids=cfg.gpu_ids) + model = model.npu() + + Runner = OmniSourceRunner if cfg.omnisource else EpochBasedRunner + runner = Runner(model, + optimizer=optimizer, + work_dir=cfg.work_dir, + logger=logger, + meta=meta, + distributed=distributed) + # an ugly workaround to make .log and .log.json filenames the same + runner.timestamp = timestamp + + # fp16 setting + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config, + **fp16_cfg, + distributed=distributed) + elif distributed and 'type' not in cfg.optimizer_config: + optimizer_config = OptimizerHook(**cfg.optimizer_config) + else: + optimizer_config = cfg.optimizer_config + + # register hooks + runner.register_training_hooks(cfg.lr_config, optimizer_config, + cfg.checkpoint_config, cfg.log_config, + cfg.get('momentum_config', None)) + if distributed: + if cfg.omnisource: + runner.register_hook(OmniSourceDistSamplerSeedHook()) + else: + runner.register_hook(DistSamplerSeedHook()) + + # precise bn setting + if cfg.get('precise_bn', False): + precise_bn_dataset = build_dataset(cfg.data.train) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=0, # save memory and time + num_gpus=len(cfg.gpu_ids), + 
dist=distributed, + seed=cfg.seed) + data_loader_precise_bn = build_dataloader(precise_bn_dataset, + **dataloader_setting) + precise_bn_hook = PreciseBNHook(data_loader_precise_bn, + **cfg.get('precise_bn')) + runner.register_hook(precise_bn_hook) + + if validate: + eval_cfg = cfg.get('evaluation', {}) + val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get('workers_per_gpu', 1), + # cfg.gpus will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('val_dataloader', {})) + val_dataloader = build_dataloader(val_dataset, **dataloader_setting) + eval_hook = DistEvalHook(val_dataloader, **eval_cfg) if distributed \ + else EvalHook(val_dataloader, **eval_cfg) + runner.register_hook(eval_hook) + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner_kwargs = dict() + if cfg.omnisource: + runner_kwargs = dict(train_ratio=train_ratio) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs) + + if test['test_last'] or test['test_best']: + best_ckpt_path = None + if test['test_best']: + if hasattr(eval_hook, 'best_ckpt_path'): + best_ckpt_path = eval_hook.best_ckpt_path + + if best_ckpt_path is None or not osp.exists(best_ckpt_path): + test['test_best'] = False + if best_ckpt_path is None: + runner.logger.info('Warning: test_best set as True, but ' + 'is not applicable ' + '(eval_hook.best_ckpt_path is None)') + else: + runner.logger.info('Warning: test_best set as True, but ' + 'is not applicable (best_ckpt ' + f'{best_ckpt_path} not found)') + if not test['test_last']: + return + + test_dataset = build_dataset(cfg.data.test, dict(test_mode=True)) + gpu_collect = cfg.get('evaluation', {}).get('gpu_collect', False) + tmpdir = cfg.get('evaluation', {}).get('tmpdir', + osp.join(cfg.work_dir, 'tmp')) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get('workers_per_gpu', 1), + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('test_dataloader', {})) + + test_dataloader = build_dataloader(test_dataset, **dataloader_setting) + + names, ckpts = [], [] + + if test['test_last']: + names.append('last') + ckpts.append(None) + if test['test_best']: + names.append('best') + ckpts.append(best_ckpt_path) + + for name, ckpt in zip(names, ckpts): + if ckpt is not None: + runner.load_checkpoint(ckpt) + + outputs = multi_gpu_test(runner.model, test_dataloader, tmpdir, + gpu_collect) + rank, _ = get_dist_info() + if rank == 0: + out = osp.join(cfg.work_dir, f'{name}_pred.pkl') + test_dataset.dump_results(outputs, out) + + eval_cfg = cfg.get('evaluation', {}) + for key in [ + 'interval', 'tmpdir', 'start', 'gpu_collect', + 'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers' + ]: + eval_cfg.pop(key, None) + + eval_res = test_dataset.evaluate(outputs, **eval_cfg) + runner.logger.info(f'Testing results of the {name} checkpoint') + for metric_name, val in eval_res.items(): + runner.logger.info(f'{metric_name}: {val:.04f}') diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/datasets/pipelines/formating.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/datasets/pipelines/formating.py index a7d0876a040cc435060f9056c6343fc0fd9f7b3c..3811c61addc5060784cf69acc2613b9b33567ea9 
100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/datasets/pipelines/formating.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/datasets/pipelines/formating.py @@ -1,378 +1,378 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from collections.abc import Sequence - -import mmcv -import numpy as np -import torch -from mmcv.parallel import DataContainer as DC - -from ..builder import PIPELINES - - -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. - """ - if isinstance(data, torch.Tensor): - return data - if isinstance(data, np.ndarray): - return torch.from_numpy(data) - if isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - if isinstance(data, int): - return torch.LongTensor([data]) - if isinstance(data, float): - return torch.FloatTensor([data]) - raise TypeError(f'type {type(data)} cannot be converted to tensor.') - - -@PIPELINES.register_module() -class ToTensor: - """Convert some values in results dict to `torch.Tensor` type in data - loader pipeline. - - Args: - keys (Sequence[str]): Required keys to be converted. - """ - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Performs the ToTensor formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = to_tensor(results[key]) - return results - - def __repr__(self): - return f'{self.__class__.__name__}(keys={self.keys})' - - -@PIPELINES.register_module() -class Rename: - """Rename the key in results. - - Args: - mapping (dict): The keys in results that need to be renamed. The key of - the dict is the original name, while the value is the new name. If - the original name not found in results, do nothing. - Default: dict(). - """ - def __init__(self, mapping): - self.mapping = mapping - - def __call__(self, results): - for key, value in self.mapping.items(): - if key in results: - assert isinstance(key, str) and isinstance(value, str) - assert value not in results, ('the new name already exists in ' - 'results') - results[value] = results[key] - results.pop(key) - return results - - -@PIPELINES.register_module() -class ToDataContainer: - """Convert the data to DataContainer. - - Args: - fields (Sequence[dict]): Required fields to be converted - with keys and attributes. E.g. - fields=(dict(key='gt_bbox', stack=False),). - Note that key can also be a list of keys, if so, every tensor in - the list will be converted to DataContainer. - """ - def __init__(self, fields): - self.fields = fields - - def __call__(self, results): - """Performs the ToDataContainer formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. 
- """ - for field in self.fields: - _field = field.copy() - key = _field.pop('key') - if isinstance(key, list): - for item in key: - results[item] = DC(results[item], **_field) - else: - results[key] = DC(results[key], **_field) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(fields={self.fields})' - - -@PIPELINES.register_module() -class ImageToTensor: - """Convert image type to `torch.Tensor` type. - - Args: - keys (Sequence[str]): Required keys to be converted. - """ - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Performs the ImageToTensor formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = to_tensor(results[key].transpose(2, 0, 1)) - return results - - def __repr__(self): - return f'{self.__class__.__name__}(keys={self.keys})' - - -@PIPELINES.register_module() -class Transpose: - """Transpose image channels to a given order. - - Args: - keys (Sequence[str]): Required keys to be converted. - order (Sequence[int]): Image channel order. - """ - def __init__(self, keys, order): - self.keys = keys - self.order = order - - def __call__(self, results): - """Performs the Transpose formatting. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = results[key].transpose(self.order) - return results - - def __repr__(self): - return (f'{self.__class__.__name__}(' - f'keys={self.keys}, order={self.order})') - - -@PIPELINES.register_module() -class Collect: - """Collect data from the loader relevant to the specific task. - - This keeps the items in ``keys`` as it is, and collect items in - ``meta_keys`` into a meta item called ``meta_name``.This is usually - the last stage of the data loader pipeline. - For example, when keys='imgs', meta_keys=('filename', 'label', - 'original_shape'), meta_name='img_metas', the results will be a dict with - keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of - another dict with keys 'filename', 'label', 'original_shape'. - - Args: - keys (Sequence[str]): Required keys to be collected. - meta_name (str): The name of the key that contains meta infomation. - This key is always populated. Default: "img_metas". - meta_keys (Sequence[str]): Keys that are collected under meta_name. - The contents of the ``meta_name`` dictionary depends on - ``meta_keys``. - By default this includes: - - - "filename": path to the image file - - "label": label of the image file - - "original_shape": original shape of the image as a tuple - (h, w, c) - - "img_shape": shape of the image input to the network as a tuple - (h, w, c). Note that images may be zero padded on the - bottom/right, if the batch tensor is larger than this shape. - - "pad_shape": image shape after padding - - "flip_direction": a str in ("horiziontal", "vertival") to - indicate if the image is fliped horizontally or vertically. - - "img_norm_cfg": a dict of normalization information: - - mean - per channel mean subtraction - - std - per channel std divisor - - to_rgb - bool indicating if bgr was converted to rgb - nested (bool): If set as True, will apply data[x] = [data[x]] to all - items in data. The arg is added for compatibility. Default: False. 
- """ - def __init__(self, - keys, - meta_keys=('filename', 'label', 'original_shape', 'img_shape', - 'pad_shape', 'flip_direction', 'img_norm_cfg'), - meta_name='img_metas', - nested=False): - self.keys = keys - self.meta_keys = meta_keys - self.meta_name = meta_name - self.nested = nested - - def __call__(self, results): - """Performs the Collect formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - data = {} - for key in self.keys: - data[key] = results[key] - - if len(self.meta_keys) != 0: - meta = {} - for key in self.meta_keys: - meta[key] = results[key] - data[self.meta_name] = DC(meta, cpu_only=True) - if self.nested: - for k in data: - data[k] = [data[k]] - - return data - - def __repr__(self): - return (f'{self.__class__.__name__}(' - f'keys={self.keys}, meta_keys={self.meta_keys}, ' - f'nested={self.nested})') - - -@PIPELINES.register_module() -class FormatShape: - """Format final imgs shape to the given input_format. - - Required keys are "imgs", "num_clips" and "clip_len", added or modified - keys are "imgs" and "input_shape". - - Args: - input_format (str): Define the final imgs format. - collapse (bool): To collpase input_format N... to ... (NCTHW to CTHW, - etc.) if N is 1. Should be set as True when training and testing - detectors. Default: False. - """ - def __init__(self, input_format, collapse=False): - self.input_format = input_format - self.collapse = collapse - if self.input_format not in ['NCTHW', 'NCHW', 'NCHW_Flow', 'NPTCHW']: - raise ValueError( - f'The input format {self.input_format} is invalid.') - - def __call__(self, results): - """Performs the FormatShape formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. 
- """ - if not isinstance(results['imgs'], np.ndarray): - results['imgs'] = np.array(results['imgs']) - imgs = results['imgs'] - # [M x H x W x C] - # M = 1 * N_crops * N_clips * L - if self.collapse: - assert results['num_clips'] == 1 - - if self.input_format == 'NCTHW': - num_clips = results['num_clips'] - clip_len = results['clip_len'] - - imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) - # N_crops x N_clips x L x H x W x C - imgs = np.transpose(imgs, (0, 1, 5, 2, 3, 4)) - # N_crops x N_clips x C x L x H x W - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - # M' x C x L x H x W - # M' = N_crops x N_clips - elif self.input_format == 'NCHW': - imgs = np.transpose(imgs, (0, 3, 1, 2)) - # M x C x H x W - elif self.input_format == 'NCHW_Flow': - num_clips = results['num_clips'] - clip_len = results['clip_len'] - imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) - # N_crops x N_clips x L x H x W x C - imgs = np.transpose(imgs, (0, 1, 2, 5, 3, 4)) - # N_crops x N_clips x L x C x H x W - imgs = imgs.reshape((-1, imgs.shape[2] * imgs.shape[3]) + - imgs.shape[4:]) - # M' x C' x H x W - # M' = N_crops x N_clips - # C' = L x C - elif self.input_format == 'NPTCHW': - num_proposals = results['num_proposals'] - num_clips = results['num_clips'] - clip_len = results['clip_len'] - imgs = imgs.reshape((num_proposals, num_clips * clip_len) + - imgs.shape[1:]) - # P x M x H x W x C - # M = N_clips x L - imgs = np.transpose(imgs, (0, 1, 4, 2, 3)) - # P x M x C x H x W - if self.collapse: - assert imgs.shape[0] == 1 - imgs = imgs.squeeze(0) - - results['imgs'] = imgs - results['input_shape'] = imgs.shape - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f"(input_format='{self.input_format}')" - return repr_str - - -@PIPELINES.register_module() -class FormatAudioShape: - """Format final audio shape to the given input_format. - - Required keys are "imgs", "num_clips" and "clip_len", added or modified - keys are "imgs" and "input_shape". - - Args: - input_format (str): Define the final imgs format. - """ - def __init__(self, input_format): - self.input_format = input_format - if self.input_format not in ['NCTF']: - raise ValueError( - f'The input format {self.input_format} is invalid.') - - def __call__(self, results): - """Performs the FormatShape formatting. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - audios = results['audios'] - # clip x sample x freq -> clip x channel x sample x freq - clip, sample, freq = audios.shape - audios = audios.reshape(clip, 1, sample, freq) - results['audios'] = audios - results['input_shape'] = audios.shape - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f"(input_format='{self.input_format}')" - return repr_str +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from collections.abc import Sequence + +import mmcv +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + """ + if isinstance(data, torch.Tensor): + return data + if isinstance(data, np.ndarray): + return torch.from_numpy(data) + if isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + if isinstance(data, int): + return torch.LongTensor([data]) + if isinstance(data, float): + return torch.FloatTensor([data]) + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor: + """Convert some values in results dict to `torch.Tensor` type in data + loader pipeline. + + Args: + keys (Sequence[str]): Required keys to be converted. + """ + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Performs the ToTensor formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return f'{self.__class__.__name__}(keys={self.keys})' + + +@PIPELINES.register_module() +class Rename: + """Rename the key in results. + + Args: + mapping (dict): The keys in results that need to be renamed. The key of + the dict is the original name, while the value is the new name. If + the original name not found in results, do nothing. + Default: dict(). + """ + def __init__(self, mapping): + self.mapping = mapping + + def __call__(self, results): + for key, value in self.mapping.items(): + if key in results: + assert isinstance(key, str) and isinstance(value, str) + assert value not in results, ('the new name already exists in ' + 'results') + results[value] = results[key] + results.pop(key) + return results + + +@PIPELINES.register_module() +class ToDataContainer: + """Convert the data to DataContainer. + + Args: + fields (Sequence[dict]): Required fields to be converted + with keys and attributes. E.g. + fields=(dict(key='gt_bbox', stack=False),). + Note that key can also be a list of keys, if so, every tensor in + the list will be converted to DataContainer. + """ + def __init__(self, fields): + self.fields = fields + + def __call__(self, results): + """Performs the ToDataContainer formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for field in self.fields: + _field = field.copy() + key = _field.pop('key') + if isinstance(key, list): + for item in key: + results[item] = DC(results[item], **_field) + else: + results[key] = DC(results[key], **_field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class ImageToTensor: + """Convert image type to `torch.Tensor` type. + + Args: + keys (Sequence[str]): Required keys to be converted. + """ + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Performs the ImageToTensor formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. 
+ """ + for key in self.keys: + results[key] = to_tensor(results[key].transpose(2, 0, 1)) + return results + + def __repr__(self): + return f'{self.__class__.__name__}(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose: + """Transpose image channels to a given order. + + Args: + keys (Sequence[str]): Required keys to be converted. + order (Sequence[int]): Image channel order. + """ + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Performs the Transpose formatting. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return (f'{self.__class__.__name__}(' + f'keys={self.keys}, order={self.order})') + + +@PIPELINES.register_module() +class Collect: + """Collect data from the loader relevant to the specific task. + + This keeps the items in ``keys`` as it is, and collect items in + ``meta_keys`` into a meta item called ``meta_name``.This is usually + the last stage of the data loader pipeline. + For example, when keys='imgs', meta_keys=('filename', 'label', + 'original_shape'), meta_name='img_metas', the results will be a dict with + keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of + another dict with keys 'filename', 'label', 'original_shape'. + + Args: + keys (Sequence[str]): Required keys to be collected. + meta_name (str): The name of the key that contains meta infomation. + This key is always populated. Default: "img_metas". + meta_keys (Sequence[str]): Keys that are collected under meta_name. + The contents of the ``meta_name`` dictionary depends on + ``meta_keys``. + By default this includes: + + - "filename": path to the image file + - "label": label of the image file + - "original_shape": original shape of the image as a tuple + (h, w, c) + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the + bottom/right, if the batch tensor is larger than this shape. + - "pad_shape": image shape after padding + - "flip_direction": a str in ("horiziontal", "vertival") to + indicate if the image is fliped horizontally or vertically. + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + nested (bool): If set as True, will apply data[x] = [data[x]] to all + items in data. The arg is added for compatibility. Default: False. + """ + def __init__(self, + keys, + meta_keys=('filename', 'label', 'original_shape', 'img_shape', + 'pad_shape', 'flip_direction', 'img_norm_cfg'), + meta_name='img_metas', + nested=False): + self.keys = keys + self.meta_keys = meta_keys + self.meta_name = meta_name + self.nested = nested + + def __call__(self, results): + """Performs the Collect formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. 
+ """ + data = {} + for key in self.keys: + data[key] = results[key] + + if len(self.meta_keys) != 0: + meta = {} + for key in self.meta_keys: + meta[key] = results[key] + data[self.meta_name] = DC(meta, cpu_only=True) + if self.nested: + for k in data: + data[k] = [data[k]] + + return data + + def __repr__(self): + return (f'{self.__class__.__name__}(' + f'keys={self.keys}, meta_keys={self.meta_keys}, ' + f'nested={self.nested})') + + +@PIPELINES.register_module() +class FormatShape: + """Format final imgs shape to the given input_format. + + Required keys are "imgs", "num_clips" and "clip_len", added or modified + keys are "imgs" and "input_shape". + + Args: + input_format (str): Define the final imgs format. + collapse (bool): To collpase input_format N... to ... (NCTHW to CTHW, + etc.) if N is 1. Should be set as True when training and testing + detectors. Default: False. + """ + def __init__(self, input_format, collapse=False): + self.input_format = input_format + self.collapse = collapse + if self.input_format not in ['NCTHW', 'NCHW', 'NCHW_Flow', 'NPTCHW']: + raise ValueError( + f'The input format {self.input_format} is invalid.') + + def __call__(self, results): + """Performs the FormatShape formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + if not isinstance(results['imgs'], np.ndarray): + results['imgs'] = np.array(results['imgs']) + imgs = results['imgs'] + # [M x H x W x C] + # M = 1 * N_crops * N_clips * L + if self.collapse: + assert results['num_clips'] == 1 + + if self.input_format == 'NCTHW': + num_clips = results['num_clips'] + clip_len = results['clip_len'] + + imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) + # N_crops x N_clips x L x H x W x C + imgs = np.transpose(imgs, (0, 1, 5, 2, 3, 4)) + # N_crops x N_clips x C x L x H x W + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + # M' x C x L x H x W + # M' = N_crops x N_clips + elif self.input_format == 'NCHW': + imgs = np.transpose(imgs, (0, 3, 1, 2)) + # M x C x H x W + elif self.input_format == 'NCHW_Flow': + num_clips = results['num_clips'] + clip_len = results['clip_len'] + imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) + # N_crops x N_clips x L x H x W x C + imgs = np.transpose(imgs, (0, 1, 2, 5, 3, 4)) + # N_crops x N_clips x L x C x H x W + imgs = imgs.reshape((-1, imgs.shape[2] * imgs.shape[3]) + + imgs.shape[4:]) + # M' x C' x H x W + # M' = N_crops x N_clips + # C' = L x C + elif self.input_format == 'NPTCHW': + num_proposals = results['num_proposals'] + num_clips = results['num_clips'] + clip_len = results['clip_len'] + imgs = imgs.reshape((num_proposals, num_clips * clip_len) + + imgs.shape[1:]) + # P x M x H x W x C + # M = N_clips x L + imgs = np.transpose(imgs, (0, 1, 4, 2, 3)) + # P x M x C x H x W + if self.collapse: + assert imgs.shape[0] == 1 + imgs = imgs.squeeze(0) + + results['imgs'] = imgs + results['input_shape'] = imgs.shape + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f"(input_format='{self.input_format}')" + return repr_str + + +@PIPELINES.register_module() +class FormatAudioShape: + """Format final audio shape to the given input_format. + + Required keys are "imgs", "num_clips" and "clip_len", added or modified + keys are "imgs" and "input_shape". + + Args: + input_format (str): Define the final imgs format. 
+ """ + def __init__(self, input_format): + self.input_format = input_format + if self.input_format not in ['NCTF']: + raise ValueError( + f'The input format {self.input_format} is invalid.') + + def __call__(self, results): + """Performs the FormatShape formatting. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + audios = results['audios'] + # clip x sample x freq -> clip x channel x sample x freq + clip, sample, freq = audios.shape + audios = audios.reshape(clip, 1, sample, freq) + results['audios'] = audios + results['input_shape'] = audios.shape + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f"(input_format='{self.input_format}')" + return repr_str diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/backbones/resnet3d_slowfast.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/backbones/resnet3d_slowfast.py index a5d23bb38eddec07ec89262f4e7a446a17b429fa..e61842a2bab64177f4f1b83b30430833911ee16a 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/backbones/resnet3d_slowfast.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/backbones/resnet3d_slowfast.py @@ -1,531 +1,531 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import warnings - -import torch -import torch.nn as nn -from mmcv.cnn import ConvModule, kaiming_init -from mmcv.runner import _load_checkpoint, load_checkpoint -from mmcv.utils import print_log - -from ...utils import get_root_logger -from ..builder import BACKBONES -from .resnet3d import ResNet3d - -mmdet_imported = False - - -class ResNet3dPathway(ResNet3d): - """A pathway of Slowfast based on ResNet3d. - - Args: - *args (arguments): Arguments same as :class:``ResNet3d``. - lateral (bool): Determines whether to enable the lateral connection - from another pathway. Default: False. - speed_ratio (int): Speed ratio indicating the ratio between time - dimension of the fast and slow pathway, corresponding to the - ``alpha`` in the paper. Default: 8. - channel_ratio (int): Reduce the channel number of fast pathway - by ``channel_ratio``, corresponding to ``beta`` in the paper. - Default: 8. - fusion_kernel (int): The kernel size of lateral fusion. - Default: 5. - **kwargs (keyword arguments): Keywords arguments for ResNet3d. 
- """ - def __init__(self, - *args, - lateral=False, - speed_ratio=8, - channel_ratio=8, - fusion_kernel=5, - **kwargs): - self.lateral = lateral - self.speed_ratio = speed_ratio - self.channel_ratio = channel_ratio - self.fusion_kernel = fusion_kernel - super().__init__(*args, **kwargs) - self.inplanes = self.base_channels - if self.lateral: - self.conv1_lateral = ConvModule( - self.inplanes // self.channel_ratio, - # https://arxiv.org/abs/1812.03982, the - # third type of lateral connection has out_channel: - # 2 * \beta * C - self.inplanes * 2 // self.channel_ratio, - kernel_size=(fusion_kernel, 1, 1), - stride=(self.speed_ratio, 1, 1), - padding=((fusion_kernel - 1) // 2, 0, 0), - bias=False, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None) - - self.lateral_connections = [] - for i in range(len(self.stage_blocks)): - planes = self.base_channels * 2**i - self.inplanes = planes * self.block.expansion - - if lateral and i != self.num_stages - 1: - # no lateral connection needed in final stage - lateral_name = f'layer{(i + 1)}_lateral' - setattr( - self, lateral_name, - ConvModule(self.inplanes // self.channel_ratio, - self.inplanes * 2 // self.channel_ratio, - kernel_size=(fusion_kernel, 1, 1), - stride=(self.speed_ratio, 1, 1), - padding=((fusion_kernel - 1) // 2, 0, 0), - bias=False, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None)) - self.lateral_connections.append(lateral_name) - - def make_res_layer(self, - block, - inplanes, - planes, - blocks, - spatial_stride=1, - temporal_stride=1, - dilation=1, - style='pytorch', - inflate=1, - inflate_style='3x1x1', - non_local=0, - non_local_cfg=dict(), - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - with_cp=False): - """Build residual layer for Slowfast. - - Args: - block (nn.Module): Residual module to be built. - inplanes (int): Number of channels for the input - feature in each block. - planes (int): Number of channels for the output - feature in each block. - blocks (int): Number of residual blocks. - spatial_stride (int | Sequence[int]): Spatial strides - in residual and conv layers. Default: 1. - temporal_stride (int | Sequence[int]): Temporal strides in - residual and conv layers. Default: 1. - dilation (int): Spacing between kernel elements. Default: 1. - style (str): ``pytorch`` or ``caffe``. If set to ``pytorch``, - the stride-two layer is the 3x3 conv layer, - otherwise the stride-two layer is the first 1x1 conv layer. - Default: ``pytorch``. - inflate (int | Sequence[int]): Determine whether to inflate - for each block. Default: 1. - inflate_style (str): ``3x1x1`` or ``3x3x3``. which determines - the kernel sizes and padding strides for conv1 and - conv2 in each block. Default: ``3x1x1``. - non_local (int | Sequence[int]): Determine whether to apply - non-local module in the corresponding block of each stages. - Default: 0. - non_local_cfg (dict): Config for non-local module. - Default: ``dict()``. - conv_cfg (dict | None): Config for conv layers. Default: None. - norm_cfg (dict | None): Config for norm layers. Default: None. - act_cfg (dict | None): Config for activate layers. Default: None. - with_cp (bool): Use checkpoint or not. Using checkpoint will save - some memory while slowing down the training speed. - Default: False. - - Returns: - nn.Module: A residual layer for the given config. 
- """ - inflate = inflate if not isinstance(inflate, - int) else (inflate, ) * blocks - non_local = non_local if not isinstance( - non_local, int) else (non_local, ) * blocks - assert len(inflate) == blocks and len(non_local) == blocks - if self.lateral: - lateral_inplanes = inplanes * 2 // self.channel_ratio - else: - lateral_inplanes = 0 - if (spatial_stride != 1 - or (inplanes + lateral_inplanes) != planes * block.expansion): - downsample = ConvModule(inplanes + lateral_inplanes, - planes * block.expansion, - kernel_size=1, - stride=(temporal_stride, spatial_stride, - spatial_stride), - bias=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) - else: - downsample = None - - layers = [] - layers.append( - block(inplanes + lateral_inplanes, - planes, - spatial_stride, - temporal_stride, - dilation, - downsample, - style=style, - inflate=(inflate[0] == 1), - inflate_style=inflate_style, - non_local=(non_local[0] == 1), - non_local_cfg=non_local_cfg, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp)) - inplanes = planes * block.expansion - - for i in range(1, blocks): - layers.append( - block(inplanes, - planes, - 1, - 1, - dilation, - style=style, - inflate=(inflate[i] == 1), - inflate_style=inflate_style, - non_local=(non_local[i] == 1), - non_local_cfg=non_local_cfg, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp)) - - return nn.Sequential(*layers) - - def inflate_weights(self, logger): - """Inflate the resnet2d parameters to resnet3d pathway. - - The differences between resnet3d and resnet2d mainly lie in an extra - axis of conv kernel. To utilize the pretrained parameters in 2d model, - the weight of conv2d models should be inflated to fit in the shapes of - the 3d counterpart. For pathway the ``lateral_connection`` part should - not be inflated from 2d weights. - - Args: - logger (logging.Logger): The logger used to print - debugging infomation. 
- """ - - state_dict_r2d = _load_checkpoint(self.pretrained) - if 'state_dict' in state_dict_r2d: - state_dict_r2d = state_dict_r2d['state_dict'] - - inflated_param_names = [] - for name, module in self.named_modules(): - if 'lateral' in name: - continue - if isinstance(module, ConvModule): - # we use a ConvModule to wrap conv+bn+relu layers, thus the - # name mapping is needed - if 'downsample' in name: - # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0 - original_conv_name = name + '.0' - # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1 - original_bn_name = name + '.1' - else: - # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n} - original_conv_name = name - # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n} - original_bn_name = name.replace('conv', 'bn') - if original_conv_name + '.weight' not in state_dict_r2d: - logger.warning(f'Module not exist in the state_dict_r2d' - f': {original_conv_name}') - else: - self._inflate_conv_params(module.conv, state_dict_r2d, - original_conv_name, - inflated_param_names) - if original_bn_name + '.weight' not in state_dict_r2d: - logger.warning(f'Module not exist in the state_dict_r2d' - f': {original_bn_name}') - else: - self._inflate_bn_params(module.bn, state_dict_r2d, - original_bn_name, - inflated_param_names) - - # check if any parameters in the 2d checkpoint are not loaded - remaining_names = set( - state_dict_r2d.keys()) - set(inflated_param_names) - if remaining_names: - logger.info(f'These parameters in the 2d checkpoint are not loaded' - f': {remaining_names}') - - def _inflate_conv_params(self, conv3d, state_dict_2d, module_name_2d, - inflated_param_names): - """Inflate a conv module from 2d to 3d. - - The differences of conv modules betweene 2d and 3d in Pathway - mainly lie in the inplanes due to lateral connections. To fit the - shapes of the lateral connection counterpart, it will expand - parameters by concatting conv2d parameters and extra zero paddings. - - Args: - conv3d (nn.Module): The destination conv3d module. - state_dict_2d (OrderedDict): The state dict of pretrained 2d model. - module_name_2d (str): The name of corresponding conv module in the - 2d model. - inflated_param_names (list[str]): List of parameters that have been - inflated. - """ - weight_2d_name = module_name_2d + '.weight' - conv2d_weight = state_dict_2d[weight_2d_name] - old_shape = conv2d_weight.shape - new_shape = conv3d.weight.data.shape - kernel_t = new_shape[2] - - if new_shape[1] != old_shape[1]: - if new_shape[1] < old_shape[1]: - warnings.warn(f'The parameter of {module_name_2d} is not' - 'loaded due to incompatible shapes. 
') - return - # Inplanes may be different due to lateral connections - new_channels = new_shape[1] - old_shape[1] - pad_shape = old_shape - pad_shape = pad_shape[:1] + (new_channels, ) + pad_shape[2:] - # Expand parameters by concat extra channels - conv2d_weight = torch.cat( - (conv2d_weight, - torch.zeros(pad_shape).type_as(conv2d_weight).to( - conv2d_weight.device)), - dim=1) - - new_weight = conv2d_weight.data.unsqueeze(2).expand_as( - conv3d.weight) / kernel_t - conv3d.weight.data.copy_(new_weight) - inflated_param_names.append(weight_2d_name) - - if getattr(conv3d, 'bias') is not None: - bias_2d_name = module_name_2d + '.bias' - conv3d.bias.data.copy_(state_dict_2d[bias_2d_name]) - inflated_param_names.append(bias_2d_name) - - def _freeze_stages(self): - """Prevent all the parameters from being optimized before - `self.frozen_stages`.""" - if self.frozen_stages >= 0: - self.conv1.eval() - for param in self.conv1.parameters(): - param.requires_grad = False - - for i in range(1, self.frozen_stages + 1): - m = getattr(self, f'layer{i}') - m.eval() - for param in m.parameters(): - param.requires_grad = False - - if i != len(self.res_layers) and self.lateral: - # No fusion needed in the final stage - lateral_name = self.lateral_connections[i - 1] - conv_lateral = getattr(self, lateral_name) - conv_lateral.eval() - for param in conv_lateral.parameters(): - param.requires_grad = False - - def init_weights(self, pretrained=None): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - if pretrained: - self.pretrained = pretrained - - # Override the init_weights of i3d - super().init_weights() - for module_name in self.lateral_connections: - layer = getattr(self, module_name) - for m in layer.modules(): - if isinstance(m, (nn.Conv3d, nn.Conv2d)): - kaiming_init(m) - - -pathway_cfg = { - 'resnet3d': ResNet3dPathway, - # TODO: BNInceptionPathway -} - - -def build_pathway(cfg, *args, **kwargs): - """Build pathway. - - Args: - cfg (None or dict): cfg should contain: - - type (str): identify conv layer type. - - Returns: - nn.Module: Created pathway. - """ - if not (isinstance(cfg, dict) and 'type' in cfg): - raise TypeError('cfg must be a dict containing the key "type"') - cfg_ = cfg.copy() - - pathway_type = cfg_.pop('type') - if pathway_type not in pathway_cfg: - raise KeyError(f'Unrecognized pathway type {pathway_type}') - - pathway_cls = pathway_cfg[pathway_type] - pathway = pathway_cls(*args, **kwargs, **cfg_) - - return pathway - - -@BACKBONES.register_module() -class ResNet3dSlowFast(nn.Module): - """Slowfast backbone. - - This module is proposed in `SlowFast Networks for Video Recognition - `_ - - Args: - pretrained (str): The file path to a pretrained model. - resample_rate (int): A large temporal stride ``resample_rate`` - on input frames. The actual resample rate is calculated by - multipling the ``interval`` in ``SampleFrames`` in the - pipeline with ``resample_rate``, equivalent to the :math:`\\tau` - in the paper, i.e. it processes only one out of - ``resample_rate * interval`` frames. Default: 8. - speed_ratio (int): Speed ratio indicating the ratio between time - dimension of the fast and slow pathway, corresponding to the - :math:`\\alpha` in the paper. Default: 8. - channel_ratio (int): Reduce the channel number of fast pathway - by ``channel_ratio``, corresponding to :math:`\\beta` in the paper. - Default: 8. 
- slow_pathway (dict): Configuration of slow branch, should contain - necessary arguments for building the specific type of pathway - and: - type (str): type of backbone the pathway bases on. - lateral (bool): determine whether to build lateral connection - for the pathway.Default: - - .. code-block:: Python - - dict(type='ResNetPathway', - lateral=True, depth=50, pretrained=None, - conv1_kernel=(1, 7, 7), dilations=(1, 1, 1, 1), - conv1_stride_t=1, pool1_stride_t=1, inflate=(0, 0, 1, 1)) - - fast_pathway (dict): Configuration of fast branch, similar to - `slow_pathway`. Default: - - .. code-block:: Python - - dict(type='ResNetPathway', - lateral=False, depth=50, pretrained=None, base_channels=8, - conv1_kernel=(5, 7, 7), conv1_stride_t=1, pool1_stride_t=1) - """ - def __init__(self, - pretrained, - resample_rate=8, - speed_ratio=8, - channel_ratio=8, - slow_pathway=dict(type='resnet3d', - depth=50, - pretrained=None, - lateral=True, - conv1_kernel=(1, 7, 7), - dilations=(1, 1, 1, 1), - conv1_stride_t=1, - pool1_stride_t=1, - inflate=(0, 0, 1, 1)), - fast_pathway=dict(type='resnet3d', - depth=50, - pretrained=None, - lateral=False, - base_channels=8, - conv1_kernel=(5, 7, 7), - conv1_stride_t=1, - pool1_stride_t=1)): - super().__init__() - self.pretrained = pretrained - self.resample_rate = resample_rate - self.speed_ratio = speed_ratio - self.channel_ratio = channel_ratio - - if slow_pathway['lateral']: - slow_pathway['speed_ratio'] = speed_ratio - slow_pathway['channel_ratio'] = channel_ratio - - self.slow_path = build_pathway(slow_pathway) - self.fast_path = build_pathway(fast_pathway) - - def init_weights(self, pretrained=None): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - if pretrained: - self.pretrained = pretrained - - if isinstance(self.pretrained, str): - logger = get_root_logger() - msg = f'load model from: {self.pretrained}' - print_log(msg, logger=logger) - # Directly load 3D model. - load_checkpoint(self, self.pretrained, strict=True, logger=logger) - elif self.pretrained is None: - # Init two branch seperately. - self.fast_path.init_weights() - self.slow_path.init_weights() - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - """Defines the computation performed at every call. - - Args: - x (torch.Tensor): The input data. - - Returns: - tuple[torch.Tensor]: The feature of the input samples extracted - by the backbone. 
- """ - x_slow = nn.functional.interpolate( - x, - mode='nearest', - scale_factor=(1.0 / self.resample_rate, 1.0, 1.0)) - x_slow = self.slow_path.conv1(x_slow) - x_slow = self.slow_path.maxpool(x_slow) - - x_fast = nn.functional.interpolate( - x, - mode='nearest', - scale_factor=(1.0 / (self.resample_rate // self.speed_ratio), 1.0, - 1.0)) - x_fast = self.fast_path.conv1(x_fast) - x_fast = self.fast_path.maxpool(x_fast) - - if self.slow_path.lateral: - x_fast_lateral = self.slow_path.conv1_lateral(x_fast) - x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) - - for i, layer_name in enumerate(self.slow_path.res_layers): - res_layer = getattr(self.slow_path, layer_name) - x_slow = res_layer(x_slow) - res_layer_fast = getattr(self.fast_path, layer_name) - x_fast = res_layer_fast(x_fast) - if (i != len(self.slow_path.res_layers) - 1 - and self.slow_path.lateral): - # No fusion needed in the final stage - lateral_name = self.slow_path.lateral_connections[i] - conv_lateral = getattr(self.slow_path, lateral_name) - x_fast_lateral = conv_lateral(x_fast) - x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) - - out = (x_slow, x_fast) - - return out - - -if mmdet_imported: - MMDET_BACKBONES.register_module()(ResNet3dSlowFast) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import warnings + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, kaiming_init +from mmcv.runner import _load_checkpoint, load_checkpoint +from mmcv.utils import print_log + +from ...utils import get_root_logger +from ..builder import BACKBONES +from .resnet3d import ResNet3d + +mmdet_imported = False + + +class ResNet3dPathway(ResNet3d): + """A pathway of Slowfast based on ResNet3d. + + Args: + *args (arguments): Arguments same as :class:``ResNet3d``. + lateral (bool): Determines whether to enable the lateral connection + from another pathway. Default: False. + speed_ratio (int): Speed ratio indicating the ratio between time + dimension of the fast and slow pathway, corresponding to the + ``alpha`` in the paper. Default: 8. + channel_ratio (int): Reduce the channel number of fast pathway + by ``channel_ratio``, corresponding to ``beta`` in the paper. + Default: 8. + fusion_kernel (int): The kernel size of lateral fusion. + Default: 5. + **kwargs (keyword arguments): Keywords arguments for ResNet3d. 
+ """ + def __init__(self, + *args, + lateral=False, + speed_ratio=8, + channel_ratio=8, + fusion_kernel=5, + **kwargs): + self.lateral = lateral + self.speed_ratio = speed_ratio + self.channel_ratio = channel_ratio + self.fusion_kernel = fusion_kernel + super().__init__(*args, **kwargs) + self.inplanes = self.base_channels + if self.lateral: + self.conv1_lateral = ConvModule( + self.inplanes // self.channel_ratio, + # https://arxiv.org/abs/1812.03982, the + # third type of lateral connection has out_channel: + # 2 * \beta * C + self.inplanes * 2 // self.channel_ratio, + kernel_size=(fusion_kernel, 1, 1), + stride=(self.speed_ratio, 1, 1), + padding=((fusion_kernel - 1) // 2, 0, 0), + bias=False, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.lateral_connections = [] + for i in range(len(self.stage_blocks)): + planes = self.base_channels * 2**i + self.inplanes = planes * self.block.expansion + + if lateral and i != self.num_stages - 1: + # no lateral connection needed in final stage + lateral_name = f'layer{(i + 1)}_lateral' + setattr( + self, lateral_name, + ConvModule(self.inplanes // self.channel_ratio, + self.inplanes * 2 // self.channel_ratio, + kernel_size=(fusion_kernel, 1, 1), + stride=(self.speed_ratio, 1, 1), + padding=((fusion_kernel - 1) // 2, 0, 0), + bias=False, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.lateral_connections.append(lateral_name) + + def make_res_layer(self, + block, + inplanes, + planes, + blocks, + spatial_stride=1, + temporal_stride=1, + dilation=1, + style='pytorch', + inflate=1, + inflate_style='3x1x1', + non_local=0, + non_local_cfg=dict(), + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + with_cp=False): + """Build residual layer for Slowfast. + + Args: + block (nn.Module): Residual module to be built. + inplanes (int): Number of channels for the input + feature in each block. + planes (int): Number of channels for the output + feature in each block. + blocks (int): Number of residual blocks. + spatial_stride (int | Sequence[int]): Spatial strides + in residual and conv layers. Default: 1. + temporal_stride (int | Sequence[int]): Temporal strides in + residual and conv layers. Default: 1. + dilation (int): Spacing between kernel elements. Default: 1. + style (str): ``pytorch`` or ``caffe``. If set to ``pytorch``, + the stride-two layer is the 3x3 conv layer, + otherwise the stride-two layer is the first 1x1 conv layer. + Default: ``pytorch``. + inflate (int | Sequence[int]): Determine whether to inflate + for each block. Default: 1. + inflate_style (str): ``3x1x1`` or ``3x3x3``. which determines + the kernel sizes and padding strides for conv1 and + conv2 in each block. Default: ``3x1x1``. + non_local (int | Sequence[int]): Determine whether to apply + non-local module in the corresponding block of each stages. + Default: 0. + non_local_cfg (dict): Config for non-local module. + Default: ``dict()``. + conv_cfg (dict | None): Config for conv layers. Default: None. + norm_cfg (dict | None): Config for norm layers. Default: None. + act_cfg (dict | None): Config for activate layers. Default: None. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + + Returns: + nn.Module: A residual layer for the given config. 
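The channel bookkeeping that make_res_layer relies on is easy to lose in the long signature above, so here is a minimal sketch of it in isolation; slow_stage_in_channels is an illustrative helper name (not part of mmaction) and the numbers assume the default channel_ratio of 8.

def slow_stage_in_channels(inplanes, channel_ratio=8, lateral=True):
    # mirrors the expression in make_res_layer: the fused fast-pathway
    # features contribute 2 * inplanes / channel_ratio extra channels
    lateral_inplanes = inplanes * 2 // channel_ratio if lateral else 0
    return inplanes + lateral_inplanes

# e.g. the stage fed by conv1 of the slow path (inplanes = 64) also receives
# 2 * 64 // 8 = 16 fused channels, so its first block consumes 80 channels
assert slow_stage_in_channels(64) == 80

Whenever this widened input no longer matches planes * expansion, the layer builds the 1x1x1 ConvModule downsample shown above, which is how the lateral stages end up with a projection shortcut in the default configs.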
+ """ + inflate = inflate if not isinstance(inflate, + int) else (inflate, ) * blocks + non_local = non_local if not isinstance( + non_local, int) else (non_local, ) * blocks + assert len(inflate) == blocks and len(non_local) == blocks + if self.lateral: + lateral_inplanes = inplanes * 2 // self.channel_ratio + else: + lateral_inplanes = 0 + if (spatial_stride != 1 + or (inplanes + lateral_inplanes) != planes * block.expansion): + downsample = ConvModule(inplanes + lateral_inplanes, + planes * block.expansion, + kernel_size=1, + stride=(temporal_stride, spatial_stride, + spatial_stride), + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + else: + downsample = None + + layers = [] + layers.append( + block(inplanes + lateral_inplanes, + planes, + spatial_stride, + temporal_stride, + dilation, + downsample, + style=style, + inflate=(inflate[0] == 1), + inflate_style=inflate_style, + non_local=(non_local[0] == 1), + non_local_cfg=non_local_cfg, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) + inplanes = planes * block.expansion + + for i in range(1, blocks): + layers.append( + block(inplanes, + planes, + 1, + 1, + dilation, + style=style, + inflate=(inflate[i] == 1), + inflate_style=inflate_style, + non_local=(non_local[i] == 1), + non_local_cfg=non_local_cfg, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) + + return nn.Sequential(*layers) + + def inflate_weights(self, logger): + """Inflate the resnet2d parameters to resnet3d pathway. + + The differences between resnet3d and resnet2d mainly lie in an extra + axis of conv kernel. To utilize the pretrained parameters in 2d model, + the weight of conv2d models should be inflated to fit in the shapes of + the 3d counterpart. For pathway the ``lateral_connection`` part should + not be inflated from 2d weights. + + Args: + logger (logging.Logger): The logger used to print + debugging infomation. 
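The loop in inflate_weights above is mostly name translation between the ConvModule-wrapped 3d layers and the flat resnet2d checkpoint. The standalone helper below reproduces that mapping for reference; map_3d_name_to_2d is an illustrative name and only covers the two cases the loop distinguishes.

def map_3d_name_to_2d(name):
    """Return (conv_name, bn_name) in the 2d checkpoint for a ConvModule name."""
    if 'downsample' in name:
        # layer{X}.{Y}.downsample.conv -> layer{X}.{Y}.downsample.0
        # layer{X}.{Y}.downsample.bn   -> layer{X}.{Y}.downsample.1
        return name + '.0', name + '.1'
    # layer{X}.{Y}.conv{n}.conv -> layer{X}.{Y}.conv{n}
    # layer{X}.{Y}.conv{n}.bn   -> layer{X}.{Y}.bn{n}
    return name, name.replace('conv', 'bn')

assert map_3d_name_to_2d('layer1.0.conv2') == ('layer1.0.conv2', 'layer1.0.bn2')
assert map_3d_name_to_2d('layer1.0.downsample') == ('layer1.0.downsample.0',
                                                    'layer1.0.downsample.1')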
+ """ + + state_dict_r2d = _load_checkpoint(self.pretrained) + if 'state_dict' in state_dict_r2d: + state_dict_r2d = state_dict_r2d['state_dict'] + + inflated_param_names = [] + for name, module in self.named_modules(): + if 'lateral' in name: + continue + if isinstance(module, ConvModule): + # we use a ConvModule to wrap conv+bn+relu layers, thus the + # name mapping is needed + if 'downsample' in name: + # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0 + original_conv_name = name + '.0' + # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1 + original_bn_name = name + '.1' + else: + # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n} + original_conv_name = name + # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n} + original_bn_name = name.replace('conv', 'bn') + if original_conv_name + '.weight' not in state_dict_r2d: + logger.warning(f'Module not exist in the state_dict_r2d' + f': {original_conv_name}') + else: + self._inflate_conv_params(module.conv, state_dict_r2d, + original_conv_name, + inflated_param_names) + if original_bn_name + '.weight' not in state_dict_r2d: + logger.warning(f'Module not exist in the state_dict_r2d' + f': {original_bn_name}') + else: + self._inflate_bn_params(module.bn, state_dict_r2d, + original_bn_name, + inflated_param_names) + + # check if any parameters in the 2d checkpoint are not loaded + remaining_names = set( + state_dict_r2d.keys()) - set(inflated_param_names) + if remaining_names: + logger.info(f'These parameters in the 2d checkpoint are not loaded' + f': {remaining_names}') + + def _inflate_conv_params(self, conv3d, state_dict_2d, module_name_2d, + inflated_param_names): + """Inflate a conv module from 2d to 3d. + + The differences of conv modules betweene 2d and 3d in Pathway + mainly lie in the inplanes due to lateral connections. To fit the + shapes of the lateral connection counterpart, it will expand + parameters by concatting conv2d parameters and extra zero paddings. + + Args: + conv3d (nn.Module): The destination conv3d module. + state_dict_2d (OrderedDict): The state dict of pretrained 2d model. + module_name_2d (str): The name of corresponding conv module in the + 2d model. + inflated_param_names (list[str]): List of parameters that have been + inflated. + """ + weight_2d_name = module_name_2d + '.weight' + conv2d_weight = state_dict_2d[weight_2d_name] + old_shape = conv2d_weight.shape + new_shape = conv3d.weight.data.shape + kernel_t = new_shape[2] + + if new_shape[1] != old_shape[1]: + if new_shape[1] < old_shape[1]: + warnings.warn(f'The parameter of {module_name_2d} is not' + 'loaded due to incompatible shapes. 
') + return + # Inplanes may be different due to lateral connections + new_channels = new_shape[1] - old_shape[1] + pad_shape = old_shape + pad_shape = pad_shape[:1] + (new_channels, ) + pad_shape[2:] + # Expand parameters by concat extra channels + conv2d_weight = torch.cat( + (conv2d_weight, + torch.zeros(pad_shape).type_as(conv2d_weight).to( + conv2d_weight.device)), + dim=1) + + new_weight = conv2d_weight.data.unsqueeze(2).expand_as( + conv3d.weight) / kernel_t + conv3d.weight.data.copy_(new_weight) + inflated_param_names.append(weight_2d_name) + + if getattr(conv3d, 'bias') is not None: + bias_2d_name = module_name_2d + '.bias' + conv3d.bias.data.copy_(state_dict_2d[bias_2d_name]) + inflated_param_names.append(bias_2d_name) + + def _freeze_stages(self): + """Prevent all the parameters from being optimized before + `self.frozen_stages`.""" + if self.frozen_stages >= 0: + self.conv1.eval() + for param in self.conv1.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + if i != len(self.res_layers) and self.lateral: + # No fusion needed in the final stage + lateral_name = self.lateral_connections[i - 1] + conv_lateral = getattr(self, lateral_name) + conv_lateral.eval() + for param in conv_lateral.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + if pretrained: + self.pretrained = pretrained + + # Override the init_weights of i3d + super().init_weights() + for module_name in self.lateral_connections: + layer = getattr(self, module_name) + for m in layer.modules(): + if isinstance(m, (nn.Conv3d, nn.Conv2d)): + kaiming_init(m) + + +pathway_cfg = { + 'resnet3d': ResNet3dPathway, + # TODO: BNInceptionPathway +} + + +def build_pathway(cfg, *args, **kwargs): + """Build pathway. + + Args: + cfg (None or dict): cfg should contain: + - type (str): identify conv layer type. + + Returns: + nn.Module: Created pathway. + """ + if not (isinstance(cfg, dict) and 'type' in cfg): + raise TypeError('cfg must be a dict containing the key "type"') + cfg_ = cfg.copy() + + pathway_type = cfg_.pop('type') + if pathway_type not in pathway_cfg: + raise KeyError(f'Unrecognized pathway type {pathway_type}') + + pathway_cls = pathway_cfg[pathway_type] + pathway = pathway_cls(*args, **kwargs, **cfg_) + + return pathway + + +@BACKBONES.register_module() +class ResNet3dSlowFast(nn.Module): + """Slowfast backbone. + + This module is proposed in `SlowFast Networks for Video Recognition + `_ + + Args: + pretrained (str): The file path to a pretrained model. + resample_rate (int): A large temporal stride ``resample_rate`` + on input frames. The actual resample rate is calculated by + multipling the ``interval`` in ``SampleFrames`` in the + pipeline with ``resample_rate``, equivalent to the :math:`\\tau` + in the paper, i.e. it processes only one out of + ``resample_rate * interval`` frames. Default: 8. + speed_ratio (int): Speed ratio indicating the ratio between time + dimension of the fast and slow pathway, corresponding to the + :math:`\\alpha` in the paper. Default: 8. + channel_ratio (int): Reduce the channel number of fast pathway + by ``channel_ratio``, corresponding to :math:`\\beta` in the paper. + Default: 8. 
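Since resample_rate and speed_ratio only appear as interpolation factors in forward(), a short self-contained example may help; the clip length of 32 frames and the 8x8 spatial size are arbitrary example values, and the defaults tau = alpha = 8 are assumed.

import torch
import torch.nn as nn

T = 32                                # example clip length
resample_rate, speed_ratio = 8, 8     # tau and alpha in the paper
x = torch.randn(1, 3, T, 8, 8)

x_slow = nn.functional.interpolate(
    x, mode='nearest', scale_factor=(1.0 / resample_rate, 1.0, 1.0))
x_fast = nn.functional.interpolate(
    x, mode='nearest',
    scale_factor=(1.0 / (resample_rate // speed_ratio), 1.0, 1.0))

assert x_slow.shape[2] == T // resample_rate   # slow path keeps 4 frames
assert x_fast.shape[2] == T                    # fast path keeps all 32 frames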
+ slow_pathway (dict): Configuration of slow branch, should contain + necessary arguments for building the specific type of pathway + and: + type (str): type of backbone the pathway bases on. + lateral (bool): determine whether to build lateral connection + for the pathway.Default: + + .. code-block:: Python + + dict(type='ResNetPathway', + lateral=True, depth=50, pretrained=None, + conv1_kernel=(1, 7, 7), dilations=(1, 1, 1, 1), + conv1_stride_t=1, pool1_stride_t=1, inflate=(0, 0, 1, 1)) + + fast_pathway (dict): Configuration of fast branch, similar to + `slow_pathway`. Default: + + .. code-block:: Python + + dict(type='ResNetPathway', + lateral=False, depth=50, pretrained=None, base_channels=8, + conv1_kernel=(5, 7, 7), conv1_stride_t=1, pool1_stride_t=1) + """ + def __init__(self, + pretrained, + resample_rate=8, + speed_ratio=8, + channel_ratio=8, + slow_pathway=dict(type='resnet3d', + depth=50, + pretrained=None, + lateral=True, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1)), + fast_pathway=dict(type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1)): + super().__init__() + self.pretrained = pretrained + self.resample_rate = resample_rate + self.speed_ratio = speed_ratio + self.channel_ratio = channel_ratio + + if slow_pathway['lateral']: + slow_pathway['speed_ratio'] = speed_ratio + slow_pathway['channel_ratio'] = channel_ratio + + self.slow_path = build_pathway(slow_pathway) + self.fast_path = build_pathway(fast_pathway) + + def init_weights(self, pretrained=None): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + if pretrained: + self.pretrained = pretrained + + if isinstance(self.pretrained, str): + logger = get_root_logger() + msg = f'load model from: {self.pretrained}' + print_log(msg, logger=logger) + # Directly load 3D model. + load_checkpoint(self, self.pretrained, strict=True, logger=logger) + elif self.pretrained is None: + # Init two branch seperately. + self.fast_path.init_weights() + self.slow_path.init_weights() + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Defines the computation performed at every call. + + Args: + x (torch.Tensor): The input data. + + Returns: + tuple[torch.Tensor]: The feature of the input samples extracted + by the backbone. 
+ """ + x_slow = nn.functional.interpolate( + x, + mode='nearest', + scale_factor=(1.0 / self.resample_rate, 1.0, 1.0)) + x_slow = self.slow_path.conv1(x_slow) + x_slow = self.slow_path.maxpool(x_slow) + + x_fast = nn.functional.interpolate( + x, + mode='nearest', + scale_factor=(1.0 / (self.resample_rate // self.speed_ratio), 1.0, + 1.0)) + x_fast = self.fast_path.conv1(x_fast) + x_fast = self.fast_path.maxpool(x_fast) + + if self.slow_path.lateral: + x_fast_lateral = self.slow_path.conv1_lateral(x_fast) + x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) + + for i, layer_name in enumerate(self.slow_path.res_layers): + res_layer = getattr(self.slow_path, layer_name) + x_slow = res_layer(x_slow) + res_layer_fast = getattr(self.fast_path, layer_name) + x_fast = res_layer_fast(x_fast) + if (i != len(self.slow_path.res_layers) - 1 + and self.slow_path.lateral): + # No fusion needed in the final stage + lateral_name = self.slow_path.lateral_connections[i] + conv_lateral = getattr(self.slow_path, lateral_name) + x_fast_lateral = conv_lateral(x_fast) + x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) + + out = (x_slow, x_fast) + + return out + + +if mmdet_imported: + MMDET_BACKBONES.register_module()(ResNet3dSlowFast) diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/base.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/base.py index 8d165096e1f96749bdab7f9b31f07ce000794d94..6343b3dd0f60cf7183183ff14f024b143a0bf138 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/base.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/base.py @@ -1,122 +1,122 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from abc import ABCMeta, abstractmethod - -import torch -import torch.nn as nn - -from ...core import top_k_accuracy -from ..builder import build_loss - - -class AvgConsensus(nn.Module): - """Average consensus module. - - Args: - dim (int): Decide which dim consensus function to apply. - Default: 1. - """ - def __init__(self, dim=1): - super().__init__() - self.dim = dim - - def forward(self, x): - """Defines the computation performed at every call.""" - return x.mean(dim=self.dim, keepdim=True) - - -class BaseHead(nn.Module, metaclass=ABCMeta): - """Base class for head. - - All Head should subclass it. - All subclass should overwrite: - - Methods:``init_weights``, initializing weights in some modules. - - Methods:``forward``, supporting to forward both for training and testing. - - Args: - num_classes (int): Number of classes to be classified. - in_channels (int): Number of channels in input feature. - loss_cls (dict): Config for building loss. - Default: dict(type='CrossEntropyLoss', loss_weight=1.0). - multi_class (bool): Determines whether it is a multi-class - recognition task. Default: False. - label_smooth_eps (float): Epsilon used in label smooth. - Reference: arxiv.org/abs/1906.02629. Default: 0. 
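A quick numeric illustration of the label_smooth_eps option documented above (the values are made up): for multi-class soft targets, the loss rescales the label as (1 - eps) * label + eps / num_classes, which keeps every row a valid probability distribution.

import torch

num_classes, eps = 4, 0.1
labels = torch.tensor([[0., 0., 1., 0.]])
smoothed = (1 - eps) * labels + eps / num_classes
# smoothed == tensor([[0.0250, 0.0250, 0.9250, 0.0250]])
assert torch.allclose(smoothed.sum(dim=1), torch.ones(1))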
- """ - def __init__(self, - num_classes, - in_channels, - loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0), - multi_class=False, - label_smooth_eps=0.0): - super().__init__() - self.num_classes = num_classes - self.in_channels = in_channels - self.loss_cls = build_loss(loss_cls) - self.multi_class = multi_class - self.label_smooth_eps = label_smooth_eps - - @abstractmethod - def init_weights(self): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - - @abstractmethod - def forward(self, x): - """Defines the computation performed at every call.""" - - def loss(self, cls_score, labels, **kwargs): - """Calculate the loss given output ``cls_score``, target ``labels``. - - Args: - cls_score (torch.Tensor): The output of the model. - labels (torch.Tensor): The target output of the model. - - Returns: - dict: A dict containing field 'loss_cls'(mandatory) - and 'top1_acc', 'top5_acc'(optional). - """ - losses = dict() - if labels.shape == torch.Size([]): - labels = labels.unsqueeze(0) - elif labels.dim() == 1 and labels.size()[0] == self.num_classes \ - and cls_score.size()[0] == 1: - # Fix a bug when training with soft labels and batch size is 1. - # When using soft labels, `labels` and `cls_socre` share the same - # shape. - labels = labels.unsqueeze(0) - - if not self.multi_class and cls_score.size() != labels.size(): - top_k_acc = top_k_accuracy(cls_score.detach().cpu().numpy(), - labels.detach().cpu().numpy(), (1, 5)) - losses['top1_acc'] = torch.tensor(top_k_acc[0], - device=cls_score.device, - dtype=torch.float32) - losses['top5_acc'] = torch.tensor(top_k_acc[1], - device=cls_score.device, - dtype=torch.float32) - - elif self.multi_class and self.label_smooth_eps != 0: - labels = ((1 - self.label_smooth_eps) * labels + - self.label_smooth_eps / self.num_classes) - - loss_cls = self.loss_cls(cls_score, labels, **kwargs) - # loss_cls may be dictionary or single tensor - if isinstance(loss_cls, dict): - losses.update(loss_cls) - else: - losses['loss_cls'] = loss_cls - - return losses +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn + +from ...core import top_k_accuracy +from ..builder import build_loss + + +class AvgConsensus(nn.Module): + """Average consensus module. + + Args: + dim (int): Decide which dim consensus function to apply. + Default: 1. + """ + def __init__(self, dim=1): + super().__init__() + self.dim = dim + + def forward(self, x): + """Defines the computation performed at every call.""" + return x.mean(dim=self.dim, keepdim=True) + + +class BaseHead(nn.Module, metaclass=ABCMeta): + """Base class for head. + + All Head should subclass it. + All subclass should overwrite: + - Methods:``init_weights``, initializing weights in some modules. + - Methods:``forward``, supporting to forward both for training and testing. 
+ + Args: + num_classes (int): Number of classes to be classified. + in_channels (int): Number of channels in input feature. + loss_cls (dict): Config for building loss. + Default: dict(type='CrossEntropyLoss', loss_weight=1.0). + multi_class (bool): Determines whether it is a multi-class + recognition task. Default: False. + label_smooth_eps (float): Epsilon used in label smooth. + Reference: arxiv.org/abs/1906.02629. Default: 0. + """ + def __init__(self, + num_classes, + in_channels, + loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0), + multi_class=False, + label_smooth_eps=0.0): + super().__init__() + self.num_classes = num_classes + self.in_channels = in_channels + self.loss_cls = build_loss(loss_cls) + self.multi_class = multi_class + self.label_smooth_eps = label_smooth_eps + + @abstractmethod + def init_weights(self): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + + @abstractmethod + def forward(self, x): + """Defines the computation performed at every call.""" + + def loss(self, cls_score, labels, **kwargs): + """Calculate the loss given output ``cls_score``, target ``labels``. + + Args: + cls_score (torch.Tensor): The output of the model. + labels (torch.Tensor): The target output of the model. + + Returns: + dict: A dict containing field 'loss_cls'(mandatory) + and 'top1_acc', 'top5_acc'(optional). + """ + losses = dict() + if labels.shape == torch.Size([]): + labels = labels.unsqueeze(0) + elif labels.dim() == 1 and labels.size()[0] == self.num_classes \ + and cls_score.size()[0] == 1: + # Fix a bug when training with soft labels and batch size is 1. + # When using soft labels, `labels` and `cls_socre` share the same + # shape. + labels = labels.unsqueeze(0) + + if not self.multi_class and cls_score.size() != labels.size(): + top_k_acc = top_k_accuracy(cls_score.detach().cpu().numpy(), + labels.detach().cpu().numpy(), (1, 5)) + losses['top1_acc'] = torch.tensor(top_k_acc[0], + device=cls_score.device, + dtype=torch.float32) + losses['top5_acc'] = torch.tensor(top_k_acc[1], + device=cls_score.device, + dtype=torch.float32) + + elif self.multi_class and self.label_smooth_eps != 0: + labels = ((1 - self.label_smooth_eps) * labels + + self.label_smooth_eps / self.num_classes) + + loss_cls = self.loss_cls(cls_score, labels, **kwargs) + # loss_cls may be dictionary or single tensor + if isinstance(loss_cls, dict): + losses.update(loss_cls) + else: + losses['loss_cls'] = loss_cls + + return losses diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/slowfast_head.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/slowfast_head.py index 30c3590fa96398b7282c3c47415a31e5e4b7c858..31de953e262024feb99fab0e3809fc96d2791d6a 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/slowfast_head.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/heads/slowfast_head.py @@ -1,92 +1,92 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -import torch -import torch.nn as nn -from mmcv.cnn import normal_init - -from ..builder import HEADS -from .base import BaseHead - - -@HEADS.register_module() -class SlowFastHead(BaseHead): - """The classification head for SlowFast. - - Args: - num_classes (int): Number of classes to be classified. - in_channels (int): Number of channels in input feature. - loss_cls (dict): Config for building loss. - Default: dict(type='CrossEntropyLoss'). - spatial_type (str): Pooling type in spatial dimension. Default: 'avg'. - dropout_ratio (float): Probability of dropout layer. Default: 0.8. - init_std (float): Std value for Initiation. Default: 0.01. - kwargs (dict, optional): Any keyword argument to be used to initialize - the head. - """ - def __init__(self, - num_classes, - in_channels, - loss_cls=dict(type='CrossEntropyLoss'), - spatial_type='avg', - dropout_ratio=0.8, - init_std=0.01, - **kwargs): - - super().__init__(num_classes, in_channels, loss_cls, **kwargs) - self.spatial_type = spatial_type - self.dropout_ratio = dropout_ratio - self.init_std = init_std - - if self.dropout_ratio != 0: - self.dropout = nn.Dropout(p=self.dropout_ratio) - else: - self.dropout = None - self.fc_cls = nn.Linear(in_channels, num_classes) - - if self.spatial_type == 'avg': - self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) - else: - self.avg_pool = None - - def init_weights(self): - """Initiate the parameters from scratch.""" - normal_init(self.fc_cls, std=self.init_std) - - def forward(self, x): - """Defines the computation performed at every call. - - Args: - x (torch.Tensor): The input data. - - Returns: - torch.Tensor: The classification scores for input samples. - """ - # ([N, channel_fast, T, H, W], [(N, channel_slow, T, H, W)]) - x_fast, x_slow = x - # ([N, channel_fast, 1, 1, 1], [N, channel_slow, 1, 1, 1]) - x_fast = self.avg_pool(x_fast) - x_slow = self.avg_pool(x_slow) - # [N, channel_fast + channel_slow, 1, 1, 1] - x = torch.cat((x_slow, x_fast), dim=1) - - if self.dropout is not None: - x = self.dropout(x) - - # [N x C] - x = x.view(x.size(0), -1) - # [N x num_classes] - cls_score = self.fc_cls(x) - - return cls_score.npu_format_cast(0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn as nn +from mmcv.cnn import normal_init + +from ..builder import HEADS +from .base import BaseHead + + +@HEADS.register_module() +class SlowFastHead(BaseHead): + """The classification head for SlowFast. + + Args: + num_classes (int): Number of classes to be classified. + in_channels (int): Number of channels in input feature. + loss_cls (dict): Config for building loss. + Default: dict(type='CrossEntropyLoss'). + spatial_type (str): Pooling type in spatial dimension. Default: 'avg'. + dropout_ratio (float): Probability of dropout layer. Default: 0.8. 
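The head above does little more than pool, concatenate and classify, which is easiest to see as a shape walk-through. The minimal sketch below drops the NPU-specific npu_format_cast(0) call, assumes the default SlowFast-R50 pathway widths (fast 256 and slow 2048 channels, hence in_channels = 2304), and uses 400 classes purely as an example.

import torch
import torch.nn as nn

x_fast = torch.randn(2, 256, 32, 7, 7)   # fast pathway features (assumed width)
x_slow = torch.randn(2, 2048, 4, 7, 7)   # slow pathway features (assumed width)

avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
x = torch.cat((avg_pool(x_slow), avg_pool(x_fast)), dim=1)  # [2, 2304, 1, 1, 1]
x = x.view(x.size(0), -1)                                   # [2, 2304]
cls_score = nn.Linear(2304, 400)(x)                         # example class count
assert cls_score.shape == (2, 400)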
+ init_std (float): Std value for Initiation. Default: 0.01. + kwargs (dict, optional): Any keyword argument to be used to initialize + the head. + """ + def __init__(self, + num_classes, + in_channels, + loss_cls=dict(type='CrossEntropyLoss'), + spatial_type='avg', + dropout_ratio=0.8, + init_std=0.01, + **kwargs): + + super().__init__(num_classes, in_channels, loss_cls, **kwargs) + self.spatial_type = spatial_type + self.dropout_ratio = dropout_ratio + self.init_std = init_std + + if self.dropout_ratio != 0: + self.dropout = nn.Dropout(p=self.dropout_ratio) + else: + self.dropout = None + self.fc_cls = nn.Linear(in_channels, num_classes) + + if self.spatial_type == 'avg': + self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) + else: + self.avg_pool = None + + def init_weights(self): + """Initiate the parameters from scratch.""" + normal_init(self.fc_cls, std=self.init_std) + + def forward(self, x): + """Defines the computation performed at every call. + + Args: + x (torch.Tensor): The input data. + + Returns: + torch.Tensor: The classification scores for input samples. + """ + # ([N, channel_fast, T, H, W], [(N, channel_slow, T, H, W)]) + x_fast, x_slow = x + # ([N, channel_fast, 1, 1, 1], [N, channel_slow, 1, 1, 1]) + x_fast = self.avg_pool(x_fast) + x_slow = self.avg_pool(x_slow) + # [N, channel_fast + channel_slow, 1, 1, 1] + x = torch.cat((x_slow, x_fast), dim=1) + + if self.dropout is not None: + x = self.dropout(x) + + # [N x C] + x = x.view(x.size(0), -1) + # [N x num_classes] + cls_score = self.fc_cls(x) + + return cls_score.npu_format_cast(0) diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/losses/cross_entropy_loss.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/losses/cross_entropy_loss.py index 11c9bfd4ddc109784c8a48a77286ed9663c7c53f..a0afe4232c36c25b3735f3efd9caa8de24e081e0 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/losses/cross_entropy_loss.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/losses/cross_entropy_loss.py @@ -1,133 +1,133 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -import torch.nn.functional as F - -from ..builder import LOSSES -from .base import BaseWeightedLoss - - -@LOSSES.register_module() -class CrossEntropyLoss(BaseWeightedLoss): - """Cross Entropy Loss. - - Support two kinds of labels and their corresponding loss type. It's worth - mentioning that loss type will be detected by the shape of ``cls_score`` - and ``label``. - 1) Hard label: This label is an integer array and all of the elements are - in the range [0, num_classes - 1]. This label's shape should be - ``cls_score``'s shape with the `num_classes` dimension removed. - 2) Soft label(probablity distribution over classes): This label is a - probability distribution and all of the elements are in the range - [0, 1]. This label's shape must be the same as ``cls_score``. 
For now, - only 2-dim soft label is supported. - - Args: - loss_weight (float): Factor scalar multiplied on the loss. - Default: 1.0. - class_weight (list[float] | None): Loss weight for each class. If set - as None, use the same weight 1 for all classes. Only applies - to CrossEntropyLoss and BCELossWithLogits (should not be set when - using other losses). Default: None. - """ - def __init__(self, loss_weight=1.0, class_weight=None): - super().__init__(loss_weight=loss_weight) - self.class_weight = None - if class_weight is not None: - self.class_weight = torch.Tensor(class_weight) - - def _forward(self, cls_score, label, **kwargs): - """Forward function. - - Args: - cls_score (torch.Tensor): The class score. - label (torch.Tensor): The ground truth label. - kwargs: Any keyword argument to be used to calculate - CrossEntropy loss. - - Returns: - torch.Tensor: The returned CrossEntropy loss. - """ - if cls_score.size() == label.size(): - # calculate loss for soft label - - assert cls_score.dim() == 2, 'Only support 2-dim soft label' - assert len(kwargs) == 0, \ - ('For now, no extra args are supported for soft label, ' - f'but get {kwargs}') - - lsm = F.log_softmax(cls_score, 1) - if self.class_weight is not None: - lsm = lsm * self.class_weight.unsqueeze(0) - loss_cls = -(label * lsm).sum(1) - - # default reduction 'mean' - if self.class_weight is not None: - # Use weighted average as pytorch CrossEntropyLoss does. - # For more information, please visit https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html # noqa - loss_cls = loss_cls.sum() / torch.sum( - self.class_weight.unsqueeze(0) * label) - else: - loss_cls = loss_cls.mean() - else: - # calculate loss for hard label - - if self.class_weight is not None: - assert 'weight' not in kwargs, \ - "The key 'weight' already exists." - kwargs['weight'] = self.class_weight.to(cls_score.device) - # cls_score = cls_score.type(torch.float32) - # label = label.type(torch.int32) - loss_cls = F.cross_entropy(cls_score, label, **kwargs) - - return loss_cls - - -@LOSSES.register_module() -class BCELossWithLogits(BaseWeightedLoss): - """Binary Cross Entropy Loss with logits. - - Args: - loss_weight (float): Factor scalar multiplied on the loss. - Default: 1.0. - class_weight (list[float] | None): Loss weight for each class. If set - as None, use the same weight 1 for all classes. Only applies - to CrossEntropyLoss and BCELossWithLogits (should not be set when - using other losses). Default: None. - """ - def __init__(self, loss_weight=1.0, class_weight=None): - super().__init__(loss_weight=loss_weight) - self.class_weight = None - if class_weight is not None: - self.class_weight = torch.Tensor(class_weight) - - def _forward(self, cls_score, label, **kwargs): - """Forward function. - - Args: - cls_score (torch.Tensor): The class score. - label (torch.Tensor): The ground truth label. - kwargs: Any keyword argument to be used to calculate - bce loss with logits. - - Returns: - torch.Tensor: The returned bce loss with logits. - """ - if self.class_weight is not None: - assert 'weight' not in kwargs, "The key 'weight' already exists." - kwargs['weight'] = self.class_weight.to(cls_score.device) - loss_cls = F.binary_cross_entropy_with_logits(cls_score, label, - **kwargs) - return loss_cls +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
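To compare the two branches of CrossEntropyLoss._forward more easily, here is a minimal, self-contained reproduction of the soft-label path with arbitrary example logits; it also checks that, for a one-hot target, the soft-label expression agrees with the plain F.cross_entropy used in the hard-label branch.

import torch
import torch.nn.functional as F

cls_score = torch.tensor([[2.0, 0.5, -1.0]])
soft_label = torch.tensor([[0.7, 0.2, 0.1]])       # rows sum to 1

lsm = F.log_softmax(cls_score, dim=1)
loss_soft = -(soft_label * lsm).sum(1).mean()      # soft-label branch

one_hot = torch.tensor([[1.0, 0.0, 0.0]])
loss_one_hot = -(one_hot * lsm).sum(1).mean()
assert torch.allclose(loss_one_hot, F.cross_entropy(cls_score, torch.tensor([0])))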
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn.functional as F + +from ..builder import LOSSES +from .base import BaseWeightedLoss + + +@LOSSES.register_module() +class CrossEntropyLoss(BaseWeightedLoss): + """Cross Entropy Loss. + + Support two kinds of labels and their corresponding loss type. It's worth + mentioning that loss type will be detected by the shape of ``cls_score`` + and ``label``. + 1) Hard label: This label is an integer array and all of the elements are + in the range [0, num_classes - 1]. This label's shape should be + ``cls_score``'s shape with the `num_classes` dimension removed. + 2) Soft label(probablity distribution over classes): This label is a + probability distribution and all of the elements are in the range + [0, 1]. This label's shape must be the same as ``cls_score``. For now, + only 2-dim soft label is supported. + + Args: + loss_weight (float): Factor scalar multiplied on the loss. + Default: 1.0. + class_weight (list[float] | None): Loss weight for each class. If set + as None, use the same weight 1 for all classes. Only applies + to CrossEntropyLoss and BCELossWithLogits (should not be set when + using other losses). Default: None. + """ + def __init__(self, loss_weight=1.0, class_weight=None): + super().__init__(loss_weight=loss_weight) + self.class_weight = None + if class_weight is not None: + self.class_weight = torch.Tensor(class_weight) + + def _forward(self, cls_score, label, **kwargs): + """Forward function. + + Args: + cls_score (torch.Tensor): The class score. + label (torch.Tensor): The ground truth label. + kwargs: Any keyword argument to be used to calculate + CrossEntropy loss. + + Returns: + torch.Tensor: The returned CrossEntropy loss. + """ + if cls_score.size() == label.size(): + # calculate loss for soft label + + assert cls_score.dim() == 2, 'Only support 2-dim soft label' + assert len(kwargs) == 0, \ + ('For now, no extra args are supported for soft label, ' + f'but get {kwargs}') + + lsm = F.log_softmax(cls_score, 1) + if self.class_weight is not None: + lsm = lsm * self.class_weight.unsqueeze(0) + loss_cls = -(label * lsm).sum(1) + + # default reduction 'mean' + if self.class_weight is not None: + # Use weighted average as pytorch CrossEntropyLoss does. + # For more information, please visit https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html # noqa + loss_cls = loss_cls.sum() / torch.sum( + self.class_weight.unsqueeze(0) * label) + else: + loss_cls = loss_cls.mean() + else: + # calculate loss for hard label + + if self.class_weight is not None: + assert 'weight' not in kwargs, \ + "The key 'weight' already exists." + kwargs['weight'] = self.class_weight.to(cls_score.device) + # cls_score = cls_score.type(torch.float32) + # label = label.type(torch.int32) + loss_cls = F.cross_entropy(cls_score, label, **kwargs) + + return loss_cls + + +@LOSSES.register_module() +class BCELossWithLogits(BaseWeightedLoss): + """Binary Cross Entropy Loss with logits. + + Args: + loss_weight (float): Factor scalar multiplied on the loss. + Default: 1.0. 
+ class_weight (list[float] | None): Loss weight for each class. If set + as None, use the same weight 1 for all classes. Only applies + to CrossEntropyLoss and BCELossWithLogits (should not be set when + using other losses). Default: None. + """ + def __init__(self, loss_weight=1.0, class_weight=None): + super().__init__(loss_weight=loss_weight) + self.class_weight = None + if class_weight is not None: + self.class_weight = torch.Tensor(class_weight) + + def _forward(self, cls_score, label, **kwargs): + """Forward function. + + Args: + cls_score (torch.Tensor): The class score. + label (torch.Tensor): The ground truth label. + kwargs: Any keyword argument to be used to calculate + bce loss with logits. + + Returns: + torch.Tensor: The returned bce loss with logits. + """ + if self.class_weight is not None: + assert 'weight' not in kwargs, "The key 'weight' already exists." + kwargs['weight'] = self.class_weight.to(cls_score.device) + loss_cls = F.binary_cross_entropy_with_logits(cls_score, label, + **kwargs) + return loss_cls diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/base.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/base.py index 35603218baa99c3f8caa13bfd76f1e46fbc1e117..b7a0c527ec36b603f98961b7c0d6b511d99f1f6d 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/base.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/base.py @@ -1,339 +1,339 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import warnings -from abc import ABCMeta, abstractmethod -from collections import OrderedDict - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -from mmcv.runner import auto_fp16 - -from .. import builder - - -class BaseRecognizer(nn.Module, metaclass=ABCMeta): - """Base class for recognizers. - - All recognizers should subclass it. - All subclass should overwrite: - - - Methods:``forward_train``, supporting to forward when training. - - Methods:``forward_test``, supporting to forward when testing. - - Args: - backbone (dict): Backbone modules to extract feature. - cls_head (dict | None): Classification head to process feature. - Default: None. - neck (dict | None): Neck for feature fusion. Default: None. - train_cfg (dict | None): Config for training. Default: None. - test_cfg (dict | None): Config for testing. Default: None. 
- """ - def __init__(self, - backbone, - cls_head=None, - neck=None, - train_cfg=None, - test_cfg=None): - super().__init__() - # record the source of the backbone - self.backbone_from = 'mmaction2' - - if backbone['type'].startswith('mmcls.'): - try: - import mmcls.models.builder as mmcls_builder - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install mmcls to use this backbone.') - backbone['type'] = backbone['type'][6:] - self.backbone = mmcls_builder.build_backbone(backbone) - self.backbone_from = 'mmcls' - elif backbone['type'].startswith('torchvision.'): - try: - import torchvision.models - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install torchvision to use this ' - 'backbone.') - backbone_type = backbone.pop('type')[12:] - self.backbone = torchvision.models.__dict__[backbone_type]( - **backbone) - # disable the classifier - self.backbone.classifier = nn.Identity() - self.backbone.fc = nn.Identity() - self.backbone_from = 'torchvision' - elif backbone['type'].startswith('timm.'): - try: - import timm - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install timm to use this ' - 'backbone.') - backbone_type = backbone.pop('type')[5:] - # disable the classifier - backbone['num_classes'] = 0 - self.backbone = timm.create_model(backbone_type, **backbone) - self.backbone_from = 'timm' - else: - self.backbone = builder.build_backbone(backbone) - - if neck is not None: - self.neck = builder.build_neck(neck) - - self.cls_head = builder.build_head(cls_head) if cls_head else None - - self.train_cfg = train_cfg - self.test_cfg = test_cfg - - # aux_info is the list of tensor names beyond 'imgs' and 'label' which - # will be used in train_step and val_step, data_batch should contain - # these tensors - self.aux_info = [] - if train_cfg is not None and 'aux_info' in train_cfg: - self.aux_info = train_cfg['aux_info'] - # max_testing_views should be int - self.max_testing_views = None - if test_cfg is not None and 'max_testing_views' in test_cfg: - self.max_testing_views = test_cfg['max_testing_views'] - assert isinstance(self.max_testing_views, int) - - if test_cfg is not None and 'feature_extraction' in test_cfg: - self.feature_extraction = test_cfg['feature_extraction'] - else: - self.feature_extraction = False - - # mini-batch blending, e.g. mixup, cutmix, etc. 
- self.blending = None - if train_cfg is not None and 'blending' in train_cfg: - from mmcv.utils import build_from_cfg - from mmaction.datasets.builder import BLENDINGS - self.blending = build_from_cfg(train_cfg['blending'], BLENDINGS) - - self.init_weights() - - self.fp16_enabled = False - - @property - def with_neck(self): - """bool: whether the recognizer has a neck""" - return hasattr(self, 'neck') and self.neck is not None - - @property - def with_cls_head(self): - """bool: whether the recognizer has a cls_head""" - return hasattr(self, 'cls_head') and self.cls_head is not None - - def init_weights(self): - """Initialize the model network weights.""" - if self.backbone_from in ['mmcls', 'mmaction2']: - self.backbone.init_weights() - elif self.backbone_from in ['torchvision', 'timm']: - warnings.warn('We do not initialize weights for backbones in ' - f'{self.backbone_from}, since the weights for ' - f'backbones in {self.backbone_from} are initialized' - 'in their __init__ functions.') - else: - raise NotImplementedError('Unsupported backbone source ' - f'{self.backbone_from}!') - - if self.with_cls_head: - self.cls_head.init_weights() - if self.with_neck: - self.neck.init_weights() - - @auto_fp16() - def extract_feat(self, imgs): - """Extract features through a backbone. - - Args: - imgs (torch.Tensor): The input images. - - Returns: - torch.tensor: The extracted features. - """ - if (hasattr(self.backbone, 'features') - and self.backbone_from == 'torchvision'): - x = self.backbone.features(imgs) - elif self.backbone_from == 'timm': - x = self.backbone.forward_features(imgs) - else: - x = self.backbone(imgs) - return x - - def average_clip(self, cls_score, num_segs=1): - """Averaging class score over multiple clips. - - Using different averaging types ('score' or 'prob' or None, - which defined in test_cfg) to computed the final averaged - class score. Only called in test mode. - - Args: - cls_score (torch.Tensor): Class score to be averaged. - num_segs (int): Number of clips for each input sample. - - Returns: - torch.Tensor: Averaged class score. - """ - if 'average_clips' not in self.test_cfg.keys(): - raise KeyError('"average_clips" must defined in test_cfg\'s keys') - - average_clips = self.test_cfg['average_clips'] - if average_clips not in ['score', 'prob', None]: - raise ValueError(f'{average_clips} is not supported. ' - f'Currently supported ones are ' - f'["score", "prob", None]') - - if average_clips is None: - return cls_score - - batch_size = cls_score.shape[0] - cls_score = cls_score.view(batch_size // num_segs, num_segs, -1) - - if average_clips == 'prob': - cls_score = F.softmax(cls_score, dim=2).mean(dim=1) - elif average_clips == 'score': - cls_score = cls_score.mean(dim=1) - - return cls_score - - @abstractmethod - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - @abstractmethod - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - - @abstractmethod - def forward_gradcam(self, imgs): - """Defines the computation performed at every all when using gradcam - utils.""" - - @staticmethod - def _parse_losses(losses): - """Parse the raw outputs (losses) of the network. - - Args: - losses (dict): Raw output of the network, which usually contain - losses and other necessary information. 
- - Returns: - tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor - which may be a weighted sum of all losses, log_vars contains - all the variables to be sent to the logger. - """ - log_vars = OrderedDict() - for loss_name, loss_value in losses.items(): - if isinstance(loss_value, torch.Tensor): - log_vars[loss_name] = loss_value.mean() - elif isinstance(loss_value, list): - log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) - else: - raise TypeError( - f'{loss_name} is not a tensor or list of tensors') - - loss = sum(_value for _key, _value in log_vars.items() - if 'loss' in _key) - - log_vars['loss'] = loss - for loss_name, loss_value in log_vars.items(): - # reduce loss when distributed training - if dist.is_available() and dist.is_initialized(): - loss_value = loss_value.data.clone() - dist.all_reduce(loss_value.div_(dist.get_world_size())) - log_vars[loss_name] = loss_value.item() - - return loss, log_vars - - def forward(self, imgs, label=None, return_loss=True, **kwargs): - """Define the computation performed at every call.""" - if kwargs.get('gradcam', False): - del kwargs['gradcam'] - return self.forward_gradcam(imgs, **kwargs) - if return_loss: - if label is None: - raise ValueError('Label should not be None.') - if self.blending is not None: - imgs, label = self.blending(imgs, label) - return self.forward_train(imgs, label, **kwargs) - - return self.forward_test(imgs, **kwargs) - - def train_step(self, data_batch, optimizer, **kwargs): - """The iteration step during training. - - This method defines an iteration step during training, except for the - back propagation and optimizer updating, which are done in an optimizer - hook. Note that in some complicated cases or models, the whole process - including back propagation and optimizer updating is also defined in - this method, such as GAN. - - Args: - data_batch (dict): The output of dataloader. - optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of - runner is passed to ``train_step()``. This argument is unused - and reserved. - - Returns: - dict: It should contain at least 3 keys: ``loss``, ``log_vars``, - ``num_samples``. - ``loss`` is a tensor for back propagation, which can be a - weighted sum of multiple losses. - ``log_vars`` contains all the variables to be sent to the - logger. - ``num_samples`` indicates the batch size (when the model is - DDP, it means the batch size on each GPU), which is used for - averaging the logs. - """ - imgs = data_batch['imgs'].npu().type(torch.float32) - label = data_batch['label'].npu().type(torch.int32) - - aux_info = {} - for item in self.aux_info: - assert item in data_batch - aux_info[item] = data_batch[item] - - losses = self(imgs, label, return_loss=True, **aux_info) - - loss, log_vars = self._parse_losses(losses) - - outputs = dict(loss=loss, - log_vars=log_vars, - num_samples=len(next(iter(data_batch.values())))) - - return outputs - - def val_step(self, data_batch, optimizer, **kwargs): - """The iteration step during validation. - - This method shares the same signature as :func:`train_step`, but used - during val epochs. Note that the evaluation after training epochs is - not implemented with this method, but an evaluation hook. 
- """ - imgs = data_batch['imgs'].npu().type(torch.float32) - label = data_batch['label'].npu().type(torch.int32) - - aux_info = {} - for item in self.aux_info: - aux_info[item] = data_batch[item] - - losses = self(imgs, label, return_loss=True, **aux_info) - - loss, log_vars = self._parse_losses(losses) - - outputs = dict(loss=loss, - log_vars=log_vars, - num_samples=len(next(iter(data_batch.values())))) - - return outputs +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from mmcv.runner import auto_fp16 + +from .. import builder + + +class BaseRecognizer(nn.Module, metaclass=ABCMeta): + """Base class for recognizers. + + All recognizers should subclass it. + All subclass should overwrite: + + - Methods:``forward_train``, supporting to forward when training. + - Methods:``forward_test``, supporting to forward when testing. + + Args: + backbone (dict): Backbone modules to extract feature. + cls_head (dict | None): Classification head to process feature. + Default: None. + neck (dict | None): Neck for feature fusion. Default: None. + train_cfg (dict | None): Config for training. Default: None. + test_cfg (dict | None): Config for testing. Default: None. 
+ """ + def __init__(self, + backbone, + cls_head=None, + neck=None, + train_cfg=None, + test_cfg=None): + super().__init__() + # record the source of the backbone + self.backbone_from = 'mmaction2' + + if backbone['type'].startswith('mmcls.'): + try: + import mmcls.models.builder as mmcls_builder + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install mmcls to use this backbone.') + backbone['type'] = backbone['type'][6:] + self.backbone = mmcls_builder.build_backbone(backbone) + self.backbone_from = 'mmcls' + elif backbone['type'].startswith('torchvision.'): + try: + import torchvision.models + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install torchvision to use this ' + 'backbone.') + backbone_type = backbone.pop('type')[12:] + self.backbone = torchvision.models.__dict__[backbone_type]( + **backbone) + # disable the classifier + self.backbone.classifier = nn.Identity() + self.backbone.fc = nn.Identity() + self.backbone_from = 'torchvision' + elif backbone['type'].startswith('timm.'): + try: + import timm + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install timm to use this ' + 'backbone.') + backbone_type = backbone.pop('type')[5:] + # disable the classifier + backbone['num_classes'] = 0 + self.backbone = timm.create_model(backbone_type, **backbone) + self.backbone_from = 'timm' + else: + self.backbone = builder.build_backbone(backbone) + + if neck is not None: + self.neck = builder.build_neck(neck) + + self.cls_head = builder.build_head(cls_head) if cls_head else None + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + # aux_info is the list of tensor names beyond 'imgs' and 'label' which + # will be used in train_step and val_step, data_batch should contain + # these tensors + self.aux_info = [] + if train_cfg is not None and 'aux_info' in train_cfg: + self.aux_info = train_cfg['aux_info'] + # max_testing_views should be int + self.max_testing_views = None + if test_cfg is not None and 'max_testing_views' in test_cfg: + self.max_testing_views = test_cfg['max_testing_views'] + assert isinstance(self.max_testing_views, int) + + if test_cfg is not None and 'feature_extraction' in test_cfg: + self.feature_extraction = test_cfg['feature_extraction'] + else: + self.feature_extraction = False + + # mini-batch blending, e.g. mixup, cutmix, etc. 
+ self.blending = None + if train_cfg is not None and 'blending' in train_cfg: + from mmcv.utils import build_from_cfg + from mmaction.datasets.builder import BLENDINGS + self.blending = build_from_cfg(train_cfg['blending'], BLENDINGS) + + self.init_weights() + + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the recognizer has a neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_cls_head(self): + """bool: whether the recognizer has a cls_head""" + return hasattr(self, 'cls_head') and self.cls_head is not None + + def init_weights(self): + """Initialize the model network weights.""" + if self.backbone_from in ['mmcls', 'mmaction2']: + self.backbone.init_weights() + elif self.backbone_from in ['torchvision', 'timm']: + warnings.warn('We do not initialize weights for backbones in ' + f'{self.backbone_from}, since the weights for ' + f'backbones in {self.backbone_from} are initialized' + 'in their __init__ functions.') + else: + raise NotImplementedError('Unsupported backbone source ' + f'{self.backbone_from}!') + + if self.with_cls_head: + self.cls_head.init_weights() + if self.with_neck: + self.neck.init_weights() + + @auto_fp16() + def extract_feat(self, imgs): + """Extract features through a backbone. + + Args: + imgs (torch.Tensor): The input images. + + Returns: + torch.tensor: The extracted features. + """ + if (hasattr(self.backbone, 'features') + and self.backbone_from == 'torchvision'): + x = self.backbone.features(imgs) + elif self.backbone_from == 'timm': + x = self.backbone.forward_features(imgs) + else: + x = self.backbone(imgs) + return x + + def average_clip(self, cls_score, num_segs=1): + """Averaging class score over multiple clips. + + Using different averaging types ('score' or 'prob' or None, + which defined in test_cfg) to computed the final averaged + class score. Only called in test mode. + + Args: + cls_score (torch.Tensor): Class score to be averaged. + num_segs (int): Number of clips for each input sample. + + Returns: + torch.Tensor: Averaged class score. + """ + if 'average_clips' not in self.test_cfg.keys(): + raise KeyError('"average_clips" must defined in test_cfg\'s keys') + + average_clips = self.test_cfg['average_clips'] + if average_clips not in ['score', 'prob', None]: + raise ValueError(f'{average_clips} is not supported. ' + f'Currently supported ones are ' + f'["score", "prob", None]') + + if average_clips is None: + return cls_score + + batch_size = cls_score.shape[0] + cls_score = cls_score.view(batch_size // num_segs, num_segs, -1) + + if average_clips == 'prob': + cls_score = F.softmax(cls_score, dim=2).mean(dim=1) + elif average_clips == 'score': + cls_score = cls_score.mean(dim=1) + + return cls_score + + @abstractmethod + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + @abstractmethod + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + + @abstractmethod + def forward_gradcam(self, imgs): + """Defines the computation performed at every all when using gradcam + utils.""" + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. 
+ + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def forward(self, imgs, label=None, return_loss=True, **kwargs): + """Define the computation performed at every call.""" + if kwargs.get('gradcam', False): + del kwargs['gradcam'] + return self.forward_gradcam(imgs, **kwargs) + if return_loss: + if label is None: + raise ValueError('Label should not be None.') + if self.blending is not None: + imgs, label = self.blending(imgs, label) + return self.forward_train(imgs, label, **kwargs) + + return self.forward_test(imgs, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data_batch (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + imgs = data_batch['imgs'].npu().type(torch.float32) + label = data_batch['label'].npu().type(torch.int32) + + aux_info = {} + for item in self.aux_info: + assert item in data_batch + aux_info[item] = data_batch[item] + + losses = self(imgs, label, return_loss=True, **aux_info) + + loss, log_vars = self._parse_losses(losses) + + outputs = dict(loss=loss, + log_vars=log_vars, + num_samples=len(next(iter(data_batch.values())))) + + return outputs + + def val_step(self, data_batch, optimizer, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. 
+ """ + imgs = data_batch['imgs'].npu().type(torch.float32) + label = data_batch['label'].npu().type(torch.int32) + + aux_info = {} + for item in self.aux_info: + aux_info[item] = data_batch[item] + + losses = self(imgs, label, return_loss=True, **aux_info) + + loss, log_vars = self._parse_losses(losses) + + outputs = dict(loss=loss, + log_vars=log_vars, + num_samples=len(next(iter(data_batch.values())))) + + return outputs diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer2d.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer2d.py index 7d145f6dd83cab07d056cb40ce712c0ab9485c5c..3d9a755a5a53ebe1432ac413ee6ab85566d07b06 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer2d.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer2d.py @@ -1,195 +1,195 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -from torch import nn - -from ..builder import RECOGNIZERS -from .base import BaseRecognizer - - -@RECOGNIZERS.register_module() -class Recognizer2D(BaseRecognizer): - """2D recognizer model framework.""" - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - assert self.with_cls_head - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - losses = dict() - - x = self.extract_feat(imgs) - - if self.backbone_from in ['torchvision', 'timm']: - if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): - # apply adaptive avg pooling - x = nn.AdaptiveAvgPool2d(1)(x) - x = x.reshape((x.shape[0], -1)) - x = x.reshape(x.shape + (1, 1)) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, loss_aux = self.neck(x, labels.squeeze()) - x = x.squeeze(2) - num_segs = 1 - losses.update(loss_aux) - - cls_score = self.cls_head(x, num_segs) - gt_labels = labels.squeeze() - loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) - losses.update(loss_cls) - - return losses - - def _do_test(self, imgs): - """Defines the computation performed at every call when evaluation, - testing and gradcam.""" - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - x = self.extract_feat(imgs) - - if self.backbone_from in ['torchvision', 'timm']: - if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): - # apply adaptive avg pooling - x = nn.AdaptiveAvgPool2d(1)(x) - x = x.reshape((x.shape[0], -1)) - x = x.reshape(x.shape + (1, 1)) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - x = x.squeeze(2) - num_segs = 1 - - if self.feature_extraction: - # perform spatial pooling - avg_pool = 
nn.AdaptiveAvgPool2d(1) - x = avg_pool(x) - # squeeze dimensions - x = x.reshape((batches, num_segs, -1)) - # temporal average pooling - x = x.mean(axis=1) - return x - - # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] - # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] - # `num_crops` is calculated by: - # 1) `twice_sample` in `SampleFrames` - # 2) `num_sample_positions` in `DenseSampleFrames` - # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` - # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` - - # should have cls_head if not extracting features - cls_score = self.cls_head(x, num_segs) - - assert cls_score.size()[0] % batches == 0 - # calculate num_crops automatically - cls_score = self.average_clip(cls_score, - cls_score.size()[0] // batches) - return cls_score - - def _do_fcn_test(self, imgs): - # [N, num_crops * num_segs, C, H, W] -> - # [N * num_crops * num_segs, C, H, W] - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = self.test_cfg.get('num_segs', self.backbone.num_segments) - - if self.test_cfg.get('flip', False): - imgs = torch.flip(imgs, [-1]) - x = self.extract_feat(imgs) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - else: - x = x.reshape((-1, num_segs) + x.shape[1:]).transpose( - 1, 2).contiguous() - - # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] - # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] - # `num_crops` is calculated by: - # 1) `twice_sample` in `SampleFrames` - # 2) `num_sample_positions` in `DenseSampleFrames` - # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` - # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` - cls_score = self.cls_head(x, fcn_test=True) - - assert cls_score.size()[0] % batches == 0 - # calculate num_crops automatically - cls_score = self.average_clip(cls_score, - cls_score.size()[0] // batches) - return cls_score - - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - imgs = imgs.npu().type(torch.float16) - if self.test_cfg.get('fcn_test', False): - # If specified, spatially fully-convolutional testing is performed - assert not self.feature_extraction - assert self.with_cls_head - return self._do_fcn_test(imgs).cpu().numpy() - return self._do_test(imgs).cpu().numpy() - - def forward_dummy(self, imgs, softmax=False): - """Used for computing network FLOPs. - - See ``tools/analysis/get_flops.py``. - - Args: - imgs (torch.Tensor): Input images. - - Returns: - Tensor: Class score. 
- """ - assert self.with_cls_head - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - x = self.extract_feat(imgs) - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - x = x.squeeze(2) - num_segs = 1 - - outs = self.cls_head(x, num_segs) - if softmax: - outs = nn.functional.softmax(outs) - return (outs, ) - - def forward_gradcam(self, imgs): - """Defines the computation performed at every call when using gradcam - utils.""" - assert self.with_cls_head - return self._do_test(imgs) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +from torch import nn + +from ..builder import RECOGNIZERS +from .base import BaseRecognizer + + +@RECOGNIZERS.register_module() +class Recognizer2D(BaseRecognizer): + """2D recognizer model framework.""" + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + assert self.with_cls_head + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + losses = dict() + + x = self.extract_feat(imgs) + + if self.backbone_from in ['torchvision', 'timm']: + if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): + # apply adaptive avg pooling + x = nn.AdaptiveAvgPool2d(1)(x) + x = x.reshape((x.shape[0], -1)) + x = x.reshape(x.shape + (1, 1)) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, loss_aux = self.neck(x, labels.squeeze()) + x = x.squeeze(2) + num_segs = 1 + losses.update(loss_aux) + + cls_score = self.cls_head(x, num_segs) + gt_labels = labels.squeeze() + loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) + losses.update(loss_cls) + + return losses + + def _do_test(self, imgs): + """Defines the computation performed at every call when evaluation, + testing and gradcam.""" + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + x = self.extract_feat(imgs) + + if self.backbone_from in ['torchvision', 'timm']: + if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): + # apply adaptive avg pooling + x = nn.AdaptiveAvgPool2d(1)(x) + x = x.reshape((x.shape[0], -1)) + x = x.reshape(x.shape + (1, 1)) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + x = x.squeeze(2) + num_segs = 1 + + if self.feature_extraction: + # perform spatial pooling + avg_pool = nn.AdaptiveAvgPool2d(1) + x = avg_pool(x) + # squeeze dimensions + x = x.reshape((batches, num_segs, -1)) + # temporal average pooling + x = x.mean(axis=1) + return x + + # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] + # When 
using `TSMHead`, shape is [batch_size * num_crops, num_classes] + # `num_crops` is calculated by: + # 1) `twice_sample` in `SampleFrames` + # 2) `num_sample_positions` in `DenseSampleFrames` + # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` + # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` + + # should have cls_head if not extracting features + cls_score = self.cls_head(x, num_segs) + + assert cls_score.size()[0] % batches == 0 + # calculate num_crops automatically + cls_score = self.average_clip(cls_score, + cls_score.size()[0] // batches) + return cls_score + + def _do_fcn_test(self, imgs): + # [N, num_crops * num_segs, C, H, W] -> + # [N * num_crops * num_segs, C, H, W] + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = self.test_cfg.get('num_segs', self.backbone.num_segments) + + if self.test_cfg.get('flip', False): + imgs = torch.flip(imgs, [-1]) + x = self.extract_feat(imgs) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + else: + x = x.reshape((-1, num_segs) + x.shape[1:]).transpose( + 1, 2).contiguous() + + # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] + # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] + # `num_crops` is calculated by: + # 1) `twice_sample` in `SampleFrames` + # 2) `num_sample_positions` in `DenseSampleFrames` + # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` + # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` + cls_score = self.cls_head(x, fcn_test=True) + + assert cls_score.size()[0] % batches == 0 + # calculate num_crops automatically + cls_score = self.average_clip(cls_score, + cls_score.size()[0] // batches) + return cls_score + + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + imgs = imgs.npu().type(torch.float16) + if self.test_cfg.get('fcn_test', False): + # If specified, spatially fully-convolutional testing is performed + assert not self.feature_extraction + assert self.with_cls_head + return self._do_fcn_test(imgs).cpu().numpy() + return self._do_test(imgs).cpu().numpy() + + def forward_dummy(self, imgs, softmax=False): + """Used for computing network FLOPs. + + See ``tools/analysis/get_flops.py``. + + Args: + imgs (torch.Tensor): Input images. + + Returns: + Tensor: Class score. 
+ """ + assert self.with_cls_head + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + x = self.extract_feat(imgs) + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + x = x.squeeze(2) + num_segs = 1 + + outs = self.cls_head(x, num_segs) + if softmax: + outs = nn.functional.softmax(outs) + return (outs, ) + + def forward_gradcam(self, imgs): + """Defines the computation performed at every call when using gradcam + utils.""" + assert self.with_cls_head + return self._do_test(imgs) diff --git a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer3d.py b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer3d.py index fe1a61ef2bad97c0c35ee9c97b7fd66373381585..9a1b70449dfdaf5b598586a98d91a1fbcdd057dc 100644 --- a/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer3d.py +++ b/PyTorch/contrib/cv/video/NonLocal/mmaction/models/recognizers/recognizer3d.py @@ -1,133 +1,133 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -from torch import nn - -from ..builder import RECOGNIZERS -from .base import BaseRecognizer - - -@RECOGNIZERS.register_module() -class Recognizer3D(BaseRecognizer): - """3D recognizer model framework.""" - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - assert self.with_cls_head - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - losses = dict() - - x = self.extract_feat(imgs) - if self.with_neck: - x, loss_aux = self.neck(x, labels.squeeze()) - losses.update(loss_aux) - - cls_score = self.cls_head(x) - gt_labels = labels.squeeze() - loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) - losses.update(loss_cls) - - return losses - - def _do_test(self, imgs): - """Defines the computation performed at every call when evaluation, - testing and gradcam.""" - batches = imgs.shape[0] - num_segs = imgs.shape[1] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - - if self.max_testing_views is not None: - total_views = imgs.shape[0] - assert num_segs == total_views, ( - 'max_testing_views is only compatible ' - 'with batch_size == 1') - view_ptr = 0 - feats = [] - while view_ptr < total_views: - batch_imgs = imgs[view_ptr:view_ptr + self.max_testing_views] - x = self.extract_feat(batch_imgs) - if self.with_neck: - x, _ = self.neck(x) - feats.append(x) - view_ptr += self.max_testing_views - # should consider the case that feat is a tuple - if isinstance(feats[0], tuple): - len_tuple = len(feats[0]) - feat = [ - torch.cat([x[i] for x in feats]) for i in range(len_tuple) - ] - feat = tuple(feat) - else: - feat = torch.cat(feats) - else: - feat = self.extract_feat(imgs) - if self.with_neck: - feat, _ = self.neck(feat) - - if self.feature_extraction: - # perform 
spatio-temporal pooling - avg_pool = nn.AdaptiveAvgPool3d(1) - if isinstance(feat, tuple): - feat = [avg_pool(x) for x in feat] - # concat them - feat = torch.cat(feat, axis=1) - else: - feat = avg_pool(feat) - # squeeze dimensions - feat = feat.reshape((batches, num_segs, -1)) - # temporal average pooling - feat = feat.mean(axis=1) - return feat - - # should have cls_head if not extracting features - assert self.with_cls_head - cls_score = self.cls_head(feat) - cls_score = self.average_clip(cls_score, num_segs) - return cls_score - - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - imgs = imgs.npu().type(torch.float16) - return self._do_test(imgs).cpu().numpy() - - def forward_dummy(self, imgs, softmax=False): - """Used for computing network FLOPs. - - See ``tools/analysis/get_flops.py``. - - Args: - imgs (torch.Tensor): Input images. - - Returns: - Tensor: Class score. - """ - assert self.with_cls_head - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - x = self.extract_feat(imgs) - - if self.with_neck: - x, _ = self.neck(x) - - outs = self.cls_head(x) - if softmax: - outs = nn.functional.softmax(outs) - return (outs, ) - - def forward_gradcam(self, imgs): - """Defines the computation performed at every call when using gradcam - utils.""" - assert self.with_cls_head - return self._do_test(imgs) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import torch +from torch import nn + +from ..builder import RECOGNIZERS +from .base import BaseRecognizer + + +@RECOGNIZERS.register_module() +class Recognizer3D(BaseRecognizer): + """3D recognizer model framework.""" + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + assert self.with_cls_head + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + losses = dict() + + x = self.extract_feat(imgs) + if self.with_neck: + x, loss_aux = self.neck(x, labels.squeeze()) + losses.update(loss_aux) + + cls_score = self.cls_head(x) + gt_labels = labels.squeeze() + loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) + losses.update(loss_cls) + + return losses + + def _do_test(self, imgs): + """Defines the computation performed at every call when evaluation, + testing and gradcam.""" + batches = imgs.shape[0] + num_segs = imgs.shape[1] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + + if self.max_testing_views is not None: + total_views = imgs.shape[0] + assert num_segs == total_views, ( + 'max_testing_views is only compatible ' + 'with batch_size == 1') + view_ptr = 0 + feats = [] + while view_ptr < total_views: + batch_imgs = imgs[view_ptr:view_ptr + self.max_testing_views] + x = self.extract_feat(batch_imgs) + if self.with_neck: + x, _ = self.neck(x) + feats.append(x) + view_ptr += self.max_testing_views + # should consider the case that feat is a tuple + if isinstance(feats[0], tuple): + len_tuple = len(feats[0]) + feat = [ + torch.cat([x[i] for x in feats]) for i in range(len_tuple) + ] + feat = tuple(feat) + else: + feat = torch.cat(feats) + else: + feat = self.extract_feat(imgs) + if self.with_neck: + feat, _ = self.neck(feat) + + if self.feature_extraction: + # perform spatio-temporal pooling + avg_pool = nn.AdaptiveAvgPool3d(1) + if isinstance(feat, tuple): + feat = [avg_pool(x) for x in feat] + # concat them + feat = torch.cat(feat, axis=1) + else: + feat = avg_pool(feat) + # squeeze dimensions + feat = feat.reshape((batches, num_segs, -1)) + # temporal average pooling + feat = feat.mean(axis=1) + return feat + + # should have cls_head if not extracting features + assert self.with_cls_head + cls_score = self.cls_head(feat) + cls_score = self.average_clip(cls_score, num_segs) + return cls_score + + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + imgs = imgs.npu().type(torch.float16) + return self._do_test(imgs).cpu().numpy() + + def forward_dummy(self, imgs, softmax=False): + """Used for computing network FLOPs. + + See ``tools/analysis/get_flops.py``. + + Args: + imgs (torch.Tensor): Input images. + + Returns: + Tensor: Class score. 
+ """ + assert self.with_cls_head + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + x = self.extract_feat(imgs) + + if self.with_neck: + x, _ = self.neck(x) + + outs = self.cls_head(x) + if softmax: + outs = nn.functional.softmax(outs) + return (outs, ) + + def forward_gradcam(self, imgs): + """Defines the computation performed at every call when using gradcam + utils.""" + assert self.with_cls_head + return self._do_test(imgs) diff --git a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/dist_utils.py b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/dist_utils.py index 91346643e8f70fb97a407de4b6fda417cd9fa6ee..9e3f2b072b230c45b051270b5730c30822c74a2c 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/dist_utils.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/dist_utils.py @@ -1,185 +1,185 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -import os -import subprocess -from collections import OrderedDict - -import torch -import torch.multiprocessing as mp -from torch import distributed as dist -from torch._utils import (_flatten_dense_tensors, _take_tensors, - _unflatten_dense_tensors) - -from mmcv.utils import TORCH_VERSION - - -def init_dist(launcher, backend='nccl', **kwargs): - if mp.get_start_method(allow_none=True) is None: - mp.set_start_method('spawn') - if launcher == 'pytorch': - _init_dist_pytorch(backend, **kwargs) - elif launcher == 'mpi': - _init_dist_mpi(backend, **kwargs) - elif launcher == 'slurm': - _init_dist_slurm(backend, **kwargs) - else: - raise ValueError(f'Invalid launcher type: {launcher}') - - -def _init_dist_pytorch(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['RANK']) - offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) - num_gpus = torch.npu.device_count() - torch.npu.set_device((rank + offset) % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_mpi(backend, **kwargs): - # TODO: use local_rank instead of rank % num_gpus - rank = int(os.environ['OMPI_COMM_WORLD_RANK']) - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(rank % num_gpus) - dist.init_process_group(backend=backend, **kwargs) - - -def _init_dist_slurm(backend, port=None): - """Initialize slurm distributed training environment. - If argument ``port`` is not specified, then the master port will be system - environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system - environment variable, then a default port ``29500`` will be used. - Args: - backend (str): Backend of torch.distributed. - port (int, optional): Master port. Defaults to None. 
- """ - proc_id = int(os.environ['SLURM_PROCID']) - ntasks = int(os.environ['SLURM_NTASKS']) - node_list = os.environ['SLURM_NODELIST'] - num_gpus = torch.cuda.device_count() - torch.cuda.set_device(proc_id % num_gpus) - addr = subprocess.getoutput( - f'scontrol show hostname {node_list} | head -n1') - # specify master port - if port is not None: - os.environ['MASTER_PORT'] = str(port) - elif 'MASTER_PORT' in os.environ: - pass # use MASTER_PORT in the environment variable - else: - # 29500 is torch.distributed default port - os.environ['MASTER_PORT'] = '29500' - # use MASTER_ADDR in the environment variable if it already exists - if 'MASTER_ADDR' not in os.environ: - os.environ['MASTER_ADDR'] = addr - os.environ['WORLD_SIZE'] = str(ntasks) - os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) - os.environ['RANK'] = str(proc_id) - dist.init_process_group(backend=backend) - - -def get_dist_info(): - if TORCH_VERSION < '1.0': - initialized = dist._initialized - else: - if dist.is_available(): - initialized = dist.is_initialized() - else: - initialized = False - if initialized: - rank = dist.get_rank() - world_size = dist.get_world_size() - else: - rank = 0 - world_size = 1 - return rank, world_size - - -def master_only(func): - - @functools.wraps(func) - def wrapper(*args, **kwargs): - rank, _ = get_dist_info() - if rank == 0: - return func(*args, **kwargs) - - return wrapper - - -def allreduce_params(params, coalesce=True, bucket_size_mb=-1): - """Allreduce parameters. - Args: - params (list[torch.Parameters]): List of parameters or buffers of a - model. - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - _, world_size = get_dist_info() - if world_size == 1: - return - params = [param.data for param in params] - if coalesce: - _allreduce_coalesced(params, world_size, bucket_size_mb) - else: - for tensor in params: - dist.all_reduce(tensor.div_(world_size)) - - -def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): - """Allreduce gradients. - Args: - params (list[torch.Parameters]): List of parameters of a model - coalesce (bool, optional): Whether allreduce parameters as a whole. - Defaults to True. - bucket_size_mb (int, optional): Size of bucket, the unit is MB. - Defaults to -1. - """ - grads = [ - param.grad.data for param in params - if param.requires_grad and param.grad is not None - ] - _, world_size = get_dist_info() - if world_size == 1: - return - if coalesce: - _allreduce_coalesced(grads, world_size, bucket_size_mb) - else: - for tensor in grads: - dist.all_reduce(tensor.div_(world_size)) - - -def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): - if bucket_size_mb > 0: - bucket_size_bytes = bucket_size_mb * 1024 * 1024 - buckets = _take_tensors(tensors, bucket_size_bytes) - else: - buckets = OrderedDict() - for tensor in tensors: - tp = tensor.type() - if tp not in buckets: - buckets[tp] = [] - buckets[tp].append(tensor) - buckets = buckets.values() - - for bucket in buckets: - flat_tensors = _flatten_dense_tensors(bucket) - dist.all_reduce(flat_tensors) - flat_tensors.div_(world_size) - for tensor, synced in zip( - bucket, _unflatten_dense_tensors(flat_tensors, bucket)): +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from mmcv.utils import TORCH_VERSION + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + offset = 0 if os.getenv('NPUID', None) is None else int(os.environ['NPUID']) + num_gpus = torch.npu.device_count() + torch.npu.set_device((rank + offset) % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if TORCH_VERSION < '1.0': + initialized = dist._initialized + else: + if dist.is_available(): + initialized = dist.is_initialized() + else: + initialized = False + if initialized: + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. 
+ """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): tensor.copy_(synced) \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/distributed.py b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/distributed.py index fb2bae922930929dc6fa46ad612463cc335dc208..4c89d09fc490488789ce565a7bad43ce7ded1568 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/distributed.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/distributed.py @@ -1,119 +1,119 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from torch.nn.parallel.distributed import (DistributedDataParallel, - _find_tensors) - -from mmcv import print_log -from mmcv.utils import TORCH_VERSION -from .scatter_gather import scatter_kwargs - - -class MMDistributedDataParallel(DistributedDataParallel): - """The DDP module that supports DataContainer. - - MMDDP has two main differences with PyTorch DDP: - - - It supports a custom type :class:`DataContainer` which allows more - flexible control of input data. - - It implement two APIs ``train_step()`` and ``val_step()``. - """ - - def scatter(self, inputs, kwargs, device_ids): - return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) - - def train_step(self, *inputs, **kwargs): - """train_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.train_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. 
- if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids and False: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.train_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - inputs, kwargs = self.scatter(inputs, kwargs, [-1]) - output = self.module.train_step(*inputs[0], **kwargs[0]) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output - - def val_step(self, *inputs, **kwargs): - """val_step() API for module wrapped by DistributedDataParallel. - - This method is basically the same as - ``DistributedDataParallel.forward()``, while replacing - ``self.module.forward()`` with ``self.module.val_step()``. - It is compatible with PyTorch 1.1 - 1.5. - """ - # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the - # end of backward to the beginning of forward. - if (TORCH_VERSION >= '1.7' and 'parrots' - not in TORCH_VERSION) and self.reducer._rebuild_buckets(): - print_log( - 'Reducer buckets have been rebuilt in this iteration.', - logger='mmcv') - - if getattr(self, 'require_forward_param_sync', True): - self._sync_params() - if self.device_ids: - inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) - if len(self.device_ids) == 1: - output = self.module.val_step(*inputs[0], **kwargs[0]) - else: - outputs = self.parallel_apply( - self._module_copies[:len(inputs)], inputs, kwargs) - output = self.gather(outputs, self.output_device) - else: - output = self.module.val_step(*inputs, **kwargs) - - if torch.is_grad_enabled() and getattr( - self, 'require_backward_grad_sync', True): - if self.find_unused_parameters: - self.reducer.prepare_for_backward(list(_find_tensors(output))) - else: - self.reducer.prepare_for_backward([]) - else: - if TORCH_VERSION > '1.2': - self.require_forward_param_sync = False - return output +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from mmcv import print_log +from mmcv.utils import TORCH_VERSION +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. 
+ + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. + """ + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids and False: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. 
+ if (TORCH_VERSION >= '1.7' and 'parrots' + not in TORCH_VERSION) and self.reducer._rebuild_buckets(): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if TORCH_VERSION > '1.2': + self.require_forward_param_sync = False + return output diff --git a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/optimizer.py b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/optimizer.py index 385978353dfebd6f7d24d3ce37ffafe63591563e..5956d430e829d63867f1a460cfe1e9f80c81ccc0 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/optimizer.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/additional_need/mmcv/optimizer.py @@ -1,182 +1,182 @@ -# Copyright (c) Open-MMLab. All rights reserved. -# -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy -from collections import defaultdict -from itertools import chain - -from torch.nn.utils import clip_grad - -from ..dist_utils import allreduce_grads -from ..fp16_utils import LossScaler, wrap_fp16_model -from .hook import HOOKS, Hook -from apex import amp - -@HOOKS.register_module() -class OptimizerHook(Hook): - - def __init__(self, grad_clip=None): - self.grad_clip = grad_clip - - def clip_grads(self, params): - params = list( - filter(lambda p: p.requires_grad and p.grad is not None, params)) - #todo add a line - if len(params) > 0: - return clip_grad.clip_grad_norm_(params, **self.grad_clip) - - def after_train_iter(self, runner): - runner.optimizer.zero_grad() - with amp.scale_loss(runner.outputs['loss'], runner.optimizer ) as scaled_loss: - scaled_loss.backward() - #print('run wan with amp') - if self.grad_clip is not None: - grad_norm = self.clip_grads(runner.model.npu().parameters()) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) - #print('before optimizer step') - runner.optimizer.step() - #print('after optimizer step') - - - -@HOOKS.register_module() -class Fp16OptimizerHook(OptimizerHook): - """FP16 optimizer hook. - - The steps of fp16 optimizer is as follows. - 1. Scale the loss value. - 2. BP in the fp16 model. - 2. Copy gradients from fp16 model to fp32 weights. - 3. Update fp32 weights. - 4. Copy updated parameters from fp32 weights to fp16 model. 
- - Refer to https://arxiv.org/abs/1710.03740 for more details. - - Args: - loss_scale (float | str | dict): Scale factor multiplied with loss. - If loss_scale is a float, static loss scaling will be used with - the specified scale. If loss_scale is a string, it must be - 'dynamic', then dynamic loss scaling will be used. - It can also be a dict containing arguments of LossScaler. - Defaults to 512. - """ - - def __init__(self, - grad_clip=None, - coalesce=True, - bucket_size_mb=-1, - loss_scale=512., - distributed=True): - self.grad_clip = grad_clip - self.coalesce = coalesce - self.bucket_size_mb = bucket_size_mb - self.distributed = distributed - if loss_scale == 'dynamic': - self.loss_scaler = LossScaler(mode='dynamic') - elif isinstance(loss_scale, float): - self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') - elif isinstance(loss_scale, dict): - self.loss_scaler = LossScaler(**loss_scale) - else: - raise ValueError('loss_scale must be of type float, dict, or ' - f'"dynamic", got {loss_scale}') - - def before_run(self, runner): - """Preparing steps before Mixed Precision Training. - - 1. Make a master copy of fp32 weights for optimization. - 2. Convert the main model from fp32 to fp16. - """ - # keep a copy of fp32 weights - old_groups = runner.optimizer.param_groups - runner.optimizer.param_groups = copy.deepcopy( - runner.optimizer.param_groups) - state = defaultdict(dict) - p_map = { - old_p: p - for old_p, p in zip( - chain(*(g['params'] for g in old_groups)), - chain(*(g['params'] for g in runner.optimizer.param_groups))) - } - for k, v in runner.optimizer.state.items(): - state[p_map[k]] = v - runner.optimizer.state = state - # convert model to fp16 - wrap_fp16_model(runner.model) - - def copy_grads_to_fp32(self, fp16_net, fp32_weights): - """Copy gradients from fp16 model to fp32 weight copy.""" - for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): - if fp16_param.grad is not None: - if fp32_param.grad is None: - fp32_param.grad = fp32_param.data.new(fp32_param.size()) - fp32_param.grad.copy_(fp16_param.grad) - - def copy_params_to_fp16(self, fp16_net, fp32_weights): - """Copy updated params from fp32 weight copy to fp16 model.""" - for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): - fp16_param.data.copy_(fp32_param.data) - - def after_train_iter(self, runner): - """Backward optimization steps for Mixed Precision Training. For - dynamic loss scaling, please refer `loss_scalar.py` - - 1. Scale the loss by a scale factor. - 2. Backward the loss to obtain the gradients (fp16). - 3. Copy gradients from the model to the fp32 weight copy. - 4. Scale the gradients back and update the fp32 weight copy. - 5. Copy back the params from fp32 weight copy to the fp16 model. 
- """ - # clear grads of last iteration - runner.model.zero_grad() - runner.optimizer.zero_grad() - # scale the loss value - scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale - scaled_loss.backward() - # copy fp16 grads in the model to fp32 params in the optimizer - - fp32_weights = [] - for param_group in runner.optimizer.param_groups: - fp32_weights += param_group['params'] - self.copy_grads_to_fp32(runner.model, fp32_weights) - # allreduce grads - if self.distributed: - allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) - - has_overflow = self.loss_scaler.has_overflow(fp32_weights) - # if has overflow, skip this iteration - if not has_overflow: - # scale the gradients back - for param in fp32_weights: - if param.grad is not None: - param.grad.div_(self.loss_scaler.loss_scale) - if self.grad_clip is not None: - grad_norm = self.clip_grads(fp32_weights) - if grad_norm is not None: - # Add grad norm to the logger - runner.log_buffer.update({'grad_norm': float(grad_norm)}, - runner.outputs['num_samples']) - # update fp32 params - runner.optimizer.step() - # copy fp32 params to the fp16 model - self.copy_params_to_fp16(runner.model, fp32_weights) - self.loss_scaler.update_scale(has_overflow) - if has_overflow: - runner.logger.warning('Check overflow, downscale loss scale ' - f'to {self.loss_scaler.cur_scale}') +# Copyright (c) Open-MMLab. All rights reserved. +# +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from collections import defaultdict +from itertools import chain + +from torch.nn.utils import clip_grad + +from ..dist_utils import allreduce_grads +from ..fp16_utils import LossScaler, wrap_fp16_model +from .hook import HOOKS, Hook +from apex import amp + +@HOOKS.register_module() +class OptimizerHook(Hook): + + def __init__(self, grad_clip=None): + self.grad_clip = grad_clip + + def clip_grads(self, params): + params = list( + filter(lambda p: p.requires_grad and p.grad is not None, params)) + #todo add a line + if len(params) > 0: + return clip_grad.clip_grad_norm_(params, **self.grad_clip) + + def after_train_iter(self, runner): + runner.optimizer.zero_grad() + with amp.scale_loss(runner.outputs['loss'], runner.optimizer ) as scaled_loss: + scaled_loss.backward() + #print('run wan with amp') + if self.grad_clip is not None: + grad_norm = self.clip_grads(runner.model.npu().parameters()) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + #print('before optimizer step') + runner.optimizer.step() + #print('after optimizer step') + + + +@HOOKS.register_module() +class Fp16OptimizerHook(OptimizerHook): + """FP16 optimizer hook. + + The steps of fp16 optimizer is as follows. + 1. Scale the loss value. + 2. BP in the fp16 model. + 2. Copy gradients from fp16 model to fp32 weights. + 3. Update fp32 weights. + 4. Copy updated parameters from fp32 weights to fp16 model. 
+ + Refer to https://arxiv.org/abs/1710.03740 for more details. + + Args: + loss_scale (float | str | dict): Scale factor multiplied with loss. + If loss_scale is a float, static loss scaling will be used with + the specified scale. If loss_scale is a string, it must be + 'dynamic', then dynamic loss scaling will be used. + It can also be a dict containing arguments of LossScaler. + Defaults to 512. + """ + + def __init__(self, + grad_clip=None, + coalesce=True, + bucket_size_mb=-1, + loss_scale=512., + distributed=True): + self.grad_clip = grad_clip + self.coalesce = coalesce + self.bucket_size_mb = bucket_size_mb + self.distributed = distributed + if loss_scale == 'dynamic': + self.loss_scaler = LossScaler(mode='dynamic') + elif isinstance(loss_scale, float): + self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static') + elif isinstance(loss_scale, dict): + self.loss_scaler = LossScaler(**loss_scale) + else: + raise ValueError('loss_scale must be of type float, dict, or ' + f'"dynamic", got {loss_scale}') + + def before_run(self, runner): + """Preparing steps before Mixed Precision Training. + + 1. Make a master copy of fp32 weights for optimization. + 2. Convert the main model from fp32 to fp16. + """ + # keep a copy of fp32 weights + old_groups = runner.optimizer.param_groups + runner.optimizer.param_groups = copy.deepcopy( + runner.optimizer.param_groups) + state = defaultdict(dict) + p_map = { + old_p: p + for old_p, p in zip( + chain(*(g['params'] for g in old_groups)), + chain(*(g['params'] for g in runner.optimizer.param_groups))) + } + for k, v in runner.optimizer.state.items(): + state[p_map[k]] = v + runner.optimizer.state = state + # convert model to fp16 + wrap_fp16_model(runner.model) + + def copy_grads_to_fp32(self, fp16_net, fp32_weights): + """Copy gradients from fp16 model to fp32 weight copy.""" + for fp32_param, fp16_param in zip(fp32_weights, fp16_net.parameters()): + if fp16_param.grad is not None: + if fp32_param.grad is None: + fp32_param.grad = fp32_param.data.new(fp32_param.size()) + fp32_param.grad.copy_(fp16_param.grad) + + def copy_params_to_fp16(self, fp16_net, fp32_weights): + """Copy updated params from fp32 weight copy to fp16 model.""" + for fp16_param, fp32_param in zip(fp16_net.parameters(), fp32_weights): + fp16_param.data.copy_(fp32_param.data) + + def after_train_iter(self, runner): + """Backward optimization steps for Mixed Precision Training. For + dynamic loss scaling, please refer `loss_scalar.py` + + 1. Scale the loss by a scale factor. + 2. Backward the loss to obtain the gradients (fp16). + 3. Copy gradients from the model to the fp32 weight copy. + 4. Scale the gradients back and update the fp32 weight copy. + 5. Copy back the params from fp32 weight copy to the fp16 model. 
+ """ + # clear grads of last iteration + runner.model.zero_grad() + runner.optimizer.zero_grad() + # scale the loss value + scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale + scaled_loss.backward() + # copy fp16 grads in the model to fp32 params in the optimizer + + fp32_weights = [] + for param_group in runner.optimizer.param_groups: + fp32_weights += param_group['params'] + self.copy_grads_to_fp32(runner.model, fp32_weights) + # allreduce grads + if self.distributed: + allreduce_grads(fp32_weights, self.coalesce, self.bucket_size_mb) + + has_overflow = self.loss_scaler.has_overflow(fp32_weights) + # if has overflow, skip this iteration + if not has_overflow: + # scale the gradients back + for param in fp32_weights: + if param.grad is not None: + param.grad.div_(self.loss_scaler.loss_scale) + if self.grad_clip is not None: + grad_norm = self.clip_grads(fp32_weights) + if grad_norm is not None: + # Add grad norm to the logger + runner.log_buffer.update({'grad_norm': float(grad_norm)}, + runner.outputs['num_samples']) + # update fp32 params + runner.optimizer.step() + # copy fp32 params to the fp16 model + self.copy_params_to_fp16(runner.model, fp32_weights) + self.loss_scaler.update_scale(has_overflow) + if has_overflow: + runner.logger.warning('Check overflow, downscale loss scale ' + f'to {self.loss_scaler.cur_scale}') diff --git a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py index a2efdbe74322f30057214c44af9804bd7ae93c19..f5475765077c996f0a54187b24a09986a57851d7 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. 
valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=16, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.0025, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 60 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=16, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.0025, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 60 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-1p-npu/' \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p_perf.py 
b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p_perf.py index bde3e578dd5ffd7106e9fa96bb11b9ed233308a8..829d7bb6f9ea901814e479e931e2a99397dabf1d 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p_perf.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p_perf.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=16, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.0025, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 
gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 1 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=16, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + 
ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.0025, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 1 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-1p-npu/' \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py index edf87d1e4bac895dfd7a21e77f8f1194dd1b4984..d67fd10c1aa2c00eca331a248fa6b9676bbf64b2 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. 
valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=42, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.008, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 70 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=42, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.008, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 70 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-8p-npu/' \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p_perf.py 
b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p_perf.py index b0d83a2679cfe62835e0596ea7a7e8e55d78ced1..35f1356e12d9de0ae3c4a94b235062ff903822c3 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p_perf.py +++ b/PyTorch/contrib/cv/video/R(2+1)D/configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_8p_perf.py @@ -1,113 +1,113 @@ -# -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -_base_ = [ - '../../_base_/models/r2plus1d_r34.py', - '../../_base_/default_runtime.py' -] - -# dataset settings -dataset_type = 'RawframeDataset' -data_root = 'data/ucf101/rawframes/' -data_root_val = 'data/ucf101/rawframes/' -split = 1 # official train/test splits. valid numbers: 1, 2, 3 -ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' -ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) -train_pipeline = [ - dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='RandomResizedCrop'), - dict(type='Resize', scale=(224, 224), keep_ratio=False), - dict(type='Flip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs', 'label']) -] -val_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=1, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='CenterCrop', crop_size=224), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -test_pipeline = [ - dict( - type='SampleFrames', - clip_len=8, - frame_interval=8, - num_clips=10, - test_mode=True), - dict(type='RawFrameDecode'), - dict(type='Resize', scale=(-1, 256)), - dict(type='ThreeCrop', crop_size=256), - dict(type='Flip', flip_ratio=0), - dict(type='Normalize', **img_norm_cfg), - dict(type='FormatShape', input_format='NCTHW'), - dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), - dict(type='ToTensor', keys=['imgs']) -] -data = dict( - videos_per_gpu=42, - workers_per_gpu=4, - train=dict( - type=dataset_type, - ann_file=ann_file_train, - data_prefix=data_root, - pipeline=train_pipeline), - val=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=val_pipeline), - test=dict( - type=dataset_type, - ann_file=ann_file_val, - data_prefix=data_root_val, - pipeline=test_pipeline)) -# optimizer -optimizer = dict( - type='SGD', lr=0.008, momentum=0.9, - weight_decay=0.0001) # this lr is used for 8 
gpus -optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) -# learning policy -# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) -lr_config = dict(policy='CosineAnnealing', min_lr=0) -total_epochs = 1 -# total_epochs = 90 - -# runtime settings -checkpoint_config = dict(interval=5) -evaluation = dict( - interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) -#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' -find_unused_parameters = True -load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' -resume_from = None - +# +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_base_ = [ + '../../_base_/models/r2plus1d_r34.py', + '../../_base_/default_runtime.py' +] + +# dataset settings +dataset_type = 'RawframeDataset' +data_root = 'data/ucf101/rawframes/' +data_root_val = 'data/ucf101/rawframes/' +split = 1 # official train/test splits. valid numbers: 1, 2, 3 +ann_file_train = f'data/ucf101/ucf101_train_split_{split}_rawframes.txt' +ann_file_val = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +ann_file_test = f'data/ucf101/ucf101_val_split_{split}_rawframes.txt' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False) +train_pipeline = [ + dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='RandomResizedCrop'), + dict(type='Resize', scale=(224, 224), keep_ratio=False), + dict(type='Flip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs', 'label']) +] +val_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=1, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='CenterCrop', crop_size=224), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +test_pipeline = [ + dict( + type='SampleFrames', + clip_len=8, + frame_interval=8, + num_clips=10, + test_mode=True), + dict(type='RawFrameDecode'), + dict(type='Resize', scale=(-1, 256)), + dict(type='ThreeCrop', crop_size=256), + dict(type='Flip', flip_ratio=0), + dict(type='Normalize', **img_norm_cfg), + dict(type='FormatShape', input_format='NCTHW'), + dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]), + dict(type='ToTensor', keys=['imgs']) +] +data = dict( + videos_per_gpu=42, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=ann_file_train, + data_prefix=data_root, + pipeline=train_pipeline), + val=dict( + type=dataset_type, + 
ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=val_pipeline), + test=dict( + type=dataset_type, + ann_file=ann_file_val, + data_prefix=data_root_val, + pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', lr=0.008, momentum=0.9, + weight_decay=0.0001) # this lr is used for 8 gpus +optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2)) +# learning policy +# lr_config = dict(policy='step', steps=[1,2,3], lrs=[1e-3,1e-4,1e-5]) +lr_config = dict(policy='CosineAnnealing', min_lr=0) +total_epochs = 1 +# total_epochs = 90 + +# runtime settings +checkpoint_config = dict(interval=5) +evaluation = dict( + interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy']) +#work_dir = './work_dirs/r2plus1d_r34_8x8x1_180e_ucf101_rgb3/' +find_unused_parameters = True +load_from = 'https://download.openmmlab.com/mmaction/recognition/r2plus1d/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb/r2plus1d_r34_256p_8x8x1_180e_kinetics400_rgb_20200729-aa94765e.pth' +resume_from = None + work_dir = './work_dirs/r2plus1d-8p-npu/' \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/modelzoo_level.txt b/PyTorch/contrib/cv/video/R(2+1)D/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/modelzoo_level.txt +++ b/PyTorch/contrib/cv/video/R(2+1)D/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/R(2+1)D/test/onnx.sh b/PyTorch/contrib/cv/video/R(2+1)D/test/onnx.sh index 4b04b49017fff5085d927c2c1d152faad664842d..c9272a2993fd9fca51c0bd2033027aaf3e0851bf 100644 --- a/PyTorch/contrib/cv/video/R(2+1)D/test/onnx.sh +++ b/PyTorch/contrib/cv/video/R(2+1)D/test/onnx.sh @@ -1,10 +1,10 @@ -#!/bin/bash - -python3.7 ./tools/deployment/pytorch2onnx.py \ - ./configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py ./work_dirs/r2plus1d-1p-npu/best_top1_acc_epoch_35.pth \ - --verify --output-file=r2plus1d.onnx --shape 1 3 3 8 256 256 - -#简化onnx。 -python3.7 -m onnxsim --input-shape="1,3,3,8,256,256" --dynamic-input-shape r2plus1d.onnx r2plus1d_sim.onnx - +#!/bin/bash + +python3.7 ./tools/deployment/pytorch2onnx.py \ + ./configs/recognition/r2plus1d/r2plus1d_ucf101_rgb_1p.py ./work_dirs/r2plus1d-1p-npu/best_top1_acc_epoch_35.pth \ + --verify --output-file=r2plus1d.onnx --shape 1 3 3 8 256 256 + +#简化onnx。 +python3.7 -m onnxsim --input-shape="1,3,3,8,256,256" --dynamic-input-shape r2plus1d.onnx r2plus1d_sim.onnx + \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/models/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/models/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/models/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/models/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/tracker/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/tracker/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/tracker/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/tracker/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/utils/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/utils/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/utils/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/pysot/utils/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/toolkit/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/toolkit/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/toolkit/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/toolkit/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/vot_iter/__init__.py b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/vot_iter/__init__.py index a5f8598aa44dc6b32162d43ee60c98a1725037ef..89552b1d3f5f6255840161c8c17cf314ab3fedff 100644 --- a/PyTorch/contrib/cv/video/SiamRPN/pysot-master/vot_iter/__init__.py +++ b/PyTorch/contrib/cv/video/SiamRPN/pysot-master/vot_iter/__init__.py @@ -1,13 +1,13 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and # limitations under the License. \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/SlowFast/config/default_runtime.py b/PyTorch/contrib/cv/video/SlowFast/config/default_runtime.py index 41329bf37107afc6238c8360bc11dd80efd93324..7652836b33c6b6c3927a2e0bea18322d4cc13788 100644 --- a/PyTorch/contrib/cv/video/SlowFast/config/default_runtime.py +++ b/PyTorch/contrib/cv/video/SlowFast/config/default_runtime.py @@ -1,27 +1,27 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -checkpoint_config = dict(interval=5) -log_config = dict( - interval=1, - hooks=[ - dict(type='TextLoggerHook'), - # dict(type='TensorboardLoggerHook'), - ]) -# runtime settings -dist_params = dict(backend='nccl') -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +checkpoint_config = dict(interval=5) +log_config = dict( + interval=1, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook'), + ]) +# runtime settings +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/apis/train.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/apis/train.py index 59f1bc0a8ab759f36a0485076f0585add1bd384e..d50c7f7e6ae9f8487dea82a33bb3351356bbaaf8 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/apis/train.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/apis/train.py @@ -1,267 +1,267 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import copy as cp -import os.path as osp - -from apex import amp - -import torch -from mmcv.parallel import MMDataParallel, MMDistributedDataParallel -from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, - build_optimizer, get_dist_info) -from mmcv.runner.hooks import Fp16OptimizerHook - -from ..core import (DistEvalHook, EvalHook, OmniSourceDistSamplerSeedHook, - OmniSourceRunner) -from ..datasets import build_dataloader, build_dataset -from ..utils import PreciseBNHook, get_root_logger -from .test import multi_gpu_test - - -def train_model(model, - dataset, - cfg, - distributed=False, - validate=False, - test=dict(test_best=False, test_last=False), - timestamp=None, - meta=None): - """Train model entry function. - - Args: - model (nn.Module): The model to be trained. - dataset (:obj:`Dataset`): Train dataset. - cfg (dict): The config dict for training. - distributed (bool): Whether to use distributed training. - Default: False. - validate (bool): Whether to do evaluation. Default: False. - test (dict): The testing option, with two keys: test_last & test_best. - The value is True or False, indicating whether to test the - corresponding checkpoint. - Default: dict(test_best=False, test_last=False). - timestamp (str | None): Local time for runner. Default: None. - meta (dict | None): Meta dict to record some important information. 
- Default: None - """ - logger = get_root_logger(log_level=cfg.log_level) - - # prepare data loaders - dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] - - dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get( - 'workers_per_gpu', 1), - num_gpus=len(cfg.gpu_ids), - dist=distributed, - seed=cfg.seed) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('train_dataloader', {})) - - if cfg.omnisource: - # The option can override videos_per_gpu - train_ratio = cfg.data.get('train_ratio', [1] * len(dataset)) - omni_videos_per_gpu = cfg.data.get('omni_videos_per_gpu', None) - if omni_videos_per_gpu is None: - dataloader_settings = [dataloader_setting] * len(dataset) - else: - dataloader_settings = [] - for videos_per_gpu in omni_videos_per_gpu: - this_setting = cp.deepcopy(dataloader_setting) - this_setting['videos_per_gpu'] = videos_per_gpu - dataloader_settings.append(this_setting) - data_loaders = [ - build_dataloader(ds, **setting) - for ds, setting in zip(dataset, dataloader_settings) - ] - - else: - data_loaders = [ - build_dataloader(ds, **dataloader_setting) for ds in dataset - ] - - # build runner - optimizer = build_optimizer(model, cfg.optimizer) - - # Allow Amp to perform casts as required by the opt_level - if cfg.AMP: - # model, optimizer = amp.initialize(model.cuda(), - # optimizer, - # opt_level=cfg.OPT_LEVEL, - # loss_scale=cfg.LOSS_SCALE) - model, optimizer = amp.initialize(model.npu(), - optimizer, - opt_level=cfg.OPT_LEVEL, - loss_scale=cfg.LOSS_SCALE, - combine_grad=True) - - # put model on gpus - if distributed: - find_unused_parameters = cfg.get('find_unused_parameters', False) - # Sets the `find_unused_parameters` parameter in - # torch.nn.parallel.DistributedDataParallel - model = MMDistributedDataParallel( - model, - device_ids=[torch.npu.current_device()], - broadcast_buffers=False, - find_unused_parameters=find_unused_parameters) - # model = MMDistributedDataParallel( - # model, - # device_ids=[torch.cuda.current_device()], - # broadcast_buffers=False, - # find_unused_parameters=find_unused_parameters) - - else: - # In 1-p training, we don't use Dataparallel - # model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), - # device_ids=cfg.gpu_ids) - model = model.npu() - - Runner = OmniSourceRunner if cfg.omnisource else EpochBasedRunner - runner = Runner(model, - optimizer=optimizer, - work_dir=cfg.work_dir, - logger=logger, - meta=meta, - distributed=distributed) - # an ugly workaround to make .log and .log.json filenames the same - runner.timestamp = timestamp - - # fp16 setting - fp16_cfg = cfg.get('fp16', None) - if fp16_cfg is not None: - optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config, - **fp16_cfg, - distributed=distributed) - elif distributed and 'type' not in cfg.optimizer_config: - optimizer_config = OptimizerHook(**cfg.optimizer_config) - else: - optimizer_config = cfg.optimizer_config - - # register hooks - runner.register_training_hooks(cfg.lr_config, optimizer_config, - cfg.checkpoint_config, cfg.log_config, - cfg.get('momentum_config', None)) - if distributed: - if cfg.omnisource: - runner.register_hook(OmniSourceDistSamplerSeedHook()) - else: - runner.register_hook(DistSamplerSeedHook()) - - # precise bn setting - if cfg.get('precise_bn', False): - precise_bn_dataset = build_dataset(cfg.data.train) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=0, # save memory and time - num_gpus=len(cfg.gpu_ids), - 
dist=distributed, - seed=cfg.seed) - data_loader_precise_bn = build_dataloader(precise_bn_dataset, - **dataloader_setting) - precise_bn_hook = PreciseBNHook(data_loader_precise_bn, - **cfg.get('precise_bn')) - runner.register_hook(precise_bn_hook) - - if validate: - eval_cfg = cfg.get('evaluation', {}) - val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get('workers_per_gpu', 1), - # cfg.gpus will be ignored if distributed - num_gpus=len(cfg.gpu_ids), - dist=distributed, - shuffle=False) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('val_dataloader', {})) - val_dataloader = build_dataloader(val_dataset, **dataloader_setting) - eval_hook = DistEvalHook(val_dataloader, **eval_cfg) if distributed \ - else EvalHook(val_dataloader, **eval_cfg) - runner.register_hook(eval_hook) - - if cfg.resume_from: - runner.resume(cfg.resume_from) - elif cfg.load_from: - runner.load_checkpoint(cfg.load_from) - runner_kwargs = dict() - if cfg.omnisource: - runner_kwargs = dict(train_ratio=train_ratio) - runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs) - - if test['test_last'] or test['test_best']: - best_ckpt_path = None - if test['test_best']: - if hasattr(eval_hook, 'best_ckpt_path'): - best_ckpt_path = eval_hook.best_ckpt_path - - if best_ckpt_path is None or not osp.exists(best_ckpt_path): - test['test_best'] = False - if best_ckpt_path is None: - runner.logger.info('Warning: test_best set as True, but ' - 'is not applicable ' - '(eval_hook.best_ckpt_path is None)') - else: - runner.logger.info('Warning: test_best set as True, but ' - 'is not applicable (best_ckpt ' - f'{best_ckpt_path} not found)') - if not test['test_last']: - return - - test_dataset = build_dataset(cfg.data.test, dict(test_mode=True)) - gpu_collect = cfg.get('evaluation', {}).get('gpu_collect', False) - tmpdir = cfg.get('evaluation', {}).get('tmpdir', - osp.join(cfg.work_dir, 'tmp')) - dataloader_setting = dict( - videos_per_gpu=cfg.data.get('videos_per_gpu', 1), - workers_per_gpu=cfg.data.get('workers_per_gpu', 1), - num_gpus=len(cfg.gpu_ids), - dist=distributed, - shuffle=False) - dataloader_setting = dict(dataloader_setting, - **cfg.data.get('test_dataloader', {})) - - test_dataloader = build_dataloader(test_dataset, **dataloader_setting) - - names, ckpts = [], [] - - if test['test_last']: - names.append('last') - ckpts.append(None) - if test['test_best']: - names.append('best') - ckpts.append(best_ckpt_path) - - for name, ckpt in zip(names, ckpts): - if ckpt is not None: - runner.load_checkpoint(ckpt) - - outputs = multi_gpu_test(runner.model, test_dataloader, tmpdir, - gpu_collect) - rank, _ = get_dist_info() - if rank == 0: - out = osp.join(cfg.work_dir, f'{name}_pred.pkl') - test_dataset.dump_results(outputs, out) - - eval_cfg = cfg.get('evaluation', {}) - for key in [ - 'interval', 'tmpdir', 'start', 'gpu_collect', - 'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers' - ]: - eval_cfg.pop(key, None) - - eval_res = test_dataset.evaluate(outputs, **eval_cfg) - runner.logger.info(f'Testing results of the {name} checkpoint') - for metric_name, val in eval_res.items(): - runner.logger.info(f'{metric_name}: {val:.04f}') +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import copy as cp +import os.path as osp + +from apex import amp + +import torch +from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, + build_optimizer, get_dist_info) +from mmcv.runner.hooks import Fp16OptimizerHook + +from ..core import (DistEvalHook, EvalHook, OmniSourceDistSamplerSeedHook, + OmniSourceRunner) +from ..datasets import build_dataloader, build_dataset +from ..utils import PreciseBNHook, get_root_logger +from .test import multi_gpu_test + + +def train_model(model, + dataset, + cfg, + distributed=False, + validate=False, + test=dict(test_best=False, test_last=False), + timestamp=None, + meta=None): + """Train model entry function. + + Args: + model (nn.Module): The model to be trained. + dataset (:obj:`Dataset`): Train dataset. + cfg (dict): The config dict for training. + distributed (bool): Whether to use distributed training. + Default: False. + validate (bool): Whether to do evaluation. Default: False. + test (dict): The testing option, with two keys: test_last & test_best. + The value is True or False, indicating whether to test the + corresponding checkpoint. + Default: dict(test_best=False, test_last=False). + timestamp (str | None): Local time for runner. Default: None. + meta (dict | None): Meta dict to record some important information. 
+ Default: None + """ + logger = get_root_logger(log_level=cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + + dataloader_setting = dict(videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get( + 'workers_per_gpu', 1), + num_gpus=len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('train_dataloader', {})) + + if cfg.omnisource: + # The option can override videos_per_gpu + train_ratio = cfg.data.get('train_ratio', [1] * len(dataset)) + omni_videos_per_gpu = cfg.data.get('omni_videos_per_gpu', None) + if omni_videos_per_gpu is None: + dataloader_settings = [dataloader_setting] * len(dataset) + else: + dataloader_settings = [] + for videos_per_gpu in omni_videos_per_gpu: + this_setting = cp.deepcopy(dataloader_setting) + this_setting['videos_per_gpu'] = videos_per_gpu + dataloader_settings.append(this_setting) + data_loaders = [ + build_dataloader(ds, **setting) + for ds, setting in zip(dataset, dataloader_settings) + ] + + else: + data_loaders = [ + build_dataloader(ds, **dataloader_setting) for ds in dataset + ] + + # build runner + optimizer = build_optimizer(model, cfg.optimizer) + + # Allow Amp to perform casts as required by the opt_level + if cfg.AMP: + # model, optimizer = amp.initialize(model.cuda(), + # optimizer, + # opt_level=cfg.OPT_LEVEL, + # loss_scale=cfg.LOSS_SCALE) + model, optimizer = amp.initialize(model.npu(), + optimizer, + opt_level=cfg.OPT_LEVEL, + loss_scale=cfg.LOSS_SCALE, + combine_grad=True) + + # put model on gpus + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # torch.nn.parallel.DistributedDataParallel + model = MMDistributedDataParallel( + model, + device_ids=[torch.npu.current_device()], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + # model = MMDistributedDataParallel( + # model, + # device_ids=[torch.cuda.current_device()], + # broadcast_buffers=False, + # find_unused_parameters=find_unused_parameters) + + else: + # In 1-p training, we don't use Dataparallel + # model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), + # device_ids=cfg.gpu_ids) + model = model.npu() + + Runner = OmniSourceRunner if cfg.omnisource else EpochBasedRunner + runner = Runner(model, + optimizer=optimizer, + work_dir=cfg.work_dir, + logger=logger, + meta=meta, + distributed=distributed) + # an ugly workaround to make .log and .log.json filenames the same + runner.timestamp = timestamp + + # fp16 setting + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config, + **fp16_cfg, + distributed=distributed) + elif distributed and 'type' not in cfg.optimizer_config: + optimizer_config = OptimizerHook(**cfg.optimizer_config) + else: + optimizer_config = cfg.optimizer_config + + # register hooks + runner.register_training_hooks(cfg.lr_config, optimizer_config, + cfg.checkpoint_config, cfg.log_config, + cfg.get('momentum_config', None)) + if distributed: + if cfg.omnisource: + runner.register_hook(OmniSourceDistSamplerSeedHook()) + else: + runner.register_hook(DistSamplerSeedHook()) + + # precise bn setting + if cfg.get('precise_bn', False): + precise_bn_dataset = build_dataset(cfg.data.train) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=0, # save memory and time + num_gpus=len(cfg.gpu_ids), + 
dist=distributed, + seed=cfg.seed) + data_loader_precise_bn = build_dataloader(precise_bn_dataset, + **dataloader_setting) + precise_bn_hook = PreciseBNHook(data_loader_precise_bn, + **cfg.get('precise_bn')) + runner.register_hook(precise_bn_hook) + + if validate: + eval_cfg = cfg.get('evaluation', {}) + val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get('workers_per_gpu', 1), + # cfg.gpus will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('val_dataloader', {})) + val_dataloader = build_dataloader(val_dataset, **dataloader_setting) + eval_hook = DistEvalHook(val_dataloader, **eval_cfg) if distributed \ + else EvalHook(val_dataloader, **eval_cfg) + runner.register_hook(eval_hook) + + if cfg.resume_from: + runner.resume(cfg.resume_from) + elif cfg.load_from: + runner.load_checkpoint(cfg.load_from) + runner_kwargs = dict() + if cfg.omnisource: + runner_kwargs = dict(train_ratio=train_ratio) + runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs) + + if test['test_last'] or test['test_best']: + best_ckpt_path = None + if test['test_best']: + if hasattr(eval_hook, 'best_ckpt_path'): + best_ckpt_path = eval_hook.best_ckpt_path + + if best_ckpt_path is None or not osp.exists(best_ckpt_path): + test['test_best'] = False + if best_ckpt_path is None: + runner.logger.info('Warning: test_best set as True, but ' + 'is not applicable ' + '(eval_hook.best_ckpt_path is None)') + else: + runner.logger.info('Warning: test_best set as True, but ' + 'is not applicable (best_ckpt ' + f'{best_ckpt_path} not found)') + if not test['test_last']: + return + + test_dataset = build_dataset(cfg.data.test, dict(test_mode=True)) + gpu_collect = cfg.get('evaluation', {}).get('gpu_collect', False) + tmpdir = cfg.get('evaluation', {}).get('tmpdir', + osp.join(cfg.work_dir, 'tmp')) + dataloader_setting = dict( + videos_per_gpu=cfg.data.get('videos_per_gpu', 1), + workers_per_gpu=cfg.data.get('workers_per_gpu', 1), + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + dataloader_setting = dict(dataloader_setting, + **cfg.data.get('test_dataloader', {})) + + test_dataloader = build_dataloader(test_dataset, **dataloader_setting) + + names, ckpts = [], [] + + if test['test_last']: + names.append('last') + ckpts.append(None) + if test['test_best']: + names.append('best') + ckpts.append(best_ckpt_path) + + for name, ckpt in zip(names, ckpts): + if ckpt is not None: + runner.load_checkpoint(ckpt) + + outputs = multi_gpu_test(runner.model, test_dataloader, tmpdir, + gpu_collect) + rank, _ = get_dist_info() + if rank == 0: + out = osp.join(cfg.work_dir, f'{name}_pred.pkl') + test_dataset.dump_results(outputs, out) + + eval_cfg = cfg.get('evaluation', {}) + for key in [ + 'interval', 'tmpdir', 'start', 'gpu_collect', + 'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers' + ]: + eval_cfg.pop(key, None) + + eval_res = test_dataset.evaluate(outputs, **eval_cfg) + runner.logger.info(f'Testing results of the {name} checkpoint') + for metric_name, val in eval_res.items(): + runner.logger.info(f'{metric_name}: {val:.04f}') diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/datasets/pipelines/formating.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/datasets/pipelines/formating.py index a7d0876a040cc435060f9056c6343fc0fd9f7b3c..3811c61addc5060784cf69acc2613b9b33567ea9 
100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/datasets/pipelines/formating.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/datasets/pipelines/formating.py @@ -1,378 +1,378 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from collections.abc import Sequence - -import mmcv -import numpy as np -import torch -from mmcv.parallel import DataContainer as DC - -from ..builder import PIPELINES - - -def to_tensor(data): - """Convert objects of various python types to :obj:`torch.Tensor`. - - Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, - :class:`Sequence`, :class:`int` and :class:`float`. - """ - if isinstance(data, torch.Tensor): - return data - if isinstance(data, np.ndarray): - return torch.from_numpy(data) - if isinstance(data, Sequence) and not mmcv.is_str(data): - return torch.tensor(data) - if isinstance(data, int): - return torch.LongTensor([data]) - if isinstance(data, float): - return torch.FloatTensor([data]) - raise TypeError(f'type {type(data)} cannot be converted to tensor.') - - -@PIPELINES.register_module() -class ToTensor: - """Convert some values in results dict to `torch.Tensor` type in data - loader pipeline. - - Args: - keys (Sequence[str]): Required keys to be converted. - """ - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Performs the ToTensor formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = to_tensor(results[key]) - return results - - def __repr__(self): - return f'{self.__class__.__name__}(keys={self.keys})' - - -@PIPELINES.register_module() -class Rename: - """Rename the key in results. - - Args: - mapping (dict): The keys in results that need to be renamed. The key of - the dict is the original name, while the value is the new name. If - the original name not found in results, do nothing. - Default: dict(). - """ - def __init__(self, mapping): - self.mapping = mapping - - def __call__(self, results): - for key, value in self.mapping.items(): - if key in results: - assert isinstance(key, str) and isinstance(value, str) - assert value not in results, ('the new name already exists in ' - 'results') - results[value] = results[key] - results.pop(key) - return results - - -@PIPELINES.register_module() -class ToDataContainer: - """Convert the data to DataContainer. - - Args: - fields (Sequence[dict]): Required fields to be converted - with keys and attributes. E.g. - fields=(dict(key='gt_bbox', stack=False),). - Note that key can also be a list of keys, if so, every tensor in - the list will be converted to DataContainer. - """ - def __init__(self, fields): - self.fields = fields - - def __call__(self, results): - """Performs the ToDataContainer formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. 
- """ - for field in self.fields: - _field = field.copy() - key = _field.pop('key') - if isinstance(key, list): - for item in key: - results[item] = DC(results[item], **_field) - else: - results[key] = DC(results[key], **_field) - return results - - def __repr__(self): - return self.__class__.__name__ + f'(fields={self.fields})' - - -@PIPELINES.register_module() -class ImageToTensor: - """Convert image type to `torch.Tensor` type. - - Args: - keys (Sequence[str]): Required keys to be converted. - """ - def __init__(self, keys): - self.keys = keys - - def __call__(self, results): - """Performs the ImageToTensor formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = to_tensor(results[key].transpose(2, 0, 1)) - return results - - def __repr__(self): - return f'{self.__class__.__name__}(keys={self.keys})' - - -@PIPELINES.register_module() -class Transpose: - """Transpose image channels to a given order. - - Args: - keys (Sequence[str]): Required keys to be converted. - order (Sequence[int]): Image channel order. - """ - def __init__(self, keys, order): - self.keys = keys - self.order = order - - def __call__(self, results): - """Performs the Transpose formatting. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - for key in self.keys: - results[key] = results[key].transpose(self.order) - return results - - def __repr__(self): - return (f'{self.__class__.__name__}(' - f'keys={self.keys}, order={self.order})') - - -@PIPELINES.register_module() -class Collect: - """Collect data from the loader relevant to the specific task. - - This keeps the items in ``keys`` as it is, and collect items in - ``meta_keys`` into a meta item called ``meta_name``.This is usually - the last stage of the data loader pipeline. - For example, when keys='imgs', meta_keys=('filename', 'label', - 'original_shape'), meta_name='img_metas', the results will be a dict with - keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of - another dict with keys 'filename', 'label', 'original_shape'. - - Args: - keys (Sequence[str]): Required keys to be collected. - meta_name (str): The name of the key that contains meta infomation. - This key is always populated. Default: "img_metas". - meta_keys (Sequence[str]): Keys that are collected under meta_name. - The contents of the ``meta_name`` dictionary depends on - ``meta_keys``. - By default this includes: - - - "filename": path to the image file - - "label": label of the image file - - "original_shape": original shape of the image as a tuple - (h, w, c) - - "img_shape": shape of the image input to the network as a tuple - (h, w, c). Note that images may be zero padded on the - bottom/right, if the batch tensor is larger than this shape. - - "pad_shape": image shape after padding - - "flip_direction": a str in ("horiziontal", "vertival") to - indicate if the image is fliped horizontally or vertically. - - "img_norm_cfg": a dict of normalization information: - - mean - per channel mean subtraction - - std - per channel std divisor - - to_rgb - bool indicating if bgr was converted to rgb - nested (bool): If set as True, will apply data[x] = [data[x]] to all - items in data. The arg is added for compatibility. Default: False. 
- """ - def __init__(self, - keys, - meta_keys=('filename', 'label', 'original_shape', 'img_shape', - 'pad_shape', 'flip_direction', 'img_norm_cfg'), - meta_name='img_metas', - nested=False): - self.keys = keys - self.meta_keys = meta_keys - self.meta_name = meta_name - self.nested = nested - - def __call__(self, results): - """Performs the Collect formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - data = {} - for key in self.keys: - data[key] = results[key] - - if len(self.meta_keys) != 0: - meta = {} - for key in self.meta_keys: - meta[key] = results[key] - data[self.meta_name] = DC(meta, cpu_only=True) - if self.nested: - for k in data: - data[k] = [data[k]] - - return data - - def __repr__(self): - return (f'{self.__class__.__name__}(' - f'keys={self.keys}, meta_keys={self.meta_keys}, ' - f'nested={self.nested})') - - -@PIPELINES.register_module() -class FormatShape: - """Format final imgs shape to the given input_format. - - Required keys are "imgs", "num_clips" and "clip_len", added or modified - keys are "imgs" and "input_shape". - - Args: - input_format (str): Define the final imgs format. - collapse (bool): To collpase input_format N... to ... (NCTHW to CTHW, - etc.) if N is 1. Should be set as True when training and testing - detectors. Default: False. - """ - def __init__(self, input_format, collapse=False): - self.input_format = input_format - self.collapse = collapse - if self.input_format not in ['NCTHW', 'NCHW', 'NCHW_Flow', 'NPTCHW']: - raise ValueError( - f'The input format {self.input_format} is invalid.') - - def __call__(self, results): - """Performs the FormatShape formating. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. 
- """ - if not isinstance(results['imgs'], np.ndarray): - results['imgs'] = np.array(results['imgs']) - imgs = results['imgs'] - # [M x H x W x C] - # M = 1 * N_crops * N_clips * L - if self.collapse: - assert results['num_clips'] == 1 - - if self.input_format == 'NCTHW': - num_clips = results['num_clips'] - clip_len = results['clip_len'] - - imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) - # N_crops x N_clips x L x H x W x C - imgs = np.transpose(imgs, (0, 1, 5, 2, 3, 4)) - # N_crops x N_clips x C x L x H x W - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - # M' x C x L x H x W - # M' = N_crops x N_clips - elif self.input_format == 'NCHW': - imgs = np.transpose(imgs, (0, 3, 1, 2)) - # M x C x H x W - elif self.input_format == 'NCHW_Flow': - num_clips = results['num_clips'] - clip_len = results['clip_len'] - imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) - # N_crops x N_clips x L x H x W x C - imgs = np.transpose(imgs, (0, 1, 2, 5, 3, 4)) - # N_crops x N_clips x L x C x H x W - imgs = imgs.reshape((-1, imgs.shape[2] * imgs.shape[3]) + - imgs.shape[4:]) - # M' x C' x H x W - # M' = N_crops x N_clips - # C' = L x C - elif self.input_format == 'NPTCHW': - num_proposals = results['num_proposals'] - num_clips = results['num_clips'] - clip_len = results['clip_len'] - imgs = imgs.reshape((num_proposals, num_clips * clip_len) + - imgs.shape[1:]) - # P x M x H x W x C - # M = N_clips x L - imgs = np.transpose(imgs, (0, 1, 4, 2, 3)) - # P x M x C x H x W - if self.collapse: - assert imgs.shape[0] == 1 - imgs = imgs.squeeze(0) - - results['imgs'] = imgs - results['input_shape'] = imgs.shape - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f"(input_format='{self.input_format}')" - return repr_str - - -@PIPELINES.register_module() -class FormatAudioShape: - """Format final audio shape to the given input_format. - - Required keys are "imgs", "num_clips" and "clip_len", added or modified - keys are "imgs" and "input_shape". - - Args: - input_format (str): Define the final imgs format. - """ - def __init__(self, input_format): - self.input_format = input_format - if self.input_format not in ['NCTF']: - raise ValueError( - f'The input format {self.input_format} is invalid.') - - def __call__(self, results): - """Performs the FormatShape formatting. - - Args: - results (dict): The resulting dict to be modified and passed - to the next transform in pipeline. - """ - audios = results['audios'] - # clip x sample x freq -> clip x channel x sample x freq - clip, sample, freq = audios.shape - audios = audios.reshape(clip, 1, sample, freq) - results['audios'] = audios - results['input_shape'] = audios.shape - return results - - def __repr__(self): - repr_str = self.__class__.__name__ - repr_str += f"(input_format='{self.input_format}')" - return repr_str +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from collections.abc import Sequence + +import mmcv +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + """ + if isinstance(data, torch.Tensor): + return data + if isinstance(data, np.ndarray): + return torch.from_numpy(data) + if isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + if isinstance(data, int): + return torch.LongTensor([data]) + if isinstance(data, float): + return torch.FloatTensor([data]) + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor: + """Convert some values in results dict to `torch.Tensor` type in data + loader pipeline. + + Args: + keys (Sequence[str]): Required keys to be converted. + """ + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Performs the ToTensor formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return f'{self.__class__.__name__}(keys={self.keys})' + + +@PIPELINES.register_module() +class Rename: + """Rename the key in results. + + Args: + mapping (dict): The keys in results that need to be renamed. The key of + the dict is the original name, while the value is the new name. If + the original name not found in results, do nothing. + Default: dict(). + """ + def __init__(self, mapping): + self.mapping = mapping + + def __call__(self, results): + for key, value in self.mapping.items(): + if key in results: + assert isinstance(key, str) and isinstance(value, str) + assert value not in results, ('the new name already exists in ' + 'results') + results[value] = results[key] + results.pop(key) + return results + + +@PIPELINES.register_module() +class ToDataContainer: + """Convert the data to DataContainer. + + Args: + fields (Sequence[dict]): Required fields to be converted + with keys and attributes. E.g. + fields=(dict(key='gt_bbox', stack=False),). + Note that key can also be a list of keys, if so, every tensor in + the list will be converted to DataContainer. + """ + def __init__(self, fields): + self.fields = fields + + def __call__(self, results): + """Performs the ToDataContainer formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for field in self.fields: + _field = field.copy() + key = _field.pop('key') + if isinstance(key, list): + for item in key: + results[item] = DC(results[item], **_field) + else: + results[key] = DC(results[key], **_field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class ImageToTensor: + """Convert image type to `torch.Tensor` type. + + Args: + keys (Sequence[str]): Required keys to be converted. + """ + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Performs the ImageToTensor formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. 
+ """ + for key in self.keys: + results[key] = to_tensor(results[key].transpose(2, 0, 1)) + return results + + def __repr__(self): + return f'{self.__class__.__name__}(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose: + """Transpose image channels to a given order. + + Args: + keys (Sequence[str]): Required keys to be converted. + order (Sequence[int]): Image channel order. + """ + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Performs the Transpose formatting. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return (f'{self.__class__.__name__}(' + f'keys={self.keys}, order={self.order})') + + +@PIPELINES.register_module() +class Collect: + """Collect data from the loader relevant to the specific task. + + This keeps the items in ``keys`` as it is, and collect items in + ``meta_keys`` into a meta item called ``meta_name``.This is usually + the last stage of the data loader pipeline. + For example, when keys='imgs', meta_keys=('filename', 'label', + 'original_shape'), meta_name='img_metas', the results will be a dict with + keys 'imgs' and 'img_metas', where 'img_metas' is a DataContainer of + another dict with keys 'filename', 'label', 'original_shape'. + + Args: + keys (Sequence[str]): Required keys to be collected. + meta_name (str): The name of the key that contains meta infomation. + This key is always populated. Default: "img_metas". + meta_keys (Sequence[str]): Keys that are collected under meta_name. + The contents of the ``meta_name`` dictionary depends on + ``meta_keys``. + By default this includes: + + - "filename": path to the image file + - "label": label of the image file + - "original_shape": original shape of the image as a tuple + (h, w, c) + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the + bottom/right, if the batch tensor is larger than this shape. + - "pad_shape": image shape after padding + - "flip_direction": a str in ("horiziontal", "vertival") to + indicate if the image is fliped horizontally or vertically. + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + nested (bool): If set as True, will apply data[x] = [data[x]] to all + items in data. The arg is added for compatibility. Default: False. + """ + def __init__(self, + keys, + meta_keys=('filename', 'label', 'original_shape', 'img_shape', + 'pad_shape', 'flip_direction', 'img_norm_cfg'), + meta_name='img_metas', + nested=False): + self.keys = keys + self.meta_keys = meta_keys + self.meta_name = meta_name + self.nested = nested + + def __call__(self, results): + """Performs the Collect formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. 
+ """ + data = {} + for key in self.keys: + data[key] = results[key] + + if len(self.meta_keys) != 0: + meta = {} + for key in self.meta_keys: + meta[key] = results[key] + data[self.meta_name] = DC(meta, cpu_only=True) + if self.nested: + for k in data: + data[k] = [data[k]] + + return data + + def __repr__(self): + return (f'{self.__class__.__name__}(' + f'keys={self.keys}, meta_keys={self.meta_keys}, ' + f'nested={self.nested})') + + +@PIPELINES.register_module() +class FormatShape: + """Format final imgs shape to the given input_format. + + Required keys are "imgs", "num_clips" and "clip_len", added or modified + keys are "imgs" and "input_shape". + + Args: + input_format (str): Define the final imgs format. + collapse (bool): To collpase input_format N... to ... (NCTHW to CTHW, + etc.) if N is 1. Should be set as True when training and testing + detectors. Default: False. + """ + def __init__(self, input_format, collapse=False): + self.input_format = input_format + self.collapse = collapse + if self.input_format not in ['NCTHW', 'NCHW', 'NCHW_Flow', 'NPTCHW']: + raise ValueError( + f'The input format {self.input_format} is invalid.') + + def __call__(self, results): + """Performs the FormatShape formating. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + if not isinstance(results['imgs'], np.ndarray): + results['imgs'] = np.array(results['imgs']) + imgs = results['imgs'] + # [M x H x W x C] + # M = 1 * N_crops * N_clips * L + if self.collapse: + assert results['num_clips'] == 1 + + if self.input_format == 'NCTHW': + num_clips = results['num_clips'] + clip_len = results['clip_len'] + + imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) + # N_crops x N_clips x L x H x W x C + imgs = np.transpose(imgs, (0, 1, 5, 2, 3, 4)) + # N_crops x N_clips x C x L x H x W + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + # M' x C x L x H x W + # M' = N_crops x N_clips + elif self.input_format == 'NCHW': + imgs = np.transpose(imgs, (0, 3, 1, 2)) + # M x C x H x W + elif self.input_format == 'NCHW_Flow': + num_clips = results['num_clips'] + clip_len = results['clip_len'] + imgs = imgs.reshape((-1, num_clips, clip_len) + imgs.shape[1:]) + # N_crops x N_clips x L x H x W x C + imgs = np.transpose(imgs, (0, 1, 2, 5, 3, 4)) + # N_crops x N_clips x L x C x H x W + imgs = imgs.reshape((-1, imgs.shape[2] * imgs.shape[3]) + + imgs.shape[4:]) + # M' x C' x H x W + # M' = N_crops x N_clips + # C' = L x C + elif self.input_format == 'NPTCHW': + num_proposals = results['num_proposals'] + num_clips = results['num_clips'] + clip_len = results['clip_len'] + imgs = imgs.reshape((num_proposals, num_clips * clip_len) + + imgs.shape[1:]) + # P x M x H x W x C + # M = N_clips x L + imgs = np.transpose(imgs, (0, 1, 4, 2, 3)) + # P x M x C x H x W + if self.collapse: + assert imgs.shape[0] == 1 + imgs = imgs.squeeze(0) + + results['imgs'] = imgs + results['input_shape'] = imgs.shape + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f"(input_format='{self.input_format}')" + return repr_str + + +@PIPELINES.register_module() +class FormatAudioShape: + """Format final audio shape to the given input_format. + + Required keys are "imgs", "num_clips" and "clip_len", added or modified + keys are "imgs" and "input_shape". + + Args: + input_format (str): Define the final imgs format. 
+ """ + def __init__(self, input_format): + self.input_format = input_format + if self.input_format not in ['NCTF']: + raise ValueError( + f'The input format {self.input_format} is invalid.') + + def __call__(self, results): + """Performs the FormatShape formatting. + + Args: + results (dict): The resulting dict to be modified and passed + to the next transform in pipeline. + """ + audios = results['audios'] + # clip x sample x freq -> clip x channel x sample x freq + clip, sample, freq = audios.shape + audios = audios.reshape(clip, 1, sample, freq) + results['audios'] = audios + results['input_shape'] = audios.shape + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f"(input_format='{self.input_format}')" + return repr_str diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/backbones/resnet3d_slowfast.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/backbones/resnet3d_slowfast.py index a5d23bb38eddec07ec89262f4e7a446a17b429fa..e61842a2bab64177f4f1b83b30430833911ee16a 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/backbones/resnet3d_slowfast.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/backbones/resnet3d_slowfast.py @@ -1,531 +1,531 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import warnings - -import torch -import torch.nn as nn -from mmcv.cnn import ConvModule, kaiming_init -from mmcv.runner import _load_checkpoint, load_checkpoint -from mmcv.utils import print_log - -from ...utils import get_root_logger -from ..builder import BACKBONES -from .resnet3d import ResNet3d - -mmdet_imported = False - - -class ResNet3dPathway(ResNet3d): - """A pathway of Slowfast based on ResNet3d. - - Args: - *args (arguments): Arguments same as :class:``ResNet3d``. - lateral (bool): Determines whether to enable the lateral connection - from another pathway. Default: False. - speed_ratio (int): Speed ratio indicating the ratio between time - dimension of the fast and slow pathway, corresponding to the - ``alpha`` in the paper. Default: 8. - channel_ratio (int): Reduce the channel number of fast pathway - by ``channel_ratio``, corresponding to ``beta`` in the paper. - Default: 8. - fusion_kernel (int): The kernel size of lateral fusion. - Default: 5. - **kwargs (keyword arguments): Keywords arguments for ResNet3d. 
- """ - def __init__(self, - *args, - lateral=False, - speed_ratio=8, - channel_ratio=8, - fusion_kernel=5, - **kwargs): - self.lateral = lateral - self.speed_ratio = speed_ratio - self.channel_ratio = channel_ratio - self.fusion_kernel = fusion_kernel - super().__init__(*args, **kwargs) - self.inplanes = self.base_channels - if self.lateral: - self.conv1_lateral = ConvModule( - self.inplanes // self.channel_ratio, - # https://arxiv.org/abs/1812.03982, the - # third type of lateral connection has out_channel: - # 2 * \beta * C - self.inplanes * 2 // self.channel_ratio, - kernel_size=(fusion_kernel, 1, 1), - stride=(self.speed_ratio, 1, 1), - padding=((fusion_kernel - 1) // 2, 0, 0), - bias=False, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None) - - self.lateral_connections = [] - for i in range(len(self.stage_blocks)): - planes = self.base_channels * 2**i - self.inplanes = planes * self.block.expansion - - if lateral and i != self.num_stages - 1: - # no lateral connection needed in final stage - lateral_name = f'layer{(i + 1)}_lateral' - setattr( - self, lateral_name, - ConvModule(self.inplanes // self.channel_ratio, - self.inplanes * 2 // self.channel_ratio, - kernel_size=(fusion_kernel, 1, 1), - stride=(self.speed_ratio, 1, 1), - padding=((fusion_kernel - 1) // 2, 0, 0), - bias=False, - conv_cfg=self.conv_cfg, - norm_cfg=self.norm_cfg, - act_cfg=None)) - self.lateral_connections.append(lateral_name) - - def make_res_layer(self, - block, - inplanes, - planes, - blocks, - spatial_stride=1, - temporal_stride=1, - dilation=1, - style='pytorch', - inflate=1, - inflate_style='3x1x1', - non_local=0, - non_local_cfg=dict(), - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - with_cp=False): - """Build residual layer for Slowfast. - - Args: - block (nn.Module): Residual module to be built. - inplanes (int): Number of channels for the input - feature in each block. - planes (int): Number of channels for the output - feature in each block. - blocks (int): Number of residual blocks. - spatial_stride (int | Sequence[int]): Spatial strides - in residual and conv layers. Default: 1. - temporal_stride (int | Sequence[int]): Temporal strides in - residual and conv layers. Default: 1. - dilation (int): Spacing between kernel elements. Default: 1. - style (str): ``pytorch`` or ``caffe``. If set to ``pytorch``, - the stride-two layer is the 3x3 conv layer, - otherwise the stride-two layer is the first 1x1 conv layer. - Default: ``pytorch``. - inflate (int | Sequence[int]): Determine whether to inflate - for each block. Default: 1. - inflate_style (str): ``3x1x1`` or ``3x3x3``. which determines - the kernel sizes and padding strides for conv1 and - conv2 in each block. Default: ``3x1x1``. - non_local (int | Sequence[int]): Determine whether to apply - non-local module in the corresponding block of each stages. - Default: 0. - non_local_cfg (dict): Config for non-local module. - Default: ``dict()``. - conv_cfg (dict | None): Config for conv layers. Default: None. - norm_cfg (dict | None): Config for norm layers. Default: None. - act_cfg (dict | None): Config for activate layers. Default: None. - with_cp (bool): Use checkpoint or not. Using checkpoint will save - some memory while slowing down the training speed. - Default: False. - - Returns: - nn.Module: A residual layer for the given config. 
- """ - inflate = inflate if not isinstance(inflate, - int) else (inflate, ) * blocks - non_local = non_local if not isinstance( - non_local, int) else (non_local, ) * blocks - assert len(inflate) == blocks and len(non_local) == blocks - if self.lateral: - lateral_inplanes = inplanes * 2 // self.channel_ratio - else: - lateral_inplanes = 0 - if (spatial_stride != 1 - or (inplanes + lateral_inplanes) != planes * block.expansion): - downsample = ConvModule(inplanes + lateral_inplanes, - planes * block.expansion, - kernel_size=1, - stride=(temporal_stride, spatial_stride, - spatial_stride), - bias=False, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=None) - else: - downsample = None - - layers = [] - layers.append( - block(inplanes + lateral_inplanes, - planes, - spatial_stride, - temporal_stride, - dilation, - downsample, - style=style, - inflate=(inflate[0] == 1), - inflate_style=inflate_style, - non_local=(non_local[0] == 1), - non_local_cfg=non_local_cfg, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp)) - inplanes = planes * block.expansion - - for i in range(1, blocks): - layers.append( - block(inplanes, - planes, - 1, - 1, - dilation, - style=style, - inflate=(inflate[i] == 1), - inflate_style=inflate_style, - non_local=(non_local[i] == 1), - non_local_cfg=non_local_cfg, - conv_cfg=conv_cfg, - norm_cfg=norm_cfg, - act_cfg=act_cfg, - with_cp=with_cp)) - - return nn.Sequential(*layers) - - def inflate_weights(self, logger): - """Inflate the resnet2d parameters to resnet3d pathway. - - The differences between resnet3d and resnet2d mainly lie in an extra - axis of conv kernel. To utilize the pretrained parameters in 2d model, - the weight of conv2d models should be inflated to fit in the shapes of - the 3d counterpart. For pathway the ``lateral_connection`` part should - not be inflated from 2d weights. - - Args: - logger (logging.Logger): The logger used to print - debugging infomation. 
- """ - - state_dict_r2d = _load_checkpoint(self.pretrained) - if 'state_dict' in state_dict_r2d: - state_dict_r2d = state_dict_r2d['state_dict'] - - inflated_param_names = [] - for name, module in self.named_modules(): - if 'lateral' in name: - continue - if isinstance(module, ConvModule): - # we use a ConvModule to wrap conv+bn+relu layers, thus the - # name mapping is needed - if 'downsample' in name: - # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0 - original_conv_name = name + '.0' - # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1 - original_bn_name = name + '.1' - else: - # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n} - original_conv_name = name - # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n} - original_bn_name = name.replace('conv', 'bn') - if original_conv_name + '.weight' not in state_dict_r2d: - logger.warning(f'Module not exist in the state_dict_r2d' - f': {original_conv_name}') - else: - self._inflate_conv_params(module.conv, state_dict_r2d, - original_conv_name, - inflated_param_names) - if original_bn_name + '.weight' not in state_dict_r2d: - logger.warning(f'Module not exist in the state_dict_r2d' - f': {original_bn_name}') - else: - self._inflate_bn_params(module.bn, state_dict_r2d, - original_bn_name, - inflated_param_names) - - # check if any parameters in the 2d checkpoint are not loaded - remaining_names = set( - state_dict_r2d.keys()) - set(inflated_param_names) - if remaining_names: - logger.info(f'These parameters in the 2d checkpoint are not loaded' - f': {remaining_names}') - - def _inflate_conv_params(self, conv3d, state_dict_2d, module_name_2d, - inflated_param_names): - """Inflate a conv module from 2d to 3d. - - The differences of conv modules betweene 2d and 3d in Pathway - mainly lie in the inplanes due to lateral connections. To fit the - shapes of the lateral connection counterpart, it will expand - parameters by concatting conv2d parameters and extra zero paddings. - - Args: - conv3d (nn.Module): The destination conv3d module. - state_dict_2d (OrderedDict): The state dict of pretrained 2d model. - module_name_2d (str): The name of corresponding conv module in the - 2d model. - inflated_param_names (list[str]): List of parameters that have been - inflated. - """ - weight_2d_name = module_name_2d + '.weight' - conv2d_weight = state_dict_2d[weight_2d_name] - old_shape = conv2d_weight.shape - new_shape = conv3d.weight.data.shape - kernel_t = new_shape[2] - - if new_shape[1] != old_shape[1]: - if new_shape[1] < old_shape[1]: - warnings.warn(f'The parameter of {module_name_2d} is not' - 'loaded due to incompatible shapes. 
') - return - # Inplanes may be different due to lateral connections - new_channels = new_shape[1] - old_shape[1] - pad_shape = old_shape - pad_shape = pad_shape[:1] + (new_channels, ) + pad_shape[2:] - # Expand parameters by concat extra channels - conv2d_weight = torch.cat( - (conv2d_weight, - torch.zeros(pad_shape).type_as(conv2d_weight).to( - conv2d_weight.device)), - dim=1) - - new_weight = conv2d_weight.data.unsqueeze(2).expand_as( - conv3d.weight) / kernel_t - conv3d.weight.data.copy_(new_weight) - inflated_param_names.append(weight_2d_name) - - if getattr(conv3d, 'bias') is not None: - bias_2d_name = module_name_2d + '.bias' - conv3d.bias.data.copy_(state_dict_2d[bias_2d_name]) - inflated_param_names.append(bias_2d_name) - - def _freeze_stages(self): - """Prevent all the parameters from being optimized before - `self.frozen_stages`.""" - if self.frozen_stages >= 0: - self.conv1.eval() - for param in self.conv1.parameters(): - param.requires_grad = False - - for i in range(1, self.frozen_stages + 1): - m = getattr(self, f'layer{i}') - m.eval() - for param in m.parameters(): - param.requires_grad = False - - if i != len(self.res_layers) and self.lateral: - # No fusion needed in the final stage - lateral_name = self.lateral_connections[i - 1] - conv_lateral = getattr(self, lateral_name) - conv_lateral.eval() - for param in conv_lateral.parameters(): - param.requires_grad = False - - def init_weights(self, pretrained=None): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - if pretrained: - self.pretrained = pretrained - - # Override the init_weights of i3d - super().init_weights() - for module_name in self.lateral_connections: - layer = getattr(self, module_name) - for m in layer.modules(): - if isinstance(m, (nn.Conv3d, nn.Conv2d)): - kaiming_init(m) - - -pathway_cfg = { - 'resnet3d': ResNet3dPathway, - # TODO: BNInceptionPathway -} - - -def build_pathway(cfg, *args, **kwargs): - """Build pathway. - - Args: - cfg (None or dict): cfg should contain: - - type (str): identify conv layer type. - - Returns: - nn.Module: Created pathway. - """ - if not (isinstance(cfg, dict) and 'type' in cfg): - raise TypeError('cfg must be a dict containing the key "type"') - cfg_ = cfg.copy() - - pathway_type = cfg_.pop('type') - if pathway_type not in pathway_cfg: - raise KeyError(f'Unrecognized pathway type {pathway_type}') - - pathway_cls = pathway_cfg[pathway_type] - pathway = pathway_cls(*args, **kwargs, **cfg_) - - return pathway - - -@BACKBONES.register_module() -class ResNet3dSlowFast(nn.Module): - """Slowfast backbone. - - This module is proposed in `SlowFast Networks for Video Recognition - `_ - - Args: - pretrained (str): The file path to a pretrained model. - resample_rate (int): A large temporal stride ``resample_rate`` - on input frames. The actual resample rate is calculated by - multipling the ``interval`` in ``SampleFrames`` in the - pipeline with ``resample_rate``, equivalent to the :math:`\\tau` - in the paper, i.e. it processes only one out of - ``resample_rate * interval`` frames. Default: 8. - speed_ratio (int): Speed ratio indicating the ratio between time - dimension of the fast and slow pathway, corresponding to the - :math:`\\alpha` in the paper. Default: 8. - channel_ratio (int): Reduce the channel number of fast pathway - by ``channel_ratio``, corresponding to :math:`\\beta` in the paper. - Default: 8. 
- slow_pathway (dict): Configuration of slow branch, should contain - necessary arguments for building the specific type of pathway - and: - type (str): type of backbone the pathway bases on. - lateral (bool): determine whether to build lateral connection - for the pathway.Default: - - .. code-block:: Python - - dict(type='ResNetPathway', - lateral=True, depth=50, pretrained=None, - conv1_kernel=(1, 7, 7), dilations=(1, 1, 1, 1), - conv1_stride_t=1, pool1_stride_t=1, inflate=(0, 0, 1, 1)) - - fast_pathway (dict): Configuration of fast branch, similar to - `slow_pathway`. Default: - - .. code-block:: Python - - dict(type='ResNetPathway', - lateral=False, depth=50, pretrained=None, base_channels=8, - conv1_kernel=(5, 7, 7), conv1_stride_t=1, pool1_stride_t=1) - """ - def __init__(self, - pretrained, - resample_rate=8, - speed_ratio=8, - channel_ratio=8, - slow_pathway=dict(type='resnet3d', - depth=50, - pretrained=None, - lateral=True, - conv1_kernel=(1, 7, 7), - dilations=(1, 1, 1, 1), - conv1_stride_t=1, - pool1_stride_t=1, - inflate=(0, 0, 1, 1)), - fast_pathway=dict(type='resnet3d', - depth=50, - pretrained=None, - lateral=False, - base_channels=8, - conv1_kernel=(5, 7, 7), - conv1_stride_t=1, - pool1_stride_t=1)): - super().__init__() - self.pretrained = pretrained - self.resample_rate = resample_rate - self.speed_ratio = speed_ratio - self.channel_ratio = channel_ratio - - if slow_pathway['lateral']: - slow_pathway['speed_ratio'] = speed_ratio - slow_pathway['channel_ratio'] = channel_ratio - - self.slow_path = build_pathway(slow_pathway) - self.fast_path = build_pathway(fast_pathway) - - def init_weights(self, pretrained=None): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - if pretrained: - self.pretrained = pretrained - - if isinstance(self.pretrained, str): - logger = get_root_logger() - msg = f'load model from: {self.pretrained}' - print_log(msg, logger=logger) - # Directly load 3D model. - load_checkpoint(self, self.pretrained, strict=True, logger=logger) - elif self.pretrained is None: - # Init two branch seperately. - self.fast_path.init_weights() - self.slow_path.init_weights() - else: - raise TypeError('pretrained must be a str or None') - - def forward(self, x): - """Defines the computation performed at every call. - - Args: - x (torch.Tensor): The input data. - - Returns: - tuple[torch.Tensor]: The feature of the input samples extracted - by the backbone. 
- """ - x_slow = nn.functional.interpolate( - x, - mode='nearest', - scale_factor=(1.0 / self.resample_rate, 1.0, 1.0)) - x_slow = self.slow_path.conv1(x_slow) - x_slow = self.slow_path.maxpool(x_slow) - - x_fast = nn.functional.interpolate( - x, - mode='nearest', - scale_factor=(1.0 / (self.resample_rate // self.speed_ratio), 1.0, - 1.0)) - x_fast = self.fast_path.conv1(x_fast) - x_fast = self.fast_path.maxpool(x_fast) - - if self.slow_path.lateral: - x_fast_lateral = self.slow_path.conv1_lateral(x_fast) - x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) - - for i, layer_name in enumerate(self.slow_path.res_layers): - res_layer = getattr(self.slow_path, layer_name) - x_slow = res_layer(x_slow) - res_layer_fast = getattr(self.fast_path, layer_name) - x_fast = res_layer_fast(x_fast) - if (i != len(self.slow_path.res_layers) - 1 - and self.slow_path.lateral): - # No fusion needed in the final stage - lateral_name = self.slow_path.lateral_connections[i] - conv_lateral = getattr(self.slow_path, lateral_name) - x_fast_lateral = conv_lateral(x_fast) - x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) - - out = (x_slow, x_fast) - - return out - - -if mmdet_imported: - MMDET_BACKBONES.register_module()(ResNet3dSlowFast) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import warnings + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, kaiming_init +from mmcv.runner import _load_checkpoint, load_checkpoint +from mmcv.utils import print_log + +from ...utils import get_root_logger +from ..builder import BACKBONES +from .resnet3d import ResNet3d + +mmdet_imported = False + + +class ResNet3dPathway(ResNet3d): + """A pathway of Slowfast based on ResNet3d. + + Args: + *args (arguments): Arguments same as :class:``ResNet3d``. + lateral (bool): Determines whether to enable the lateral connection + from another pathway. Default: False. + speed_ratio (int): Speed ratio indicating the ratio between time + dimension of the fast and slow pathway, corresponding to the + ``alpha`` in the paper. Default: 8. + channel_ratio (int): Reduce the channel number of fast pathway + by ``channel_ratio``, corresponding to ``beta`` in the paper. + Default: 8. + fusion_kernel (int): The kernel size of lateral fusion. + Default: 5. + **kwargs (keyword arguments): Keywords arguments for ResNet3d. 
+ """ + def __init__(self, + *args, + lateral=False, + speed_ratio=8, + channel_ratio=8, + fusion_kernel=5, + **kwargs): + self.lateral = lateral + self.speed_ratio = speed_ratio + self.channel_ratio = channel_ratio + self.fusion_kernel = fusion_kernel + super().__init__(*args, **kwargs) + self.inplanes = self.base_channels + if self.lateral: + self.conv1_lateral = ConvModule( + self.inplanes // self.channel_ratio, + # https://arxiv.org/abs/1812.03982, the + # third type of lateral connection has out_channel: + # 2 * \beta * C + self.inplanes * 2 // self.channel_ratio, + kernel_size=(fusion_kernel, 1, 1), + stride=(self.speed_ratio, 1, 1), + padding=((fusion_kernel - 1) // 2, 0, 0), + bias=False, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None) + + self.lateral_connections = [] + for i in range(len(self.stage_blocks)): + planes = self.base_channels * 2**i + self.inplanes = planes * self.block.expansion + + if lateral and i != self.num_stages - 1: + # no lateral connection needed in final stage + lateral_name = f'layer{(i + 1)}_lateral' + setattr( + self, lateral_name, + ConvModule(self.inplanes // self.channel_ratio, + self.inplanes * 2 // self.channel_ratio, + kernel_size=(fusion_kernel, 1, 1), + stride=(self.speed_ratio, 1, 1), + padding=((fusion_kernel - 1) // 2, 0, 0), + bias=False, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=None)) + self.lateral_connections.append(lateral_name) + + def make_res_layer(self, + block, + inplanes, + planes, + blocks, + spatial_stride=1, + temporal_stride=1, + dilation=1, + style='pytorch', + inflate=1, + inflate_style='3x1x1', + non_local=0, + non_local_cfg=dict(), + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + with_cp=False): + """Build residual layer for Slowfast. + + Args: + block (nn.Module): Residual module to be built. + inplanes (int): Number of channels for the input + feature in each block. + planes (int): Number of channels for the output + feature in each block. + blocks (int): Number of residual blocks. + spatial_stride (int | Sequence[int]): Spatial strides + in residual and conv layers. Default: 1. + temporal_stride (int | Sequence[int]): Temporal strides in + residual and conv layers. Default: 1. + dilation (int): Spacing between kernel elements. Default: 1. + style (str): ``pytorch`` or ``caffe``. If set to ``pytorch``, + the stride-two layer is the 3x3 conv layer, + otherwise the stride-two layer is the first 1x1 conv layer. + Default: ``pytorch``. + inflate (int | Sequence[int]): Determine whether to inflate + for each block. Default: 1. + inflate_style (str): ``3x1x1`` or ``3x3x3``. which determines + the kernel sizes and padding strides for conv1 and + conv2 in each block. Default: ``3x1x1``. + non_local (int | Sequence[int]): Determine whether to apply + non-local module in the corresponding block of each stages. + Default: 0. + non_local_cfg (dict): Config for non-local module. + Default: ``dict()``. + conv_cfg (dict | None): Config for conv layers. Default: None. + norm_cfg (dict | None): Config for norm layers. Default: None. + act_cfg (dict | None): Config for activate layers. Default: None. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + + Returns: + nn.Module: A residual layer for the given config. 
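# Illustrative sketch only (not part of the patched file): a quick check of the
# lateral-connection shapes built in ``__init__`` above, assuming the default
# SlowFast-R50 slow pathway (base_channels=64, block expansion 4,
# channel_ratio=8). The helper name below is made up for illustration.
def lateral_shapes(base_channels=64, expansion=4, num_stages=4, channel_ratio=8):
    # conv1_lateral fuses the fast-path stem output into the slow path:
    # in = base_channels // channel_ratio, out = 2 * base_channels // channel_ratio
    shapes = {'conv1_lateral': (base_channels // channel_ratio,
                                base_channels * 2 // channel_ratio)}
    for i in range(num_stages - 1):  # no lateral connection after the last stage
        inplanes = base_channels * 2 ** i * expansion  # 256, 512, 1024
        shapes[f'layer{i + 1}_lateral'] = (inplanes // channel_ratio,
                                           inplanes * 2 // channel_ratio)
    # Every lateral conv uses kernel (fusion_kernel, 1, 1) with temporal stride
    # (speed_ratio, 1, 1), bringing the fast clip down to the slow frame rate.
    return shapes

print(lateral_shapes())
# {'conv1_lateral': (8, 16), 'layer1_lateral': (32, 64),
#  'layer2_lateral': (64, 128), 'layer3_lateral': (128, 256)}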
+ """ + inflate = inflate if not isinstance(inflate, + int) else (inflate, ) * blocks + non_local = non_local if not isinstance( + non_local, int) else (non_local, ) * blocks + assert len(inflate) == blocks and len(non_local) == blocks + if self.lateral: + lateral_inplanes = inplanes * 2 // self.channel_ratio + else: + lateral_inplanes = 0 + if (spatial_stride != 1 + or (inplanes + lateral_inplanes) != planes * block.expansion): + downsample = ConvModule(inplanes + lateral_inplanes, + planes * block.expansion, + kernel_size=1, + stride=(temporal_stride, spatial_stride, + spatial_stride), + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + else: + downsample = None + + layers = [] + layers.append( + block(inplanes + lateral_inplanes, + planes, + spatial_stride, + temporal_stride, + dilation, + downsample, + style=style, + inflate=(inflate[0] == 1), + inflate_style=inflate_style, + non_local=(non_local[0] == 1), + non_local_cfg=non_local_cfg, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) + inplanes = planes * block.expansion + + for i in range(1, blocks): + layers.append( + block(inplanes, + planes, + 1, + 1, + dilation, + style=style, + inflate=(inflate[i] == 1), + inflate_style=inflate_style, + non_local=(non_local[i] == 1), + non_local_cfg=non_local_cfg, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + with_cp=with_cp)) + + return nn.Sequential(*layers) + + def inflate_weights(self, logger): + """Inflate the resnet2d parameters to resnet3d pathway. + + The differences between resnet3d and resnet2d mainly lie in an extra + axis of conv kernel. To utilize the pretrained parameters in 2d model, + the weight of conv2d models should be inflated to fit in the shapes of + the 3d counterpart. For pathway the ``lateral_connection`` part should + not be inflated from 2d weights. + + Args: + logger (logging.Logger): The logger used to print + debugging infomation. 
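# Illustrative sketch only (not part of the patched file) of the inflation rule
# described in this docstring and implemented by ``_inflate_conv_params`` below:
# the 2d kernel is zero-padded along the input-channel axis to cover any extra
# lateral channels, then repeated along the new temporal axis and divided by the
# temporal kernel size, so a temporally constant input gives the same response
# as the original 2d convolution. The helper and shapes are made up.
import torch

def inflate_conv_weight(w2d, c_in_3d, kernel_t):
    c_out, c_in_2d, kh, kw = w2d.shape
    if c_in_3d > c_in_2d:  # lateral connections add input channels
        pad = w2d.new_zeros(c_out, c_in_3d - c_in_2d, kh, kw)
        w2d = torch.cat((w2d, pad), dim=1)
    # (c_out, c_in_3d, kh, kw) -> (c_out, c_in_3d, kernel_t, kh, kw)
    return w2d.unsqueeze(2).expand(c_out, c_in_3d, kernel_t, kh, kw) / kernel_t

w2d = torch.randn(64, 3, 7, 7)              # e.g. a 2d conv1 weight
w3d = inflate_conv_weight(w2d, c_in_3d=3, kernel_t=5)
print(w3d.shape)                            # torch.Size([64, 3, 5, 7, 7])
print(torch.allclose(w3d.sum(dim=2), w2d))  # True: the 2d response is preserved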
+ """ + + state_dict_r2d = _load_checkpoint(self.pretrained) + if 'state_dict' in state_dict_r2d: + state_dict_r2d = state_dict_r2d['state_dict'] + + inflated_param_names = [] + for name, module in self.named_modules(): + if 'lateral' in name: + continue + if isinstance(module, ConvModule): + # we use a ConvModule to wrap conv+bn+relu layers, thus the + # name mapping is needed + if 'downsample' in name: + # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0 + original_conv_name = name + '.0' + # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1 + original_bn_name = name + '.1' + else: + # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n} + original_conv_name = name + # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n} + original_bn_name = name.replace('conv', 'bn') + if original_conv_name + '.weight' not in state_dict_r2d: + logger.warning(f'Module not exist in the state_dict_r2d' + f': {original_conv_name}') + else: + self._inflate_conv_params(module.conv, state_dict_r2d, + original_conv_name, + inflated_param_names) + if original_bn_name + '.weight' not in state_dict_r2d: + logger.warning(f'Module not exist in the state_dict_r2d' + f': {original_bn_name}') + else: + self._inflate_bn_params(module.bn, state_dict_r2d, + original_bn_name, + inflated_param_names) + + # check if any parameters in the 2d checkpoint are not loaded + remaining_names = set( + state_dict_r2d.keys()) - set(inflated_param_names) + if remaining_names: + logger.info(f'These parameters in the 2d checkpoint are not loaded' + f': {remaining_names}') + + def _inflate_conv_params(self, conv3d, state_dict_2d, module_name_2d, + inflated_param_names): + """Inflate a conv module from 2d to 3d. + + The differences of conv modules betweene 2d and 3d in Pathway + mainly lie in the inplanes due to lateral connections. To fit the + shapes of the lateral connection counterpart, it will expand + parameters by concatting conv2d parameters and extra zero paddings. + + Args: + conv3d (nn.Module): The destination conv3d module. + state_dict_2d (OrderedDict): The state dict of pretrained 2d model. + module_name_2d (str): The name of corresponding conv module in the + 2d model. + inflated_param_names (list[str]): List of parameters that have been + inflated. + """ + weight_2d_name = module_name_2d + '.weight' + conv2d_weight = state_dict_2d[weight_2d_name] + old_shape = conv2d_weight.shape + new_shape = conv3d.weight.data.shape + kernel_t = new_shape[2] + + if new_shape[1] != old_shape[1]: + if new_shape[1] < old_shape[1]: + warnings.warn(f'The parameter of {module_name_2d} is not' + 'loaded due to incompatible shapes. 
') + return + # Inplanes may be different due to lateral connections + new_channels = new_shape[1] - old_shape[1] + pad_shape = old_shape + pad_shape = pad_shape[:1] + (new_channels, ) + pad_shape[2:] + # Expand parameters by concat extra channels + conv2d_weight = torch.cat( + (conv2d_weight, + torch.zeros(pad_shape).type_as(conv2d_weight).to( + conv2d_weight.device)), + dim=1) + + new_weight = conv2d_weight.data.unsqueeze(2).expand_as( + conv3d.weight) / kernel_t + conv3d.weight.data.copy_(new_weight) + inflated_param_names.append(weight_2d_name) + + if getattr(conv3d, 'bias') is not None: + bias_2d_name = module_name_2d + '.bias' + conv3d.bias.data.copy_(state_dict_2d[bias_2d_name]) + inflated_param_names.append(bias_2d_name) + + def _freeze_stages(self): + """Prevent all the parameters from being optimized before + `self.frozen_stages`.""" + if self.frozen_stages >= 0: + self.conv1.eval() + for param in self.conv1.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + if i != len(self.res_layers) and self.lateral: + # No fusion needed in the final stage + lateral_name = self.lateral_connections[i - 1] + conv_lateral = getattr(self, lateral_name) + conv_lateral.eval() + for param in conv_lateral.parameters(): + param.requires_grad = False + + def init_weights(self, pretrained=None): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + if pretrained: + self.pretrained = pretrained + + # Override the init_weights of i3d + super().init_weights() + for module_name in self.lateral_connections: + layer = getattr(self, module_name) + for m in layer.modules(): + if isinstance(m, (nn.Conv3d, nn.Conv2d)): + kaiming_init(m) + + +pathway_cfg = { + 'resnet3d': ResNet3dPathway, + # TODO: BNInceptionPathway +} + + +def build_pathway(cfg, *args, **kwargs): + """Build pathway. + + Args: + cfg (None or dict): cfg should contain: + - type (str): identify conv layer type. + + Returns: + nn.Module: Created pathway. + """ + if not (isinstance(cfg, dict) and 'type' in cfg): + raise TypeError('cfg must be a dict containing the key "type"') + cfg_ = cfg.copy() + + pathway_type = cfg_.pop('type') + if pathway_type not in pathway_cfg: + raise KeyError(f'Unrecognized pathway type {pathway_type}') + + pathway_cls = pathway_cfg[pathway_type] + pathway = pathway_cls(*args, **kwargs, **cfg_) + + return pathway + + +@BACKBONES.register_module() +class ResNet3dSlowFast(nn.Module): + """Slowfast backbone. + + This module is proposed in `SlowFast Networks for Video Recognition + `_ + + Args: + pretrained (str): The file path to a pretrained model. + resample_rate (int): A large temporal stride ``resample_rate`` + on input frames. The actual resample rate is calculated by + multipling the ``interval`` in ``SampleFrames`` in the + pipeline with ``resample_rate``, equivalent to the :math:`\\tau` + in the paper, i.e. it processes only one out of + ``resample_rate * interval`` frames. Default: 8. + speed_ratio (int): Speed ratio indicating the ratio between time + dimension of the fast and slow pathway, corresponding to the + :math:`\\alpha` in the paper. Default: 8. + channel_ratio (int): Reduce the channel number of fast pathway + by ``channel_ratio``, corresponding to :math:`\\beta` in the paper. + Default: 8. 
+ slow_pathway (dict): Configuration of slow branch, should contain + necessary arguments for building the specific type of pathway + and: + type (str): type of backbone the pathway bases on. + lateral (bool): determine whether to build lateral connection + for the pathway.Default: + + .. code-block:: Python + + dict(type='ResNetPathway', + lateral=True, depth=50, pretrained=None, + conv1_kernel=(1, 7, 7), dilations=(1, 1, 1, 1), + conv1_stride_t=1, pool1_stride_t=1, inflate=(0, 0, 1, 1)) + + fast_pathway (dict): Configuration of fast branch, similar to + `slow_pathway`. Default: + + .. code-block:: Python + + dict(type='ResNetPathway', + lateral=False, depth=50, pretrained=None, base_channels=8, + conv1_kernel=(5, 7, 7), conv1_stride_t=1, pool1_stride_t=1) + """ + def __init__(self, + pretrained, + resample_rate=8, + speed_ratio=8, + channel_ratio=8, + slow_pathway=dict(type='resnet3d', + depth=50, + pretrained=None, + lateral=True, + conv1_kernel=(1, 7, 7), + dilations=(1, 1, 1, 1), + conv1_stride_t=1, + pool1_stride_t=1, + inflate=(0, 0, 1, 1)), + fast_pathway=dict(type='resnet3d', + depth=50, + pretrained=None, + lateral=False, + base_channels=8, + conv1_kernel=(5, 7, 7), + conv1_stride_t=1, + pool1_stride_t=1)): + super().__init__() + self.pretrained = pretrained + self.resample_rate = resample_rate + self.speed_ratio = speed_ratio + self.channel_ratio = channel_ratio + + if slow_pathway['lateral']: + slow_pathway['speed_ratio'] = speed_ratio + slow_pathway['channel_ratio'] = channel_ratio + + self.slow_path = build_pathway(slow_pathway) + self.fast_path = build_pathway(fast_pathway) + + def init_weights(self, pretrained=None): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + if pretrained: + self.pretrained = pretrained + + if isinstance(self.pretrained, str): + logger = get_root_logger() + msg = f'load model from: {self.pretrained}' + print_log(msg, logger=logger) + # Directly load 3D model. + load_checkpoint(self, self.pretrained, strict=True, logger=logger) + elif self.pretrained is None: + # Init two branch seperately. + self.fast_path.init_weights() + self.slow_path.init_weights() + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Defines the computation performed at every call. + + Args: + x (torch.Tensor): The input data. + + Returns: + tuple[torch.Tensor]: The feature of the input samples extracted + by the backbone. 
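# Minimal sketch (not part of the patched file) of the temporal resampling in
# the forward pass defined below: with the defaults resample_rate (the paper's
# tau) = 8 and speed_ratio (alpha) = 8, a 32-frame clip is reduced to 4 frames
# for the slow pathway while the fast pathway keeps all 32 frames.
import torch
import torch.nn as nn

x = torch.randn(1, 3, 32, 56, 56)  # (N, C, T, H, W), T = 32
resample_rate, speed_ratio = 8, 8

x_slow = nn.functional.interpolate(
    x, mode='nearest', scale_factor=(1.0 / resample_rate, 1.0, 1.0))
x_fast = nn.functional.interpolate(
    x, mode='nearest',
    scale_factor=(1.0 / (resample_rate // speed_ratio), 1.0, 1.0))

print(x_slow.shape[2], x_fast.shape[2])  # 4 32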
+ """ + x_slow = nn.functional.interpolate( + x, + mode='nearest', + scale_factor=(1.0 / self.resample_rate, 1.0, 1.0)) + x_slow = self.slow_path.conv1(x_slow) + x_slow = self.slow_path.maxpool(x_slow) + + x_fast = nn.functional.interpolate( + x, + mode='nearest', + scale_factor=(1.0 / (self.resample_rate // self.speed_ratio), 1.0, + 1.0)) + x_fast = self.fast_path.conv1(x_fast) + x_fast = self.fast_path.maxpool(x_fast) + + if self.slow_path.lateral: + x_fast_lateral = self.slow_path.conv1_lateral(x_fast) + x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) + + for i, layer_name in enumerate(self.slow_path.res_layers): + res_layer = getattr(self.slow_path, layer_name) + x_slow = res_layer(x_slow) + res_layer_fast = getattr(self.fast_path, layer_name) + x_fast = res_layer_fast(x_fast) + if (i != len(self.slow_path.res_layers) - 1 + and self.slow_path.lateral): + # No fusion needed in the final stage + lateral_name = self.slow_path.lateral_connections[i] + conv_lateral = getattr(self.slow_path, lateral_name) + x_fast_lateral = conv_lateral(x_fast) + x_slow = torch.cat((x_slow, x_fast_lateral), dim=1) + + out = (x_slow, x_fast) + + return out + + +if mmdet_imported: + MMDET_BACKBONES.register_module()(ResNet3dSlowFast) diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/base.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/base.py index 8d165096e1f96749bdab7f9b31f07ce000794d94..6343b3dd0f60cf7183183ff14f024b143a0bf138 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/base.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/base.py @@ -1,122 +1,122 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -from abc import ABCMeta, abstractmethod - -import torch -import torch.nn as nn - -from ...core import top_k_accuracy -from ..builder import build_loss - - -class AvgConsensus(nn.Module): - """Average consensus module. - - Args: - dim (int): Decide which dim consensus function to apply. - Default: 1. - """ - def __init__(self, dim=1): - super().__init__() - self.dim = dim - - def forward(self, x): - """Defines the computation performed at every call.""" - return x.mean(dim=self.dim, keepdim=True) - - -class BaseHead(nn.Module, metaclass=ABCMeta): - """Base class for head. - - All Head should subclass it. - All subclass should overwrite: - - Methods:``init_weights``, initializing weights in some modules. - - Methods:``forward``, supporting to forward both for training and testing. - - Args: - num_classes (int): Number of classes to be classified. - in_channels (int): Number of channels in input feature. - loss_cls (dict): Config for building loss. - Default: dict(type='CrossEntropyLoss', loss_weight=1.0). - multi_class (bool): Determines whether it is a multi-class - recognition task. Default: False. - label_smooth_eps (float): Epsilon used in label smooth. - Reference: arxiv.org/abs/1906.02629. Default: 0. 
- """ - def __init__(self, - num_classes, - in_channels, - loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0), - multi_class=False, - label_smooth_eps=0.0): - super().__init__() - self.num_classes = num_classes - self.in_channels = in_channels - self.loss_cls = build_loss(loss_cls) - self.multi_class = multi_class - self.label_smooth_eps = label_smooth_eps - - @abstractmethod - def init_weights(self): - """Initiate the parameters either from existing checkpoint or from - scratch.""" - - @abstractmethod - def forward(self, x): - """Defines the computation performed at every call.""" - - def loss(self, cls_score, labels, **kwargs): - """Calculate the loss given output ``cls_score``, target ``labels``. - - Args: - cls_score (torch.Tensor): The output of the model. - labels (torch.Tensor): The target output of the model. - - Returns: - dict: A dict containing field 'loss_cls'(mandatory) - and 'top1_acc', 'top5_acc'(optional). - """ - losses = dict() - if labels.shape == torch.Size([]): - labels = labels.unsqueeze(0) - elif labels.dim() == 1 and labels.size()[0] == self.num_classes \ - and cls_score.size()[0] == 1: - # Fix a bug when training with soft labels and batch size is 1. - # When using soft labels, `labels` and `cls_socre` share the same - # shape. - labels = labels.unsqueeze(0) - - if not self.multi_class and cls_score.size() != labels.size(): - top_k_acc = top_k_accuracy(cls_score.detach().cpu().numpy(), - labels.detach().cpu().numpy(), (1, 5)) - losses['top1_acc'] = torch.tensor(top_k_acc[0], - device=cls_score.device, - dtype=torch.float32) - losses['top5_acc'] = torch.tensor(top_k_acc[1], - device=cls_score.device, - dtype=torch.float32) - - elif self.multi_class and self.label_smooth_eps != 0: - labels = ((1 - self.label_smooth_eps) * labels + - self.label_smooth_eps / self.num_classes) - - loss_cls = self.loss_cls(cls_score, labels, **kwargs) - # loss_cls may be dictionary or single tensor - if isinstance(loss_cls, dict): - losses.update(loss_cls) - else: - losses['loss_cls'] = loss_cls - - return losses +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn + +from ...core import top_k_accuracy +from ..builder import build_loss + + +class AvgConsensus(nn.Module): + """Average consensus module. + + Args: + dim (int): Decide which dim consensus function to apply. + Default: 1. + """ + def __init__(self, dim=1): + super().__init__() + self.dim = dim + + def forward(self, x): + """Defines the computation performed at every call.""" + return x.mean(dim=self.dim, keepdim=True) + + +class BaseHead(nn.Module, metaclass=ABCMeta): + """Base class for head. + + All Head should subclass it. + All subclass should overwrite: + - Methods:``init_weights``, initializing weights in some modules. + - Methods:``forward``, supporting to forward both for training and testing. 
+ + Args: + num_classes (int): Number of classes to be classified. + in_channels (int): Number of channels in input feature. + loss_cls (dict): Config for building loss. + Default: dict(type='CrossEntropyLoss', loss_weight=1.0). + multi_class (bool): Determines whether it is a multi-class + recognition task. Default: False. + label_smooth_eps (float): Epsilon used in label smooth. + Reference: arxiv.org/abs/1906.02629. Default: 0. + """ + def __init__(self, + num_classes, + in_channels, + loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0), + multi_class=False, + label_smooth_eps=0.0): + super().__init__() + self.num_classes = num_classes + self.in_channels = in_channels + self.loss_cls = build_loss(loss_cls) + self.multi_class = multi_class + self.label_smooth_eps = label_smooth_eps + + @abstractmethod + def init_weights(self): + """Initiate the parameters either from existing checkpoint or from + scratch.""" + + @abstractmethod + def forward(self, x): + """Defines the computation performed at every call.""" + + def loss(self, cls_score, labels, **kwargs): + """Calculate the loss given output ``cls_score``, target ``labels``. + + Args: + cls_score (torch.Tensor): The output of the model. + labels (torch.Tensor): The target output of the model. + + Returns: + dict: A dict containing field 'loss_cls'(mandatory) + and 'top1_acc', 'top5_acc'(optional). + """ + losses = dict() + if labels.shape == torch.Size([]): + labels = labels.unsqueeze(0) + elif labels.dim() == 1 and labels.size()[0] == self.num_classes \ + and cls_score.size()[0] == 1: + # Fix a bug when training with soft labels and batch size is 1. + # When using soft labels, `labels` and `cls_socre` share the same + # shape. + labels = labels.unsqueeze(0) + + if not self.multi_class and cls_score.size() != labels.size(): + top_k_acc = top_k_accuracy(cls_score.detach().cpu().numpy(), + labels.detach().cpu().numpy(), (1, 5)) + losses['top1_acc'] = torch.tensor(top_k_acc[0], + device=cls_score.device, + dtype=torch.float32) + losses['top5_acc'] = torch.tensor(top_k_acc[1], + device=cls_score.device, + dtype=torch.float32) + + elif self.multi_class and self.label_smooth_eps != 0: + labels = ((1 - self.label_smooth_eps) * labels + + self.label_smooth_eps / self.num_classes) + + loss_cls = self.loss_cls(cls_score, labels, **kwargs) + # loss_cls may be dictionary or single tensor + if isinstance(loss_cls, dict): + losses.update(loss_cls) + else: + losses['loss_cls'] = loss_cls + + return losses diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/slowfast_head.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/slowfast_head.py index 30c3590fa96398b7282c3c47415a31e5e4b7c858..31de953e262024feb99fab0e3809fc96d2791d6a 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/slowfast_head.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/heads/slowfast_head.py @@ -1,92 +1,92 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
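# Illustrative check only (not part of the patched file) of the label-smoothing
# step in ``BaseHead.loss`` above: with multi_class=True and
# label_smooth_eps > 0, each soft target is mixed with the uniform distribution
# over num_classes, and the smoothed target still sums to 1.
import torch

num_classes, eps = 4, 0.1
labels = torch.tensor([[0.0, 1.0, 0.0, 0.0]])     # one-hot soft label
smoothed = (1 - eps) * labels + eps / num_classes
print(smoothed)        # tensor([[0.0250, 0.9250, 0.0250, 0.0250]])
print(smoothed.sum())  # sums to 1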
-# ============================================================================ -import torch -import torch.nn as nn -from mmcv.cnn import normal_init - -from ..builder import HEADS -from .base import BaseHead - - -@HEADS.register_module() -class SlowFastHead(BaseHead): - """The classification head for SlowFast. - - Args: - num_classes (int): Number of classes to be classified. - in_channels (int): Number of channels in input feature. - loss_cls (dict): Config for building loss. - Default: dict(type='CrossEntropyLoss'). - spatial_type (str): Pooling type in spatial dimension. Default: 'avg'. - dropout_ratio (float): Probability of dropout layer. Default: 0.8. - init_std (float): Std value for Initiation. Default: 0.01. - kwargs (dict, optional): Any keyword argument to be used to initialize - the head. - """ - def __init__(self, - num_classes, - in_channels, - loss_cls=dict(type='CrossEntropyLoss'), - spatial_type='avg', - dropout_ratio=0.8, - init_std=0.01, - **kwargs): - - super().__init__(num_classes, in_channels, loss_cls, **kwargs) - self.spatial_type = spatial_type - self.dropout_ratio = dropout_ratio - self.init_std = init_std - - if self.dropout_ratio != 0: - self.dropout = nn.Dropout(p=self.dropout_ratio) - else: - self.dropout = None - self.fc_cls = nn.Linear(in_channels, num_classes) - - if self.spatial_type == 'avg': - self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) - else: - self.avg_pool = None - - def init_weights(self): - """Initiate the parameters from scratch.""" - normal_init(self.fc_cls, std=self.init_std) - - def forward(self, x): - """Defines the computation performed at every call. - - Args: - x (torch.Tensor): The input data. - - Returns: - torch.Tensor: The classification scores for input samples. - """ - # ([N, channel_fast, T, H, W], [(N, channel_slow, T, H, W)]) - x_fast, x_slow = x - # ([N, channel_fast, 1, 1, 1], [N, channel_slow, 1, 1, 1]) - x_fast = self.avg_pool(x_fast) - x_slow = self.avg_pool(x_slow) - # [N, channel_fast + channel_slow, 1, 1, 1] - x = torch.cat((x_slow, x_fast), dim=1) - - if self.dropout is not None: - x = self.dropout(x) - - # [N x C] - x = x.view(x.size(0), -1) - # [N x num_classes] - cls_score = self.fc_cls(x) - - return cls_score.npu_format_cast(0) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn as nn +from mmcv.cnn import normal_init + +from ..builder import HEADS +from .base import BaseHead + + +@HEADS.register_module() +class SlowFastHead(BaseHead): + """The classification head for SlowFast. + + Args: + num_classes (int): Number of classes to be classified. + in_channels (int): Number of channels in input feature. + loss_cls (dict): Config for building loss. + Default: dict(type='CrossEntropyLoss'). + spatial_type (str): Pooling type in spatial dimension. Default: 'avg'. + dropout_ratio (float): Probability of dropout layer. Default: 0.8. 
+ init_std (float): Std value for Initiation. Default: 0.01. + kwargs (dict, optional): Any keyword argument to be used to initialize + the head. + """ + def __init__(self, + num_classes, + in_channels, + loss_cls=dict(type='CrossEntropyLoss'), + spatial_type='avg', + dropout_ratio=0.8, + init_std=0.01, + **kwargs): + + super().__init__(num_classes, in_channels, loss_cls, **kwargs) + self.spatial_type = spatial_type + self.dropout_ratio = dropout_ratio + self.init_std = init_std + + if self.dropout_ratio != 0: + self.dropout = nn.Dropout(p=self.dropout_ratio) + else: + self.dropout = None + self.fc_cls = nn.Linear(in_channels, num_classes) + + if self.spatial_type == 'avg': + self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) + else: + self.avg_pool = None + + def init_weights(self): + """Initiate the parameters from scratch.""" + normal_init(self.fc_cls, std=self.init_std) + + def forward(self, x): + """Defines the computation performed at every call. + + Args: + x (torch.Tensor): The input data. + + Returns: + torch.Tensor: The classification scores for input samples. + """ + # ([N, channel_fast, T, H, W], [(N, channel_slow, T, H, W)]) + x_fast, x_slow = x + # ([N, channel_fast, 1, 1, 1], [N, channel_slow, 1, 1, 1]) + x_fast = self.avg_pool(x_fast) + x_slow = self.avg_pool(x_slow) + # [N, channel_fast + channel_slow, 1, 1, 1] + x = torch.cat((x_slow, x_fast), dim=1) + + if self.dropout is not None: + x = self.dropout(x) + + # [N x C] + x = x.view(x.size(0), -1) + # [N x num_classes] + cls_score = self.fc_cls(x) + + return cls_score.npu_format_cast(0) diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/losses/cross_entropy_loss.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/losses/cross_entropy_loss.py index 11c9bfd4ddc109784c8a48a77286ed9663c7c53f..a0afe4232c36c25b3735f3efd9caa8de24e081e0 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/losses/cross_entropy_loss.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/losses/cross_entropy_loss.py @@ -1,133 +1,133 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -import torch.nn.functional as F - -from ..builder import LOSSES -from .base import BaseWeightedLoss - - -@LOSSES.register_module() -class CrossEntropyLoss(BaseWeightedLoss): - """Cross Entropy Loss. - - Support two kinds of labels and their corresponding loss type. It's worth - mentioning that loss type will be detected by the shape of ``cls_score`` - and ``label``. - 1) Hard label: This label is an integer array and all of the elements are - in the range [0, num_classes - 1]. This label's shape should be - ``cls_score``'s shape with the `num_classes` dimension removed. - 2) Soft label(probablity distribution over classes): This label is a - probability distribution and all of the elements are in the range - [0, 1]. This label's shape must be the same as ``cls_score``. 
For now, - only 2-dim soft label is supported. - - Args: - loss_weight (float): Factor scalar multiplied on the loss. - Default: 1.0. - class_weight (list[float] | None): Loss weight for each class. If set - as None, use the same weight 1 for all classes. Only applies - to CrossEntropyLoss and BCELossWithLogits (should not be set when - using other losses). Default: None. - """ - def __init__(self, loss_weight=1.0, class_weight=None): - super().__init__(loss_weight=loss_weight) - self.class_weight = None - if class_weight is not None: - self.class_weight = torch.Tensor(class_weight) - - def _forward(self, cls_score, label, **kwargs): - """Forward function. - - Args: - cls_score (torch.Tensor): The class score. - label (torch.Tensor): The ground truth label. - kwargs: Any keyword argument to be used to calculate - CrossEntropy loss. - - Returns: - torch.Tensor: The returned CrossEntropy loss. - """ - if cls_score.size() == label.size(): - # calculate loss for soft label - - assert cls_score.dim() == 2, 'Only support 2-dim soft label' - assert len(kwargs) == 0, \ - ('For now, no extra args are supported for soft label, ' - f'but get {kwargs}') - - lsm = F.log_softmax(cls_score, 1) - if self.class_weight is not None: - lsm = lsm * self.class_weight.unsqueeze(0) - loss_cls = -(label * lsm).sum(1) - - # default reduction 'mean' - if self.class_weight is not None: - # Use weighted average as pytorch CrossEntropyLoss does. - # For more information, please visit https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html # noqa - loss_cls = loss_cls.sum() / torch.sum( - self.class_weight.unsqueeze(0) * label) - else: - loss_cls = loss_cls.mean() - else: - # calculate loss for hard label - - if self.class_weight is not None: - assert 'weight' not in kwargs, \ - "The key 'weight' already exists." - kwargs['weight'] = self.class_weight.to(cls_score.device) - # cls_score = cls_score.type(torch.float32) - # label = label.type(torch.int32) - loss_cls = F.cross_entropy(cls_score, label, **kwargs) - - return loss_cls - - -@LOSSES.register_module() -class BCELossWithLogits(BaseWeightedLoss): - """Binary Cross Entropy Loss with logits. - - Args: - loss_weight (float): Factor scalar multiplied on the loss. - Default: 1.0. - class_weight (list[float] | None): Loss weight for each class. If set - as None, use the same weight 1 for all classes. Only applies - to CrossEntropyLoss and BCELossWithLogits (should not be set when - using other losses). Default: None. - """ - def __init__(self, loss_weight=1.0, class_weight=None): - super().__init__(loss_weight=loss_weight) - self.class_weight = None - if class_weight is not None: - self.class_weight = torch.Tensor(class_weight) - - def _forward(self, cls_score, label, **kwargs): - """Forward function. - - Args: - cls_score (torch.Tensor): The class score. - label (torch.Tensor): The ground truth label. - kwargs: Any keyword argument to be used to calculate - bce loss with logits. - - Returns: - torch.Tensor: The returned bce loss with logits. - """ - if self.class_weight is not None: - assert 'weight' not in kwargs, "The key 'weight' already exists." - kwargs['weight'] = self.class_weight.to(cls_score.device) - loss_cls = F.binary_cross_entropy_with_logits(cls_score, label, - **kwargs) - return loss_cls +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
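# Short numerical check (not part of the patched file) of the soft-label branch
# of ``CrossEntropyLoss._forward`` in this file: without class_weight, the loss
# is the batch mean of -(label * log_softmax(cls_score)).sum(1), which reduces
# to the ordinary hard-label cross entropy when the soft label is one-hot.
import torch
import torch.nn.functional as F

cls_score = torch.tensor([[2.0, 0.5, 0.3]])
soft_label = torch.tensor([[1.0, 0.0, 0.0]])  # one-hot target given as a soft label
hard_label = torch.tensor([0])

soft_ce = -(soft_label * F.log_softmax(cls_score, 1)).sum(1).mean()
hard_ce = F.cross_entropy(cls_score, hard_label)
print(torch.allclose(soft_ce, hard_ce))       # True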
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +import torch.nn.functional as F + +from ..builder import LOSSES +from .base import BaseWeightedLoss + + +@LOSSES.register_module() +class CrossEntropyLoss(BaseWeightedLoss): + """Cross Entropy Loss. + + Support two kinds of labels and their corresponding loss type. It's worth + mentioning that loss type will be detected by the shape of ``cls_score`` + and ``label``. + 1) Hard label: This label is an integer array and all of the elements are + in the range [0, num_classes - 1]. This label's shape should be + ``cls_score``'s shape with the `num_classes` dimension removed. + 2) Soft label(probablity distribution over classes): This label is a + probability distribution and all of the elements are in the range + [0, 1]. This label's shape must be the same as ``cls_score``. For now, + only 2-dim soft label is supported. + + Args: + loss_weight (float): Factor scalar multiplied on the loss. + Default: 1.0. + class_weight (list[float] | None): Loss weight for each class. If set + as None, use the same weight 1 for all classes. Only applies + to CrossEntropyLoss and BCELossWithLogits (should not be set when + using other losses). Default: None. + """ + def __init__(self, loss_weight=1.0, class_weight=None): + super().__init__(loss_weight=loss_weight) + self.class_weight = None + if class_weight is not None: + self.class_weight = torch.Tensor(class_weight) + + def _forward(self, cls_score, label, **kwargs): + """Forward function. + + Args: + cls_score (torch.Tensor): The class score. + label (torch.Tensor): The ground truth label. + kwargs: Any keyword argument to be used to calculate + CrossEntropy loss. + + Returns: + torch.Tensor: The returned CrossEntropy loss. + """ + if cls_score.size() == label.size(): + # calculate loss for soft label + + assert cls_score.dim() == 2, 'Only support 2-dim soft label' + assert len(kwargs) == 0, \ + ('For now, no extra args are supported for soft label, ' + f'but get {kwargs}') + + lsm = F.log_softmax(cls_score, 1) + if self.class_weight is not None: + lsm = lsm * self.class_weight.unsqueeze(0) + loss_cls = -(label * lsm).sum(1) + + # default reduction 'mean' + if self.class_weight is not None: + # Use weighted average as pytorch CrossEntropyLoss does. + # For more information, please visit https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html # noqa + loss_cls = loss_cls.sum() / torch.sum( + self.class_weight.unsqueeze(0) * label) + else: + loss_cls = loss_cls.mean() + else: + # calculate loss for hard label + + if self.class_weight is not None: + assert 'weight' not in kwargs, \ + "The key 'weight' already exists." + kwargs['weight'] = self.class_weight.to(cls_score.device) + # cls_score = cls_score.type(torch.float32) + # label = label.type(torch.int32) + loss_cls = F.cross_entropy(cls_score, label, **kwargs) + + return loss_cls + + +@LOSSES.register_module() +class BCELossWithLogits(BaseWeightedLoss): + """Binary Cross Entropy Loss with logits. + + Args: + loss_weight (float): Factor scalar multiplied on the loss. + Default: 1.0. 
+ class_weight (list[float] | None): Loss weight for each class. If set + as None, use the same weight 1 for all classes. Only applies + to CrossEntropyLoss and BCELossWithLogits (should not be set when + using other losses). Default: None. + """ + def __init__(self, loss_weight=1.0, class_weight=None): + super().__init__(loss_weight=loss_weight) + self.class_weight = None + if class_weight is not None: + self.class_weight = torch.Tensor(class_weight) + + def _forward(self, cls_score, label, **kwargs): + """Forward function. + + Args: + cls_score (torch.Tensor): The class score. + label (torch.Tensor): The ground truth label. + kwargs: Any keyword argument to be used to calculate + bce loss with logits. + + Returns: + torch.Tensor: The returned bce loss with logits. + """ + if self.class_weight is not None: + assert 'weight' not in kwargs, "The key 'weight' already exists." + kwargs['weight'] = self.class_weight.to(cls_score.device) + loss_cls = F.binary_cross_entropy_with_logits(cls_score, label, + **kwargs) + return loss_cls diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/base.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/base.py index 35603218baa99c3f8caa13bfd76f1e46fbc1e117..b7a0c527ec36b603f98961b7c0d6b511d99f1f6d 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/base.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/base.py @@ -1,339 +1,339 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import warnings -from abc import ABCMeta, abstractmethod -from collections import OrderedDict - -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -from mmcv.runner import auto_fp16 - -from .. import builder - - -class BaseRecognizer(nn.Module, metaclass=ABCMeta): - """Base class for recognizers. - - All recognizers should subclass it. - All subclass should overwrite: - - - Methods:``forward_train``, supporting to forward when training. - - Methods:``forward_test``, supporting to forward when testing. - - Args: - backbone (dict): Backbone modules to extract feature. - cls_head (dict | None): Classification head to process feature. - Default: None. - neck (dict | None): Neck for feature fusion. Default: None. - train_cfg (dict | None): Config for training. Default: None. - test_cfg (dict | None): Config for testing. Default: None. 
- """ - def __init__(self, - backbone, - cls_head=None, - neck=None, - train_cfg=None, - test_cfg=None): - super().__init__() - # record the source of the backbone - self.backbone_from = 'mmaction2' - - if backbone['type'].startswith('mmcls.'): - try: - import mmcls.models.builder as mmcls_builder - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install mmcls to use this backbone.') - backbone['type'] = backbone['type'][6:] - self.backbone = mmcls_builder.build_backbone(backbone) - self.backbone_from = 'mmcls' - elif backbone['type'].startswith('torchvision.'): - try: - import torchvision.models - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install torchvision to use this ' - 'backbone.') - backbone_type = backbone.pop('type')[12:] - self.backbone = torchvision.models.__dict__[backbone_type]( - **backbone) - # disable the classifier - self.backbone.classifier = nn.Identity() - self.backbone.fc = nn.Identity() - self.backbone_from = 'torchvision' - elif backbone['type'].startswith('timm.'): - try: - import timm - except (ImportError, ModuleNotFoundError): - raise ImportError('Please install timm to use this ' - 'backbone.') - backbone_type = backbone.pop('type')[5:] - # disable the classifier - backbone['num_classes'] = 0 - self.backbone = timm.create_model(backbone_type, **backbone) - self.backbone_from = 'timm' - else: - self.backbone = builder.build_backbone(backbone) - - if neck is not None: - self.neck = builder.build_neck(neck) - - self.cls_head = builder.build_head(cls_head) if cls_head else None - - self.train_cfg = train_cfg - self.test_cfg = test_cfg - - # aux_info is the list of tensor names beyond 'imgs' and 'label' which - # will be used in train_step and val_step, data_batch should contain - # these tensors - self.aux_info = [] - if train_cfg is not None and 'aux_info' in train_cfg: - self.aux_info = train_cfg['aux_info'] - # max_testing_views should be int - self.max_testing_views = None - if test_cfg is not None and 'max_testing_views' in test_cfg: - self.max_testing_views = test_cfg['max_testing_views'] - assert isinstance(self.max_testing_views, int) - - if test_cfg is not None and 'feature_extraction' in test_cfg: - self.feature_extraction = test_cfg['feature_extraction'] - else: - self.feature_extraction = False - - # mini-batch blending, e.g. mixup, cutmix, etc. 
- self.blending = None - if train_cfg is not None and 'blending' in train_cfg: - from mmcv.utils import build_from_cfg - from mmaction.datasets.builder import BLENDINGS - self.blending = build_from_cfg(train_cfg['blending'], BLENDINGS) - - self.init_weights() - - self.fp16_enabled = False - - @property - def with_neck(self): - """bool: whether the recognizer has a neck""" - return hasattr(self, 'neck') and self.neck is not None - - @property - def with_cls_head(self): - """bool: whether the recognizer has a cls_head""" - return hasattr(self, 'cls_head') and self.cls_head is not None - - def init_weights(self): - """Initialize the model network weights.""" - if self.backbone_from in ['mmcls', 'mmaction2']: - self.backbone.init_weights() - elif self.backbone_from in ['torchvision', 'timm']: - warnings.warn('We do not initialize weights for backbones in ' - f'{self.backbone_from}, since the weights for ' - f'backbones in {self.backbone_from} are initialized' - 'in their __init__ functions.') - else: - raise NotImplementedError('Unsupported backbone source ' - f'{self.backbone_from}!') - - if self.with_cls_head: - self.cls_head.init_weights() - if self.with_neck: - self.neck.init_weights() - - @auto_fp16() - def extract_feat(self, imgs): - """Extract features through a backbone. - - Args: - imgs (torch.Tensor): The input images. - - Returns: - torch.tensor: The extracted features. - """ - if (hasattr(self.backbone, 'features') - and self.backbone_from == 'torchvision'): - x = self.backbone.features(imgs) - elif self.backbone_from == 'timm': - x = self.backbone.forward_features(imgs) - else: - x = self.backbone(imgs) - return x - - def average_clip(self, cls_score, num_segs=1): - """Averaging class score over multiple clips. - - Using different averaging types ('score' or 'prob' or None, - which defined in test_cfg) to computed the final averaged - class score. Only called in test mode. - - Args: - cls_score (torch.Tensor): Class score to be averaged. - num_segs (int): Number of clips for each input sample. - - Returns: - torch.Tensor: Averaged class score. - """ - if 'average_clips' not in self.test_cfg.keys(): - raise KeyError('"average_clips" must defined in test_cfg\'s keys') - - average_clips = self.test_cfg['average_clips'] - if average_clips not in ['score', 'prob', None]: - raise ValueError(f'{average_clips} is not supported. ' - f'Currently supported ones are ' - f'["score", "prob", None]') - - if average_clips is None: - return cls_score - - batch_size = cls_score.shape[0] - cls_score = cls_score.view(batch_size // num_segs, num_segs, -1) - - if average_clips == 'prob': - cls_score = F.softmax(cls_score, dim=2).mean(dim=1) - elif average_clips == 'score': - cls_score = cls_score.mean(dim=1) - - return cls_score - - @abstractmethod - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - @abstractmethod - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - - @abstractmethod - def forward_gradcam(self, imgs): - """Defines the computation performed at every all when using gradcam - utils.""" - - @staticmethod - def _parse_losses(losses): - """Parse the raw outputs (losses) of the network. - - Args: - losses (dict): Raw output of the network, which usually contain - losses and other necessary information. 
- - Returns: - tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor - which may be a weighted sum of all losses, log_vars contains - all the variables to be sent to the logger. - """ - log_vars = OrderedDict() - for loss_name, loss_value in losses.items(): - if isinstance(loss_value, torch.Tensor): - log_vars[loss_name] = loss_value.mean() - elif isinstance(loss_value, list): - log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) - else: - raise TypeError( - f'{loss_name} is not a tensor or list of tensors') - - loss = sum(_value for _key, _value in log_vars.items() - if 'loss' in _key) - - log_vars['loss'] = loss - for loss_name, loss_value in log_vars.items(): - # reduce loss when distributed training - if dist.is_available() and dist.is_initialized(): - loss_value = loss_value.data.clone() - dist.all_reduce(loss_value.div_(dist.get_world_size())) - log_vars[loss_name] = loss_value.item() - - return loss, log_vars - - def forward(self, imgs, label=None, return_loss=True, **kwargs): - """Define the computation performed at every call.""" - if kwargs.get('gradcam', False): - del kwargs['gradcam'] - return self.forward_gradcam(imgs, **kwargs) - if return_loss: - if label is None: - raise ValueError('Label should not be None.') - if self.blending is not None: - imgs, label = self.blending(imgs, label) - return self.forward_train(imgs, label, **kwargs) - - return self.forward_test(imgs, **kwargs) - - def train_step(self, data_batch, optimizer, **kwargs): - """The iteration step during training. - - This method defines an iteration step during training, except for the - back propagation and optimizer updating, which are done in an optimizer - hook. Note that in some complicated cases or models, the whole process - including back propagation and optimizer updating is also defined in - this method, such as GAN. - - Args: - data_batch (dict): The output of dataloader. - optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of - runner is passed to ``train_step()``. This argument is unused - and reserved. - - Returns: - dict: It should contain at least 3 keys: ``loss``, ``log_vars``, - ``num_samples``. - ``loss`` is a tensor for back propagation, which can be a - weighted sum of multiple losses. - ``log_vars`` contains all the variables to be sent to the - logger. - ``num_samples`` indicates the batch size (when the model is - DDP, it means the batch size on each GPU), which is used for - averaging the logs. - """ - imgs = data_batch['imgs'].npu().type(torch.float32) - label = data_batch['label'].npu().type(torch.int32) - - aux_info = {} - for item in self.aux_info: - assert item in data_batch - aux_info[item] = data_batch[item] - - losses = self(imgs, label, return_loss=True, **aux_info) - - loss, log_vars = self._parse_losses(losses) - - outputs = dict(loss=loss, - log_vars=log_vars, - num_samples=len(next(iter(data_batch.values())))) - - return outputs - - def val_step(self, data_batch, optimizer, **kwargs): - """The iteration step during validation. - - This method shares the same signature as :func:`train_step`, but used - during val epochs. Note that the evaluation after training epochs is - not implemented with this method, but an evaluation hook. 
- """ - imgs = data_batch['imgs'].npu().type(torch.float32) - label = data_batch['label'].npu().type(torch.int32) - - aux_info = {} - for item in self.aux_info: - aux_info[item] = data_batch[item] - - losses = self(imgs, label, return_loss=True, **aux_info) - - loss, log_vars = self._parse_losses(losses) - - outputs = dict(loss=loss, - log_vars=log_vars, - num_samples=len(next(iter(data_batch.values())))) - - return outputs +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from mmcv.runner import auto_fp16 + +from .. import builder + + +class BaseRecognizer(nn.Module, metaclass=ABCMeta): + """Base class for recognizers. + + All recognizers should subclass it. + All subclass should overwrite: + + - Methods:``forward_train``, supporting to forward when training. + - Methods:``forward_test``, supporting to forward when testing. + + Args: + backbone (dict): Backbone modules to extract feature. + cls_head (dict | None): Classification head to process feature. + Default: None. + neck (dict | None): Neck for feature fusion. Default: None. + train_cfg (dict | None): Config for training. Default: None. + test_cfg (dict | None): Config for testing. Default: None. 
+ """ + def __init__(self, + backbone, + cls_head=None, + neck=None, + train_cfg=None, + test_cfg=None): + super().__init__() + # record the source of the backbone + self.backbone_from = 'mmaction2' + + if backbone['type'].startswith('mmcls.'): + try: + import mmcls.models.builder as mmcls_builder + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install mmcls to use this backbone.') + backbone['type'] = backbone['type'][6:] + self.backbone = mmcls_builder.build_backbone(backbone) + self.backbone_from = 'mmcls' + elif backbone['type'].startswith('torchvision.'): + try: + import torchvision.models + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install torchvision to use this ' + 'backbone.') + backbone_type = backbone.pop('type')[12:] + self.backbone = torchvision.models.__dict__[backbone_type]( + **backbone) + # disable the classifier + self.backbone.classifier = nn.Identity() + self.backbone.fc = nn.Identity() + self.backbone_from = 'torchvision' + elif backbone['type'].startswith('timm.'): + try: + import timm + except (ImportError, ModuleNotFoundError): + raise ImportError('Please install timm to use this ' + 'backbone.') + backbone_type = backbone.pop('type')[5:] + # disable the classifier + backbone['num_classes'] = 0 + self.backbone = timm.create_model(backbone_type, **backbone) + self.backbone_from = 'timm' + else: + self.backbone = builder.build_backbone(backbone) + + if neck is not None: + self.neck = builder.build_neck(neck) + + self.cls_head = builder.build_head(cls_head) if cls_head else None + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + # aux_info is the list of tensor names beyond 'imgs' and 'label' which + # will be used in train_step and val_step, data_batch should contain + # these tensors + self.aux_info = [] + if train_cfg is not None and 'aux_info' in train_cfg: + self.aux_info = train_cfg['aux_info'] + # max_testing_views should be int + self.max_testing_views = None + if test_cfg is not None and 'max_testing_views' in test_cfg: + self.max_testing_views = test_cfg['max_testing_views'] + assert isinstance(self.max_testing_views, int) + + if test_cfg is not None and 'feature_extraction' in test_cfg: + self.feature_extraction = test_cfg['feature_extraction'] + else: + self.feature_extraction = False + + # mini-batch blending, e.g. mixup, cutmix, etc. 
+ self.blending = None + if train_cfg is not None and 'blending' in train_cfg: + from mmcv.utils import build_from_cfg + from mmaction.datasets.builder import BLENDINGS + self.blending = build_from_cfg(train_cfg['blending'], BLENDINGS) + + self.init_weights() + + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the recognizer has a neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_cls_head(self): + """bool: whether the recognizer has a cls_head""" + return hasattr(self, 'cls_head') and self.cls_head is not None + + def init_weights(self): + """Initialize the model network weights.""" + if self.backbone_from in ['mmcls', 'mmaction2']: + self.backbone.init_weights() + elif self.backbone_from in ['torchvision', 'timm']: + warnings.warn('We do not initialize weights for backbones in ' + f'{self.backbone_from}, since the weights for ' + f'backbones in {self.backbone_from} are initialized' + 'in their __init__ functions.') + else: + raise NotImplementedError('Unsupported backbone source ' + f'{self.backbone_from}!') + + if self.with_cls_head: + self.cls_head.init_weights() + if self.with_neck: + self.neck.init_weights() + + @auto_fp16() + def extract_feat(self, imgs): + """Extract features through a backbone. + + Args: + imgs (torch.Tensor): The input images. + + Returns: + torch.tensor: The extracted features. + """ + if (hasattr(self.backbone, 'features') + and self.backbone_from == 'torchvision'): + x = self.backbone.features(imgs) + elif self.backbone_from == 'timm': + x = self.backbone.forward_features(imgs) + else: + x = self.backbone(imgs) + return x + + def average_clip(self, cls_score, num_segs=1): + """Averaging class score over multiple clips. + + Using different averaging types ('score' or 'prob' or None, + which defined in test_cfg) to computed the final averaged + class score. Only called in test mode. + + Args: + cls_score (torch.Tensor): Class score to be averaged. + num_segs (int): Number of clips for each input sample. + + Returns: + torch.Tensor: Averaged class score. + """ + if 'average_clips' not in self.test_cfg.keys(): + raise KeyError('"average_clips" must defined in test_cfg\'s keys') + + average_clips = self.test_cfg['average_clips'] + if average_clips not in ['score', 'prob', None]: + raise ValueError(f'{average_clips} is not supported. ' + f'Currently supported ones are ' + f'["score", "prob", None]') + + if average_clips is None: + return cls_score + + batch_size = cls_score.shape[0] + cls_score = cls_score.view(batch_size // num_segs, num_segs, -1) + + if average_clips == 'prob': + cls_score = F.softmax(cls_score, dim=2).mean(dim=1) + elif average_clips == 'score': + cls_score = cls_score.mean(dim=1) + + return cls_score + + @abstractmethod + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + @abstractmethod + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + + @abstractmethod + def forward_gradcam(self, imgs): + """Defines the computation performed at every all when using gradcam + utils.""" + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. 
+ + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def forward(self, imgs, label=None, return_loss=True, **kwargs): + """Define the computation performed at every call.""" + if kwargs.get('gradcam', False): + del kwargs['gradcam'] + return self.forward_gradcam(imgs, **kwargs) + if return_loss: + if label is None: + raise ValueError('Label should not be None.') + if self.blending is not None: + imgs, label = self.blending(imgs, label) + return self.forward_train(imgs, label, **kwargs) + + return self.forward_test(imgs, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data_batch (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + imgs = data_batch['imgs'].npu().type(torch.float32) + label = data_batch['label'].npu().type(torch.int32) + + aux_info = {} + for item in self.aux_info: + assert item in data_batch + aux_info[item] = data_batch[item] + + losses = self(imgs, label, return_loss=True, **aux_info) + + loss, log_vars = self._parse_losses(losses) + + outputs = dict(loss=loss, + log_vars=log_vars, + num_samples=len(next(iter(data_batch.values())))) + + return outputs + + def val_step(self, data_batch, optimizer, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. 
+ """ + imgs = data_batch['imgs'].npu().type(torch.float32) + label = data_batch['label'].npu().type(torch.int32) + + aux_info = {} + for item in self.aux_info: + aux_info[item] = data_batch[item] + + losses = self(imgs, label, return_loss=True, **aux_info) + + loss, log_vars = self._parse_losses(losses) + + outputs = dict(loss=loss, + log_vars=log_vars, + num_samples=len(next(iter(data_batch.values())))) + + return outputs diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer2d.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer2d.py index 7d145f6dd83cab07d056cb40ce712c0ab9485c5c..3d9a755a5a53ebe1432ac413ee6ab85566d07b06 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer2d.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer2d.py @@ -1,195 +1,195 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -from torch import nn - -from ..builder import RECOGNIZERS -from .base import BaseRecognizer - - -@RECOGNIZERS.register_module() -class Recognizer2D(BaseRecognizer): - """2D recognizer model framework.""" - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - assert self.with_cls_head - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - losses = dict() - - x = self.extract_feat(imgs) - - if self.backbone_from in ['torchvision', 'timm']: - if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): - # apply adaptive avg pooling - x = nn.AdaptiveAvgPool2d(1)(x) - x = x.reshape((x.shape[0], -1)) - x = x.reshape(x.shape + (1, 1)) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, loss_aux = self.neck(x, labels.squeeze()) - x = x.squeeze(2) - num_segs = 1 - losses.update(loss_aux) - - cls_score = self.cls_head(x, num_segs) - gt_labels = labels.squeeze() - loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) - losses.update(loss_cls) - - return losses - - def _do_test(self, imgs): - """Defines the computation performed at every call when evaluation, - testing and gradcam.""" - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - x = self.extract_feat(imgs) - - if self.backbone_from in ['torchvision', 'timm']: - if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): - # apply adaptive avg pooling - x = nn.AdaptiveAvgPool2d(1)(x) - x = x.reshape((x.shape[0], -1)) - x = x.reshape(x.shape + (1, 1)) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - x = x.squeeze(2) - num_segs = 1 - - if self.feature_extraction: - # perform spatial pooling - avg_pool = 
nn.AdaptiveAvgPool2d(1) - x = avg_pool(x) - # squeeze dimensions - x = x.reshape((batches, num_segs, -1)) - # temporal average pooling - x = x.mean(axis=1) - return x - - # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] - # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] - # `num_crops` is calculated by: - # 1) `twice_sample` in `SampleFrames` - # 2) `num_sample_positions` in `DenseSampleFrames` - # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` - # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` - - # should have cls_head if not extracting features - cls_score = self.cls_head(x, num_segs) - - assert cls_score.size()[0] % batches == 0 - # calculate num_crops automatically - cls_score = self.average_clip(cls_score, - cls_score.size()[0] // batches) - return cls_score - - def _do_fcn_test(self, imgs): - # [N, num_crops * num_segs, C, H, W] -> - # [N * num_crops * num_segs, C, H, W] - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = self.test_cfg.get('num_segs', self.backbone.num_segments) - - if self.test_cfg.get('flip', False): - imgs = torch.flip(imgs, [-1]) - x = self.extract_feat(imgs) - - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - else: - x = x.reshape((-1, num_segs) + x.shape[1:]).transpose( - 1, 2).contiguous() - - # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] - # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] - # `num_crops` is calculated by: - # 1) `twice_sample` in `SampleFrames` - # 2) `num_sample_positions` in `DenseSampleFrames` - # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` - # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` - cls_score = self.cls_head(x, fcn_test=True) - - assert cls_score.size()[0] % batches == 0 - # calculate num_crops automatically - cls_score = self.average_clip(cls_score, - cls_score.size()[0] // batches) - return cls_score - - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - imgs = imgs.npu().type(torch.float16) - if self.test_cfg.get('fcn_test', False): - # If specified, spatially fully-convolutional testing is performed - assert not self.feature_extraction - assert self.with_cls_head - return self._do_fcn_test(imgs).cpu().numpy() - return self._do_test(imgs).cpu().numpy() - - def forward_dummy(self, imgs, softmax=False): - """Used for computing network FLOPs. - - See ``tools/analysis/get_flops.py``. - - Args: - imgs (torch.Tensor): Input images. - - Returns: - Tensor: Class score. 
- """ - assert self.with_cls_head - batches = imgs.shape[0] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - num_segs = imgs.shape[0] // batches - - x = self.extract_feat(imgs) - if self.with_neck: - x = [ - each.reshape((-1, num_segs) + each.shape[1:]).transpose( - 1, 2).contiguous() for each in x - ] - x, _ = self.neck(x) - x = x.squeeze(2) - num_segs = 1 - - outs = self.cls_head(x, num_segs) - if softmax: - outs = nn.functional.softmax(outs) - return (outs, ) - - def forward_gradcam(self, imgs): - """Defines the computation performed at every call when using gradcam - utils.""" - assert self.with_cls_head - return self._do_test(imgs) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import torch +from torch import nn + +from ..builder import RECOGNIZERS +from .base import BaseRecognizer + + +@RECOGNIZERS.register_module() +class Recognizer2D(BaseRecognizer): + """2D recognizer model framework.""" + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + assert self.with_cls_head + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + losses = dict() + + x = self.extract_feat(imgs) + + if self.backbone_from in ['torchvision', 'timm']: + if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): + # apply adaptive avg pooling + x = nn.AdaptiveAvgPool2d(1)(x) + x = x.reshape((x.shape[0], -1)) + x = x.reshape(x.shape + (1, 1)) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, loss_aux = self.neck(x, labels.squeeze()) + x = x.squeeze(2) + num_segs = 1 + losses.update(loss_aux) + + cls_score = self.cls_head(x, num_segs) + gt_labels = labels.squeeze() + loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) + losses.update(loss_cls) + + return losses + + def _do_test(self, imgs): + """Defines the computation performed at every call when evaluation, + testing and gradcam.""" + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + x = self.extract_feat(imgs) + + if self.backbone_from in ['torchvision', 'timm']: + if len(x.shape) == 4 and (x.shape[2] > 1 or x.shape[3] > 1): + # apply adaptive avg pooling + x = nn.AdaptiveAvgPool2d(1)(x) + x = x.reshape((x.shape[0], -1)) + x = x.reshape(x.shape + (1, 1)) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + x = x.squeeze(2) + num_segs = 1 + + if self.feature_extraction: + # perform spatial pooling + avg_pool = nn.AdaptiveAvgPool2d(1) + x = avg_pool(x) + # squeeze dimensions + x = x.reshape((batches, num_segs, -1)) + # temporal average pooling + x = x.mean(axis=1) + return x + + # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] + # When 
using `TSMHead`, shape is [batch_size * num_crops, num_classes] + # `num_crops` is calculated by: + # 1) `twice_sample` in `SampleFrames` + # 2) `num_sample_positions` in `DenseSampleFrames` + # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` + # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` + + # should have cls_head if not extracting features + cls_score = self.cls_head(x, num_segs) + + assert cls_score.size()[0] % batches == 0 + # calculate num_crops automatically + cls_score = self.average_clip(cls_score, + cls_score.size()[0] // batches) + return cls_score + + def _do_fcn_test(self, imgs): + # [N, num_crops * num_segs, C, H, W] -> + # [N * num_crops * num_segs, C, H, W] + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = self.test_cfg.get('num_segs', self.backbone.num_segments) + + if self.test_cfg.get('flip', False): + imgs = torch.flip(imgs, [-1]) + x = self.extract_feat(imgs) + + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + else: + x = x.reshape((-1, num_segs) + x.shape[1:]).transpose( + 1, 2).contiguous() + + # When using `TSNHead` or `TPNHead`, shape is [batch_size, num_classes] + # When using `TSMHead`, shape is [batch_size * num_crops, num_classes] + # `num_crops` is calculated by: + # 1) `twice_sample` in `SampleFrames` + # 2) `num_sample_positions` in `DenseSampleFrames` + # 3) `ThreeCrop/TenCrop/MultiGroupCrop` in `test_pipeline` + # 4) `num_clips` in `SampleFrames` or its subclass if `clip_len != 1` + cls_score = self.cls_head(x, fcn_test=True) + + assert cls_score.size()[0] % batches == 0 + # calculate num_crops automatically + cls_score = self.average_clip(cls_score, + cls_score.size()[0] // batches) + return cls_score + + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + imgs = imgs.npu().type(torch.float16) + if self.test_cfg.get('fcn_test', False): + # If specified, spatially fully-convolutional testing is performed + assert not self.feature_extraction + assert self.with_cls_head + return self._do_fcn_test(imgs).cpu().numpy() + return self._do_test(imgs).cpu().numpy() + + def forward_dummy(self, imgs, softmax=False): + """Used for computing network FLOPs. + + See ``tools/analysis/get_flops.py``. + + Args: + imgs (torch.Tensor): Input images. + + Returns: + Tensor: Class score. 
+ """ + assert self.with_cls_head + batches = imgs.shape[0] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + num_segs = imgs.shape[0] // batches + + x = self.extract_feat(imgs) + if self.with_neck: + x = [ + each.reshape((-1, num_segs) + each.shape[1:]).transpose( + 1, 2).contiguous() for each in x + ] + x, _ = self.neck(x) + x = x.squeeze(2) + num_segs = 1 + + outs = self.cls_head(x, num_segs) + if softmax: + outs = nn.functional.softmax(outs) + return (outs, ) + + def forward_gradcam(self, imgs): + """Defines the computation performed at every call when using gradcam + utils.""" + assert self.with_cls_head + return self._do_test(imgs) diff --git a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer3d.py b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer3d.py index fe1a61ef2bad97c0c35ee9c97b7fd66373381585..9a1b70449dfdaf5b598586a98d91a1fbcdd057dc 100644 --- a/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer3d.py +++ b/PyTorch/contrib/cv/video/SlowFast/mmaction/models/recognizers/recognizer3d.py @@ -1,133 +1,133 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import torch -from torch import nn - -from ..builder import RECOGNIZERS -from .base import BaseRecognizer - - -@RECOGNIZERS.register_module() -class Recognizer3D(BaseRecognizer): - """3D recognizer model framework.""" - def forward_train(self, imgs, labels, **kwargs): - """Defines the computation performed at every call when training.""" - - assert self.with_cls_head - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - losses = dict() - - x = self.extract_feat(imgs) - if self.with_neck: - x, loss_aux = self.neck(x, labels.squeeze()) - losses.update(loss_aux) - - cls_score = self.cls_head(x) - gt_labels = labels.squeeze() - loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) - losses.update(loss_cls) - - return losses - - def _do_test(self, imgs): - """Defines the computation performed at every call when evaluation, - testing and gradcam.""" - batches = imgs.shape[0] - num_segs = imgs.shape[1] - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - - if self.max_testing_views is not None: - total_views = imgs.shape[0] - assert num_segs == total_views, ( - 'max_testing_views is only compatible ' - 'with batch_size == 1') - view_ptr = 0 - feats = [] - while view_ptr < total_views: - batch_imgs = imgs[view_ptr:view_ptr + self.max_testing_views] - x = self.extract_feat(batch_imgs) - if self.with_neck: - x, _ = self.neck(x) - feats.append(x) - view_ptr += self.max_testing_views - # should consider the case that feat is a tuple - if isinstance(feats[0], tuple): - len_tuple = len(feats[0]) - feat = [ - torch.cat([x[i] for x in feats]) for i in range(len_tuple) - ] - feat = tuple(feat) - else: - feat = torch.cat(feats) - else: - feat = self.extract_feat(imgs) - if self.with_neck: - feat, _ = self.neck(feat) - - if self.feature_extraction: - # perform 
spatio-temporal pooling - avg_pool = nn.AdaptiveAvgPool3d(1) - if isinstance(feat, tuple): - feat = [avg_pool(x) for x in feat] - # concat them - feat = torch.cat(feat, axis=1) - else: - feat = avg_pool(feat) - # squeeze dimensions - feat = feat.reshape((batches, num_segs, -1)) - # temporal average pooling - feat = feat.mean(axis=1) - return feat - - # should have cls_head if not extracting features - assert self.with_cls_head - cls_score = self.cls_head(feat) - cls_score = self.average_clip(cls_score, num_segs) - return cls_score - - def forward_test(self, imgs): - """Defines the computation performed at every call when evaluation and - testing.""" - imgs = imgs.npu().type(torch.float16) - return self._do_test(imgs).cpu().numpy() - - def forward_dummy(self, imgs, softmax=False): - """Used for computing network FLOPs. - - See ``tools/analysis/get_flops.py``. - - Args: - imgs (torch.Tensor): Input images. - - Returns: - Tensor: Class score. - """ - assert self.with_cls_head - imgs = imgs.reshape((-1, ) + imgs.shape[2:]) - x = self.extract_feat(imgs) - - if self.with_neck: - x, _ = self.neck(x) - - outs = self.cls_head(x) - if softmax: - outs = nn.functional.softmax(outs) - return (outs, ) - - def forward_gradcam(self, imgs): - """Defines the computation performed at every call when using gradcam - utils.""" - assert self.with_cls_head - return self._do_test(imgs) +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import torch +from torch import nn + +from ..builder import RECOGNIZERS +from .base import BaseRecognizer + + +@RECOGNIZERS.register_module() +class Recognizer3D(BaseRecognizer): + """3D recognizer model framework.""" + def forward_train(self, imgs, labels, **kwargs): + """Defines the computation performed at every call when training.""" + + assert self.with_cls_head + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + losses = dict() + + x = self.extract_feat(imgs) + if self.with_neck: + x, loss_aux = self.neck(x, labels.squeeze()) + losses.update(loss_aux) + + cls_score = self.cls_head(x) + gt_labels = labels.squeeze() + loss_cls = self.cls_head.loss(cls_score, gt_labels, **kwargs) + losses.update(loss_cls) + + return losses + + def _do_test(self, imgs): + """Defines the computation performed at every call when evaluation, + testing and gradcam.""" + batches = imgs.shape[0] + num_segs = imgs.shape[1] + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + + if self.max_testing_views is not None: + total_views = imgs.shape[0] + assert num_segs == total_views, ( + 'max_testing_views is only compatible ' + 'with batch_size == 1') + view_ptr = 0 + feats = [] + while view_ptr < total_views: + batch_imgs = imgs[view_ptr:view_ptr + self.max_testing_views] + x = self.extract_feat(batch_imgs) + if self.with_neck: + x, _ = self.neck(x) + feats.append(x) + view_ptr += self.max_testing_views + # should consider the case that feat is a tuple + if isinstance(feats[0], tuple): + len_tuple = len(feats[0]) + feat = [ + torch.cat([x[i] for x in feats]) for i in range(len_tuple) + ] + feat = tuple(feat) + else: + feat = torch.cat(feats) + else: + feat = self.extract_feat(imgs) + if self.with_neck: + feat, _ = self.neck(feat) + + if self.feature_extraction: + # perform spatio-temporal pooling + avg_pool = nn.AdaptiveAvgPool3d(1) + if isinstance(feat, tuple): + feat = [avg_pool(x) for x in feat] + # concat them + feat = torch.cat(feat, axis=1) + else: + feat = avg_pool(feat) + # squeeze dimensions + feat = feat.reshape((batches, num_segs, -1)) + # temporal average pooling + feat = feat.mean(axis=1) + return feat + + # should have cls_head if not extracting features + assert self.with_cls_head + cls_score = self.cls_head(feat) + cls_score = self.average_clip(cls_score, num_segs) + return cls_score + + def forward_test(self, imgs): + """Defines the computation performed at every call when evaluation and + testing.""" + imgs = imgs.npu().type(torch.float16) + return self._do_test(imgs).cpu().numpy() + + def forward_dummy(self, imgs, softmax=False): + """Used for computing network FLOPs. + + See ``tools/analysis/get_flops.py``. + + Args: + imgs (torch.Tensor): Input images. + + Returns: + Tensor: Class score. 
+ """ + assert self.with_cls_head + imgs = imgs.reshape((-1, ) + imgs.shape[2:]) + x = self.extract_feat(imgs) + + if self.with_neck: + x, _ = self.neck(x) + + outs = self.cls_head(x) + if softmax: + outs = nn.functional.softmax(outs) + return (outs, ) + + def forward_gradcam(self, imgs): + """Defines the computation performed at every call when using gradcam + utils.""" + assert self.with_cls_head + return self._do_test(imgs) diff --git a/PyTorch/contrib/cv/video/VideoPose3D/run.py b/PyTorch/contrib/cv/video/VideoPose3D/run.py index dea000f7c6f929fb8f9b126470e74efc5e56a671..c252af15642db549f167c2eb88d0b5b3f148170f 100644 --- a/PyTorch/contrib/cv/video/VideoPose3D/run.py +++ b/PyTorch/contrib/cv/video/VideoPose3D/run.py @@ -1,699 +1,699 @@ -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ - -# Copyright (c) 2018-present, Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
- - -import numpy as np - -from common.arguments import parse_args -import torch - -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch.distributed as dist -import torch.multiprocessing as mp -import os -import sys -import errno -import math -import logging - -from common.camera import * -from common.model import * -from common.loss import * -from common.generators import ChunkedGenerator, UnchunkedGenerator -from time import time -from common.utils import deterministic_random, fetch, run_evaluation -from common.structure import AverageMeter, time_format_convert, device_id_to_process_device_map - -from tensorboardX import SummaryWriter -from apex import amp -# from apex.optimizers import NpuFusedAdam - - -def main(): - args = parse_args() - # print(args) - - try: - # Create checkpoint directory if it does not exist - os.makedirs(args.checkpoint) - except OSError as e: - if e.errno != errno.EEXIST: - raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint) - - if not os.path.exists(args.output): - os.makedirs(args.output) - print(f"args.output:{args.output}") - - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '27005' - - process_device_map = device_id_to_process_device_map(args.device_list) - - if args.device == 'npu': - ngpus_per_node = len(process_device_map) - else: - ngpus_per_node = args.num_gpus - - args.num_gpus = ngpus_per_node - args.world_size = args.world_size * ngpus_per_node - - # npu = int(os.environ['RANK_ID']) - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - - -def setup_logger(final_output_dir, rank, phase): - # time_str = time.strftime('%Y-%m-%d-%H-%M') - log_file = '{}_rank{}.log'.format(phase, rank) - final_log_file = os.path.join(final_output_dir, log_file) - head = '%(asctime)-15s %(message)s' - # logging.basicConfig(format=head) - logging.basicConfig(filename=str(final_log_file), - format=head) - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console = logging.StreamHandler() - logging.getLogger('').addHandler(console) - - return logger - - -def main_worker(gpu, ngpus_per_node, args): - process_device_map = device_id_to_process_device_map(args.device_list) - log_dir = args.output - logger = setup_logger(log_dir, gpu, 'train') - - # args.gpu = gpu - args.gpu = process_device_map[gpu] - # logger.info(f"args.gpu is {args.gpu}") - - args.rank = args.rank * ngpus_per_node + gpu - - # print(f'args.print_feq:{args.print_feq}') - if args.rank % ngpus_per_node == 0: - log_path = args.log - writer_dict = { - 'writer': SummaryWriter(logdir=log_path), - 'train_global_steps': 0, - 'valid_global_steps': 0, - } - - if args.device == 'npu': - print("args.rank:",args.rank) - dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - else: - dist.init_process_group(backend='nccl', init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - - logger.info(f'Loading dataset for rank:{args.rank}...') - dataset_path = 'data/data_3d_' + args.dataset + '.npz' - if args.dataset == 'h36m': - from common.h36m_dataset import Human36mDataset - dataset = Human36mDataset(dataset_path) - elif args.dataset.startswith('humaneva'): - from common.humaneva_dataset import HumanEvaDataset - dataset = HumanEvaDataset(dataset_path) - elif args.dataset.startswith('custom'): - from common.custom_dataset import CustomDataset - dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + 
args.keypoints + '.npz') - else: - raise KeyError('Invalid dataset') - - logger.info(f'Preparing data for rank:{args.rank}...') - for subject in dataset.subjects(): - for action in dataset[subject].keys(): - anim = dataset[subject][action] - - if 'positions' in anim: - positions_3d = [] - for cam in anim['cameras']: - pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation']) - pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position - positions_3d.append(pos_3d) - anim['positions_3d'] = positions_3d - - logger.info(f'Loading 2D detections for rank:{args.rank}...') - keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True) - keypoints_metadata = keypoints['metadata'].item() - keypoints_symmetry = keypoints_metadata['keypoints_symmetry'] - kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1]) - joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right()) - keypoints = keypoints['positions_2d'].item() - - for subject in dataset.subjects(): - assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject) - for action in dataset[subject].keys(): - assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject) - if 'positions_3d' not in dataset[subject][action]: - continue - - for cam_idx in range(len(keypoints[subject][action])): - - # We check for >= instead of == because some videos in H3.6M contain extra frames - mocap_length = dataset[subject][action]['positions_3d'][cam_idx].shape[0] - assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length - - if keypoints[subject][action][cam_idx].shape[0] > mocap_length: - # Shorten sequence - keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length] - - assert len(keypoints[subject][action]) == len(dataset[subject][action]['positions_3d']) - - for subject in keypoints.keys(): - for action in keypoints[subject]: - for cam_idx, kps in enumerate(keypoints[subject][action]): - # Normalize camera frame - cam = dataset.cameras()[subject][cam_idx] - kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h']) - keypoints[subject][action][cam_idx] = kps - - subjects_train = args.subjects_train.split(',') - subjects_semi = [] if not args.subjects_unlabeled else args.subjects_unlabeled.split(',') - if not args.render: - subjects_test = args.subjects_test.split(',') - else: - subjects_test = [args.viz_subject] - - semi_supervised = len(subjects_semi) > 0 - if semi_supervised and not dataset.supports_semi_supervised(): - raise RuntimeError('Semi-supervised training is not implemented for this dataset') - - # moved fatch to utils.py - - action_filter = None if args.actions == '*' else args.actions.split(',') - if action_filter is not None: - print('Selected actions:', action_filter) - - cameras_valid, poses_valid, poses_valid_2d = fetch(subjects_test, keypoints=keypoints, dataset=dataset, args=args ,action_filter=action_filter) - - filter_widths = [int(x) for x in args.architecture.split(',')] - if not args.disable_optimizations and not args.dense and args.stride == 1: - # Use optimized model for single-frame predictions - model_pos_train = TemporalModelOptimized1f(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), - filter_widths=filter_widths, causal=args.causal, 
dropout=args.dropout, channels=args.channels) - else: - # When incompatible settings are detected (stride > 1, dense filters, or disabled optimization) fall back to normal model - model_pos_train = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), - filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels, - dense=args.dense) - - model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), - filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels, - dense=args.dense) - - receptive_field = model_pos.receptive_field() - logger.info('INFO: Receptive field: {} frames'.format(receptive_field)) - pad = (receptive_field - 1) // 2 # Padding on each side - if args.causal: - logger.info('INFO: Using causal convolutions') - causal_shift = pad - else: - causal_shift = 0 - - model_params = 0 - for parameter in model_pos.parameters(): - model_params += parameter.numel() - print('INFO: Trainable parameter count:', model_params) - - assert args.gpu is not None, "Something wrong about args.gpu, it shouldn't be None." - - if not torch.npu.is_available(): - print("We only implemented for GPUs") - raise NotImplementedError - else: - loc = f'npu:{args.gpu}' - torch.npu.set_device(loc) - model_pos = model_pos.to(loc) - model_pos_train = model_pos_train.to(loc) - model_pos = torch.nn.parallel.DistributedDataParallel(model_pos, device_ids=[args.gpu], broadcast_buffers=False) - - - - if args.evaluate: - assert args.resume is '' - chk_filename = os.path.join(args.checkpoint, args.evaluate) - logger.info(f'Loading checkpoint {chk_filename}') - checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage) - model_pos.load_state_dict(checkpoint['model_pos']) - model_traj = None - - - test_generator = UnchunkedGenerator(args, cameras_valid, poses_valid, poses_valid_2d, - pad=pad, causal_shift=causal_shift, augment=False, - kps_left=kps_left, kps_right=kps_right, joints_left=joints_left, joints_right=joints_right) - logger.info('INFO: Testing on {} frames'.format(test_generator.num_frames())) - - if not args.evaluate: - cameras_train, poses_train, poses_train_2d = fetch(subjects_train, keypoints=keypoints, dataset=dataset, args=args, action_filter=action_filter, subset=args.subset) - - lr = args.learning_rate - if args.rank % args.num_gpus == 0: - logger.info(f"inital learning rate is:{lr}") - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - else: - optimizer = optim.Adam(model_pos_train.parameters(), lr=lr) #, amsgrad=True) - # optimizer = NpuFusedAdam(model_pos_train.parameters(), lr=lr) - print(f"Use Apex:{args.apex}") - print(f"Sampler:{args.sampler}") - if args.apex: - model_pos_train, optimizer = amp.initialize(model_pos_train, optimizer, opt_level="O1", loss_scale=128.0) #, combine_grad=True) - model_pos_train = torch.nn.parallel.DistributedDataParallel(model_pos_train, device_ids=[args.gpu], broadcast_buffers=False) - - lr_decay = args.lr_decay - - losses_3d_train = [] - losses_3d_train_eval = [] - losses_3d_valid = [] - - epoch = 0 - initial_momentum = 0.1 - final_momentum = 0.001 - - - train_generator = ChunkedGenerator(args, args.batch_size//args.stride, cameras_train, poses_train, poses_train_2d, args.stride, - pad=pad, causal_shift=causal_shift, shuffle=True, random_seed=args.random_seed, augment=args.data_augmentation, - kps_left=kps_left, 
kps_right=kps_right, joints_left=joints_left, joints_right=joints_right) - train_generator_eval = UnchunkedGenerator(args, cameras_train, poses_train, poses_train_2d, - pad=pad, causal_shift=causal_shift, augment=False) - print('INFO: Training on {} frames'.format(train_generator_eval.num_frames())) - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - - if args.resume: - chk_filename = os.path.join(args.checkpoint, args.resume) - print("resuming the training...") - print('Loading checkpoint', chk_filename) - checkpoint = torch.load(chk_filename, map_location=loc) - epoch = checkpoint['epoch'] - model_pos_train.load_state_dict(checkpoint['model_pos']) - if 'optimizer' in checkpoint and checkpoint['optimizer'] is not None: - optimizer.load_state_dict(checkpoint['optimizer']) - # train_generator.set_random_state(checkpoint['random_state']) - else: - print('WARNING: this checkpoint does not contain an optimizer state. The optimizer will be reinitialized.') - if checkpoint['amp'] is not None: - amp.load_state_dict(checkpoint['amp']) - if args.rank % ngpus_per_node == 0: - if 'train_global_steps' in checkpoint and 'valid_global_steps' in checkpoint: - writer_dict['train_global_steps'] = checkpoint['train_global_steps'] - writer_dict['valid_global_steps'] = checkpoint['valid_global_steps'] - lr = checkpoint['lr'] - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - # model_traj_train.load_state_dict(checkpoint['model_traj']) - # model_traj.load_state_dict(checkpoint['model_traj']) - # semi_generator.set_random_state(checkpoint['random_state_semi']) - - logger.info('** Note: reported losses are averaged over all frames and test-time augmentation is not used here.') - logger.info('** The final evaluation will be carried out after the last training epoch.') - - myend = time() - mytime = AverageMeter() - best_valid = 50.0 - prof_flag = args.prof - while epoch < args.epochs: - start_time = time() - epoch_loss = AverageMeter() - epoch_loss_val = AverageMeter() - train_generator.set_epoch(epoch) - epoch_loss_3d_train = 0 - # epoch_loss_traj_train = 0 - # epoch_loss_2d_train_unlabeled = 0 - epoch_fps = AverageMeter() - N = 0 - N_semi = 0 - model_pos_train.train() - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - else: - # Regular supervised scenario - count = 0 - for _, batch_3d, batch_2d in train_generator.next_epoch(): - if count >= 2: - my_epoch_start = time() - if batch_2d.shape[0] == 0: - continue - # print(f"batch_3d.shape:{batch_3d.shape} for rank:{args.rank}") - bz = batch_2d.shape[0] - assert batch_3d.shape[0] == bz - inputs_3d = torch.from_numpy(batch_3d.astype('float32')) - inputs_2d = torch.from_numpy(batch_2d.astype('float32')) - if torch.npu.is_available(): - inputs_3d = inputs_3d.to(loc, non_blocking=False) - inputs_2d = inputs_2d.to(loc, non_blocking=False) - inputs_3d[:, :, 0] = 0 - - if prof_flag and count==10 and args.rank==0: - with torch.autograd.profiler.profile(use_npu=True) as prof: - optimizer.zero_grad() - - # Predict 3D poses - predicted_3d_pos = model_pos_train(inputs_2d) - loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) - - loss_total = loss_3d_pos - if args.apex: - with amp.scale_loss(loss_total, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss_total.backward() - - optimizer.step() - print(prof.key_averages().table(sort_by='self_cpu_time_total')) - 
prof.export_chrome_trace(os.path.join(args.checkpoint,'out.prof')) - prof_flag = False - print(f"prof has been saved as {os.path.join(args.checkpoint,'out.prof')}") - else: - optimizer.zero_grad() - - # Predict 3D poses - predicted_3d_pos = model_pos_train(inputs_2d) - loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) - - loss_total = loss_3d_pos - if args.apex: - with amp.scale_loss(loss_total, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss_total.backward() - - optimizer.step() - - dist.all_reduce(loss_total) - loss_total = loss_total / ngpus_per_node - epoch_loss.update(loss_total.item(), bz) - - epoch_loss_3d_train += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_total.item() - N += inputs_3d.shape[0]*inputs_3d.shape[1] - - if count >= 2: - batch_time = time()-my_epoch_start - fps = bz * ngpus_per_node / batch_time - epoch_fps.update(fps) - if args.rank % ngpus_per_node == 0: - writer = writer_dict['writer'] - train_step = writer_dict['train_global_steps'] - writer.add_scalar('total_loss',epoch_loss.avg,train_step) - writer_dict['train_global_steps'] = train_step + 1 - - - if count % args.print_freq == 0 and args.rank % ngpus_per_node == 0: - logger.info("({batch}/{size})| loss:{loss.val:.5f} ({loss.avg:.5f})| FPS:{fps.val:.3f} ({fps.avg:.3f})".format( - batch=count, size=math.ceil(train_generator.num_frames()/(args.batch_size*ngpus_per_node)), loss=epoch_loss, - fps=epoch_fps - )) - count +=1 - if args.rank % ngpus_per_node == 0: - writer.add_scalar('loss_3d/train', epoch_loss_3d_train / N, epoch) - - losses_3d_train.append(epoch_loss_3d_train / N) - - - # End-of-epoch evaluation - if args.rank == 0 and not args.no_eval: - print("End of epoch evaluation start ....") - with torch.no_grad(): - model_pos.load_state_dict(model_pos_train.state_dict()) - model_pos.eval() - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - # model_traj.load_state_dict(model_traj_train.state_dict()) - # model_traj.eval() - - epoch_loss_3d_valid = 0 - epoch_loss_traj_valid = 0 - epoch_loss_2d_valid = 0 - N = 0 - - if not args.no_eval: - # Evaluate on test set - for cam, batch, batch_2d in test_generator.next_epoch(): - inputs_3d = torch.from_numpy(batch.astype('float32')) - inputs_2d = torch.from_numpy(batch_2d.astype('float32')) - if torch.npu.is_available(): - inputs_3d = inputs_3d.to(loc, non_blocking=False) - inputs_2d = inputs_2d.to(loc, non_blocking=False) - inputs_traj = inputs_3d[:, :, :1].clone() - inputs_3d[:, :, 0] = 0 - - # Predict 3D poses - predicted_3d_pos = model_pos(inputs_2d) - loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) - - bz = inputs_2d.shape[0] - assert bz == inputs_3d.shape[0] - - dist.all_reduce(loss_3d_pos) - loss_3d_pos = loss_3d_pos / ngpus_per_node - - epoch_loss_val.update(loss_3d_pos, bz) - - epoch_loss_3d_valid += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_3d_pos.item() - N += inputs_3d.shape[0]*inputs_3d.shape[1] - - if args.rank % ngpus_per_node == 0: - val_step = writer_dict['valid_global_steps'] - writer.add_scalar("val_loss",epoch_loss_val.avg, val_step) - writer_dict['valid_global_steps'] = val_step + 1 - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - if args.rank % ngpus_per_node == 0: - writer.add_scalar("loss_3d/valid", epoch_loss_3d_valid / N, epoch) - print("out of end-of-epoch evaluation loop.") - losses_3d_valid.append(epoch_loss_3d_valid / N) - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise 
NotImplementedError - # losses_traj_valid.append(epoch_loss_traj_valid / N) - # losses_2d_valid.append(epoch_loss_2d_valid / N) - - - # Evaluate on training set, this time in evaluation mode - epoch_loss_3d_train_eval = 0 - # epoch_loss_traj_train_eval = 0 - # epoch_loss_2d_train_labeled_eval = 0 - N = 0 - for cam, batch, batch_2d in train_generator_eval.next_epoch(): - if batch_2d.shape[1] == 0: - # This can only happen when downsampling the dataset - continue - - inputs_3d = torch.from_numpy(batch.astype('float32')) - inputs_2d = torch.from_numpy(batch_2d.astype('float32')) - if torch.npu.is_available(): - inputs_3d = inputs_3d.npu() - inputs_2d = inputs_2d.npu() - inputs_traj = inputs_3d[:, :, :1].clone() - inputs_3d[:, :, 0] = 0 - - # Compute 3D poses - predicted_3d_pos = model_pos(inputs_2d) - loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) - - dist.all_reduce(loss_3d_pos) - loss_3d_pos = loss_3d_pos / ngpus_per_node - epoch_loss_3d_train_eval += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_3d_pos.item() - N += inputs_3d.shape[0]*inputs_3d.shape[1] - - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - - if args.rank % ngpus_per_node == 0: - writer.add_scalar('loss_3d/train_eval', epoch_loss_3d_train_eval / N, epoch) - losses_3d_train_eval.append(epoch_loss_3d_train_eval / N) - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - # losses_traj_train_eval.append(epoch_loss_traj_train_eval / N) - # losses_2d_train_labeled_eval.append(epoch_loss_2d_train_labeled_eval / N) - - # Evaluate 2D loss on unlabeled training set (in evaluation mode) - epoch_loss_2d_train_unlabeled_eval = 0 - N_semi = 0 - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - - elapsed = (time() - start_time)/60 - - if args.rank % ngpus_per_node == 0: - if args.no_eval: - logger.info('[%d] time %.2f lr %f 3d_train %f FPS %d' % ( - epoch + 1, - elapsed, - lr, - losses_3d_train[-1] * 1000, - int(epoch_fps.avg))) - else: - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - else: - logger.info('[%d] time %.2f lr %f 3d_train %f 3d_eval %f 3d_valid %f FPS %d' % ( - epoch + 1, - elapsed, - lr, - losses_3d_train[-1] * 1000, - losses_3d_train_eval[-1] * 1000, - losses_3d_valid[-1] *1000, - int(epoch_fps.avg)) - ) - - # Decay learning rate exponentially - lr *= lr_decay - for param_group in optimizer.param_groups: - param_group['lr'] *= lr_decay - epoch += 1 - - # Decay BatchNorm momentum - momentum = initial_momentum * np.exp(-epoch/args.epochs * np.log(initial_momentum/final_momentum)) - model_pos_train.module.set_bn_momentum(momentum) - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - # model_traj_train.set_bn_momentum(momentum) - - # Save best valid - if args.no_eval: - valid = 0 - else: - valid = losses_3d_valid[-1] *1000 - if args.rank % ngpus_per_node == 0 and valid < best_valid: - best_valid = valid - bst_path = os.path.join(args.checkpoint, 'model_best.bin') - logger.info(f'Saving best model up to epoch:{epoch} to {bst_path}') - torch.save({ - 'model_pos':model_pos_train.state_dict() - }, bst_path) - - # Save checkpoint if necessary - if epoch % args.checkpoint_frequency == 0 and args.rank % ngpus_per_node == 0: - chk_path = os.path.join(args.checkpoint, 'epoch_{}.bin'.format(epoch)) - logger.info(f'Saving checkpoint to {chk_path}') - - 
torch.save({ - 'epoch': epoch, - 'lr': lr, - # 'random_state': train_generator.random_state(), - 'optimizer': optimizer.state_dict(), - 'model_pos': model_pos_train.state_dict(), - # 'model_traj': None, # model_traj_train.state_dict() if semi_supervised else None, - 'amp': amp.state_dict() if args.apex else None, - 'random_state_semi': None, #semi_generator.random_state() if semi_supervised else None, - 'train_global_steps': writer_dict['train_global_steps'], - 'valid_global_steps': writer_dict['valid_global_steps'] - }, chk_path) - - - # Save training curves after every epoch, as .png images (if requested) - if args.export_training_curves and epoch > 3 and args.rank % ngpus_per_node == 0: - if 'matplotlib' not in sys.modules: - import matplotlib - matplotlib.use('Agg') - import matplotlib.pyplot as plt - - plt.figure() - epoch_x = np.arange(3, len(losses_3d_train)) + 1 - plt.plot(epoch_x, losses_3d_train[3:], '--', color='C0') - plt.plot(epoch_x, losses_3d_train_eval[3:], color='C0') - plt.plot(epoch_x, losses_3d_valid[3:], color='C1') - plt.legend(['3d train', '3d train (eval)', '3d valid (eval)']) - plt.ylabel('MPJPE (m)') - plt.xlabel('Epoch') - plt.xlim((3, epoch)) - plt.savefig(os.path.join(args.checkpoint, 'loss_3d.png')) - - if semi_supervised: - print("Not Implement semi_supervised version for DDP") - raise NotImplementedError - plt.close('all') - - mytime.update(time()-myend) - myend = time() - if args.rank % ngpus_per_node == 0: - print(f"In average, it takes {time_format_convert(mytime.avg)} per epoch.") - print(f"Time has elapsed {time_format_convert(mytime.sum)}") - print(f"It still need {time_format_convert(mytime.avg*(args.epochs-epoch))}") - print("****************************************************************************") - if args.rank % ngpus_per_node == 0: - writer_dict['writer'].close() - # Evaluate - if args.evaluate: - logger.info('Evaluating...') - # chk_filename = os.path.join(args.checkpoint, 'model_best.bin') - # if (not args.evaluate) and (os.path.exists(chk_filename)): - # checkpoint = torch.load(chk_filename, map_location='cpu') - # model_pos.load_state_dict(checkpoint['model_pos']) - all_actions = {} - all_actions_by_subject = {} - for subject in subjects_test: - if subject not in all_actions_by_subject: - all_actions_by_subject[subject] = {} - - for action in dataset[subject].keys(): - action_name = action.split(' ')[0] - if action_name not in all_actions: - all_actions[action_name] = [] - if action_name not in all_actions_by_subject[subject]: - all_actions_by_subject[subject][action_name] = [] - all_actions[action_name].append((subject, action)) - all_actions_by_subject[subject][action_name].append((subject, action)) - - if not args.by_subject: - run_evaluation(args, all_actions, model_pos, None, keypoints, dataset, pad, causal_shift, kps_left, kps_right, joints_left, joints_right, action_filter) - else: - for subject in all_actions_by_subject.keys(): - if args.rank % ngpus_per_node == 0: - print('Evaluating on subject', subject) - run_evaluation(args, all_actions_by_subject[subject], model_pos, None, keypoints, dataset, pad, causal_shift, kps_left, kps_right, joints_left, joints_right, action_filter) - if args.rank % ngpus_per_node == 0: - print('') - dist.destroy_process_group() - -if __name__ == "__main__": - main() +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ + +# Copyright (c) 2018-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import numpy as np + +from common.arguments import parse_args +import torch + +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch.distributed as dist +import torch.multiprocessing as mp +import os +import sys +import errno +import math +import logging + +from common.camera import * +from common.model import * +from common.loss import * +from common.generators import ChunkedGenerator, UnchunkedGenerator +from time import time +from common.utils import deterministic_random, fetch, run_evaluation +from common.structure import AverageMeter, time_format_convert, device_id_to_process_device_map + +from tensorboardX import SummaryWriter +from apex import amp +# from apex.optimizers import NpuFusedAdam + + +def main(): + args = parse_args() + # print(args) + + try: + # Create checkpoint directory if it does not exist + os.makedirs(args.checkpoint) + except OSError as e: + if e.errno != errno.EEXIST: + raise RuntimeError('Unable to create checkpoint directory:', args.checkpoint) + + if not os.path.exists(args.output): + os.makedirs(args.output) + print(f"args.output:{args.output}") + + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '27005' + + process_device_map = device_id_to_process_device_map(args.device_list) + + if args.device == 'npu': + ngpus_per_node = len(process_device_map) + else: + ngpus_per_node = args.num_gpus + + args.num_gpus = ngpus_per_node + args.world_size = args.world_size * ngpus_per_node + + # npu = int(os.environ['RANK_ID']) + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + + +def setup_logger(final_output_dir, rank, phase): + # time_str = time.strftime('%Y-%m-%d-%H-%M') + log_file = '{}_rank{}.log'.format(phase, rank) + final_log_file = os.path.join(final_output_dir, log_file) + head = '%(asctime)-15s %(message)s' + # logging.basicConfig(format=head) + logging.basicConfig(filename=str(final_log_file), + format=head) + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console = logging.StreamHandler() + logging.getLogger('').addHandler(console) + + return logger + + +def main_worker(gpu, ngpus_per_node, args): + process_device_map = device_id_to_process_device_map(args.device_list) + log_dir = args.output + logger = setup_logger(log_dir, gpu, 'train') + + # args.gpu = gpu + args.gpu = process_device_map[gpu] + # logger.info(f"args.gpu is {args.gpu}") + + args.rank = args.rank * ngpus_per_node + gpu + + # print(f'args.print_feq:{args.print_feq}') + if args.rank % ngpus_per_node == 0: + log_path = args.log + writer_dict = { + 'writer': SummaryWriter(logdir=log_path), + 'train_global_steps': 0, + 'valid_global_steps': 0, + } + + if args.device == 'npu': + print("args.rank:",args.rank) + dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + else: + dist.init_process_group(backend='nccl', init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + + logger.info(f'Loading dataset for rank:{args.rank}...') + dataset_path = 'data/data_3d_' + args.dataset + '.npz' + if args.dataset == 'h36m': + from common.h36m_dataset import Human36mDataset + dataset = Human36mDataset(dataset_path) + elif args.dataset.startswith('humaneva'): + from common.humaneva_dataset import HumanEvaDataset + dataset = HumanEvaDataset(dataset_path) + elif args.dataset.startswith('custom'): + from common.custom_dataset import CustomDataset + dataset = CustomDataset('data/data_2d_' + args.dataset + '_' + 
args.keypoints + '.npz') + else: + raise KeyError('Invalid dataset') + + logger.info(f'Preparing data for rank:{args.rank}...') + for subject in dataset.subjects(): + for action in dataset[subject].keys(): + anim = dataset[subject][action] + + if 'positions' in anim: + positions_3d = [] + for cam in anim['cameras']: + pos_3d = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation']) + pos_3d[:, 1:] -= pos_3d[:, :1] # Remove global offset, but keep trajectory in first position + positions_3d.append(pos_3d) + anim['positions_3d'] = positions_3d + + logger.info(f'Loading 2D detections for rank:{args.rank}...') + keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True) + keypoints_metadata = keypoints['metadata'].item() + keypoints_symmetry = keypoints_metadata['keypoints_symmetry'] + kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1]) + joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right()) + keypoints = keypoints['positions_2d'].item() + + for subject in dataset.subjects(): + assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject) + for action in dataset[subject].keys(): + assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject) + if 'positions_3d' not in dataset[subject][action]: + continue + + for cam_idx in range(len(keypoints[subject][action])): + + # We check for >= instead of == because some videos in H3.6M contain extra frames + mocap_length = dataset[subject][action]['positions_3d'][cam_idx].shape[0] + assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length + + if keypoints[subject][action][cam_idx].shape[0] > mocap_length: + # Shorten sequence + keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length] + + assert len(keypoints[subject][action]) == len(dataset[subject][action]['positions_3d']) + + for subject in keypoints.keys(): + for action in keypoints[subject]: + for cam_idx, kps in enumerate(keypoints[subject][action]): + # Normalize camera frame + cam = dataset.cameras()[subject][cam_idx] + kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h']) + keypoints[subject][action][cam_idx] = kps + + subjects_train = args.subjects_train.split(',') + subjects_semi = [] if not args.subjects_unlabeled else args.subjects_unlabeled.split(',') + if not args.render: + subjects_test = args.subjects_test.split(',') + else: + subjects_test = [args.viz_subject] + + semi_supervised = len(subjects_semi) > 0 + if semi_supervised and not dataset.supports_semi_supervised(): + raise RuntimeError('Semi-supervised training is not implemented for this dataset') + + # moved fatch to utils.py + + action_filter = None if args.actions == '*' else args.actions.split(',') + if action_filter is not None: + print('Selected actions:', action_filter) + + cameras_valid, poses_valid, poses_valid_2d = fetch(subjects_test, keypoints=keypoints, dataset=dataset, args=args ,action_filter=action_filter) + + filter_widths = [int(x) for x in args.architecture.split(',')] + if not args.disable_optimizations and not args.dense and args.stride == 1: + # Use optimized model for single-frame predictions + model_pos_train = TemporalModelOptimized1f(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), + filter_widths=filter_widths, causal=args.causal, 
dropout=args.dropout, channels=args.channels) + else: + # When incompatible settings are detected (stride > 1, dense filters, or disabled optimization) fall back to normal model + model_pos_train = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), + filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels, + dense=args.dense) + + model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(), + filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels, + dense=args.dense) + + receptive_field = model_pos.receptive_field() + logger.info('INFO: Receptive field: {} frames'.format(receptive_field)) + pad = (receptive_field - 1) // 2 # Padding on each side + if args.causal: + logger.info('INFO: Using causal convolutions') + causal_shift = pad + else: + causal_shift = 0 + + model_params = 0 + for parameter in model_pos.parameters(): + model_params += parameter.numel() + print('INFO: Trainable parameter count:', model_params) + + assert args.gpu is not None, "Something wrong about args.gpu, it shouldn't be None." + + if not torch.npu.is_available(): + print("We only implemented for GPUs") + raise NotImplementedError + else: + loc = f'npu:{args.gpu}' + torch.npu.set_device(loc) + model_pos = model_pos.to(loc) + model_pos_train = model_pos_train.to(loc) + model_pos = torch.nn.parallel.DistributedDataParallel(model_pos, device_ids=[args.gpu], broadcast_buffers=False) + + + + if args.evaluate: + assert args.resume is '' + chk_filename = os.path.join(args.checkpoint, args.evaluate) + logger.info(f'Loading checkpoint {chk_filename}') + checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage) + model_pos.load_state_dict(checkpoint['model_pos']) + model_traj = None + + + test_generator = UnchunkedGenerator(args, cameras_valid, poses_valid, poses_valid_2d, + pad=pad, causal_shift=causal_shift, augment=False, + kps_left=kps_left, kps_right=kps_right, joints_left=joints_left, joints_right=joints_right) + logger.info('INFO: Testing on {} frames'.format(test_generator.num_frames())) + + if not args.evaluate: + cameras_train, poses_train, poses_train_2d = fetch(subjects_train, keypoints=keypoints, dataset=dataset, args=args, action_filter=action_filter, subset=args.subset) + + lr = args.learning_rate + if args.rank % args.num_gpus == 0: + logger.info(f"inital learning rate is:{lr}") + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + else: + optimizer = optim.Adam(model_pos_train.parameters(), lr=lr) #, amsgrad=True) + # optimizer = NpuFusedAdam(model_pos_train.parameters(), lr=lr) + print(f"Use Apex:{args.apex}") + print(f"Sampler:{args.sampler}") + if args.apex: + model_pos_train, optimizer = amp.initialize(model_pos_train, optimizer, opt_level="O1", loss_scale=128.0) #, combine_grad=True) + model_pos_train = torch.nn.parallel.DistributedDataParallel(model_pos_train, device_ids=[args.gpu], broadcast_buffers=False) + + lr_decay = args.lr_decay + + losses_3d_train = [] + losses_3d_train_eval = [] + losses_3d_valid = [] + + epoch = 0 + initial_momentum = 0.1 + final_momentum = 0.001 + + + train_generator = ChunkedGenerator(args, args.batch_size//args.stride, cameras_train, poses_train, poses_train_2d, args.stride, + pad=pad, causal_shift=causal_shift, shuffle=True, random_seed=args.random_seed, augment=args.data_augmentation, + kps_left=kps_left, 
kps_right=kps_right, joints_left=joints_left, joints_right=joints_right) + train_generator_eval = UnchunkedGenerator(args, cameras_train, poses_train, poses_train_2d, + pad=pad, causal_shift=causal_shift, augment=False) + print('INFO: Training on {} frames'.format(train_generator_eval.num_frames())) + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + + if args.resume: + chk_filename = os.path.join(args.checkpoint, args.resume) + print("resuming the training...") + print('Loading checkpoint', chk_filename) + checkpoint = torch.load(chk_filename, map_location=loc) + epoch = checkpoint['epoch'] + model_pos_train.load_state_dict(checkpoint['model_pos']) + if 'optimizer' in checkpoint and checkpoint['optimizer'] is not None: + optimizer.load_state_dict(checkpoint['optimizer']) + # train_generator.set_random_state(checkpoint['random_state']) + else: + print('WARNING: this checkpoint does not contain an optimizer state. The optimizer will be reinitialized.') + if checkpoint['amp'] is not None: + amp.load_state_dict(checkpoint['amp']) + if args.rank % ngpus_per_node == 0: + if 'train_global_steps' in checkpoint and 'valid_global_steps' in checkpoint: + writer_dict['train_global_steps'] = checkpoint['train_global_steps'] + writer_dict['valid_global_steps'] = checkpoint['valid_global_steps'] + lr = checkpoint['lr'] + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + # model_traj_train.load_state_dict(checkpoint['model_traj']) + # model_traj.load_state_dict(checkpoint['model_traj']) + # semi_generator.set_random_state(checkpoint['random_state_semi']) + + logger.info('** Note: reported losses are averaged over all frames and test-time augmentation is not used here.') + logger.info('** The final evaluation will be carried out after the last training epoch.') + + myend = time() + mytime = AverageMeter() + best_valid = 50.0 + prof_flag = args.prof + while epoch < args.epochs: + start_time = time() + epoch_loss = AverageMeter() + epoch_loss_val = AverageMeter() + train_generator.set_epoch(epoch) + epoch_loss_3d_train = 0 + # epoch_loss_traj_train = 0 + # epoch_loss_2d_train_unlabeled = 0 + epoch_fps = AverageMeter() + N = 0 + N_semi = 0 + model_pos_train.train() + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + else: + # Regular supervised scenario + count = 0 + for _, batch_3d, batch_2d in train_generator.next_epoch(): + if count >= 2: + my_epoch_start = time() + if batch_2d.shape[0] == 0: + continue + # print(f"batch_3d.shape:{batch_3d.shape} for rank:{args.rank}") + bz = batch_2d.shape[0] + assert batch_3d.shape[0] == bz + inputs_3d = torch.from_numpy(batch_3d.astype('float32')) + inputs_2d = torch.from_numpy(batch_2d.astype('float32')) + if torch.npu.is_available(): + inputs_3d = inputs_3d.to(loc, non_blocking=False) + inputs_2d = inputs_2d.to(loc, non_blocking=False) + inputs_3d[:, :, 0] = 0 + + if prof_flag and count==10 and args.rank==0: + with torch.autograd.profiler.profile(use_npu=True) as prof: + optimizer.zero_grad() + + # Predict 3D poses + predicted_3d_pos = model_pos_train(inputs_2d) + loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) + + loss_total = loss_3d_pos + if args.apex: + with amp.scale_loss(loss_total, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss_total.backward() + + optimizer.step() + print(prof.key_averages().table(sort_by='self_cpu_time_total')) + 
prof.export_chrome_trace(os.path.join(args.checkpoint,'out.prof')) + prof_flag = False + print(f"prof has been saved as {os.path.join(args.checkpoint,'out.prof')}") + else: + optimizer.zero_grad() + + # Predict 3D poses + predicted_3d_pos = model_pos_train(inputs_2d) + loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) + + loss_total = loss_3d_pos + if args.apex: + with amp.scale_loss(loss_total, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss_total.backward() + + optimizer.step() + + dist.all_reduce(loss_total) + loss_total = loss_total / ngpus_per_node + epoch_loss.update(loss_total.item(), bz) + + epoch_loss_3d_train += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_total.item() + N += inputs_3d.shape[0]*inputs_3d.shape[1] + + if count >= 2: + batch_time = time()-my_epoch_start + fps = bz * ngpus_per_node / batch_time + epoch_fps.update(fps) + if args.rank % ngpus_per_node == 0: + writer = writer_dict['writer'] + train_step = writer_dict['train_global_steps'] + writer.add_scalar('total_loss',epoch_loss.avg,train_step) + writer_dict['train_global_steps'] = train_step + 1 + + + if count % args.print_freq == 0 and args.rank % ngpus_per_node == 0: + logger.info("({batch}/{size})| loss:{loss.val:.5f} ({loss.avg:.5f})| FPS:{fps.val:.3f} ({fps.avg:.3f})".format( + batch=count, size=math.ceil(train_generator.num_frames()/(args.batch_size*ngpus_per_node)), loss=epoch_loss, + fps=epoch_fps + )) + count +=1 + if args.rank % ngpus_per_node == 0: + writer.add_scalar('loss_3d/train', epoch_loss_3d_train / N, epoch) + + losses_3d_train.append(epoch_loss_3d_train / N) + + + # End-of-epoch evaluation + if args.rank == 0 and not args.no_eval: + print("End of epoch evaluation start ....") + with torch.no_grad(): + model_pos.load_state_dict(model_pos_train.state_dict()) + model_pos.eval() + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + # model_traj.load_state_dict(model_traj_train.state_dict()) + # model_traj.eval() + + epoch_loss_3d_valid = 0 + epoch_loss_traj_valid = 0 + epoch_loss_2d_valid = 0 + N = 0 + + if not args.no_eval: + # Evaluate on test set + for cam, batch, batch_2d in test_generator.next_epoch(): + inputs_3d = torch.from_numpy(batch.astype('float32')) + inputs_2d = torch.from_numpy(batch_2d.astype('float32')) + if torch.npu.is_available(): + inputs_3d = inputs_3d.to(loc, non_blocking=False) + inputs_2d = inputs_2d.to(loc, non_blocking=False) + inputs_traj = inputs_3d[:, :, :1].clone() + inputs_3d[:, :, 0] = 0 + + # Predict 3D poses + predicted_3d_pos = model_pos(inputs_2d) + loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) + + bz = inputs_2d.shape[0] + assert bz == inputs_3d.shape[0] + + dist.all_reduce(loss_3d_pos) + loss_3d_pos = loss_3d_pos / ngpus_per_node + + epoch_loss_val.update(loss_3d_pos, bz) + + epoch_loss_3d_valid += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_3d_pos.item() + N += inputs_3d.shape[0]*inputs_3d.shape[1] + + if args.rank % ngpus_per_node == 0: + val_step = writer_dict['valid_global_steps'] + writer.add_scalar("val_loss",epoch_loss_val.avg, val_step) + writer_dict['valid_global_steps'] = val_step + 1 + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + if args.rank % ngpus_per_node == 0: + writer.add_scalar("loss_3d/valid", epoch_loss_3d_valid / N, epoch) + print("out of end-of-epoch evaluation loop.") + losses_3d_valid.append(epoch_loss_3d_valid / N) + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise 
NotImplementedError + # losses_traj_valid.append(epoch_loss_traj_valid / N) + # losses_2d_valid.append(epoch_loss_2d_valid / N) + + + # Evaluate on training set, this time in evaluation mode + epoch_loss_3d_train_eval = 0 + # epoch_loss_traj_train_eval = 0 + # epoch_loss_2d_train_labeled_eval = 0 + N = 0 + for cam, batch, batch_2d in train_generator_eval.next_epoch(): + if batch_2d.shape[1] == 0: + # This can only happen when downsampling the dataset + continue + + inputs_3d = torch.from_numpy(batch.astype('float32')) + inputs_2d = torch.from_numpy(batch_2d.astype('float32')) + if torch.npu.is_available(): + inputs_3d = inputs_3d.npu() + inputs_2d = inputs_2d.npu() + inputs_traj = inputs_3d[:, :, :1].clone() + inputs_3d[:, :, 0] = 0 + + # Compute 3D poses + predicted_3d_pos = model_pos(inputs_2d) + loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d) + + dist.all_reduce(loss_3d_pos) + loss_3d_pos = loss_3d_pos / ngpus_per_node + epoch_loss_3d_train_eval += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_3d_pos.item() + N += inputs_3d.shape[0]*inputs_3d.shape[1] + + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + + if args.rank % ngpus_per_node == 0: + writer.add_scalar('loss_3d/train_eval', epoch_loss_3d_train_eval / N, epoch) + losses_3d_train_eval.append(epoch_loss_3d_train_eval / N) + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + # losses_traj_train_eval.append(epoch_loss_traj_train_eval / N) + # losses_2d_train_labeled_eval.append(epoch_loss_2d_train_labeled_eval / N) + + # Evaluate 2D loss on unlabeled training set (in evaluation mode) + epoch_loss_2d_train_unlabeled_eval = 0 + N_semi = 0 + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + + elapsed = (time() - start_time)/60 + + if args.rank % ngpus_per_node == 0: + if args.no_eval: + logger.info('[%d] time %.2f lr %f 3d_train %f FPS %d' % ( + epoch + 1, + elapsed, + lr, + losses_3d_train[-1] * 1000, + int(epoch_fps.avg))) + else: + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + else: + logger.info('[%d] time %.2f lr %f 3d_train %f 3d_eval %f 3d_valid %f FPS %d' % ( + epoch + 1, + elapsed, + lr, + losses_3d_train[-1] * 1000, + losses_3d_train_eval[-1] * 1000, + losses_3d_valid[-1] *1000, + int(epoch_fps.avg)) + ) + + # Decay learning rate exponentially + lr *= lr_decay + for param_group in optimizer.param_groups: + param_group['lr'] *= lr_decay + epoch += 1 + + # Decay BatchNorm momentum + momentum = initial_momentum * np.exp(-epoch/args.epochs * np.log(initial_momentum/final_momentum)) + model_pos_train.module.set_bn_momentum(momentum) + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + # model_traj_train.set_bn_momentum(momentum) + + # Save best valid + if args.no_eval: + valid = 0 + else: + valid = losses_3d_valid[-1] *1000 + if args.rank % ngpus_per_node == 0 and valid < best_valid: + best_valid = valid + bst_path = os.path.join(args.checkpoint, 'model_best.bin') + logger.info(f'Saving best model up to epoch:{epoch} to {bst_path}') + torch.save({ + 'model_pos':model_pos_train.state_dict() + }, bst_path) + + # Save checkpoint if necessary + if epoch % args.checkpoint_frequency == 0 and args.rank % ngpus_per_node == 0: + chk_path = os.path.join(args.checkpoint, 'epoch_{}.bin'.format(epoch)) + logger.info(f'Saving checkpoint to {chk_path}') + + 
torch.save({ + 'epoch': epoch, + 'lr': lr, + # 'random_state': train_generator.random_state(), + 'optimizer': optimizer.state_dict(), + 'model_pos': model_pos_train.state_dict(), + # 'model_traj': None, # model_traj_train.state_dict() if semi_supervised else None, + 'amp': amp.state_dict() if args.apex else None, + 'random_state_semi': None, #semi_generator.random_state() if semi_supervised else None, + 'train_global_steps': writer_dict['train_global_steps'], + 'valid_global_steps': writer_dict['valid_global_steps'] + }, chk_path) + + + # Save training curves after every epoch, as .png images (if requested) + if args.export_training_curves and epoch > 3 and args.rank % ngpus_per_node == 0: + if 'matplotlib' not in sys.modules: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + + plt.figure() + epoch_x = np.arange(3, len(losses_3d_train)) + 1 + plt.plot(epoch_x, losses_3d_train[3:], '--', color='C0') + plt.plot(epoch_x, losses_3d_train_eval[3:], color='C0') + plt.plot(epoch_x, losses_3d_valid[3:], color='C1') + plt.legend(['3d train', '3d train (eval)', '3d valid (eval)']) + plt.ylabel('MPJPE (m)') + plt.xlabel('Epoch') + plt.xlim((3, epoch)) + plt.savefig(os.path.join(args.checkpoint, 'loss_3d.png')) + + if semi_supervised: + print("Not Implement semi_supervised version for DDP") + raise NotImplementedError + plt.close('all') + + mytime.update(time()-myend) + myend = time() + if args.rank % ngpus_per_node == 0: + print(f"In average, it takes {time_format_convert(mytime.avg)} per epoch.") + print(f"Time has elapsed {time_format_convert(mytime.sum)}") + print(f"It still need {time_format_convert(mytime.avg*(args.epochs-epoch))}") + print("****************************************************************************") + if args.rank % ngpus_per_node == 0: + writer_dict['writer'].close() + # Evaluate + if args.evaluate: + logger.info('Evaluating...') + # chk_filename = os.path.join(args.checkpoint, 'model_best.bin') + # if (not args.evaluate) and (os.path.exists(chk_filename)): + # checkpoint = torch.load(chk_filename, map_location='cpu') + # model_pos.load_state_dict(checkpoint['model_pos']) + all_actions = {} + all_actions_by_subject = {} + for subject in subjects_test: + if subject not in all_actions_by_subject: + all_actions_by_subject[subject] = {} + + for action in dataset[subject].keys(): + action_name = action.split(' ')[0] + if action_name not in all_actions: + all_actions[action_name] = [] + if action_name not in all_actions_by_subject[subject]: + all_actions_by_subject[subject][action_name] = [] + all_actions[action_name].append((subject, action)) + all_actions_by_subject[subject][action_name].append((subject, action)) + + if not args.by_subject: + run_evaluation(args, all_actions, model_pos, None, keypoints, dataset, pad, causal_shift, kps_left, kps_right, joints_left, joints_right, action_filter) + else: + for subject in all_actions_by_subject.keys(): + if args.rank % ngpus_per_node == 0: + print('Evaluating on subject', subject) + run_evaluation(args, all_actions_by_subject[subject], model_pos, None, keypoints, dataset, pad, causal_shift, kps_left, kps_right, joints_left, joints_right, action_filter) + if args.rank % ngpus_per_node == 0: + print('') + dist.destroy_process_group() + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/cv/video/VideoPose3D/test/eval_full_8p.sh b/PyTorch/contrib/cv/video/VideoPose3D/test/eval_full_8p.sh index bd38f9fdc596b024a9e50ac3e572a6172d991f40..5c6235ba8cc8e175e26a20b31e75aa0d1506f815 100644 --- 
a/PyTorch/contrib/cv/video/VideoPose3D/test/eval_full_8p.sh +++ b/PyTorch/contrib/cv/video/VideoPose3D/test/eval_full_8p.sh @@ -1,100 +1,100 @@ -#!/bin/sh - -source /usr/local/Ascend/ascend-toolkit/set_env.sh -export HCCL_WHITELIST_DISABLE=1 -export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export ASCEND_GLOBAL_LOG_LEVEL=3 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -export PTCOPY_ENABLE=1 - -################ Basic Training Settings ################## -# "Must Have" Settings: Network batch_size RANK_SIZE -# Network Name, the same with dir -Network="VideoPose3D" -# training batch_size per GPU -batch_size=8192 -# num of NPUs -export RANK_SIZE=8 -# train epochs -# train_epochs=80 - -############# Specify Training Directory ############# -cur_path=`pwd` -cur_path_last_diename=${cur_path##*/} -if [ x"${cur_path_last_diename}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - -############# Create Log output directory ############## -ASCEND_DEVICE_ID=0 -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -#################### Start Training ################# -# start time, no modification needed -start_time=$(date +%s) - -python run.py \ - -k cpn_ft_h36m_dbb \ - -arc 3,3,3,3,3 \ - -c checkpoint/8p_lr2.2e-3 \ - -o test/output/8p_lr2.2e-3_eval \ - --evaluate model_best.bin & -wait - -################# Gain Training Data #################### -# end training time, no modification needed -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# print results, no modification needed -echo "------------------ Final result ------------------" -# output FPS -FPS=`grep -a 'FPS' ${test_path_dir}/output/8p_lr2.2e-3/train_rank0.log|awk -F " " '{print $15}'|awk 'END {print}'` -# print -echo "Final Performance images/sec : $FPS" - -# train-accuracy -acc=`grep -a 'Protocol #1' ${test_path_dir}/output/8p_lr2.2e-3_eval/train_rank0.log|awk 'END {print}'|awk -F " " '{print $7}'` -# print -echo "Final Train Accuracy (mm) : ${acc:8:5}" -echo "E2E Eval Duration sec : $e2e_time" -train_accuracy=${acc:8:5} - -# Performance Summary -# Train-related information, no modification needed -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -## Acquire performance data -# Throughput -ActualFPS=${FPS} -# time of single loop -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# Extract loss to train_${CaseName}_loss.txt -grep -a 'FPS' ${test_path_dir}/output/8p_lr2.2e-3/train_rank0.log|awk -F " " '{print $3,$4,$5}'|awk -F "loss:" '{print $NF}'|awk -F "time" '{print $1}'|awk -F "(" '{print $NF}'|awk -F ")" '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# loss from the last loop -ActualLoss=`awk -F: '{if($1!="[80] ")print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt|awk 'END {print}'` - -# Key information print -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/sh + +source /usr/local/Ascend/ascend-toolkit/set_env.sh +export HCCL_WHITELIST_DISABLE=1 +export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export ASCEND_GLOBAL_LOG_LEVEL=3 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +export PTCOPY_ENABLE=1 + +################ Basic Training Settings ################## +# "Must Have" Settings: Network batch_size RANK_SIZE +# Network Name, the same with dir +Network="VideoPose3D" +# training batch_size per GPU +batch_size=8192 +# num of NPUs +export RANK_SIZE=8 +# train epochs +# train_epochs=80 + +############# Specify Training Directory ############# +cur_path=`pwd` +cur_path_last_diename=${cur_path##*/} +if [ x"${cur_path_last_diename}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +############# Create Log output directory ############## +ASCEND_DEVICE_ID=0 +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################### Start Training ################# +# start time, no modification needed +start_time=$(date +%s) + +python run.py \ + -k cpn_ft_h36m_dbb \ + -arc 3,3,3,3,3 \ + -c checkpoint/8p_lr2.2e-3 \ + -o test/output/8p_lr2.2e-3_eval \ + --evaluate model_best.bin & +wait + +################# Gain Training Data #################### +# end training time, no modification needed +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# print results, no modification needed +echo "------------------ Final result ------------------" +# output FPS +FPS=`grep -a 'FPS' ${test_path_dir}/output/8p_lr2.2e-3/train_rank0.log|awk -F " " '{print $15}'|awk 'END {print}'` +# print +echo "Final Performance images/sec : $FPS" + +# train-accuracy +acc=`grep -a 'Protocol #1' ${test_path_dir}/output/8p_lr2.2e-3_eval/train_rank0.log|awk 'END {print}'|awk -F " " '{print $7}'` +# print +echo "Final Train Accuracy (mm) : ${acc:8:5}" +echo "E2E Eval Duration sec : $e2e_time" +train_accuracy=${acc:8:5} + +# Performance Summary +# Train-related information, no modification needed +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +## Acquire performance data +# Throughput +ActualFPS=${FPS} +# time of single loop +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# Extract loss to train_${CaseName}_loss.txt +grep -a 'FPS' ${test_path_dir}/output/8p_lr2.2e-3/train_rank0.log|awk -F " " '{print $3,$4,$5}'|awk -F "loss:" '{print $NF}'|awk -F "time" '{print $1}'|awk -F "(" '{print $NF}'|awk -F ")" '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +# loss from the last loop +ActualLoss=`awk -F: '{if($1!="[80] ")print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt|awk 'END {print}'` + +# Key information print +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log # echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/contrib/cv/video/X3D/README.md b/PyTorch/contrib/cv/video/X3D/README.md index 6c0dcffc318ef886e076dc3d806a8c15c413dba0..8e3949ec65bac43d667c40d0abc6c83b4da3de5d 100644 --- a/PyTorch/contrib/cv/video/X3D/README.md +++ b/PyTorch/contrib/cv/video/X3D/README.md @@ -1,212 +1,212 @@ -# X3D-S - -Implements training of X3D-S on the Kinetics-400 dataset - -## Detail - -Most of codes are modified according to [here](https://gitee.com/ascend/modelzoo/wikis/Pytorch%E8%AE%AD%E7%BB%83%E6%8C%87%E5%AF%BC?sort_id=4208869#21-%E8%BF%81%E7%A7%BB%E6%B5%81%E7%A8%8B%E8%AF%B4%E6%98%8E) - -There are some special modification of [source repository](https://github.com/facebookresearch/SlowFast) : - -##### NPU & GPU - -- Add some customized yaml configuration items, such as APEX.ENABLE、DIST_BACKEND... -- Ascend-Pytorch-1.5 is not supported `torch.nn.init.trunc_normal` , using `torch.nn.init.normal_` instead -- Adjusted the order of dependency import to prevent some unknown bugs (`scikit-learn`) - -##### NPU - -- Group conv3D of Ascend-Pytorch is not supported, so we canceled all group operations in the model -- Remove some irrelevant codes to prevent some unknown bugs (`Segmentation fault (core dumped)`) - - -## Requirements - -##### Base Environment - -- Python == 3.7.5 -- GCC >= 4.9 - -##### Python Environment - -1. Installing these error-prone dependencies first: - -- PyTorch (raw==1.5 or ascend) - - Ascend-Pytorch Version after August 24 be installed -- torchvision == 0.6.0 - - If on Centos arm, please build the source code from [here](https://gitee.com/azureology/torchvision/tree/v0.6.0/) -- PyAV - - If the installation fails on Centos arm, following this [issue](https://gitee.com/ascend/modelzoo/issues/I48AP3?from=project-issue) -- Detectron2 - - According to the CUDA version and Pytorch version, build from [source code](https://github.com/facebookresearch/detectron2) - -2. Then, you can use `pip3 install -r requirements.txt` to install some simple dependencies - - - -3. Building source code - -```shell -cd X3D # into source code root - -# Switch to your prepared environment - -python3.7 setup.py build develop # build slowfast and install the remaining dependencies -``` - - - -##### Modify Ascend-tookit - -```shell -cd /usr/local -find / -name fractal_z_3d_2_ncdhw.py -vi path_to_fractal_z_3d_2_ncdhw.py - -located method: - 1. def fractal_z_3d_2_ncdhw(src, dst, src_format, dst_format,kernel_name="fractal_z_3d_2_ncdhw") - 2. modify it according this picture: - 2.1. remove `if list(dst_shape) in ....` - 2.2. 
Align the next two lines like this -``` - -![image-20210909203603647](meta\bug-opt.png) - - - -##### Dataset - -- Download the Kinetics-400 dataset from [here](https://github.com/PaddlePaddle/PaddleVideo/blob/develop/docs/zh-CN/dataset/k400.md) - - 1. unzip the all packages and merge all folders - - 2. we get two sets , train set (used to train) and val set (used to test). And each of both has 400 folders - - ```markdown - # format of data folder - - |-data - |-train - |- video type 1 - |- video 1 - |- video 2 - ... - |- video type 2 - ... - |- video type 400 - |-val - |- video type 1 - |- video type 2 - ... - |- video type 400 - ``` - - - - 3. build train.csv, val.csv, test.csv, and put them in the same folder - - ```markdown - # format of data path folder - |- data_path - |- train.csv - |- val.csv - |- test.csv - ``` - - train.csv consists of train set - - val.csv is same as test.csv, and consists of test set - - ```markdown - # format of data path csv is: - - path_to_video_1 label_1 - path_to_video_2 label_2 - ... - path_to_video_N label_N - ``` - - 4. check if the all videos are lossless according to the scripts provided by project [mmaction2](https://github.com/open-mmlab/mmaction2) . Here, we provide the [list](mytest\Vinput\error_video) of corrupted videos that have been checked out -5. remove the those corrupted videos from the three csv - -## Training - -To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: - -> Note:the `real_data_path` is path of csv folder mentioned above - -```bash -# training 1p (300 epoch) -bash ./test/train_full_1p.sh --data_path=real_data_path - -# training 8p (300 epoch) -bash ./test/train_full_8p.sh --data_path=real_data_path - -# training performance 1p (1 epoch) -bash ./test/train_performance_1p.sh --data_path=real_data_path - -# training performance 8p (3 epoch) -bash ./test/train_performance_8p.sh --data_path=real_data_path - -# testing 8p -bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path - -# train_finetune_1p.sh -bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path --num_classes=default_400 -``` - -> Log path: ./stdout.log - - - -## Training Result - -> Due to the calculation cast a long time, we choose to run the full data, and the NPU-ACC is aligned with the GPU-ACC (as many epochs as possible). - - -| Device | FPS | Top1-ACC 10-view | Batch Size | Epochs | AMP | -| :-------------: | :-----: | :--------------: | :--------: | :----: | :------: | -| 1P-GPU | 10.39 | 6.67% | 96 | 1/300 | O2-128.0 | -| 1P-NPU | 5.38 | 6.18% | 96 | 1/300 | O2-128.0 | -| 1P-NPU-白名单 | 5.35 | 6.36% | 96 | 1/300 | O2-128.0 | -| 8P-GPU | 1137.49 | 37.56% | 256 | 30/300 | O2-128.0 | -| 8P-NPU | 529.24 | 39.67% | 256 | 30/300 | O2-128.0 | -| 8P-NPU-fusedSGD | 510.66 | 5.80% | 256 | 2/300 | O2-128.0 | - - - -- Testing result: Top1-ACC of 8P-NPU and 8P-GPU training (30 epochs) - -![img](meta\8P-GPU & 8P-NPU.png) - -## Performance Optimization - -According to the above, it can be concluded that the accuracy(Top1-ACC 10-view) of 8P-GPU and 8P-NPU is little different. But performance(FPS) of 8P-NPU is 50% of 8P-GPU's. 
-
-So we made the following analysis and improvement:
-
-- find the dynamic operators following [here](https://gitee.com/wangjiangben_hw/ascend-pytorch-crowdintelligence-doc/blob/master/pytorch-train-guide/%E6%A8%A1%E5%9E%8B%E7%AE%97%E5%AD%90%E6%8F%90%E5%8F%96%E6%8C%87%E5%8D%97.md), but the operators is very basic, and we can not identify them from our big model.
-
-![img](meta\dynamic_ops.png)
-
-- check the profile of NPU through chrome tracing
-
-  image-20210913190836215
-
-- In order to improve the low perfomance of Transpose, we first generate the `cann profiling` following [here](https://gitee.com/wangjiangben_hw/ascend-pytorch-crowdintelligence-doc/blob/master/pytorch-train-guide/CANNProfiling%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC%E4%B9%A6.md), then we extract the two operators, TransposeD and TransData.
-  - if TransposeD `Consuming time > 10s`, add its Input Shapes to White List (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py)
-  - if TransData `Consuming time > 10s & Input Formats == 'NCHW' & Output Formats == 'NC1HWC0'`, add its Input Shapes to White List (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/four_2_five.py)
-  - if TransData `Consuming time > 10s & Input Formats == 'NC1HWC0' & Output Formats == 'NCHW'`, add its Input Shapes to White List (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/five_2_four.py)
-
-**After Optimization**
-
-![image-20210918103240921](meta\profile-2.png)
-
-## ELSE
-
-Iessues and PRs about this project
-
-- invalid gradient https://gitee.com/ascend/modelzoo/issues/I452ZB https://gitee.com/ascend/pytorch-develop/pulls/2438
-- optimizer error https://gitee.com/ascend/pytorch-develop/pulls/2438
-- pyav install on CentOS arm https://gitee.com/ascend/modelzoo/issues/I48AP3
-- scikit-learn cannot allocate memory in static TLS https://gitee.com/ascend/modelzoo/issues/I48QNY
+# X3D-S
+
+Implements training of X3D-S on the Kinetics-400 dataset.
+
+## Detail
+
+Most of the code is modified according to [here](https://gitee.com/ascend/modelzoo/wikis/Pytorch%E8%AE%AD%E7%BB%83%E6%8C%87%E5%AF%BC?sort_id=4208869#21-%E8%BF%81%E7%A7%BB%E6%B5%81%E7%A8%8B%E8%AF%B4%E6%98%8E)
+
+There are some special modifications to the [source repository](https://github.com/facebookresearch/SlowFast):
+
+##### NPU & GPU
+
+- Added some customized yaml configuration items, such as APEX.ENABLE, DIST_BACKEND, ...
+- Ascend-PyTorch 1.5 does not support `torch.nn.init.trunc_normal_`, so `torch.nn.init.normal_` is used instead
+- Adjusted the order of dependency imports to prevent some unknown bugs (`scikit-learn`)
+
+##### NPU
+
+- Grouped Conv3D is not supported by Ascend-PyTorch, so we removed all group operations from the model
+- Removed some irrelevant code to prevent some unknown bugs (`Segmentation fault (core dumped)`)
+
+
+## Requirements
+
+##### Base Environment
+
+- Python == 3.7.5
+- GCC >= 4.9
+
+##### Python Environment
+
+1. Install these error-prone dependencies first:
+
+- PyTorch (raw==1.5 or ascend)
+  - An Ascend-PyTorch version released after August 24 must be installed
+- torchvision == 0.6.0
+  - On CentOS ARM, please build it from source, see [here](https://gitee.com/azureology/torchvision/tree/v0.6.0/)
+- PyAV
+  - If the installation fails on CentOS ARM, follow this [issue](https://gitee.com/ascend/modelzoo/issues/I48AP3?from=project-issue)
+- Detectron2
+  - Build it from [source code](https://github.com/facebookresearch/detectron2) according to your CUDA and PyTorch versions
+
+2. Then install the remaining simple dependencies with `pip3 install -r requirements.txt`
+
+3. Build the source code
+
+```shell
+cd X3D # into the source code root
+
+# Switch to your prepared environment
+
+python3.7 setup.py build develop # build slowfast and install the remaining dependencies
+```
+
+##### Modify Ascend-toolkit
+
+```shell
+cd /usr/local
+find / -name fractal_z_3d_2_ncdhw.py
+vi path_to_fractal_z_3d_2_ncdhw.py
+
+located method:
+    1. def fractal_z_3d_2_ncdhw(src, dst, src_format, dst_format,kernel_name="fractal_z_3d_2_ncdhw")
+    2. modify it according to this picture:
+       2.1. remove `if list(dst_shape) in ....`
+       2.2. align the next two lines as shown
+```
+
+![image-20210909203603647](meta\bug-opt.png)
+
+##### Dataset
+
+- Download the Kinetics-400 dataset from [here](https://github.com/PaddlePaddle/PaddleVideo/blob/develop/docs/zh-CN/dataset/k400.md)
+
+  1. Unzip all the packages and merge all folders
+
+  2. This gives two sets: a train set (used for training) and a val set (used for testing). Each of them has 400 class folders
+
+     ```markdown
+     # format of data folder
+
+     |-data
+       |-train
+         |- video type 1
+           |- video 1
+           |- video 2
+           ...
+         |- video type 2
+         ...
+         |- video type 400
+       |-val
+         |- video type 1
+         |- video type 2
+         ...
+         |- video type 400
+     ```
+
+  3. Build train.csv, val.csv and test.csv, and put them in the same folder (a small generation sketch follows this list)
+
+     ```markdown
+     # format of data path folder
+     |- data_path
+       |- train.csv
+       |- val.csv
+       |- test.csv
+     ```
+
+     train.csv consists of the train set
+
+     val.csv is the same as test.csv and consists of the test set
+
+     ```markdown
+     # format of each data path csv:
+
+     path_to_video_1 label_1
+     path_to_video_2 label_2
+     ...
+     path_to_video_N label_N
+     ```
+
+  4. Check that all videos are intact using the scripts provided by the [mmaction2](https://github.com/open-mmlab/mmaction2) project. Here, we provide the [list](mytest\Vinput\error_video) of corrupted videos that have already been identified
+5. Remove those corrupted videos from the three CSV files
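+
+The CSV-building step is not scripted in this repo. As a rough sketch (the `data/` and `data_path/` locations, the space-separated `path label` row format, and alphabetical label ids are assumptions taken from the layout described above), the three files could be generated like this:
+
+```python
+import os
+
+def build_csv(split_dir, out_csv, class_to_id):
+    """Write one `absolute_video_path label_id` row per video found under split_dir."""
+    with open(out_csv, "w") as f:
+        for cls in sorted(class_to_id):
+            cls_dir = os.path.join(split_dir, cls)
+            if not os.path.isdir(cls_dir):
+                continue
+            for video in sorted(os.listdir(cls_dir)):
+                f.write(f"{os.path.abspath(os.path.join(cls_dir, video))} {class_to_id[cls]}\n")
+
+data_root = "data"        # data/train/<class>/<video>, data/val/<class>/<video>
+out_root = "data_path"    # folder holding train.csv / val.csv / test.csv
+os.makedirs(out_root, exist_ok=True)
+
+# Label ids here are simply alphabetical indices of the 400 class folders (assumption).
+train_dir = os.path.join(data_root, "train")
+classes = sorted(d for d in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, d)))
+class_to_id = {c: i for i, c in enumerate(classes)}
+
+build_csv(train_dir, os.path.join(out_root, "train.csv"), class_to_id)
+build_csv(os.path.join(data_root, "val"), os.path.join(out_root, "val.csv"), class_to_id)
+# As noted above, test.csv mirrors val.csv.
+build_csv(os.path.join(data_root, "val"), os.path.join(out_root, "test.csv"), class_to_id)
+```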
+
+## Training
+
+To train a model, run the scripts below with `data_path` pointing to the Kinetics-400 CSV folder prepared above:
+
+> Note: `real_data_path` is the path to the CSV folder mentioned above
+
+```bash
+# training 1p (300 epochs)
+bash ./test/train_full_1p.sh --data_path=real_data_path
+
+# training 8p (300 epochs)
+bash ./test/train_full_8p.sh --data_path=real_data_path
+
+# training performance 1p (1 epoch)
+bash ./test/train_performance_1p.sh --data_path=real_data_path
+
+# training performance 8p (3 epochs)
+bash ./test/train_performance_8p.sh --data_path=real_data_path
+
+# testing 8p
+bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path
+
+# fine-tuning 1p
+bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_train_model_path --num_classes=default_400
+```
+
+> Log path: ./stdout.log
+
+## Training Result
+
+> Because a full run takes a long time, we train on the full data and align the NPU accuracy with the GPU accuracy over as many epochs as possible.
+
+| Device | FPS | Top1-ACC 10-view | Batch Size | Epochs | AMP |
+| :-------------: | :-----: | :--------------: | :--------: | :----: | :------: |
+| 1P-GPU | 10.39 | 6.67% | 96 | 1/300 | O2-128.0 |
+| 1P-NPU | 5.38 | 6.18% | 96 | 1/300 | O2-128.0 |
+| 1P-NPU (whitelist) | 5.35 | 6.36% | 96 | 1/300 | O2-128.0 |
+| 8P-GPU | 1137.49 | 37.56% | 256 | 30/300 | O2-128.0 |
+| 8P-NPU | 529.24 | 39.67% | 256 | 30/300 | O2-128.0 |
+| 8P-NPU-fusedSGD | 510.66 | 5.80% | 256 | 2/300 | O2-128.0 |
+
+- Testing result: Top1-ACC of 8P-NPU and 8P-GPU training (30 epochs)
+
+![img](meta\8P-GPU & 8P-NPU.png)
+
+## Performance Optimization
+
+From the results above, the accuracy (Top1-ACC 10-view) of 8P-GPU and 8P-NPU differs only slightly, but the throughput (FPS) of 8P-NPU is about 50% of 8P-GPU's.
+
+So we made the following analysis and improvements:
+
+- Find the dynamic operators following [here](https://gitee.com/wangjiangben_hw/ascend-pytorch-crowdintelligence-doc/blob/master/pytorch-train-guide/%E6%A8%A1%E5%9E%8B%E7%AE%97%E5%AD%90%E6%8F%90%E5%8F%96%E6%8C%87%E5%8D%97.md). However, these operators are very basic and we could not pinpoint them in our large model.
+
+![img](meta\dynamic_ops.png)
+
+- Check the NPU profile through Chrome tracing
+
+  image-20210913190836215
+
+- To improve the low performance of Transpose, we first generate the `cann profiling` data following [here](https://gitee.com/wangjiangben_hw/ascend-pytorch-crowdintelligence-doc/blob/master/pytorch-train-guide/CANNProfiling%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC%E4%B9%A6.md), then extract the two operators, TransposeD and TransData, and filter them by the rules below (a filtering sketch follows this list):
+  - if TransposeD `Consuming time > 10s`, add its Input Shapes to the white list (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/dynamic/transpose.py)
+  - if TransData `Consuming time > 10s & Input Formats == 'NCHW' & Output Formats == 'NC1HWC0'`, add its Input Shapes to the white list (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/four_2_five.py)
+  - if TransData `Consuming time > 10s & Input Formats == 'NC1HWC0' & Output Formats == 'NCHW'`, add its Input Shapes to the white list (/usr/local/Ascend/ascend-toolkit/5.0.2/x86_64-linux/opp/op_impl/built-in/ai_core/tbe/impl/five_2_four.py)
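+
+The shortlisting can be automated with a rough, unofficial sketch like the one below; the `op_summary.csv` file name and its column names (`Op Type`, `Input Shapes`, `Input Formats`, `Output Formats`, `Task Duration(us)`) are assumptions about the cann profiling export, and the threshold follows the `> 10s` rule above:
+
+```python
+import csv
+from collections import defaultdict
+
+SUMMARY = "op_summary.csv"       # assumed name of the exported cann profiling op summary
+THRESHOLD_US = 10 * 1000 * 1000  # the "Consuming time > 10s" rule, expressed in microseconds
+
+candidates = defaultdict(set)
+with open(SUMMARY, newline="") as f:
+    for row in csv.DictReader(f):
+        if float(row["Task Duration(us)"]) <= THRESHOLD_US:
+            continue
+        if row["Op Type"] == "TransposeD":
+            candidates["dynamic/transpose.py"].add(row["Input Shapes"])
+        elif row["Op Type"] == "TransData":
+            in_fmt, out_fmt = row["Input Formats"], row["Output Formats"]
+            if (in_fmt, out_fmt) == ("NCHW", "NC1HWC0"):
+                candidates["four_2_five.py"].add(row["Input Shapes"])
+            elif (in_fmt, out_fmt) == ("NC1HWC0", "NCHW"):
+                candidates["five_2_four.py"].add(row["Input Shapes"])
+
+for target, shapes in sorted(candidates.items()):
+    # Add these shapes to the corresponding white list file by hand.
+    print(target, sorted(shapes))
+```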
+
+**After Optimization**
+
+![image-20210918103240921](meta\profile-2.png)
+
+## Related Issues and PRs
+
+Issues and PRs related to this project:
+
+- invalid gradient https://gitee.com/ascend/modelzoo/issues/I452ZB https://gitee.com/ascend/pytorch-develop/pulls/2438
+- optimizer error https://gitee.com/ascend/pytorch-develop/pulls/2438
+- pyav install on CentOS arm https://gitee.com/ascend/modelzoo/issues/I48AP3
+- scikit-learn cannot allocate memory in static TLS https://gitee.com/ascend/modelzoo/issues/I48QNY
diff --git a/PyTorch/contrib/cv/video/X3D/slowfast/utils/ava_evaluation/__init__.py b/PyTorch/contrib/cv/video/X3D/slowfast/utils/ava_evaluation/__init__.py
index 919b0579077e1ca4dc63cedb7ae3a1a3e0134283..6228b7132697d24157a4052193061e9913f031c4 100644
--- a/PyTorch/contrib/cv/video/X3D/slowfast/utils/ava_evaluation/__init__.py
+++ b/PyTorch/contrib/cv/video/X3D/slowfast/utils/ava_evaluation/__init__.py
@@ -1,14 +1,14 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ diff --git a/PyTorch/contrib/nlp/Bertsum_for_PyTorch/modelzoo_level.txt b/PyTorch/contrib/nlp/Bertsum_for_PyTorch/modelzoo_level.txt index 0ea38b69c9f5b7594cdb9bec28a2b3a74bf82420..ceafe5b115be8b8b1cc908ab00c3d274b684821e 100644 --- a/PyTorch/contrib/nlp/Bertsum_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/contrib/nlp/Bertsum_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:PERFECT:NOK +FuncStatus:OK +PerfStatus:PERFECT:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/nlp/TextCNN/LICENSE b/PyTorch/contrib/nlp/TextCNN/LICENSE index 185404d5515c393add9ecfbdd7cd83596e8a4b26..5b4cf39445b7b24f2e5d38062c3b9cca89ad8a90 100644 --- a/PyTorch/contrib/nlp/TextCNN/LICENSE +++ b/PyTorch/contrib/nlp/TextCNN/LICENSE @@ -1,204 +1,204 @@ -Copyright 2018-2019 Open-MMLab. All rights reserved. -Copyright 2021 Huawei Technologies Co., Ltd - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018-2019 Open-MMLab. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright 2018-2019 Open-MMLab. All rights reserved. +Copyright 2021 Huawei Technologies Co., Ltd + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 Open-MMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/PyTorch/contrib/nlp/TextCNN/modelzoo_level.txt b/PyTorch/contrib/nlp/TextCNN/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/contrib/nlp/TextCNN/modelzoo_level.txt +++ b/PyTorch/contrib/nlp/TextCNN/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/nlp/tinybert/demo.py b/PyTorch/contrib/nlp/tinybert/demo.py index 2ce50832af94bbf34b70676a500afb33bb607200..3311302206a623f4e3b29a302fab8e30b3d06f40 100644 --- a/PyTorch/contrib/nlp/tinybert/demo.py +++ b/PyTorch/contrib/nlp/tinybert/demo.py @@ -1,89 +1,89 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""TinyBERT finetuning runner specifically for SST-2 dataset.""" - -################## import libraries ################## - -#standard libraries -from __future__ import absolute_import, division, print_function -import argparse -import random - -#third-party libraries -import numpy as np -import torch - -#local libraries -from transformer.modeling import TinyBertForSequenceClassification - -################## end import libraries ################## - -def random_int_list(start, stop, length): - start, stop = (int(start), int(stop)) if start <= stop else (int(stop), int(start)) - length = int(abs(length)) if length else 0 - random_list = [] - for i in range(length): - random_list.append(random.randint(start, stop)) - return random_list - -def is_same(a,b,i): - result = (a == b).mean() - if result == 1: - print("step {} = step {}: {}".format(i-1,i,'True')) - else: - print("step {} = step {}: {}".format(i - 1, i, 'False')) - -def main(): - - ################## set args ################## - parser = argparse.ArgumentParser() - - # 1.file and model - parser.add_argument("--model", - default=None, - type=str, - help="The model dir.") - parser.add_argument("--max_seq_length", - default=64, - type=int, - help="The maximum total input sequence length after WordPiece tokenization. \n" - "Sequences longer than this will be truncated, and sequences shorter \n" - "than this will be padded.") - args = parser.parse_args() - - # create model - model = TinyBertForSequenceClassification.from_pretrained(args.model, num_labels=2) - model.eval() - # test - input_ids = torch.tensor(random_int_list(0,9999,args.max_seq_length), dtype=torch.long).view(1,-1) - print(input_ids) - segment_ids = torch.tensor(random_int_list(0,1,args.max_seq_length), dtype=torch.long).view(1,-1) - input_mask = torch.tensor(random_int_list(0,1,args.max_seq_length), dtype=torch.long).view(1,-1) - repeat_time = 20 - for i in range(1,repeat_time+1): - logits, _, _ = model(input_ids, segment_ids, input_mask) - logits = logits.squeeze() - print("step {}, logits = {}".format(i,logits)) - if i == 1: - a = logits - elif i == 2: - b = logits - is_same(a.detach().numpy(),b.detach().numpy(),i) - else: - a = b - b = logits - is_same(a.detach().numpy(),b.detach().numpy(),i) - -if __name__ == "__main__": - main() +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""TinyBERT finetuning runner specifically for SST-2 dataset.""" + +################## import libraries ################## + +#standard libraries +from __future__ import absolute_import, division, print_function +import argparse +import random + +#third-party libraries +import numpy as np +import torch + +#local libraries +from transformer.modeling import TinyBertForSequenceClassification + +################## end import libraries ################## + +def random_int_list(start, stop, length): + start, stop = (int(start), int(stop)) if start <= stop else (int(stop), int(start)) + length = int(abs(length)) if length else 0 + random_list = [] + for i in range(length): + random_list.append(random.randint(start, stop)) + return random_list + +def is_same(a,b,i): + result = (a == b).mean() + if result == 1: + print("step {} = step {}: {}".format(i-1,i,'True')) + else: + print("step {} = step {}: {}".format(i - 1, i, 'False')) + +def main(): + + ################## set args ################## + parser = argparse.ArgumentParser() + + # 1.file and model + parser.add_argument("--model", + default=None, + type=str, + help="The model dir.") + parser.add_argument("--max_seq_length", + default=64, + type=int, + help="The maximum total input sequence length after WordPiece tokenization. \n" + "Sequences longer than this will be truncated, and sequences shorter \n" + "than this will be padded.") + args = parser.parse_args() + + # create model + model = TinyBertForSequenceClassification.from_pretrained(args.model, num_labels=2) + model.eval() + # test + input_ids = torch.tensor(random_int_list(0,9999,args.max_seq_length), dtype=torch.long).view(1,-1) + print(input_ids) + segment_ids = torch.tensor(random_int_list(0,1,args.max_seq_length), dtype=torch.long).view(1,-1) + input_mask = torch.tensor(random_int_list(0,1,args.max_seq_length), dtype=torch.long).view(1,-1) + repeat_time = 20 + for i in range(1,repeat_time+1): + logits, _, _ = model(input_ids, segment_ids, input_mask) + logits = logits.squeeze() + print("step {}, logits = {}".format(i,logits)) + if i == 1: + a = logits + elif i == 2: + b = logits + is_same(a.detach().numpy(),b.detach().numpy(),i) + else: + a = b + b = logits + is_same(a.detach().numpy(),b.detach().numpy(),i) + +if __name__ == "__main__": + main() diff --git a/PyTorch/contrib/nlp/tinybert/modelzoo_level.txt b/PyTorch/contrib/nlp/tinybert/modelzoo_level.txt index 66713332410061acecd3de873bf802dc52a017c4..c33e5e547967cbd6ca4ea0e4e5a2ee2f54150268 100644 --- a/PyTorch/contrib/nlp/tinybert/modelzoo_level.txt +++ b/PyTorch/contrib/nlp/tinybert/modelzoo_level.txt @@ -1,6 +1,6 @@ -FuncStatus:OK -PerfStatus:NOK -PrecisionStatus:OK -GPUStatus:OK -AutoTune:POK +FuncStatus:OK +PerfStatus:NOK +PrecisionStatus:OK +GPUStatus:OK +AutoTune:POK NPUMigrationStatus:OK \ No newline at end of file diff --git a/PyTorch/contrib/nlp/tinybert/transformer/modeling_for_finetune.py b/PyTorch/contrib/nlp/tinybert/transformer/modeling_for_finetune.py index d3f480d2ee257550698493bc541a87ceba3d4d52..8a0204692c6b9073f1eb82520c40633c834acf84 100644 --- a/PyTorch/contrib/nlp/tinybert/transformer/modeling_for_finetune.py +++ b/PyTorch/contrib/nlp/tinybert/transformer/modeling_for_finetune.py @@ -1,1144 +1,1144 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""PyTorch BERT model.""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import copy -import json -import logging -import math -import os -import shutil -import tarfile -import tempfile -import sys -from io import open - -import torch -import torch.nn.functional as F -from torch import nn -from torch.nn import CrossEntropyLoss -from torch.autograd import Variable -from torch.nn.parameter import Parameter - -from .file_utils import WEIGHTS_NAME, CONFIG_NAME - -logger = logging.getLogger(__name__) - -PRETRAINED_MODEL_ARCHIVE_MAP = { - 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", - 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", - 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", - 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", - 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", - 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", - 'bert-base-chinese': "", -} - -BERT_CONFIG_NAME = 'bert_config.json' -TF_WEIGHTS_NAME = 'model.ckpt' - - -def load_tf_weights_in_bert(model, tf_checkpoint_path): - """ Load tf checkpoints in a pytorch model - """ - try: - import re - import numpy as np - import tensorflow as tf - except ImportError: - print("Loading a TensorFlow models in PyTorch, requires TensorFlow to be installed. 
Please see " - "https://www.tensorflow.org/install/ for installation instructions.") - raise - tf_path = os.path.abspath(tf_checkpoint_path) - print("Converting TensorFlow checkpoint from {}".format(tf_path)) - # Load weights from TF model - init_vars = tf.train.list_variables(tf_path) - names = [] - arrays = [] - for name, shape in init_vars: - print("Loading TF weight {} with shape {}".format(name, shape)) - array = tf.train.load_variable(tf_path, name) - names.append(name) - arrays.append(array) - - for name, array in zip(names, arrays): - name = name.split('/') - # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v - # which are not required for using pretrained model - if any(n in ["adam_v", "adam_m", "global_step"] for n in name): - print("Skipping {}".format("/".join(name))) - continue - pointer = model - for m_name in name: - if re.fullmatch(r'[A-Za-z]+_\d+', m_name): - l = re.split(r'_(\d+)', m_name) - else: - l = [m_name] - if l[0] == 'kernel' or l[0] == 'gamma': - pointer = getattr(pointer, 'weight') - elif l[0] == 'output_bias' or l[0] == 'beta': - try: - pointer = getattr(pointer, 'bias') - except AttributeError: - print("Skipping {}".format("/".join(name))) - continue - elif l[0] == 'output_weights': - pointer = getattr(pointer, 'weight') - elif l[0] == 'squad': - pointer = getattr(pointer, 'classifier') - else: - try: - pointer = getattr(pointer, l[0]) - except AttributeError: - print("Skipping {}".format("/".join(name))) - continue - if len(l) >= 2: - num = int(l[1]) - pointer = pointer[num] - if m_name[-11:] == '_embeddings': - pointer = getattr(pointer, 'weight') - elif m_name == 'kernel': - array = np.transpose(array) - try: - assert pointer.shape == array.shape - except AssertionError as e: - e.args += (pointer.shape, array.shape) - raise - print("Initialize PyTorch weight {}".format(name)) - pointer.data = torch.from_numpy(array) - return model - - -def gelu(x): - """Implementation of the gelu activation function. - For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): - 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) - Also see https://arxiv.org/abs/1606.08415 - """ - return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) - - -def swish(x): - return x * torch.sigmoid(x) - -''' - -try: - from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm -except ImportError: - logger.info( - "Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .") -''' -class BertLayerNorm(nn.Module): - def __init__(self, hidden_size, eps=1e-12): - """Construct a layernorm module in the TF style (epsilon inside the square root). 
- """ - super(BertLayerNorm, self).__init__() - self.weight = nn.Parameter(torch.ones(hidden_size)) - self.bias = nn.Parameter(torch.zeros(hidden_size)) - self.variance_epsilon = eps - - def forward(self, x): - u = x.mean(-1, keepdim=True) - s = (x - u).pow(2).mean(-1, keepdim=True) - x = (x - u) / torch.sqrt(s + self.variance_epsilon) - return self.weight * x + self.bias - - -class HeadAttention(nn.Module): - def __init__(self, config, hidden_size, head_num, head_used): - super(HeadAttention, self).__init__() - self.head_num = head_num - self.head_used = head_used - self.hidden_size = hidden_size - if self.hidden_size % self.head_num != 0: - raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (self.hidden_size, self.head_num)) - - self.attention_head_size = int(self.hidden_size / self.head_num) - self.all_head_size = self.num_heads_used * self.attention_head_size - - self.query = nn.Linear(self.hidden_size, self.all_head_size) - self.key = nn.Linear(self.hidden_size, self.all_head_size) - self.value = nn.Linear(self.hidden_size, self.all_head_size) - - self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - - def transpose_for_scores(self, x): - new_x_shape = x.size()[:-1] + (self.num_heads_used, - self.attention_head_size) - x = x.view(*new_x_shape) - return x.permute(0, 2, 1, 3) - - def forward(self, hidden_states, attention_mask): - mixed_query_layer = self.query(hidden_states) - mixed_key_layer = self.key(hidden_states) - mixed_value_layer = self.value(hidden_states) - - query_layer = self.transpose_for_scores(mixed_query_layer) - key_layer = self.transpose_for_scores(mixed_key_layer) - value_layer = self.transpose_for_scores(mixed_value_layer) - - # Take the dot product between "query" and "key" to get the raw attention scores. - attention_scores = torch.matmul( - query_layer, key_layer.transpose(-1, -2)) - attention_scores = attention_scores / \ - math.sqrt(self.attention_head_size) - # Apply the attention mask is (precomputed for all layers in BertModel forward() function) - attention_scores = attention_scores + attention_mask - - # Normalize the attention scores to probabilities. - attention_probs = nn.Softmax(dim=-1)(attention_scores) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_probs = self.dropout(attention_probs) - - context_layer = torch.matmul(attention_probs, value_layer) - context_layer = context_layer.permute(0, 2, 1, 3).contiguous() - new_context_layer_shape = context_layer.size()[ - :-2] + (self.all_head_size,) - context_layer = context_layer.view(*new_context_layer_shape) - - if self.num_heads_used != self.num_attention_heads: - pad_shape = context_layer.size()[:-1] + \ - ((self.num_attention_heads - self.num_heads_used) - * self.attention_head_size, ) - - pad_layer = torch.zeros(*pad_shape).to(context_layer.device) - context_layer = torch.cat((context_layer, pad_layer), -1) - return context_layer - - -ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu} -NORM = {'layer_norm': BertLayerNorm} - - -class BertConfig(object): - """Configuration class to store the configuration of a `BertModel`. 
- """ - - def __init__(self, - vocab_size_or_config_json_file, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=2, - initializer_range=0.02, - pre_trained='', - training=''): - """Constructs BertConfig. - - Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. - hidden_size: Size of the encoder layers and the pooler layer. - num_hidden_layers: Number of hidden layers in the Transformer encoder. - num_attention_heads: Number of attention heads for each attention layer in - the Transformer encoder. - intermediate_size: The size of the "intermediate" (i.e., feed-forward) - layer in the Transformer encoder. - hidden_act: The non-linear activation function (function or string) in the - encoder and pooler. If string, "gelu", "relu" and "swish" are supported. - hidden_dropout_prob: The dropout probabilitiy for all fully connected - layers in the embeddings, encoder, and pooler. - attention_probs_dropout_prob: The dropout ratio for the attention - probabilities. - max_position_embeddings: The maximum sequence length that this model might - ever be used with. Typically set this to something large just in case - (e.g., 512 or 1024 or 2048). - type_vocab_size: The vocabulary size of the `token_type_ids` passed into - `BertModel`. - initializer_range: The sttdev of the truncated_normal_initializer for - initializing all weight matrices. - """ - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.pre_trained = pre_trained - self.training = training - else: - raise ValueError("First argument must be either a vocabulary size (int)" - "or the path to a pretrained model config file (str)") - - @classmethod - def from_dict(cls, json_object): - """Constructs a `BertConfig` from a Python dictionary of parameters.""" - config = BertConfig(vocab_size_or_config_json_file=-1) - for key, value in json_object.items(): - config.__dict__[key] = value - return config - - @classmethod - def from_json_file(cls, json_file): - """Constructs a `BertConfig` from a json file of parameters.""" - with open(json_file, "r", encoding='utf-8') as reader: - text = reader.read() - return cls.from_dict(json.loads(text)) - - def __repr__(self): - return str(self.to_json_string()) - - def to_dict(self): - """Serializes this instance to a Python dictionary.""" - output = copy.deepcopy(self.__dict__) - return output - - def to_json_string(self): - """Serializes this instance to a JSON string.""" - return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" - - def to_json_file(self, 
json_file_path): - """ Save this instance to a json file.""" - with open(json_file_path, "w", encoding='utf-8') as writer: - writer.write(self.to_json_string()) - - -class BertEmbeddings(nn.Module): - """Construct the embeddings from word, position and token_type embeddings. - """ - - def __init__(self, config): - super(BertEmbeddings, self).__init__() - self.word_embeddings = nn.Embedding( - config.vocab_size, config.hidden_size, padding_idx=0) - self.position_embeddings = nn.Embedding( - config.max_position_embeddings, config.hidden_size) - self.token_type_embeddings = nn.Embedding( - config.type_vocab_size, config.hidden_size) - - # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load - # any TensorFlow checkpoint file - self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def forward(self, input_ids, token_type_ids=None): - seq_length = input_ids.size(1) - position_ids = torch.arange( - seq_length, dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand_as(input_ids) - if token_type_ids is None: - token_type_ids = torch.zeros_like(input_ids) - - words_embeddings = self.word_embeddings(input_ids) - position_embeddings = self.position_embeddings(position_ids) - token_type_embeddings = self.token_type_embeddings(token_type_ids) - - embeddings = words_embeddings + position_embeddings + token_type_embeddings - embeddings = self.LayerNorm(embeddings) - embeddings = self.dropout(embeddings) - return embeddings - - -class BertSelfAttention(nn.Module): - def __init__(self, config): - super(BertSelfAttention, self).__init__() - if config.hidden_size % config.num_attention_heads != 0: - raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads)) - self.num_attention_heads = config.num_attention_heads - self.attention_head_size = int( - config.hidden_size / config.num_attention_heads) - self.all_head_size = self.num_attention_heads * self.attention_head_size - - self.query = nn.Linear(config.hidden_size, self.all_head_size) - self.key = nn.Linear(config.hidden_size, self.all_head_size) - self.value = nn.Linear(config.hidden_size, self.all_head_size) - - self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - - def transpose_for_scores(self, x): - new_x_shape = x.size()[ - :-1] + (self.num_attention_heads, self.attention_head_size) - x = x.view(*new_x_shape) - return x.permute(0, 2, 1, 3) - - def forward(self, hidden_states, attention_mask, output_att=False): - mixed_query_layer = self.query(hidden_states) - mixed_key_layer = self.key(hidden_states) - mixed_value_layer = self.value(hidden_states) - - query_layer = self.transpose_for_scores(mixed_query_layer) - key_layer = self.transpose_for_scores(mixed_key_layer) - value_layer = self.transpose_for_scores(mixed_value_layer) - - # Take the dot product between "query" and "key" to get the raw attention scores. - attention_scores = torch.matmul( - query_layer, key_layer.transpose(-1, -2)) - attention_scores = attention_scores / \ - math.sqrt(self.attention_head_size) - # Apply the attention mask is (precomputed for all layers in BertModel forward() function) - attention_scores = attention_scores + attention_mask - - # Normalize the attention scores to probabilities. 
- attention_probs = nn.Softmax(dim=-1)(attention_scores) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_probs = self.dropout(attention_probs) - - context_layer = torch.matmul(attention_probs, value_layer) - context_layer = context_layer.permute(0, 2, 1, 3).contiguous() - new_context_layer_shape = context_layer.size()[ - :-2] + (self.all_head_size,) - context_layer = context_layer.view(*new_context_layer_shape) - return context_layer, attention_scores - - -class BertAttention(nn.Module): - def __init__(self, config): - super(BertAttention, self).__init__() - - self.self = BertSelfAttention(config) - self.output = BertSelfOutput(config) - - def forward(self, input_tensor, attention_mask): - self_output, layer_att = self.self(input_tensor, attention_mask) - attention_output = self.output(self_output, input_tensor) - return attention_output, layer_att - - -class BertSelfOutput(nn.Module): - def __init__(self, config): - super(BertSelfOutput, self).__init__() - self.dense = nn.Linear(config.hidden_size, config.hidden_size) - self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def forward(self, hidden_states, input_tensor): - hidden_states = self.dense(hidden_states) - hidden_states = self.dropout(hidden_states) - hidden_states = self.LayerNorm(hidden_states + input_tensor) - return hidden_states - - -class BertIntermediate(nn.Module): - def __init__(self, config, intermediate_size=-1): - super(BertIntermediate, self).__init__() - if intermediate_size < 0: - self.dense = nn.Linear( - config.hidden_size, config.intermediate_size) - else: - self.dense = nn.Linear(config.hidden_size, intermediate_size) - if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): - self.intermediate_act_fn = ACT2FN[config.hidden_act] - else: - self.intermediate_act_fn = config.hidden_act - - def forward(self, hidden_states): - hidden_states = self.dense(hidden_states) - hidden_states = self.intermediate_act_fn(hidden_states) - return hidden_states - - -class BertOutput(nn.Module): - def __init__(self, config, intermediate_size=-1): - super(BertOutput, self).__init__() - if intermediate_size < 0: - self.dense = nn.Linear( - config.intermediate_size, config.hidden_size) - else: - self.dense = nn.Linear(intermediate_size, config.hidden_size) - self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def forward(self, hidden_states, input_tensor): - hidden_states = self.dense(hidden_states) - hidden_states = self.dropout(hidden_states) - hidden_states = self.LayerNorm(hidden_states + input_tensor) - return hidden_states - - -class BertLayer(nn.Module): - def __init__(self, config): - super(BertLayer, self).__init__() - self.attention = BertAttention(config) - self.intermediate = BertIntermediate(config) - self.output = BertOutput(config) - - def forward(self, hidden_states, attention_mask): - attention_output, layer_att = self.attention( - hidden_states, attention_mask) - intermediate_output = self.intermediate(attention_output) - layer_output = self.output(intermediate_output, attention_output) - - return layer_output, layer_att - - -class BertEncoder(nn.Module): - def __init__(self, config): - super(BertEncoder, self).__init__() - self.layer = nn.ModuleList([BertLayer(config) - for _ in range(config.num_hidden_layers)]) 
- - def forward(self, hidden_states, attention_mask): - all_encoder_layers = [] - all_encoder_atts = [] - for _, layer_module in enumerate(self.layer): - all_encoder_layers.append(hidden_states) - hidden_states, layer_att = layer_module( - hidden_states, attention_mask) - all_encoder_atts.append(layer_att) - - all_encoder_layers.append(hidden_states) - return all_encoder_layers, all_encoder_atts - - -class BertPooler(nn.Module): - def __init__(self, config, recurs=None): - super(BertPooler, self).__init__() - self.dense = nn.Linear(config.hidden_size, config.hidden_size) - self.activation = nn.Tanh() - self.config = config - - def forward(self, hidden_states): - # We "pool" the model by simply taking the hidden state corresponding - # to the first token. "-1" refers to last layer - pooled_output = hidden_states[-1][:, 0] - - pooled_output = self.dense(pooled_output) - pooled_output = self.activation(pooled_output) - - return pooled_output - - -class BertPredictionHeadTransform(nn.Module): - def __init__(self, config): - super(BertPredictionHeadTransform, self).__init__() - # Need to unty it when we separate the dimensions of hidden and emb - self.dense = nn.Linear(config.hidden_size, config.hidden_size) - if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): - self.transform_act_fn = ACT2FN[config.hidden_act] - else: - self.transform_act_fn = config.hidden_act - self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) - - def forward(self, hidden_states): - hidden_states = self.dense(hidden_states) - hidden_states = self.transform_act_fn(hidden_states) - hidden_states = self.LayerNorm(hidden_states) - return hidden_states - - -class BertLMPredictionHead(nn.Module): - def __init__(self, config, bert_model_embedding_weights): - super(BertLMPredictionHead, self).__init__() - self.transform = BertPredictionHeadTransform(config) - - # The output weights are the same as the input embeddings, but there is - # an output-only bias for each token. 
- self.decoder = nn.Linear(bert_model_embedding_weights.size(1), - bert_model_embedding_weights.size(0), - bias=False) - self.decoder.weight = bert_model_embedding_weights - self.bias = nn.Parameter(torch.zeros( - bert_model_embedding_weights.size(0))) - - def forward(self, hidden_states): - hidden_states = self.transform(hidden_states) - hidden_states = self.decoder(hidden_states) + self.bias - return hidden_states - - -class BertOnlyMLMHead(nn.Module): - def __init__(self, config, bert_model_embedding_weights): - super(BertOnlyMLMHead, self).__init__() - self.predictions = BertLMPredictionHead( - config, bert_model_embedding_weights) - - def forward(self, sequence_output): - prediction_scores = self.predictions(sequence_output) - return prediction_scores - - -class BertOnlyNSPHead(nn.Module): - def __init__(self, config): - super(BertOnlyNSPHead, self).__init__() - self.seq_relationship = nn.Linear(config.hidden_size, 2) - - def forward(self, pooled_output): - seq_relationship_score = self.seq_relationship(pooled_output) - return seq_relationship_score - - -class BertPreTrainingHeads(nn.Module): - def __init__(self, config, bert_model_embedding_weights): - super(BertPreTrainingHeads, self).__init__() - self.predictions = BertLMPredictionHead( - config, bert_model_embedding_weights) - self.seq_relationship = nn.Linear(config.hidden_size, 2) - - def forward(self, sequence_output, pooled_output): - prediction_scores = self.predictions(sequence_output) - seq_relationship_score = self.seq_relationship(pooled_output) - return prediction_scores, seq_relationship_score - - -class BertPreTrainedModel(nn.Module): - """ An abstract class to handle weights initialization and - a simple interface for dowloading and loading pretrained models. - """ - - def __init__(self, config, *inputs, **kwargs): - super(BertPreTrainedModel, self).__init__() - if not isinstance(config, BertConfig): - raise ValueError( - "Parameter config in `{}(config)` should be an instance of class `BertConfig`. " - "To create a model from a Google pretrained model use " - "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( - self.__class__.__name__, self.__class__.__name__ - )) - self.config = config - - def init_bert_weights(self, module): - """ Initialize the weights. - """ - if isinstance(module, (nn.Linear, nn.Embedding)): - # Slightly different from the TF version which uses truncated_normal for initialization - # cf https://github.com/pytorch/pytorch/pull/5617 - module.weight.data.normal_( - mean=0.0, std=self.config.initializer_range) - elif isinstance(module, BertLayerNorm): - module.bias.data.zero_() - module.weight.data.fill_(1.0) - if isinstance(module, nn.Linear) and module.bias is not None: - module.bias.data.zero_() - - @classmethod - def from_scratch(cls, pretrained_model_name_or_path, *inputs, **kwargs): - resolved_config_file = os.path.join( - pretrained_model_name_or_path, CONFIG_NAME) - config = BertConfig.from_json_file(resolved_config_file) - - logger.info("Model config {}".format(config)) - model = cls(config, *inputs, **kwargs) - return model - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): - """ - Instantiate a BertPreTrainedModel from a pre-trained model file or a pytorch state dict. - Download and cache the pre-trained model file if needed. - - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `bert-base-uncased` - . `bert-large-uncased` - . `bert-base-cased` - . 
`bert-large-cased` - . `bert-base-multilingual-uncased` - . `bert-base-multilingual-cased` - . `bert-base-chinese` - - a path or url to a pretrained model archive containing: - . `bert_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance - - a path or url to a pretrained model archive containing: - . `bert_config.json` a configuration file for the model - . `model.chkpt` a TensorFlow checkpoint - from_tf: should we load the weights from a locally saved TensorFlow checkpoint - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models - *inputs, **kwargs: additional input for the specific Bert class - (ex: num_labels for BertForSequenceClassification) - """ - state_dict = kwargs.get('state_dict', None) - kwargs.pop('state_dict', None) - from_tf = kwargs.get('from_tf', False) - kwargs.pop('from_tf', None) - - # Load config - config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) - config = BertConfig.from_json_file(config_file) - logger.info("Model config {}".format(config)) - # Instantiate model. - - model = cls(config, *inputs, **kwargs) - if state_dict is None and not from_tf: - weights_path = os.path.join( - pretrained_model_name_or_path, WEIGHTS_NAME) - logger.info("Loading model {}".format(weights_path)) - state_dict = torch.load(weights_path, map_location='cpu') - state_dict.pop('classifier.weight') - state_dict.pop('classifier.bias') - if from_tf: - # Directly load from a TensorFlow checkpoint - weights_path = os.path.join( - pretrained_model_name_or_path, TF_WEIGHTS_NAME) - return load_tf_weights_in_bert(model, weights_path) - # Load from a PyTorch state_dict - old_keys = [] - new_keys = [] - for key in state_dict.keys(): - new_key = None - if 'gamma' in key: - new_key = key.replace('gamma', 'weight') - if 'beta' in key: - new_key = key.replace('beta', 'bias') - if new_key: - old_keys.append(key) - new_keys.append(new_key) - for old_key, new_key in zip(old_keys, new_keys): - state_dict[new_key] = state_dict.pop(old_key) - - missing_keys = [] - unexpected_keys = [] - error_msgs = [] - # copy state_dict so _load_from_state_dict can modify it - metadata = getattr(state_dict, '_metadata', None) - state_dict = state_dict.copy() - if metadata is not None: - state_dict._metadata = metadata - - def load(module, prefix=''): - local_metadata = {} if metadata is None else metadata.get( - prefix[:-1], {}) - module._load_from_state_dict( - state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) - for name, child in module._modules.items(): - if child is not None: - load(child, prefix + name + '.') - - start_prefix = '' - if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()): - start_prefix = 'bert.' 
- - logger.info('loading model...') - load(model, prefix=start_prefix) - logger.info('done!') - if len(missing_keys) > 0: - logger.info("Weights of {} not initialized from pretrained model: {}".format( - model.__class__.__name__, missing_keys)) - if len(unexpected_keys) > 0: - logger.info("Weights from pretrained model not used in {}: {}".format( - model.__class__.__name__, unexpected_keys)) - if len(error_msgs) > 0: - raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( - model.__class__.__name__, "\n\t".join(error_msgs))) - - return model - - -class BertModel(BertPreTrainedModel): - """BERT model ("Bidirectional Embedding Representations from a Transformer"). - - Params: - config: a BertConfig class instance with the configuration to build a new model - - Inputs: - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] - with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts - `extract_features.py`, `run_classifier.py` and `run_squad.py`) - `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token - types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to - a `sentence B` token (see BERT paper for more details). - `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices - selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max - input sequence length in the current batch. It's the mask that we typically use for attention when - a batch has varying length sentences. - `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. - - Outputs: Tuple of (encoded_layers, pooled_output) - `encoded_layers`: controled by `output_all_encoded_layers` argument: - - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end - of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each - encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], - - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding - to the last attention block of shape [batch_size, sequence_length, hidden_size], - `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a - classifier pretrained on top of the hidden state associated to the first character of the - input (`CLS`) to train on the Next-Sentence task (see BERT's paper). 
- - Example usage: - ```python - # Already been converted into WordPiece token ids - input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) - input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - - config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, - num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) - - model = modeling.BertModel(config=config) - all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) - ``` - """ - - def __init__(self, config): - super(BertModel, self).__init__(config) - self.embeddings = BertEmbeddings(config) - self.encoder = BertEncoder(config) - self.pooler = BertPooler(config) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, attention_mask=None, - output_all_encoded_layers=True, output_att=True): - - if attention_mask is None: - attention_mask = torch.ones_like(input_ids) - if token_type_ids is None: - token_type_ids = torch.zeros_like(input_ids) - - # We create a 3D attention mask from a 2D tensor mask. - # Sizes are [batch_size, 1, 1, to_seq_length] - # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] - # this attention mask is more simple than the triangular masking of causal attention - # used in OpenAI GPT, we just need to prepare the broadcast dimension here. - extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) - - # Since attention_mask is 1.0 for positions we want to attend and 0.0 for - # masked positions, this operation will create a tensor which is 0.0 for - # positions we want to attend and -10000.0 for masked positions. - # Since we are adding it to the raw scores before the softmax, this is - # effectively the same as removing these entirely. - extended_attention_mask = extended_attention_mask.to( - dtype=next(self.parameters()).dtype) # fp16 compatibility - extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 - - embedding_output = self.embeddings(input_ids, token_type_ids) - encoded_layers, layer_atts = self.encoder(embedding_output, - extended_attention_mask) - - pooled_output = self.pooler(encoded_layers) - if not output_all_encoded_layers: - encoded_layers = encoded_layers[-1] - - if not output_att: - return encoded_layers, pooled_output - - return encoded_layers, layer_atts, pooled_output - - -class BertForPreTraining(BertPreTrainedModel): - """BERT model with pre-training heads. - This module comprises the BERT model followed by the two pre-training heads: - - the masked language modeling head, and - - the next sentence classification head. - - Params: - config: a BertConfig class instance with the configuration to build a new model. - - Inputs: - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] - with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts - `extract_features.py`, `run_classifier.py` and `run_squad.py`) - `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token - types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to - a `sentence B` token (see BERT paper for more details). - `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices - selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max - input sequence length in the current batch. 
It's the mask that we typically use for attention when - a batch has varying length sentences. - `masked_lm_labels`: optional masked language modeling labels: torch.LongTensor of shape [batch_size, sequence_length] - with indices selected in [-1, 0, ..., vocab_size]. All labels set to -1 are ignored (masked), the loss - is only computed for the labels set in [0, ..., vocab_size] - `next_sentence_label`: optional next sentence classification loss: torch.LongTensor of shape [batch_size] - with indices selected in [0, 1]. - 0 => next sentence is the continuation, 1 => next sentence is a random sentence. - - Outputs: - if `masked_lm_labels` and `next_sentence_label` are not `None`: - Outputs the total_loss which is the sum of the masked language modeling loss and the next - sentence classification loss. - if `masked_lm_labels` or `next_sentence_label` is `None`: - Outputs a tuple comprising - - the masked language modeling logits of shape [batch_size, sequence_length, vocab_size], and - - the next sentence classification logits of shape [batch_size, 2]. - - Example usage: - ```python - # Already been converted into WordPiece token ids - input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) - input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - - config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, - num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) - - model = BertForPreTraining(config) - masked_lm_logits_scores, seq_relationship_logits = model(input_ids, token_type_ids, input_mask) - ``` - """ - - def __init__(self, config): - super(BertForPreTraining, self).__init__(config) - self.bert = BertModel(config) - self.cls = BertPreTrainingHeads( - config, self.bert.embeddings.word_embeddings.weight) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, attention_mask=None, - masked_lm_labels=None, next_sentence_label=None): - sequence_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, - output_all_encoded_layers=False) - prediction_scores, seq_relationship_score = self.cls( - sequence_output, pooled_output) - - if masked_lm_labels is not None and next_sentence_label is not None: - loss_fct = CrossEntropyLoss(ignore_index=-1) - masked_lm_loss = loss_fct( - prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) - next_sentence_loss = loss_fct( - seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) - total_loss = masked_lm_loss + next_sentence_loss - return total_loss - elif masked_lm_labels is not None: - loss_fct = CrossEntropyLoss(ignore_index=-1) - masked_lm_loss = loss_fct( - prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) - total_loss = masked_lm_loss - return total_loss - else: - return prediction_scores, seq_relationship_score - - -class TinyBertForPreTraining(BertPreTrainedModel): - def __init__(self, config, fit_size=768): - super(TinyBertForPreTraining, self).__init__(config) - self.bert = BertModel(config) - self.cls = BertPreTrainingHeads( - config, self.bert.embeddings.word_embeddings.weight) - self.fit_dense = nn.Linear(config.hidden_size, fit_size) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, - attention_mask=None, masked_lm_labels=None, - next_sentence_label=None, labels=None): - sequence_output, att_output, pooled_output = self.bert( - input_ids, token_type_ids, attention_mask) - tmp = [] - for s_id, 
sequence_layer in enumerate(sequence_output): - tmp.append(self.fit_dense(sequence_layer)) - sequence_output = tmp - - return att_output, sequence_output - - -class BertForMaskedLM(BertPreTrainedModel): - """BERT model with the masked language modeling head. - This module comprises the BERT model followed by the masked language modeling head. - - Params: - config: a BertConfig class instance with the configuration to build a new model. - - Inputs: - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] - with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts - `extract_features.py`, `run_classifier.py` and `run_squad.py`) - `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token - types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to - a `sentence B` token (see BERT paper for more details). - `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices - selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max - input sequence length in the current batch. It's the mask that we typically use for attention when - a batch has varying length sentences. - `masked_lm_labels`: masked language modeling labels: torch.LongTensor of shape [batch_size, sequence_length] - with indices selected in [-1, 0, ..., vocab_size]. All labels set to -1 are ignored (masked), the loss - is only computed for the labels set in [0, ..., vocab_size] - - Outputs: - if `masked_lm_labels` is not `None`: - Outputs the masked language modeling loss. - if `masked_lm_labels` is `None`: - Outputs the masked language modeling logits of shape [batch_size, sequence_length, vocab_size]. - - Example usage: - ```python - # Already been converted into WordPiece token ids - input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) - input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - - config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, - num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) - - model = BertForMaskedLM(config) - masked_lm_logits_scores = model(input_ids, token_type_ids, input_mask) - ``` - """ - - def __init__(self, config): - super(BertForMaskedLM, self).__init__(config) - self.bert = BertModel(config) - self.cls = BertOnlyMLMHead( - config, self.bert.embeddings.word_embeddings.weight) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, - output_att=False, infer=False): - sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, - output_all_encoded_layers=True, output_att=output_att) - - if output_att: - sequence_output, att_output = sequence_output - prediction_scores = self.cls(sequence_output[-1]) - - if masked_lm_labels is not None: - loss_fct = CrossEntropyLoss(ignore_index=-1) - masked_lm_loss = loss_fct( - prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) - if not output_att: - return masked_lm_loss - else: - return masked_lm_loss, att_output - else: - if not output_att: - return prediction_scores - else: - return prediction_scores, att_output - - -class BertForNextSentencePrediction(BertPreTrainedModel): - """BERT model with next sentence prediction head. - This module comprises the BERT model followed by the next sentence classification head. 
- - Params: - config: a BertConfig class instance with the configuration to build a new model. - - Inputs: - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] - with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts - `extract_features.py`, `run_classifier.py` and `run_squad.py`) - `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token - types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to - a `sentence B` token (see BERT paper for more details). - `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices - selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max - input sequence length in the current batch. It's the mask that we typically use for attention when - a batch has varying length sentences. - `next_sentence_label`: next sentence classification loss: torch.LongTensor of shape [batch_size] - with indices selected in [0, 1]. - 0 => next sentence is the continuation, 1 => next sentence is a random sentence. - - Outputs: - if `next_sentence_label` is not `None`: - Outputs the total_loss which is the sum of the masked language modeling loss and the next - sentence classification loss. - if `next_sentence_label` is `None`: - Outputs the next sentence classification logits of shape [batch_size, 2]. - - Example usage: - ```python - # Already been converted into WordPiece token ids - input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) - input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - - config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, - num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) - - model = BertForNextSentencePrediction(config) - seq_relationship_logits = model(input_ids, token_type_ids, input_mask) - ``` - """ - - def __init__(self, config): - super(BertForNextSentencePrediction, self).__init__(config) - self.bert = BertModel(config) - self.cls = BertOnlyNSPHead(config) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, attention_mask=None, next_sentence_label=None): - _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, - output_all_encoded_layers=False) - seq_relationship_score = self.cls(pooled_output) - - if next_sentence_label is not None: - loss_fct = CrossEntropyLoss(ignore_index=-1) - next_sentence_loss = loss_fct( - seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) - return next_sentence_loss - else: - return seq_relationship_score - - -class BertForSentencePairClassification(BertPreTrainedModel): - def __init__(self, config, num_labels): - super(BertForSentencePairClassification, self).__init__(config) - self.num_labels = num_labels - self.bert = BertModel(config) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - self.classifier = nn.Linear(config.hidden_size * 3, num_labels) - self.apply(self.init_bert_weights) - - def forward(self, a_input_ids, b_input_ids, a_token_type_ids=None, b_token_type_ids=None, - a_attention_mask=None, b_attention_mask=None, labels=None): - _, a_pooled_output = self.bert( - a_input_ids, a_token_type_ids, a_attention_mask, output_all_encoded_layers=False) - # a_pooled_output = self.dropout(a_pooled_output) - - _, b_pooled_output = self.bert( - b_input_ids, b_token_type_ids, b_attention_mask, 
output_all_encoded_layers=False) - # b_pooled_output = self.dropout(b_pooled_output) - - logits = self.classifier(torch.relu(torch.cat((a_pooled_output, b_pooled_output, - torch.abs(a_pooled_output - b_pooled_output)), -1))) - - if labels is not None: - loss_fct = CrossEntropyLoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) - return loss - else: - return logits - - -class TinyBertForSequenceClassification_for_finetune(BertPreTrainedModel): - def __init__(self, config, num_labels, fit_size=768): - super(TinyBertForSequenceClassification_for_finetune, self).__init__(config) - self.num_labels = num_labels - self.bert = BertModel(config) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - self.classifier = nn.Linear(config.hidden_size, num_labels) - self.fit_dense = nn.Linear(config.hidden_size, fit_size) - self.apply(self.init_bert_weights) - - def forward(self, input_ids, token_type_ids=None, attention_mask=None, - labels=None, is_student=False): - - sequence_output, att_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, - output_all_encoded_layers=True, output_att=True) - - logits = self.classifier(torch.relu(pooled_output)) - - tmp = [] - if is_student: - for s_id, sequence_layer in enumerate(sequence_output): - tmp.append(self.fit_dense(sequence_layer)) - sequence_output = tmp - return logits, att_output, sequence_output +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""PyTorch BERT model.""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import copy +import json +import logging +import math +import os +import shutil +import tarfile +import tempfile +import sys +from io import open + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn import CrossEntropyLoss +from torch.autograd import Variable +from torch.nn.parameter import Parameter + +from .file_utils import WEIGHTS_NAME, CONFIG_NAME + +logger = logging.getLogger(__name__) + +PRETRAINED_MODEL_ARCHIVE_MAP = { + 'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz", + 'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz", + 'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz", + 'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz", + 'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz", + 'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz", + 'bert-base-chinese': "", +} + +BERT_CONFIG_NAME = 'bert_config.json' +TF_WEIGHTS_NAME = 'model.ckpt' + + +def load_tf_weights_in_bert(model, tf_checkpoint_path): + """ Load tf checkpoints in a pytorch model + """ + try: + import re + import numpy as np + import tensorflow as tf + except ImportError: + print("Loading a TensorFlow models in PyTorch, requires TensorFlow to be installed. Please see " + "https://www.tensorflow.org/install/ for installation instructions.") + raise + tf_path = os.path.abspath(tf_checkpoint_path) + print("Converting TensorFlow checkpoint from {}".format(tf_path)) + # Load weights from TF model + init_vars = tf.train.list_variables(tf_path) + names = [] + arrays = [] + for name, shape in init_vars: + print("Loading TF weight {} with shape {}".format(name, shape)) + array = tf.train.load_variable(tf_path, name) + names.append(name) + arrays.append(array) + + for name, array in zip(names, arrays): + name = name.split('/') + # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v + # which are not required for using pretrained model + if any(n in ["adam_v", "adam_m", "global_step"] for n in name): + print("Skipping {}".format("/".join(name))) + continue + pointer = model + for m_name in name: + if re.fullmatch(r'[A-Za-z]+_\d+', m_name): + l = re.split(r'_(\d+)', m_name) + else: + l = [m_name] + if l[0] == 'kernel' or l[0] == 'gamma': + pointer = getattr(pointer, 'weight') + elif l[0] == 'output_bias' or l[0] == 'beta': + try: + pointer = getattr(pointer, 'bias') + except AttributeError: + print("Skipping {}".format("/".join(name))) + continue + elif l[0] == 'output_weights': + pointer = getattr(pointer, 'weight') + elif l[0] == 'squad': + pointer = getattr(pointer, 'classifier') + else: + try: + pointer = getattr(pointer, l[0]) + except AttributeError: + print("Skipping {}".format("/".join(name))) + continue + if len(l) >= 2: + num = int(l[1]) + pointer = pointer[num] + if m_name[-11:] == '_embeddings': + pointer = getattr(pointer, 'weight') + elif m_name == 'kernel': + array = np.transpose(array) + try: + assert pointer.shape == array.shape + except AssertionError as e: + e.args += (pointer.shape, array.shape) + raise + print("Initialize PyTorch weight {}".format(name)) + pointer.data = torch.from_numpy(array) + 
return model + + +def gelu(x): + """Implementation of the gelu activation function. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + Also see https://arxiv.org/abs/1606.08415 + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + + +def swish(x): + return x * torch.sigmoid(x) + +''' + +try: + from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm +except ImportError: + logger.info( + "Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .") +''' +class BertLayerNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-12): + """Construct a layernorm module in the TF style (epsilon inside the square root). + """ + super(BertLayerNorm, self).__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.bias = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward(self, x): + u = x.mean(-1, keepdim=True) + s = (x - u).pow(2).mean(-1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.variance_epsilon) + return self.weight * x + self.bias + + +class HeadAttention(nn.Module): + def __init__(self, config, hidden_size, head_num, head_used): + super(HeadAttention, self).__init__() + self.head_num = head_num + self.head_used = head_used + self.hidden_size = hidden_size + if self.hidden_size % self.head_num != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (self.hidden_size, self.head_num)) + + self.attention_head_size = int(self.hidden_size / self.head_num) + self.all_head_size = self.num_heads_used * self.attention_head_size + + self.query = nn.Linear(self.hidden_size, self.all_head_size) + self.key = nn.Linear(self.hidden_size, self.all_head_size) + self.value = nn.Linear(self.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_heads_used, + self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul( + query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / \ + math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
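As a sanity check of the note in the `gelu` docstring, the following standalone sketch (it assumes only a working PyTorch install) compares the exact erf-based form above with the tanh approximation quoted for OpenAI GPT:

```python
import math
import torch

def gelu_exact(x):
    # Exact GELU, as implemented above.
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))

def gelu_tanh(x):
    # Tanh approximation quoted in the docstring (OpenAI GPT variant).
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi)
                                     * (x + 0.044715 * torch.pow(x, 3))))

x = torch.linspace(-4, 4, steps=9)
# The two variants agree closely over this range; print the largest discrepancy.
print(torch.max(torch.abs(gelu_exact(x) - gelu_tanh(x))))
```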
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[ + :-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + if self.num_heads_used != self.num_attention_heads: + pad_shape = context_layer.size()[:-1] + \ + ((self.num_attention_heads - self.num_heads_used) + * self.attention_head_size, ) + + pad_layer = torch.zeros(*pad_shape).to(context_layer.device) + context_layer = torch.cat((context_layer, pad_layer), -1) + return context_layer + + +ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu} +NORM = {'layer_norm': BertLayerNorm} + + +class BertConfig(object): + """Configuration class to store the configuration of a `BertModel`. + """ + + def __init__(self, + vocab_size_or_config_json_file, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02, + pre_trained='', + training=''): + """Constructs BertConfig. + + Args: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu" and "swish" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. 
+ """ + if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 + and isinstance(vocab_size_or_config_json_file, unicode)): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.pre_trained = pre_trained + self.training = training + else: + raise ValueError("First argument must be either a vocabulary size (int)" + "or the path to a pretrained model config file (str)") + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = BertConfig(vocab_size_or_config_json_file=-1) + for key, value in json_object.items(): + config.__dict__[key] = value + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with open(json_file, "r", encoding='utf-8') as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path): + """ Save this instance to a json file.""" + with open(json_file_path, "w", encoding='utf-8') as writer: + writer.write(self.to_json_string()) + + +class BertEmbeddings(nn.Module): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding( + config.vocab_size, config.hidden_size, padding_idx=0) + self.position_embeddings = nn.Embedding( + config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding( + config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None): + seq_length = input_ids.size(1) + position_ids = torch.arange( + seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + +class BertSelfAttention(nn.Module): + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int( + config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[ + :-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, attention_mask, output_att=False): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = torch.matmul( + query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / \ + math.sqrt(self.attention_head_size) + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[ + :-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer, attention_scores + + +class BertAttention(nn.Module): + def __init__(self, config): + super(BertAttention, self).__init__() + + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + + def forward(self, input_tensor, attention_mask): + self_output, layer_att = self.self(input_tensor, attention_mask) + attention_output = self.output(self_output, input_tensor) + return attention_output, layer_att + + +class BertSelfOutput(nn.Module): + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertIntermediate(nn.Module): + def __init__(self, config, intermediate_size=-1): + super(BertIntermediate, self).__init__() + if intermediate_size < 0: + self.dense = nn.Linear( + config.hidden_size, config.intermediate_size) + else: + self.dense = nn.Linear(config.hidden_size, intermediate_size) + if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): + self.intermediate_act_fn = ACT2FN[config.hidden_act] + else: + self.intermediate_act_fn = config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Module): + def __init__(self, config, intermediate_size=-1): + super(BertOutput, self).__init__() + if intermediate_size < 0: + self.dense = nn.Linear( + config.intermediate_size, config.hidden_size) + else: + self.dense = nn.Linear(intermediate_size, config.hidden_size) + self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Module): + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask): + attention_output, layer_att = self.attention( + hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + + return layer_output, layer_att + + +class BertEncoder(nn.Module): + def __init__(self, config): + super(BertEncoder, self).__init__() + self.layer = nn.ModuleList([BertLayer(config) + for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask): + all_encoder_layers = [] + all_encoder_atts = [] + for _, layer_module in enumerate(self.layer): + all_encoder_layers.append(hidden_states) + 
hidden_states, layer_att = layer_module( + hidden_states, attention_mask) + all_encoder_atts.append(layer_att) + + all_encoder_layers.append(hidden_states) + return all_encoder_layers, all_encoder_atts + + +class BertPooler(nn.Module): + def __init__(self, config, recurs=None): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + self.config = config + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. "-1" refers to last layer + pooled_output = hidden_states[-1][:, 0] + + pooled_output = self.dense(pooled_output) + pooled_output = self.activation(pooled_output) + + return pooled_output + + +class BertPredictionHeadTransform(nn.Module): + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + # Need to unty it when we separate the dimensions of hidden and emb + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): + self.transform_act_fn = ACT2FN[config.hidden_act] + else: + self.transform_act_fn = config.hidden_act + self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
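The comment above describes weight tying between the output decoder and the input embeddings, which `BertLMPredictionHead` sets up below; a minimal standalone illustration with made-up sizes:

```python
import torch
from torch import nn

vocab_size, hidden_size = 100, 16            # illustrative sizes
embedding = nn.Embedding(vocab_size, hidden_size)

# The decoder maps hidden states back to vocabulary logits and reuses the
# embedding matrix; only the output bias is a new parameter.
decoder = nn.Linear(hidden_size, vocab_size, bias=False)
decoder.weight = embedding.weight            # same Parameter object: weights stay tied
bias = nn.Parameter(torch.zeros(vocab_size))

hidden = torch.randn(2, 7, hidden_size)
logits = decoder(hidden) + bias              # shape [2, 7, vocab_size]
assert decoder.weight.data_ptr() == embedding.weight.data_ptr()
```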
+ self.decoder = nn.Linear(bert_model_embedding_weights.size(1), + bert_model_embedding_weights.size(0), + bias=False) + self.decoder.weight = bert_model_embedding_weights + self.bias = nn.Parameter(torch.zeros( + bert_model_embedding_weights.size(0))) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead( + config, bert_model_embedding_weights) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Module): + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Module): + def __init__(self, config, bert_model_embedding_weights): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead( + config, bert_model_embedding_weights) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class BertPreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. + """ + + def __init__(self, config, *inputs, **kwargs): + super(BertPreTrainedModel, self).__init__() + if not isinstance(config, BertConfig): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `BertConfig`. " + "To create a model from a Google pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + )) + self.config = config + + def init_bert_weights(self, module): + """ Initialize the weights. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_( + mean=0.0, std=self.config.initializer_range) + elif isinstance(module, BertLayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + @classmethod + def from_scratch(cls, pretrained_model_name_or_path, *inputs, **kwargs): + resolved_config_file = os.path.join( + pretrained_model_name_or_path, CONFIG_NAME) + config = BertConfig.from_json_file(resolved_config_file) + + logger.info("Model config {}".format(config)) + model = cls(config, *inputs, **kwargs) + return model + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): + """ + Instantiate a BertPreTrainedModel from a pre-trained model file or a pytorch state dict. + Download and cache the pre-trained model file if needed. + + Params: + pretrained_model_name_or_path: either: + - a str with the name of a pre-trained model to load selected in the list of: + . `bert-base-uncased` + . `bert-large-uncased` + . `bert-base-cased` + . 
`bert-large-cased` + . `bert-base-multilingual-uncased` + . `bert-base-multilingual-cased` + . `bert-base-chinese` + - a path or url to a pretrained model archive containing: + . `bert_config.json` a configuration file for the model + . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance + - a path or url to a pretrained model archive containing: + . `bert_config.json` a configuration file for the model + . `model.chkpt` a TensorFlow checkpoint + from_tf: should we load the weights from a locally saved TensorFlow checkpoint + cache_dir: an optional path to a folder in which the pre-trained models will be cached. + state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models + *inputs, **kwargs: additional input for the specific Bert class + (ex: num_labels for BertForSequenceClassification) + """ + state_dict = kwargs.get('state_dict', None) + kwargs.pop('state_dict', None) + from_tf = kwargs.get('from_tf', False) + kwargs.pop('from_tf', None) + + # Load config + config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) + config = BertConfig.from_json_file(config_file) + logger.info("Model config {}".format(config)) + # Instantiate model. + + model = cls(config, *inputs, **kwargs) + if state_dict is None and not from_tf: + weights_path = os.path.join( + pretrained_model_name_or_path, WEIGHTS_NAME) + logger.info("Loading model {}".format(weights_path)) + state_dict = torch.load(weights_path, map_location='cpu') + state_dict.pop('classifier.weight') + state_dict.pop('classifier.bias') + if from_tf: + # Directly load from a TensorFlow checkpoint + weights_path = os.path.join( + pretrained_model_name_or_path, TF_WEIGHTS_NAME) + return load_tf_weights_in_bert(model, weights_path) + # Load from a PyTorch state_dict + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if 'gamma' in key: + new_key = key.replace('gamma', 'weight') + if 'beta' in key: + new_key = key.replace('beta', 'bias') + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=''): + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + start_prefix = '' + if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()): + start_prefix = 'bert.' 
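The renaming loop in `from_pretrained` maps old TensorFlow-style parameter names onto PyTorch ones before loading; a toy illustration of the same gamma/beta substitution (keys and values are made up):

```python
from collections import OrderedDict

state_dict = OrderedDict([
    ('bert.encoder.layer.0.attention.output.LayerNorm.gamma', 0),  # dummy values
    ('bert.encoder.layer.0.attention.output.LayerNorm.beta', 1),
])

# Same mapping as in from_pretrained: gamma -> weight, beta -> bias.
for key in list(state_dict.keys()):
    new_key = key.replace('gamma', 'weight').replace('beta', 'bias')
    if new_key != key:
        state_dict[new_key] = state_dict.pop(key)

print(list(state_dict.keys()))
# ['bert.encoder.layer.0.attention.output.LayerNorm.weight',
#  'bert.encoder.layer.0.attention.output.LayerNorm.bias']
```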
+ + logger.info('loading model...') + load(model, prefix=start_prefix) + logger.info('done!') + if len(missing_keys) > 0: + logger.info("Weights of {} not initialized from pretrained model: {}".format( + model.__class__.__name__, missing_keys)) + if len(unexpected_keys) > 0: + logger.info("Weights from pretrained model not used in {}: {}".format( + model.__class__.__name__, unexpected_keys)) + if len(error_msgs) > 0: + raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( + model.__class__.__name__, "\n\t".join(error_msgs))) + + return model + + +class BertModel(BertPreTrainedModel): + """BERT model ("Bidirectional Embedding Representations from a Transformer"). + + Params: + config: a BertConfig class instance with the configuration to build a new model + + Inputs: + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `output_all_encoded_layers`: boolean which controls the content of the `encoded_layers` output as described below. Default: `True`. + + Outputs: Tuple of (encoded_layers, pooled_output) + `encoded_layers`: controled by `output_all_encoded_layers` argument: + - `output_all_encoded_layers=True`: outputs a list of the full sequences of encoded-hidden-states at the end + of each attention block (i.e. 12 full sequences for BERT-base, 24 for BERT-large), each + encoded-hidden-state is a torch.FloatTensor of size [batch_size, sequence_length, hidden_size], + - `output_all_encoded_layers=False`: outputs only the full sequence of hidden-states corresponding + to the last attention block of shape [batch_size, sequence_length, hidden_size], + `pooled_output`: a torch.FloatTensor of size [batch_size, hidden_size] which is the output of a + classifier pretrained on top of the hidden state associated to the first character of the + input (`CLS`) to train on the Next-Sentence task (see BERT's paper). 
+ + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) + ``` + """ + + def __init__(self, config): + super(BertModel, self).__init__(config) + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, + output_all_encoded_layers=True, output_att=True): + + if attention_mask is None: + attention_mask = torch.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = torch.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. + extended_attention_mask = extended_attention_mask.to( + dtype=next(self.parameters()).dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + embedding_output = self.embeddings(input_ids, token_type_ids) + encoded_layers, layer_atts = self.encoder(embedding_output, + extended_attention_mask) + + pooled_output = self.pooler(encoded_layers) + if not output_all_encoded_layers: + encoded_layers = encoded_layers[-1] + + if not output_att: + return encoded_layers, pooled_output + + return encoded_layers, layer_atts, pooled_output + + +class BertForPreTraining(BertPreTrainedModel): + """BERT model with pre-training heads. + This module comprises the BERT model followed by the two pre-training heads: + - the masked language modeling head, and + - the next sentence classification head. + + Params: + config: a BertConfig class instance with the configuration to build a new model. + + Inputs: + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. 
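The additive mask built in `BertModel.forward` above turns a 0/1 padding mask into large negative biases on the attention scores; a small numeric sketch:

```python
import torch

# Two sequences of length 4; the second one has a padded position at the end.
attention_mask = torch.tensor([[1, 1, 1, 1],
                               [1, 1, 1, 0]])

extended = attention_mask.unsqueeze(1).unsqueeze(2).float()  # shape [2, 1, 1, 4]
extended = (1.0 - extended) * -10000.0

# Real positions get 0.0 and the padded position gets -10000.0, so adding this
# to the raw attention scores effectively removes the padded key in the softmax.
print(extended[1, 0, 0])
```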
It's the mask that we typically use for attention when + a batch has varying length sentences. + `masked_lm_labels`: optional masked language modeling labels: torch.LongTensor of shape [batch_size, sequence_length] + with indices selected in [-1, 0, ..., vocab_size]. All labels set to -1 are ignored (masked), the loss + is only computed for the labels set in [0, ..., vocab_size] + `next_sentence_label`: optional next sentence classification loss: torch.LongTensor of shape [batch_size] + with indices selected in [0, 1]. + 0 => next sentence is the continuation, 1 => next sentence is a random sentence. + + Outputs: + if `masked_lm_labels` and `next_sentence_label` are not `None`: + Outputs the total_loss which is the sum of the masked language modeling loss and the next + sentence classification loss. + if `masked_lm_labels` or `next_sentence_label` is `None`: + Outputs a tuple comprising + - the masked language modeling logits of shape [batch_size, sequence_length, vocab_size], and + - the next sentence classification logits of shape [batch_size, 2]. + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = BertForPreTraining(config) + masked_lm_logits_scores, seq_relationship_logits = model(input_ids, token_type_ids, input_mask) + ``` + """ + + def __init__(self, config): + super(BertForPreTraining, self).__init__(config) + self.bert = BertModel(config) + self.cls = BertPreTrainingHeads( + config, self.bert.embeddings.word_embeddings.weight) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, + masked_lm_labels=None, next_sentence_label=None): + sequence_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, + output_all_encoded_layers=False) + prediction_scores, seq_relationship_score = self.cls( + sequence_output, pooled_output) + + if masked_lm_labels is not None and next_sentence_label is not None: + loss_fct = CrossEntropyLoss(ignore_index=-1) + masked_lm_loss = loss_fct( + prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) + next_sentence_loss = loss_fct( + seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) + total_loss = masked_lm_loss + next_sentence_loss + return total_loss + elif masked_lm_labels is not None: + loss_fct = CrossEntropyLoss(ignore_index=-1) + masked_lm_loss = loss_fct( + prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) + total_loss = masked_lm_loss + return total_loss + else: + return prediction_scores, seq_relationship_score + + +class TinyBertForPreTraining(BertPreTrainedModel): + def __init__(self, config, fit_size=768): + super(TinyBertForPreTraining, self).__init__(config) + self.bert = BertModel(config) + self.cls = BertPreTrainingHeads( + config, self.bert.embeddings.word_embeddings.weight) + self.fit_dense = nn.Linear(config.hidden_size, fit_size) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, + attention_mask=None, masked_lm_labels=None, + next_sentence_label=None, labels=None): + sequence_output, att_output, pooled_output = self.bert( + input_ids, token_type_ids, attention_mask) + tmp = [] + for s_id, 
sequence_layer in enumerate(sequence_output): + tmp.append(self.fit_dense(sequence_layer)) + sequence_output = tmp + + return att_output, sequence_output + + +class BertForMaskedLM(BertPreTrainedModel): + """BERT model with the masked language modeling head. + This module comprises the BERT model followed by the masked language modeling head. + + Params: + config: a BertConfig class instance with the configuration to build a new model. + + Inputs: + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `masked_lm_labels`: masked language modeling labels: torch.LongTensor of shape [batch_size, sequence_length] + with indices selected in [-1, 0, ..., vocab_size]. All labels set to -1 are ignored (masked), the loss + is only computed for the labels set in [0, ..., vocab_size] + + Outputs: + if `masked_lm_labels` is not `None`: + Outputs the masked language modeling loss. + if `masked_lm_labels` is `None`: + Outputs the masked language modeling logits of shape [batch_size, sequence_length, vocab_size]. + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = BertForMaskedLM(config) + masked_lm_logits_scores = model(input_ids, token_type_ids, input_mask) + ``` + """ + + def __init__(self, config): + super(BertForMaskedLM, self).__init__(config) + self.bert = BertModel(config) + self.cls = BertOnlyMLMHead( + config, self.bert.embeddings.word_embeddings.weight) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, + output_att=False, infer=False): + sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, + output_all_encoded_layers=True, output_att=output_att) + + if output_att: + sequence_output, att_output = sequence_output + prediction_scores = self.cls(sequence_output[-1]) + + if masked_lm_labels is not None: + loss_fct = CrossEntropyLoss(ignore_index=-1) + masked_lm_loss = loss_fct( + prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1)) + if not output_att: + return masked_lm_loss + else: + return masked_lm_loss, att_output + else: + if not output_att: + return prediction_scores + else: + return prediction_scores, att_output + + +class BertForNextSentencePrediction(BertPreTrainedModel): + """BERT model with next sentence prediction head. + This module comprises the BERT model followed by the next sentence classification head. 
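The label convention used by `BertForMaskedLM` (label -1 marks positions that should not contribute to the loss) is handled by `ignore_index`; a small sketch with made-up logits:

```python
import torch
from torch.nn import CrossEntropyLoss

vocab_size = 10                                    # illustrative vocabulary size
prediction_scores = torch.randn(1, 4, vocab_size)  # [batch, seq_len, vocab]
masked_lm_labels = torch.tensor([[-1, 3, -1, 7]])  # only positions 1 and 3 are labelled

loss_fct = CrossEntropyLoss(ignore_index=-1)
loss = loss_fct(prediction_scores.view(-1, vocab_size), masked_lm_labels.view(-1))
# The loss averages over the two labelled positions; the -1 entries are skipped.
print(loss)
```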
+ + Params: + config: a BertConfig class instance with the configuration to build a new model. + + Inputs: + `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] + with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts + `extract_features.py`, `run_classifier.py` and `run_squad.py`) + `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token + types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to + a `sentence B` token (see BERT paper for more details). + `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices + selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max + input sequence length in the current batch. It's the mask that we typically use for attention when + a batch has varying length sentences. + `next_sentence_label`: next sentence classification loss: torch.LongTensor of shape [batch_size] + with indices selected in [0, 1]. + 0 => next sentence is the continuation, 1 => next sentence is a random sentence. + + Outputs: + if `next_sentence_label` is not `None`: + Outputs the total_loss which is the sum of the masked language modeling loss and the next + sentence classification loss. + if `next_sentence_label` is `None`: + Outputs the next sentence classification logits of shape [batch_size, 2]. + + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) + + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) + + model = BertForNextSentencePrediction(config) + seq_relationship_logits = model(input_ids, token_type_ids, input_mask) + ``` + """ + + def __init__(self, config): + super(BertForNextSentencePrediction, self).__init__(config) + self.bert = BertModel(config) + self.cls = BertOnlyNSPHead(config) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, next_sentence_label=None): + _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, + output_all_encoded_layers=False) + seq_relationship_score = self.cls(pooled_output) + + if next_sentence_label is not None: + loss_fct = CrossEntropyLoss(ignore_index=-1) + next_sentence_loss = loss_fct( + seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)) + return next_sentence_loss + else: + return seq_relationship_score + + +class BertForSentencePairClassification(BertPreTrainedModel): + def __init__(self, config, num_labels): + super(BertForSentencePairClassification, self).__init__(config) + self.num_labels = num_labels + self.bert = BertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size * 3, num_labels) + self.apply(self.init_bert_weights) + + def forward(self, a_input_ids, b_input_ids, a_token_type_ids=None, b_token_type_ids=None, + a_attention_mask=None, b_attention_mask=None, labels=None): + _, a_pooled_output = self.bert( + a_input_ids, a_token_type_ids, a_attention_mask, output_all_encoded_layers=False) + # a_pooled_output = self.dropout(a_pooled_output) + + _, b_pooled_output = self.bert( + b_input_ids, b_token_type_ids, b_attention_mask, 
output_all_encoded_layers=False) + # b_pooled_output = self.dropout(b_pooled_output) + + logits = self.classifier(torch.relu(torch.cat((a_pooled_output, b_pooled_output, + torch.abs(a_pooled_output - b_pooled_output)), -1))) + + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + return loss + else: + return logits + + +class TinyBertForSequenceClassification_for_finetune(BertPreTrainedModel): + def __init__(self, config, num_labels, fit_size=768): + super(TinyBertForSequenceClassification_for_finetune, self).__init__(config) + self.num_labels = num_labels + self.bert = BertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, num_labels) + self.fit_dense = nn.Linear(config.hidden_size, fit_size) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, attention_mask=None, + labels=None, is_student=False): + + sequence_output, att_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, + output_all_encoded_layers=True, output_att=True) + + logits = self.classifier(torch.relu(pooled_output)) + + tmp = [] + if is_student: + for s_id, sequence_layer in enumerate(sequence_output): + tmp.append(self.fit_dense(sequence_layer)) + sequence_output = tmp + return logits, att_output, sequence_output diff --git a/PyTorch/contrib/others/DQN/train_dqn.py b/PyTorch/contrib/others/DQN/train_dqn.py index 06d1210a1e96f0da05cd29d1bcdcfff82e54c4a6..feca5641edf8f358e8e31801f3d6ca811bef1f30 100644 --- a/PyTorch/contrib/others/DQN/train_dqn.py +++ b/PyTorch/contrib/others/DQN/train_dqn.py @@ -1,38 +1,38 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from examples import * - -if __name__ == '__main__': - cf = Config() - cf.add_argument('--game', type=str, default='BreakoutNoFrameskip-v4') - cf.add_argument('--use_device', type=str, default='use_npu') - cf.add_argument('--device_id', type=int, default=0) - cf.add_argument('--max_steps', type=int, default=2e7) - cf.add_argument('--save_interval', type=int, default=0) - cf.add_argument('--eval_interval', type=int, default=0) - cf.add_argument('--log_interval', type=int, default=1e3) - cf.add_argument('--tag', type=str, default=None) - cf.add_argument('--pth_path', type=str, default='null') - cf.add_argument('--status_path', type=str, default='null') - cf.merge() - - param = dict(game=cf.game, max_steps=cf.max_steps, save_interval=cf.save_interval, eval_interval=cf.eval_interval, - log_interval=cf.log_interval, pth_path=cf.pth_path, status_path=cf.status_path, tag=cf.tag, device_id=cf.device_id,maxremark=dqn_pixel.__name__) - - mkdir('data') - random_seed() - select_device(cf.use_device, cf.device_id) - dqn_pixel(**param) +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from examples import * + +if __name__ == '__main__': + cf = Config() + cf.add_argument('--game', type=str, default='BreakoutNoFrameskip-v4') + cf.add_argument('--use_device', type=str, default='use_npu') + cf.add_argument('--device_id', type=int, default=0) + cf.add_argument('--max_steps', type=int, default=2e7) + cf.add_argument('--save_interval', type=int, default=0) + cf.add_argument('--eval_interval', type=int, default=0) + cf.add_argument('--log_interval', type=int, default=1e3) + cf.add_argument('--tag', type=str, default=None) + cf.add_argument('--pth_path', type=str, default='null') + cf.add_argument('--status_path', type=str, default='null') + cf.merge() + + param = dict(game=cf.game, max_steps=cf.max_steps, save_interval=cf.save_interval, eval_interval=cf.eval_interval, + log_interval=cf.log_interval, pth_path=cf.pth_path, status_path=cf.status_path, tag=cf.tag, device_id=cf.device_id,maxremark=dqn_pixel.__name__) + + mkdir('data') + random_seed() + select_device(cf.use_device, cf.device_id) + dqn_pixel(**param) exit() \ No newline at end of file diff --git a/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch/README.md b/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch/README.md index 9ce0995753e74a1de90b678c774bdcebe428a022..83e71bddbacfe2862060d86786a70460aa47a6ae 100644 --- a/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch/README.md +++ b/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch/README.md @@ -1,102 +1,102 @@ -- [基本信息](#基本信息.md) -- [概述](#概述.md) -- [训练环境准备](#训练环境准备.md) -- [快速上手](#快速上手.md) - -

基本信息

- -**发布者(Publisher):huawei** - -**应用领域(Application Domain):Object Detection** - -**版本(Version):1.0** - -**修改时间(Modified) :2021.05.20** - -_**大小(Size)**_**:318M** - -**框架(Framework):PyTorch1.5** - -**模型格式(Model Format):pth** - -**精度(Precision):Mixed** - -**处理器(Processor):昇腾910** - -**应用级别(Categories):Research** - -**描述(Description):基于PyTorch框架的FasterRCNN目标检测网络** - -

概述

-Faster R-CNN是截至目前,RCNN系列算法的最杰出产物,two-stage中最为经典的物体检测算法。推理第一阶段先找出图片中待检测物体的anchor矩形框(对背景、待检测物体进行二分类),第二阶段对anchor框内待检测物体进行分类。R-CNN系列物体检测算法的思路都是先产生一些待检测框,再对检测框进行分类。Faster R-CNN使用神经网络生成待检测框,替代了其他R-CNN算法中通过规则等产生候选框的方法,从而实现了端到端训练,并且大幅提速。本文档描述的Faster R-CNN是基于PyTorch实现的版本。 - -- 参考实现: - - https://github.com/facebookresearch/detectron2 - -- 适配昇腾 AI 处理器的实现: - - https://gitee.com/ascend/modelzoo/tree/master/built-in/PyTorch/Research/cv/detection/Faster_Mask_RCNN_for_PyTorch - -

训练环境准备

- -硬件环境准备请参见[各硬件产品文档](https://ascend.huawei.com/#/document?tag=developer)。需要在硬件设备上安装固件与驱动。 - -关键依赖请获取NPU适配版本: - -PyTorch - -apex - -tensor-fused-plugin - -另外该代码运行需要从源编译库: - - cd Faster_Mask_RCNN_for_PyTorch - python3 -m pip install -e ./ -## 默认配置 - -- 训练超参(8卡): - - Batch size: 16(2 per device) - - Momentum: 0.9 - - LR scheduler: step - - Learning rate\(LR\): 0.02 - - Weight decay: 0.0001 - - Label smoothing: 0.1 - - Train epoch: 12 - - -## 混合精度训练 - -昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 - -## 开启混合精度 -在启动脚本中执行训练脚本处配置命令行参数 AMP 1 即可开启NPU上混合精度训练模式。 - - -## 数据集准备 - -默认使用coco2017数据集,请用户自行获取。数据集路径通过启动脚本的命令行参数--data_path配置。应有如下目录结构 - -/path/to/dataset/coco - -## 快速上手 - -1.下载预训练模型。 - -以resnet50为例: - -wget https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl - -将其置于数据集所在目录下 - -另附resnet101[下载地址](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl) - -2.开始训练。 -- 单机单卡 - - cd test && bash ./train_full_1p.sh --data_path=/path/to/dataset - -- 单机8卡 - - cd test && bash ./train_full_8p.sh --data_path=/path/to/dataset +- [基本信息](#基本信息.md) +- [概述](#概述.md) +- [训练环境准备](#训练环境准备.md) +- [快速上手](#快速上手.md) + +

基本信息

+ +**发布者(Publisher):huawei** + +**应用领域(Application Domain):Object Detection** + +**版本(Version):1.0** + +**修改时间(Modified) :2021.05.20** + +_**大小(Size)**_**:318M** + +**框架(Framework):PyTorch1.5** + +**模型格式(Model Format):pth** + +**精度(Precision):Mixed** + +**处理器(Processor):昇腾910** + +**应用级别(Categories):Research** + +**描述(Description):基于PyTorch框架的FasterRCNN目标检测网络** + +

概述

+Faster R-CNN是截至目前,RCNN系列算法的最杰出产物,two-stage中最为经典的物体检测算法。推理第一阶段先找出图片中待检测物体的anchor矩形框(对背景、待检测物体进行二分类),第二阶段对anchor框内待检测物体进行分类。R-CNN系列物体检测算法的思路都是先产生一些待检测框,再对检测框进行分类。Faster R-CNN使用神经网络生成待检测框,替代了其他R-CNN算法中通过规则等产生候选框的方法,从而实现了端到端训练,并且大幅提速。本文档描述的Faster R-CNN是基于PyTorch实现的版本。 + +- 参考实现: + + https://github.com/facebookresearch/detectron2 + +- 适配昇腾 AI 处理器的实现: + + https://gitee.com/ascend/modelzoo/tree/master/built-in/PyTorch/Research/cv/detection/Faster_Mask_RCNN_for_PyTorch + +

训练环境准备

+ +硬件环境准备请参见[各硬件产品文档](https://ascend.huawei.com/#/document?tag=developer)。需要在硬件设备上安装固件与驱动。 + +关键依赖请获取NPU适配版本: + +PyTorch + +apex + +tensor-fused-plugin + +另外该代码运行需要从源编译库: + + cd Faster_Mask_RCNN_for_PyTorch + python3 -m pip install -e ./ +## 默认配置 + +- 训练超参(8卡): + - Batch size: 16(2 per device) + - Momentum: 0.9 + - LR scheduler: step + - Learning rate\(LR\): 0.02 + - Weight decay: 0.0001 + - Label smoothing: 0.1 + - Train epoch: 12 + + +## 混合精度训练 + +昇腾910 AI处理器提供自动混合精度功能,可以针对全网中float32数据类型的算子,按照内置的优化策略,自动将部分float32的算子降低精度到float16,从而在精度损失很小的情况下提升系统性能并减少内存使用。 + +## 开启混合精度 +在启动脚本中执行训练脚本处配置命令行参数 AMP 1 即可开启NPU上混合精度训练模式。 + + +## 数据集准备 + +默认使用coco2017数据集,请用户自行获取。数据集路径通过启动脚本的命令行参数--data_path配置。应有如下目录结构 + +/path/to/dataset/coco + +## 快速上手 + +1.下载预训练模型。 + +以resnet50为例: + +wget https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl + +将其置于数据集所在目录下 + +另附resnet101[下载地址](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl) + +2.开始训练。 +- 单机单卡 + + cd test && bash ./train_full_1p.sh --data_path=/path/to/dataset + +- 单机8卡 + + cd test && bash ./train_full_8p.sh --data_path=/path/to/dataset diff --git a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_8p.sh index 504e63730482fa99c7f893e1fa66f635d6015aa0..0456da4f2275cf70ebe7dfc537c409cdb2e0e814 100644 --- a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_8p.sh @@ -1,125 +1,125 @@ -#!/bin/bash - -cur_path=`pwd`/../ -#失败用例打屏 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -#基础参数,需要模型审视修改 -#Batch Size -batch_size=1 -#网络名称,同目录名称 -Network="2D_Unet_ID0624_for_PyTorch" -#Device数量,单卡默认为1 -RANK_SIZE=8 -#训练epoch,可选 -train_epochs=5 -#训练step -train_steps= -#学习率 -learning_rate=1e-3 -#参数配置 -data_path="" - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_accormance_1p.sh " - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be config" - exit 1 -fi -##############执行训练########## -cd $cur_path -if [ -d $cur_path/test/output ];then - rm -rf $cur_path/test/output/* - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -fi -wait - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 - - -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py -sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py -start=$(date +%s) -#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/test/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - nohup python3 train.py -e $train_epochs -l 0.0001 --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ -done -wait -end=$(date +%s) -e2e_time=$(( $end - $start )) - - - -#输出训练精度,需要模型审视修改 -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` 
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log -TrainingTime=0 -FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*8}') -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -ActualFPS=${FPS} - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - - -#精度值 -train_accuracy=`grep "Validation Dice" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log - +#!/bin/bash + +cur_path=`pwd`/../ +#失败用例打屏 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=1 +#网络名称,同目录名称 +Network="2D_Unet_ID0624_for_PyTorch" +#Device数量,单卡默认为1 +RANK_SIZE=8 +#训练epoch,可选 +train_epochs=5 +#训练step +train_steps= +#学习率 +learning_rate=1e-3 +#参数配置 +data_path="" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_accormance_1p.sh " + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 + + +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py +sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py +start=$(date +%s) +#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + 
mkdir -p $cur_path/test/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + nohup python3 train.py -e $train_epochs -l 0.0001 --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ +done +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + + + +#输出训练精度,需要模型审视修改 +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log +TrainingTime=0 +FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*8}') +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + + +#精度值 +train_accuracy=`grep "Validation Dice" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log + diff --git a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh index cff304ed2b1e884cd43873500eb8a05e552d8112..6c6f6681d92bef47bf582890d7bfb1d1057d5ce8 100644 --- a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh @@ -1,149 +1,149 @@ -#!/bin/bash - -cur_path=`pwd`/../ -#失败用例打屏 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -source /usr/local/Ascend/bin/setenv.bash - -export PATH=/usr/local/hdf5/bin:$PATH -export LD_LIBRARY_PATH=/usr/local/hdf5/lib:$LD_LIBRARY_PATH -export 
LIBRARY_PATH=/usr/local/hdf5/lib:$LIBRARY_PATH -export CPATH=/usr/local/hdf5/include:$CPATH - -#基础参数,需要模型审视修改 -#Batch Size -batch_size=1 -#网络名称,同目录名称 -Network="2D_Unet_ID0624_for_PyTorch" -#Device数量,单卡默认为1 -RANK_SIZE=16 -#训练epoch,可选 -train_epochs=1 -#训练step -train_steps= -#学习率 -learning_rate=1e-3 -#参数配置 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh " - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be config" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - -##############执行训练########## -cd $cur_path -if [ -d $cur_path/test/output ];then - rm -rf $cur_path/test/output/* - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -fi -wait - - -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py -sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py -#sed -i "s|if global_step == 100: pass|if global_step == 100: break|g" $cur_path/train.py -start=$(date +%s) -#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_PORT=23456 -NPUS=($(seq 0 7)) -rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/test/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` - echo run process ${rank} - nohup python3 train.py -e $train_epochs --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ -done -wait -end=$(date +%s) -e2e_time=$(( $end - $start )) - - -#sed -i "s|if global_step == 100: break|if global_step == 100: pass|g" $cur_path/train.py - -#输出训练精度,需要模型审视修改 -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log -TrainingTime=0 -FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*16}') - -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -ActualFPS=${FPS} - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk 
'{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - - -#精度值 -#train_accuracy=`grep "loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss_2.txt|awk -F " " '{print $8}'|awk 'END {print}'` - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd`/../ +#失败用例打屏 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +source /usr/local/Ascend/bin/setenv.bash + +export PATH=/usr/local/hdf5/bin:$PATH +export LD_LIBRARY_PATH=/usr/local/hdf5/lib:$LD_LIBRARY_PATH +export LIBRARY_PATH=/usr/local/hdf5/lib:$LIBRARY_PATH +export CPATH=/usr/local/hdf5/include:$CPATH + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=1 +#网络名称,同目录名称 +Network="2D_Unet_ID0624_for_PyTorch" +#Device数量,单卡默认为1 +RANK_SIZE=16 +#训练epoch,可选 +train_epochs=1 +#训练step +train_steps= +#学习率 +learning_rate=1e-3 +#参数配置 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh " + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + + +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py +sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py +#sed -i "s|if global_step == 100: pass|if global_step == 100: break|g" $cur_path/train.py +start=$(date +%s) +#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_PORT=23456 +NPUS=($(seq 0 7)) +rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` +rank=0 
+for i in ${NPUS[@]} +do + mkdir -p $cur_path/test/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` + echo run process ${rank} + nohup python3 train.py -e $train_epochs --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ +done +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + + +#sed -i "s|if global_step == 100: break|if global_step == 100: pass|g" $cur_path/train.py + +#输出训练精度,需要模型审视修改 +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log +TrainingTime=0 +FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*16}') + +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + + +#精度值 +#train_accuracy=`grep "loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss_2.txt|awk -F " " '{print $8}'|awk 'END {print}'` + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_8p.sh index 9876a9c467e0446b45a6123ccb96f8318f8613f1..472d0c48a9af98eeffdf19d4a8bbfe6011564713 100644 --- a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_8p.sh @@ -1,128 +1,128 @@ -#!/bin/bash - -cur_path=`pwd`/../ -#失败用例打屏 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 - -#基础参数,需要模型审视修改 -#Batch Size -batch_size=1 -#网络名称,同目录名称 
-Network="2D_Unet_ID0624_for_PyTorch" -#Device数量,单卡默认为1 -RANK_SIZE=8 -#训练epoch,可选 -train_epochs=1 -#训练step -train_steps= -#学习率 -learning_rate=1e-3 -#参数配置 -data_path="" - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh " - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be config" - exit 1 -fi -##############执行训练########## -cd $cur_path -if [ -d $cur_path/test/output ];then - rm -rf $cur_path/test/output/* - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID -fi -wait - - -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py -sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py -sed -i "s|if global_step == 100: pass|if global_step == 100: break|g" $cur_path/train.py -start=$(date +%s) -#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/test/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - nohup python3 train.py -e $train_epochs --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ -done -wait -end=$(date +%s) -e2e_time=$(( $end - $start )) - - -sed -i "s|if global_step == 100: break|if global_step == 100: pass|g" $cur_path/train.py - -#输出训练精度,需要模型审视修改 -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log -TrainingTime=0 -FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*8}') - -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -ActualFPS=${FPS} - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - - -#精度值 -#train_accuracy=`grep "loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss_2.txt|awk -F " " '{print $8}'|awk 'END {print}'` - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName 
= ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log - +#!/bin/bash + +cur_path=`pwd`/../ +#失败用例打屏 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 + +#基础参数,需要模型审视修改 +#Batch Size +batch_size=1 +#网络名称,同目录名称 +Network="2D_Unet_ID0624_for_PyTorch" +#Device数量,单卡默认为1 +RANK_SIZE=8 +#训练epoch,可选 +train_epochs=1 +#训练step +train_steps= +#学习率 +learning_rate=1e-3 +#参数配置 +data_path="" + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh " + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be config" + exit 1 +fi +##############执行训练########## +cd $cur_path +if [ -d $cur_path/test/output ];then + rm -rf $cur_path/test/output/* + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID +fi +wait + + +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py +sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py +sed -i "s|if global_step == 100: pass|if global_step == 100: break|g" $cur_path/train.py +start=$(date +%s) +#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + mkdir -p $cur_path/test/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + nohup python3 train.py -e $train_epochs --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ +done +wait +end=$(date +%s) +e2e_time=$(( $end - $start )) + + +sed -i "s|if global_step == 100: break|if global_step == 100: pass|g" $cur_path/train.py + +#输出训练精度,需要模型审视修改 +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log +TrainingTime=0 +FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*8}') + +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +ActualFPS=${FPS} + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > 
$cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + + +#精度值 +#train_accuracy=`grep "loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss_2.txt|awk -F " " '{print $8}'|awk 'END {print}'` + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log + diff --git a/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch/README_ori.md b/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch/README_ori.md index 82e051e68c24117f10850fdf1fd6ac93f04c22e4..1d803ac37c7a4278bdf0895b04232b2a53660d5b 100644 --- a/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch/README_ori.md +++ b/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch/README_ori.md @@ -1,109 +1,109 @@ -# PyTorch implementation of UNet++ (Nested U-Net) -[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE) - -This repository contains code for a image segmentation model based on [UNet++: A Nested U-Net Architecture for Medical Image Segmentation](https://arxiv.org/abs/1807.10165) implemented in PyTorch. - -[**NEW**] Add support for multi-class segmentation dataset. - -[**NEW**] Add support for PyTorch 1.x. - - -## Requirements -- PyTorch 1.x or 0.41 - -## Installation -1. Create an anaconda environment. -```sh -conda create -n= python=3.6 anaconda -conda activate -``` -2. Install PyTorch. -```sh -conda install pytorch torchvision cudatoolkit=10.1 -c pytorch -``` -3. Install pip packages. -```sh -pip install -r requirements.txt -``` - -## Training on [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018) dataset -1. Download dataset from [here](https://www.kaggle.com/c/data-science-bowl-2018/data) to inputs/ and unzip. The file structure is the following: -``` -inputs -└── data-science-bowl-2018 - ├── stage1_train - | ├── 00ae65... - │ │ ├── images - │ │ │ └── 00ae65... - │ │ └── masks - │ │ └── 00ae65... - │ ├── ... - | - ... -``` -2. Preprocess. -```sh -python preprocess_dsb2018.py -``` -3. Train the model. -```sh -python train.py --dataset dsb2018_96 --arch NestedUNet -``` -4. Evaluate. -```sh -python val.py --name dsb2018_96_NestedUNet_woDS -``` -### (Optional) Using LovaszHingeLoss -1. Clone LovaszSoftmax from [bermanmaxim/LovaszSoftmax](https://github.com/bermanmaxim/LovaszSoftmax). -``` -git clone https://github.com/bermanmaxim/LovaszSoftmax.git -``` -2. Train the model with LovaszHingeLoss. 
-``` -python train.py --dataset dsb2018_96 --arch NestedUNet --loss LovaszHingeLoss -``` - -## Training on original dataset -Make sure to put the files as the following structure (e.g. the number of classes is 2): -``` -inputs -└── - ├── images - | ├── 0a7e06.jpg - │ ├── 0aab0a.jpg - │ ├── 0b1761.jpg - │ ├── ... - | - └── masks - ├── 0 - | ├── 0a7e06.png - | ├── 0aab0a.png - | ├── 0b1761.png - | ├── ... - | - └── 1 - ├── 0a7e06.png - ├── 0aab0a.png - ├── 0b1761.png - ├── ... -``` - -1. Train the model. -``` -python train.py --dataset --arch NestedUNet --img_ext .jpg --mask_ext .png -``` -2. Evaluate. -``` -python val.py --name _NestedUNet_woDS -``` - -## Results -### DSB2018 (96x96) - -Here is the results on DSB2018 dataset (96x96) with LovaszHingeLoss. - -| Model | IoU | Loss | -|:------------------------------- |:-------:|:-------:| -| U-Net | 0.839 | 0.365 | -| Nested U-Net | 0.842 |**0.354**| -| Nested U-Net w/ Deepsupervision |**0.843**| 0.362 | +# PyTorch implementation of UNet++ (Nested U-Net) +[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE) + +This repository contains code for a image segmentation model based on [UNet++: A Nested U-Net Architecture for Medical Image Segmentation](https://arxiv.org/abs/1807.10165) implemented in PyTorch. + +[**NEW**] Add support for multi-class segmentation dataset. + +[**NEW**] Add support for PyTorch 1.x. + + +## Requirements +- PyTorch 1.x or 0.41 + +## Installation +1. Create an anaconda environment. +```sh +conda create -n= python=3.6 anaconda +conda activate +``` +2. Install PyTorch. +```sh +conda install pytorch torchvision cudatoolkit=10.1 -c pytorch +``` +3. Install pip packages. +```sh +pip install -r requirements.txt +``` + +## Training on [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018) dataset +1. Download dataset from [here](https://www.kaggle.com/c/data-science-bowl-2018/data) to inputs/ and unzip. The file structure is the following: +``` +inputs +└── data-science-bowl-2018 + ├── stage1_train + | ├── 00ae65... + │ │ ├── images + │ │ │ └── 00ae65... + │ │ └── masks + │ │ └── 00ae65... + │ ├── ... + | + ... +``` +2. Preprocess. +```sh +python preprocess_dsb2018.py +``` +3. Train the model. +```sh +python train.py --dataset dsb2018_96 --arch NestedUNet +``` +4. Evaluate. +```sh +python val.py --name dsb2018_96_NestedUNet_woDS +``` +### (Optional) Using LovaszHingeLoss +1. Clone LovaszSoftmax from [bermanmaxim/LovaszSoftmax](https://github.com/bermanmaxim/LovaszSoftmax). +``` +git clone https://github.com/bermanmaxim/LovaszSoftmax.git +``` +2. Train the model with LovaszHingeLoss. +``` +python train.py --dataset dsb2018_96 --arch NestedUNet --loss LovaszHingeLoss +``` + +## Training on original dataset +Make sure to put the files as the following structure (e.g. the number of classes is 2): +``` +inputs +└── + ├── images + | ├── 0a7e06.jpg + │ ├── 0aab0a.jpg + │ ├── 0b1761.jpg + │ ├── ... + | + └── masks + ├── 0 + | ├── 0a7e06.png + | ├── 0aab0a.png + | ├── 0b1761.png + | ├── ... + | + └── 1 + ├── 0a7e06.png + ├── 0aab0a.png + ├── 0b1761.png + ├── ... +``` + +1. Train the model. +``` +python train.py --dataset --arch NestedUNet --img_ext .jpg --mask_ext .png +``` +2. Evaluate. +``` +python val.py --name _NestedUNet_woDS +``` + +## Results +### DSB2018 (96x96) + +Here is the results on DSB2018 dataset (96x96) with LovaszHingeLoss. 
+ +| Model | IoU | Loss | +|:------------------------------- |:-------:|:-------:| +| U-Net | 0.839 | 0.365 | +| Nested U-Net | 0.842 |**0.354**| +| Nested U-Net w/ Deepsupervision |**0.843**| 0.362 | diff --git a/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch/modelzoo_level.txt index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e 100644 --- a/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/README_ori.md b/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/README_ori.md index 1a712c230e09778f6ca4136d03a972ac022631e4..32d36760cb26edc2a4329ddc29373a9e8e4aa9ba 100644 --- a/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/README_ori.md +++ b/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/README_ori.md @@ -1,14 +1,14 @@ -# pytorch_AdvancedEast -pytorch实现AdvancedEast+mobilenetv3 - -# 参考https://github.com/huoyijie/AdvancedEAST -# training -## tianchi ICPR dataset download 链接: https://pan.baidu.com/s/1NSyc-cHKV3IwDo6qojIrKA 密码: ye9y -### 1.modify config params in cfg.py, see default values. -### 2.python preprocess.py, resize image to 256256,384384,512512,640640,736*736, and train respectively could speed up training process. -### 3.python label.py -### 4.python train.py -### 5.python predict.py -图片: -![demo](https://github.com/corleonechensiyu/pytorch_AdvancedEast/blob/master/012.png_predict.jpg) - +# pytorch_AdvancedEast +pytorch实现AdvancedEast+mobilenetv3 + +# 参考https://github.com/huoyijie/AdvancedEAST +# training +## tianchi ICPR dataset download 链接: https://pan.baidu.com/s/1NSyc-cHKV3IwDo6qojIrKA 密码: ye9y +### 1.modify config params in cfg.py, see default values. +### 2.python preprocess.py, resize image to 256256,384384,512512,640640,736*736, and train respectively could speed up training process. 
+### 3.python label.py +### 4.python train.py +### 5.python predict.py +图片: +![demo](https://github.com/corleonechensiyu/pytorch_AdvancedEast/blob/master/012.png_predict.jpg) + diff --git a/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch/modelzoo_level.txt index 82f29898a44e5414055c4a4dbb4f0998260f9809..2963c766cbc04c7ed4092aced23b5376029e98ac 100644 --- a/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/iou_tmp.py b/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/iou_tmp.py index 17db3508e1e9d13b2d03a83f8637ec2ae36dac40..f3298c8c96030abb20d943faa0ed90e39e9d65ed 100644 --- a/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/iou_tmp.py +++ b/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/iou_tmp.py @@ -1,82 +1,82 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ============================================================================ -# -import torch -import torch.nn.functional as F -from torch.autograd import Variable -import numpy as np - - -def iiou(pred, target, size_average = True): - - combination = target[:,:,:,:]*pred[:,:,:,:] - sumlist_combination = torch.sum(combination,[-1,2]).npu() #compute SG - sumlist_pred = torch.sum(pred,[-1,2]).npu() # compute S - sumlist_target = torch.sum(target,[-1,2]).npu() # compute G - #print('compression matrix',sumlist_target) - - - iou0 = 1-(sumlist_combination[0]/(sumlist_pred[0]+sumlist_target[0]-sumlist_combination[0])) - iou1 = 1-(sumlist_combination[1]/(sumlist_pred[1]+sumlist_target[1]-sumlist_combination[1])) - iou2 = 1-(sumlist_combination[2]/(sumlist_pred[2]+sumlist_target[2]-sumlist_combination[2])) - iou3 = 1-(sumlist_combination[3]/(sumlist_pred[3]+sumlist_target[3]-sumlist_combination[3])) - iou4 = 1-(sumlist_combination[4]/(sumlist_pred[4]+sumlist_target[4]-sumlist_combination[4])) - iou5 = 1-(sumlist_combination[5]/(sumlist_pred[5]+sumlist_target[5]-sumlist_combination[5])) - iou6 = 1-(sumlist_combination[6]/(sumlist_pred[6]+sumlist_target[6]-sumlist_combination[6])) - iou7 = 1-(sumlist_combination[7]/(sumlist_pred[7]+sumlist_target[7]-sumlist_combination[7])) - - IoU = (iou0+iou1+iou2+iou3+iou4+iou5+iou6+iou7)/8 - - - - #b = pred.shape[0] - #i=0 - #IoU = 0.0 - - - #Iand1 = torch.sum(target[:,:,:,:]*pred[:,:,:,:]) - - - #Ior1 = torch.sum(target[:,:,:,:]) + torch.sum(pred[:,:,:,:])-Iand1 - - - #IoU1 = Iand1/Ior1 - - - - #IoU loss is (1-IoU1) - #IoU = IoU + (1-IoU1) - - return IoU - - +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +# +import torch +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np + + +def iiou(pred, target, size_average = True): + + combination = target[:,:,:,:]*pred[:,:,:,:] + sumlist_combination = torch.sum(combination,[-1,2]).npu() #compute SG + sumlist_pred = torch.sum(pred,[-1,2]).npu() # compute S + sumlist_target = torch.sum(target,[-1,2]).npu() # compute G + #print('compression matrix',sumlist_target) + + + iou0 = 1-(sumlist_combination[0]/(sumlist_pred[0]+sumlist_target[0]-sumlist_combination[0])) + iou1 = 1-(sumlist_combination[1]/(sumlist_pred[1]+sumlist_target[1]-sumlist_combination[1])) + iou2 = 1-(sumlist_combination[2]/(sumlist_pred[2]+sumlist_target[2]-sumlist_combination[2])) + iou3 = 1-(sumlist_combination[3]/(sumlist_pred[3]+sumlist_target[3]-sumlist_combination[3])) + iou4 = 1-(sumlist_combination[4]/(sumlist_pred[4]+sumlist_target[4]-sumlist_combination[4])) + iou5 = 1-(sumlist_combination[5]/(sumlist_pred[5]+sumlist_target[5]-sumlist_combination[5])) + iou6 = 1-(sumlist_combination[6]/(sumlist_pred[6]+sumlist_target[6]-sumlist_combination[6])) + iou7 = 1-(sumlist_combination[7]/(sumlist_pred[7]+sumlist_target[7]-sumlist_combination[7])) + + IoU = (iou0+iou1+iou2+iou3+iou4+iou5+iou6+iou7)/8 + + + + #b = pred.shape[0] + #i=0 + #IoU = 0.0 + + + #Iand1 = torch.sum(target[:,:,:,:]*pred[:,:,:,:]) + + + #Ior1 = torch.sum(target[:,:,:,:]) + torch.sum(pred[:,:,:,:])-Iand1 + + + #IoU1 = Iand1/Ior1 + + + + #IoU loss is (1-IoU1) + #IoU = IoU + (1-IoU1) + + return IoU + + diff --git a/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/prefetcher.py b/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/prefetcher.py index 485463e8aa3c3f7de7f6b86d2300f3989c9a1b77..1d8b0c8087719cefc905c13186f4d80bbd402f82 100644 --- a/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/prefetcher.py +++ b/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch/prefetcher.py @@ -1,71 +1,71 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -# - - -import torch - -class DataPrefetcher(): - def __init__(self, loader, steam=None): - self.loader = iter(loader) - self.stream = torch.npu.Stream() - # With Amp, it isn't necessary to manually convert data to half. - # if args.fp16: - # self.mean = self.mean.half() - # self.std = self.std.half() - self.preload() - - def preload(self): - try: - self.next_input,self.next_label = next(self.loader) - except StopIteration: - self.next_input = None - self.next_label = None - return - with torch.npu.stream(self.stream): - self.next_input = self.next_input.npu(non_blocking=True) - self.next_label = self.next_label.npu(non_blocking=True) - - # With Amp, it isn't necessary to manually convert data to half. - # if args.fp16: - # self.next_input = self.next_input.half() - # else: - # self.next_input = self.next_input.float() - - def next(self): - torch.npu.current_stream().wait_stream(self.stream) - next_input = self.next_input - next_label = self.next_label - if next_label is not None: - self.preload() +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +# + + +import torch + +class DataPrefetcher(): + def __init__(self, loader, steam=None): + self.loader = iter(loader) + self.stream = torch.npu.Stream() + # With Amp, it isn't necessary to manually convert data to half. 
+ # if args.fp16: + # self.mean = self.mean.half() + # self.std = self.std.half() + self.preload() + + def preload(self): + try: + self.next_input,self.next_label = next(self.loader) + except StopIteration: + self.next_input = None + self.next_label = None + return + with torch.npu.stream(self.stream): + self.next_input = self.next_input.npu(non_blocking=True) + self.next_label = self.next_label.npu(non_blocking=True) + + # With Amp, it isn't necessary to manually convert data to half. + # if args.fp16: + # self.next_input = self.next_input.half() + # else: + # self.next_input = self.next_input.float() + + def next(self): + torch.npu.current_stream().wait_stream(self.stream) + next_input = self.next_input + next_label = self.next_label + if next_label is not None: + self.preload() return next_input,next_label \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_performance_16p.sh index 0c7e241fbf5607d3fbbf57e1554d387d3839a4cd..0346feb92ff2f6173580eacbed682f3c1bd9c0ab 100644 --- a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_performance_16p.sh @@ -1,221 +1,221 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#source ../env_npu.sh - -data_path="" -conf_path="" -server_index="" -fix_node_ip="" -#集合通信参数,不需要修改 - -export RANK_SIZE=16 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="BertBase_ID0490_for_PyTorch" -#训练batch_size -batch_size=80 - - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - -rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ -cur_1=${1:-"1"} -cur_2=${2:-"2"} -cur_3=${3:-"3"} -cur_4=${4:-"4"} -init_checkpoint=${5:-"`${data_path}/pretrained/bert_base_pretrain.pt`"} -epochs=${6:-"1.0"} -batch_size=${7:-"80"} -learning_rate=${8:-"2e-4"} -precision=${9:-"fp16"} -num_npu=${10:-"16"} -seed=${11:-"1"} 
-squad_dir=${12:-"`${data_path}/squad/v1.1`"} -vocab_file=${13:-"data/uncased_L-24_H-1024_A-16/vocab.txt"} -OUT_DIR=${14:-"results/SQuAD"} -mode=${15:-"train eval"} -CONFIG_FILE=${16:-"bert_base_config.json"} -max_steps=${17:-"-1"} - -echo "out dir is $OUT_DIR" -mkdir -p $OUT_DIR -if [ ! -d "$OUT_DIR" ]; then - echo "ERROR: non existing $OUT_DIR" - exit 1 -fi - -use_fp16="" -if [ "$precision" = "fp16" ] ; then - echo "fp16 activated!" - use_fp16=" --fp16 " -fi - -CMD="python3.7 run_squad.py " -CMD+="--init_checkpoint=${data_path}/pretrained/bert_base_pretrain.pt " -if [ "$mode" = "train" ] ; then - CMD+="--do_train " - CMD+="--train_file=${data_path}/squad/v1.1/train-v1.1.json " - CMD+="--train_batch_size=$batch_size " -elif [ "$mode" = "eval" ] ; then - CMD+="--do_predict " - CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " - CMD+="--predict_batch_size=$batch_size " - CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " - CMD+="--do_eval " -elif [ "$mode" = "prediction" ] ; then - CMD+="--do_predict " - CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " - CMD+="--predict_batch_size=$batch_size " -else - CMD+=" --do_train " - CMD+=" --train_file=${data_path}/squad/v1.1/train-v1.1.json " - CMD+=" --train_batch_size=$batch_size " - CMD+="--do_predict " - CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " - CMD+="--predict_batch_size=$batch_size " - CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " - CMD+="--do_eval " -fi - -CMD+=" --do_lower_case " -CMD+=" --bert_model=bert-large-uncased " -CMD+=" --learning_rate=$learning_rate " -CMD+=" --seed=$seed " -CMD+=" --num_train_epochs=$epochs " -CMD+=" --max_seq_length=384 " -CMD+=" --doc_stride=128 " -CMD+=" --output_dir=$OUT_DIR " -CMD+=" --vocab_file=$vocab_file " -CMD+=" --config_file=$CONFIG_FILE " -CMD+=" --max_steps=$max_steps " -CMD+=" $use_fp16" -CMD+=" --use_npu" -CMD+=" --num_npu=$num_npu" -CMD+=" --loss_scale=4096" -CMD+=" --addr=$one_node_ip" - -if [ $(uname -m) = "aarch64" ] -then - for i in $(seq 0 7) - do - let p_start=0+24*i - let p_end=23+24*i - export RANK=`awk 'BEGIN{printf "%.0f\n",'${i}'+'${rank_server}'}'` - if [ -d ${cur_path}/output/${i} ];then - rm -rf ${cur_path}/output/${i} - mkdir -p ${cur_path}/output/$i - else - mkdir -p ${cur_path}/output/$i - fi - taskset -c $p_start-$p_end $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & - done -else - for i in $(seq 0 7) - do - export RANK=`awk 'BEGIN{printf "%.0f\n",'${i}'+'${rank_server}'}'` - if [ -d ${cur_path}/output/${i} ];then - rm -rf ${cur_path}/output/${i} - mkdir -p ${cur_path}/output/$i - else - mkdir -p ${cur_path}/output/$i - fi - $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & - done -fi -wait - -ASCEND_DEVICE_ID=0 -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -iter=`grep 'Epoch: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "iter/s :" '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -FPS=`awk 'BEGIN{printf "%.2f\n",'${iter}'*16*'${batch_size}'}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf 
"%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#source ../env_npu.sh + +data_path="" +conf_path="" +server_index="" +fix_node_ip="" +#集合通信参数,不需要修改 + +export RANK_SIZE=16 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="BertBase_ID0490_for_PyTorch" +#训练batch_size +batch_size=80 + + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + +rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +cur_1=${1:-"1"} +cur_2=${2:-"2"} +cur_3=${3:-"3"} +cur_4=${4:-"4"} +init_checkpoint=${5:-"`${data_path}/pretrained/bert_base_pretrain.pt`"} +epochs=${6:-"1.0"} +batch_size=${7:-"80"} +learning_rate=${8:-"2e-4"} +precision=${9:-"fp16"} +num_npu=${10:-"16"} +seed=${11:-"1"} +squad_dir=${12:-"`${data_path}/squad/v1.1`"} +vocab_file=${13:-"data/uncased_L-24_H-1024_A-16/vocab.txt"} +OUT_DIR=${14:-"results/SQuAD"} +mode=${15:-"train eval"} 
+CONFIG_FILE=${16:-"bert_base_config.json"} +max_steps=${17:-"-1"} + +echo "out dir is $OUT_DIR" +mkdir -p $OUT_DIR +if [ ! -d "$OUT_DIR" ]; then + echo "ERROR: non existing $OUT_DIR" + exit 1 +fi + +use_fp16="" +if [ "$precision" = "fp16" ] ; then + echo "fp16 activated!" + use_fp16=" --fp16 " +fi + +CMD="python3.7 run_squad.py " +CMD+="--init_checkpoint=${data_path}/pretrained/bert_base_pretrain.pt " +if [ "$mode" = "train" ] ; then + CMD+="--do_train " + CMD+="--train_file=${data_path}/squad/v1.1/train-v1.1.json " + CMD+="--train_batch_size=$batch_size " +elif [ "$mode" = "eval" ] ; then + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " + CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " + CMD+="--do_eval " +elif [ "$mode" = "prediction" ] ; then + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " +else + CMD+=" --do_train " + CMD+=" --train_file=${data_path}/squad/v1.1/train-v1.1.json " + CMD+=" --train_batch_size=$batch_size " + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " + CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " + CMD+="--do_eval " +fi + +CMD+=" --do_lower_case " +CMD+=" --bert_model=bert-large-uncased " +CMD+=" --learning_rate=$learning_rate " +CMD+=" --seed=$seed " +CMD+=" --num_train_epochs=$epochs " +CMD+=" --max_seq_length=384 " +CMD+=" --doc_stride=128 " +CMD+=" --output_dir=$OUT_DIR " +CMD+=" --vocab_file=$vocab_file " +CMD+=" --config_file=$CONFIG_FILE " +CMD+=" --max_steps=$max_steps " +CMD+=" $use_fp16" +CMD+=" --use_npu" +CMD+=" --num_npu=$num_npu" +CMD+=" --loss_scale=4096" +CMD+=" --addr=$one_node_ip" + +if [ $(uname -m) = "aarch64" ] +then + for i in $(seq 0 7) + do + let p_start=0+24*i + let p_end=23+24*i + export RANK=`awk 'BEGIN{printf "%.0f\n",'${i}'+'${rank_server}'}'` + if [ -d ${cur_path}/output/${i} ];then + rm -rf ${cur_path}/output/${i} + mkdir -p ${cur_path}/output/$i + else + mkdir -p ${cur_path}/output/$i + fi + taskset -c $p_start-$p_end $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & + done +else + for i in $(seq 0 7) + do + export RANK=`awk 'BEGIN{printf "%.0f\n",'${i}'+'${rank_server}'}'` + if [ -d ${cur_path}/output/${i} ];then + rm -rf ${cur_path}/output/${i} + mkdir -p ${cur_path}/output/$i + else + mkdir -p ${cur_path}/output/$i + fi + $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & + done +fi +wait + +ASCEND_DEVICE_ID=0 +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +iter=`grep 'Epoch: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "iter/s :" '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${iter}'*16*'${batch_size}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "step_loss :" 
$cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log rm -rf ${data_path}/squad/v1.1/train-v1.1.json_bert-large-uncased_384_128_64 \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/ConvLSTM_ID1772_for_PyTorch/requirements.txt b/PyTorch/dev/cv/image_classification/ConvLSTM_ID1772_for_PyTorch/requirements.txt old mode 100755 new mode 100644 diff --git a/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch/prefetcher.py b/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch/prefetcher.py index a02e80d661454670a1eefb1ccb5e351bf2b7e923..efc4a37ec1b9d9389f740f45c6deb71c30b9de59 100644 --- a/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch/prefetcher.py +++ b/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch/prefetcher.py @@ -1,63 +1,63 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class Prefetcher(object): - """Prefetcher using on npu device. - - Origin Code URL: - https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_fast.py#L280 - - Args: - loder (torch.utils.data.DataLoader or DataLoader like iterator): - Using to generate inputs after preprocessing. - stream (torch.npu.Stream): Default None. - Because of the limitation of NPU's memory mechanism, - if prefetcher is initialized repeatedly during training, - a defined stream should be introduced to prevent memory leakage; - if prefetcher is initialized only once during training, - a defined stream is not necessary. - - Returns: - float: tensors of shape (k, 5) and (k, 1). Labels are 0-based. 
- """ - - def __init__(self, loader, stream=None): - self.loader = iter(loader) - self.stream = stream if stream is not None else torch.npu.Stream() - self.preload() - - def preload(self): - try: - self.next_input, self.next_target = next(self.loader) - except StopIteration: - self.user = None - self.item = None - return - - with torch.npu.stream(self.stream): - self.next_input, self.next_target = self.next_input.to(torch.float), self.next_target.to(torch.float) - self.next_input, self.next_target = self.next_input.npu(non_blocking=True), self.next_target.npu(non_blocking=True) - - def next(self): - torch.npu.current_stream().wait_stream(self.stream) - next_input = self.next_input - next_target = self.next_target - if next_input is not None: - self.preload() +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + + +class Prefetcher(object): + """Prefetcher using on npu device. + + Origin Code URL: + https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_fast.py#L280 + + Args: + loder (torch.utils.data.DataLoader or DataLoader like iterator): + Using to generate inputs after preprocessing. + stream (torch.npu.Stream): Default None. + Because of the limitation of NPU's memory mechanism, + if prefetcher is initialized repeatedly during training, + a defined stream should be introduced to prevent memory leakage; + if prefetcher is initialized only once during training, + a defined stream is not necessary. + + Returns: + float: tensors of shape (k, 5) and (k, 1). Labels are 0-based. + """ + + def __init__(self, loader, stream=None): + self.loader = iter(loader) + self.stream = stream if stream is not None else torch.npu.Stream() + self.preload() + + def preload(self): + try: + self.next_input, self.next_target = next(self.loader) + except StopIteration: + self.user = None + self.item = None + return + + with torch.npu.stream(self.stream): + self.next_input, self.next_target = self.next_input.to(torch.float), self.next_target.to(torch.float) + self.next_input, self.next_target = self.next_input.npu(non_blocking=True), self.next_target.npu(non_blocking=True) + + def next(self): + torch.npu.current_stream().wait_stream(self.stream) + next_input = self.next_input + next_target = self.next_target + if next_input is not None: + self.preload() return next_input, next_target \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/README_ori.md b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/README_ori.md index 2cf3d2a82106d3e10156ce42e0cb5cc404686b73..e89c9b3ea08691210046fbb9184bf8e44e88f29e 100644 --- a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/README_ori.md +++ b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/README_ori.md @@ -1,3 +1,3 @@ -# Deep Sort - +# Deep Sort + This is the implemention of deep sort with pytorch. 
\ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/requirements.txt b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/requirements.txt index 848ebde50e69db701f64332fea0b153a362100ed..f9b2aa91e4a9024db31af76e55e7a5b7fa4350b7 100644 --- a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/requirements.txt +++ b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/requirements.txt @@ -1,2 +1,2 @@ -torch +torch numpy \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_full_8p.sh index 49ef461403e0611e7244f27e90276faf46b7744b..6d473004a5fcd1b6ef9c5532d67e792ce2ad00ed 100644 --- a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_full_8p.sh @@ -1,201 +1,201 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="DeepSort_ID0654_for_PyTorch" -#训练epoch -train_epochs=10 -#训练batch_size -batch_size=64 -#训练step -train_steps= -#学习率 -learning_rate=0.8 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--apex" -fi -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -# 解压数据集 -if [ ! 
-e "$data_path/data/train/1499/1499C5T0007F050.jpg" ];then - tar -xvf $data_path/data.tar -C $data_path/ -else - echo "NO NEED UNTAR" -fi -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -#cd $cur_path/../tensorflow -cd $cur_path/../ - -sed -i "s|for epoch in range(start_epoch, start_epoch + 40):|for epoch in range(start_epoch, start_epoch + $train_epochs):|g" $cur_path/../deep/train.py - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - export MASTER_ADDR=localhost - export MASTER_PORT=29688 - export HCCL_WHITELIST_DISABLE=1 - - NPUS=($(seq 0 7)) - export NPU_WORLD_SIZE=${#NPUS[@]} - rank=0 - for i in ${NPUS[@]} - do - mkdir -p $cur_path/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export RANK=${rank} - export ASCEND_DEVICE_ID=${i} - echo run process ${rank} - python3 deep/train.py --ddp \ - --data-dir $data_path/data \ - --lr 0.8 \ - $PREC > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ - done -done -wait -sed -i "s|for epoch in range(start_epoch, start_epoch + $train_epochs):|for epoch in range(start_epoch, start_epoch + 40):|g" $cur_path/../deep/train.py -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep "]time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F":" 'NR>26{print $3}' | awk -F"s" '{print $1}' | head -n -1 | awk '{sum+=$1} END {print 64*NR/sum}'` -FPS=`grep -rn "progress:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "time:" '{print $2}'| awk -F "s" '{print $1}'|awk '{if (NR>1){print $1}}'|awk '{if(length !=0) print $0}'|awk '{sum+=$1} END {print 64*NR/sum}'` -FPS=$(awk 'BEGIN{print '$FPS'*8}') -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep train_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $8}'|cut -c 1-5` -train_accuracy=`grep "Acc" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc:" '{print$2}' | sed 's/%//g' |awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "Loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss:" '{print$2}' | awk '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = 
${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepSort_ID0654_for_PyTorch" +#训练epoch +train_epochs=10 +#训练batch_size +batch_size=64 +#训练step +train_steps= +#学习率 +learning_rate=0.8 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--apex" +fi +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 解压数据集 +if [ ! 
-e "$data_path/data/train/1499/1499C5T0007F050.jpg" ];then + tar -xvf $data_path/data.tar -C $data_path/ +else + echo "NO NEED UNTAR" +fi +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +#cd $cur_path/../tensorflow +cd $cur_path/../ + +sed -i "s|for epoch in range(start_epoch, start_epoch + 40):|for epoch in range(start_epoch, start_epoch + $train_epochs):|g" $cur_path/../deep/train.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + export MASTER_ADDR=localhost + export MASTER_PORT=29688 + export HCCL_WHITELIST_DISABLE=1 + + NPUS=($(seq 0 7)) + export NPU_WORLD_SIZE=${#NPUS[@]} + rank=0 + for i in ${NPUS[@]} + do + mkdir -p $cur_path/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export RANK=${rank} + export ASCEND_DEVICE_ID=${i} + echo run process ${rank} + python3 deep/train.py --ddp \ + --data-dir $data_path/data \ + --lr 0.8 \ + $PREC > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ + done +done +wait +sed -i "s|for epoch in range(start_epoch, start_epoch + $train_epochs):|for epoch in range(start_epoch, start_epoch + 40):|g" $cur_path/../deep/train.py +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep "]time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F":" 'NR>26{print $3}' | awk -F"s" '{print $1}' | head -n -1 | awk '{sum+=$1} END {print 64*NR/sum}'` +FPS=`grep -rn "progress:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "time:" '{print $2}'| awk -F "s" '{print $1}'|awk '{if (NR>1){print $1}}'|awk '{if(length !=0) print $0}'|awk '{sum+=$1} END {print 64*NR/sum}'` +FPS=$(awk 'BEGIN{print '$FPS'*8}') +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep train_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $8}'|cut -c 1-5` +train_accuracy=`grep "Acc" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc:" '{print$2}' | sed 's/%//g' |awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "Loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss:" '{print$2}' | awk '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = 
${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_performance_8p.sh index 1caa51cfe889539996085e509bb8d388b90b885c..55612613f3c52376433bc96e1d938fb498ef6e38 100644 --- a/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch/test/train_performance_8p.sh @@ -1,198 +1,198 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="DeepSort_ID0654_for_PyTorch" -#训练epoch -train_epochs=2 -#训练batch_size -#batch_size=256 -batch_size=64 -#训练step -train_steps= -#学习率 -learning_rate=0.1 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--apex" -fi -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -# 解压数据集 -if [ ! 
-e "$data_path/data/train/1499/1499C5T0007F050.jpg" ];then - tar -xvf $data_path/data.tar -C $data_path/ -else - echo "NO NEED UNTAR" -fi -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -#cd $cur_path/../tensorflow -cd $cur_path/../ - -sed -i "s|for epoch in range(start_epoch, start_epoch + 40):|for epoch in range(start_epoch, start_epoch + $train_epochs):|g" $cur_path/../deep/train.py - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - export MASTER_ADDR=localhost - export MASTER_PORT=29688 - export HCCL_WHITELIST_DISABLE=1 - - NPUS=($(seq 0 7)) - export NPU_WORLD_SIZE=${#NPUS[@]} - rank=0 - for i in ${NPUS[@]} - do - mkdir -p $cur_path/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export RANK=${rank} - export ASCEND_DEVICE_ID=${i} - echo run process ${rank} - python3 deep/train.py --ddp \ - --data-dir $data_path/data \ - --max_steps 200 \ - $PREC > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ - done -done -wait -sed -i "s|for epoch in range(start_epoch, start_epoch + $train_epochs):|for epoch in range(start_epoch, start_epoch + 40):|g" $cur_path/../deep/train.py -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#FPS=`grep "]time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F":" 'NR>1{print $3}' | awk -F"s" '{print $1}' | head -n -1 | awk '{sum+=$1} END {print 64*NR/sum}'` -FPS=`grep -rn "progress:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "time:" '{print $2}'| awk -F "s" '{print $1}'|awk '{if (NR>1){print $1}}'|awk '{if(length !=0) print $0}'|awk '{sum+=$1} END {print 64*NR/sum}'` -FPS=$(awk 'BEGIN{print '$FPS'*8}') -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep train_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $8}'|cut -c 1-5` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "Loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss:" '{print$2}' | awk '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="DeepSort_ID0654_for_PyTorch" +#训练epoch +train_epochs=2 +#训练batch_size +#batch_size=256 +batch_size=64 +#训练step +train_steps= +#学习率 +learning_rate=0.1 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--apex" +fi +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +# 解压数据集 +if [ ! 
-e "$data_path/data/train/1499/1499C5T0007F050.jpg" ];then + tar -xvf $data_path/data.tar -C $data_path/ +else + echo "NO NEED UNTAR" +fi +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +#cd $cur_path/../tensorflow +cd $cur_path/../ + +sed -i "s|for epoch in range(start_epoch, start_epoch + 40):|for epoch in range(start_epoch, start_epoch + $train_epochs):|g" $cur_path/../deep/train.py + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + export MASTER_ADDR=localhost + export MASTER_PORT=29688 + export HCCL_WHITELIST_DISABLE=1 + + NPUS=($(seq 0 7)) + export NPU_WORLD_SIZE=${#NPUS[@]} + rank=0 + for i in ${NPUS[@]} + do + mkdir -p $cur_path/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export RANK=${rank} + export ASCEND_DEVICE_ID=${i} + echo run process ${rank} + python3 deep/train.py --ddp \ + --data-dir $data_path/data \ + --max_steps 200 \ + $PREC > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ + done +done +wait +sed -i "s|for epoch in range(start_epoch, start_epoch + $train_epochs):|for epoch in range(start_epoch, start_epoch + 40):|g" $cur_path/../deep/train.py +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#FPS=`grep "]time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F":" 'NR>1{print $3}' | awk -F"s" '{print $1}' | head -n -1 | awk '{sum+=$1} END {print 64*NR/sum}'` +FPS=`grep -rn "progress:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "time:" '{print $2}'| awk -F "s" '{print $1}'|awk '{if (NR>1){print $1}}'|awk '{if(length !=0) print $0}'|awk '{sum+=$1} END {print 64*NR/sum}'` +FPS=$(awk 'BEGIN{print '$FPS'*8}') +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep train_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $8}'|cut -c 1-5` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "Loss:" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss:" '{print$2}' | awk '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/ESMM_ID2839_for_PyTorch/modelzoo_level b/PyTorch/dev/cv/image_classification/ESMM_ID2839_for_PyTorch/modelzoo_level index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e 100644 --- a/PyTorch/dev/cv/image_classification/ESMM_ID2839_for_PyTorch/modelzoo_level +++ b/PyTorch/dev/cv/image_classification/ESMM_ID2839_for_PyTorch/modelzoo_level @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/test/train_performance_8p.sh index ebdab5ffd443e06a27116df579ba0d8515a33310..b3d6682684d90412a80609da641767cf8c11f594 100644 --- a/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/test/train_performance_8p.sh @@ -1,207 +1,207 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -#集合通信参数,不需要修改 - -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Hourglass_ID1809_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=16 -#训练step -train_steps=10 -#学习率 -learning_rate=1e-3 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - 
elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ -sed -i "s|'batchsize': 16|'batchsize': $batch_size|g" $cur_path/../task/pose.py -sed -i "s|'learning_rate': 1e-3|'learning_rate': $learning_rate|g" $cur_path/../task/pose.py -sed -i "s|'epoch_num': 200|'epoch_num': $train_epochs|g" $cur_path/../task/pose.py -sed -i "s|'train_iters': 1000|'train_iters': $train_steps|g" $cur_path/../task/pose.py -sed -i "s|annot_dir = 'data/MPII/annot'|annot_dir = '$data_path/data/MPII/annot'|g" $cur_path/../datat/MPII/ref.py -sed -i "s|img_dir = 'data/MPII/images'|img_dir = '$data_path/data/MPII/images'|g" $cur_path/../datat/MPII/ref.py -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -done -wait - - - -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - - nohup python3 train.py -e test_run_001 --ddp True > $cur_path/output/${i}/train_${i}.log 2>&1 & - let rank++ -done -wait - - -sed -i "s|'batchsize': $batch_size|'batchsize': 16|g" $cur_path/../task/pose.py -sed -i "s|'learning_rate': $learning_rate|'learning_rate': 1e-3|g" $cur_path/../task/pose.py -sed -i "s|'epoch_num': $train_epochs|'epoch_num': 200|g" $cur_path/../task/pose.py -sed -i "s|'train_iters': $train_steps|'train_iters': 1000|g" $cur_path/../task/pose.py -sed -i "s|annot_dir = '$data_path/data/MPII/annot'|annot_dir = 'data/MPII/annot'|g" $cur_path/../datat/MPII/ref.py -sed -i "s|img_dir = '$data_path/data/MPII/images'|img_dir = 'data/MPII/images'|g" $cur_path/../datat/MPII/ref.py -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -sed -i "s|\r|\n|g" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep "fps:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "fps: " '{print $2}'|awk '{sum+=$1} END 
{print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*8}') - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "the loss is: " $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "the loss is: " '{print $2}'|sed s/[[:space:]]//g > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +#集合通信参数,不需要修改 + +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Hourglass_ID1809_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=16 +#训练step +train_steps=10 +#学习率 +learning_rate=1e-3 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + 
data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +sed -i "s|'batchsize': 16|'batchsize': $batch_size|g" $cur_path/../task/pose.py +sed -i "s|'learning_rate': 1e-3|'learning_rate': $learning_rate|g" $cur_path/../task/pose.py +sed -i "s|'epoch_num': 200|'epoch_num': $train_epochs|g" $cur_path/../task/pose.py +sed -i "s|'train_iters': 1000|'train_iters': $train_steps|g" $cur_path/../task/pose.py +sed -i "s|annot_dir = 'data/MPII/annot'|annot_dir = '$data_path/data/MPII/annot'|g" $cur_path/../datat/MPII/ref.py +sed -i "s|img_dir = 'data/MPII/images'|img_dir = '$data_path/data/MPII/images'|g" $cur_path/../datat/MPII/ref.py +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +done +wait + + + +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + mkdir -p $cur_path/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + + nohup python3 train.py -e test_run_001 --ddp True > $cur_path/output/${i}/train_${i}.log 2>&1 & + let rank++ +done +wait + + +sed -i "s|'batchsize': $batch_size|'batchsize': 16|g" $cur_path/../task/pose.py +sed -i "s|'learning_rate': $learning_rate|'learning_rate': 1e-3|g" $cur_path/../task/pose.py +sed -i "s|'epoch_num': $train_epochs|'epoch_num': 200|g" $cur_path/../task/pose.py +sed -i "s|'train_iters': $train_steps|'train_iters': 1000|g" $cur_path/../task/pose.py +sed -i "s|annot_dir = '$data_path/data/MPII/annot'|annot_dir = 'data/MPII/annot'|g" $cur_path/../datat/MPII/ref.py +sed -i "s|img_dir = '$data_path/data/MPII/images'|img_dir = 'data/MPII/images'|g" $cur_path/../datat/MPII/ref.py +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|\r|\n|g" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep "fps:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "fps: " '{print $2}'|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*8}') + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "the loss is: " $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "the loss is: " '{print $2}'|sed s/[[:space:]]//g > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/utils/__init__.py b/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/utils/__init__.py index 475a7da6fc12869078b4a38eed08047e6b06c130..2fcb3d2f55c539cb2454a25da4a272a83e3faa44 100644 --- a/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/utils/__init__.py +++ b/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch/utils/__init__.py @@ -1,33 +1,33 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ # \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/test/train_full_8p.sh index 8563d37ef64f2abf016ca59090ec540ce3a94b62..76482f090613e3757932212b06a93dafa73695cd 100644 --- a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/test/train_full_8p.sh @@ -1,152 +1,152 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=8 - -RANK_ID_START=0 - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="InceptionV3_ID0445_for_PyTorch" -#训练epoch -train_epochs=90 -#训练batch_size -batch_size=256 -#训练step -train_steps= -#学习率 -learning_rate=0.1 - -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--apex" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path - - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $RANK_ID" - export RANK_ID=$RANK_ID - export ASCEND_DEVICE_ID=$RANK_ID - ASCEND_DEVICE_ID=$RANK_ID - - if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID - else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID - fi - - - #训练开始时间,不需要修改 - start_time=$(date +%s) - - nohup python3.7 $cur_path/../train_8p_aux.py \ - --model inception_v3 \ - --epochs ${train_epochs} \ - --workers 192 \ - --data-path=${data_path} \ - --batch-size ${batch_size} $PREC \ - --lr ${learning_rate} \ - --distributed \ - --apex \ - --output-dir=$cur_path/output/$ASCEND_DEVICE_ID \ - --momentum 0.9 \ - --weight-decay 1e-4 \ - --seed 49 \ - --print-freq 10 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & -done -wait - - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'img/s' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "img/s" '{print $2}'|awk '{print$2}' |awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc@1" '{print $2}'|cut -c 2-6|awk 'BEGIN {max = 0} {if ($1+0>max+0) max=$1 fi} END {print max}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` 
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep img/s $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "img/s" '{print$2}' | awk '{print$4}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=8 + +RANK_ID_START=0 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="InceptionV3_ID0445_for_PyTorch" +#训练epoch +train_epochs=90 +#训练batch_size +batch_size=256 +#训练step +train_steps= +#学习率 +learning_rate=0.1 + +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--apex" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path + + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $RANK_ID" + export RANK_ID=$RANK_ID + export ASCEND_DEVICE_ID=$RANK_ID + ASCEND_DEVICE_ID=$RANK_ID + + if [ -d $cur_path/output/$ASCEND_DEVICE_ID ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID + else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID + fi + + + #训练开始时间,不需要修改 + start_time=$(date +%s) + + nohup python3.7 $cur_path/../train_8p_aux.py \ + --model inception_v3 \ + --epochs ${train_epochs} \ + --workers 192 \ + --data-path=${data_path} \ + --batch-size ${batch_size} $PREC \ + --lr ${learning_rate} \ + --distributed \ + --apex \ + --output-dir=$cur_path/output/$ASCEND_DEVICE_ID \ + --momentum 0.9 \ + --weight-decay 1e-4 \ + --seed 49 \ + --print-freq 10 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 
2>&1 & +done +wait + + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'img/s' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "img/s" '{print $2}'|awk '{print$2}' |awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Acc@1" '{print $2}'|cut -c 2-6|awk 'BEGIN {max = 0} {if ($1+0>max+0) max=$1 fi} END {print max}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep img/s $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "img/s" '{print$2}' | awk '{print$4}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p.py b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p.py index 14585e31408ef52e636f85fa03e735b462bc9ac7..59c6158dd2092a5c9131c514cd35ac137c960669 100644 --- a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p.py +++ b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p.py @@ -1,395 +1,395 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import print_function -import datetime -import os -import time -import sys -sys.path.insert(1,os.path.abspath('..')+ "/..") -import apex -import random -import torch -import torch.utils.data -from torch import nn -import torchvision -from torchvision import transforms -import torch.npu -import numpy as np -import inception -import torch.optim -import utils - -try: - from apex import amp -except ImportError: - amp = None - - -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, print_freq, apex=False): - model.train() - metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) - metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) - - header = 'Epoch: [{}]'.format(epoch) - cnt = 0 - n = 0 - for image, target in metric_logger.log_every(data_loader, print_freq, header): - n = n + 1 - if n >= 100: - pass - start_time = time.time() - image, target = image.to(device), target.to(torch.int).to(device) - output = model(image) - loss = criterion(output, target) - - optimizer.zero_grad() - if apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) - batch_size = image.shape[0] - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time)) - cnt = cnt + 1 - - if args.max_steps and cnt > args.max_steps: - break - n = 0 - - -def evaluate(model, criterion, data_loader, device): - model.eval() - metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' - with torch.no_grad(): - n = 0 - for image, target in metric_logger.log_every(data_loader, 10, header): - n = n + 1 - if n >= 200: - pass - image = image.to(device, non_blocking=True) - target = target.to(torch.int).to(device, non_blocking=True) - output = model(image) - loss = criterion(output, target) - - acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) - # FIXME need to take into account that the datasets - # could have been padded in distributed setup - batch_size = image.shape[0] - metric_logger.update(loss=loss.item()) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 
- metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - n = 0 - # gather the stats from all processes - print(device) - metric_logger.synchronize_between_processes(device) - - print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' - .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) - return metric_logger.acc1.global_avg - - -def _get_cache_path(filepath): - import hashlib - h = hashlib.sha1(filepath.encode()).hexdigest() - cache_path = os.path.join("~", ".torch", "vision", "datasets", "imagefolder", h[:10] + ".pt") - cache_path = os.path.expanduser(cache_path) - return cache_path - - -def main(args): - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - args.device = os.environ['RANK_ID'] - - - rad = random.randint(0,50) - args.seed = args.seed + rad - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - os.environ['PYTHONHASHSEED'] = str(args.seed) - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - - if args.output_dir: - utils.mkdir(args.output_dir) - - if args.distributed: - utils.init_distributed_mode(args) - - device = torch.device(f'npu:{args.device}') - torch.npu.set_device(device) - - #torch.backends.cudnn.benchmark = True - - # Data loading code - print("Loading data") - traindir = os.path.join(args.data_path, 'train') - valdir = os.path.join(args.data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - print("Loading training data") - st = time.time() - cache_path = _get_cache_path(traindir) - if args.cache_dataset and os.path.exists(cache_path): - # Attention, as the transforms are also cached! - print("Loading dataset_train from {}".format(cache_path)) - dataset, _ = torch.load(cache_path) - else: - dataset = torchvision.datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(299), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - if args.cache_dataset: - print("Saving dataset_train to {}".format(cache_path)) - utils.mkdir(os.path.dirname(cache_path)) - utils.save_on_master((dataset, traindir), cache_path) - print("Took", time.time() - st) - - print("Loading validation data") - cache_path = _get_cache_path(valdir) - if args.cache_dataset and os.path.exists(cache_path): - # Attention, as the transforms are also cached! 
- print("Loading dataset_test from {}".format(cache_path)) - dataset_test, _ = torch.load(cache_path) - else: - dataset_test = torchvision.datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.Resize(342), - transforms.CenterCrop(299), - transforms.ToTensor(), - normalize, - ])) - if args.cache_dataset: - print("Saving dataset_test to {}".format(cache_path)) - utils.mkdir(os.path.dirname(cache_path)) - utils.save_on_master((dataset_test, valdir), cache_path) - - print("Creating data loaders") - if args.distributed: - print(len(dataset)) - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - print(len(train_sampler)) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) - else: - train_sampler = torch.utils.data.RandomSampler(dataset) - test_sampler = torch.utils.data.SequentialSampler(dataset_test) - - print(len(train_sampler),args.batch_size,args.workers) - data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers,drop_last=True) # pin_memory=True - print(len(data_loader)) - - data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.workers, drop_last=True) - - print("Creating model") - print(torchvision.models.__dict__) - #model = torchvision.models.__dict__[args.model](pretrained=args.pretrained) - model = inception.inception_v3(pretrained=args.pretrained) - model.to(device) - if args.distributed and args.sync_bn: - model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - - - #criterion = nn.CrossEntropyLoss().to(device) - criterion = nn.CrossEntropyLoss() - - if args.apex: - optimizer = apex.optimizers.NpuFusedSGD( - model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - - if args.apex: - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.apex_opt_level, - combine_grad=True - ) #loss_scale=args.loss_scale, - - lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) - - model_without_ddp = model - if args.distributed: - print(args.device) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device],broadcast_buffers=False,find_unused_parameters=True) - model_without_ddp = model.module - - #optimizer = torch.optim.SGD( - #model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 - - if args.test_only: - evaluate(model, criterion, data_loader_test, device=device) - return - - print("Start training") - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex) - lr_scheduler.step() - evaluate(model, criterion, data_loader_test, device=device) - if args.output_dir: - checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'epoch': epoch, - 'args': args} - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - 
utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'checkpoint.pth')) - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) - - -def parse_args(): - import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') - parser.add_argument('--model', default='resnet18', help='model') - parser.add_argument('--device', default='6', help='device') - parser.add_argument('-b', '--batch-size', default=32, type=int) - parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('--max_steps', default=None, type=int, metavar='N', - help='number of total steps to run') - parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') - parser.add_argument( - "--cache-dataset", - dest="cache_dataset", - help="Cache the datasets for quicker initialization. It also serializes the transforms", - action="store_true", - ) - parser.add_argument( - "--sync-bn", - dest="sync_bn", - help="Use sync batch norm", - action="store_true", - ) - parser.add_argument( - "--test-only", - dest="test_only", - help="Only test the model", - action="store_true", - ) - parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", - ) - - # Mixed precision training parameters - parser.add_argument('--apex', action='store_true', - help='Use apex for mixed precision training') - parser.add_argument('--apex-opt-level', default='O1', type=str, - help='For apex mixed precision training' - 'O0 for FP32 training, O1 for mixed precision training.' 
- 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' - ) - parser.add_argument('--loss_scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') - - # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') - parser.add_argument('--distributed', - action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs.') - parser.add_argument('--dist_rank', - default=0, - type=int, - help='node rank for distributed training') - parser.add_argument('--seed', - default=1, - type=int, - help='Manually set random seed') - - args = parser.parse_args() - - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +from __future__ import print_function +import datetime +import os +import time +import sys +sys.path.insert(1,os.path.abspath('..')+ "/..") +import apex +import random +import torch +import torch.utils.data +from torch import nn +import torchvision +from torchvision import transforms +import torch.npu +import numpy as np +import inception +import torch.optim +import utils + +try: + from apex import amp +except ImportError: + amp = None + + +def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, print_freq, apex=False): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) + metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) + + header = 'Epoch: [{}]'.format(epoch) + cnt = 0 + n = 0 + for image, target in metric_logger.log_every(data_loader, print_freq, header): + n = n + 1 + if n >= 100: + pass + start_time = time.time() + image, target = image.to(device), target.to(torch.int).to(device) + output = model(image) + loss = criterion(output, target) + + optimizer.zero_grad() + if apex: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) + batch_size = image.shape[0] + metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time)) + cnt = cnt + 1 + + if args.max_steps and cnt > args.max_steps: + break + n = 0 + + +def evaluate(model, criterion, data_loader, device): + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + with torch.no_grad(): + n = 0 + for image, target in metric_logger.log_every(data_loader, 10, header): + n = n + 1 + if n >= 200: + pass + image = image.to(device, non_blocking=True) + target = target.to(torch.int).to(device, non_blocking=True) + output = model(image) + loss = criterion(output, target) + + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) + # FIXME need to take into account that the datasets + # could have been padded in distributed setup + batch_size = image.shape[0] + metric_logger.update(loss=loss.item()) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + n = 0 + # gather the stats from all processes + print(device) + metric_logger.synchronize_between_processes(device) + + print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' + .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) + return metric_logger.acc1.global_avg + + +def _get_cache_path(filepath): + import hashlib + h = hashlib.sha1(filepath.encode()).hexdigest() + cache_path = os.path.join("~", ".torch", "vision", "datasets", "imagefolder", h[:10] + ".pt") + cache_path = os.path.expanduser(cache_path) + return cache_path + + +def main(args): + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + args.device = os.environ['RANK_ID'] + + + rad = random.randint(0,50) + args.seed = args.seed + rad + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + os.environ['PYTHONHASHSEED'] = str(args.seed) + if args.apex: + if sys.version_info 
< (3, 0): + raise RuntimeError("Apex currently only supports Python 3. Aborting.") + if amp is None: + raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") + + if args.output_dir: + utils.mkdir(args.output_dir) + + if args.distributed: + utils.init_distributed_mode(args) + + device = torch.device(f'npu:{args.device}') + torch.npu.set_device(device) + + #torch.backends.cudnn.benchmark = True + + # Data loading code + print("Loading data") + traindir = os.path.join(args.data_path, 'train') + valdir = os.path.join(args.data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + print("Loading training data") + st = time.time() + cache_path = _get_cache_path(traindir) + if args.cache_dataset and os.path.exists(cache_path): + # Attention, as the transforms are also cached! + print("Loading dataset_train from {}".format(cache_path)) + dataset, _ = torch.load(cache_path) + else: + dataset = torchvision.datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(299), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + if args.cache_dataset: + print("Saving dataset_train to {}".format(cache_path)) + utils.mkdir(os.path.dirname(cache_path)) + utils.save_on_master((dataset, traindir), cache_path) + print("Took", time.time() - st) + + print("Loading validation data") + cache_path = _get_cache_path(valdir) + if args.cache_dataset and os.path.exists(cache_path): + # Attention, as the transforms are also cached! + print("Loading dataset_test from {}".format(cache_path)) + dataset_test, _ = torch.load(cache_path) + else: + dataset_test = torchvision.datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.Resize(342), + transforms.CenterCrop(299), + transforms.ToTensor(), + normalize, + ])) + if args.cache_dataset: + print("Saving dataset_test to {}".format(cache_path)) + utils.mkdir(os.path.dirname(cache_path)) + utils.save_on_master((dataset_test, valdir), cache_path) + + print("Creating data loaders") + if args.distributed: + print(len(dataset)) + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + print(len(train_sampler)) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + else: + train_sampler = torch.utils.data.RandomSampler(dataset) + test_sampler = torch.utils.data.SequentialSampler(dataset_test) + + print(len(train_sampler),args.batch_size,args.workers) + data_loader = torch.utils.data.DataLoader( + dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.workers,drop_last=True) # pin_memory=True + print(len(data_loader)) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.workers, drop_last=True) + + print("Creating model") + print(torchvision.models.__dict__) + #model = torchvision.models.__dict__[args.model](pretrained=args.pretrained) + model = inception.inception_v3(pretrained=args.pretrained) + model.to(device) + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + + #criterion = nn.CrossEntropyLoss().to(device) + criterion = nn.CrossEntropyLoss() + + if args.apex: + optimizer = apex.optimizers.NpuFusedSGD( + model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + + if args.apex: + model, optimizer = amp.initialize(model, optimizer, 
+ opt_level=args.apex_opt_level, + combine_grad=True + ) #loss_scale=args.loss_scale, + + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) + + model_without_ddp = model + if args.distributed: + print(args.device) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device],broadcast_buffers=False,find_unused_parameters=True) + model_without_ddp = model.module + + #optimizer = torch.optim.SGD( + #model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + if args.resume: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model']) + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + + if args.test_only: + evaluate(model, criterion, data_loader_test, device=device) + return + + print("Start training") + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex) + lr_scheduler.step() + evaluate(model, criterion, data_loader_test, device=device) + if args.output_dir: + checkpoint = { + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'args': args} + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'checkpoint.pth')) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + + +def parse_args(): + import argparse + parser = argparse.ArgumentParser(description='PyTorch Classification Training') + + parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') + parser.add_argument('--model', default='resnet18', help='model') + parser.add_argument('--device', default='6', help='device') + parser.add_argument('-b', '--batch-size', default=32, type=int) + parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') + parser.add_argument('--max_steps', default=None, type=int, metavar='N', + help='number of total steps to run') + parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', + help='number of data loading workers (default: 16)') + parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') + parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') + parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') + parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') + parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') + parser.add_argument('--print-freq', default=10, type=int, help='print frequency') + parser.add_argument('--output-dir', default='.', help='path where to save') + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument( + "--cache-dataset", + 
dest="cache_dataset", + help="Cache the datasets for quicker initialization. It also serializes the transforms", + action="store_true", + ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + action="store_true", + ) + parser.add_argument( + "--pretrained", + dest="pretrained", + help="Use pre-trained models from the modelzoo", + action="store_true", + ) + + # Mixed precision training parameters + parser.add_argument('--apex', action='store_true', + help='Use apex for mixed precision training') + parser.add_argument('--apex-opt-level', default='O1', type=str, + help='For apex mixed precision training' + 'O0 for FP32 training, O1 for mixed precision training.' + 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' + ) + parser.add_argument('--loss_scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') + + # distributed training parameters + parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + parser.add_argument('--distributed', + action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs.') + parser.add_argument('--dist_rank', + default=0, + type=int, + help='node rank for distributed training') + parser.add_argument('--seed', + default=1, + type=int, + help='Manually set random seed') + + args = parser.parse_args() + + return args + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p_aux.py b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p_aux.py index d86c45d9878e587ca08a6f6452825fd3321d3814..220ea58448446cc33cc29c496207cdcdcbfca488 100644 --- a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p_aux.py +++ b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/train_8p_aux.py @@ -1,394 +1,394 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -from __future__ import print_function -import datetime -import os -import time -import sys -sys.path.insert(1,os.path.abspath('..')+ "/..") -import apex -import random -import torch -import torch.utils.data -from torch import nn -import torchvision -from torchvision import transforms -import torch.npu -import numpy as np -import inception - -import utils - -try: - from apex import amp -except ImportError: - amp = None - - -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, print_freq, apex=False): - model.train() - metric_logger = utils.MetricLogger(delimiter=" ") - metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) - metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) - - header = 'Epoch: [{}]'.format(epoch) - cnt = 0 - n = 0 - for image, target in metric_logger.log_every(data_loader, print_freq, header): - n = n + 1 - if n >= 100: - pass - start_time = time.time() - image, target = image.to(device), target.to(torch.int).to(device) - output,aux = model(image) - loss1 = criterion(output, target) - loss2 = criterion(aux, target) - loss = loss1 + 0.4*loss2 - - optimizer.zero_grad() - if apex: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) - batch_size = image.shape[0] - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time)) - cnt = cnt + 1 - - if args.max_steps and cnt > args.max_steps: - break - n = 0 - - -def evaluate(model, criterion, data_loader, device): - model.eval() - metric_logger = utils.MetricLogger(delimiter=" ") - header = 'Test:' - with torch.no_grad(): - n = 0 - for image, target in metric_logger.log_every(data_loader, 100, header): - n = n + 1 - if n >= 200: - pass - image = image.to(device, non_blocking=True) - target = target.to(torch.int).to(device, non_blocking=True) - output= model(image) - loss1 = criterion(output, target) - - loss = loss1 - - acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) - # FIXME need to take into account that the datasets - # could have been padded in distributed setup - batch_size = image.shape[0] - metric_logger.update(loss=loss.item()) - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) - n = 0 - # gather the stats from all processes - print(device) - metric_logger.synchronize_between_processes(device) - - print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' - .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) - return metric_logger.acc1.global_avg - - -def _get_cache_path(filepath): - 
import hashlib - h = hashlib.sha1(filepath.encode()).hexdigest() - cache_path = os.path.join("~", ".torch", "vision", "datasets", "imagefolder", h[:10] + ".pt") - cache_path = os.path.expanduser(cache_path) - return cache_path - - -def main(args): - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29688' - args.device = os.environ['RANK_ID'] - - rad = random.randint(0,50) - args.seed = args.seed + rad - random.seed(args.seed) - np.random.seed(args.seed) - torch.manual_seed(args.seed) - os.environ['PYTHONHASHSEED'] = str(args.seed) - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") - - if args.output_dir: - utils.mkdir(args.output_dir) - - if args.distributed: - utils.init_distributed_mode(args) - - device = torch.device(f'npu:{args.device}') - torch.npu.set_device(device) - - torch.backends.cudnn.benchmark = True - - # Data loading code - print("Loading data") - traindir = os.path.join(args.data_path, 'train') - valdir = os.path.join(args.data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - print("Loading training data") - st = time.time() - cache_path = _get_cache_path(traindir) - if args.cache_dataset and os.path.exists(cache_path): - # Attention, as the transforms are also cached! - print("Loading dataset_train from {}".format(cache_path)) - dataset, _ = torch.load(cache_path) - else: - dataset = torchvision.datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(299), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - if args.cache_dataset: - print("Saving dataset_train to {}".format(cache_path)) - utils.mkdir(os.path.dirname(cache_path)) - utils.save_on_master((dataset, traindir), cache_path) - print("Took", time.time() - st) - - print("Loading validation data") - cache_path = _get_cache_path(valdir) - if args.cache_dataset and os.path.exists(cache_path): - # Attention, as the transforms are also cached! 
- print("Loading dataset_test from {}".format(cache_path)) - dataset_test, _ = torch.load(cache_path) - else: - dataset_test = torchvision.datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.Resize(342), - transforms.CenterCrop(299), - transforms.ToTensor(), - normalize, - ])) - if args.cache_dataset: - print("Saving dataset_test to {}".format(cache_path)) - utils.mkdir(os.path.dirname(cache_path)) - utils.save_on_master((dataset_test, valdir), cache_path) - - print("Creating data loaders") - if args.distributed: - print(len(dataset)) - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) - print(len(train_sampler)) - test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) - else: - train_sampler = torch.utils.data.RandomSampler(dataset) - test_sampler = torch.utils.data.SequentialSampler(dataset_test) - - print(len(train_sampler),args.batch_size,args.workers) - data_loader = torch.utils.data.DataLoader( - dataset, batch_size=args.batch_size, - sampler=train_sampler, num_workers=args.workers,drop_last=True) # pin_memory=True - print(len(data_loader)) - - data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=args.batch_size, - sampler=test_sampler, num_workers=args.workers, drop_last=True) - - print("Creating model") - print(torchvision.models.__dict__) - model = torchvision.models.__dict__[args.model](pretrained=args.pretrained) - #model = inception.inception_v3(pretrained=args.pretrained) - model.to(device) - - - - criterion = nn.CrossEntropyLoss().to(device) - #criterion = nn.CrossEntropyLoss() - - optimizer = apex.optimizers.NpuFusedSGD( - model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - if args.apex: - model, optimizer = amp.initialize(model, optimizer, - opt_level=args.apex_opt_level, - loss_scale=args.loss_scale, - combine_grad=True - ) #loss_scale=args.loss_scale, - - lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) - - model_without_ddp = model - - if args.distributed: - print(args.device) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device],find_unused_parameters=True) # output_device=args.device, - model_without_ddp = model.module - - if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 - - if args.test_only: - evaluate(model, criterion, data_loader_test, device=device) - return - - print("Start training") - start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex) - lr_scheduler.step() - evaluate(model, criterion, data_loader_test, device=device) - if args.output_dir: - checkpoint = { - 'model': model_without_ddp.state_dict(), - 'optimizer': optimizer.state_dict(), - 'lr_scheduler': lr_scheduler.state_dict(), - 'epoch': epoch, - 'args': args} - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) - utils.save_on_master( - checkpoint, - os.path.join(args.output_dir, 'checkpoint.pth')) - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time 
{}'.format(total_time_str)) - - -def parse_args(): - import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') - parser.add_argument('--model', default='resnet18', help='model') - parser.add_argument('--device', default='6', help='device') - parser.add_argument('-b', '--batch-size', default=32, type=int) - parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('--max_steps', default=None, type=int, metavar='N', - help='number of total steps to run') - parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') - parser.add_argument( - "--cache-dataset", - dest="cache_dataset", - help="Cache the datasets for quicker initialization. It also serializes the transforms", - action="store_true", - ) - parser.add_argument( - "--sync-bn", - dest="sync_bn", - help="Use sync batch norm", - action="store_true", - ) - parser.add_argument( - "--test-only", - dest="test_only", - help="Only test the model", - action="store_true", - ) - parser.add_argument( - "--pretrained", - dest="pretrained", - help="Use pre-trained models from the modelzoo", - action="store_true", - ) - - # Mixed precision training parameters - parser.add_argument('--apex', action='store_true', - help='Use apex for mixed precision training') - parser.add_argument('--apex-opt-level', default='O1', type=str, - help='For apex mixed precision training' - 'O0 for FP32 training, O1 for mixed precision training.' 
- 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' - ) - parser.add_argument('--loss_scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') - - # distributed training parameters - parser.add_argument('--world-size', default=1, type=int, - help='number of distributed processes') - parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') - parser.add_argument('--distributed', - action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs.') - parser.add_argument('--dist_rank', - default=0, - type=int, - help='node rank for distributed training') - parser.add_argument('--seed', - default=1, - type=int, - help='Manually set random seed') - - args = parser.parse_args() - - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ============================================================================ +from __future__ import print_function +import datetime +import os +import time +import sys +sys.path.insert(1,os.path.abspath('..')+ "/..") +import apex +import random +import torch +import torch.utils.data +from torch import nn +import torchvision +from torchvision import transforms +import torch.npu +import numpy as np +import inception + +import utils + +try: + from apex import amp +except ImportError: + amp = None + + +def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, print_freq, apex=False): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) + metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) + + header = 'Epoch: [{}]'.format(epoch) + cnt = 0 + n = 0 + for image, target in metric_logger.log_every(data_loader, print_freq, header): + n = n + 1 + if n >= 100: + pass + start_time = time.time() + image, target = image.to(device), target.to(torch.int).to(device) + output,aux = model(image) + loss1 = criterion(output, target) + loss2 = criterion(aux, target) + loss = loss1 + 0.4*loss2 + + optimizer.zero_grad() + if apex: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) + batch_size = image.shape[0] + metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time)) + cnt = cnt + 1 + + if args.max_steps and cnt > args.max_steps: + break + n = 0 + + +def evaluate(model, criterion, data_loader, device): + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = 'Test:' + with torch.no_grad(): + n = 0 + for image, target in metric_logger.log_every(data_loader, 100, header): + n = n + 1 + if n >= 200: + pass + image = image.to(device, non_blocking=True) + target = target.to(torch.int).to(device, non_blocking=True) + output= model(image) + loss1 = criterion(output, target) + + loss = loss1 + + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) + # FIXME need to take into account that the datasets + # could have been padded in distributed setup + batch_size = image.shape[0] + metric_logger.update(loss=loss.item()) + metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) + metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + n = 0 + # gather the stats from all processes + print(device) + metric_logger.synchronize_between_processes(device) + + print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' + .format(top1=metric_logger.acc1, top5=metric_logger.acc5)) + return metric_logger.acc1.global_avg + + +def _get_cache_path(filepath): + import hashlib + h = hashlib.sha1(filepath.encode()).hexdigest() + cache_path = os.path.join("~", ".torch", "vision", "datasets", "imagefolder", h[:10] + ".pt") + cache_path = os.path.expanduser(cache_path) + return cache_path + + +def main(args): + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29688' + args.device = os.environ['RANK_ID'] + + rad = random.randint(0,50) + args.seed = args.seed + rad + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + 
os.environ['PYTHONHASHSEED'] = str(args.seed) + if args.apex: + if sys.version_info < (3, 0): + raise RuntimeError("Apex currently only supports Python 3. Aborting.") + if amp is None: + raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") + + if args.output_dir: + utils.mkdir(args.output_dir) + + if args.distributed: + utils.init_distributed_mode(args) + + device = torch.device(f'npu:{args.device}') + torch.npu.set_device(device) + + torch.backends.cudnn.benchmark = True + + # Data loading code + print("Loading data") + traindir = os.path.join(args.data_path, 'train') + valdir = os.path.join(args.data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + print("Loading training data") + st = time.time() + cache_path = _get_cache_path(traindir) + if args.cache_dataset and os.path.exists(cache_path): + # Attention, as the transforms are also cached! + print("Loading dataset_train from {}".format(cache_path)) + dataset, _ = torch.load(cache_path) + else: + dataset = torchvision.datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(299), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + if args.cache_dataset: + print("Saving dataset_train to {}".format(cache_path)) + utils.mkdir(os.path.dirname(cache_path)) + utils.save_on_master((dataset, traindir), cache_path) + print("Took", time.time() - st) + + print("Loading validation data") + cache_path = _get_cache_path(valdir) + if args.cache_dataset and os.path.exists(cache_path): + # Attention, as the transforms are also cached! + print("Loading dataset_test from {}".format(cache_path)) + dataset_test, _ = torch.load(cache_path) + else: + dataset_test = torchvision.datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.Resize(342), + transforms.CenterCrop(299), + transforms.ToTensor(), + normalize, + ])) + if args.cache_dataset: + print("Saving dataset_test to {}".format(cache_path)) + utils.mkdir(os.path.dirname(cache_path)) + utils.save_on_master((dataset_test, valdir), cache_path) + + print("Creating data loaders") + if args.distributed: + print(len(dataset)) + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + print(len(train_sampler)) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + else: + train_sampler = torch.utils.data.RandomSampler(dataset) + test_sampler = torch.utils.data.SequentialSampler(dataset_test) + + print(len(train_sampler),args.batch_size,args.workers) + data_loader = torch.utils.data.DataLoader( + dataset, batch_size=args.batch_size, + sampler=train_sampler, num_workers=args.workers,drop_last=True) # pin_memory=True + print(len(data_loader)) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=args.batch_size, + sampler=test_sampler, num_workers=args.workers, drop_last=True) + + print("Creating model") + print(torchvision.models.__dict__) + model = torchvision.models.__dict__[args.model](pretrained=args.pretrained) + #model = inception.inception_v3(pretrained=args.pretrained) + model.to(device) + + + + criterion = nn.CrossEntropyLoss().to(device) + #criterion = nn.CrossEntropyLoss() + + optimizer = apex.optimizers.NpuFusedSGD( + model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + if args.apex: + model, optimizer = amp.initialize(model, optimizer, + opt_level=args.apex_opt_level, + 
loss_scale=args.loss_scale, + combine_grad=True + ) #loss_scale=args.loss_scale, + + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma) + + model_without_ddp = model + + if args.distributed: + print(args.device) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.device],find_unused_parameters=True) # output_device=args.device, + model_without_ddp = model.module + + if args.resume: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model']) + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + + if args.test_only: + evaluate(model, criterion, data_loader_test, device=device) + return + + print("Start training") + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex) + lr_scheduler.step() + evaluate(model, criterion, data_loader_test, device=device) + if args.output_dir: + checkpoint = { + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'args': args} + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) + utils.save_on_master( + checkpoint, + os.path.join(args.output_dir, 'checkpoint.pth')) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + + +def parse_args(): + import argparse + parser = argparse.ArgumentParser(description='PyTorch Classification Training') + + parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') + parser.add_argument('--model', default='resnet18', help='model') + parser.add_argument('--device', default='6', help='device') + parser.add_argument('-b', '--batch-size', default=32, type=int) + parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') + parser.add_argument('--max_steps', default=None, type=int, metavar='N', + help='number of total steps to run') + parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', + help='number of data loading workers (default: 16)') + parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') + parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') + parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') + parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') + parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') + parser.add_argument('--print-freq', default=10, type=int, help='print frequency') + parser.add_argument('--output-dir', default='.', help='path where to save') + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument( + "--cache-dataset", + dest="cache_dataset", + help="Cache the datasets for quicker initialization. 
It also serializes the transforms", + action="store_true", + ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + action="store_true", + ) + parser.add_argument( + "--pretrained", + dest="pretrained", + help="Use pre-trained models from the modelzoo", + action="store_true", + ) + + # Mixed precision training parameters + parser.add_argument('--apex', action='store_true', + help='Use apex for mixed precision training') + parser.add_argument('--apex-opt-level', default='O1', type=str, + help='For apex mixed precision training' + 'O0 for FP32 training, O1 for mixed precision training.' + 'For further detail, see https://github.com/NVIDIA/apex/tree/master/examples/imagenet' + ) + parser.add_argument('--loss_scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') + + # distributed training parameters + parser.add_argument('--world-size', default=1, type=int, + help='number of distributed processes') + parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') + parser.add_argument('--distributed', + action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs.') + parser.add_argument('--dist_rank', + default=0, + type=int, + help='node rank for distributed training') + parser.add_argument('--seed', + default=1, + type=int, + help='Manually set random seed') + + args = parser.parse_args() + + return args + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/utils3.py b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/utils3.py index 3ac42e9b7b763e06e8d5d1bcde192b308c44f1a1..b41e2d90abea88f9393a588a82a03a882d0989a7 100644 --- a/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/utils3.py +++ b/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch/utils3.py @@ -1,288 +1,288 @@ -# -# BSD 3-Clause License -# -# Copyright (c) 2017 xxxx -# All rights reserved. -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ============================================================================ -#from __future__ import print_function -from collections import defaultdict, deque -import datetime -import time -import torch -import torch.distributed as dist - -import errno -import os - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size=20, fmt=None): - if fmt is None: - fmt = "{median:.4f} ({global_avg:.4f})" - self.deque = deque(maxlen=window_size) - self.total = 0.0 - self.count = 0.0 - self.fmt = fmt - - def update(self, value, n=1): - self.deque.append(value) - self.count += n - self.total += value * n - - def synchronize_between_processes(self,npu_device): - """ - Warning: does not synchronize the deque! - """ - if not is_dist_avail_and_initialized(): - return - t = torch.as_tensor([self.count, self.total], dtype=torch.float32, device=npu_device) - print(t,t.dtype) - dist.barrier() - dist.all_reduce(t) - t = t.tolist() - self.count = int(t[0]) - self.total = t[1] - - @property - def median(self): - d = torch.tensor(list(self.deque)) - return d.median().item() - - @property - def avg(self): - d = torch.tensor(list(self.deque), dtype=torch.float32) - return d.mean().item() - - @property - def global_avg(self): - return self.total / self.count - - @property - def max(self): - return max(self.deque) - - @property - def value(self): - return self.deque[-1] - - def __str__(self): - return self.fmt.format( - median=self.median, - avg=self.avg, - global_avg=self.global_avg, - max=self.max, - value=self.value) - - -class MetricLogger(object): - def __init__(self, delimiter="\t"): - self.meters = defaultdict(SmoothedValue) - self.delimiter = delimiter - - def update(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - assert isinstance(v, (float, int)) - self.meters[k].update(v) - - def __getattr__(self, attr): - if attr in self.meters: - return self.meters[attr] - if attr in self.__dict__: - return self.__dict__[attr] - raise AttributeError("'{}' object has no attribute '{}'".format( - type(self).__name__, attr)) - - def __str__(self): - loss_str = [] - for name, meter in self.meters.items(): - loss_str.append( - "{}: {}".format(name, str(meter)) - ) - return self.delimiter.join(loss_str) - - def synchronize_between_processes(self,device): - for meter in self.meters.values(): - meter.synchronize_between_processes(device) - - def add_meter(self, name, meter): - self.meters[name] = meter - - def log_every(self, iterable, print_freq, header=None): - i = 0 - if not header: - header = '' - start_time = time.time() - end = time.time() - iter_time = SmoothedValue(fmt='{avg:.4f}') - data_time = SmoothedValue(fmt='{avg:.4f}') - space_fmt = ':' + str(len(str(len(iterable)))) + 'd' - if torch.npu.is_available(): - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 
'time: {time}', - 'data: {data}', - 'max mem: {memory:.0f}' - ]) - else: - log_msg = self.delimiter.join([ - header, - '[{0' + space_fmt + '}/{1}]', - 'eta: {eta}', - '{meters}', - 'time: {time}', - 'data: {data}' - ]) - MB = 1024.0 * 1024.0 - for obj in iterable: - data_time.update(time.time() - end) - yield obj - iter_time.update(time.time() - end) - if i % print_freq == 0: - eta_seconds = iter_time.global_avg * (len(iterable) - i) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - if torch.npu.is_available(): - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time), - memory=0)) - #memory=torch.cuda.max_memory_allocated() / MB)) - else: - print(log_msg.format( - i, len(iterable), eta=eta_string, - meters=str(self), - time=str(iter_time), data=str(data_time))) - i += 1 - end = time.time() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('{} Total time: {}'.format(header, total_time_str)) - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target[None]) - - res = [] - for k in topk: - correct_k = correct[:k].flatten().sum(dtype=torch.float32) - res.append(correct_k * (100.0 / batch_size)) - return res - - -def mkdir(path): - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def setup_for_distributed(is_master): - """ - This function disables printing when not in master process - """ - import builtins as __builtin__ - builtin_print = __builtin__.print - - def print(*args, **kwargs): - force = kwargs.pop('force', False) - if is_master or force: - builtin_print(*args, **kwargs) - - __builtin__.print = print - - -def is_dist_avail_and_initialized(): - if not dist.is_available(): - return False - if not dist.is_initialized(): - return False - return True - - -def get_world_size(): - if not is_dist_avail_and_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank(): - if not is_dist_avail_and_initialized(): - return 0 - return dist.get_rank() - - -def is_main_process(): - return get_rank() == 0 - - -def save_on_master(*args, **kwargs): - torch.save(*args, **kwargs) - - -def init_distributed_mode(args): - if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: - args.rank = int(os.environ["RANK"]) - args.world_size = int(os.environ['WORLD_SIZE']) - args.gpu = int(os.environ['LOCAL_RANK']) - elif 'SLURM_PROCID' in os.environ: - args.rank = int(os.environ['SLURM_PROCID']) - args.gpu = args.rank % torch.cuda.device_count() - elif hasattr(args, "rank"): - pass - else: - print('Not using distributed mode') - args.distributed = False - return - - args.distributed = True - - torch.npu.set_device(args.gpu) - args.dist_backend = 'nccl' - print('| distributed init (rank {}): {}'.format( - args.rank, args.dist_url), flush=True) - torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - setup_for_distributed(args.rank == 0) +# +# BSD 3-Clause License +# +# Copyright (c) 2017 xxxx +# All rights reserved. 
+# Copyright 2021 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ============================================================================ +#from __future__ import print_function +from collections import defaultdict, deque +import datetime +import time +import torch +import torch.distributed as dist + +import errno +import os + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0.0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self,npu_device): + """ + Warning: does not synchronize the deque! 
+ """ + if not is_dist_avail_and_initialized(): + return + t = torch.as_tensor([self.count, self.total], dtype=torch.float32, device=npu_device) + print(t,t.dtype) + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, + avg=self.avg, + global_avg=self.global_avg, + max=self.max, + value=self.value) + + +class MetricLogger(object): + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {}".format(name, str(meter)) + ) + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self,device): + for meter in self.meters.values(): + meter.synchronize_between_processes(device) + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = '' + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt='{avg:.4f}') + data_time = SmoothedValue(fmt='{avg:.4f}') + space_fmt = ':' + str(len(str(len(iterable)))) + 'd' + if torch.npu.is_available(): + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}', + 'max mem: {memory:.0f}' + ]) + else: + log_msg = self.delimiter.join([ + header, + '[{0' + space_fmt + '}/{1}]', + 'eta: {eta}', + '{meters}', + 'time: {time}', + 'data: {data}' + ]) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.npu.is_available(): + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time), + memory=0)) + #memory=torch.cuda.max_memory_allocated() / MB)) + else: + print(log_msg.format( + i, len(iterable), eta=eta_string, + meters=str(self), + time=str(iter_time), data=str(data_time))) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('{} Total time: {}'.format(header, total_time_str)) + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target[None]) + + res = [] + 
for k in topk: + correct_k = correct[:k].flatten().sum(dtype=torch.float32) + res.append(correct_k * (100.0 / batch_size)) + return res + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop('force', False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + elif hasattr(args, "rank"): + pass + else: + print('Not using distributed mode') + args.distributed = False + return + + args.distributed = True + + torch.npu.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format( + args.rank, args.dist_url), flush=True) + torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + setup_for_distributed(args.rank == 0) diff --git a/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_full_8p.sh index 947225d26f217de2c357cb331f4915659d9d04e8..f194144737d9b81dd8295b5ac47020df3a08df53 100644 --- a/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_full_8p.sh @@ -1,223 +1,223 @@ -#!/bin/bash -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 -#当前路径,不需要修改 -cur_path=`pwd` -#ASCEND_DEVICE_ID=1 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID - -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MaskRCNN_ID0101_for_PyTorch" -#训练epoch -train_epochs=82000 -#训练batch_size -batch_size=8 -#训练step -train_steps= -#学习率 -learning_rate= - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - 
--profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conda_name* ]];then - conda_name=`echo ${para#*=}` - source set_conda.sh --conda_name=$conda_name - source activate $conda_name - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - -sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml - -python3 setup.py build develop > $cur_path/../log.txt - - -#训练开始时间,不需要修改 -start_time=$(date +%s) -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - - - #SOLVER.MAX_ITER 82000 \ - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -done -wait - - - - - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/test/output/${i}/ - export RANK=${rank} - echo run process ${rank} - python3 tools/train_net.py \ - --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ - --num-gpus 8 \ - AMP 1\ - OPT_LEVEL O2 \ - LOSS_SCALE_VALUE 64 \ - SOLVER.IMS_PER_BATCH $batch_size \ - SOLVER.MAX_ITER 82000 \ - SEED 1234 \ - MODEL.RPN.NMS_THRESH 0.8 \ - MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ - MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ - DATALOADER.NUM_WORKERS 4 \ - SOLVER.BASE_LR 0.0025 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ -done - - - -wait - - -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( 
$end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|tail -n +3|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` -FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=None -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 +#当前路径,不需要修改 +cur_path=`pwd` +#ASCEND_DEVICE_ID=1 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MaskRCNN_ID0101_for_PyTorch" +#训练epoch +train_epochs=82000 +#训练batch_size +batch_size=8 +#训练step +train_steps= +#学习率 +learning_rate= + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* 
]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh --conda_name=$conda_name + source activate $conda_name + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + +sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml + +python3 setup.py build develop > $cur_path/../log.txt + + +#训练开始时间,不需要修改 +start_time=$(date +%s) +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + + + #SOLVER.MAX_ITER 82000 \ + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +done +wait + + + + + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/test/output/${i}/ + export RANK=${rank} + echo run process ${rank} + python3 tools/train_net.py \ + --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ + --num-gpus 8 \ + AMP 1\ + OPT_LEVEL O2 \ + LOSS_SCALE_VALUE 64 \ + SOLVER.IMS_PER_BATCH $batch_size \ + SOLVER.MAX_ITER 82000 \ + SEED 1234 \ + MODEL.RPN.NMS_THRESH 0.8 \ + MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ + MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ + DATALOADER.NUM_WORKERS 4 \ + SOLVER.BASE_LR 0.0025 > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ +done + + + +wait + + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' 
'{print $2}'|awk '{print $1}'|tail -n +3|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=None +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_performance_8p.sh index e35123e027322d68234548c3ab891400966479d6..0373df8202b65c0a0b59729f19339af95965c171 100644 --- a/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch/test/train_performance_8p.sh @@ -1,224 +1,224 @@ -#!/bin/bash -export HCCL_WHITELIST_DISABLE=1 -export MASTER_ADDR=127.0.0.1 -export MASTER_PORT=23456 -#当前路径,不需要修改 -cur_path=`pwd` -#ASCEND_DEVICE_ID=1 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID - -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="MaskRCNN_ID0101_for_PyTorch" -#训练epoch -train_epochs=100 -#训练batch_size -batch_size=8 -#训练step -train_steps= -#学习率 -learning_rate= - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is 
False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conda_name* ]];then - conda_name=`echo ${para#*=}` - source set_conda.sh --conda_name=$conda_name - source activate $conda_name - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - -sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py -sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml - -python3 setup.py build develop > $cur_path/../log.txt - - -#训练开始时间,不需要修改 -start_time=$(date +%s) -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - - - #SOLVER.MAX_ITER 82000 \ - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 -done -wait - - - - - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/output/${i}/ - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - python3 tools/train_net.py \ - --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ - --num-gpus 8 \ - AMP 1\ - OPT_LEVEL O2 \ - LOSS_SCALE_VALUE 64 \ - SOLVER.IMS_PER_BATCH $batch_size \ - SOLVER.MAX_ITER 100 \ - SEED 1234 \ - MODEL.RPN.NMS_THRESH 0.8 \ - MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ - MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ - DATALOADER.NUM_WORKERS 4 \ - SOLVER.BASE_LR 0.0025 > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & - let rank++ -done - - - -wait - 
- -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -Time=`grep total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|tail -n +3|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` -FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=None -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash +export HCCL_WHITELIST_DISABLE=1 +export MASTER_ADDR=127.0.0.1 +export MASTER_PORT=23456 +#当前路径,不需要修改 +cur_path=`pwd` +#ASCEND_DEVICE_ID=1 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID + +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="MaskRCNN_ID0101_for_PyTorch" +#训练epoch +train_epochs=100 +#训练batch_size +batch_size=8 +#训练step +train_steps= +#学习率 +learning_rate= + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 
+fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh --conda_name=$conda_name + source activate $conda_name + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + +sed -i "s|\"coco_2017_train\": (\"coco/train2017\", \"coco/annotations/instances_train2017.json\")|\"coco_2017_train\": (\"$data_path/coco/train2017\", \"$data_path/coco/annotations/instances_train2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|\"coco_2017_val\": (\"coco/val2017\", \"coco/annotations/instances_val2017.json\")|\"coco_2017_val\": (\"$data_path/coco/val2017\", \"$data_path/coco/annotations/instances_val2017.json\")|g" $cur_path/../detectron2/data/datasets/builtin.py +sed -i "s|WEIGHTS: \"detectron2://ImageNetPretrained/MSRA/R-101.pkl\"|WEIGHTS: \"$data_path/R-101.pkl\"|g" $cur_path/../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml + +python3 setup.py build develop > $cur_path/../log.txt + + +#训练开始时间,不需要修改 +start_time=$(date +%s) +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + + + #SOLVER.MAX_ITER 82000 \ + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 +done +wait + + + + + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/output/${i}/ + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + python3 tools/train_net.py \ + --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml \ + --num-gpus 8 \ + AMP 1\ + OPT_LEVEL O2 \ + LOSS_SCALE_VALUE 64 \ + SOLVER.IMS_PER_BATCH $batch_size \ + SOLVER.MAX_ITER 100 \ + SEED 1234 \ + MODEL.RPN.NMS_THRESH 0.8 \ + MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO 2 \ + MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 2 \ + DATALOADER.NUM_WORKERS 4 \ + SOLVER.BASE_LR 0.0025 > $cur_path/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 & + let rank++ +done + + + +wait + + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + + + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +Time=`grep 
total_loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'time: ' '{print $2}'|awk '{print $1}'|tail -n +3|awk '{sum+=$1} END {print sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=None +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep total_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'total_loss: ' '{print $2}'|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/LICENSE b/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/LICENSE index db05a35866f7f1e2bc78bdfe9e7048e779552d8c..09d493bf1fc257505c1336f3f87425568ab9da3c 100644 --- a/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/LICENSE +++ b/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/LICENSE @@ -1,29 +1,29 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +BSD 3-Clause License + +Copyright (c) 2017, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/src/prefetcher.py b/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/src/prefetcher.py index 5e05ead50092cc817ea11c9a17e4a724d0e0deb8..c0c8310c60e099859f4e6cce206ad940d2c14ef7 100644 --- a/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/src/prefetcher.py +++ b/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch/src/prefetcher.py @@ -1,67 +1,67 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -class Prefetcher(object): - """Prefetcher using on npu device. - - Origin Code URL: - https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_fast.py#L280 - - Args: - loder (torch.utils.data.DataLoader or DataLoader like iterator): - Using to generate inputs after preprocessing. - stream (torch.npu.Stream): Default None. 
- Because of the limitation of NPU's memory mechanism, - if prefetcher is initialized repeatedly during training, - a defined stream should be introduced to prevent memory leakage; - if prefetcher is initialized only once during training, - a defined stream is not necessary. - - Returns: - float: tensors of shape (k, 5) and (k, 1). Labels are 0-based. - """ - - def __init__(self, loader, stream=None): - self.loader = iter(loader) - self.stream = stream if stream is not None else torch.npu.Stream() - self.preload() - - def preload(self): - try: - self.user, self.item, self.rating = next(self.loader) - assert isinstance(self.user, torch.IntTensor) - self.rating = self.rating.float() - except StopIteration: - self.user = None - self.item = None - return - - with torch.npu.stream(self.stream): - self.user = self.user.npu(non_blocking=True) - self.item = self.item.npu(non_blocking=True) - self.rating = self.rating.npu(non_blocking=True) - - def next(self): - torch.npu.current_stream().wait_stream(self.stream) - user = self.user - item = self.item - rating = self.rating - if user is not None: - self.preload() - return user, item, rating +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + + +class Prefetcher(object): + """Prefetcher using on npu device. + + Origin Code URL: + https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_fast.py#L280 + + Args: + loder (torch.utils.data.DataLoader or DataLoader like iterator): + Using to generate inputs after preprocessing. + stream (torch.npu.Stream): Default None. + Because of the limitation of NPU's memory mechanism, + if prefetcher is initialized repeatedly during training, + a defined stream should be introduced to prevent memory leakage; + if prefetcher is initialized only once during training, + a defined stream is not necessary. + + Returns: + float: tensors of shape (k, 5) and (k, 1). Labels are 0-based. 
+ """ + + def __init__(self, loader, stream=None): + self.loader = iter(loader) + self.stream = stream if stream is not None else torch.npu.Stream() + self.preload() + + def preload(self): + try: + self.user, self.item, self.rating = next(self.loader) + assert isinstance(self.user, torch.IntTensor) + self.rating = self.rating.float() + except StopIteration: + self.user = None + self.item = None + return + + with torch.npu.stream(self.stream): + self.user = self.user.npu(non_blocking=True) + self.item = self.item.npu(non_blocking=True) + self.rating = self.rating.npu(non_blocking=True) + + def next(self): + torch.npu.current_stream().wait_stream(self.stream) + user = self.user + item = self.item + rating = self.rating + if user is not None: + self.preload() + return user, item, rating diff --git a/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/additonal_result.PNG b/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/additonal_result.PNG deleted file mode 100644 index f352f7933c8fa3583fe21006a0e3f747e5a3e66b..0000000000000000000000000000000000000000 Binary files a/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/additonal_result.PNG and /dev/null differ diff --git a/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/model_arch.PNG b/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/model_arch.PNG deleted file mode 100644 index 126039f2766b81d68a69277d02c4b37308d045bc..0000000000000000000000000000000000000000 Binary files a/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch/figure/model_arch.PNG and /dev/null differ diff --git a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/main_8p.py b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/main_8p.py index c62e6ae659bac007b63025cf87c6e8a52e2cd16f..1790b4a24da775ef65de4848599f3d7ef24f7cc2 100644 --- a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/main_8p.py +++ b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/main_8p.py @@ -1,478 +1,478 @@ -import argparse -import os -import random -import shutil -import time -import warnings -from enum import Enum - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -import torch.npu -import os -from apex import amp - - -NPU_CALCULATE_DEVICE = 0 -if os.getenv('NPU_CALCULATE_DEVICE') and str.isdigit(os.getenv('NPU_CALCULATE_DEVICE')): - NPU_CALCULATE_DEVICE = int(os.getenv('NPU_CALCULATE_DEVICE')) -if torch.npu.current_device() != NPU_CALCULATE_DEVICE: - torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') -NPU_WORLD_SIZE = int(os.getenv('NPU_WORLD_SIZE')) -RANK = int(os.getenv('RANK')) -torch.distributed.init_process_group('hccl', rank=RANK, world_size=NPU_WORLD_SIZE) - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet18)') 
-parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='hccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -## for ascend 910 - -parser.add_argument('--amp', default=False, action='store_true', - help='use amp to train the model') -parser.add_argument('--loss-scale', default=1024., type=float, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--opt-level', default='O2', type=str, - help='loss scale using in amp, default -1 means dynamic') -parser.add_argument('--FusedSGD', default=False, action='store_true', - help='use FusedSGD') -parser.add_argument('--stop-step-num', default=None, type=int, - help='after the stop-step, killing the training task') - -best_acc1 = 0 - - -def main(): - global best_acc1 - args = parser.parse_args() - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. 
This will completely ' - 'disable data parallelism.') - - - - - - - - - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = models.__dict__[args.arch](pretrained=True) - model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') - #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): - #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch]() - model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') - #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): - #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) - - if not torch.npu.is_available(): - print('using CPU, this will be slow') - - - elif args.gpu is not None: - torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') - model = model.npu(f'npu:{NPU_CALCULATE_DEVICE}') - model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') - #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): - #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) - else: - # DataParallel will divide and allocate batch_size to all available GPUs - if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): - model.features = torch.nn.DataParallel(model.features) - model.npu() - - - # define loss function (criterion) and optimizer - if args.FusedSGD: - from apex.optimizers import NpuFusedSGD - optimizer = NpuFusedSGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - else: - optimizer = torch.optim.SGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - if args.amp: - model, optimizer = amp.initialize( - model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale,combine_grad=True) - if not isinstance(model, torch.nn.parallel.DistributedDataParallel): - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) - # optionally resume from a checkpoint - loc = 'npu:{}'.format(NPU_CALCULATE_DEVICE) - criterion = nn.CrossEntropyLoss().to(loc) - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. 
- loc = 'npu:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=f'npu:{NPU_CALCULATE_DEVICE}') - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(f'npu:{NPU_CALCULATE_DEVICE}') - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - if args.amp: - amp.load_state_dict(checkpoint['amp']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - - #train_sampler = None - train_loader_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - train_loader_batch_size = args.batch_size - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=train_loader_batch_size, shuffle=False, - num_workers=args.workers, pin_memory=True, sampler=train_loader_sampler, drop_last = True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=False, drop_last=True) - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - for epoch in range(args.start_epoch, args.epochs): - train_loader.sampler.set_epoch(epoch) - - - adjust_learning_rate(optimizer, epoch, args) - - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - - if not args.multiprocessing_distributed or (args.multiprocessing_distributed - and args.rank % ngpus_per_node == 0): - if args.amp: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - 'amp': amp.state_dict(), - }, is_best) - else: - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': model.state_dict(), - 'best_acc1': best_acc1, - 'optimizer': optimizer.state_dict(), - }, is_best) - - -def train(train_loader, model, criterion, optimizer, epoch, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.gpu is not None: - images = images.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) - if torch.npu.is_available(): - target = 
target.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - if args.amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - optimizer.step() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % args.print_freq == 0: - progress.display(i) - - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) - losses = AverageMeter('Loss', ':.4e', Summary.NONE) - top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) - top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - images = images.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) - if torch.npu.is_available(): - target = target.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % args.print_freq == 0: - progress.display(i) - - progress.display_summary() - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') - - -class Summary(Enum): - NONE = 0 - AVERAGE = 1 - SUM = 2 - COUNT = 3 - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): - self.name = name - self.fmt = fmt - self.summary_type = summary_type - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - def summary(self): - fmtstr = '' - if self.summary_type is Summary.NONE: - fmtstr = '' - elif self.summary_type is Summary.AVERAGE: - fmtstr = '{name} {avg:.3f}' - elif self.summary_type is Summary.SUM: - fmtstr = '{name} {sum:.3f}' - elif self.summary_type is Summary.COUNT: - fmtstr = '{name} {count:.3f}' - else: - raise ValueError('invalid summary type %r' % self.summary_type) - - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def 
display_summary(self): - entries = [" *"] - entries += [meter.summary() for meter in self.meters] - print(' '.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if __name__ == '__main__': - main() - +import argparse +import os +import random +import shutil +import time +import warnings +from enum import Enum + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import torch.npu +import os +from apex import amp + + +NPU_CALCULATE_DEVICE = 0 +if os.getenv('NPU_CALCULATE_DEVICE') and str.isdigit(os.getenv('NPU_CALCULATE_DEVICE')): + NPU_CALCULATE_DEVICE = int(os.getenv('NPU_CALCULATE_DEVICE')) +if torch.npu.current_device() != NPU_CALCULATE_DEVICE: + torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') +NPU_WORLD_SIZE = int(os.getenv('NPU_WORLD_SIZE')) +RANK = int(os.getenv('RANK')) +torch.distributed.init_process_group('hccl', rank=RANK, world_size=NPU_WORLD_SIZE) + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', + help='path to dataset') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') 
+parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='hccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +## for ascend 910 + +parser.add_argument('--amp', default=False, action='store_true', + help='use amp to train the model') +parser.add_argument('--loss-scale', default=1024., type=float, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--opt-level', default='O2', type=str, + help='loss scale using in amp, default -1 means dynamic') +parser.add_argument('--FusedSGD', default=False, action='store_true', + help='use FusedSGD') +parser.add_argument('--stop-step-num', default=None, type=int, + help='after the stop-step, killing the training task') + +best_acc1 = 0 + + +def main(): + global best_acc1 + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. 
This will completely ' + 'disable data parallelism.') + + + + + + + + + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') + #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): + #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') + #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): + #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) + + if not torch.npu.is_available(): + print('using CPU, this will be slow') + + + elif args.gpu is not None: + torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') + model = model.npu(f'npu:{NPU_CALCULATE_DEVICE}') + model = model.to(f'npu:{NPU_CALCULATE_DEVICE}') + #if not isinstance(model, torch.nn.parallel.DistributedDataParallel): + #model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.npu() + + + # define loss function (criterion) and optimizer + if args.FusedSGD: + from apex.optimizers import NpuFusedSGD + optimizer = NpuFusedSGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + else: + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + if args.amp: + model, optimizer = amp.initialize( + model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale,combine_grad=True) + if not isinstance(model, torch.nn.parallel.DistributedDataParallel): + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[NPU_CALCULATE_DEVICE], broadcast_buffers=False) + # optionally resume from a checkpoint + loc = 'npu:{}'.format(NPU_CALCULATE_DEVICE) + criterion = nn.CrossEntropyLoss().to(loc) + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. 
+ loc = 'npu:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=f'npu:{NPU_CALCULATE_DEVICE}') + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(f'npu:{NPU_CALCULATE_DEVICE}') + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + if args.amp: + amp.load_state_dict(checkpoint['amp']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + + #train_sampler = None + train_loader_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + train_loader_batch_size = args.batch_size + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=train_loader_batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True, sampler=train_loader_sampler, drop_last = True) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, pin_memory=False, drop_last=True) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + train_loader.sampler.set_epoch(epoch) + + + adjust_learning_rate(optimizer, epoch, args) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + if args.amp: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + 'amp': amp.state_dict(), + }, is_best) + else: + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer': optimizer.state_dict(), + }, is_best) + + +def train(train_loader, model, criterion, optimizer, epoch, args): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + images = images.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) + if torch.npu.is_available(): + target = 
target.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + if args.amp: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + +def validate(val_loader, model, criterion, args): + batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + progress = ProgressMeter( + len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if args.gpu is not None: + images = images.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) + if torch.npu.is_available(): + target = target.npu(f'npu:{NPU_CALCULATE_DEVICE}', non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + progress.display_summary() + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + + +class Summary(Enum): + NONE = 0 + AVERAGE = 1 + SUM = 2 + COUNT = 3 + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): + self.name = name + self.fmt = fmt + self.summary_type = summary_type + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + def summary(self): + fmtstr = '' + if self.summary_type is Summary.NONE: + fmtstr = '' + elif self.summary_type is Summary.AVERAGE: + fmtstr = '{name} {avg:.3f}' + elif self.summary_type is Summary.SUM: + fmtstr = '{name} {sum:.3f}' + elif self.summary_type is Summary.COUNT: + fmtstr = '{name} {count:.3f}' + else: + raise ValueError('invalid summary type %r' % self.summary_type) + + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def 
display_summary(self): + entries = [" *"] + entries += [meter.summary() for meter in self.meters] + print(' '.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def adjust_learning_rate(optimizer, epoch, args): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + lr = args.lr * (0.1 ** (epoch // 30)) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() + diff --git a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/modelzoo_level.txt index 484664c2399ae4109859a67aba6cb9facff03cf1..55a9add9fa74832ca908108d73946cd76281a9cd 100644 --- a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_full_8pt.sh b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_full_8pt.sh index e73115f70f335a809bd3185ac6ebb64beeec9a14..5474eb332c6e039c814e7e1e96d6a8436eec31d4 100644 --- a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_full_8pt.sh +++ b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_full_8pt.sh @@ -1,197 +1,197 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#source env.sh -#集合通信参数,不需要修改 - -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="ResNet152_ID0424_for_PyTorch" -#训练epoch -train_epochs=110 -#训练batch_size -batch_size=2048 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.5 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo 
${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/.. -for((RANK_ID=$RANK_ID_START;RANK_ID<1;RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - #export RANK_ID=$RANK_ID - #export ASCEND_DEVICE_ID=$RANK_ID - ASCEND_DEVICE_ID=$RANK_ID - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - # 绑核,不需要的绑核的模型删除,需要的模型审视修改 - #let a=RANK_ID*12 - #let b=RANK_ID+1 - #let c=b*12-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - -done -wait - - - -export MASTER_ADDR=localhost -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - export RANK=${rank} - echo run process ${rank} - python3 main_8p.py \ - ${data_path} \ - -a resnet152 \ - --seed=49 \ - --learning-rate=0.8 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --print-freq=1 \ - --epochs=1 \ - --batch-size=256 \ - --gpu ${i} \ - --FusedSGD \ - --loss-scale=1024 \ - --amp > $cur_path/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & - let rank++ -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log 
-echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#source env.sh +#集合通信参数,不需要修改 + +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet152_ID0424_for_PyTorch" +#训练epoch +train_epochs=110 +#训练batch_size +batch_size=2048 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.5 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/.. 
+for((RANK_ID=$RANK_ID_START;RANK_ID<1;RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + #export RANK_ID=$RANK_ID + #export ASCEND_DEVICE_ID=$RANK_ID + ASCEND_DEVICE_ID=$RANK_ID + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + #let a=RANK_ID*12 + #let b=RANK_ID+1 + #let c=b*12-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + +done +wait + + + +export MASTER_ADDR=localhost +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + export RANK=${rank} + echo run process ${rank} + python3 main_8p.py \ + ${data_path} \ + -a resnet152 \ + --seed=49 \ + --learning-rate=0.8 \ + --mom=0.9 \ + --weight-decay=1.0e-04 \ + --print-freq=1 \ + --epochs=1 \ + --batch-size=256 \ + --gpu ${i} \ + --FusedSGD \ + --loss-scale=1024 \ + --amp > $cur_path/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & + let rank++ +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_performance_16p.sh index e713468687efbd9f4806d8af1d987a3379ed8619..061b47e07b883992129e854b59f1f677043c631a 100644 --- a/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch/test/train_performance_16p.sh @@ -1,213 +1,213 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#source env.sh -#集合通信参数,不需要修改 - -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" -devicesnum="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="ResNet152_ID0424_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=4096 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.5 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - elif [[ $para == --devicesnum* ]];then - devicesnum=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - fi -done - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 -device_num=${#devicesnum} -devices_num=`awk 'BEGIN{printf "%.0f\n",'${device_num}'-1}'` - -NPUS=($(seq 0 $devices_num)) -rank_server=`awk 'BEGIN{printf "%.0f\n",'${device_num}'*'${server_index}'}'` -export NPU_WORLD_SIZE=`awk 
'BEGIN{printf "%.0f\n",'${device_num}'*'${linux_num}'}'` - - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/.. -rank=0 -for((RANK_ID=$RANK_ID_START;RANK_ID<1;RANK_ID++)); -do - #设置环境变量,不需要修改 - ASCEND_DEVICE_ID=$RANK_ID - echo "Device ID: $ASCEND_DEVICE_ID" - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - # 绑核,不需要的绑核的模型删除,需要的模型审视修改 - #let a=RANK_ID*12 - #let b=RANK_ID+1 - #let c=b*12-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - nohup python3 ${cur_path}/../main.py \ - ${data_path} \ - -a resnet152 \ - --addr=$one_node_ip \ - --seed=49 \ - --workers=$(nproc) \ - --learning-rate=${learning_rate} \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --multiprocessing-distributed \ - --world-size=2 \ - --rank=${server_index} \ - --device='npu' \ - --dist-backend='hccl' \ - --epochs=${train_epochs} \ - --batch-size=${batch_size} \ - --amp \ - --device_list=0,1,2,3,4,5,6,7 \ - --FusedSGD \ - --stop-step-num=128 \ - --loss-scale=1024 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -fps=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` -FPS=`awk 'BEGIN{printf "%.2f\n",'${fps}'*2}'` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#source env.sh +#集合通信参数,不需要修改 + +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" +devicesnum="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="ResNet152_ID0424_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=4096 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.5 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +#precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + elif [[ $para == --devicesnum* ]];then + devicesnum=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + fi +done + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 +device_num=${#devicesnum} +devices_num=`awk 'BEGIN{printf "%.0f\n",'${device_num}'-1}'` + +NPUS=($(seq 0 $devices_num)) +rank_server=`awk 'BEGIN{printf "%.0f\n",'${device_num}'*'${server_index}'}'` +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",'${device_num}'*'${linux_num}'}'` + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/.. 
+rank=0 +for((RANK_ID=$RANK_ID_START;RANK_ID<1;RANK_ID++)); +do + #设置环境变量,不需要修改 + ASCEND_DEVICE_ID=$RANK_ID + echo "Device ID: $ASCEND_DEVICE_ID" + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + #let a=RANK_ID*12 + #let b=RANK_ID+1 + #let c=b*12-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 ${cur_path}/../main.py \ + ${data_path} \ + -a resnet152 \ + --addr=$one_node_ip \ + --seed=49 \ + --workers=$(nproc) \ + --learning-rate=${learning_rate} \ + --mom=0.9 \ + --weight-decay=1.0e-04 \ + --print-freq=1 \ + --dist-url='tcp://127.0.0.1:50000' \ + --multiprocessing-distributed \ + --world-size=2 \ + --rank=${server_index} \ + --device='npu' \ + --dist-backend='hccl' \ + --epochs=${train_epochs} \ + --batch-size=${batch_size} \ + --amp \ + --device_list=0,1,2,3,4,5,6,7 \ + --FusedSGD \ + --stop-step-num=128 \ + --loss-scale=1024 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +fps=`grep -a 'FPS' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F " " '{print $NF}'|awk 'END {print}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${fps}'*2}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a '* Acc@1' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Epoch: $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git 
a/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/README_ori.md b/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/README_ori.md index dc0d40de7e7698b34ccbf7eba07d61628d13f9f2..3847a5f095590b08fbcfafed32022ac8229647e0 100644 --- a/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/README_ori.md +++ b/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch/README_ori.md @@ -1,118 +1,118 @@ -# Convolutional Networks for Image Classification in PyTorch - -In this repository you will find implementations of various image classification models. - -Detailed information on each model can be found here: - -## Table Of Contents - -* [Models](#models) -* [Validation accuracy results](#validation-accuracy-results) -* [Training performance results](#training-performance-results) - * [Training performance: NVIDIA DGX A100 (8x A100 40GB)](#training-performance-nvidia-dgx-a100-8x-a100-40gb) - * [Training performance: NVIDIA DGX-1 16GB (8x V100 16GB)](#training-performance-nvidia-dgx-1-16gb-8x-v100-16gb) - * [Training performance: NVIDIA DGX-2 (16x V100 32GB)](#training-performance-nvidia-dgx-2-16x-v100-32gb) -* [Model comparison](#model-comparison) - * [Accuracy vs FLOPS](#accuracy-vs-flops) - * [Latency vs Throughput on different batch sizes](#latency-vs-throughput-on-different-batch-sizes) - -## Models - -The following table provides links to where you can find additional information on each model: - -| **Model** | **Link**| -|:-:|:-:| -| resnet50 | [README](./resnet50v1.5/README.md) | -| resnext101-32x4d | [README](./resnext101-32x4d/README.md) | -| se-resnext101-32x4d | [README](./se-resnext101-32x4d/README.md) | - -## Validation accuracy results - -Our results were obtained by running the applicable -training scripts in the [framework-container-name] NGC container -on NVIDIA DGX-1 with (8x V100 16GB) GPUs. -The specific training script that was run is documented -in the corresponding model's README. - - -The following table shows the validation accuracy results of the -three classification models side-by-side. - - -| **arch** | **AMP Top1** | **AMP Top5** | **FP32 Top1** | **FP32 Top5** | -|:-:|:-:|:-:|:-:|:-:| -| resnet50 | 78.46 | 94.15 | 78.50 | 94.11 | -| resnext101-32x4d | 80.08 | 94.89 | 80.14 | 95.02 | -| se-resnext101-32x4d | 81.01 | 95.52 | 81.12 | 95.54 | - - -## Training performance results - -### Training performance: NVIDIA DGX A100 (8x A100 40GB) - - -Our results were obtained by running the applicable -training scripts in the pytorch-20.06 NGC container -on NVIDIA DGX A100 with (8x A100 40GB) GPUs. -Performance numbers (in images per second) -were averaged over an entire training epoch. -The specific training script that was run is documented -in the corresponding model's README. - -The following table shows the training accuracy results of the -three classification models side-by-side. - - -| **arch** | **Mixed Precision** | **TF32** | **Mixed Precision Speedup** | -|:-------------------:|:-------------------:|:-------------:|:---------------------------:| -| resnet50 | 9488.39 img/s | 5322.10 img/s | 1.78x | -| resnext101-32x4d | 6758.98 img/s | 2353.25 img/s | 2.87x | -| se-resnext101-32x4d | 4670.72 img/s | 2011.21 img/s | 2.32x | - -ResNeXt and SE-ResNeXt use [NHWC data layout](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) when training using Mixed Precision, -which improves the model performance. We are currently working on adding it for ResNet. 
- - -### Training performance: NVIDIA DGX-1 16G (8x V100 16GB) - - -Our results were obtained by running the applicable -training scripts in the pytorch-20.06 NGC container -on NVIDIA DGX-1 with (8x V100 16GB) GPUs. -Performance numbers (in images per second) -were averaged over an entire training epoch. -The specific training script that was run is documented -in the corresponding model's README. - -The following table shows the training accuracy results of the -three classification models side-by-side. - - -| **arch** | **Mixed Precision** | **FP32** | **Mixed Precision Speedup** | -|:-------------------:|:-------------------:|:-------------:|:---------------------------:| -| resnet50 | 6565.61 img/s | 2869.19 img/s | 2.29x | -| resnext101-32x4d | 3922.74 img/s | 1136.30 img/s | 3.45x | -| se-resnext101-32x4d | 2651.13 img/s | 982.78 img/s | 2.70x | - -ResNeXt and SE-ResNeXt use [NHWC data layout](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) when training using Mixed Precision, -which improves the model performance. We are currently working on adding it for ResNet. - - -## Model Comparison - -### Accuracy vs FLOPS -![ACCvsFLOPS](./img/ACCvsFLOPS.png) - -Plot describes relationship between floating point operations -needed for computing forward pass on a 224px x 224px image, -for the implemented models. -Dot size indicates number of trainable parameters. - -### Latency vs Throughput on different batch sizes -![LATvsTHR](./img/LATvsTHR.png) - -Plot describes relationship between -inference latency, throughput and batch size -for the implemented models. - - +# Convolutional Networks for Image Classification in PyTorch + +In this repository you will find implementations of various image classification models. + +Detailed information on each model can be found here: + +## Table Of Contents + +* [Models](#models) +* [Validation accuracy results](#validation-accuracy-results) +* [Training performance results](#training-performance-results) + * [Training performance: NVIDIA DGX A100 (8x A100 40GB)](#training-performance-nvidia-dgx-a100-8x-a100-40gb) + * [Training performance: NVIDIA DGX-1 16GB (8x V100 16GB)](#training-performance-nvidia-dgx-1-16gb-8x-v100-16gb) + * [Training performance: NVIDIA DGX-2 (16x V100 32GB)](#training-performance-nvidia-dgx-2-16x-v100-32gb) +* [Model comparison](#model-comparison) + * [Accuracy vs FLOPS](#accuracy-vs-flops) + * [Latency vs Throughput on different batch sizes](#latency-vs-throughput-on-different-batch-sizes) + +## Models + +The following table provides links to where you can find additional information on each model: + +| **Model** | **Link**| +|:-:|:-:| +| resnet50 | [README](./resnet50v1.5/README.md) | +| resnext101-32x4d | [README](./resnext101-32x4d/README.md) | +| se-resnext101-32x4d | [README](./se-resnext101-32x4d/README.md) | + +## Validation accuracy results + +Our results were obtained by running the applicable +training scripts in the [framework-container-name] NGC container +on NVIDIA DGX-1 with (8x V100 16GB) GPUs. +The specific training script that was run is documented +in the corresponding model's README. + + +The following table shows the validation accuracy results of the +three classification models side-by-side. 
+ + +| **arch** | **AMP Top1** | **AMP Top5** | **FP32 Top1** | **FP32 Top5** | +|:-:|:-:|:-:|:-:|:-:| +| resnet50 | 78.46 | 94.15 | 78.50 | 94.11 | +| resnext101-32x4d | 80.08 | 94.89 | 80.14 | 95.02 | +| se-resnext101-32x4d | 81.01 | 95.52 | 81.12 | 95.54 | + + +## Training performance results + +### Training performance: NVIDIA DGX A100 (8x A100 40GB) + + +Our results were obtained by running the applicable +training scripts in the pytorch-20.06 NGC container +on NVIDIA DGX A100 with (8x A100 40GB) GPUs. +Performance numbers (in images per second) +were averaged over an entire training epoch. +The specific training script that was run is documented +in the corresponding model's README. + +The following table shows the training accuracy results of the +three classification models side-by-side. + + +| **arch** | **Mixed Precision** | **TF32** | **Mixed Precision Speedup** | +|:-------------------:|:-------------------:|:-------------:|:---------------------------:| +| resnet50 | 9488.39 img/s | 5322.10 img/s | 1.78x | +| resnext101-32x4d | 6758.98 img/s | 2353.25 img/s | 2.87x | +| se-resnext101-32x4d | 4670.72 img/s | 2011.21 img/s | 2.32x | + +ResNeXt and SE-ResNeXt use [NHWC data layout](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) when training using Mixed Precision, +which improves the model performance. We are currently working on adding it for ResNet. + + +### Training performance: NVIDIA DGX-1 16G (8x V100 16GB) + + +Our results were obtained by running the applicable +training scripts in the pytorch-20.06 NGC container +on NVIDIA DGX-1 with (8x V100 16GB) GPUs. +Performance numbers (in images per second) +were averaged over an entire training epoch. +The specific training script that was run is documented +in the corresponding model's README. + +The following table shows the training accuracy results of the +three classification models side-by-side. + + +| **arch** | **Mixed Precision** | **FP32** | **Mixed Precision Speedup** | +|:-------------------:|:-------------------:|:-------------:|:---------------------------:| +| resnet50 | 6565.61 img/s | 2869.19 img/s | 2.29x | +| resnext101-32x4d | 3922.74 img/s | 1136.30 img/s | 3.45x | +| se-resnext101-32x4d | 2651.13 img/s | 982.78 img/s | 2.70x | + +ResNeXt and SE-ResNeXt use [NHWC data layout](https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html) when training using Mixed Precision, +which improves the model performance. We are currently working on adding it for ResNet. + + +## Model Comparison + +### Accuracy vs FLOPS +![ACCvsFLOPS](./img/ACCvsFLOPS.png) + +Plot describes relationship between floating point operations +needed for computing forward pass on a 224px x 224px image, +for the implemented models. +Dot size indicates number of trainable parameters. + +### Latency vs Throughput on different batch sizes +![LATvsTHR](./img/LATvsTHR.png) + +Plot describes relationship between +inference latency, throughput and batch size +for the implemented models. 
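
The "Mixed Precision Speedup" column in the tables above is simply the ratio of the mixed-precision throughput to the FP32 (or TF32) throughput, and a per-iteration time follows from dividing the global batch size by the reported images/sec. Below is a minimal Python sketch of that arithmetic, using the resnet50 numbers from the DGX-1 16GB table; the helper names and the assumed 8 x 256 = 2048 global batch size are illustrative only and are not defined anywhere in this repository.

```python
# Illustrative arithmetic for the performance tables above.
# Throughput values are copied from the DGX-1 16GB resnet50 row;
# the function names and the batch size are assumptions for this sketch.

def speedup(mixed_precision_img_s: float, fp32_img_s: float) -> float:
    """Ratio of mixed-precision to FP32 throughput (the 'Speedup' column)."""
    return mixed_precision_img_s / fp32_img_s

def seconds_per_iteration(img_per_sec: float, global_batch_size: int) -> float:
    """Approximate time for one training step at the given global batch size."""
    return global_batch_size / img_per_sec

if __name__ == "__main__":
    # resnet50 on DGX-1 16GB: 6565.61 img/s (AMP) vs 2869.19 img/s (FP32)
    print(f"speedup: {speedup(6565.61, 2869.19):.2f}x")            # ~2.29x
    # assuming 8 GPUs x 256 images = 2048 global batch size
    print(f"s/iter:  {seconds_per_iteration(6565.61, 2048):.3f}")  # ~0.312 s
```
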
+ + diff --git a/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch/requirements.txt b/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch/requirements.txt index 8367c68c70bbf939fb99178b36a63a04bfef92ae..b0b3b089fc4edf671c91f863d00f3b09251a0cd8 100644 --- a/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch/requirements.txt +++ b/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch/requirements.txt @@ -1,3 +1,3 @@ -torch==1.5.0 -apex +torch==1.5.0 +apex torchvision \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml b/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml deleted file mode 100644 index 6944c6fdf3dcaafdc0a740188610fe604cb7d3be..0000000000000000000000000000000000000000 --- a/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml +++ /dev/null @@ -1,42 +0,0 @@ -_BASE_: ../Cityscapes-PanopticSegmentation/Base-PanopticDeepLab-OS16.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-52.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - SEM_SEG_HEAD: - NUM_CLASSES: 133 - LOSS_TOP_K: 1.0 - USE_DEPTHWISE_SEPARABLE_CONV: True - PANOPTIC_DEEPLAB: - STUFF_AREA: 4096 - NMS_KERNEL: 41 - SIZE_DIVISIBILITY: 640 - USE_DEPTHWISE_SEPARABLE_CONV: True -DATASETS: - TRAIN: ("coco_2017_train_panoptic",) - TEST: ("coco_2017_val_panoptic",) -SOLVER: - BASE_LR: 0.0005 - MAX_ITER: 200000 - IMS_PER_BATCH: 64 -INPUT: - FORMAT: "RGB" - GAUSSIAN_SIGMA: 8 - MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 16)]"] - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 640 - MAX_SIZE_TRAIN: 960 - MAX_SIZE_TEST: 640 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (640, 640) diff --git a/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml b/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml deleted file mode 100644 index 8e314204c9b464993d92d3b4d95e2aa9b287b938..0000000000000000000000000000000000000000 --- a/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: Base-PanopticDeepLab-OS16.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-52.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" 
- STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - PANOPTIC_DEEPLAB: - USE_DEPTHWISE_SEPARABLE_CONV: True - SEM_SEG_HEAD: - USE_DEPTHWISE_SEPARABLE_CONV: True -SOLVER: - MAX_ITER: 90000 -INPUT: - FORMAT: "RGB" - CROP: - SIZE: (512, 1024) diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/ README.md b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/ README.md index 5e97ad0585242345f611406fef5e1bb48e6c2b56..acca051a72e6f147c271bb78ac493151bceb1fe5 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/ README.md +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/ README.md @@ -1,262 +1,262 @@ -# VGGNet-PyTorch - -### Update (Feb 14, 2020) - -The update is for ease of use and deployment. - - * [Example: Export to ONNX](#example-export-to-onnx) - * [Example: Extract features](#example-feature-extraction) - * [Example: Visual](#example-visual) - -It is also now incredibly simple to load a pretrained model with a new number of classes for transfer learning: - -```python -from vgg_pytorch import VGG -model = VGG.from_pretrained('vgg11', num_classes=10) -``` - -### Update (January 15, 2020) - -This update allows you to use NVIDIA's Apex tool for accelerated training. By default choice `hybrid training precision` + `dynamic loss amplified` version, if you need to learn more and details about `apex` tools, please visit https://github.com/NVIDIA/apex. - -### Update (January 9, 2020) - -This update adds a visual interface for testing, which is developed by pyqt5. At present, it has realized basic functions, and other functions will be gradually improved in the future. - -### Update (January 6, 2020) - -This update adds a modular neural network, making it more flexible in use. It can be deployed to many common dataset classification tasks. Of course, it can also be used in your products. - -### Overview -This repository contains an op-for-op PyTorch reimplementation of [VGGNet](https://arxiv.org/pdf/1409.1556.pdf). - -The goal of this implementation is to be simple, highly extensible, and easy to integrate into your own projects. This implementation is a work in progress -- new features are currently being implemented. - -At the moment, you can easily: - * Load pretrained VGGNet models - * Use VGGNet models for classification or feature extraction - -_Upcoming features_: In the next few days, you will be able to: - * Quickly finetune an VGGNet on your own dataset - * Export VGGNet models for production - -### Table of contents -1. [About VGG](#about-vgg) -2. [Installation](#installation) -3. [Usage](#usage) - * [Load pretrained models](#loading-pretrained-models) - * [Example: Classify](#example-classification) - * [Example: Extract features](#example-feature-extraction) - * [Example: Export to ONNX](#example-export-to-onnx) - * [Example: Visual](#example-visual) -4. [Contributing](#contributing) - -### About VGG - -If you're new to VGGNets, here is an explanation straight from the official PyTorch implementation: - -In this work we investigate the effect of the convolutional network depth on its -accuracy in the large-scale image recognition setting. Our main contribution is -a thorough evaluation of networks of increasing depth using an architecture with -very small (3 × 3) convolution filters, which shows that a significant improvement -on the prior-art configurations can be achieved by pushing the depth to 16–19 -weight layers. 
These findings were the basis of our ImageNet Challenge 2014 -submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations -generalise well to other datasets, where they achieve state-of-the-art results. We -have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision. - -### Installation - -Install from pypi: -```bash -$ pip3 install vgg_pytorch -``` - -Install from source: -```bash -$ git clone https://github.com/Lornatang/VGGNet-PyTorch.git -$ cd VGGNet-PyTorch -$ pip3 install -e . -``` - -### Usage - -#### Loading pretrained models - -Load an vgg11 network: -```python -from vgg_pytorch import VGG -model = VGG.from_name("vgg11") -``` - -Load a pretrained vgg11: -```python -from vgg_pytorch import VGG -model = VGG.from_pretrained("vgg11") -``` - -Their 1-crop error rates on imagenet dataset with pretrained models are listed below. - -| Model structure | Top-1 error | Top-5 error | -| --------------- | ----------- | ----------- | -| vgg11 | 30.98 | 11.37 | -| vgg11_bn | 29.70 | 10.19 | -| vgg13 | 30.07 | 10.75 | -| vgg13_bn | 28.45 | 9.63 | -| vgg16 | 28.41 | 9.62 | -| vgg16_bn | 26.63 | 8.50 | -| vgg19 | 27.62 | 9.12 | -| vgg19_bn | 25.76 | 8.15 | - -Details about the models are below (for CIFAR10 dataset): - -| *Name* |*# Params*|*Top-1 Acc.*|*Pretrained?*| -|:-----------------:|:--------:|:----------:|:-----------:| -| `vgg11` | 132.9M | 91.1 | √ | -| `vgg13` | 133M | 92.8 | √ | -| `vgg16` | 138.4M | 92.6 | √ | -| `vgg19` | 143.7M | 92.3 | √ | -|-------------------|----------|------------|-------------| -| `vgg11_bn` | 132.9M | 92.2 | √ | -| `vgg13_bn` | 133M | 94.2 | √ | -| `vgg16_bn` | 138.4M | 93.9 | √ | -| `vgg19_bn` | 143.7M | 93.7 | √ | - - -#### Example: Classification - -We assume that in your current directory, there is a `img.jpg` file and a `labels_map.txt` file (ImageNet class names). These are both included in `examples/simple`. - -All pre-trained models expect input images normalized in the same way, -i.e. mini-batches of 3-channel RGB images of shape `(3 x H x W)`, where `H` and `W` are expected to be at least `224`. -The images have to be loaded in to a range of `[0, 1]` and then normalized using `mean = [0.485, 0.456, 0.406]` -and `std = [0.229, 0.224, 0.225]`. - -Here's a sample execution. 
- -```python -import json - -import torch -import torchvision.transforms as transforms -from PIL import Image - -from vgg_pytorch import VGG - -# Open image -input_image = Image.open("img.jpg") - -# Preprocess image -preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), -]) -input_tensor = preprocess(input_image) -input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model - -# Load class names -labels_map = json.load(open("labels_map.txt")) -labels_map = [labels_map[str(i)] for i in range(1000)] - -# Classify with VGG11 -model = VGG.from_pretrained("vgg11") -model.eval() - -# move the input and model to GPU for speed if available -if torch.cuda.is_available(): - input_batch = input_batch.to("cuda") - model.to("cuda") - -with torch.no_grad(): - logits = model(input_batch) -preds = torch.topk(logits, k=5).indices.squeeze(0).tolist() - -print("-----") -for idx in preds: - label = labels_map[idx] - prob = torch.softmax(logits, dim=1)[0, idx].item() - print(f"{label:<75} ({prob * 100:.2f}%)") -``` - -#### Example: Feature Extraction - -You can easily extract features with `model.extract_features`: -```python -import torch -from vgg_pytorch import VGG -model = VGG.from_pretrained('vgg11') - -# ... image preprocessing as in the classification example ... -inputs = torch.randn(1, 3, 224, 224) -print(inputs.shape) # torch.Size([1, 3, 224, 224]) - -features = model.extract_features(inputs) -print(features.shape) # torch.Size([1, 512, 7, 7]) -``` - -#### Example: Export to ONNX - -Exporting to ONNX for deploying to production is now simple: -```python -import torch -from vgg_pytorch import VGG - -model = VGG.from_pretrained('vgg11') -dummy_input = torch.randn(16, 3, 224, 224) - -torch.onnx.export(model, dummy_input, "demo.onnx", verbose=True) -``` - -#### Example: Visual - -```text -cd $REPO$/framework -sh start.sh -``` - -Then open the browser and type in the browser address [http://127.0.0.1:8000/](http://127.0.0.1:8000/). - -Enjoy it. - -#### ImageNet - -See `examples/imagenet` for details about evaluating on ImageNet. - -For more datasets result. Please see `research/README.md`. - -### Contributing - -If you find a bug, create a GitHub issue, or even better, submit a pull request. Similarly, if you have questions, simply post them as GitHub issues. - -I look forward to seeing what the community does with these models! - -### Credit - -#### Very Deep Convolutional Networks for Large-Scale Image Recognition - -*Karen Simonyan, Andrew Zisserman* - -##### Abstract - -In this work we investigate the effect of the convolutional network depth on its accuracy in the -large-scale image recognition setting. Our main contribution is a thorough evaluation of networks -of increasing depth using an architecture with very small (3x3) convolution filters, which shows -that a significant improvement on the prior-art configurations can be achieved by pushing the depth -to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, -where our team secured the first and the second places in the localisation and classification tracks -respectively. We also show that our representations generalise well to other datasets, where they -achieve state-of-the-art results. 
We have made our two best-performing ConvNet models publicly -available to facilitate further research on the use of deep visual representations in computer vision. - -[paper](https://arxiv.org/abs/1409.1556) - -```text -@article{VGG, -title:{Very Deep Convolutional Networks for Large-Scale Image Recognition}, -author:{Karen Simonyan, Andrew Zisserman}, -journal={iclr}, -year={2015} -} +# VGGNet-PyTorch + +### Update (Feb 14, 2020) + +The update is for ease of use and deployment. + + * [Example: Export to ONNX](#example-export-to-onnx) + * [Example: Extract features](#example-feature-extraction) + * [Example: Visual](#example-visual) + +It is also now incredibly simple to load a pretrained model with a new number of classes for transfer learning: + +```python +from vgg_pytorch import VGG +model = VGG.from_pretrained('vgg11', num_classes=10) +``` + +### Update (January 15, 2020) + +This update allows you to use NVIDIA's Apex tool for accelerated training. By default choice `hybrid training precision` + `dynamic loss amplified` version, if you need to learn more and details about `apex` tools, please visit https://github.com/NVIDIA/apex. + +### Update (January 9, 2020) + +This update adds a visual interface for testing, which is developed by pyqt5. At present, it has realized basic functions, and other functions will be gradually improved in the future. + +### Update (January 6, 2020) + +This update adds a modular neural network, making it more flexible in use. It can be deployed to many common dataset classification tasks. Of course, it can also be used in your products. + +### Overview +This repository contains an op-for-op PyTorch reimplementation of [VGGNet](https://arxiv.org/pdf/1409.1556.pdf). + +The goal of this implementation is to be simple, highly extensible, and easy to integrate into your own projects. This implementation is a work in progress -- new features are currently being implemented. + +At the moment, you can easily: + * Load pretrained VGGNet models + * Use VGGNet models for classification or feature extraction + +_Upcoming features_: In the next few days, you will be able to: + * Quickly finetune an VGGNet on your own dataset + * Export VGGNet models for production + +### Table of contents +1. [About VGG](#about-vgg) +2. [Installation](#installation) +3. [Usage](#usage) + * [Load pretrained models](#loading-pretrained-models) + * [Example: Classify](#example-classification) + * [Example: Extract features](#example-feature-extraction) + * [Example: Export to ONNX](#example-export-to-onnx) + * [Example: Visual](#example-visual) +4. [Contributing](#contributing) + +### About VGG + +If you're new to VGGNets, here is an explanation straight from the official PyTorch implementation: + +In this work we investigate the effect of the convolutional network depth on its +accuracy in the large-scale image recognition setting. Our main contribution is +a thorough evaluation of networks of increasing depth using an architecture with +very small (3 × 3) convolution filters, which shows that a significant improvement +on the prior-art configurations can be achieved by pushing the depth to 16–19 +weight layers. These findings were the basis of our ImageNet Challenge 2014 +submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations +generalise well to other datasets, where they achieve state-of-the-art results. 
We +have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision. + +### Installation + +Install from pypi: +```bash +$ pip3 install vgg_pytorch +``` + +Install from source: +```bash +$ git clone https://github.com/Lornatang/VGGNet-PyTorch.git +$ cd VGGNet-PyTorch +$ pip3 install -e . +``` + +### Usage + +#### Loading pretrained models + +Load an vgg11 network: +```python +from vgg_pytorch import VGG +model = VGG.from_name("vgg11") +``` + +Load a pretrained vgg11: +```python +from vgg_pytorch import VGG +model = VGG.from_pretrained("vgg11") +``` + +Their 1-crop error rates on imagenet dataset with pretrained models are listed below. + +| Model structure | Top-1 error | Top-5 error | +| --------------- | ----------- | ----------- | +| vgg11 | 30.98 | 11.37 | +| vgg11_bn | 29.70 | 10.19 | +| vgg13 | 30.07 | 10.75 | +| vgg13_bn | 28.45 | 9.63 | +| vgg16 | 28.41 | 9.62 | +| vgg16_bn | 26.63 | 8.50 | +| vgg19 | 27.62 | 9.12 | +| vgg19_bn | 25.76 | 8.15 | + +Details about the models are below (for CIFAR10 dataset): + +| *Name* |*# Params*|*Top-1 Acc.*|*Pretrained?*| +|:-----------------:|:--------:|:----------:|:-----------:| +| `vgg11` | 132.9M | 91.1 | √ | +| `vgg13` | 133M | 92.8 | √ | +| `vgg16` | 138.4M | 92.6 | √ | +| `vgg19` | 143.7M | 92.3 | √ | +|-------------------|----------|------------|-------------| +| `vgg11_bn` | 132.9M | 92.2 | √ | +| `vgg13_bn` | 133M | 94.2 | √ | +| `vgg16_bn` | 138.4M | 93.9 | √ | +| `vgg19_bn` | 143.7M | 93.7 | √ | + + +#### Example: Classification + +We assume that in your current directory, there is a `img.jpg` file and a `labels_map.txt` file (ImageNet class names). These are both included in `examples/simple`. + +All pre-trained models expect input images normalized in the same way, +i.e. mini-batches of 3-channel RGB images of shape `(3 x H x W)`, where `H` and `W` are expected to be at least `224`. +The images have to be loaded in to a range of `[0, 1]` and then normalized using `mean = [0.485, 0.456, 0.406]` +and `std = [0.229, 0.224, 0.225]`. + +Here's a sample execution. + +```python +import json + +import torch +import torchvision.transforms as transforms +from PIL import Image + +from vgg_pytorch import VGG + +# Open image +input_image = Image.open("img.jpg") + +# Preprocess image +preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) +input_tensor = preprocess(input_image) +input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + +# Load class names +labels_map = json.load(open("labels_map.txt")) +labels_map = [labels_map[str(i)] for i in range(1000)] + +# Classify with VGG11 +model = VGG.from_pretrained("vgg11") +model.eval() + +# move the input and model to GPU for speed if available +if torch.cuda.is_available(): + input_batch = input_batch.to("cuda") + model.to("cuda") + +with torch.no_grad(): + logits = model(input_batch) +preds = torch.topk(logits, k=5).indices.squeeze(0).tolist() + +print("-----") +for idx in preds: + label = labels_map[idx] + prob = torch.softmax(logits, dim=1)[0, idx].item() + print(f"{label:<75} ({prob * 100:.2f}%)") +``` + +#### Example: Feature Extraction + +You can easily extract features with `model.extract_features`: +```python +import torch +from vgg_pytorch import VGG +model = VGG.from_pretrained('vgg11') + +# ... 
image preprocessing as in the classification example ... +inputs = torch.randn(1, 3, 224, 224) +print(inputs.shape) # torch.Size([1, 3, 224, 224]) + +features = model.extract_features(inputs) +print(features.shape) # torch.Size([1, 512, 7, 7]) +``` + +#### Example: Export to ONNX + +Exporting to ONNX for deploying to production is now simple: +```python +import torch +from vgg_pytorch import VGG + +model = VGG.from_pretrained('vgg11') +dummy_input = torch.randn(16, 3, 224, 224) + +torch.onnx.export(model, dummy_input, "demo.onnx", verbose=True) +``` + +#### Example: Visual + +```text +cd $REPO$/framework +sh start.sh +``` + +Then open the browser and type in the browser address [http://127.0.0.1:8000/](http://127.0.0.1:8000/). + +Enjoy it. + +#### ImageNet + +See `examples/imagenet` for details about evaluating on ImageNet. + +For more datasets result. Please see `research/README.md`. + +### Contributing + +If you find a bug, create a GitHub issue, or even better, submit a pull request. Similarly, if you have questions, simply post them as GitHub issues. + +I look forward to seeing what the community does with these models! + +### Credit + +#### Very Deep Convolutional Networks for Large-Scale Image Recognition + +*Karen Simonyan, Andrew Zisserman* + +##### Abstract + +In this work we investigate the effect of the convolutional network depth on its accuracy in the +large-scale image recognition setting. Our main contribution is a thorough evaluation of networks +of increasing depth using an architecture with very small (3x3) convolution filters, which shows +that a significant improvement on the prior-art configurations can be achieved by pushing the depth +to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, +where our team secured the first and the second places in the localisation and classification tracks +respectively. We also show that our representations generalise well to other datasets, where they +achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly +available to facilitate further research on the use of deep visual representations in computer vision. + +[paper](https://arxiv.org/abs/1409.1556) + +```text +@article{VGG, +title:{Very Deep Convolutional Networks for Large-Scale Image Recognition}, +author:{Karen Simonyan, Andrew Zisserman}, +journal={iclr}, +year={2015} +} ``` \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/LICENSE b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/LICENSE index deeea2d8ccdb1354f351a6ea02ed456849d51422..b09cd7856d58590578ee1a4f3ad45d1310a97f87 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/LICENSE +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/LICENSE @@ -1,201 +1,201 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
+Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/requirements.txt b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/requirements.txt index 420b1d1f4aac66daaa4f127fb4954bf98af238d6..a9df0d18fc5856758e7a4736738fc8a7415b484c 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/requirements.txt +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/requirements.txt @@ -1,16 +1,16 @@ -# progress bars in model download and training scripts -tqdm -# Accessing files from S3 directly. -boto3 -# Used for downloading models over HTTP -requests -six -ipdb -#Data processing -h5py -html2text -nltk -progressbar -#Others -onnxruntime +# progress bars in model download and training scripts +tqdm +# Accessing files from S3 directly. 
+boto3 +# Used for downloading models over HTTP +requests +six +ipdb +#Data processing +h5py +html2text +nltk +progressbar +#Others +onnxruntime git+https://github.com/NVIDIA/dllogger \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/test/train_performance_1p.sh index a75708d7e2ee83ef84d0a0a3fa3b51d4bfae6958..58a96bba3198252a5907e92ab575c019e317de3d 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/VGGNet_for_PyTorch/test/train_performance_1p.sh @@ -1,166 +1,166 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="VGGNet_ID0400_for_PyTorch" -#训练epoch -epoch=2 -#训练batch_size -RANK_SIZE=1 -batch_size=64 -#迭代数iteration -iteration=100 -#训练step -#train_steps=1000 -#学习率 -#learning_rate=3.96 - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --autotune whether to enable autotune, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --autotune* ]];then - autotune=`echo ${para#*=}` - #开autotune特有环境变量 - autotune=True - export autotune=True - export REPEAT_TUNE=True - export ASCEND_DEVICE_ID=0 - export ENABLE_TUNE_BANK=True - export TE_PARALLEL_COMPILER=32 - mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak - mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak - autotune_dump_path=${cur_path}/output/autotune_dump - mkdir -p ${autotune_dump_path}/GA - mkdir -p ${autotune_dump_path}/rl - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../examples/simple/ 
-for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - export DEVICE_ID=$RANK_ID - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - nohup python3 test.py \ - > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -grep "Training //" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log > traintime.log -sed -i '1d' traintime.log -traintime=`cat traintime.log | grep "Training //" | awk '{sum+=$13} END {print sum/NR}'` - -#traintime=`grep "Training //" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $13}'` -TrainingTime=`echo "scale=3;${traintime} / 1000"|bc |awk '{printf "%0.3f",$0}'` - -ActualFPS=`echo "scale=2;${batch_size} / ${TrainingTime}"|bc` - -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -#ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk 'END {print $6}' |tr -d ,` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="VGGNet_ID0400_for_PyTorch" +#训练epoch +epoch=2 +#训练batch_size +RANK_SIZE=1 +batch_size=64 +#迭代数iteration +iteration=100 +#训练step +#train_steps=1000 +#学习率 +#learning_rate=3.96 + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --autotune whether to enable autotune, default is False + --data_path source 
data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --autotune* ]];then + autotune=`echo ${para#*=}` + #开autotune特有环境变量 + autotune=True + export autotune=True + export REPEAT_TUNE=True + export ASCEND_DEVICE_ID=0 + export ENABLE_TUNE_BANK=True + export TE_PARALLEL_COMPILER=32 + mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak + mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak + autotune_dump_path=${cur_path}/output/autotune_dump + mkdir -p ${autotune_dump_path}/GA + mkdir -p ${autotune_dump_path}/rl + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../examples/simple/ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + export DEVICE_ID=$RANK_ID + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 test.py \ + > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +grep "Training //" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log > traintime.log +sed -i '1d' traintime.log +traintime=`cat traintime.log | grep "Training //" | awk '{sum+=$13} END {print sum/NR}'` + +#traintime=`grep "Training //" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk 'END {print $13}'` +TrainingTime=`echo "scale=3;${traintime} / 1000"|bc |awk '{printf "%0.3f",$0}'` + +ActualFPS=`echo "scale=2;${batch_size} / ${TrainingTime}"|bc` + +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +#ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk 'END {print $6}' |tr -d ,` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/modelzoo_level.txt index 5afcef9188bf9d39f1e34b45bd91324c6093137a..3117fffc3be7f5c479f10f09ba38a25c47739a00 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:POK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/requirements.txt b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/requirements.txt index 420b1d1f4aac66daaa4f127fb4954bf98af238d6..a9df0d18fc5856758e7a4736738fc8a7415b484c 100644 --- a/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/requirements.txt +++ b/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch/requirements.txt @@ -1,16 +1,16 @@ -# progress bars in model download and training scripts -tqdm -# Accessing files from S3 directly. -boto3 -# Used for downloading models over HTTP -requests -six -ipdb -#Data processing -h5py -html2text -nltk -progressbar -#Others -onnxruntime +# progress bars in model download and training scripts +tqdm +# Accessing files from S3 directly. 
+boto3 +# Used for downloading models over HTTP +requests +six +ipdb +#Data processing +h5py +html2text +nltk +progressbar +#Others +onnxruntime git+https://github.com/NVIDIA/dllogger \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch/modelzoo_level.txt index a17c8f95fa388fbc6d253e2cd7cfd0b73b734073..a829ab59b97a1022dd6fc33b59b7ae0d55009432 100644 --- a/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:NOK +FuncStatus:OK +PerfStatus:NOK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_full_8p.sh index 8b5edcbaa7c7840fb2adfa18a130e68f9c0b546a..053b27b30d58db6900654d457f600d6a77c5050f 100644 --- a/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_full_8p.sh @@ -1,230 +1,230 @@ -#!/bin/bash - -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="vit-base_ID0492_for_PyTorch" -#训练epoch -train_epochs=10000 -#训练batch_size -batch_size=64 -#训练step -train_steps= -#学习率 -learning_rate=0.1 - -#TF2.X独有,需要模型审视修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -PREC="" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_performance_1P.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(O0/O1/O2/O3) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - apex_opt_level=`echo ${para#*=}` - if [[ $apex_opt_level != "O1" ]] && [[ $apex_opt_level != "O2" ]] && [[ $apex_opt_level != "O3" ]]; then - echo "[Error] para \"precision_mode\" must be config O1 or O2 or O3" - exit 1 - fi - PREC="--apex --apex-opt-level "$apex_opt_level - elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - 
-#进入训练脚本目录,需要模型审视修改 -cd $cur_path - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - # 绑核,不需要的绑核的模型删除,需要的模型审视修改 - #let a=RANK_ID*12 - #let b=RANK_ID+1 - #let c=b*12-1 - - #网络特有 拷贝预训练模型到root路径下 - if [ -d /root/.cache/torch/checkpoints/ ];then - echo "File_path exist" - else - mkdir -p /root/.cache/torch/checkpoints/ - fi - cp ${data_path}/mobilenet_v2-b0353104.pth /root/.cache/torch/checkpoints/ - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path - #python3 $cur_path/../train.py \ - #--name cifar10-100_500 \ - #--dataset cifar10 \ - #--model_type ViT-B_16 \ - #--pretrained_dir ${ckpt_path}/ViT-B_16.npz \ - #--addr=127.0.0.1 \ - #--train_batch_size=64 \ - #--num_steps=10000 \ - #--npu-fused-sgd \ - #--fp16 \ - #--data_dir ${data_path} \ - #--fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -export MASTER_ADDR=localhost -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - python3 $cur_path/../train.py \ - --name cifar10-100_500 \ - --dataset cifar10 \ - --model_type ViT-B_16 \ - --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ - --addr=127.0.0.1 \ - --train_batch_size=64 \ - --num_steps=1000 \ - --npu-fused-sgd \ - --fp16 \ - --learning_rate 0.24 \ - --data_dir ${data_path} \ - --ddp True \ - --fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & - let rank++ -done -wait - - - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -sed -i "s|\r|\n|g" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS=" '{print$2}' | awk -F ")" '{print$1}' |tail -n +5 |awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*8}') -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep "Prec@5:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F ":" 'END {print $8}'|cut -c 1-6` -train_accuracy=`grep -a "Best Accuracy:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print$NF}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -#echo "Final Train Accuracy(top5) : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${ActualFPS}'}'` - - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -#grep "Training" 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print$2,$6}'|awk -F "(" '{print$2,$3}'|sort -k 1,1 -u -n |awk -F '=' '{print$2}'|awk -F ')' '{print$1}'|awk '{if(length !=0) print $0}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -grep "Steps" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v X |awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="vit-base_ID0492_for_PyTorch" +#训练epoch +train_epochs=10000 +#训练batch_size +batch_size=64 +#训练step +train_steps= +#学习率 +learning_rate=0.1 + +#TF2.X独有,需要模型审视修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +PREC="" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(O0/O1/O2/O3) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + apex_opt_level=`echo ${para#*=}` + if [[ $apex_opt_level != "O1" ]] && [[ $apex_opt_level != "O2" ]] && [[ $apex_opt_level != "O3" ]]; then + echo "[Error] para \"precision_mode\" must be config O1 or O2 or O3" + exit 1 + fi + PREC="--apex --apex-opt-level "$apex_opt_level + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` 
+ elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + # 绑核,不需要的绑核的模型删除,需要的模型审视修改 + #let a=RANK_ID*12 + #let b=RANK_ID+1 + #let c=b*12-1 + + #网络特有 拷贝预训练模型到root路径下 + if [ -d /root/.cache/torch/checkpoints/ ];then + echo "File_path exist" + else + mkdir -p /root/.cache/torch/checkpoints/ + fi + cp ${data_path}/mobilenet_v2-b0353104.pth /root/.cache/torch/checkpoints/ + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + #python3 $cur_path/../train.py \ + #--name cifar10-100_500 \ + #--dataset cifar10 \ + #--model_type ViT-B_16 \ + #--pretrained_dir ${ckpt_path}/ViT-B_16.npz \ + #--addr=127.0.0.1 \ + #--train_batch_size=64 \ + #--num_steps=10000 \ + #--npu-fused-sgd \ + #--fp16 \ + #--data_dir ${data_path} \ + #--fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +export MASTER_ADDR=localhost +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + mkdir -p $cur_path/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + python3 $cur_path/../train.py \ + --name cifar10-100_500 \ + --dataset cifar10 \ + --model_type ViT-B_16 \ + --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ + --addr=127.0.0.1 \ + --train_batch_size=64 \ + --num_steps=1000 \ + --npu-fused-sgd \ + --fp16 \ + --learning_rate 0.24 \ + --data_dir ${data_path} \ + --ddp True \ + --fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & + let rank++ +done +wait + + + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|\r|\n|g" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS=" '{print$2}' | awk -F ")" '{print$1}' |tail -n +5 |awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*8}') +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep "Prec@5:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F ":" 'END {print $8}'|cut -c 1-6` +train_accuracy=`grep -a "Best Accuracy:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print$NF}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +#echo "Final Train Accuracy(top5) : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据,不需要修改 +#吞吐量 
+ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${ActualFPS}'}'` + + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep "Training" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print$2,$6}'|awk -F "(" '{print$2,$3}'|sort -k 1,1 -u -n |awk -F '=' '{print$2}'|awk -F ')' '{print$1}'|awk '{if(length !=0) print $0}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep "Steps" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v X |awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_performance_8p.sh index 417f5471895b94c6d2bb6b5639664b28983dd774..c7fa78bc46ce9bc10570896f407e61e5d66f7680 100644 --- a/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch/test/train_performance_8p.sh @@ -1,184 +1,184 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` - -#集合通信参数,不需要修改 - -export RANK_SIZE=8 -export JOB_ID=10087 -RANK_ID_START=0 - -RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#设置默认日志级别,不需要修改 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="vit-base_ID0492_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=64 -#训练step -train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.1 - -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False - - -if [[ $1 == --help || $1 == --h ]];then - echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" - exit 1 -fi - -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --ckpt_path* ]];then - ckpt_path=`echo ${para#*=}` - elif [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` - elif [[ $para == --batch_size* ]];then - batch_size=`echo ${para#*=}` - elif [[ $para == --learning_rate* ]];then - learning_rate=`echo ${para#*=}` - fi -done - -PREC="" -if [[ $precision_mode == "amp" ]];then - PREC="--apex" -fi - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -cd $cur_path -#设置环境变量,不需要修改 
-echo "Device ID: $ASCEND_DEVICE_ID" -export RANK_ID=$RANK_ID - -if [ -d $cur_path/output ];then - rm -rf $cur_path/output/* - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi -wait - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#nohup python3 $cur_path/../train.py \ -# --name cifar10-100_500 \ -# --dataset cifar10 \ -# --model_type ViT-B_16 \ -# --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ -# --addr=127.0.0.1 \ -# --train_batch_size=64 \ -# --num_steps=100 \ -# --npu-fused-sgd \ -# --fp16 \ -# --data_dir ${data_path} \ -# --fp16_opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & - - - - - - -export MASTER_ADDR=localhost -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 - -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - mkdir -p $cur_path/output/${i}/ - export NPU_CALCULATE_DEVICE=${i} - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - python3 $cur_path/../train.py \ - --name cifar10-100_500 \ - --dataset cifar10 \ - --model_type ViT-B_16 \ - --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ - --addr=127.0.0.1 \ - --train_batch_size=64 \ - --num_steps=100 \ - --npu-fused-sgd \ - --fp16 \ - --ddp True \ - --data_dir ${data_path} \ - --fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & - let rank++ -done -wait - -wait - -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -sed -i "s|\r|\n|g" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS=" '{print$2}' | awk -F ")" '{print$1}' |tail -n +5 |awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=$(awk 'BEGIN{print '$FPS'*8}') -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -a "Best Accuracy:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'| awk '{print$NF}'` - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -#grep /781 $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -grep "Steps" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v X |awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -sed -i "s/ModuleNotFoundError: No module named "impl.lp_norm_reduce"/ /g" `grep ModuleNotFoundError -rl $cur_path/output/$ASCEND_DEVICE_ID/train_*.log` +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 + +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + +RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#设置默认日志级别,不需要修改 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="vit-base_ID0492_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=64 +#训练step +train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.1 + +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + + +if [[ $1 == --help || $1 == --h ]];then + echo "usage:./train_performance_1p.sh --data_path=data_dir --batch_size=1024 --learning_rate=0.04" + exit 1 +fi + +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + elif [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --batch_size* ]];then + batch_size=`echo ${para#*=}` + elif [[ $para == --learning_rate* ]];then + learning_rate=`echo ${para#*=}` + fi +done + +PREC="" +if [[ $precision_mode == "amp" ]];then + PREC="--apex" +fi + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +cd $cur_path +#设置环境变量,不需要修改 +echo "Device ID: $ASCEND_DEVICE_ID" +export RANK_ID=$RANK_ID + +if [ -d $cur_path/output ];then + rm -rf $cur_path/output/* + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi +wait + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#nohup python3 $cur_path/../train.py \ +# --name cifar10-100_500 \ +# --dataset cifar10 \ +# --model_type ViT-B_16 \ +# --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ +# --addr=127.0.0.1 \ +# --train_batch_size=64 \ +# --num_steps=100 \ +# --npu-fused-sgd \ +# --fp16 \ +# --data_dir ${data_path} \ +# --fp16_opt_level O2 > $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 & + + + + + + +export MASTER_ADDR=localhost +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 + +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + mkdir -p $cur_path/output/${i}/ + export NPU_CALCULATE_DEVICE=${i} + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + python3 $cur_path/../train.py \ + --name cifar10-100_500 \ + --dataset cifar10 \ + --model_type ViT-B_16 \ + --pretrained_dir ${ckpt_path}/ViT-B_16.npz \ + --addr=127.0.0.1 \ + --train_batch_size=64 \ + --num_steps=100 \ + --npu-fused-sgd \ + --fp16 \ + --ddp True \ + --data_dir ${data_path} \ + --fp16_opt_level O2 > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${i}.log 2>&1 & + let rank++ +done +wait + +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +sed -i "s|\r|\n|g" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log +#结果打印,不需要修改 +echo "------------------ Final result ------------------" 
+#输出性能FPS,需要模型审视修改 +FPS=`grep FPS $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS=" '{print$2}' | awk -F ")" '{print$1}' |tail -n +5 |awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=$(awk 'BEGIN{print '$FPS'*8}') +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -a "Best Accuracy:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $3}'| awk '{print$NF}'` + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +#grep /781 $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +grep "Steps" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v X |awk -F "loss=" '{print$2}' | awk -F ")" '{print$1}'|sed '/^$/d' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +sed -i "s/ModuleNotFoundError: No module named "impl.lp_norm_reduce"/ /g" `grep ModuleNotFoundError -rl $cur_path/output/$ASCEND_DEVICE_ID/train_*.log` diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/Dockerfile b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/Dockerfile index 49cbba285f5f05e1af2d734fc9a01bfc5288b0d8..d7deb4196beeb755e9ed5bf77d80a24af2c69365 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/Dockerfile +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/Dockerfile @@ -1,5 +1,5 @@ -ARG FROM_IMAGE_NAME=ascend-pytorch-arm:20.1.0 -FROM ${FROM_IMAGE_NAME} - -COPY requirements.txt . +ARG FROM_IMAGE_NAME=ascend-pytorch-arm:20.1.0 +FROM ${FROM_IMAGE_NAME} + +COPY requirements.txt . 
RUN pip3.7 install -r requirements.txt \ No newline at end of file diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/LICENSE b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/LICENSE index 6c4f543d1529bd48135a0be11f3d79592b5f8219..56cd05a13bf21d1cdb2b4deac546f38d4009d5ff 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/LICENSE +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/LICENSE @@ -1,23 +1,23 @@ -MIT License - -Copyright (c) 2019 Microsoft Corporation -Copyright (c) 2017 Jieru Mei meijieru@gmail.com -Copyright 2020 Huawei Technologies Co., Ltd - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +MIT License + +Copyright (c) 2019 Microsoft Corporation +Copyright (c) 2017 Jieru Mei meijieru@gmail.com +Copyright 2020 Huawei Technologies Co., Ltd + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/README.md b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/README.md index 913d6916c8790926d161357e9926200962ba2518..d04156bd7bd70ac487e247c9c4dd2dc0ef940997 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/README.md +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/README.md @@ -1,25 +1,25 @@ -环境 ----------- - pytorch 1.5 - torchvision 0.5.0 - apex 0.1 - numpy - Pillow 8.1.0 - -Host侧训练步骤 ----------- -1.准备数据集
-2.修改run1p.sh,设置训练参数。device_id是用哪张卡,train_data_path是训练网络输入数据,label_data_path是label数据,lr是学习率,model_save_dir是模型保存目录,epoch是训练多少epoch,batch_size是batchsize
-3.执行bash run1p.sh开启单p训练
- - -Docker侧训练步骤 ----------- - -1.导入镜像二进制包docker import ubuntuarmpytorch.tar REPOSITORY:TAG, 比如: - - docker import ubuntuarmpytorch.tar pytorch:b020 - -2.执行docker_start.sh后带三个参数:步骤1生成的REPOSITORY:TAG;数据集路径(训练和评测数据集都放到该路径下);模型执行路径;比如: - - ./docker_start.sh pytorch:b020 /train/imagenet /home/CRNN_for_Pytorch +环境 +---------- + pytorch 1.5 + torchvision 0.5.0 + apex 0.1 + numpy + Pillow 8.1.0 + +Host侧训练步骤 +---------- +1.准备数据集
+2.修改run1p.sh,设置训练参数。device_id是用哪张卡,train_data_path是训练网络输入数据,label_data_path是label数据,lr是学习率,model_save_dir是模型保存目录,epoch是训练多少epoch,batch_size是batchsize
+3.执行bash run1p.sh开启单p训练
+ + +Docker侧训练步骤 +---------- + +1.导入镜像二进制包docker import ubuntuarmpytorch.tar REPOSITORY:TAG, 比如: + + docker import ubuntuarmpytorch.tar pytorch:b020 + +2.执行docker_start.sh后带三个参数:步骤1生成的REPOSITORY:TAG;数据集路径(训练和评测数据集都放到该路径下);模型执行路径;比如: + + ./docker_start.sh pytorch:b020 /train/imagenet /home/CRNN_for_Pytorch diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/docker_start.sh b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/docker_start.sh index 46ce9a02ec0532d6db324beaee7f7eab501b4565..944bca3cdac8e3f2d47ceb0e2b6eb181a405de11 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/docker_start.sh +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/docker_start.sh @@ -1,25 +1,25 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ +#!/bin/bash + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ + -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ + -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ /bin/bash \ No newline at end of file diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/dscnn_train_pytorch.py b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/dscnn_train_pytorch.py index 2ee8d5ca873cd0e9aceaafb7e3a4dd1e1df491cd..d825d0c0657bc46695ccae2e6738af0f0ea96113 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/dscnn_train_pytorch.py +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/dscnn_train_pytorch.py @@ -1,139 +1,139 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import argparse - -import numpy as np -import torch -from torch.autograd import Variable -from torch.utils.data import DataLoader -import random -import torch.nn as nn -import torch.nn.functional as F - -from PIL import Image -import torch.npu - - -class DataLoad(): - def __init__(self, src_data_root, label_path): - self.images_list = [] - self.label_list = [] - self.num = 0 - - for img_name in os.listdir(src_data_root): - if img_name.endswith(".png"): - src_img_path = os.path.join(src_data_root, img_name) - label_img_path = os.path.join(label_path, img_name) - assert os.path.exists(label_img_path) - self.images_list.append([src_img_path]) - self.label_list.append([label_img_path]) - self.num += 1 - print('train image num: ', self.num) - - def __getitem__(self, index): - - src_image = Image.open(self.images_list[index][0]) - src_image = np.asarray(src_image).astype(np.float32) / 255. - label_image = Image.open(self.label_list[index][0]) - label_image = np.asarray(label_image).astype(np.float32) / 255. - - - src_image = torch.from_numpy(src_image.transpose(2, 0, 1)) - label_image = torch.from_numpy(label_image.transpose(2, 0, 1)) - - return src_image, label_image - - def __len__(self): - return self.num - - -class downsample_net(nn.Module): - def __init__(self): - super(downsample_net, self).__init__() - - self.conv1 = nn.Conv2d(3, 16, 3, 2, 1) - self.conv2 = nn.Conv2d(16, 3, 3, 1, 1) - self.relu = nn.ReLU() - - - def forward(self, x): - x = self.relu(self.conv1(x)) - return self.conv2(x) - - - -def train(src_data_root, label_path, batch_size, model_dir, epoch, lr, device, model_path=None): - dn = downsample_net() - dn = dn.to(device) - - if model_path is not None: - dn.load_state_dict(torch.load(model_path)) - - l1loss = torch.nn.L1Loss().to(device) - - opt = torch.optim.Adam(dn.parameters(), lr=lr) - - dataset = DataLoad(src_data_root, label_path) - train_loader = DataLoader(dataset=dataset, - batch_size=batch_size, - shuffle=True, - num_workers=16) - - all_loss = [] - for ep in range(epoch): - for step, (sample, label) in enumerate(train_loader): - src_image = Variable(sample).to(device) - label_image = Variable(label).to(device) - out = dn(src_image) - - out = nn.functional.interpolate(out, size=[1080, 1920], mode="bilinear") - loss = l1loss(out, label_image) - opt.zero_grad() - loss.backward() - opt.step() - - if step % 50 == 0: - print("epoch {} step {} loss {}".format(ep, step, loss.detach())) - - - - model_path = os.path.join(model_dir, "DSCNN_pytorch_l1_" + str(ep) + ".pkl") - torch.save(dn.state_dict(), model_path) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--device_id', type=int, default=0) - parser.add_argument('--train_data_path', type=str, default='') - parser.add_argument('--label_data_path', type=str, default='') - parser.add_argument('--batch_size', type=int, default=4) - parser.add_argument('--epoch', type=int, default=60) - parser.add_argument('--lr', type=float, default=0.00001) - parser.add_argument('--model_save_dir', type=str, default='') - parser.add_argument('--pre_trained_model_path', type=str, default=None) - args = parser.parse_args() - - device_str = 'npu:' + str(args.device_id) - torch.npu.set_device(device_str) - - train(device=device_str, - src_data_root=args.train_data_path, - batch_size=args.batch_size, - model_dir=args.model_save_dir, - label_path=args.label_data_path, - model_path=args.pre_trained_model_path, - epoch=args.epoch, - lr=args.lr) +# Copyright 2021 Huawei Technologies Co., Ltd +# 
+# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse + +import numpy as np +import torch +from torch.autograd import Variable +from torch.utils.data import DataLoader +import random +import torch.nn as nn +import torch.nn.functional as F + +from PIL import Image +import torch.npu + + +class DataLoad(): + def __init__(self, src_data_root, label_path): + self.images_list = [] + self.label_list = [] + self.num = 0 + + for img_name in os.listdir(src_data_root): + if img_name.endswith(".png"): + src_img_path = os.path.join(src_data_root, img_name) + label_img_path = os.path.join(label_path, img_name) + assert os.path.exists(label_img_path) + self.images_list.append([src_img_path]) + self.label_list.append([label_img_path]) + self.num += 1 + print('train image num: ', self.num) + + def __getitem__(self, index): + + src_image = Image.open(self.images_list[index][0]) + src_image = np.asarray(src_image).astype(np.float32) / 255. + label_image = Image.open(self.label_list[index][0]) + label_image = np.asarray(label_image).astype(np.float32) / 255. + + + src_image = torch.from_numpy(src_image.transpose(2, 0, 1)) + label_image = torch.from_numpy(label_image.transpose(2, 0, 1)) + + return src_image, label_image + + def __len__(self): + return self.num + + +class downsample_net(nn.Module): + def __init__(self): + super(downsample_net, self).__init__() + + self.conv1 = nn.Conv2d(3, 16, 3, 2, 1) + self.conv2 = nn.Conv2d(16, 3, 3, 1, 1) + self.relu = nn.ReLU() + + + def forward(self, x): + x = self.relu(self.conv1(x)) + return self.conv2(x) + + + +def train(src_data_root, label_path, batch_size, model_dir, epoch, lr, device, model_path=None): + dn = downsample_net() + dn = dn.to(device) + + if model_path is not None: + dn.load_state_dict(torch.load(model_path)) + + l1loss = torch.nn.L1Loss().to(device) + + opt = torch.optim.Adam(dn.parameters(), lr=lr) + + dataset = DataLoad(src_data_root, label_path) + train_loader = DataLoader(dataset=dataset, + batch_size=batch_size, + shuffle=True, + num_workers=16) + + all_loss = [] + for ep in range(epoch): + for step, (sample, label) in enumerate(train_loader): + src_image = Variable(sample).to(device) + label_image = Variable(label).to(device) + out = dn(src_image) + + out = nn.functional.interpolate(out, size=[1080, 1920], mode="bilinear") + loss = l1loss(out, label_image) + opt.zero_grad() + loss.backward() + opt.step() + + if step % 50 == 0: + print("epoch {} step {} loss {}".format(ep, step, loss.detach())) + + + + model_path = os.path.join(model_dir, "DSCNN_pytorch_l1_" + str(ep) + ".pkl") + torch.save(dn.state_dict(), model_path) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--device_id', type=int, default=0) + parser.add_argument('--train_data_path', type=str, default='') + parser.add_argument('--label_data_path', type=str, default='') + parser.add_argument('--batch_size', type=int, default=4) + parser.add_argument('--epoch', type=int, default=60) + parser.add_argument('--lr', type=float, default=0.00001) + 
parser.add_argument('--model_save_dir', type=str, default='') + parser.add_argument('--pre_trained_model_path', type=str, default=None) + args = parser.parse_args() + + device_str = 'npu:' + str(args.device_id) + torch.npu.set_device(device_str) + + train(device=device_str, + src_data_root=args.train_data_path, + batch_size=args.batch_size, + model_dir=args.model_save_dir, + label_path=args.label_data_path, + model_path=args.pre_trained_model_path, + epoch=args.epoch, + lr=args.lr) diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/npu_set_env.sh b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/npu_set_env.sh index bf52cfcb774356cfd97a4d9244352cc44e4bcaf0..cda0d6f857fecc2a7168498062c7e1275abee6a7 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/npu_set_env.sh +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/npu_set_env.sh @@ -1,30 +1,30 @@ -############## toolkit situation ################ -#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH -#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ -#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/ -#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so -#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - - -############## nnae situation ################ - - -if [ -d /usr/local/Ascend/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH -else - export 
LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH -fi - -# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/ - -export SLOG_PRINT_TO_STDOUT=0 - +############## toolkit situation ################ +#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH +#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ +#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/ +#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so +#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + + +############## nnae situation ################ + + +if [ -d /usr/local/Ascend/nnae/latest ];then + export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export 
PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH +else + export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH +fi + +# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/ + +export SLOG_PRINT_TO_STDOUT=0 + export TASK_QUEUE_ENABLE=1 \ No newline at end of file diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/pkltar2onnx.py b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/pkltar2onnx.py index ee7c9af5911167f7d425ef216f53601bd0602508..475d1dbf7c0fce6f94019951b9553dd8274c7a6d 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/pkltar2onnx.py +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/pkltar2onnx.py @@ -1,104 +1,104 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the MIT License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/MIT -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -from torch.autograd import Variable - -class downsample_net(nn.Module): - def __init__(self): - super(downsample_net, self).__init__() - self.conv1 = nn.Conv2d(3, 16, 3, 2, 1) - self.conv2 = nn.Conv2d(16, 3, 3, 1, 1) - self.relu = nn.ReLU() - weight_list = [ - [ - [ - [ - 0.257 - ] - ], - [ - [ - 0.504 - ] - ], - [ - [ - 0.098 - ] - ] - ], - - [ - [ - [ - -0.148 - ] - ], - [ - [ - -0.291 - ] - ], - [ - [ - 0.439 - - ] - ] - ], - - [ - [ - [ - 0.439 - ] - ], - [ - [ - -0.368 - ] - ], - [ - [ - -0.071 - ] - ] - ] -] - bias_list = [0.0627450980392157, 0.5019607843137255, 0.5019607843137255] - self.weight = Variable(torch.from_numpy(np.array(weight_list, dtype = 'float32'))) - self.bias = Variable(torch.from_numpy(np.array(bias_list, dtype = 'float32'))) - self.conv_define = nn.Conv2d(3, 3, 1, 1) - self.conv_define.weight.data = self.weight - self.conv_define.bias.data = self.bias - - def forward(self, x): - x = self.relu(self.conv1(x)) - out = self.conv2(x) - out = self.conv_define(out) - out = torch.mul(out, 255.) - return out - - -model_path = "/path/to/pkl" -dn = downsample_net() -dn.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')), strict = False) -print(dn) -dummy_input = torch.randn(1, 3, 1152, 1440) -torch.onnx.export(dn, dummy_input, "/path/to/onnx", verbose=True, keep_initializers_as_inputs=True) - +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the MIT License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from torch.autograd import Variable + +class downsample_net(nn.Module): + def __init__(self): + super(downsample_net, self).__init__() + self.conv1 = nn.Conv2d(3, 16, 3, 2, 1) + self.conv2 = nn.Conv2d(16, 3, 3, 1, 1) + self.relu = nn.ReLU() + weight_list = [ + [ + [ + [ + 0.257 + ] + ], + [ + [ + 0.504 + ] + ], + [ + [ + 0.098 + ] + ] + ], + + [ + [ + [ + -0.148 + ] + ], + [ + [ + -0.291 + ] + ], + [ + [ + 0.439 + + ] + ] + ], + + [ + [ + [ + 0.439 + ] + ], + [ + [ + -0.368 + ] + ], + [ + [ + -0.071 + ] + ] + ] +] + bias_list = [0.0627450980392157, 0.5019607843137255, 0.5019607843137255] + self.weight = Variable(torch.from_numpy(np.array(weight_list, dtype = 'float32'))) + self.bias = Variable(torch.from_numpy(np.array(bias_list, dtype = 'float32'))) + self.conv_define = nn.Conv2d(3, 3, 1, 1) + self.conv_define.weight.data = self.weight + self.conv_define.bias.data = self.bias + + def forward(self, x): + x = self.relu(self.conv1(x)) + out = self.conv2(x) + out = self.conv_define(out) + out = torch.mul(out, 255.) 
+ return out + + +model_path = "/path/to/pkl" +dn = downsample_net() +dn.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')), strict = False) +print(dn) +dummy_input = torch.randn(1, 3, 1152, 1440) +torch.onnx.export(dn, dummy_input, "/path/to/onnx", verbose=True, keep_initializers_as_inputs=True) + diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/requirements.txt b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/requirements.txt index 43aa6450fcd048e0b6ebe1dbc69ffff278649394..628efc0d6d4c6d836e8b64c5541ed0c1d3eee1d2 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/requirements.txt +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/requirements.txt @@ -1,5 +1,5 @@ -torch==1.5.0 -apex -torchvision -numpy -Pillow==8.1.0 +torch==1.5.0 +apex +torchvision +numpy +Pillow==8.1.0 diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run1p.sh b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run1p.sh index 1c59be0db2dbe4755545fcb95708b2da7dd86296..b7755c03137c9aff8cddd09bfb3bc9fbfae28550 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run1p.sh +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run1p.sh @@ -1,15 +1,15 @@ -#!/usr/bin/env bash -source npu_set_env.sh -export SLOG_PRINT_TO_STDOUT=0 -export TASK_QUEUE_ENABLE=1 -export PTCOPY_ENABLE=1 - - -python dscnn_train_pytorch.py --device_id=0 \ - --train_data_path=../datasets/train_data \ - --label_data_path=../datasets/label_data \ - --batch_size=8 \ - --epoch=60 \ - --lr=0.00001 \ - --model_save_dir=./models \ +#!/usr/bin/env bash +source npu_set_env.sh +export SLOG_PRINT_TO_STDOUT=0 +export TASK_QUEUE_ENABLE=1 +export PTCOPY_ENABLE=1 + + +python dscnn_train_pytorch.py --device_id=0 \ + --train_data_path=../datasets/train_data \ + --label_data_path=../datasets/label_data \ + --batch_size=8 \ + --epoch=60 \ + --lr=0.00001 \ + --model_save_dir=./models \ # --pre_trained_model_path=./models/DCNN_bilinear_0226_l1_0.025154941563113792_99.pkl \ No newline at end of file diff --git a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run_to_onnx.sh b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run_to_onnx.sh index a610204ae523947a9681ca84cdb4e4fe672186d5..f667562759653a231678bb505ff9693e5d4f1317 100644 --- a/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run_to_onnx.sh +++ b/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch/run_to_onnx.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash -source npu_set_env.sh - +#!/usr/bin/env bash +source npu_set_env.sh + python3.7 pkltar2onnx.py \ No newline at end of file diff --git a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_full_1p_bs512.sh b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_full_1p_bs512.sh index ca3f13a1021391cfdfad69efa42a479095088d33..6f0a63d0133c14cb755866978db76c99bcbb3aed 100644 --- a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_full_1p_bs512.sh +++ b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_full_1p_bs512.sh @@ -1,214 +1,214 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - -#进入到conda环境 -#source activate py8 - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="FairSeq_Transformer_ID0496_for_PyTorch" -#训练epoch -train_epochs=2000 -#训练batch_size -batch_size=512 -#训练step -#train_steps=`expr 1281167 / 
${batch_size}` -#学习率 -learning_rate=0.495 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` -elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - -#python3 setup.py build_ext --inplace -pip3 install --editable . 
-#sed -i "s|pass|break|g" train.py -#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml - -#修改epoch参数 - - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ - --arch transformer \ - --optimizer adam \ - --adam-betas '(0.9, 0.98)' \ - --clip-norm 0.0 \ - --lr 0.00006 \ - --lr-scheduler inverse_sqrt \ - --warmup-updates 4000 \ - --dropout 0.00 \ - --weight-decay 0.0001 \ - --source-lang de \ - --target-lang en \ - --decoder-attention-heads 4 \ - --decoder-ffn-embed-dim 1024 \ - --encoder-attention-heads 4 \ - --encoder-ffn-embed-dim 1024 \ - --seed 12345 \ - --fp16 \ - --fp16-scale-window 1500 \ - --ddp-backend no_c10d \ - --disable-validation \ - --distributed-no-spawn \ - --required-batch-size-multiple 512 \ - --batch-size 512 \ - --max-epoch 2000 \ - --max-source-positions 1024 \ - --max-target-positions 1024 \ - --num-workers 1 \ - --log-interval 1 \ - --save-interval 1 \ - --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'|tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -#打印,不需要修改 -train_accuracy="Loss" -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType 
= ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#进入到conda环境 +#source activate py8 + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FairSeq_Transformer_ID0496_for_PyTorch" +#训练epoch +train_epochs=2000 +#训练batch_size +batch_size=512 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` +elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + +#python3 setup.py build_ext --inplace +pip3 install --editable . 
+#sed -i "s|pass|break|g" train.py +#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml + +#修改epoch参数 + + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ + --arch transformer \ + --optimizer adam \ + --adam-betas '(0.9, 0.98)' \ + --clip-norm 0.0 \ + --lr 0.00006 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 4000 \ + --dropout 0.00 \ + --weight-decay 0.0001 \ + --source-lang de \ + --target-lang en \ + --decoder-attention-heads 4 \ + --decoder-ffn-embed-dim 1024 \ + --encoder-attention-heads 4 \ + --encoder-ffn-embed-dim 1024 \ + --seed 12345 \ + --fp16 \ + --fp16-scale-window 1500 \ + --ddp-backend no_c10d \ + --disable-validation \ + --distributed-no-spawn \ + --required-batch-size-multiple 512 \ + --batch-size 512 \ + --max-epoch 2000 \ + --max-source-positions 1024 \ + --max-target-positions 1024 \ + --num-workers 1 \ + --log-interval 1 \ + --save-interval 1 \ + --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'|tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +train_accuracy="Loss" +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType 
= ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_bs512.sh b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_bs512.sh index 460f1e01fcfa105f0cd938570bde10dd13f55991..9649fd43445861bfab2cd7c399dc2f7c80276a58 100644 --- a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_bs512.sh +++ b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_bs512.sh @@ -1,214 +1,214 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - -#进入到conda环境 -#source activate py8 - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="FairSeq_Transformer_ID0496_for_PyTorch" -#训练epoch -train_epochs=1 -#训练batch_size -batch_size=512 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.495 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` -elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - -#python3 setup.py build_ext --inplace -pip3 install --editable . 
-#sed -i "s|pass|break|g" train.py -#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml - -#修改epoch参数 - - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ - --arch transformer \ - --optimizer adam \ - --adam-betas '(0.9, 0.98)' \ - --clip-norm 0.0 \ - --lr 0.00006 \ - --lr-scheduler inverse_sqrt \ - --warmup-updates 4000 \ - --dropout 0.00 \ - --weight-decay 0.0001 \ - --source-lang de \ - --target-lang en \ - --decoder-attention-heads 4 \ - --decoder-ffn-embed-dim 1024 \ - --encoder-attention-heads 4 \ - --encoder-ffn-embed-dim 1024 \ - --seed 12345 \ - --fp16 \ - --fp16-scale-window 1500 \ - --ddp-backend no_c10d \ - --disable-validation \ - --distributed-no-spawn \ - --required-batch-size-multiple 512 \ - --batch-size 512 \ - --max-epoch 1 \ - --max-source-positions 1024 \ - --max-target-positions 1024 \ - --num-workers 1 \ - --log-interval 1 \ - --save-interval 1 \ - --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'|tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -#echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#进入到conda环境 +#source activate py8 + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FairSeq_Transformer_ID0496_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=512 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` +elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + +#python3 setup.py build_ext --inplace +pip3 install --editable . 
+#sed -i "s|pass|break|g" train.py +#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml + +#修改epoch参数 + + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ + --arch transformer \ + --optimizer adam \ + --adam-betas '(0.9, 0.98)' \ + --clip-norm 0.0 \ + --lr 0.00006 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 4000 \ + --dropout 0.00 \ + --weight-decay 0.0001 \ + --source-lang de \ + --target-lang en \ + --decoder-attention-heads 4 \ + --decoder-ffn-embed-dim 1024 \ + --encoder-attention-heads 4 \ + --encoder-ffn-embed-dim 1024 \ + --seed 12345 \ + --fp16 \ + --fp16-scale-window 1500 \ + --ddp-backend no_c10d \ + --disable-validation \ + --distributed-no-spawn \ + --required-batch-size-multiple 512 \ + --batch-size 512 \ + --max-epoch 1 \ + --max-source-positions 1024 \ + --max-target-positions 1024 \ + --num-workers 1 \ + --log-interval 1 \ + --save-interval 1 \ + --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'|tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +#echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_dynamic.sh b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_dynamic.sh index e5cf71cffbe40b5d210a712a889231bf089391d9..cfe05e6f440aeceb433cc2d52ebb901bd9819aec 100644 --- a/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_dynamic.sh +++ b/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch/test/train_performance_1p_dynamic.sh @@ -1,215 +1,215 @@ -1#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -#export ASCEND_SLOG_PRINT_TO_STDOUT=1 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - -#进入到conda环境 -#source activate py8 - - - -# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="FairSeq_Transformer_ID0496_for_PyTorch" -#训练epoch -train_epochs=4 -#训练batch_size -batch_size=32 -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -learning_rate=0.495 - -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - precision_mode=`echo ${para#*=}` -elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -#训练开始时间,不需要修改 
-start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ - -#python3 setup.py build_ext --inplace -pip3 install --editable . -#sed -i "s|pass|break|g" train.py -#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml - -#修改epoch参数 - - -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ - --arch transformer \ - --optimizer adam \ - --adam-betas '(0.9, 0.98)' \ - --clip-norm 0.0 \ - --lr 0.00006 \ - --lr-scheduler inverse_sqrt \ - --warmup-updates 4000 \ - --device-id $ASCEND_DEVICE_ID \ - --weight-decay 0.0001 \ - --source-lang de \ - --target-lang en \ - --decoder-attention-heads 4 \ - --decoder-ffn-embed-dim 1024 \ - --encoder-attention-heads 4 \ - --encoder-ffn-embed-dim 1024 \ - --seed 12345 \ - --fp16 \ - --fp16-scale-window 1500 \ - --ddp-backend no_c10d \ - --disable-validation \ - --distributed-no-spawn \ - --max-tokens 15000 \ - --required-batch-size-multiple 32 \ - --max-epoch ${train_epochs} \ - --max-source-positions 1024 \ - --max-target-positions 1024 \ - --num-workers 1 \ - --log-interval 1 \ - --save-interval 1 \ - --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -#Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` -FPS=`grep -rn "wps=" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "wps=" '{print $2}' | awk -F "," '{print $1}' | awk '{if(NR>=325){print}}' | awk 'END {print}' |sed s/[[:space:]]//g` -#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` -#打印,不需要修改 -#echo "Final Train Accuracy : ${train_accuracy}" -#echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` -TrainingTime=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 
-grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +1#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#export ASCEND_SLOG_PRINT_TO_STDOUT=1 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#进入到conda环境 +#source activate py8 + + + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="FairSeq_Transformer_ID0496_for_PyTorch" +#训练epoch +train_epochs=4 +#训练batch_size +batch_size=32 +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +learning_rate=0.495 + +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` +elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ + +#python3 setup.py build_ext --inplace +pip3 install --editable . 
+#sed -i "s|pass|break|g" train.py +#sed -i "s|data/LibriSpeech|$data_path/LibriSpeech|g" config/libri/asr_example.yaml + +#修改epoch参数 + + +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + nohup python3 train.py $data_path/iwslt14.tokenized.de-en \ + --arch transformer \ + --optimizer adam \ + --adam-betas '(0.9, 0.98)' \ + --clip-norm 0.0 \ + --lr 0.00006 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 4000 \ + --device-id $ASCEND_DEVICE_ID \ + --weight-decay 0.0001 \ + --source-lang de \ + --target-lang en \ + --decoder-attention-heads 4 \ + --decoder-ffn-embed-dim 1024 \ + --encoder-attention-heads 4 \ + --encoder-ffn-embed-dim 1024 \ + --seed 12345 \ + --fp16 \ + --fp16-scale-window 1500 \ + --ddp-backend no_c10d \ + --disable-validation \ + --distributed-no-spawn \ + --max-tokens 15000 \ + --required-batch-size-multiple 32 \ + --max-epoch ${train_epochs} \ + --max-source-positions 1024 \ + --max-target-positions 1024 \ + --num-workers 1 \ + --log-interval 1 \ + --save-interval 1 \ + --share-decoder-input-output-embed > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +#Time=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=`grep -rn "wps=" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F "wps=" '{print $2}' | awk -F "," '{print $1}' | awk '{if(NR>=325){print}}' | awk 'END {print}' |sed s/[[:space:]]//g` +#FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${Time}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +#echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` +TrainingTime=`grep "iteration" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F "time =" '{print $2}'|awk -F "ms" '{print $1}'| grep -v "^$" |tail -n +6|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "loss=" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "loss=" '{print $2}'|awk -F "," '{print $1}' > 
$cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch/modelzoo_level.txt index 405b26618a0c92027927a9c583a4b47f640bcf7b..c45626e398eabe6022fe7b2e148f0ffce6400d6e 100644 --- a/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:POK +FuncStatus:OK +PerfStatus:POK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh index b58ee068e0706e47caf7a7be758ce659aec75d07..7ef634d6ce6034ea76b1ffa1bcc2d0d11ff0c6d0 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh @@ -1,174 +1,174 @@ -#!/bin/bash - -cur_path=`pwd` -#集合通信参数,不需要修改 -export RANK_SIZE=16 -export MASTER_PORT=29688 -# 数据集路径,保持为空,不需要修改 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -export BMMV2_ENABLE=1 -#训练epoch -train_epochs=30 -#训练batch_size,,需要模型审视修改 -batch_size=128 - - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print 
$2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. -DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" - - -start_time=$(date +%s) -NPUS=($(seq 0 7)) -rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/output/${i}/ - export ASCEND_DEVICE_ID=${i} - export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` - echo run process ${rank} - - - python3 train_8p.py \ - $data_path \ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --addr ${one_node_ip} \ - --port 29990 \ - --distributed-world-size ${NPU_WORLD_SIZE} \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400 \ - --max-epoch $train_epochs \ - --seed 1 \ - --save-dir $MODELDIR \ - --stat-file $STAT_FILE\ - --log-interval 1\ - --amp\ - --device-id ${rank}\ - --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & - let rank++ -done -wait - - - - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` - -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -rns "Validation" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $6}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> 
${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +#集合通信参数,不需要修改 +export RANK_SIZE=16 +export MASTER_PORT=29688 +# 数据集路径,保持为空,不需要修改 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" + +#网络名称,同目录名称,需要模型审视修改 +Network="Transformer_ID0105_for_PyTorch" + +export BMMV2_ENABLE=1 +#训练epoch +train_epochs=30 +#训练batch_size,,需要模型审视修改 +batch_size=128 + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. 
+DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" + + +start_time=$(date +%s) +NPUS=($(seq 0 7)) +rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/output/${i}/ + export ASCEND_DEVICE_ID=${i} + export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` + echo run process ${rank} + + + python3 train_8p.py \ + $data_path \ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --addr ${one_node_ip} \ + --port 29990 \ + --distributed-world-size ${NPU_WORLD_SIZE} \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400 \ + --max-epoch $train_epochs \ + --seed 1 \ + --save-dir $MODELDIR \ + --stat-file $STAT_FILE\ + --log-interval 1\ + --amp\ + --device-id ${rank}\ + --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & + let rank++ +done +wait + + + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -rns "Validation" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $6}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> 
$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh index be569813bfd793d973ea794bbf292a32258461a2..9b5488afcb8ceb93d65c97d28d60a969924d268d 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_1p.sh @@ -1,141 +1,141 @@ -#!/bin/bash - -cur_path=`pwd` -#集合通信参数,不需要修改 -export RANK_SIZE=1 - -export BMMV2_ENABLE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -#训练batch_size,,需要模型审视修改 -batch_size=128 - -#训练epoch,不需要修改 -epochs=1 - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. -DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" - - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" -start_time=$(date +%s) -python3 -u train_1p.py \ - $data_path \ - --device-id ${ASCEND_DEVICE_ID}\ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400\ - --seed 1 \ - --save-dir $MODELDIR \ - --save-interval 1\ - --update-freq 8\ - --log-interval 1\ - --stat-file $STAT_FILE\ - --distributed-world-size 1\ - --amp\ - --amp-level O2 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=` grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", 
'${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +#集合通信参数,不需要修改 +export RANK_SIZE=1 + +export BMMV2_ENABLE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#网络名称,同目录名称,需要模型审视修改 +Network="Transformer_ID0105_for_PyTorch" + +#训练batch_size,,需要模型审视修改 +batch_size=128 + +#训练epoch,不需要修改 +epochs=1 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. 
+DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" + + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 +export PTCOPY_ENABLE=1 +export TASK_QUEUE_ENABLE=1 +export DYNAMIC_OP="ADD#MUL" +start_time=$(date +%s) +python3 -u train_1p.py \ + $data_path \ + --device-id ${ASCEND_DEVICE_ID}\ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400\ + --seed 1 \ + --save-dir $MODELDIR \ + --save-interval 1\ + --update-freq 8\ + --log-interval 1\ + --stat-file $STAT_FILE\ + --distributed-world-size 1\ + --amp\ + --amp-level O2 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=` grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh index 17208843fcb2540fb3a3eadd4db37b09d1e11945..c43698b0978e08bb9016126464e615632acfb17e 100644 --- 
a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_8p.sh @@ -1,159 +1,159 @@ -#!/bin/bash - -cur_path=`pwd` -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export MASTER_ADDR=localhost -export MASTER_PORT=29688 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -export BMMV2_ENABLE=1 -#训练epoch -train_epochs=30 -#训练batch_size,,需要模型审视修改 -batch_size=128 - - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. -DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" - - -start_time=$(date +%s) -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/output/${i}/ - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - - - python3 train_8p_new.py \ - $data_path \ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --addr '127.0.0.1' \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400 \ - --max-epoch $train_epochs \ - --seed 1 \ - --save-dir $MODELDIR \ - --stat-file $STAT_FILE\ - --log-interval 1\ - --amp\ - --device-id ${rank}\ - --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & - let rank++ -done -wait - - - - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=`grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",8*128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` -#输出训练精度,需要模型审视修改 -train_accuracy=`grep -rns "Validation" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $6}'` - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'$FPS'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" 
${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export MASTER_ADDR=localhost +export MASTER_PORT=29688 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#网络名称,同目录名称,需要模型审视修改 +Network="Transformer_ID0105_for_PyTorch" + +export BMMV2_ENABLE=1 +#训练epoch +train_epochs=30 +#训练batch_size,,需要模型审视修改 +batch_size=128 + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. 
+DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" + + +start_time=$(date +%s) +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/output/${i}/ + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + + + python3 train_8p_new.py \ + $data_path \ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --addr '127.0.0.1' \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400 \ + --max-epoch $train_epochs \ + --seed 1 \ + --save-dir $MODELDIR \ + --stat-file $STAT_FILE\ + --log-interval 1\ + --amp\ + --device-id ${rank}\ + --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & + let rank++ +done +wait + + + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=`grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",8*128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` +#输出训练精度,需要模型审视修改 +train_accuracy=`grep -rns "Validation" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $6}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'$FPS'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo 
"E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh index b358516d854ffd002b0b879011cb8823713d365d..7467af4dbf575f8329b496cc781efc28df5bc967 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh @@ -1,174 +1,174 @@ -#!/bin/bash - -cur_path=`pwd` -#集合通信参数,不需要修改 -export RANK_SIZE=16 -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 -export BMMV2_ENABLE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" -conf_path="" -server_index="" -fix_node_ip="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -#训练batch_size,,需要模型审视修改 -batch_size=128 - - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - elif [[ $para == --conf_path* ]];then - conf_path=`echo ${para#*=}` - elif [[ $para == --server_index* ]];then - server_index=`echo ${para#*=}` - elif [[ $para == --fix_node_ip* ]];then - fix_node_ip=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` -linux_num=`find $conf_path -name "server_*.info" |wc -l` - -export HCCL_IF_IP=$fix_node_ip -export MASTER_ADDR=$one_node_ip - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. 
-DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" - -sed -i "s|if i>100:pass|if i>100:break|g" train_8p.py -sed -i "s|if m >=2:pass|if m >=2:break|g" train_8p.py - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=3 -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" -start_time=$(date +%s) -NPUS=($(seq 0 7)) -rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` -export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/output/${i}/ - export ASCEND_DEVICE_ID=${i} - export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` - echo run process ${rank} - - - python3 train_8p.py \ - $data_path \ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --distributed-world-size ${NPU_WORLD_SIZE} \ - --adam-beta2 0.997 \ - --addr ${one_node_ip} \ - --port 29990 \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400 \ - --seed 1 \ - --save-dir $MODELDIR \ - --stat-file $STAT_FILE\ - --log-interval 1\ - --amp\ - --device-id ${rank}\ - --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & - let rank++ -done -wait -sed -i "s|if i>100:break|if i>100:pass|g" train_8p.py -sed -i "s|if m >=2:break|if m >=2:pass|g" train_8p.py - - - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log 
-echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +#集合通信参数,不需要修改 +export RANK_SIZE=16 +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 +export BMMV2_ENABLE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" +conf_path="" +server_index="" +fix_node_ip="" + +#网络名称,同目录名称,需要模型审视修改 +Network="Transformer_ID0105_for_PyTorch" + +#训练batch_size,,需要模型审视修改 +batch_size=128 + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + elif [[ $para == --conf_path* ]];then + conf_path=`echo ${para#*=}` + elif [[ $para == --server_index* ]];then + server_index=`echo ${para#*=}` + elif [[ $para == --fix_node_ip* ]];then + fix_node_ip=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +linux_num=`find $conf_path -name "server_*.info" |wc -l` + +export HCCL_IF_IP=$fix_node_ip +export MASTER_ADDR=$one_node_ip + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. +DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" + +sed -i "s|if i>100:pass|if i>100:break|g" train_8p.py +sed -i "s|if m >=2:pass|if m >=2:break|g" train_8p.py + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL=3 +export PTCOPY_ENABLE=1 +export TASK_QUEUE_ENABLE=1 +export DYNAMIC_OP="ADD#MUL" +start_time=$(date +%s) +NPUS=($(seq 0 7)) +rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'` +export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'` +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/output/${i}/ + export ASCEND_DEVICE_ID=${i} + export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'` + echo run process ${rank} + + + python3 train_8p.py \ + $data_path \ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --distributed-world-size ${NPU_WORLD_SIZE} \ + --adam-beta2 0.997 \ + --addr ${one_node_ip} \ + --port 29990 \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400 \ + --seed 1 \ + --save-dir $MODELDIR \ + --stat-file $STAT_FILE\ + --log-interval 1\ + --amp\ + --device-id ${rank}\ + --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & + let rank++ +done +wait +sed -i "s|if i>100:break|if i>100:pass|g" train_8p.py +sed -i "s|if m >=2:break|if m >=2:pass|g" train_8p.py + + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" 
+#输出性能FPS,需要模型审视修改 +time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh index ddb273a2f15d05668339c84c397cf711063d9215..a30e490ba70b132eb06eacd819b950d393f71972 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_1p.sh @@ -1,146 +1,146 @@ -#!/bin/bash - -cur_path=`pwd` -#集合通信参数,不需要修改 -export RANK_SIZE=1 - -export BMMV2_ENABLE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -#训练batch_size,,需要模型审视修改 -batch_size=128 - -#训练epoch,不需要修改 -epochs=1 - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. 
-DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" -sed -i "s|if i>100:pass|if i>100:break|g" train_1p.py -sed -i "s|if m >=2:pass|if m >=2:break|g" train_1p.py - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -#export DYNAMIC_OP="ADD#MUL" -export COMBINED_ENABLE=1 -export SCALAR_TO_HOST_MEM=1 -start_time=$(date +%s) - -python3 -u train_1p.py \ - $data_path \ - --device-id ${ASCEND_DEVICE_ID}\ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400\ - --seed 1 \ - --save-dir $MODELDIR \ - --save-interval 1\ - --update-freq 8\ - --log-interval 1\ - --stat-file $STAT_FILE\ - --distributed-world-size 1\ - --amp\ - --amp-level O2 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait -sed -i "s|if i>100:break|if i>100:pass|g" train_1p.py -sed -i "s|if m >=2:break|if m>=2:pass|g" train_1p.py -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=` grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +#集合通信参数,不需要修改 +export RANK_SIZE=1 + +export BMMV2_ENABLE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#网络名称,同目录名称,需要模型审视修改 
+Network="Transformer_ID0105_for_PyTorch" + +#训练batch_size,,需要模型审视修改 +batch_size=128 + +#训练epoch,不需要修改 +epochs=1 + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. +DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" +sed -i "s|if i>100:pass|if i>100:break|g" train_1p.py +sed -i "s|if m >=2:pass|if m >=2:break|g" train_1p.py + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 +export PTCOPY_ENABLE=1 +export TASK_QUEUE_ENABLE=1 +#export DYNAMIC_OP="ADD#MUL" +export COMBINED_ENABLE=1 +export SCALAR_TO_HOST_MEM=1 +start_time=$(date +%s) + +python3 -u train_1p.py \ + $data_path \ + --device-id ${ASCEND_DEVICE_ID}\ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400\ + --seed 1 \ + --save-dir $MODELDIR \ + --save-interval 1\ + --update-freq 8\ + --log-interval 1\ + --stat-file $STAT_FILE\ + --distributed-world-size 1\ + --amp\ + --amp-level O2 > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + +wait +sed -i "s|if i>100:break|if i>100:pass|g" train_1p.py +sed -i "s|if m >=2:break|if m>=2:pass|g" train_1p.py +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=` grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = 
${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh index 71a10edeead5c15b9abe30d1cd7d3fdcb83147b7..5b8466426591ae7149b25c76d040f4d6bbc1965a 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_8p.sh @@ -1,161 +1,161 @@ -#!/bin/bash - -cur_path=`pwd` -nmon -s3 -c 500 -f -m $cur_path -#集合通信参数,不需要修改 -export RANK_SIZE=8 -export MASTER_ADDR=localhost -export MASTER_PORT=29688 -export HCCL_WHITELIST_DISABLE=1 -export BMMV2_ENABLE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -#网络名称,同目录名称,需要模型审视修改 -Network="Transformer_ID0105_for_PyTorch" - -#训练batch_size,,需要模型审视修改 -batch_size=128 - - - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --epochs* ]];then - epochs=`echo ${para#*=}` - fi -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - - - - -#创建DeviceID输出目录,不需要修改 -if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then - rm -rf $cur_path/output/$ASCEND_DEVICE_ID - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -else - mkdir -p $cur_path/output/$ASCEND_DEVICE_ID -fi - -#################启动训练脚本################# - - -# 必要参数替换配置文件 -cd $cur_path/.. 
-DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ -MODELDIR="./checkpoints/" -mkdir -p "$MODELDIR" -LOGFILE="$MODELDIR/log" -STAT_FILE="log.txt" - -sed -i "s|if i>100:pass|if i>100:break|g" train_8p_new.py -sed -i "s|if m >=2:pass|if m >=2:break|g" train_8p_new.py - -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 -export PTCOPY_ENABLE=1 -export TASK_QUEUE_ENABLE=1 -export DYNAMIC_OP="ADD#MUL" -start_time=$(date +%s) -NPUS=($(seq 0 7)) -export NPU_WORLD_SIZE=${#NPUS[@]} -rank=0 -for i in ${NPUS[@]} -do - export NPU_CALCULATE_DEVICE=${i} - mkdir -p $cur_path/output/${i}/ - export ASCEND_DEVICE_ID=${i} - export RANK=${rank} - echo run process ${rank} - - - python3 train_8p_new.py \ - $data_path \ - --arch transformer_wmt_en_de \ - --share-all-embeddings \ - --optimizer adam \ - --adam-beta1 0.9 \ - --adam-beta2 0.997 \ - --addr '127.0.0.1' \ - --port 29990 \ - --adam-eps "1e-9" \ - --clip-norm 0.0 \ - --lr-scheduler inverse_sqrt \ - --warmup-init-lr 0.0 \ - --warmup-updates 4000 \ - --lr 0.0006 \ - --min-lr 0.0 \ - --dropout 0.1 \ - --weight-decay 0.0 \ - --criterion label_smoothed_cross_entropy \ - --label-smoothing 0.1 \ - --max-sentences 128\ - --max-tokens 102400 \ - --seed 1 \ - --save-dir $MODELDIR \ - --stat-file $STAT_FILE\ - --log-interval 1\ - --amp\ - --device-id ${rank}\ - --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & - let rank++ -done -wait -sed -i "s|if i>100:break|if i>100:pass|g" train_8p_new.py -sed -i "s|if m >=2:break|if m >=2:pass|g" train_8p_new.py - - - -##################获取训练数据################ -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",8*128*NR/sum}'|sed s/[[:space:]]//g` -FPS=`python3 -c "print(${time}*96)"` -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#打印,不需要修改 -echo "E2E Training Duration sec : $e2e_time" - -#性能看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -#获取性能数据,不需要修改 -#吞吐量 -ActualFPS=${FPS} -#单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 -grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> 
${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +cur_path=`pwd` +nmon -s3 -c 500 -f -m $cur_path +#集合通信参数,不需要修改 +export RANK_SIZE=8 +export MASTER_ADDR=localhost +export MASTER_PORT=29688 +export HCCL_WHITELIST_DISABLE=1 +export BMMV2_ENABLE=1 +# 数据集路径,保持为空,不需要修改 +data_path="" + +#网络名称,同目录名称,需要模型审视修改 +Network="Transformer_ID0105_for_PyTorch" + +#训练batch_size,,需要模型审视修改 +batch_size=128 + + + +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --epochs* ]];then + epochs=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + + + +#创建DeviceID输出目录,不需要修改 +if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then + rm -rf $cur_path/output/$ASCEND_DEVICE_ID + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +else + mkdir -p $cur_path/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# + + +# 必要参数替换配置文件 +cd $cur_path/.. +DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/ +MODELDIR="./checkpoints/" +mkdir -p "$MODELDIR" +LOGFILE="$MODELDIR/log" +STAT_FILE="log.txt" + +sed -i "s|if i>100:pass|if i>100:break|g" train_8p_new.py +sed -i "s|if m >=2:pass|if m >=2:break|g" train_8p_new.py + +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export ASCEND_GLOBAL_LOG_LEVEL_ETP=3 +export PTCOPY_ENABLE=1 +export TASK_QUEUE_ENABLE=1 +export DYNAMIC_OP="ADD#MUL" +start_time=$(date +%s) +NPUS=($(seq 0 7)) +export NPU_WORLD_SIZE=${#NPUS[@]} +rank=0 +for i in ${NPUS[@]} +do + export NPU_CALCULATE_DEVICE=${i} + mkdir -p $cur_path/output/${i}/ + export ASCEND_DEVICE_ID=${i} + export RANK=${rank} + echo run process ${rank} + + + python3 train_8p_new.py \ + $data_path \ + --arch transformer_wmt_en_de \ + --share-all-embeddings \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.997 \ + --addr '127.0.0.1' \ + --port 29990 \ + --adam-eps "1e-9" \ + --clip-norm 0.0 \ + --lr-scheduler inverse_sqrt \ + --warmup-init-lr 0.0 \ + --warmup-updates 4000 \ + --lr 0.0006 \ + --min-lr 0.0 \ + --dropout 0.1 \ + --weight-decay 0.0 \ + --criterion label_smoothed_cross_entropy \ + --label-smoothing 0.1 \ + --max-sentences 128\ + --max-tokens 102400 \ + --seed 1 \ + --save-dir $MODELDIR \ + --stat-file $STAT_FILE\ + --log-interval 1\ + --amp\ + --device-id ${rank}\ + --amp-level O2 > $cur_path/output/${i}/train_${i}.log 2>&1 & + let rank++ +done +wait +sed -i "s|if i>100:break|if i>100:pass|g" train_8p_new.py +sed -i "s|if m >=2:break|if m >=2:pass|g" train_8p_new.py + + + +##################获取训练数据################ +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +time=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",8*128*NR/sum}'|sed s/[[:space:]]//g` +FPS=`python3 -c "print(${time}*96)"` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +#获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要模型审视修改 +grep -rns 
"Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py index 0179a825cfad84454d00ed41788b4f50e8bb39de..514d8c8326da0a8b4b06f8459aae70719f45e2c1 100644 --- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py +++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py @@ -1,417 +1,417 @@ -#!/usr/bin/env python3 -u -# Copyright (c) 2017-present, Facebook, Inc. -# All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# This source code is licensed under the license found in the LICENSE file in -# the root directory of this source tree. An additional grant of patent rights -# can be found in the PATENTS file in the same directory. -# -# ------------------------------------------------------------------------- -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import collections -import itertools -import os -import math -import torch -import torch.npu -import time -import ctypes - -import sys -import threading - -from copy import deepcopy -from utils import distributed_utils, options, utils -from utils.ddp_trainer import DDPTrainer -from utils.meters import StopwatchMeter, TimeMeter -import data -from data import tokenizer, dictionary, data_utils, load_dataset_splits -from models import build_model -import torch.distributed as dist -import torch.multiprocessing as mp -import numpy as np - -import dllogger as DLLogger -from utils.log_helper import AggregatorBackend, setup_logger - - -NPU_CALCULATE_DEVICE = 0 -if os.getenv('NPU_CALCULATE_DEVICE') and str.isdigit(os.getenv('NPU_CALCULATE_DEVICE')): - NPU_CALCULATE_DEVICE = int(os.getenv('NPU_CALCULATE_DEVICE')) -if torch.npu.current_device() != NPU_CALCULATE_DEVICE: - torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') -NPU_WORLD_SIZE = int(os.getenv('NPU_WORLD_SIZE')) -RANK = int(os.getenv('RANK')) -torch.distributed.init_process_group('hccl', rank=RANK, world_size=NPU_WORLD_SIZE) -MAX = 2147483647 - -def _gen_seeds(shape): - return np.random.uniform(1, MAX, size=shape).astype(np.float32) -seed_shape = (32 * 1024 * 12, ) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def main(): - parser = options.get_training_parser() - args = options.parse_args_and_arch(parser) - print(args) - os.environ['MASTER_ADDR'] = args.addr - os.environ['MASTER_PORT'] = args.port - #mp.spawn(main_worker, nprocs=args.distributed_world_size, args=(args.distributed_world_size, args)) - main_worker(pid_idx=RANK, args=args) - - - -def main_worker(pid_idx, args): - setup_logger(args) - print('pid_idx:',str(pid_idx)) - args.distributed_rank = pid_idx - args.device_id = args.distributed_rank - #dist.init_process_group(backend=args.dist_backend, world_size=NPU_WORLD_SIZE, rank=args.distributed_rank) - loc = 'npu:{}'.format(args.device_id) - torch.npu.set_device(loc) - - if args.max_tokens is None: - args.max_tokens = 6000 - - torch.manual_seed(args.seed) - - src_dict, tgt_dict = data_utils.load_dictionaries(args) - add_extra_items_to_checkpoint({'src_dict': src_dict, 'tgt_dict': tgt_dict}) - datasets = load_dataset_splits(args, ['train', 'valid', 'test'], src_dict, tgt_dict) - - seed = _gen_seeds(seed_shape) - seed = torch.from_numpy(seed) - seed = seed.to(loc) - model = build_model(args, seed=seed) - if args.distributed_world_size > 1 : - print('| num. 
model params: {}'.format(sum(p.numel() for p in model.parameters()))) - - # Build trainer - trainer = DDPTrainer(args, model) - if args.distributed_world_size > 1 : - print('| model {}, criterion {}'.format(args.arch, trainer.criterion.__class__.__name__)) - print('| training on {} NPUs'.format(args.distributed_world_size)) - - if args.distributed_world_size > 1 : - print('| max sentences per NPU = {}'.format(args.max_sentences)) - - epoch_itr = data.EpochBatchIterator( - dataset=datasets[args.train_subset], - max_tokens=args.max_tokens, - max_sentences=args.max_sentences_valid, - max_positions=args.max_positions, - ignore_invalid_inputs=True, - required_batch_size_multiple=8, - seed=args.seed, - num_shards=args.distributed_world_size, - shard_id=args.distributed_rank, - max_positions_num=96, - - ) - # Load the latest checkpoint if one is available - load_checkpoint(args, trainer, epoch_itr) - - # Train until the learning rate gets too small or model reaches target score - max_epoch = args.max_epoch or math.inf - max_update = args.max_update or math.inf - lr = trainer.get_lr() - train_meter = StopwatchMeter() - train_meter.start() - valid_losses = [None] - valid_subsets = args.valid_subset.split(',') - run_summary = {'loss': float('inf'), - 'val_loss': float('inf'), - 'speed': 0, - 'accuracy': 0} - - # max_update - m=0 - while lr >= args.min_lr and epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update: - m=m+1 - if m >=2:pass - DLLogger.log(step=trainer.get_num_updates(), data={'epoch': epoch_itr.epoch}, verbosity=0) - # train for one epoch - train(args, trainer, datasets, epoch_itr) - - if epoch_itr.epoch % args.validate_interval == 0: - valid_losses = validate(args, trainer, datasets, valid_subsets) - DLLogger.log(step=trainer.get_num_updates(), data={'val_loss': valid_losses[0]}, - verbosity=1) - - - if valid_losses[0] < run_summary['val_loss']: - run_summary['val_loss'] = valid_losses[0] - run_summary['loss'] = valid_losses[0] - run_summary['speed'] = trainer.throughput_meter.u_avg - - # Only use first validation loss to update the learning rate - lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0]) - - # Save checkpoint - if epoch_itr.epoch % args.save_interval == 0: - save_checkpoint(args, trainer, epoch_itr, valid_losses[0]) - - train_meter.stop() - DLLogger.log(step=[], data=run_summary, verbosity=0) - DLLogger.log(step='RUN', data={'walltime': train_meter.sum}, verbosity=0) - if args.distributed_world_size > 1 : - print('| done training in {:.1f} seconds'.format(train_meter.sum)) - - -def train(args, trainer, datasets, epoch_itr): - """Train the model for one epoch.""" - - itr = epoch_itr.next_epoch_itr() - - # update parameters every N batches - if epoch_itr.epoch <= len(args.update_freq): - update_freq = args.update_freq[epoch_itr.epoch - 1] - else: - update_freq = args.update_freq[-1] - - num_batches = len(epoch_itr) - - batch_time = AverageMeter('Time', ':6.3f') - sentence_s = AverageMeter('Sentence/s', ':6.3f') - losses = AverageMeter('Loss', ':.4f') - progress = ProgressMeter(int(num_batches/args.distributed_world_size/update_freq), - [batch_time, sentence_s,losses], - prefix = "Epoch: [{}]".format(epoch_itr.epoch)) - - first_valid = args.valid_subset.split(',')[0] - max_update = args.max_update or math.inf - - - # reset meters - DLLogger.flush() - trainer.get_throughput_meter().reset() - - for i, sample in enumerate(itr): - if i>100:pass - if i < num_batches - 1 and (i + 1) % update_freq > 0: - # buffer updates according to --update-freq - loss = 
trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1)) - continue - else: - loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1)) - if loss != None: - losses.update(loss) - - if i >= 4: - t = time.time() - batch_time.update((t - end)/update_freq) - sentence_s.update(args.max_sentences/(t-end)*args.distributed_world_size) - end = time.time() - if i < 4: - end = time.time() - if i >= 4: - if args.distributed_world_size > 1 : - progress.display(int((i+1)/update_freq)) - - - # ignore the first mini-batch in words-per-second calculation - if i == 0: - trainer.get_throughput_meter().reset() - for backend in DLLogger.GLOBAL_LOGGER.backends: - if isinstance(backend, AggregatorBackend): - backend._reset_perf_meter('tokens') - backend._reset_perf_meter('updates') - break - - # Mid epoch checkpoint - num_updates = trainer.get_num_updates() - if args.distributed_world_size > 1 : - if args.save_interval_updates > 0 and num_updates % args.save_interval_updates == 0: - valid_losses = validate(args, trainer, datasets, [first_valid]) - save_checkpoint(args, trainer, epoch_itr, valid_losses[0]) - - if (i + 1) % args.log_interval == 0: - DLLogger.flush() - - if num_updates >= max_update: - break - - if args.distributed_world_size > 1 : - if batch_time.avg > 0: - print("End of epoch, batch_size:", args.max_sentences, 'Time: {:.3f}'.format(batch_time.avg), - ' Sentence/s@all {:.3f}'.format( - args.max_sentences / batch_time.avg * args.distributed_world_size)) - - # Print epoch stats and reset training meters - if args.distributed_world_size > 1 : - DLLogger.log(step=trainer.get_num_updates(), data={'speed': trainer.get_throughput_meter().avg}, verbosity=0) - DLLogger.flush() - - -def validate(args, trainer, datasets, subsets): - """Evaluate the model on the validation set(s) and return the losses.""" - # Reset value iterations counter - trainer._num_val_iterations = 0 - - valid_losses = [] - for subset in subsets: - - if len(subsets) > 1: - print('Validating on \'{}\' subset'.format(subset)) - - # Initialize data iterator - itr = data.EpochBatchIterator( - dataset=datasets[subset], - max_tokens=args.max_tokens, - max_sentences=args.max_sentences_valid, - max_positions=args.max_positions, - ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, - required_batch_size_multiple=8, - seed=args.seed, - num_shards=args.distributed_world_size, - shard_id=args.distributed_rank, - max_positions_num=1024, - ).next_epoch_itr(shuffle=False) - - # reset validation loss meters - if args.distributed_world_size > 1 : - DLLogger.flush() - - subset_losses = [] - for sample in itr: - loss = trainer.valid_step(sample) - subset_losses.append(loss) - subset_loss = sum(subset_losses) / len(subset_losses) - - DLLogger.flush() - - valid_losses.append(subset_loss) - if args.distributed_world_size > 1 : - print(f'Validation loss on subset {subset}: {subset_loss}') - - return valid_losses - - - -def save_checkpoint(args, trainer, epoch_itr, val_loss): - if args.no_save or not distributed_utils.is_master(args): - return - epoch = epoch_itr.epoch - end_of_epoch = epoch_itr.end_of_epoch() - updates = trainer.get_num_updates() - - checkpoint_conds = collections.OrderedDict() - checkpoint_conds['checkpoint{}.pt'.format(epoch)] = ( - end_of_epoch and not args.no_epoch_checkpoints and - epoch % args.save_interval == 0 - ) - checkpoint_conds['checkpoint_{}_{}.pt'.format(epoch, updates)] = ( - not end_of_epoch and args.save_interval_updates > 0 and - updates % 
args.save_interval_updates == 0 - ) - checkpoint_conds['checkpoint_best.pt'] = ( - val_loss is not None and - (not hasattr(save_checkpoint, 'best') or val_loss < save_checkpoint.best) - ) - checkpoint_conds['checkpoint_last.pt'] = True # keep this last so that it's a symlink - - prev_best = getattr(save_checkpoint, 'best', val_loss) - if val_loss is not None: - save_checkpoint.best = min(val_loss, prev_best) - extra_state = { - 'best': save_checkpoint.best, - 'train_iterator': epoch_itr.state_dict(), - 'val_loss': val_loss, - } - extra_state.update(save_checkpoint.extra_items) - - checkpoints = [os.path.join(args.save_dir, 'checkpoints', fn) for fn, cond in checkpoint_conds.items() if cond] - if len(checkpoints) > 0: - for cp in checkpoints: - trainer.save_checkpoint(cp, extra_state) - - if not end_of_epoch and args.keep_interval_updates > 0: - # remove old checkpoints; checkpoints are sorted in descending order - checkpoints = utils.checkpoint_paths(os.path.join(args.save_dir, 'checkpoints'), - pattern=r'checkpoint_\d+_(\d+)\.pt') - for old_chk in checkpoints[args.keep_interval_updates:]: - os.remove(old_chk) - - -def add_extra_items_to_checkpoint(dict): - if not hasattr(save_checkpoint, 'extra_items'): - save_checkpoint.extra_items = {} - save_checkpoint.extra_items.update(dict) - - -def load_checkpoint(args, trainer, epoch_itr): - """Load a checkpoint and replay dataloader to match.""" - os.makedirs(os.path.join(args.save_dir, 'checkpoints'), exist_ok=True) - checkpoint_path = os.path.join(args.save_dir, 'checkpoints', args.restore_file) - if os.path.isfile(checkpoint_path): - extra_state = trainer.load_checkpoint(checkpoint_path) - if extra_state is not None: - # replay train iterator to match checkpoint - epoch_itr.load_state_dict(extra_state['train_iterator']) - if args.distributed_world_size > 1 : - print('| loaded checkpoint {} (epoch {} @ {} updates)'.format( - checkpoint_path, epoch_itr.epoch, trainer.get_num_updates())) - - trainer.lr_step(epoch_itr.epoch) - trainer.lr_step_update(trainer.get_num_updates()) - if 'best' in extra_state: - save_checkpoint.best = extra_state['best'] - - -if __name__ == '__main__': - main() +#!/usr/bin/env python3 -u +# Copyright (c) 2017-present, Facebook, Inc. +# All rights reserved. +# Copyright 2020 Huawei Technologies Co., Ltd +# +# This source code is licensed under the license found in the LICENSE file in +# the root directory of this source tree. An additional grant of patent rights +# can be found in the PATENTS file in the same directory. +# +# ------------------------------------------------------------------------- +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
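For readers skimming the patch, the snippet below is a minimal, self-contained sketch (not part of the diff) of the warm-up-then-measure timing pattern used in train() above: the first few iterations are skipped before timing starts, and a running average then yields per-step time and throughput. RunningAverage, work(), warmup and batch_size are illustrative stand-ins, not names taken from the patch.

import time

class RunningAverage:
    """Minimal running mean, mirroring the spirit of the AverageMeter above."""
    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n

    @property
    def avg(self):
        return self.sum / self.count if self.count else 0.0

def work():
    """Stand-in for one trainer.train_step() call."""
    time.sleep(0.01)

batch_time = RunningAverage()
warmup = 4        # train() above likewise skips the first 4 iterations before timing
batch_size = 16   # hypothetical sentences per step, only for the throughput number

end = None
for i in range(20):
    work()
    if i >= warmup:
        now = time.time()
        batch_time.update(now - end)
        end = now
    else:
        end = time.time()

print('avg step time: {:.4f}s, throughput: {:.1f} sentences/s'.format(
    batch_time.avg, batch_size / batch_time.avg))

In the real script the same running average (batch_time.avg) feeds the ProgressMeter display during the epoch and the "End of epoch" throughput print at the end of train().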
+import collections +import itertools +import os +import math +import torch +import torch.npu +import time +import ctypes + +import sys +import threading + +from copy import deepcopy +from utils import distributed_utils, options, utils +from utils.ddp_trainer import DDPTrainer +from utils.meters import StopwatchMeter, TimeMeter +import data +from data import tokenizer, dictionary, data_utils, load_dataset_splits +from models import build_model +import torch.distributed as dist +import torch.multiprocessing as mp +import numpy as np + +import dllogger as DLLogger +from utils.log_helper import AggregatorBackend, setup_logger + + +NPU_CALCULATE_DEVICE = 0 +if os.getenv('NPU_CALCULATE_DEVICE') and str.isdigit(os.getenv('NPU_CALCULATE_DEVICE')): + NPU_CALCULATE_DEVICE = int(os.getenv('NPU_CALCULATE_DEVICE')) +if torch.npu.current_device() != NPU_CALCULATE_DEVICE: + torch.npu.set_device(f'npu:{NPU_CALCULATE_DEVICE}') +NPU_WORLD_SIZE = int(os.getenv('NPU_WORLD_SIZE')) +RANK = int(os.getenv('RANK')) +torch.distributed.init_process_group('hccl', rank=RANK, world_size=NPU_WORLD_SIZE) +MAX = 2147483647 + +def _gen_seeds(shape): + return np.random.uniform(1, MAX, size=shape).astype(np.float32) +seed_shape = (32 * 1024 * 12, ) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=':f'): + self.name = name + self.fmt = fmt + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + + +def main(): + parser = options.get_training_parser() + args = options.parse_args_and_arch(parser) + print(args) + os.environ['MASTER_ADDR'] = args.addr + os.environ['MASTER_PORT'] = args.port + #mp.spawn(main_worker, nprocs=args.distributed_world_size, args=(args.distributed_world_size, args)) + main_worker(pid_idx=RANK, args=args) + + + +def main_worker(pid_idx, args): + setup_logger(args) + print('pid_idx:',str(pid_idx)) + args.distributed_rank = pid_idx + args.device_id = args.distributed_rank + #dist.init_process_group(backend=args.dist_backend, world_size=NPU_WORLD_SIZE, rank=args.distributed_rank) + loc = 'npu:{}'.format(args.device_id) + torch.npu.set_device(loc) + + if args.max_tokens is None: + args.max_tokens = 6000 + + torch.manual_seed(args.seed) + + src_dict, tgt_dict = data_utils.load_dictionaries(args) + add_extra_items_to_checkpoint({'src_dict': src_dict, 'tgt_dict': tgt_dict}) + datasets = load_dataset_splits(args, ['train', 'valid', 'test'], src_dict, tgt_dict) + + seed = _gen_seeds(seed_shape) + seed = torch.from_numpy(seed) + seed = seed.to(loc) + model = build_model(args, seed=seed) + if args.distributed_world_size > 1 : + print('| num. 
model params: {}'.format(sum(p.numel() for p in model.parameters()))) + + # Build trainer + trainer = DDPTrainer(args, model) + if args.distributed_world_size > 1 : + print('| model {}, criterion {}'.format(args.arch, trainer.criterion.__class__.__name__)) + print('| training on {} NPUs'.format(args.distributed_world_size)) + + if args.distributed_world_size > 1 : + print('| max sentences per NPU = {}'.format(args.max_sentences)) + + epoch_itr = data.EpochBatchIterator( + dataset=datasets[args.train_subset], + max_tokens=args.max_tokens, + max_sentences=args.max_sentences_valid, + max_positions=args.max_positions, + ignore_invalid_inputs=True, + required_batch_size_multiple=8, + seed=args.seed, + num_shards=args.distributed_world_size, + shard_id=args.distributed_rank, + max_positions_num=96, + + ) + # Load the latest checkpoint if one is available + load_checkpoint(args, trainer, epoch_itr) + + # Train until the learning rate gets too small or model reaches target score + max_epoch = args.max_epoch or math.inf + max_update = args.max_update or math.inf + lr = trainer.get_lr() + train_meter = StopwatchMeter() + train_meter.start() + valid_losses = [None] + valid_subsets = args.valid_subset.split(',') + run_summary = {'loss': float('inf'), + 'val_loss': float('inf'), + 'speed': 0, + 'accuracy': 0} + + # max_update + m=0 + while lr >= args.min_lr and epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update: + m=m+1 + if m >=2:pass + DLLogger.log(step=trainer.get_num_updates(), data={'epoch': epoch_itr.epoch}, verbosity=0) + # train for one epoch + train(args, trainer, datasets, epoch_itr) + + if epoch_itr.epoch % args.validate_interval == 0: + valid_losses = validate(args, trainer, datasets, valid_subsets) + DLLogger.log(step=trainer.get_num_updates(), data={'val_loss': valid_losses[0]}, + verbosity=1) + + + if valid_losses[0] < run_summary['val_loss']: + run_summary['val_loss'] = valid_losses[0] + run_summary['loss'] = valid_losses[0] + run_summary['speed'] = trainer.throughput_meter.u_avg + + # Only use first validation loss to update the learning rate + lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0]) + + # Save checkpoint + if epoch_itr.epoch % args.save_interval == 0: + save_checkpoint(args, trainer, epoch_itr, valid_losses[0]) + + train_meter.stop() + DLLogger.log(step=[], data=run_summary, verbosity=0) + DLLogger.log(step='RUN', data={'walltime': train_meter.sum}, verbosity=0) + if args.distributed_world_size > 1 : + print('| done training in {:.1f} seconds'.format(train_meter.sum)) + + +def train(args, trainer, datasets, epoch_itr): + """Train the model for one epoch.""" + + itr = epoch_itr.next_epoch_itr() + + # update parameters every N batches + if epoch_itr.epoch <= len(args.update_freq): + update_freq = args.update_freq[epoch_itr.epoch - 1] + else: + update_freq = args.update_freq[-1] + + num_batches = len(epoch_itr) + + batch_time = AverageMeter('Time', ':6.3f') + sentence_s = AverageMeter('Sentence/s', ':6.3f') + losses = AverageMeter('Loss', ':.4f') + progress = ProgressMeter(int(num_batches/args.distributed_world_size/update_freq), + [batch_time, sentence_s,losses], + prefix = "Epoch: [{}]".format(epoch_itr.epoch)) + + first_valid = args.valid_subset.split(',')[0] + max_update = args.max_update or math.inf + + + # reset meters + DLLogger.flush() + trainer.get_throughput_meter().reset() + + for i, sample in enumerate(itr): + if i>100:pass + if i < num_batches - 1 and (i + 1) % update_freq > 0: + # buffer updates according to --update-freq + loss = 
trainer.train_step(sample, update_params=False, last_step=(i == len(itr) - 1)) + continue + else: + loss = trainer.train_step(sample, update_params=True, last_step=(i == len(itr) - 1)) + if loss != None: + losses.update(loss) + + if i >= 4: + t = time.time() + batch_time.update((t - end)/update_freq) + sentence_s.update(args.max_sentences/(t-end)*args.distributed_world_size) + end = time.time() + if i < 4: + end = time.time() + if i >= 4: + if args.distributed_world_size > 1 : + progress.display(int((i+1)/update_freq)) + + + # ignore the first mini-batch in words-per-second calculation + if i == 0: + trainer.get_throughput_meter().reset() + for backend in DLLogger.GLOBAL_LOGGER.backends: + if isinstance(backend, AggregatorBackend): + backend._reset_perf_meter('tokens') + backend._reset_perf_meter('updates') + break + + # Mid epoch checkpoint + num_updates = trainer.get_num_updates() + if args.distributed_world_size > 1 : + if args.save_interval_updates > 0 and num_updates % args.save_interval_updates == 0: + valid_losses = validate(args, trainer, datasets, [first_valid]) + save_checkpoint(args, trainer, epoch_itr, valid_losses[0]) + + if (i + 1) % args.log_interval == 0: + DLLogger.flush() + + if num_updates >= max_update: + break + + if args.distributed_world_size > 1 : + if batch_time.avg > 0: + print("End of epoch, batch_size:", args.max_sentences, 'Time: {:.3f}'.format(batch_time.avg), + ' Sentence/s@all {:.3f}'.format( + args.max_sentences / batch_time.avg * args.distributed_world_size)) + + # Print epoch stats and reset training meters + if args.distributed_world_size > 1 : + DLLogger.log(step=trainer.get_num_updates(), data={'speed': trainer.get_throughput_meter().avg}, verbosity=0) + DLLogger.flush() + + +def validate(args, trainer, datasets, subsets): + """Evaluate the model on the validation set(s) and return the losses.""" + # Reset value iterations counter + trainer._num_val_iterations = 0 + + valid_losses = [] + for subset in subsets: + + if len(subsets) > 1: + print('Validating on \'{}\' subset'.format(subset)) + + # Initialize data iterator + itr = data.EpochBatchIterator( + dataset=datasets[subset], + max_tokens=args.max_tokens, + max_sentences=args.max_sentences_valid, + max_positions=args.max_positions, + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=8, + seed=args.seed, + num_shards=args.distributed_world_size, + shard_id=args.distributed_rank, + max_positions_num=1024, + ).next_epoch_itr(shuffle=False) + + # reset validation loss meters + if args.distributed_world_size > 1 : + DLLogger.flush() + + subset_losses = [] + for sample in itr: + loss = trainer.valid_step(sample) + subset_losses.append(loss) + subset_loss = sum(subset_losses) / len(subset_losses) + + DLLogger.flush() + + valid_losses.append(subset_loss) + if args.distributed_world_size > 1 : + print(f'Validation loss on subset {subset}: {subset_loss}') + + return valid_losses + + + +def save_checkpoint(args, trainer, epoch_itr, val_loss): + if args.no_save or not distributed_utils.is_master(args): + return + epoch = epoch_itr.epoch + end_of_epoch = epoch_itr.end_of_epoch() + updates = trainer.get_num_updates() + + checkpoint_conds = collections.OrderedDict() + checkpoint_conds['checkpoint{}.pt'.format(epoch)] = ( + end_of_epoch and not args.no_epoch_checkpoints and + epoch % args.save_interval == 0 + ) + checkpoint_conds['checkpoint_{}_{}.pt'.format(epoch, updates)] = ( + not end_of_epoch and args.save_interval_updates > 0 and + updates % 
args.save_interval_updates == 0 + ) + checkpoint_conds['checkpoint_best.pt'] = ( + val_loss is not None and + (not hasattr(save_checkpoint, 'best') or val_loss < save_checkpoint.best) + ) + checkpoint_conds['checkpoint_last.pt'] = True # keep this last so that it's a symlink + + prev_best = getattr(save_checkpoint, 'best', val_loss) + if val_loss is not None: + save_checkpoint.best = min(val_loss, prev_best) + extra_state = { + 'best': save_checkpoint.best, + 'train_iterator': epoch_itr.state_dict(), + 'val_loss': val_loss, + } + extra_state.update(save_checkpoint.extra_items) + + checkpoints = [os.path.join(args.save_dir, 'checkpoints', fn) for fn, cond in checkpoint_conds.items() if cond] + if len(checkpoints) > 0: + for cp in checkpoints: + trainer.save_checkpoint(cp, extra_state) + + if not end_of_epoch and args.keep_interval_updates > 0: + # remove old checkpoints; checkpoints are sorted in descending order + checkpoints = utils.checkpoint_paths(os.path.join(args.save_dir, 'checkpoints'), + pattern=r'checkpoint_\d+_(\d+)\.pt') + for old_chk in checkpoints[args.keep_interval_updates:]: + os.remove(old_chk) + + +def add_extra_items_to_checkpoint(dict): + if not hasattr(save_checkpoint, 'extra_items'): + save_checkpoint.extra_items = {} + save_checkpoint.extra_items.update(dict) + + +def load_checkpoint(args, trainer, epoch_itr): + """Load a checkpoint and replay dataloader to match.""" + os.makedirs(os.path.join(args.save_dir, 'checkpoints'), exist_ok=True) + checkpoint_path = os.path.join(args.save_dir, 'checkpoints', args.restore_file) + if os.path.isfile(checkpoint_path): + extra_state = trainer.load_checkpoint(checkpoint_path) + if extra_state is not None: + # replay train iterator to match checkpoint + epoch_itr.load_state_dict(extra_state['train_iterator']) + if args.distributed_world_size > 1 : + print('| loaded checkpoint {} (epoch {} @ {} updates)'.format( + checkpoint_path, epoch_itr.epoch, trainer.get_num_updates())) + + trainer.lr_step(epoch_itr.epoch) + trainer.lr_step_update(trainer.get_num_updates()) + if 'best' in extra_state: + save_checkpoint.best = extra_state['best'] + + +if __name__ == '__main__': + main() diff --git a/PyTorch/dev/others/RecVAE_ID0347_for_PyTorch/modelzoo_level.txt b/PyTorch/dev/others/RecVAE_ID0347_for_PyTorch/modelzoo_level.txt index 0b49b4fb26c2694a86567bea1b462e7dcb03cc31..31529da2e68f25b61e2a3e698a07537281443c03 100644 --- a/PyTorch/dev/others/RecVAE_ID0347_for_PyTorch/modelzoo_level.txt +++ b/PyTorch/dev/others/RecVAE_ID0347_for_PyTorch/modelzoo_level.txt @@ -1,3 +1,3 @@ -FuncStatus:OK -PerfStatus:OK +FuncStatus:OK +PerfStatus:OK PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh index 101b98fcdab28181baa84450308bea55d86ad094..27648b40740aca6c8f159c1b9c88b3b5ee2e4514 100644 --- a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh +++ b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh @@ -1,188 +1,188 @@ -#!/bin/bash - -#当前路径,不需要修改 -cur_path=`pwd` -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID -#集合通信参数,不需要修改 - -export RANK_SIZE=1 -export JOB_ID=10087 -RANK_ID_START=0 - -#进入到conda环境 - -#export PATH=/usr/local/python3.7.5/bin:/home/anaconda3/bin:$PATH -#export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH -#source activate py8 - 
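As a side note on the training script that ends just above: save_checkpoint() builds an ordered filename-to-condition table and writes every file whose condition holds. The sketch below (not part of the diff) replays that decision with hypothetical values for epoch, updates, val_loss and the best loss so far; the best-checkpoint test is simplified to a plain comparison.

import collections

# Hypothetical example values; the real script pulls these from the trainer state.
epoch, updates = 3, 1200
val_loss, best_so_far = 4.21, 4.35
end_of_epoch = True
save_interval, save_interval_updates = 1, 500

checkpoint_conds = collections.OrderedDict()
checkpoint_conds['checkpoint{}.pt'.format(epoch)] = (
    end_of_epoch and epoch % save_interval == 0)
checkpoint_conds['checkpoint_{}_{}.pt'.format(epoch, updates)] = (
    not end_of_epoch and save_interval_updates > 0
    and updates % save_interval_updates == 0)
checkpoint_conds['checkpoint_best.pt'] = (
    val_loss is not None and val_loss < best_so_far)
checkpoint_conds['checkpoint_last.pt'] = True  # always written, kept last in the table

to_write = [name for name, cond in checkpoint_conds.items() if cond]
print(to_write)  # ['checkpoint3.pt', 'checkpoint_best.pt', 'checkpoint_last.pt']

Keeping 'checkpoint_last.pt' as the final entry matches the "keep this last so that it's a symlink" comment in the script, so the most recent save is always the last file handled.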
-# 数据集路径,保持为空,不需要修改 -data_path="" - -#基础参数,需要模型审视修改 -#网络名称,同目录名称 -Network="Widedeep_ID3080_for_PyTorch" -#训练epoch -epoch=1 -#训练batch_size -train_batch_size=16 -batch_size=${train_batch_size} -#训练step -#train_steps=`expr 1281167 / ${batch_size}` -#学习率 -#learning_rate=0.495 -PREC="" -#TF2.X独有,不需要修改 -#export NPU_LOOP_SIZE=${train_steps} - -#维测参数,precision_mode需要模型审视修改 -precision_mode="allow_mix_precision" -#维持参数,以下不需要修改 -over_dump=False -data_dump_flag=False -data_dump_step="10" -profiling=False -autotune=False -data_path=./data/criteo_sampled_data.csv - -# 帮助信息,不需要修改 -if [[ $1 == --help || $1 == -h ]];then - echo"usage:./train_full_1p.sh " - echo " " - echo "parameter explain: - --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) - --over_dump if or not over detection, default is False - --data_dump_flag data dump flag, default is False - --data_dump_step data dump step, default is 10 - --profiling if or not profiling for performance debug, default is False - --data_path source data of training - -h/--help show help message - " - exit 1 -fi - -#参数校验,不需要修改 -for para in $* -do - if [[ $para == --precision_mode* ]];then - apex_opt_level=`echo ${para#*=}` - if [[ $apex_opt_level != "O1" ]] && [[ $apex_opt_level != "O2" ]] && [[ $apex_opt_level != "O3" ]]; then - echo "[ERROR] para \"precision_mode\" must be config O1 or O2 or O3" - exit 1 - fi - PREC="--apex --apex-opt-level "$apex_opt_level - -elif [[ $para == --over_dump* ]];then - over_dump=`echo ${para#*=}` - over_dump_path=${cur_path}/output/overflow_dump - mkdir -p ${over_dump_path} - elif [[ $para == --data_dump_flag* ]];then - data_dump_flag=`echo ${para#*=}` - data_dump_path=${cur_path}/output/data_dump - mkdir -p ${data_dump_path} - elif [[ $para == --data_dump_step* ]];then - data_dump_step=`echo ${para#*=}` - elif [[ $para == --profiling* ]];then - profiling=`echo ${para#*=}` - profiling_dump_path=${cur_path}/output/profiling - mkdir -p ${profiling_dump_path} - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --conda_name* ]];then - conda_name=`echo ${para#*=}` - source set_conda.sh - source activate $conda_name -fi - -done - -#校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" -# exit 1 -fi - -#训练开始时间,不需要修改 -start_time=$(date +%s) - -#进入训练脚本目录,需要模型审视修改 -cd $cur_path/../ -for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -do - #设置环境变量,不需要修改 - echo "Device ID: $ASCEND_DEVICE_ID" - export RANK_ID=$RANK_ID - - - - #创建DeviceID输出目录,不需要修改 - if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - else - mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt - fi - - #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 - #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` - #cpustep=`expr $cpucount / 8` - #echo "taskset c steps:" $cpustep - #let a=RANK_ID*$cpustep - #let b=RANK_ID+1 - #let c=b*$cpustep-1 - - #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 - python3 WideDeep/train.py --train_batch_size=${batch_size} --Epochs=${epoch} --graph_mode > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -done -wait - -#conda deactivate -#训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -#结果打印,不需要修改 -echo "------------------ Final result ------------------" -#输出性能FPS,需要模型审视修改 -Time=`grep Time 
$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' |tail -n +3 | awk '{sum+=$1} END {print"", sum/NR}'|sed s/[[:space:]]//g` -FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` - - -#打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -#输出训练精度,需要模型审视修改 -#train_accuracy=`grep Loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk -F " " '{print $10}'` -#打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -#稳定性精度看护结果汇总 -#训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -##获取性能数据 -#吞吐量,不需要修改 -ActualFPS=${FPS} -#单迭代训练时长,不需要修改 -TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` - -#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Time $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -#最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -#关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID +#集合通信参数,不需要修改 + +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + +#进入到conda环境 + +#export PATH=/usr/local/python3.7.5/bin:/home/anaconda3/bin:$PATH +#export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH +#source activate py8 + +# 数据集路径,保持为空,不需要修改 +data_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Widedeep_ID3080_for_PyTorch" +#训练epoch +epoch=1 +#训练batch_size +train_batch_size=16 +batch_size=${train_batch_size} +#训练step +#train_steps=`expr 1281167 / ${batch_size}` +#学习率 +#learning_rate=0.495 +PREC="" +#TF2.X独有,不需要修改 +#export NPU_LOOP_SIZE=${train_steps} + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_mix_precision" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False +autotune=False +data_path=./data/criteo_sampled_data.csv + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_full_1p.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message 
+ " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + apex_opt_level=`echo ${para#*=}` + if [[ $apex_opt_level != "O1" ]] && [[ $apex_opt_level != "O2" ]] && [[ $apex_opt_level != "O3" ]]; then + echo "[ERROR] para \"precision_mode\" must be config O1 or O2 or O3" + exit 1 + fi + PREC="--apex --apex-opt-level "$apex_opt_level + +elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --conda_name* ]];then + conda_name=`echo ${para#*=}` + source set_conda.sh + source activate $conda_name +fi + +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" +# exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + #绑核,不需要绑核的模型删除,需要绑核的模型根据实际修改 + #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'` + #cpustep=`expr $cpucount / 8` + #echo "taskset c steps:" $cpustep + #let a=RANK_ID*$cpustep + #let b=RANK_ID+1 + #let c=b*$cpustep-1 + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + python3 WideDeep/train.py --train_batch_size=${batch_size} --Epochs=${epoch} --graph_mode > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +Time=`grep Time $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' |tail -n +3 | awk '{sum+=$1} END {print"", sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep Loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk -F " " '{print $10}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep Time $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' 
$cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/README.CN.md b/README.CN.md index 059ea029af2f2f2b180cca984a55883a648a1ff3..d02f2d6226360c4016fc6fc38d0d795d0beb61b8 100644 --- a/README.CN.md +++ b/README.CN.md @@ -1,621 +1,621 @@ -# 欢迎使用Ascend ModelZoo - -为方便更多开发者使用Ascend ModelZoo,我们将持续增加典型网络和相关预训练模型。如果您有任何需求,请在[modelzoo/issues](https://gitee.com/ascend/modelzoo/issues)提交issue,我们会及时处理。 - -## 如何贡献 - -在开始贡献之前,请先阅读[CONTRIBUTING](https://gitee.com/ascend/modelzoo/blob/master/CONTRIBUTING.md)。 -谢谢! - -## 目录 - - -### PyTorch - -#### built-in - -- [3D_ResNet_ID0421_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch) -- [CRNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/CRNN_for_PyTorch) -- [DeepMar_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch) -- [Densenet121_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch) -- [DenseNet161_ID0455_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch) -- [DenseNet169_ID0454_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch) -- [DenseNet201_ID0453_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet201_ID0453_for_PyTorch) -- [EfficientNet-B1_ID1713_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch) -- [EfficientNet-B2_ID1714_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B2_ID1714_for_PyTorch) -- [EfficientNet-B3_ID0450_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B3_ID0450_for_PyTorch) -- [EfficientNet-B4_ID1632_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B4_ID1632_for_PyTorch) -- [EfficientNet-B5_ID1633_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B5_ID1633_for_PyTorch) -- [EfficientNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch) -- 
[FaceNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch) -- [Gluon_ResNet50_v1b_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1b_for_PyTorch) -- [Gluon_ResNet50_v1c_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1c_for_PyTorch) -- [Gluon_ResNet50_v1d_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1d_for_PyTorch) -- [Googlenet_ID0447_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch) -- [MobileNetV1_ID0094_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV1_ID0094_for_PyTorch) -- [MobileNetV2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch) -- [MobileNetV3-Large_ID1784_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV3-Large_ID1784_for_PyTorch) -- [ResNest_ID0426_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/ResNest_ID0426_for_PyTorch) -- [ResNet50_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch) -- [Shufflenetv2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch) -- [DAL_ID2732_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/DAL_ID2732_for_PyTorch) -- [DB_ID0706_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch) -- [Faster_Mask_RCNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch) -- [PSENet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/PSENet_for_PyTorch) -- [RetinaNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch) -- [RFCN_ID0418_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/RFCN_ID0418_for_PyTorch) -- [YoloV3_ID1790_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch) -- [YOLOV4_ID0396_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch) -- [Attention_R2U_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch) -- [AttU_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/AttU_Net_for_PyTorch) -- [DeepLabv3+_ID1695_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch) -- [R2U_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch) -- [Bert-Squad_ID0470_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch) -- 
[CPM_Finetune_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch) -- [FOTS_ID0338_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/FOTS_ID0338_for_PyTorch) -- [GRU_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/GRU_for_PyTorch) -- [LSTM_ID0468_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch) -- [mBART_ID2372_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch) -- [Transformer_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/Transformer_for_PyTorch) -- [XLM_ID0740_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch) -- [DCN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/DCN_for_PyTorch) -- [DeepFM_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/DeepFM_for_PyTorch) -- [WDL_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/WDL_for_PyTorch) - -#### contrib - -- [baseline-rawnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/baseline-rawnet) -- [deepspeech](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/deepspeech) -- [FastPitch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/FastPitch) -- [speech-transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/speech-transformer) -- [Tacotron2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/Tacotron2_for_PyTorch) -- [tdnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/tdnn) -- [WaveGlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/WaveGlow) -- [3d_attention_net](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/3d_attention_net) -- [AlexNet_ID2663_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/AlexNet_ID2663_for_PyTorch) -- [AlignedReID](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/AlignedReID) -- [csp_resnext50-mish](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/csp_resnext50-mish) -- [Deit_Small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Deit_Small) -- [DnCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/DnCNN) -- [DPN-131_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/DPN-131_for_PyTorch) -- [Efficient-3DCNNs_ID1230_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch) -- [EfficientNet-B1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B1) -- [EfficientNet-B3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B3) -- [EfficientNet-B5_ID1621_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch) -- 
[FixRes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/FixRes) -- [GaitSet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GaitSet) -- [GENet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GENet_for_Pytorch) -- [GhostNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GhostNet) -- [GoogleNet_ID1623_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch) -- [HRNet_ID1780_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch) -- [InceptionResNetV2_ID1779_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionResNetV2_ID1779_for_PyTorch) -- [InceptionV3_ID1596_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch) -- [InceptionV4_ID1778_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch) -- [LResNet100E-IR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/LResNet100E-IR) -- [MGN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MGN) -- [MnasNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MnasNet) -- [MobileNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MobileNet) -- [MobileNetV3_large_100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MobileNetV3_large_100_for_PyTorch) -- [Moco-v2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Moco-v2) -- [NASNet-A-Mobile](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/NASNet-A-Mobile) -- [OSNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/OSNet) -- [PCB](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PCB) -- [PnasNet5Large](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PnasNet5Large) -- [PointNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PointNet) -- [pointnetCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/pointnetCNN) -- [RegNetX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RegNetX) -- [RegNetY-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RegNetY-1.6GF) -- [ReidStrongBaseline](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ReidStrongBaseline) -- [RepVGG](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RepVGG) -- [Res2Net101_v1b](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Res2Net101_v1b) -- [ResNeSt50_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch) -- [ResNet101_ID1595_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch) -- 
[ResNet152](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet152) -- [ResNet18_ID1593_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch) -- [ResNet34_ID1594_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch) -- [ResNeXt-50-32x4d_ID1624_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch) -- [ResNeXt101_32x8d_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch) -- [SE-ResNet-50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SE-ResNet-50) -- [SE-ResNext-101-32x4d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d) -- [Se-ResNext-50-32x4d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d) -- [SENet154](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SENet154) -- [ShuffleNetV1_ID1625_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch) -- [ShuffleNetV2Plus_ID1626_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch) -- [SkresNet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SkresNet50) -- [SPNASNet_100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SPNASNet_100_for_PyTorch) -- [SqueezeNet1_1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SqueezeNet1_1) -- [Swin-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Swin-Transformer) -- [TNT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/TNT) -- [TResNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/TResNet) -- [Vehicle_Re-Identification](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vehicle_Re-Identification) -- [Vgg16_ID1630_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch) -- [Vgg19_ID1631_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch) -- [vit_base_patch32_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/vit_base_patch32_224) -- [Vit_small_patch16_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vit_small_patch16_224) -- [VOLO](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/VOLO) -- [VoVNet39](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/VoVNet39) -- [WideResNet101_2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch) -- [WideResNet50_2_ID1627_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/WideResNet50_2_ID1627_for_PyTorch) -- 
[Xception_ID1777_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch) -- [xcit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/xcit) -- [AdvancedEAST](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/AdvancedEAST) -- [CascadedMaskRCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CascadedMaskRCNN) -- [Cascade_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Cascade_RCNN) -- [CenterFace](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CenterFace) -- [CenterNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CenterNet) -- [DSFD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/DSFD) -- [EfficientDetD0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/EfficientDetD0) -- [FaceBoxes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FaceBoxes) -- [FairMOT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FairMOT) -- [FCOS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FCOS) -- [FOTS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FOTS) -- [FSAF_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FSAF_for_Pytorch) -- [GFocalV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/GFocalV2) -- [M2Det](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/M2Det) -- [NasFPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/NasFPN) -- [Pointnetplus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Pointnetplus) -- [Pyramidbox](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Pyramidbox) -- [RCF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RCF) -- [RefineDet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RefineDet) -- [Retinaface](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Retinaface) -- [RetinaNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RetinaNet) -- [SimCLR_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch) -- [SOLOv1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SOLOv1) -- [SOLOv2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SOLOv2) -- [SSD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD) -- [SSD-MobileNetV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-MobileNetV1) -- [SSD-MobilenetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-MobilenetV2) -- [SSD-Resnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-Resnet) -- [StyleGAN2-ADA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/StyleGAN2-ADA) -- [TextSnake](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/TextSnake) -- 
[YOLACT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLACT) -- [YOLACT_plus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLACT_plus) -- [YOLOR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLOR) -- [YOLOX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLOX) -- [ADNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/ADNet) -- [BigGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/BigGAN) -- [CGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/CGAN) -- [Cross-Scale-Non-Local-Attention](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Cross-Scale-Non-Local-Attention) -- [CycleGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/CycleGAN) -- [DCGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/DCGAN) -- [edsr_x2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/edsr_x2) -- [GAN_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/GAN_Pytorch) -- [Pix2Pix](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Pix2Pix) -- [RDN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/RDN) -- [Srcnn_x2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch) -- [SRFlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/SRFlow) -- [SRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/SRGAN) -- [stargan](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/stargan) -- [wdsr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/wdsr) -- [3Dmppe_RootNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet) -- [AlphaPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/AlphaPose) -- [DeepPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/DeepPose) -- [HigherHRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/HigherHRNet) -- [Hourglass_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch) -- [Lightweight_OpenPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose) -- [MSPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/MSPN) -- [ST-GCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/ST-GCN) -- [TransPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/TransPose) -- [VoxelPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/VoxelPose) -- [3DUNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/3DUNet) -- [DeeplabV3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch) -- 
[ENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ENet) -- [ErfNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ErfNet) -- [FastSCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/FastSCNN) -- [FCN8s](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/FCN8s) -- [HRnet-OCR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR) -- [HRNet_SEG_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/HRNet_SEG_for_Pytorch) -- [ICNet_ID1781_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ICNet_ID1781_for_PyTorch) -- [IntraDA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/IntraDA) -- [PointRend](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PointRend) -- [PraNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PraNet) -- [PSPNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PSPNet) -- [RCAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/RCAN) -- [RefineNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/RefineNet) -- [SETR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/SETR) -- [Ultra-Fast-Lane-Detection](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/Ultra-Fast-Lane-Detection) -- [UNet++](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/UNet++) -- [UNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch) -- [VNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/VNet) -- [Wseg](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/Wseg) -- [BMN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/BMN) -- [BSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/BSN) -- [C3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/C3D) -- [GloRe](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/GloRe) -- [I3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/I3D) -- [NonLocal](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/NonLocal) -- [R(2+1)D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/R(2+1)D) -- [SiamFC](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SiamFC) -- [SiamRPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SiamRPN) -- [SlowFast](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SlowFast) -- [TSM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/TSM) -- [TSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/TSN) -- 
[VideoPose3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/VideoPose3D) -- [X3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/X3D) -- [albert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/albert) -- [Bertsum_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/Bertsum_for_PyTorch) -- [roberta](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/roberta) -- [TextCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/TextCNN) -- [tinybert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/tinybert) -- [C51](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/C51) -- [DLRM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/DLRM) -- [DQN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/DQN) -- [RotatE](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/RotatE) - -#### dev - -- [tacotron2_ID0406_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/audio/tacotron2_ID0406_for_PyTorch) -- [3D_attentionnet_ID0478_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/3D_attentionnet_ID0478_for_PyTorch) -- [FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch) -- [HRNet_ID0446_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/HRNet_ID0446_for_PyTorch) -- [PointNet_ID0430_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/PointNet_ID0430_for_PyTorch) -- [SSD-ResNet34_ID0411_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/SSD-ResNet34_ID0411_for_PyTorch) -- [2D_Unet_ID0624_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch) -- [2S-AGCN_ID0909_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/2S-AGCN_ID0909_for_PyTorch) -- [3D_Nest_Unet_ID0476_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch) -- [ADACOS_ID1082_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ADACOS_ID1082_for_PyTorch) -- [AdaFM-Net_ID1101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AdaFM-Net_ID1101_for_PyTorch) -- [ADLayer_ID1087_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ADLayer_ID1087_for_PyTorch) -- [AdvancedEast_ID0473_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch) -- [AlexNet_ID0472_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch) -- [ANN_ID2370_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ANN_ID2370_for_PyTorch) -- [ArcFace_ID0852_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ArcFace_ID0852_for_PyTorch) -- 
[ATS_ID2682_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ATS_ID2682_for_PyTorch) -- [AUTOAUGMENT_ID0792_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch) -- [BASNET_ID1134_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch) -- [BertBase_ID0490_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch) -- [CDAR_ID2747_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CDAR_ID2747_for_PyTorch) -- [ConvLSTM_ID1772_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ConvLSTM_ID1772_for_PyTorch) -- [coral-cnn_ID1064_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/coral-cnn_ID1064_for_PyTorch) -- [CrossFormer_ID2449_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CrossFormer_ID2449_for_PyTorch) -- [CycleGAN_ID0521_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CycleGAN_ID0521_for_PyTorch) -- [DBPN_ID2917_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DBPN_ID2917_for_PyTorch) -- [DCAP_ID2836_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch) -- [DEEP-HEAD-POSE_ID0796_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DEEP-HEAD-POSE_ID0796_for_PyTorch) -- [DeepLab-CRF_ID1873_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DeepLab-CRF_ID1873_for_PyTorch) -- [DeepSort_ID0654_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch) -- [deit_ID2467_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/deit_ID2467_for_PyTorch) -- [DGMS_ID2460_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DGMS_ID2460_for_PyTorch) -- [EfficientNet-B6_ID1715_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/EfficientNet-B6_ID1715_for_PyTorch) -- [EfficientNet-B7_ID1716_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/EfficientNet-B7_ID1716_for_PyTorch) -- [ESPCN_ID2919_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESPCN_ID2919_for_PyTorch) -- [ESPCN_ID3002_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESPCN_ID3002_for_PyTorch) -- [ESRGAN_ID1813_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch) -- [FasterRCNN_ID0100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FasterRCNN_ID0100_for_PyTorch) -- [FFDNet_ID0970_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FFDNet_ID0970_for_PyTorch) -- 
[FixMatch_ID0989_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FixMatch_ID0989_for_PyTorch) -- [GENet_ID0671_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/GENet_ID0671_for_PyTorch) -- [GhostNet_ID1622_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/GhostNet_ID1622_for_PyTorch) -- [Hourglass_ID1809_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch) -- [ICT_ID1179_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ICT_ID1179_for_PyTorch) -- [InceptionV1_ID1568_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV1_ID1568_for_PyTorch) -- [InceptionV2_ID0698_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV2_ID0698_for_PyTorch) -- [InceptionV3_ID0445_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch) -- [InceptionV4_ID0444_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV4_ID0444_for_PyTorch) -- [InceptionV4_ID2473_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV4_ID2473_for_PyTorch) -- [Keyword-MLP_ID2441_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Keyword-MLP_ID2441_for_PyTorch) -- [LADE_ID2445_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/LADE_ID2445_for_PyTorch) -- [MaskRCNN_ID0101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch) -- [mBART_ID1550_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch) -- [MMAL-NET_ID1116_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MMAL-NET_ID1116_for_PyTorch) -- [MMOE_ID2865_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MMOE_ID2865_for_PyTorch) -- [Mnasnet0_75_ID0439_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet0_75_ID0439_for_PyTorch) -- [Mnasnet1_0_ID0438_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet1_0_ID0438_for_PyTorch) -- [Mnasnet1_3_ID0437_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet1_3_ID0437_for_PyTorch) -- [mobilenetv2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/mobilenetv2_for_Pytorch) -- [MobileNetV3-Small_ID1785_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MobileNetV3-Small_ID1785_for_PyTorch) -- [MSPN_ID0960_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MSPN_ID0960_for_PyTorch) -- [MTCNN_ID0435_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MTCNN_ID0435_for_PyTorch) -- 
[Mutual-Channel-Loss_ID1113_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mutual-Channel-Loss_ID1113_for_PyTorch) -- [NeuMF_ID0351_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch) -- [PASSRnet_ID0986_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PASSRnet_ID0986_for_PyTorch) -- [pFedMe_ID1597_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/pFedMe_ID1597_for_PyTorch) -- [PFF_ID1128_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PFF_ID1128_for_PyTorch) -- [PiT_ID2671_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PiT_ID2671_for_PyTorch) -- [Pix2Pix_ID0331_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Pix2Pix_ID0331_for_PyTorch) -- [POOLNET_ID0875_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/POOLNET_ID0875_for_PyTorch) -- [Pysot_ID0428_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Pysot_ID0428_for_PyTorch) -- [RAFT_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RAFT_for_PyTorch) -- [RANet_ID0994_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RANet_ID0994_for_PyTorch) -- [RES2NET_ID0824_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RES2NET_ID0824_for_PyTorch) -- [residual_adapters_ID1598_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/residual_adapters_ID1598_for_PyTorch) -- [Resnet101_ID0425_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet101_ID0425_for_PyTorch) -- [ResNet152_ID0424_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch) -- [Resnet152_ID1592_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet152_ID1592_for_PyTorch) -- [Resnet18_ID0423_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet18_ID0423_for_PyTorch) -- [Resnet34_ID0422_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet34_ID0422_for_PyTorch) -- [Resnext101_32x8d_ID0420_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnext101_32x8d_ID0420_for_PyTorch) -- [ResNeXt101_ID1717_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNeXt101_ID1717_for_PyTorch) -- [ResNeXt50_ID0419_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch) -- [RRN_ID1182_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RRN_ID1182_for_PyTorch) -- [RUC_ID2470_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch) -- 
[SEResNext_ID0415_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch) -- [SG2IM_ID0786_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SG2IM_ID0786_for_PyTorch) -- [SimplePose_ID1038_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SimplePose_ID1038_for_PyTorch) -- [SINGLESHOTPOSE_ID0869_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch) -- [SlowFast_ID0646_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch) -- [SmartSketch_ID1046_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SmartSketch_ID1046_for_PyTorch) -- [SqueezeNet_ID0413_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SqueezeNet_ID0413_for_PyTorch) -- [SRCNN_ID1770_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SRCNN_ID1770_for_PyTorch) -- [SRGAN_ID2956_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SRGAN_ID2956_for_PyTorch) -- [SSD-MobileNet_ID1936_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SSD-MobileNet_ID1936_for_PyTorch) -- [STARGAN_ID0725_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/STARGAN_ID0725_for_PyTorch) -- [Swin-Transformer_ID2377_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Swin-Transformer_ID2377_for_PyTorch) -- [TabNet_ID2862_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/TabNet_ID2862_for_PyTorch) -- [Token-to-Token-ViT_ID2668_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Token-to-Token-ViT_ID2668_for_PyTorch) -- [TransformerXL_ID0699_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/TransformerXL_ID0699_for_PyTorch) -- [VAE+GAN_ID0401_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VAE+GAN_ID0401_for_PyTorch) -- [VGG16_ID0467_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch) -- [VGG19_ID0244_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGG19_ID0244_for_PyTorch) -- [VGGNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch) -- [VGGNet_ID0400_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGGNet_ID0400_for_PyTorch) -- [vit-base_ID0492_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch) -- [VIT_ID2381_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VIT_ID2381_for_PyTorch) -- [Wide_resnet101_2_ID0398_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Wide_resnet101_2_ID0398_for_PyTorch) -- 
[Wide_resnet50_2_ID0397_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Wide_resnet50_2_ID0397_for_PyTorch) -- [Xception_ID1454_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Xception_ID1454_for_PyTorch) -- [ZERO-DCE_ID1040_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ZERO-DCE_ID1040_for_PyTorch) -- [deeplabv3+_ID0326_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/deeplabv3+_ID0326_for_PyTorch) -- [DeepLabV3+_ID0458_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch) -- [DeepLabV3_ID0621_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch) -- [SETR_ID1572_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/SETR_ID1572_for_PyTorch) -- [GAN_ID1931_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_synthesis/GAN_ID1931_for_PyTorch) -- [DSCNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch) -- [FFA-NET_ID1043_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/quality_enhancement/FFA-NET_ID1043_for_PyTorch) -- [BERT-ITPT-FiT_ID0340_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/BERT-ITPT-FiT_ID0340_for_PyTorch) -- [BERT_base_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/BERT_base_for_PyTorch) -- [FairSeq_Transformer_ID0496_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch) -- [Retinanet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Retinanet_for_PyTorch) -- [Speech_Transformer_ID0487_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch) -- [Swin-Transformer_ID2375_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Swin-Transformer_ID2375_for_PyTorch) -- [Swin-Transformer_ID2379_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Swin-Transformer_ID2379_for_PyTorch) -- [Textcnn_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Textcnn_for_PyTorch) -- [Transformer_ID0105_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch) -- [Multi-Gradient_Descent_ID0349_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/others/Multi-Gradient_Descent_ID0349_for_PyTorch) -- [Widedeep_ID2866_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch) - -### ACL_PyTorch - -#### built-in - -- [EspNet_for_Pytoch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/EspNet_for_Pytoch) -- [Jasper_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch) -- [LSTM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/LSTM) -- [RawNet2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch) -- 
[Tacotron2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Tacotron2_for_Pytorch) -- [TDNN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch) -- [Wenet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch) -- [3DUnet_for_PTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/3DUnet_for_PTorch) -- [CascadeRCNN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CascadeRCNN-DCN) -- [CascadeRCNN-DCN-101_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CascadeRCNN-DCN-101_for_Pytorch) -- [CenterNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch) -- [CRNN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch) -- [DB_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/DB_for_PyTorch) -- [Deepmar_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch) -- [Deepsort_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch) -- [Densenet121_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Densenet121_Pytorch_Infer) -- [DPN131_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/DPN131_for_Pytorch) -- [EfficientNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch) -- [FasterRCNN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/FasterRCNN-DCN) -- [Flownet2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch) -- [GoogleNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch) -- [I3D_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/I3D_for_Pytorch) -- [InceptionV3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch) -- [InceptionV4_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch) -- [MobileNetV2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch) -- [Pelee_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Pelee_for_Pytorch) -- [PSENet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch) -- [Res2Net_v1b_101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch) -- [Resnet101_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer) -- [Resnet18_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch) -- [Resnet34_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet34_for_Pytorch) -- [Resnet50_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer) -- 
[ResNeXt50_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch) -- [SE_ResNet50_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer) -- [Shufflenetv2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch) -- [STGCN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/STGCN_for_Pytorch) -- [U2-Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch) -- [Vgg16_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch) -- [VGG16_SSD_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/VGG16_SSD_for_PyTorch) -- [Vgg19_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch) -- [Wide_ResNet50_2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Wide_ResNet50_2_for_Pytorch) -- [YolactEdge_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch) -- [Yolov3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch) -- [Yolov4_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch) -- [Yolov5_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch) -- [Bert_Base_Uncased](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/Bert_Base_Uncased) -- [CNN_Transformer_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch) -- [textcnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/textcnn) -- [TransformerXL_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/TransformerXL_for_Pytorch) -- [VilBert_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/VilBert_for_Pytorch) - -#### contrib - -- [FastPitch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/FastPitch) -- [Jasper](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/Jasper) -- [Speech-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/Speech-Transformer) -- [tdnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/tdnn) -- [3d_attention_net](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/3d_attention_net) -- [AlexNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/AlexNet) -- [baseline_dino_resnet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50) -- [BMN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/BMN) -- [C3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/C3D) -- [CSPResneXt50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50) -- [Deit_Small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Deit_Small) -- 
[DPN131](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/DPN131) -- [Efficient-3DCNNs](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs) -- [EfficientNet-B1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1) -- [EfficientNet-B3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B3) -- [EfficientNet-B5](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B5) -- [FixRes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/FixRes) -- [GaitSet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GaitSet) -- [GENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GENet) -- [GhostNet1.0x](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x) -- [GloRe](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GloRe) -- [HRNet-Image-Classification](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification) -- [InceptionResnetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/InceptionResnetV2) -- [LResNet100E-IR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/LResNet100E-IR) -- [LV-Vit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/LV-Vit) -- [Mnasnet1_0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Mnasnet1_0) -- [MobileNet-v1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1) -- [MobileNetV3_large_100](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/MobileNetV3_large_100) -- [Moco-v2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Moco-v2) -- [OSNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/OSNet) -- [PAMTRI](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PAMTRI) -- [pnasnet5large](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/pnasnet5large) -- [PointNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PointNet) -- [PointNetCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PointNetCNN) -- [Pointnetplus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Pointnetplus) -- [R(2+1)D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/R(2+1)D) -- [RegNetX-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF) -- [RegNetY-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF) -- [ReID_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch) -- [RepVGG](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RepVGG) -- 
[Res2Net101_v1b](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b) -- [ResNeSt50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNeSt50) -- [ResNet101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet101) -- [ResNet152](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet152) -- [ResNet18](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet18) -- [ResNet34](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet34) -- [ResNext101_32x8d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d) -- [Se-Resnext101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Se-Resnext101) -- [SENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/SENet) -- [Shufflenetv1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1) -- [Shufflenetv2+](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+) -- [SimCLR_inference](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference) -- [Sknet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Sknet50) -- [spnasnet_100](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/spnasnet_100) -- [Squeezenet1_1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Squeezenet1_1) -- [Swin-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Swin-Transformer) -- [TNT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/TNT) -- [TResNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/TResNet) -- [vit-small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vit-small) -- [vit_base_patch32_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224) -- [vovnet39](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vovnet39) -- [Wide_ResNet101_2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2) -- [X3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/X3D) -- [xception](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/xception) -- [xcit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/xcit) -- [3DUnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/3DUnet) -- [AdvancedEAST](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/AdvancedEAST) -- [AlphaPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/AlphaPose) -- [BSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/BSN) -- 
[Cascade-RCNN-Resnet101-FPN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN) -- [Cascade_RCNN_R101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101) -- [CenterFace](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CenterFace) -- [CenterNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CenterNet) -- [CTPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CTPN) -- [Deepspeech](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Deepspeech) -- [Detr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Detr) -- [DSFD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/DSFD) -- [EfficientDetD0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/EfficientDetD0) -- [EfficientDetD7](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/EfficientDetD7) -- [FairMOT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FairMOT) -- [FasterRCNN_FPN_DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN) -- [Fcos](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Fcos) -- [FOTS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FOTS) -- [Fsaf](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Fsaf) -- [GFocalV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/GFocalV2) -- [M2Det](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/M2Det) -- [Nasnetlarge](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Nasnetlarge) -- [NAS_FPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/NAS_FPN) -- [OpenPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/OpenPose) -- [pyramidbox](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/pyramidbox) -- [RCF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RCF) -- [RefineDet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RefineDet) -- [Retinanet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Retinanet) -- [RFCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RFCN) -- [SSD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD) -- [SSD-MobileNetV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1) -- [SSD-MobileNetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV2) -- [SSD-Resnet34](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34) -- [TextSnake](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/TextSnake) -- [yolor](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/yolor) -- 
[YOLOX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/YOLOX) -- [AlignedReID](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/AlignedReID) -- [FaceBoxes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/FaceBoxes) -- [FaceNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/FaceNet) -- [ReId-MGN-master](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/ReId-MGN-master) -- [reid_PCB_baseline](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline) -- [Retinaface](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/Retinaface) -- [BigGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/BigGAN) -- [CGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/CGAN) -- [CycleGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/CycleGAN) -- [DCGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/DCGAN) -- [GAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/GAN) -- [Pix2Pix](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/Pix2Pix) -- [Pix2pixHD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/Pix2pixHD) -- [StarGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/StarGAN) -- [Cross-Scale-Non-Local-Attention](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention) -- [DnCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/DnCNN) -- [SRFlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/SRFlow) -- [wdsr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/wdsr) -- [3DMPPE-ROOTNET](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET) -- [HigherHRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet) -- [MSPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/MSPN) -- [TransPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/TransPose) -- [VideoPose3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D) -- [ADNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet) -- [SRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/quality_enhancement/SRGAN) -- [3D_HRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/3D_HRNet) -- [Cascade_Mask_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Cascade_Mask_RCNN) -- [Cascade_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Cascade_RCNN) -- [DeeplabV3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3) -- [ENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ENet) -- 
[ErfNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ErfNet) -- [FastSCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/FastSCNN) -- [FCN-8s](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/FCN-8s) -- [GCNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/GCNet) -- [ICNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ICNet) -- [IntraDA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/IntraDA) -- [Nested_UNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Nested_UNet) -- [PointRend](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PointRend) -- [PraNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PraNet) -- [PSPnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PSPnet) -- [RefineNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/RefineNet) -- [SETR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SETR) -- [SiamMask](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SiamMask) -- [SOLOV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SOLOV1) -- [SOLOV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SOLOV2) -- [Ultra-Fast-Lane-Detection](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection) -- [VNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/VNet) -- [Wseg](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Wseg) -- [YOLACT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/YOLACT) -- [YOLACT_plus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus) -- [EDSR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/EDSR) -- [RCAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/RCAN) -- [RDN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/RDN) -- [Real-ESRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN) -- [SRCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/SRCNN) -- [SiamFC](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/tracking/SiamFC) -- [SiamRPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/tracking/SiamRPN) -- [I3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/I3D) -- [NonLocal](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/NonLocal) -- [SlowFast](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/SlowFast) -- [TSM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/TSM) -- 
[TSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/TSN)
-- [RotatE](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/knowledge/RotatE)
-- [albert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/albert)
-- [BertSum](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/BertSum)
-- [CNN_Transformer_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/CNN_Transformer_for_Pytorch)
-- [roberta](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/roberta)
-- [TextCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/TextCNN)
-- [tinybert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/tinybert)
-- [c51](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/rl/c51)
-- [DQN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/rl/DQN)
-
-
-
-
-
-
-
-## Disclaimer
-
-Ascend ModelZoo only provides scripts for downloading and preprocessing public datasets. These datasets do not belong to ModelZoo, and ModelZoo is not responsible for their quality or maintenance. Please make sure you have the right to use these datasets. Models trained on these datasets may be used for non-commercial research and education only.
-
-To dataset owners:
-
-If you do not want your dataset published on ModelZoo, or want the dataset belonging to you in ModelZoo updated, please file an issue on GitHub/Gitee and we will remove or update it according to your issue. Thank you for your understanding of and contribution to our community.
-
-Ascend ModelZoo is licensed under Apache 2.0. For details, see the LICENSE file.
+# Welcome to Ascend ModelZoo
+
+To make Ascend ModelZoo easier to use for more developers, we will keep adding typical networks and related pre-trained models. If you have any requirements, please file an issue at [modelzoo/issues](https://gitee.com/ascend/modelzoo/issues) and we will handle it promptly.
+
+## How to Contribute
+
+Before starting your contribution, please read [CONTRIBUTING](https://gitee.com/ascend/modelzoo/blob/master/CONTRIBUTING.md).
+Thank you!
+
+## Table of Contents
+
+
+### PyTorch
+
+#### built-in
+
+- [3D_ResNet_ID0421_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/3D_ResNet_ID0421_for_PyTorch)
+- [CRNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/CRNN_for_PyTorch)
+- [DeepMar_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DeepMar_for_PyTorch)
+- [Densenet121_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Densenet121_for_PyTorch)
+- [DenseNet161_ID0455_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet161_ID0455_for_PyTorch)
+- [DenseNet169_ID0454_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch)
+- [DenseNet201_ID0453_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/DenseNet201_ID0453_for_PyTorch)
+- [EfficientNet-B1_ID1713_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B1_ID1713_for_PyTorch)
+- [EfficientNet-B2_ID1714_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B2_ID1714_for_PyTorch)
+- [EfficientNet-B3_ID0450_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B3_ID0450_for_PyTorch)
+- [EfficientNet-B4_ID1632_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B4_ID1632_for_PyTorch)
+- [EfficientNet-B5_ID1633_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet-B5_ID1633_for_PyTorch)
+- 
[EfficientNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/EfficientNet_for_PyTorch) +- [FaceNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/FaceNet_for_PyTorch) +- [Gluon_ResNet50_v1b_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1b_for_PyTorch) +- [Gluon_ResNet50_v1c_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1c_for_PyTorch) +- [Gluon_ResNet50_v1d_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Gluon_ResNet50_v1d_for_PyTorch) +- [Googlenet_ID0447_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch) +- [MobileNetV1_ID0094_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV1_ID0094_for_PyTorch) +- [MobileNetV2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV2_for_PyTorch) +- [MobileNetV3-Large_ID1784_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/MobileNetV3-Large_ID1784_for_PyTorch) +- [ResNest_ID0426_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/ResNest_ID0426_for_PyTorch) +- [ResNet50_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch) +- [Shufflenetv2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/classification/Shufflenetv2_for_PyTorch) +- [DAL_ID2732_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/DAL_ID2732_for_PyTorch) +- [DB_ID0706_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch) +- [Faster_Mask_RCNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/Faster_Mask_RCNN_for_PyTorch) +- [PSENet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/PSENet_for_PyTorch) +- [RetinaNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/RetinaNet_for_PyTorch) +- [RFCN_ID0418_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/RFCN_ID0418_for_PyTorch) +- [YoloV3_ID1790_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/YoloV3_ID1790_for_PyTorch) +- [YOLOV4_ID0396_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/detection/YOLOV4_ID0396_for_PyTorch) +- [Attention_R2U_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/Attention_R2U_Net_for_PyTorch) +- [AttU_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/AttU_Net_for_PyTorch) +- [DeepLabv3+_ID1695_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/DeepLabv3+_ID1695_for_PyTorch) +- [R2U_Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/cv/semantic_segmentation/R2U_Net_for_PyTorch) +- 
[Bert-Squad_ID0470_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch) +- [CPM_Finetune_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch) +- [FOTS_ID0338_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/FOTS_ID0338_for_PyTorch) +- [GRU_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/GRU_for_PyTorch) +- [LSTM_ID0468_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/LSTM_ID0468_for_PyTorch) +- [mBART_ID2372_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/mBART_ID2372_for_PyTorch) +- [Transformer_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/Transformer_for_PyTorch) +- [XLM_ID0740_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/nlp/XLM_ID0740_for_PyTorch) +- [DCN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/DCN_for_PyTorch) +- [DeepFM_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/DeepFM_for_PyTorch) +- [WDL_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/built-in/others/WDL_for_PyTorch) + +#### contrib + +- [baseline-rawnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/baseline-rawnet) +- [deepspeech](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/deepspeech) +- [FastPitch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/FastPitch) +- [speech-transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/speech-transformer) +- [Tacotron2_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/Tacotron2_for_PyTorch) +- [tdnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/tdnn) +- [WaveGlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/audio/WaveGlow) +- [3d_attention_net](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/3d_attention_net) +- [AlexNet_ID2663_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/AlexNet_ID2663_for_PyTorch) +- [AlignedReID](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/AlignedReID) +- [csp_resnext50-mish](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/csp_resnext50-mish) +- [Deit_Small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Deit_Small) +- [DnCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/DnCNN) +- [DPN-131_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/DPN-131_for_PyTorch) +- [Efficient-3DCNNs_ID1230_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Efficient-3DCNNs_ID1230_for_PyTorch) +- [EfficientNet-B1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B1) +- [EfficientNet-B3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B3) +- 
[EfficientNet-B5_ID1621_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/EfficientNet-B5_ID1621_for_PyTorch) +- [FixRes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/FixRes) +- [GaitSet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GaitSet) +- [GENet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GENet_for_Pytorch) +- [GhostNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GhostNet) +- [GoogleNet_ID1623_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/GoogleNet_ID1623_for_PyTorch) +- [HRNet_ID1780_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch) +- [InceptionResNetV2_ID1779_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionResNetV2_ID1779_for_PyTorch) +- [InceptionV3_ID1596_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch) +- [InceptionV4_ID1778_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/InceptionV4_ID1778_for_PyTorch) +- [LResNet100E-IR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/LResNet100E-IR) +- [MGN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MGN) +- [MnasNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MnasNet) +- [MobileNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MobileNet) +- [MobileNetV3_large_100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/MobileNetV3_large_100_for_PyTorch) +- [Moco-v2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Moco-v2) +- [NASNet-A-Mobile](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/NASNet-A-Mobile) +- [OSNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/OSNet) +- [PCB](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PCB) +- [PnasNet5Large](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PnasNet5Large) +- [PointNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/PointNet) +- [pointnetCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/pointnetCNN) +- [RegNetX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RegNetX) +- [RegNetY-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RegNetY-1.6GF) +- [ReidStrongBaseline](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ReidStrongBaseline) +- [RepVGG](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/RepVGG) +- [Res2Net101_v1b](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Res2Net101_v1b) +- [ResNeSt50_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch) +- 
[ResNet101_ID1595_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet101_ID1595_for_PyTorch) +- [ResNet152](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet152) +- [ResNet18_ID1593_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet18_ID1593_for_PyTorch) +- [ResNet34_ID1594_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNet34_ID1594_for_PyTorch) +- [ResNeXt-50-32x4d_ID1624_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeXt-50-32x4d_ID1624_for_PyTorch) +- [ResNeXt101_32x8d_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ResNeXt101_32x8d_for_PyTorch) +- [SE-ResNet-50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SE-ResNet-50) +- [SE-ResNext-101-32x4d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d) +- [Se-ResNext-50-32x4d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Se-ResNext-50-32x4d) +- [SENet154](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SENet154) +- [ShuffleNetV1_ID1625_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ShuffleNetV1_ID1625_for_PyTorch) +- [ShuffleNetV2Plus_ID1626_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/ShuffleNetV2Plus_ID1626_for_PyTorch) +- [SkresNet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SkresNet50) +- [SPNASNet_100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SPNASNet_100_for_PyTorch) +- [SqueezeNet1_1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/SqueezeNet1_1) +- [Swin-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Swin-Transformer) +- [TNT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/TNT) +- [TResNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/TResNet) +- [Vehicle_Re-Identification](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vehicle_Re-Identification) +- [Vgg16_ID1630_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vgg16_ID1630_for_PyTorch) +- [Vgg19_ID1631_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vgg19_ID1631_for_PyTorch) +- [vit_base_patch32_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/vit_base_patch32_224) +- [Vit_small_patch16_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Vit_small_patch16_224) +- [VOLO](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/VOLO) +- [VoVNet39](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/VoVNet39) +- [WideResNet101_2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/WideResNet101_2_for_Pytorch) +- 
[WideResNet50_2_ID1627_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/WideResNet50_2_ID1627_for_PyTorch) +- [Xception_ID1777_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/Xception_ID1777_for_PyTorch) +- [xcit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/classification/xcit) +- [AdvancedEAST](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/AdvancedEAST) +- [CascadedMaskRCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CascadedMaskRCNN) +- [Cascade_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Cascade_RCNN) +- [CenterFace](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CenterFace) +- [CenterNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/CenterNet) +- [DSFD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/DSFD) +- [EfficientDetD0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/EfficientDetD0) +- [FaceBoxes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FaceBoxes) +- [FairMOT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FairMOT) +- [FCOS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FCOS) +- [FOTS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FOTS) +- [FSAF_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/FSAF_for_Pytorch) +- [GFocalV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/GFocalV2) +- [M2Det](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/M2Det) +- [NasFPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/NasFPN) +- [Pointnetplus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Pointnetplus) +- [Pyramidbox](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Pyramidbox) +- [RCF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RCF) +- [RefineDet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RefineDet) +- [Retinaface](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/Retinaface) +- [RetinaNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/RetinaNet) +- [SimCLR_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SimCLR_for_Pytorch) +- [SOLOv1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SOLOv1) +- [SOLOv2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SOLOv2) +- [SSD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD) +- [SSD-MobileNetV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-MobileNetV1) +- [SSD-MobilenetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-MobilenetV2) +- [SSD-Resnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/SSD-Resnet) +- 
[StyleGAN2-ADA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/StyleGAN2-ADA) +- [TextSnake](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/TextSnake) +- [YOLACT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLACT) +- [YOLACT_plus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLACT_plus) +- [YOLOR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLOR) +- [YOLOX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/detection/YOLOX) +- [ADNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/ADNet) +- [BigGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/BigGAN) +- [CGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/CGAN) +- [Cross-Scale-Non-Local-Attention](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Cross-Scale-Non-Local-Attention) +- [CycleGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/CycleGAN) +- [DCGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/DCGAN) +- [edsr_x2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/edsr_x2) +- [GAN_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/GAN_Pytorch) +- [Pix2Pix](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Pix2Pix) +- [RDN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/RDN) +- [Srcnn_x2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch) +- [SRFlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/SRFlow) +- [SRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/SRGAN) +- [stargan](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/stargan) +- [wdsr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/others/wdsr) +- [3Dmppe_RootNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/3Dmppe_RootNet) +- [AlphaPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/AlphaPose) +- [DeepPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/DeepPose) +- [HigherHRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/HigherHRNet) +- [Hourglass_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/Hourglass_for_PyTorch) +- [Lightweight_OpenPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/Lightweight_OpenPose) +- [MSPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/MSPN) +- [ST-GCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/ST-GCN) +- [TransPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/TransPose) +- [VoxelPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/pose_estimation/VoxelPose) +- [3DUNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/3DUNet) +- 
[DeeplabV3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch) +- [ENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ENet) +- [ErfNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ErfNet) +- [FastSCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/FastSCNN) +- [FCN8s](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/FCN8s) +- [HRnet-OCR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/HRnet-OCR) +- [HRNet_SEG_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/HRNet_SEG_for_Pytorch) +- [ICNet_ID1781_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/ICNet_ID1781_for_PyTorch) +- [IntraDA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/IntraDA) +- [PointRend](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PointRend) +- [PraNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PraNet) +- [PSPNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/PSPNet) +- [RCAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/RCAN) +- [RefineNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/RefineNet) +- [SETR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/SETR) +- [Ultra-Fast-Lane-Detection](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/Ultra-Fast-Lane-Detection) +- [UNet++](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/UNet++) +- [UNet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/UNet_for_PyTorch) +- [VNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/VNet) +- [Wseg](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/semantic_segmentation/Wseg) +- [BMN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/BMN) +- [BSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/BSN) +- [C3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/C3D) +- [GloRe](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/GloRe) +- [I3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/I3D) +- [NonLocal](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/NonLocal) +- [R(2+1)D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/R(2+1)D) +- [SiamFC](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SiamFC) +- [SiamRPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SiamRPN) +- [SlowFast](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/SlowFast) +- [TSM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/TSM) +- 
[TSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/TSN) +- [VideoPose3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/VideoPose3D) +- [X3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/cv/video/X3D) +- [albert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/albert) +- [Bertsum_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/Bertsum_for_PyTorch) +- [roberta](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/roberta) +- [TextCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/TextCNN) +- [tinybert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/nlp/tinybert) +- [C51](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/C51) +- [DLRM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/DLRM) +- [DQN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/DQN) +- [RotatE](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/contrib/others/RotatE) + +#### dev + +- [tacotron2_ID0406_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/audio/tacotron2_ID0406_for_PyTorch) +- [3D_attentionnet_ID0478_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/3D_attentionnet_ID0478_for_PyTorch) +- [FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/FasterRCNN-Resnet50-FPN_ID1552_for_PyTorch) +- [HRNet_ID0446_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/HRNet_ID0446_for_PyTorch) +- [PointNet_ID0430_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/PointNet_ID0430_for_PyTorch) +- [SSD-ResNet34_ID0411_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/detection/SSD-ResNet34_ID0411_for_PyTorch) +- [2D_Unet_ID0624_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch) +- [2S-AGCN_ID0909_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/2S-AGCN_ID0909_for_PyTorch) +- [3D_Nest_Unet_ID0476_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/3D_Nest_Unet_ID0476_for_PyTorch) +- [ADACOS_ID1082_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ADACOS_ID1082_for_PyTorch) +- [AdaFM-Net_ID1101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AdaFM-Net_ID1101_for_PyTorch) +- [ADLayer_ID1087_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ADLayer_ID1087_for_PyTorch) +- [AdvancedEast_ID0473_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AdvancedEast_ID0473_for_PyTorch) +- [AlexNet_ID0472_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AlexNet_ID0472_for_PyTorch) +- [ANN_ID2370_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ANN_ID2370_for_PyTorch) +- 
[ArcFace_ID0852_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ArcFace_ID0852_for_PyTorch) +- [ATS_ID2682_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ATS_ID2682_for_PyTorch) +- [AUTOAUGMENT_ID0792_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/AUTOAUGMENT_ID0792_for_PyTorch) +- [BASNET_ID1134_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/BASNET_ID1134_for_PyTorch) +- [BertBase_ID0490_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch) +- [CDAR_ID2747_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CDAR_ID2747_for_PyTorch) +- [ConvLSTM_ID1772_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ConvLSTM_ID1772_for_PyTorch) +- [coral-cnn_ID1064_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/coral-cnn_ID1064_for_PyTorch) +- [CrossFormer_ID2449_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CrossFormer_ID2449_for_PyTorch) +- [CycleGAN_ID0521_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/CycleGAN_ID0521_for_PyTorch) +- [DBPN_ID2917_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DBPN_ID2917_for_PyTorch) +- [DCAP_ID2836_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DCAP_ID2836_for_PyTorch) +- [DEEP-HEAD-POSE_ID0796_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DEEP-HEAD-POSE_ID0796_for_PyTorch) +- [DeepLab-CRF_ID1873_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DeepLab-CRF_ID1873_for_PyTorch) +- [DeepSort_ID0654_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DeepSort_ID0654_for_PyTorch) +- [deit_ID2467_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/deit_ID2467_for_PyTorch) +- [DGMS_ID2460_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/DGMS_ID2460_for_PyTorch) +- [EfficientNet-B6_ID1715_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/EfficientNet-B6_ID1715_for_PyTorch) +- [EfficientNet-B7_ID1716_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/EfficientNet-B7_ID1716_for_PyTorch) +- [ESPCN_ID2919_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESPCN_ID2919_for_PyTorch) +- [ESPCN_ID3002_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESPCN_ID3002_for_PyTorch) +- [ESRGAN_ID1813_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ESRGAN_ID1813_for_PyTorch) +- [FasterRCNN_ID0100_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FasterRCNN_ID0100_for_PyTorch) +- 
[FFDNet_ID0970_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FFDNet_ID0970_for_PyTorch) +- [FixMatch_ID0989_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/FixMatch_ID0989_for_PyTorch) +- [GENet_ID0671_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/GENet_ID0671_for_PyTorch) +- [GhostNet_ID1622_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/GhostNet_ID1622_for_PyTorch) +- [Hourglass_ID1809_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Hourglass_ID1809_for_PyTorch) +- [ICT_ID1179_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ICT_ID1179_for_PyTorch) +- [InceptionV1_ID1568_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV1_ID1568_for_PyTorch) +- [InceptionV2_ID0698_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV2_ID0698_for_PyTorch) +- [InceptionV3_ID0445_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV3_ID0445_for_PyTorch) +- [InceptionV4_ID0444_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV4_ID0444_for_PyTorch) +- [InceptionV4_ID2473_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/InceptionV4_ID2473_for_PyTorch) +- [Keyword-MLP_ID2441_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Keyword-MLP_ID2441_for_PyTorch) +- [LADE_ID2445_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/LADE_ID2445_for_PyTorch) +- [MaskRCNN_ID0101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MaskRCNN_ID0101_for_PyTorch) +- [mBART_ID1550_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/mBART_ID1550_for_PyTorch) +- [MMAL-NET_ID1116_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MMAL-NET_ID1116_for_PyTorch) +- [MMOE_ID2865_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MMOE_ID2865_for_PyTorch) +- [Mnasnet0_75_ID0439_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet0_75_ID0439_for_PyTorch) +- [Mnasnet1_0_ID0438_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet1_0_ID0438_for_PyTorch) +- [Mnasnet1_3_ID0437_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mnasnet1_3_ID0437_for_PyTorch) +- [mobilenetv2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/mobilenetv2_for_Pytorch) +- [MobileNetV3-Small_ID1785_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MobileNetV3-Small_ID1785_for_PyTorch) +- [MSPN_ID0960_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MSPN_ID0960_for_PyTorch) +- 
[MTCNN_ID0435_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/MTCNN_ID0435_for_PyTorch) +- [Mutual-Channel-Loss_ID1113_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Mutual-Channel-Loss_ID1113_for_PyTorch) +- [NeuMF_ID0351_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/NeuMF_ID0351_for_PyTorch) +- [PASSRnet_ID0986_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PASSRnet_ID0986_for_PyTorch) +- [pFedMe_ID1597_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/pFedMe_ID1597_for_PyTorch) +- [PFF_ID1128_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PFF_ID1128_for_PyTorch) +- [PiT_ID2671_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/PiT_ID2671_for_PyTorch) +- [Pix2Pix_ID0331_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Pix2Pix_ID0331_for_PyTorch) +- [POOLNET_ID0875_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/POOLNET_ID0875_for_PyTorch) +- [Pysot_ID0428_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Pysot_ID0428_for_PyTorch) +- [RAFT_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RAFT_for_PyTorch) +- [RANet_ID0994_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RANet_ID0994_for_PyTorch) +- [RES2NET_ID0824_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RES2NET_ID0824_for_PyTorch) +- [residual_adapters_ID1598_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/residual_adapters_ID1598_for_PyTorch) +- [Resnet101_ID0425_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet101_ID0425_for_PyTorch) +- [ResNet152_ID0424_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNet152_ID0424_for_PyTorch) +- [Resnet152_ID1592_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet152_ID1592_for_PyTorch) +- [Resnet18_ID0423_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet18_ID0423_for_PyTorch) +- [Resnet34_ID0422_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnet34_ID0422_for_PyTorch) +- [Resnext101_32x8d_ID0420_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Resnext101_32x8d_ID0420_for_PyTorch) +- [ResNeXt101_ID1717_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNeXt101_ID1717_for_PyTorch) +- [ResNeXt50_ID0419_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ResNeXt50_ID0419_for_PyTorch) +- [RRN_ID1182_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RRN_ID1182_for_PyTorch) +- 
[RUC_ID2470_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/RUC_ID2470_for_PyTorch) +- [SEResNext_ID0415_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SEResNext_ID0415_for_PyTorch) +- [SG2IM_ID0786_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SG2IM_ID0786_for_PyTorch) +- [SimplePose_ID1038_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SimplePose_ID1038_for_PyTorch) +- [SINGLESHOTPOSE_ID0869_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SINGLESHOTPOSE_ID0869_for_PyTorch) +- [SlowFast_ID0646_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SlowFast_ID0646_for_PyTorch) +- [SmartSketch_ID1046_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SmartSketch_ID1046_for_PyTorch) +- [SqueezeNet_ID0413_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SqueezeNet_ID0413_for_PyTorch) +- [SRCNN_ID1770_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SRCNN_ID1770_for_PyTorch) +- [SRGAN_ID2956_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SRGAN_ID2956_for_PyTorch) +- [SSD-MobileNet_ID1936_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/SSD-MobileNet_ID1936_for_PyTorch) +- [STARGAN_ID0725_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/STARGAN_ID0725_for_PyTorch) +- [Swin-Transformer_ID2377_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Swin-Transformer_ID2377_for_PyTorch) +- [TabNet_ID2862_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/TabNet_ID2862_for_PyTorch) +- [Token-to-Token-ViT_ID2668_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Token-to-Token-ViT_ID2668_for_PyTorch) +- [TransformerXL_ID0699_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/TransformerXL_ID0699_for_PyTorch) +- [VAE+GAN_ID0401_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VAE+GAN_ID0401_for_PyTorch) +- [VGG16_ID0467_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGG16_ID0467_for_PyTorch) +- [VGG19_ID0244_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGG19_ID0244_for_PyTorch) +- [VGGNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGGNet_for_Pytorch) +- [VGGNet_ID0400_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VGGNet_ID0400_for_PyTorch) +- [vit-base_ID0492_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/vit-base_ID0492_for_PyTorch) +- [VIT_ID2381_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/VIT_ID2381_for_PyTorch) +- 
[Wide_resnet101_2_ID0398_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Wide_resnet101_2_ID0398_for_PyTorch) +- [Wide_resnet50_2_ID0397_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Wide_resnet50_2_ID0397_for_PyTorch) +- [Xception_ID1454_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/Xception_ID1454_for_PyTorch) +- [ZERO-DCE_ID1040_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_classification/ZERO-DCE_ID1040_for_PyTorch) +- [deeplabv3+_ID0326_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/deeplabv3+_ID0326_for_PyTorch) +- [DeepLabV3+_ID0458_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch) +- [DeepLabV3_ID0621_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch) +- [SETR_ID1572_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_segmentation/SETR_ID1572_for_PyTorch) +- [GAN_ID1931_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/image_synthesis/GAN_ID1931_for_PyTorch) +- [DSCNN_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/quality_enhancement/DSCNN_for_PyTorch) +- [FFA-NET_ID1043_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/cv/quality_enhancement/FFA-NET_ID1043_for_PyTorch) +- [BERT-ITPT-FiT_ID0340_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/BERT-ITPT-FiT_ID0340_for_PyTorch) +- [BERT_base_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/BERT_base_for_PyTorch) +- [FairSeq_Transformer_ID0496_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/FairSeq_Transformer_ID0496_for_PyTorch) +- [Retinanet_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Retinanet_for_PyTorch) +- [Speech_Transformer_ID0487_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Speech_Transformer_ID0487_for_PyTorch) +- [Swin-Transformer_ID2375_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Swin-Transformer_ID2375_for_PyTorch) +- [Swin-Transformer_ID2379_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Swin-Transformer_ID2379_for_PyTorch) +- [Textcnn_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Textcnn_for_PyTorch) +- [Transformer_ID0105_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch) +- [Multi-Gradient_Descent_ID0349_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/others/Multi-Gradient_Descent_ID0349_for_PyTorch) +- [Widedeep_ID2866_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch) + +### ACL_PyTorch + +#### built-in + +- [EspNet_for_Pytoch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/EspNet_for_Pytoch) +- [Jasper_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Jasper_for_PyTorch) +- 
[LSTM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/LSTM) +- [RawNet2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/RawNet2_for_Pytorch) +- [Tacotron2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Tacotron2_for_Pytorch) +- [TDNN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/TDNN_for_Pytorch) +- [Wenet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/audio/Wenet_for_Pytorch) +- [3DUnet_for_PTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/3DUnet_for_PTorch) +- [CascadeRCNN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CascadeRCNN-DCN) +- [CascadeRCNN-DCN-101_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CascadeRCNN-DCN-101_for_Pytorch) +- [CenterNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CenterNet_for_Pytorch) +- [CRNN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/CRNN_for_Pytorch) +- [DB_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/DB_for_PyTorch) +- [Deepmar_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Deepmar_for_Pytorch) +- [Deepsort_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Deepsort_for_Pytorch) +- [Densenet121_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Densenet121_Pytorch_Infer) +- [DPN131_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/DPN131_for_Pytorch) +- [EfficientNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/EfficientNet_for_Pytorch) +- [FasterRCNN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/FasterRCNN-DCN) +- [Flownet2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Flownet2_for_Pytorch) +- [GoogleNet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/GoogleNet_for_Pytorch) +- [I3D_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/I3D_for_Pytorch) +- [InceptionV3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/InceptionV3_for_Pytorch) +- [InceptionV4_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/InceptionV4_for_Pytorch) +- [MobileNetV2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/MobileNetV2_for_Pytorch) +- [Pelee_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Pelee_for_Pytorch) +- [PSENet_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/PSENet_for_Pytorch) +- [Res2Net_v1b_101_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Res2Net_v1b_101_for_PyTorch) +- [Resnet101_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet101_Pytorch_Infer) +- [Resnet18_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet18_for_PyTorch) +- 
[Resnet34_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet34_for_Pytorch) +- [Resnet50_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer) +- [ResNeXt50_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/ResNeXt50_for_Pytorch) +- [SE_ResNet50_Pytorch_Infer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/SE_ResNet50_Pytorch_Infer) +- [Shufflenetv2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Shufflenetv2_for_Pytorch) +- [STGCN_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/STGCN_for_Pytorch) +- [U2-Net_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/U2-Net_for_PyTorch) +- [Vgg16_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Vgg16_for_Pytorch) +- [VGG16_SSD_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/VGG16_SSD_for_PyTorch) +- [Vgg19_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Vgg19_for_Pytorch) +- [Wide_ResNet50_2_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Wide_ResNet50_2_for_Pytorch) +- [YolactEdge_for_PyTorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/YolactEdge_for_PyTorch) +- [Yolov3_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov3_for_Pytorch) +- [Yolov4_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov4_for_Pytorch) +- [Yolov5_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/cv/Yolov5_for_Pytorch) +- [Bert_Base_Uncased](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/Bert_Base_Uncased) +- [CNN_Transformer_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/CNN_Transformer_for_Pytorch) +- [textcnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/textcnn) +- [TransformerXL_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/TransformerXL_for_Pytorch) +- [VilBert_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/nlp/VilBert_for_Pytorch) + +#### contrib + +- [FastPitch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/FastPitch) +- [Jasper](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/Jasper) +- [Speech-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/Speech-Transformer) +- [tdnn](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/audio/tdnn) +- [3d_attention_net](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/3d_attention_net) +- [AlexNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/AlexNet) +- [baseline_dino_resnet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/baseline_dino_resnet50) +- [BMN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/BMN) +- 
[C3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/C3D) +- [CSPResneXt50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/CSPResneXt50) +- [Deit_Small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Deit_Small) +- [DPN131](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/DPN131) +- [Efficient-3DCNNs](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Efficient-3DCNNs) +- [EfficientNet-B1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B1) +- [EfficientNet-B3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B3) +- [EfficientNet-B5](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/EfficientNet-B5) +- [FixRes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/FixRes) +- [GaitSet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GaitSet) +- [GENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GENet) +- [GhostNet1.0x](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GhostNet1.0x) +- [GloRe](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/GloRe) +- [HRNet-Image-Classification](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/HRNet-Image-Classification) +- [InceptionResnetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/InceptionResnetV2) +- [LResNet100E-IR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/LResNet100E-IR) +- [LV-Vit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/LV-Vit) +- [Mnasnet1_0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Mnasnet1_0) +- [MobileNet-v1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/MobileNet-v1) +- [MobileNetV3_large_100](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/MobileNetV3_large_100) +- [Moco-v2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Moco-v2) +- [OSNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/OSNet) +- [PAMTRI](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PAMTRI) +- [pnasnet5large](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/pnasnet5large) +- [PointNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PointNet) +- [PointNetCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/PointNetCNN) +- [Pointnetplus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Pointnetplus) +- [R(2+1)D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/R(2+1)D) +- [RegNetX-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RegNetX-1.6GF) +- 
[RegNetY-1.6GF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RegNetY-1.6GF) +- [ReID_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ReID_for_Pytorch) +- [RepVGG](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/RepVGG) +- [Res2Net101_v1b](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Res2Net101_v1b) +- [ResNeSt50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNeSt50) +- [ResNet101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet101) +- [ResNet152](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet152) +- [ResNet18](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet18) +- [ResNet34](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNet34) +- [ResNext101_32x8d](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/ResNext101_32x8d) +- [Se-Resnext101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Se-Resnext101) +- [SENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/SENet) +- [Shufflenetv1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Shufflenetv1) +- [Shufflenetv2+](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Shufflenetv2+) +- [SimCLR_inference](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/SimCLR_inference) +- [Sknet50](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Sknet50) +- [spnasnet_100](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/spnasnet_100) +- [Squeezenet1_1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Squeezenet1_1) +- [Swin-Transformer](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Swin-Transformer) +- [TNT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/TNT) +- [TResNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/TResNet) +- [vit-small](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vit-small) +- [vit_base_patch32_224](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vit_base_patch32_224) +- [vovnet39](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/vovnet39) +- [Wide_ResNet101_2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/Wide_ResNet101_2) +- [X3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/X3D) +- [xception](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/xception) +- [xcit](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/classfication/xcit) +- [3DUnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/3DUnet) +- 
[AdvancedEAST](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/AdvancedEAST) +- [AlphaPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/AlphaPose) +- [BSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/BSN) +- [Cascade-RCNN-Resnet101-FPN-DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Cascade-RCNN-Resnet101-FPN-DCN) +- [Cascade_RCNN_R101](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Cascade_RCNN_R101) +- [CenterFace](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CenterFace) +- [CenterNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CenterNet) +- [CTPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/CTPN) +- [Deepspeech](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Deepspeech) +- [Detr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Detr) +- [DSFD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/DSFD) +- [EfficientDetD0](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/EfficientDetD0) +- [EfficientDetD7](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/EfficientDetD7) +- [FairMOT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FairMOT) +- [FasterRCNN_FPN_DCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FasterRCNN_FPN_DCN) +- [Fcos](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Fcos) +- [FOTS](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/FOTS) +- [Fsaf](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Fsaf) +- [GFocalV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/GFocalV2) +- [M2Det](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/M2Det) +- [Nasnetlarge](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Nasnetlarge) +- [NAS_FPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/NAS_FPN) +- [OpenPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/OpenPose) +- [pyramidbox](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/pyramidbox) +- [RCF](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RCF) +- [RefineDet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RefineDet) +- [Retinanet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/Retinanet) +- [RFCN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/RFCN) +- [SSD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD) +- [SSD-MobileNetV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV1) +- [SSD-MobileNetV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-MobileNetV2) +- 
[SSD-Resnet34](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/SSD-Resnet34) +- [TextSnake](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/TextSnake) +- [yolor](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/yolor) +- [YOLOX](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/detection/YOLOX) +- [AlignedReID](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/AlignedReID) +- [FaceBoxes](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/FaceBoxes) +- [FaceNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/FaceNet) +- [ReId-MGN-master](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/ReId-MGN-master) +- [reid_PCB_baseline](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/reid_PCB_baseline) +- [Retinaface](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/face/Retinaface) +- [BigGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/BigGAN) +- [CGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/CGAN) +- [CycleGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/CycleGAN) +- [DCGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/DCGAN) +- [GAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/GAN) +- [Pix2Pix](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/Pix2Pix) +- [Pix2pixHD](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/Pix2pixHD) +- [StarGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/gan/StarGAN) +- [Cross-Scale-Non-Local-Attention](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/Cross-Scale-Non-Local-Attention) +- [DnCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/DnCNN) +- [SRFlow](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/SRFlow) +- [wdsr](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/image_process/wdsr) +- [3DMPPE-ROOTNET](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/3DMPPE-ROOTNET) +- [HigherHRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/HigherHRNet) +- [MSPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/MSPN) +- [TransPose](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/TransPose) +- [VideoPose3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/pose_estimation/VideoPose3D) +- [ADNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/quality_enhancement/ADNet) +- [SRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/quality_enhancement/SRGAN) +- [3D_HRNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/3D_HRNet) +- [Cascade_Mask_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Cascade_Mask_RCNN) +- 
[Cascade_RCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Cascade_RCNN) +- [DeeplabV3](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/DeeplabV3) +- [ENet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ENet) +- [ErfNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ErfNet) +- [FastSCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/FastSCNN) +- [FCN-8s](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/FCN-8s) +- [GCNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/GCNet) +- [ICNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/ICNet) +- [IntraDA](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/IntraDA) +- [Nested_UNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Nested_UNet) +- [PointRend](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PointRend) +- [PraNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PraNet) +- [PSPnet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/PSPnet) +- [RefineNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/RefineNet) +- [SETR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SETR) +- [SiamMask](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SiamMask) +- [SOLOV1](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SOLOV1) +- [SOLOV2](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/SOLOV2) +- [Ultra-Fast-Lane-Detection](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Ultra-Fast-Lane-Detection) +- [VNet](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/VNet) +- [Wseg](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/Wseg) +- [YOLACT](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/YOLACT) +- [YOLACT_plus](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/segmentation/YOLACT_plus) +- [EDSR](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/EDSR) +- [RCAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/RCAN) +- [RDN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/RDN) +- [Real-ESRGAN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/Real-ESRGAN) +- [SRCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/super_resolution/SRCNN) +- [SiamFC](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/tracking/SiamFC) +- [SiamRPN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/tracking/SiamRPN) +- [I3D](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/I3D) +- 
[NonLocal](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/NonLocal) +- [SlowFast](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/SlowFast) +- [TSM](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/TSM) +- [TSN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/cv/video_understanding/TSN) +- [RotatE](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/knowledge/RotatE) +- [albert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/albert) +- [BertSum](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/BertSum) +- [CNN_Transformer_for_Pytorch](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/CNN_Transformer_for_Pytorch) +- [roberta](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/roberta) +- [TextCNN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/TextCNN) +- [tinybert](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/tinybert) +- [c51](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/rl/c51) +- [DQN](https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/rl/DQN) + + + + + + + +## Disclaimer + +Ascend ModelZoo only provides download and preprocessing scripts for public datasets. These datasets do not belong to ModelZoo, nor is ModelZoo responsible for their quality or maintenance. Please make sure you are licensed to use these datasets. Models trained on these datasets may only be used for non-commercial research and education. + +To dataset owners: + +If you do not want your dataset published on ModelZoo, or want to update a dataset of yours that is in ModelZoo, please file an issue on Github/Gitee; we will delete or update your dataset according to your issue. We sincerely appreciate your understanding of and contribution to our community. + +Ascend ModelZoo is licensed under Apache 2.0. For details, see the LICENSE file. diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index e4ddc09e7ff052826027986fbaebcf54a104c5b3..d6cdc5aec11c9d761c049143c884f5ca86ffb355 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -1,1975 +1,1975 @@ -OPEN SOURCE SOFTWARE NOTICE - -Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country. - -Warranty Disclaimer -THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS.
- -Copyright Notice and License Texts - -Software: Eigen 3.3.7 -Copyright notice: -Copyright (C) 2014 Benoit Steiner -Copyright (C) 2013 Christian Seiler -Copyright (C) 2015 Eugene Brevdo -Copyright (C) 2014-2015 Benoit Steiner -Copyright (C) 2015 Navdeep Jaitly -Copyright (C) 2014 Eric Martin -Copyright (C) 2015 Benoit Steiner -Copyright (C) 2016 Rasmus Munk Larsen -Copyright (C) 2016 Benoit Steiner -Copyright (C) 2015 Jianwei Cui -Copyright (C) 2016 Eugene Brevdo -Copyright (C) 2015 Ke Yang -Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd -Copyright (C) 2014 Navdeep Jaitly -Copyright (C) 2016 Igor Babuschkin -Copyright (C) 2016 Dmitry Vyukov -Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 -Copyright (C) 2008 Gael Guennebaud -Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002 -Copyright (C) 2008-2010 Gael Guennebaud -Copyright (C) 2008-2016 Gael Guennebaud -Copyright (C) 2009 Mark Borgerding mark a borgerding net -Copyright (C) 2008-2009 Gael Guennebaud -Copyright (C) 2013 Desire Nuentsa -Copyright (C) 2013 Gael Guennebaud -Copyright (C) 2011 Gael Guennebaud -Copyright (C) 2012 Desire NUENTSA WAKAM -Copyright (C) 2009 Benoit Jacob -Copyright (C) 2009 Gael Guennebaud -Copyright (C) 2006-2010 Benoit Jacob -Copyright (C) 2006-2008 Benoit Jacob -Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 -Copyright (C) 2010 Manuel Yguel -Copyright (C) 2009 Claire Maurice -Copyright (C) 2010,2012 Jitse Niesen -Copyright (c) 2011, Intel Corporation. All rights reserved. -Copyright (C) 2012-2016 Gael Guennebaud -Copyright (C) 2016 Tobias Wood -Copyright (C) 2010 Jitse Niesen -Copyright (C) 2012 Alexey Korepanov -Copyright (C) 2010 Vincent Lejeune -Copyright (C) 2010 Gael Guennebaud -Copyright (C) 2010 Benoit Jacob -Copyright (C) 2017 Gael Guennebaud -Copyright (C) 2009-2010 Gael Guennebaud -Copyright (C) 2008 Benoit Jacob -Copyright (C) 2009 Mathieu Gautier -Copyright (C) 2010 Hauke Heibel -Copyright (C) 2009 Hauke Heibel -Copyright (C) 2008-2015 Gael Guennebaud -Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002 -Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002 -Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002 -Copyright (C) 2016 Konstantinos Margaritis -Copyright (C) 2007 Julien Pommier -Copyright (C) 2008-2011 Gael Guennebaud -Copyright (C) 2009 Keir Mierle -Copyright (C) 2011 Timothy E. Holy -Copyright (C) 2009 Hauke Heibel -Copyright (C) 2012 Desire Nuentsa -Copyright (C) 2014 Gael Guennebaud -Copyright (C) 2015 Tal Hadad -@copyright (c) 2009-2014 The University of Tennessee and The University of Tennessee Research Foundation. -@copyright (c) 2012-2016 Inria. All rights reserved. -@copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -Copyright 2007-2009 Kitware, Inc. -Copyright 2012-2013 Inria -Copyright 2012-2013 Emmanuel Agullo -Copyright 2012-2013 Mathieu Faverge -Copyright 2012 Cedric Castagnede -Copyright 2013-2016 Florent Pruvost -Copyright 2016 Codeplay Software Ltd. -Copyright (c) 2006, 2007 Montel Laurent, -Copyright (c) 2008, 2009 Gael Guennebaud, -Copyright (c) 2009 Boudewijn Rempt -@copyright (c) 2012-2014 Inria. All rights reserved. 
-Copyright 2013 Florent Pruvost -Copyright (c) 2010 Jitse Niesen, -Copyright (C) 2009 Benjamin Schindler -Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com) -Copyright (C) 2016 Benoit Steiner (benoit.steiner.goog@gmail.com) -Copyright (C) 2009 Thomas Capricelli -Copyright (C) 2012-2013 Desire Nuentsa -Copyright (C) 2012-2014 Gael Guennebaud -Copyright Jorge More - Argonne National Laboratory -Copyright Burt Garbow - Argonne National Laboratory -Copyright Ken Hillstrom - Argonne National Laboratory -Copyright (C) 2009 Ilya Baran -Copyright (c) 2010, Intel Corp. -Copyright (C) 2009-2010 Benoit Jacob -Copyright (C) 2013-2016 Gael Guennebaud -Copyright (C) 2013 Gauthier Brun -Copyright (C) 2013 Nicolas Carre -Copyright (C) 2013 Jean Ceccato -Copyright (C) 2013 Pierre Zoppitelli -Copyright (C) 2013 Jitse Niesen -Copyright (C) 2014-2017 Gael Guennebaud -Copyright (C) 2013-2014 Gael Guennebaud -Copyright (C) 2011-2014 Gael Guennebaud -Copyright (C) 2012 Désiré Nuentsa-Wakam -Copyright (C) 2015 Gael Guennebaud -Copyright (C) 2012 Gael Guennebaud -Copyright (c) 1994 by Xerox Corporation. All rights reserved. -Copyright (C) 2001 Intel Corporation -Copyright (c) 2001 Intel Corporation. -Copyright (C) 2009 Gael Guennebaud -Copyright (C) 2013 Christoph Hertzberg -Copyright (C) 2015 Eugene Brevdo -Copyright (C) 2016 -Mehdi Goli Codeplay Software Ltd. -Ralph Potter Codeplay Software Ltd. -Luke Iwanski Codeplay Software Ltd. -Copyright (C) 2014 Jianwei Cui -Copyright (C) 2015 Vijay Vasudevan -Copyright (C) 2015 -Mehdi Goli Codeplay Software Ltd. -Ralph Potter Codeplay Software Ltd. -Luke Iwanski Codeplay Software Ltd. -Copyright (C) 2014 Navdeep Jaitly -Copyright (C) 2011 Gael Guennebaud -Copyright (C) 2012 desire Nuentsa -Copyright (C) 2012 Kolja Brix -Copyright (C) 2011 Kolja Brix -Copyright (C) 2011 Andreas Platen -Copyright (C) 2012 Chen-Pang He -Copyright (C) 2009 Jitse Niesen -Copyright (C) 2009-2011 Jitse Niesen -Copyright (C) 2012, 2013 Chen-Pang He -Copyright (C) 2011 Jitse Niesen -Copyright (C) 2012 Giacomo Po -Copyright (C) 2008-2010 Gael Guennebaud -Copyright (C) 2016 Gael Guennebaud -Copyright (C) 2010-2011 Hauke Heibel -Copyright (C) 2012 David Harmon -Copyright (C) 2007-2009 Benoit Jacob -Copyright (C) 2007-2010 Benoit Jacob -Copyright (C) 2008-2009 Benoit Jacob -Copyright (C) 2009 Kenneth Riddile -Copyright (C) 2010 Thomas Capricelli -Copyright (C) 2013 Pavel Holoborodko -Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 -Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002 -Copyright (C) 2006-2009 Benoit Jacob -Copyright (C) 2008-2010 Benoit Jacob -Copyright (c) 2008-2015 Pavel Holoborodko -Copyright (C) 20010-2011 Hauke Heibel -Copyright (c) 2006, Montel Laurent, -Copyright (c) 2007, Allen Winter, -Copyright (c) 2007, Alexander Neundorf, -Copyright (C) 2008 Guillaume Saupin -Copyright (C) 2008-2009 Guillaume Saupin -Copyright (C) 2009 Guillaume Saupin -Copyright (C) 2010-2016 Konstantinos Margaritis -Copyright (C) 2008-2016 Konstantinos Margaritis -Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com) -Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) -Copyright (c) Fabian Giesen, 2016 -Copyright (C) 2010 Konstantinos Margaritis -Copyright (C) 2007 Michael Olbrich -Copyright (C) 2011 Benoit Jacob -Copyright (C) 2011-2012 Jitse Niesen -Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) -Copyright (C) 2008-2014 Gael Guennebaud -Copyright (C) 2010-2013 Hauke Heibel -Copyright (C) 2006-2008, 2010 Benoit Jacob -Copyright (C) 2010-2016 Gael Guennebaud 
-Copyright (C) 2009-2015 Gael Guennebaud -Copyright (C) 2009 Ricard Marxer -Copyright (C) 2009-2014 Gael Guennebaud -Copyright (C) 2010-2011 Gael Guennebaud -Copyright (C) 2009 Rohit Garg -Copyright (c) 2006, Timothy A. Davis. -Copyright (c) 1998-2003 by the University of Florida. -Copyright (C) 2012 Désiré Nuentsa-Wakam -Copyright (C) 2008-2012 Gael Guennebaud -LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved. -Copyright (C) 2010 Daniel Lowengrub -Copyright (C) EDF R&D, lun sep 30 14:23:20 CEST 2002 -Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -Copyright (C) 2009, 2010, 2013 Jitse Niesen -Copyright (C) 2011, 2013 Chen-Pang He -Copyright (C) 2009-2011, 2013 Jitse Niesen -Copyright (C) 2011, 2013 Jitse Niesen -Copyright (C) 2011 Chen-Pang He -Copyright (C) 2010, 2013 Jitse Niesen -Copyright (C) 2010-2014 Gael Guennebaud -Copyright (C) 2012 The Android Open Source Project -(C) Desire NUENTSA WAKAM, INRIA -Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002 -Copyright (C) 2012 Keir Mierle -Copyright (C) 1989, 1991 Free Software Foundation, Inc. -Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002 -Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002 -Copyright (C) EDF R&D, lun sep 30 14:23:27 CEST 2002 -Copyright (C) 2007 Free Software Foundation, Inc. -Copyright (C) 1991, 1999 Free Software Foundation, Inc. -Copyright (C) 2015 Benoit Jacob -Geometric Tools, LLC Copyright (c) 1998-2010 -Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 -Copyright (C) 2002-2007 Yves Renard -Copyright (C) 2012, 2014 Kolja Brix -Copyright (C) 1997-2001 Andrew Lumsdaine Lie-Quan Lee -Copyright (C) 2012 Desire NUENTSA WAKAM -Copyright (C) 2013 Hauke Heibel -Copyright (C) 2010-2011 Jitse Niesen -Intel Copyright (C) .... -Copyright (C) 2010-2017 Gael Guennebaud -Copyright (C) 20013 Gael Guennebaud -Copyright (C) 2008 Daniel Gomez Ferro -Copyright (C) 2013 Désiré Nuentsa-Wakam -Copyright (C) 2011-2015 Gael Guennebaud -Copyright (C) 20015 Gael Guennebaud -Copyright (C) 2014-2015 Gael Guennebaud - -Copyright 2019 The TensorFlow Authors. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - -From PyTorch: - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) -Copyright (c) 2006 Idiap Research Institute (Samy Bengio) -Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) - -From Caffe2: - -Copyright (c) 2016-present, Facebook Inc. All rights reserved. - -All contributions by Facebook: -Copyright (c) 2016 Facebook Inc. - -All contributions by Google: -Copyright (c) 2015 Google Inc. -All rights reserved. - -All contributions by Yangqing Jia: -Copyright (c) 2015 Yangqing Jia -All rights reserved. 
- -All contributions by Kakao Brain: -Copyright 2019-2020 Kakao Brain - -All contributions from Caffe: -Copyright(c) 2013, 2014, 2015, the respective contributors -All rights reserved. - -All other contributions: -Copyright(c) 2015, 2016 the respective contributors -All rights reserved. - -Caffe2 uses a copyright model similar to Caffe: each contributor holds -copyright over their contributions to Caffe2. The project versioning records -all such contribution and copyright details. If a contributor wants to further -mark their specific copyright on a particular contribution, they should -indicate their copyright solely in the commit message of the change when it is -committed. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - -Software: crnn e78bb7d -Copyright notice: -Copyright (c) 2017 Jieru Mei meijieru@gmail.com - -License: MIT License (MIT) -MIT License -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - - -Software: pytorch-deeplab-xception b7cbf5a -Copyright notice: -Copyright (c) 2018 Pyjcsx - -License: MIT License (MIT) -MIT License -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - - -Software: version 1.0.1 -Copyright notice: -Copyright (c) Soumith Chintala 2016 - -License: BSD 3-Clause License -BSD 3-Clause License -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -Software: EfficientNet-PyTorch 1.0 -Copyright notice: -Copyright Copyright [yyyy] [name of copyright owner] - -License: Apache License - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - - -Software: PSENet cec7e6 -Copyright notice: -Copyright [yyyy] [name of copyright owner] - -License: Apache License - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - -Software: examples v1.22.1 -Copyright notice: -Copyright (c) 2017, All rights reserved. - -License: BSD 3-Clause License -BSD 3-Clause License - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -Software: pytorch-image-models 0.1-regnet -Copyright notice: -Copyright Copyright [yyyy] [name of copyright owner] - -License: Apache License - - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - - -Software: DeepCTR-Torch 0.2.1 -Copyright notice: -Copyright Copyright [yyyy] [name of copyright owner] - -License: Apache License -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- -Software:Wizyoung 8776cf7 - -The MIT License (MIT) - -Copyright (c) 2020 Wizyoung - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -Software:PyTorch -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Software:tpu r1.15 -Software:tensorflow master -Software:NVIDIA master -Software:Tensorflow-densenet -Software:Training v0.7 results -Software:AWS - -Copyright 2017 The TensorFlow Authors. All rights reserved. -Copyright 2016 The TensorFlow Authors. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- -## Some of TensorFlow's code is derived from Caffe, which is subject to the following copyright notice: - -COPYRIGHT - -All contributions by the University of California: - -Copyright (c) 2014, The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: - -Copyright (c) 2014, the respective contributors -All rights reserved. - -Caffe uses a shared copyright model: each contributor holds copyright over -their contributions to Caffe. The project versioning records all such -contribution and copyright details. If a contributor wants to further mark -their specific copyright on a particular contribution, they should indicate -their copyright solely in the commit message of the change when it is -committed. - -LICENSE - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -CONTRIBUTION AGREEMENT - -By contributing to the BVLC/caffe repository through pull-request, comment, -or otherwise, the contributor releases their content to the +OPEN SOURCE SOFTWARE NOTICE + +Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country. + +Warranty Disclaimer +THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. 
+ +Copyright Notice and License Texts + +Software: Eigen 3.3.7 +Copyright notice: +Copyright (C) 2014 Benoit Steiner +Copyright (C) 2013 Christian Seiler +Copyright (C) 2015 Eugene Brevdo +Copyright (C) 2014-2015 Benoit Steiner +Copyright (C) 2015 Navdeep Jaitly +Copyright (C) 2014 Eric Martin +Copyright (C) 2015 Benoit Steiner +Copyright (C) 2016 Rasmus Munk Larsen +Copyright (C) 2016 Benoit Steiner +Copyright (C) 2015 Jianwei Cui +Copyright (C) 2016 Eugene Brevdo +Copyright (C) 2015 Ke Yang +Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd +Copyright (C) 2014 Navdeep Jaitly +Copyright (C) 2016 Igor Babuschkin +Copyright (C) 2016 Dmitry Vyukov +Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +Copyright (C) 2008 Gael Guennebaud +Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002 +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2008-2016 Gael Guennebaud +Copyright (C) 2009 Mark Borgerding mark a borgerding net +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2013 Desire Nuentsa +Copyright (C) 2013 Gael Guennebaud +Copyright (C) 2011 Gael Guennebaud +Copyright (C) 2012 Desire NUENTSA WAKAM +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2006-2010 Benoit Jacob +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 +Copyright (C) 2010 Manuel Yguel +Copyright (C) 2009 Claire Maurice +Copyright (C) 2010,2012 Jitse Niesen +Copyright (c) 2011, Intel Corporation. All rights reserved. +Copyright (C) 2012-2016 Gael Guennebaud +Copyright (C) 2016 Tobias Wood +Copyright (C) 2010 Jitse Niesen +Copyright (C) 2012 Alexey Korepanov +Copyright (C) 2010 Vincent Lejeune +Copyright (C) 2010 Gael Guennebaud +Copyright (C) 2010 Benoit Jacob +Copyright (C) 2017 Gael Guennebaud +Copyright (C) 2009-2010 Gael Guennebaud +Copyright (C) 2008 Benoit Jacob +Copyright (C) 2009 Mathieu Gautier +Copyright (C) 2010 Hauke Heibel +Copyright (C) 2009 Hauke Heibel +Copyright (C) 2008-2015 Gael Guennebaud +Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002 +Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002 +Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002 +Copyright (C) 2016 Konstantinos Margaritis +Copyright (C) 2007 Julien Pommier +Copyright (C) 2008-2011 Gael Guennebaud +Copyright (C) 2009 Keir Mierle +Copyright (C) 2011 Timothy E. Holy +Copyright (C) 2009 Hauke Heibel +Copyright (C) 2012 Desire Nuentsa +Copyright (C) 2014 Gael Guennebaud +Copyright (C) 2015 Tal Hadad +@copyright (c) 2009-2014 The University of Tennessee and The University of Tennessee Research Foundation. +@copyright (c) 2012-2016 Inria. All rights reserved. +@copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +Copyright 2007-2009 Kitware, Inc. +Copyright 2012-2013 Inria +Copyright 2012-2013 Emmanuel Agullo +Copyright 2012-2013 Mathieu Faverge +Copyright 2012 Cedric Castagnede +Copyright 2013-2016 Florent Pruvost +Copyright 2016 Codeplay Software Ltd. +Copyright (c) 2006, 2007 Montel Laurent, +Copyright (c) 2008, 2009 Gael Guennebaud, +Copyright (c) 2009 Boudewijn Rempt +@copyright (c) 2012-2014 Inria. All rights reserved. 
+Copyright 2013 Florent Pruvost +Copyright (c) 2010 Jitse Niesen, +Copyright (C) 2009 Benjamin Schindler +Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com) +Copyright (C) 2016 Benoit Steiner (benoit.steiner.goog@gmail.com) +Copyright (C) 2009 Thomas Capricelli +Copyright (C) 2012-2013 Desire Nuentsa +Copyright (C) 2012-2014 Gael Guennebaud +Copyright Jorge More - Argonne National Laboratory +Copyright Burt Garbow - Argonne National Laboratory +Copyright Ken Hillstrom - Argonne National Laboratory +Copyright (C) 2009 Ilya Baran +Copyright (c) 2010, Intel Corp. +Copyright (C) 2009-2010 Benoit Jacob +Copyright (C) 2013-2016 Gael Guennebaud +Copyright (C) 2013 Gauthier Brun +Copyright (C) 2013 Nicolas Carre +Copyright (C) 2013 Jean Ceccato +Copyright (C) 2013 Pierre Zoppitelli +Copyright (C) 2013 Jitse Niesen +Copyright (C) 2014-2017 Gael Guennebaud +Copyright (C) 2013-2014 Gael Guennebaud +Copyright (C) 2011-2014 Gael Guennebaud +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2015 Gael Guennebaud +Copyright (C) 2012 Gael Guennebaud +Copyright (c) 1994 by Xerox Corporation. All rights reserved. +Copyright (C) 2001 Intel Corporation +Copyright (c) 2001 Intel Corporation. +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2013 Christoph Hertzberg +Copyright (C) 2015 Eugene Brevdo +Copyright (C) 2016 +Mehdi Goli Codeplay Software Ltd. +Ralph Potter Codeplay Software Ltd. +Luke Iwanski Codeplay Software Ltd. +Copyright (C) 2014 Jianwei Cui +Copyright (C) 2015 Vijay Vasudevan +Copyright (C) 2015 +Mehdi Goli Codeplay Software Ltd. +Ralph Potter Codeplay Software Ltd. +Luke Iwanski Codeplay Software Ltd. +Copyright (C) 2014 Navdeep Jaitly +Copyright (C) 2011 Gael Guennebaud +Copyright (C) 2012 desire Nuentsa +Copyright (C) 2012 Kolja Brix +Copyright (C) 2011 Kolja Brix +Copyright (C) 2011 Andreas Platen +Copyright (C) 2012 Chen-Pang He +Copyright (C) 2009 Jitse Niesen +Copyright (C) 2009-2011 Jitse Niesen +Copyright (C) 2012, 2013 Chen-Pang He +Copyright (C) 2011 Jitse Niesen +Copyright (C) 2012 Giacomo Po +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2016 Gael Guennebaud +Copyright (C) 2010-2011 Hauke Heibel +Copyright (C) 2012 David Harmon +Copyright (C) 2007-2009 Benoit Jacob +Copyright (C) 2007-2010 Benoit Jacob +Copyright (C) 2008-2009 Benoit Jacob +Copyright (C) 2009 Kenneth Riddile +Copyright (C) 2010 Thomas Capricelli +Copyright (C) 2013 Pavel Holoborodko +Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 +Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002 +Copyright (C) 2006-2009 Benoit Jacob +Copyright (C) 2008-2010 Benoit Jacob +Copyright (c) 2008-2015 Pavel Holoborodko +Copyright (C) 20010-2011 Hauke Heibel +Copyright (c) 2006, Montel Laurent, +Copyright (c) 2007, Allen Winter, +Copyright (c) 2007, Alexander Neundorf, +Copyright (C) 2008 Guillaume Saupin +Copyright (C) 2008-2009 Guillaume Saupin +Copyright (C) 2009 Guillaume Saupin +Copyright (C) 2010-2016 Konstantinos Margaritis +Copyright (C) 2008-2016 Konstantinos Margaritis +Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com) +Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +Copyright (c) Fabian Giesen, 2016 +Copyright (C) 2010 Konstantinos Margaritis +Copyright (C) 2007 Michael Olbrich +Copyright (C) 2011 Benoit Jacob +Copyright (C) 2011-2012 Jitse Niesen +Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +Copyright (C) 2008-2014 Gael Guennebaud +Copyright (C) 2010-2013 Hauke Heibel +Copyright (C) 2006-2008, 2010 Benoit Jacob +Copyright (C) 2010-2016 Gael Guennebaud 
+Copyright (C) 2009-2015 Gael Guennebaud +Copyright (C) 2009 Ricard Marxer +Copyright (C) 2009-2014 Gael Guennebaud +Copyright (C) 2010-2011 Gael Guennebaud +Copyright (C) 2009 Rohit Garg +Copyright (c) 2006, Timothy A. Davis. +Copyright (c) 1998-2003 by the University of Florida. +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2008-2012 Gael Guennebaud +LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved. +Copyright (C) 2010 Daniel Lowengrub +Copyright (C) EDF R&D, lun sep 30 14:23:20 CEST 2002 +Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +Copyright (C) 2009, 2010, 2013 Jitse Niesen +Copyright (C) 2011, 2013 Chen-Pang He +Copyright (C) 2009-2011, 2013 Jitse Niesen +Copyright (C) 2011, 2013 Jitse Niesen +Copyright (C) 2011 Chen-Pang He +Copyright (C) 2010, 2013 Jitse Niesen +Copyright (C) 2010-2014 Gael Guennebaud +Copyright (C) 2012 The Android Open Source Project +(C) Desire NUENTSA WAKAM, INRIA +Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002 +Copyright (C) 2012 Keir Mierle +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002 +Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002 +Copyright (C) EDF R&D, lun sep 30 14:23:27 CEST 2002 +Copyright (C) 2007 Free Software Foundation, Inc. +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +Copyright (C) 2015 Benoit Jacob +Geometric Tools, LLC Copyright (c) 1998-2010 +Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 +Copyright (C) 2002-2007 Yves Renard +Copyright (C) 2012, 2014 Kolja Brix +Copyright (C) 1997-2001 Andrew Lumsdaine Lie-Quan Lee +Copyright (C) 2012 Desire NUENTSA WAKAM +Copyright (C) 2013 Hauke Heibel +Copyright (C) 2010-2011 Jitse Niesen +Intel Copyright (C) .... +Copyright (C) 2010-2017 Gael Guennebaud +Copyright (C) 20013 Gael Guennebaud +Copyright (C) 2008 Daniel Gomez Ferro +Copyright (C) 2013 Désiré Nuentsa-Wakam +Copyright (C) 2011-2015 Gael Guennebaud +Copyright (C) 20015 Gael Guennebaud +Copyright (C) 2014-2015 Gael Guennebaud + +Copyright 2019 The TensorFlow Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +From PyTorch: + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + +From Caffe2: + +Copyright (c) 2016-present, Facebook Inc. All rights reserved. + +All contributions by Facebook: +Copyright (c) 2016 Facebook Inc. + +All contributions by Google: +Copyright (c) 2015 Google Inc. +All rights reserved. + +All contributions by Yangqing Jia: +Copyright (c) 2015 Yangqing Jia +All rights reserved. 
+ +All contributions by Kakao Brain: +Copyright 2019-2020 Kakao Brain + +All contributions from Caffe: +Copyright(c) 2013, 2014, 2015, the respective contributors +All rights reserved. + +All other contributions: +Copyright(c) 2015, 2016 the respective contributors +All rights reserved. + +Caffe2 uses a copyright model similar to Caffe: each contributor holds +copyright over their contributions to Caffe2. The project versioning records +all such contribution and copyright details. If a contributor wants to further +mark their specific copyright on a particular contribution, they should +indicate their copyright solely in the commit message of the change when it is +committed. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America + and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +Software: crnn e78bb7d +Copyright notice: +Copyright (c) 2017 Jieru Mei meijieru@gmail.com + +License: MIT License (MIT) +MIT License +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + +Software: pytorch-deeplab-xception b7cbf5a +Copyright notice: +Copyright (c) 2018 Pyjcsx + +License: MIT License (MIT) +MIT License +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + + +Software: version 1.0.1 +Copyright notice: +Copyright (c) Soumith Chintala 2016 + +License: BSD 3-Clause License +BSD 3-Clause License +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +Software: EfficientNet-PyTorch 1.0 +Copyright notice: +Copyright Copyright [yyyy] [name of copyright owner] + +License: Apache License + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + +Software: PSENet cec7e6 +Copyright notice: +Copyright [yyyy] [name of copyright owner] + +License: Apache License + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +Software: examples v1.22.1 +Copyright notice: +Copyright (c) 2017, All rights reserved. + +License: BSD 3-Clause License +BSD 3-Clause License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +Software: pytorch-image-models 0.1-regnet +Copyright notice: +Copyright Copyright [yyyy] [name of copyright owner] + +License: Apache License + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +Software: DeepCTR-Torch 0.2.1 +Copyright notice: +Copyright Copyright [yyyy] [name of copyright owner] + +License: Apache License +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +Software:Wizyoung 8776cf7 + +The MIT License (MIT) + +Copyright (c) 2020 Wizyoung + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Software:PyTorch +BSD 3-Clause License + +Copyright (c) Soumith Chintala 2016, +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Software:tpu r1.15 +Software:tensorflow master +Software:NVIDIA master +Software:Tensorflow-densenet +Software:Training v0.7 results +Software:AWS + +Copyright 2017 The TensorFlow Authors. All rights reserved. +Copyright 2016 The TensorFlow Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +## Some of TensorFlow's code is derived from Caffe, which is subject to the following copyright notice: + +COPYRIGHT + +All contributions by the University of California: + +Copyright (c) 2014, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: + +Copyright (c) 2014, the respective contributors +All rights reserved. + +Caffe uses a shared copyright model: each contributor holds copyright over +their contributions to Caffe. The project versioning records all such +contribution and copyright details. If a contributor wants to further mark +their specific copyright on a particular contribution, they should indicate +their copyright solely in the commit message of the change when it is +committed. + +LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CONTRIBUTION AGREEMENT + +By contributing to the BVLC/caffe repository through pull-request, comment, +or otherwise, the contributor releases their content to the license and copyright terms herein. \ No newline at end of file